pandas.DataFrame

index & iteration

import numpy as np
import pandas as pd 
# Build a small demo frame from a dict of equal-length columns.
d = {
    'name': ['tom', 'jerry', 'lucy'],
    'age': [24, 22, 21],
    'sex': ['male', 'male', 'female'],
    'score': [55, 89, 77.5],
}
df1 = pd.DataFrame(d)
print(df1)
print('------------')
# convert_dtypes() returns a converted copy (df1 itself is not modified);
# only the resulting dtypes are printed here.
print(df1.convert_dtypes().dtypes)
print('------------')

    name  age     sex  score
0    tom   24    male   55.0
1  jerry   22    male   89.0
2   lucy   21  female   77.5
------------
name      string
age        Int64
sex       string
score    float64
dtype: object
iloc[]
df2 = df1.copy()
# A single integer inside iloc[] selects a row; the result is a Series.
first_row = df2.iloc[0]
print(first_row)
print(type(first_row))  # <class 'pandas.core.series.Series'>
name      tom
age        24
sex      male
score      55
Name: 0, dtype: object
<class 'pandas.core.series.Series'>
print(df2.iloc[0,1]) # two comma-separated positions (row, column) inside the brackets select a single value
24
print(df2.iloc[[0,1],1]) # rows 0 and 1 of column 1 (the age column)
0    24
1    22
Name: age, dtype: int64
print(df2.iloc[:,[0,3]]) # the ':' before the comma is a slice meaning all rows (1:3 would select rows 1 and 2 instead); the list after the comma selects columns 0 and 3
    name  score
0    tom   55.0
1  jerry   89.0
2   lucy   77.5
print(df2.iloc[1:3,0:2]) # rows 1 and 2, columns 0 and 1
    name  age
1  jerry   22
2   lucy   21
# A list inside iloc[] (double brackets) keeps the result two-dimensional:
# a one-row DataFrame rather than a Series.
row_as_frame = df2.iloc[[0]]
print(row_as_frame)
print(type(row_as_frame))
  name  age   sex  score
0  tom   24  male   55.0
<class 'pandas.core.frame.DataFrame'>
# A list of positions selects several rows at once (here rows 0 and 1).
first_two_rows = df2.iloc[[0, 1]]
print(first_two_rows)
print(type(first_two_rows))
    name  age   sex  score
0    tom   24  male   55.0
1  jerry   22  male   89.0
<class 'pandas.core.frame.DataFrame'>
print(df2.iloc[[True,False,True]]) # a boolean mask selects the rows marked True: rows 0 and 2
   name  age     sex  score
0   tom   24    male   55.0
2  lucy   21  female   77.5
print(df2.iloc[lambda x:x.index %2==0]) # x is the lambda's parameter; iloc calls the lambda with the DataFrame, so x has an .index attribute. The index here is the default 0-2. NOTE(review): with string labels such as row1/row2/row3 the modulo test presumably no longer works -- confirm
   name  age     sex  score
0   tom   24    male   55.0
2  lucy   21  female   77.5
insert()
# Insert a 'class' column at position 4 (after 'score').
# insert() raises ValueError when the column already exists, so the call is
# guarded to keep this cell safe to re-run instead of commenting it out.
# The printed frame and the later df2.pop('class') both rely on this column.
if 'class' not in df2.columns:
    df2.insert(4, 'class', ['class1', 'class2', 'calss1'])
print(df2)
    name  age     sex  score   class
0    tom   24    male   55.0  class1
1  jerry   22    male   89.0  class2
2   lucy   21  female   77.5  calss1
# items() yields one (column name, Series) tuple per column.
for column_pair in df2.items():
    print(column_pair)
('name', 0      tom
1    jerry
2     lucy
Name: name, dtype: object)
('age', 0    24
1    22
2    21
Name: age, dtype: int64)
('sex', 0      male
1      male
2    female
Name: sex, dtype: object)
('score', 0    55.0
1    89.0
2    77.5
Name: score, dtype: float64)
('class', 0    class1
1    class2
2    calss1
Name: class, dtype: object)
# DataFrame.iteritems() was deprecated in pandas 1.5 and removed in 2.0;
# items() yields the same (column name, Series) tuples on every version.
for item in df2.items():
    print(item)
('name', 0      tom
1    jerry
2     lucy
Name: name, dtype: object)
('age', 0    24
1    22
2    21
Name: age, dtype: int64)
('sex', 0      male
1      male
2    female
Name: sex, dtype: object)
('score', 0    55.0
1    89.0
2    77.5
Name: score, dtype: float64)
('class', 0    class1
1    class2
2    calss1
Name: class, dtype: object)
# keys() returns the column labels, so this prints one column header per line.
for column_name in df2.keys():
    print(column_name)
print(df2.columns)  # the same labels as keys(), shown as a single Index
name
age
sex
score
class
Index(['name', 'age', 'sex', 'score', 'class'], dtype='object')
# iterrows() yields a (row label, row Series) tuple for each row.
for labelled_row in df2.iterrows():
    print(labelled_row, type(labelled_row))
(0, name        tom
age          24
sex        male
score        55
class    class1
Name: 0, dtype: object) <class 'tuple'>
(1, name      jerry
age          22
sex        male
score        89
class    class2
Name: 1, dtype: object) <class 'tuple'>
(2, name       lucy
age          21
sex      female
score      77.5
class    calss1
Name: 2, dtype: object) <class 'tuple'>
df2.pop('class') # remove the 'class' column in place (pop also returns the removed column; unused here)
print(df2)
    name  age     sex  score
0    tom   24    male   55.0
1  jerry   22    male   89.0
2   lucy   21  female   77.5
print(df2.tail(2)) # show the last 2 rows
    name  age     sex  score
1  jerry   22    male   89.0
2   lucy   21  female   77.5
# Demo data for cross-section (xs) lookups on a three-level MultiIndex.
d1 = {
    'num_legs': [4, 4, 2, 2],
    'num_wings': [0, 0, 2, 2],
    'class': ['mammal', 'mammal', 'mammal', 'bird'],
    'animal': ['cat', 'dog', 'bat', 'penguin'],
    'locomotion': ['walks', 'walks', 'flies', 'walks'],
}
# Move the three label columns into the index: class > animal > locomotion.
df3 = pd.DataFrame(data=d1).set_index(['class', 'animal', 'locomotion'])
separator = '---------------'
print(df3)
print(separator)
# A single key matches the first index level: mammals only.
print(df3.xs('mammal'))
print(separator)
# A tuple matches the leading levels in order: the dog among the mammals.
print(df3.xs(('mammal', 'dog')))
print(separator)
# level=1 matches the key against the second level ('animal') instead.
print(df3.xs('cat', level=1))

                           num_legs  num_wings
class  animal  locomotion                     
mammal cat     walks              4          0
       dog     walks              4          0
       bat     flies              2          2
bird   penguin walks              2          2
---------------
                   num_legs  num_wings
animal locomotion                     
cat    walks              4          0
dog    walks              4          0
bat    flies              2          2
---------------
            num_legs  num_wings
locomotion                     
walks              4          0
---------------
                   num_legs  num_wings
class  locomotion                     
mammal walks              4          0


c:\python\lib\site-packages\IPython\core\interactiveshell.py:2922: PerformanceWarning: indexing past lexsort depth may impact performance.
  return runner(coro)
# get() looks a column up by label; the printed type shows it is a Series.
name_column = df2.get('name')
print(name_column, type(name_column))
0      tom
1    jerry
2     lucy
Name: name, dtype: object <class 'pandas.core.series.Series'>
# Raw strings keep the Windows backslashes literal. In a normal string
# literal '\U' starts a unicode escape, which raises
# "SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes",
# and '\t' would silently turn into a tab character.
df4 = pd.read_excel(r'C:\Users\lyq\Desktop\test1.xls')
df5 = pd.read_excel(r'C:\Users\lyq\Desktop\test2.xls')
print(df4)
print(df5)
  File "<ipython-input-89-67aca58fab8b>", line 1
    df4 = pd.read_excel('C:\Users\lyq\Desktop\test1.xls')
                        ^
SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape
posted on 2020-09-20 22:51  94小渣渣  阅读(151)  评论(0)  编辑  收藏  举报