pandas.DataFrame
index & iteration
import numpy as np
import pandas as pd
# Demo data: three students, one column each for name, age, sex and score.
d = {
    'name': ['tom', 'jerry', 'lucy'],
    'age': [24, 22, 21],
    'sex': ['male', 'male', 'female'],
    'score': [55, 89, 77.5],
}
df1 = pd.DataFrame(d)

# Show the frame, then the dtypes chosen by convert_dtypes(); each section
# is followed by a separator line.
print(df1, '------------', sep='\n')
print(df1.convert_dtypes().dtypes, '------------', sep='\n')
name age sex score
0 tom 24 male 55.0
1 jerry 22 male 89.0
2 lucy 21 female 77.5
------------
name string
age Int64
sex string
score float64
dtype: object
iloc[]
# Work on a copy so the iloc experiments below leave df1 untouched.
df2 = df1.copy()
print(df2.iloc[0]) # Select the first row: a single value inside the brackets indexes a row
print(type(df2.iloc[0])) # <class 'pandas.core.series.Series'>
name tom
age 24
sex male
score 55
Name: 0, dtype: object
<class 'pandas.core.series.Series'>
print(df2.iloc[0,1]) # Two comma-separated elements inside the brackets index a single value (row 0, column 1)
24
print(df2.iloc[[0,1],1]) # Rows 0 and 1 of column 1 (the age column)
0 24
1 22
Name: age, dtype: int64
print(df2.iloc[:,[0,3]]) # The colon before the comma is a slice meaning all rows (1:3 would take rows 1 and 2 instead); columns 0 and 3 are selected
name score
0 tom 55.0
1 jerry 89.0
2 lucy 77.5
print(df2.iloc[1:3,0:2]) # Rows 1 and 2, columns 0 and 1
name age
1 jerry 22
2 lucy 21
# Passing a one-element list selects row 0 but keeps the result two-dimensional.
print(df2.iloc[[0]])
print(type(df2.iloc[[0]])) # Double brackets act like a slice: the result is a DataFrame
name age sex score
0 tom 24 male 55.0
<class 'pandas.core.frame.DataFrame'>
print(df2.iloc[[0,1]])
print(type(df2.iloc[[0,1]])) # Rows 0 and 1
name age sex score
0 tom 24 male 55.0
1 jerry 22 male 89.0
<class 'pandas.core.frame.DataFrame'>
print(df2.iloc[[True,False,True]]) # Boolean mask: keeps rows 0 and 2
name age sex score
0 tom 24 male 55.0
2 lucy 21 female 77.5
print(df2.iloc[lambda x:x.index %2==0]) # x is the lambda's parameter; iloc passes the frame to it, so x has an index attribute. Here the index is the default 0-2 -- open question: could it be replaced, e.g. with row1/row2/row3?
name age sex score
0 tom 24 male 55.0
2 lucy 21 female 77.5
insert()
# Add a 'class' column as the last column. The later cells (items(),
# iterrows(), pop('class')) all rely on this column existing, so the call must
# actually run. The guard lets the cell be re-executed: insert() raises
# ValueError when the column is already present.
# NOTE(review): 'calss1' looks like a typo for 'class1'; it is kept as-is so the
# result matches the recorded output.
if 'class' not in df2.columns:
    df2.insert(len(df2.columns), 'class', ['class1', 'class2', 'calss1'])
print(df2)
name age sex score class
0 tom 24 male 55.0 class1
1 jerry 22 male 89.0 class2
2 lucy 21 female 77.5 calss1
# items() walks the frame column by column, yielding
# (column name, column Series) tuples.
for item in df2.items():
    print(item)
('name', 0 tom
1 jerry
2 lucy
Name: name, dtype: object)
('age', 0 24
1 22
2 21
Name: age, dtype: int64)
('sex', 0 male
1 male
2 female
Name: sex, dtype: object)
('score', 0 55.0
1 89.0
2 77.5
Name: score, dtype: float64)
('class', 0 class1
1 class2
2 calss1
Name: class, dtype: object)
# DataFrame.iteritems() was deprecated in pandas 1.5 and removed in pandas 2.0.
# items() is the supported spelling and yields the same
# (column name, column Series) tuples.
for item in df2.items():
    print(item)
('name', 0 tom
1 jerry
2 lucy
Name: name, dtype: object)
('age', 0 24
1 22
2 21
Name: age, dtype: int64)
('sex', 0 male
1 male
2 female
Name: sex, dtype: object)
('score', 0 55.0
1 89.0
2 77.5
Name: score, dtype: float64)
('class', 0 class1
1 class2
2 calss1
Name: class, dtype: object)
# keys() iterates over the column labels, one per iteration.
for key in df2.keys():
    print(key)  # column header
# df2.columns holds the same labels that keys() yields, as a single Index.
print(df2.columns)
name
age
sex
score
class
Index(['name', 'age', 'sex', 'score', 'class'], dtype='object')
# iterrows() yields one tuple per row: (row label, row contents as a Series).
for row in df2.iterrows():
    print(row, type(row))
(0, name tom
age 24
sex male
score 55
class class1
Name: 0, dtype: object) <class 'tuple'>
(1, name jerry
age 22
sex male
score 89
class class2
Name: 1, dtype: object) <class 'tuple'>
(2, name lucy
age 21
sex female
score 77.5
class calss1
Name: 2, dtype: object) <class 'tuple'>
df2.pop('class') # Remove the 'class' column from df2
print(df2)
name age sex score
0 tom 24 male 55.0
1 jerry 22 male 89.0
2 lucy 21 female 77.5
print(df2.tail(2)) # Show the last 2 rows
name age sex score
1 jerry 22 male 89.0
2 lucy 21 female 77.5
# Animal table used to demonstrate cross-sections (xs) on a MultiIndex.
d1 = {
    'num_legs': [4, 4, 2, 2],
    'num_wings': [0, 0, 2, 2],
    'class': ['mammal', 'mammal', 'mammal', 'bird'],
    'animal': ['cat', 'dog', 'bat', 'penguin'],
    'locomotion': ['walks', 'walks', 'flies', 'walks'],
}
# Three-level index (class, animal, locomotion); num_legs and num_wings stay
# as the data columns.
df3 = pd.DataFrame(d1).set_index(['class', 'animal', 'locomotion'])

print(df3, '---------------', sep='\n')
# Key on the first index level: mammals only.
print(df3.xs('mammal'), '---------------', sep='\n')
# Tuple key covers the first two levels: the dog among the mammals.
# NOTE(review): the index is not sorted, and the recorded run shows a
# PerformanceWarning ("indexing past lexsort depth"); sorting the index first
# would presumably avoid it but would also change the printed row order.
print(df3.xs(('mammal', 'dog')), '---------------', sep='\n')
# level=1 matches the key against the second index level (animal).
print(df3.xs('cat', level=1))
num_legs num_wings
class animal locomotion
mammal cat walks 4 0
dog walks 4 0
bat flies 2 2
bird penguin walks 2 2
---------------
num_legs num_wings
animal locomotion
cat walks 4 0
dog walks 4 0
bat flies 2 2
---------------
num_legs num_wings
locomotion
walks 4 0
---------------
num_legs num_wings
class locomotion
mammal walks 4 0
c:\python\lib\site-packages\IPython\core\interactiveshell.py:2922: PerformanceWarning: indexing past lexsort depth may impact performance.
return runner(coro)
print(df2.get('name'),type(df2.get('name'))) # get('name') fetches the values of the 'name' column (a Series)
0 tom
1 jerry
2 lucy
Name: name, dtype: object <class 'pandas.core.series.Series'>
# Load two workbooks from the desktop and print them.
# The paths are raw strings (r'...') so the Windows backslashes stay literal:
# in a normal string '\U' starts a \UXXXXXXXX unicode escape (the SyntaxError
# recorded for this cell) and '\t' would silently become a tab character.
df4 = pd.read_excel(r'C:\Users\lyq\Desktop\test1.xls')
df5 = pd.read_excel(r'C:\Users\lyq\Desktop\test2.xls')
print(df4)
print(df5)
File "<ipython-input-89-67aca58fab8b>", line 1
df4 = pd.read_excel('C:\Users\lyq\Desktop\test1.xls')
^
SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape