pandas demo 示例

 

#构造

import pandas as pd
import pickle
import numpy as np

# Row index: six consecutive calendar days starting at 2018-03-10.
dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of random samples drawn with np.random.randn.
samples = np.random.randn(6, 4)
df = pd.DataFrame(samples, index=dates, columns=list('ABCD'))
print(df)

输出:

                   A         B         C         D
2018-03-10  0.474957 -0.789351  0.827287  0.632483
2018-03-11 -0.147661  2.093837  0.565236 -0.282967
2018-03-12  0.871652 -0.492781  0.213760  1.046995
2018-03-13  0.735719  0.827546  0.139042  1.764413
2018-03-14  0.442560 -0.065412 -1.209434  0.690070
2018-03-15 -0.303560  1.389159 -0.397401 -0.650598

 

#切片选择指定行

import pandas as pd
import pickle
import numpy as np

dates = pd.date_range(start='20180310', periods=6)
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)  # 6 rows x 4 columns of random values
print(df)
# A plain integer slice on the frame picks rows by position: rows 0, 1 and 2.
head_rows = df[0:3]
print(head_rows)

输出

                   A         B         C         D
2018-03-10  0.474957 -0.789351  0.827287  0.632483
2018-03-11 -0.147661  2.093837  0.565236 -0.282967
2018-03-12  0.871652 -0.492781  0.213760  1.046995
2018-03-13  0.735719  0.827546  0.139042  1.764413
2018-03-14  0.442560 -0.065412 -1.209434  0.690070
2018-03-15 -0.303560  1.389159 -0.397401 -0.650598
                   A         B         C         D
2018-03-10  0.474957 -0.789351  0.827287  0.632483
2018-03-11 -0.147661  2.093837  0.565236 -0.282967
2018-03-12  0.871652 -0.492781  0.213760  1.046995

 

#通过行标记获取指定行(包含两端)

import pandas as pd
import pickle
import numpy as np

column_labels = ['A', 'B', 'C', 'D']
dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of random values, one row per date.
df = pd.DataFrame(np.random.randn(6, 4), columns=column_labels, index=dates)
print(df)
# Slicing by row labels keeps BOTH endpoints: 2018-03-11 through 2018-03-13.
labelled_rows = df['20180311':'20180313']
print(labelled_rows)

输出

                   A         B         C         D
2018-03-10  0.474957 -0.789351  0.827287  0.632483
2018-03-11 -0.147661  2.093837  0.565236 -0.282967
2018-03-12  0.871652 -0.492781  0.213760  1.046995
2018-03-13  0.735719  0.827546  0.139042  1.764413
2018-03-14  0.442560 -0.065412 -1.209434  0.690070
2018-03-15 -0.303560  1.389159 -0.397401 -0.650598
                   A         B         C         D
2018-03-11 -0.147661  2.093837  0.565236 -0.282967
2018-03-12  0.871652 -0.492781  0.213760  1.046995
2018-03-13  0.735719  0.827546  0.139042  1.764413

 

#输出指定行指定列的数据

import pandas as pd
import pickle
import numpy as np

dates = pd.date_range('20180310', periods=6)
random_values = np.random.randn(6, 4)  # 6 rows x 4 columns
df = pd.DataFrame(random_values, index=dates, columns=list('ABCD'))
print(df)
# .loc with one row label and a list of column labels yields that row's
# values for the listed columns.
wanted_columns = ['A', 'B', 'C', 'D']
print(df.loc['20180312', wanted_columns])

输出

                   A         B         C         D
2018-03-10  0.474957 -0.789351  0.827287  0.632483
2018-03-11 -0.147661  2.093837  0.565236 -0.282967
2018-03-12  0.871652 -0.492781  0.213760  1.046995
2018-03-13  0.735719  0.827546  0.139042  1.764413
2018-03-14  0.442560 -0.065412 -1.209434  0.690070
2018-03-15 -0.303560  1.389159 -0.397401 -0.650598

A    0.871652
B   -0.492781
C    0.213760
D    1.046995
Name: 2018-03-12 00:00:00, dtype: float64

 


#输出第四行第二列的数据(iloc 的下标从 0 开始,iloc[3, 1] 即第四行、第二列)

import pandas as pd
import pickle
import numpy as np

dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of random values, one row per date.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)
# .iloc positions are zero-based, so [3, 1] is the FOURTH row (2018-03-13) and
# the SECOND column ('B'). The third row / first column would be df.iloc[2, 0].
print(df.iloc[3, 1])

输出

                   A         B         C         D
2018-03-10  0.474957 -0.789351  0.827287  0.632483
2018-03-11 -0.147661  2.093837  0.565236 -0.282967
2018-03-12  0.871652 -0.492781  0.213760  1.046995
2018-03-13  0.735719  0.827546  0.139042  1.764413
2018-03-14  0.442560 -0.065412 -1.209434  0.690070
2018-03-15 -0.303560  1.389159 -0.397401 -0.650598

0.8275459967949839

 

#df.A 选择某列

import pandas as pd
import pickle
import numpy as np

dates = pd.date_range(start='20180310', periods=6)
# 6 rows x 4 columns of random values.
df = pd.DataFrame(data=np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(df)

# Select a single column; attribute access (df.A) is an alternative spelling.
column_a = df['A']
print(column_a)

输出

                   A         B         C         D
2018-03-10 -1.537480  1.082599  0.174229 -1.841898
2018-03-11 -1.691014 -0.164473 -2.199268 -1.488417
2018-03-12 -1.324199 -0.420854  0.104982  0.754717
2018-03-13  0.138477  1.003904 -0.437110 -2.542149
2018-03-14 -1.049416  0.318146  1.249720  0.781054
2018-03-15 -1.595190 -0.391273  0.783752 -1.225756
2018-03-10   -1.537480
2018-03-11   -1.691014
2018-03-12   -1.324199
2018-03-13    0.138477
2018-03-14   -1.049416
2018-03-15   -1.595190
Freq: D, Name: A, dtype: float64

 

#进行切片选择,指定行,指定列

import pandas as pd
import pickle
import numpy as np

dates = pd.date_range('20180310', periods=6)
values = np.random.randn(6, 4)  # 6 rows x 4 columns
df = pd.DataFrame(values, index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# Positional slicing on both axes: row positions 2-4 and column positions 0-1
# (the stop position of each slice is excluded).
sub_frame = df.iloc[2:5, 0:2]
print(sub_frame)

输出

                   A         B         C         D
2018-03-10 -1.537480  1.082599  0.174229 -1.841898
2018-03-11 -1.691014 -0.164473 -2.199268 -1.488417
2018-03-12 -1.324199 -0.420854  0.104982  0.754717
2018-03-13  0.138477  1.003904 -0.437110 -2.542149
2018-03-14 -1.049416  0.318146  1.249720  0.781054
2018-03-15 -1.595190 -0.391273  0.783752 -1.225756

                   A         B
2018-03-12 -1.324199 -0.420854
2018-03-13  0.138477  1.003904
2018-03-14 -1.049416  0.318146

 

#进行不连续筛选

import pandas as pd
import pickle
import numpy as np

dates = pd.date_range(start='20180310', periods=6)
df = pd.DataFrame(
    np.random.randn(6, 4),  # 6 rows x 4 columns
    index=dates,
    columns=list('ABCD'),
)
print(df)

# Non-contiguous selection: explicit lists of row and column positions.
row_positions = [1, 2, 4]
col_positions = [0, 2]
print(df.iloc[row_positions, col_positions])

输出

                   A         B         C         D
2018-03-10  0.900440 -0.062287 -1.483173 -1.586545
2018-03-11 -0.351609 -2.337686  0.471770  0.122194
2018-03-12  1.598436  0.795936  1.102541 -0.471931
2018-03-13  2.753501  0.184064  0.610561 -0.577957
2018-03-14 -2.081754  0.666256  0.345566  0.969266
2018-03-15  0.089630 -0.310928 -0.439767  0.944149
                   A         C
2018-03-11 -0.351609  0.471770
2018-03-12  1.598436  1.102541
2018-03-14 -2.081754  0.345566

 

#筛选出df.A大于0的元素 布尔条件筛选

import pandas as pd
import pickle
import numpy as np

dates = pd.date_range('20180310', periods=6)
noise = np.random.randn(6, 4)  # 6 rows x 4 columns
df = pd.DataFrame(noise, index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# Boolean filtering: keep only the rows whose value in column A is positive.
a_is_positive = df.A > 0
print(df[a_is_positive])

输出

                   A         B         C         D
2018-03-10  0.892268  0.713791 -0.144297  0.739862
2018-03-11  0.991796 -1.688081  1.333420 -0.524965
2018-03-12  2.251776 -1.514738 -0.720530  1.052735
2018-03-13 -0.297195 -0.945455 -1.796431  2.998356
2018-03-14 -0.236509 -0.369757 -0.438734  0.408940
2018-03-15  0.498061  0.778591 -0.282689  1.879702
                   A         B         C         D
2018-03-10  0.892268  0.713791 -0.144297  0.739862
2018-03-11  0.991796 -1.688081  1.333420 -0.524965
2018-03-12  2.251776 -1.514738 -0.720530  1.052735
2018-03-15  0.498061  0.778591 -0.282689  1.879702

 

#将df.A大于0的行整行的值都改为999

import pandas as pd
import pickle
import numpy as np

dates = pd.date_range(start='20180310', periods=6)
# 6 rows x 4 columns of random values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(df)

# Assigning through a boolean row mask overwrites EVERY column of the rows
# where A > 0, not just column A.
a_is_positive = df.A > 0
df[a_is_positive] = 999
print(df)

输出

                   A         B         C         D
2018-03-10 -0.587337  0.166438 -1.536473 -2.118008
2018-03-11  0.327482 -0.383389  0.353157  0.592067
2018-03-12 -0.483211 -2.066614 -0.313845  0.989347
2018-03-13  1.230698 -1.196974 -1.465180  0.585245
2018-03-14  0.180381 -1.289805  0.264123  0.731016
2018-03-15  0.288694 -1.318865 -1.550989  0.467802
                     A           B           C           D
2018-03-10   -0.587337    0.166438   -1.536473   -2.118008
2018-03-11  999.000000  999.000000  999.000000  999.000000
2018-03-12   -0.483211   -2.066614   -0.313845    0.989347
2018-03-13  999.000000  999.000000  999.000000  999.000000
2018-03-14  999.000000  999.000000  999.000000  999.000000
2018-03-15  999.000000  999.000000  999.000000  999.000000

 

#新增一列不赋值

import pandas as pd
import pickle
import numpy as np

dates = pd.date_range('20180310', periods=6)
base_values = np.random.randn(6, 4)  # 6 rows x 4 columns
df = pd.DataFrame(base_values, index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# Append a new column F whose cells are all NaN (a placeholder with no data yet).
df['F'] = np.nan
print(df)

输出

                   A         B         C         D
2018-03-10 -0.227356  1.229450  0.488290 -0.910271
2018-03-11  0.756934 -0.489924  1.125287  0.148251
2018-03-12 -1.157556 -0.703575  1.488778 -0.713087
2018-03-13  0.942155  0.972845 -1.765062  0.991459
2018-03-14  1.053055 -0.685858  0.604448  0.837986
2018-03-15  0.809910  0.771260  0.674058  0.420373
                   A         B         C         D   F
2018-03-10 -0.227356  1.229450  0.488290 -0.910271 NaN
2018-03-11  0.756934 -0.489924  1.125287  0.148251 NaN
2018-03-12 -1.157556 -0.703575  1.488778 -0.713087 NaN
2018-03-13  0.942155  0.972845 -1.765062  0.991459 NaN
2018-03-14  1.053055 -0.685858  0.604448  0.837986 NaN
2018-03-15  0.809910  0.771260  0.674058  0.420373 NaN

 

#新增一列赋值

import pandas as pd
import pickle
import numpy as np

dates = pd.date_range('20180310', periods=6)
# 6 rows x 4 columns of random values.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(df)

# Append column E from a Series carrying the same six dates as its index, so
# the values 1..6 line up with the frame's rows in order.
new_column = pd.Series([1, 2, 3, 4, 5, 6], index=dates)
df['E'] = new_column
print(df)

输出

                   A         B         C         D
2018-03-10 -0.565898  0.647803  1.018365 -1.269129
2018-03-11 -1.049725  0.718618  0.745133 -2.976616
2018-03-12 -0.859447 -0.686062  0.332352 -0.065416
2018-03-13 -0.291780  1.144493 -1.387311 -0.752532
2018-03-14  0.469711  0.129786  0.677650  0.723333
2018-03-15  0.876061  0.441140  1.566190 -1.628274
                   A         B         C         D  E
2018-03-10 -0.565898  0.647803  1.018365 -1.269129  1
2018-03-11 -1.049725  0.718618  0.745133 -2.976616  2
2018-03-12 -0.859447 -0.686062  0.332352 -0.065416  3
2018-03-13 -0.291780  1.144493 -1.387311 -0.752532  4
2018-03-14  0.469711  0.129786  0.677650  0.723333  5
2018-03-15  0.876061  0.441140  1.566190 -1.628274  6

 

 

posted @ 2019-01-25 14:49  anobscureretreat  阅读(949)  评论(0)    收藏  举报