原文地址:https://github.com/wk2014/PandasLearnClass/blob/master/DataFrame.Apply.ipynb


from pandas import DataFrame, Series
import pandas as pd
import numpy as np
# Demo data: a 4x3 frame of standard-normal samples, one row per state
# and one column per letter.
state_names = ['Utah', 'Ohio', 'Texas', 'Oregon']
column_names = ['b', 'd', 'e']
frame = DataFrame(
    np.random.randn(len(state_names), len(column_names)),
    index=state_names,
    columns=column_names,
)
frame
               b         d         e
Utah    1.011072 -1.198988 -0.794338
Ohio    1.611431  0.149355 -1.316985
Texas   0.671086 -1.427144 -1.764187
Oregon -0.496996 -0.279463  0.523360
 
# NumPy ufuncs (element-wise array methods) can be applied directly to pandas objects
np.abs(frame)


               b         d         e
Utah    1.011072  1.198988  0.794338
Ohio    1.611431  0.149355  1.316985
Texas   0.671086  1.427144  1.764187
Oregon  0.496996  0.279463  0.523360
 
# Round every element of the frame to two decimal places
np.round(frame,decimals=2)


           b     d     e
Utah    1.01 -1.20 -0.79
Ohio    1.61  0.15 -1.32
Texas   0.67 -1.43 -1.76
Oregon -0.50 -0.28  0.52
 
# Applying an element-wise Python function
# NOTE: the name shadows the builtin `format`; it is kept because the
# following cells pass this function by that name.
def format(x):
    """Return *x* rendered as a string with two decimal places."""
    return '%.2f' % x
# Apply the formatter to every cell of the frame.
# NOTE(review): DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map; left unchanged here for compatibility with older pandas releases.
frame.applymap(format)


           b     d     e
Utah    1.01 -1.20 -0.79
Ohio    1.61  0.15 -1.32
Texas   0.67 -1.43 -1.76
Oregon -0.50 -0.28  0.52
# Series.map applies the same formatter element-wise to a single column
frame['e'].map(format)
Utah      -0.79
Ohio      -1.32
Texas     -1.76
Oregon     0.52
Name: e, dtype: object
# Apply a function to the 1-D arrays formed by each row or column ==> frame.apply(f)
# 1. Returning a scalar value
def f(x):
    """Return the spread of *x*: its maximum minus its minimum."""
    return x.max() - x.min()
# Broadcast along the rows, i.e. operate on each column (no axis given)
frame.apply(f)
b    2.108427
d    1.576499
e    2.287547
dtype: float64
 
# Broadcast along the columns, i.e. operate on each row (axis=1)
frame.apply(f,axis=1)
Utah      2.210060
Ohio      2.928416
Texas     2.435273
Oregon    1.020355
dtype: float64
 
# 2. Returning a plain Series object
def f2(x):
    """Return the minimum and maximum of *x* as a Series labelled 'min' and 'max'."""
    lowest = x.min()
    highest = x.max()
    return Series([lowest, highest], index=['min', 'max'])
# f2 returns a Series per column, so the result is a DataFrame with rows 'min' and 'max'
frame.apply(f2)


            b         d         e
min -0.496996 -1.427144 -1.764187
max  1.611431  0.149355  0.523360
 
# 3. Returning a Series with a MultiIndex
def f3(x):
    """Summarise *x* as a MultiIndex Series of its extreme values and their labels.

    The result has four entries, indexed by (statistic, field):
    ('max', 'city'), ('max', 'value'), ('min', 'city'), ('min', 'value').
    'city' is the index label at which the extreme occurs and 'value' is the
    extreme itself.
    """
    # from_product builds the same four (statistic, field) pairs, in the same
    # order, as the explicit levels/labels constructor did; the `labels=`
    # keyword was renamed to `codes=` and no longer exists in current pandas.
    mIndex = pd.MultiIndex.from_product([['max', 'min'], ['city', 'value']])
    # idxmax/idxmin return the index *label*; argmax/argmin return the integer
    # position in current pandas, which would lose the city name.
    se = Series([x.idxmax(), x.max(), x.idxmin(), x.min()], index=mIndex)
    return se
# One column of results per frame column; rows carry the two-level (statistic, field) index
frame.apply(f3)


                  b         d        e
max city       Ohio      Ohio   Oregon
    value   1.61143  0.149355  0.52336
min city     Oregon     Texas    Texas
    value -0.496996  -1.42714 -1.76419
 
# Returning a Series with a MultiIndex (same function, result unstacked afterwards)
def f3(x):
    """Summarise *x* as a MultiIndex Series of its extreme values and their labels.

    The result has four entries, indexed by (statistic, field):
    ('max', 'city'), ('max', 'value'), ('min', 'city'), ('min', 'value').
    'city' is the index label at which the extreme occurs and 'value' is the
    extreme itself.
    """
    # from_product builds the same four (statistic, field) pairs, in the same
    # order, as the explicit levels/labels constructor did; the `labels=`
    # keyword was renamed to `codes=` and no longer exists in current pandas.
    mIndex = pd.MultiIndex.from_product([['max', 'min'], ['city', 'value']])
    # idxmax/idxmin return the index *label*; argmax/argmin return the integer
    # position in current pandas, which would lose the city name.
    se = Series([x.idxmax(), x.max(), x.idxmin(), x.min()], index=mIndex)
    return se
# unstack() pivots the inner index level ('city'/'value') into the columns
frame.apply(f3).unstack()


          b               d               e
       city     value  city     value    city    value
max    Ohio   1.61143  Ohio  0.149355  Oregon  0.52336
min  Oregon -0.496996 Texas  -1.42714   Texas -1.76419
posted on 2018-05-17 14:13  水利IT人  阅读(349)  评论(0)    收藏  举报