pandas 数值计算

# Basic parameters of the reduction methods: axis and skipna.
# key3 deliberately mixes integers and strings, so pandas stores it with
# object dtype; it cannot be averaged and must be excluded from the means.
df = pd.DataFrame(
    {
        'key1': [4, 5, 3, np.nan, 2],
        'key2': [1, 2, np.nan, 4, 5],
        'key3': [1, 2, 3, 'j', 'k'],
    },
    index=['a', 'b', 'c', 'd', 'e'],
)
print(df, '\n')
print('-' * 50)

# Row-wise mean (axis=1). numeric_only=True is passed explicitly: since
# pandas 2.0 the default is numeric_only=False, and averaging the string
# values in key3 would raise TypeError. NaN is skipped by default.
row_means = df.mean(axis=1, numeric_only=True)
print(row_means)

# Column-wise mean (axis=0 is the default axis). skipna defaults to True,
# which ignores NaN; with skipna=False any column containing NaN yields NaN.
col_means = df.mean(axis=0, numeric_only=True, skipna=False)
print(col_means)
# Main statistical methods, usable on both Series and DataFrame (part 1).

# Ten-row demo frame: key1 is 0..9, key2 is random values scaled by 10.
df = pd.DataFrame({
    'key1': np.arange(10),
    'key2': np.random.rand(10) * 10,
})
print(df)
print('-----')

# Each entry is the exact argument tuple handed to print(); the labels are
# runtime output and are kept verbatim.
_reports = (
    (df.count(), '→ count统计非Na值的数量\n'),
    (df.min(), '→ min统计最小值\n', df['key2'].max(), '→ max统计最大值\n'),
    (df.quantile(q=0.75), '→ quantile统计分位数,参数q确定位置\n'),
    (df.sum(), '→ sum求和\n'),
    (df.mean(), '→ mean求平均值\n'),
    (df.median(), '→ median求算数中位数,50%分位数\n'),
    (df.std(), '\n', df.var(), '→ std,var分别求标准差,方差\n'),
    (df.skew(), '→ skew样本的偏度\n'),
    (df.kurt(), '→ kurt样本的峰度\n'),
)
for _args in _reports:
    print(*_args)

 

# Cumulative sum: add a running-total column for each of key1 and key2.
for _col in ('key1', 'key2'):
    df[_col + '_s'] = df[_col].cumsum()
print(df, '→ cumsum样本的累计和\n')

# Cumulative product: add a running-product column for each of key1 and key2.
for _col in ('key1', 'key2'):
    df[_col + '_p'] = df[_col].cumprod()
print(df, '→ cumprod样本的累计积\n')

# Running maximum / minimum. Both are applied to the whole frame, so the
# key1 and key2 columns are transformed too, alongside the derived columns.
_running_max = df.cummax()
_running_min = df.cummin()
print(_running_max, '\n', _running_min, '→ cummax,cummin分别求累计最大值,累计最小值\n')

 

# describe() reports count, mean, standard deviation, min, the quartiles
# and max in a single call.
a = pd.Series(list(range(1, 9)))
_summary = a.describe()
print(_summary)

count    8.00000
mean     4.50000
std      2.44949
min      1.00000
25%      2.75000
50%      4.50000
75%      6.25000
max      8.00000
dtype: float64

 

posted @ 2020-05-06 15:34  籽俊  阅读(173)  评论(0)  编辑  收藏  举报