已信任
Jupyter 服务器: 本地
Python 3: Not Started
[1]
import pandas as pd
import numpy as np
[4]
# Sample data: one pandas Series per column, all sharing the default 0..2 index.
d = {
    column: pd.Series(values)
    for column, values in (
        ('name', ['小明', '小黑', '小红']),
        ('age', [12, 16, 14]),
        ('score', [98, 90, 77]),
    )
}
# The dict keys become the column labels, in insertion order.
df = pd.DataFrame(d)
df  # bare expression so the notebook renders the table
name age score
0 小明 12 98
1 小黑 16 90
2 小红 14 77
[5]
# Column-wise sum (axis 0 is what sum() uses by default).
# The string column is "summed" by concatenating its values.
df.sum(axis=0)
name 小明小黑小红
age 42
score 265
dtype: object
[6]
# Row-wise sum (axis=1): adds age + score for each row.
# numeric_only=True explicitly leaves out the string `name` column. Older
# pandas dropped it silently, but pandas >= 2.0 defaults to
# numeric_only=False and raises TypeError when asked to add str and int.
df.sum(axis=1, numeric_only=True)
0 110
1 106
2 91
dtype: int64
[7]
# Column-wise mean of the numeric columns.
# numeric_only=True is required on pandas >= 2.0, where the default
# (numeric_only=False) tries to average the string `name` column and raises
# TypeError instead of silently skipping it.
df.mean(numeric_only=True)
age 14.000000
score 88.333333
dtype: float64
[8]
# Column-wise sample standard deviation of the numeric columns.
# numeric_only=True is required on pandas >= 2.0, where the default
# (numeric_only=False) no longer skips the string `name` column and raises
# instead.
df.std(numeric_only=True)
age 2.000000
score 10.598742
dtype: float64
[9]
# Maximum of every column (axis 0); the string column is included and is
# compared as strings.
df.max(axis=0)
name 小黑
age 16
score 98
dtype: object
[10]
# Element-wise absolute value, restricted to the two numeric columns
# (`name` holds strings, so it is left out of the selection).
df.loc[:, ['age', 'score']].abs()
age score
0 12 98
1 16 90
2 14 77
[12]
# Summary statistics with default settings: count, mean, std, min,
# 25%/50%/75% quantiles and max. For this frame only the numeric columns
# (age, score) appear in the result.
df.describe()
age score
count 3.0 3.000000
mean 14.0 88.333333
std 2.0 10.598742
min 12.0 77.000000
25% 13.0 83.500000
50% 14.0 90.000000
75% 15.0 94.000000
max 16.0 98.000000
[13]
# Describe only the object-dtype columns (here the string `name` column).
# The summary is categorical: count, unique, top (most frequent value), freq.
df.describe(include='object')
name
count 3
unique 3
top 小红
freq 1
[14]
# Describe every column regardless of dtype. Statistics that do not apply
# to a column (e.g. mean of `name`, unique of `age`) show up as NaN.
df.describe(include='all')
name age score
count 3 3.0 3.000000
unique 3 NaN NaN
top 小红 NaN NaN
freq 1 NaN NaN
mean NaN 14.0 88.333333
std NaN 2.0 10.598742
min NaN 12.0 77.000000
25% NaN 13.0 83.500000
50% NaN 14.0 90.000000
75% NaN 15.0 94.000000
max NaN 16.0 98.000000
[-]