In [2]:
import numpy as np
import pandas as pd
In [3]:
# Build a Series of five integers keyed by colour labels, then inspect its index.
ser = pd.Series(
    data=[5, 0, 3, 8, 4],
    index=['red', 'blue', 'yellow', 'white', 'green'],
)
ser.index
In [21]:
# Display the Series: each index label is shown next to its value.
ser
Out[21]:
red 5
blue 0
yellow 3
white 8
green 4
dtype: int64
In [20]:
# Reindexing can reorder labels, drop existing ones, or add new ones.
# A label that is not in the original index ('glod') gets NaN, which
# turns the result into float64.
ser.reindex(index=['white', 'blue', 'glod', 'red', 'yellow'])
Out[20]:
white 8.0
blue 0.0
glod NaN
red 5.0
yellow 3.0
dtype: float64
In [23]:
# Series with a gappy integer index (0, 3, 5, 6), used for the fill examples.
ser3 = pd.Series(
    data=[1, 5, 6, 3],
    index=[0, 3, 5, 6],
)
ser3
Out[23]:
0 1
3 5
5 6
6 3
dtype: int64
In [24]:
# Reindex onto 0..5 with forward fill: every label missing from ser3
# takes the value of the closest preceding label that exists.
ser3.reindex(
    index=range(6),
    method='ffill',
)
Out[24]:
0 1
1 1
2 1
3 5
4 5
5 6
dtype: int64
In [25]:
# Reindex onto 0..6 with backward fill: every label missing from ser3
# takes the value of the next label that exists.
ser3.reindex(
    index=range(7),
    method='bfill',
)
Out[25]:
0 1
1 5
2 5
3 5
4 6
5 6
6 3
dtype: int64
In [29]:
# Return a new Series without the 'red' and 'white' entries.
ser.drop(labels=['red', 'white'])
Out[29]:
blue 0
yellow 3
green 4
dtype: int64
In [33]:
# 4x4 frame holding 0..15 row by row: colours label the rows, stationery items the columns.
frame = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['red', 'blue', 'yellow', 'white'],
    columns=['ball', 'pen', 'pencil', 'paper'],
)
frame
Out[33]:
ball pen pencil paper
red 0 1 2 3
blue 4 5 6 7
yellow 8 9 10 11
white 12 13 14 15
In [34]:
# Series 0..3 whose labels match the frame's column names, for frame/Series arithmetic.
ser = pd.Series(
    data=np.arange(4),
    index=['ball', 'pen', 'pencil', 'paper'],
)
ser
Out[34]:
ball 0
pen 1
pencil 2
paper 3
dtype: int32
In [35]:
# Same as `frame - ser`: the Series labels are matched against the frame's
# columns and the subtraction is repeated for every row.
frame.sub(ser, axis='columns')
Out[35]:
ball pen pencil paper
red 0 0 0 0
blue 4 4 4 4
yellow 8 8 8 8
white 12 12 12 12
In [37]:
# Assigning to a label that does not exist yet appends a new entry to ser.
# 'boll' deliberately matches no column of frame.
ser.loc['boll'] = 9
ser
Out[37]:
ball 0
pen 1
pencil 2
paper 3
boll 9
dtype: int64
In [38]:
# Same as `frame - ser`. Labels are aligned on the union of frame's columns
# and ser's index, so a label present on only one side ('boll') yields an
# all-NaN column in the result.
frame.sub(ser, axis='columns')
Out[38]:
ball boll paper pen pencil
red 0 NaN 0 0 0
blue 4 NaN 4 4 4
yellow 8 NaN 8 8 8
white 12 NaN 12 12 12
In [39]:
# NumPy universal functions work element-wise on a DataFrame; the result
# keeps the same row and column labels.
np.sqrt(frame)
Out[39]:
ball pen pencil paper
red 0.000000 1.000000 1.414214 1.732051
blue 2.000000 2.236068 2.449490 2.645751
yellow 2.828427 3.000000 3.162278 3.316625
white 3.464102 3.605551 3.741657 3.872983
In [40]:
def f(x):
    """Return the spread of ``x``: its maximum minus its minimum.

    ``x`` is any object exposing ``max()`` and ``min()`` (for example the
    Series that ``DataFrame.apply`` passes for each column or row).
    Defined with ``def`` rather than a lambda bound to a name, so the
    function carries a real name and a docstring.
    """
    return x.max() - x.min()
# Call the function on every column (axis=0 is the default for apply).
frame.apply(f, axis=0)
Out[40]:
ball 12
pen 12
pencil 12
paper 12
dtype: int64
In [43]:
# Apply f across the columns, i.e. once per row; the result is indexed by row label.
frame.apply(f, axis='columns')
Out[43]:
red 3
blue 3
yellow 3
white 3
dtype: int64
In [44]:
# The applied function can also return a Series object.
# NOTE: this rebinds the name `f`, replacing the earlier max-minus-min function.
def f(x):
    """Return the minimum and maximum of ``x`` as a two-element Series.

    The result is labelled ``'min'`` and ``'max'`` (in that order). When the
    function is used with ``DataFrame.apply``, those labels become the index
    of the resulting frame.
    """
    return pd.Series([x.min(), x.max()], index=['min', 'max'])
# Column-wise apply: because f returns a Series, the results are assembled
# into a DataFrame with one column per original column.
frame.apply(f, axis=0)
Out[44]:
ball pen pencil paper
min 0 1 2 3
max 12 13 14 15
In [46]:
# Row totals: add up the values across the columns of each row.
frame.sum(axis='columns')
Out[46]:
red 6
blue 22
yellow 38
white 54
dtype: int64
In [47]:
# Column means (axis=0 is the default); the result is float64.
frame.mean(axis=0)
Out[47]:
ball 6.0
pen 7.0
pencil 8.0
paper 9.0
dtype: float64
In [50]:
# Summary statistics per column: count, mean, std, min, quartiles and max.
# The percentiles listed are the defaults, spelled out explicitly.
frame.describe(percentiles=[0.25, 0.5, 0.75])
Out[50]:
ball pen pencil paper
count 4.000000 4.000000 4.000000 4.000000
mean 6.000000 7.000000 8.000000 9.000000
std 5.163978 5.163978 5.163978 5.163978
min 0.000000 1.000000 2.000000 3.000000
25% 3.000000 4.000000 5.000000 6.000000
50% 6.000000 7.000000 8.000000 9.000000
75% 9.000000 10.000000 11.000000 12.000000
max 12.000000 13.000000 14.000000 15.000000
In [52]:
# Fresh Series for the sorting and ranking examples.
# NOTE(review): 'whiite' looks like a typo for 'white'; it is kept as-is
# because the recorded outputs use this spelling.
ser = pd.Series(
    data=[5, 0, 3, 8, 4],
    index=['red', 'blue', 'yellow', 'whiite', 'green'],
)
ser
Out[52]:
red 5
blue 0
yellow 3
whiite 8
green 4
dtype: int64
In [53]:
# Sort by index label in ascending (alphabetical) order.
ser.sort_index(ascending=True)
Out[53]:
blue 0
green 4
red 5
whiite 8
yellow 3
dtype: int64
In [54]:
# Sort by index label in descending (reverse alphabetical) order.
ser.sort_index(axis=0, ascending=False)
Out[54]:
yellow 3
whiite 8
red 5
green 4
blue 0
dtype: int64
In [55]:
# Rebuild the 4x4 example frame (values 0..15, row by row) for the sorting examples.
frame = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['red', 'blue', 'yellow', 'white'],
    columns=['ball', 'pen', 'pencil', 'paper'],
)
frame
Out[55]:
ball pen pencil paper
red 0 1 2 3
blue 4 5 6 7
yellow 8 9 10 11
white 12 13 14 15
In [56]:
# Reorder the columns alphabetically by their labels; rows are left as they are.
frame.sort_index(axis='columns')
Out[56]:
ball paper pen pencil
red 0 3 1 2
blue 4 7 5 6
yellow 8 11 9 10
white 12 15 13 14
In [66]:
# Sort the rows by the values in 'pen', using 'paper' to break ties.
# sort_values(by=...) is the supported way to sort by column values; the
# `by=` argument of sort_index is deprecated and is not accepted by
# current pandas releases.
frame.sort_values(by=['pen', 'paper'])
Out[66]:
ball pen pencil paper
red 0 1 2 3
blue 4 5 6 7
yellow 8 9 10 11
white 12 13 14 15
In [67]:
# Rank the values from smallest (rank 1.0) to largest. 'average' is the
# default tie-breaking rule; this Series has no ties.
ser.rank(method='average')
Out[67]:
red 4.0
blue 1.0
yellow 2.0
whiite 5.0
green 3.0
dtype: float64
In [68]:
# method='first' breaks ties by order of appearance. With no tied values
# the ranks match the default ranking.
ser.rank(method='first', ascending=True)
Out[68]:
red 4.0
blue 1.0
yellow 2.0
whiite 5.0
green 3.0
dtype: float64
In [69]:
# Rank in descending order: the largest value receives rank 1.0.
ser.rank(method='average', ascending=False)
Out[69]:
red 2.0
blue 5.0
yellow 4.0
whiite 1.0
green 3.0
dtype: float64