1 >>> frame2
2 addr age name
3 0 beijing 12 zhang
4 1 shanghai 24 li
5 2 hangzhou 24 cao
6 >>> frame1
7 addr name
8 0 beijing zhang
9 1 shanghai li
10 2 hangzhou cao
11 3 shenzhen han
12 >>> pd.merge(frame1,frame2) 未指定on时,默认以两表所有同名列(addr、name)为连接键进行拼接
13 addr name age
14 0 beijing zhang 12
15 1 shanghai li 24
16 2 hangzhou cao 24
17 >>> pd.merge(frame1,frame2,on='name') 指定 列 和拼接方式
18 addr_x name addr_y age
19 0 beijing zhang beijing 12
20 1 shanghai li shanghai 24
21 2 hangzhou cao hangzhou 24
22 >>> pd.merge(frame1,frame2,on='name',how='outer')
23 addr_x name addr_y age
24 0 beijing zhang beijing 12.0
25 1 shanghai li shanghai 24.0
26 2 hangzhou cao hangzhou 24.0
27 3 shenzhen han NaN NaN
28 >>> pd.merge(frame1,frame2,on='name',how='inner')
29 addr_x name addr_y age
30 0 beijing zhang beijing 12
31 1 shanghai li shanghai 24
32 2 hangzhou cao hangzhou 24
33 >>> pd.merge(frame1,frame2,on='name',how='left')
34 addr_x name addr_y age
35 0 beijing zhang beijing 12.0
36 1 shanghai li shanghai 24.0
37 2 hangzhou cao hangzhou 24.0
38 3 shenzhen han NaN NaN
39 >>> pd.merge(frame1,frame2,on='name',how='right')
40 addr_x name addr_y age
41 0 beijing zhang beijing 12
42 1 shanghai li shanghai 24
43 2 hangzhou cao hangzhou 24
44 >>> pd.merge(frame1,frame2,on='name',left_index=True)
45 addr_x name addr_y age
46 0 beijing zhang beijing 12
47 1 shanghai li shanghai 24
48 2 hangzhou cao hangzhou 24
49 >>> pd.merge(frame1,frame2,on='name',right_index=True)
50 addr_x name addr_y age
51 0 beijing zhang beijing 12
52 1 shanghai li shanghai 24
53 2 hangzhou cao hangzhou 24
54 >>> pd.merge(frame1,frame2,on='addr',right_index=True)
55 addr name_x age name_y
56 0 beijing zhang 12 zhang
57 1 shanghai li 24 li
58 2 hangzhou cao 24 cao
1 >>> frame1.columns=['addr1','name1']
2 >>> frame1.join(frame2)
3 addr1 name1 addr age name 修改掉重复的列名称,然后join()
4 0 beijing zhang beijing 12.0 zhang
5 1 shanghai li shanghai 24.0 li
6 2 hangzhou cao hangzhou 24.0 cao
7 3 shenzhen han NaN NaN NaN
1 >>> array1
2 array([[0, 1, 2],
3 [3, 4, 5],
4 [6, 7, 8]])
5 >>> array1=np.arange(9).reshape((3,3))+6
6 >>> array2=np.arange(9).reshape((3,3))
7 >>> array1
8 array([[ 6, 7, 8],
9 [ 9, 10, 11],
10 [12, 13, 14]])
11 >>> np.concatenate([array1,array2],axis=1) np模块中对数组进行concatenate()
12 array([[ 6, 7, 8, 0, 1, 2],
13 [ 9, 10, 11, 3, 4, 5],
14 [12, 13, 14, 6, 7, 8]])
15 >>> np.concatenate([array1,array2],axis=0)
16 array([[ 6, 7, 8],
17 [ 9, 10, 11],
18 [12, 13, 14],
19 [ 0, 1, 2],
20 [ 3, 4, 5],
21 [ 6, 7, 8]])
22 >>>
23 >>> np.concatenate([array1,array2])
24 array([[ 6, 7, 8],
25 [ 9, 10, 11],
26 [12, 13, 14],
27 [ 0, 1, 2],
28 [ 3, 4, 5],
29 [ 6, 7, 8]])
1 >>> ser1=pd.Series(np.random.rand(4)) pd模块中也有concat()
2 >>> ser1
3 0 0.998915
4 1 0.117503
5 2 0.747180
6 3 0.641508
7 dtype: float64
8 >>> ser1=pd.Series(np.random.rand(4)*100)
9 >>> ser1
10 0 8.818592
11 1 42.317816
12 2 43.274021
13 3 23.245148
14 dtype: float64
15 >>> ser2=pd.Series(np.random.rand(4)*100,index=[5,6,7,8])
16 >>> ser2
17 5 58.416554
18 6 11.840838
19 7 38.146851
20 8 0.135517
21 dtype: float64
22 >>> pd.concat([ser1,ser2])
23 0 8.818592
24 1 42.317816
25 2 43.274021
26 3 23.245148
27 5 58.416554
28 6 11.840838
29 7 38.146851
30 8 0.135517
31 dtype: float64
32 >>> pd.concat([ser1,ser2],axis=1)
33 0 1
34 0 8.818592 NaN
35 1 42.317816 NaN
36 2 43.274021 NaN
37 3 23.245148 NaN
38 5 NaN 58.416554
39 6 NaN 11.840838
40 7 NaN 38.146851
41 8 NaN 0.135517
1 >>> pd.concat([ser1,ser2],axis=1,keys=[1,2])
2 1 2
3 0 8.818592 NaN
4 1 42.317816 79.632793
5 2 43.274021 96.700070
6 3 23.245148 64.573269
7 4 NaN 68.629709
8 >>> ser2.index=[2,4,5,6]
9 >>> ser2
10 2 79.632793
11 4 96.700070
12 5 64.573269
13 6 68.629709
14 dtype: float64
15 >>> ser1.combine_first(ser2) 对缺失的数据进行填充 combine_first()
16 0 8.818592
17 1 42.317816
18 2 43.274021
19 3 23.245148
20 4 96.700070
21 5 64.573269
22 6 68.629709
23 dtype: float64
1 >>> ser1
2 0 a
3 1 b
4 2 c
5 3 d
6 dtype: object
7 >>> ser2
8 2 0
9 4 1
10 5 2
11 6 3
12 dtype: int32
13 >>> ser2.combine_first(ser1) ser1在后
14 0 a
15 1 b
16 2 0
17 3 d
18 4 1
19 5 2
20 6 3
21 dtype: object
22 >>> ser1[:2].combine_first(ser2) ser1在前
23 0 a
24 1 b
25 2 0
26 4 1
27 5 2
28 6 3
29 dtype: object
1 >>> frame1=pd.DataFrame({'name':['zhang','li','wang'],'age':[12,45,34],'addr':['beijing','shanghai','shenzhen']})
2 >>> frame1
3 addr age name
4 0 beijing 12 zhang
5 1 shanghai 45 li
6 2 shenzhen 34 wang
7 >>> frame1.stack() frame的堆叠(stack)和反堆叠(unstack):列标签与行索引之间的转换
8 0 addr beijing
9 age 12
10 name zhang
11 1 addr shanghai
12 age 45
13 name li
14 2 addr shenzhen
15 age 34
16 name wang
17 dtype: object
18 >>> frame1.stack().unstack()
19 addr age name
20 0 beijing 12 zhang
21 1 shanghai 45 li
22 2 shenzhen 34 wang
23 >>> frame1.stack().unstack(0) 列和索引转换
24 0 1 2
25 addr beijing shanghai shenzhen
26 age 12 45 34
27 name zhang li wang
1 >>> longframe=pd.DataFrame({'color':['white','white','white','red','red','red','black','black','black'],'item':['ball','pen','mug','ball','pen','mug','ball','pen','mug'],'value':np.random.rand(9)})
2 >>> longframe
3 color item value 对冗余的消除,将longframe转换为wideframe
4 0 white ball 0.260358
5 1 white pen 0.543955
6 2 white mug 0.456874
7 3 red ball 0.967021
8 4 red pen 0.657271
9 5 red mug 0.984256
10 6 black ball 0.550236
11 7 black pen 0.731625
12 8 black mug 0.006728
13 >>> wideframe=longframe.pivot('color','item')
14 >>> wideframe
15 value
16 item ball mug pen
17 color
18 black 0.550236 0.006728 0.731625
19 red 0.967021 0.984256 0.657271
20 white 0.260358 0.456874 0.543955
21 >>> frame1
22 addr age name
23 0 beijing 12 zhang
24 1 shanghai 12 li
25 2 beijing 12 wang
26 >>> del frame['addr']
27 Traceback (most recent call last):
28 File "<pyshell#103>", line 1, in <module>
29 del frame['addr']
30 NameError: name 'frame' is not defined
31 >>> del frame1['addr']
32 >>> frame1
33 age name
34 0 12 zhang
35 1 12 li
36 2 12 wang