import numpy as np
import pandas as pd
# Three 3x4 frames sharing columns a-d and the default 0..2 row labels,
# filled with 0.0, 1.0 and 2.0 respectively.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])

# Vertical concatenation (axis=0): every frame keeps its own 0..2 row
# labels, so the result has duplicated index values.
res = pd.concat([df1, df2, df3], axis=0)
print(res)

# Same stacking, but ignore_index=True renumbers the rows 0..8.
res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
print(res)

# Two frames that only partially overlap: they share columns b, c, d and
# row labels 2, 3. They are reused by the outer and inner examples below.
df3 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'], index=[1, 2, 3])
df4 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])

# Horizontal outer concatenation (axis=1): rows are the union of both
# indexes and cells missing from one frame become NaN. With axis=1,
# ignore_index=True renumbers the *columns* (0..7), not the rows.
res = pd.concat([df3, df4], axis=1, join='outer', ignore_index=True)
print(res)

# Vertical inner concatenation: only the columns present in both frames
# (b, c, d) survive, and the rows are renumbered 0..5.
res = pd.concat([df3, df4], axis=0, join='inner', ignore_index=True)
print(res)

# Align the side-by-side result to df1's row labels. The `join_axes`
# keyword of pd.concat was removed in pandas 1.0; reindexing the
# concatenated frame is the documented replacement.
res = pd.concat([df1, df2], axis=1).reindex(df1.index)
print(res)

# Append df2's rows below df1. DataFrame.append was removed in pandas 2.0;
# pd.concat with ignore_index=True produces the same frame.
res = pd.concat([df1, df2], ignore_index=True)
print(res)
输出结果:
a b c d
0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0
0 1.0 1.0 1.0 1.0
1 1.0 1.0 1.0 1.0
2 1.0 1.0 1.0 1.0
0 2.0 2.0 2.0 2.0
1 2.0 2.0 2.0 2.0
2 2.0 2.0 2.0 2.0
a b c d
0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0
3 1.0 1.0 1.0 1.0
4 1.0 1.0 1.0 1.0
5 1.0 1.0 1.0 1.0
6 2.0 2.0 2.0 2.0
7 2.0 2.0 2.0 2.0
8 2.0 2.0 2.0 2.0
0 1 2 3 4 5 6 7
1 1.0 1.0 1.0 1.0 NaN NaN NaN NaN
2 1.0 1.0 1.0 1.0 2.0 2.0 2.0 2.0
3 1.0 1.0 1.0 1.0 2.0 2.0 2.0 2.0
4 NaN NaN NaN NaN 2.0 2.0 2.0 2.0
b c d
0 1.0 1.0 1.0
1 1.0 1.0 1.0
2 1.0 1.0 1.0
3 2.0 2.0 2.0
4 2.0 2.0 2.0
5 2.0 2.0 2.0
a b c d a b c d
0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
1 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
2 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
a b c d
0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0
3 1.0 1.0 1.0 1.0
4 1.0 1.0 1.0 1.0
5 1.0 1.0 1.0 1.0
import numpy as np
import pandas as pd
# Two float Series over the same labels f..a, each with gaps (NaN) in
# different positions; only label 'a' is missing from both.
a = pd.Series(
    data=[np.nan, 2.5, np.nan, 3.5, 4.5, np.nan],
    index=['f', 'e', 'd', 'c', 'b', 'a'],
)
print(a)
b = pd.Series(
    data=[1, np.nan, 3, 4, 5, np.nan],
    index=['f', 'e', 'd', 'c', 'b', 'a'],
)
print(b)
# combine_first keeps the caller's values and takes the argument's value
# only where the caller is NaN. First b patched with a, then a patched
# with b, printed one after the other.
print(b.combine_first(a), a.combine_first(b), sep='\n')
输出结果:
f NaN
e 2.5
d NaN
c 3.5
b 4.5
a NaN
dtype: float64
f 1.0
e NaN
d 3.0
c 4.0
b 5.0
a NaN
dtype: float64
f 1.0
e 2.5
d 3.0
c 4.0
b 5.0
a NaN
dtype: float64
f 1.0
e 2.5
d 3.0
c 3.5
b 4.5
a NaN
dtype: float64