pandas数据合并之concat

数据合并concat

#concat 函数
#参数解释
concat(
    objs: Iterable[NDFrame] | Mapping[HashableT, NDFrame],
    axis: Axis = 0,
    join: str = "outer",#合并方式:"outer"取并集,"inner"取交集
    ignore_index: bool = False,#是否重置索引
    keys=None,#	设置多重索引名称
    levels=None,
    names=None,#对多重索引结果的名称进行设置
    verify_integrity: bool = False,#检查数据合并方向上是否有重复的索引标签值,如果有重复值则会报错
    sort: bool = False,
    copy: bool = True,
) 

#针对Series
import pandas as pd
# Two integer Series, each carrying the default 0..3 row index.
a = pd.Series(data=[0, 3, 6, 9])
b = pd.Series(data=[12, 15, 18, 21])

# Stack the Series end to end (default axis=0). ignore_index=True discards the
# original labels, so the result is renumbered 0..7 as shown below.
c = pd.concat(objs=[a, b], ignore_index=True)
'''
#输出结果
0     0
1     3
2     6
3     9
4    12
5    15
6    18
7    21
'''
# Same inputs placed side by side (axis=1). Along this axis ignore_index=True
# renumbers the column labels, giving columns 0 and 1.
c = pd.concat(objs=[a, b], axis=1, ignore_index=True)
'''
输出结果
  0   1
0  0  12
1  3  15
2  6  18
3  9  21
'''
# keys adds an outer index level ("s1"/"s2") that records which input each
# row came from, producing a two-level row index.
c = pd.concat(objs=[a, b], keys=["s1", "s2"])
'''
#输出结果
s1  0     0
    1     3
    2     6
    3     9
s2  0    12
    1    15
    2    18
    3    21
'''
# With axis=1 the keys become the column names instead.
c = pd.concat(objs=[a, b], axis=1, keys=["s1", "s2"])
'''
输出结果
   s1  s2
0   0  12
1   3  15
2   6  18
3   9  21
'''
# names labels the levels of the resulting two-level row index.
c = pd.concat(objs=[a, b], names=["s_name", "ID"], keys=["s1", "s2"])
"""
输出结果
s_name  ID
s1      0      0
        1      3
        2      6
        3      9
s2      0     12
        1     15
        2     18
        3     21
"""


#在数据框中的应用
#对于索引相同的
# Case 1: both frames share the same columns and the same default row index.
data1 = {
    "a": [0, 3, 6, 9],
    "b": [1, 4, 7, 10],
    "c": [2, 5, 8, 11],
}
df1 = pd.DataFrame(data=data1)
data2 = {
    "a": [12, 15, 18, 21],
    "b": [13, 16, 19, 22],
    "c": [14, 17, 20, 23],
}
df2 = pd.DataFrame(data=data2)
# Append df2's rows below df1's (axis 0) and renumber the rows.
a = pd.concat(objs=[df1, df2], ignore_index=True)
# Place the two frames side by side (axis 1).
a = pd.concat(objs=[df1, df2], axis=1)

# Case 2: the frames differ in both column names and row labels.
data1 = {
    "a": [0, 3, 6, 9],
    "b": [1, 4, 7, 10],
    "c": [2, 5, 8, 11],
}
df1 = pd.DataFrame(data=data1)
data2 = {
    "a": [12, 15, 18, 21],
    "f": [13, 16, 19, 22],
    "g": [14, 17, 20, 23],
}
df2 = pd.DataFrame(data=data2, index=[3, "x", "y", "z"])
# Stack along axis 0: columns with the same name are lined up with each other.
a = pd.concat(objs=[df1, df2])
# Same stacking, but ignore_index=True drops the row labels and renumbers
# the rows (it does not reorder the column names).
a = pd.concat(objs=[df1, df2], ignore_index=True)
# Side by side along axis 1: rows with the same label are lined up, and
# labels present in only one frame are filled with NaN (see output below).
a = pd.concat(objs=[df1, df2], axis=1)
"""
输出结果
     a     b     c     a     f     g
0  0.0   1.0   2.0   NaN   NaN   NaN
1  3.0   4.0   5.0   NaN   NaN   NaN
2  6.0   7.0   8.0   NaN   NaN   NaN
3  9.0  10.0  11.0  12.0  13.0  14.0
x  NaN   NaN   NaN  15.0  16.0  17.0
y  NaN   NaN   NaN  18.0  19.0  20.0
z  NaN   NaN   NaN  21.0  22.0  23.0
"""

#concat函数中join参数:并集、交集
# Frames that overlap only on column "a" and on row label 3.
data1 = {
    "a": [0, 3, 6, 9],
    "b": [1, 4, 7, 10],
    "c": [2, 5, 8, 11],
}
df1 = pd.DataFrame(data=data1)
data2 = {
    "a": [12, 15, 18, 21],
    "f": [13, 16, 19, 22],
    "g": [14, 17, 20, 23],
}
df2 = pd.DataFrame(data=data2, index=[3, "x", "y", "z"])
# join="inner" keeps the intersection: when stacking rows, only the columns
# present in both frames survive (here just "a").
a = pd.concat(objs=[df1, df2], join="inner")
"""
    a
0   0
1   3
2   6
3   9
3  12
x  15
y  18
z  21
"""
print(df1)
print(df2)
# With axis=1 the intersection is taken over the row labels instead, so only
# rows whose label exists in both frames are kept.
a = pd.concat(objs=[df1, df2], axis=1, join="inner")
print(a)

#保留某数据框的行索引标签与列索引标签
data1 = {
    "a": [0, 3, 6, 9],
    "b": [1, 4, 7, 10],
    "c": [2, 5, 8, 11],
}
df1 = pd.DataFrame(data=data1)
data2 = {
    "a": [12, 15, 18, 21],
    "f": [13, 16, 19, 22],
    "g": [14, 17, 20, 23],
}
df2 = pd.DataFrame(data=data2, index=[3, "x", "y", "z"])
# Keep only df1's row labels: concatenate side by side, then reindex the
# result with df1.index (this uses reindex, not reset_index).
a = pd.concat(objs=[df1, df2], axis=1).reindex(index=df1.index)
"""
     a     b     c     a     f     g
0  0.0   1.0   2.0   NaN   NaN   NaN
1  3.0   4.0   5.0   NaN   NaN   NaN
2  6.0   7.0   8.0   NaN   NaN   NaN
3  9.0  10.0  11.0  12.0  13.0  14.0
"""
# Keep only df1's column labels by selecting them from the combined frame.
# Both inputs have a column named "a", so both "a" columns come back.
a = pd.concat(objs=[df1, df2], axis=1)[df1.columns]
"""
#输出结果
    a     a     b     c
0  0.0   NaN   1.0   2.0
1  3.0   NaN   4.0   5.0
2  6.0   NaN   7.0   8.0
3  9.0  12.0  10.0  11.0
x  NaN  15.0   NaN   NaN
y  NaN  18.0   NaN   NaN
z  NaN  21.0   NaN   NaN
"""

# keys: tag each input's columns with an outer label, giving a two-level
# column index.
a = pd.concat(objs=[df1, df2], keys=["data1", "data2"], axis=1)


data1 = {
    "a": [0, 3, 6, 9],
    "b": [1, 4, 7, 10],
    "c": [2, 5, 8, 11],
}
df1 = pd.DataFrame(data=data1)
data2 = {
    "a": [12, 15, 18, 21],
    "f": [13, 16, 19, 22],
    "g": [14, 17, 20, 23],
}
df2 = pd.DataFrame(data=data2, index=[3, "x", "y", "z"])
# Pull column "b" out of df1 as a Series.
s1 = df1["b"]

# Appending a column: with axis=1 the Series is attached on the right-hand
# side of the frame.
a = pd.concat(objs=[df1, s1], axis=1)
"""
   a   b   c   b
0  0   1   2   1
1  3   4   5   4
2  6   7   8   7
3  9  10  11  10
"""
print(df1)
print(df2)
# Take df1's row 0 as a Series.
s2 = df1.loc[0]
print(s2)
# to_frame() yields a single-column frame; transposing it produces a
# single-row frame that can be stacked below df2.
st = s2.to_frame().transpose()
print(st)
a = pd.concat(objs=[df2, st])
"""
    a     f     g    b    c
3  12  13.0  14.0  NaN  NaN
x  15  16.0  17.0  NaN  NaN
y  18  19.0  20.0  NaN  NaN
z  21  22.0  23.0  NaN  NaN
0   0   NaN   NaN  1.0  2.0
"""
#合并某一行
#由于Series天然纵向的特性,因此需要先对Series进行转置操作


#concat还可将数据框组成的字典对象进行合并

data1 = {
    "a": [0, 3, 6, 9],
    "b": [1, 4, 7, 10],
    "c": [2, 5, 8, 11],
}
df1 = pd.DataFrame(data=data1)
data2 = {
    "a": [12, 15, 18, 21],
    "f": [13, 16, 19, 22],
    "g": [14, 17, 20, 23],
}
df2 = pd.DataFrame(data=data2, index=["e", "x", "y", "z"])
# verify_integrity=True checks the concatenation axis for duplicate labels.
# df1 is labelled 0..3 and df2 e/x/y/z, so nothing collides here.
a = pd.concat(objs=[df1, df2], verify_integrity=True)
# A dict of DataFrames can be concatenated as well; the dict keys ("x", "y")
# become the outer level of the resulting row index.
fracm = {"x": df1, "y": df2}
df = pd.concat(objs=fracm)
print(df)
"""
      a     b     c     f     g
x 0   0   1.0   2.0   NaN   NaN
  1   3   4.0   5.0   NaN   NaN
  2   6   7.0   8.0   NaN   NaN
  3   9  10.0  11.0   NaN   NaN
y e  12   NaN   NaN  13.0  14.0
  x  15   NaN   NaN  16.0  17.0
  y  18   NaN   NaN  19.0  20.0
  z  21   NaN   NaN  22.0  23.0
"""
#




posted @ 2023-02-21 15:24  小杨的冥想课  阅读(165)  评论(0)  编辑  收藏  举报