1 import openpyxl
2 import pandas as pd
3 import os
4
5
6 # 导入文件夹的所有文件
7 def get_files_name():
8 """
9 用于获取文件名
10 :return: 返回值为文件名组成的列表
11 """
12 file_list = os.listdir ('./data')
13 return file_list
14
15
16 # 选择要处理的excel表
17 def load_data(file_list):
18 for item_file in file_list:
19 # 获取表格文件
20 file_name = './data/' + item_file
21 print('要处理的文件是:'+file_name)
22 handle (file_name)
23
24
25 # Excel数据处理
26 def handle(file_name):
27 try:
28 with pd.ExcelFile (file_name) as xlsx:
29 # 读取包含带Pandas的公式的excel单元格值
30 workbook = openpyxl.load_workbook (filename=xlsx, data_only=True)
31 # 工作表A
32 sheetA = pd.read_excel (workbook, sheet_name="A", engine='openpyxl')
33 sheetA.drop_duplicates (subset='stu_no', keep='first', inplace=True)
34 data_listA = sheetA['stu_no'].values.tolist () # 将某一列读到列表中
35 # print("data_listA:")
36 # print(data_listA)
37 # 工作表B
38 sheetB = pd.read_excel (workbook, sheet_name="B", engine='openpyxl')
39 sheetB.drop_duplicates (subset='stu_no', keep='first', inplace=True)
40 data_listB = sheetB['stu_no'].values.tolist () # 将某一列读到列表中
41 # print ("data_listA:")
42 # print (data_listA)
43 # 取两个工作表中,指定列的相同数据
44 same_data = [x for x in data_listB if x in data_listA]
45 # 要处理的列名
46 need_col = pd.read_excel (xlsx, sheet_name="A", index_col='trade_code')
47 # print ("need_col:")
48 # print (need_col)
49 # 定义一个空列表
50 need_data = []
51 # for 处理两个工作表中的数据源
52 for item_same in same_data:
53 item_list = need_col.loc[[item_same]]
54 need_data.append (item_list)
55 print ("need_data:")
56 print (need_data)
57 # 合并查出来的DataFrame的所有数据
58 need_data = pd.concat (need_data)
59 print("need_data合并后的:")
60 print(need_data)
61 # 定义一个writer
62 writer = pd.ExcelWriter (file_name, engine='openpyxl', mode='a', if_sheet_exists='replace')
63 # 接下来还是调用to_excel, 但是第一个参数不再是文件名, 而是上面的writer,将DataFrame写进去
64 need_data.to_excel (writer, sheet_name="same")
65 # 保存并关闭writer, 写入磁盘
66 writer.close ()
67 print("文件处理完成")
68 except Exception as e:
69 print(e)
70 pass
71
72
73 if __name__ == "__main__":
74 # 导入文件
75 files = get_files_name ()
76 data = {}
77 print ("当前data文件夹下的文件如下:")
78 num = 1
79 for file in files:
80 print (num, file)
81 num += 1
82
83 choice_file_list = []
84 while 1:
85 index_str = input (
86 "请选择需要处理的文件序号(多个文件导入时用空格分开,输入0则导入所有文件,输入多文件则自动合并):")
87 index_list = index_str.split (' ')
88 try:
89 index_list.remove ('')
90 except:
91 pass
92 if index_list[0] == '0':
93 choice_file_list = files
94 break
95 else:
96 try:
97 for item in index_list:
98 choice_file_list.append (files[int (item) - 1])
99 except:
100 print ("输入序号有误")
101 continue
102 if choice_file_list:
103 break
104 else:
105 print ("输入序号有误")
106 load_data (choice_file_list)