1 # encoding:utf-8
2
3 from tkinter import *
4 from tkinter import messagebox
5 from tkinter import filedialog
6 import pandas as pd
7 '''
8 画图形界面,供user操作
9 界面功能:
10 1.USER选择类型:BC1,MBM
11 2.USER导入EXCEL
12 3.点击数据处理,生成处理后的excel
13 '''
14
15 class OpenMyXLS(Frame):
16 def __init__(self, master=None):
17 super().__init__(master)
18 self.master = master
19 self.pack()
20 self.openxls()
21
22 def openxls(self):
23 #创建一个标签:请选择数据处理类型
24 self.lab1 = Label(root, text="请选择要处理的数据类型:", font=("Arial", 12))
25 self.lab1.place(x=30, y=30)
26 #创建两个单选框:BC1,MLB
27
28 self.var = StringVar()
29 self.strPath = StringVar()
30 self.var.set("BC1")
31 self.radb1 = Radiobutton(root, text='BC1', value="BC1", variable=self.var, command=self.songhua)
32 self.radb1.place(x=30, y=60)
33 self.radb2 = Radiobutton(root, text='MBM', value="MBM", variable=self.var, command=self.songhua)
34 self.radb2.place(x=90, y=60)
35 #创建一个文本选择框(文本,输入框,按钮)
36 self.lab2 = Label(root, text="xlxs格式:", font=("Arial", 12))
37 self.lab2.place(x=30, y=90)
38 self.ent = Entry(root, width=40, textvariable=self.strPath)
39 self.ent.place(x=110, y=90)
40 self.bt1 = Button(root, text="选择文件", font=("Arial", 12), width=10, height=1, command=self.pathCallBack)
41 self.bt1.place(x=400, y=85)
42
43 # 选择文件夹
44 #Folderpath = filedialog.askdirectory()
45 # 选择文件
46 #Filepath = filedialog.askopenfilename()
47 # 打印文件夹路径
48 #print('Folderpath:', Folderpath)
49 # 打印文件路径
50 #print('Filepath:', Filepath)
51
52 #创建一个按钮:开始处理数据
53 self.bt2 = Button(root, text="开始数据处理", font=("Arial", 12), width=20, height=2, command=self.chuliExcel)
54 self.bt2.place(x=180, y=120)
55 #创建一个提示信息:显示处理后的文档位置
56 self.lab3 = Label(root, text="", font=("Arial", 12))
57 self.lab3.place(x=30, y=180)
58 return None
59
60 def songhua(self):
61 messagebox.showinfo("信息选择", "你想要处理的数据是:"+self.var.get())
62
63 def pathCallBack(self):
64 filePath = filedialog.askopenfilename(title="Select excel file", filetypes=(("excel files", "*.xlsx"),))
65 if (filePath != ''):
66 self.strPath.set(filePath)
67 print('Filepath:', filePath)
68
69 def chuliExcel(self):
70 if self.ent.get():
71 print(self.ent.get())
72 print(self.var.get())
73 print("开始处理EXCEL文件")
74 self.openxls1(self.ent.get(), self.var.get())
75 self.lab3.config(text="excel文件处理OK!!!",fg="green")
76 else:
77 messagebox.showerror("错误信息", "请选择文件!!!")
78
79 '''开始处理数据'''
80 def openxls1(self, readFilePath,selectVar):
81 #1.对源数据处理,只想要对应的列数据
82 ''''''
83 '''
84 步骤:
85 1.获取所有的sheet名称
86 2.获取每个sheet的行标题
87 3.对每个sheet的行标题进行塞选出想要的行标题
88 4.对每个sheet筛选出符合的列数据
89 5.对数据删除空行,删除第一列为空的整行
90 6.写入新的excel中
91 7.缺点:数据有重复
92 '''
93 sheetNameList = self.getSheetNames(readFilePath)
94 toFilePath = '../reports/to01.xlsx'
95 self.excel01(readFilePath, toFilePath, sheetNameList)
96
97 #2.对上面有重复的数据处理:删除重复数据
98 '''
99 步骤:
100 1.获取获取所有的sheet名称
101 2.获取每个sheet对应的数据
102 3.对数据去除重复的
103 4.将数据重新写入新的excel中
104 5.缺点:sheet太多
105 '''
106 readFilePath = toFilePath
107 sheetNameList = self.getSheetNames(readFilePath)
108 toFilePath = '../reports/to02.xlsx'
109 self.qcfxls(readFilePath, sheetNameList, toFilePath)
110
111 #3.对上面数据整合在一个sheet中,包含81BOM,HSG,MLB
112 '''
113 步骤:
114 1.获取每个sheet中的数据
115 2.将每个sheet中的数据追加到一个空数组中
116 3.将数组通过concat结合在一起
117 4.写入新的excel中
118 5.缺点:HSG跟MLB在一起
119 '''
120 readFilePath = toFilePath
121 sheetNameList = self.getSheetNames(readFilePath)
122 toFilePath1 = '../reports/to03_key.xlsx'
123 toFilePath2 = '../reports/to03.xlsx'
124 self.zhengheALLsheet(readFilePath, sheetNameList, toFilePath1, toFilePath2)
125
126 #4.对上面数据只想获取想要的81BOM,HSG
127 '''
128 步骤:
129 1.获取每个sheet中的数据
130 2.根据sheet只获取想要的81BOM跟HSG数据头
131 3.根据表头获取对应列的数据
132 4.对数据删除空行
133 5.对数据删除81BOM列中有nan的行
134 6.写入到新的excel中
135 7.缺点:有重复行
136 '''
137 readFilePath = toFilePath2
138 sheetNameList = self.getSheetNames(readFilePath)
139 toFilePath = '../reports/to04.xlsx'
140 myTopName = selectVar
141 self.foundtopnamedata(readFilePath, sheetNameList, toFilePath, myTopName)
142
143 #5.对上面数据去除重复
144 '''
145 步骤:
146 1.调用去除重复函数
147 2.缺点:一个81对应多个70在一行数据中
148 '''
149 readFilePath = toFilePath
150 sheetNameList = self.getSheetNames(readFilePath)
151 toFilePath = '../reports/to05.xlsx'
152 self.qcfxls(readFilePath, sheetNameList, toFilePath)
153
154 #6.对上面数据处理,使81跟70一一对应
155 '''
156 步骤:
157 1.读取excel数据
158 2.获取excel中的行数跟列数
159 3.获取81跟70配对,组合成2维数组
160 4.将二维数据写入新excel中
161 5.缺点:有重复数据,第二列有空数据
162 '''
163 readFilePath = toFilePath
164 toFilePath = '../reports/to06.xlsx'
165 sheetNameList = self.getSheetNames(readFilePath)
166 df = pd.read_excel(readFilePath, sheet_name=sheetNameList[0])
167 hanglie = df.shape
168 hang = hanglie[0]
169 lie = hanglie[1]
170 result2 = self.get8170lists(df, hang, lie)
171 self.writerexcel(sheetNameList,toFilePath, result2)
172
173 #7.对上面数据进行处理,去除重复行
174 readFilePath = toFilePath
175 sheetNameList = self.getSheetNames(readFilePath)
176 toFilePath = '../reports/to07.xlsx'
177 self.qcfxls(readFilePath, sheetNameList, toFilePath)
178
179 #8.去除第二列有nan的整行数据
180 readFilePath = toFilePath
181 toFilePath = '../reports/to08.xlsx'
182 sheetNameList = self.getSheetNames(readFilePath)
183 df = pd.read_excel(readFilePath, sheet_name=sheetNameList[0])
184 topNames = (df.keys()).values
185 self.quchunan(topNames, sheetNameList, readFilePath, toFilePath)
186
187 '''去除第二列有为空的整行数据'''
188 def quchunan(self, topNames, sheetNameList, readFilePath, toFilePath):
189 writer = pd.ExcelWriter(toFilePath)
190 df = pd.read_excel(readFilePath, sheet_name=sheetNameList[0])
191 d = self.returnDict(topNames, df)
192 todf = pd.DataFrame(d)
193 todf = todf.dropna(subset=topNames[1])
194 todf.to_excel(writer, sheet_name=sheetNameList[0], index=False)
195 writer.save()
196 writer.close()
197 print("创建excel OK,去除了第二列为空的数据!!!")
198
199 '''将二维数据写入excel中'''
200 def writerexcel(self, sheetNameList, toFilePath, result2):
201 writer = pd.ExcelWriter(toFilePath)
202 df = pd.DataFrame(result2)
203 df.to_excel(writer, sheet_name=sheetNameList[0], index=False)
204 writer.save()
205 writer.close()
206
207 '''获取81跟70配对,组合成2维数组'''
208 def get8170lists(self, df, hang, lie):
209 n = 0
210 m = 1
211 h_list = []
212 h_lists = []
213 while n < hang:
214 while m < lie:
215 valueij = df.values[n, 0]
216 h_list.append(valueij)
217
218 valueij = df.values[n, m]
219 h_list.append(valueij)
220
221 h_lists.append(h_list)
222 h_list = []
223 m += 1
224 n += 1
225 m = 1
226 return h_lists
227
228 '''根据sheet名称获取对应列名的数据,写入新的excel中'''
229 def foundtopnamedata(self, readFilePath, sheetNameList, toFilePath, myTopName):
230 sheetName = sheetNameList[0]
231 print(sheetName)
232 df = pd.read_excel(readFilePath, sheet_name=sheetName)
233 topNames = (df.keys()).values
234 if myTopName=="BC1":
235 print("获取HSG与81的配对")
236 findArr = self.returnFiindHSG(topNames)
237 elif myTopName=="MBM":
238 print("获取MLB与81的配对")
239 findArr = self.returnFiindMLB(topNames)
240 else:
241 findArr = []
242
243 if findArr:
244 print("获取对应列的所有数据")
245 d = self.returnDict(findArr, df)
246 todf = pd.DataFrame(d)
247 self.dfWriteexcel(todf, toFilePath, findArr, myTopName)
248 else:
249 print("没有找到想要的列表头")
250
251 '''数据写入excel'''
252 def dfWriteexcel(self, todf, toFilePath, findArr, sheetName):
253 writer = pd.ExcelWriter(toFilePath)
254 print("删除空行")
255 todf = todf.dropna(how='all')
256 print("删除81BOM列中有nan的行")
257 todf = todf.dropna(subset=findArr[0])
258 todf.to_excel(writer, sheet_name=sheetName, index=False)
259 writer.save()
260 writer.close()
261 print("创建excel OK,一个sheet,只有想要的81BOM,HSG列或81BOM,MLB列!!!")
262
263 '''整合所有sheet放在第一个sheet中'''
264 def zhengheALLsheet(self, readFilePath, sheetNameList, toFilePath1, toFilePath2):
265 writer1 = pd.ExcelWriter(toFilePath1)
266 writer2 = pd.ExcelWriter(toFilePath2)
267
268 k = 0
269 frames = []
270 while k < len(sheetNameList):
271 sheetName = sheetNameList[k]
272 print(sheetName)
273 df = pd.read_excel(readFilePath, sheet_name=sheetName)
274 # print(df1)
275 frames.append(df)
276 k += 1
277 print(frames)
278
279 result1 = pd.concat(frames, keys=sheetNameList)
280 result1.to_excel(writer1, sheet_name="allhsgmlb")
281 writer1.save()
282 writer1.close()
283
284 result2 = pd.concat(frames)
285 result2.to_excel(writer2, sheet_name="allhsgmlb", index=False)
286 writer2.save()
287 writer2.close()
288 print("数据整合到一个sheet中成功!")
289
290 '''去除重复的数据'''
291 def qcfxls(self, readFilePath, sheetNameList, toFilePath):
292 writer = pd.ExcelWriter(toFilePath)
293 # 读取excel中的数据
294 j = 0
295 while j < len(sheetNameList):
296 sheetName = sheetNameList[j]
297 data = pd.DataFrame(pd.read_excel(readFilePath, sheetName))
298 # 查看去除重复行的数据
299 no_re_row = data.drop_duplicates()
300 print(no_re_row)
301 # 将去除重复行的数据输出到excel表中
302 no_re_row.to_excel(writer, sheet_name=sheetName, index=False)
303 j += 1
304 writer.save()
305 writer.close()
306 print("去除重复数据OK")
307
308 '''第一次处理excel,获取想要的81BOM,HSG,MLB,数据有重复,去除了空行跟第一列为空的整行数据'''
309 def excel01(self, readFilePath, toFilePath, sheetNameList):
310 k = 0
311 writer = pd.ExcelWriter(toFilePath)
312 while k < len(sheetNameList):
313 sheetName = sheetNameList[k]
314 print(sheetName)
315 df = pd.read_excel(readFilePath, sheet_name=sheetName)
316 print("获取表头")
317 topNames = (df.keys()).values
318 print(topNames)
319 print("查找是否有81BOM,HSG,MLB列")
320 findArr = self.returnFiindAll(topNames)
321 print("整合后的表头数据")
322 print(findArr)
323
324 if findArr:
325 print("获取对应列的所有数据")
326 d = self.returnDict(findArr, df)
327 todf = pd.DataFrame(d)
328 print("删除空行")
329 todf = todf.dropna(how='all')
330 print("删除该列中有nan的行")
331 todf = todf.dropna(subset=findArr[0])
332 todf.to_excel(writer, sheet_name=sheetName, index=False)
333 else:
334 print("没有找到想要的列表头")
335 k += 1
336 writer.save()
337 writer.close()
338 print("创建excel OK,该excel只留下81BOM,HSG,MLB数据,有重复!!!")
339 return None
340
341 '''去取字符串左右空格'''
342 def qukongge(self, lieDatas):
343 allData_strip = []
344 for allData in lieDatas:
345 if isinstance(allData, str):
346 # print(allData.strip())
347 allData_strip.append(allData.strip())
348 else:
349 # print(allData)
350 allData_strip.append(allData)
351 return allData_strip
352
353 '''返回插入EXCEL数据的字典形式'''
354 def returnDict(self, findArr, df):
355 i = 0
356 d = {}
357 while i < len(findArr):
358 allData1 = df.loc[:, findArr[i]].values
359 allData1a = self.qukongge(allData1)
360 d[findArr[i]] = allData1a
361 i += 1
362 return d
363
364 '''返回符合要求的MLB表头'''
365 def returnFiindMLB(self, topNames):
366 MLB = '70 MLB Bin'
367 findArr = []
368 for topName in topNames:
369 if topName == '81BOM':
370 findArr.append(topName)
371 print(findArr)
372 if MLB in topName:
373 findArr.append(topName)
374 print(findArr)
375 return findArr
376
377 '''返回符合要求的HSG表头'''
378 def returnFiindHSG(self, topNames):
379 HSG = '70 HSG Bin'
380 findArr = []
381 for topName in topNames:
382 if topName == '81BOM':
383 findArr.append(topName)
384 print(findArr)
385 if HSG in topName:
386 findArr.append(topName)
387 print(findArr)
388 return findArr
389
390 '''返回符合要求的81BOM,HSG,MLB表头'''
391 def returnFiindAll(self, topNames):
392 findArr = []
393 for topName in topNames:
394 if topName == '81BOM':
395 findArr.append(topName)
396 print(findArr)
397 if '70 HSG Bin' in topName:
398 findArr.append(topName)
399 print(findArr)
400 if '70 MLB Bin' in topName:
401 findArr.append(topName)
402 print(findArr)
403 return findArr
404
405 '''获取excel中所有的sheet名称'''
406 def getSheetNames(self, readFilePath, sheetName=None):
407 df = pd.read_excel(readFilePath, sheet_name=sheetName)
408 sheetNameList = list(df)
409 return sheetNameList
410
411 if __name__ == '__main__':
412 root = Tk()
413 root.geometry("500x300+200+300")
414 root.title("81 mapping 70 数据处理小程序")
415 app = OpenMyXLS(master=root)
416 root.mainloop()