1 # encoding:utf-8
2
3 import pandas as pd
4 from openpyxl import load_workbook
5
6 '''
7 本案例获取所有符合的列数据
8 去除了每个栏位的空格
9 去除了空行数据
10 去除了第一列为空的整行数据
11 多个sheet
12
13 对多个sheet数据整合在一个sheet中
14
15 对整合在一个sheet中的数据只获取想要的列
16
17 获取对应行对应列的数据写到excel中
18 即第一列与后面每一列一一对应放入到新的一行中
19 组成一个2维数组,写入倒新的EXCEL中
20
21 去出重复,去除第二列有空行的数据
22 '''
23
24 class OpenMyXLS():
25 def __init__(self, filepath):
26 self.filepath = filepath
27 self.openxls()
28
29 def openxls(self):
30 '''获取第一个sheet的数据'''
31 df = pd.read_excel(self.filepath, sheet_name=0)
32 '''获取行数列数'''
33 hanglie = df.shape
34 hang = hanglie[0]
35 lie = hanglie[1]
36 print(hanglie)
37
38 '''将第i行第j列的值加到excel中,组成只有两列的数据'''
39 tofilepath = "../HSGtotest81_zh2_dange.xlsx"
40 result2 = self.get8170lists(df, hang, lie)
41 self.writerexcel(tofilepath,result2)
42
43 '''去除重复的行'''
44 readfilepath = tofilepath
45 sheetNameList = self.getSheetNames(readfilepath)
46 tofilepath = "../HSGtotest81_zh2_dange_qc.xlsx"
47 self.qcfxls(readfilepath, sheetNameList, tofilepath)
48
49 '''去除第二列有nan的整行数据'''
50 #findArr = [0, 1]
51 readfilepath = tofilepath
52 tofilepath = "../HSGtotest81_zh2_dange_qc_nan.xlsx"
53 sheetNameList = self.getSheetNames(readfilepath)
54 df = pd.read_excel(readfilepath, sheet_name=sheetNameList[0])
55 topNames = (df.keys()).values
56 print(topNames)
57 self.quchunan(topNames, sheetNameList, readfilepath, tofilepath)
58
59 '''去除第二列有为空的整行数据'''
60 def quchunan(self, findArr, sheetNameList, readfilepath, tofilepath):
61 writer = pd.ExcelWriter(tofilepath)
62 df = pd.read_excel(readfilepath, sheet_name=sheetNameList[0])
63 d = self.returnDict(findArr, df)
64 todf = pd.DataFrame(d)
65 todf = todf.dropna(subset=findArr[1])
66 todf.to_excel(writer, sheet_name=sheetNameList[0], index=False)
67 writer.save()
68 writer.close()
69 print("创建excel OK!!!")
70
71 '''将二维数据写入excel中'''
72 def writerexcel(self, tofilepath, result2):
73 writer = pd.ExcelWriter(tofilepath)
74 df = pd.DataFrame(result2)
75 df.to_excel(writer, sheet_name="allhsgmlb", index=False)
76 writer.save()
77 writer.close()
78
79 '''获取81跟70配对,组合成2维数组'''
80 def get8170lists(self, df, hang, lie):
81 n = 0
82 m = 1
83 h_list = []
84 h_lists = []
85 while n < hang:
86 while m < lie:
87 valueij = df.values[n, 0]
88 h_list.append(valueij)
89
90 valueij = df.values[n, m]
91 h_list.append(valueij)
92 # print(h_list)
93
94 h_lists.append(h_list)
95 print(h_lists)
96 h_list = []
97 m += 1
98 n += 1
99 m = 1
100 return h_lists
101
102 '''获取excel中所有sheet的名称'''
103 def getSheetNames(self, filepath):
104 df = pd.read_excel(filepath, sheet_name=None)
105 print("所有的sheet名称:")
106 sheetNameList = list(df)
107 print(sheetNameList)
108 return sheetNameList
109
110 '''根据sheet名称获取对应列名的数据,写入新的excel中'''
111 def foundtopnamedata(self, sheetNameList, file1):
112 k = 0
113 writer = pd.ExcelWriter(file1)
114 while k < len(sheetNameList):
115 sheetName = sheetNameList[k]
116 print(sheetName)
117 df = pd.read_excel(self.filepath, sheet_name=sheetName)
118 print("获取表头")
119 topNames = (df.keys()).values
120 print(topNames)
121 print("查找是否有81BOM列")
122 findArr = self.returnFiindHSG(topNames)
123 # findArr = self.returnFiindMLB(topNames)
124 print("整合后的表头数据")
125 print(findArr)
126
127 if findArr:
128 print("获取对应列的所有数据")
129 d = self.returnDict(findArr, df)
130 todf = pd.DataFrame(d)
131 print("删除空行")
132 todf = todf.dropna(how='all')
133 print("删除81BOM列中有nan的行")
134 todf = todf.dropna(subset='81BOM')
135 todf.to_excel(writer, sheet_name=sheetName, index=False)
136 else:
137 print("没有找到想要的列表头")
138 k += 1
139 writer.save()
140 writer.close()
141 print("创建excel OK!!!")
142
143 '''整合所有sheet放在第一个sheet中'''
144 def zhengheALLsheet(self, readfilepath, tofilepath1, tofilepath2):
145 writer1 = pd.ExcelWriter(tofilepath1)
146 writer2 = pd.ExcelWriter(tofilepath2)
147
148 sheetNameList = self.getSheetNames(readfilepath)
149 print(len(sheetNameList))
150
151 k = 0
152 frames = []
153 while k < len(sheetNameList):
154 sheetName = sheetNameList[k]
155 print(sheetName)
156 df1 = pd.read_excel(readfilepath, sheet_name=sheetName)
157 #print(df1)
158 frames.append(df1)
159 k += 1
160 print(frames)
161
162 result1 = pd.concat(frames, keys=sheetNameList)
163 result1.to_excel(writer1, sheet_name="allhsgmlb")
164 writer1.save()
165 writer1.close()
166
167 result2 = pd.concat(frames)
168 result2.to_excel(writer2, sheet_name="allhsgmlb", index=False)
169 writer2.save()
170 writer2.close()
171 print("数据整合到一个sheet中成功!")
172
173 '''去除重复行的数据'''
174 def qcfxls(self, readfilepath, sheetNameList, tofilepath):
175 writer = pd.ExcelWriter(tofilepath)
176 # 读取excel中的数据
177 j = 0
178 while j < len(sheetNameList):
179 sheetName = sheetNameList[j]
180 data = pd.DataFrame(pd.read_excel(readfilepath, sheetName))
181 # 查看获取的数据
182 #print(data)
183 # 查看去除重复行的数据
184 no_re_row = data.drop_duplicates()
185 print(no_re_row)
186 # 将去除重复行的数据输出到excel表中
187 no_re_row.to_excel(writer, sheet_name=sheetName, index=False)
188 j += 1
189 writer.save()
190 writer.close()
191 print("去除重复数据OK")
192
193 '''返回插入EXCEL数据的字典形式'''
194 def returnDict(self, findArr, df):
195 i = 0
196 d = {}
197 while i < len(findArr):
198 allData1 = df.loc[:, findArr[i]].values
199 allData1a = self.qukongge(allData1)
200 d[findArr[i]] = allData1a
201 i += 1
202 return d
203
204 '''返回符合要求的表头'''
205 def returnFiindMLB(self, topNames):
206 findArr = []
207 for topName in topNames:
208 if topName == '81BOM':
209 findArr.append(topName)
210 print(findArr)
211 if '70 MLB Bin' in topName:
212 findArr.append(topName)
213 print(findArr)
214 return findArr
215
216 '''返回符合要求的表头'''
217 def returnFiindHSG(self, topNames):
218 findArr = []
219 for topName in topNames:
220 if topName == '81BOM':
221 findArr.append(topName)
222 print(findArr)
223 if '70 HSG Bin' in topName:
224 findArr.append(topName)
225 print(findArr)
226 return findArr
227
228 '''返回符合要求的表头'''
229 def returnFiindAll(self, topNames):
230 findArr = []
231 for topName in topNames:
232 if topName == '81BOM':
233 findArr.append(topName)
234 print(findArr)
235 if '70 HSG Bin' in topName:
236 findArr.append(topName)
237 print(findArr)
238 if '70 MLB Bin' in topName:
239 findArr.append(topName)
240 print(findArr)
241 return findArr
242
243 '''去取字符串左右空格'''
244 def qukongge(self, lieDatas):
245 allData_strip = []
246 for allData in lieDatas:
247 if isinstance(allData, str):
248 # print(allData.strip())
249 allData_strip.append(allData.strip())
250 else:
251 # print(allData)
252 allData_strip.append(allData)
253 return allData_strip
254
255
256 if __name__ == '__main__':
257 filepath = '../HSGtotest81_zh2.xlsx'
258 oxl = OpenMyXLS(filepath)