1 # encoding:utf-8
2
3 import pandas as pd
4 from openpyxl import load_workbook
5
6 '''
7 本案例获取所有符合的列数据
8 去除了每个栏位的空格
9 去除了空行数据
10 去除了第一列为空的整行数据
11 多个sheet
12
13 对多个sheet数据整合在一个sheet中
14
15 对整合在一个sheet中的数据只获取想要的列
16
17 获取对应行对应列的数据写到excel中
18 即第一列与后面每一列一一对应放入到新的一行中
19 组成一个2维数组,写入倒新的EXCEL中
20 '''
21
22 class OpenMyXLS():
23 def __init__(self, filepath):
24 self.filepath = filepath
25 self.openxls()
26
27 def openxls(self):
28 '''获取第一个sheet的数据'''
29 df = pd.read_excel(self.filepath, sheet_name=0)
30 '''获取行数列数'''
31 hanglie = df.shape
32 hang = hanglie[0]
33 lie = hanglie[1]
34 print(hanglie)
35
36 '''将第i行第j列的值加到excel中,组成只有两列的数据'''
37 tofilepath = "../HSGtotest81_zh2_dange.xlsx"
38 result2 = self.get8170lists(df, hang, lie)
39 self.writerexcel(tofilepath,result2)
40
41 '''将二维数据写入excel中'''
42 def writerexcel(self, tofilepath, result2):
43 writer = pd.ExcelWriter(tofilepath)
44 df = pd.DataFrame(result2)
45 df.to_excel(writer, sheet_name="allhsgmlb", index=False)
46 writer.save()
47 writer.close()
48
49 '''获取81跟70配对,组合成2维数组'''
50 def get8170lists(self, df, hang, lie):
51 n = 0
52 m = 1
53 h_list = []
54 h_lists = []
55 while n < hang:
56 while m < lie:
57 valueij = df.values[n, 0]
58 h_list.append(valueij)
59
60 valueij = df.values[n, m]
61 h_list.append(valueij)
62 # print(h_list)
63
64 h_lists.append(h_list)
65 print(h_lists)
66 h_list = []
67 m += 1
68 n += 1
69 m = 1
70 return h_lists
71
72 '''获取excel中所有sheet的名称'''
73 def getSheetNames(self, filepath):
74 df = pd.read_excel(filepath, sheet_name=None)
75 print("所有的sheet名称:")
76 sheetNameList = list(df)
77 return sheetNameList
78
79 '''根据sheet名称获取对应列名的数据,写入新的excel中'''
80 def foundtopnamedata(self, sheetNameList, file1):
81 k = 0
82 writer = pd.ExcelWriter(file1)
83 while k < len(sheetNameList):
84 sheetName = sheetNameList[k]
85 print(sheetName)
86 df = pd.read_excel(self.filepath, sheet_name=sheetName)
87 print("获取表头")
88 topNames = (df.keys()).values
89 print(topNames)
90 print("查找是否有81BOM列")
91 findArr = self.returnFiindHSG(topNames)
92 # findArr = self.returnFiindMLB(topNames)
93 print("整合后的表头数据")
94 print(findArr)
95
96 if findArr:
97 print("获取对应列的所有数据")
98 d = self.returnDict(findArr, df)
99 todf = pd.DataFrame(d)
100 print("删除空行")
101 todf = todf.dropna(how='all')
102 print("删除81BOM列中有nan的行")
103 todf = todf.dropna(subset='81BOM')
104 todf.to_excel(writer, sheet_name=sheetName, index=False)
105 else:
106 print("没有找到想要的列表头")
107 k += 1
108 writer.save()
109 writer.close()
110 print("创建excel OK!!!")
111
112 '''整合所有sheet放在第一个sheet中'''
113 def zhengheALLsheet(self, readfilepath, tofilepath1, tofilepath2):
114 writer1 = pd.ExcelWriter(tofilepath1)
115 writer2 = pd.ExcelWriter(tofilepath2)
116
117 sheetNameList = self.getSheetNames(readfilepath)
118 print(len(sheetNameList))
119
120 k = 0
121 frames = []
122 while k < len(sheetNameList):
123 sheetName = sheetNameList[k]
124 print(sheetName)
125 df1 = pd.read_excel(readfilepath, sheet_name=sheetName)
126 #print(df1)
127 frames.append(df1)
128 k += 1
129 print(frames)
130
131 result1 = pd.concat(frames, keys=sheetNameList)
132 result1.to_excel(writer1, sheet_name="allhsgmlb")
133 writer1.save()
134 writer1.close()
135
136 result2 = pd.concat(frames)
137 result2.to_excel(writer2, sheet_name="allhsgmlb", index=False)
138 writer2.save()
139 writer2.close()
140 print("数据整合到一个sheet中成功!")
141
142 '''去除重复行的数据'''
143 def qcfxls(self, readfilepath, sheetNameList, tofilepath):
144 writer = pd.ExcelWriter(tofilepath)
145 # 读取excel中的数据
146 j = 0
147 while j < len(sheetNameList):
148 sheetName = sheetNameList[j]
149 data = pd.DataFrame(pd.read_excel(readfilepath, sheetName))
150 # 查看获取的数据
151 #print(data)
152 # 查看去除重复行的数据
153 no_re_row = data.drop_duplicates()
154 print(no_re_row)
155 # 将去除重复行的数据输出到excel表中
156 no_re_row.to_excel(writer, sheet_name=sheetName, index=False)
157 j +=1
158 writer.save()
159 writer.close()
160 print("去除重复数据OK")
161
162 '''返回插入EXCEL数据的字典形式'''
163 def returnDict(self, findArr, df):
164 i = 0
165 d = {}
166 while i < len(findArr):
167 allData1 = df.loc[:, findArr[i]].values
168 allData1a = self.qukongge(allData1)
169 d[findArr[i]] = allData1a
170 i += 1
171 return d
172
173 '''返回符合要求的表头'''
174 def returnFiindMLB(self, topNames):
175 findArr = []
176 for topName in topNames:
177 if topName == '81BOM':
178 findArr.append(topName)
179 print(findArr)
180 if '70 MLB Bin' in topName:
181 findArr.append(topName)
182 print(findArr)
183 return findArr
184
185 '''返回符合要求的表头'''
186 def returnFiindHSG(self, topNames):
187 findArr = []
188 for topName in topNames:
189 if topName == '81BOM':
190 findArr.append(topName)
191 print(findArr)
192 if '70 HSG Bin' in topName:
193 findArr.append(topName)
194 print(findArr)
195 return findArr
196
197 '''返回符合要求的表头'''
198 def returnFiindAll(self, topNames):
199 findArr = []
200 for topName in topNames:
201 if topName == '81BOM':
202 findArr.append(topName)
203 print(findArr)
204 if '70 HSG Bin' in topName:
205 findArr.append(topName)
206 print(findArr)
207 if '70 MLB Bin' in topName:
208 findArr.append(topName)
209 print(findArr)
210 return findArr
211
212 '''去取字符串左右空格'''
213 def qukongge(self, lieDatas):
214 allData_strip = []
215 for allData in lieDatas:
216 if isinstance(allData, str):
217 # print(allData.strip())
218 allData_strip.append(allData.strip())
219 else:
220 # print(allData)
221 allData_strip.append(allData)
222 return allData_strip
223
224
225 if __name__ == '__main__':
226 filepath = '../HSGtotest81_zh2.xlsx'
227 oxl = OpenMyXLS(filepath)