# encoding:utf-8
import pandas as pd
class OpenMyXLS():
def __init__(self, filepath):
self.filepath = filepath
self.openxls()
def openxls(self):
df = pd.read_excel(self.filepath,sheet_name=None)
print("所有的sheet名称:")
sheetNameList = list(df)
print(sheetNameList)
print('获取sheet数量:')
sheetNums = len(sheetNameList)
print(sheetNums)
print("获取索引对应的sheet对应的名称")
k = 0
writer = pd.ExcelWriter('../totest81.xlsx')
while k < len(sheetNameList):
sheetName = sheetNameList[k]
print(sheetName)
df = pd.read_excel(self.filepath, sheet_name=sheetName)
print("获取表头")
topNames = (df.keys()).values
print(topNames)
print("查找是否有81BOM列")
findArr = self.returnFiindAll(topNames)
print("整合后的表头数据")
print(findArr)
if findArr:
print("获取对应列的所有数据")
d = self.returnDict(findArr, df)
todf = pd.DataFrame(d)
print("删除空行")
todf = todf.dropna(how='all')
print("删除该列中有nan的行")
todf = todf.dropna(subset='81BOM')
todf.to_excel(writer, sheet_name=sheetName, index=False)
else:
print("没有找到想要的列表头")
k += 1
writer.save()
writer.close()
print("创建excel OK!!!")
df = pd.read_excel("../totest81.xlsx", sheet_name=None)
sheetNameList = list(df)
print(sheetNameList)
self.qcfxls('../totest81.xlsx', sheetNameList)
def qcfxls(self, filepath, sheetNameList):
writer = pd.ExcelWriter('../totest82.xlsx')
# 读取excel中的数据
j = 0
while j < len(sheetNameList):
sheetName = sheetNameList[j]
data = pd.DataFrame(pd.read_excel(filepath, sheetName))
# 查看获取的数据
#print(data)
# 查看去除重复行的数据
no_re_row = data.drop_duplicates()
print(no_re_row)
# 将去除重复行的数据输出到excel表中
no_re_row.to_excel(writer, sheet_name=sheetName, index=False)
j +=1
writer.save()
writer.close()
print("去除重复数据OK")
'''返回插入EXCEL数据的字典形式'''
def returnDict(self, findArr, df):
i = 0
d = {}
while i < len(findArr):
allData1 = df.loc[:, findArr[i]].values
allData1a = self.qukongge(allData1)
d[findArr[i]] = allData1a
i += 1
return d
'''返回符合要求的表头'''
def returnFiindAll(self, topNames):
findArr = []
for topName in topNames:
if topName == '81BOM':
findArr.append(topName)
print(findArr)
if '70 HSG Bin' in topName:
findArr.append(topName)
print(findArr)
if '70 MLB Bin' in topName:
findArr.append(topName)
print(findArr)
return findArr
'''去取字符串左右空格'''
def qukongge(self, lieDatas):
allData_strip = []
for allData in lieDatas:
if isinstance(allData, str):
# print(allData.strip())
allData_strip.append(allData.strip())
else:
# print(allData)
allData_strip.append(allData)
return allData_strip
if __name__ == '__main__':
filepath = '../test81.xlsx'
oxl = OpenMyXLS(filepath)