使用pywin32处理excel文件

  1 #!/usr/bin/env python
  2 #-*- coding:utf-8 -*-
  3 
  4 #######################################################
  5 #  用于批量删除excel的指定行                          #
  6 #  适用于所有office,前提需要安装pywin32和office软件  #
  7 #######################################################
  8 
  9 import os
 10 import sys
 11 import time
 12 import glob
 13 import shutil
 14 import string
 15 import os.path
 16 import traceback
 17 import ConfigParser
 18 import win32com.client
 19 
 20 SPATH = ""            #需处理的excel文件目录
 21 DPATH = ""            #处理后的excel存放目录
 22 
 23 SKIP_FILE_LIST = []   #需要跳过的文件列表
 24 MAX_SHEET_INDEX = 1   #每个excel文件的前几个表需要处理
 25 DELETE_ROW_LIST = []  #需要删除的行号
 26 
 27 def dealPath(pathname=''):
 28     '''deal with windows file path'''
 29     if pathname:
 30         pathname = pathname.strip()
 31     if pathname:
 32         pathname = r'%s'%pathname
 33         pathname = string.replace(pathname, r'/', '\\')
 34         pathname = os.path.abspath(pathname)
 35         if pathname.find(":\\") == -1:
 36             pathname = os.path.join(os.getcwd(), pathname)
 37     return pathname
 38 
 39 class EasyExcel(object):
 40     '''class of easy to deal with excel'''
 41     
 42     def __init__(self):
 43         '''initial excel application'''
 44         self.m_filename = ''
 45         self.m_exists = False
 46         self.m_excel = win32com.client.DispatchEx('Excel.Application') #也可以用Dispatch,前者开启新进程,后者会复用进程中的excel进程
 47         self.m_excel.DisplayAlerts = False                             #覆盖同名文件时不弹出确认框
 48     
 49     def open(self, filename=''):
 50         '''open excel file'''
 51         if getattr(self, 'm_book', False):
 52             self.m_book.Close()
 53         self.m_filename = dealPath(filename) or ''
 54         self.m_exists = os.path.isfile(self.m_filename)
 55         if not self.m_filename or not self.m_exists:
 56             self.m_book = self.m_excel.Workbooks.Add()
 57         else:
 58             self.m_book = self.m_excel.Workbooks.Open(self.m_filename)
 59     
 60     def reset(self):
 61         '''reset'''
 62         self.m_excel = None
 63         self.m_book = None
 64         self.m_filename = ''
 65         
 66     def save(self, newfile=''):
 67         '''save the excel content'''
 68         assert type(newfile) is str, 'filename must be type string'
 69         newfile = dealPath(newfile) or self.m_filename
 70         if not newfile or (self.m_exists and newfile == self.m_filename):
 71             self.m_book.Save()
 72             return
 73         pathname = os.path.dirname(newfile)
 74         if not os.path.isdir(pathname):
 75             os.makedirs(pathname)
 76         self.m_filename = newfile
 77         self.m_book.SaveAs(newfile)
 78     
 79     def close(self):
 80         '''close the application'''
 81         self.m_book.Close(SaveChanges=1)
 82         self.m_excel.Quit()
 83         time.sleep(2)
 84         self.reset()
 85     
 86     def addSheet(self, sheetname=None):
 87         '''add new sheet, the name of sheet can be modify,but the workbook can't '''
 88         sht = self.m_book.Worksheets.Add()
 89         sht.Name = sheetname if sheetname else sht.Name
 90         return sht
 91     
 92     def getSheet(self, sheet=1):
 93         '''get the sheet object by the sheet index'''
 94         assert sheet > 0, 'the sheet index must bigger then 0'
 95         return self.m_book.Worksheets(sheet)
 96         
 97     def getSheetByName(self, name):
 98         '''get the sheet object by the sheet name'''
 99         for i in xrange(1, self.getSheetCount()+1):
100             sheet = self.getSheet(i)
101             if name == sheet.Name:
102                 return sheet
103         return None
104     
105     def getCell(self, sheet=1, row=1, col=1):
106         '''get the cell object'''
107         assert row>0 and col>0, 'the row and column index must bigger then 0'
108         return self.getSheet(sheet).Cells(row, col)
109         
110     def getRow(self, sheet=1, row=1):
111         '''get the row object'''
112         assert row>0, 'the row index must bigger then 0'
113         return self.getSheet(sheet).Rows(row)
114         
115     def getCol(self, sheet, col):
116         '''get the column object'''
117         assert col>0, 'the column index must bigger then 0'
118         return self.getSheet(sheet).Columns(col)
119     
120     def getRange(self, sheet, row1, col1, row2, col2):
121         '''get the range object'''
122         sht = self.getSheet(sheet)
123         return sht.Range(self.getCell(sheet, row1, col1), self.getCell(sheet, row2, col2))
124     
125     def getCellValue(self, sheet, row, col):
126         '''Get value of one cell'''
127         return self.getCell(sheet,row, col).Value
128         
129     def setCellValue(self, sheet, row, col, value):
130         '''set value of one cell'''
131         self.getCell(sheet, row, col).Value = value
132         
133     def getRowValue(self, sheet, row):
134         '''get the row values'''
135         return self.getRow(sheet, row).Value
136         
137     def setRowValue(self, sheet, row, values):
138         '''set the row values'''
139         self.getRow(sheet, row).Value = values
140     
141     def getColValue(self, sheet, col):
142         '''get the row values'''
143         return self.getCol(sheet, col).Value
144         
145     def setColValue(self, sheet, col, values):
146         '''set the row values'''
147         self.getCol(sheet, col).Value = values
148     
149     def getRangeValue(self, sheet, row1, col1, row2, col2):
150         '''return a tuples of tuple)'''
151         return self.getRange(sheet, row1, col1, row2, col2).Value
152     
153     def setRangeValue(self, sheet, row1, col1, data):
154         '''set the range values'''
155         row2 = row1 + len(data) - 1
156         col2 = col1 + len(data[0]) - 1
157         range = self.getRange(sheet, row1, col1, row2, col2)
158         range.Clear()
159         range.Value = data
160         
161     def getSheetCount(self):
162         '''get the number of sheet'''
163         return self.m_book.Worksheets.Count
164     
165     def getMaxRow(self, sheet):
166         '''get the max row number, not the count of used row number'''
167         return self.getSheet(sheet).Rows.Count
168         
169     def getMaxCol(self, sheet):
170         '''get the max col number, not the count of used col number'''
171         return self.getSheet(sheet).Columns.Count
172         
173     def clearCell(self, sheet, row, col):
174         '''clear the content of the cell'''
175         self.getCell(sheet,row,col).Clear()
176         
177     def deleteCell(self, sheet, row, col):
178         '''delete the cell'''
179         self.getCell(sheet, row, col).Delete()
180         
181     def clearRow(self, sheet, row):
182         '''clear the content of the row'''
183         self.getRow(sheet, row).Clear()
184         
185     def deleteRow(self, sheet, row):
186         '''delete the row'''
187         self.getRow(sheet, row).Delete()
188         
189     def clearCol(self, sheet, col):
190         '''clear the col'''
191         self.getCol(sheet, col).Clear()
192         
193     def deleteCol(self, sheet, col):
194         '''delete the col'''
195         self.getCol(sheet, col).Delete()
196         
197     def clearSheet(self, sheet):
198         '''clear the hole sheet'''
199         self.getSheet(sheet).Clear()
200         
201     def deleteSheet(self, sheet):
202         '''delete the hole sheet'''
203         self.getSheet(sheet).Delete()
204     
205     def deleteRows(self, sheet, fromRow, count=1):
206         '''delete count rows of the sheet'''
207         maxRow = self.getMaxRow(sheet)
208         maxCol = self.getMaxCol(sheet)
209         endRow = fromRow+count-1
210         if fromRow > maxRow or endRow < 1:
211             return
212         self.getRange(sheet, fromRow, 1, endRow, maxCol).Delete()
213         
214     def deleteCols(self, sheet, fromCol, count=1):
215         '''delete count cols of the sheet'''
216         maxRow = self.getMaxRow(sheet)
217         maxCol = self.getMaxCol(sheet)
218         endCol = fromCol + count - 1
219         if fromCol > maxCol or endCol < 1:
220             return
221         self.getRange(sheet, 1, fromCol, maxRow, endCol).Delete()
222         
223 
224 def echo(msg):
225     '''echo message'''
226     print msg
227     
228 def dealSingle(excel, sfile, dfile):
229     '''deal with single excel file'''
230     echo("deal with %s"%sfile)
231     basefile = os.path.basename(sfile)
232     excel.open(sfile)
233     sheetcount = excel.getSheetCount()
234     if not (basefile in SKIP_FILE_LIST or file in SKIP_FILE_LIST):
235         for sheet in range(1, sheetcount+1):
236             if sheet > MAX_SHEET_INDEX: 
237                 continue
238             reduce = 0
239             for row in DELETE_ROW_LIST:
240                 excel.deleteRow(sheet, row-reduce)
241                 reduce += 1
242             #excel.deleteRows(sheet, 2, 2)
243     excel.save(dfile)
244     
245 def dealExcel(spath, dpath):
246     '''deal with excel files'''
247     start = time.time()
248     #check source path exists or not 
249     spath = dealPath(spath)
250     if not os.path.isdir(spath):
251         echo("No this directory :%s"%spath)
252         return
253     #check destination path exists or not
254     dpath = dealPath(dpath)
255     if not os.path.isdir(dpath):
256         os.makedirs(dpath)
257     shutil.rmtree(dpath)
258     #list the excel file
259     filelist = glob.glob(os.path.join(spath, '*.xlsx'))
260     if not filelist:
261         echo('The path of %s has no excel file'%spath)
262         return
263     #deal with excel file
264     excel = EasyExcel()
265     for file in filelist:
266         basefile = os.path.basename(file)
267         destfile = os.path.join(dpath, basefile)
268         dealSingle(excel, file, destfile)
269     echo('Use time:%s'%(time.time()-start))
270     excel.close()
271     
272 def loadConfig(configfile='./config.ini'):
273     '''parse config file'''
274     global SPATH
275     global DPATH
276     global SKIP_FILE_LIST
277     global MAX_SHEET_INDEX
278     global DELETE_ROW_LIST
279     
280     file = dealPath(configfile)
281     if not os.path.isfile(file):
282         echo('Can not find the config.ini')
283         return False
284     parser = ConfigParser.ConfigParser()
285     parser.read(file)
286     SPATH = parser.get('pathconfig', 'spath').strip()
287     DPATH = parser.get('pathconfig', 'dpath').strip()
288     filelist = parser.get('otherconfig', 'filelist').strip()
289     index = parser.get('otherconfig', 'maxindex').strip()
290     rowlist = parser.get('otherconfig', 'deleterows').strip()
291     if filelist:
292         SKIP_FILE_LIST = filelist.split(";")
293     if rowlist:
294         DELETE_ROW_LIST = map(int, rowlist.split(";"))
295     MAX_SHEET_INDEX = int(index) if index else MAX_SHEET_INDEX
296         
297     
298 def main():
299     '''main function'''
300     loadConfig()
301     if SPATH and DPATH and MAX_SHEET_INDEX:
302         dealExcel(SPATH, DPATH)
303     raw_input("Please press any key to exit!")
304     
305 if __name__=="__main__":
306     main()

 

posted on 2017-11-06 12:21  帅胡  阅读(4071)  评论(0编辑  收藏  举报

导航