#!/usr/bin/env python
import os
import os.path
import sys
import re
import shutil
import csv
from multiprocessing import Pool #support muti thread map reduce function
# Regex metacharacters that must be escaped before a literal string can be
# used as a pattern (consumed by dealSpecialChars).
specialChars = ['.', '^' , '$', '*', '+', '?' ,'\\', '[', ']', '|', '(', ')' ]
# Upper bound on the number of worker processes in the multiprocessing Pool.
g_max_thread_pool_size = 30  # max worker-process pool size
def dealSpecialChars(text):
    """Escape regex metacharacters in text so it matches literally.

    Each metacharacter is wrapped in a one-character class, e.g.
    'a.b' -> 'a[.]b'.  A backslash is escaped as '\\\\' instead of being
    bracketed, because '[\\]' is an unterminated character class and would
    make the resulting pattern invalid (bug in the original).

    Args:
        text: plain string to turn into a literal-matching regex pattern.

    Returns:
        The escaped pattern string.
    """
    parts = []
    for c in text:
        if c == '\\':
            parts.append('\\\\')
        elif c in '.^$*+?[]|()':  # same set as module-level specialChars
            parts.append('[' + c + ']')
        else:
            parts.append(c)
    # join() avoids the quadratic string concatenation of the original loop.
    return ''.join(parts)
def readDictsFromCsv(filePath):
    """Load a srcName -> newName replacement map from a two-column CSV.

    Columns are comma-separated with '|' as the quote character; the file has
    no header row (field names are supplied explicitly).  Extra columns are
    collected under restkey=None and ignored.

    Args:
        filePath: path to the CSV file.

    Returns:
        dict mapping source pattern -> replacement string.
    """
    dicts = dict()
    # BUG FIX: the original opened the file in 'rb' (Python 2 idiom), which
    # hands bytes to csv under Python 3.  Text mode works on both versions.
    # NOTE(review): under Python 3, newline='' would additionally protect
    # quoted embedded newlines — not added to stay Python 2 compatible.
    with open(filePath, 'r') as csvfile:
        dictsReader = csv.DictReader(csvfile,
                                     fieldnames=['srcName', 'newName'],
                                     restkey=None, delimiter=',',
                                     quotechar='|')
        for d in dictsReader:
            dicts[d['srcName']] = d['newName']
    return dicts
# Extensions of binary formats that must never be text-replaced.
# NOTE(review): not referenced anywhere in this file's visible code — verify
# whether it is used elsewhere before removing.
BinaryExtList = ['.bmp', '.avi', '.res', '.xls', '.doc', '.dll', '.lib', '.bpl', '.exe', '.chm']
# Sample pattern -> replacement map; the real mappings come from the CSV file
# (ApplyReplace's 'replaceDicts' parameter shadows this module-level name).
replaceDicts = {r"RNC820V400R008C00SPC500": r"93" }
def ApplyReplace(line, keys, replaceDicts):
    """Return line with every regex in keys substituted via replaceDicts.

    Patterns are applied in the given order; a pattern that fails (invalid
    regex, or missing from replaceDicts) is reported and skipped, and the
    remaining patterns are still applied.

    Args:
        line: input text line.
        keys: iterable of regex pattern strings (ordered).
        replaceDicts: dict mapping each pattern to its replacement string.

    Returns:
        The line after all successful substitutions.
    """
    ret = line
    for pattern in keys:
        try:
            ret = re.sub(pattern, replaceDicts[pattern], ret)
        # Narrowed from a bare except: only expected failure modes.
        except (re.error, KeyError):
            print('Unexpected error ApplyReplace(str, keys, replaceDicts): %s' % line)
        # Removed: the original's 'finally: print str, ret' debug output,
        # which printed every processed line twice.
    return ret
def NeedReplace(line, keys):
    """Return True if any regex pattern in keys matches line.

    Invalid patterns are reported and skipped rather than aborting the scan
    (narrowed from the original's bare except).

    Args:
        line: text line to test.
        keys: iterable of regex pattern strings.

    Returns:
        True on the first match, False if no pattern matches.
    """
    for pattern in keys:
        try:
            if re.search(pattern, line):
                return True
        except re.error:
            print('Unexpected error NeedReplace(str, keys): %s :' % line)
    return False
# Text-file extensions processed when callers do not supply a filter.
defaultExtList = ['.txt', '.xml']
def findFile(srcDir, filter = None):
    """Recursively collect files under srcDir whose extension is in filter.

    Args:
        srcDir: root directory to scan.
        filter: iterable of lowercase extensions (e.g. ['.txt']); defaults to
            defaultExtList.

    Returns:
        Flat list of matching file paths.
    """
    if filter is None:
        filter = defaultExtList
    filelist = []
    for name in os.listdir(srcDir):
        # BUG FIX: the original built paths from the *global* 'srcPath'
        # instead of the 'srcDir' parameter, so recursion produced wrong
        # paths; os.path.join also replaces the hard-coded '\\' separator.
        fullPath = os.path.join(srcDir, name)
        if os.path.isdir(fullPath):
            # BUG FIX: extend (not append) keeps the result flat instead of
            # nesting lists; the filter is now propagated into the recursion.
            filelist.extend(findFile(fullPath, filter))
        else:
            if os.path.splitext(fullPath)[1].lower() in filter:
                filelist.append(fullPath)
    return filelist
def ReplaceAllStrInFile(file, dicts, keys, filter = defaultExtList):
    """Rewrite 'file' in place, applying every regex in 'keys' to each line.

    The file is streamed line by line into a sibling temp file, which then
    replaces the original on success.

    Args:
        file: path of the file to rewrite.
        dicts: pattern -> replacement mapping (see readDictsFromCsv).
        keys: ordered subset of dicts' keys to apply.
        filter: extensions eligible for rewriting; others are skipped.

    Returns:
        True on success, False on failure, None when the file's extension is
        not in 'filter' (skipped, matching the original's bare 'return').
    """
    print('ReplaceAllStrInFile:file- %s begin!' % file)
    fullPath = file
    if os.path.splitext(fullPath)[1].lower() not in filter:
        return
    tmpPath = fullPath + 'temp'
    try:
        # with-blocks replace the original's manual open/close bookkeeping
        # and guarantee both handles close even when an error occurs.
        with open(fullPath, 'r') as srcFile:
            with open(tmpPath, 'w') as destFile:
                for line in srcFile:
                    if NeedReplace(line, keys):
                        line = ApplyReplace(line, keys, dicts)
                    destFile.write(line)
        os.remove(fullPath)
        os.rename(tmpPath, fullPath)
        print('convert file: %s success!' % fullPath)
    except (IOError, OSError):  # narrowed from the original's bare excepts
        print('convert file: %s failed!' % fullPath)
        # Best-effort cleanup of the partially written temp file.
        if os.path.exists(tmpPath):
            try:
                os.remove(tmpPath)
            except OSError:
                pass
        return False
    return True
def ReplaceAllStrInFileByRows(srcfile, csvfilePath, maxRow = 10):
    """Load the CSV replacement map and apply it to srcfile in batches.

    Patterns are sorted longest-first so longer matches win over their own
    prefixes, then applied in batches of at most maxRow patterns per pass
    over the file.

    Args:
        srcfile: file to rewrite in place.
        csvfilePath: CSV file with srcName,newName rows.
        maxRow: batch size; values < 1 are rejected.

    Returns:
        False when maxRow < 1; otherwise None.
    """
    if maxRow < 1:
        # BUG FIX: the original returned the undefined name 'false', which
        # raised NameError instead of signalling the invalid argument.
        return False
    dicts = readDictsFromCsv(csvfilePath)
    # Sort keys by length, longest first.
    keys = sorted(dicts.keys(), key=len, reverse=True)
    # BUG FIX: floor division ('//') keeps range()'s argument an int under
    # Python 3 ('/' would yield a float and raise TypeError).
    for i in range(len(keys) // maxRow + 1):
        batch = keys[i * maxRow:(i + 1) * maxRow]
        ReplaceAllStrInFile(srcfile, dicts, batch)
        print(' '.join(batch))
def f(x):
    """Pool.map adapter: unpack a [srcfile, csvfilePath] pair and delegate."""
    srcfile, csvPath = x[0], x[1]
    return ReplaceAllStrInFileByRows(srcfile, csvPath)
if __name__ == "__main__":
    args = sys.argv
    # BUG FIX: '<>' is Python 2-only syntax; '!=' works everywhere.
    if len(args) != 3:
        # BUG FIX: backslashes escaped — the original non-raw string turned
        # '\t' in 'D:\ss\temp.csv' into a literal tab character.
        print('usage: python testcsv.py D:\\ss\\temp.csv D:\\ss\\test\\')
        # BUG FIX: the bare name 'exit' was a no-op, so the script fell
        # through and crashed on the missing arguments; terminate properly.
        sys.exit(1)
    csvfilePath = args[1]
    srcPath = args[2]
    filelist = findFile(srcPath)
    dataItems = [[name, csvfilePath] for name in filelist]
    if dataItems:
        # BUG FIX: clamp to at least one worker — Pool(processes=0) raises
        # ValueError when no files match.
        pool_size = min(len(filelist), g_max_thread_pool_size)
        pool = Pool(processes=pool_size)  # multiprocess fan-out over files
        pool.map(f, dataItems)