【原创小工具】Python批量处理Word文档
欢迎你试用《Word文档批量操作》!
界面UI(doc2docx.ui)代码:
查看代码
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Qt Designer form for the fixed-size (500x430) main window "doc2docx".
  Widgets: input/output folder line edits with their picker buttons, a log
  text box, a progress bar and a start button, plus a menu bar and status bar.
  NOTE(review): the Python window class published alongside this form connects
  startBtn1..startBtn6, while this form defines a single startBtn - the two
  appear to come from different revisions; confirm which one is current.
-->
<ui version="4.0">
 <class>MainWindow</class>
 <widget class="QMainWindow" name="MainWindow">
  <property name="geometry">
   <rect>
    <x>0</x>
    <y>0</y>
    <width>500</width>
    <height>430</height>
   </rect>
  </property>
  <!-- minimumSize == maximumSize, so the window cannot be resized -->
  <property name="minimumSize">
   <size>
    <width>500</width>
    <height>430</height>
   </size>
  </property>
  <property name="maximumSize">
   <size>
    <width>500</width>
    <height>430</height>
   </size>
  </property>
  <property name="windowTitle">
   <string>doc2docx</string>
  </property>
  <widget class="QWidget" name="centralwidget">
   <!-- Input folder path -->
   <widget class="QLineEdit" name="docLineEdit">
    <property name="geometry">
     <rect>
      <x>60</x>
      <y>40</y>
      <width>260</width>
      <height>20</height>
     </rect>
    </property>
   </widget>
   <!-- "Choose input folder" button -->
   <widget class="QPushButton" name="docBtn">
    <property name="geometry">
     <rect>
      <x>340</x>
      <y>40</y>
      <width>100</width>
      <height>23</height>
     </rect>
    </property>
    <property name="text">
     <string>选择输入文件夹</string>
    </property>
   </widget>
   <!-- Progress bar; the designer default of 24 is reset to 0 by the Python code -->
   <widget class="QProgressBar" name="progressBar">
    <property name="geometry">
     <rect>
      <x>60</x>
      <y>350</y>
      <width>270</width>
      <height>23</height>
     </rect>
    </property>
    <property name="value">
     <number>24</number>
    </property>
   </widget>
   <!-- "Choose output folder" button -->
   <widget class="QPushButton" name="docxBtn">
    <property name="geometry">
     <rect>
      <x>340</x>
      <y>80</y>
      <width>100</width>
      <height>23</height>
     </rect>
    </property>
    <property name="text">
     <string>选择输出文件夹</string>
    </property>
   </widget>
   <!-- Output folder path -->
   <widget class="QLineEdit" name="docxLineEdit">
    <property name="geometry">
     <rect>
      <x>60</x>
      <y>80</y>
      <width>260</width>
      <height>20</height>
     </rect>
    </property>
   </widget>
   <!-- Log / free-text box -->
   <widget class="QTextEdit" name="textEdit">
    <property name="geometry">
     <rect>
      <x>60</x>
      <y>140</y>
      <width>380</width>
      <height>190</height>
     </rect>
    </property>
   </widget>
   <!-- "Start conversion" button -->
   <widget class="QPushButton" name="startBtn">
    <property name="geometry">
     <rect>
      <x>340</x>
      <y>350</y>
      <width>100</width>
      <height>23</height>
     </rect>
    </property>
    <property name="text">
     <string>开始转换</string>
    </property>
   </widget>
  </widget>
  <widget class="QMenuBar" name="menubar">
   <property name="geometry">
    <rect>
     <x>0</x>
     <y>0</y>
     <width>500</width>
     <height>23</height>
    </rect>
   </property>
  </widget>
  <widget class="QStatusBar" name="statusbar"/>
 </widget>
 <resources/>
 <connections/>
</ui>
主程序代码:
查看代码
# -*- coding: utf-8 -*-
# Standard library
import os
import re
import shutil
import sys

# Packages used for document conversion and editing
import docx
import win32com
import win32com.client
import xlwt
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Cm, Pt
from win32com.client import Dispatch, constants

# GUI packages
from PyQt5.QtWidgets import QAction, QApplication, QFileDialog, QMainWindow, QMessageBox

# UI module (presumably generated from doc2docx.ui)
import doc2docx
from doc2docx import Ui_MainWindow
class SimpleDialogForm(Ui_MainWindow, QMainWindow):
    """Main window of the batch Word-document tool.

    Wires the widgets created by ``Ui_MainWindow.setupUi`` to six batch
    operations that run over the folder entered in ``docLineEdit``: file-name
    replacement, conversion to DOCX, conversion to PDF, header/footer removal
    with page reformatting, page-number insertion and header removal.
    Results are written to the folder in ``docxLineEdit`` or, when that is
    empty, back into the input folder.

    The class expects the UI to provide ``startBtn1`` .. ``startBtn6`` in
    addition to the line edits, folder buttons, progress bar, text box, menu
    bar and status bar used below.
    """

    # Names of every Word file visited by the current batch.
    totalList = []
    # Names of the files processed successfully.
    successList = []
    # Names of the files whose processing failed.
    errorList = []

    # File-name suffixes (lower-case) treated as Word documents.
    _WORD_EXTENSIONS = (".doc", ".docx")

    # Word automation enum values, spelled out as literals because
    # ``win32com.client.constants`` is only populated for makepy-generated
    # wrappers, not for the dynamic ``DispatchEx`` objects used here.
    _WD_FORMAT_DOCX = 12  # WdSaveFormat.wdFormatXMLDocument
    _WD_FORMAT_PDF = 17  # WdSaveFormat.wdFormatPDF
    _WD_HEADER_FOOTER_PRIMARY = 1  # WdHeaderFooterIndex.wdHeaderFooterPrimary
    _WD_ALIGN_PAGE_NUMBER_CENTER = 1  # WdPageNumberAlignment.wdAlignPageNumberCenter
    _WD_DO_NOT_SAVE_CHANGES = 0  # WdSaveOptions.wdDoNotSaveChanges

    # Text replacements applied run by run in remove_H_F.
    # NOTE(review): several entries map a character to itself as written
    # here; presumably the keys were full-width punctuation before the text
    # was normalised - confirm against the author's copy.
    # NOTE(review): "^b" / "^p" are matched as literal text, not as Word
    # find-and-replace codes - confirm that is intended.
    _REPLACEMENTS = {
        ".": ".",
        "。": ".",
        ",": ",",
        ";": ";",
        ":": ":",
        "-": "-",
        "+": "+",
        "^b": "",
        "【分析】^p": "【分析】",
        "【详解】^p": "【详解】",
        "【点睛】^p": "【点睛】",
    }

    def __init__(self, parent=None):
        """Build the UI, add the help menu and connect all signals."""
        super(SimpleDialogForm, self).__init__(parent)
        self.setupUi(self)
        # Give this instance its own result lists instead of the class-level ones.
        self.initConfig()
        # The designer file leaves the progress bar at 24; start from 0.
        self.progressBar.setProperty("value", 0)
        # Help menu with "About" and "Contact" entries.
        self.about = QAction("关于", self)
        self.contact = QAction("联系", self)
        helpMenu = self.menubar.addMenu("帮助")
        helpMenu.addAction(self.about)
        helpMenu.addAction(self.contact)
        # Folder pickers.
        self.docBtn.clicked.connect(self.setDocUrl)
        self.docxBtn.clicked.connect(self.setDocxUrl)
        # Editing either path resets the progress bar and the text box.
        self.docLineEdit.textChanged.connect(self.initGUI)
        self.docxLineEdit.textChanged.connect(self.initGUI)
        # One button per batch operation.
        self.startBtn1.clicked.connect(self.renFilename)
        self.startBtn2.clicked.connect(self.startConvertD)
        self.startBtn3.clicked.connect(self.remove_H_F)
        self.startBtn4.clicked.connect(self.addPageNumber)
        self.startBtn5.clicked.connect(self.remove_H)
        self.startBtn6.clicked.connect(self.startConvertP)
        # Help menu dialogs.
        self.about.triggered.connect(self.showAbout)
        self.contact.triggered.connect(self.contactAuthor)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _warn(self, text):
        """Show a modal warning box with the given text."""
        QMessageBox(QMessageBox.Warning, "警告", text).exec()

    def _inputOutputDirs(self):
        """Return ``(input_dir, output_dir)`` read from the two line edits.

        The output folder falls back to the input folder when left blank.
        """
        docUrl = self.docLineEdit.text().strip()
        docxUrl = self.docxLineEdit.text().strip() or docUrl
        return docUrl, docxUrl

    def _wordFiles(self, docUrl):
        """Return ``(folder, file_name)`` for every .doc/.docx below *docUrl*."""
        return [
            (root, name)
            for root, _dirs, files in os.walk(docUrl)
            for name in files
            if name.lower().endswith(self._WORD_EXTENSIONS)
        ]

    @staticmethod
    def _replaceInRuns(document, replace_dict):
        """Replace every key of *replace_dict* by its value in each run's text.

        Only the body paragraphs of *document* are visited, and a key that is
        split across two runs is not matched.  Returns *document*.
        """
        for para in document.paragraphs:
            for run in para.runs:
                for key, value in replace_dict.items():
                    if key in run.text:
                        run.text = run.text.replace(key, value)
        return document

    def _runBatch(self, action, warning, verb, okText, failText,
                  progId=None, logStatus=False, announceDone=False):
        """Apply *action* to every Word file below the input folder.

        Args:
            action: callable ``(word, fileName, docxUrl, name)``; it raises
                to mark the file as failed.  *word* is the COM application,
                or ``None`` when *progId* is not given.
            warning: text of the warning shown when no input folder is set.
            verb: verb inserted into the per-folder status message.
            okText / failText: suffix appended to the file name in the log.
            progId: COM ProgID of the word processor to start, if needed.
            logStatus: also copy the per-folder status line into the text box.
            announceDone: show "操作完成!" in the status bar at the end.
        """
        if not self.docLineEdit.text().strip():
            self._warn(warning)
            return
        self.setOp(False)
        self.initConfig()
        docUrl, docxUrl = self._inputOutputDirs()
        print("原文件路径:" + docUrl)
        print("新文件路径:" + docxUrl)
        word = None
        try:
            if progId is not None:
                # DispatchEx starts a private instance, so the Quit() below
                # cannot close a window the user already has open.
                word = win32com.client.DispatchEx(progId)
                word.DisplayAlerts = 0
                word.Visible = 0
            targets = self._wordFiles(docUrl)
            fileTotal = len(targets)
            currentRoot = None
            for done, (root, name) in enumerate(targets, start=1):
                if root != currentRoot:
                    currentRoot = root
                    status = "正在" + verb + "[" + root + "]中的" + repr(fileTotal) + "个word文件。"
                    self.statusbar.showMessage(status)
                    if logStatus:
                        self.textEdit.append(status)
                self.totalList.append(name)
                # Only Word files are counted, so the bar ends at exactly 100.
                self.progressBar.setValue(int(done * 100 / fileTotal))
                # Absolute path: a relative one would be resolved by the COM
                # server against its own working directory.
                fileName = os.path.abspath(os.path.join(root, name))
                try:
                    action(word, fileName, docxUrl, name)
                except Exception as e:
                    # Best effort: report this file and carry on with the next.
                    print(e)
                    message = name + failText
                    self.errorList.append(name)
                else:
                    message = name + okText
                    self.successList.append(name)
                print(message)
                self.textEdit.append(message)
            if announceDone:
                print("操作完成!")
                self.statusbar.showMessage("操作完成!")
            self.textEdit.append("本次操作结束!")
        except Exception as e:
            # Slot boundary: e.g. the COM server could not be started.
            print(e)
            self.textEdit.append("操作中断:" + str(e))
        finally:
            if word is not None:
                try:
                    word.Quit()
                except Exception as e:
                    print(e)
            self.setOp(True)

    # ------------------------------------------------------------------
    # Report and widget state
    # ------------------------------------------------------------------
    def writeMsg(self):
        """Write the result lists to ``操作信息.xls`` in the output folder.

        Sheet 1 holds the three counts, sheet 2 the file names.  xlwt writes
        the legacy ``.xls`` format, hence the extension.
        """
        self.textEdit.append("生成(操作信息.xls)")
        _docUrl, docxUrl = self._inputOutputDirs()
        msgExcel = os.path.join(docxUrl, "操作信息.xls")
        print(msgExcel)
        excel = xlwt.Workbook(encoding="utf-8")
        sheet1 = excel.add_sheet("统计信息")
        sheet2 = excel.add_sheet("详细信息")
        headers = ("文件总数", "操作成功", "操作失败")
        columns = (self.totalList, self.successList, self.errorList)
        for col, (header, names) in enumerate(zip(headers, columns)):
            sheet1.write(0, col, header)
            sheet1.write(1, col, len(names))
            sheet2.write(0, col, header)
            for row, fileName in enumerate(names, start=1):
                sheet2.write(row, col, fileName)
        excel.save(msgExcel)
        self.statusbar.showMessage("操作完成,请到生成目录下打开(操作信息.xls)查看详细信息", 10000)
        self.setOp(True)

    def setOp(self, flag):
        """Enable (``True``) or disable (``False``) every input widget."""
        widgets = (
            self.docLineEdit,
            self.docxLineEdit,
            self.docBtn,
            self.docxBtn,
            self.startBtn1,
            self.startBtn2,
            self.startBtn3,
            self.startBtn4,
            self.startBtn5,
            self.startBtn6,
        )
        for widget in widgets:
            widget.setEnabled(flag)

    def setDocUrl(self):
        """Let the user pick the input folder; a cancelled dialog changes nothing."""
        chosen = QFileDialog.getExistingDirectory(self, "选中源文件所在目录", r"E:\\")
        if chosen:
            self.docLineEdit.setText(chosen)

    def setDocxUrl(self):
        """Let the user pick the output folder; a cancelled dialog changes nothing."""
        chosen = QFileDialog.getExistingDirectory(self, "选中新文件所在目录", r"E:\BaiduNetdiskWorkspace\test")
        if chosen:
            self.docxLineEdit.setText(chosen)

    def initGUI(self):
        """Reset the progress bar to 0 and clear the text box."""
        self.progressBar.setProperty("value", 0)
        self.textEdit.setText("")

    def initConfig(self):
        """Start a new batch with empty total / success / error lists."""
        self.totalList = []
        self.successList = []
        self.errorList = []

    # ------------------------------------------------------------------
    # Batch operations
    # ------------------------------------------------------------------
    def renFilename(self):
        """Replace substrings in the names of the entries of the input folder.

        The text box is read as pairs of lines: the text to search for, then
        its replacement (which may be empty or missing).  With no output
        folder the entries are renamed in place; otherwise each regular file
        is copied to the output folder under its new name.

        NOTE: progress is appended to the same text box the pairs are read
        from, so the box must be re-filled before running again.
        """
        path, path2 = self._inputOutputDirs()
        if not path:
            self._warn("请选中要操作的目录")
            return
        renameInPlace = self.docxLineEdit.text().strip() == ""
        lines = self.textEdit.toPlainText().split("\n")
        print("替换字符:", lines)
        pairs = []
        for start in range(0, len(lines), 2):
            search = lines[start]
            replacement = lines[start + 1] if start + 1 < len(lines) else ""
            # An empty search string would insert the replacement between
            # every character of the name, so such pairs are ignored.
            if search:
                pairs.append((search, replacement))
        failed = set()
        try:
            fileTotal = len(os.listdir(path))
            print("文件数:", fileTotal)
            for search, replacement in pairs:
                # Re-list for every pair so later pairs see earlier renames.
                entries = os.listdir(path)
                for done, entry in enumerate(entries, start=1):
                    old = os.path.join(path, entry)
                    new = os.path.join(path2, entry.replace(search, replacement))
                    try:
                        if renameInPlace:
                            if new != old:
                                os.rename(old, new)
                        elif os.path.isfile(old) and new != old:
                            shutil.copyfile(old, new)
                    except OSError as e:
                        print(e)
                        failed.add(entry)
                        self.textEdit.append(entry + "重命名失败!")
                    else:
                        self.textEdit.append(entry)
                    self.progressBar.setValue(int(done * 100 / len(entries)))
        except OSError as e:
            # The input folder could not be listed.
            print(e)
            self.textEdit.append("操作中断:" + str(e))
            return
        okCount = max(fileTotal - len(failed), 0)
        self.textEdit.append("%d个文件名替换成功!" % okCount)
        self.textEdit.append("本次操作结束!")
        print("%d个文件名替换成功!" % okCount)

    def startConvertD(self):
        """Save every Word file below the input folder as ``.docx``."""

        def convert(word, fileName, docxUrl, name):
            # Swap the extension rather than appending "x", so an input that
            # is already .docx does not become ".docxx".
            target = os.path.abspath(os.path.join(docxUrl, os.path.splitext(name)[0] + ".docx"))
            print(fileName)
            doc = word.Documents.Open(fileName)
            try:
                doc.SaveAs(target, self._WD_FORMAT_DOCX)
            finally:
                doc.Close(self._WD_DO_NOT_SAVE_CHANGES)

        self._runBatch(convert, "请选中要转换的目录", "转换", "转换成功!", "转换失败!",
                       progId="kwps.application", logStatus=True)

    def startConvertP(self):
        """Save every Word file below the input folder as PDF."""

        def convert(word, fileName, docxUrl, name):
            target = os.path.abspath(os.path.join(docxUrl, os.path.splitext(name)[0] + ".pdf"))
            doc = word.Documents.Open(fileName)
            try:
                doc.SaveAs(target, self._WD_FORMAT_PDF)
            finally:
                doc.Close(self._WD_DO_NOT_SAVE_CHANGES)

        self._runBatch(convert, "请选中要转换的目录", "转换", "转换成功!", "转换失败!",
                       progId="kwps.application")

    def remove_H_F(self):
        """Remove headers and footers and normalise the page layout.

        For every section the header and footer are unlinked from their own
        definition, all four margins are set to 1.8 cm and the header/footer
        distance to 1.2 cm.  Paragraphs in the "Normal" style get single line
        spacing and the replacement table is applied.  The file is opened
        with python-docx, so inputs it cannot read are reported as failures.
        """

        def clean(_word, fileName, docxUrl, name):
            self.textEdit.append(name)
            document = docx.Document(fileName)
            sstr = "本文档共有" + repr(len(document.sections)) + "节。"
            print(sstr)
            self.textEdit.append(sstr)
            # Each section carries its own settings, so apply them to all.
            for section in document.sections:
                section.header.is_linked_to_previous = True
                section.footer.is_linked_to_previous = True
                # NOTE(review): this looks like a plain Python attribute
                # with no effect on the saved file - confirm.
                section.mirrorMargins = True
                section.top_margin = Cm(1.8)
                section.bottom_margin = Cm(1.8)
                section.left_margin = Cm(1.8)
                section.right_margin = Cm(1.8)
                section.header_distance = Cm(1.2)
                section.footer_distance = Cm(1.2)
            for paragraph in document.paragraphs:
                if paragraph.style.name == "Normal":
                    paragraph.paragraph_format.line_spacing = 1.0
            self._replaceInRuns(document, self._REPLACEMENTS)
            # Document-wide work and the save happen once, not once per section.
            document.save(os.path.join(docxUrl, name))

        self._runBatch(clean, "请选中要操作的目录", "操作", "删除页眉页脚成功!", "删除页眉页脚失败!",
                       announceDone=True)

    def addPageNumber(self):
        """Add a centred page number to the primary footer of section 1."""

        def number(word, fileName, docxUrl, name):
            newName = os.path.abspath(os.path.join(docxUrl, name))
            doc = word.Documents.Open(fileName)
            try:
                # COM collections are 1-based.
                footer = doc.Sections(1).Footers(self._WD_HEADER_FOOTER_PRIMARY)
                footer.PageNumbers.Add(PageNumberAlignment=self._WD_ALIGN_PAGE_NUMBER_CENTER)
                doc.SaveAs(newName)
            finally:
                doc.Close(self._WD_DO_NOT_SAVE_CHANGES)

        self._runBatch(number, "请选中要转换的目录", "操作", "添加页码成功!", "添加页码失败!",
                       progId="kwps.application", announceDone=True)

    def remove_H(self):
        """Remove the header of every section, leaving footers untouched.

        The file is opened with python-docx, so inputs it cannot read are
        reported as failures.
        """

        def clean(_word, fileName, docxUrl, name):
            self.textEdit.append(name)
            document = docx.Document(fileName)
            sstr = "本文档共有" + repr(len(document.sections)) + "节。"
            print(sstr)
            self.textEdit.append(sstr)
            for section in document.sections:
                section.header.is_linked_to_previous = True
            document.save(os.path.join(docxUrl, name))

        self._runBatch(clean, "请选中要操作的目录!", "操作", "删除页眉成功!", "删除页眉失败!",
                       announceDone=True)

    # ------------------------------------------------------------------
    # Help menu
    # ------------------------------------------------------------------
    def showAbout(self):
        """Show the "About" dialog with brief usage instructions."""
        msgBox = QMessageBox(QMessageBox.Question, "关于程序", " 请选择源文件夹和新文件夹(可不选则默认与源文件夹相同)。按顺序点击所需操作按钮,处理完成后在新文件夹下有(操作信息.xls),记录了操作的文件总数,操作成功的文件数,操作失败的文件数等详细信息。")
        msgBox.exec()

    def contactAuthor(self):
        """Show the author's contact details."""
        msgBox = QMessageBox(QMessageBox.Information, "联系作者", "如您发现BUG及有更好的意见或建议,请联系作者:ZJZ(15263796588)")
        msgBox.exec()
# Script entry point: create the Qt application, show the main window and
# hand control to the event loop until the window is closed.
if __name__ == "__main__":
    app = QApplication(sys.argv)
    main = SimpleDialogForm()
    main.show()
    sys.exit(app.exec_())
本文来自 博客园,作者:近我者赤
转载请注明原文链接:https://www.cnblogs.com/zjzBlogs/p/18824623
本博客所有文章来自于读书、参考、引用、复制和粘贴等多种方式,仅用于记录、学习、研究和交流目的,欢迎非商业性质转载。