Python 修改 PDF 元信息(metadata)
"""Batch-rewrite the metadata of every PDF found under a root directory.

The directory tree below ``<root>`` is mirrored into ``<root>/dest`` and each
PDF is copied there page by page with replacement document metadata. The work
is spread over several worker processes that share one task queue.
"""
import getopt
import os
import queue
import shutil
import sys
import time
from multiprocessing import Process, Queue

from PyPDF2 import PdfFileReader, PdfFileWriter

# Name of the output directory created directly under the root directory.
DEST_DIR_NAME = 'dest'

# The original script always started exactly two workers; keep that default.
DEFAULT_PROCESS_COUNT = 2

# Metadata written into every output PDF unless the caller supplies its own.
DEFAULT_METADATA = {
    '/Author': 'youngboy',
    '/Title': 'youngboy',
    '/Creator': 'youngboy',
}

# Seconds a worker waits on an empty queue before concluding the work is done.
_QUEUE_TIMEOUT = 1


def update_metadata(pdf, metadata=None):
    """Copy one PDF to a new file, replacing its document metadata.

    Args:
        pdf: Mapping with the keys ``'source'`` (path of the PDF to read) and
            ``'to'`` (path of the PDF to write).
        metadata: Optional mapping of PDF info keys (e.g. ``'/Author'``) to
            values. Defaults to ``DEFAULT_METADATA``.
    """
    source_path = pdf['source']
    target_path = pdf['to']
    info = dict(DEFAULT_METADATA if metadata is None else metadata)

    # The source stays open until the write has finished because the pages
    # handed to the writer still belong to the reader's open stream.
    with open(source_path, 'rb') as source_file:
        reader = PdfFileReader(source_file)
        print(reader.getDocumentInfo())

        writer = PdfFileWriter()
        writer.addMetadata(info)
        writer.appendPagesFromReader(reader)

        with open(target_path, 'wb') as target_file:
            writer.write(target_file)


def long_time_task(q):
    """Worker loop: take tasks from ``q`` and process them until none remain.

    The loop ends when it receives a ``None`` sentinel or when the queue has
    stayed empty for ``_QUEUE_TIMEOUT`` seconds. Checking ``q.empty()`` before
    ``q.get()`` is avoided on purpose: with several consumers another worker
    can take the last item in between, leaving ``get()`` blocked forever.

    Args:
        q: Queue of task dicts in the format expected by ``update_metadata``.
    """
    while True:
        try:
            task = q.get(timeout=_QUEUE_TIMEOUT)
        except queue.Empty:
            return
        if task is None:
            return

        try:
            # Approximate only; may include pending sentinels.
            remaining = str(q.qsize())
        except NotImplementedError:
            # qsize() is not implemented on some platforms (e.g. macOS).
            remaining = '?'
        print("剩余任务" + remaining)

        try:
            update_metadata(task)
        except Exception as err:  # worker boundary: report and keep going
            # One unreadable PDF must not stop the rest of the batch.
            print('处理失败: %s (%s)' % (task.get('source'), err),
                  file=sys.stderr)


def usage():
    """Print the command-line help text."""
    print("""
    -r root 目录
    -p 进程数(默认 %d)
    -h 显示帮助
    """ % DEFAULT_PROCESS_COUNT)


def _is_pdf(filename):
    """Return True when ``filename`` has a ``.pdf`` extension (any case)."""
    return filename.lower().endswith('.pdf')


def _collect_tasks(root):
    """Mirror the directory layout under ``root`` and list the PDFs to convert.

    Every directory that contains files gets a matching directory below
    ``<root>/dest``.

    Args:
        root: Directory to scan.

    Returns:
        A list of ``{'source': ..., 'to': ...}`` dicts, one per PDF found.
    """
    dest_root = os.path.join(root, DEST_DIR_NAME)
    root_abs = os.path.abspath(root)
    tasks = []

    for current, dirs, files in os.walk(root):
        if os.path.abspath(current) == root_abs:
            # Never descend into the output directory itself.
            dirs[:] = [d for d in dirs if d != DEST_DIR_NAME]
        if not files:
            continue

        relative = os.path.relpath(current, root)
        target_dir = os.path.normpath(os.path.join(dest_root, relative))
        os.makedirs(target_dir, exist_ok=True)

        for name in files:
            print(relative + "--" + current + "--" + root)
            if _is_pdf(name):
                tasks.append({
                    'source': os.path.join(current, name),
                    'to': os.path.join(target_dir, name),
                })
    return tasks


def main(argv=None):
    """Parse the command line and run the batch conversion.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.
    """
    argv = sys.argv[1:] if argv is None else argv
    print(argv)

    try:
        opts, _args = getopt.getopt(argv, "hr:p:", ["help", "process="])
    except getopt.GetoptError as err:
        # Print help information and exit.
        print(err)
        usage()
        sys.exit(2)

    root = None
    pnum = DEFAULT_PROCESS_COUNT
    for opt, value in opts:
        if opt == "-r":
            root = value
        elif opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-p", "--process"):
            try:
                pnum = int(value)
            except ValueError:
                print('进程数必须是整数: ' + value)
                usage()
                sys.exit(2)

    if root is None or not os.path.isdir(root):
        print('请用 -r 指定一个存在的目录')
        usage()
        sys.exit(2)
    if pnum < 1:
        print('进程数必须大于 0')
        usage()
        sys.exit(2)

    # Start from a clean output directory; it may not exist on the first run.
    dest_root = os.path.join(root, DEST_DIR_NAME)
    if os.path.isdir(dest_root):
        shutil.rmtree(dest_root)

    tasks = _collect_tasks(root)
    print(len(tasks))

    q = Queue()
    for task in tasks:
        q.put(task)
    # One sentinel per worker lets each of them stop without waiting for the
    # empty-queue timeout.
    for _ in range(pnum):
        q.put(None)

    workers = [Process(target=long_time_task, args=(q,)) for _ in range(pnum)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print('All subprocesses done.')


if __name__ == '__main__':
    main()
使用示例
python xx.py -r D:/pdf
转:https://www.codenong.com/js4e940e6d4616/
浙公网安备 33010602011771号