Python: using the pdfplumber library to read PDF files
import os

import pdfplumber
from openpyxl import Workbook
from openpyxl.styles import Border, PatternFill, Side

# Module-level accumulator: visitDir() appends every PDF path it finds here.
l = []


def visitDir(path):
    """Recursively collect the paths of all ``.pdf`` files under *path*.

    Matching paths are appended to the module-level list ``l``. If *path*
    is not an existing directory, an error line is printed and nothing is
    collected.
    """
    if not os.path.isdir(path):
        print('Error:"', path, '" is not a directory or does not exist.')
        return
    # os.walk yields (directory path, sub-directory names, file names)
    # for every directory in the tree.
    for root, _dirs, files in os.walk(path):
        for f in files:
            if f.endswith(".pdf"):
                l.append(os.path.join(root, f))


def writeExcel(l):
    """Export the text of every page of the given PDFs to an Excel sheet.

    Each PDF page becomes one worksheet row: the page text is split on
    whitespace and each token goes into its own cell, below a styled
    three-column header. The workbook is saved to
    ``data/合同信息导出.xlsx``.

    Args:
        l: iterable of PDF file paths to read.
    """
    wb = Workbook()
    ws1 = wb.active

    # One list of whitespace-separated tokens per PDF page.
    data = []
    for pdf_path in l:
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                # A page without a text layer may yield no text at all;
                # treat that as an empty row instead of crashing on split().
                text = page.extract_text() or ""
                data.append(text.split())

    thin = Side(border_style='thin', color='000000')
    border = Border(top=thin, bottom=thin, left=thin, right=thin)

    # Header row.
    header_fill = PatternFill(fill_type='solid', fgColor="8B008B")
    for col, title in enumerate(("合同序号", "合同名称", "合同金额"), start=1):
        header_cell = ws1.cell(row=1, column=col, value=title)
        header_fill_copy = header_fill
        header_cell.fill = header_fill_copy
        header_cell.border = border

    # Data rows. The column count is taken from the first page, as before;
    # pages with fewer tokens are padded with empty cells rather than
    # raising IndexError, and no rows are written when nothing was read.
    fill = PatternFill(fill_type='solid', fgColor="FFC0CB")
    width = len(data[0]) if data else 0
    for row_idx, tokens in enumerate(data, start=2):
        for col_idx in range(width):
            value = tokens[col_idx] if col_idx < len(tokens) else None
            cell = ws1.cell(row=row_idx, column=col_idx + 1, value=value)
            cell.fill = fill
            cell.border = border

    out_path = "data/合同信息导出.xlsx"
    # Make sure the target directory exists so save() does not fail.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    wb.save(out_path)
    wb.close()


if __name__ == '__main__':
    # The original called print_hi(...), which is not defined in this file;
    # print the banner text directly so the script can start.
    print('PyCharm,geovin du study')
    visitDir('data')
    writeExcel(l)
哲学管理(学)人生, 文学艺术生活, 自动(计算机学)物理(学)工作, 生物(学)化学逆境, 历史(学)测绘(学)时间, 经济(学)数学金钱(理财), 心理(学)医学情绪, 诗词美容情感, 美学建筑(学)家园, 解构建构(分析)整合学习, 智商情商(IQ、EQ)运筹(学)生存.---Geovin Du(涂聚文)