# Python: using the pdfplumber library to read PDF files
import os

import pdfplumber
from openpyxl import Workbook
from openpyxl.styles import PatternFill,Side,Border
# Module-level accumulator: visitDir() appends every PDF path it finds here.
l = []


def visitDir(path):
    """Recursively collect the paths of all PDF files under *path*.

    Matching paths are appended to the module-level list ``l``. If *path*
    is not an existing directory, an error message is printed and nothing
    is appended.

    Args:
        path: Directory to search.

    Returns:
        None. Results are delivered through the module-level list ``l``.
    """
    if not os.path.isdir(path):
        print('Error:"',path,'" is not a directory or does not exist.')
        return
    # os.walk yields a (root, dirs, files) triple for each directory in the tree.
    for root, dirs, files in os.walk(path):
        # Sorting in place fixes the order in which sub-directories are
        # visited, so the collected list is the same on every platform.
        dirs.sort()
        for name in sorted(files):
            # Compare case-insensitively so "*.PDF" is picked up as well.
            if name.lower().endswith(".pdf"):
                l.append(os.path.join(root, name))
def writeExcel(l, output_path="data/合同信息导出.xlsx"):
    """Extract the text of each PDF in *l* and export it to an Excel workbook.

    Every page of every PDF becomes one worksheet row, starting at row 2
    (row 1 holds the styled header). The page text is split on whitespace
    and each token is written to its own cell, so a row is exactly as wide
    as the number of tokens on its page.

    Args:
        l: Iterable of PDF file paths to read.
        output_path: Destination of the saved workbook. Defaults to the
            location that was previously hard-coded.
    """
    wb = Workbook()
    ws1 = wb.active

    # One list of whitespace-separated tokens per PDF page.
    rows = []
    for pdf_path in l:
        with pdfplumber.open(pdf_path) as pdf:
            for page in pdf.pages:
                # Guard against a page with no extractable text; some
                # pdfplumber releases return None here instead of "".
                text = page.extract_text() or ""
                rows.append(text.split())

    thin = Side(border_style='thin', color='000000')
    border = Border(top=thin, bottom=thin, left=thin, right=thin)

    # Header row: contract number / contract name / contract amount.
    header_fill = PatternFill(fill_type='solid', fgColor="8B008B")
    for col_idx, title in enumerate(("合同序号", "合同名称", "合同金额"), start=1):
        cell = ws1.cell(row=1, column=col_idx, value=title)
        cell.fill = header_fill
        cell.border = border

    # Data rows. Each row is iterated over its own length: sizing every row
    # by the first one raised IndexError on shorter pages and silently
    # dropped the trailing tokens of longer ones.
    body_fill = PatternFill(fill_type='solid', fgColor="FFC0CB")
    for row_idx, tokens in enumerate(rows, start=2):
        for col_idx, token in enumerate(tokens, start=1):
            cell = ws1.cell(row=row_idx, column=col_idx, value=token)
            cell.fill = body_fill
            cell.border = border

    wb.save(output_path)
    wb.close()
if __name__ == '__main__':
    # print_hi() is not defined anywhere in this file, so calling it raised
    # NameError before any work was done; print the banner text directly.
    print('PyCharm,geovin du study')
    # Collect every PDF under ./data into the module-level list, then export.
    visitDir('data')
    writeExcel(l)
哲学管理(学)人生, 文学艺术生活, 自动(计算机学)物理(学)工作, 生物(学)化学逆境, 历史(学)测绘(学)时间, 经济(学)数学金钱(理财), 心理(学)医学情绪, 诗词美容情感, 美学建筑(学)家园, 解构建构(分析)整合学习, 智商情商(IQ、EQ)运筹(学)生存.---Geovin Du(涂聚文)
浙公网安备 33010602011771号