(python库)pdf发票销售方信息批量提取

import re
import pandas as pd
import pdfplumber
import glob

# Batch-extract seller information from PDF invoices into one Excel sheet.
#
# For every page of every PDF under ./发票/ the script takes the table found
# by pdfplumber, reads the second cell of its last row, and splits that cell
# into one field per line. Pages that do not match this layout are reported
# and skipped instead of aborting the whole batch.

# Patterns applied to the bank line: the first run of CJK characters is taken
# as the bank name and the first run of digits as the account number.
_BANK_NAME_RE = re.compile(r'[\u4e00-\u9fa5]+')
_ACCOUNT_RE = re.compile(r'\d+')


def _parse_seller(cell):
    """Split the seller cell text into ``[name, code, bank, account]``.

    Args:
        cell: Text of the seller cell, one field per line, or ``None``.

    Returns:
        A four-item list, or ``None`` when the cell is empty or has fewer
        than the four lines this fixed layout needs. A bank name or account
        number that cannot be found is returned as an empty string.
    """
    if not cell:
        return None
    lines = cell.split("\n")
    if len(lines) < 4:
        return None
    # The fixed offsets presumably skip the printed field labels
    # (4 characters on the name line, 7 on the other two) -- confirm against
    # a sample invoice. The third line (index 2) is not used.
    bank_line = str(lines[3][7:])
    bank = _BANK_NAME_RE.search(bank_line)
    account = _ACCOUNT_RE.search(bank_line)
    return [
        lines[0][4:],
        lines[1][7:],
        bank.group() if bank else "",
        account.group() if account else "",
    ]


# NOTE: recursive=True only matters when the pattern contains "**"; with this
# pattern only PDFs directly inside ./发票/ are matched.
file = glob.glob(r'./发票/*.pdf', recursive=True)
data = []
for pdf_path in file:
    with pdfplumber.open(pdf_path) as pdf:
        for page_no, page in enumerate(pdf.pages, start=1):
            table = page.extract_table()
            # extract_table() gives None when the page has no table.
            if not table or len(table[-1]) < 2:
                print(f"skipped {pdf_path} page {page_no}: no usable table")
                continue
            record = _parse_seller(table[-1][1])
            if record is None:
                print(f"skipped {pdf_path} page {page_no}: unexpected seller cell")
                continue
            data.append(record)

ff = pd.DataFrame(data, columns=("名称", "统一信用编号", "开户行", "账号"))
ff.to_excel("保存文件名称.xlsx", index=False)

 

posted @ 2022-09-12 21:58  十万神马  阅读(184)  评论(0)  编辑  收藏  举报