Python：记录一个将 PDF 转换为 XLSX 的方法

import tkinter.filedialog
import pandas as pd
import os
import pdfplumber
import tkinter


class PdfReader:
    """Convert the tables found in PDF files into Excel workbooks.

    For every selected PDF, the table returned by ``page.extract_table()`` on
    each page is written to its own sheet of an ``.xlsx`` file that is saved
    in the same directory as the PDF and shares its base name.
    """

    @staticmethod
    def catch(input_file):
        """Extract one table per page from *input_file* and save them as xlsx.

        Sheets are named ``Sheet<N>`` where ``N`` is the 1-based page number.
        Pages for which ``extract_table()`` yields nothing are skipped instead
        of aborting the whole conversion.

        Args:
            input_file: Path of the PDF file to convert.

        Raises:
            ValueError: If no page of the PDF yields a table, so that no
                empty or broken workbook is left on disk.
        """
        # basename/splitext work for any path separator and any extension
        # length, unlike cutting a fixed four characters off the end.
        name = os.path.splitext(os.path.basename(input_file))[0]
        output_file = os.path.join(os.path.dirname(input_file), f'{name}.xlsx')

        # Collect every table first so the workbook is only created once we
        # know there is something to put into it.
        frames = {}
        with pdfplumber.open(input_file) as pdf:
            for page_number, page in enumerate(pdf.pages, start=1):
                table = page.extract_table()
                if not table:
                    # No table detected on this page (None or empty result).
                    continue
                frames[f'Sheet{page_number}'] = pd.DataFrame(
                    columns=range(len(table[0])), data=table
                )

        if not frames:
            raise ValueError(f'no table found in {input_file}')

        # The context manager closes the writer even if a sheet fails to
        # serialize, so no file handle is leaked.
        with pd.ExcelWriter(output_file) as writer:
            for sheet_name, frame in frames.items():
                frame.to_excel(writer, sheet_name=sheet_name, index=False)

    def process(self):
        """Ask the user for PDF files and convert each of them in turn.

        A failure on one file is reported on stdout together with its cause
        and does not stop the remaining files from being processed.
        """
        input_files = tkinter.filedialog.askopenfilenames(title='选择转化文件')
        for i in input_files:
            try:
                self.catch(input_file=i)
            except Exception as exc:
                # Best-effort batch: report and move on, but let
                # KeyboardInterrupt/SystemExit propagate.
                print(i, '转化失败', exc)
            else:
                print(i, '转化成功')


if __name__ == '__main__':
    # Start the interactive conversion only when run as a script.
    reader = PdfReader()
    reader.process()
posted @ 2024-05-27 23:29  AZ26  阅读(65)  评论(0)    收藏  举报