# pandas记录

'''
资料
https://www.cnblogs.com/insane-Mr-Li/p/13231644.html
https://blog.csdn.net/toshibahuai/article/details/79034829
'''

# encoding='utf-8'
from io import StringIO

import pandas as pd

def main():
    """Export the first HTML table found in ``账单.html`` to ``账单.csv``.

    Reads ``账单.html`` as UTF-8 text, lets pandas parse every ``<table>``
    element in it, prints the type of the parse result and the number of
    tables found, then prints the first table and writes it to ``账单.csv``
    (UTF-8 encoded). Both paths are relative to the current working
    directory.

    Raises:
        FileNotFoundError: if ``账单.html`` cannot be opened.
        ValueError: propagated from ``pandas.read_html`` when the document
            contains no parsable table (per the pandas documentation).
    """
    with open("账单.html", "r", encoding="utf-8") as f:
        html_text = f.read()

    # read_html returns a list with one DataFrame per <table> element.
    # The markup is wrapped in StringIO because passing literal HTML text
    # directly to read_html is deprecated since pandas 2.1.
    html_data = pd.read_html(StringIO(html_text))
    print(type(html_data), len(html_data))

    # Single-table case: export only the first table. The entries of
    # html_data are already DataFrames, so no re-wrapping is needed.
    first_table = html_data[0]
    print(first_table)
    first_table.to_csv('账单.csv', encoding='utf-8')

    # Multi-table alternative, kept for reference. NOTE: every iteration
    # writes to the same '账单.csv', so only the last table would survive.
    # for table in html_data:
    #     cleaned = table.fillna(value="")   # replace empty cells with ""
    #     print(type(cleaned), len(cleaned))
    #     print(cleaned)
    #     cleaned.to_csv('账单.csv', encoding='utf-8')
    #     # cleaned.to_csv('C:/Users/think/Desktop/Result.csv', header=0)  # do not save column names


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
# posted @ 2022-11-20 16:58  是谁走漏了消息  阅读(17)  评论(0)    收藏  举报