
Python Automation

Office

Creating an Excel spreadsheet

import openpyxl

workbook = openpyxl.Workbook()       # create a new workbook
worksheet = workbook.active          # get the active (first) sheet
worksheet.title = "mySheet"

worksheet.cell(1, 1, "askgdajds")    # write a placeholder value into cell A1

workbook.save(filename='/Users/xingkong2/Desktop/haha.xlsx')
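To verify the result, the saved file can be opened again and the cell read back. A minimal sketch, using the same path and sheet title as the save call above:

import openpyxl

workbook = openpyxl.load_workbook('/Users/xingkong2/Desktop/haha.xlsx')
worksheet = workbook['mySheet']           # open the sheet by its title
print(worksheet.cell(1, 1).value)         # prints the value written above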

Writing JSON data to Excel

The target workbook has to be created before the first run (see the sketch after the code below).

import json
import openpyxl


# other cities to process: 鄂州 黄冈 黄石 荆门 十堰 咸宁 襄阳 孝感 宜昌

position='长沙'

path='/Users/xingkong2/Desktop/湖南/'+position+'/content2.json'
path2='/Users/xingkong2/Desktop/湖南/'+position+'/all.xlsx'

with open(path,  encoding='utf-8-sig') as f:
    data = json.load(f)

rows=data['dataPack']['rows']

allInfo = []
for row in rows:
    # collect the wanted fields of each record as one spreadsheet row
    allInfo.append([row['FNAME'], row['FC47'], row['FTEL'], row['FPHONE']])

# When the workbook already exists, there is no need to create it again
'''
workbook = openpyxl.Workbook()       # create the workbook
worksheet = workbook.active          # get the active (first) sheet
worksheet.title = "Sheet1"
workbook.save(filename='/Users/xingkong2/Desktop/table.xlsx')
'''

workbook = openpyxl.load_workbook(path2)
worksheet = workbook.worksheets[0]

for info in allInfo:
    worksheet.append(info)    # append each row to the existing sheet

workbook.save(filename=path2)
print(position + ": JSON data written to Excel")
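For the first run, all.xlsx does not exist yet, so it has to be created once per city; the commented-out block above does this without a header row. A minimal sketch that also writes a header. The header names are assumptions for illustration: only '客户名称' is confirmed by the deduplication step below, the rest simply reuse the JSON field names:

import openpyxl

path2 = '/Users/xingkong2/Desktop/湖南/长沙/all.xlsx'

workbook = openpyxl.Workbook()
worksheet = workbook.active
worksheet.title = "Sheet1"
# assumed header row; adjust the names to whatever the real columns should be
worksheet.append(['客户名称', 'FC47', 'FTEL', 'FPHONE'])
workbook.save(filename=path2)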

Deduplicating Excel data

import pandas as pd

positions=['安阳', '鹤壁', '济源', '焦作', '开封', '洛阳', '漯河', '南阳', '平顶山', '濮阳', '三门峡','商丘','新乡','信阳','许昌','郑州','周口','驻马店']

for position in positions:

    path='/Users/xingkong2/Desktop/河南/'+position+'/all.xlsx'

    path2='/Users/xingkong2/Desktop/河南/'+position+'/'+position+'唯一化.xlsx'

    # Read the data from Sheet1 of the Excel file (read_excel already returns a DataFrame)
    data = pd.read_excel(path, 'Sheet1')

    # Drop rows that duplicate an earlier row in the '客户名称' (customer name) column
    wp = data.drop_duplicates(['客户名称'])

    # Write the deduplicated data to the per-city '唯一化' file so the original all.xlsx is kept
    wp.to_excel(path2)
    print(position + ": deduplication done")



#positions=['安阳', '鹤壁', '济源', '焦作', '开封', '洛阳', '漯河', '南阳', '平顶山', '濮阳', '三门峡','商丘','新乡','信阳','许昌','郑州','周口','驻马店']
#positions=['常德', '郴州', '衡阳', '怀化', '娄底', '邵阳', '湘潭', '益阳', '永州', '岳阳', '张家界','长沙','株洲']
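If a single province-wide list is also wanted, the per-city results can be concatenated and deduplicated in one pass. A minimal sketch, assuming the loop above has already produced the per-city 唯一化.xlsx files; the shortened city list and the combined output path are made up for illustration:

import pandas as pd

positions = ['安阳', '鹤壁', '济源']          # shortened list for illustration
frames = []
for position in positions:
    path2 = '/Users/xingkong2/Desktop/河南/' + position + '/' + position + '唯一化.xlsx'
    # the files written above include the DataFrame index as the first column
    frames.append(pd.read_excel(path2, index_col=0))

combined = pd.concat(frames, ignore_index=True)
combined = combined.drop_duplicates(['客户名称'])
# hypothetical output path for the province-wide list
combined.to_excel('/Users/xingkong2/Desktop/河南/河南汇总.xlsx', index=False)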

Taobao flash-sale (seckill) script

from selenium import webdriver
import datetime
import time

def login():
    # Open the Taobao login page and log in by scanning the QR code within 15 seconds
    browser.get("https://login.taobao.com/member/login.jhtml?redirectURL=http%3A%2F%2Fcart.taobao.com%2Fcart.htm")
    time.sleep(15)
    browser.get("https://cart.taobao.com/cart.htm")

def buy(times, choose):
    # Poll the clock; once the start time is reached, select everything in the cart and check out
    while True:
        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
        # Compare the time strings; once the start time has passed, proceed to checkout
        if now > times:                 # known issue: 00:00:00 (midnight) is a special case for this string comparison

            if choose == 1:
                while True:         # keep looping until the element appears
                    try:
                        if browser.find_element_by_id("J_SelectAll1"):
                            browser.find_element_by_id("J_SelectAll1").click()
                            print("select-all clicked")
                            break
                    except:
                        print("select-all button not found yet")
            # Click the checkout button
            time.sleep(0.15)
            while True:
                try:
                    if browser.find_element_by_id("J_Go"):
                        browser.find_element_by_id("J_Go").click()
                        print("checkout clicked")
                        break
                except:
                    pass

            # Click the "提交订单" (submit order) link
            while True:
                try:
                    if browser.find_element_by_link_text('提交订单'):
                        browser.find_element_by_link_text('提交订单').click()
                        break
                except:
                    pass

            # Enter the payment password and confirm
            while True:
                try:
                    pswInput = browser.find_element_by_id('payPassword_rsainput')
                    pswInput.send_keys('999999')
                    J_authSubmit = browser.find_element_by_id('J_authSubmit')
                    J_authSubmit.click()
                    print("payment submitted")
                    break
                except:
                    pass
            time.sleep(0.09)

if __name__ == "__main__":
    times = "2018-12-12 08:51:00.000000"
    browser = webdriver.Chrome()
    browser.maximize_window()
    login()
    buy(times, 1)
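The comment in buy() flags 00:00:00 as a problem point for the string comparison; comparing datetime objects avoids any string-ordering pitfalls. A minimal sketch of the same check, using the example target time from above:

import datetime

target = datetime.datetime.strptime("2018-12-12 08:51:00.000000", "%Y-%m-%d %H:%M:%S.%f")

# datetime objects compare correctly across the midnight rollover
if datetime.datetime.now() >= target:
    print("start time reached, begin checkout")

Note that the find_element_by_id / find_element_by_link_text calls above are the Selenium 3 API; under Selenium 4 the same lookup is written browser.find_element(By.ID, "J_SelectAll1"), with By imported from selenium.webdriver.common.by.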

PDF operations

Find a keyword in a PDF and merge the pages that contain it into a new PDF.

'''
1. Load the file and parse it with pdfminer
2. Walk through every page; when the keyword is hit, stop scanning that page and record where it was found (optionally the previous, current and next pages)
3. Deduplicate the recorded page numbers, then extract those pages and merge them into a new PDF
'''
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LTTextBoxHorizontal,LAParams
from pdfminer.pdfpage import PDFPage
from PyPDF2 import PdfFileReader, PdfFileWriter
import re
import logging
logging.Logger.propagate = False
logging.getLogger().setLevel(logging.ERROR)   # suppress pdfminer warning output

def Input():
    path = input("Enter the PDF file path: ")
    path = path + ".pdf"
    keyword = input("Enter the keyword to search for: ")
    output_path = input("Enter the path for the merged output file: ")
    output_path = output_path + ".pdf"
    return path, keyword, output_path

def PDF(path, keyword):        # input: document path and keyword; output: the pages the keyword appears on
    info = []
    pdfFileReader = PdfFileReader(path)            # raises an error if the document is encrypted
    numPages = pdfFileReader.getNumPages()

    document = open(path, 'rb')
    rsrcmgr = PDFResourceManager()  # create a PDF resource manager to handle shared resources
    # create a PDF device (page aggregator) object
    laparams = LAParams()
    device = PDFPageAggregator(rsrcmgr, laparams=laparams)
    # create a PDF interpreter object
    interpreter = PDFPageInterpreter(rsrcmgr, device)

    page_count = 0
    for page in PDFPage.get_pages(document):
        page_count = page_count + 1
        interpreter.process_page(page)
        layout = device.get_result()
        for element in layout:
            if isinstance(element, LTTextBoxHorizontal):
                bulk = element.get_text().strip()
                match = re.search(keyword, str(bulk), flags=0)
                if match:
                    #if page_count != 1:             # if matched, also keep the previous and next pages
                        #info.append(page_count - 1)
                    info.append(page_count)
                    #if page_count != numPages:
                        #info.append(page_count + 1)
                    break
    info = list(set(info))     # keeping neighbouring pages can produce duplicates, so deduplicate
    info.sort()
    return info

def merge(path, page_index, output_path):    # input: file path, pages to merge, output path
    pdfFileWriter = PdfFileWriter()
    # Get a PdfFileReader object
    pdfFileReader = PdfFileReader(path)  # alternatively: pdfFileReader = PdfFileReader(open(path, 'rb'))
    # Copy each matched page into the writer
    if len(page_index):
        for index in page_index:      # note: getPage uses zero-based page indices
            pageObj = pdfFileReader.getPage(index - 1)  # fetch a single page
            pdfFileWriter.addPage(pageObj)
        with open(output_path, 'wb') as out:
            pdfFileWriter.write(out)  # after adding all pages, save them to the file in one go
    else:
        print("No matching pages found")
    print("Done")

if __name__ == "__main__":
    path, keyword, output_path = Input()
    page_index = PDF(path, keyword)
    merge(path, page_index, output_path)
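The PdfFileReader / PdfFileWriter classes used above belong to the PyPDF2 1.x/2.x API and were removed in PyPDF2 3.0, where they are called PdfReader and PdfWriter. A minimal sketch of the same merge step with the newer names, assuming PyPDF2 3.x (the pdfminer part is unchanged):

from PyPDF2 import PdfReader, PdfWriter

def merge_v3(path, page_index, output_path):
    writer = PdfWriter()
    reader = PdfReader(path)
    for index in page_index:
        writer.add_page(reader.pages[index - 1])   # reader.pages is zero-based, like getPage
    with open(output_path, 'wb') as out:
        writer.write(out)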