Python自动化
Office
创建excel表格
import openpyxl
# Where the freshly created workbook is written.
output_file = '/Users/xingkong2/Desktop/haha.xlsx'

# A new in-memory workbook; its active sheet is the one we rename and fill.
workbook = openpyxl.Workbook()
worksheet = workbook.active
worksheet.title = "mySheet"

# Put a sample value into row 1, column 1.
worksheet.cell(row=1, column=1, value="askgdajds")

# Write the workbook to disk.
workbook.save(filename=output_file)
json数据写入excel
需要创建新表
import json
import openpyxl
# City names noted by the original author (presumably other values used for
# `position` on earlier runs): 鄂州 黄冈 黄石 荆门 十堰 咸宁 襄阳 孝感 宜昌
position = '长沙'
path = '/Users/xingkong2/Desktop/湖南/' + position + '/content2.json'
path2 = '/Users/xingkong2/Desktop/湖南/' + position + '/all.xlsx'

# Load the exported JSON document.
with open(path, encoding='utf-8-sig') as f:
    data = json.load(f)

rows = data['dataPack']['rows']

# One spreadsheet row per JSON record, keeping only the four wanted fields.
allInfo = [
    [row['FNAME'], row['FC47'], row['FTEL'], row['FPHONE']]
    for row in rows
]

# The target workbook has to exist already because it is opened, not created.
# To create it once beforehand:
#     workbook = openpyxl.Workbook()
#     worksheet = workbook.active
#     worksheet.title = "Sheet1"
#     workbook.save(filename='/Users/xingkong2/Desktop/table.xlsx')
workbook = openpyxl.load_workbook(path2)
worksheet = workbook.worksheets[0]

# Append every collected record below the rows already in the first sheet.
for info in allInfo:
    worksheet.append(info)

workbook.save(filename=path2)
print(position + "将json数据写入Excel成功")
excel数据去重
import pandas as pd
# Cities to process; each one has its own sub-folder containing all.xlsx.
positions = ['安阳', '鹤壁', '济源', '焦作', '开封', '洛阳', '漯河', '南阳', '平顶山',
             '濮阳', '三门峡', '商丘', '新乡', '信阳', '许昌', '郑州', '周口', '驻马店']
# Alternative city list kept by the original author:
# positions = ['常德', '郴州', '衡阳', '怀化', '娄底', '邵阳', '湘潭', '益阳', '永州', '岳阳', '张家界', '长沙', '株洲']

for position in positions:
    path = '/Users/xingkong2/Desktop/河南/' + position + '/all.xlsx'
    path2 = '/Users/xingkong2/Desktop/河南/' + position + '/' + position + '唯一化.xlsx'

    # Read the data from sheet "Sheet1" of the source workbook.
    data = pd.read_excel(path, sheet_name='Sheet1')

    # Keep only the first row for each distinct value of the 客户名称 column.
    wp = data.drop_duplicates(['客户名称'])

    # Write the de-duplicated rows to the separate output workbook so the
    # source file all.xlsx is left untouched.
    wp.to_excel(path2)
    print(position + "去重完成")
淘宝秒杀
from selenium import webdriver
import datetime
import time
def login():
    """Open the Taobao login page, pause for a manual login, then open the cart.

    Uses the module-level ``browser`` WebDriver instance.
    """
    login_page = "https://login.taobao.com/member/login.jhtml?redirectURL=http%3A%2F%2Fcart.taobao.com%2Fcart.htm"
    cart_page = "https://cart.taobao.com/cart.htm"

    browser.get(login_page)
    # Fixed pause so the user can log in by scanning the QR code.
    time.sleep(15)
    browser.get(cart_page)
def _click_when_present(by, value, miss_message=None):
    """Keep retrying until the element located by (by, value) has been clicked."""
    while True:
        try:
            # find_element raises when the element is not there yet, so a
            # single lookup followed by click() is enough.
            browser.find_element(by, value).click()
            return
        except Exception:
            # Best-effort retry; ``Exception`` (not a bare except) keeps
            # Ctrl-C able to stop the loop.
            if miss_message:
                print(miss_message)


def buy(times, choose, pay_password='999999'):
    """Wait until ``times`` and then check out the cart shown in ``browser``.

    Uses the module-level ``browser`` WebDriver instance.

    Args:
        times: Start time as text in the layout '%Y-%m-%d %H:%M:%S.%f'. It is
            compared as a string against the current time rendered in that
            same layout, so the layouts must match.
        choose: When equal to 1 the "select all" checkbox is clicked before
            checking out; any other value skips that click.
        pay_password: Text typed into the payment password field.
    """
    while True:
        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
        # Once the start time has passed, run the checkout sequence.
        # Known issue noted by the original author: 00:00:00 is a special
        # case for this comparison.
        if now > times:
            if choose == 1:
                _click_when_present("id", "J_SelectAll1",
                                    miss_message="找不到全选按钮")
                print("全选点击成功")

            # Short pause before clicking the checkout button.
            time.sleep(0.15)
            _click_when_present("id", "J_Go")
            print("结算成功")

            _click_when_present("link text", '提交订单')

            while True:
                try:
                    # Locate both elements before typing so that a retry
                    # cannot enter the password a second time just because
                    # the submit button was not on the page yet.
                    password_input = browser.find_element("id", 'payPassword_rsainput')
                    submit_button = browser.find_element("id", 'J_authSubmit')
                    password_input.send_keys(pay_password)
                    submit_button.click()
                    print("支付成功")
                    break
                except Exception:
                    # Payment page not ready yet; try again.
                    pass
            # NOTE(review): nothing leaves the outer loop after payment, so
            # the sequence above is attempted again - confirm whether the
            # function should return here instead.
        # Polling interval between time checks.
        time.sleep(0.09)
if __name__ == "__main__":
    # Chrome session that login() and buy() reach through the global name
    # ``browser``.
    browser = webdriver.Chrome()
    browser.maximize_window()

    # Moment at which the checkout should start.
    start_at = "2018-12-12 08:51:00.000000"

    login()
    buy(start_at, 1)
PDF操作
在pdf中找到想找的关键字,并且将找到的页面合并成为一个新的pdf
'''
1 拿到文件并且pdfminer解析
2 遍历每一页,碰到关键字就退出,记录关键字所在的位置,页面(上中下三个)
3 去重记录信息 把相应的页面截取并重组
'''
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LTTextBoxHorizontal,LAParams
from pdfminer.pdfpage import PDFPage
from PyPDF2 import PdfFileReader, PdfFileWriter
import re
import logging
# Suppress warning-level log messages by raising the root logger's threshold.
# (``propagate`` is deliberately not set on the ``logging.Logger`` class:
# every logger instance assigns its own ``propagate`` in ``__init__``, so a
# class-level value is never consulted.)
logging.getLogger().setLevel(logging.ERROR)
def _with_pdf_suffix(name):
    """Return ``name`` ending in ".pdf", adding the suffix only when it is missing."""
    if name.lower().endswith(".pdf"):
        return name
    return name + ".pdf"


def Input():
    """Ask the user for the source PDF, the search keyword and the output file.

    Both paths may be typed with or without the ".pdf" extension.

    Returns:
        A tuple ``(path, keyword, output_path)`` in which both paths end in
        ".pdf".
    """
    path = _with_pdf_suffix(input("请输入PDF文件路径:"))
    keyword = input("请输入要搜索的关键字:")
    output_path = _with_pdf_suffix(input("请输入合并文件需要存放的路径:"))
    return path, keyword, output_path
def PDF(path, keyword, context=0):
    """Find the pages of a PDF whose horizontal text boxes contain ``keyword``.

    Args:
        path: Path of the PDF file to scan.
        keyword: Pattern to search for. It is used as a regular expression;
            if it is not a valid one (for example "C++") it is searched for
            literally instead.
        context: Number of neighbouring pages to include on each side of
            every matching page. The default 0 returns matching pages only.

    Returns:
        Sorted list of unique page numbers, counted from 1.
    """
    try:
        pattern = re.compile(keyword)
    except re.error:
        pattern = re.compile(re.escape(keyword))
    context = max(0, context)

    # Resource manager for shared resources, a layout-aggregating device and
    # the interpreter that feeds pages to that device.
    rsrcmgr = PDFResourceManager()
    device = PDFPageAggregator(rsrcmgr, laparams=LAParams())
    interpreter = PDFPageInterpreter(rsrcmgr, device)

    hits = []
    page_count = 0
    with open(path, 'rb') as document:
        for page in PDFPage.get_pages(document):
            page_count += 1
            interpreter.process_page(page)
            layout = device.get_result()
            for element in layout:
                if not isinstance(element, LTTextBoxHorizontal):
                    continue
                if pattern.search(element.get_text().strip()):
                    hits.append(page_count)
                    # One hit is enough; move on to the next page.
                    break

    # Widen each hit by ``context`` pages, clamped to the document bounds.
    # The set removes overlaps between neighbouring windows.
    wanted = set()
    for hit in hits:
        first = max(1, hit - context)
        last = min(page_count, hit + context)
        wanted.update(range(first, last + 1))
    return sorted(wanted)
def merge(path, page_index, output_path):
    """Copy selected pages of one PDF into a new PDF file.

    Args:
        path: Path of the source PDF.
        page_index: Page numbers to copy, counted from 1.
        output_path: Path of the PDF file to write.

    Prints "页数为空" and writes nothing when ``page_index`` is empty.
    """
    if page_index:
        # Keep the source open until the output has been written, then close
        # both files.
        with open(path, 'rb') as source:
            pdfFileReader = PdfFileReader(source)
            pdfFileWriter = PdfFileWriter()
            for index in page_index:
                # getPage counts from 0 while page_index counts from 1.
                pdfFileWriter.addPage(pdfFileReader.getPage(index - 1))
            # Save once, after every requested page has been added.
            with open(output_path, 'wb') as target:
                pdfFileWriter.write(target)
    else:
        print("页数为空")
    print("完成")
if __name__ == "__main__":
    # Ask what to search for and where to store the result.
    source_pdf, keyword, merged_pdf = Input()
    # Pages of the source that contain the keyword.
    matched_pages = PDF(source_pdf, keyword)
    # Write those pages into the new PDF.
    merge(source_pdf, matched_pages, merged_pdf)

浙公网安备 33010602011771号