1 # -*- coding: utf-8 -*-
2 # @Time : 2020/7/6 13:46
3 # @Author : Chunfang
4 # @Email : 3470959534@qq.com
5 # @File : test02.py
6 # @Software: PyCharm
7
8 import os,xlrd,time
9 import re
10 import datetime
11 from openpyxl import load_workbook
12 from selenium import webdriver
13 from time import sleep
14 from selenium.webdriver.chrome.service import Service
15
# Page-classification patterns, compiled once instead of on every row.
# Each one captures a marker string from the rendered product page:
#   _BUSY_RE       text starting with "亲" and ending with "回来" inside a "tips" div
#   _NOT_FOUND_RE  <em> text starting with "抱歉" under an h3 "title" heading
#   _WEIGHT_RE     <em> text that follows a <b> label ending in "重量"
#   _ORDERABLE_RE  <span> text ending in "订购" on the order button link
_BUSY_RE = re.compile(r'<div class="tips" style=".*?<p>.*?(亲.*?回来).*?</p>', re.S)
_NOT_FOUND_RE = re.compile(r'h3 class="title">.*?<em>(抱歉.*?)</em>', re.S)
_WEIGHT_RE = re.compile(r'<span>.*?<b>.*?重量</b>.*?<em>(.*?)</em>', re.S)
_ORDERABLE_RE = re.compile(r'title="点击此按钮.*?rel="nofollow"><span>(.*?订购)</span></a>', re.S)


def _fetch_page_source(url):
    """Open *url* in Chrome with the local user profile and return the page source.

    The driver and the chromedriver service are always shut down, even when
    loading the page raises, so a failed row does not leave processes behind.
    """
    c_service = Service(r'D:\Python\Scripts\chromedriver.exe')
    c_service.command_line_args()  # result unused; kept from the original call sequence
    c_service.start()
    try:
        option = webdriver.ChromeOptions()
        # Reuse the existing Chrome profile (cookies / login state).
        option.add_argument(
            r'--user-data-dir=C:\Users\Administrator\AppData\Local\Google\Chrome\User Data'
        )
        # NOTE(review): c_service is not passed to webdriver.Chrome, so Chrome
        # appears to start its own driver process - confirm this is intended.
        driver = webdriver.Chrome(options=option)
        try:
            driver.implicitly_wait(3)
            driver.get(url)
            sleep(3)  # fixed pause before reading the page (presumably for rendering)
            data = driver.page_source
            sleep(2)
        finally:
            driver.quit()
        sleep(2)
    finally:
        c_service.stop()
    return data


def _scan_page(data):
    """Return ``[busy, error_404, pro_weight, right]`` regex matches for page *data*."""
    return [
        _BUSY_RE.findall(data),
        _NOT_FOUND_RE.findall(data),
        _WEIGHT_RE.findall(data),
        _ORDERABLE_RE.findall(data),
    ]


def _row_status(stations):
    """Map the match lists from ``_scan_page`` to the value stored in column 3."""
    _busy, error_404, pro_weight, right = stations
    if error_404:
        # The page reported an error: store its message.
        return error_404[0]
    if pro_weight:
        # The original always took the second match, which raised IndexError
        # when the page had only one; fall back to the first in that case.
        return pro_weight[1] if len(pro_weight) > 1 else pro_weight[0]
    if right:
        # Orderable, but no weight on the page.
        return '产品有货,没有标注重量'
    # Neither a weight nor an order button: treated as delisted.
    return '产品下架'


def _send_report(excel_file):
    """E-mail *excel_file* as an attachment through the QQ SMTP server.

    SMTP errors are printed rather than raised, as in the original script.
    """
    import smtplib
    from email.mime.application import MIMEApplication
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText

    # Server / account settings.
    fromaddr = '3470959534@qq.com'
    password = '验证码'  # placeholder for the QQ mail authorization code
    toaddrs = ['3470959534@qq.com', '1725714926@qq.com']

    # Headers belong on the outer multipart message; the original set the
    # subject on the inner text part only, so the sent mail had no subject.
    m = MIMEMultipart()
    m['Subject'] = '测试邮件'
    m['From'] = fromaddr
    m['To'] = ', '.join(toaddrs)
    m.attach(MIMEText('hello! 这是跑货源结果,请查收', 'plain', 'utf-8'))

    with open(excel_file, 'rb') as fh:
        excel_apart = MIMEApplication(fh.read())
    # 'attachment' was misspelled in the original header value.
    excel_apart.add_header('Content-Disposition', 'attachment', filename=excel_file)
    m.attach(excel_apart)

    try:
        # The context manager closes the connection on every path.
        with smtplib.SMTP('smtp.qq.com') as server:
            server.login(fromaddr, password)
            server.sendmail(fromaddr, toaddrs, m.as_string())
            print('success')
    except smtplib.SMTPException as e:
        print('error:', e)


def down_data(filepath2='SKU-URL-weight.xlsx', start_row=16635):
    """Scrape every product URL in the workbook, record the result, then mail it.

    For each row from *start_row* to the last row of the first worksheet, the
    URL in column 2 is loaded in Chrome, the page is classified, and the
    outcome (weight, error message, or stock note) is written to column 3.
    The SKU in column 1 is only printed for progress output.

    Args:
        filepath2: Path of the workbook to read and update in place.
        start_row: First 1-based worksheet row to process.
    """
    start = datetime.datetime.now()

    wb2 = load_workbook(filepath2)
    ws2 = wb2.worksheets[0]

    for i in range(start_row, ws2.max_row + 1):
        print('第' + str(i) + '个sku:' + str(ws2.cell(i, 1).value))
        url = ws2.cell(i, 2).value

        stations = _scan_page(_fetch_page_source(url))
        # Reload while the page shows the "busy" notice.
        # NOTE: as in the original, there is no retry limit.
        while stations[0]:
            stations = _scan_page(_fetch_page_source(url))
        print(stations)

        ws2.cell(i, 3).value = _row_status(stations)
        # Persist after every row so a failure later in the run does not
        # discard the rows already processed.
        wb2.save(filepath2)

    end = datetime.datetime.now()
    print('Running time: %s Seconds' % (end - start).total_seconds())

    # Send the updated workbook to the recipients.
    _send_report(filepath2)
115
# Run the scrape only when executed as a script, so that importing this
# module does not start the browser automation and e-mail delivery.
if __name__ == '__main__':
    down_data()
117 #设置时间跑店小秘货源
118 # while True:
119 # time_now = time.strftime('%H:%M:%S',time.localtime())
120 #
121 # if time_now =="20:00:10":
122 # down_data()
123 # # print('Hello')
124 # subject = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime())+'定时发送测试'
125 # print(subject)
126 # time.sleep(2)