1 #encoding: utf-8
import os
import re
import time
import urllib.request as req

import selenium
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
7
# Wall-clock timestamp taken before any browser work starts; the elapsed-time
# report printed at the end of the script is measured from this point.
start_time = time.time()
9
10 from selenium.webdriver.edge.options import Options # => 引入Chrome的配置
11 import time
12
# Page whose ``#js_content`` links are opened and archived one by one.
INDEX_URL = 'https://mp.weixin.qq.com/s/8mWoT3_4g4qCI6zE4PBeMA'

# Folder (relative to the working directory) that receives one .txt per link.
OUTPUT_DIR = os.path.join('.', '记忆承载文章')

# File-name prefix used when the opened page has no ``#publish_time`` element
# (the literal means "deleted").
MISSING_DATE_STAMP = "--已删除--"

# Characters Windows rejects in file names, plus ASCII control characters
# (a link text may contain line breaks).
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def sanitize_filename(name):
    """Return *name* with every character illegal in a file name replaced by '-'.

    Replaced characters are ``\\ / : * ? " < > |`` and the control characters
    U+0000..U+001F. All other characters, including spaces and non-ASCII text,
    are kept unchanged.
    """
    return _ILLEGAL_FILENAME_CHARS.sub('-', name)


def build_driver():
    """Create and return an Edge WebDriver configured for headless mode."""
    options = Options()
    options.add_argument("--headless")  # run Edge without a visible window
    # NOTE(review): presumably excluded to silence the browser's console
    # logging - confirm against the Chromium/Edge switch documentation.
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    return webdriver.Edge(options=options)


def open_linked_article(driver, link, main_handle):
    """Click *link*, confirm the dialog and focus the window that opens.

    Args:
        driver: The active WebDriver, currently focused on the index page.
        link: The ``<a>`` element to click.
        main_handle: Window handle of the index page.

    Returns:
        True if a window other than *main_handle* exists after the click and
        has been focused; False if no additional window appeared.

    Raises:
        NoSuchElementException: If the ``#js_link_dialog_ok`` element does not
            show up after the click.
    """
    link.click()
    time.sleep(0.2)
    # Presumably the "OK" button of a leave-this-page confirmation dialog.
    driver.find_element(By.XPATH, '//*[@id="js_link_dialog_ok"]').click()
    time.sleep(0.2)

    # Window-handle order is not guaranteed, so pick the new window by
    # excluding the known main handle instead of trusting positions.
    new_handles = [h for h in driver.window_handles if h != main_handle]
    if not new_handles:
        return False
    driver.switch_to.window(new_handles[-1])
    return True


def read_article(driver):
    """Return ``(date_stamp, text)`` for the page in the focused window.

    *date_stamp* is the part of the ``#publish_time`` text before the first
    space, or ``MISSING_DATE_STAMP`` when that element is absent. *text* is the
    concatenated text of every ``#js_content`` element.
    """
    bodies = driver.find_elements(By.XPATH, '//*[@id="js_content"]')
    try:
        publish_time = driver.find_element(By.XPATH, '//*[@id="publish_time"]')
    except NoSuchElementException:
        date_stamp = MISSING_DATE_STAMP
    else:
        date_stamp = publish_time.text.split(" ")[0]
    text = "".join(body.text for body in bodies)
    return date_stamp, text


def save_article(date_stamp, title, text, directory=OUTPUT_DIR):
    """Write *text* to ``<directory>/<date_stamp> <title>.txt`` as UTF-8.

    The directory is created when missing and both name parts are sanitized,
    so the write cannot fail on a fresh checkout or on an illegal character.

    Returns:
        The path of the file that was written.
    """
    os.makedirs(directory, exist_ok=True)
    file_name = sanitize_filename(date_stamp) + ' ' + sanitize_filename(title) + '.txt'
    path = os.path.join(directory, file_name)
    with open(path, 'w', encoding='utf-8') as f:
        f.write(text)
    return path


def main(first_index=1):
    """Open every link of the index page and save each target as a text file.

    Args:
        first_index: 1-based position of the first link to process; earlier
            links are skipped. Useful to resume an interrupted run.

    The browser is always shut down, even when a step raises. The elapsed time
    printed at the end is measured from the module-level ``start_time``.
    """
    driver = build_driver()
    try:
        driver.minimize_window()
        # Element lookups poll for up to 3 seconds before raising.
        driver.implicitly_wait(3)
        driver.get(INDEX_URL)
        time.sleep(1)

        main_handle = driver.current_window_handle
        container = driver.find_element(By.XPATH, '//*[@id="js_content"]')
        links = container.find_elements(By.TAG_NAME, 'a')

        for index, link in enumerate(links, start=1):
            if index < first_index:
                continue
            # Read the text before clicking: focus moves to another window.
            title = sanitize_filename(link.text)

            if not open_linked_article(driver, link, main_handle):
                print(index, ":", title, 'skipped (no new window opened).')
                continue
            try:
                date_stamp, text = read_article(driver)
                save_article(date_stamp, title, text)
                print(index, ":", title, 'finish.')
            finally:
                # Always return to the index page so the next link is usable.
                driver.close()
                driver.switch_to.window(main_handle)

        print("总耗时: {:.2f}秒".format(time.time() - start_time))
        input("all finished press Enter to quit:")
    finally:
        driver.quit()


if __name__ == "__main__":
    main()