python selenium

 

 

 1 #encoding: utf-8
 2 import selenium 
 3 from selenium import webdriver
 4 from selenium.webdriver.common.by import By
 5 import urllib.request as req
 6 import time
 7 
 8 start_time = time.time()
 9 
10 from selenium.webdriver.edge.options import Options # => 引入Chrome的配置
11 import time
12 
13 # 配置
14 ch_options = Options()
15 ch_options.add_argument("--headless")  # => 为Chrome配置无头模式
16 ch_options.add_experimental_option('excludeSwitches', ['enable-logging'])
17   
18 
19 
20 url__Jrtt='''https://mp.weixin.qq.com/s/8mWoT3_4g4qCI6zE4PBeMA'''
21 url=url__Jrtt.replace( '\n', "")
22 driver=webdriver.Edge( options=ch_options) # => 注意这里的参数
23 #第三步,如使用浏览器一样开始对网站进行访问
24 driver.minimize_window()  #设置窗口最大化
25 driver.implicitly_wait(3) #设置等待3秒后打开目标网页
26 driver.get(url)
27 time.sleep(1)
28 
29 ArticsLinks=driver.find_element( By.XPATH ,  '//*[@id="js_content"]')
30 links=ArticsLinks.find_elements(By.TAG_NAME , 'a')
31 
32 flag=0
33 datestmp=''
34 
35 import re
36 for i in links:  
37     flag+=1
38     title=i.text 
39 #     if flag<1105:
40 #         continue
41   
42     i.click()
43     time.sleep( 0.2 )
44     ok=driver.find_element( By.XPATH , '//*[@id="js_link_dialog_ok"]')
45     ok.click()
46     
47     time.sleep( .2 )
48     windows = driver.window_handles  
49     driver.switch_to.window(windows[-1])
50     
51     artics=driver.find_elements( By.XPATH , '//*[@id="js_content"]')
52     try:
53         datestmp=driver.find_element( By.XPATH , '//*[@id="publish_time"]')
54     except:
55         datestmp="--已删除--"
56     else:
57         datestmp=datestmp.text
58         datestmp=datestmp.split( " ")[0]
59         
60     AllText=""
61     for k in artics:
62         AllText =AllText +k.text 
63 #         ArticsContext=i.text 
64 #         print( flag,datestmp ,'===',title , "===",AllText )
65     
66     
67     title = re.sub('[\/:*?"<>|]','-',title)#去掉非法字符  
68     ph='.\\记忆承载文章\\' +datestmp+' '+title+'.txt' #++r"__"
69     with open( ph, 'w',encoding='utf-8') as f: 
70         f.write ( AllText   )
71     print( flag , ":" , title , 'finish.')
72 
73     driver.close() 
74     driver.switch_to.window(windows[0])   
75 
76 # for link in driver.find_elements( By.CLASS_NAME,"a"):
77 #     id+=1
78 #     print ( id , link)
79     
80 # f=open( 'out.txt',  'w')  
81 # with open( 'out.xml', 'w',encoding='utf-8') as f: 
82 #     f.write (  driver.page_source    )        
83 #     f.close()
84 end_time = time.time()
85 print("总耗时: {:.2f}秒".format(end_time - start_time))
86 input( "all finished press Enter to quit:")
87 driver.quit()

 

posted @ 2023-09-02 19:50  陳亞林  阅读(33)  评论(0)    收藏  举报