Python爬虫增加CSDN博客访问量

 前言：Faker 库的安装与示例可以参考我的这篇文章《python的faker库批量生成User-Agent》。

 1 from faker import Factory
 2 import requests
 3 import re
 4 import time
 5  
 6  
 7 def get_page_id(home_url):
 8     user_agent = Factory.create()
 9     headers = {"User-Agent": user_agent.user_agent()}
10     home = requests.get(url=home_url, headers=headers).text
11     home_id = re.findall('<a href="{}/article/details/(.*?)" target="_blank">'.format(home_url), home)
12     if len(home_id) == 0:
13         get_page_id(home_url)
14     page_id = list(set(home_id))
15     page_id.sort(key=home_id.index)
16     return page_id
17  
18  
def browse_csdn(home_url, interval=20):
    """Request every article of a blog in an endless loop.

    The article ids are fetched once through ``get_page_id``; afterwards each
    article URL is requested in turn with a freshly generated User-Agent, and
    the cycle restarts when the list is exhausted.

    Args:
        home_url: URL of the blog home page, used as the prefix of every
            article URL.
        interval: Seconds to sleep after each article request.

    Returns:
        None. The function only returns when no article id was found;
        otherwise it loops until the process is interrupted.
    """
    page_id = get_page_id(home_url)
    if not page_id:
        # Without this guard the ``while True`` below would spin forever
        # without ever sleeping.
        print('{}\tno articles found'.format(home_url))
        return

    # One generator is enough; user_agent() yields a new value per call.
    user_agent = Factory.create()
    while True:
        for i in page_id:
            headers = {"User-Agent": user_agent.user_agent()}
            page_url = '{}/article/details/{}'.format(home_url, i)
            try:
                requests.get(url=page_url, headers=headers, timeout=10)
            except requests.RequestException as exc:
                # A single failed request must not end the whole run.
                print('{}\tFAILED: {}'.format(page_url, exc))
            else:
                print('{}\tOK'.format(page_url))
            time.sleep(interval)
29  
30  
if __name__ == '__main__':
    # Placeholder: fill in the blog home page URL before running the script.
    home_url = ''
    if not home_url:
        # Fail with a clear message instead of an error raised deep inside
        # requests for an empty URL.
        raise SystemExit('Set home_url to the blog home page URL before running.')
    browse_csdn(home_url)

 

posted @ 2020-04-22 11:11  秃秃的测试  阅读(312)  评论(0)    收藏  举报