Python爬虫增加CSDN博客访问量
前言:Faker 库的安装与使用示例,可以参考我的这篇文章《python的faker库批量生成User-Agent》。
"""Repeatedly request every article page linked from a CSDN blog home page."""
import re
import time

import requests
from faker import Factory

# Seconds to wait for a response before giving up on a single request;
# without a timeout a stalled connection would block the script forever.
_REQUEST_TIMEOUT = 10
# How many times the home page is fetched before concluding it has no links.
_MAX_FETCH_ATTEMPTS = 5
# Pause (seconds) between two attempts at fetching the home page.
_RETRY_DELAY = 2


def get_page_id(home_url):
    """Return the article IDs linked from the blog home page.

    The home page is fetched with a random User-Agent and scanned for
    ``<a href="{home_url}/article/details/<id>" target="_blank">`` links.
    If a fetch yields no links it is retried, up to ``_MAX_FETCH_ATTEMPTS``
    times in total.

    Args:
        home_url: Base URL of the blog, without a trailing slash.

    Returns:
        List of article ID strings, de-duplicated, in order of first
        appearance on the page. Empty if no link was found on any attempt.

    Raises:
        requests.RequestException: If a request for the home page fails.
    """
    fake = Factory.create()
    # Escape the URL so characters such as "." are matched literally.
    link_pattern = re.compile(
        '<a href="{}/article/details/(.*?)" target="_blank">'.format(
            re.escape(home_url)
        )
    )

    home_id = []
    for attempt in range(_MAX_FETCH_ATTEMPTS):
        if attempt:
            time.sleep(_RETRY_DELAY)
        headers = {"User-Agent": fake.user_agent()}
        home = requests.get(
            url=home_url, headers=headers, timeout=_REQUEST_TIMEOUT
        ).text
        home_id = link_pattern.findall(home)
        if home_id:
            break

    # Drop duplicates while keeping the first-seen order.
    seen = set()
    page_id = []
    for article_id in home_id:
        if article_id not in seen:
            seen.add(article_id)
            page_id.append(article_id)
    return page_id


def browse_csdn(home_url, interval=20):
    """Request every article page of the blog in an endless loop.

    Args:
        home_url: Base URL of the blog, without a trailing slash.
        interval: Seconds to sleep after each page request.

    Returns:
        None. Only returns when the home page yields no article links;
        otherwise it runs until interrupted.
    """
    page_id = get_page_id(home_url)
    if not page_id:
        # Nothing to request: looping forever would do no work at all.
        print('No article links found at {}'.format(home_url))
        return

    fake = Factory.create()
    while True:
        for i in page_id:
            headers = {"User-Agent": fake.user_agent()}
            page_url = '{}/article/details/{}'.format(home_url, i)
            try:
                requests.get(
                    url=page_url, headers=headers, timeout=_REQUEST_TIMEOUT
                )
            except requests.RequestException as exc:
                # One failed request should not end the long-running loop.
                print('{}\tFAILED: {}'.format(page_url, exc))
            else:
                print('{}\tOK'.format(page_url))
            time.sleep(interval)


if __name__ == '__main__':
    # Fill in the blog's base URL before running.
    home_url = ''
    if not home_url:
        raise SystemExit('Set home_url to the blog base URL before running.')
    browse_csdn(home_url)

浙公网安备 33010602011771号