python——爬虫之百度贴吧
import os
from urllib.parse import quote

import requests


class Bar:
    """Minimal crawler that downloads listing pages of one Baidu Tieba forum.

    Each listing page is fetched over HTTP, decoded to text and written to
    its own file inside ``output_dir``.

    Args:
        name: Forum ("bar") name, used as the ``kw`` query parameter and as
            the prefix of every saved file name.
        pages: Number of listing pages to fetch, starting from the first.
        output_dir: Directory the pages are written to; created on demand.
    """

    # Seconds to wait for the server; without a timeout ``requests.get``
    # may block forever on an unresponsive connection.
    REQUEST_TIMEOUT = 10

    # The listing is paginated through the ``pn`` offset in steps of 50.
    PAGE_STEP = 50

    def __init__(self, name, pages=3, output_dir='./tieba/'):
        """Store the forum name and build the page URL template."""
        self.bar_name = name
        self.pages = pages
        self.output_dir = output_dir
        # Percent-encode the name so characters such as '&', '#', '{' or '}'
        # can neither split the query string nor confuse ``str.format``.
        # The '&' before ``ie`` keeps it a separate query parameter.
        self.url = (
            "https://tieba.baidu.com/f?kw="
            + quote(self.bar_name, safe="")
            + "&ie=utf-8&pn={}"
        )

    def get_url_list(self):
        """Return the URLs of the first ``pages`` listing pages."""
        return [
            self.url.format(page * self.PAGE_STEP)
            for page in range(self.pages)
        ]

    @staticmethod
    def _decode(raw):
        """Decode a response body, preferring UTF-8 and falling back to GBK.

        GBK-encoded Chinese text is practically never valid UTF-8, so trying
        UTF-8 first is safe and avoids mojibake on UTF-8 responses.

        Raises:
            UnicodeDecodeError: If the bytes are valid in neither encoding.
        """
        try:
            return raw.decode('utf-8')
        except UnicodeDecodeError:
            return raw.decode('GBK')

    def parse_url(self, url):
        """Send a GET request to ``url`` and return the body as text."""
        response = requests.get(url=url, timeout=self.REQUEST_TIMEOUT)
        return self._decode(response.content)

    def save_data(self, content, page_count):
        """Write ``content`` to ``<output_dir>/<bar_name>-第<page_count>页``.

        The output directory is created first if it does not exist yet.
        """
        os.makedirs(self.output_dir, exist_ok=True)
        file_name = "{}-第{}页".format(self.bar_name, page_count)
        path = os.path.join(self.output_dir, file_name)
        with open(path, 'w', encoding='utf-8') as f:
            f.write(content)

    def run(self):
        """Crawl every page in order and save each one to disk."""
        # enumerate supplies the 1-based page number directly instead of
        # searching the list for each URL.
        for page_count, url in enumerate(self.get_url_list(), start=1):
            content = self.parse_url(url)
            self.save_data(content, page_count)


if __name__ == '__main__':
    Bar("抗压").run()
运行结果:


浙公网安备 33010602011771号