Python——爬虫之百度贴吧

 1 import requests
 2 
 3 
 4 class Bar:
 5     def __init__(self, name):
 6         """初始化"""
 7         self.bar_name = name
 8         self.url = "https://tieba.baidu.com/f?kw=" + self.bar_name + "ie=utf-8&pn={}"
 9 
10     def get_url_list(self):
11         """获取url集合"""
12         # print([self.url.format(i * 50) for i in range(3)])
13         return [self.url.format(i * 50) for i in range(3)]
14 
15     def parse_url(self, url):
16         """发送请求并解析数据"""
17         response = requests.get(url=url)
18         return response.content.decode('GBK')
19 
20     def save_data(self, content, page_count):
21         """存入数据库或者文件中"""
22         file_name = "{}-第{}页".format(self.bar_name, page_count)
23         with open('./tieba/' + file_name, 'w', encoding='utf-8') as f:
24             f.write(content)
25 
26     def run(self):
27         """爬虫启动程序"""
28         # 1.构造url地址# pass
29         requests_list = self.get_url_list()
30         for url in requests_list:
31             content = self.parse_url(url)
32             page_count = requests_list.index(url) + 1
33             self.save_data(content, page_count)
34 
35 
if __name__ == "__main__":
    # Script entry point: crawl the forum named "抗压".
    spider = Bar("抗压")
    spider.run()

运行结果:

posted @ 2019-12-16 10:30  菜鸟小远  阅读(185)  评论(0)    收藏  举报