requests爬取百度贴吧:python 美女 3

import requests
import sys


class Tieba(object):
    """Download the first pages of a Baidu Tieba forum and save them as HTML.

    Attributes:
        tieba_name: Forum name, used in the query string and in output
            file names.
        base_url: Listing URL up to and including ``pn=``; the page offset
            is appended to it.
        url_list: One URL per requested page, in page order.
        headers: HTTP headers sent with every request.
    """

    # Step between the ``pn`` offsets of consecutive listing pages.
    page_size = 50

    # Seconds to wait for the server before giving up, so a stalled
    # connection cannot hang the script forever.
    timeout = 10

    def __init__(self, tieba_name, pn):
        """Build the list of page URLs and the request headers.

        Args:
            tieba_name: Name of the forum to fetch.
            pn: Number of pages to fetch, starting from the first page.
        """
        self.tieba_name = tieba_name
        # ``kw`` and ``pn`` are separate query parameters, so they must be
        # joined by ``&``; without it the offset is glued onto the name.
        self.base_url = 'https://tieba.baidu.com/f?kw=%s&pn=' % tieba_name

        self.url_list = [
            self.base_url + str(page * self.page_size) for page in range(pn)
        ]

        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36'
        }

    def get_page(self, url):
        """Fetch ``url`` and return the raw response body as bytes."""
        response = requests.get(url, headers=self.headers,
                                timeout=self.timeout)
        return response.content

    def save_content(self, content, index):
        """Write ``content`` (bytes) to ``<tieba_name>_<index>.html``.

        The file is created in the current working directory and any
        existing file with the same name is overwritten.
        """
        filename = self.tieba_name + '_' + str(index) + '.html'
        with open(filename, 'wb') as f:
            f.write(content)

    def run(self):
        """Fetch every URL in ``url_list`` and save each page to disk."""
        # enumerate gives the page index directly, avoiding a linear
        # ``list.index`` search for every page.
        for index, url in enumerate(self.url_list):
            content = self.get_page(url)
            self.save_content(content, index)


if __name__ == '__main__':
    # Command line: <script> <tieba_name> <page_count>
    # Exit with a usage message rather than an IndexError traceback when
    # the required arguments are missing.
    if len(sys.argv) < 3:
        sys.exit('Usage: python %s <tieba_name> <page_count>' % sys.argv[0])
    name = sys.argv[1]
    pn = sys.argv[2]
    tieba = Tieba(name, int(pn))
    tieba.run()

  

 

使用方法

python  代码所在文件名   美女  3

 

posted @ 2017-12-11 22:45  安迪9468  阅读(201)  评论(0)  编辑  收藏  举报