Python爬图片（面向对象版）

import os
from threading import Thread

import requests
from lxml import etree


class Spider(object):
    """Download the thumbnail images listed on the site's paginated index.

    Each listing page is fetched, every ``<li>`` entry's image is downloaded,
    and the bytes are written to ``<output_dir>/<title>.jpg``.
    """

    # Directory the images are written to (created on demand).
    output_dir = "imgs"
    # Seconds to wait for the server before a request is abandoned.
    timeout = 10
    # Maps characters that are unsafe in file names to "_".
    _FILENAME_TABLE = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

    def __init__(self):
        self.header = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"}
        self.url = None

    @staticmethod
    def _filename_from_title(title_nodes, fallback="untitled"):
        """Turn the text nodes of a title into a safe file name stem.

        Args:
            title_nodes: List of strings as returned by an XPath ``text()``
                query; may be empty.
            fallback: Name to use when there is no usable title text.

        Returns:
            The first title string, stripped, with path separators and other
            reserved characters replaced by ``_``. Never empty.
        """
        name = title_nodes[0].strip() if title_nodes else ""
        name = (name or fallback).translate(Spider._FILENAME_TABLE)
        return name or "untitled"

    def catch(self, page_num):
        """Download every image listed on one index page.

        Args:
            page_num: Page number substituted into the listing URL.
        """
        self.url = f"https://www.169tp.com/xingganmeinv/list_1_{page_num}.html"
        res = requests.get(self.url, headers=self.header, timeout=self.timeout)
        res.encoding = "gbk"

        tree = etree.HTML(res.text)
        entries = tree.xpath("/html/body/div[4]/ul/li")

        # open() does not create missing directories.
        os.makedirs(self.output_dir, exist_ok=True)

        for entry in entries:
            sources = entry.xpath("./a/img/@src")
            if not sources:
                # Entry without an image: nothing to download.
                continue
            addr = sources[0]

            # The XPath query yields a list; use its first string rather than
            # formatting the list itself into the file name.
            fallback = os.path.splitext(os.path.basename(addr))[0]
            title = self._filename_from_title(
                entry.xpath("./a/p/text()"), fallback=fallback
            )

            img = requests.get(addr, headers=self.header, timeout=self.timeout)
            if not img.ok:
                # Do not save an HTTP error body as if it were a picture.
                print(f"skipped {addr}: HTTP {img.status_code}")
                continue

            with open(os.path.join(self.output_dir, f"{title}.jpg"), mode="wb") as f:
                f.write(img.content)
            print(f" ------------------- {title}.jpg 完成 -----------------------")

    def start(self, first_page=1, stop_page=500):
        """Crawl the listing pages ``first_page`` up to, not including, ``stop_page``.

        Args:
            first_page: First page number to fetch.
            stop_page: Page number at which to stop (exclusive).
        """
        for num in range(first_page, stop_page):
            self.catch(num)
            print(f"-------------------------- 第 {num} 页完成-----------------------------------")



if __name__ == "__main__":
    # Run the crawl only when this file is executed as a script,
    # not when it is imported as a module.
    spider = Spider()
    spider.start()

posted @ 2023-04-11 22:16 映辉 阅读(46) 评论(0) 收藏 举报

刷新页面返回顶部

yhBlog

Python爬图片（面向对象版）

公告