import os
import re
from threading import Thread

import requests
from lxml import etree
class Spider(object):
    """Crawler that saves the images linked from 169tp.com list pages.

    Each list page is fetched, its ``<li>`` entries are located with XPath,
    and the image referenced by every entry is written into ``output_dir``
    using the entry's caption as the file name.
    """

    # Directory that downloaded images are written to.
    output_dir = "imgs"
    # Seconds to wait on the server before a request is abandoned; without a
    # timeout a stalled connection would block the crawl indefinitely.
    timeout = 10
    # Characters that are invalid in file names on common file systems.
    _UNSAFE_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

    def __init__(self):
        """Set up the request headers sent with every HTTP request."""
        self.header = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"}

    @classmethod
    def _safe_name(cls, title):
        """Return *title* turned into a string usable as a file name.

        Path separators and other reserved characters are replaced with
        ``_`` and surrounding whitespace is removed. An empty result falls
        back to ``"untitled"`` so a file name is always produced.
        """
        cleaned = cls._UNSAFE_CHARS.sub("_", title).strip()
        return cleaned or "untitled"

    def catch(self, page_num):
        """Download every image listed on list page *page_num*.

        The page URL is stored on ``self.url``. Entries without an image
        source are skipped; entries without a caption are saved under the
        fallback name produced by ``_safe_name``.

        Args:
            page_num: Number substituted into the list page URL.
        """
        self.url = f"https://www.169tp.com/xingganmeinv/list_1_{page_num}.html"
        res = requests.get(self.url, headers=self.header, timeout=self.timeout)
        # The response body is decoded as GBK rather than the guessed charset.
        res.encoding = "gbk"
        tree = etree.HTML(res.text)
        if tree is None:
            # Guard against a page that yields no parse tree.
            return

        # Create the target directory up front so open() below cannot fail
        # merely because the directory is missing.
        os.makedirs(self.output_dir, exist_ok=True)

        for item in tree.xpath("/html/body/div[4]/ul/li"):
            sources = item.xpath("./a/img/@src")
            if not sources:
                continue
            # xpath() returns a list of text nodes; use the first one as the
            # caption instead of formatting the whole list into the path.
            captions = item.xpath("./a/p/text()")
            title = self._safe_name(captions[0] if captions else "")
            detail = requests.get(
                sources[0], headers=self.header, timeout=self.timeout
            ).content
            with open(os.path.join(self.output_dir, f"{title}.jpg"), mode="wb") as f:
                f.write(detail)
            print(f" ------------------- {title}.jpg 完成 -----------------------")

    def start(self, first_page=1, last_page=499):
        """Crawl list pages *first_page* through *last_page*, inclusive.

        Args:
            first_page: First page number to fetch.
            last_page: Last page number to fetch.
        """
        for num in range(first_page, last_page + 1):
            self.catch(num)
            print(f"-------------------------- 第 {num} 页完成-----------------------------------")
if __name__ == '__main__':
    # Script entry point: build the crawler and run the whole crawl.
    Spider().start()