B站路飞学城爬虫教学：使用 lxml 爬取 4K 图片

import requests
from lxml import etree
import os

# Network timeout in seconds applied to every HTTP request; without one,
# requests can wait indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Maps characters that are not allowed in file names (Windows and/or POSIX)
# to "_" so that an image's alt text can be used safely as a file name.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})


def sanitize_filename(name):
    """Return *name* made safe for use as a single file-name component.

    Every character in ``\\ / : * ? " < > |`` is replaced by ``_`` and
    surrounding whitespace is stripped. All other characters are kept.
    """
    return name.translate(_UNSAFE_FILENAME_CHARS).strip()


def fetch(session, target_url):
    """GET *target_url* through *session* and return the response.

    Returns None when the request fails (connection error, timeout, or a
    non-2xx status). The failure is printed so the crawl can continue with
    the next item instead of aborting.
    """
    try:
        response = session.get(target_url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print("{} 请求失败: {}".format(target_url, exc))
        return None
    return response


def main():
    """Download the preview image of every entry on listing pages 1..172.

    Images are written to ``./4k图片/<alt text>.jpg``. Pages or images that
    cannot be fetched are reported and skipped.
    """
    url = "https://pic.netbian.com/4kmeinv/index_%d.html"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'
    }
    save_dir = "./4k图片"
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(save_dir, exist_ok=True)

    # A single session reuses TCP connections across the many requests.
    with requests.Session() as session:
        session.headers.update(headers)
        # NOTE(review): the first listing page of this site may be
        # "index.html" rather than "index_1.html" -- confirm. A page that
        # does not exist is now skipped instead of being parsed.
        for page_num in range(1, 173):
            page = fetch(session, url % page_num)
            if page is None:
                continue
            # Let requests guess the charset from the body before decoding.
            page.encoding = page.apparent_encoding

            tree = etree.HTML(page.text)
            if tree is None:
                # Nothing parseable came back; move on to the next page.
                continue

            for li in tree.xpath("//div[@class='slist']//li"):
                alts = li.xpath("./a/img/@alt")
                srcs = li.xpath("./a/img/@src")
                if not alts or not srcs:
                    # Entry without an image (or without alt text): skip it
                    # rather than crash with IndexError.
                    continue

                img_name = sanitize_filename(alts[0]) + '.jpg'
                detail_url = "https://pic.netbian.com" + srcs[0]
                image = fetch(session, detail_url)
                if image is None:
                    continue

                img_path = os.path.join(save_dir, img_name)
                with open(img_path, "wb") as fp:
                    fp.write(image.content)
                print(img_name + "下载成功")

    print("over!!")


if __name__ == '__main__':
    main()

 

posted @ 2021-04-27 16:21  奋进的少年  阅读(168)  评论(0)    收藏  举报