更新的爬数据

# __author:Dell
# date: 2022/6/14
# coding:utf-8
import os
import re

import requests

def _fetch(url, headers=None, timeout=10):
    """Return the response for ``url``, or ``None`` if it could not be fetched.

    A network error or timeout is reported on stdout and yields ``None``.
    A non-success HTTP status (``response.ok`` is false) also yields ``None``
    but is not reported: the crawl probes a fixed range of page numbers, so
    presumably many of them simply do not exist.
    """
    try:
        response = requests.get(url=url, headers=headers, timeout=timeout)
    except requests.RequestException as err:
        print("请求失败,已跳过: " + url + " (" + str(err) + ")")
        return None
    if not response.ok:
        return None
    return response


def index():
    """Download the "bigpic" JPEGs linked from the gallery pages into ``mzi1``.

    Requests pages ``2378_0.html`` through ``2378_999.html``, extracts every
    image URL under ``https://img.youmzi.com/d/file/bigpic/`` that ends in
    ``.jpg``, and saves each image as ``mzi1/<number>.jpg``.  Numbering starts
    at 32 and increases by one per saved image.

    Pages or images that cannot be fetched are skipped instead of aborting the
    whole run.  The function returns ``None`` normally once all pages have been
    processed.
    """
    out_dir = 'mzi1'
    headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36'}
    # Dots are escaped so they only match a literal ".", and the path part
    # excludes quotes, whitespace and angle brackets so one match cannot run
    # past the end of a URL into a later ".jpg" on the same line.
    image_url = re.compile(
        r"""https://img\.youmzi\.com/d/file/bigpic/[^\s"'<>]*?\.jpg"""
    )

    # Create the target directory up front; open() does not create it.
    os.makedirs(out_dir, exist_ok=True)

    # First file number; presumably continues an earlier batch -- TODO confirm.
    next_number = 32
    print("开始下载,请稍后....")
    for page in range(1000):
        page_url = 'https://www.youmzi.com/xinggan/2378_' + str(page) + ".html"
        page_response = _fetch(page_url, headers=headers)
        if page_response is None:
            continue
        for link in image_url.findall(page_response.text):
            # Image requests are sent without the custom user-agent header.
            image_response = _fetch(link)
            if image_response is None:
                continue
            # os.path.join keeps the path portable across operating systems.
            target = os.path.join(out_dir, str(next_number) + '.jpg')
            with open(target, 'wb') as f:
                f.write(image_response.content)
            next_number += 1
    print("下载完成")
# Script entry point: start the download only when this file is run directly,
# not when it is imported as a module.
if __name__ == '__main__':
    index()

 

posted @ 2022-06-14 17:36  Mis168  阅读(20)  评论(0)    收藏  举报