# 爬取 URL 网页中的所有图片：填上 url 即可使用，下面的筛选条件请根据需要自行修改。

 

import requests
import re
import time
import random
import os
def Find(string):
    """Return the value of every ``src="..."`` attribute found in *string*.

    Only double-quoted attributes are matched. The match is non-greedy and
    uses ``re.S``, so a value may span line breaks. Values are returned in
    the order they appear; an empty list is returned when nothing matches.
    """
    # One capture group per match: the text between the double quotes.
    matches = re.finditer('src="(.*?)"', string, re.S)
    return [hit.group(1) for hit in matches]
# Address of the page to scrape; must be filled in before running.
url = ''

# Alternative User-Agent header set; nothing visible in this file sends it.
user_agent= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
headers = {"User-Agent":user_agent}
# Headers sent with the page request below.
headers2 = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36 ",
}
# Name of the output sub-directory created under D:\temp.
name = 'xxx'
save_dir = 'D:\\temp' + '\\' + name
# exist_ok=True lets the script be re-run without raising FileExistsError;
# makedirs also creates the parent directory when it is missing.
os.makedirs(save_dir, exist_ok=True)
# Everything written with a relative path after this lands in save_dir.
os.chdir(save_dir)

# requests never times out by default; bound the wait so a stalled server
# cannot hang the script forever.
html_r = requests.get(url, headers=headers2, timeout=30)
# Stop on an HTTP error instead of scraping the body of an error page.
html_r.raise_for_status()

# Every src="..." value found in the page, in document order.
lis = Find(html_r.text)
print(len(lis), lis)


# Download each candidate URL in ``lis`` that passes the filter. Files are
# numbered by the URL's 1-based position in ``lis``, so skipped entries
# leave gaps in the numbering.
for i, ele in enumerate(lis, start=1):
    # Crude filter: keep only absolute https URLs longer than 20 characters.
    if len(ele) <= 20 or not ele.startswith('https://'):
        continue
    img_url = ele
    savename = name + str(i) + '.jpg'
    try:
        # No stream=True: the whole body is read right away anyway, and a
        # non-streamed response releases its connection automatically.
        # The timeout keeps one stalled download from hanging the crawl.
        img = requests.get(img_url, headers=headers2, timeout=30)
        # Do not save the body of a 4xx/5xx error response as an image.
        img.raise_for_status()
    except requests.RequestException as exc:
        # One bad URL should not abort the remaining downloads.
        print('skipped', img_url, exc)
    else:
        with open(savename, 'wb') as w:
            w.write(img.content)
    # Pause 5-7 seconds between requests to avoid hammering the server.
    time.sleep(random.randrange(5, 8))