# Python 获取网页端的图片

import os
import re

import requests
from bs4 import BeautifulSoup


# Site root that relative gallery links are joined onto.
url_base = "https://www.ivsky.com"

# Prefix (directory plus file-name stem) for every saved image.
path = r"./Picture/chengshilvyou"

# Captures whatever sits between `<a href=` and ` target` (quotes included).
rp = r"<a href=(.*?) target"

# Captures whatever sits between `<img src=` and ` alt` (quotes included).
rgm = r"<img src=(.*?) alt"

# Browser-like request fields handed to requests.get for page fetches.
# Built key by key so the long values can be wrapped; insertion order is
# User-Agent, Accept, Connection.
parmas = {}
parmas["User-Agent"] = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/85.0.4183.102 Safari/537.36"
)
parmas["Accept"] = (
    "text/html,application/xhtml+xml,application/xml;q=0.9,"
    "image/avif,image/webp,image/apng,*/*;q=0.8,"
    "application/signed-exchange;v=b3;q=0.9"
)
parmas["Connection"] = "keep-alive"

class Picture():
    """Scrape one gallery index page and save the images it leads to.

    The methods chain: ``download`` -> ``getImg_list`` -> ``getPic_lis`` ->
    ``getHtml``. Each accepts the page address explicitly; when it is
    omitted they fall back to the module-level ``url`` so existing
    zero-argument calls keep working.
    """

    # Seconds to wait on any single HTTP request before giving up, so a
    # stalled connection cannot hang the whole crawl.
    REQUEST_TIMEOUT = 10

    @staticmethod
    def _image_path(base, num, index):
        """Return the output file name for image ``index`` of page ``num``.

        The underscore keeps the two numbers apart: without it page 1 /
        image 11 and page 11 / image 1 would both map to ``...111.jpg``.
        """
        return "{}{}_{}.jpg".format(base, num, index)

    def getHtml(self, page_url=None):
        """Fetch a page and return it parsed with BeautifulSoup.

        Args:
            page_url: Address to fetch; defaults to the module-level ``url``.

        Returns:
            The parsed document (``html.parser`` backend).
        """
        if page_url is None:
            page_url = url
        # The dict holds HTTP header fields, so it must go in ``headers``;
        # ``params`` would append them to the query string instead.
        html = requests.get(
            page_url, headers=parmas, timeout=self.REQUEST_TIMEOUT
        ).text
        return BeautifulSoup(html, features="html.parser")

    def getPic_lis(self, url):
        """Get the list of picture-set links found on the index page.

        Args:
            url: Address of the index page to scan.

        Returns:
            Absolute addresses (``url_base`` + href) of the first link in
            every ``div`` whose class matches ``il_img``.
        """
        html_soup = self.getHtml(url)
        pic_list = []
        for tile in html_soup.find_all("div", class_=re.compile("il_img")):
            links = re.findall(rp, str(tile))
            if not links:
                # A tile without a matching link carries nothing to follow.
                continue
            pic_list.append(url_base + links[0].replace('"', ""))
        print("msgone" + str(pic_list))
        return pic_list

    def getImg_list(self, page_url=None):
        """Visit every picture-set link in turn and collect its image URLs.

        Args:
            page_url: Index page to start from; defaults to the
                module-level ``url``.

        Returns:
            Image addresses, each prefixed with ``https:`` and stripped of
            the surrounding double quotes captured by the ``rgm`` pattern.
        """
        if page_url is None:
            page_url = url
        img_list = []
        for set_url in self.getPic_lis(page_url):
            html_img = requests.get(
                set_url, headers=parmas, timeout=self.REQUEST_TIMEOUT
            ).text
            sources = re.findall(rgm, html_img)
            print("msg" + str(sources))
            img_list.extend("https:" + src.strip('"') for src in sources)
        return img_list

    def download(self, num, page_url=None):
        """Download every image reachable from the index page to disk.

        Args:
            num: Index-page number; becomes part of each file name.
            page_url: Index page to crawl; defaults to the module-level
                ``url``.

        Files are written next to the ``path`` prefix. An image whose
        request fails is reported and skipped so the remaining images are
        still downloaded.
        """
        imgs_url = self.getImg_list(page_url)
        # open() does not create missing directories, so make sure the
        # target folder exists before the first write.
        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
        for m, img_url in enumerate(imgs_url):
            print('***** ' + str(m) + '.jpg *****' + ' Downloading...')
            try:
                response = requests.get(img_url, timeout=self.REQUEST_TIMEOUT)
                # Avoid saving an HTTP error page under a .jpg name.
                response.raise_for_status()
            except requests.RequestException as err:
                print('***** ' + str(m) + '.jpg *****' + ' Skipped: ' + str(err))
                continue
            with open(self._image_path(path, num, m), "wb") as file:
                file.write(response.content)
            print('***** ' + str(m) + '.jpg *****' + 'Done.')





if __name__ == "__main__":
    # Crawl index pages 1 through 100 of the "chengshilvyou" category.
    # ``url`` and ``picture`` are deliberately left as module-level names:
    # the Picture methods may look them up as globals.
    for num in range(1, 101):
        url = "https://www.ivsky.com/tupian/chengshilvyou/index_" + str(num) + ".html"
        picture = Picture()
        # download() gathers the picture-set and image lists itself, so the
        # listing methods are not called separately here; doing so would
        # only repeat the same HTTP requests and throw the results away.
        picture.download(num)
# posted @ 2020-09-24 23:28 紫恋蓝辉 阅读(340) 评论(0) 收藏 举报
# 刷新页面 返回顶部