• 博客园logo
  • 会员
  • 众包
  • 新闻
  • 博问
  • 闪存
  • 赞助商
  • HarmonyOS
  • Chat2DB
    • 搜索
      所有博客
    • 搜索
      当前博客
  • 写随笔 我的博客 短消息 简洁模式
    用户头像
    我的博客 我的园子 账号设置 会员中心 简洁模式 ... 退出登录
    注册 登录
紫恋蓝辉
博客园    首页    新随笔    联系   管理    订阅  订阅

Python 获取网页端的图片

import requests
import re
from bs4 import BeautifulSoup


# Regex patterns applied to raw HTML text.
# ``rp`` captures the href value of an anchor, ``rgm`` the src value of an image.
rp = r"<a href=(.*?) target"
rgm = r"<img src=(.*?) alt"

# Prefix for saved image files, and the site root used to absolutize links.
path = "./Picture/chengshilvyou"
url_base = "https://www.ivsky.com"

# Browser-like HTTP header fields (the keys are header names).
parmas = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/85.0.4183.102 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.9"
    ),
    "Connection": "keep-alive",
}

class Picture():
    """Scraper that downloads the images linked from a gallery index page.

    The flow is: fetch an index page, follow each thumbnail link to its
    picture-set page, collect the image URLs found there and write the
    images to disk.

    Configuration is read from module-level names: ``url_base``, ``parmas``
    (HTTP header fields), ``rp`` / ``rgm`` (regex patterns) and ``path``
    (output file prefix). Methods that accept an optional ``page_url`` fall
    back to the module-level ``url`` when it is omitted, so existing callers
    that only set that global keep working.
    """

    def getHtml(self, page_url=None):
        """Fetch a page and return it parsed with BeautifulSoup.

        Args:
            page_url: Address to fetch; defaults to the module-level ``url``.

        Returns:
            The parsed document (``html.parser`` backend).
        """
        if page_url is None:
            page_url = url  # legacy fallback: global assigned by the caller
        # ``parmas`` holds header fields (User-Agent, Accept, Connection), so
        # it must go out as headers; ``params=`` would put them in the query
        # string instead.
        html = requests.get(page_url, headers=parmas).text
        return BeautifulSoup(html, features="html.parser")

    def getPic_lis(self, url):
        """Return the absolute picture-set links found on an index page.

        Args:
            url: Address of the index page to scan.

        Returns:
            A list of URLs, one per thumbnail block that contains a link.
        """
        html_soup = self.getHtml(url)
        pic_list = []
        for block in html_soup.find_all("div", class_=re.compile("il_img")):
            links = re.findall(rp, str(block))
            if not links:
                # A thumbnail block without a matching anchor is skipped
                # rather than raising IndexError.
                continue
            pic_list.append(url_base + str(links[0]).replace('"', ""))
        print("msgone" + str(pic_list))
        return pic_list

    def getImg_list(self, page_url=None):
        """Collect the image URLs from every picture-set page of an index page.

        Args:
            page_url: Index page to start from; defaults to the module-level
                ``url``.

        Returns:
            A flat list of image URLs, each prefixed with ``https:``.
        """
        if page_url is None:
            page_url = url  # legacy fallback: global assigned by the caller
        img_list = []
        for set_url in self.getPic_lis(page_url):
            html_img = requests.get(set_url, headers=parmas).text
            print("metend" + str(html_img))
            sources = re.findall(rgm, html_img)
            print("msg" + str(sources))
            # Captured values still carry their surrounding double quotes.
            img_list.extend("https:" + str(src).strip('"') for src in sources)
        print("msg2" + str(img_list))
        return img_list

    def download(self, num, page_url=None):
        """Download every image reachable from an index page and save it.

        Args:
            num: Index-page number; used as part of each output file name.
            page_url: Index page to process; defaults to the module-level
                ``url``.
        """
        import os

        # open() does not create missing directories.
        target_dir = os.path.dirname(path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        for m, img_url in enumerate(self.getImg_list(page_url)):
            print('***** ' + str(m) + '.jpg *****' + ' Downloading...')
            # The separator keeps names unique: without it page 1 / image 10
            # and page 11 / image 0 would both map to "...110.jpg".
            file_name = path + str(num) + "_" + str(m) + '.jpg'
            response = requests.get(img_url, headers=parmas)
            with open(file_name, "wb") as file:
                file.write(response.content)
            print('***** ' + str(m) + '.jpg *****' + 'Done.')





if __name__ == "__main__":
    # Walk gallery index pages 1 through 100.
    for num in range(1, 101):
        # ``url`` (and ``picture``) are module-level names that the Picture
        # class may look up as globals, so assign them before downloading.
        url = "https://www.ivsky.com/tupian/chengshilvyou/index_" + str(num) + ".html"
        picture = Picture()
        # download() drives getImg_list() and getPic_lis() itself; calling
        # those two first only repeated the same network requests.
        picture.download(num)






posted @ 2020-09-24 23:28  紫恋蓝辉  阅读(323)  评论(0)    收藏  举报
刷新页面返回顶部
博客园  ©  2004-2025
浙公网安备 33010602011771号 浙ICP备2021040463号-3