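# https://m.ivsky.com/
# 1. Fetch the main page's source code, then extract the sub-page link addresses (href).
# 2. Use each href to fetch the sub-page content and find the image download addresses in it (img -> src).
# 3. Download the images.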
import os
import time
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
url = "https://www.ivsky.com/"
headers = {
    "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Mobile Safari/537.36 Edg/96.0.1054.43"
}
IMG_DIR = "img"  # directory the downloaded images are written to
REQUEST_TIMEOUT = 10  # seconds; requests waits indefinitely when no timeout is given
DOWNLOAD_DELAY = 1  # seconds to pause after each image request


def absolute_url(base, link):
    """Return *link* resolved against *base* as an absolute URL.

    Handles root-relative (``/a/b``), protocol-relative (``//host/a``),
    path-relative (``a/b``) and already-absolute links, so no duplicate
    slashes or duplicated schemes are produced.
    """
    return urljoin(base, link)


def image_filename(src):
    """Return the last path segment of *src*, without query string or fragment.

    Returns an empty string when the path ends with a slash (no file name).
    """
    return urlsplit(src).path.rsplit("/", 1)[-1]


def _fetch_soup(page_url):
    """Download *page_url* and return it parsed as a BeautifulSoup object.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status.
    """
    resp = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    resp.encoding = "utf-8"
    # Hand the page source to BeautifulSoup, which produces a bs4 object.
    return BeautifulSoup(resp.text, "html.parser")


def _gallery_links(main_page):
    """Yield the href of the first ``div > a`` in every ``li`` of every ``ul``
    found inside the ``div`` with id ``sy_load``.

    List items that lack the expected ``div``/``a``/``href`` are skipped.
    """
    container = main_page.find("div", attrs={"id": "sy_load"})
    if container is None:
        return
    for gallery_list in container.find_all("ul"):
        for item in gallery_list.find_all("li"):
            box = item.find("div")
            anchor = box.find("a") if box is not None else None
            # .get() reads a tag attribute and returns None when it is absent.
            href = anchor.get("href") if anchor is not None else None
            if href:
                yield href


def _image_sources(child_page):
    """Yield the ``src`` of the first ``img`` in every ``li`` of the
    ``ul`` with class ``ul_third`` on a sub-page.

    List items without an ``img`` or without a ``src`` are skipped.
    """
    thumbs = child_page.find("ul", attrs={"class": "ul_third"})
    if thumbs is None:
        return
    for item in thumbs.find_all("li"):
        img = item.find("img")
        src = img.get("src") if img is not None else None
        if src:
            yield src


def _download_image(image_url):
    """Download *image_url* into ``IMG_DIR`` and return the saved file name.

    Returns None (and downloads nothing) when no file name can be derived
    from the URL.

    Raises:
        requests.RequestException: on network failure or non-success status.
        OSError: when the file cannot be written.
    """
    img_name = image_filename(image_url)
    if not img_name:
        return None
    img_resp = requests.get(image_url, timeout=REQUEST_TIMEOUT)
    # Without this check an HTML error page would be saved as an "image".
    img_resp.raise_for_status()
    with open(os.path.join(IMG_DIR, img_name), mode="wb") as f:
        f.write(img_resp.content)  # .content is the raw bytes of the response
    return img_name


def main():
    """Scrape the main page, follow each gallery link, and save every
    listed image into ``IMG_DIR``, printing each saved file name.

    A failure on a single sub-page or image is reported and skipped; a
    failure to fetch the main page propagates as requests.RequestException.
    """
    os.makedirs(IMG_DIR, exist_ok=True)
    main_page = _fetch_soup(url)
    for href in _gallery_links(main_page):
        child_url = absolute_url(url, href)
        try:
            child_page = _fetch_soup(child_url)
        except requests.RequestException as exc:
            print("skipped page", child_url, exc)
            continue
        for src in _image_sources(child_page):
            # Resolve against the sub-page so any link form works.
            image_url = absolute_url(child_url, src)
            try:
                img_name = _download_image(image_url)
            except (requests.RequestException, OSError) as exc:
                print("skipped image", image_url, exc)
                img_name = None
            # Pause after every attempt, successful or not, to stay polite.
            time.sleep(DOWNLOAD_DELAY)
            if img_name:
                print(img_name)


if __name__ == "__main__":
    main()