import os
import re
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Scrape a listing page: collect the film titles and download every image on
# the page (except the first one) into the local ``tv/`` directory, naming
# each file ``<position>.<title>.png``.

# Seconds to wait for any single HTTP request before giving up, so a stalled
# server cannot hang the script forever.
REQUEST_TIMEOUT = 10

# 1. Target site
url = 'http://xxxx.xxx/'  # target site

# 2. Request the page; ``.text`` is the decoded body
res = requests.get(url, timeout=REQUEST_TIMEOUT)
res.raise_for_status()  # fail fast instead of parsing an HTTP error page
res.encoding = 'utf-8'
html = res.text

# 3. Parse the data
# Titles come from the ``title`` attribute of the "/film/..." links in table
# cells.
ss = r'<td><a href="/film/.*?" title="(.*?)">.*?</a></td>'
name = re.findall(ss, html)

soup = BeautifulSoup(html, 'lxml')  # parse the page markup
img = soup.find_all('img')[1:]  # every <img> except the first one

# Create the output directory once, up front; no error if it already exists.
os.makedirs('tv', exist_ok=True)

# Titles and images are paired by position: image N gets title N.
for index, i in enumerate(img):
    src = i.get('src')  # image URL as written in the page
    if not src:
        # An <img> without a usable ``src`` has nothing to download.
        continue
    # Resolve relative / protocol-relative paths against the page URL.
    img_url = urljoin(url, src)

    time.sleep(1.5)  # pause before each download to throttle requests

    try:
        img_res = requests.get(img_url, timeout=REQUEST_TIMEOUT)
        img_res.raise_for_status()
    except requests.RequestException as exc:
        # One broken image should not abort the remaining downloads, and an
        # error body must never be saved as if it were a picture.
        print(f'skip {img_url}: {exc}')
        continue
    imgs = img_res.content  # raw image bytes

    if index < len(name):
        # Replace characters that are invalid in file names (or that would
        # change the target directory) so ``open`` cannot fail or escape
        # ``tv/``.
        title = re.sub(r'[\\/:*?"<>|]', '_', name[index])
        path = f'tv/{index}.{title}.png'
    else:
        # More images than titles: still keep the image, without a title.
        path = f'tv/{index}.png'

    # Store the data; the file is created automatically.
    with open(path, 'wb') as file:
        file.write(imgs)