import time
import os
import requests as re
from tqdm import tqdm
from bs4 import BeautifulSoup
# Landing page of the site whose galleries are scraped.
download_src = 'https://m.tuiimg.com/'
# Working directory of the process at import time; downloads are stored below it.
now_file = os.getcwd()
def create_file(path):
    """Make sure the directory *path* exists and return its absolute path.

    Args:
        path: Directory to create. Missing parent directories are created
            as well.

    Returns:
        The absolute form of *path*.

    Raises:
        OSError: If the directory cannot be created (e.g. permissions, or a
            regular file already occupies a parent component).
    """
    if os.path.exists(path):
        # Message kept verbatim: "the file already exists".
        print('文件已存在')
    else:
        # exist_ok guards against another process creating the directory
        # between the existence check and this call.
        os.makedirs(path, exist_ok=True)
    return os.path.abspath(path)
def _parse_image_count(text):
    """Return the last run of ASCII digits in *text* as an int, or 0 if none."""
    digits = ''
    for ch in reversed(text):
        if ch in '0123456789':
            digits = ch + digits
        elif digits:
            # The digit run closest to the end of the text is complete.
            break
    return int(digits) if digits else 0


def _collect_cover_urls(soup):
    """Return the ``realsrc`` URL of every usable cover image on the index page."""
    container = soup.find('ul', {'id': 'main', 'class': 'main'})
    if container is None:
        return []
    urls = []
    for item in container.find_all('li'):
        img_tag = item.find('img')
        if img_tag is None:
            continue
        real_src = img_tag.get('realsrc')
        # Only images carrying both a `src` and a `realsrc` attribute are used;
        # the real image address is read from `realsrc`.
        if img_tag.get('src') is None or real_src is None:
            continue
        urls.append(real_src)
    return urls


def download(path, download_src):
    """Download every gallery linked from the index page into *path*.

    The index page at *download_src* is parsed for cover images. For each
    cover, the matching gallery page is fetched to learn how many images the
    gallery holds, a sub-folder named after the gallery id is created under
    *path*, and the images are saved there as ``1.jpg``, ``2.jpg``, ...

    NOTE(review): the gallery id (``url[-11:-6]``) and the image URL prefix
    (``url[:-6]``) are derived by fixed-position slicing of the cover URL, so
    this assumes the site's cover URLs keep a fixed tail layout - confirm
    against the live site.
    NOTE(review): the number shown in the ``allbtn`` element is assumed to be
    the inclusive total of images in the gallery - confirm against the site.

    Args:
        path: Existing directory under which gallery folders are created.
        download_src: URL of the index page listing the galleries.

    Returns:
        None. Files are written to disk as a side effect.

    Raises:
        requests.RequestException: If the index page itself cannot be
            fetched. Failures on individual galleries or images are skipped.
    """
    # `re` is this module's alias for the `requests` package.
    index_page = re.get(download_src, timeout=5)
    index_page.encoding = index_page.apparent_encoding
    soup = BeautifulSoup(index_page.text, 'html.parser')
    cover_urls = _collect_cover_urls(soup)

    progress = tqdm(cover_urls, ncols=100, desc='文件下载进度', colour='#96b97d')
    for cover_url in progress:
        gallery_id = cover_url[-11:-6]
        url_prefix = cover_url[:-6]

        # Fetch the gallery page to find out how many images it contains.
        try:
            gallery_page = re.get('https://m.tuiimg.com/meinv/' + gallery_id, timeout=5)
            gallery_page.raise_for_status()
        except re.RequestException:
            # Best effort: one unreachable gallery must not abort the others.
            continue
        gallery_page.encoding = gallery_page.apparent_encoding
        gallery_soup = BeautifulSoup(gallery_page.text, 'html.parser')
        all_span = gallery_soup.find('span', {'class': 'all'})
        count_tag = None
        if all_span is not None:
            count_tag = all_span.find('i', {'id': 'allbtn'})
        if count_tag is None:
            continue
        image_total = _parse_image_count(count_tag.get_text())

        # Plain concatenation is kept on purpose: the sliced id may start with
        # a slash, which os.path.join would treat as an absolute path.
        folder_path = path + '/' + gallery_id
        os.makedirs(folder_path, exist_ok=True)

        # Images are numbered from 1 up to and including the total.
        for index in range(1, image_total + 1):
            image_url = url_prefix + str(index) + '.jpg'
            try:
                response = re.get(image_url, timeout=5)
                # Do not store an HTTP error page under a .jpg name.
                response.raise_for_status()
            except re.RequestException:
                continue
            image_path = os.path.join(folder_path, f'{index}.jpg')
            with open(image_path, 'wb') as fp:
                fp.write(response.content)
            # Short pause between images to avoid hammering the server.
            time.sleep(0.1)
if __name__ == '__main__':
    # Script entry point: make sure the target directory exists, report it,
    # then download the galleries listed on the index page into it.
    file_path = create_file(now_file)
    print(f'当前文件路径{file_path}')
    download(path=file_path, download_src=download_src)