关于阴阳师壁纸爬取

目标网站：https://yys.163.com/media/picture.html

横版的

# 阴阳师原画壁纸

import re
import os
import time
import requests
from bs4 import BeautifulSoup

# Download the landscape (1920x1080) Onmyoji wallpapers listed on the official
# picture page into ./横版19201080, naming the files 0.jpg, 1.jpg, ...

# Seconds to wait on any single HTTP request; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 10

url = 'https://yys.163.com/media/picture.html'
page = requests.get(url, timeout=REQUEST_TIMEOUT)
page.raise_for_status()  # stop with a clear HTTP error instead of parsing an error page
response = page.content.decode('utf-8')
soup = BeautifulSoup(response, 'html.parser')
wallpaper = soup.find_all('div', {'class': 'tab-cont'})  # landscape tab, then portrait tab
if not wallpaper:
    raise RuntimeError("no 'tab-cont' sections found; the page layout may have changed")

pc = wallpaper[0].find_all('div', {'class': 'mask'})  # landscape entries

pc_list = []  # URLs of the landscape 1920x1080 images
for mask in pc:
    anchors = mask.find_all('a')
    # An entry offers 3 to 6 resolution links. The original lookup table
    # (6 -> index 3, 5 -> 2, 4 -> 1, 3 -> 0) always selects the third link
    # from the end, which is the 1920x1080 one; other counts are skipped.
    if not 3 <= len(anchors) <= 6:
        continue
    # Read the attribute directly: it is already entity-decoded and does not
    # depend on how the tag happens to be serialised.
    href = anchors[-3].get('href')
    if href:
        pc_list.append(href)

os.makedirs('./横版19201080', exist_ok=True)
os.chdir('./横版19201080')

for i, img_url in enumerate(pc_list):
    time.sleep(0.3)  # throttle requests so the server does not drop the connection
    try:
        img = requests.get(img_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # One failed image must not abort the remaining downloads.
        print(f'{i} 下载失败  原因：{exc}')
        continue
    if img.status_code == 200:
        with open(f'{i}.jpg', 'wb') as fh:  # close the handle deterministically
            fh.write(img.content)
        print(f'{i} 下载成功')
    else:
        print(f'{i} 下载失败  原因：{img.status_code}')

那么，在此基础上，稍微修改下代码就可以下载竖版的啦

竖版的

# 阴阳师原画壁纸

import re
import os
import time
import requests
from bs4 import BeautifulSoup

# Download the portrait (1080x1920) Onmyoji wallpapers listed on the official
# picture page into ./竖版1080x1920, naming the files 0.jpg, 1.jpg, ...

# Seconds to wait on any single HTTP request; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 10

url = 'https://yys.163.com/media/picture.html'
page = requests.get(url, timeout=REQUEST_TIMEOUT)
page.raise_for_status()  # stop with a clear HTTP error instead of parsing an error page
response = page.content.decode('utf-8')
soup = BeautifulSoup(response, 'html.parser')
wallpaper = soup.find_all('div', {'class': 'tab-cont'})  # landscape tab, then portrait tab
if len(wallpaper) < 2:
    raise RuntimeError("portrait 'tab-cont' section not found; the page layout may have changed")

mo = wallpaper[1].find_all('div', {'class': 'mask'})  # portrait entries

# Name kept from the landscape script; here it holds the portrait image URLs.
pc_list = []
for mask in mo:
    anchors = mask.find_all('a')
    # An entry offers 3 to 5 resolution links. The original lookup table
    # (5 -> index 4, 4 -> 3, 3 -> 2) always selects the last link; other
    # counts are skipped.
    if not 3 <= len(anchors) <= 5:
        continue
    # Read the attribute directly: it is already entity-decoded and does not
    # depend on how the tag happens to be serialised.
    href = anchors[-1].get('href')
    if href:
        pc_list.append(href)

os.makedirs('./竖版1080x1920', exist_ok=True)
os.chdir('./竖版1080x1920')

for i, img_url in enumerate(pc_list):
    time.sleep(0.3)  # throttle requests so the server does not drop the connection
    try:
        img = requests.get(img_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # One failed image must not abort the remaining downloads.
        print(f'{i} 下载失败  原因：{exc}')
        continue
    if img.status_code == 200:
        with open(f'{i}.jpg', 'wb') as fh:  # close the handle deterministically
            fh.write(img.content)
        print(f'{i} 下载成功')
    else:
        print(f'{i} 下载失败  原因：{img.status_code}')

源代码来自网络，侵权则删；代码仅供学习，出任何事情，概不负责。

posted @ 2021-02-16 05:45 X同学阅读(57) 评论(0) 收藏举报

刷新页面返回顶部

X同学

关于阴阳师壁纸爬取

公告