至美化（zhutix.com）壁纸爬虫
先直接上代码。
# Mobile wallpapers — single-page version.
# Scrapes one listing page of zhutix.com, then downloads every image of
# every collection linked from it into a per-collection folder.
import os
import requests
from bs4 import BeautifulSoup

all_in_url = 'https://zhutix.com/mobile/page/1/'
# timeout keeps a dead server from hanging the script forever
res = BeautifulSoup(requests.get(all_in_url, timeout=30).text, 'lxml')
all_li = res.findAll('a', class_="imglist-char shu")
all_a = res.findAll('a', class_="imglist-char shu", target="_blank")
print('这一页共有' + str(len(all_li)) + '个合集')
# Map collection title -> collection URL. Title and href are taken from the
# SAME tag (the original indexed two different result lists in parallel,
# which mis-pairs titles and URLs whenever their lengths differ).
name_link = {}
for tag in all_a:
    name_link[tag.getText()] = tag['href']
the_num = 0
# Visit each collection page found on this listing page.
for k_key, value in name_link.items():
    detail = BeautifulSoup(requests.get(value, timeout=30).text, 'lxml')
    thumbs = detail.findAll('div', class_="thumbCont")
    # The full-size image URL is stored in each <img>'s "url" attribute.
    all_imgs = [img_tag['url'] for img_tag in thumbs[0]('img')]
    the_num += 1
    print('第‖ {} ‖合集『{}』正在解析'.format(the_num, k_key))
    dd = "C:/Users/Administrator/PycharmProjects/2021合集/爬虫/至美化爬虫/手机壁纸/"
    # out_dir instead of the builtin name `dir`; makedirs creates missing
    # parents and exist_ok replaces the old bare try/except dance.
    out_dir = dd + 'imgs/' + str(k_key)
    os.makedirs(out_dir, exist_ok=True)
    # Download the images, numbering them 1.jpg, 2.jpg, ...
    num = 0
    for lin in all_imgs:
        num += 1
        name = out_dir + '/' + str(num) + '.jpg'
        r = requests.get(lin, timeout=30)
        try:
            with open(name, "wb") as f:
                f.write(r.content)
            print('{}合集的第{}张下载成功'.format(k_key, num))
        except OSError:
            # Collection titles may contain characters illegal in filenames.
            print("============文件名有误============")
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Mobile wallpapers — multi-page version.
# Reads the page count from the pagination widget, then crawls every
# listing page and downloads every collection found on each.
import os
import requests
from bs4 import BeautifulSoup

url = 'https://zhutix.com/mobile/'
bres = BeautifulSoup(requests.get(url, timeout=30).text, 'lxml')
# The pagination <div> contains one <li> per page plus 3 non-page items
# (prev/next/etc.), hence the -3.
pagination = bres.findAll('div', class_="pagination shu")
t_num = len(pagination[0]('li')) - 3
print('共有' + str(t_num) + '页')
for page_no in range(1, t_num + 1):  # own name: the old inner loop shadowed `i`
    all_in_url = 'https://zhutix.com/mobile/page/{}/'.format(page_no)
    res = BeautifulSoup(requests.get(all_in_url, timeout=30).text, 'lxml')
    all_li = res.findAll('a', class_="imglist-char shu")
    all_a = res.findAll('a', class_="imglist-char shu", target="_blank")
    print('这一页共有' + str(len(all_li)) + '个合集')
    # Map collection title -> URL; both come from the SAME tag (the original
    # zipped two different queries by index, mis-pairing on length mismatch).
    name_link = {}
    for tag in all_a:
        name_link[tag.getText()] = tag['href']
    the_num = 0
    # Every collection on this listing page.
    for k_key, value in name_link.items():
        detail = BeautifulSoup(requests.get(value, timeout=30).text, 'lxml')
        thumbs = detail.findAll('div', class_="thumbCont")
        # Full-size image URL lives in each <img>'s "url" attribute.
        all_imgs = [img_tag['url'] for img_tag in thumbs[0]('img')]
        the_num += 1
        print('第‖ {} ‖合集『{}』正在解析'.format(the_num, k_key))
        dd = "C:/Users/Administrator/PycharmProjects/2021合集/爬虫/至美化爬虫/手机壁纸/"
        # Avoid shadowing builtin `dir`; makedirs(+exist_ok) replaces the
        # old bare try/except around os.mkdir and creates missing parents.
        out_dir = dd + 'imgs/' + str(k_key)
        os.makedirs(out_dir, exist_ok=True)
        # Download the images, numbering them 1.jpg, 2.jpg, ...
        num = 0
        for lin in all_imgs:
            num += 1
            name = out_dir + '/' + str(num) + '.jpg'
            r = requests.get(lin, timeout=30)
            try:
                with open(name, "wb") as f:
                    f.write(r.content)
                print('{}合集的第{}张下载成功'.format(k_key, num))
            except OSError:
                # Titles may contain characters illegal in filenames.
                print("============文件名有误============")
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Desktop wallpapers — adapted directly from the multi-page mobile version:
# only the section URL and the output folder differ.
import os
import requests
from bs4 import BeautifulSoup

url = 'https://zhutix.com/wallpaper/'
bres = BeautifulSoup(requests.get(url, timeout=30).text, 'lxml')
# Pagination widget: one <li> per page plus 3 non-page items, hence -3.
pagination = bres.findAll('div', class_="pagination shu")
t_num = len(pagination[0]('li')) - 3
print('共有' + str(t_num) + '页')
for page_no in range(1, t_num + 1):  # own name: the old inner loop shadowed `i`
    all_in_url = 'https://zhutix.com/wallpaper/page/{}/'.format(page_no)
    res = BeautifulSoup(requests.get(all_in_url, timeout=30).text, 'lxml')
    all_li = res.findAll('a', class_="imglist-char shu")
    all_a = res.findAll('a', class_="imglist-char shu", target="_blank")
    print('这一页共有' + str(len(all_li)) + '个合集')
    # Collection title -> URL, both read from the SAME tag (the original
    # indexed two different result lists in parallel — fragile pairing).
    name_link = {}
    for tag in all_a:
        name_link[tag.getText()] = tag['href']
    the_num = 0
    # Every collection on this listing page.
    for k_key, value in name_link.items():
        detail = BeautifulSoup(requests.get(value, timeout=30).text, 'lxml')
        thumbs = detail.findAll('div', class_="thumbCont")
        # Full-size image URL lives in each <img>'s "url" attribute.
        all_imgs = [img_tag['url'] for img_tag in thumbs[0]('img')]
        the_num += 1
        print('第‖ {} ‖合集『{}』正在解析'.format(the_num, k_key))
        dd = "C:/Users/Administrator/PycharmProjects/2021合集/爬虫/至美化爬虫/电脑壁纸/"
        # Avoid shadowing builtin `dir`; makedirs(+exist_ok) replaces the
        # old bare try/except around os.mkdir and creates missing parents.
        out_dir = dd + 'imgs/' + str(k_key)
        os.makedirs(out_dir, exist_ok=True)
        # Download the images, numbering them 1.jpg, 2.jpg, ...
        num = 0
        for lin in all_imgs:
            num += 1
            name = out_dir + '/' + str(num) + '.jpg'
            r = requests.get(lin, timeout=30)
            try:
                with open(name, "wb") as f:
                    f.write(r.content)
                print('{}合集的第{}张下载成功'.format(k_key, num))
            except OSError:
                # Titles may contain characters illegal in filenames.
                print("============文件名有误============")
目前就这些了。

浙公网安备 33010602011771号