学习伟大的Python的第九天
昨天的 补充
主页:
图标地址、下载次数、大小、详情页地址
详情页:
游戏名、好评率、评论数、小编点评、下载地址、简介、网友评论、1-5张截图链接地址、
https://www.wandoujia.com/wdjweb/api/category/more?catId=6001&subCatId=0&page=1&ctoken=FRsWKgWBqMBZLdxLaK4iem9B
https://www.wandoujia.com/wdjweb/api/category/more?catId=6001&subCatId=0&page=2&ctoken=FRsWKgWBqMBZLdxLaK4iem9B
https://www.wandoujia.com/wdjweb/api/category/more?catId=6001&subCatId=0&page=3&ctoken=FRsWKgWBqMBZLdxLaK4iem9B
32
'''
import requests
from bs4 import BeautifulSoup
# 1、发送请求
def get_page(url, timeout=10):
    """Send a GET request to *url* and return the response.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. It is
            passed straight to ``requests.get``. Without a timeout a
            stalled connection would block the whole crawl indefinitely.

    Returns:
        The response object returned by ``requests.get``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            *timeout* seconds.
    """
    return requests.get(url, timeout=timeout)
# 2、开始解析
# 解析详情页
def _tag_text(tag, default=''):
    """Return ``tag.text``, or *default* when the lookup found no tag (``None``)."""
    return tag.text if tag is not None else default


def parse_detail(text):
    """Parse an app detail page and print its key fields.

    The fields printed are the app name, like rate, comment count, editor's
    review and download link. ``find`` returns ``None`` when an element is
    absent; such fields are printed as empty strings instead of raising, so
    one incomplete page does not abort the whole crawl.

    Args:
        text: HTML source of the detail page.
    """
    soup = BeautifulSoup(text, 'lxml')

    # App name
    name = _tag_text(soup.find(name="span", attrs={"class": "title"}))

    # Like rate
    love = _tag_text(soup.find(name='span', attrs={"class": "love"}))

    # Number of comments
    commit_num = _tag_text(soup.find(name='a', attrs={"class": "comment-open"}))

    # Editor's review
    commit_content = _tag_text(soup.find(name='div', attrs={"class": "con"}))

    # Download link: guard against both a missing button and a missing href.
    download_tag = soup.find(name='a', attrs={"class": "normal-dl-btn"})
    download_url = download_tag.get('href', '') if download_tag is not None else ''

    print(
        f'''
============= tank ==============
app名称:{name}
好评率: {love}
评论数: {commit_num}
小编点评: {commit_content}
app下载链接: {download_url}
============= end ==============
'''
    )
# 解析主页
def parse_index(data):
    """Parse one page of app cards, print each card's summary and crawl its detail page.

    For every ``<li class="card">`` element this prints the icon address,
    download count, size and detail-page address. It then requests the
    detail page and hands its HTML to ``parse_detail``.

    Args:
        data: HTML fragment containing the app ``<li>`` cards.
    """
    # Imported once per call rather than on every loop iteration.
    import re

    # Raw string avoids the invalid "\d" escape; compiled once, outside the loop.
    size_pattern = re.compile(r"\d+MB")

    soup = BeautifulSoup(data, 'lxml')

    # All app <li> cards on this page.
    app_list = soup.find_all(name='li', attrs={"class": "card"})
    for app in app_list:
        # Icon address: data-original attribute of the card's first <img>.
        img = app.find(name='img').attrs['data-original']
        print(img)

        # Download count: text of the <span class="install-count">.
        down_num = app.find(name='span', attrs={"class": "install-count"}).text
        print(down_num)

        # Size: the <span> whose text contains digits followed by "MB".
        # Search inside *this* card; searching the whole soup always returned
        # the first card's size. ``string=`` is the current name of the
        # filter formerly called ``text=``.
        size_tag = app.find(name='span', string=size_pattern)
        size = size_tag.text if size_tag is not None else ''
        print(size)

        # Detail page address: href of the card's first <a>.
        detail_url = app.find(name='a').attrs['href']
        print(detail_url)

        # Request the app detail page, then parse it.
        response = get_page(detail_url)
        parse_detail(response.text)
def main():
    """Crawl category API pages 1 through 32 and parse the app cards on each.

    Each page is fetched with ``get_page``. The JSON body's
    ``data -> content`` entry is passed to ``parse_index``.
    """
    # range's upper bound is exclusive, so this visits pages 1..32.
    for line in range(1, 33):
        url = f"https://www.wandoujia.com/wdjweb/api/category/more?catId=6001&subCatId=0&page={line}&ctoken=FRsWKgWBqMBZLdxLaK4iem9B"

        # Request the app listing endpoint for this page.
        page_response = get_page(url)

        # Visual separator between pages in the console output.
        print('*' * 1000)

        # Deserialize the JSON body and pull out the app card markup.
        cards_markup = page_response.json()['data']['content']

        # Parse the cards (which in turn crawls each detail page).
        parse_index(cards_markup)
if __name__ == '__main__':