20183423 刘子毅 2019-2020 《Python程序设计》 实验四 实验报告
# 学号20183423 2019-2020-2 《Python程序设计》实验四报告
课程:《Python程序设计》
班级: 201834
姓名: 刘子毅
学号:20183423
实验教师:王志强
实验日期:2020年6月9日
必修/选修: 公选课
## 1. 实验内容
爬取某直播平台某一分类页面的主播列表,并按热度(观看人数)从高到低对主播进行排序。
## 2. 实验过程及结果
代码如下:
from urllib import request
import re
class Spider:
    """Scrape one live-streaming category page and rank its anchors by views.

    The pipeline run by ``startRun`` is: download the page, pull the name and
    view-count text out of every ``video-info`` card with regular expressions,
    flatten those matches to plain strings, sort by view count (descending)
    and print the ranking.
    """

    # Category page to scrape.
    url = "https://www.panda.tv/cate/kingglory"
    # Captures the inner HTML of each "video-info" card.
    reString_div = r'<div class="video-info">([\s\S]*?)</div>'
    # Captures the anchor (streamer) name inside a card.
    reString_name = r'</i>([\s\S]*?)</span>'
    # Captures the view-count text inside a card.
    reString_number = r'<span class="video-number">([\s\S]*?)</span>'

    def __fetch_content(self):
        """Download ``Spider.url`` and return the body decoded as UTF-8 text.

        Raises whatever ``urllib.request.urlopen`` raises on network errors,
        and ``UnicodeDecodeError`` if the body is not valid UTF-8.
        """
        # The context manager closes the HTTP response even if read() fails.
        with request.urlopen(Spider.url) as response:
            data = response.read()
        return str(data, encoding="utf-8")

    def __alalysis(self, htmlString):
        """Extract the raw name/number matches from every card in the page.

        Returns a list of ``{"name": [...], "number": [...]}`` dicts, one per
        card. Each value is the list produced by ``re.findall`` and may be
        empty or hold several matches; ``__refine`` flattens them.
        """
        return [
            {
                "name": re.findall(Spider.reString_name, html),
                "number": re.findall(Spider.reString_number, html),
            }
            for html in re.findall(Spider.reString_div, htmlString)
        ]

    def __refine(self, anchors):
        """Turn the raw match lists into plain, stripped strings.

        The first match of each field is kept and surrounding whitespace is
        removed. Cards where either field had no match are dropped, because
        they can be neither ranked nor printed.
        """
        refined = []
        for anchor in anchors:
            names = anchor["name"]
            numbers = anchor["number"]
            if not names or not numbers:
                continue
            refined.append({
                "name": names[0].strip(),
                "number": numbers[0].strip(),
            })
        return refined

    def __sort(self, anchors):
        """Return a new list of anchors ordered by view count, largest first."""
        return sorted(anchors, key=self.__sort_seed, reverse=True)

    def __sort_seed(self, anchor):
        """Convert an anchor's view-count text into a float usable as sort key.

        The first decimal number in the text is used; if the text contains
        "万" (ten thousand) the value is multiplied by 10000. Text without any
        digits yields 0.0 so that such anchors sort last instead of crashing.
        """
        text = anchor["number"]
        # Require at least one digit and keep an optional fractional part,
        # so "3.5万" is read as 3.5 rather than 3.
        match = re.search(r'\d+(?:\.\d+)?', text)
        if match is None:
            return 0.0
        number = float(match.group())
        if '万' in text:
            number *= 10000
        return number

    def __show(self, anchors):
        """Print one line per anchor: 1-based rank, view count, then name."""
        for rank, anchor in enumerate(anchors, start=1):
            print("第" + str(rank) + "名 " + anchor["number"] + "\t" + anchor["name"])

    def startRun(self):
        """Entry point: fetch the page, parse it, rank anchors and print them."""
        htmlString = self.__fetch_content()
        anchors = self.__alalysis(htmlString)
        anchors = self.__refine(anchors)
        anchors = self.__sort(anchors)
        self.__show(anchors)
# Run the crawl only when this file is executed as a script, so that merely
# importing the module does not trigger a network request.
if __name__ == "__main__":
    spider = Spider()
    spider.startRun()
运行结果如下:
码云链接:https://gitee.com/lzzzy/zz2/commit/5af736a39f17e62fcebc080b54e69410720d483b