微小宝最近一周排名爬取

首先,安装第三方 Python 包 requests 和 xlwt(例如 pip install requests xlwt);re、json、time 属于 Python 标准库,无需另行安装。

接下来请看源码:

 1 import requests, re
 2 import json
 3 import xlwt, time
 4 
 5 book = xlwt.Workbook(encoding='utf-8')
 6 sheet = book.add_sheet("wxbdata")
 7 sheet.write(0, 0, label="文章标题")
 8 sheet.write(0, 1, label="公众号名称")
 9 sheet.write(0, 2, label="阅读人数")
10 sheet.write(0, 3, label="在看人数")
11 sheet.write(0, 4, label="微小宝指数")
12 sheet.write(0, 5, label="是否原创")
13 sheet.write(0, 6, label="文章发布时间")
14 sheet.write(0, 7, label="文章链接地址")
15 
16 header = {"Accept": "application/json, text/plain, */*",
17           "Accept-Encoding": "gzip, deflate, br",
18           "Accept-Language": "zh-CN,zh;q=0.9",
19           "Connection": "keep-alive",
20           "Cookie": 'Cookie: aliyungf_tc=AQAAAIGT9DRJPAgAAfZe0x0+wCiV9I+V; PHPSESSID=4255c7f8e6c7eb86a9cde76e0383de1e; Hm_lvt_5859c7e2fd49a1739a0b0f5a28532d91=1596991292,1596991942,1596991959; Hm_lpvt_5859c7e2fd49a1739a0b0f5a28532d91=1596991959; Qs_lvt_288791=1596991943%2C1596991958; Qs_pv_288791=2043451915572373200%2C1155957040924487200; mediav=%7B%22eid%22%3A%22288503%22%2C%22ep%22%3A%22%22%2C%22vid%22%3A%227lg2w%2B2aNg%3AZz1-9sGAm%22%2C%22ctn%22%3A%22%22%2C%22vvid%22%3A%227lg2w%2B2aNg%3AZz1-9sGAm%22%7D',
21           "Host": "www.wxb.com",
22           "Referer": "https://www.wxb.com/wxpush/article/rank?page=1&pageSize=30&type=3&baidu_tag=&baidu_cat=%E6%80%BB%E6%A6%9C&q=&order=index_scores-desc",
23           "Sec-Fetch-Dest": "empty",
24           "Sec-Fetch-Mode": "cors",
25           "Sec-Fetch-Site": "same-origin",
26           "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36",
27           "X-Requested-With": "XMLHttpRequest"}
28 x = 1
29 for page in range(1, 8):
30     orginal_url = "https://www.wxb.com/article/rank" + "?page=" + str(
31         page) + "&pageSize=30&type=4&baidu_tag=&baidu_cat=%E7%A7%91%E6%8A%80&q=&order=index_scores-desc"
32     html = requests.get(orginal_url, headers=header)
33     data = json.loads(html.text)
34     time.sleep(2)  # 休眠防止过快爬取被封
35     for i in range(30):
36         title = data["data"][i]["title"]
37         account = data["data"][i]["account"]
38         wxbindex = data["data"][i]["index_scores"]  # 微小宝传播指数
39         orginal = ["原创" if data["data"][i]["is_original"] == 1 else "非原创"]  # 0是非原创,1是原创
40         readnum = data["data"][i]["read_num"]
41         like_num = data["data"][i]["like_num"]  # 在看人数
42         dataurl = data["data"][i]["url"]
43         showtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(data["data"][i]["push_time"])))  # 时间戳转换为时间
44 
45         sheet.write(x, 0, label=title)
46         sheet.write(x, 1, label=account)
47         sheet.write(x, 2, label=readnum)
48         sheet.write(x, 3, label=like_num)
49         sheet.write(x, 4, label=wxbindex)
50         sheet.write(x, 5, label=orginal)
51         sheet.write(x, 6, label=showtime)
52         sheet.write(x, 7, label=dataurl)
53 
54         time.sleep(0.5)
55         x += 1
56 book.save("微小宝最近一周微小宝排名.xls")

 

posted @ 2020-08-10 09:45  璐璐呦  阅读(192)  评论(0)    收藏  举报