微小宝最近一周排名爬取
首先,安装所需的第三方Python包 requests 和 xlwt(例如 pip install requests xlwt);re、json、time 属于Python标准库,无需另外安装
接下来请看源码:
"""Scrape the wxb.com article ranking and export it to an .xls workbook.

Requests ranking pages 1-7 (30 entries asked for per page) from the
wxb.com JSON endpoint and writes one spreadsheet row per returned article.
"""
import json
import re
import time

import requests
import xlwt

book = xlwt.Workbook(encoding='utf-8')
sheet = book.add_sheet("wxbdata")

# Header row; the order matches the per-article values written further down.
COLUMN_TITLES = (
    "文章标题",
    "公众号名称",
    "阅读人数",
    "在看人数",
    "微小宝指数",
    "是否原创",
    "文章发布时间",
    "文章链接地址",
)
for column, column_title in enumerate(COLUMN_TITLES):
    sheet.write(0, column, label=column_title)

header = {
    "Accept": "application/json, text/plain, */*",
    # "br" is not advertised: Requests only decodes brotli when an optional
    # brotli package is installed, while gzip/deflate are always handled.
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "keep-alive",
    # The value must hold only the cookie pairs; the original started with a
    # literal "Cookie: " prefix, which corrupted the first cookie name.
    "Cookie": 'aliyungf_tc=AQAAAIGT9DRJPAgAAfZe0x0+wCiV9I+V; PHPSESSID=4255c7f8e6c7eb86a9cde76e0383de1e; Hm_lvt_5859c7e2fd49a1739a0b0f5a28532d91=1596991292,1596991942,1596991959; Hm_lpvt_5859c7e2fd49a1739a0b0f5a28532d91=1596991959; Qs_lvt_288791=1596991943%2C1596991958; Qs_pv_288791=2043451915572373200%2C1155957040924487200; mediav=%7B%22eid%22%3A%22288503%22%2C%22ep%22%3A%22%22%2C%22vid%22%3A%227lg2w%2B2aNg%3AZz1-9sGAm%22%2C%22ctn%22%3A%22%22%2C%22vvid%22%3A%227lg2w%2B2aNg%3AZz1-9sGAm%22%7D',
    "Host": "www.wxb.com",
    "Referer": "https://www.wxb.com/wxpush/article/rank?page=1&pageSize=30&type=3&baidu_tag=&baidu_cat=%E6%80%BB%E6%A6%9C&q=&order=index_scores-desc",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
}

x = 1  # next spreadsheet row to fill; row 0 holds the column titles
for page in range(1, 8):
    orginal_url = ("https://www.wxb.com/article/rank" + "?page=" + str(page)
                   + "&pageSize=30&type=4&baidu_tag=&baidu_cat=%E7%A7%91%E6%8A%80&q=&order=index_scores-desc")
    # A timeout keeps a stalled connection from hanging the script forever,
    # and raise_for_status() surfaces HTTP errors before JSON parsing.
    html = requests.get(orginal_url, headers=header, timeout=10)
    html.raise_for_status()
    data = json.loads(html.text)
    time.sleep(2)  # pause between page requests to avoid being blocked for crawling too fast

    # Iterate over the entries actually returned instead of assuming exactly
    # 30, so a short (e.g. last) page no longer raises IndexError.
    for item in data["data"]:
        title = item["title"]
        account = item["account"]
        wxbindex = item["index_scores"]  # wxb propagation index
        # is_original: 1 = original, anything else = not original. This is a
        # plain string; the original wrapped it in a one-element list.
        orginal = "原创" if item["is_original"] == 1 else "非原创"
        readnum = item["read_num"]
        like_num = item["like_num"]  # number of "在看" reactions
        dataurl = item["url"]
        # push_time is converted as an epoch timestamp into local time.
        showtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(item["push_time"])))

        row_values = (title, account, readnum, like_num, wxbindex, orginal, showtime, dataurl)
        for column, value in enumerate(row_values):
            sheet.write(x, column, label=value)

        # NOTE(review): writing to the in-memory sheet needs no throttling;
        # this delay only lengthens the gap between page requests. It is kept
        # so the overall crawl rate stays the same -- consider removing.
        time.sleep(0.5)
        x += 1

book.save("微小宝最近一周微小宝排名.xls")

浙公网安备 33010602011771号