Python爬取《你好李焕英》猫眼实时票房
Python爬取《你好李焕英》豆瓣短评并基于SnowNLP做情感分析
Python爬取你好李焕英豆瓣短评并利用stylecloud制作更酷炫的词云图
目标网站:
https://piaofang.maoyan.com/dashboard/movie

数据接口:
F12大法开启:
观察呗,哪个像就点进去preview一下:

有眼感了一下子就抓到了:

点进response,右键open一下:
需要的信息都在了,接下来手撕代码:

完整代码:
1 # -*- coding: utf-8 -*- 2 # !/usr/bin/env python 3 # 猫眼票房:https://piaofang.maoyan.com/dashboard 4 5 import datetime 6 import os 7 import time 8 import requests 9 10 class PF(object): 11 def __init__(self): 12 self.url = 'https://piaofang.maoyan.com/dashboard-ajax?orderType=0&uuid=173d6dd20a2c8-0559692f1032d2-393e5b09-1fa400-173d6dd20a2c8&riskLevel=71&optimusCode=10' 13 self.headers = { 14 "Referer": "https://piaofang.maoyan.com/dashboard", 15 "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36", 16 } 17 18 def main(self): 19 ''' 20 主程序,打印最终结果 21 :return: 22 ''' 23 while True: 24 # 需在dos命令下运行此文件,才能清屏 25 os.system('cls') 26 result_json = self.get_parse() 27 if not result_json: 28 break 29 results = self.parse(result_json) 30 # 获取时间 31 calendar = result_json['calendar']['serverTimestamp'] 32 t = calendar.split('.')[0].split('T') 33 t = t[0] + " " + (datetime.datetime.strptime(t[1], "%H:%M:%S") + datetime.timedelta(hours=8)).strftime( 34 "%H:%M:%S") 35 print("北京时间:", t) 36 x_line = '-' * 155 37 # 总票房 38 total_box = result_json['movieList']['data']['nationBoxInfo']['nationBoxSplitUnit']['num'] 39 # 总票房单位 40 total_box_unit = result_json['movieList']['data']['nationBoxInfo']['nationBoxSplitUnit']['unit'] 41 print(f"今日总票房: {total_box} {total_box_unit}", end=f'\n{x_line}\n') 42 # print("{:^10}\t{:^23}".format("企业ID", "企业名称")) 43 print('电影名称'.ljust(14), '综合票房'.ljust(11), '票房占比'.ljust(13), '场均上座率'.ljust(11), '场均人次'.ljust(11), 44 '排片场次'.ljust(12), 45 '排片占比'.ljust(12), '累积总票房'.ljust(11), '上映天数', sep='\t', end=f'\n{x_line}\n') 46 for result in results: 47 print( 48 result['movieName'][:10].ljust(9), # 电影名称 49 result['boxSplitUnit'][:8].rjust(10), # 综合票房 50 result['boxRate'][:8].rjust(13), # 票房占比 51 result['avgSeatView'][:8].rjust(13), # 场均上座率 52 result['avgShowView'][:8].rjust(13), # 场均人次 53 result['showCount'][:8].rjust(13), # '排片场次' 54 result['showCountRate'][:8].rjust(13), # 排片占比 55 result['sumBoxDesc'][:8].rjust(13), # 累积总票房 56 result['releaseInfo'][:8].rjust(13), # 上映信息 57 sep='\t', end='\n\n' 58 ) 59 break # 把break注释掉,打印的是所有电影实时票房,否则只打印榜首 60 61 time.sleep(4) 62 63 def get_parse(self): 64 ''' 65 网页是否成功获取,频繁操作会有验证 66 :return: 67 ''' 68 try: 69 response = requests.get(self.url, headers=self.headers) 70 if response.status_code == 200: 71 # print("success!") 72 return response.json() 73 except requests.ConnectionError as e: 74 print("ERROR:", e) 75 return None 76 77 def parse(self, result_json): 78 ''' 79 获取数据 80 :return: 81 ''' 82 if result_json: 83 movies = result_json['movieList']['data']['list'] 84 # movies = [{},{},{}] 85 # 场均上座率, 场均人次, 票房占比, 电影名称, 86 # 上映信息(上映天数), 排片场次, 排片占比, 综合票房,累积总票房 87 ticks = ['avgSeatView', 'avgShowView', 'boxRate', 'movieName', 88 'releaseInfo', 'showCount', 'showCountRate', 'boxSplitUnit', 'sumBoxDesc'] 89 for movie in movies: 90 self.piaofang = {} 91 for tick in ticks: 92 # 数字和单位分开需要join 93 if tick == 'boxSplitUnit': 94 movie[tick] = ''.join([str(i) for i in movie[tick].values()]) 95 # 多层字典嵌套 96 if tick == 'movieName' or tick == 'releaseInfo': 97 movie[tick] = movie['movieInfo'][tick] 98 if movie[tick] == '': 99 movie[tick] = '此项数据为空' 100 self.piaofang[tick] = str(movie[tick]) 101 yield self.piaofang 102 103 104 if __name__ == '__main__': 105 pf = PF() 106 pf.main()
抓取结果:



浙公网安备 33010602011771号