Python爬取《你好李焕英》猫眼实时票房

Python爬取《你好李焕英》豆瓣短评并基于SnowNLP做情感分析

Python爬取你好李焕英豆瓣短评生成词云

Python爬取你好李焕英豆瓣短评并利用stylecloud制作更酷炫的词云图

目标网站:

https://piaofang.maoyan.com/dashboard/movie

在这里插入图片描述

数据接口:

按 F12 打开浏览器开发者工具:
逐个查看请求,哪个看起来像数据接口,就点进去用 Preview 预览一下:
在这里插入图片描述
熟悉之后,一眼就能找到目标请求:
在这里插入图片描述
点进response,右键open一下:
在这里插入图片描述
需要的信息都在了,接下来手撕代码:
在这里插入图片描述

完整代码:

  1 # -*- coding: utf-8 -*-
  2 # !/usr/bin/env python
  3 # 猫眼票房:https://piaofang.maoyan.com/dashboard
  4 
  5 import datetime
  6 import os
  7 import time
  8 import requests
  9 
 10 class PF(object):
 11     def __init__(self):
 12         self.url = 'https://piaofang.maoyan.com/dashboard-ajax?orderType=0&uuid=173d6dd20a2c8-0559692f1032d2-393e5b09-1fa400-173d6dd20a2c8&riskLevel=71&optimusCode=10'
 13         self.headers = {
 14             "Referer": "https://piaofang.maoyan.com/dashboard",
 15             "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36",
 16         }
 17 
 18     def main(self):
 19         '''
 20         主程序,打印最终结果
 21         :return:
 22         '''
 23         while True:
 24             # 需在dos命令下运行此文件,才能清屏
 25             os.system('cls')
 26             result_json = self.get_parse()
 27             if not result_json:
 28                 break
 29             results = self.parse(result_json)
 30             # 获取时间
 31             calendar = result_json['calendar']['serverTimestamp']
 32             t = calendar.split('.')[0].split('T')
 33             t = t[0] + " " + (datetime.datetime.strptime(t[1], "%H:%M:%S") + datetime.timedelta(hours=8)).strftime(
 34                 "%H:%M:%S")
 35             print("北京时间:", t)
 36             x_line = '-' * 155
 37             # 总票房
 38             total_box = result_json['movieList']['data']['nationBoxInfo']['nationBoxSplitUnit']['num']
 39             # 总票房单位
 40             total_box_unit = result_json['movieList']['data']['nationBoxInfo']['nationBoxSplitUnit']['unit']
 41             print(f"今日总票房: {total_box} {total_box_unit}", end=f'\n{x_line}\n')
 42             # print("{:^10}\t{:^23}".format("企业ID", "企业名称"))
 43             print('电影名称'.ljust(14), '综合票房'.ljust(11), '票房占比'.ljust(13), '场均上座率'.ljust(11), '场均人次'.ljust(11),
 44                   '排片场次'.ljust(12),
 45                   '排片占比'.ljust(12), '累积总票房'.ljust(11), '上映天数', sep='\t', end=f'\n{x_line}\n')
 46             for result in results:
 47                 print(
 48                     result['movieName'][:10].ljust(9),  # 电影名称
 49                     result['boxSplitUnit'][:8].rjust(10),  # 综合票房
 50                     result['boxRate'][:8].rjust(13),  # 票房占比
 51                     result['avgSeatView'][:8].rjust(13),  # 场均上座率
 52                     result['avgShowView'][:8].rjust(13),  # 场均人次
 53                     result['showCount'][:8].rjust(13),  # '排片场次'
 54                     result['showCountRate'][:8].rjust(13),  # 排片占比
 55                     result['sumBoxDesc'][:8].rjust(13),  # 累积总票房
 56                     result['releaseInfo'][:8].rjust(13),  # 上映信息
 57                     sep='\t', end='\n\n'
 58                 )
 59                 break # 把break注释掉,打印的是所有电影实时票房,否则只打印榜首
 60 
 61             time.sleep(4)
 62 
 63     def get_parse(self):
 64         '''
 65         网页是否成功获取,频繁操作会有验证
 66         :return:
 67         '''
 68         try:
 69             response = requests.get(self.url, headers=self.headers)
 70             if response.status_code == 200:
 71                 # print("success!")
 72                 return response.json()
 73         except requests.ConnectionError as e:
 74             print("ERROR:", e)
 75             return None
 76 
 77     def parse(self, result_json):
 78         '''
 79         获取数据
 80         :return:
 81         '''
 82         if result_json:
 83             movies = result_json['movieList']['data']['list']
 84             # movies = [{},{},{}]
 85             # 场均上座率, 场均人次, 票房占比, 电影名称,
 86             # 上映信息(上映天数), 排片场次, 排片占比, 综合票房,累积总票房
 87             ticks = ['avgSeatView', 'avgShowView', 'boxRate', 'movieName',
 88                      'releaseInfo', 'showCount', 'showCountRate', 'boxSplitUnit', 'sumBoxDesc']
 89             for movie in movies:
 90                 self.piaofang = {}
 91                 for tick in ticks:
 92                     # 数字和单位分开需要join
 93                     if tick == 'boxSplitUnit':
 94                         movie[tick] = ''.join([str(i) for i in movie[tick].values()])
 95                     # 多层字典嵌套
 96                     if tick == 'movieName' or tick == 'releaseInfo':
 97                         movie[tick] = movie['movieInfo'][tick]
 98                     if movie[tick] == '':
 99                         movie[tick] = '此项数据为空'
100                     self.piaofang[tick] = str(movie[tick])
101                 yield self.piaofang
102 
103 
if __name__ == '__main__':
    # Script entry point: build the tracker and start its polling loop.
    PF().main()

 

抓取结果:

在这里插入图片描述

在这里插入图片描述

posted @ 2021-02-27 13:05  BugMiaowu2021  阅读(252)  评论(0)    收藏  举报