# 爬取猫眼电影最受欢迎前100 方法2 (Scrape the Maoyan movie top-100 board, method 2)

import re, requests
import sys


def maoyan(url, i):
    """Download one page of the Maoyan board and print every film on it.

    For each film found on the page this prints a separator line, the rank,
    the title, the cast line and the release-time line.

    Args:
        url: Address of the board page to download.
        i: Offset of the page being requested. It is used only to build the
            ``Referer`` header: offset 0 maps to 40, any other offset maps
            to ``i - 10``.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    referer_offset = 40 if i == 0 else i - 10
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0',
        'Host': 'maoyan.com',
        'Referer': 'http://maoyan.com/board/6?offset=%s' % referer_offset,
    }
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    html = response.text

    # Each findall returns one entry per film, in page order.
    ranks = re.findall('<i class="board-index board-index-(.*?)">(.*?)</i>', html, re.S)
    titles = re.findall(
        '<p class="name"><a href="/films/(.*?)" title="(.*?)" data-act="boarditem-click" data-val="{movieId:(.*?)}">(.*?)</a></p>',
        html, re.S)
    stars = re.findall('<p class="star">(.*?)</p>', html, re.S)
    release_times = re.findall('<p class="releasetime">(.*?)</p>', html, re.S)

    # zip() stops at the shortest list, so a page where one pattern matched
    # fewer times than the titles no longer raises IndexError part-way
    # through the output.
    for rank, title, star, release_time in zip(ranks, titles, stars, release_times):
        print("--------------------------------")
        print("排名:第%s名" % rank[0])      # first group: rank number
        print("电影名称:%s" % title[1])     # second group: title attribute
        print(star.strip())
        print(release_time)



class Logger(object):
    """File-like object that duplicates everything written to it.

    Each message is written to the stream that was ``sys.stdout`` when the
    instance was created and is also appended to a log file.
    """

    def __init__(self, filename="Default.log"):
        """Capture the current ``sys.stdout`` and open *filename* for appending."""
        self.terminal = sys.stdout
        # errors='ignore' drops characters the file's encoding cannot
        # represent instead of raising an encoding error.
        self.log = open(filename, "a", errors='ignore')

    def write(self, message):
        """Write *message* to the captured stream and to the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both sinks so buffered text actually reaches the log file."""
        self.terminal.flush()
        if not self.log.closed:
            self.log.flush()

    def close(self):
        """Flush and close the log file; the captured stream is left open."""
        if not self.log.closed:
            self.log.flush()
            self.log.close()

# From here on, everything printed is also appended to the results file.
sys.stdout = Logger(filename='猫眼最受期待榜.txt')


if __name__ == '__main__':
    # Walk the board ten entries at a time: offsets 0, 10, ..., 90.
    for num in range(0, 100, 10):
        maoyan("http://maoyan.com/board/6?offset=%s" % num, num)

 

# posted @ 2020-09-22 17:41  凹凸曼大人  阅读(163)  评论(0)    收藏  举报