爬取猫眼电影最受期待榜前100 方法2
import re
import sys

# Seconds to wait for the server before giving up on a request.
_REQUEST_TIMEOUT = 10

# Patterns used to pull the per-film fields out of a board page. They are
# compiled once here instead of on every call; re.S lets '.' span newlines
# because the matched markup is spread over several lines.
_RANK_RE = re.compile('<i class="board-index board-index-(.*?)">(.*?)</i>', re.S)
_TITLE_RE = re.compile(
    '<p class="name"><a href="/films/(.*?)" title="(.*?)" '
    'data-act="boarditem-click" data-val="{movieId:(.*?)}">(.*?)</a></p>',
    re.S,
)
_STAR_RE = re.compile('<p class="star">(.*?)</p>', re.S)
_RELEASE_RE = re.compile('<p class="releasetime">(.*?)</p>', re.S)


def _parse_board(html):
    """Return a list of (rank, title, stars, release) tuples found in *html*."""
    ranks = _RANK_RE.findall(html)
    titles = _TITLE_RE.findall(html)
    stars = _STAR_RE.findall(html)
    releases = _RELEASE_RE.findall(html)
    # zip() walks the four result lists in lockstep, so a page where one
    # pattern matched fewer times yields fewer rows instead of an IndexError.
    return [
        # rank[0] is the number from the CSS class suffix; title[1] is the
        # anchor's title attribute; the stars text is stripped of the
        # whitespace that surrounds it in the markup.
        (rank[0], title[1], star.strip(), release)
        for rank, title, star, release in zip(ranks, titles, stars, releases)
    ]


def maoyan(url, i):
    """Download one Maoyan board page and print every film listed on it.

    Args:
        url: Full URL of the board page to fetch.
        i: Offset of that page. It is only used to build the ``Referer``
            header sent with the request.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not answer within ``_REQUEST_TIMEOUT`` seconds.
    """
    # Imported here rather than at module level so that Logger and the HTML
    # parser stay importable when the third-party package is not installed.
    import requests

    # The Referer points at the page before this one; for the first page it
    # points at offset 40 (presumably the board's last page -- TODO confirm).
    referer_offset = 40 if i == 0 else i - 10
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0',
        'Host': 'maoyan.com',
        'Referer': 'http://maoyan.com/board/6?offset=%s' % referer_offset,
    }
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)

    for rank, name, stars, release in _parse_board(response.text):
        print("--------------------------------")
        print("排名:第%s名" % rank)
        print("电影名称:%s" % name)
        print(stars)
        print(release)


class Logger(object):
    """Write-only stream that copies everything to the console and a file.

    Intended to be assigned to ``sys.stdout`` so that ``print`` output is
    shown on screen and appended to a log file at the same time.
    """

    def __init__(self, filename="Default.log"):
        """Remember the current ``sys.stdout`` and open *filename* for append."""
        self.terminal = sys.stdout
        # An explicit UTF-8 encoding keeps non-ASCII text intact regardless
        # of the platform's default encoding; with the default encoding and
        # errors='ignore', unencodable characters were silently dropped.
        self.log = open(filename, "a", encoding="utf-8", errors="ignore")

    def write(self, message):
        """Send *message* to the console first, then to the log file."""
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        """Flush both the console stream and the log file."""
        self.terminal.flush()
        self.log.flush()


# Kept at import time, as before: from here on everything printed is also
# appended to this file.
sys.stdout = Logger('猫眼最受期待榜.txt')

if __name__ == '__main__':
    # Request offsets 0, 10, ..., 90 -- ten pages in total.
    for num in range(0, 100, 10):
        maoyan("http://maoyan.com/board/6?offset=%s" % num, num)
如果人生还有重来,那就不叫人生。

浙公网安备 33010602011771号