1 以抓取豆瓣分页为例
2 from re import S
3
4 import requests
5 from setuptools import findall # S多行匹配 M单行匹配
6 from urllib3 import response
7
8
def get_all_movies(page=10, timeout=10):
    """Download the Douban Top 250 list page by page and pass each page on.

    Every page's HTML is printed, followed by a banner with the page
    number. Pages answered with HTTP 200 are handed to ``analysis_data``;
    any other status, or a network-level failure, is reported and the
    crawl moves on to the next page.

    Args:
        page: Number of list pages to request. The ``start`` query
            parameter advances by 25 per page. Defaults to 10.
        timeout: Seconds to wait for the server before giving up on a
            page. Defaults to 10.
    """
    # Request headers: send a browser user-agent with every request.
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36'
    }
    for n in range(page):
        url = f'https://movie.douban.com/top250?start={n * 25}&filter='
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, headers=header, timeout=timeout)
        except requests.RequestException:
            # Treat a network failure like a bad status: report and go on,
            # rather than letting one page abort the whole crawl.
            print(f'==============第{n + 1}页=============')
            print('爬取失败')
            continue
        print(response.text)
        print(f'==============第{n + 1}页=============')
        if response.status_code == 200:
            analysis_data(response.text)
        else:
            print('爬取失败')