# Asynchronous (AJAX) loading: the data shown on the page changes while the
# URL in the address bar stays the same.
#
# The server first returns a cookie when the client visits the initial listing
# URL; the real data URL is then requested with that cookie attached.
import requests

# Listing page that is visited first so the server issues its cookie.
start_url = 'https://www.lagou.com/jobs/list_%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90/p-city_0?&cl=false&fromSearch=true&labelWords=&suginput='

# JSON endpoint that is POSTed to for the actual data.
url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'

headers = {
    'User-Agent': "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0",
    'Connection': 'keep-alive',  # keep the connection open between requests
    'Referer': 'https://www.lagou.com/jobs/list_%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90?labelWords=sug&fromSearch=true&suginput=%E6%95%B0%E6%8D%AE',
}

# Form fields sent with the POST request.
data = {
    'first': 'true',
    'pn': '1',
    'kd': '数据分析',
}

# Create a session object so cookies persist from one request to the next.
session = requests.Session()

# Send a first request through the session to obtain and store the cookie.
# A timeout is set so a stalled connection cannot hang the script forever.
session.get(start_url, headers=headers, timeout=10)

# Send the real POST request. The session attaches its stored cookies
# automatically, so they do not need to be passed explicitly.
response = session.post(url, headers=headers, data=data, timeout=10)
print(response.status_code)
print(response.text)