# Crawler for Lagou job postings
import requests
import pandas as pd
# Scrape one page of Java job postings from lagou.com and save it to job.xlsx.
#
# Flow: open the public listing page to collect cookies, then POST the search
# form to the JSON endpoint within the same session, and dump the returned
# position list to an Excel file.

# Listing page: requested first to obtain cookies, and also sent as the
# Referer of the JSON request.
url_cookie = 'https://www.lagou.com/jobs/list_Java/p-city_0?px=default'
# JSON endpoint the position list is read from.
url = 'https://www.lagou.com/jobs/positionAjax.json?px=default&needAddtionalResult=false'
# Seconds to wait on each request. requests applies no timeout by default, so
# without this a stalled connection would hang the script indefinitely.
REQUEST_TIMEOUT = 10

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    'Referer': url_cookie,
    'Accept': 'application/json, text/javascript, */*; q=0.01',
}
# Form fields of the search request: page number ('pn') and keyword ('kd').
# NOTE(review): 'sid' looks like a captured session id - confirm it is still
# accepted by the site.
data = {
    'first': 'false',
    'pn': '1',
    'kd': 'Java',
    'sid': '5ab0519c713e46e593cba52cdea8ae2e',
}

# Use one session for both requests so the cookies set by the listing page are
# sent with the POST automatically, and close it when done.
with requests.Session() as s:
    # Request the listing page to obtain cookies.
    s.get(url=url_cookie, headers=headers, timeout=REQUEST_TIMEOUT)
    # Kept as a module-level name; the session already carries these cookies.
    cookie = s.cookies
    # Continue with the cookies attached.
    response = s.post(url=url, headers=headers, data=data, timeout=REQUEST_TIMEOUT)
    # Fail on an HTTP error status instead of trying to parse an error page.
    response.raise_for_status()
    payload = response.json()

try:
    job = payload['content']['positionResult']['result']
except KeyError as err:
    # The response did not have the expected shape; include (a bounded slice
    # of) what was actually returned so the failure can be diagnosed.
    raise KeyError(
        'unexpected response payload, missing key %s: %.500r' % (err, payload)
    ) from err

df = pd.DataFrame.from_dict(job)
df.to_excel('job.xlsx')