爬虫入门之拉勾网职位信息

爬虫拉勾职位

import requests
import pandas as pd

# HTTP headers sent with every request in this script: a desktop Chrome
# User-Agent, the Java job-list page as Referer, and a JSON-preferring Accept.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/71.0.3578.98 Safari/537.36'
    ),
    'Referer': 'https://www.lagou.com/jobs/list_Java/p-city_0?px=default',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
}

# Form fields posted to the position-search endpoint.
# 'kd' matches the keyword in the Referer URL; 'pn' is presumably the page
# number and 'sid' a captured search/session id -- TODO confirm.
data = dict(
    first='false',
    pn='1',
    kd='Java',
    sid='5ab0519c713e46e593cba52cdea8ae2e',
)

# Fetch one page of job postings and export it to an Excel workbook.
#
# Flow: open a session, load the HTML list page so the server sets its
# cookies on the session, POST the search form to the JSON endpoint with
# that same session, then dump the result list to 'job.xlsx'.

# Create a session so cookies persist across requests.
s = requests.session()

# Request the list page first; its only purpose here is to obtain cookies.
url_cookie = 'https://www.lagou.com/jobs/list_Java/p-city_0?px=default'
# A timeout keeps the script from hanging forever on a stalled connection.
s.get(url=url_cookie, headers=headers, timeout=10)
# Kept as a module-level name in case code elsewhere in the file uses it.
cookie = s.cookies

# Query the JSON endpoint, carrying the cookies obtained above.
url = 'https://www.lagou.com/jobs/positionAjax.json?px=default&needAddtionalResult=false'

# Post through the session (not the module-level requests.post) so its
# cookie jar and connection pool are reused; no explicit cookies= is needed.
response = s.post(url=url, headers=headers, data=data, timeout=10)
# Fail fast on HTTP-level errors instead of trying to parse an error page.
response.raise_for_status()

payload = response.json()
try:
    job = payload['content']['positionResult']['result']
except (KeyError, TypeError) as err:
    # The reply did not have the expected shape; include a truncated copy
    # of it so the failure can be diagnosed.
    raise KeyError(
        'unexpected positionAjax.json payload: %.200r' % (payload,)
    ) from err

# Build a table from the result records and write it to an Excel workbook.
df = pd.DataFrame.from_dict(job)
df.to_excel('job.xlsx')
posted @ 2020-03-12 22:00  Mario_ok  阅读(265)  评论(0)    收藏  举报