1.  #拉勾网需要User-Agent请求头才能获取到内容

 

from urllib import request

url="https://www.lagou.com/jobs/list_python%20?labelWords=&fromSearch=true&suginput="


headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
}


req = request.Request(url,headers=headers)

resp = request.urlopen(req)

print(resp.read())

 

 

 

 

2.json解析后的内容

 

 

 3.尝试获取(需要User-Agent请求头以及Referer

from urllib import request,parse

#需要User-Agent请求头以及Referer

# url="https://www.lagou.com/jobs/list_python%20?labelWords=&fromSearch=true&suginput="
urlajax="https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false"


headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.96 Safari/537.36',
 'Referer': 'https://www.lagou.com/jobs/list_python%20?labelWords=&fromSearch=true&suginput='
}

data={
'first':'true',
'pn':1,
'kd':'python'
}


req = request.Request(urlajax,headers=headers,data=parse.urlencode(data).encode('utf-8'),method='POST')

resp = request.urlopen(req)

print(resp.read().decode('utf-8'))