第四节 使用 urllib.request 模拟浏览器

 1 import urllib.request
 2 import urllib.parse
 3 
 4 
 5 #拉钩网,反爬虫策略很好,请求头需要添加Referer(引用; 引用页; 推荐人; 参照页),请求对象需要data=,method='POST'
 6 # url = 'https://baike.baidu.com/item/%E8%99%9A%E6%8B%9F%E7%8E%AF%E5%A2%83/6529153?fr=aladdin'
 7 url = 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput='
 8 headers = {
 9     "User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
10     'Referer':'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput='
11 }
12 data = {
13     'first':"true",
14     'pn':1,
15     'kd':'python'
16 }
17 #调用urllib.request中的Request类,创建一个req对象
18 req = urllib.request.Request(url,headers=headers,data=urllib.parse.urlencode(data).encode('utf-8'),method='POST')
19 #将创建的req对象传给urllib.request.urlopen
20 resp = urllib.request.urlopen(req)
21 print(resp.read().decode('utf-8'))

 

posted @ 2020-03-16 21:21  kog_maw  阅读(287)  评论(0)  编辑  收藏  举报