requests
requests-get请求
import requests

# Fetch the page with a GET request.
response = requests.get('http://www.baidu.com')

print(response)              # the Response object itself
print(response.status_code)  # HTTP status code
print(response.text)         # response body
print(response.cookies)      # cookies attached to the response
另外,响应对象还有 response.url(请求的URL)和 response.history(请求的历史记录)等属性。
# The different HTTP request methods exposed by requests.
import requests

requests.get('http://httpbin.org/get')
requests.post('http://httpbin.org/post')
requests.delete('http://httpbin.org/delete')
requests.head('http://httpbin.org/head')
requests.options('http://httpbin.org/options')
# A simple GET request.
# The response body is available through response.text.
import requests

response = requests.get('http://httpbin.org/get')
print(response.text)

# A GET request with query parameters.
# The dict handed to params= is encoded into the query string,
# comparable to what urllib.parse.urlencode does.
data = {
    'name': 'germy',
    'age': 22,
}
response = requests.get('http://httpbin.org/get', params=data)
print(response.text)
# Parsing a JSON response.
# response.json() is comparable to calling json.loads() on the body.
import json

import requests

response = requests.get('http://httpbin.org/get')

print(response.json())
print('*' * 100)  # visual separator between the two outputs
print(json.loads(response.text))
# Fetch and save binary data; response.content holds the raw bytes.
import requests

response = requests.get('http://inews.gtimg.com/newsapp_ls/0/1531939223/0')
# print(response.content)

# Write the bytes unchanged to disk ('wb' = binary write mode).
with open('D://tomas.jpg', 'wb') as f:
    f.write(response.content)
# Adding request headers.
import requests

response = requests.get('https://www.zhihu.com/explore')
# print(response.text)
# The author observed a server-side error here, taken as evidence that
# Zhihu rejects crawler traffic. Observed result:
#   <html><body><h1>500 Server Error</h1>

# The remedy is simply to send headers (here a browser User-Agent)
# along with the request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 LBBROWSER'
    ),
}
response = requests.get('https://www.zhihu.com/explore', headers=headers)
print(response.text)
import requests

requests.get('http://httpbin.org/cookies/set/number/123456')
response = requests.get('http://httpbin.org/cookies')
print(response.text)
# The output above contains no cookies. The idea was to set a cookie in the
# first step and read it back in the second, but the two get() calls are
# treated as completely independent operations that share nothing, so the
# second request has no cookies to return.
# The fix is to use requests.Session().

# Using the session as a context manager closes it (and its pooled
# connections) when the block ends.
with requests.Session() as s:
    s.get('http://httpbin.org/cookies/set/number/123456')
    response = s.get('http://httpbin.org/cookies')
    print(response.text)
# A Session persists certain parameters, such as cookies, across requests:
# every request sent from the same Session instance shares the same cookies,
# and requests handles them automatically, which solves the problem neatly.
import requests

# Placeholder target; substitute the page you actually want to fetch.
url = 'http://httpbin.org/get'

# Proxy to use for each scheme.
proxies = {
    'http': 'http://192.168.1.1:88',
    'https': 'https://192.168.1.1:88',
    # If the proxy requires a username and password, embed them in the URL:
    # 'http': 'http://user:password@192.168.1.1:88',
}
response = requests.get(url, proxies=proxies)
print(response.status_code)

# If the proxy is a SOCKS proxy rather than HTTP/HTTPS,
# first install the extra:  pip3 install 'requests[socks]'
proxies = {
    'http': 'socks5://192.168.1.1:88',
    'https': 'socks5://192.168.1.1:88',
    # If the proxy requires a username and password, embed them in the URL:
    # 'http': 'socks5://user:password@192.168.1.1:88',
}
# POST a JSON payload (sent as the form field "json").
import requests

# NOTE(review): these headers look copied from a browser capture; confirm
# whether Content-Length and Host really need to be set by hand.
headers = {
    "Accept": "application/json, text/plain, */*",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "appkey": "8dc7959eeee2792ac2eebb490e60deed",
    "Connection": "keep-alive",
    "Content-Length": "107",
    "Content-Type": "application/x-www-form-urlencoded",
    "Host": "cq.gsxt.gov.cn",
    "Origin": "http://cq.gsxt.gov.cn",
    "Referer": "http://q.gsxt.gov.cn/xxgg/xxgg_list.html",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
}

# The JSON document travels as a string inside a single form field.
data = {
    "json": '''{ "qc": { "noticetype": "12" }, "page": { "pagesize":"10", "currentpage":"1" } }''',
}

url = 'http://cq.gsxt.gov.cn/gsxt/api/affichebase/queryList'
response = requests.post(url, data=data, headers=headers)  # requests.post()
print(response.text)
import requests

# response = requests.get('https://www.12306.cn')
# print(response.status_code)
# The request above raises an error because certificate verification is required.

# There are two ways to deal with the certificate problem.

# Option 1: skip certificate verification by passing verify=False.
response = requests.get('https://www.12306.cn', verify=False)
print(response.status_code)
# That gets past the verification error, but a warning is still emitted:
#   InsecureRequestWarning: Unverified HTTPS request is being made.
#   Adding certificate verification is strongly advised.
# To silence the warning, add the following:
# from requests.packages import urllib3
# urllib3.disable_warnings()

# Option 2: pass a certificate in manually, if you have one.
response = requests.get('https://www.12306.cn', cert=('/path/server.crt', '/path/key'))
# When a site requires a username and password to log in,
# supply them through the auth= argument.
import requests
from requests.auth import HTTPBasicAuth

# Placeholder endpoint that expects these credentials; replace it with the
# site you actually need to authenticate against.
url = 'http://httpbin.org/basic-auth/user/password'

response = requests.get(url, auth=('user', 'password'))

# Or, with an explicit auth object:
response = requests.get(url, auth=HTTPBasicAuth('user', 'password'))


浙公网安备 33010602011771号