requests

requests-get请求

 
复制代码
1 import requests
2 
3 response= requests.get('http://www.baidu.com')#get方法请求网址
4 print(response)
5 print(response.status_code)#状态码
6 print(response.text)#响应体
7 print(response.cookies)#获取cookies
另外还有response.url,response.history历史记录
复制代码
复制代码
1 #requests的各种请求方式
2 import requests
3 requests.get('http://httpbin.org/get')
4 requests.post('http://httpbin.org/post')
5 requests.delete('http://httpbin.org/delete')
6 requests.head('http://httpbin.org/head')
7 requests.options('http://httpbin.org/options')
复制代码
复制代码
 1 #简单的get请求
 2 #通过response.text获得响应体
 3 import requests
 4 response = requests.get('http://httpbin.org/get')
 5 print(response.text)
 6 
 7 #带参数的请求
 8 #利用params将字典形式数据传入进去,相当于urllib.parse.urlencode
 9 data = {
10     'name':'germy',
11     'age':22
12 }
13 response = requests.get('http://httpbin.org/get',params=data)
14 print(response.text)
复制代码
复制代码
1 #解析json
2 #response.json()相当于json.loads()方法
3 import requests
4 import json
5 response = requests.get('http://httpbin.org/get')
6 print(response.json())
7 print('*'*100)
8 print(json.loads(response.text))
复制代码
1 #获取并保存二进制数据,response.content即二进制数据
2 import requests
3 response= requests.get('http://inews.gtimg.com/newsapp_ls/0/1531939223/0')
4 #print(response.content)
5 with open('D://tomas.jpg','wb') as f:
6     f.write(response.content)
复制代码
 1 #添加headers
 2 import requests
 3 response = requests.get('https://www.zhihu.com/explore')
 4 #print(response.text)#结果返回服务器端错误,证实爬虫被知乎禁止了
 5 #结果:<html><body><h1>500 Server Error</h1>
 6 
 7 #解决的方法是添加headers,方法非常简单,加进去就可以了
 8 headers={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 \
 9 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 LBBROWSER'}
10 response = requests.get('https://www.zhihu.com/explore',headers=headers)
11 print(response.text)
复制代码
1 import requests
 2 requests.get('http://httpbin.org/cookies/set/number/123456')
 3 response = requests.get('http://httpbin.org/cookies')
 4 print(response.text)
 5 #以上结果为空,原来设想通过第一步的设置cookies,然后通过第二步得到cookies
 6 #而实际上,两次get请求被看做两个完全独立的操作,互相没有任何牵涉
 7 #所以当第二步想要get到cookies时,完全没有任何cookies返回
 8 #解决这种问题的方法是,引入requests.Session()
 9 
10 s = requests.Session()
11 s.get('http://httpbin.org/cookies/set/number/123456')
12 response =  s.get('http://httpbin.org/cookies')
13 print(response.text)
14 #requests.Session能够跨请求的保持某些参数,比如cookies,即在同一个session实例
15 #发出的所有请求都保持同一个cookies,而requests模块每次会自动处理cookies,这样就很方便的解决了问题
requests-模拟登陆
 1 import requests
 2 
 3 proxies={
 4     'http':'http://192.168.1.1:88',
 5     'https':'https://192.168.1.1:88',
 6 #如果代理ip需要用户名和密码的话 'http':'http://user:password@192.168.1.1:88'
 7 }
 8 response = requests.get(url,proxies=proxies)
 9 print(response.status_code)
10 
11 #如果代理类型不是http或者https,而是socks代理
12 #需要先 pip3 install 'requests[socks]'
13 proxies={
14     'http':'socks5://192.168.1.1:88',
15     'https':'socks5://192.168.1.1:88',
16 #如果代理ip需要用户名和密码的话 'http':'socks5://user:password@192.168.1.1:88'
17 }
requests代理设置
# POST a JSON document to the server.
# NOTE: the payload is passed via `data=` together with a form-urlencoded
# Content-Type, so the JSON text travels as the value of a form field named
# "json" rather than as a raw JSON request body.
import requests

# Headers mimicking a browser XHR call (see User-Agent / X-Requested-With).
# "Content-Length" is deliberately not listed: requests computes it from the
# encoded body, so a hard-coded value would only be misleading.
headers = {
    "Accept": "application/json, text/plain, */*",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "appkey": "8dc7959eeee2792ac2eebb490e60deed",
    "Connection": "keep-alive",
    "Content-Type": "application/x-www-form-urlencoded",
    "Host": "cq.gsxt.gov.cn",
    "Origin": "http://cq.gsxt.gov.cn",
    "Referer": "http://q.gsxt.gov.cn/xxgg/xxgg_list.html",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
}

# Single form field "json" whose value is the JSON query text (sent verbatim,
# including its whitespace).
data = {
          "json":'''{
              "qc": {
                "noticetype": "12"
              },
              "page": {
                "pagesize":"10",
                "currentpage":"1"
              }
            }'''
          }

url = 'http://cq.gsxt.gov.cn/gsxt/api/affichebase/queryList'

# Without a timeout requests waits indefinitely for an unresponsive server;
# with one, requests.exceptions.Timeout is raised instead of hanging.
response = requests.post(url, data=data, headers=headers, timeout=10)
print(response.text)
post一个json数据

 

1 import requests
 2 #response = requests.get('https://www.12306.cn')
 3 #print(response.status_code)
 4 #以上会显示错误,因为需要证书验证
 5 
 6 #解决证书问题,我们有两种方法
 7 
 8 #方法一,我们可以通过设置verify=False来忽略证书验证
 9 response = requests.get('https://www.12306.cn',verify=False)
10 print(response.status_code)
11 #以上解决了证书验证问题,但是仍然是有警告抛出:InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised.
12 #为了忽略警告,可以引入以下
13 #from requests.packages import urllib3
14 #urllib3.disable_warnings()
15 
16 #方法二,手动传入证书,如果有的话
17 response = requests.get('https://www.12306.cn',cert=('/path/server.crt','/path/key'))
requests-证书验证
1 #如果需要用户名和密码才能登陆网站,则需要认证设置auth=()
2 import requests
3 response = requests.get(url,auth=('user','password'))
4 
5 #或者
6 from requests.auth import HTTPBasicAuth
7 response = requests.get(url,auth=HTTPBasicAuth('user','password'))
认证设置

 

posted @ 2018-08-31 21:05  我笑了  阅读(669)  评论(0)    收藏  举报