requests

非原创
转载于:https://www.cnblogs.com/caochao-/p/9004377.html




爬虫
        </h1>
        <div class="clear"></div>
        <div class="postBody">
            <div id="cnblogs_post_body" class="blogpost-body blogpost-body-html">

爬虫:编写程序向网站发起请求,获取资源后分析并提取有用数据

requests

get请求

# 1、无参数实例

import requests

ret = requests.get('https://github.com/timeline.json')

print ret.url
print ret.text

2、有参数实例

import requests

payload = {'key1': 'value1', 'key2': 'value2'}
ret
= requests.get("http://httpbin.org/get", params=payload)

print ret.url
print ret.text

View Code

post请求

# 1、基本POST实例

import requests

payload = {'key1': 'value1', 'key2': 'value2'}
ret
= requests.post("http://httpbin.org/post", data=payload)

print ret.text

2、发送请求头和数据实例

import requests
import json

url = 'https://api.github.com/some/endpoint'
payload
= {'some': 'data'}
headers
= {'content-type': 'application/json'}


ret
= requests.post(url, data=json.dumps(payload), headers=headers) 或
ret = requests.post(url, json=payload, headers=headers)

  
print ret.text
print ret.cookies

 

其它请求

requests.get(url, params=None, **kwargs)
requests.post(url, data=None, json=None, **kwargs)
requests.put(url, data=None, **kwargs)
requests.head(url, **kwargs)
requests.delete(url, **kwargs)
requests.patch(url, data=None, **kwargs)
requests.options(url, **kwargs)

以上方法均是在此方法的基础上构建

requests.request(method, url, **kwargs)

View Code

参数

def param_method_url():
    # requests.request(method='get', url='http://127.0.0.1:8000/test/')
    # requests.request(method='post', url='http://127.0.0.1:8000/test/')
    pass

def param_param():
# - 可以是字典
#
- 可以是字符串
#
- 可以是字节(ascii编码以内)

# requests.request(method</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">get</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# url</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://127.0.0.1:8000/test/</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# </span><span style="color: rgba(0, 0, 255, 1)">params</span>={<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">k1</span><span style="color: rgba(128, 0, 0, 1)">'</span>: <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">v1</span><span style="color: rgba(128, 0, 0, 1)">'</span>, <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">k2</span><span style="color: rgba(128, 0, 0, 1)">'</span>: <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">水电费</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">})

# requests.request(method</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">get</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# url</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://127.0.0.1:8000/test/</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# </span><span style="color: rgba(0, 0, 255, 1)">params</span>=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">k1=v1&amp;k2=水电费&amp;k3=v3&amp;k3=vv3</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">)

# requests.request(method</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">get</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# url</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://127.0.0.1:8000/test/</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# </span><span style="color: rgba(0, 0, 255, 1)">params</span>=bytes(<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">k1=v1&amp;k2=k2&amp;k3=v3&amp;k3=vv3</span><span style="color: rgba(128, 0, 0, 1)">"</span>, encoding=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">utf8</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">))

# 错误
# requests.request(method</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">get</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# url</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://127.0.0.1:8000/test/</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# </span><span style="color: rgba(0, 0, 255, 1)">params</span>=bytes(<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">k1=v1&amp;k2=水电费&amp;k3=v3&amp;k3=vv3</span><span style="color: rgba(128, 0, 0, 1)">"</span>, encoding=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">utf8</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">))
pass

def param_data():
# 可以是字典
# 可以是字符串
# 可以是字节
# 可以是文件对象

# requests.request(method</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">POST</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# url</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://127.0.0.1:8000/test/</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# data</span>={<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">k1</span><span style="color: rgba(128, 0, 0, 1)">'</span>: <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">v1</span><span style="color: rgba(128, 0, 0, 1)">'</span>, <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">k2</span><span style="color: rgba(128, 0, 0, 1)">'</span>: <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">水电费</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">})

# requests.request(method</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">POST</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# url</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://127.0.0.1:8000/test/</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# data</span>=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">k1=v1; k2=v2; k3=v3; k3=v4</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">
# )

# requests.request(method</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">POST</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# url</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://127.0.0.1:8000/test/</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# data</span>=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">k1=v1;k2=v2;k3=v3;k3=v4</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">,
# headers</span>={<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">Content-Type</span><span style="color: rgba(128, 0, 0, 1)">'</span>: <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">application/x-www-form-urlencoded</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">}
# )

# requests.request(method</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">POST</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# url</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://127.0.0.1:8000/test/</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# data</span>=open(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">data_file.py</span><span style="color: rgba(128, 0, 0, 1)">'</span>, mode=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">r</span><span style="color: rgba(128, 0, 0, 1)">'</span>, encoding=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">utf-8</span><span style="color: rgba(128, 0, 0, 1)">'</span>), # 文件内容是:k1=v1;k2=v2;k3=v3;k3=<span style="color: rgba(0, 0, 0, 1)">v4
# headers</span>={<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">Content-Type</span><span style="color: rgba(128, 0, 0, 1)">'</span>: <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">application/x-www-form-urlencoded</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">}
# )
pass

def param_json():
# 将json中对应的数据进行序列化成一个字符串,json.dumps(...)
# 然后发送到服务器端的body中,并且Content-Type是 {'Content-Type': 'application/json'}
requests.request(method
='POST',
url
='http://127.0.0.1:8000/test/',
json
={'k1': 'v1', 'k2': '水电费'})

def param_headers():
# 发送请求头到服务器端
requests.request(method='POST',
url
='http://127.0.0.1:8000/test/',
json
={'k1': 'v1', 'k2': '水电费'},
headers
={'Content-Type': 'application/x-www-form-urlencoded'}
)

def param_cookies():
# 发送Cookie到服务器端
requests.request(method='POST',
url
='http://127.0.0.1:8000/test/',
data
={'k1': 'v1', 'k2': 'v2'},
cookies
={'cook1': 'value1'},
)
# 也可以使用CookieJar(字典形式就是在此基础上封装)
from http.cookiejar import CookieJar
from http.cookiejar import Cookie

obj </span>=<span style="color: rgba(0, 0, 0, 1)"> CookieJar()
obj.set_cookie(Cookie(version</span>=<span style="color: rgba(128, 0, 128, 1)">0</span>, name=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">c1</span><span style="color: rgba(128, 0, 0, 1)">'</span>, value=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">v1</span><span style="color: rgba(128, 0, 0, 1)">'</span>, port=None, domain=<span style="color: rgba(128, 0, 0, 1)">''</span>, path=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">/</span><span style="color: rgba(128, 0, 0, 1)">'</span>, secure=False, expires=<span style="color: rgba(0, 0, 0, 1)">None,
                      discard</span>=True, comment=None, comment_url=None, rest={<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">HttpOnly</span><span style="color: rgba(128, 0, 0, 1)">'</span>: None}, rfc2109=<span style="color: rgba(0, 0, 0, 1)">False,
                      port_specified</span>=False, domain_specified=False, domain_initial_dot=False, path_specified=<span style="color: rgba(0, 0, 0, 1)">False)
               )
requests.request(method</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">POST</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
                 url</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://127.0.0.1:8000/test/</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
                 data</span>={<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">k1</span><span style="color: rgba(128, 0, 0, 1)">'</span>: <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">v1</span><span style="color: rgba(128, 0, 0, 1)">'</span>, <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">k2</span><span style="color: rgba(128, 0, 0, 1)">'</span>: <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">v2</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">},
                 cookies</span>=<span style="color: rgba(0, 0, 0, 1)">obj)

def param_proxies():
#代理,如果封了ip就可以用代理

# proxies </span>=<span style="color: rgba(0, 0, 0, 1)"> {
# </span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">http</span><span style="color: rgba(128, 0, 0, 1)">"</span>: <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">61.172.249.96:80</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">,
# </span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">https</span><span style="color: rgba(128, 0, 0, 1)">"</span>: <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">http://61.185.219.126:3128</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">,
# }

# proxies </span>= {<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://10.20.1.128</span><span style="color: rgba(128, 0, 0, 1)">'</span>: <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://10.10.1.10:5323</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">}

# ret </span>= requests.<span style="color: rgba(0, 0, 255, 1)">get</span>(<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">http://www.proxy360.cn/Proxy</span><span style="color: rgba(128, 0, 0, 1)">"</span>, proxies=<span style="color: rgba(0, 0, 0, 1)">proxies)
# print(ret.headers)


# </span><span style="color: rgba(0, 0, 255, 1)">from</span><span style="color: rgba(0, 0, 0, 1)"> requests.auth import HTTPProxyAuth
#
# proxyDict </span>=<span style="color: rgba(0, 0, 0, 1)"> {
# </span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http</span><span style="color: rgba(128, 0, 0, 1)">'</span>: <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">77.75.105.165</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# </span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">https</span><span style="color: rgba(128, 0, 0, 1)">'</span>: <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">77.75.105.165</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">
# }
# auth </span>= HTTPProxyAuth(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">username</span><span style="color: rgba(128, 0, 0, 1)">'</span>, <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">mypassword</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">)
#
# r </span>= requests.<span style="color: rgba(0, 0, 255, 1)">get</span>(<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">http://www.google.com</span><span style="color: rgba(128, 0, 0, 1)">"</span>, proxies=proxyDict, auth=<span style="color: rgba(0, 0, 0, 1)">auth)
# print(r.text)

pass

def param_files():
# 发送文件
# file_dict = {
#
'f1': open('readme', 'rb')
# }
# requests.request(method
='POST',
# url
='http://127.0.0.1:8000/test/',
# files
=file_dict)

# 发送文件,定制文件名
# file_dict </span>=<span style="color: rgba(0, 0, 0, 1)"> {
# </span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">f1</span><span style="color: rgba(128, 0, 0, 1)">'</span>: (<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">test.txt</span><span style="color: rgba(128, 0, 0, 1)">'</span>, open(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">readme</span><span style="color: rgba(128, 0, 0, 1)">'</span>, <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">rb</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">))
# }
# requests.request(method</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">POST</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# url</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://127.0.0.1:8000/test/</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# files</span>=<span style="color: rgba(0, 0, 0, 1)">file_dict)

# 发送文件,定制文件名
# file_dict </span>=<span style="color: rgba(0, 0, 0, 1)"> {
# </span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">f1</span><span style="color: rgba(128, 0, 0, 1)">'</span>: (<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">test.txt</span><span style="color: rgba(128, 0, 0, 1)">'</span>, <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">hahsfaksfa9kasdjflaksdjf</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">)
# }
# requests.request(method</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">POST</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# url</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://127.0.0.1:8000/test/</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# files</span>=<span style="color: rgba(0, 0, 0, 1)">file_dict)

# 发送文件,定制文件名
# file_dict </span>=<span style="color: rgba(0, 0, 0, 1)"> {
#     </span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">f1</span><span style="color: rgba(128, 0, 0, 1)">'</span>: (<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">test.txt</span><span style="color: rgba(128, 0, 0, 1)">'</span>, <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">hahsfaksfa9kasdjflaksdjf</span><span style="color: rgba(128, 0, 0, 1)">"</span>, <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">application/text</span><span style="color: rgba(128, 0, 0, 1)">'</span>, {<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">k1</span><span style="color: rgba(128, 0, 0, 1)">'</span>: <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">0</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">})
# }
# requests.request(method</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">POST</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
#                  url</span>=<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://127.0.0.1:8000/test/</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
#                  files</span>=<span style="color: rgba(0, 0, 0, 1)">file_dict)

pass

def param_auth():
from requests.auth import HTTPBasicAuth, HTTPDigestAuth

ret </span>= requests.<span style="color: rgba(0, 0, 255, 1)">get</span>(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">https://api.github.com/user</span><span style="color: rgba(128, 0, 0, 1)">'</span>, auth=HTTPBasicAuth(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">wupeiqi</span><span style="color: rgba(128, 0, 0, 1)">'</span>, <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">sdfasdfasdf</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">))
print(ret.text)

# ret </span>= requests.<span style="color: rgba(0, 0, 255, 1)">get</span>(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://192.168.1.1</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">,
# auth</span>=HTTPBasicAuth(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">admin</span><span style="color: rgba(128, 0, 0, 1)">'</span>, <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">admin</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">))
# ret.encoding </span>= <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">gbk</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">
# print(ret.text)

# ret </span>= requests.<span style="color: rgba(0, 0, 255, 1)">get</span>(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://httpbin.org/digest-auth/auth/user/pass</span><span style="color: rgba(128, 0, 0, 1)">'</span>, auth=HTTPDigestAuth(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">user</span><span style="color: rgba(128, 0, 0, 1)">'</span>, <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">pass</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">))
# print(ret)
#

def param_timeout():
# ret = requests.get('http://google.com/', timeout=1)
# print(ret)

# ret </span>= requests.<span style="color: rgba(0, 0, 255, 1)">get</span>(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://google.com/</span><span style="color: rgba(128, 0, 0, 1)">'</span>, timeout=(<span style="color: rgba(128, 0, 128, 1)">5</span>, <span style="color: rgba(128, 0, 128, 1)">1</span><span style="color: rgba(0, 0, 0, 1)">))
# print(ret)
pass

def param_allow_redirects():
ret = requests.get('http://127.0.0.1:8000/test/', allow_redirects=False)
print(ret.text)

def param_stream():
ret = requests.get('http://127.0.0.1:8000/test/', stream=True)
print(ret.content)
ret.close()

# </span><span style="color: rgba(0, 0, 255, 1)">from</span><span style="color: rgba(0, 0, 0, 1)"> contextlib import closing
# with closing(requests.</span><span style="color: rgba(0, 0, 255, 1)">get</span>(<span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">http://httpbin.org/get</span><span style="color: rgba(128, 0, 0, 1)">'</span>, stream=True)) <span style="color: rgba(0, 0, 255, 1)">as</span><span style="color: rgba(0, 0, 0, 1)"> r:
# # 在此处理响应。
# </span><span style="color: rgba(0, 0, 255, 1)">for</span> i <span style="color: rgba(0, 0, 255, 1)">in</span><span style="color: rgba(0, 0, 0, 1)"> r.iter_content():
# print(i)

def requests_session():
import requests

session </span>=<span style="color: rgba(0, 0, 0, 1)"> requests.Session()

### </span><span style="color: rgba(128, 0, 128, 1)">1</span><span style="color: rgba(0, 0, 0, 1)">、首先登陆任何页面,获取cookie

i1 </span>= session.<span style="color: rgba(0, 0, 255, 1)">get</span>(url=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">http://dig.chouti.com/help/service</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">)

### </span><span style="color: rgba(128, 0, 128, 1)">2</span><span style="color: rgba(0, 0, 0, 1)">、用户登陆,携带上一次的cookie,后台对cookie中的 gpsd 进行授权
i2 </span>=<span style="color: rgba(0, 0, 0, 1)"> session.post(
    url</span>=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">http://dig.chouti.com/login</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">,
    data</span>=<span style="color: rgba(0, 0, 0, 1)">{
        </span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">phone</span><span style="color: rgba(128, 0, 0, 1)">'</span>: <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">8615131255089</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">,
        </span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">password</span><span style="color: rgba(128, 0, 0, 1)">'</span>: <span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">xxxxxx</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">,
        </span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">oneMonth</span><span style="color: rgba(128, 0, 0, 1)">'</span>: <span style="color: rgba(128, 0, 0, 1)">""</span><span style="color: rgba(0, 0, 0, 1)">
    }
)

i3 </span>=<span style="color: rgba(0, 0, 0, 1)"> session.post(
    url</span>=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">http://dig.chouti.com/link/vote?linksId=8589623</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">,
)
print(i3.text)</span></pre>
View Code

BeautifulSoup

BeautifulSoup是一个模块,该模块用于接收一个HTML或XML字符串,然后将其进行格式化,之后则可以使用他提供的方法进行快速查找指定元素,从而使得在HTML或XML中查找指定元素变得简单

安装:pip3 install beautifulsoup4

使用:

 from bs4 import BeautifulSoup
bs4的用处:
  -解析爬虫数据
  -解析XHML数据
  -用户提交数据,进行格式校验(KindEditor、UEditor)
1 soup = BeautifulSoup(html, "html.parser")
2 # 找到第一个a标签
3 tag1 = soup.find(name='a')
4 # 找到所有的a标签
5 tag2 = soup.find_all(name='a')
6 # 找到id=link2的标签
7 tag3 = soup.select(attes={id='link2')

实例

import requests
from bs4 import BeautifulSoup

# 1、下载页面
ret=requests.get(
url
="https://www.autohome.com.cn/news/",
)

# 字节内容

print(ret.content)

# 该网页的字节编码

print(ret.apparent_encoding)

ret.encoding=ret.apparent_encoding
# 将byte转化为字符串格式

print(ret.text)

# 2、解析

获取指定内容

soup=BeautifulSoup(ret.text,"html.parser") #解析器

div
=soup.find(name="div",id="auto-channel-lazyload-article")

li_list=div.find_all(name="li")
# print(li_list)
for li in li_list:
h3
=li.find(name="h3")
if not h3:
continue

a</span>=li.find(name=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">a</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">)
href</span>=(a.get(<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">href</span><span style="color: rgba(128, 0, 0, 1)">"</span>)).strip(<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">//</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">)

p</span>=li.find(name=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">p</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">)

img</span>=li.find(name=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">img</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">)
src</span>=img.get(<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">src</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">)

file_name</span>=src.split(<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">__</span><span style="color: rgba(128, 0, 0, 1)">"</span>)[1<span style="color: rgba(0, 0, 0, 1)">]
</span><span style="color: rgba(0, 128, 0, 1)">#</span><span style="color: rgba(0, 128, 0, 1)"> 获取图片</span>
img_list=requests.get(url=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">https:%s</span><span style="color: rgba(128, 0, 0, 1)">"</span>%<span style="color: rgba(0, 0, 0, 1)">src)
</span><span style="color: rgba(0, 0, 255, 1)">print</span><span style="color: rgba(0, 0, 0, 1)">(img_list)
</span><span style="color: rgba(0, 128, 0, 1)">#</span><span style="color: rgba(0, 128, 0, 1)"> 写入文件</span>
with open(<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">img/%s</span><span style="color: rgba(128, 0, 0, 1)">"</span>%file_name,<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">wb</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">) as f:
    f.write(img_list.content)


</span><span style="color: rgba(0, 128, 0, 1)">#</span><span style="color: rgba(0, 128, 0, 1)"> print(h3.text)</span>
<span style="color: rgba(0, 128, 0, 1)">#</span><span style="color: rgba(0, 128, 0, 1)"> print(href)</span>
<span style="color: rgba(0, 128, 0, 1)">#</span><span style="color: rgba(0, 128, 0, 1)"> print(p.text)</span></pre>
汽车之家新闻
import requests
from bs4 import BeautifulSoup

# 获取未授权的cookies
ret1=requests.get(
url
="https://dig.chouti.com/",
headers
={
"User-Agent":'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}
)
ret1_cookie
=ret1.cookies.get_dict()

# 登录
ret=requests.post(
url
='https://dig.chouti.com/login',
data
={
"phone":'xx',
"password":"xx",
"oneMonth":1
},
headers
={
"User-Agent":'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
},
cookies
=ret1_cookie
)

# 获取每页id
for id in range(1,2):

ret2</span>=<span style="color: rgba(0, 0, 0, 1)">requests.get(
    url</span>=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">https://dig.chouti.com/all/hot/recent/%s</span><span style="color: rgba(128, 0, 0, 1)">"</span>%<span style="color: rgba(0, 0, 0, 1)">id,
    headers</span>=<span style="color: rgba(0, 0, 0, 1)">{
        </span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">User-Agent</span><span style="color: rgba(128, 0, 0, 1)">"</span>: <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">
    },
    cookies</span>=<span style="color: rgba(0, 0, 0, 1)">ret1_cookie

)

</span><span style="color: rgba(0, 128, 0, 1)">#</span><span style="color: rgba(0, 128, 0, 1)"> print(ret2.text)</span>
soup=BeautifulSoup(ret2.text,"html.parser")
div</span>=soup.find(name=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">div</span><span style="color: rgba(128, 0, 0, 1)">"</span>,attrs={<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">class</span><span style="color: rgba(128, 0, 0, 1)">"</span>:<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">content-list</span><span style="color: rgba(128, 0, 0, 1)">"</span>,<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">id</span><span style="color: rgba(128, 0, 0, 1)">"</span>:<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">content-list</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">})

items</span>=div.find_all(name=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">div</span><span style="color: rgba(128, 0, 0, 1)">"</span>,attrs={<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">class</span><span style="color: rgba(128, 0, 0, 1)">"</span>:<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">item</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">})


</span><span style="color: rgba(0, 0, 255, 1)">for</span> i <span style="color: rgba(0, 0, 255, 1)">in</span><span style="color: rgba(0, 0, 0, 1)"> items:
    par2</span>=i.find(name=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">div</span><span style="color: rgba(128, 0, 0, 1)">"</span>,attrs={<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">class</span><span style="color: rgba(128, 0, 0, 1)">"</span>:<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">part2</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">})
    nid</span>=par2.get(<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">share-linkid</span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(0, 0, 0, 1)">)

    </span><span style="color: rgba(0, 128, 0, 1)">#</span><span style="color: rgba(0, 128, 0, 1)"> 点赞</span>
    ret3=<span style="color: rgba(0, 0, 0, 1)">requests.post(
        url</span>=<span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">https://dig.chouti.com/link/vote?linksId=%s</span><span style="color: rgba(128, 0, 0, 1)">"</span>%<span style="color: rgba(0, 0, 0, 1)">nid,
        headers</span>=<span style="color: rgba(0, 0, 0, 1)">{
        </span><span style="color: rgba(128, 0, 0, 1)">"</span><span style="color: rgba(128, 0, 0, 1)">User-Agent</span><span style="color: rgba(128, 0, 0, 1)">"</span>: <span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(128, 0, 0, 1)">Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36</span><span style="color: rgba(128, 0, 0, 1)">'</span><span style="color: rgba(0, 0, 0, 1)">
     },
        cookies</span>=<span style="color: rgba(0, 0, 0, 1)">ret1_cookie
    )
    </span><span style="color: rgba(0, 0, 255, 1)">print</span>(ret3.text)</pre>
抽屉登录点赞
posted @ 2022-02-23 21:47  下个ID见  阅读(67)  评论(0)    收藏  举报