requests.session

# -*- coding: utf-8 -*-

"""
requests.session
~~~~~~~~~~~~~~~~

This module provides a Session object to manage and persist settings across
requests (cookies, auth, proxies).
"""
import os
from collections import Mapping
from datetime import datetime

from .auth import _basic_auth_str
from .compat import cookielib, OrderedDict, urljoin, urlparse
from .cookies import (
cookiejar_from_dict, extract_cookies_to_jar, RequestsCookieJar, merge_cookies)
from .models import Request, PreparedRequest, DEFAULT_REDIRECT_LIMIT
from .hooks import default_hooks, dispatch_hook
from ._internal_utils import to_native_string
from .utils import to_key_val_list, default_headers
from .exceptions import (
TooManyRedirects, InvalidSchema, ChunkedEncodingError, ContentDecodingError)
from .packages.urllib3._collections import RecentlyUsedContainer
from .structures import CaseInsensitiveDict

from .adapters import HTTPAdapter

from .utils import (
requote_uri, get_environ_proxies, get_netrc_auth, should_bypass_proxies,
get_auth_from_url, rewind_body
)

from .status_codes import codes

# formerly defined here, reexposed here for backward compatibility
from .models import REDIRECT_STATI

REDIRECT_CACHE_SIZE = 1000


def merge_setting(request_setting, session_setting, dict_class=OrderedDict):
"""Determines appropriate setting for a given request, taking into account
the explicit setting on that request, and the setting in the session. If a
setting is a dictionary, they will be merged together using `dict_class`
"""
"""
session_setting or request_setting 其中一个为空时则返回另一个
如果都不是Mapping 类型 则返回request_setting
两者都不为空 且均为Mapping对象 则将request_setting中的设置更新于session_setting中
并删除其中value为空的key
然后返回合并后ordereddict object
"""

if session_setting is None:
return request_setting

if request_setting is None:
return session_setting

# Bypass if not a dictionary (e.g. verify)
if not (
isinstance(session_setting, Mapping) and
isinstance(request_setting, Mapping)
):
return request_setting
# s= dict_class(to_key_val_list(session_setting)) a OrderedDict() object
# such as s:OrderedDict([('key', 'val')])
# >>> s['key']
# val

merged_setting = dict_class(to_key_val_list(session_setting))
# 将request_session 中的设置 更新于 session_setting中
# 如果request_session 的设置已经存在于session_setting中 则不增加 否则新增于session_setting中

merged_setting.update(to_key_val_list(request_setting))

# Remove keys that are set to None. Extract keys first to avoid altering
# the dictionary during iteration.
none_keys = [k for (k, v) in merged_setting.items() if v is None]
for key in none_keys:
del merged_setting[key]
# 将session_setting 中value为空的key删除
return merged_setting


def merge_hooks(request_hooks, session_hooks, dict_class=OrderedDict):
"""Properly merges both requests and session hooks.

This is necessary because when request_hooks == {'response': []}, the
merge breaks Session hooks entirely.
"""
if session_hooks is None or session_hooks.get('response') == []:
return request_hooks

if request_hooks is None or request_hooks.get('response') == []:
return session_hooks

return merge_setting(request_hooks, session_hooks, dict_class)


class SessionRedirectMixin(object):
def resolve_redirects(self, resp, req, stream=False, timeout=None,
verify=True, cert=None, proxies=None, **adapter_kwargs):
"""Receives a Response. Returns a generator of Responses."""

i = 0
hist = [] # keep track of history

while resp.is_redirect:
prepared_request = req.copy()

if i > 0:
# Update history and keep track of redirects.
hist.append(resp)
new_hist = list(hist)
resp.history = new_hist

try:
# get the response content
resp.content # Consume socket so it can be released
except (ChunkedEncodingError, ContentDecodingError, RuntimeError):
resp.raw.read(decode_content=False)

if i >= self.max_redirects:
raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects, response=resp)

# Release the connection back into the pool.
resp.close()

url = resp.headers['location']

# Handle redirection without scheme (see: RFC 1808 Section 4)
if url.startswith('//'):
parsed_rurl = urlparse(resp.url)
url = '%s:%s' % (parsed_rurl.scheme, url)

# The scheme should be lower case...
parsed = urlparse(url)
url = parsed.geturl()

# Facilitate relative 'location' headers, as allowed by RFC 7231.
# (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
# Compliant with RFC3986, we percent encode the url.
if not parsed.netloc:
url = urljoin(resp.url, requote_uri(url))
else:
url = requote_uri(url)

prepared_request.url = to_native_string(url)
# Cache the url, unless it redirects to itself.
if resp.is_permanent_redirect and req.url != prepared_request.url:
self.redirect_cache[req.url] = prepared_request.url

self.rebuild_method(prepared_request, resp)

# https://github.com/kennethreitz/requests/issues/1084
if resp.status_code not in (codes.temporary_redirect, codes.permanent_redirect):
# https://github.com/kennethreitz/requests/issues/3490
purged_headers = ('Content-Length', 'Content-Type', 'Transfer-Encoding')
for header in purged_headers:
prepared_request.headers.pop(header, None)
prepared_request.body = None

headers = prepared_request.headers
try:
del headers['Cookie']
except KeyError:
pass

# Extract any cookies sent on the response to the cookiejar
# in the new request. Because we've mutated our copied prepared
# request, use the old one that we haven't yet touched.
extract_cookies_to_jar(prepared_request._cookies, req, resp.raw)
merge_cookies(prepared_request._cookies, self.cookies)
prepared_request.prepare_cookies(prepared_request._cookies)

# Rebuild auth and proxy information.
proxies = self.rebuild_proxies(prepared_request, proxies)
self.rebuild_auth(prepared_request, resp)

# A failed tell() sets `_body_position` to `object()`. This non-None
# value ensures `rewindable` will be True, allowing us to raise an
# UnrewindableBodyError, instead of hanging the connection.
rewindable = (
prepared_request._body_position is not None and
('Content-Length' in headers or 'Transfer-Encoding' in headers)
)

# Attempt to rewind consumed file-like object.
if rewindable:
rewind_body(prepared_request)

# Override the original request.
req = prepared_request

resp = self.send(
req,
stream=stream,
timeout=timeout,
verify=verify,
cert=cert,
proxies=proxies,
allow_redirects=False,
**adapter_kwargs
)

extract_cookies_to_jar(self.cookies, prepared_request, resp.raw)

i += 1
yield resp

def rebuild_auth(self, prepared_request, response):
"""When being redirected we may want to strip authentication from the
request to avoid leaking credentials. This method intelligently removes
and reapplies authentication where possible to avoid credential loss.
"""
"""
如果请求头中包含授权认证信息;
重定向后会先从请求头中删除授权信息;
重新获取用户和密码;
再次准备授权信息
"""
headers = prepared_request.headers
url = prepared_request.url

if 'Authorization' in headers:
# If we get redirected to a new host, we should strip out any
# authentication headers.
original_parsed = urlparse(response.request.url)
redirect_parsed = urlparse(url)

if (original_parsed.hostname != redirect_parsed.hostname):
del headers['Authorization']

# .netrc might have more auth for us on our new host.
new_auth = get_netrc_auth(url) if self.trust_env else None
if new_auth is not None:
prepared_request.prepare_auth(new_auth)

return

def rebuild_proxies(self, prepared_request, proxies):
"""This method re-evaluates the proxy configuration by considering the
environment variables. If we are redirected to a URL covered by
NO_PROXY, we strip the proxy configuration. Otherwise, we set missing
proxy keys for this URL (in case they were stripped by a previous
redirect).

This method also replaces the Proxy-Authorization header where
necessary.

:rtype: dict
"""
# 请求头信息
headers = prepared_request.headers
# 请求url
url = prepared_request.url
# http 、https
scheme = urlparse(url).scheme
new_proxies = proxies.copy() if proxies is not None else {}
# url 绕过代理
if self.trust_env and not should_bypass_proxies(url):
# 从环境变量中获取代理信息
environ_proxies = get_environ_proxies(url)

proxy = environ_proxies.get(scheme, environ_proxies.get('all'))

if proxy:
new_proxies.setdefault(scheme, proxy)

if 'Proxy-Authorization' in headers:
# 删除请求头的授权信息
del headers['Proxy-Authorization']

try: # new_proxies {'http':'http://172.21.2.3:8009'}
username, password = get_auth_from_url(new_proxies[scheme])
except KeyError:
username, password = None, None

if username and password:
headers['Proxy-Authorization'] = _basic_auth_str(username, password)

return new_proxies

def rebuild_method(self, prepared_request, response):
"""When being redirected we may want to change the method of the request
based on certain specs or browser behavior.
"""
# get the prepare request object method
method = prepared_request.method

# http://tools.ietf.org/html/rfc7231#section-6.4.4
# 303: ('see_other', 'other'),
if response.status_code == codes.see_other and method != 'HEAD':
method = 'GET'

# Do what the browsers do, despite standards...
# First, turn 302s into GETs.
if response.status_code == codes.found and method != 'HEAD':
method = 'GET'

# Second, if a POST is responded to with a 301, turn it into a GET.
# This bizarre behaviour is explained in Issue 1704.
if response.status_code == codes.moved and method == 'POST':
method = 'GET'

prepared_request.method = method


class Session(SessionRedirectMixin):
"""A Requests session.

Provides cookie persistence, connection-pooling, and configuration.

Basic Usage::

>>> import requests
>>> s = requests.Session()
>>> s.get('http://httpbin.org/get')
<Response [200]>

Or as a context manager::

>>> with requests.Session() as s:
>>> s.get('http://httpbin.org/get')
<Response [200]>
"""

__attrs__ = [
'headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify',
'cert', 'prefetch', 'adapters', 'stream', 'trust_env',
'max_redirects',
]

def __init__(self):

#: A case-insensitive dictionary of headers to be sent on each
#: :class:`Request <Request>` sent from this
#: :class:`Session <Session>`.
# the default header the default user agetnt is :python-requests/2.1.3
# {
# 'User-Agent': 'python-requests/2.1.3',
# 'Accept-Encoding': ', '.join(('gzip', 'deflate')),
# 'Accept': '*/*',
# 'Connection': 'keep-alive',
# }
self.headers = default_headers()

#: Default Authentication tuple or object to attach to
#: :class:`Request <Request>`.
self.auth = None

#: Dictionary mapping protocol or protocol and host to the URL of the proxy
#: (e.g. {'http': 'foo.bar:3128', 'http://host.name': 'foo.bar:4012'}) to
#: be used on each :class:`Request <Request>`.
self.proxies = {}

#: Event-handling hooks.
# the default hooks is {'response':[]}
self.hooks = default_hooks()

#: Dictionary of querystring data to attach to each
#: :class:`Request <Request>`. The dictionary values may be lists for
#: representing multivalued query parameters.
self.params = {}

#: Stream response content default.
self.stream = False

#: SSL Verification default.
self.verify = True

#: SSL client certificate default.
self.cert = None

#: Maximum number of redirects allowed. If the request exceeds this
#: limit, a :class:`TooManyRedirects` exception is raised.
#: This defaults to requests.models.DEFAULT_REDIRECT_LIMIT, which is
#: 30.
self.max_redirects = DEFAULT_REDIRECT_LIMIT

#: Trust environment settings for proxy configuration, default
#: authentication and similar.
self.trust_env = True

#: A CookieJar containing all currently outstanding cookies set on this
#: session. By default it is a
#: :class:`RequestsCookieJar <requests.cookies.RequestsCookieJar>`, but
#: may be any other ``cookielib.CookieJar`` compatible object.
self.cookies = cookiejar_from_dict({})

# Default connection adapters.
self.adapters = OrderedDict()
self.mount('https://', HTTPAdapter())
self.mount('http://', HTTPAdapter())

# Only store 1000 redirects to prevent using infinite memory
# a dict-like object
self.redirect_cache = RecentlyUsedContainer(REDIRECT_CACHE_SIZE)

def __enter__(self):
# first get the session object
return self

def __exit__(self, *args):
# finaly coloe the session
self.close()

def prepare_request(self, request):
"""Constructs a :class:`PreparedRequest <PreparedRequest>` for
transmission and returns it. The :class:`PreparedRequest` has settings
merged from the :class:`Request <Request>` instance and those of the
:class:`Session`.

:param request: :class:`Request` instance to prepare with this
session's settings.
:rtype: requests.PreparedRequest
"""
cookies = request.cookies or {}

# Bootstrap CookieJar.
if not isinstance(cookies, cookielib.CookieJar):
cookies = cookiejar_from_dict(cookies)

# Merge with session cookies
merged_cookies = merge_cookies(
merge_cookies(RequestsCookieJar(), self.cookies), cookies)

# Set environment's basic authentication if not explicitly set.
auth = request.auth
if self.trust_env and not auth and not self.auth:
auth = get_netrc_auth(request.url)

p = PreparedRequest()
p.prepare(
method=request.method.upper(),
url=request.url,
files=request.files,
data=request.data,
json=request.json,
headers=merge_setting(request.headers, self.headers, dict_class=CaseInsensitiveDict),
params=merge_setting(request.params, self.params),
auth=merge_setting(auth, self.auth),
cookies=merged_cookies,
hooks=merge_hooks(request.hooks, self.hooks),
)
return p

def request(self, method, url,
params=None,
data=None,
headers=None,
cookies=None,
files=None,
auth=None,
timeout=None,
allow_redirects=True,
proxies=None,
hooks=None,
stream=None,
verify=None,
cert=None,
json=None):
"""Constructs a :class:`Request <Request>`, prepares it and sends it.
Returns :class:`Response <Response>` object.

:param method: method for the new :class:`Request` object.
:param url: URL for the new :class:`Request` object.
:param params: (optional) Dictionary or bytes to be sent in the query
string for the :class:`Request`.
:param data: (optional) Dictionary, bytes, or file-like object to send
in the body of the :class:`Request`.
:param json: (optional) json to send in the body of the
:class:`Request`.
:param headers: (optional) Dictionary of HTTP Headers to send with the
:class:`Request`.
:param cookies: (optional) Dict or CookieJar object to send with the
:class:`Request`.
:param files: (optional) Dictionary of ``'filename': file-like-objects``
for multipart encoding upload.
:param auth: (optional) Auth tuple or callable to enable
Basic/Digest/Custom HTTP Auth.
:param timeout: (optional) How long to wait for the server to send
data before giving up, as a float, or a :ref:`(connect timeout,
read timeout) <timeouts>` tuple.
:type timeout: float or tuple
:param allow_redirects: (optional) Set to True by default.
:type allow_redirects: bool
:param proxies: (optional) Dictionary mapping protocol or protocol and
hostname to the URL of the proxy.
:param stream: (optional) whether to immediately download the response
content. Defaults to ``False``.
:param verify: (optional) whether the SSL cert will be verified.
A CA_BUNDLE path can also be provided. Defaults to ``True``.
:param cert: (optional) if String, path to ssl client cert file (.pem).
If Tuple, ('cert', 'key') pair.
:rtype: requests.Response
"""
# Create the Request.
req = Request(
method = method.upper(),
url = url,
headers = headers,
files = files,
data = data or {},
json = json,
params = params or {},
auth = auth,
cookies = cookies,
hooks = hooks,
)
# prepare the Request <Request> object
prep = self.prepare_request(req)

proxies = proxies or {}

settings = self.merge_environment_settings(
prep.url, proxies, stream, verify, cert
)

# Send the request.
send_kwargs = {
'timeout': timeout,
'allow_redirects': allow_redirects,
}
send_kwargs.update(settings)
resp = self.send(prep, **send_kwargs)

return resp

def get(self, url, **kwargs):
"""Sends a GET request. Returns :class:`Response` object.

:param url: URL for the new :class:`Request` object.
:param \*\*kwargs: Optional arguments that ``request`` takes.
:rtype: requests.Response
"""
# the allow_redirects not in kwargs,then add allow_redirects
kwargs.setdefault('allow_redirects', True)
return self.request('GET', url, **kwargs)

def options(self, url, **kwargs):
"""Sends a OPTIONS request. Returns :class:`Response` object.

:param url: URL for the new :class:`Request` object.
:param \*\*kwargs: Optional arguments that ``request`` takes.
:rtype: requests.Response
"""

kwargs.setdefault('allow_redirects', True)
return self.request('OPTIONS', url, **kwargs)

def head(self, url, **kwargs):
"""Sends a HEAD request. Returns :class:`Response` object.

:param url: URL for the new :class:`Request` object.
:param \*\*kwargs: Optional arguments that ``request`` takes.
:rtype: requests.Response
"""

kwargs.setdefault('allow_redirects', False)
return self.request('HEAD', url, **kwargs)

def post(self, url, data=None, json=None, **kwargs):
"""Sends a POST request. Returns :class:`Response` object.

:param url: URL for the new :class:`Request` object.
:param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
:param json: (optional) json to send in the body of the :class:`Request`.
:param \*\*kwargs: Optional arguments that ``request`` takes.
:rtype: requests.Response
"""

return self.request('POST', url, data=data, json=json, **kwargs)

def put(self, url, data=None, **kwargs):
"""Sends a PUT request. Returns :class:`Response` object.

:param url: URL for the new :class:`Request` object.
:param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
:param \*\*kwargs: Optional arguments that ``request`` takes.
:rtype: requests.Response
"""

return self.request('PUT', url, data=data, **kwargs)

def patch(self, url, data=None, **kwargs):
"""Sends a PATCH request. Returns :class:`Response` object.

:param url: URL for the new :class:`Request` object.
:param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
:param \*\*kwargs: Optional arguments that ``request`` takes.
:rtype: requests.Response
"""

return self.request('PATCH', url, data=data, **kwargs)

def delete(self, url, **kwargs):
"""Sends a DELETE request. Returns :class:`Response` object.

:param url: URL for the new :class:`Request` object.
:param \*\*kwargs: Optional arguments that ``request`` takes.
:rtype: requests.Response
"""

return self.request('DELETE', url, **kwargs)

def send(self, request, **kwargs):
"""
Send a given PreparedRequest.

:rtype: requests.Response
"""
# Set defaults that the hooks can utilize to ensure they always have
# the correct parameters to reproduce the previous request.
kwargs.setdefault('stream', self.stream)
kwargs.setdefault('verify', self.verify)
kwargs.setdefault('cert', self.cert)
kwargs.setdefault('proxies', self.proxies)

# It's possible that users might accidentally send a Request object.
# Guard against that specific failure case.
if isinstance(request, Request):
raise ValueError('You can only send PreparedRequests.')

# Set up variables needed for resolve_redirects and dispatching of hooks
# if allow_redirects in exists then return kwargs['allow_redirects'] else return True
allow_redirects = kwargs.pop('allow_redirects', True)
stream = kwargs.get('stream')
hooks = request.hooks

# Resolve URL in redirect cache, if available.
if allow_redirects:
checked_urls = set()
while request.url in self.redirect_cache:
# add the request.ur to the checked_urls set()
checked_urls.add(request.url)
new_url = self.redirect_cache.get(request.url)
if new_url in checked_urls:
break
request.url = new_url

# Get the appropriate adapter to use
adapter = self.get_adapter(url=request.url)

# Start time (approximately) of the request
start = datetime.utcnow()

# Send the request
r = adapter.send(request, **kwargs)

# Total elapsed time of the request (approximately)
r.elapsed = datetime.utcnow() - start

# Response manipulation hooks
r = dispatch_hook('response', hooks, r, **kwargs)

# Persist cookies
if r.history:

# If the hooks create history then we want those cookies too
for resp in r.history:
extract_cookies_to_jar(self.cookies, resp.request, resp.raw)

extract_cookies_to_jar(self.cookies, request, r.raw)

# Redirect resolving generator.
gen = self.resolve_redirects(r, request, **kwargs)

# Resolve redirects if allowed.
history = [resp for resp in gen] if allow_redirects else []

# Shuffle things around if there's history.
if history:
# Insert the first (original) request at the start
history.insert(0, r)
# Get the last request made
r = history.pop()
r.history = history

if not stream:
r.content

return r

def merge_environment_settings(self, url, proxies, stream, verify, cert):
"""
Check the environment and merge it with some settings.

:rtype: dict
"""
# Gather clues from the surrounding environment.
if self.trust_env:
# Set environment's proxies.
env_proxies = get_environ_proxies(url) or {}
for (k, v) in env_proxies.items():
proxies.setdefault(k, v)

# Look for requests environment configuration and be compatible
# with cURL.
if verify is True or verify is None:
verify = (os.environ.get('REQUESTS_CA_BUNDLE') or
os.environ.get('CURL_CA_BUNDLE'))

# Merge all the kwargs.
proxies = merge_setting(proxies, self.proxies)
stream = merge_setting(stream, self.stream)
verify = merge_setting(verify, self.verify)
cert = merge_setting(cert, self.cert)

return {'verify': verify, 'proxies': proxies, 'stream': stream,
'cert': cert}

def get_adapter(self, url):
"""
Returns the appropriate connection adapter for the given URL.

:rtype: requests.adapters.BaseAdapter
"""
for (prefix, adapter) in self.adapters.items():

if url.lower().startswith(prefix):
return adapter

# Nothing matches :-/
raise InvalidSchema("No connection adapters were found for '%s'" % url)

def close(self):
"""Closes all adapters and as such the session"""
for v in self.adapters.values():
v.close()

def mount(self, prefix, adapter):
"""Registers a connection adapter to a prefix.

Adapters are sorted in descending order by key length.
"""
# get the adapters
self.adapters[prefix] = adapter
keys_to_move = [k for k in self.adapters if len(k) < len(prefix)]

for key in keys_to_move:
self.adapters[key] = self.adapters.pop(key)

def __getstate__(self):
state = dict((attr, getattr(self, attr, None)) for attr in self.__attrs__)
state['redirect_cache'] = dict(self.redirect_cache)
return state

def __setstate__(self, state):
# 恢复Session()对象的属性
redirect_cache = state.pop('redirect_cache', {})
for attr, value in state.items():
setattr(self, attr, value)

self.redirect_cache = RecentlyUsedContainer(REDIRECT_CACHE_SIZE)
for redirect, to in redirect_cache.items():
self.redirect_cache[redirect] = to


def session():
"""
Returns a :class:`Session` for context-management.

:rtype: Session
"""

return Session()

posted @ 2018-08-10 13:36  tny_leyon  阅读(593)  评论(0编辑  收藏  举报