# A self-written HTTP request template that web crawlers can call conveniently.
# coding:utf-8
import requests
from fake_useragent import UserAgent
from random import choice
# General-purpose HTTP request module
class model_of_request:
    """Reusable HTTP helper for crawlers.

    Bundles a random User-Agent header, a pool of verified HTTP proxies, a
    charset patch for ``requests`` responses, and GET/POST wrappers that
    retry a fixed number of times.
    """

    # Number of attempts self_post / self_get make before giving up.
    MAX_ATTEMPTS = 2

    def __init__(self):
        # URL fetched through each candidate proxy to decide whether it works.
        self.urls = 'http://1212.ip138.com/ic.asp'
        # Proxy URLs ('http://host:port') that passed the liveness check.
        self.is_active_proxy_ip = []

    def get_user_agent(self):
        """Return a headers dict carrying a randomly chosen User-Agent."""
        # Reuse one UserAgent object per instance instead of constructing a
        # new one for every request.
        source = getattr(self, '_ua_source', None)
        if source is None:
            source = self._ua_source = UserAgent()
        return {'User-Agent': source.random}

    def postdata_to_dict(self, response):
        """Convert a raw form body such as ``'a=1&b=2'`` into a dict.

        Each ``&``-separated pair is split on its FIRST ``=`` only, so values
        that themselves contain ``=`` (base64 padding, for example) are kept
        whole. A pair without ``=`` maps to an empty string and empty
        segments (``''`` or a trailing ``&``) are skipped. Keys and values
        are returned as-is; no URL-decoding is performed.

        :param response: raw POST body string.
        :return: dict mapping field names to values.
        """
        data = {}
        for pair in response.split('&'):
            if not pair:
                continue
            key, _, value = pair.partition('=')
            data[key] = value
        return data

    def get_active_proxy(self, file_name, timeout=10):
        """Load proxies from a file and keep the ones that answer.

        :param file_name: text file with one ``host:port`` entry per line.
        :param timeout: seconds to wait for each proxy before treating it as
            dead; without it a single unresponsive proxy could block forever.
        :raises IOError/OSError: if the file cannot be opened.

        Working proxies are appended to ``self.is_active_proxy_ip``.
        """
        with open(file_name, 'r') as f:
            # Blank lines would otherwise turn into the bogus proxy 'http://'.
            proxy_urls = ['http://' + line.strip() for line in f if line.strip()]

        for url in proxy_urls:
            try:
                # requests expects a scheme -> proxy URL mapping, not a string.
                response = requests.get(self.urls, proxies={'http': url},
                                        timeout=timeout)
            except (requests.RequestException, ValueError):
                # Unreachable proxy or malformed entry: skip this candidate.
                continue
            # Same test as the truthiness of a Response: status below 400.
            if response.ok:
                self.is_active_proxy_ip.append(url)

    def get_random_proxy(self):
        """Return a ``proxies`` mapping for one randomly chosen live proxy.

        :return: ``{'http': proxy_url}``, or ``None`` when the pool is empty,
            in which case requests connects directly without a proxy.

        NOTE: only the ``http`` scheme is mapped, so ``https://`` URLs are
        not sent through the proxy.
        """
        if not self.is_active_proxy_ip:
            return None
        return {'http': choice(self.is_active_proxy_ip)}

    def monkey_patch(self):
        """Patch ``requests.models.Response.content`` to fix garbled Chinese.

        When a response reports the fallback charset ``ISO-8859-1``, the real
        charset is looked up in the document itself (or guessed from the
        bytes), the body is transcoded to UTF-8, and ``encoding`` is set to
        ``'utf-8'`` so that ``.content`` and ``.text`` stay consistent.

        The patch is installed only once per process; repeated calls are
        no-ops, so the property is never wrapped around itself.
        """
        response_cls = requests.models.Response
        if getattr(response_cls, '_utf8_content_patched', False):
            return
        original = response_cls.content

        def content(resp):
            raw = original.fget(resp)
            header_encoding = resp.encoding
            if header_encoding != 'ISO-8859-1' or not raw:
                return raw
            # Clear the marker before detection: apparent_encoding reads
            # .content again and would otherwise re-enter this branch.
            resp.encoding = None
            # The charset scan works on text, so give it an ASCII view of
            # the bytes (charset declarations are plain ASCII).
            declared = requests.utils.get_encodings_from_content(
                raw.decode('ascii', 'ignore'))
            detected = declared[0] if declared else resp.apparent_encoding
            try:
                text = raw.decode(detected, 'replace')
            except (LookupError, TypeError):
                # Unknown or missing charset: leave the response untouched.
                resp.encoding = header_encoding
                return raw
            raw = text.encode('utf8', 'replace')
            resp._content = raw
            resp.encoding = 'utf-8'
            return raw

        response_cls.content = property(content)
        response_cls._utf8_content_patched = True

    def _send(self, sender, url_name, timeout, **kwargs):
        """Make one request attempt; return the Response or None on failure."""
        headers = self.get_user_agent()
        proxies = self.get_random_proxy()
        try:
            return sender(url_name, headers=headers, proxies=proxies,
                          timeout=timeout, **kwargs)
        except requests.RequestException:
            # Network / proxy / timeout error: let the caller retry.
            return None

    def self_post(self, url_name, post_data, timeout=10):
        """POST ``post_data`` to ``url_name`` with a random UA and proxy.

        Failed attempts and 5xx answers are retried, up to ``MAX_ATTEMPTS``
        attempts in total.

        :param url_name: target URL.
        :param post_data: form data passed to ``requests.post(data=...)``.
        :param timeout: per-request timeout in seconds.
        :return: the Response, or ``None`` if every attempt failed.
        """
        self.monkey_patch()
        for _ in range(self.MAX_ATTEMPTS):
            response_data = self._send(requests.post, url_name, timeout,
                                       data=post_data)
            if response_data is not None and response_data.status_code < 500:
                return response_data
        return None

    def self_get(self, url_name, timeout=10):
        """GET ``url_name`` with a random UA and proxy.

        Network failures are retried, up to ``MAX_ATTEMPTS`` attempts in
        total. Unlike ``self_post``, a 5xx answer is not retried.

        :param url_name: target URL.
        :param timeout: per-request timeout in seconds.
        :return: the Response; ``False`` as soon as the server answers with a
            5xx status; ``None`` if every attempt failed with a network error.
        """
        self.monkey_patch()
        for _ in range(self.MAX_ATTEMPTS):
            respose_data = self._send(requests.get, url_name, timeout)
            if respose_data is None:
                continue
            if respose_data.status_code >= 500:
                return False
            return respose_data
        return None
if __name__ == '__main__':
    # Demo entry point: print one proxy mapping from the pool.
    respon = model_of_request()
    # A fresh instance has an empty pool until get_active_proxy(file_name)
    # has been run, so only ask for a proxy when one is available.
    if respon.is_active_proxy_ip:
        print(respon.get_random_proxy())
    else:
        print('no active proxy loaded; call get_active_proxy(file_name) first')