23.获取代理IP并使用代理
1.获取代理
# -*- coding:utf-8 -*-
import HTMLParser
import requests
import json
import re
class MyParser(HTMLParser.HTMLParser):
    """HTML parser that collects the text found inside <ul> and <li> elements.

    Every text node seen while one of the handled tags is "open" is appended
    to ``self.data`` in document order. Only the most recently opened handled
    tag is tracked (there is no tag stack), so once that tag closes, text is
    ignored until the next handled start tag, even if an outer handled tag
    is still open.
    """

    def __init__(self):
        # Explicit base-class call: works whether the base is an old-style
        # or a new-style class.
        HTMLParser.HTMLParser.__init__(self)
        # Tag names whose text content is collected.
        self.handledtags = ['ul', 'li']
        # Name of the handled tag currently being read, or None.
        self.processing = None
        # Collected text fragments, in document order.
        self.data = []

    def handle_starttag(self, tag, attrs):
        """Start collecting text when a handled tag opens."""
        if tag in self.handledtags:
            self.processing = tag

    def handle_data(self, data):
        """Record a text node if a handled tag is currently open."""
        if self.processing:
            self.data.append(data)

    def handle_endtag(self, tag):
        """Stop collecting text when the currently tracked tag closes."""
        if tag == self.processing:
            self.processing = None
# Dotted-quad shape check only; octet ranges (0-255) are not validated.
_IPV4_RE = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")

# Number of consecutive text fragments, starting at the IP address, that are
# grouped into one row.
_ROW_LENGTH = 9


def _collect_http_proxies(fragments):
    """Group text fragments into rows and return 'ip:port' for 'http' rows.

    A row starts at the first fragment that looks like a dotted-quad IPv4
    address and spans the next ``_ROW_LENGTH`` fragments (the address
    included). Fragments seen before a row has started are skipped. Within
    a row, index 0 is the address, index 1 is used as the port and index 3
    is compared against ``'http'``.

    NOTE(review): the row length and field offsets presumably mirror the
    markup of the page this scraper targets -- confirm if the page changes.

    Args:
        fragments: iterable of text fragments in document order.

    Returns:
        list of ``'ip:port'`` strings, one per complete row whose index-3
        field equals ``'http'``. An incomplete trailing row is dropped.
    """
    proxies = []
    row = []
    for fragment in fragments:
        text = fragment.strip('\n')
        if not row:
            # Not inside a row yet: only an IP-shaped fragment can start one.
            if _IPV4_RE.match(text):
                row.append(text)
            continue
        row.append(text)
        if len(row) == _ROW_LENGTH:
            if row[3] == 'http':
                proxies.append('%s:%s' % (row[0], row[1]))
            # Row complete: look for the next IP-shaped fragment.
            row = []
    return proxies


def get_iplist():
    """Download the data5u free-proxy page and extract its 'http' proxies.

    The page is fetched with browser-like headers, its <ul>/<li> text is
    collected with ``MyParser`` and grouped into rows. The resulting list
    is printed and also returned so callers can use it directly.

    Returns:
        list of ``'ip:port'`` strings (possibly empty).

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    hds = {
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
        "Host": "www.data5u.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
        "Referer": "http://www.data5u.com/free/gngn/index.shtml"
    }
    # A timeout keeps an unresponsive server from blocking the caller forever.
    rs = requests.get('http://www.data5u.com/free/gngn/index.shtml',
                      headers=hds, timeout=10)
    mp = MyParser()
    mp.feed(rs.content)
    ip_list = _collect_http_proxies(mp.data)
    print(ip_list)
    return ip_list
# Script entry point: fetch the proxy list (get_iplist prints it) when this
# file is run directly rather than imported.
if __name__ == '__main__':
    get_iplist()
2.使用代理
def http_get(url):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    The call sleeps for 10 seconds before sending the request. Unless
    ``url`` contains ``'cloud-in'``, a random entry of the module-level
    ``proxy_list`` is printed and installed as the session's proxy for the
    ``http`` scheme.

    Args:
        url: address to request.

    Returns:
        The response body as decoded text (``Response.text``).

    Raises:
        IndexError: if a proxy is needed and ``proxy_list`` is empty.
        requests.exceptions.RequestException: if the request fails.
    """
    # Fixed delay before every request -- presumably to throttle the crawl.
    time.sleep(10)
    # The context manager closes the session's pooled connections on exit.
    with requests.session() as s:
        if 'cloud-in' not in url:
            ip_withport = random.choice(proxy_list)
            print(ip_withport)
            s.proxies = {'http': ip_withport}
        rs = s.get(url)
        # Response.text is already decoded; calling .decode() on it again
        # fails on non-ASCII bodies (Python 2) or does not exist (Python 3).
        return rs.text
def http_post(post_url, param, hds):
    """Send an HTTP POST to ``post_url`` and return the response body as text.

    The call sleeps for 10 seconds before sending the request. Unless
    ``post_url`` contains ``'cloud-in'``, a random entry of the module-level
    ``proxy_list`` is printed and installed as the session's proxy for the
    ``http`` scheme.

    Args:
        post_url: address to post to.
        param: request body, passed to requests as ``data``.
        hds: request headers.

    Returns:
        The response body as decoded text (``Response.text``).

    Raises:
        IndexError: if a proxy is needed and ``proxy_list`` is empty.
        requests.exceptions.RequestException: if the request fails.
    """
    # Fixed delay before every request -- presumably to throttle the crawl.
    time.sleep(10)
    # The context manager closes the session's pooled connections on exit.
    with requests.session() as s:
        if 'cloud-in' not in post_url:
            ip_withport = random.choice(proxy_list)
            print(ip_withport)
            s.proxies = {'http': ip_withport}
        rs = s.post(post_url, data=param, headers=hds)
        return rs.text
http://www.cnblogs.com/makexu/

浙公网安备 33010602011771号