23.获取代理IP并使用代理
1.获取代理
# -*- coding:utf-8 -*-
import HTMLParser
import requests
import json
import re
class MyParser(HTMLParser.HTMLParser):
    """HTML parser that records the text found inside <ul> and <li> elements.

    Text chunks are appended, in document order, to ``self.data``. Only the
    most recently opened tracked tag is remembered in ``self.processing``;
    once that tag closes, no text is recorded until another tracked tag opens.
    """

    def __init__(self):
        # Explicit base-class call, matching the module-style import above.
        HTMLParser.HTMLParser.__init__(self)
        self.data = []                   # captured text chunks
        self.processing = None           # tracked tag currently open, if any
        self.handledtags = ['ul', 'li']  # tag names whose text is captured

    def handle_starttag(self, tag, attrs):
        """Remember ``tag`` as the active one when it is a tracked tag."""
        if tag not in self.handledtags:
            return
        self.processing = tag

    def handle_data(self, data):
        """Store ``data`` while a tracked tag is active."""
        if not self.processing:
            return
        self.data.append(data)

    def handle_endtag(self, tag):
        """Stop capturing when the active tag is closed."""
        if tag != self.processing:
            return
        self.processing = None
            
def _collect_http_proxies(fields):
    """Group parsed text cells into 9-cell rows and keep the 'http' ones.

    ``fields`` is the sequence of text chunks captured by ``MyParser``.
    Cells are skipped until one looks like a dotted IPv4 address; that cell
    and the following eight form one row (presumably one line of the proxy
    table -- confirm against the page layout). A row is kept when its fourth
    cell equals ``'http'``; cells 0 and 1 are joined as ``'ip:port'``.

    Returns a list of ``'ip:port'`` strings.
    """
    ip_pattern = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
    proxies = []
    row = []
    for raw in fields:
        # Only newlines are stripped, so the 9-cell row count is unchanged.
        cell = raw.strip('\n')
        if not row:
            # Not inside a row yet: wait for a cell that looks like an IP.
            if ip_pattern.match(cell):
                row.append(cell)
            continue
        row.append(cell)
        if len(row) == 9:
            if row[3] == 'http':
                proxies.append('%s:%s' % (row[0], row[1]))
            row = []
    return proxies


def get_iplist():
    """Scrape the data5u free-proxy page and return its HTTP proxies.

    Downloads ``http://www.data5u.com/free/gngn/index.shtml``, extracts the
    text of its <ul>/<li> elements with ``MyParser`` and keeps the entries
    whose protocol cell is ``'http'``.

    The list is printed (as before) and also returned, so callers can use it
    as a proxy pool.

    Returns:
        list of ``'ip:port'`` strings; empty when no proxy was found.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
    """
    hds = {
        "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
        "Host": "www.data5u.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
        "Referer": "http://www.data5u.com/free/gngn/index.shtml"
    }
    # A timeout keeps an unresponsive server from blocking the caller forever.
    rs = requests.get('http://www.data5u.com/free/gngn/index.shtml',
                      headers=hds, timeout=10)
    mp = MyParser()
    mp.feed(rs.content)
    ip_list = _collect_http_proxies(mp.data)

    print(ip_list)
    return ip_list
 
# Script entry point: fetch the proxy list only when this file is run directly.
if __name__ == '__main__':
    get_iplist()
2.使用代理
def http_get(url):
    """Fetch ``url`` with an HTTP GET request and return the body as text.

    Sleeps 10 seconds before every request. Unless ``url`` contains
    ``'cloud-in'``, the request goes through an HTTP proxy chosen at random
    from ``proxy_list`` (defined outside this function) and the chosen proxy
    is printed.

    Returns:
        The decoded response body (``Response.text``).
    """
    time.sleep(10)
    # The context manager closes the session's connections when done.
    with requests.session() as s:
        if 'cloud-in' not in url:
            ip_withport = random.choice(proxy_list)
            print(ip_withport)
            s.proxies = {'http': ip_withport}

        rs = s.get(url)
        # rs.text is already decoded text. Calling .decode('utf-8') on it
        # raised UnicodeEncodeError for non-ASCII bodies on Python 2 (and
        # AttributeError on Python 3), so it is returned as-is.
        return rs.text
 
 
def http_post(post_url, param, hds):
    """Send an HTTP POST request and return the response body as text.

    Sleeps 10 seconds before every request. Unless ``post_url`` contains
    ``'cloud-in'``, the request goes through an HTTP proxy chosen at random
    from ``proxy_list`` (defined outside this function) and the chosen proxy
    is printed.

    Args:
        post_url: URL to post to.
        param: request body, passed to requests as ``data``.
        hds: request headers, passed to requests as ``headers``.

    Returns:
        The decoded response body (``Response.text``).
    """
    time.sleep(10)
    # The context manager closes the session's connections when done.
    with requests.session() as s:
        if 'cloud-in' not in post_url:
            ip_withport = random.choice(proxy_list)
            print(ip_withport)
            s.proxies = {'http': ip_withport}
        rs = s.post(post_url, data=param, headers=hds)
        return rs.text
Source: http://www.cnblogs.com/makexu/

 
                
            
         
         浙公网安备 33010602011771号
浙公网安备 33010602011771号