WebClient下载代理的IP+Port获取方法

在做下载的时候通常会用到代理!

下面是获取下载代理的代码:从 cnproxy、heibai、proxy360 等十几个代理列表网站抓取代理的 IP 和端口。其中有些网站对抓取做了限制(例如端口号是通过 JS 拼出来的),具体怎么解决的见代码吧!

View Code
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using System.Net;
using System.IO;
using System.Threading.Tasks;
using System.Threading;
using System.Data.SqlClient;
using System.Data;
using HtmlAgilityPack;
using System.Web;
using System.Text.RegularExpressions;

namespace ProxyTools
{
    /// <summary>
    /// Collects proxy addresses ("ip:port") from a set of public proxy-list web sites and
    /// tests each one by downloading a reference URL through it.
    /// </summary>
    /// <remarks>
    /// Typical use: construct (this scrapes the selected sites), check <see cref="Error"/>,
    /// call <see cref="Start"/>, then read <see cref="proxyResultList"/>.
    /// </remarks>
    public class ProxyTest
    {
        /// <summary>
        /// True when the most recently constructed instance could not download the reference URL.
        /// Static, so it is shared by all instances and is not thread-safe.
        /// </summary>
        public static bool Error = false;

        /// <summary>Sum of the per-site result counts (duplicates across sites are counted).</summary>
        public int GetProxyTotal = 0;
        /// <summary>Number of distinct, parseable proxy addresses queued for testing.</summary>
        public int ProxyHashCount = 0;
        /// <summary>
        /// After construction: every candidate address. After <see cref="Start"/>: only the
        /// addresses that passed every test round. Never null.
        /// </summary>
        public List<string> proxyResultList;

        private int RepeateTimes = 5;       // number of download test rounds
        private int MaxThread = 300;        // maximum degree of parallelism for the test
        private int FileLength;             // size in bytes of the reference download (0 = failed)
        private readonly ProxyDetail[] Proxys;      // proxies under test; never null
        private Uri TestUri = new Uri("http://www.baidu.com/");         // URL downloaded to test a proxy

        // Headers used by InitWebClient for the proxy test downloads.
        private const string DefaultUserAgent = @"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; CIBA; .NET4.0C; .NET4.0E)";
        private const string DefaultPostContentType = "application/x-www-form-urlencoded";

        // User-Agent sent when scraping the proxy-list sites.
        private const string ScraperUserAgent = @"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1";

        // Compiled once per process instead of once per instance / per call.
        private static readonly Regex ipPattern = new Regex(@"^\d+\.\d+\.\d+\.\d+$", RegexOptions.Compiled);
        private static readonly Regex numPattern = new Regex(@"^\d+$", RegexOptions.Compiled);
        private static readonly Regex numCnPattern = new Regex(@"(?<word>[a-z])=""(?<num>\d)""", RegexOptions.Compiled);
        private static readonly Regex ipPortPattern = new Regex(@"(?is)(?<ip>\d+\.\d+\.\d+\.\d+)[^+]*?(?<word>(\+[a-z])+)", RegexOptions.Compiled);
        private static readonly Regex dailiipPattern = new Regex(@"(?is)<td>(?<ip>(\d{1,3}\.){3}\d{1,3})</td>.*?<td>(?<port>\d+)</td>", RegexOptions.Compiled);
        private static readonly Regex xkerPattern = new Regex(@"(?is)(?<ip>(\d{1,3}\.){3}\d{1,3})([:\s]|(</?div>.*?(port\d*"">)))(?<port>\d+)", RegexOptions.Compiled);
        private static readonly Regex daili18Pattern = new Regex(@"(?is)<td>(?<ip>(\d{1,3}\.){3}\d{1,3})</td><td>(?<port>\d+)", RegexOptions.Compiled);
        private static readonly Regex wl35Pattern = new Regex(@"(?is)<td[^>]*?>(?<ip>(\d{1,3}\.){3}\d{1,3})\s*</td>[^>]*>(?<port>\d+)", RegexOptions.Compiled);
        private static readonly Regex proxiedPattern = new Regex(@"<td>(?<ip>(\d{1,3}\.){3}\d{1,3})</td>\s*?<td[^>]*>(?<port>\d+)</td>", RegexOptions.Compiled);
        private static readonly Regex numnntimePattern = new Regex(@"(?<word>[a-z])=(?<num>\d)", RegexOptions.Compiled);
        private static readonly Regex nntimePattern = new Regex(@"<td>(?<ip>(\d{1,3}\.){3}\d{1,3}).*?(?<word>(\+[a-z])*)\)", RegexOptions.Compiled);
        private static readonly Regex realboosterPattern = new Regex(@"(?<ip>(\d{1,3}\.){3}\d{1,3}):(?<port>\d+)</td>", RegexOptions.Compiled);
        private static readonly Regex simpleproxylistPattern = new Regex(@"(?<ip>(\d{1,3}\.){3}\d{1,3}).*?(?<word>(&#\d{2})+)</td>", RegexOptions.Compiled);
        private static readonly Regex proxyiplistPattern = new Regex(@"(?<ip>(\d{1,3}\.){3}\d{1,3}):(?<port>\d+)</td>", RegexOptions.Compiled);
        private static readonly Regex websPattern = new Regex(@"(?<ip>(\d{1,3}\.){3}\d{1,3}):(?<port>\d+)", RegexOptions.Compiled);

        /// <summary>
        /// Downloads the reference URL once (to learn its size), then scrapes the selected
        /// sites for candidate proxies.
        /// </summary>
        /// <param name="maxThread">Maximum degree of parallelism used by <see cref="Start"/>
        /// (passed to ParallelOptions.MaxDegreeOfParallelism, which rejects 0 and values below -1).</param>
        /// <param name="repeateTimes">Number of test rounds run by <see cref="Start"/>.</param>
        /// <param name="downUrl">Absolute URL downloaded through each proxy.</param>
        /// <param name="checkWeb">Flags selecting which proxy-list sites to scrape.</param>
        /// <exception cref="UriFormatException"><paramref name="downUrl"/> is not a valid absolute URI.</exception>
        public ProxyTest(int maxThread,int repeateTimes,string downUrl,CheckProxyWeb checkWeb)
        {
            this.MaxThread = maxThread;
            this.RepeateTimes = repeateTimes;
            this.TestUri = new Uri(downUrl);

            // Safe defaults: if we bail out below, Start() must still see empty (not null) collections.
            proxyResultList = new List<string>();
            Proxys = new ProxyDetail[0];

            FileLength = GetFileLength();

            // Assign (rather than only ever set) the flag so a past failure does not
            // stick to every instance created afterwards.
            Error = FileLength == 0;
            if (Error)
            {
                return;
            }

            List<ProxyDetail> pList = new List<ProxyDetail>();

            foreach (string address in GetProxyFromWeb(checkWeb))
            {
                WebProxy proxy;
                try { proxy = new WebProxy(address); }
                catch (UriFormatException) { continue; }   // scraped garbage; skip it

                ProxyDetail pDetail = new ProxyDetail();
                pDetail.Proxy = proxy;
                pDetail.ProxyString = address;
                pList.Add(pDetail);
            }

            proxyResultList = pList.Select(a => a.ProxyString).ToList();
            ProxyHashCount = pList.Count;
            Proxys = pList.ToArray();
        }

        /// <summary>Downloads <c>TestUri</c> directly (no proxy) and returns its size in bytes, or 0 on any failure.</summary>
        private int GetFileLength()
        {
            try
            {
                using (WebClient client = new WebClient())
                {
                    return client.DownloadData(TestUri).Length;
                }
            }
            catch (Exception) { return 0; }    // best effort: the caller treats 0 as "unreachable"
        }

        /// <summary>
        /// Downloads <paramref name="uri"/> with browser-like headers and decodes the body with
        /// the named encoding. Returns null when the download fails.
        /// </summary>
        /// <remarks>
        /// A fresh WebClient is used for every request: WebClient does not reliably keep
        /// User-Agent / Content-Type / Referer in its Headers collection after a request has
        /// been issued, so reusing one client across pages can send later requests without them.
        /// </remarks>
        private static string FetchText(Uri uri, string encodingName)
        {
            byte[] bytes;
            try
            {
                using (WebClient client = new WebClient())
                {
                    // NOTE(review): default credentials are offered to third-party sites if they
                    // challenge for them; kept for compatibility, consider removing.
                    client.Credentials = CredentialCache.DefaultCredentials;
                    client.Headers.Add("User-Agent", ScraperUserAgent);
                    client.Headers.Add("Content-Type", DefaultPostContentType);
                    client.Headers.Add("Referer", uri.AbsoluteUri);
                    bytes = client.DownloadData(uri);
                }
            }
            catch (WebException) { return null; }

            return Encoding.GetEncoding(encodingName).GetString(bytes);
        }

        /// <summary>Adds "ip:port" for every match of a pattern exposing <c>ip</c> and <c>port</c> groups.</summary>
        private static void AddIpPortMatches(Regex pattern, string content, HashSet<string> target)
        {
            foreach (Match m in pattern.Matches(content))
            {
                target.Add(m.Groups["ip"].Value + ":" + m.Groups["port"].Value);
            }
        }

        /// <summary>Fetches one page, HTML-decodes it and returns every "ip:port" the pattern finds.</summary>
        private static HashSet<string> ScrapeIpPort(string url, string encodingName, Regex pattern)
        {
            HashSet<string> found = new HashSet<string>();
            string raw = FetchText(new Uri(url), encodingName);
            if (raw != null)
            {
                AddIpPortMatches(pattern, WebUtility.HtmlDecode(raw), found);
            }
            return found;
        }

        /// <summary>
        /// Handles pages that hide the port behind script variables: the page defines
        /// letter=digit pairs (<paramref name="letterPattern"/>) and each row spells the port
        /// as "+a+b+c" (<paramref name="rowPattern"/>, group <c>word</c>).
        /// Rows whose port cannot be fully resolved, or is empty, are skipped.
        /// </summary>
        private static void AddObfuscatedMatches(Regex letterPattern, Regex rowPattern, string content, HashSet<string> target)
        {
            Dictionary<string, string> letterToDigit = new Dictionary<string, string>();
            foreach (Match m in letterPattern.Matches(content))
            {
                string letter = m.Groups["word"].Value;
                if (!letterToDigit.ContainsKey(letter))     // first definition wins
                {
                    letterToDigit.Add(letter, m.Groups["num"].Value);
                }
            }

            foreach (Match m in rowPattern.Matches(content))
            {
                StringBuilder port = new StringBuilder();
                bool resolved = true;
                string[] letters = m.Groups["word"].Value.Split(new char[] { '+' }, StringSplitOptions.RemoveEmptyEntries);
                foreach (string letter in letters)
                {
                    string digit;
                    if (!letterToDigit.TryGetValue(letter, out digit))
                    {
                        resolved = false;
                        break;
                    }
                    port.Append(digit);
                }

                if (resolved && port.Length > 0)
                {
                    target.Add(m.Groups["ip"].Value + ":" + port);
                }
            }
        }

        /// <summary>
        /// Parses <paramref name="html"/>, selects rows with <paramref name="rowXPath"/> and reads
        /// IP and port from each row via the given XPaths (evaluated against the row re-parsed as
        /// its own document). Returns false when the page contains no matching rows.
        /// </summary>
        private static bool AddFromNodes(string html, string rowXPath, string ipXPath, string portXPath, HashSet<string> target)
        {
            HtmlDocument page = new HtmlDocument();
            page.LoadHtml(html);

            var rows = page.DocumentNode.SelectNodes(rowXPath);
            if (rows == null)
            {
                return false;
            }

            HtmlDocument rowDoc = new HtmlDocument();
            foreach (var row in rows)
            {
                rowDoc.LoadHtml(row.OuterHtml);
                var ipNode = rowDoc.DocumentNode.SelectSingleNode(ipXPath);
                var portNode = rowDoc.DocumentNode.SelectSingleNode(portXPath);
                if (ipNode == null || portNode == null)
                {
                    continue;
                }

                string ip = ipNode.InnerText.Trim();
                string port = portNode.InnerText.Trim();
                if (ipPattern.IsMatch(ip) && numPattern.IsMatch(port))
                {
                    target.Add(ip + ":" + port);
                }
            }
            return true;
        }

        /// <summary>Scrapes pages 1-10 of cnproxy.com; this site builds the port in JavaScript.</summary>
        private HashSet<string> GetFromCnproxy()
        {
            HashSet<string> found = new HashSet<string>();
            for (int pageIndex = 1; pageIndex < 11; ++pageIndex)
            {
                Uri uri = new Uri(string.Format("http://www.cnproxy.com/proxy{0}.html", pageIndex));
                string content = FetchText(uri, "gb2312");
                if (content == null)
                {
                    continue;
                }
                AddObfuscatedMatches(numCnPattern, ipPortPattern, content, found);
            }
            return found;
        }

        /// <summary>Scrapes up to ten listing pages of heibai.net, stopping at the first page without rows.</summary>
        private HashSet<string> GetFromHeibai()
        {
            HashSet<string> found = new HashSet<string>();
            for (int pageIndex = 1; pageIndex < 11; ++pageIndex)
            {
                Uri uri = new Uri(string.Format("http://www.heibai.net/proxy/index.php?act=list&port=&type=&country=&page={0}", pageIndex));
                string raw = FetchText(uri, "utf-8");
                if (raw == null)
                {
                    continue;
                }
                if (!AddFromNodes(WebUtility.HtmlDecode(raw), @"//tr[@class='cells']", @"//td[2]", @"//td[3]", found))
                {
                    break;
                }
            }
            return found;
        }

        /// <summary>Scrapes the proxy360.cn front page.</summary>
        private HashSet<string> GetFromProxy360()
        {
            HashSet<string> found = new HashSet<string>();
            string raw = FetchText(new Uri("http://www.proxy360.cn/default.aspx"), "utf-8");
            if (raw != null)
            {
                AddFromNodes(WebUtility.HtmlDecode(raw), @"//div[@class='proxylistitem']",
                    @"//span[@class='tbBottomLine'][1]", @"//span[@class='tbBottomLine'][2]", found);
            }
            return found;
        }

        /// <summary>Scrapes dailiip.com.</summary>
        private HashSet<string> GetFromDailiip()
        {
            return ScrapeIpPort("http://www.dailiip.com/", "gb2312", dailiipPattern);
        }

        /// <summary>Scrapes xker.com.</summary>
        private HashSet<string> GetFromXker()
        {
            return ScrapeIpPort("http://www.xker.com/ip/", "gb2312", xkerPattern);
        }

        /// <summary>Scrapes 18daili.com.</summary>
        private HashSet<string> GetFrom18daili()
        {
            return ScrapeIpPort("http://www.18daili.com/SearchLocationForAjax.php", "UTF-8", daili18Pattern);
        }

        /// <summary>Scrapes 35wl.com.</summary>
        private HashSet<string> GetFrom35wl()
        {
            return ScrapeIpPort("http://www.35wl.com/tools/dlfwq.htm", "gb2312", wl35Pattern);
        }

        /// <summary>Scrapes the four list pages of 51proxied.com; a page that fails to download is skipped.</summary>
        private HashSet<string> GetFrom51proxied()
        {
            string[] urls =
            {
                "http://www.51proxied.com/http_fast.html",
                "http://www.51proxied.com/http_anonymous.html",
                "http://www.51proxied.com/http_non_anonymous.html",
                "http://www.51proxied.com/socks5.html"
            };

            HashSet<string> found = new HashSet<string>();
            foreach (string url in urls)
            {
                found.UnionWith(ScrapeIpPort(url, "UTF-8", proxiedPattern));
            }
            return found;
        }

        /// <summary>Scrapes nntime.com; like cnproxy, the port is spelled with script variables.</summary>
        private HashSet<string> GetFromnntime()
        {
            HashSet<string> found = new HashSet<string>();
            string raw = FetchText(new Uri("http://nntime.com/"), "iso-8859-1");
            if (raw != null)
            {
                AddObfuscatedMatches(numnntimePattern, nntimePattern, WebUtility.HtmlDecode(raw), found);
            }
            return found;
        }

        /// <summary>Scrapes realbooster.com.</summary>
        private HashSet<string> GetFromrealbooster()
        {
            return ScrapeIpPort("http://realbooster.com/seo-services/free-proxy-list-tool/", "UTF-8", realboosterPattern);
        }

        /// <summary>
        /// Scrapes pages 1-19 of simpleproxylist.com (country=CN); the port digits are written as
        /// numeric character references ("&amp;#NN") which are converted back to characters here.
        /// </summary>
        private HashSet<string> GetFromsimpleproxylist()
        {
            HashSet<string> found = new HashSet<string>();
            for (int i = 1; i < 20; ++i)
            {
                Uri uri = new Uri("http://simpleproxylist.com/search.php?p=" + i.ToString() + "&country=CN");
                string raw = FetchText(uri, "UTF-8");
                if (raw == null)
                {
                    continue;
                }

                string content = HttpUtility.HtmlDecode(raw);
                foreach (Match m in simpleproxylistPattern.Matches(content))
                {
                    StringBuilder address = new StringBuilder(m.Groups["ip"].Value).Append(':');
                    bool valid = true;
                    string[] codes = m.Groups["word"].Value.Split(new string[] { "&#" }, StringSplitOptions.RemoveEmptyEntries);
                    foreach (string code in codes)
                    {
                        int charCode;
                        if (!int.TryParse(code, out charCode))
                        {
                            valid = false;
                            break;
                        }
                        address.Append((char)charCode);
                    }

                    if (valid)
                    {
                        found.Add(address.ToString());
                    }
                }
            }
            return found;
        }

        /// <summary>Scrapes proxy-ip-list.com (the page is HTML-decoded twice, as the original did).</summary>
        private static HashSet<string> GetFromproxyiplist()
        {
            HashSet<string> found = new HashSet<string>();
            string raw = FetchText(new Uri("http://proxy-ip-list.com/"), "UTF-8");
            if (raw != null)
            {
                AddIpPortMatches(proxyiplistPattern, HttpUtility.HtmlDecode(HttpUtility.HtmlDecode(raw)), found);
            }
            return found;
        }

        /// <summary>Scrapes anonymous-proxy-list.webs.com (the page is HTML-decoded twice, as the original did).</summary>
        private static HashSet<string> GetFromWebs()
        {
            HashSet<string> found = new HashSet<string>();
            string raw = FetchText(new Uri("http://anonymous-proxy-list.webs.com/"), "ISO-8859-1");
            if (raw != null)
            {
                AddIpPortMatches(websPattern, HttpUtility.HtmlDecode(HttpUtility.HtmlDecode(raw)), found);
            }
            return found;
        }

        /// <summary>
        /// Runs one scraper if it is enabled, adds its result count to <see cref="GetProxyTotal"/>
        /// and merges its addresses into <paramref name="merged"/>.
        /// </summary>
        private void Collect(bool enabled, Func<HashSet<string>> scraper, HashSet<string> merged)
        {
            if (!enabled)
            {
                return;
            }
            HashSet<string> found = scraper();
            GetProxyTotal += found.Count;
            merged.UnionWith(found);
        }

        /// <summary>Queries every site enabled in <paramref name="checkWeb"/> and returns the distinct addresses found.</summary>
        private HashSet<string> GetProxyFromWeb(CheckProxyWeb checkWeb)
        {
            HashSet<string> proxyHash = new HashSet<string>();

            Collect(checkWeb.CnProxy, GetFromCnproxy, proxyHash);
            Collect(checkWeb.Heibai, GetFromHeibai, proxyHash);
            Collect(checkWeb.Proxy360, GetFromProxy360, proxyHash);
            Collect(checkWeb.Dailiip, GetFromDailiip, proxyHash);
            Collect(checkWeb.Xker, GetFromXker, proxyHash);
            Collect(checkWeb.Daili18, GetFrom18daili, proxyHash);
            Collect(checkWeb.Wl35, GetFrom35wl, proxyHash);
            Collect(checkWeb.Proxied51, GetFrom51proxied, proxyHash);
            Collect(checkWeb.Nntime, GetFromnntime, proxyHash);
            Collect(checkWeb.Realbooster, GetFromrealbooster, proxyHash);
            Collect(checkWeb.Simpleproxylist, GetFromsimpleproxylist, proxyHash);
            Collect(checkWeb.Proxyiplist, GetFromproxyiplist, proxyHash);
            Collect(checkWeb.Webs, GetFromWebs, proxyHash);

            return proxyHash;
        }

        /// <summary>
        /// Tests every candidate proxy by downloading the reference URL through it, for the
        /// configured number of rounds. A proxy that throws, or returns a body whose length
        /// differs from the direct download, is marked failed and skipped in later rounds.
        /// When finished, <see cref="proxyResultList"/> holds the proxies that never failed.
        /// </summary>
        /// <param name="help">Receives one log line per attempt plus a final completion line.</param>
        public void Start(ProxyHelp help)
        {
            ParallelOptions taskParallet = new ParallelOptions();
            taskParallet.MaxDegreeOfParallelism = MaxThread;
            int count = 0;      // attempts finished so far; only used in log lines

            for (int i = 0; i < RepeateTimes; i++)
            {
                Parallel.ForEach(Proxys, taskParallet, current =>
                {
                    if (current.Error != null)
                    {
                        return;     // already failed in an earlier round
                    }

                    Stopwatch sw = new Stopwatch();
                    using (WebClient wc = new WebClient())
                    {
                        InitWebClient(wc, TestUri, TestUri);
                        wc.Proxy = current.Proxy;
                        sw.Restart();
                        try
                        {
                            byte[] data = wc.DownloadData(TestUri);
                            sw.Stop();
                            current.Milliseconds = sw.ElapsedMilliseconds;
                            current.ContentLength = data.Length;

                            if (current.ContentLength != FileLength)
                            {
                                current.Error = new Exception("下载错误");
                            }

                            string log = string.Format("{0} - {1} - {2} - {3}", current.ProxyString, current.ContentLength, current.Milliseconds, count);
                            help.AddText(log);
                        }
                        catch (Exception e)
                        {
                            current.Error = e;
                            string log = string.Format("{0} - {1} - {2}", current.ProxyString, e.Message, count);
                            help.AddText(log);
                        }
                    }

                    Interlocked.Increment(ref count);
                });

                // Slowest first for the next round.
                Array.Sort(Proxys, new Comparison<ProxyDetail>((a, b) => b.Milliseconds.CompareTo(a.Milliseconds)));
            }

            SaveProxy();
            help.AddText("测试代理地址结束!");
            Console.WriteLine("结束");
        }

        /// <summary>
        /// Publishes the proxies that never failed into <see cref="proxyResultList"/>.
        /// (This is the place to add sorting or a latency cut-off if only the fastest are wanted.)
        /// </summary>
        private void SaveProxy()
        {
            proxyResultList = Proxys.Where(a => a.Error == null).Select(b=>b.ProxyString).ToList();
        }

        /// <summary>
        /// Prepares a WebClient for a test download: default credentials, browser-like headers,
        /// <paramref name="pUri"/> as Referer, and Expect: 100-continue disabled on the
        /// service point for <paramref name="uri"/>.
        /// </summary>
        private static void InitWebClient(WebClient webClient, Uri uri, Uri pUri)
        {
            webClient.Credentials = CredentialCache.DefaultCredentials;
            webClient.Headers.Add("User-Agent", DefaultUserAgent);
            webClient.Headers.Add("Content-Type", DefaultPostContentType);
            webClient.Headers.Add("Referer", pUri.AbsoluteUri);
            ServicePoint servicePoint = ServicePointManager.FindServicePoint(uri);
            if (servicePoint.Expect100Continue == true)
                servicePoint.Expect100Continue = false;
        }

    }

    /// <summary>
    /// Per-proxy state used while testing a proxy.
    /// </summary>
    public class ProxyDetail
    {
        // The proxy object assigned to the WebClient for the test download.
        public WebProxy Proxy;
        // The address string the proxy was created from.
        public string ProxyString;
        // Size in bytes of the most recent download made through this proxy.
        public int ContentLength;
        // Elapsed time in milliseconds of the most recent successful download.
        public long Milliseconds;
        // Null while the proxy is considered good; otherwise the failure that disqualified it.
        public Exception Error;
    }

    // Flags selecting which proxy-list web sites ProxyTest scrapes; a site is queried only
    // when its flag is true.
    public class CheckProxyWeb
    {
        public bool CnProxy;            // www.cnproxy.com
        public bool Heibai;             // www.heibai.net
        public bool Proxy360;           // www.proxy360.cn
        public bool Dailiip;            // www.dailiip.com
        public bool Xker;               // www.xker.com
        public bool Daili18;            // www.18daili.com
        public bool Wl35;               // www.35wl.com
        public bool Proxied51;          // www.51proxied.com
        public bool Nntime;             // nntime.com
        public bool Realbooster;        // realbooster.com
        public bool Simpleproxylist;    // simpleproxylist.com
        public bool Proxyiplist;        // proxy-ip-list.com
        public bool Webs;               // anonymous-proxy-list.webs.com
    }
}

 

其中调用的代码就很简单了

由于我是隔段时间获取一次,所以用到了Timer的内容,也贴在这里了

View Code
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Runtime.InteropServices;
using System.Threading;
using System.IO;

namespace ProxyTest
{
/// <summary>
/// Console host that runs the proxy test on a timer: first run one minute after start,
/// then once every 24 hours. Progress is appended to log/proxyLog.txt.
/// </summary>
class Program
{
    // Created once here; Main only configures it.
    static System.Timers.Timer proxyTimer = new System.Timers.Timer();

    static void Main(string[] args)
    {
        proxyTimer.Interval = 1000 * 60;    // first run after one minute
        proxyTimer.Elapsed += new System.Timers.ElapsedEventHandler(DoProxyTest);
        proxyTimer.Start();                 // Start() already sets Enabled = true

        // Keep the process alive; all work happens in the timer callback.
        Thread.Sleep(Timeout.Infinite);
    }

    // 0 = idle, 1 = a test run is in progress. An int so it can be claimed atomically:
    // Elapsed is raised on thread-pool threads and callbacks may overlap.
    static int proxyIsRunning = 0;

    /// <summary>
    /// Timer callback: runs one proxy test unless a previous run is still in progress,
    /// logging the ProxyTable row count before and after.
    /// </summary>
    static void DoProxyTest(object sender, System.Timers.ElapsedEventArgs e)
    {
        Write("Start to running ProxyTest function!");

        // Atomically claim the "running" slot; if someone else holds it, skip this tick.
        if (Interlocked.CompareExchange(ref proxyIsRunning, 1, 0) != 0)
        {
            Write("End ProxyTest function! by : ProxyTest is running now!" + Environment.NewLine);
            return;
        }

        try
        {
            proxyTimer.Interval = 1000 * 60 * 60 * 24; // from now on, run once a day

            int rowCount = Pmars.DataBaseHelper.GetHelper().GetRowCount("ProxyTable");
            Write("ProxyTable Count:" + rowCount);
            Pmars.ProxyTest proxy = new Pmars.ProxyTest();
            proxy.Start();
            rowCount = Pmars.DataBaseHelper.GetHelper().GetRowCount("ProxyTable");
            Write("ProxyTable Count:" + rowCount);

            Write("End ProxyTest function!" + Environment.NewLine);
        }
        finally
        {
            // Always release the slot, even if the run threw, so later ticks are not blocked forever.
            Interlocked.Exchange(ref proxyIsRunning, 0);
        }
    }

    /// <summary>Appends a timestamped line to log/proxyLog.txt, creating the log directory if needed.</summary>
    static void Write(string contents)
    {
        Directory.CreateDirectory("log");   // no-op when it already exists
        File.AppendAllText(@"log/proxyLog.txt", DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + " : " + contents + Environment.NewLine);
    }
}
}

 

第一部分的代码后来改了很多(比如 ProxyTest 的构造函数现在需要传参数),所以上面 timer 部分的代码已经不能直接配合使用了。不过 timer 部分的代码就不删掉了,把调用的地方稍微改一下其实就可以用!

贴在这里,学习一下!




posted @ 2012-01-20 14:52  pmars  阅读(2996)  评论(3)    收藏  举报