1 # coding: utf-8
2
3 import urllib2
4 import re
5 import time
6
def getDL(page, timeout=10, html=None):
    """Collect the proxies listed on one page of www.xicidaili.com/nt/.

    Args:
        page: Page number substituted into the listing URL.
        timeout: Seconds to wait on the listing server before giving up.
        html: Optional, already-downloaded page markup.  When supplied it is
            parsed as-is and no network request is made.

    Returns:
        A list of ``(protocol, ip, port)`` tuples of strings, one per table
        row matched, where ``protocol`` is ``'HTTP'`` or ``'HTTPS'``.  The
        list is empty when no row matches.

    Raises:
        urllib2.URLError: if the listing page cannot be downloaded.
    """
    if html is None:
        url = 'http://www.xicidaili.com/nt/{}'.format(page)
        header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'
        }
        req = urllib2.Request(url, headers=header)
        # Without a timeout a stalled server would block the crawl forever.
        res = urllib2.urlopen(req, timeout=timeout)
        try:
            html = res.read()
        finally:
            # Python 2 responses are not context managers; close explicitly.
            res.close()

    # re.S lets '.' cross line breaks, so the row can be skipped over lazily
    # without the capturing '(.|\n)*?' groups; only ip, port and protocol
    # are captured.
    row_re = re.compile(
        r'<tr class=.*?<td>(\d+\.\d+\.\d+\.\d+)</td>'
        r'.*?<td>(\d+)</td>'
        r'.*?<td>(HTTPS?)</td>',
        re.S)
    return [(proto, ip, port) for ip, port, proto in row_re.findall(html)]
22
def testDL(ipstr, testUrl='http://2017.ip138.com/ic.asp', timeout=10):
    """Probe a single proxy and record it in ``ok.txt`` when it works.

    Args:
        ipstr: ``(protocol, ip, port)`` tuple as returned by ``getDL``.
        testUrl: URL fetched through the proxy to check that it responds.
        timeout: Seconds to wait for the proxy before treating it as dead.

    Returns:
        True if the test URL was fetched through the proxy, False otherwise.
        A line ``"protocol ip port"`` is appended to ``ok.txt`` on success.
    """
    proxy = urllib2.ProxyHandler({'http': "{}:{}".format(ipstr[1], ipstr[2])})
    # Keep the opener private.  install_opener() would make this (possibly
    # dead) proxy the process-wide default, so every later urlopen() call,
    # including getDL's listing downloads, would be routed through it.
    opener = urllib2.build_opener(proxy)

    ok = False
    try:
        res = opener.open(testUrl, timeout=timeout)
        try:
            body = res.read()
        finally:
            res.close()
    except Exception as e:
        # Deliberately broad: a bad proxy can fail with URL errors, socket
        # timeouts, malformed HTTP replies, etc., and all mean "unusable".
        print("******** ×, {} -- {}".format(ipstr, e))
    else:
        print("********************* √ {} -- {}".format(ipstr, body))

        # Outside the try block so that a failure to write the results file
        # is not misreported as a proxy failure.
        with open("ok.txt", "a") as f:
            f.write("{} {} {}\n".format(ipstr[0], ipstr[1], ipstr[2]))
        ok = True

    # Pause between probes to throttle the request rate.
    time.sleep(1)
    return ok
41
def startTask(pages=5):
    """Crawl the first ``pages`` listing pages and probe every proxy found.

    Args:
        pages: Number of listing pages to fetch, starting from page 1.

    A page whose download fails is reported and skipped so that one bad
    request does not abort the rest of the crawl.
    """
    for page in xrange(1, pages + 1):
        try:
            proxies = getDL(page)
        except IOError as err:
            # urllib2.URLError and socket timeouts are IOError subclasses.
            print("page {} skipped -- {}".format(page, err))
            continue
        for item in proxies:
            testDL(item)
47
# Script entry point: start the crawl only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    startTask()