import requests
import re
import pymysql
import struct
import socket
from bs4 import BeautifulSoup
# Open the connection to the local MySQL database that receives the scraped
# proxy list. Keyword arguments are used because PyMySQL 1.0+ no longer
# accepts positional connection parameters.
# NOTE(review): credentials are hard-coded; consider reading them from the
# environment or a config file.
db = pymysql.connect(
    host='localhost',
    user='root',
    password='oracle',
    database='xici_proxy',
    use_unicode=True,
    charset="utf8",
)
cursor = db.cursor()

# Recreate the table from scratch on every run. The DROP must use exactly the
# same (lower-case) name as the CREATE: table names are case-sensitive on
# many MySQL installations, so dropping "IPLIST" would leave "iplist" in
# place and make the CREATE below fail on the second run.
cursor.execute("DROP TABLE IF EXISTS iplist")

# NOTE(review): in MySQL the `long` type is an alias for MEDIUMTEXT, so the
# integer form of the IP is presumably stored as text -- confirm whether
# INT UNSIGNED was intended.
sql = '''create table iplist(
id INT NOT NULL AUTO_INCREMENT,
ip long,
port int,
address char(40),
anony char(20),
protocol char(20),
speed char(40),
time char(40),
PRIMARY KEY ( id )
);
'''
cursor.execute(sql)
# Scrape listing pages 1 and 2 of xicidaili.com and insert one row per proxy
# into the `iplist` table, committing after each successful insert.

# Parameterized INSERT: the values come from scraped (untrusted) HTML, so
# they are passed separately and escaped by the driver instead of being
# formatted into the SQL text.
insert_sql = (
    'insert into iplist '
    '(ip,port,address,anony,protocol,speed,time) '
    'values(%s,%s,%s,%s,%s,%s,%s)'
)

# The request headers do not change between pages, so build them once.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36'}

try:
    for page in range(1, 3):
        url = 'http://www.xicidaili.com/nn/{}'.format(page)
        # A timeout keeps the script from hanging forever on a dead server.
        data = requests.get(url=url, headers=headers, timeout=10).text
        soup = BeautifulSoup(data, 'html.parser')
        trs1 = soup.find('table', id='ip_list')
        if trs1 is None:
            # No proxy table in the response (e.g. the request was blocked
            # or the page layout changed); skip this page.
            print('ip_list table not found:', url)
            continue

        # The first <tr> is skipped, presumably because it is the header row.
        for tr in trs1.find_all('tr')[1:]:
            tds = tr.find_all('td')
            if len(tds) < 10:
                # Cells up to index 9 are read below; skip short rows.
                continue
            speed_div = tds[6].find('div')
            if speed_div is None or speed_div.get('title') is None:
                continue

            ip = tds[1].text.strip()
            try:
                # Dotted-quad string -> unsigned 32-bit integer, interpreting
                # the four packed bytes in network (big-endian) order.
                ip_num = struct.unpack("!I", socket.inet_aton(ip))[0]
            except OSError:
                # Not a valid IPv4 address; skip the row.
                continue

            port = tds[2].text.strip()
            address = tds[3].text.strip()
            anony = tds[4].text.strip()
            protocol = tds[5].text.strip()
            speed = speed_div['title'].strip()
            check_time = tds[9].text.strip()

            params = (ip_num, port, address, anony, protocol, speed, check_time)
            # Show the statement exactly as it will be sent to the server.
            print(cursor.mogrify(insert_sql, params))
            try:
                cursor.execute(insert_sql, params)
                db.commit()
            except pymysql.MySQLError as exc:
                # Undo the failed insert and carry on with the next row.
                db.rollback()
                print('回滚', exc)
            print('ip:', ip_num, 'port:', port, 'address:', address, 'anony:', anony, 'protocol:', protocol, 'speed:', speed, 'time:', check_time)
finally:
    # Always release the connection, even if a request or parse step raised.
    db.close()