from urllib.request import urlopen
from bs4 import BeautifulSoup
import pymysql
# Connect to the local MySQL database "ISO" (credentials here are placeholders)
conn = pymysql.connect(host='127.0.0.1', user='root', passwd='password',
                       port=3306, db='ISO', charset='utf8')
cursor = conn.cursor()
if conn:
    print('database [ISO] connected successfully!')
# Fetch the ISO technical-committee listing page and parse it
url = 'https://www.iso.org/standards-catalogue/browse-by-tc.html'
address_url = 'https://www.iso.org'
text = urlopen(url).read()
soup = BeautifulSoup(text, 'html.parser')

# The committee list lives in the table with id "datatable-committees"
table = soup.find('table', id='datatable-committees')
tbody = table.find('tbody')
TC = set()
sql = 'insert into tc_basic values (%s, %s, %s, %s)'
i = 1
for tr in tbody.find_all('tr'):           # iterate table rows only, skipping text nodes
    td = tr.find_all('td')
    record_id = i                          # avoid shadowing the built-in id()
    tc = tr.a.string.strip()               # committee reference, e.g. "ISO/TC 1"
    title = td[1].string.strip()           # committee title
    address = address_url + tr.a['href']   # absolute URL of the committee page
    cursor.execute(sql, (record_id, tc, title, address))
    conn.commit()
    i += 1
print('Scraping finished! Collected', i - 1, 'records in total.')
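# Optional sanity check (not part of the original script): read back a few rows
# to confirm the inserts landed, then close the cursor and connection.
cursor.execute('SELECT * FROM tc_basic LIMIT 5')
for row in cursor.fetchall():
    print(row)
cursor.close()
conn.close()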