from urllib.parse import urljoin
from urllib.request import urlopen

from bs4 import BeautifulSoup
# Scrape the ISO "browse by technical committee" catalogue page and print one
# line per committee: its reference, its title and the absolute URL of its page.
url = 'https://www.iso.org/standards-catalogue/browse-by-tc.html'
addrss_url = 'https://www.iso.org'


def format_committee(reference, title, href):
    """Return the printable line for a single committee.

    Args:
        reference: Text of the committee link (first ``<a>`` of the row).
        title: Text of the second cell of the row.
        href: Link target taken from the row; may be relative or absolute.

    Returns:
        ``'<reference>(title:<title>)(address:<absolute url>)'``.
    """
    # urljoin resolves relative hrefs against the site root and leaves
    # already-absolute hrefs untouched, unlike plain string concatenation.
    return '{}(title:{})(address:{})'.format(
        reference, title, urljoin(addrss_url, href))


def collect_committees(html):
    """Extract the committee lines from the catalogue page markup.

    Args:
        html: Markup of the catalogue page (``str`` or ``bytes``).

    Returns:
        A set of strings built by ``format_committee``, one per table row
        that has at least two ``<td>`` cells and a link with an ``href``.

    Raises:
        ValueError: If the ``datatable-committees`` table is not in the page.
    """
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('table', id='datatable-committees')
    if table is None:
        raise ValueError(
            "table with id 'datatable-committees' not found in the page")

    # html.parser never synthesises a <tbody>, so fall back to the table
    # itself when the markup does not contain one.
    body = table.find('tbody')
    if body is None:
        body = table

    committees = set()
    # Select <tr> elements explicitly: iterating the tag directly also yields
    # the whitespace text nodes that sit between the rows.
    for row in body.find_all('tr'):
        cells = row.find_all('td')
        link = row.find('a', href=True)
        if len(cells) < 2 or link is None:
            # Header rows, spacer rows or rows without a link carry no entry.
            continue
        # get_text() also works when a cell wraps its text in nested tags,
        # a case where the .string attribute is None.
        committees.add(format_committee(
            link.get_text(' ', strip=True),
            cells[1].get_text(' ', strip=True),
            link['href'],
        ))
    return committees


def main():
    """Download the catalogue page and print the committees, sorted."""
    # The context manager closes the HTTP response once it has been read.
    with urlopen(url) as response:
        html = response.read()
    committees = collect_committees(html)
    print('\n'.join(sorted(committees, key=str.lower)))
    print('采集完毕!')


if __name__ == '__main__':
    main()