大学排名定向爬虫
import requests
import bs4
from bs4 import BeautifulSoup


def getHTMLText(url):
    """Fetch ``url`` and return the response body as text.

    The response encoding is set from ``apparent_encoding`` before the text
    is read, so the declared charset of the page is not relied upon.

    Args:
        url: Address of the page to download.

    Returns:
        The page text, or an empty string if the request failed (an error
        message is printed in that case).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request/HTTP failures are swallowed; programming errors and
        # KeyboardInterrupt are allowed to propagate.
        print('发生错误')
        return ''


def fillunivList(ulist, html):
    """Append one ``[rank, name, city]`` entry to ``ulist`` per table row.

    Rows are the tag children of the first ``<tbody>`` in ``html``; the
    first three ``<td>`` cells of each row supply the entry.

    Args:
        ulist: List that is extended in place.
        html: HTML text to parse. If it contains no ``<tbody>`` (for
            example the empty string returned after a failed download),
            ``ulist`` is left unchanged.
    """
    soup = BeautifulSoup(html, 'html.parser')
    tbody = soup.find('tbody')
    if tbody is None:
        return
    for tr in tbody.children:
        # Children also include whitespace text nodes; only tags are rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')  # calling a tag is shorthand for find_all
        if len(tds) < 3:
            # Not a data row with rank/name/city cells; skip it.
            continue
        ulist.append([tds[0].next_element, tds[1].string, tds[2].string])


def _cellText(value):
    """Return ``value`` as plain text suitable for a format spec."""
    # ``.string`` may be None and ``.next_element`` may be a Tag; neither
    # accepts an alignment format spec, so normalise to str first.
    return '' if value is None else str(value)


def printUnivlList(ulist, num):
    """Print a header and up to ``num`` entries of ``ulist`` as a table.

    Args:
        ulist: Sequence of ``[rank, name, city]`` entries.
        num: Maximum number of entries to print. If ``ulist`` holds fewer
            entries, all of them are printed.
    """
    row_format = '{0:^10}\t{1:{3}^10}\t{2:^10}'
    # U+3000 (ideographic space) pads the name column so that CJK text
    # lines up; an ASCII space is narrower than a CJK character.
    fill = chr(12288)
    print(row_format.format('排名', '学校', '城市', fill))
    rows = ulist[:max(num, 0)]
    for u in rows:
        print(row_format.format(
            _cellText(u[0]), _cellText(u[1]), _cellText(u[2]), fill))
    # Report how many rows were actually printed.
    print('Suc' + str(len(rows)))


def main():
    """Download the 2017 ranking page and print the parsed entries."""
    uinfo = []
    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2017.html'
    html = getHTMLText(url)
    fillunivList(uinfo, html)
    # NOTE(review): the original comment said "only 20 schools" while
    # requesting 80; the printer caps the count at the rows available.
    printUnivlList(uinfo, 80)


if __name__ == '__main__':
    main()
posted on 2019-02-20 15:09 ZhangのBlog 阅读(205) 评论(0) 收藏 举报
浙公网安备 33010602011771号