25

  中国大学排名网站  http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html

2018年中国大学排名

#1
import requests
from bs4 import BeautifulSoup
import bs4


def getHTMLText(url):
    """Fetch ``url`` and return the response body as text.

    The response encoding is replaced by requests' content-based guess
    (``apparent_encoding``) before the text is decoded.

    Returns the string "404" when the request fails or the server answers
    with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are treated as "page unavailable";
        # KeyboardInterrupt and programming errors are no longer swallowed.
        return "404"


def fillUnivList(ulist, html):
    """Append one ``[cell0, cell1, cell2]`` entry to ``ulist`` per table row.

    The rows are the ``<tr>`` children of the first ``<tbody>`` in ``html``.
    Each stored cell is the ``.string`` of the corresponding ``<td>``.
    If the document has no ``<tbody>`` (for example when the download failed
    and ``html`` is the "404" placeholder) ``ulist`` is left unchanged.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        return
    for tr in tbody.children:
        # .children also yields whitespace text nodes between rows; skip them.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')
        if len(tds) < 3:
            # Not a full data row (header/spacer); indexing would fail.
            continue
        ulist.append([tds[0].string, tds[1].string, tds[2].string])


def printUnivList(ulist, num):
    """Print a header line followed by the second field of up to ``num`` entries.

    Fewer than ``num`` lines are printed when ``ulist`` is shorter than that.
    """
    print("{:^6}".format("学校名称"))
    for u in ulist[:num]:
        # .string is None for cells with mixed content; print a blank name
        # instead of failing inside format().
        print("{:^6}".format(u[1] or ""))


def main():
    """Download the 2018 ranking page and print the first 20 entries' names."""
    uinfo = []
    url = 'http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html'
    html = getHTMLText(url)
    fillUnivList(uinfo, html)
    printUnivList(uinfo, 20)


if __name__ == "__main__":
    main()

  

 

 

#2
import requests
from bs4 import BeautifulSoup
import bs4

# Module-level accumulator: one list of cell strings per scraped table row.
allUniv = []


def getHTMLText(url):
    """Fetch ``url`` and return the response body as text.

    The response encoding is replaced by requests' content-based guess
    (``apparent_encoding``) before the text is decoded.

    Returns an empty string when the request fails or the server answers
    with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are treated as "no page";
        # KeyboardInterrupt and programming errors are no longer swallowed.
        return ""


def fillUnivList(soup):
    """Append the cell strings of every ``<tr>`` in ``soup`` to ``allUniv``.

    Rows that contain no ``<td>`` cells (such as header rows built from
    ``<th>``) are skipped. Each appended entry is a list holding the
    ``.string`` of every ``<td>`` in the row.
    """
    for tr in soup.find_all('tr'):
        ltd = tr.find_all('td')
        if not ltd:
            continue
        allUniv.append([td.string for td in ltd])


def printUnivList(num):
    """Print a header line followed by the second cell of up to ``num`` rows.

    Fewer than ``num`` lines are printed when ``allUniv`` holds fewer rows.
    """
    print("{:^10}".format("学校名称"))
    for u in allUniv[:num]:
        # A row may have a single cell, or a cell whose .string is None
        # (mixed content); print a blank name rather than raising.
        name = u[1] if len(u) > 1 and u[1] is not None else ""
        print("{:^10}".format(name))


def main():
    """Download the 2018 ranking page and print the first 20 rows' names."""
    url = 'http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html'
    html = getHTMLText(url)
    soup = BeautifulSoup(html, "html.parser")
    fillUnivList(soup)
    printUnivList(20)


if __name__ == "__main__":
    main()

  

 

posted on 2020-05-13 16:23  #f#f  阅读(104)  评论(0)    收藏  举报

导航