import requests
from bs4 import BeautifulSoup

# Accumulates one list of cell strings per table row found on the page.
alluniv = []

def getHTMLText(url):
    # Fetch the page and return its text, or an empty string on any request error.
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = 'utf-8'
        return r.text
    except requests.RequestException:
        return ""
def xunhuang(url):
    # Fetch the same URL 20 times; the responses are discarded.
    for i in range(20):
        getHTMLText(url)
def fillunivlist(soup):
    # Collect the text of every <td> cell, one list per table row;
    # rows without <td> cells (e.g. header rows) are skipped.
    for tr in soup.find_all('tr'):
        ltd = tr.find_all('td')
        if len(ltd) == 0:
            continue
        singleuniv = []
        for td in ltd:
            singleuniv.append(td.string)
        alluniv.append(singleuniv)
def printf():
    # Print blank lines as a visual separator between sections of output.
    print("\n")
    print("\n")
    print("\n")
def main():
    url = "http://www.google.com"
    html = getHTMLText(url)
    xunhuang(url)
    print(html)
    soup = BeautifulSoup(html, "html.parser")
    fillunivlist(soup)
    printf()
    print(soup.title)
    printf()
    print(soup.head)
    printf()
    print(soup.body)

main()
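
# Hedged addition, not part of the original script: a minimal sketch of how the
# rows gathered in alluniv could be displayed once main() has run. The helper
# name printUnivList, the column headers, and the assumption that each row has
# at least three cells are illustrative guesses, since the scraped page's table
# layout is not specified above.
def printUnivList(num):
    # Print a header line and up to `num` collected rows, left-aligned in columns.
    print("{:<10}\t{:<30}\t{:<10}".format("Col 1", "Col 2", "Col 3"))
    for row in alluniv[:num]:
        if len(row) >= 3:
            print("{:<10}\t{:<30}\t{:<10}".format(str(row[0]), str(row[1]), str(row[2])))

# Example usage (uncomment to run after main() has populated alluniv):
# printUnivList(10)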