第五次作业
# Scrape the Baidu hot-search board and print each keyword next to its heat value.
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Baidu "today's hot searches" board.
url = "http://top.baidu.com/buzz.php?p=hotstocks"

# Browser-like User-Agent so the request does not announce itself as a script.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko)Chrome/69.0.3497.100 Safari/537.36'
    )
}

# Actually send the headers (defining them alone has no effect), bound the wait
# with a timeout so a stalled server cannot hang the script, and fail loudly on
# an HTTP error status instead of parsing an error page into empty lists.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()

# Decode using the charset detected from the body rather than the declared one.
response.encoding = response.apparent_encoding
html = response.text

# Parse the page source with the lxml parser.
soup = BeautifulSoup(html, 'lxml')

# Elements with class "keyword" hold the titles; "icon-rise" holds the heat values.
titles = [tag.get_text().strip() for tag in soup.find_all(class_="keyword")]
heats = [tag.get_text().strip() for tag in soup.find_all(class_="icon-rise")]

data = [titles, heats]
print(data)

# One row per list, labelled "标题" (title) and "热度" (heat); transpose so each
# scraped entry becomes a row with those two columns.
table = pd.DataFrame(data, index=["标题", "热度"])
print(table.T)
首先确定要爬取的网站:http://top.baidu.com/buzz.php?p=hotstocks
然后打开网页的源代码,找到所需要的标签:class 为 "keyword" 的元素(标题)和 class 为 "icon-rise" 的元素(热度)。