爬取百度热搜榜
1.打开网站http://top.baidu.com/buzz?b=1&fr=topindex
2.在页面上单击右键，选择“查看网页源代码”，找到热搜标题（class 为 list-title）和搜索指数（class 为 icon-rise）所在的标签
3.用 requests 请求页面，用 BeautifulSoup 解析并提取数据，再用 pandas 整理成表格输出
"""Scrape the Baidu hot-search board and print it as a table.

The script downloads the page at ``url``, collects the text of every
element with class ``list-title`` (titles) and every element with class
``icon-rise`` (search index values), prints the raw lists, then prints
them as a two-column pandas DataFrame.
"""
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Baidu "today's hot searches" board.
url = 'http://top.baidu.com/buzz?b=1&fr=topindex'

# Browser-like User-Agent so the request does not look like a script.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/69.0.3497.100 Safari/537.36'}


def fetch_html(page_url: str = url, timeout: float = 10) -> str:
    """Download *page_url* and return its decoded HTML text.

    Args:
        page_url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded with the encoding detected from its content.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeout.
    """
    # The original script built `headers` but never sent it; pass it here so
    # the User-Agent disguise actually takes effect. The timeout keeps a
    # stalled connection from hanging the script forever.
    response = requests.get(page_url, headers=headers, timeout=timeout)
    response.raise_for_status()
    # Use the encoding guessed from the body rather than the HTTP header,
    # so that Chinese text is decoded correctly.
    response.encoding = response.apparent_encoding
    return response.text


def extract_texts(soup: BeautifulSoup, css_class: str) -> list:
    """Return the stripped text of every element in *soup* with *css_class*."""
    return [tag.get_text().strip() for tag in soup.find_all(class_=css_class)]


def main() -> None:
    """Fetch the board, print the raw lists, then print them as a table."""
    # Parse once; the original parsed the page a second time into an unused
    # `table` variable.
    soup = BeautifulSoup(fetch_html(), 'lxml')

    titles = extract_texts(soup, "list-title")
    # NOTE(review): only elements with class "icon-rise" are collected, so
    # `hots` is not guaranteed to have the same length as `titles` — confirm
    # against the live page markup.
    hots = extract_texts(soup, "icon-rise")

    final = [titles, hots]
    print(final)

    # One row per list, labelled; transpose so each hot search is one row.
    s = pd.DataFrame(final, index=["标题", "搜索指数"])
    print(s.T)


if __name__ == "__main__":
    main()

浙公网安备 33010602011771号