获取微博的热点前十名的标题信息以及热度数据
"""Scrape the top ten Weibo hot-topic titles and heat values and save them as CSV.

The script downloads the tophub.today front page, reads the entries found
under the ``node-1`` card (presumably the Weibo board -- confirm against the
live page), prints them as a table and writes that table to a CSV file.
"""
import os

import pandas as pd
import requests
from lxml import etree

headers = {'User-Agent': 'abc'}
url = "https://tophub.today/"

# How many entries to keep from the board.
top_n = 10
# Where the CSV is written; the directory must already exist.
output_dir = r'C:\Users\战神者一号\Desktop'
output_name = '微博热点前十名的标题信息以及热度数据.csv'

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being parsed as data.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
# Tip: print(response.text) here to inspect the page structure and content.

page = etree.HTML(response.content.decode('utf-8'))
containers = page.xpath("//div[@id='node-1']/div")
if not containers:
    # Without this guard the script would fail later with an unrelated
    # NameError, because the loop below would never bind titles/numbers.
    raise SystemExit(
        "No element matched //div[@id='node-1']/div; "
        "the page layout may have changed."
    )

titles, numbers = [], []
for container in containers:
    # As in the original loop, the values from the last matched container
    # are the ones that end up in the table.
    titles = container.xpath(".//span[@class='t']/text()")[:top_n]
    numbers = container.xpath(".//span[@class='e']/text()")[:top_n]

# NOTE(review): pandas raises ValueError when the two lists differ in length
# (e.g. an entry without a heat value) -- confirm whether that can happen.
table = pd.DataFrame({'今日热议': titles, '热度': numbers})
print(table)

# Write straight to the target path instead of changing the process-wide
# working directory just to place one file.
table.to_csv(os.path.join(output_dir, output_name))
浙公网安备 33010602011771号