获取微博的热点前十名的标题信息以及热度数据

import os

import pandas as pd
import requests
from lxml import etree
 5 headers = {'User-Agent':'abc'}
 6 url =  "https://tophub.today/"
 7 
 8 html = requests.get(url,headers = headers)
 9 #print(html.text)#查看页面结构和内容
10 html = html.content.decode('utf-8')
11 html = etree.HTML(html)
12 div = html.xpath("//div[@id='node-1']/div")
13 for a in div:
14     titles = a.xpath(".//span[@class='t']/text()")[0:10]
15     numbers = a.xpath(".//span[@class='e']/text()")[0:10]
16 os.chdir(r'C:\Users\战神者一号\Desktop')
17 c={'今日热议':titles,'热度':numbers}
18 file = pd.DataFrame(c)
19 print(file)
20 file.to_csv('微博热点前十名的标题信息以及热度数据.csv')

      

posted @ 2020-03-18 11:21  杨洪强  阅读(698)  评论(0)    收藏  举报