Python 爬虫练习 2 - 百度热榜

import requests
from lxml import etree

# Fetch the Baidu real-time hot-search board and print one line per entry.
url = 'https://top.baidu.com/board?tab=realtime'
headers = {
    # Browser-like User-Agent sent along with the request.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'
}
# requests applies no timeout by default; bound the wait so a stalled
# connection cannot hang the script forever.
bd_rot = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of silently parsing an error page.
bd_rot.raise_for_status()
data = etree.HTML(bd_rot.text)
# Every matching <div> is treated as one entry of the board.
rot_data = data.xpath('//div[@class="category-wrap_iQLoo horizontal_1eKyQ"]')
for j, i in enumerate(rot_data):
    # xpath() returns a list of text nodes; joining yields a single string
    # (empty when nothing matched).
    title = i.xpath('./div[@class="content_1YWBm"]/a/div[1]/text()')
    # NOTE(review): div[3] / div[2] presumably hold the index label and the
    # index number respectively -- confirm against the live page markup.
    rot_text = i.xpath('./div[@class="trend_2RttY hide-icon"]/div[3]/text()')
    rot_num = i.xpath('./div[@class="trend_2RttY hide-icon"]/div[2]/text()')
    rot1 = "".join(title).strip()
    rot2 = "".join(rot_text).strip()
    # Strip the number too and separate it with a real space; the original
    # joined it with an empty string and leaked raw HTML whitespace.
    rot3 = "".join(rot_num).strip()
    rot = f"标题:{rot1} {rot2} {rot3}"
    # The first entry is labelled "置顶" (pinned) instead of its index; use a
    # separate name rather than rebinding the loop counter.
    label = "置顶" if j == 0 else j
    print(label, rot)

 

posted @ 2023-09-09 23:26  未央央  阅读(96)  评论(0)    收藏  举报