# 某小说网站之内容获取 (Fetching the content of a post from a novel website)

import requests
from lxml import etree

# Browser-like request headers sent with the page fetch below.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'no-cache',
    'Pragma': 'no-cache',
    'Proxy-Connection': 'keep-alive',
    'Referer': 'http://www.h528.com/',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
}

# Fetch the post page.  requests applies no timeout by default, so an explicit
# one keeps the script from hanging forever on an unresponsive server.
# verify=False only has an effect if the request ends up on HTTPS (for example
# through a redirect); the URL itself is plain HTTP.
response = requests.get(
    'http://www.h528.com/post/25126.html',
    headers=headers,
    verify=False,
    timeout=10,
)
# Fail loudly on a 4xx/5xx answer instead of parsing an error page.
response.raise_for_status()

# Parse the returned markup; `etree.HTML` plus `.xpath` is the lxml API.
# NOTE(review): response.text relies on the charset requests infers from the
# response headers -- confirm the printed text is not garbled for this site.
html = etree.HTML(response.text)

# Despite its name, `href` holds the direct text nodes of every <p> inside
# <div class="entry">, not link targets.
href = html.xpath('//div[@class="entry"]/p/text()')
print(''.join(href))
# posted @ 2024-04-21 15:20  萧海~  阅读(2)  评论(0)  编辑  收藏  举报