爬虫 17k小说

# coding=gbk
import requests
from lxml import etree


# Download every chapter of one 17k.com novel and append the chapter text
# to 17kk.txt, one paragraph text node per line.
# NOTE(review): both XPath expressions depend on the site's current page
# layout -- confirm them against the live pages if nothing gets scraped.

url = 'https://www.17k.com/list/3357123.html'
base_url = 'https://www.17k.com'

# The same browser-like User-Agent is sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36',
}

# Seconds to wait on the server; requests has no default timeout, so
# without this a stalled connection would block the script forever.
timeout = 10

# Fetch the chapter index and collect the href of every chapter link.
response = requests.get(url, headers=headers, timeout=timeout)
html1 = etree.HTML(response.text)
goods_li = html1.xpath("/html/body/div[5]/dl/dd/a/@href")

# Open the output file once for the whole run instead of once per paragraph.
# newline='' disables newline translation so the explicit '\r\n' written
# below is not expanded to '\r\r\n' on Windows.
with open("17kk.txt", "a", encoding='utf-8', newline='') as f:
    for href in goods_li:
        # The hrefs are concatenated onto the site root to form the page URL.
        text_url = base_url + href
        response = requests.get(text_url, headers=headers, timeout=timeout)
        # Force UTF-8 decoding of the chapter page rather than relying on
        # the encoding requests guesses from the response headers.
        response.encoding = 'utf-8'
        html1 = etree.HTML(response.text)
        goods_li1 = html1.xpath("//*[@id='readArea']/div[1]/div[2]/p//text()")
        # Each text node becomes its own CRLF-terminated line.
        f.writelines(line + '\r\n' for line in goods_li1)

 

posted @ 2021-10-25 16:07  杨鑫Zz  阅读(54)  评论(0)  编辑  收藏  举报