# coding=gbk
import requests
from lxml import etree
# Download the index page at `url`, follow every link matched by the index
# XPath, and append the paragraph text found under #readArea on each linked
# page to 17kk.txt (presumably one chapter per linked page -- verify against
# the site's markup).
url = 'https://www.17k.com/list/3357123.html'

# The same User-Agent is sent with the index request and every page request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36',
}

# requests applies no timeout by default, so a stalled connection would hang
# the script indefinitely; fail with an exception after this many seconds.
REQUEST_TIMEOUT = 30


def _fetch_html(page_url, encoding=None):
    """Download *page_url* and return it parsed as an lxml HTML tree.

    When *encoding* is given it replaces the encoding requests guessed for
    the response before the body is decoded to text.
    """
    response = requests.get(page_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    if encoding is not None:
        response.encoding = encoding
    return etree.HTML(response.text)


def _append_paragraphs(paragraphs, path="17kk.txt"):
    """Append every text fragment in *paragraphs* to *path*, one per line.

    Nothing is opened or created when *paragraphs* is empty. The file is
    opened once per call and closed by the ``with`` block, so no explicit
    ``close()`` is needed afterwards.
    """
    if not paragraphs:
        return
    with open(path, "a+", encoding='utf-8') as f:
        for paragraph in paragraphs:
            f.write(paragraph)
            # NOTE(review): the file is in text mode, so on Windows the
            # '\n' here is itself translated to '\r\n' -- confirm whether
            # a plain '\n' was intended.
            f.write('\r\n')


# Relative hrefs collected from the index page.
goods_li = _fetch_html(url).xpath("/html/body/div[5]/dl/dd/a/@href")
for href in goods_li:
    # Linked pages are decoded as UTF-8 regardless of what requests guessed.
    page = _fetch_html('https://www.17k.com' + href, encoding='utf-8')
    _append_paragraphs(
        page.xpath("//*[@id='readArea']/div[1]/div[2]/p//text()")
    )