demo1

from urllib.parse import urljoin

import requests
from lxml import etree

# Script: fetch one list page and print an absolute URL for every link found
# inside the tables with class "tbspan".

# Scheme + host used to turn the hrefs found on the list page into full URLs.
# NOTE(review): the list page below is requested over https while links are
# built with http -- confirm which scheme is intended.
BASE_DOMAIN = 'http://dytt8.net'

# Browser-like headers sent with the list-page request.
headers = {
    'Host': 'dytt8.net',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
    'Cookie': 'UM_distinctid=170ffb6ddae42f-03e426d466cb69-396f7f07-13c680-170ffb6ddaf68a; CNZZDATA1260535040=1199264958-1584833667-https%253A%252F%252Fdytt8.net%252F%7C1584833667; XLA_CI=3afc5746fa9b4f157414c8c9e8419720',
}

# List page whose links are extracted.
url = 'https://dytt8.net/html/gndy/dyzz/list_23_3.html'

# A timeout keeps a stalled connection from hanging the script forever.
response = requests.get(url, headers=headers, timeout=10)
# Stop on an HTTP error status instead of parsing an error page.
response.raise_for_status()

# Decode the raw bytes as GBK, dropping any bytes that cannot be decoded.
text = response.content.decode('gbk', errors='ignore')
html = etree.HTML(text)

# Collect the href of every <a> nested under a <table class="tbspan">.
detail_urls = html.xpath("//table[@class='tbspan']//a/@href")
for urla in detail_urls:
    # urljoin yields the same result as concatenation for root-relative hrefs
    # and additionally copes with absolute or non-root-relative ones.
    print(urljoin(BASE_DOMAIN, urla))



获取网址
posted @ 2020-04-07 21:56  1王新  阅读(1)  评论(0)    收藏  举报