# 待完成案例-爬取台湾学术经典网站: https://p.udpweb.com/diw/i/word

import requests
import urllib3
urllib3.disable_warnings()
from lxml import etree

# Fetch the word-list page and parse it with lxml.
# The script is an unfinished example: it only locates the candidate list
# nodes and dumps the raw HTML for inspection.
url = "https://p.udpweb.com/diw/i/word"


headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36"
}
session = requests.session()
# Assigning requests.adapters.DEFAULT_RETRIES has no effect: HTTPAdapter binds
# that default when its __init__ is defined, and the session's adapters already
# exist. Mount an adapter with an explicit retry count instead.
retry_adapter = requests.adapters.HTTPAdapter(max_retries=5)
session.mount("https://", retry_adapter)
session.mount("http://", retry_adapter)

# NOTE(review): verify=False disables TLS certificate validation (the file
# also silences the matching urllib3 warning). Kept as in the original;
# confirm this is acceptable for the target host.
try:
    # A timeout keeps the script from hanging forever on a stalled server.
    resp = session.get(url, headers=headers, verify=False, timeout=30)
except requests.exceptions.RequestException:
    # One extra attempt, as in the original, but without swallowing
    # KeyboardInterrupt/SystemExit the way a bare ``except:`` does.
    resp = session.get(url, headers=headers, verify=False, timeout=30)
resp.encoding = "UTF-8"
page = etree.HTML(resp.text)
# NOTE(review): the class name suggests a virtually-scrolled list that may be
# rendered client-side; confirm the static HTML actually contains these nodes.
trs = page.xpath('//div[@class="v-virtual-scroll__item"]')
print(resp.text)

 

# posted @ 2022-12-05 23:31  屠魔的少年  阅读(11)  评论(0)    收藏  举报