1 import requests
2 import turtle
3 import time
4 from lxml import etree
5
6
# Chapter crawler: starting at `url`, download a page, append its heading and
# body text to NOVEL_PATH, then follow a link on that page to the next one.
# The crawl stops when the link to follow equals END_URL (that page itself is
# not downloaded), when a page cannot be fetched, or when the expected link is
# missing.

LOG_PATH = '1.txt'       # one line per page: 'chenggong' on success, the URL on failure
NOVEL_PATH = 'xsnr.txt'  # accumulated headings and body text (opened in append mode)

url = 'https://www.xxbiqudu.com/132_132381/171133333.html'
END_URL = 'https://www.xxbiqudu.com/132_132381/177764835.html'

har = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53'}

# Position, within all <a href> values of a page, of the link to follow next.
# NOTE(review): presumably the site's "next chapter" link; this depends on the
# page layout -- confirm against the live site.
NEXT_LINK_INDEX = 17

# (seconds to wait before the attempt, headers to send). The first attempt is
# made without custom headers; the following ones send the browser user-agent
# with an increasing back-off.
FETCH_ATTEMPTS = ((0, None), (0, har), (10, har), (60, har))

# Without a timeout a stalled connection would block the crawl forever.
REQUEST_TIMEOUT = 30


def _fetch(page_url):
    """Download *page_url*, retrying according to FETCH_ATTEMPTS.

    Returns the ``requests.Response`` of the first attempt that does not
    raise, or ``None`` when every attempt raised a request error.
    """
    for delay, headers in FETCH_ATTEMPTS:
        if delay:
            time.sleep(delay)
        try:
            return requests.get(page_url, headers=headers,
                                timeout=REQUEST_TIMEOUT)
        except requests.RequestException:
            # Network-level failure: fall through to the next attempt.
            continue
    return None


# Start every run with an empty fetch log.
with open(LOG_PATH, 'w', encoding='utf-8'):
    pass

while True:
    response = _fetch(url)

    # Record the outcome; the context manager closes the log on every path.
    with open(LOG_PATH, 'a', encoding='utf-8') as wb:
        if response is None:
            wb.write(url + '\n')
        else:
            wb.write('chenggong' + '\n')

    if response is None:
        print('......................................................................................................')
        # There is no page to parse and therefore no next link to follow;
        # continuing would only re-process the previous page.
        break

    html = etree.HTML(response.text)

    xp = html.xpath('//a/@href')
    print(xp)
    next_url = xp[NEXT_LINK_INDEX] if len(xp) > NEXT_LINK_INDEX else None
    print(next_url)

    ho = html.xpath('//h1/text()')
    # The last text node of the content element is dropped, as in the
    # original script (presumably trailing non-story text -- confirm).
    # Slicing, unlike `del lst[-1]`, is safe when the list is empty.
    xiaosuo = html.xpath('//*[@id="content"]/text()')[:-1]
    print(xiaosuo)

    # An explicit encoding keeps non-ASCII text writable whatever the
    # platform's default locale encoding is.
    with open(NOVEL_PATH, 'a', encoding='utf-8') as xsnr:
        if ho:
            xsnr.write(ho[0] + '\n')
        xsnr.writelines(line + '\n' for line in xiaosuo)

    if next_url is None:
        # Unexpected page layout: the link to follow is not where expected.
        break
    url = next_url
    if url == END_URL:
        break