追忆似水年华--记第一次写python爬虫

 1 import requests
 2 import turtle
 3 import time
 4 from lxml import etree
 5 
 6 
 7 wb = open('1.txt', 'w')
 8 wb.close()
 9 url = 'https://www.xxbiqudu.com/132_132381/171133333.html'
10 har = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53'}
11 while True:
12     wb = open('1.txt', 'a')
13     xsnr = open('xsnr.txt','a')
14     try:
15         response = requests.get(url)
16         wb.write(('chenggong' + '\n'))
17         wb.close()
18     except :
19         try:
20             response = requests.get(url,
21             headers=har)
22         except :
23             try:
24                 time.sleep(10)
25                 response = requests.get(url,
26                 headers=har)
27             except :
28                 try:
29                     time.sleep(60)
30                     response = requests.get(url,
31                     headers=har)
32                 except :
33                     wb.write((url + '\n'))
34                     wb.close()
35     print('......................................................................................................')
36     html = etree.HTML(response.text)
37     xp = html.xpath('//a/@href')
38     print(xp)
39     url = xp[17]
40     print(url)
41     ho = html.xpath('//h1/text()')
42     xsnr.write((ho[0] + '\n'))
43     xiaosuo = html.xpath('//*[@id="content"]/text()')
44     del xiaosuo[-1]
45     print(xiaosuo)
46     for aaa in range(len(xiaosuo)):
47         xiaosuox = xiaosuo[aaa]
48         xsnr.write((xiaosuox+ '\n'))
49     xsnr.close()
50     if (url == 'https://www.xxbiqudu.com/132_132381/177764835.html'):
51         break

 

posted @ 2022-05-28 21:28  404CatNotFound  阅读(1)  评论(0)    收藏  举报