Fork me on Github

爬取猪八戒网（zbj.com）

import csv
from pathlib import Path
from urllib.parse import quote

import requests
from lxml import etree
 3 
 4 kw = input('任务关键字:')
 5 url = "https://task.zbj.com/hall/list/?k={}&s=1".format(kw)
 6 print(url)
 7 header = {
 8     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
 9                   "Chrome/60.0.3100.0 Safari/537.36 "
10 }
11 
12 resp = requests.get(url, headers=header)
13 # print(resp.text)
14 
15 # 使用xpath解析html
16 html = etree.HTML(resp.text)
17 
18 divs = html.xpath('//*[@id="utopia_widget_6"]/div/div[1]/div')
19 # 写入文件
20 with open('txt/八戒.txt', mode='w', encoding='utf-8') as f:
21     for div in divs:
22         price = div.xpath('./div[4]/span/text()')[0]
23         flag = div.xpath('./div[4]/div[1]/a[2]/text()')
24         if not flag:
25             continue
26         fl = flag[0]
27         if price == '可议价':
28             continue
29         if fl != '参与投标':
30             continue
31         title = div.xpath('./div[1]/h4[1]/@title')[0]
32         href = 'https:' + div.xpath('./div[1]/h4[1]//a[1]/@href')[0]
33         price = price[1:-3]
34         f.write(price + ',' + title + ',' + href + '\n')
35 f.close()
36 print('SUCCESS!!!\n')

 

posted @ 2022-05-29 22:39  昂昂呀  阅读(31)  评论(0)    收藏  举报