1 from selenium import webdriver
2 from selenium.webdriver.chrome.options import Options
3 from selenium.webdriver.common.by import By
4 from lxml import etree
5 import time
6
7
class lagou():
    """Scrape Python job postings from lagou.com through a Selenium-driven browser.

    The list page at ``self.url`` is loaded, every ``position_link`` anchor on
    it is opened in a separate tab, and the salary / city / work-experience
    fields are read from the ``job_request`` block of each detail page.
    """

    def __init__(self):
        """Start a Chrome WebDriver session bound to the 360Chrome binary."""
        # The browser binary is a hard-coded local install path; chromedriver
        # is resolved by Selenium itself.
        self.browers_path = r'C:\Users\Administrator\AppData\Local\360Chrome\Chrome\Application\360chrome.exe'
        self.chrome_option = Options()
        self.chrome_option.binary_location = self.browers_path
        self.driver = webdriver.Chrome(options=self.chrome_option)
        self.url = "https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput="

    def run(self):
        """Load the list page and scrape every job linked from it.

        Returns:
            list: one ``{'salary', 'city', 'workyears'}`` dict per detail page
            that could be parsed.
        """
        self.driver.get(self.url)
        # NOTE(review): "body-btn" is presumably the button that dismisses an
        # overlay on first load -- confirm. find_elements() is used so that a
        # missing button does not abort the run.
        buttons = self.driver.find_elements(By.XPATH, '//div[@class="body-btn"]')
        if buttons:
            buttons[0].click()
        # Fixed pause to let the list render before the page source is read.
        time.sleep(5)
        return self.parse_path(self.driver.page_source)

    def parse_path(self, sourse):
        """Visit every job link found in the list-page HTML.

        Args:
            sourse: HTML text of the job list page.

        Returns:
            list: the position dicts returned by ``xiangqingye``; detail
            pages that could not be parsed are left out.
        """
        position_list = []
        html = etree.HTML(sourse)
        links = html.xpath('//a[@class="position_link"]//@href')
        for link in links:
            position = self.xiangqingye(link)
            if position is not None:
                position_list.append(position)
        return position_list

    def xiangqingye(self, ur):
        """Open one job detail page in a new tab and extract its summary.

        Args:
            ur: URL of the job detail page.

        Returns:
            dict or None: ``{'salary', 'city', 'workyears'}``, or ``None``
            when no new tab appeared or the page lacks the expected
            ``job_request`` spans.
        """
        list_handle = self.driver.current_window_handle
        known_handles = set(self.driver.window_handles)
        # Pass the URL as a script argument instead of formatting it into the
        # JavaScript source, so quotes in the URL cannot break the script.
        self.driver.execute_script("window.open(arguments[0])", ur)
        new_handles = [
            handle for handle in self.driver.window_handles
            if handle not in known_handles
        ]
        if not new_handles:
            # No tab was created, so there is nothing to parse or close.
            return None
        self.driver.switch_to.window(new_handles[0])
        try:
            # Fixed pause to let the detail page render.
            time.sleep(2)
            html = etree.HTML(self.driver.page_source)
            job_resqust_spans = html.xpath('//dd[@class="job_request"]//span')
            texts = []
            for span in job_resqust_spans[:3]:
                nodes = span.xpath('.//text()')
                # Only the first text node of each span is used.
                texts.append(nodes[0] if nodes else '')
            position = self._build_position(texts)
            if position is not None:
                print(position)
            time.sleep(1)
            return position
        finally:
            # Close the detail tab and return focus to the list page even
            # when parsing fails, so the next link starts from a clean state.
            self.driver.close()
            self.driver.switch_to.window(list_handle)

    @staticmethod
    def _build_position(texts):
        """Build the position dict from the first three span texts.

        Args:
            texts: text of the ``job_request`` spans, in page order.

        Returns:
            dict or None: the stripped salary, city and workyears values, or
            ``None`` when fewer than three texts are available.
        """
        if len(texts) < 3:
            return None
        return {
            'salary': texts[0].strip(),
            'city': texts[1].strip(),
            'workyears': texts[2].strip(),
        }
58
if __name__ == '__main__':
    # Script entry point: run one scrape and print whatever run() returns.
    lagou_shili = lagou()
    try:
        q = lagou_shili.run()
        print(q)
    finally:
        # Always end the WebDriver session so the browser and driver
        # processes do not outlive the script, even when scraping fails.
        lagou_shili.driver.quit()
63
# browers_driver.execute_script('window.open("https://www.douban.com")')
# window_list = browers_driver.window_handles  # get the list of window handles
# browers_driver.switch_to.window(window_list[1])  # switch browers_driver's focus to the given window
# print(browers_driver.current_url)  # print the URL of the window browers_driver currently points to