Python + 前程无忧 (51job) crawler

A small Selenium script: search 51job for a keyword in a chosen city, page through the results, and write each listing to a CSV file.

from selenium import webdriver
from selenium.webdriver.common.by import By
import csv
import time
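# Requires the selenium package and a matching ChromeDriver; recent Selenium
# releases (>= 4.6) can download a driver automatically via Selenium Manager.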


class QCWY:

    def __init__(self, keyword, city, maxpagenum):
        self.keyword = keyword
        self.city = city
        self.maxpagenum = maxpagenum

    def run(self):
        driver = webdriver.Chrome()
        driver.implicitly_wait(10)
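        # NOTE: implicitly_wait is driver-wide: every find_element call from
        # here on polls for up to 10 s before raising NoSuchElementException.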
        driver.get('https://www.51job.com/')
        # Type the search keyword
        driver.find_element(By.ID, 'kwdselectid').send_keys(self.keyword)
        # Open the city selector
        driver.find_element(By.ID, 'work_position_input').click()
        time.sleep(1)

        # Deselect whatever cities are already ticked at the top of the panel
        # selectcity = driver.find_elements(By.CLASS_NAME, 'tin')
        selectcity = driver.find_elements(By.CSS_SELECTOR, '#work_position_click_multiple_selected > span')
        for one in selectcity:
            one.click()
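        # (Clicking a selected-city tag is assumed to untick it, leaving the
        # panel with no city selected.)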

        # Now pick the city we actually want from the hot-city list
        cityeles = driver.find_elements(By.CSS_SELECTOR, '#work_position_click_center_right_list_000000 em')

        target = None
        for cityele in cityeles:
            # Stop at the entry whose name matches
            if cityele.text == self.city:
                target = cityele
                break
        # No entry with that name was found
        if target is None:
            input(f'{self.city} is not in the hot-city list; click it manually, then press Enter to continue...')
        else:
            target.click()
        # Save the city selection
        driver.find_element(By.ID, 'work_position_click_bottom_save').click()

        # Click search
        driver.find_element(By.CSS_SELECTOR, 'div.ush > button').click()
        driver.implicitly_wait(10)
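        # NOTE: this implicitly_wait call only resets the timeout; it does not
        # pause here. For load-dependent steps an explicit wait is sturdier;
        # see the WebDriverWait sketch after the listing.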

        with open(f'前程无忧招聘——关键字_{self.keyword}_城市_{self.city}.csv', 'w', newline='', encoding='gbk') as f:
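            # gbk keeps the Chinese text readable when the CSV is opened in
            # Excel on a zh-CN Windows machine; use 'utf-8-sig' for UTF-8 output.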

            # Columns: job title, post date, salary, location, experience, benefits
            f_csv = csv.DictWriter(f, ['职位名称',
                                       '发布时间',
                                       '薪资',
                                       '工作地点',
                                       '经验',
                                       '福利信息'])
            f_csv.writeheader()
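            # DictWriter fills missing keys with restval ('' by default), so the
            # '经验' column stays blank: handleOnePage never sets that key.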

            for pageNo in range(1, self.maxpagenum + 1):
                # Jump to the requested page number
                pageNoInput = driver.find_element(By.ID, 'jump_page')
                pageNoInput.clear()
                pageNoInput.send_keys(str(pageNo))
                driver.find_element(By.CSS_SELECTOR, 'span.og_but').click()
                time.sleep(1)

                rows = self.handleOnePage(driver)
                f_csv.writerows(rows)

                # Stop early once the last page is reached
                if self.islastpage(driver):
                    break
    # Have we reached the last page?
    def islastpage(self, driver):
        # The last <li> of the pager is the "next page" control; the assumption
        # is that it holds an <e_icons> arrow element only while a next page exists.
        NextPageButton = driver.find_element(By.CSS_SELECTOR, 'div.j_page li:last-child')
        # find_elements (plural) returns an empty list instead of raising
        # NoSuchElementException when the icon is absent.
        icons = NextPageButton.find_elements(By.TAG_NAME, 'e_icons')
        return len(icons) == 0

    # Scrape every job card on the current results page
    def handleOnePage(self, driver):
        rows = []
        jobs = driver.find_elements(By.XPATH, "//*[@class='j_joblist']/div")
        for job in jobs:
            # Each card exposes its fields as plain <span> elements, in display order
            fields = job.find_elements(By.TAG_NAME, 'span')
            stringfields = [field.text for field in fields]
            print(stringfields)

            # NOTE: a card with fewer than five <span>s would raise IndexError
            # here; add a length check if that shows up in practice.
            row = {
                "职位名称": stringfields[0],
                "发布时间": stringfields[1],
                "薪资": stringfields[2],
                "工作地点": stringfields[3],
                "福利信息": stringfields[4],
            }
            rows.append(row)
        return rows

QCWY(keyword='python', city='上海', maxpagenum=1).run()
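
The time.sleep/implicitly_wait mix above either wastes time or races the page on a slow connection. Below is a minimal sketch of the explicit-wait alternative; the locators ('kwdselectid', 'div.ush > button') are the ones the script already uses (whether the live site still matches them is an assumption), and the rest is the standard selenium.webdriver.support API.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get('https://www.51job.com/')
wait = WebDriverWait(driver, 10)

# Block until the search box is actually in the DOM (at most 10 s), instead
# of relying on a fixed sleep.
wait.until(EC.presence_of_element_located((By.ID, 'kwdselectid'))).send_keys('python')

# For elements about to be clicked, wait until they are clickable.
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'div.ush > button'))).click()

Unlike implicitly_wait, an explicit wait is per-call, so slow steps get the full timeout while fast ones return as soon as the condition holds.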









posted @ 2022-07-03 21:33  予璇CC123