不爱贞子爱爽子
バキューン

1、环境:python2.7+selenium+PhantomJS(软件和库的安装网上都有教程,这里跳过,so easy)

2、原理

为了绕过首页登录时的验证码,程序直接进入搜索页,输入要搜索的职位和地区,得到职位列表;然后点击全选,选中该页的所有职位并提交申请(培训类的职位也会被一并选中,这里没有做筛选,需要的话可以自行加上)。第一次提交不需要验证码,从第二次提交开始就需要验证码了:程序会截图,由人工查看后通过raw_input手动输入,输入之后程序继续运行,如此往复。总的来说还不够智能,需要改进的地方很多,就当是让大家了解一下selenium+PhantomJS的功能。

3、贴上代码

#!/usr/bin/Python
# -*- coding: utf-8 -*-
import os
from selenium import webdriver
import urllib2
import time
from os import path
import image
# Directory containing this script; screenshots are written here.
# abspath() keeps the location stable even when the script is launched via a
# relative path (where __file__ is relative and dirname() may be '').
d = path.dirname(path.abspath(__file__))
class Zhilian(object):
    """Walk zhaopin.com search-result pages and apply to every listed job.

    The class drives an already-opened selenium browser that is showing a
    search-result page.  For each page it prints the listed jobs, selects
    all of them, submits the application, logs in when the page header shows
    the user is not logged in, asks the operator to type a captcha when one
    is displayed, and then moves on to the next result page.

    This is Python 2 code: interactive input is read with ``raw_input``.
    """

    # Absolute XPaths copied from the live page layout.
    _USER_HEADER_XPATH = '/html/body/div[1]/div[1]/div/div/div[1]'
    _NEXT_PAGE_XPATH = '/html/body/div[3]/div[3]/div[3]/form/div[1]/div[1]/div[3]/ul/li[7]/a'

    def __init__(self):
        self.arr = []
        print(1)

    def main(self, browser):
        """Process the current result page and every following page.

        Iterates instead of recursing once per page, so a long result list
        cannot exhaust the interpreter's recursion limit.

        :param browser: selenium WebDriver showing a search-result page.
        """
        while True:
            self._process_page(browser)
            if not self._go_to_next_page(browser):
                break

    def _process_page(self, browser):
        """List, select and apply to all jobs on the page currently shown."""
        try:
            for row in browser.find_elements_by_class_name('newlist'):
                job_name = row.find_element_by_class_name('zwmc').text
                price = row.find_element_by_class_name('zwyx').text
                print(job_name + '-' + price)
        except Exception:
            print('select error!!')
        time.sleep(2)

        is_logined = self._is_logged_in(browser)

        # Tick the "select all" checkbox, then click the batch-apply link.
        try:
            browser.find_element_by_xpath('//*[@id="checkbox4al2"]').click()
            browser.find_element_by_xpath('//*[@id="newlist_list_div"]/p[1]/a[1]').click()
        except Exception:
            print('error')
        time.sleep(2)

        print(is_logined)
        # Module-level debug helper: screenshots the page after the click.
        test(browser)
        if is_logined == 0:
            self._login(browser)
        self._submit_captcha(browser)

    def _is_logged_in(self, browser):
        """Return 1 if the page header shows a logged-in user, else 0.

        When the header cannot be read the user is treated as not logged in
        (0), so the caller always receives a defined value.
        """
        try:
            user_name = browser.find_element_by_xpath(self._USER_HEADER_XPATH).text
        except Exception:
            print('head error!!')
            return 0
        # The header shows the "login / register" links when nobody is logged in.
        if user_name == u'登录注册':
            return 0
        return 1

    def _login(self, browser):
        """Prompt for credentials and submit the login form (best effort)."""
        try:
            time.sleep(3)
            name = raw_input('please input your zhilian user_name: ')
            pwd = raw_input('Please input your zhilian password: ')
            user_box = browser.find_element_by_xpath('//*[@id="simplaceholder"]')
            user_box.clear()
            user_box.send_keys(name)
            pwd_box = browser.find_element_by_xpath('//*[@id="loginBlock"]/form/ul/li[3]/label/input')
            pwd_box.clear()
            pwd_box.send_keys(pwd)
            browser.find_element_by_xpath('//*[@id="submitBlock"]/div[1]/a').click()
            # NOTE: only means the form was submitted; the result is not verified.
            print('login success!!')
        except Exception:
            print("login false or or this account is online!!")
        time.sleep(3)

    def _submit_captcha(self, browser):
        """If a captcha field is shown, let the operator solve it and submit."""
        try:
            # find_elements_* returns an empty list instead of raising, so a
            # page without a captcha is not reported as an error.
            if not browser.find_elements_by_xpath('//*[@id="validate"]'):
                return
            self._show_screenshot(browser)
            code = raw_input('please input the code: ')
            code_box = browser.find_element_by_xpath('//*[@id="validate"]')
            code_box.clear()
            code_box.send_keys(code)
            browser.find_element_by_xpath('//*[@id="applynowbutton"]').click()
            print('apply success!!')
        except Exception:
            print('code error!!')

    def _show_screenshot(self, browser):
        """Save a screenshot next to the script and open that same file."""
        shot = path.abspath(path.join(d, '1.png'))
        browser.get_screenshot_as_file(shot)
        # Windows "start": the first quoted argument is the window title,
        # so pass an empty title before the quoted file path.
        os.system('start "" "%s"' % shot)

    def _go_to_next_page(self, browser):
        """Click the next-page link; return False when there is none."""
        print('We can select next page!!')
        links = browser.find_elements_by_xpath(self._NEXT_PAGE_XPATH)
        if not links:
            print('this is the last page!!')
            return False
        links[0].click()
        return True
def check(read_input=None):
    """Ask for the job and city keywords until the user confirms them.

    The user is prompted for a job name, a city name and a yes/no
    confirmation.  Any answer other than ``'yes'`` restarts the prompts.
    (The previous recursive version discarded the result of the retry and
    then failed on an unassigned local variable.)

    :param read_input: optional callable ``prompt -> str`` used to read each
        answer; defaults to ``raw_input`` (this script targets Python 2).
    :return: ``[city, job]`` exactly as entered by the user.
    """
    if read_input is None:
        read_input = raw_input
    while True:
        job = read_input('please input the job name which you want to select: ')
        city = read_input('please input the city name which you want to select: ')
        answer = read_input('Do you sure the  city name is "'+city+'" and the job name is "'+job+'", please input yes or no to check: ')
        if answer == 'yes':
            return [city, job]

def test(browser):
    """Debug helper: screenshot the current page and open the image.

    The screenshot is written as ``1.png`` in the script directory ``d`` and
    that same file is opened with the Windows ``start`` command (previously a
    hard-coded, unrelated ``E:\\`` path was opened instead).

    :param browser: selenium WebDriver whose current page is captured.
    """
    screenshot = path.abspath(path.join(d, '1.png'))
    browser.get_screenshot_as_file(screenshot)
    # Windows "start": the first quoted argument is the window title, so an
    # empty title must precede the quoted file path.
    os.system('start "" "%s"' % screenshot)
            
if __name__ == '__main__':
    import sys

    zhilian = Zhilian()
    # check() returns [city, job] once the user has confirmed both keywords.
    city_raw, job_raw = check()
    # raw_input() yields byte strings under Python 2; decode them with the
    # console's encoding, falling back to gbk when it is not reported.
    console_encoding = sys.stdin.encoding or 'gbk'
    city = city_raw.decode(console_encoding)
    job = job_raw.decode(console_encoding)
    browser = webdriver.PhantomJS('E:\\p_python\\Scripts\\phantomjs\\bin\\phantomjs.exe')
    try:
        browser.get('http://sou.zhaopin.com/jobs/searchresult.ashx')
        time.sleep(3)
        # Fill in the search form: job keyword, location, then submit.
        try:
            keyword_box = browser.find_element_by_xpath('//*[@id="KeyWord_kw2"]')
            keyword_box.clear()
            keyword_box.send_keys(job)
            location_box = browser.find_element_by_xpath('//*[@id="JobLocation"]')
            location_box.clear()
            location_box.send_keys(city)
            browser.find_element_by_xpath('//*[@id="searchForm"]/form/div[6]/button').click()
        except Exception:
            print('input select_info error!!')
        time.sleep(2)
        zhilian.main(browser)
    finally:
        # Always shut down the PhantomJS process, even after an error.
        browser.quit()

 

posted on 2018-01-02 13:50  不爱贞子爱爽子  阅读(1458)  评论(1)  编辑  收藏  举报

! !