# -*-coding:utf-8-*-
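'''
FileName: Lagou job-posting scraper
CreateTime: 2018-4-10
Author: ___dx___
FileDescription: Queries the lagou.com position search endpoint, filters the
    returned postings, and saves them to an Excel (.xls) workbook.
'''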
import requests
import xlwt
import ssl
# Replace the ssl module's default HTTPS context factory with the one that
# builds an *unverified* context, i.e. certificate checking is turned off
# for code that relies on this default.
# NOTE(review): this is a process-wide change that weakens TLS security;
# presumably added to work around certificate errors -- confirm it is still
# needed (and that the HTTP client used below actually consults this hook).
ssl._create_default_https_context = ssl._create_unverified_context
class Lagou_job(object):
    """Scrape job postings from lagou.com and export them to an .xls workbook.

    ``getJobList`` fetches one page of search results from the position
    AJAX endpoint; ``saveExcel`` filters those postings and writes the
    matching ones to a spreadsheet.
    """

    # (header title, key in the job dict) for every exported column, in the
    # order the columns appear in the sheet.
    COLUMNS = (
        (u'公司全名', 'companyFullName'),
        (u'公司简称', 'companyShortName'),
        (u'城市', 'city'),
        (u'区域', 'district'),
        (u'工作性质', 'jobNature'),
        (u'职位名称', 'positionName'),
        (u'薪资范围', 'salary'),
        (u'职位', 'secondType'),
        (u'工作年限', 'workYear'),
        (u'公司规模', 'companySize'),
        (u'学历要求', 'education'),
    )

    def __init__(self):
        """Set the search endpoint URL and the browser-like request headers."""
        self.url = 'https://www.lagou.com/jobs/positionAjax.json?px=new&needAddtionalResult=false'
        # NOTE(review): the 'Anit' spelling in the X-Anit-Forge-* headers is
        # kept as found; presumably it mirrors what the site expects --
        # confirm before "correcting" it.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
            'Referer': 'https://www.lagou.com/jobs/list_%E6%B5%8B%E8%AF%95?city=%E5%85%A8%E5%9B%BD&cl=false&fromSearch=true&labelWords=&suginput=',
            # Was 'keep - alive' (with spaces), which is not a valid
            # Connection token.
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Origin': 'https://www.lagou.com',
            'X-Anit-Forge-Code': '0',
            'X-Anit-Forge-Token': 'None',
            'X-Requested-With': 'XMLHttpRequest'
        }

    def getJobList(self, page, timeout=10):
        """Fetch one page of search results from the endpoint.

        :param page: page number, sent as the ``pn`` form field.
        :param timeout: seconds to wait for the server before giving up.
        :return: the list found at ``content.positionResult.result`` in the
            JSON response, or an empty list when the response does not
            contain it (the payload is printed in that case).
        :raises requests.HTTPError: if the server answers with an error
            status code.
        """
        # Stored on the instance as before, so the last form payload stays
        # inspectable from outside.
        # NOTE(review): 'first' is always sent as 'true', even for later
        # pages -- confirm the endpoint does not expect 'false' there.
        self.data = {
            'first': 'true',
            'pn': page,
            'kd': '测试'
        }
        # The context manager closes the session (and its pooled
        # connections) once the response has been read.
        with requests.Session() as session:
            res = session.post(self.url, data=self.data,
                               headers=self.headers, timeout=timeout)
            print(res.status_code)
            res.raise_for_status()
            result = res.json()

        try:
            jobs = result['content']['positionResult']['result']
        except (KeyError, TypeError):
            jobs = None
        if jobs is None:
            # Report the payload instead of failing with an opaque KeyError,
            # so the caller can still save whatever it has collected.
            print('No job list in response for page {0}: {1}'.format(page, result))
            return []
        return jobs

    @staticmethod
    def _is_target_job(job):
        """Return True for full-time (全职) postings whose city contains 深圳.

        Missing or ``None`` fields count as "no match" rather than raising.
        The original code also tested ``'' in job[...]`` for several fields;
        those tests are always true for strings and were dropped.
        """
        nature = job.get('jobNature') or u''
        city = job.get('city') or u''
        return u'全职' in nature and u'深圳' in city

    def saveExcel(self, filename="深圳测试_By_dx.xls", pages=1):
        """Fetch postings, keep the matching ones and save them as .xls.

        :param filename: path of the workbook to write.
        :param pages: number of result pages to fetch, starting at page 1.
            The default of 1 matches the previous hard-coded behaviour.
        """
        workbook = xlwt.Workbook()
        # Writing the same cell twice normally raises "Attempt to overwrite
        # cell"; cell_overwrite_ok=True permits it.
        sheet = workbook.add_sheet('daixiang', cell_overwrite_ok=True)

        # Header row.
        for col, (title, _key) in enumerate(self.COLUMNS):
            sheet.write(0, col, title)

        row = 1  # next free data row (row 0 holds the headers)
        for page in range(1, pages + 1):
            for job in self.getJobList(page=page):
                if not self._is_target_job(job):
                    continue
                for col, (_title, key) in enumerate(self.COLUMNS):
                    # .get() leaves the cell blank when a field is absent.
                    sheet.write(row, col, job.get(key))
                row += 1
                print(job.get('companyShortName'), job.get('salary'))

        workbook.save(filename)
if __name__ == '__main__':
    # Script entry point: scrape the postings and write the workbook.
    Lagou_job().saveExcel()