1 import requests
2 import time
3 import json
4 import threading
5 # 采集https://careers.tencent.com/search.html 网站的招聘信息
6
# Millisecond Unix timestamp, captured once at startup and sent as the
# ``timestamp`` query parameter of the API requests below.
timestamp = str(int(time.time() * 1000))
9
10
# Serializes appends to the shared output file across worker threads.
_write_lock = threading.Lock()

# Seconds to wait on each API request before giving up.
_REQUEST_TIMEOUT = 10


def parse_url(json_url):
    """Fetch one page of job postings and append each one to the output file.

    For every posting on the page a second request is made to the
    ``ByPostId`` endpoint to obtain the posting's requirements. Each posting
    is written as one JSON object per line to ``腾讯招聘.json``, which is
    opened in append mode.

    Args:
        json_url: URL of the post-query endpoint for a single result page.

    Raises:
        requests.RequestException: If a request times out, fails, or returns
            an HTTP error status.
    """
    page_resp = requests.get(json_url, timeout=_REQUEST_TIMEOUT)
    page_resp.raise_for_status()
    # NOTE(review): the API presumably returns a null "Posts" for a page with
    # no results; treat a null/missing list as empty instead of crashing.
    posts = (page_resp.json().get('Data') or {}).get('Posts') or []

    for post in posts:
        post_id = post['PostId']

        # The listing does not carry the requirements text; it is read from
        # the posting's detail record.
        detail_url = (
            'https://careers.tencent.com/tencentcareer/api/post/ByPostId'
            '?timestamp={}&postId={}&language=zh-cn'
        ).format(timestamp, post_id)
        detail_resp = requests.get(detail_url, timeout=_REQUEST_TIMEOUT)
        detail_resp.raise_for_status()

        item = {
            '职位': post['RecruitPostName'],
            '职责': post['Responsibility'],
            '要求': detail_resp.json()['Data']['Requirement'],
            '链接': 'https://careers.tencent.com/jobdesc.html?postId=' + post_id,
            '时间': post['LastUpdateTime'],
        }
        print('正在写入 → ', item)

        line = json.dumps(item, ensure_ascii=False) + '\n'
        # Several threads append to the same file; hold the lock so records
        # are written one at a time.
        with _write_lock:
            with open('腾讯招聘.json', 'a', encoding='utf-8') as f:
                f.write(line)
42
43
# Number of result pages to fetch.
num = 10
# Query endpoint template; the placeholders are the timestamp and page index.
query_template = (
    'https://careers.tencent.com/tencentcareer/api/post/Query'
    '?timestamp={}&countryId=&cityId=&bgIds=&productId=&categoryId='
    '&parentCategoryId=&attrId=&keyword=&pageIndex={}&pageSize=10'
    '&language=zh-cn&area=cn'
)
t_list = []
for count in range(1, num + 1):
    print('加载第{}页数据'.format(count))
    json_url = query_template.format(timestamp, count)
    t = threading.Thread(target=parse_url, args=(json_url,))
    t_list.append(t)

# Start every worker first, then wait for all of them to finish.
for worker in t_list:
    worker.start()
for worker in t_list:
    worker.join()