Scraping the current page with bs4 and saving the page data

Requirement:

"""Fetch all of the job listings and display each entry as position, company, area, and salary."""

# Import the required libraries and packages
import bs4
import csv
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Launch the Chrome browser
browser = webdriver.Chrome()
browser.get('https://www.zhipin.com/')
browser.implicitly_wait(10)  # wait up to 10 seconds for elements to appear
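implicitly_wait sets one global timeout that applies to every element lookup. When only a single element is slow to appear, an explicit wait is more targeted; a minimal sketch, assuming the same '.ipt-search' selector used below:

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Block until the search box exists in the DOM, for at most 10 seconds
WebDriverWait(browser, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, '.ipt-search'))
)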

# Type the search keyword into the search box
check_ele = browser.find_element(By.CSS_SELECTOR, '.ipt-search')  # Selenium 4 locator style
check_ele.send_keys('python开发工程师')
# Press Enter to submit the search
check_ele.send_keys(Keys.ENTER)
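After Enter is pressed, the results render asynchronously, so reading page_source too early can yield a page with no listings. A minimal sketch that waits for the '.job-primary' job cards (the same class the parser below selects), reusing the WebDriverWait/EC imports from the sketch above:

# Wait until at least one job card has rendered before scraping
WebDriverWait(browser, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, '.job-primary'))
)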

# Extract the job information
info = []

def get_info(html):
    soup = BeautifulSoup(html, 'lxml')
    job_primary = soup.select_one('#main').select('.job-primary')
    # print(type(job_primary))
    for job in job_primary:
        if isinstance(job, bs4.element.Tag):
            job_name = job.select_one('.job-name').text
            job_area = job.select_one('.job-area').text
            job_limit = job.select_one('.red').text  # salary range
            company_name = job.select_one('.company-text').select_one('.name').text
            sub_info = [job_name, job_area, job_limit, company_name]
            info.append(sub_info)
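Note that select_one returns None when a selector matches nothing, and the following .text access then raises AttributeError, so one malformed card aborts the whole loop. A small defensive helper can absorb that; safe_text is a hypothetical name, not part of the original script:

# Hypothetical helper: returns a default instead of raising when a selector misses
def safe_text(tag, selector, default=''):
    found = tag.select_one(selector)
    return found.text.strip() if found else default

# Usage inside the loop, e.g.: job_limit = safe_text(job, '.red')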
# Save the data to a CSV file
def save_data(data):
    with open('../job_info.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # Header row: position, area, salary, company
        writer.writerow(['岗位名称', '工作区域', '薪资', '公司名称'])
        for a in data:
            print(a)
            writer.writerow(a)
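One caveat, since the rows contain Chinese text: Excel on Windows often shows mojibake for a UTF-8 CSV that lacks a byte-order mark. If that happens, writing with 'utf-8-sig' (UTF-8 with BOM) is a common fix; save_data_for_excel is a hypothetical variant, not part of the original script:

def save_data_for_excel(data):
    # Identical to save_data, except 'utf-8-sig' prepends a BOM for Excel
    with open('../job_info.csv', 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        writer.writerow(['岗位名称', '工作区域', '薪资', '公司名称'])
        writer.writerows(data)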
# Run the extraction and save steps, then shut the browser down
get_info(browser.page_source)
save_data(info)
browser.quit()  # quit() ends the whole driver session; close() would only close the window
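As written, get_info only parses the first page of results, while the stated requirement is all of the job information. One way to get closer is to loop over the pagination control before quitting; a rough sketch that would replace the single get_info call above, where '.page .next' is an assumed next-page selector and should be verified against the live page:

from selenium.common.exceptions import NoSuchElementException
import time

while True:
    get_info(browser.page_source)  # collect the cards on the current page
    try:
        # '.page .next' is an assumed selector for the next-page button
        next_btn = browser.find_element(By.CSS_SELECTOR, '.page .next')
    except NoSuchElementException:
        break  # no next button found: assume this was the last page
    next_btn.click()
    time.sleep(2)  # crude pause; an explicit wait would be more robust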