如果无法打开 xlsx:

pip uninstall xlrd
pip install xlrd==1.2.0

 

check函数里面有个row_value数组,对应的是专业、学历等东西,print(row_value)一看就明白了,可以增减/更改这个函数查找指定的职位

# encoding:utf-8
import xlrd
import xlwt
import re

def check(row_value, major, edu, year, p_edu, p_major, p_fresh):
ma = row_value[p_major]
# 该岗位所需专业
ed = row_value[p_edu]
# 该岗位所需学历
fresh = row_value[p_fresh]
# 该岗位所需年限
loc= row_value[0]
# 该岗位位置
#check_major(ma, major) and check_edu(ed, edu) and
if check_major(ma, major) and check_edu(ed, edu) and checkSpecial(fresh, year):
#print(value)
print(row_value)
print()
print()
return True
else:
return False

def check_major(value, major):
# 检查是否满足专业要求
pat = re.compile(major)
if re.search(pat, value):
return True
return False

# 检查是否满足学历要求
def check_edu(value, edu):
pat = re.compile(edu)
if re.search(pat, value):
return True
return False

# 检查基层年限设置
def checkSpecial(value, year):
pat = re.compile(year)
if re.search(pat, value):
return False
return True

# 根据条件筛选出职位
def filterTitle(file, major, edu, year):
data = xlrd.open_workbook(file)
# 将表格数据读取到data中
res = ''
output = xlwt.Workbook(encoding='utf-8')
for sheet in data.sheets():
output_sheet = output.add_sheet(sheet.name)
# 筛选出来的文件中也添加这些子表格
for col in range(sheet.ncols):
# 添加第二行的列信息
output_sheet.row(0).write(col, sheet.cell(1,col).value)
#print(type(sheet), sheet)
output_row = 1
cnt = 0
p_edu, p_major, p_fresh = 0, 0, 0
for row in range(sheet.nrows):
# sheet指的是xlsx的一个板块、单元
row_value = sheet.row_values(row)

if '录用人数' in row_value:
p_fresh = row_value.index('是否限应届毕业生报考')
p_edu = row_value.index('学历')
p_major = row_value.index('研究生专业\n名称及代码')

# d = row_value.index('招考单位')
#print(a)
choosed = check(row_value, major, edu, year, p_edu, p_major, p_fresh)
# 是否满足三个条件(专业、学历、基层限制)

if choosed:
# 满足则输出到文件中
res += str(row_value)
res += '\n\n'
for col in range(sheet.ncols):
output_sheet.row(output_row).write(col, sheet.cell(row, col).value)
output_sheet.flush_row_data()
output_row += 1
s = file[0:6]
txtname = s + '.txt'
with open(txtname, 'w', encoding="utf-8") as f:
f.write(res)
filename = s + '.xls'
output.save(filename)

if __name__ == '__main__':
filterTitle('2021省考公务员.xls', '计算机技术', '研究生', '否')
filterTitle('2022省考公务员.xls', '计算机技术', '研究生', '否')
filterTitle('2023省考公务员.xls', '计算机技术', '研究生', '否')

 筛选国考的

# encoding:utf-8
import xlrd
import xlwt
import re
'''
如果无法打开 xlsx
pip uninstall xlrd
pip install xlrd==1.2.0
'''
def check(row_value, major, edu, year, p_edu, p_major, p_others, p_city):

    c = True
    if p_city:
        city = row_value[p_city]
        c = check_v(city, '广东')
    if edu:
        ed = row_value[p_edu]
        c = c and check_v(ed, edu)
    if major:
        ma = row_value[p_major]
        c = c and check_v(ma, major)
        # 该岗位所需专业
    if p_others:
        others = row_value[p_others]
        c = c and check_v(others, '仅限应届毕业生')
        # 该岗位所需年限

    if c:
        #print(value)
        print(row_value)
        print()
        print()
        return True
    else:
        return False


# 检查是否满足学历要求
def check_v(value, goal):
    pat = re.compile(goal)
    if re.search(pat, value):
        return True
    return False


# 根据条件筛选出职位
def filter_guo_kao(file, major, edu, year):
    data = xlrd.open_workbook(file)
    # 将表格数据读取到data中
    res = ''
    output = xlwt.Workbook(encoding='utf-8')
    for sheet in data.sheets():
        output_sheet = output.add_sheet(sheet.name)
        # 筛选出来的文件中也添加这些子表格
        for col in range(sheet.ncols):
            # 添加第二行的列信息
            output_sheet.row(0).write(col, sheet.cell(1,col).value)
        #print(type(sheet), sheet)
        output_row = 1
        cnt = 0
        p_edu, p_major, p_fresh, p_city, p_others = 0, 0, 0, 0, 0
        for row in range(sheet.nrows):
            # sheet指的是xlsx的一个板块、单元
            row_value = sheet.row_values(row)

            if '部门名称' in row_value:
                #print(row_value)
                p_fresh = row_value.index('基层工作最低年限')
                p_major = row_value.index('专业')
                p_city = row_value.index('工作地点')
                p_others = row_value.index('备注')
                # d = row_value.index('招考单位')
                #print(a)
            choosed = check(row_value, major, None, year, None, p_major, p_others, p_city)
            # 是否满足三个条件(专业、学历、基层限制)

            if choosed:
                # 满足则输出到文件中
                res += str(row_value)
                res += '\n\n'
                for col in range(sheet.ncols):
                    output_sheet.row(output_row).write(col, sheet.cell(row, col).value)
                output_sheet.flush_row_data()
                output_row += 1
    s = file[0:6]
    txtname = s + '.txt'
    with open(txtname, 'w', encoding="utf-8") as f:
        f.write(res)
    # 要是想要xls的话就取消下面的注释
    '''
    filename = s + '.xls'
    output.save(filename)
    '''

if __name__ == '__main__':

    #filter_guo_kao('2021国考公务员.xls', None, None, '应届')
    # 不知道为什么筛选不了2021的 懒得改了(开摆)

    filter_guo_kao('2022国考公务员.xls', '计算机', None, '应届')
    # 因为岗位数量太少了 直接筛选应届计算机的 没有筛选学历 到时候自己筛选一下

 

github:

 https://github.com/SmartChen2/gwy_job_selector

 

参考:

https://zhuanlan.zhihu.com/p/140917342

https://blog.csdn.net/weixin_44073728/article/details/111054157