# Python: 查询某年所有非工作日期 (query all non-working dates of a given year)


"""
   查询 某年所有非工作日期
"""

import holidays
import datetime
import requests
import json
import pandas as pd
# Set every pandas display limit used by this script to None so that the
# printed DataFrame is not capped by row/column/column-width limits.
for _display_option in (
    'display.width',
    'display.max_rows',
    'display.max_columns',
    'display.max_colwidth',
):
    pd.set_option(_display_option, None)


class get_not_work_day(object):
    """Collect the non-working dates of one calendar year.

    Non-working dates are read from the calendar endpoint behind Baidu
    search; names of public holidays come from ``holidays.CN``.  Every
    non-working date that is not a public holiday is labelled ``'休息日'``.

    Attributes:
        calcultaion_year: Year to query, as passed to the constructor.
        dict_: Maps ``"YYYY-MM-DD"`` strings to the holiday name reported by
            ``holidays.CN`` for that year.
        legal_holiday: List of the holiday dates reported by ``holidays.CN``.
        proxies: Proxy mapping handed to ``requests.get``.
        timeout: Timeout in seconds handed to ``requests.get``.
    """

    # Endpoint captured from the calendar widget shown when searching "日历"
    # on Baidu; results can be cross-checked against that page.
    API_URL = "https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php"

    # Proxy that was hard-coded in the original script; kept as the default
    # so existing callers behave the same.
    DEFAULT_PROXIES = {
        'https': 'http://10.22.96.29:8080',
        'http': 'http://10.22.96.29:8080',
    }

    def __init__(self, calcultaion_year="2023", proxies=None, timeout=10):
        """Load the holiday table for ``calcultaion_year``.

        Args:
            calcultaion_year: Year to query; a string or an int.
            proxies: Optional ``requests``-style proxy mapping.  ``None``
                selects ``DEFAULT_PROXIES``; pass ``{}`` to skip the
                built-in proxy.
            timeout: Seconds to wait for the calendar endpoint before
                ``requests`` raises a timeout error.
        """
        self.calcultaion_year = calcultaion_year
        self.proxies = dict(self.DEFAULT_PROXIES) if proxies is None else proxies
        self.timeout = timeout
        super().__init__()
        holidays_ = holidays.CN(years=int(calcultaion_year))
        self.dict_ = {
            k.strftime("%Y-%m-%d") if isinstance(k, datetime.date) else k: v
            for k, v in holidays_.items()
        }
        self.legal_holiday = list(holidays_.keys())

    @staticmethod
    def _is_not_work_day(day):
        """Return True when one almanac entry describes a non-working day."""
        status = day.get("status")
        if day["cnDay"] in ('日', '六'):
            # Weekend: a rest day unless status "2" marks it as a make-up
            # workday (shown with a "班" badge on the Baidu calendar).  Plain
            # weekends usually carry no status field at all.
            return status != "2"
        # Weekday: a rest day only when status "1" marks it as a day off
        # (shown with a "休" badge on the Baidu calendar).
        return status == "1"

    def catch_url_from_baidu(self, month):
        """Fetch the non-working days returned for one month query.

        Args:
            month: Month number to query, e.g. ``"5"``.

        Returns:
            A list of ``"(year, month, day)"`` strings, one per non-working
            day found in the response.

        Raises:
            requests.RequestException: On network failure, timeout, or a
                non-success HTTP status.
        """
        headers = {
            "Content-Type": "application/json;charset=UTF-8"
        }
        param = {
            # Formatting (rather than ``+``) lets year and month be ints too.
            "query": f"{self.calcultaion_year}年{month}月",
            "resource_id": "39043",
            "t": "1604395059555",
            "ie": "utf8",
            "oe": "gbk",
            "format": "json",
            "tn": "wisetpl",
            "cb": ""
        }
        response = requests.get(
            url=self.API_URL,
            headers=headers,
            params=param,
            proxies=self.proxies,
            timeout=self.timeout,
        )
        # Fail with a clear HTTP error instead of an obscure JSON/KeyError.
        response.raise_for_status()
        month_data = json.loads(response.text)["data"][0]["almanac"]
        not_work_day = [one for one in month_data if self._is_not_work_day(one)]
        return self.print_info(not_work_day)

    def print_info(self, not_work_day):
        """Format almanac entries as ``"(year, month, day)"`` strings.

        Args:
            not_work_day: Iterable of mappings with ``year``, ``month`` and
                ``day`` keys.

        Returns:
            A list with one formatted string per entry, in input order.
        """
        return [
            f"({one['year']}, {one['month']}, {one['day']})"
            for one in not_work_day
        ]

    def cer_(self, x):
        """Return the holiday name for date ``x``, or ``'休息日'`` otherwise."""
        if x in self.legal_holiday:
            return self.dict_[x.strftime("%Y-%m-%d")]
        return '休息日'

    def calc_day(self):
        """Build the table of non-working dates for the configured year.

        Returns:
            A ``pandas.DataFrame`` with a ``notWorkDay`` column of
            ``datetime.date`` values and a ``dateType`` column holding the
            holiday name or ``'休息日'``.
        """
        # Per the original author's note, each query returns the previous,
        # current and next month, so months 2, 5, 8 and 11 cover the year
        # (e.g. querying May yields April, May and June).
        calculation_month = ["2", "5", "8", "11"]
        date_ = []
        for one_month in calculation_month:
            date_.extend(self.catch_url_from_baidu(one_month))

        time_ = []
        for text in date_:
            # Parse the "(year, month, day)" strings produced by print_info.
            year, month, day = (int(part) for part in text.strip("()").split(","))
            time_.append(datetime.date(year, month, day))
        df_ = pd.DataFrame(time_, columns=['notWorkDay'])
        df_['dateType'] = df_['notWorkDay'].apply(self.cer_)
        return df_


if __name__ == '__main__':
    # Script entry point: build the 2024 non-working-day table and print it.
    calendar_2024 = get_not_work_day("2024")
    df = calendar_2024.calc_day()
    print(df)

# posted @ 2023-11-13 16:53  冀未然  阅读(47)  评论(0)    收藏  举报