Hive生成日期维度表

 

1.日期表结构

 

 

 

 

 

2.生成代码

 

/*
create table if not exists dim_pub_date ( 
 date_id                   string         comment '日期(yyyymmdd)' 
,datestr                   string         comment '日期(yyyy-mm-dd)' 
,date_name                 string         comment '日期名称中文' 
,weekid                    bigint         comment '周(0-6,周日~周六)' 
,week_cn_name              string         comment '周_名称_中文' 
,week_en_name              string         comment '周_名称_英文' 
,week_en_nm                string         comment '周_名称_英文缩写' 
,yearmonthid               string         comment '月份id(yyyymm)' 
,yearmonthstr              string         comment '月份(yyyy-mm)' 
,monthid                   bigint         comment '月份id(1-12)' 
,monthstr                  string         comment '月份' 
,month_cn_name             string         comment '月份名称_中文' 
,month_en_name             string         comment '月份名称_英文' 
,month_en_nm               string         comment '月份名称_简写_英文' 
,quarterid                 bigint         comment '季度id(1-4)' 
,quarterstr                string         comment '季度名称' 
,quarter_cn_name           string         comment '季度名称_中文' 
,quarter_en_name           string         comment '季度名称_英文' 
,quarter_cn_nm             string         comment '季度名称_简写中文' 
,quarter_en_nm             string         comment '季度名称_简写英文' 
,yearid                    bigint         comment '年份id' 
,year_cn_name              string         comment '年份名称_中文' 
,year_en_name              string         comment '年份名称_英文' 
,month_start_date          string         comment '当月1号(yyyy-mm-dd)' 
,month_end_date            string         comment '当月最后日期(yyyy-mm-dd)' 
,month_timespan            bigint         comment '月跨天数' 
,week_of_year              bigint         comment '当年第几周' 
,workday_flag              string         comment '是否工作日(周一至周五Y,否则:N)' 
,weekend_flag              string         comment '是否周末(周六和周日Y,否则:N)'
,week_start_date           string         comment '当周起始日期'
,week_end_date             string         comment '当周结束日期'
,year_week_cn_name         string         comment '当年第几周(xxxx年第xx周)'
,dayid                     string         comment '当日序号id(1-31)'
,daystr                    string         comment '当日序号字符(01-31)'
)comment '日期维度表'
stored as parquet;
*/



insert overwrite table dim_pub_date
select  date_id 
    ,datestr 
    ,concat(yearid,'',monthid,'',substr(datestr,9,2),'') as date_name
    ,weekid 
    ,case weekid when 0 then '星期日'
        when 1 then '星期一'
        when 2 then '星期二'
        when 3 then '星期三'
        when 4 then '星期四'
        when 5 then '星期五'
        when 6 then '星期六'
    end as week_cn_name
    ,case weekid when 0 then 'Sunday'
        when 1 then 'Monday'
        when 2 then 'Tuesday'
        when 3 then 'Wednesday'
        when 4 then 'Thurday'
        when 5 then 'Friday'
        when 6 then 'Saturday'
    end as week_en_name
    ,case weekid when 0 then 'Sun'
        when 1 then 'Mon'
        when 2 then 'Tues'
        when 3 then 'Wed'
        when 4 then 'Thur'
        when 5 then 'Fri'
        when 6 then 'Sat'
    end as week_en_nm
    ,substr(date_id,1,6) as yearmonthid
    ,substr(datestr,1,7) as yearmonthstr
    ,monthid
    ,concat(yearid,'',monthid,'') as monthstr
    ,concat(monthid,'') as month_cn_name
    ,case monthid when 1 then 'January'
        when 2 then 'February'
        when 3 then 'March'
        when 4 then 'April'
        when 5 then 'May'
        when 6 then 'June'
        when 7 then 'July'
        when 8 then 'August'
        when 9 then 'September'
        when 10 then 'October'
        when 11 then 'November'
        when 12 then 'December'
    end as month_en_name
    ,case monthid when 1 then 'Jan'
        when 2 then 'Feb'
        when 3 then 'Mar'
        when 4 then 'Apr'
        when 5 then 'May'
        when 6 then 'Jun'
        when 7 then 'Jul'
        when 8 then 'Aug'
        when 9 then 'Sept'
        when 10 then 'Oct'
        when 11 then 'Nov'
        when 12 then 'Dec'
    end as month_en_nm
    ,quarterid
    ,concat(yearid,quarterid) as quarterstr
    ,concat(yearid,'年第',quarterid,'季度') as quarter_cn_name
    ,concat(yearid,'Q',quarterid) as quarter_en_name
    ,case quarterid when 1 then '第一季度'
        when 2 then '第二季度'
        when 3 then '第三季度'
        when 4 then '第四季度'
    end as quarter_cn_nm
    ,concat('Q',quarterid) as quarter_en_nm
    ,yearid 
    ,concat(yearid,'') as year_cn_name
    ,yearid as year_en_name
    ,month_start_date
    ,month_end_date
    ,datediff(month_end_date,month_start_date) + 1 as month_timespan
    ,week_of_year 
    ,case when weekid in (1,2,3,4,5) then 'Y' else 'N' end as workday_flag
    ,case when weekid in (0,6) then 'Y' else 'N' end as weekend_flag
    ,case when weekid = 0 then date_sub(datestr,6) 
        else date_sub(datestr,weekid-1)
    end as week_start_date   --周起始日期
    ,case when weekid = 0 then datestr
        else date_add(datestr,6-weekid+1)
    end as week_end_date  --周结束日期
    ,concat(year(date_sub(next_day(datestr,'MO'),4)),'年第',week_of_year,'') as year_week_cn_name
    ,cast(from_unixtime(unix_timestamp(datestr,'yyyy-MM-dd'),'dd') as bigint) as dayid
    ,from_unixtime(unix_timestamp(datestr,'yyyy-MM-dd'),'dd') as daystr
from 
(
    select   from_unixtime(unix_timestamp(datestr,'yyyy-MM-dd'),'yyyyMMdd') as date_id
        ,datestr as datestr
        ,pmod(datediff(datestr, '2012-01-01'), 7)  as weekid
        ,concat(substr(datestr,1,4),substr(datestr,6,2)) as yearmonthid
        ,substr(datestr,1,7) as yearmonthstr
        ,cast(substr(datestr,6,2) as int) as monthid
        ,case when cast(substr(datestr,6,2) as int) <= 3 then 1 
            when cast(substr(datestr,6,2) as int) <= 6 then 2
            when cast(substr(datestr,6,2) as int) <= 9 then 3
            when cast(substr(datestr,6,2) as int) <= 12 then 4
        end as quarterid
        ,substr(datestr,1,4) as yearid 
        ,date_sub(datestr,dayofmonth(datestr)-1) as  month_start_date  --当月第一天
        ,last_day(date_sub(datestr,dayofmonth(datestr)-1)) month_end_date --当月最后一天
        ,weekofyear(datestr) as week_of_year
    from 
    (
        select date_add('1900-01-01',t0.pos) as datestr
            from 
            (
                select posexplode(
                        split(
                            repeat('o', datediff(from_unixtime(unix_timestamp('20501231','yyyymmdd'),'yyyy-mm-dd'), '1900-01-01')), 'o'
                        )
                    ) 
            ) t0
    ) t 

) A 
;

 

 

create table if not exists public.dwd_ast_rdos_batch_task_dd(
 id                                                                bigint               comment    'id' 
,project_identifier                                                string               comment    '项目标识' 
,name                                                              string               comment    '任务名称' 
,tenant_id                                                         bigint               comment    '租户id' 
,dtuic_tenant_id                                                   bigint               comment    '租户id' 
,tenant_name                                                       string               comment    '用户名称' 
,tenant_desc                                                       string               comment    '租户描述' 
,project_id                                                        bigint               comment    '项目id' 
,project_name                                                      string               comment    '项目名称' 
,project_alias                                                     string               comment    '项目别名' 
,task_type                                                         string               comment    '任务类型 -1:虚节点,0:sparksql,1:spark,2:数据同步,3:pyspark,4:R,5:深度学习,6:python,7:shell,8:机器学习,9:hadoopMR,10:工作流,12:carbonSQL,13:notebook,14:算法实验,15:libra sql,16:kylin,17:hiveSQL' 
,task_type_name                                                    string               comment    'task_type_name' 
,period_type                                                       string               comment    '周期类型' 
,period_type_name                                                  string               comment    'period_type_name' 
,engine_type                                                       string               comment    '执行引擎类型 0:flink,1:spark,2:datax,3:learning,4:shell,5:python2,6:dtyarnshell,7:python3,8:hadoop,9:carbon,10:postgresql,11:kylin,12:hive' 
,engine_type_name                                                  string               comment    'engine_type_name' 
,schedule_status                                                   string               comment    '0未开始,1正常调度,2暂停' 
,schedule_status_name                                              string               comment    'schedule_status_name' 
,submit_status                                                     string               comment    '0未提交,1已提交' 
,submit_status_name                                                string               comment    'submit_status_name' 
,version                                                           bigint               comment    'task版本' 
,is_deleted                                                        string               comment    '0正常 1逻辑删除' 
,create_user_name                                                  string               comment    '用户名称' 
,gmt_create                                                        string               comment    '新增时间' 
,modify_user_name                                                  string               comment    '用户名称' 
,gmt_modified                                                      string               comment    '修改时间' 
,owner_user_name                                                   string               comment    '用户名称' 
,sql_text                                                          string               comment    'sql 文本' 
,task_params                                                       string               comment    '任务参数' 
,schedule_conf                                                     string               comment    '调度配置 json格式' 
,job_status                                                        string               comment    '周期实例状态' 
,exec_start_time                                                   string               comment    '周期实例开始执行时间' 
,exec_end_time                                                     string               comment    '周期实例结束时间' 
,exec_time                                                         double               comment    '周期实例执行耗时' 
,job_retry_num                                                     string               comment    '实例重试次数' 
,job_business_date                                                 string               comment    '实例业务日期' 
,node_pid                                                          string               comment    '父文件夹id' 
,project_schedule_status                                           string               comment    '项目调度状态编码' 
,project_schedule_status_name                                      string               comment    '项目调度状态名称' 
,last_business_date                                                string               comment    '最后一次调度业务日期' 
,last_exec_start_time                                              string               comment    '最后一次调度任务开始时间' 
,last_exec_end_time                                                string               comment    '最后一次调度任务结束时间' 
,last_job_status                                                   string               comment    '最后一次调度任务结束时间' 
,last_exec_time                                                    double               comment    '最后一次周期实例执行耗时' 
,task_level                                                        string               comment    '任务层级(0:DIM,1:ODS,2:DWD,3:DWS,4:ADS,5:OUTPUT-999999:OTHRTS)' 
,task_level_name                                                   string               comment    '任务层级名称' 
,avg_exec_time_30d                                                 double               comment    '近30天平均运行时长(秒)' 
,parent_task_num                                                   bigint               comment    '直接上游任务个数' 
,parent_prj_active_task_num                                        bigint               comment    '直接上游任务个数(调度开启项目)' 
,parent_active_task_num                                            bigint               comment    '直接上游任务个数(正常调度任务)' 
,next_task_num                                                     bigint               comment    '直接下游任务个数' 
,next_prj_active_task_num                                          bigint               comment    '直接下游任务个数(调度开启项目)' 
,next_active_task_num                                              bigint               comment    '直接下游任务个数(正常调度任务)' 
,error_cross_layer_flag                                            string               comment    '是否依赖倒置(1:是 0:否)' 
,parent_task_name_list_error                                       string               comment    '依赖倒置上游任务列表' 
,cross_layer_flag                                                  string               comment    '是否跨层依赖(1:是,0:否)' 
,parent_task_name_list_waring                                      string               comment    '直接上游任务列表(跨层依赖)' 
,parent_task_name_list                                             string               comment    '直接上游任务列表' 
,next_task_name_list                                               string               comment    '直接下游任务列表' 
,next_prj_active_task_name_list                                    string               comment    '直接下游任务列表(调度开启项目)' 
,next_active_task_name_list                                        string               comment    '直接下游任务列表(正常调度任务)' 
,error_alarm_flag                                                  string               comment    '是否配置失败告警(1:是,0:否)' 
,unfinish_ontime_alarm_flag                                        string               comment    '是否配置定时未完成告警(1:是,0:否)' 
,uncomplete_time                                                   string               comment    '定时未完成告警时间' 
,cur_error_alarm_cnt                                               bigint               comment    '当日失败告警天数' 
,cur_unfinish_ontime_alarm_cnt                                     bigint               comment    '当日定时未完成告警天数' 
,cur_alarm_cnt                                                     bigint               comment    '当日告警天数' 
,cur_alarm_flag                                                    string               comment    '当日是否触发告警(1:是0:否)' 
,error_alarm_cnt_30d                                               bigint               comment    '近30天失败触发告警天数' 
,unfinish_ontime_alarm_cnt_30d                                     bigint               comment    '近30天定时未完成触发告警天数' 
,alarm_cnt_30d                                                     bigint               comment    '近30天触发告警天数' 
,alarm_cnt_30d_flag                                                string               comment    '近30天是否触发告警(1:是,0:否)' 
,act_schedule_flag                                                 string               comment    '是否实际在调度(1:是,0:否)' 
,full_node_name                                                    string               comment    '层级目录拼接' 
,node_name0                                                        string               comment    '所属目录' 
,node_name1                                                        string               comment    '上1级目录' 
,node_name2                                                        string               comment    '上2级目录' 
,node_name3                                                        string               comment    '上3级目录' 
,node_name4                                                        string               comment    '上4级目录' 
,node_name5                                                        string               comment    '上5级目录' 
,node_name6                                                        string               comment    '上6级目录' 
,cluster_name                                                      string               comment    '集群名称' 
,sql_text_base64                                                   string               comment    'sql脚本base64' 
,unfin_ontime_alarm_gatenames                                      string               comment    '定时未完成告警方式列表(逗号拼接)' 
,unfin_ontime_alarm_user_names                                     string               comment    '定时未完成告警接收人名称列表(逗号拼接)' 
) comment '任务表' 
partitioned by (`pt` string)
stored as parquet;
    

 

posted @ 2021-07-28 15:38  Blue眼泪2016  阅读(1715)  评论(2)    收藏  举报