Hive生成日期维度表
1.日期表结构
2.生成代码
/* create table if not exists dim_pub_date ( date_id string comment '日期(yyyymmdd)' ,datestr string comment '日期(yyyy-mm-dd)' ,date_name string comment '日期名称中文' ,weekid bigint comment '周(0-6,周日~周六)' ,week_cn_name string comment '周_名称_中文' ,week_en_name string comment '周_名称_英文' ,week_en_nm string comment '周_名称_英文缩写' ,yearmonthid string comment '月份id(yyyymm)' ,yearmonthstr string comment '月份(yyyy-mm)' ,monthid bigint comment '月份id(1-12)' ,monthstr string comment '月份' ,month_cn_name string comment '月份名称_中文' ,month_en_name string comment '月份名称_英文' ,month_en_nm string comment '月份名称_简写_英文' ,quarterid bigint comment '季度id(1-4)' ,quarterstr string comment '季度名称' ,quarter_cn_name string comment '季度名称_中文' ,quarter_en_name string comment '季度名称_英文' ,quarter_cn_nm string comment '季度名称_简写中文' ,quarter_en_nm string comment '季度名称_简写英文' ,yearid bigint comment '年份id' ,year_cn_name string comment '年份名称_中文' ,year_en_name string comment '年份名称_英文' ,month_start_date string comment '当月1号(yyyy-mm-dd)' ,month_end_date string comment '当月最后日期(yyyy-mm-dd)' ,month_timespan bigint comment '月跨天数' ,week_of_year bigint comment '当年第几周' ,workday_flag string comment '是否工作日(周一至周五Y,否则:N)' ,weekend_flag string comment '是否周末(周六和周日Y,否则:N)' ,week_start_date string comment '当周起始日期' ,week_end_date string comment '当周结束日期' ,year_week_cn_name string comment '当年第几周(xxxx年第xx周)' ,dayid string comment '当日序号id(1-31)' ,daystr string comment '当日序号字符(01-31)' )comment '日期维度表' stored as parquet; */ insert overwrite table dim_pub_date select date_id ,datestr ,concat(yearid,'年',monthid,'月',substr(datestr,9,2),'日') as date_name ,weekid ,case weekid when 0 then '星期日' when 1 then '星期一' when 2 then '星期二' when 3 then '星期三' when 4 then '星期四' when 5 then '星期五' when 6 then '星期六' end as week_cn_name ,case weekid when 0 then 'Sunday' when 1 then 'Monday' when 2 then 'Tuesday' when 3 then 'Wednesday' when 4 then 'Thurday' when 5 then 'Friday' when 6 then 'Saturday' end as week_en_name ,case weekid when 0 then 'Sun' when 1 then 'Mon' when 2 then 'Tues' when 3 then 'Wed' when 4 then 'Thur' when 5 then 'Fri' when 6 then 'Sat' end as week_en_nm ,substr(date_id,1,6) as yearmonthid ,substr(datestr,1,7) as yearmonthstr ,monthid ,concat(yearid,'年',monthid,'月') as monthstr ,concat(monthid,'月') as month_cn_name ,case monthid when 1 then 'January' when 2 then 'February' when 3 then 'March' when 4 then 'April' when 5 then 'May' when 6 then 'June' when 7 then 'July' when 8 then 'August' when 9 then 'September' when 10 then 'October' when 11 then 'November' when 12 then 'December' end as month_en_name ,case monthid when 1 then 'Jan' when 2 then 'Feb' when 3 then 'Mar' when 4 then 'Apr' when 5 then 'May' when 6 then 'Jun' when 7 then 'Jul' when 8 then 'Aug' when 9 then 'Sept' when 10 then 'Oct' when 11 then 'Nov' when 12 then 'Dec' end as month_en_nm ,quarterid ,concat(yearid,quarterid) as quarterstr ,concat(yearid,'年第',quarterid,'季度') as quarter_cn_name ,concat(yearid,'Q',quarterid) as quarter_en_name ,case quarterid when 1 then '第一季度' when 2 then '第二季度' when 3 then '第三季度' when 4 then '第四季度' end as quarter_cn_nm ,concat('Q',quarterid) as quarter_en_nm ,yearid ,concat(yearid,'年') as year_cn_name ,yearid as year_en_name ,month_start_date ,month_end_date ,datediff(month_end_date,month_start_date) + 1 as month_timespan ,week_of_year ,case when weekid in (1,2,3,4,5) then 'Y' else 'N' end as workday_flag ,case when weekid in (0,6) then 'Y' else 'N' end as weekend_flag ,case when weekid = 0 then date_sub(datestr,6) else date_sub(datestr,weekid-1) end as week_start_date --周起始日期 ,case when weekid = 0 then datestr else date_add(datestr,6-weekid+1) end as week_end_date --周结束日期 ,concat(year(date_sub(next_day(datestr,'MO'),4)),'年第',week_of_year,'周') as year_week_cn_name ,cast(from_unixtime(unix_timestamp(datestr,'yyyy-MM-dd'),'dd') as bigint) as dayid ,from_unixtime(unix_timestamp(datestr,'yyyy-MM-dd'),'dd') as daystr from ( select from_unixtime(unix_timestamp(datestr,'yyyy-MM-dd'),'yyyyMMdd') as date_id ,datestr as datestr ,pmod(datediff(datestr, '2012-01-01'), 7) as weekid ,concat(substr(datestr,1,4),substr(datestr,6,2)) as yearmonthid ,substr(datestr,1,7) as yearmonthstr ,cast(substr(datestr,6,2) as int) as monthid ,case when cast(substr(datestr,6,2) as int) <= 3 then 1 when cast(substr(datestr,6,2) as int) <= 6 then 2 when cast(substr(datestr,6,2) as int) <= 9 then 3 when cast(substr(datestr,6,2) as int) <= 12 then 4 end as quarterid ,substr(datestr,1,4) as yearid ,date_sub(datestr,dayofmonth(datestr)-1) as month_start_date --当月第一天 ,last_day(date_sub(datestr,dayofmonth(datestr)-1)) month_end_date --当月最后一天 ,weekofyear(datestr) as week_of_year from ( select date_add('1900-01-01',t0.pos) as datestr from ( select posexplode( split( repeat('o', datediff(from_unixtime(unix_timestamp('20501231','yyyymmdd'),'yyyy-mm-dd'), '1900-01-01')), 'o' ) ) ) t0 ) t ) A ;
create table if not exists public.dwd_ast_rdos_batch_task_dd( id bigint comment 'id' ,project_identifier string comment '项目标识' ,name string comment '任务名称' ,tenant_id bigint comment '租户id' ,dtuic_tenant_id bigint comment '租户id' ,tenant_name string comment '用户名称' ,tenant_desc string comment '租户描述' ,project_id bigint comment '项目id' ,project_name string comment '项目名称' ,project_alias string comment '项目别名' ,task_type string comment '任务类型 -1:虚节点,0:sparksql,1:spark,2:数据同步,3:pyspark,4:R,5:深度学习,6:python,7:shell,8:机器学习,9:hadoopMR,10:工作流,12:carbonSQL,13:notebook,14:算法实验,15:libra sql,16:kylin,17:hiveSQL' ,task_type_name string comment 'task_type_name' ,period_type string comment '周期类型' ,period_type_name string comment 'period_type_name' ,engine_type string comment '执行引擎类型 0:flink,1:spark,2:datax,3:learning,4:shell,5:python2,6:dtyarnshell,7:python3,8:hadoop,9:carbon,10:postgresql,11:kylin,12:hive' ,engine_type_name string comment 'engine_type_name' ,schedule_status string comment '0未开始,1正常调度,2暂停' ,schedule_status_name string comment 'schedule_status_name' ,submit_status string comment '0未提交,1已提交' ,submit_status_name string comment 'submit_status_name' ,version bigint comment 'task版本' ,is_deleted string comment '0正常 1逻辑删除' ,create_user_name string comment '用户名称' ,gmt_create string comment '新增时间' ,modify_user_name string comment '用户名称' ,gmt_modified string comment '修改时间' ,owner_user_name string comment '用户名称' ,sql_text string comment 'sql 文本' ,task_params string comment '任务参数' ,schedule_conf string comment '调度配置 json格式' ,job_status string comment '周期实例状态' ,exec_start_time string comment '周期实例开始执行时间' ,exec_end_time string comment '周期实例结束时间' ,exec_time double comment '周期实例执行耗时' ,job_retry_num string comment '实例重试次数' ,job_business_date string comment '实例业务日期' ,node_pid string comment '父文件夹id' ,project_schedule_status string comment '项目调度状态编码' ,project_schedule_status_name string comment '项目调度状态名称' ,last_business_date string comment '最后一次调度业务日期' ,last_exec_start_time string comment '最后一次调度任务开始时间' ,last_exec_end_time string comment '最后一次调度任务结束时间' ,last_job_status string comment '最后一次调度任务结束时间' ,last_exec_time double comment '最后一次周期实例执行耗时' ,task_level string comment '任务层级(0:DIM,1:ODS,2:DWD,3:DWS,4:ADS,5:OUTPUT-999999:OTHRTS)' ,task_level_name string comment '任务层级名称' ,avg_exec_time_30d double comment '近30天平均运行时长(秒)' ,parent_task_num bigint comment '直接上游任务个数' ,parent_prj_active_task_num bigint comment '直接上游任务个数(调度开启项目)' ,parent_active_task_num bigint comment '直接上游任务个数(正常调度任务)' ,next_task_num bigint comment '直接下游任务个数' ,next_prj_active_task_num bigint comment '直接下游任务个数(调度开启项目)' ,next_active_task_num bigint comment '直接下游任务个数(正常调度任务)' ,error_cross_layer_flag string comment '是否依赖倒置(1:是 0:否)' ,parent_task_name_list_error string comment '依赖倒置上游任务列表' ,cross_layer_flag string comment '是否跨层依赖(1:是,0:否)' ,parent_task_name_list_waring string comment '直接上游任务列表(跨层依赖)' ,parent_task_name_list string comment '直接上游任务列表' ,next_task_name_list string comment '直接下游任务列表' ,next_prj_active_task_name_list string comment '直接下游任务列表(调度开启项目)' ,next_active_task_name_list string comment '直接下游任务列表(正常调度任务)' ,error_alarm_flag string comment '是否配置失败告警(1:是,0:否)' ,unfinish_ontime_alarm_flag string comment '是否配置定时未完成告警(1:是,0:否)' ,uncomplete_time string comment '定时未完成告警时间' ,cur_error_alarm_cnt bigint comment '当日失败告警天数' ,cur_unfinish_ontime_alarm_cnt bigint comment '当日定时未完成告警天数' ,cur_alarm_cnt bigint comment '当日告警天数' ,cur_alarm_flag string comment '当日是否触发告警(1:是0:否)' ,error_alarm_cnt_30d bigint comment '近30天失败触发告警天数' ,unfinish_ontime_alarm_cnt_30d bigint comment '近30天定时未完成触发告警天数' ,alarm_cnt_30d bigint comment '近30天触发告警天数' ,alarm_cnt_30d_flag string comment '近30天是否触发告警(1:是,0:否)' ,act_schedule_flag string comment '是否实际在调度(1:是,0:否)' ,full_node_name string comment '层级目录拼接' ,node_name0 string comment '所属目录' ,node_name1 string comment '上1级目录' ,node_name2 string comment '上2级目录' ,node_name3 string comment '上3级目录' ,node_name4 string comment '上4级目录' ,node_name5 string comment '上5级目录' ,node_name6 string comment '上6级目录' ,cluster_name string comment '集群名称' ,sql_text_base64 string comment 'sql脚本base64' ,unfin_ontime_alarm_gatenames string comment '定时未完成告警方式列表(逗号拼接)' ,unfin_ontime_alarm_user_names string comment '定时未完成告警接收人名称列表(逗号拼接)' ) comment '任务表' partitioned by (`pt` string) stored as parquet;