// Spark job: scheduled data cleanup (drops dated Hive partitions)

package org

import org.apache.spark.sql.SparkSession
import java.text.SimpleDateFormat
import java.util.Calendar

object Data_Deletion_03 {

  // Shared Spark session with Hive support. Created eagerly when the object
  // is first referenced (i.e. when main runs on the driver).
  val spark: SparkSession = SparkSession
    .builder()
    .appName("Java Spark Hive Example")
    // .master("local[*]")
    // .config("spark.sql.warehouse.dir", "hdfs://nameservice1/user/hive/warehouse/stage.db/")
    .enableHiveSupport()
    .getOrCreate()

  /**
   * Scheduled partition cleanup: drops the `data_date` partition lying
   * `dayCnt` days away from the given reference date, unless that day is the
   * last day of the previous month (month-end snapshots are kept).
   *
   * args(0) = reference date, "yyyy-MM-dd" (e.g. "2022-03-15")
   * args(1) = fully qualified table name (e.g. "stage.odps_dwd_acc_loan_dd")
   * args(2) = day offset, typically negative (e.g. -3)
   */
  def main(args: Array[String]): Unit = {
    // Fail fast with a usable message instead of ArrayIndexOutOfBoundsException.
    require(args.length >= 3,
      "usage: Data_Deletion_03 <yyyy-MM-dd> <db.table> <day_offset>")

    val dolDay    = args(0)        // reference date, e.g. "2022-03-15"
    val tableName = args(1)        // e.g. "stage.odps_dwd_acc_loan_dd"
    val dayCnt    = args(2).toInt  // day offset, e.g. -3

    val sdf     = new SimpleDateFormat("yyyy-MM-dd")
    val cal     = Calendar.getInstance()
    val refDate = sdf.parse(dolDay)

    // 1. Last day of the previous month. A lenient Calendar interprets
    //    DAY_OF_MONTH = 0 as "one day before the 1st", i.e. the previous
    //    month's last day. (The original code misnamed this "month_firstday";
    //    the skip logic below shows month-end is the real intent.)
    cal.setTime(refDate)
    cal.set(Calendar.DAY_OF_MONTH, 0)
    val prevMonthLastDay = sdf.format(cal.getTime)
    println(prevMonthLastDay)

    // 2. The partition day to drop: reference date shifted by dayCnt days.
    cal.setTime(refDate)
    cal.add(Calendar.DATE, dayCnt)
    val targetDay = sdf.format(cal.getTime)
    println(targetDay)

    // NOTE(review): tableName and targetDay come straight from the command
    // line and are concatenated into DDL. Spark SQL DDL cannot take bind
    // parameters, so callers must ensure these arguments are trusted.
    val sql = s"alter table $tableName drop if exists partition(data_date='$targetDay')"
    println(sql)

    // 3. Drop the target partition, but keep month-end snapshots: skip the
    //    drop when the target day is the previous month's last day.
    try {
      if (targetDay != prevMonthLastDay) {
        spark.sql(sql)
      }
    } finally {
      // Always release the session, even if the DDL statement fails.
      spark.stop()
    }
  }
}



// posted @ 2022-03-13 00:41  付十一。  阅读(123)  评论(0)    收藏  举报