spark 异常值过滤 IQR

/** Computes the interquartile-range (IQR) outlier fences for one column.
  *
  * The column is cast to double, its first and third quartiles (Q1, Q3) are
  * estimated with `approxQuantile`, and the fences are returned as
  * `Q1 - 1.5 * IQR` and `Q3 + 1.5 * IQR`, where `IQR = Q3 - Q1`.
  *
  * @param df      input DataFrame containing `colName`
  * @param colName name of the column to analyse; it is cast to `DoubleType`
  * @return a two-element array `Array(lowerRange, upperRange)`
  * @throws IllegalArgumentException if no quartiles could be computed for the
  *                                  column (e.g. it holds no usable numeric values)
  */
def getIQR(df: DataFrame, colName: String): Array[Double] = {
    // Cast first so the quantile computation always runs on a double column.
    val numericDf = df.withColumn(colName, col(colName).cast(DoubleType))

    // Q1 is the 25th percentile and Q3 the 75th percentile. The last argument
    // is the relative error accepted by the approximate quantile algorithm.
    val quartiles = numericDf.stat.approxQuantile(colName, Array(0.25, 0.75), 0.1)

    // Fail with a clear message instead of an ArrayIndexOutOfBoundsException
    // when the quantile computation yields nothing to index into.
    require(
        quartiles.length == 2,
        s"Cannot compute IQR for column '$colName': no quantiles were returned"
    )

    val q1 = quartiles(0)
    val q3 = quartiles(1)
    val iqr = q3 - q1

    val lowerRange = q1 - 1.5 * iqr
    val upperRange = q3 + 1.5 * iqr

    Array(lowerRange, upperRange)
}
posted @ 2021-07-17 17:55  real-zhouyc  阅读(197)  评论(0)  编辑  收藏  举报