package org.onepiece.bigdata.windows
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Row, SQLContext, SaveMode, SparkSession, types}
import scala.util.matching.Regex
object App {
/**
 * Suppresses unnecessary log output.
 *
 * Raises the log4j level of the "org.apache.spark" and
 * "org.apache.spark.sql" loggers to ERROR.
 */
def init(): Unit = {
  import org.apache.log4j.{Level, Logger}

  // Same two loggers, in the same order, as before; only the spelling differs.
  Seq("org.apache.spark", "org.apache.spark.sql").foreach { loggerName =>
    Logger.getLogger(loggerName).setLevel(Level.ERROR)
  }
}
/**
 * One sample row of the demo data set.
 *
 * @param user   user label, used as the sort key in the demo query
 * @param cardId card id string; the sample data also contains empty and null values
 */
case class Order(
  user: String,
  cardId: String
)
/**
 * Entry point of the demo.
 *
 * Starts a local Spark session, registers the two SQL functions
 * `udf_CardId` and `regex_CardId`, exposes a small in-memory list of
 * [[Order]] rows as the temp view `cte_order`, runs one query that applies
 * both functions to every row and prints the result to the console.
 *
 * The session is always stopped before the method returns, even when the
 * query fails, so that the local Spark context and its threads are released.
 *
 * @param args command-line arguments (not used)
 */
def main(args: Array[String]): Unit = {
  init()
  val conf = new SparkConf().setMaster("local[*]").setAppName("spark-udf-test")
  val spark = SparkSession.builder()
    .config(conf)
    //.enableHiveSupport()
    .getOrCreate()
  try {
    // Needed for the List[Order] => DataFrame conversion (`toDF`) below.
    import spark.implicits._

    // Register the UDFs before the SQL text that references them is run.
    udf_CardId(spark)
    regex_CardId(spark)

    // Sample data: 18- and 15-character ids, ids containing a letter,
    // an empty string, a null and two short strings.
    val orders = List(
      Order("AA", "445321199910201030"),
      Order("BB", "44532120100910103X"),
      Order("CC", ""),
      Order("SS", "445198010201030"),
      Order("TT", "44520110910103X"),
      Order("NN", null),
      Order("XX", "445321"),
      Order("YY", "441900X")
    )
    val df = orders.toDF()
    //df.printSchema()
    //df.show()
    df.createOrReplaceTempView("cte_order")

    val result = spark.sql(
      """
        |select
        | user, cardId
        | ,udf_CardId(cardId) as udf_cardId
        | ,regex_CardId(cardId) as regex_cardId
        |from cte_order
        |order by user
        |""".stripMargin)
    result.show()
  } finally {
    // Release the session regardless of success or failure.
    spark.stop()
  }
}
/**
 * Registers the SQL function `udf_CardId` on the given session.
 *
 * The function trims its argument and returns the tail of it:
 * the last 6 characters when the trimmed value is exactly 18 characters
 * long, otherwise the last 4 characters (or the whole value when it is
 * shorter than that). A null or blank argument yields the empty string.
 *
 * @param sparkSession session whose UDF registry receives the function
 * @return whatever `udf.register` returns for the registered function
 */
def udf_CardId(sparkSession: SparkSession) = {
  sparkSession.udf.register("udf_CardId", (str: String) => {
    // Option(...) turns a null argument into None; blank values are dropped too.
    Option(str).map(_.trim).filter(_.nonEmpty) match {
      case Some(id) if id.length == 18 => id.takeRight(6)
      case Some(id)                    => id.takeRight(4)
      case None                        => ""
    }
  })
}
/**
 * Registers the SQL function `regex_CardId` on the given session.
 *
 * The function returns true when its trimmed argument matches
 * `^[0-9]*$`, i.e. consists only of the digits 0-9, and false otherwise.
 * A null or blank argument yields false; that case is handled by
 * `regex_string`, so no separate guard is needed here.
 *
 * @param sparkSession session whose UDF registry receives the function
 * @return whatever `udf.register` returns for the registered function
 */
def regex_CardId(sparkSession: SparkSession) = {
  // Compile the pattern once at registration time instead of once per row.
  val digitsOnly: Regex = "^[0-9]*$".r
  sparkSession.udf.register("regex_CardId", (str: String) => regex_string(str, digitsOnly))
}
/** Trims `str`; yields None when it is null or blank after trimming. */
private def trimmedNonBlank(str: String): Option[String] =
  Option(str).map(_.trim).filter(_.nonEmpty)

/**
 * Tests whether the trimmed `str` contains a match of the pattern `regex`.
 *
 * The pattern is compiled on every call; prefer the overload that takes a
 * [[scala.util.matching.Regex]] when the same pattern is applied repeatedly.
 * The pattern is only compiled when `str` is neither null nor blank.
 *
 * @param str   text to test; null or blank input yields false
 * @param regex regular expression source, e.g. "^[0-9]*$"
 * @return true when a match is found anywhere in the trimmed text
 */
def regex_string(str: String, regex: String): Boolean =
  trimmedNonBlank(str).exists(text => new Regex(regex).findFirstMatchIn(text).isDefined)

/**
 * Tests whether the trimmed `str` contains a match of the compiled `regex`.
 *
 * @param str   text to test; null or blank input yields false
 * @param regex compiled regular expression, e.g. "^[0-9]*$".r
 * @return true when a match is found anywhere in the trimmed text
 */
def regex_string(str: String, regex: Regex): Boolean =
  trimmedNonBlank(str).exists(text => regex.findFirstMatchIn(text).isDefined)
}
/*
Sample console output (the schema is what df.printSchema() prints when enabled; the table is printed by result.show()):

root
|-- user: string (nullable = true)
|-- cardId: string (nullable = true)
+----+------------------+----------+------------+
|user| cardId|udf_cardId|regex_cardId|
+----+------------------+----------+------------+
| AA|445321199910201030| 201030| true|
| BB|44532120100910103X| 10103X| false|
| CC| | | false|
| NN| null| | false|
| SS| 445198010201030| 1030| true|
| TT| 44520110910103X| 103X| false|
| XX| 445321| 5321| true|
| YY| 441900X| 900X| false|
+----+------------------+----------+------------+
*/