// UDF

package com.bjsxt.scala.spark.UDF_UDAF

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.types.StringType

/**
 * Example of registering and invoking a scalar user-defined function (UDF)
 * through Spark SQL's `SQLContext`.
 */
object UDF {

  /**
   * Builds a single-column DataFrame of names, registers it as the temporary
   * table `names`, registers the UDF `strLen`, and prints the result of a
   * query that applies the UDF to every row.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local")
      .setAppName("UDF")
    val sc = new SparkContext(conf)

    // Everything that can fail runs inside try/finally so the SparkContext is
    // stopped even when DataFrame creation, UDF registration or the query throws.
    try {
      val sqlContext = new SQLContext(sc)

      val names = Array("yarn", "Marry", "Jack", "Tom")

      // Distribute the names and wrap each one in a single-field Row.
      val namesRDD = sc.parallelize(names, 4)
      val namesRowRDD = namesRDD.map { name => Row(name) }

      // Schema: one string column called "name".
      val structType = StructType(Array(StructField("name", StringType, nullable = true)))

      val namesDF = sqlContext.createDataFrame(namesRowRDD, structType)

      // Register a temporary table called "names".
      // NOTE(review): registerTempTable is deprecated in newer Spark releases in
      // favour of createOrReplaceTempView; kept to match the SQLContext-era API
      // used by this file -- confirm the target Spark version before changing.
      namesDF.registerTempTable("names")

      // Scalar (per-row) UDF, not an aggregate: returns the length of `str`
      // plus `num`.
      // NOTE(review): a null `str` or `num` would throw inside the lambda; the
      // data built above contains no nulls.
      sqlContext.udf.register("strLen", (str: String, num: Integer) => str.length() + num)

      // Apply the UDF in a SQL query and print the result.
      sqlContext.sql("select name,strLen(name,1000) from names").show()
    } finally {
      sc.stop()
    }
  }
}

  

// posted @ 2018-06-23 16:53  uuhh  阅读(141)  评论(0)    收藏  举报