SparkSQL 自定义标量函数（Scalar UDF）

本文的前提条件: SparkSQL in Java
参考地址: Scalar User Defined Functions (UDFs)

完整代码

package cn.coreqi;

import static org.apache.spark.sql.functions.udf;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.*;
import org.apache.spark.sql.expressions.UserDefinedFunction;
import org.apache.spark.sql.types.DataTypes;

/**
 * Demonstrates registering a scalar user-defined function (UDF) with Spark SQL
 * and invoking it from a SQL query.
 */
public class Main {
    /**
     * Reads {@code datas/user.json} into a DataFrame, exposes it as the temporary
     * view {@code user}, registers a UDF that prepends {@code "Name: "} to its
     * argument, and prints the result of a query applying that UDF to the
     * {@code username} column.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Build the Spark configuration: local master, application name "sparkSql".
        SparkConf sparkConf = new SparkConf()
                .setMaster("local[*]")
                .setAppName("sparkSql");

        // SparkSession is Closeable; try-with-resources closes the session even
        // when reading the file or running the query throws.
        try (SparkSession spark = SparkSession
                .builder()
                .config(sparkConf)
                .getOrCreate()) {

            Dataset<Row> df = spark.read().json("datas/user.json");
            df.show();

            // DataFrames => SQL
            df.createOrReplaceTempView("user");

            UserDefinedFunction prefixName = udf(
                    (name) -> "Name: " + name, DataTypes.StringType
            );
            // No asNondeterministic() here: it returns a new UserDefinedFunction
            // instead of modifying the receiver, so a bare call has no effect, and
            // this UDF is a pure function of its input anyway (deterministic is the
            // default).
            //
            // The registered SQL name below must match the name used in the query.
            spark.udf().register("perfixName", prefixName);
            spark.sql("select age, perfixName(username) from user").show();
        }
    }
}
posted @ 2024-01-14 13:16  SpringCore  阅读(6)  评论(0)  编辑  收藏  举报