自定义函数
1.UDF(user-defined function)
一进一出(一行数据)
1.1 定义函数
(1)继承 org.apache.hadoop.hive.ql.exec.UDF
(2)需要实现evaluate函数,evaluate()支持重载
(3)UDF必须有返回值类型,可以返回null,但不能为void
(4)打包成jar包
<hive.version>0.13.1</hive.version> <dependency> <groupId>org.apache.hive</groupId> <artifactId>hive-jdbc</artifactId> <version>${hive.version}</version> </dependency> <dependency> <groupId>org.apache.hive</groupId> <artifactId>hive-exec</artifactId> <version>${hive.version}</version> </dependency>
package com.atguigu.myudf;

import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * Hive UDF that adds a fixed offset of 5 to an integer column value.
 *
 * <p>Registered in Hive as {@code addFive} (see the accompanying
 * {@code create function} statement). One row in, one row out.
 */
public class MyUdf extends UDF {

    /**
     * Adds 5 to the given value.
     *
     * @param value the input column value
     * @return the input value plus 5
     */
    public int evaluate(int value) {
        return 5 + value;
    }
}
1.2 hive调用函数
(1)把jar包上传到hdfs hadoop fs -put /home/kg/qiaoruihua/hive/jar/addfive.jar /user/kg/hive_udf (2)hive添加jar包 add jar hdfs://cluster/user/kg/hive_udf/addfive.jar (3)创建函数 create function addFive as "com.atguigu.myudf.MyUdf"; (4)调用函数 select addFive(cost) from student;
2.UDAF(user-defined aggregation function)
类似于聚合函数:max,sum,avg
多进一出
3.UDTF(user-defined table-generating function)
一进多出
select splitStr2("hello-world-at-guigu","-");
package com.atguigu.myudtf; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import java.util.ArrayList; import java.util.List; public class MyUdtf extends GenericUDTF { private List<String> dataList=new ArrayList<>(); //定义输出数据得列名和数据类型 @Override public StructObjectInspector initialize(StructObjectInspector argOIs) throws UDFArgumentException { //数据列名 List<String> fieldNames = new ArrayList<>(); fieldNames.add("name"); //数据类型 List<ObjectInspector> fieldOIs = new ArrayList<>(); fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames,fieldOIs); } @Override public void process(Object[] args) throws HiveException { //1.获取数据 String data = args[0].toString(); //2.获取分隔符 String splitKey = args[1].toString(); //3.切分数据 String[] words = data.split(splitKey); //4.写出数据 for(String word:words){ //5.将数据放到集合 dataList.clear(); dataList.add(word); //6.写出数据 forward(dataList); } } @Override public void close() throws HiveException { } }
posted on 2020-12-09 17:13 happygril3 阅读(58) 评论(0) 收藏 举报