1 回顾:
2 hive
3 --------------------------------
4 数据仓库OLAP 在线分析处理,延迟较高
5 数据库:OLTP 在线事务处理,事务支持
6
7 运行在hadoop 类sql运行,sql,hql,mr运算
8 结构化数据
9 schema(模式,元信息,存放到数据库中)HDFS 文件 derby,mysql
10 数据库和表都是文件夹路径。
11 hive
12 ------------------------------------------
13 类似mysql 结构化数据
14 配置hive bin/env.sh
15 schematool -initshcema --dertype derby //初始化模式
16 create database
17
18 常见hive命令:
19 0.启动hadoop
20 1.初始化schema库
21 hive/bin/schematool -initschema -dbtype derby
22 2.完成后,在当前目录下创建一个metastore_db(元数据库)
23 3.进入hive shell
24 hive
25 hive常见命令,类似于mysql
26 ------------------------------------------
27 >show databases;
28 >create databases myhive;
29 >user myhive;
30 >show tables;
31 //常见表
32 >create table if not exists myhive.employee
33 (eid int,name string,salary string,destination string)
34 comment 'employee details'
35 row format delimited
36 fields terminated BY '\t'
37 lines terminated by '\n'
38 stored as textfile;
39 //加载数据到hive(hdfs)
40 >load data (local) inpath 'filepath' (overwrite) into table tablename(partition (partcol1=val1,partcol2=val...))
41 准备数据employee 数据
42 1201 gopal 40000 technical manager
43 1202 manisha 45000 proof reader
44 1203 masthancali 400000 technical writer
45 1204 krian 4000 hr admin
46 1205 kranthi 30000 op admin
47 /t
48 show tables; 查看表
49 desc employee;
50 ok int
51 name string
52 salary string
53 destination string
54 //加载数据
55 ---------------------------------------------------------
56 hive>load data local inpath '/home/ubuntu/desktop/employees.txt' into table employee
57 //插入数据=====上传
58 insert
59 select * from employee
60
61 hdfs dfs -cat /user/hive/warehouse/myhive.db/employee/employees.txt
62
63 select eid,name from employee 0rder by eid limit 2,4;
64
65 //支持插入,不支持删除和更新
66
67 //修改表alter table
68 启动hiveserver2服务-----接受多个客户端连接请求
69 hive --service hiveserver2 start
70 查看作业
71 jobs
72 netstat -ano|more 10000号端口
73
74 在eclipse中创建maven项目,使用jdbc连接操作hive数据仓库
75 -------------------------------------------------------
76
77 查看表
78 dfs -LSR /;
79
80 常用聚集函数
81 ------------------------------------------------------
82 count()
83 sum()
84 max()
85 min()
86
87 解决beeline命令行终端的上下键导航历史命令的bug
88 -----------------------------------------------------
89 修改行
90 if((!$(ps -o stat=-p $$)=~ +));then
91 改为:
92 if((!$(ps -o stat=-p $$)=~ '+'));then
93
94 hive命令
95 -----------------------------------------------------
96 $hive>dfs -lsr //执行dfs命令
97 $hive>!clear; //执行shell脚本
98 $hive -e “select * from test” //-e execute 执行的意思
99 $hive -S -e “select * from test” >/tmp/myquery/ 重定向 -S execute 静默模式 输出的时候去掉ok等行
100 $hive > tab键tab键 //528个可能么 y(显示函数和关键字)
101 $hive >this is a comment
102 $hive >hive -f /x/x/a.sql //-f 执行一个文件,通常用于批处理,调度
103 $hive>--this is a comment //显示所有命令
104 $hive>set hive.cli.print.header=true //显示字段名称
105
106 $hive>create database if exists not xxx
107 $hive>drop database if exists xxx //存在即删除
108 $hive>alter database hive3 set dbpropertues('author'='you')
109 $hive>drop database if exists xxx //存在即删除
110 $hive>drop database if exists xxx cascde //级联删除
111 $hive>create database hive2 location '/user/ununtu/'
112 $hive>create database hive3 with dbpropertime('author'='xupc','createtime'='today') //创建指定表,指定属性
113
114 [创建表语法]
115 create (temporary)(external)table(if not exists)(db_name.)table_name
116 ((col_name data_type (commebt col_comment),....))
117 (comment table_comment)
118 (row format row_format)
119 (stored as file_format)
120
121 例如:
122 create table if not exists employee(eid int,name string,salary string,destination string)
123 comment 'employee details' //注释
124 row format delimited fields terminated by '\t' //字符结束符
125 lines terminated by '\n' //行结束符
126 stored as textfile; //存储何种文件
127
128 [带分区的创建表]
129 create table hive1.test5(id int,name string,age int )
130 partitoned by (provence string,city string)
131 row format delimited fields terminated by '\t' //字符结束符
132 lines terminated by '\n' //行结束符
133 stored as textfile;
134
135 [加载数据===insert]
136 load data (loacal) inpath 'filepath'(overwrite)into table tablename(partition (partcol1=val1,partcol=val2...))
137 [加载数据===例子]
138 load data local inpath '/home/user/sample/txt' overwrite into table employee partitoned by (provence string,city string);
139
140
141 [创建分区]
142 create table hive1.test5(id int,name string,age int )partitoned by (provence string,city string);//按照省份和城市分区
143 [加载数据到指定分区]
144 load data local inpath '/home/user/sample/employee' overwrite into table hive1.test5 partitoned by (provence='shanxi',city='jizhong');
145
146
147 [手动增加分区]
148 alter table hive1.test5 add partition(provice='henan',city='piingdingshan');
149 alter table hive1.test5 add partition(area='hebei',provice='henan',city='piingdingshan');//增加不存在的分区列,是非法的。
150
151 [修改表]
152 $hive>alter table hive1.test5 rename to hive1.test6; //重命名
153 $hive>alter table hive1.test5 add partion(provive='hebei',city='zhangjiakou') location 'xxx' //添加多个分区
154 partion(provive='hebei',city='zhangjiakou')
155 partion(provive='hebei',city='zhangjiakou')
156 partion(provive='hebei',city='zhangjiakou')
157 $hive>alter table hive1.test5 add partion(provive='hebei',city='zhangjiakou') set location 'xxx' //移动分区
158 $hive>alter table hive1.test6 add columns(birth string,fire string); //增加列
159 $hive>alter table hive1.test6 replace columns(birth string,fire string); //增加列
160 $h$hive>alter table hive1.test5 set tblproperties('a'='x',......) //修改表属性
161
162 [启用归档]
163 $hive>set hive.archive.enabled=true //设置可归档,默认false
164 insert into hive2.test5 partition(province='hebei',city='baoding') select * from hive1.test2;//
165 insert overwrite.table employees partition(country='us',state='or') select * from stated_employee se where se .cnty='us' and se.st='or';//字段个数要相同
166 查询时候,分区通过where字句指定
167 //插入时,分区用partition指定 overwrite 覆盖
168 [动态分区]
169 insert overwrite table hive1.test6 partition(province,city) select id,...,province,city,from table2; //动态分区
170 hdfs dfs -lsr /; 查看路径
171
172 [查看数据hdfs]
173 /user/hive/warehouse/hive1.test5/provice=hebei/city=baoding/employee.txt
174 [分区表的查询模式:strict/nostrict]
175 set hive.mapred.mode=strict //严格模式,默认是nostrict
176 show pattitions hive1.test5;
177 show pattitions hive1.test5.partition(provice='hebei');
178
179
180
181 $hive>desc extended hive1.test1; //显示扩展信息
182 $hive>desc database hive2; //描述数据库
183 $hive>desc formatted hive1.test1; //显格式化信息
184 $hive>create table hive2.test4 like hive1.test1;//复制表
185 $hive>show tables in hive2; //显示指定数据库的表的集合
186 hive控制元数据,删除托管表时,数据不被删除
187
188
189 $hive>create external table hive1.test3 like hive1.test1; //创建外部表external,只复制表结构,没有有数据
190 $hive>create external table hive1.test4 as select * from hive1.test1; //创建外部表external,只复制表结构,有数据
191
192
193 $hive> use hive3
194 使用beeline客户端可以实现远程jdbc连接
195 -----------------------------------------------------
196 1.连接
197 $hive --service beeline -u jdbc:hive2://s100:10000/hive1;
198 $beeline -u jdbc:hive2://s100:10000/hive1;
199 $beeline>!sh clear;
200 $beeline>show databases;
201 $beeline>!help
202 $beeline>!dbinfo //帮助
203
204 配置hive的仓库位置
205 -----------------------------------------------------
206 hive-site.xml
207 hive-metastore.warehouse.dir=/user/hive/warehouse/
208
209 hive的数据类型
210 ------------------------------------------------------
211 bytes: 案例
212 tinyint 1 20
213 smallint 2 20
214 int 4 20
215 bigint 8 20
216 boolean true or false true
217 float single pricision floating point 3.14
218 double double pricision floating point 3.14
219
220 string 'now is the time','for all good men'
221 timestamp
222 binary 字节数组
223 集合类型
224 struct struct('john','doe')
225 hap map('first','hohn','last','doe')
226 array array ('john','doe')
227 mysql>select * from tbls
228 hive>show tables;
229 创建表:
230 create table hive.test1(id int,name string,age int) tblproperties('author'='you');
231
232 hive 所谓的读模式
233 ------------------------------------------------------
234
235 托管表
236 --------------------------------------------------------
237 hive默认的表都是托管表,hive控制数据的生命周期,删除托管表时候,元数据和数据都被删除
238 外部表
239 -------------------
240 hive控制元数据,删除托管表时,数据不被删除
241 create external table hive1.test3 like hive1.test1;
242 create external table hive1.test4 as select * from hive1.test1; 有数据
243
244 -------------------------------------------------------------------------------
245 创建分区表
246 --------------
247 create external table hive1.test2(id int,name string,age int)
248 partitioned by (province string,city string)
249 row format delimted
250 field terminated by '\t'
251 lines terminated by '\n'
252 store as testfile;
253
254 手动添加分区
255 -------------------------------------------------------
256 alter table hive1.test2 add partition(provice='hebei',city='baoding')
257 插入数据
258 ------------------------------------------------------
259 insert into hive1.test2 partition(province='hebei',city=baoding')
260 selet * from hive1.test6 where province='hebei' and city='shijiazhuang' and id >5
261
262 ---------------------------------------------------------------------------------
263 insert overwrite table test2 partition(provice='hebei',city='baoding')select id,name,,age from test1;
264
265
266 动态分区:
267 --------------------------------------------------------------------------------------
268 创建test3分区表
269 create table test3(id int,name string,age int)
270 prititioned by (province string,city string)
271 row format delimted
272 field terminated by '\t'
273 lines terminated by '\n'
274 store as testfile;
275 动态分区,复制一个表数据到分区表,动态创建分区
276 如果两个都是动态分区需要关闭严格模式
277 set hive.exec.dynamic.partition.mode=nonstrict; //关闭动态分区的严格模式
278 insert overwrite table hive1.test6 partition(provice,city) select id,name,age'henan' as provice,'kaifeng' as city table2;
279 使用分区的动态和静态的混合
280 insert overwrite table hive1.test6 partition(provice='henan',city) select id,name,age'henan' as provice,'kaifeng' as city table2;
281
282
283 查询期间动态创建,并将数据写入创建表中
284 create tbale test3 as select id ,name from test2 where province='hebei' and city='baoding'
285
286 导出hive的数据到本地目录(下载)
287 ---------------------------------------------------------------------------------------------
288 insert overwrite local directory '/home/ubuntun/a.txt' select * from test2 where province='hebei'
289
290 导出hive的数据到HDFS目录(下载)
291 ---------------------------------------------------------------------------------------------
292 insert overwrite local directory 'hdfs:s100:8020/user/ubuntu/xxx' select * from test2 where province='hebei'
293
294
295 查询数据向多个目录同时输出
296 from test2 t
297 insert overwrite local directory '/home/ubuntu/hebei' select * where t.province='hebei'
298 insert overwrite local directory '/home/ubuntu/henan' select * where t.province='henan';
299 查询
300 ---------------------
301 查询,投影查询
302 select col1,col2,...from table t;
303 查询,投影查询,指定表的别名
304 select upper(name) from test2;
305 select lowper(name) from test2;
306
307 select id,name,-age from table
308 数学函数
309 --------------------------------------------
310 select round(12.345) //四舍五入 12
311 select ceil(12.345) 13 //天花板
312 select floor(12.345) 12 //地板
313 select round(10) 随机数
314
315 聚集函数
316 ------------------------------------------------
317 select count(*) from test2;
318 select max(*) from test2;
319 select min(*) from test2;
320 select avg(*) from test2;
321 select sum(*) from test2;
322
323 去重,distinct;
324 --------------------------------------------------
325 select count(distinct name) from test2;
326 表生成函数
327 -----------------------------------------------
328 select explode(array('tom','tomas','tomslee')) from test2; tom tomas tomslee
329
330 ascii函数,字符串收个字母ascii值
331 ----------------------------------------------------------------------------------
332 select ascii('abc'); 返回第一个97
333
334 base64字符串编码,需要二进制数
335 binary 函数,可以将字符串转换成二进制数据
336 select base64(binary('http://localhost:8080/helloworld'));
337
338 类型转换
339 ----------------
340 select cast('120' as bigint)
341 字符串连接
342 -------------------------------------
343 select concat('120','200') 120200
344
345 limit 分页查询
346 --------------------------------------
347 select * from test2,limit1,3 //offset,length
348 嵌套查询
349 ------------------------------------------
350 from(select * from test2 where province='hebei')e select e.id,e.name,e.age where e.city='baoding';
351 case when then
352 ---------------------------------------------
353 select id,name,
354 case when age<=12 then 'young'
355 when age>12 and age<=13 then 'middle'
356 when age>13 and age<=15 then 'old'
357 else 'too old'
358 end as yearstate from test2;
359
360 select id ,name n,age from test2 where n like '%t' //语法错误,where中不能使用字段名
361
362 浮点数比较的规避的方案
363 ----------------------------------------------------------------------
364 select cast(0.2 as float);
365
366 hive 的join操作,只支持等值连接
367
368 创建customers和orders表 一对多关系
369 customers
370 create table customers(id int,name string,age int)
371 row format delimited
372 fields terminated by '\t'
373 lines terminated by '\n'
374 stored as textfile;
375
376 orders
377 create table orders(id int,orderno string,price float,cid int)
378 row format delimited
379 fields terminated by '\t'
380 lines terminated by '\n'
381 stored as textfile;
382
383 customers.txt 数据
384
385 1 tom1 12
386 2 tom2 13
387 3 tom3 14
388
389 orders.txt 数据
390
391 1 NO001 121.34 1
392 2 NO002 123 1
393 3 NO003 232 1
394 4 NO004 32 2
395 5 NO005 324 2
396 6 NO006 234 2
397 7 NO007 5654 null
398
399
400 内连接 join on
401 --------------------------------------------------------------------------------------
402 select a.id,a.name,b.id,b.orderno,b.price from customers a join orders b on a.id=b.cid;
403
404 连接查询的优惠手段,查询表的大小从左到右是递增的。
405 select c.id,c.name,c.age,o.orderno,o.price from customers c join orders o on c.id=o.cid where ... //right
406 select c.id,c.name,c.age,o.orderno,o.price from orders o join customers c on c.id=o.cid where ... //wrong
407 ------使用查询暗示
408 select /**streamtable(c)/c.id,c.name,c.age,o.orderno,o.price from orders o join customers c on c.id=o.cid where ... //wrong
409
410 left outer join
411 ----------------------------
412 select c.id,c.name,c.age,o.orderno,o.price from customers a left outer join order b on c.id=o.cid;
413
414 right outer join
415 select c.id ,c.name,c.age,o.orderno,o.price from customers a full outer join order b on c.id=o.cid;
416
417 左半连接,select和where子句不能引用到右边的表字段
418 左表的记录在右表中一旦找到对应的记录,右侧表即停止扫描
419 -----------------------------------------------------
420 hive不支持右半连接 right semi join xxxx
421 --------------------------------------------------------
422
423 笛卡尔积m*n
424 ----------------------------------------------------------
425 select c.id,c.name,o.orderno from customers c.join orders o;
426 map 连接,一张小表,通过mapper的时候,将小表完全载入内存中
427 select /++mapjoin (c)*/c.id ,c.name,o.orderno from customers c join orders o;
428
429
430 order by 全排序,对所有数据通过一个reduce进行排序
431 select * from orders order by asc,price desc; ----全局排序
432 sort by 局部排序 每个reduce进行排序(局部排序)
433 select * from orders sort by cid asc,price desc; ----局部排序
434 distrbute by 等价于自定义分区函数
435 select * from orders distribute cid sort by price desc; 局部排序
436
437 cluster by 排序 =====districte by ... sort by.............
438
439 分桶采样
440 select * from orders tablesample(bucket 3 out of 10 on number );
441 按照数据块百分比采样,100块,抽取10块,如果总共1块,没有采样。
442 select* from orders tablesample(0.1 percent);
443
444
445 union all 联合操作 ,字段的类型和个数需要匹配。
446 ------------------------------------------------------------------
447 select id,name from customers union all select id ,order select id ,orderno from orders;
448
449 -------view视图---降低查询的复杂度 --创建视图
450 create view as select...
451 create view view1 as select c.id,c.name,c.age,o.id,o.orderno,o.price from customers c left outer join orders o on c.id=o.cid;
452 通过视图直接查询
453 select * from view1 where price>200;
454
455 使用like方式创建视图
456 create view view2 like view;
457
458 删除视图
459 drop view if exists v2;
460
461
462
463 hive 索引 创建索引,deferred rebuild 该选项时,索引为空白状态,需要rebuild才能够初始化。
464 -----------------------------------------------------------------
465 create index idx_test2_d on table customers(id) as 'org.apache.hadoop.hive.ql.index.compact.compactindexhandler' with deferred
466 rebuild idxprpperties ('creator'='me') in table customers_index comment 'this is a comment';
467
468 alter index idx_customers_id on customers rebuild; --------------重建索引
469
470 删除索引
471 drop index idx_customers_id on table customers;
472
473 ----------桶表------------------------------------bucket
474 分区是路径,是目录,是文件逻辑隔离,有效降低查询量
475
476 桶表 是文件,
477 创建桶表
478 create table ... clustered by (filed_name) into n buckers;
479 create table orderitems (id int,itemanme string,oid int ) clusrered by (oid)into 3 buckets row delimited field terminated by '\t' lines terminated by '\n' sored as textfile;
480
481
482
483
484
485
486
487
488
489
490
491 hive在什么情况下可以避免mr操作
492 ----------------------------------------------
493 不是mr的作业就是本地模式
494 1.全表扫描,没有where字句
495 select * from test2
496 2.where 子句作用只有分区字段。也不需要mr
497 3.设置hive.exec.model.local.auto=true
498 该属性hive会尽量使用local模式查询
499 4.其余所有的查询都会转换成mr
500
501
502 group by分组查询
503 ------------------------------------------------
504 select count(*),province from test2 group by province;
505 select count(*),as c,province from test2 group by province having c>3;//having组内过滤
506
507 hive的join操作,只支持等值连接
508 --------------------------------------------------
509
510
511 回顾2018.10.3
512 hive
513 ------------------------------------------------------
514 1.内部表
515 数据生命周期
516 2.外部表
517 删除外部表,比没有删除数,删掉了schema(rdbms)
518 3.分区表
519 表目录的子目录
520 create table xx(...)pratitioned()
521 load data local inpath... into table xxx partition(....)
522 4.bucket表
523 数据文件
524
525 调优
526 ------------------------------------------------------
527 1. explain
528 解释执行计划
529 explain select sum(age) from test2;
530 2.启用limit调优,避免全表扫描,使用抽样机制
531 select * from xxx limit1,2
532 hive.limit.optimize.enable=true
533 3.join
534 使用map端连接(/**streamtable(table)*/)
535 连接查询表的大小是从左至右依次增长
536 4.设置本地模式,在单台机器上处理所有任务。
537 适用于小数据情况
538 5.并行执行job
539 如果job之间没有依赖关系,可以并发执行,缩短执行时间
540 set.hive.exec.parallel=true
541 6.严格模式
542 set hive.mapred.strict=true //不推荐,早期使用
543 set hive.strict.checks.large.query=false //默认false,该设置会禁用一下操作
544 //1.不指定limit的orderby
545 2.对enquiry表不指定分区进行查询
546 3.对分区表不指定分区进行查询,和数据量无关,只是个 查询模式
547 set hive.strict.checks.type.safety=true;//严格类型的安全检查,不允许以下操作
548 1.bigint和string之间比较
549 2.bigint和double之间比较
550
551 set hive.strict.checks.checks.cartesian.product=true//不允许笛卡尔积连接
552 7.调整map,reduce个数
553 set hive.exec.reducers.bytes.per.reducer=256000000//每个reduce task的字节数
554 set hive.exec.reducers.max=1009 //每个reduce task的最大值,属性为负数时,会使用该属性
555
556 8.jvm重用
557 使同一个jvm在一个job(map*,reduce* )中执行多次,避免启动jvm的开销
558 set mapreduce.job.ubertask.enable=true;//是否启用uber,jvm重用
559 set mapreduce.job.ubertask.maxmaps=9 //uber,降低
560 set mapreduce.job.ubertask.maxreduces=1 //uber
561
562 set hive.exec.reducers.bytes.per.reducer=100000000
563 set hive.exec.reducers.max=5
564 set mapreduce.job.reduces=3
565
566 9.索引
567 使用index
568 10.动态分区严格模式
569 set hive.exec.dynamic.partition.mode=strict //动态分区严格模式
570 set hive.max.dynamic.partitions=300000 //设置最大分区
571 set hive.max.dynamic.partitions.pernode=1000 //设置每个节点的最大分区数
572 11.推测执行,让mapreduce多个实例并发执行
573 set maprduce.map.speculative=true //map推测
574 set mapreduce.reduce.speculative=true //reduce推测
575 12.多个分组优化
576 //若多个group by 操作使用的是一个公共的字段。则这些groupby 可以生成一个mr
577 hive.multigroupby.singlereducer=true; //默认true
578 13.虚拟列
579 hive.exec.rowoffset=false //是否使用虚拟列
580 select input_file_name,block_offset_inside_file from test2;//
581
582
583
584 压缩
585 ------------------------------------------------------------------------
586 1.查看压缩的编解码器
587 set io.compression.codecs
588 2.set hive.exec.compress.intermediate=true
589 .set mapred.compress.map.output=
590 set mapred.map.output.compression.codec=
591 3.修改map输出结构的压缩,默认defaultcodec
592 4.设置最终的job输出结果是否为压缩
593 set hive.exec.compress.output.=true
594 5.使用seqencefule 作为存储格式
595 create table seqfile(...)stored as sequencefuile //使用序列文件存储
596 insert into t_seq select * from test1k 复制test1数据到t_seq
597
598 6.控制sequencefule中map端输出文件的压缩类型,使用block压缩
599 set mapred.map.output.compression.codec=org.apache.hadoop.io.compress.snappycodec;
600 set hive.exec.compress.intermediate=true;
601 set hive.exec.compress.output=true;
602 set mapred.output.compression.codec=org.apache.hadoop.io.compress.gzipcodec;
603 set mapred.outout.comoression.type.Block;
604 7.对分区进行归档
605 ------------------------------------------------------------------------
606 1.启用hive分区档
607 set hive archive enabled=true;是否允许归档操作
608 2.对标的指定分区进行归档(只能对内部分区表进行归档,外部表不可以)
609 alter table test2 archaive partition ('proveince='hebei',city='baoding')
610 3.操作
611 create table test3 as select id ,name,age from test2;
612 注意:运行时缺少hadooparchive.class类,查看日志/tmp/ubuntu/hive/hive.log 可见
613 复制${hadoop_home}/shared/hadoop/tools/hadoops-archive-xxx.jar hive/auxlib/
614 复制${hadoop_home}/shared/hadoop/tools/hadoops-archive-xxx.jar hive/lib/
615 函数(udf:user)
616 ------------------------
617 1.procedure+function
618 2.函数操作 show function
619 desc(ribe) function case; //查看函数的帮助
620 desc extended function |;//查看函数的扩展帮助
621 desc function concat;
622 abs()绝对值
623
624 3.udf
625 输入一行或者多行时,输出单行。
626 4.udaf:user define aggregate function 用户自定义聚集函数
627 一行或者多行的n个列作为输入,输出一个值
628 5.UDTF表生成函数
629 n个输入,多行作为输入或者多行作为输入
630 select explode(arry(1,2,3));//
631 6.自定义函数
632 1、创建一个类
633 2.将函数导出jar包
634 eclipse --exoort---jar
635 2.通过hive命令将jar添加到hive的类路径
636 add jar home/ubuntu/desktop/todate.jar
637 3.注册函数
638 create temporary function to_date as 'com.it18zhang.myhive210.func.todate';
639 4.调用函数
640 desc function to_date;
641 自定义表生成函数
642 1.创建一个类UDTF
643 2.编译,打包,导出
644 mvn package -dskuotests
645 3.复制jar到ubuntu
646 4.使用hive的add jar命令,添加jar到classpath
647 5.创建临时函数
648 create temporary function forx as 'x.x.x.xxx.forudtf'
649 6.调用函数
650 select forx(1,5);
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680