官方文档
spark sql 函数
表结构相关
显示该表的建表语句
SHOW CREATE TABLE `database`.`tableName`
显示该表的列名、列的类型、列的注释
DESC `database`.`tableName`
显示该表的列名
SHOW COLUMNS FROM `database`.`tableName`
读相关
字符串
-- 将字符串按某个字符分割成数组
select split('a,b,c', ',')[0]; -> a
-- 将字符串中的特定字符串 替换为 指定字符串
select replace('abc bca test', ' ',',') -> abc,bca,test
-- 截取字符串
select subStr('123456abcd',1,5) -> 12345
case when
(case `col` when xxx then xxx else xxx end ) as xxx;
select
str,
(case (split(str,'-')[0]) when split(str,'-')[0] then split(str,'-')[0] else '' end ) as first_id,
(case (split(str,'-')[1]) when split(str,'-')[1] then split(str,'-')[1] else '' end ) as sec_id,
(case (split(str,'-')[2]) when split(str,'-')[2] then split(str,'-')[2] else '' end ) as third_id
from (
select '123-456' as str
) a
str first_id sec_id third_id
123-456 123 456
日期、时间
-- 当前时间,type:timeStamp
SELECT now();
-- 将字符串 转为 date 类型
SELECT to_date('2022-11-10 11:00:01')
-- 计算两个时间相差月份,type: double
-- months_between(CAST(2022-11-20 AS TIMESTAMP), CAST(2022-05-02 AS TIMESTAMP), true)
select months_between('2022-11-20','2022-05-02')
-- 计算两个时间相差天数
-- DATEDIFF(now(), '2023-10-08 00:00:00')
select DATEDIFF('2023-10-08 00:00:00', now())
-- timestamp
-- to_timestamp(String,formatter)
select to_timestamp('202211211100','yyyyMMddHHmm') time 2022-11-21 11:00:00.0
Map
-- 转为Map<K,V> 类型
select map('key','value')
Array
-- 判断数组是否包含'a'
select array_contains (split('a,b,c', ','),'a'); -> true
-- 转为 Array 类型
select array("1","2","3")
Struct
-- 转为struct<col1:string,col2:string,col3:string> 类型
select struct('a','b','c');
Agg
// 把多行聚合成一行,type:list
collect_list(`col`)
// 同上,type: set
collect_set(`col`)
explode
// 将数组拆成多行
select explode(array("1","2","3")) as col
col
1
2
3
lateral view + explode
-- 将列名拼接起来
select id, number from
(
select 1 as id , array("1","2","3") as arr
) a lateral view explode (arr) as number
id number
1 1
1 2
1 3
Json
-- 转为json 字符串
select to_json(struct('a','b','c')); -> {"col1":"a","col2":"b","col3":"c"}
select to_json(map('key','value')); -> {"key":"value"}
select to_json(split('a,b,c', ',')); -> ["a","b","c"]
-- 解析json
select get_json_object('["a","b","c"]','$[0]'); -> a
select get_json_object('["a","b","c"]','$[*]'); -> a,b,c
select get_json_object('{"key":"value"}','$.key') -> value
写相关
插入数据进某表
INSERT OVERWRITE TABLE `database`.`tableName`
(
SELECT * FROM `tableName2`
2