hive ddl dml hivesql命令大全
SELECT T.sname,T.ctfid,T.gender,t.address,
count(*) OVER(PARTITION BY T.sname) AS FM_CNT
FROM test_db.room3 T WHERE T.address like "%北京%" AND instr(T.ctfid,'310')>0
ORDER BY FM_CNT DESC;
select gender,sum(cnt) as cnt from test_db.room2_view where length(trim(gender))>0 group by gender order by cnt desc;
desc test_db.room2;
-- DROP TABLE default.room2_gender_out PURGE ;
-- 删除数据库
//DROP DATABASE db_hive1;
// 1.创建数据库
CREATE DATABASE IF NOT EXISTS db_hive1
COMMENT "hive学习教程数据库" --数据库备注
LOCATION '/hivedata/db_hive1' --数据库存储hdfs路径
WITH DBPROPERTIES ("create_dt"="2025-12-07","create_user"="hadoop") --数据库信息k,v
;
//查看数据库
SHOW DATABASES LIKE "*db*";
//查看数据库的信息
describe DATABASE EXTENDED db_hive1;
//查看更多信息
DESC DATABASE EXTENDED db_hive1;
//2.修改数据库
ALTER DATABASE db_hive1 set DBPROPERTIES ("create_dt"="2025-12-08","create_user"="wwwdp") ;
ALTER DATABASE db_hive1 set LOCATION '/hivedata/db_hive1'; --数据库存储hdfs路径
ALTER DATABASE db_hive1 set OWNER USER hadoop; -- 修改数据库归属用户
-- 删除数据库
//DROP DATABASE db_hive1 RESTRICT ; --数据库不为空删除失败
//DROP DATABASE db_hive1 CASCADE ; -- 将表和库一并 删除
//切换数据库
use test_db;
// 3 建表操作
-- 内部表
drop table db_hive1.tb_test1 purge ;
CREATE TABLE db_hive1.tb_test1
(dat_dt date COMMENT "DATE",
idky bigint COMMENT "主键",
amt decimal(30,4) COMMENT "AMT",
name varchar(4000) COMMENT "NAME",
work_list array<string> COMMENT "ARRAY",
addr_map MAP<string,string> COMMENT "Map"
)
COMMENT "nei bu biao"
--location "/hivedata/db_hive1/tb_test1"
-- row format delimited
-- ROW FORMAT SERDE ''
STORED AS orc -- textfile/orc/parquet
TBLPROPERTIES ("create_dt"="20251207")
;
INSERT INTO db_hive1.tb_test1 VALUES
(date '2023-10-01', 123456789, 1234.5678, 'John Doe',
array('work1', 'work2'), map('city', 'New York', 'country', 'USA'));
select * from db_hive1.tb_test1 t;
select t.dat_dt,t.idky,t.name,
t.work_list[0],
t.work_list[1],
t.addr_map["city"],
t.addr_map["country"]
from db_hive1.tb_test1 t;
-- 查看建表信息
desc extended db_hive1.tb_test1;
//会话临时表
CREATE TEMPORARY TABLE db_hive1.tb_test2
(dat_dt date,
idky int,
name string)
COMMENT "nei bu biao"
STORED AS ORC -- textfile/orc/parquet
;
-- select * from db_hive1.tb_test2;
--外部表
CREATE EXTERNAL TABLE db_hive1.tb_test3
(dat_dt date,
idky int,
name string)
COMMENT "nei bu biao"
location "/hivedata/db_hive1/tb_test3"
STORED AS orc -- textfile/orc/parquet
TBLPROPERTIES ("tab"="tb_test3")
;
desc extended db_hive1.tb_test3;
select date '2024-10-01' dat;
-- 数据类型
select cast('908.342' as decimal(30,4)) as vles;
-- 建表 like语法
CREATE TABLE db_hive1.room4 like db_hive1.tb_test1;
-- 建表 as语法
drop table db_hive1.room4 purge ;
create table db_hive1.room4 as
select * from test_db.room3;
use db_hive1;
select * from db_hive1.room4 r where r.address like "%剑河%";
-- 查看 ddl 建表语句
show create table db_hive1.tb_test1;
SELECT T.sname,T.ctfid,T.gender,t.address,
count(*) OVER(PARTITION BY T.sname) AS FM_CNT
FROM db_hive1.room4 T WHERE T.address like "%北京%" AND instr(T.ctfid,'310')>0
ORDER BY FM_CNT DESC;
-- 建表指定分隔符
CREATE TABLE db_hive1.tb_test5
(dat_dt date,
idky int,
name string)
COMMENT "nei bu biao"
location "/hivedata/db_hive1/tb_test5"
ROW FORMAT DELIMITED FIELDS TERMINATED BY "|@|"
STORED AS orc -- textfile/orc/parquet
TBLPROPERTIES ("tab"="tb_test3")
;
-- 可以使用 hdfs 提交数据值 tb_test5 要求 input.txt 每行字段直接分隔符苇 |@|
-- hdfs dfs -put input.txt /hivedata/db_hive1/tb_test5
-- 创建json对接hive表可以直接 put就可以插入数据
drop table db_hive1.teacher;
create table db_hive1.teacher(
name string comment "name",
friend array<varchar(256)> comment "fred",
students map<string,int> comment "students",
adderss struct<city:string,amt:decimal(30,4),room:array<int>> comment "adderss"
)
--row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe' -- 指定 json
--location "/hivedata/db_hive1/teacher"
;
INSERT INTO db_hive1.teacher
select "hadoop1" as name,
array("fdaf","fdafdfff") as friend,
map("city",64) as students,
struct("guiyangshi",123.543,array(9,0,32)) as adderss;
INSERT INTO db_hive1.teacher
values ("hadoop1",
array("fdaf", "fdaffdas"),
map("city", 64),
struct("guiyangshi", 123.543, `array`(1,2,3,4)));
select * from teacher;
// 查看表信息
desc extended teacher;
desc formatted teacher;
show tables in db_hive1 "teacher" ;
//修改表的语法
drop table stu;
create table db_hive1.stu
(id int,
name string)
row format delimited fields terminated by ",";
ALTER TABLE stu RENAME TO stu1;
//添加字段
ALTER TABLE stu add columns(gender string);
-- 修改字段
set hive.metastore.disallow.incompatible.col.type.changes=false;
alter table stu1 change column gender gend string after name;
alter table db_hive1.stu1 replace columns(id int,name string,gend string);
-- 清空表只对内部表有用,对外部表无效
truncate table db_hive1.stu;
desc stu;
-- 二、 数据增删改查 DML
-- 本地导入 OVERWRITE 覆盖的意思,不加则 追加。 inpath 对应的local 节点是 你服务启动的 节点
LOAD DATA LOCAL INPATH "/home/hadoop/stu.txt" INTO TABLE db_hive1.stu;
-- HDFS分布式文件路径
LOAD DATA INPATH "" OVERWRITE INTO TABLE db_hive1.stu;
-- 分区表
-- LOAD DATA INPATH "" OVERWRITE INTO TABLE db_hive1.stu PARTITION (....);
select * from db_hive1.stu;
//多条数据插入
insert into db_hive1.stu
values(1,"faf"),
(2,"test");
-- 把查询sql的数据 写入到本地对应路径的
insert overwrite local directory "/home/hadoop/data/stu" row format serde "org.apache.hadoop.hive.serde2.JsonSerDe"
select id,name from db_hive1.stu;
-- 导出和导入
EXPORT TABLE db_hive1.stu to "/hivedata/stuout"; -- 导出数据及元数据,到hdfs 文件系统上
IMPORT /*external*/ TABLE stu from "hivedata/stuout"; -- 导入表元数据及数据 external 声明外部表
// 三、hive查询语句
//SELECT * FROM stu WHERE ID = 1 GROUP BY id HAVING count(*)>1 ORDER BY id LIMIT 1,10;
SELECT * FROM test_db.room2_view limit 4;
SELECT * FROM test_db.room2_view w where not w.gender ='12';
SELECT * FROM db_hive1.stu T WHERE T.id BETWEEN 0 AND 2;
SELECT * FROM db_hive1.stu T WHERE nvl(T.id,1) IN(0,1);
SELECT * FROM db_hive1.stu T where T.name like "%f%";
SELECT * FROM db_hive1.stu T where T.name regexp "正则表达式";
-- 逻辑语句 and or not
-- 聚合函数 count(*) avg() max() min(*)
-- 本地模式
// set mapreduce.framework.name local;
select * from stu;
SELECT
row_number() over (order by t.ctfid) as rn,
T.* FROM db_hive1.room4 T LIMIT 1000;
SELECT t.ctftp,count(*) as cnt FROM db_hive1.room4 t where t.ctftp is not null group by t.ctftp order by count(*) desc;
-- 排序语法 order by 语法 后面尽量跟limit 这个直接优化map端 不会导致 内存溢出
SELECT t.ctftp,count(*) as cnt FROM db_hive1.room4 t
where t.ctftp is not null group by t.ctftp order by count(*) DESC limit 100;
set mapreduce.job.reduces;
SELECT count(1) FROM test_db.room3;
WITH TEMP AS (
SELECT t.ctftp,count(*) as cnt FROM db_hive1.room4 t
where t.ctftp is not null group by t.ctftp order by count(*) DESC limit 100
)
select * from TEMP where ctftp NOT like 'I%';
-- ORDER BY 和 sort by 的区别,order by 全局排序, sort by 指的是 map 到 reduce 端的排序,它只保证单个reduce 排序。
WITH result as (
select * from db_hive1.room4
)
,TEMP AS (
SELECT t.ctftp,count(*) as cnt FROM result t
where t.ctftp is not null group by t.ctftp order by count(*) DESC limit 100
)
SELECT * FROM TEMP SORT BY ctftp DESC ; -- 只对单个分区进行排序
select * from test_db.room3;
-- 函数
show functions ;
DESC FUNCTION substring;
DESC FUNCTION EXTENDED substring;
-- UDF 一进一出
SELECT round(3.342,2) AS RS;
SELECT substring("12345678",3,2);
SELECT substr("12345678",3,2);
SELECT substr("12345678",3);
-- 时间字符串转为时间字符串
SELECT unix_timestamp();-- 当前时间戳
SELECT unix_timestamp("2025-12-10 22:29:30","yyyy-MM-dd HH:mm:ss");
DESC FUNCTION unix_timestamp;
-- 时间戳转时间字符串
SELECT from_unixtime(unix_timestamp()) dat;
SELECT from_unixtime(1765423770,'yyyy-MM-dd HH:mm:ss') dat;
SELECT from_utc_timestamp(`cast`(1765378139 AS BIGINT)*1000,'GMT+8');
SELECT current_date();
select date_format(current_timestamp(),'yyyy-MM-dd HH:mm:ss');
SELECT hour(current_timestamp());
select datediff();
select date_add('2025-12-10',1); -- 加天数
select date_sub('2025-12-10',1); -- 减天数
SELECT date_format(date '2025-12-10','yyyy-MM-dd');
-- 条件控制语句
select case when 1=2 then 0 else 1 end ;
select `if`(1=6,1,0);
select * from test_db.room2_view T WHERE T.gender IS NOT NULL;
select * from db_hive1.room4;
-- 集合函数
SELECT `array`(1,2,34);
select `map`("city","gzhou","addr","adfas","rs",1);
-- 判断元素是否存在 数组里面
select array_contains(array(1,2,34),1);
select sort_array(array(1,2,34,3,5,7));
select size(array(1,2,34,3,5,7));
select map_keys(map("city","gzhou","addr","adfas","rs",1));
select map_values(map("city","gzhou","addr","adfas","rs",1));
select struct("fdafj","fafda","fdaf");
select named_struct("key1",1,"cl1","fdaf");
select month(`current_date`());
select date '2025-12-11';
select datediff(date '2025-12-11' , date '2024-11-11' ) as dat;
select decode(1,1,1,0);
desc function extended decode;
SELECT decode(2, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic');
-- 高级集合函数
SELECT
collect_list(gender) a_list, -- 把多行聚合为一行
collect_set(cnt) as a_set -- 把多行聚合为一行 去重
FROM test_db.room2_view WHERE length(trim(gender))>0;
-- UDTF 一进多出
SELECT explode(ARRAY("21",'22',324));
SELECT posexplode(ARRAY("21",'22',324));
SELECT inline(ARRAY("21",'22',"FDA"));
SELECT T.idky,posexplode(T.addr_map) FROM db_hive1.tb_test1 T;
select a.name,inline(`array`(a.adderss)),a.adderss from teacher a;
select A.name,explode(A.friend) as fd
from db_hive1.teacher A ;
select A.name,A.friend,
fd
from db_hive1.teacher A
lateral view explode(a.friend) tem as fd;
;
select t.adderss["room"] from db_hive1.teacher t;
select posexplode(split("wzp,jq,xiaowang",','));
-- 窗口函数
SELECT * FROM (
select
R.sname,
r.ctfid,
count(*) over (partition by r.ctfid) cnt
from db_hive1.room4 r WHERE R.ctfid IS NOT NULL
) W WHERE W.CNT >1
;
-- 如何定义窗口范围,基于行的 和基于值的
select
gender,cnt,
sum(cnt) over (order by v.gender desc rows between 2 preceding and current row ) as lj
from test_db.room2_view v where length(trim(v.gender))>0
--order by v.gender asc--limit 100
;
-- 累计求和
select
gender,cnt,
sum(cnt) over (order by v.gender desc
rows between unbounded preceding and current row ) as lj
from test_db.room2_view v where length(trim(v.gender))>0
;
-- 基于值的 窗口函数 order by 不是排序, 而是居于 那个字段对应的值进行划分窗口
-- order by [column] rows/range between 起点 and 终点
-- 对每一个 gender 分组值进行 累和
select
gender,cnt,
sum(cnt) over (partition by v.gender order by v.cnt asc
rows between unbounded preceding and current row ) as lj
from test_db.room2_view v where length(trim(v.gender))>0
;
自动化学习。

浙公网安备 33010602011771号