hive ddl dml hivesql命令大全

SELECT T.sname,T.ctfid,T.gender,t.address,
       count(*) OVER(PARTITION BY T.sname) AS FM_CNT
       FROM test_db.room3 T WHERE  T.address like "%北京%" AND instr(T.ctfid,'310')>0
ORDER BY FM_CNT DESC;


select gender,sum(cnt) as cnt from test_db.room2_view where length(trim(gender))>0 group by gender order by cnt desc;


desc test_db.room2;

-- DROP TABLE default.room2_gender_out PURGE ;

-- 删除数据库
//DROP DATABASE db_hive1;

// 1.创建数据库
CREATE DATABASE IF NOT EXISTS db_hive1
    COMMENT "hive学习教程数据库" --数据库备注
    LOCATION '/hivedata/db_hive1' --数据库存储hdfs路径
    WITH DBPROPERTIES ("create_dt"="2025-12-07","create_user"="hadoop") --数据库信息k,v
;

//查看数据库
SHOW DATABASES LIKE "*db*";

//查看数据库的信息
describe DATABASE EXTENDED db_hive1;

//查看更多信息
DESC DATABASE EXTENDED db_hive1;

//2.修改数据库
ALTER DATABASE db_hive1 set DBPROPERTIES ("create_dt"="2025-12-08","create_user"="wwwdp") ;
ALTER DATABASE db_hive1 set LOCATION '/hivedata/db_hive1'; --数据库存储hdfs路径
ALTER DATABASE db_hive1 set OWNER USER hadoop; -- 修改数据库归属用户


-- 删除数据库
//DROP DATABASE db_hive1 RESTRICT ; --数据库不为空删除失败
//DROP DATABASE db_hive1 CASCADE ; -- 将表和库一并 删除

//切换数据库
use test_db;

// 3 建表操作
-- 内部表
drop table db_hive1.tb_test1 purge ;
CREATE TABLE db_hive1.tb_test1
(dat_dt date COMMENT "DATE",
idky bigint COMMENT "主键",
amt decimal(30,4) COMMENT "AMT",
name varchar(4000) COMMENT "NAME",
work_list array<string> COMMENT "ARRAY",
addr_map MAP<string,string> COMMENT "Map"
)
COMMENT "nei bu biao"
--location "/hivedata/db_hive1/tb_test1"
-- row format delimited
-- ROW FORMAT SERDE ''
STORED AS orc -- textfile/orc/parquet
TBLPROPERTIES ("create_dt"="20251207")
;
INSERT INTO db_hive1.tb_test1 VALUES
(date '2023-10-01', 123456789, 1234.5678, 'John Doe',
 array('work1', 'work2'), map('city', 'New York', 'country', 'USA'));

select * from db_hive1.tb_test1 t;
select t.dat_dt,t.idky,t.name,
       t.work_list[0],
       t.work_list[1],
       t.addr_map["city"],
       t.addr_map["country"]
       from db_hive1.tb_test1 t;

-- 查看建表信息
desc extended db_hive1.tb_test1;


//会话临时表
CREATE TEMPORARY TABLE db_hive1.tb_test2
(dat_dt date,
idky int,
name string)
COMMENT "nei bu biao"
STORED AS ORC -- textfile/orc/parquet
;
-- select * from db_hive1.tb_test2;

--外部表
CREATE EXTERNAL TABLE db_hive1.tb_test3
(dat_dt date,
idky int,
name string)
COMMENT "nei bu biao"
location "/hivedata/db_hive1/tb_test3"
STORED AS orc -- textfile/orc/parquet
TBLPROPERTIES ("tab"="tb_test3")
;

desc extended db_hive1.tb_test3;


select date '2024-10-01' dat;



-- 数据类型
select cast('908.342' as decimal(30,4)) as vles;


-- 建表 like语法
CREATE TABLE db_hive1.room4 like db_hive1.tb_test1;

-- 建表 as语法
drop table  db_hive1.room4 purge ;
create table db_hive1.room4 as
    select * from test_db.room3;

use db_hive1;

select * from db_hive1.room4 r where r.address like "%剑河%";


-- 查看 ddl 建表语句
show create table db_hive1.tb_test1;



SELECT T.sname,T.ctfid,T.gender,t.address,
       count(*) OVER(PARTITION BY T.sname) AS FM_CNT
       FROM db_hive1.room4 T WHERE  T.address like "%北京%" AND instr(T.ctfid,'310')>0
ORDER BY FM_CNT DESC;


-- 建表指定分隔符
CREATE TABLE db_hive1.tb_test5
(dat_dt date,
idky int,
name string)
COMMENT "nei bu biao"
location "/hivedata/db_hive1/tb_test5"
ROW FORMAT DELIMITED FIELDS TERMINATED BY "|@|"
STORED AS orc -- textfile/orc/parquet
TBLPROPERTIES ("tab"="tb_test3")
;

-- 可以使用 hdfs 提交数据值 tb_test5 要求 input.txt 每行字段直接分隔符苇 |@|
-- hdfs dfs -put input.txt /hivedata/db_hive1/tb_test5



-- 创建json对接hive表可以直接 put就可以插入数据
drop table db_hive1.teacher;
create table db_hive1.teacher(
    name string comment "name",
    friend array<varchar(256)> comment "fred",
    students map<string,int> comment "students",
    adderss struct<city:string,amt:decimal(30,4),room:array<int>> comment "adderss"
)
--row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'  -- 指定 json
--location "/hivedata/db_hive1/teacher"
;

INSERT INTO db_hive1.teacher
select "hadoop1" as name,
       array("fdaf","fdafdfff") as friend,
       map("city",64) as students,
       struct("guiyangshi",123.543,array(9,0,32)) as adderss;



INSERT INTO db_hive1.teacher
values ("hadoop1",
        array("fdaf", "fdaffdas"),
        map("city", 64),
        struct("guiyangshi", 123.543, `array`(1,2,3,4)));


select * from teacher;

// 查看表信息
desc extended teacher;
desc formatted teacher;
show tables in db_hive1 "teacher" ;

//修改表的语法
drop table stu;
create table db_hive1.stu
(id int,
name string)
row format delimited fields terminated by ",";

ALTER TABLE stu RENAME TO  stu1;
//添加字段
ALTER TABLE stu add columns(gender string);
-- 修改字段
set hive.metastore.disallow.incompatible.col.type.changes=false;

alter table stu1 change column gender  gend string after name;

alter table db_hive1.stu1 replace columns(id int,name string,gend string);

-- 清空表只对内部表有用,对外部表无效
truncate table db_hive1.stu;

desc stu;

-- 二、 数据增删改查  DML
-- 本地导入  OVERWRITE 覆盖的意思,不加则 追加。 inpath 对应的local 节点是 你服务启动的 节点
LOAD DATA LOCAL INPATH "/home/hadoop/stu.txt"  INTO TABLE db_hive1.stu;
-- HDFS分布式文件路径
LOAD DATA INPATH "" OVERWRITE INTO TABLE db_hive1.stu;

-- 分区表
-- LOAD DATA INPATH "" OVERWRITE INTO TABLE db_hive1.stu PARTITION (....);

select * from db_hive1.stu;

//多条数据插入
insert into db_hive1.stu
values(1,"faf"),
      (2,"test");

-- 把查询sql的数据 写入到本地对应路径的
insert overwrite local directory "/home/hadoop/data/stu" row format serde "org.apache.hadoop.hive.serde2.JsonSerDe"
select id,name from db_hive1.stu;

-- 导出和导入

EXPORT TABLE db_hive1.stu to "/hivedata/stuout"; -- 导出数据及元数据,到hdfs 文件系统上
IMPORT /*external*/ TABLE stu from "hivedata/stuout"; -- 导入表元数据及数据 external 声明外部表


// 三、hive查询语句
//SELECT * FROM stu WHERE ID = 1 GROUP BY id HAVING count(*)>1 ORDER BY id LIMIT 1,10;

SELECT * FROM test_db.room2_view limit 4;

SELECT * FROM test_db.room2_view w where not w.gender ='12';

SELECT * FROM db_hive1.stu T WHERE T.id BETWEEN 0 AND 2;

SELECT * FROM db_hive1.stu T WHERE nvl(T.id,1) IN(0,1);

SELECT * FROM db_hive1.stu T where T.name like "%f%";

SELECT * FROM db_hive1.stu T where T.name regexp "正则表达式";
-- 逻辑语句 and or not
-- 聚合函数 count(*) avg() max() min(*)


-- 本地模式
// set mapreduce.framework.name local;


select * from stu;



SELECT
       row_number() over (order by t.ctfid) as rn,
       T.* FROM db_hive1.room4  T LIMIT 1000;



SELECT t.ctftp,count(*) as cnt FROM db_hive1.room4 t where t.ctftp is not null group by t.ctftp order by count(*) desc;


-- 排序语法 order by 语法 后面尽量跟limit 这个直接优化map端 不会导致 内存溢出
SELECT t.ctftp,count(*) as cnt FROM db_hive1.room4 t
where t.ctftp is not null group by t.ctftp order by count(*) DESC limit 100;

set mapreduce.job.reduces;

SELECT count(1) FROM test_db.room3;

WITH TEMP AS (
    SELECT t.ctftp,count(*) as cnt FROM db_hive1.room4 t
where t.ctftp is not null group by t.ctftp order by count(*) DESC limit 100
)
select * from TEMP where ctftp NOT like 'I%';


-- ORDER BY  和 sort by 的区别,order by 全局排序, sort by 指的是 map 到 reduce 端的排序,它只保证单个reduce 排序。
WITH result as (
   select * from db_hive1.room4
)
  ,TEMP AS (
    SELECT t.ctftp,count(*) as cnt FROM result t
where t.ctftp is not null group by t.ctftp order by count(*) DESC limit 100
)
SELECT * FROM TEMP SORT BY ctftp DESC ; -- 只对单个分区进行排序

select * from test_db.room3;

-- 函数

show functions ;


DESC FUNCTION substring;

DESC FUNCTION EXTENDED substring;


-- UDF 一进一出
SELECT round(3.342,2) AS RS;

SELECT substring("12345678",3,2);
SELECT substr("12345678",3,2);
SELECT substr("12345678",3);

-- 时间字符串转为时间字符串
SELECT unix_timestamp();-- 当前时间戳
SELECT unix_timestamp("2025-12-10 22:29:30","yyyy-MM-dd HH:mm:ss");


DESC FUNCTION unix_timestamp;
-- 时间戳转时间字符串
SELECT from_unixtime(unix_timestamp()) dat;

SELECT from_unixtime(1765423770,'yyyy-MM-dd HH:mm:ss') dat;



SELECT from_utc_timestamp(`cast`(1765378139 AS BIGINT)*1000,'GMT+8');


SELECT current_date();

select date_format(current_timestamp(),'yyyy-MM-dd HH:mm:ss');

SELECT hour(current_timestamp());

select datediff();

select date_add('2025-12-10',1); -- 加天数
select date_sub('2025-12-10',1); -- 减天数


SELECT date_format(date '2025-12-10','yyyy-MM-dd');

-- 条件控制语句
select case when 1=2 then 0 else 1 end ;

select `if`(1=6,1,0);



select * from test_db.room2_view T WHERE T.gender IS NOT NULL;

select * from db_hive1.room4;


-- 集合函数


SELECT `array`(1,2,34);

select `map`("city","gzhou","addr","adfas","rs",1);

-- 判断元素是否存在 数组里面
select array_contains(array(1,2,34),1);

select sort_array(array(1,2,34,3,5,7));

select size(array(1,2,34,3,5,7));

select map_keys(map("city","gzhou","addr","adfas","rs",1));

select map_values(map("city","gzhou","addr","adfas","rs",1));

select struct("fdafj","fafda","fdaf");

select named_struct("key1",1,"cl1","fdaf");

select month(`current_date`());

select date '2025-12-11';
select datediff(date '2025-12-11' , date '2024-11-11' ) as dat;

select decode(1,1,1,0);

desc function extended decode;

SELECT decode(2, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic');


-- 高级集合函数

SELECT
    collect_list(gender) a_list, -- 把多行聚合为一行
    collect_set(cnt) as a_set -- 把多行聚合为一行 去重
    FROM test_db.room2_view WHERE length(trim(gender))>0;

-- UDTF 一进多出

SELECT explode(ARRAY("21",'22',324));

SELECT posexplode(ARRAY("21",'22',324));


SELECT inline(ARRAY("21",'22',"FDA"));


SELECT T.idky,posexplode(T.addr_map) FROM db_hive1.tb_test1 T;


select a.name,inline(`array`(a.adderss)),a.adderss from teacher a;



select A.name,explode(A.friend) as fd
       from db_hive1.teacher A ;

select A.name,A.friend,
       fd
       from db_hive1.teacher A
    lateral view explode(a.friend) tem as fd;
;

select t.adderss["room"] from db_hive1.teacher t;


select posexplode(split("wzp,jq,xiaowang",','));


-- 窗口函数
SELECT * FROM (
select
    R.sname,
    r.ctfid,
    count(*) over (partition by r.ctfid) cnt
    from db_hive1.room4 r WHERE R.ctfid IS NOT NULL
        ) W WHERE W.CNT >1
;

-- 如何定义窗口范围,基于行的 和基于值的

select
    gender,cnt,
    sum(cnt) over (order by v.gender desc rows between 2 preceding and current row ) as lj
from test_db.room2_view v where length(trim(v.gender))>0
--order by v.gender asc--limit 100
;
-- 累计求和
select
    gender,cnt,
    sum(cnt) over (order by v.gender desc
        rows between unbounded preceding and current row ) as lj
from test_db.room2_view v where length(trim(v.gender))>0
;

-- 基于值的 窗口函数 order by 不是排序, 而是居于 那个字段对应的值进行划分窗口
-- order by [column] rows/range between 起点   and  终点

-- 对每一个 gender 分组值进行 累和
select
    gender,cnt,
    sum(cnt) over (partition by v.gender order by v.cnt asc
        rows between unbounded preceding and current row ) as lj
from test_db.room2_view v where length(trim(v.gender))>0
;

  

posted @ 2025-12-08 21:41  ARYOUOK  阅读(9)  评论(0)    收藏  举报