Hive使用

1566633088084

数据库操作

 创建数据库
create database if not exists hive; 
 
查看存在的数据库
show databases;
查看以 "h" 开头的数据库
-- Hive's SHOW DATABASES pattern supports only '*' (any characters) and '|' (alternation) as wildcards.
show databases like 'h*';
 
-- Show the location and other metadata of the hive database
describe database hive;
 
删除不包含表的数据库
drop database if exists hive;
#删除数据库及其中的所有表
drop database if exists hive cascade;
 
切换到hive数据库
use hive;

内部表

 -- Create an internal table stored as a text file, with tab-separated fields and newline-separated rows
create table if not exists table_test (
    id   int,
    name string,
    sex  string
)
row format delimited
    fields terminated by "\t"
    lines terminated by "\n"
stored as textfile;

外部表

 -- 1. 在hdfs上创建外部表文件夹
hdfs dfs -mkdir /user/hive/warehouse/external
 
-- 2. 上传数据
hdfs dfs -put data.txt /user/hive/warehouse/external 
 
-- 3. Create the external table (the data must be files on HDFS)
-- The EXTERNAL keyword goes between CREATE and TABLE.
create external table table_external (name string,age int) row format delimited fields terminated by "\t" location '/user/hive/warehouse/external';

分区表

创建分区表

 -- A Hive table partition is a directory; the partition column must not repeat a regular table column
create table tb_partition (
    id   string,
    name string
)
partitioned by (month string)
row format delimited
    fields terminated by '\t';

导入数据到分区表中

 -- 导入本地数据
load data local inpath '/home/hadoop/files/nameinfo.txt' overwrite into table tb_partition partition(month='201709');
 
-- 插入查询的数据
insert overwrite table tb_partition partition(month='201707') select id, name from tmp_table;
 
-- 手动上传文件到分区目录进行加载：文件上传后直接查询表是查不到数据的，需要先更新元数据信息。
-- 创建分区目录
hdfs dfs -mkdir /user/hive/warehouse/tb_partition/month=201710
-- 上传数据
hdfs dfs -put nameinfo.txt /user/hive/warehouse/tb_partition/month=201710
-- 更新元数据
msck repair table tb_partition;

修改分区路径

 -- Partition values are string literals and need straight quotes, not typographic ones.
 alter table usr2 partition(city='beijing',state='China') set location '/usr/local/hive/warehouse/usr2/CH/beijing';

删除分区

 -- Partition values are string literals and need straight quotes; the statement ends with a semicolon.
 alter table usr2 drop if exists partition(city='beijing',state='China');

分区表的其他操作：https://www.cnblogs.com/one--way/p/7550795.html

创建相同表结构的表

 create table if not exists hive.usr1 like hive.usr;

查看表信息

 -- 查看存在的表
show tables;
show tables in hive;
 
-- List tables whose names start with "u"
-- (SHOW TABLES patterns support only '*' and '|' as wildcards)
show tables 'u*';
 
-- 查看usr表的信息
describe hive.usr;
 
-- Show the table structure
desc hive.usr;

表操作

 -- 重命名表
alter table usr rename to custom;
 
-- 修改列属性
alter table table_name change col1 col2 string;
 
-- 添加列
alter table usr add columns(hobby string); 
 
-- 删除所有列并替换
alter table usr replace columns(uname string);
 
-- 删除表
drop table if exists usr1;

视图

 -- 创建视图
create view view_name as select * from usr;
 
-- 删除视图
drop view if exists view_name;

查询

 -- Fuzzy match: rows whose name contains the substring 数据.
 -- table_name is a placeholder for the table to query; the original had no table after FROM.
 -- The original pattern contained "%s", which looks like a leftover format placeholder, not a LIKE wildcard.
 -- NOTE(review): "name detail" parses as column name aliased to detail; add a comma if two columns were intended.
select name detail from table_name where name like "%数据%";
 
-- 统计列不重复的数据数
select count(distinct uid) from bigdata_user;

导入数据

 -- 导入local数据 -> hive
load data local inpath 'xiong.txt' into table xiong;
 
-- 导入hdfs数据 -> hive
load data inpath '/home/xiong/add.txt' into table xiong;

执行脚本

 -- Run an HQL or SQL script file; like other Hive commands, it is terminated by a semicolon
source my.hql;

shell使用Hive

 -- 查看表结构
hive -e 'desc cleantable;'
 
-- 将执行结果写入本地文件
hive -S -e 'desc cleantable;' > desc.txt
 
-- 执行本地hql脚本
hive -S -f /my.hql

posted @ 2019-08-27 18:52 会走的树阅读(267) 评论(0) 收藏举报

刷新页面返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

公告

2025年7月

日

一

二

三

四

五

六

会走的树

Hive使用

数据库操作

内部表

外部表

分区表

创建相同表结构的表

查看表信息

表操作

视图

查询

导入数据

执行脚本

shell使用Hive

公告

搜索

常用链接

我的标签

随笔分类

随笔档案

阅读排行榜

推荐排行榜

	创建数据库
	create database if not exists hive;

	查看存在的数据库
	show databases;
	查看以 "h" 开头的数据库
	-- Hive's SHOW DATABASES pattern supports only '*' (any characters) and '|' (alternation) as wildcards.
	show databases like 'h*';

	-- Show the location and other metadata of the hive database
	describe database hive;

	删除不包含表的数据库
	drop database if exists hive;
	#删除数据库及其中的所有表
	drop database if exists hive cascade;

	切换到hive数据库
	use hive;

	-- 创建内部表
	create table if not exists table_test(id int,name string,sex string) row format delimited fields terminated by "\t" lines terminated by "\n" stored as textfile;

	-- 1. 在hdfs上创建外部表文件夹
	hdfs dfs -mkdir /user/hive/warehouse/external

	-- 2. 上传数据
	hdfs dfs -put data.txt /user/hive/warehouse/external

	-- 3. Create the external table (the data must be files on HDFS)
	-- The EXTERNAL keyword goes between CREATE and TABLE.
	create external table table_external (name string,age int) row format delimited fields terminated by "\t" location '/user/hive/warehouse/external';

	-- Hive表的分区就是一个目录，分区字段不和表的字段重复
	create table tb_partition(id string, name string) PARTITIONED BY (month string) row format delimited fields terminated by '\t';

	-- 导入本地数据
	load data local inpath '/home/hadoop/files/nameinfo.txt' overwrite into table tb_partition partition(month='201709');

	-- 插入查询的数据
	insert overwrite table tb_partition partition(month='201707') select id, name from tmp_table;

	-- 手动上传文件到分区目录进行加载：文件上传后直接查询表是查不到数据的，需要先更新元数据信息。
	-- 创建分区目录
	hdfs dfs -mkdir /user/hive/warehouse/tb_partition/month=201710
	-- 上传数据
	hdfs dfs -put nameinfo.txt /user/hive/warehouse/tb_partition/month=201710
	-- 更新元数据
	msck repair table tb_partition;

	-- 查看存在的表
	show tables;
	show tables in hive;

	-- List tables whose names start with "u"
	-- (SHOW TABLES patterns support only '*' and '|' as wildcards)
	show tables 'u*';

	-- 查看usr表的信息
	describe hive.usr;

	-- Show the table structure
	desc hive.usr;

	-- 重命名表
	alter table usr rename to custom;

	-- 修改列属性
	alter table table_name change col1 col2 string;

	-- 添加列
	alter table usr add columns(hobby string);

	-- 删除所有列并替换
	alter table usr replace columns(uname string);

	-- 删除表
	drop table if exists usr1;

	-- 创建视图
	create view view_name as select * from usr;

	-- 删除视图
	drop view if exists view_name;

	-- Fuzzy match: rows whose name contains the substring 数据.
	-- table_name is a placeholder for the table to query; the original had no table after FROM.
	-- The original pattern contained "%s", which looks like a leftover format placeholder, not a LIKE wildcard.
	-- NOTE(review): "name detail" parses as column name aliased to detail; add a comma if two columns were intended.
	select name detail from table_name where name like "%数据%";

	-- 统计列不重复的数据数
	select count(distinct uid) from bigdata_user;

	-- 导入local数据 -> hive
	load data local inpath 'xiong.txt' into table xiong;

	-- 导入hdfs数据 -> hive
	load data inpath '/home/xiong/add.txt' into table xiong;

	-- 查看表结构
	hive -e 'desc cleantable;'

	-- 将执行结果写入本地文件
	hive -S -e 'desc cleantable;' > desc.txt

	-- 执行本地hql脚本
	hive -S -f /my.hql