Hadoop-Hive安装及工作原理和优化
安装
# Unpack the Hive distribution under /opt/hive, then place the MySQL JDBC
# driver jar into Hive's lib directory so the metastore can reach MySQL.
HIVE_HOME=/opt/hive/apache-hive-3.1.1-bin
mkdir /opt/hive && tar -zxf /root/apache-hive-3.1.1-bin.tar.gz -C /opt/hive && \
tar -zxf /root/mysql-connector-java-8.0.16.tar.gz && \
cp /root/mysql-connector-java-8.0.16/mysql-connector-java-8.0.16.jar "$HIVE_HOME/lib"
配置服务
# Write hive-site.xml. FIX: the raw '&' characters in the JDBC URL made the
# file malformed XML (Hive dies with a SAXParseException on startup);
# inside an XML document every '&' must be escaped as '&amp;'.
echo '<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://47.107.140.154:3306/hadoophive?allowMultiQueries=true&amp;useSSL=false&amp;verifyServerCertificate=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>cshare</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>aBc123456</value>
</property>
<property>
<name>datanucleus.readOnlyDatastore</name>
<value>false</value>
</property>
<property>
<name>datanucleus.fixedDatastore</name>
<value>false</value>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>true</value>
</property>
<property>
<name>datanucleus.autoCreateTables</name>
<value>true</value>
</property>
<property>
<name>datanucleus.autoCreateColumns</name>
<value>true</value>
</property>
<property>
<name>hive.support.concurrency</name>
<value>true</value>
</property>
<property>
<name>hive.zookeeper.quorum</name>
<value>worker3:2181,worker4:2181,worker5:2181</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>worker3:2181,worker4:2181,worker5:2181</value>
</property>
<!-- NOTE(review): transport.mode below is http; hive.server2.thrift.port
     applies to binary mode only. In http mode HiveServer2 listens on
     hive.server2.thrift.http.port (default 10001) - confirm intended port. -->
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>worker3</value>
</property>
<property>
<name>hive.server2.transport.mode</name>
<value>http</value>
</property>
<property>
<name>hive.server2.webui.host</name>
<value>worker3</value>
</property>
<property>
<name>hive.server2.webui.port</name>
<value>10002</value>
</property>
<property>
<name>hive.server2.authentication</name>
<value>NOSASL</value>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>FALSE</value>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<value>FALSE</value>
</property>
</configuration>
' >/opt/hive/apache-hive-3.1.1-bin/conf/hive-site.xml
启动服务
# Hive
# 第一次需要初始化表
[root@worker3]# schematool -dbType mysql -initSchema
[root@worker3]# hive --service metastore >/dev/null 2>/dev/null &
[root@worker3]# hive --service hiveserver2 >/dev/null 2>/dev/null &
工作原理
Hive语句
一:设计数据库
1.数据库(DATABASE)
创建:hive> CREATE DATABASE d1;
删除:hive> DROP DATABASE d1;
查看:hive> SHOW DATABASES;
切换:hive> USE d1;
2.数据表(TABLE)
创建:
内部表
hive>
create table t1(
number1 TINYINT comment '注释'
,number2 SMALLINT comment '注释'
,number3 INT comment '注释'
,number4 BIGINT comment '注释'
,name1 BOOLEAN comment '注释'
,name2 FLOAT comment '注释'
,name3 decimal comment '注释'
,name4 DOUBLE comment '注释'
,name5 STRING comment '注释')
COMMENT 'This is the page view table'
PARTITIONED BY(dt STRING, country STRING)
CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
COLLECTION ITEMS TERMINATED BY '\002'
MAP KEYS TERMINATED BY '\003'
STORED AS SEQUENCEFILE
LOCATION '<hdfs_location>';
压缩:
stored as textfile
stored as sequencefile
stored as rcfile
stored as parquet
hive> create table t2 like t1;
外部表
hive> create external table t3(number1 int); --外部表必须指定列定义
hive> create external table t3(number1 int) partitioned by (part1 string);
删除:
hive> DROP TABLE t1;
hive> DROP TABLE IF EXISTS t1;
查看:
hive> SHOW TABLES;
hive> DESC t1;
hive> SHOW CREATE table t1;
hive> SHOW partitions t1;
改动:
hive> alter table t1 rename to t2;
hive> alter table t1 add columns (col1 int comment '新增');
hive> alter table t1 change col1 col2 int;
hive> alter table t1 replace columns (col1 int,col2 string,col3 string);
二:管理单表
1.SELECT(子查询)
*简单选取
hive> SELECT column1,column2 FROM t1;
hive> SELECT * FROM t1;
hive> SELECT DISTINCT column1 FROM t1;--去重
*复杂选取
--WHERE子句{=,<>,>,<,>=,<=,BETWEEN,LIKE,IN}
hive> SELECT * FROM t1 WHERE column1='Cshare';
hive> SELECT * FROM t1 WHERE column1>100;
hive> SELECT * FROM t1 WHERE column1 BETWEEN value1 AND value2
hive> SELECT * FROM t1 WHERE column1 BETWEEN 'a' AND 'h';
--BETWEEN可以是数字,文本,日期;包含value1,value2;NOT BETWEEN 相补
--文本型时慎用
hive> SELECT * FROM t1 WHERE column1 LIKE 'c%'
hive> SELECT * FROM t1 WHERE column1 NOT LIKE '%c%'
--'%'表示零个或多个字符,'_'表示单个字符,[charlist],[^charlist]表示字符列中的一个,也用于show语句
hive> SELECT * FROM t1 WHERE column1 IN (value1,value2,...)
hive> SELECT * FROM t1 WHERE column1 IN (SELECT column2 FROM t2)
--IN,可以嵌套子查询
--AND和OR运算符
hive> SELECT * FROM t1 WHERE column1='Cshare' AND column2>100;
hive> SELECT * FROM t1 WHERE column1='Cshare' OR column2>100;
--ORDER BY语句(默认ASC表示升序,DESC表示降序)
hive> SELECT * FROM t1 ORDER BY column1;
hive> SELECT * FROM t1 ORDER BY column1,column2;
--column1升序下column2升序
hive> SELECT * FROM t1 ORDER BY column1 DESC;
hive> SELECT * FROM t1 ORDER BY column1 DESC,column2 ASC;
--column1降序下column2升序
--LIMIT语句
hive> SELECT * FROM t1 LIMIT 3;
--选取前三条
hive> SELECT * FROM t1 LIMIT 2,3;
--从第3条开始选取3条数据,第一个参数默认从0开始
2.INSERT
hive> LOAD DATA INPATH '/20131101' OVERWRITE INTO TABLE t1 PARTITION(part1='20131101');
--导入分区
hive> INSERT OVERWRITE TABLE t1 SELECT a,b FROM t2;
-- 插入数据
hive> INSERT INTO TABLE t1 SELECT a,b FROM t2;
-- 追加数据
3.DELETE
hive> ALTER TABLE t1 DROP PARTITION (dt='20101202');
--删除分区
hive> TRUNCATE TABLE t1;
三:管理多表
--别名 Aliases
hive> SELECT column1 FROM table1 AS t1
hive> SELECT column1 AS co1 FROM t1
hive> SELECT t1.column1,t2.column1 FROM table1 AS t1,table2 AS t2
--逗号连接(comma join)产生笛卡尔积,结果行数为两表行数之积,通常应配合WHERE条件使用
JOIN:--用于根据两个或多个表中的列之间的关系,从这些表中查询数据
hive> SELECT t1.column1,t2.column1 FROM t1,t2 WHERE t1.column2=t2.column2;
--相同于INNER JOIN,当t1.column2与t2.column2分别含有n,m个相同重复值时,则会生成n*m条数据
INNER JOIN--A∩B 在表中存在至少一个匹配时,INNER JOIN 关键字返回行
hive> SELECT t1.column1,t2.column1 FROM t1 INNER JOIN t2 ON t1.column2=t2.column2
UNION ALL:--UNION操作符用于合并两个或多个具有相似结构的SELECT语句
hive> SELECT * FROM t1 UNION ALL SELECT * FROM t2;
--UNION操作符选取不同的值,如果允许重复的值,请使用 UNION ALL
【1】原生函数:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF
【2】基本操作:https://www.cnblogs.com/smartloli/p/4354291.html

浙公网安备 33010602011771号