macbook配置hive

配置hive

1 下载hive版本

下载hive

2 解压hive

$ tar -xzvf hive-x.y.z.tar.gz
$ cd hive-x.y.z

3 设置环境变量

# Hive/Hadoop environment variables (append to ~/.bash_profile or ~/.zshrc).
# NOTE: macOS defines $USER (uppercase); the original used $User, which is
# normally unset and would expand to the empty string, breaking every path.
export HIVE_HOME=/Users/$USER/software/hive/apache-hive-2.1.1
export HADOOP_HOME=/Users/$USER/software/hadoop/hadoop-2.9.1
export PATH=$HIVE_HOME/bin:$PATH
# Reuse $HIVE_HOME so the install path is written only once.
export HIVE_CONF_DIR=$HIVE_HOME/conf

4 建立目录

# Reset the HDFS directories Hive needs (scratch space + warehouse).
# WARNING: this deletes ALL of /tmp and /user in HDFS — only acceptable on a
# fresh single-user dev cluster. -f keeps the step idempotent: without it,
# -rm -r fails on a brand-new cluster where the paths do not exist yet.
$HADOOP_HOME/bin/hadoop fs -rm -r -f /tmp
$HADOOP_HOME/bin/hadoop fs -rm -r -f /user/
$HADOOP_HOME/bin/hadoop fs -mkdir /tmp
$HADOOP_HOME/bin/hadoop fs -mkdir -p /user/hive/warehouse
# Per the Hive "Getting Started" guide, both the scratch dir and the
# warehouse dir itself need group write permission. chmod is not recursive,
# so targeting /user/hive/ (as the original did) leaves the warehouse
# directory without g+w.
$HADOOP_HOME/bin/hadoop fs -chmod g+w /tmp
$HADOOP_HOME/bin/hadoop fs -chmod g+w /user/hive/warehouse

5 创建元数据表

mysql -u root -p

输入密码root

-- Recreate the Hive metastore database and its MySQL account.
-- IF EXISTS (MySQL 5.7+) makes the script idempotent: a first run no longer
-- fails because the database/user does not exist yet.
DROP DATABASE IF EXISTS hive;
CREATE DATABASE hive;
USE hive;
DROP USER IF EXISTS 'hive'@'%';
-- NOTE(review): weak password and wildcard host — acceptable only on a
-- local dev box, never on a shared machine.
CREATE USER 'hive'@'%' IDENTIFIED BY '123456';
-- Least privilege: the metastore only touches its own schema, so grant on
-- hive.* instead of the original server-wide *.*.
GRANT ALL PRIVILEGES ON hive.* TO 'hive'@'%' WITH GRANT OPTION;
FLUSH PRIVILEGES;

6 初始化元数据:

$HIVE_HOME/bin/schematool -dbType mysql -initSchema

7 hive启动服务:

# Start the Hive services and open an interactive client.
# The trailing Chinese notes in the original were not shell comments and
# would have been passed to hive as extra arguments; they are real comments
# (translated) here.
cd $HIVE_HOME
$HIVE_HOME/bin/hive --service metastore &      # start the remote metastore server
$HIVE_HOME/bin/hive --service hiveserver2 &    # start HiveServer2 for remote clients
$HIVE_HOME/bin/beeline
# Inside beeline, connect with your own credentials:
!connect jdbc:hive2://localhost:10000 $user_name $password

8 查看执行情况:

http://localhost:8088
http://localhost:8042
http://localhost:50070
http://localhost:50075

9 案例:

创建关于电影的一张表:

0: jdbc:hive2://localhost:10000> 
-- Ratings fact table for the MovieLens 100k dataset (u.data is
-- tab-separated). IF NOT EXISTS keeps re-runs of the tutorial from failing.
CREATE TABLE IF NOT EXISTS u_data (
  userid INT,
  movieid INT,
  rating INT,
  unixtime STRING)  -- epoch seconds kept as text; parsed later by the mapper
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;

下载数据源

$ curl --remote-name http://files.grouplens.org/datasets/movielens/ml-100k.zip

文件下载在当前目录下,解压数据包

$ unzip ml-100k.zip

导入数据包到表里面

0: jdbc:hive2://localhost:10000> 
-- LOCAL: read the file from the client filesystem rather than HDFS.
-- OVERWRITE: replace any existing contents of u_data (safe to re-run).
LOAD DATA LOCAL INPATH '/Users/xxxxxxxx/Downloads/ml-100k/u.data'
OVERWRITE INTO TABLE u_data;

查询总的记录数,应该是100000条

0: jdbc:hive2://localhost:10000> 
SELECT COUNT(*) FROM u_data;

创建一张新的表用于存放每周的记录数

0: jdbc:hive2://localhost:10000> 
-- Same ratings keyed by ISO weekday (1 = Monday … 7 = Sunday) instead of
-- the raw unix timestamp. IF NOT EXISTS keeps re-runs from failing;
-- STORED AS TEXTFILE is stated explicitly to match u_data.
CREATE TABLE IF NOT EXISTS u_data_new (
  userid INT,
  movieid INT,
  rating INT,
  weekday INT)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;

输入

$ vi weekday_mapper.py
import sys
import datetime


def transform_line(line):
    """Map one tab-separated 'userid movieid rating unixtime' record to
    'userid movieid rating weekday', where weekday is the ISO weekday
    (1 = Monday ... 7 = Sunday) of the rating's unix timestamp.

    Raises ValueError if the line does not have exactly four fields.
    """
    userid, movieid, rating, unixtime = line.strip().split('\t')
    # fromtimestamp uses the local timezone, matching the original script.
    weekday = datetime.datetime.fromtimestamp(float(unixtime)).isoweekday()
    return '\t'.join([userid, movieid, rating, str(weekday)])


if __name__ == '__main__':
    # Hive TRANSFORM streams rows on stdin and reads results from stdout.
    # print(...) with a single argument is valid in both Python 2 and 3;
    # the original bare `print` statement was Python-2-only, and Python 2
    # is end-of-life.
    for line in sys.stdin:
        print(transform_line(line))
0: jdbc:hive2://localhost:10000> 
add FILE /Users/$User/Downloads/weekday_mapper.py;
0: jdbc:hive2://localhost:10000> 
-- Stream every row of u_data through the Python mapper added above with
-- `add FILE`; the script replaces the unixtime column with an ISO weekday.
-- OVERWRITE makes the step idempotent.
INSERT OVERWRITE TABLE u_data_new
SELECT
  TRANSFORM (userid, movieid, rating, unixtime)
  USING 'python weekday_mapper.py'
  AS (userid, movieid, rating, weekday)
FROM u_data;
0: jdbc:hive2://localhost:10000> 
-- Rating volume per weekday (1 = Monday ... 7 = Sunday).
SELECT weekday, COUNT(*)
FROM u_data_new
GROUP BY weekday;
posted @ 2018-11-07 23:10  liucenxi  阅读(226)  评论(0)    收藏  举报