Deploying HBase 2.1.2 on CentOS 7

References:

https://blog.csdn.net/langkye/article/details/97240948

https://www.cnblogs.com/swordfall/p/8819668.html

https://www.cnblogs.com/zhengna/p/9358578.html

Environment

System/Software   Version
CentOS            7.6.1810
Hadoop            2.7.6
HBase             2.1.2
ZooKeeper         3.4.9
JDK               1.8
SSH               8.1p1

1. Install Hadoop

# Download the package
cd ~ && wget http://archive.apache.org/dist/hadoop/core/hadoop-2.7.6/hadoop-2.7.6.tar.gz

# Extract
mkdir /opt/app && tar -zxf hadoop-2.7.6.tar.gz -C /opt/app

# Configure environment variables
cat >>/etc/profile<<EOF
export HADOOP_HOME=/opt/app/hadoop-2.7.6
export PATH=\$PATH:\$HADOOP_HOME/bin
EOF
source /etc/profile
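
To confirm the variables took effect, a quick sanity check:

# Hadoop should now be on the PATH and report version 2.7.6
hadoop version
echo $HADOOP_HOME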

# Edit the Hadoop configuration files
# vim /opt/app/hadoop-2.7.6/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/local/java/jdk1.8.0_11

# vim /opt/app/hadoop-2.7.6/etc/hadoop/core-site.xml
<configuration>
        <property>
                <name>hadoop.tmp.dir</name>
                <value>file:///opt/app/hadoop-2.7.6</value>
                <description>A base for other temporary directories.</description>
        </property>
        <property>
                 <name>fs.defaultFS</name>
                 <value>hdfs://<host-ip>:9000</value>
        </property>

</configuration>

# vim /opt/app/hadoop-2.7.6/etc/hadoop/hdfs-site.xml
<configuration>
        <property>
            <name>dfs.replication</name>
            <value>1</value>
        </property>
        <property>
            <name>dfs.namenode.name.dir</name>
            <value>file:///opt/app/hadoop-2.7.6/tmp/dfs/name</value>
        </property>
        <property>
            <name>dfs.datanode.data.dir</name>
            <value>file:///opt/app/hadoop-2.7.6/tmp/dfs/data</value>
        </property>
        <property>
            <name>dfs.namenode.rpc-bind-host</name>
            <value>0.0.0.0</value>
        </property>
</configuration>

# Configure passwordless SSH login (set this up yourself)

# Format HDFS (type yes when prompted)
cd /opt/app/hadoop-2.7.6 && ./bin/hdfs namenode -format
# Start HDFS (to stop it: ./sbin/stop-dfs.sh)
./sbin/start-dfs.sh
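
Once start-dfs.sh returns, a quick way to confirm HDFS is up (a sketch; the single-node layout above is assumed):

# NameNode, DataNode and SecondaryNameNode should all show up
jps
# Report capacity and live datanodes
hdfs dfsadmin -report
# The NameNode web UI should answer at http://<host-ip>:50070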

2. Install ZooKeeper

# Download the package
cd /opt/app && wget http://archive.apache.org/dist/zookeeper/zookeeper-3.4.9/zookeeper-3.4.9.tar.gz

# Extract
tar -zxvf zookeeper-3.4.9.tar.gz

# Create zoo.cfg from the sample and prepare the data directory
cd zookeeper-3.4.9/conf && cp zoo_sample.cfg zoo.cfg
mkdir ../data

# Add the following settings
vi zoo.cfg

dataDir=/opt/app/zookeeper-3.4.9/data
dataLogDir=/opt/app/zookeeper-3.4.9/data/log 
server.1=hadoop252:2888:3888

# Generate the myid file
echo 1 > /opt/app/zookeeper-3.4.9/data/myid

# Start ZooKeeper
cd /opt/app/zookeeper-3.4.9/bin/ && ./zkServer.sh start

# Check status
jps #QuorumPeerMain in the output means ZooKeeper is running
zkServer.sh status #single-node mode has only one role: standalone
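
Optionally, connect with the bundled CLI to make sure ZooKeeper answers requests (a sketch, assuming the default client port 2181):

# Open a session against the local ZooKeeper and list the root znodes
./zkCli.sh -server 127.0.0.1:2181
ls /
quit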

3. Install HBase

# Download the package
wget http://archive.apache.org/dist/hbase/2.1.2/hbase-2.1.2-bin.tar.gz

# Set the hostname (any name will do)
hostname hadoop252
echo "hadoop252" > /etc/hostname

# Sync the system clock
ntpdate ntp1.aliyun.com

# Extract
tar -xzf hbase-2.1.2-bin.tar.gz -C /opt/app/

# Edit hbase-env.sh
# vim /opt/app/hbase-2.1.2/conf/hbase-env.sh
export JAVA_HOME=/usr/local/java/jdk1.8.0_11
export HBASE_HOME=/opt/app/hbase-2.1.2
export HBASE_CLASSPATH=/opt/app/hadoop-2.7.6/etc/hadoop
export HBASE_PID_DIR=/opt/app/hbase-2.1.2/pids
export HBASE_MANAGES_ZK=false

# Edit hbase-site.xml
# vim /opt/app/hbase-2.1.2/conf/hbase-site.xml
<configuration>
 <!-- Storage directory; the HDFS here can be a single-node deployment -->
 <property>
  <name>hbase.rootdir</name>
  <value>hdfs://hadoop252:9000/hbase</value>
  <description>The directory shared by region servers.</description>
 </property>
 <property>
  <name>hbase.tmp.dir</name>
  <value>/opt/app/hbase-2.1.2/tmp</value>
 </property>
 <!-- false = standalone mode, true = distributed mode -->
 <property>
  <name>hbase.cluster.distributed</name>
  <value>true</value>
 </property>
 <property>
  <name>hbase.zookeeper.quorum</name>
  <value>hadoop252:2181</value>
 </property>
 <property>
  <name>hbase.zookeeper.property.dataDir</name>
  <value>/opt/app/zookeeper-3.4.9</value>
 </property>
</configuration>

# The Hadoop web UI is available at IP:50070

4. Start HBase

cd /opt/app/hbase-2.1.2/bin && ./start-hbase.sh

# After startup, the HBase web UI is at IP:16010
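
A minimal smoke test from the HBase shell; the table name 'smoke_test' and column family 'cf' are made up here for illustration:

# Enter the shell
/opt/app/hbase-2.1.2/bin/hbase shell
# Inside the shell: create a table, write one cell, read it back, then clean up
create 'smoke_test', 'cf'
put 'smoke_test', 'row1', 'cf:msg', 'hello'
get 'smoke_test', 'row1'
disable 'smoke_test'
drop 'smoke_test'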

5. Data Migration

5.1 Exporting table data

# 1. Export directly to the server's local filesystem
# Export the data_his table to /opt/hbase-data/data_his/ on the server
hbase org.apache.hadoop.hbase.mapreduce.Export data_his file:///opt/hbase-data/data_his/

# 2. Export to HDFS, then pull it to the local filesystem
# Export the data_his table to the HDFS directory /backup/data_his
hbase org.apache.hadoop.hbase.mapreduce.Export 'data_his' /backup/data_his
# Pull the data under /backup/data_his down to the local filesystem
hdfs dfs -get /backup/data_his ./
# Remove the /backup/data_his directory from HDFS
hdfs dfs -rm -r /backup/data_his

5.2 Importing table data

# Upload the data_his files to the HDFS directory /backup/ (-f overwrites)
hdfs dfs -put -f /opt/data/data_his /backup/
# List the data under /backup on HDFS
hdfs dfs -ls -h /backup
# Import the data_his table data from HDFS
hbase org.apache.hadoop.hbase.mapreduce.Import data_his /backup/data_his
# Import into a table under a specific namespace
hbase org.apache.hadoop.hbase.mapreduce.Import template:data_his /backup/data_his
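
Note that the Import job writes into an existing table: if the destination (here data_his, or template:data_his) has not been created yet, create it first with the same column families as the source. A sketch in the HBase shell, with a hypothetical column family 'cf':

# Pre-create the target table before running Import (adjust the column family to match the source table)
create_namespace 'template'
create 'template:data_his', 'cf'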

5.3 Exporting part of a table's data

echo "scan 'gp_energy_data', {TIMERANGE => [1640966400000, 1647401603000],  LIMIT=>20000000000}"| hbase shell > dataxcd

6. Common Operations

6.1 hbase shell

# Show cluster status
status
# List all tables
list
# List namespaces
list_namespace
# Create a namespace
create_namespace 'template'
# Create a table under the template namespace
create 'template:table_name', {NAME => ...}
# Drop a table (it must be disabled first)
drop 'table'
# Truncate a table
truncate 'table'
# Scan a table's records
scan 'table'
# Count a table's rows
count 'table'
# Check whether a table exists
exists 'table'
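
Putting a few of these together, a short example session (the table name 'demo' and column family 'cf' are made up for illustration):

# Create a namespace and a table in it, inspect it, then remove both
create_namespace 'template'
create 'template:demo', {NAME => 'cf'}
describe 'template:demo'
list_namespace_tables 'template'
disable 'template:demo'
drop 'template:demo'
drop_namespace 'template'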

6.2 hdfs dfs

# Copy a directory
hdfs dfs -cp /hbase/data/default /backup/
# Create a directory (with parents)
hdfs dfs -mkdir -p /hbase/data/abc/efg
# Delete a file
hdfs dfs -rm /hbase/abc
# Delete recursively
hdfs dfs -rm -r /hbase/data/default/abc
# List files under the root directory
hdfs dfs -ls /
# Show the sizes of files under a directory
hdfs dfs -du -h /hbase

7. Deploy Phoenix

7.1 HBase and Phoenix version compatibility

Check the version compatibility matrix on the Phoenix download page: http://phoenix.apache.org/download.html

Download Phoenix

wget https://mirrors.bfsu.edu.cn/apache/phoenix/apache-phoenix-5.0.0-HBase-2.0/bin/apache-phoenix-5.0.0-HBase-2.0-bin.tar.gz

7.2 Install

# Extract
tar -zxvf apache-phoenix-5.0.0-HBase-2.0-bin.tar.gz
# Copy the Phoenix core and server jars to the lib directory of every HBase node
cp phoenix-core-5.0.0-HBase-2.0.jar phoenix-5.0.0-HBase-2.0-server.jar /home/app/hbase-2.1.2/lib/

Edit the hbase-site.xml configuration file

<!-- Settings for building HBase secondary indexes with Phoenix -->
<property>
    <name>hbase.regionserver.wal.codec</name>
    <value>org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec</value>
</property>

<property>
    <name>hbase.region.server.rpc.scheduler.factory.class</name>
    <value>org.apache.hadoop.hbase.ipc.PhoenixRpcSchedulerFactory</value>
    <description>Factory to create the Phoenix RPC Scheduler that uses separate queues for index and metadata updates</description>
</property>

<property>
    <name>hbase.rpc.controllerfactory.class</name>
    <value>org.apache.hadoop.hbase.ipc.controller.ServerRpcControllerFactory</value>
    <description>Factory to create the Phoenix RPC Scheduler that uses separate queues for index and metadata updates</description>
</property>

<property>
    <name>hbase.master.loadbalancer.class</name>
    <value>org.apache.phoenix.hbase.index.balancer.IndexLoadBalancer</value>
</property>

<property>
    <name>hbase.coprocessor.master.classes</name>
    <value>org.apache.phoenix.hbase.index.master.IndexMasterObserver</value>
</property>
<!-- Settings for mapping Phoenix schemas to HBase namespaces -->
<property>
    <name>phoenix.schema.isNamespaceMappingEnabled</name>
    <value>true</value>
</property>

<property>
    <name>phoenix.schema.mapSystemTablesToNamespace</name>
    <value>true</value>
</property>

Copy hbase/conf/hbase-site.xml to the bin/ directory of Phoenix

cp hbase-site.xml /home/app/apache-phoenix-5.0.0-HBase-2.0-bin/bin/

Restart HBase
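
A sketch of the restart (adjust the path to wherever HBase is installed; section 3 used /opt/app/hbase-2.1.2):

# Restart HBase so it picks up the Phoenix jars and the new hbase-site.xml settings
cd /opt/app/hbase-2.1.2/bin
./stop-hbase.sh
./start-hbase.sh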

7.3 Verification

[root@localhost conf]# cd /home/app/apache-phoenix-5.0.0-HBase-2.0-bin/bin/
[root@localhost bin]# ./sqlline.py hadoop3:2181
Setting property: [incremental, false]
Setting property: [isolation, TRANSACTION_READ_COMMITTED]
issuing: !connect jdbc:phoenix:hadoop3:2181 none none org.apache.phoenix.jdbc.PhoenixDriver
Connecting to jdbc:phoenix:hadoop3:2181
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/home/app/apache-phoenix-5.0.0-HBase-2.0-bin/phoenix-5.0.0-HBase-2.0-client.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/home/app/hadoop-2.7.6/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
21/12/06 11:40:16 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Connected to: Phoenix (version 5.0)
Driver: PhoenixEmbeddedDriver (version 5.0)
Autocommit status: true
Transaction isolation: TRANSACTION_READ_COMMITTED
Building list of tables and columns for tab-completion (set fastconnect to true to skip)...
133/133 (100%) Done
Done
sqlline version 1.2.0
0: jdbc:phoenix:hadoop3:2181> 
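
From the sqlline prompt, a few basic statements confirm Phoenix can create and query tables end to end; the table and column names here are made up for illustration:

-- Create a table, upsert a row, query it back, then drop it
CREATE TABLE IF NOT EXISTS demo (id BIGINT NOT NULL PRIMARY KEY, name VARCHAR);
UPSERT INTO demo VALUES (1, 'hello');
SELECT * FROM demo;
DROP TABLE demo;
!quit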

8. Scripts

8.1 Drop a namespace and all tables under it

#!/bin/bash
# List all table names under the namespace
# $1: namespace name  $2: table name
list_tables(){
/home/hbase/hbase-1.0.1.1/bin/hbase shell << EOF
list_namespace_tables "$1"
EOF
}
# Drop a table under the namespace
drop_tables(){
/home/hbase/hbase-1.0.1.1/bin/hbase shell << EOF
disable "$1:$2"
drop "$1:$2"
EOF
}
# Drop the namespace itself
drop_namespace(){
/home/hbase/hbase-1.0.1.1/bin/hbase shell << EOF
drop_namespace "$1"
EOF
}
# Get the raw table listing
table_info=$(list_tables "$1")
# Filter the output so that only the table names remain
tables_name=$(echo $table_info | awk -F " TABLE " '{print $2}' | awk -F " row" '{print $1}' | awk -F " " 'OFS=" "{$NF="";print}')
# Stop if the namespace does not exist
first=$(echo $tables_name | awk -F " " '{print $1}')
if [ "$first" == "ERROR:" ]; then
    exit 0
fi
# Drop every table under the namespace
for s in ${tables_name[@]}
do
    echo "Namespace argument: $1"
    echo "Table: $s"
    drop_tables "$1" "$s"
done
# Finally, drop the namespace
drop_namespace "$1"
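
The script takes the namespace to drop as its single argument; for example, assuming it is saved as drop_namespace_tables.sh (a name chosen here for illustration):

# Drop the namespace 'template' and every table under it
chmod +x drop_namespace_tables.sh
./drop_namespace_tables.sh template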

Script adapted from: https://blog.csdn.net/ck978105293/article/details/109047528

9. Problems Encountered

9.1 Waiting for dfs to exit safe mode...

# The Hadoop log keeps printing: Waiting for dfs to exit safe mode...
# This is because HDFS is in safe mode
# Leave safe mode
[root@10.4.7.200 hadoop]# hadoop dfsadmin -safemode leave
DEPRECATED: Use of this script to execute hdfs command is deprecated.
Instead use the hdfs command for it.

Safe mode is OFF
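
Before forcing safe mode off it is worth checking whether the NameNode is simply still waiting for enough block reports, in which case it will leave safe mode by itself. A quick check with the non-deprecated command:

# Show the current safe mode state
hdfs dfsadmin -safemode get
# Leave safe mode explicitly (same effect as the deprecated hadoop dfsadmin form above)
hdfs dfsadmin -safemode leave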

9.2 ERROR: For input string: "FOREVER"

# Creating a table with TTL set to FOREVER fails (HBase 1.0.1.1)
hbase(main):011:0> create 'eco', {NAME => 'column', DATA_BLOCK_ENCODING => 'NONE', BLOOMFILTER => 'ROW', REPLICATION_SCOPE => '0', VERSIONS => '1', COMPRESSION => 'NONE', MIN_VERSIONS => '0', TTL => 'FOREVER', KEEP_DELETED_CELLS => 'false', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'}

ERROR: For input string: "FOREVER"

Here is some help for this command:
Creates a table. Pass a table name, and a set of column family
specifications (at least one), and, optionally, table configuration.
Column specification can be a simple string (name), or a dictionary
(dictionaries are described below in main help output), necessarily 
including NAME attribute. 
Examples:

Create a table with namespace=ns1 and table qualifier=t1
  hbase> create 'ns1:t1', {NAME => 'f1', VERSIONS => 5}

Create a table with namespace=default and table qualifier=t1
  hbase> create 't1', {NAME => 'f1'}, {NAME => 'f2'}, {NAME => 'f3'}
  hbase> # The above in shorthand would be the following:
  hbase> create 't1', 'f1', 'f2', 'f3'
  hbase> create 't1', {NAME => 'f1', VERSIONS => 1, TTL => 2592000, BLOCKCACHE => true}
  hbase> create 't1', {NAME => 'f1', CONFIGURATION => {'hbase.hstore.blockingStoreFiles' => '10'}}
  
Table configuration options can be put at the end.
Examples:

  hbase> create 'ns1:t1', 'f1', SPLITS => ['10', '20', '30', '40']
  hbase> create 't1', 'f1', SPLITS => ['10', '20', '30', '40']
  hbase> create 't1', 'f1', SPLITS_FILE => 'splits.txt', OWNER => 'johndoe'
  hbase> create 't1', {NAME => 'f1', VERSIONS => 5}, METADATA => { 'mykey' => 'myvalue' }
  hbase> # Optionally pre-split the table into NUMREGIONS, using
  hbase> # SPLITALGO ("HexStringSplit", "UniformSplit" or classname)
  hbase> create 't1', 'f1', {NUMREGIONS => 15, SPLITALGO => 'HexStringSplit'}
  hbase> create 't1', 'f1', {NUMREGIONS => 15, SPLITALGO => 'HexStringSplit', REGION_REPLICATION => 2, CONFIGURATION => {'hbase.hregion.scan.loadColumnFamiliesOnDemand' => 'true'}}

You can also keep around a reference to the created table:

  hbase> t1 = create 't1', 'f1'

Which gives you a reference to the table named 't1', on which you can then
call methods.

# Workaround: set the TTL to 2147483647 instead
hbase(main):014:0> create 'eco', {NAME => 'column', DATA_BLOCK_ENCODING => 'NONE', BLOOMFILTER => 'ROW', REPLICATION_SCOPE => '0', VERSIONS => '1', COMPRESSION => 'NONE', MIN_VERSIONS => '0', TTL => 2147483647, KEEP_DELETED_CELLS => 'false', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'}
0 row(s) in 0.7010 seconds

=> Hbase::Table - eco

9.3 java.io.FileNotFoundException: File does not exist hdfs://*.jar

# After much searching, this answer pointed the way: https://stackoverflow.com/questions/28213244/hadoop-accessing-3rd-party-libraries-from-local-file-system-of-a-hadoop-node
# The relevant jars need to be put onto HDFS
hadoop fs -mkdir -p hdfsPath
hadoop fs -put localfile/*.jar hdfsPath