Enabling HA on Apache Hadoop

Phase 2: enabling HA for the Hadoop cluster

 

Cluster role layout:

|           | master          | slave1            | slave2          |
|-----------|-----------------|-------------------|-----------------|
| HDFS      | NameNode        | NameNode          |                 |
|           | DataNode        | DataNode          | DataNode        |
|           | JournalNode     | JournalNode       | JournalNode     |
| YARN      | ResourceManager | JobHistory Server | ResourceManager |
|           | NodeManager     | NodeManager       | NodeManager     |
| ZooKeeper | zk-server       | zk-server         | zk-server       |

I. ZooKeeper installation

tar -zxvf apache-zookeeper-3.6.3-bin.tar.gz -C /opt/module/

cd /opt/module

mv apache-zookeeper-3.6.3-bin zookeeper

cd /opt/module/zookeeper/conf

mv zoo_sample.cfg zoo.cfg

vim zoo.cfg

tickTime=2000

initLimit=10

syncLimit=5

dataDir=/opt/module/zookeeper/data

clientPort=2181

server.1=master:2888:3888

server.2=slave1:2888:3888

server.3=slave2:2888:3888

mkdir -p /opt/module/zookeeper/data

touch /opt/module/zookeeper/data/myid

Write the server ID into this myid file (the ID must match the x in the corresponding server.x entry above):

vi /opt/module/zookeeper/data/myid

chown -R hadoop:hadoop zookeeper

scp -r zookeeper slave1:/opt/module/

scp -r zookeeper slave2:/opt/module/

Edit the myid files on slave1 and slave2 so they contain 2 and 3 respectively, as shown in the sketch below.
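Equivalently, the IDs can be written non-interactively (a sketch; it assumes passwordless SSH from master to slave1 and slave2, which is also needed for the scp above):

echo 1 > /opt/module/zookeeper/data/myid                  # on master (server.1)
ssh slave1 'echo 2 > /opt/module/zookeeper/data/myid'     # server.2
ssh slave2 'echo 3 > /opt/module/zookeeper/data/myid'     # server.3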

Note: from ZooKeeper 3.5 onward you must download the -bin tarball (e.g. apache-zookeeper-3.5.5-bin.tar.gz); apache-zookeeper-3.5.5.tar.gz is uncompiled source and will not start.

II. Configuring HA for the Hadoop cluster

vim core-site.xml

<configuration>

  <!-- Configurations for NameNode, DataNode, and NodeManager -->

  <!-- Default filesystem: the logical HDFS nameservice defined in hdfs-site.xml -->

  <property>

    <name>fs.defaultFS</name>

    <value>hdfs://myNameNodeServer</value>

    <description>NameNode URI</description>

  </property>

  <!-- Base directory for Hadoop data -->

  <property>

    <name>hadoop.tmp.dir</name>

    <value>/opt/module/hadoop-3.2.2/data</value>

  </property>

  <!-- Static user for the HDFS web UI: hadoop -->

  <property>

    <name>hadoop.http.staticuser.user</name>

    <value>hadoop</value>

  </property>

  <!-- ZooKeeper quorum used for automatic failover -->

  <property>

    <name>ha.zookeeper.quorum</name>

    <value>master:2181,slave1:2181,slave2:2181</value>

  </property>

  <property>

    <name>io.file.buffer.size</name>

    <value>131072</value>

    <description>Size of read/write buffer used in SequenceFiles. The default value is 131072.</description>

  </property>

</configuration>

vim hdfs-site.xml

<configuration>

  <!-- Logical name of the HDFS nameservice -->

  <property>

    <name>dfs.nameservices</name>

    <value>myNameNodeServer</value>

    <description>Logical name for this HDFS nameservice</description>

  </property>

  <!-- The nameservice myNameNodeServer contains two NameNodes: nn1 and nn2 -->

  <property>

    <name>dfs.ha.namenodes.myNameNodeServer</name>

    <value>nn1,nn2</value>

  </property>



  <!-- RPC address of nn1 -->

  <property>

     <name>dfs.namenode.rpc-address.myNameNodeServer.nn1</name>

     <value>master:9000</value>

  </property>



  <!-- HTTP address of nn1 -->

  <property>

     <name>dfs.namenode.http-address.myNameNodeServer.nn1</name>

     <value>master:9870</value>

  </property>



  <!-- RPC address of nn2 -->

  <property>

     <name>dfs.namenode.rpc-address.myNameNodeServer.nn2</name>

     <value>slave1:9000</value>

  </property>



  <!-- HTTP address of nn2 -->

  <property>

     <name>dfs.namenode.http-address.myNameNodeServer.nn2</name>

     <value>slave1:9870</value>

  </property>



  <!-- Where the NameNodes' shared edit log is stored on the JournalNodes -->

  <property>

    <name>dfs.namenode.shared.edits.dir</name>

    <value>qjournal://master:8485;slave1:8485;slave2:8485/myNameNodeServer</value>

  </property>



  <!-- Local directory where each JournalNode stores its data -->

  <property>

    <name>dfs.journalnode.edits.dir</name>

    <value>/opt/module/hadoop-3.2.2/journalData</value>

  </property>

  <!-- Enable automatic failover for the NameNodes -->

  <property>

    <name>dfs.ha.automatic-failover.enabled</name>

    <value>true</value>

  </property>



  <!-- Failover proxy provider used by HDFS clients -->

  <property>

    <name>dfs.client.failover.proxy.provider.myNameNodeServer</name>

    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>

  </property>



  <!-- Fencing methods: after a failover, prevent the old active NameNode from serving and causing split-brain. Multiple methods are separated by newlines, one method per line -->

  <property>

    <name>dfs.ha.fencing.methods</name>

    <value>

      sshfence

      shell(/bin/true)

    </value>

  </property>



  <!-- sshfence requires passwordless SSH; replace the key path with your own user's -->

  <property>

    <name>dfs.ha.fencing.ssh.private-key-files</name>

    <value>/home/hadoop/.ssh/id_rsa</value>

  </property>



  <!-- Timeout in milliseconds for the sshfence mechanism -->

  <property>

    <name>dfs.ha.fencing.ssh.connect-timeout</name>

    <value>30000</value>

  </property>

</configuration>
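sshfence only works if one NameNode host can SSH to the other without a password, using the key configured above. A minimal sketch for the hadoop user (an assumption about the account name; skip this if key-based SSH between master and slave1 is already in place):

# run as the hadoop user, once on master and once on slave1
ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa
ssh-copy-id hadoop@master
ssh-copy-id hadoop@slave1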

vim yarn-site.xml

<configuration>

  <!--Configurations for ResourceManager and NodeManager:-->

  <property>

    <name>yarn.acl.enable</name>

    <value>false</value>

    <description>Enable ACLs? Defaults to false. Valid values are "true" and "false".</description>

  </property>

        <property>

    <name>yarn.admin.acl</name>

    <value>*</value>

    <description>ACL to set admins on the cluster. ACLs are of the form comma-separated-users space comma-separated-groups. Defaults to the special value of *, which means anyone. The special value of just a space means no one has access.</description>

  </property>

  <property>

    <name>yarn.resourcemanager.ha.enabled</name>

    <value>true</value>

  </property>

  <property>

    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>

    <value>true</value>

  </property>

  <property>

    <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>

    <value>true</value>

  </property>

  <property>

    <name>yarn.resourcemanager.recovery.enabled</name>

    <value>true</value>

  </property>

  <property>

    <name>yarn.resourcemanager.zk-address</name>

    <value>master:2181,slave1:2181,slave2:2181</value>

  </property>

  <property>

    <name>yarn.resourcemanager.store.class</name>

    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>

  </property>

  <property>

    <name>yarn.client.failover-sleep-base-ms</name>

    <value>100</value>

  </property>

  <property>

    <name>yarn.client.failover-sleep-max-ms</name>

    <value>2000</value>

  </property>

  <property>

    <name>yarn.resourcemanager.cluster-id</name>

    <value>yarnRM</value>

  </property>

  <property>

    <name>yarn.resourcemanager.address.rm210</name>

    <value>master:8032</value>

  </property>

  <property>

    <name>yarn.resourcemanager.scheduler.address.rm210</name>

    <value>master:8030</value>

  </property>

  <property>

    <name>yarn.resourcemanager.resource-tracker.address.rm210</name>

    <value>master:8031</value>

  </property>

  <property>

    <name>yarn.resourcemanager.admin.address.rm210</name>

    <value>master:8033</value>

  </property>

  <property>

    <name>yarn.resourcemanager.webapp.address.rm210</name>

    <value>master:8088</value>

  </property>

  <property>

    <name>yarn.resourcemanager.webapp.https.address.rm210</name>

    <value>master:8090</value>

  </property>

  <property>

    <name>yarn.resourcemanager.address.rm209</name>

    <value>slave2:8032</value>

  </property>

  <property>

    <name>yarn.resourcemanager.scheduler.address.rm209</name>

    <value>slave2:8030</value>

  </property>

  <property>

    <name>yarn.resourcemanager.resource-tracker.address.rm209</name>

    <value>slave2:8031</value>

  </property>

  <property>

    <name>yarn.resourcemanager.admin.address.rm209</name>

    <value>slave2:8033</value>

  </property>

  <property>

    <name>yarn.resourcemanager.webapp.address.rm209</name>

    <value>slave2:8088</value>

  </property>

  <property>

    <name>yarn.resourcemanager.webapp.https.address.rm209</name>

    <value>slave2:8090</value>

  </property>

  <property>

    <name>yarn.resourcemanager.ha.rm-ids</name>

    <value>rm210,rm209</value>

  </property>

  <property>

    <name>yarn.log-aggregation-enable</name>

    <value>true</value>

    <description>Configuration to enable or disable log aggregation</description>

  </property>

  <!-- Configurations for ResourceManager: -->

  <property>

    <name>yarn.resourcemanager.scheduler.class</name>

    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>

    <description>ResourceManager Scheduler class: CapacityScheduler (recommended), FairScheduler (also recommended), or FifoScheduler. The default value is "org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler".
    </description>

  </property>

  <property>

    <name>yarn.scheduler.minimum-allocation-mb</name>

    <value>512</value>

    <description>Minimum limit of memory to allocate to each container request at the ResourceManager. NOTE: in MB.</description>

  </property>

  <property>

    <name>yarn.scheduler.maximum-allocation-mb</name>

    <value>1024</value>

    <description>Maximum limit of memory to allocate to each container request at the ResourceManager. NOTE: in MB.</description>

  </property>

  <!-- Configurations for the History Server: -->

  <property>

    <name>yarn.log-aggregation.retain-seconds</name>

    <value>-1</value>

    <description>How long to keep aggregation logs before deleting them. -1 disables. Be careful, set this too small and you will spam the name node.</description>

  </property>

  <property>

    <name>yarn.log-aggregation.retain-check-interval-seconds</name>

    <value>-1</value>

    <description>Time between checks for aggregated log retention. If set to 0 or a negative value then the value is computed as one-tenth of the aggregated log retention time. Be careful, set this too small and you will spam the name node.</description>

  </property>

  <!-- Configurations for NodeManager: -->

  <property>

    <name>yarn.nodemanager.resource.memory-mb</name>

    <value>1024</value>

    <description>Resource i.e. available physical memory, in MB, for given NodeManager.

        The default value is 8192.

        NOTES:Defines total available resources on the NodeManager to be made available to running containers

    </description>

  </property>

  <property>

    <name>yarn.nodemanager.vmem-pmem-ratio</name>

    <value>2.1</value>

    <description>Maximum ratio by which virtual memory usage of tasks may exceed physical memory.

        The default value is 2.1

        NOTES: The virtual memory usage of each task may exceed its physical memory limit by this ratio. The total amount of virtual memory used by tasks on the NodeManager may exceed its physical memory usage by this ratio.

    </description>

  </property>

  <property>

    <name>yarn.nodemanager.local-dirs</name>

    <value>/opt/module/hadoop-3.2.2/nm-local-dir</value>

    <description>Comma-separated list of paths on the local filesystem where intermediate data is written.

        The default value is "${hadoop.tmp.dir}/nm-local-dir"

        NOTES:Multiple paths help spread disk i/o.

    </description>

  </property>

  <property>

    <name>yarn.nodemanager.log-dirs</name>

    <value>/opt/module/hadoop-3.2.2/userlogs</value>

    <description>Comma-separated list of paths on the local filesystem where logs are written

        The default value is "${yarn.log.dir}/userlogs"

        NOTES:Multiple paths help spread disk i/o.

        </description>

  </property>

   <property>

    <name>yarn.nodemanager.log.retain-seconds</name>

    <value>10800</value>

    <description>Default time (in seconds) to retain log files on the NodeManager Only applicable if log-aggregation is disabled.

        The default value is "10800"

    </description>

  </property>

  <property>

    <name>yarn.application.classpath</name>

    <value>/opt/module/hadoop-3.2.2/etc/hadoop:/opt/module/hadoop-3.2.2/share/hadoop/common/lib/*:/opt/module/hadoop-3.2.2/share/hadoop/common/*:/opt/module/hadoop-3.2.2/share/hadoop/hdfs:/opt/module/hadoop-3.2.2/share/hadoop/hdfs/lib/*:/opt/module/hadoop-3.2.2/share/hadoop/hdfs/*:/opt/module/hadoop-3.2.2/share/hadoop/mapreduce/lib/*:/opt/module/hadoop-3.2.2/share/hadoop/mapreduce/*:/opt/module/hadoop-3.2.2/share/hadoop/yarn:/opt/module/hadoop-3.2.2/share/hadoop/yarn/lib/*:/opt/module/hadoop-3.2.2/share/hadoop/yarn/*</value>

  </property>

  <property>

    <name>yarn.nodemanager.remote-app-log-dir</name>

    <value>/logs</value>

    <description>HDFS directory where the application logs are moved on application completion. Need to set appropriate permissions. Only applicable if log-aggregation is enabled.

    The default value is "/logs" or "/tmp/logs"

    </description>

  </property>

   <property>

    <name>yarn.nodemanager.remote-app-log-dir-suffix</name>

    <value>logs</value>

    <description>Suffix appended to the remote log dir. Logs will be aggregated to ${yarn.nodemanager.remote-app-log-dir}/${user}/${thisParam}. Only applicable if log-aggregation is enabled.</description>

  </property>

   <property>

    <name>yarn.nodemanager.aux-services</name>

    <value>mapreduce_shuffle</value>

    <description>Shuffle service that needs to be set for Map Reduce applications.</description>

  </property>

</configuration>
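The three edited files must be identical on every node. A distribution sketch from master (assuming the same /opt/module/hadoop-3.2.2 install path on slave1 and slave2):

cd /opt/module/hadoop-3.2.2/etc/hadoop
scp core-site.xml hdfs-site.xml yarn-site.xml slave1:/opt/module/hadoop-3.2.2/etc/hadoop/
scp core-site.xml hdfs-site.xml yarn-site.xml slave2:/opt/module/hadoop-3.2.2/etc/hadoop/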

III. Startup

1. Start the ZooKeeper cluster

Run on every server: zkServer.sh start
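A quick check that the quorum formed (one leader, two followers):

zkServer.sh status
# one node should report "Mode: leader", the other two "Mode: follower"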

2. Start the JournalNodes

Run on every server: hadoop-daemon.sh start journalnode
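hadoop-daemon.sh still works on Hadoop 3.2 but prints a deprecation warning; the equivalent 3.x form, plus a quick check that the daemon came up:

hdfs --daemon start journalnode
jps    # each of the three nodes should now show a JournalNode process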

3. Format HDFS

Formatting is only required before the first startup, not for later ones. Formatting creates a directory based on the hadoop.tmp.dir setting in core-site.xml, here /opt/module/hadoop-3.2.2/data. If the cluster has been formatted before, delete that directory on all nodes first by running on all three nodes:

rm -rf /opt/module/hadoop-3.2.2/data

Run the HDFS format on one NameNode host:

hdfs namenode -format

Success is indicated by a line such as: 2021-08-05 09:50:12,290 INFO common.Storage: Storage directory /opt/module/hadoop-3.2.2/data/dfs/name has been successfully formatted.

Then sync /opt/module/hadoop-3.2.2/data to the other NameNode host (see the sketch below).
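One way to do the sync (a sketch, assuming master is the NameNode that ran the format and slave1 is the other NameNode):

scp -r /opt/module/hadoop-3.2.2/data slave1:/opt/module/hadoop-3.2.2/

# alternatively, start the formatted NameNode first (hdfs --daemon start namenode on master)
# and then let Hadoop copy the metadata by running this on slave1:
hdfs namenode -bootstrapStandby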

4. Format the HA state in ZooKeeper

hdfs zkfc -formatZK
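Run this once, on one of the NameNode hosts; it creates the HA znode in ZooKeeper. A quick way to confirm (a sketch):

zkCli.sh -server master:2181
# inside the ZooKeeper shell:
ls /
# the output should include "hadoop-ha", which contains a "myNameNodeServer" node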

5. Start HDFS and YARN

On a NameNode host run: start-dfs.sh

On a ResourceManager host run: start-yarn.sh

On the JobHistory Server host run: mr-jobhistory-daemon.sh start historyserver
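Once everything is up, the HA state can be verified (a sketch, using the nn and rm IDs defined in the configs above):

jps                                    # check the expected daemons on each node
hdfs haadmin -getServiceState nn1      # expect "active" or "standby"
hdfs haadmin -getServiceState nn2
yarn rmadmin -getServiceState rm210    # expect "active" or "standby"
yarn rmadmin -getServiceState rm209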
