Hadoop + ZooKeeper High Availability Cluster
dd dd1 : NameNode, DFSZKFailoverController
dd2 dd3 : DataNode | dd1 dd2 : ResourceManager
--------------------------------------------------------------------------------
#dd
mv /app/zookeeper/conf/zoo_sample.cfg /app/zookeeper/conf/zoo.cfg
mkdir -p /app/zookeeper/{data,logs}
vi /app/zookeeper/conf/zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
dataLogDir=/app/zookeeper/logs
dataDir=/app/zookeeper/data
clientPort=2181
server.1=dd:2888:3888
server.2=dd1:2888:3888
server.3=dd2:2888:3888
server.4=dd3:2888:3888:observer
for ((x=1;x<=3;x++));do scp -r /app/zookeeper/conf/ dd$x:/app/zookeeper/;done
echo '1'>/app/zookeeper/data/myid
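Every ZooKeeper server needs its own data directory and a myid matching its server.N entry in zoo.cfg; a minimal sketch for the remaining nodes (assuming the same /app/zookeeper layout on each host):
#dd1
mkdir -p /app/zookeeper/{data,logs}; echo '2' > /app/zookeeper/data/myid
#dd2
mkdir -p /app/zookeeper/{data,logs}; echo '3' > /app/zookeeper/data/myid
#dd3
mkdir -p /app/zookeeper/{data,logs}; echo '4' > /app/zookeeper/data/myid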
#dd3
echo peerType=observer >>/app/zookeeper/conf/zoo.cfg
#dd dd1 dd2 dd3
/app/zookeeper/bin/zkServer.sh start
jps
or
/app/zookeeper/bin/zkServer.sh status
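To check the whole ensemble from dd in one pass, a simple loop (a sketch, assuming passwordless ssh to every node):
for h in dd dd1 dd2 dd3; do ssh $h /app/zookeeper/bin/zkServer.sh status; done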
--------------------------------------------------------------------------------
mkdir -p /app/hadoop/{tmp,var}
mkdir -p /app/hadoop/dfs/{data,name}
mkdir -p /app/hadoop/jour
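These directories are needed on every node (dd dd1 dd2 dd3); a minimal way to create them on the remaining hosts from dd, assuming passwordless ssh:
for h in dd1 dd2 dd3; do ssh $h "mkdir -p /app/hadoop/{tmp,var,jour} /app/hadoop/dfs/{data,name}"; done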
--------------------------------------------------------------------------------
for ((x=1;x<=3;x++));do scp -r /app/hadoop/etc/hadoop/ dd$x:/app/hadoop/etc/;done
#dd dd1 dd2 dd3
/app/hadoop/sbin/hadoop-daemon.sh start journalnode
#dd
/app/hadoop/bin/hdfs namenode -format
/app/hadoop/sbin/hadoop-daemon.sh start namenode
#dd1 sync the NameNode metadata from dd
/app/hadoop/bin/hdfs namenode -bootstrapStandby
cat /app/hadoop/dfs/name/current/VERSION
/app/hadoop/sbin/hadoop-daemon.sh start namenode
#dd2 dd3
/app/hadoop/sbin/hadoop-daemon.sh start datanode
jps
dd:50070 standby | dd1:50070 standby
#dd
/app/hadoop/bin/hdfs haadmin -transitionToActive nn1 --forcemanual
Force dd (nn1) to become the active node
dd:50070 active | dd1:50070 standby
#dd initialize and start the cluster's DFSZKFailoverController process
/app/hadoop/bin/hdfs zkfc -formatZK
/app/hadoop/sbin/hadoop-daemon.sh start zkfc
#dd1
/app/hadoop/sbin/hadoop-daemon.sh start zkfc
#dd test distributed data storage
touch /tmp/1.file
hadoop fs -mkdir /dd
hadoop fs -put /tmp/1.file /dd/
hadoop fs -ls /
hadoop fs -ls /dd
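To confirm the file was stored with the configured replication factor (dfs.replication=2), hdfs fsck can report its blocks and locations:
hdfs fsck /dd/1.file -files -blocks -locations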
#dd dd1
zkCli.sh
ls /
quit
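After hdfs zkfc -formatZK, the HA election znode should also be visible in ZooKeeper; its name follows the dfs.nameservices value (ddhc):
zkCli.sh
ls /hadoop-ha
ls /hadoop-ha/ddhc
quit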
#dd start the NodeManager process on every node
/app/hadoop/sbin/start-yarn.sh
#dd dd1
/app/hadoop/sbin/mr-jobhistory-daemon.sh start historyserver
#dd1 dd2
/app/hadoop/sbin/yarn-daemon.sh start resourcemanager
#dd
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2
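A quick end-to-end smoke test is to submit a small MapReduce job; a sketch (the examples jar path and version wildcard are assumptions, adjust to the actual file under /app/hadoop):
yarn jar /app/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar pi 2 10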
NameNode high-availability failover test
#dd1
jps
kill -9 <NameNode PID shown by jps>
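After the NameNode is killed, its peer should be promoted to active by zkfc; verify the states and then restart the killed process:
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2
/app/hadoop/sbin/hadoop-daemon.sh start namenode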
ResourceManager high-availability failover test:
#dd1
jps
kill -9 <ResourceManager PID shown by jps>
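Likewise, the standby ResourceManager should take over; verify the states and restart the killed one:
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2
/app/hadoop/sbin/yarn-daemon.sh start resourcemanager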
--------------------------------------------------------------------------------
vi /app/hadoop/etc/hadoop/core-site.xml
<property>
<name>hadoop.tmp.dir</name>
<value>/app/hadoop/tmp</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://ddhc</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>dd:2181,dd1:2181,dd2:2181,dd3:2181</value>
</property>
vi /app/hadoop/etc/hadoop/hdfs-site.xml
<property>
<name>dfs.name.dir</name>
<value>/app/hadoop/dfs/name</value>
</property>
<property>
<name>dfs.data.dir</name>
<value>/app/hadoop/dfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>ddhc</value>
</property>
<property>
<name>dfs.ha.namenodes.ddhc</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.ddhc.nn1</name>
<value>dd:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.ddhc.nn2</name>
<value>dd1:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.ddhc.nn1</name>
<value>dd:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.ddhc.nn2</name>
<value>dd1:50070</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://dd:8485;dd1:8485;dd2:8485;dd3:8485/ddhc</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/app/hadoop/jour</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.ddhc</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
mv /app/hadoop/etc/hadoop/mapred-site.xml.template /app/hadoop/etc/hadoop/mapred-site.xml
vi /app/hadoop/etc/hadoop/mapred-site.xml
<property>
<name>mapred.local.dir</name>
<value>/app/hadoop/var</value>
</property>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>dd:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>dd:19888</value>
</property>
<property>
<name>mapreduce.jobhistory.joblist.cache.size</name>
<value>20000</value>
</property>
vi /app/hadoop/etc/hadoop/yarn-site.xml
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>rmCluster</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>dd1</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>dd2</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>dd:2181,dd1:2181,dd2:2181,dd3:2181</value>
</property>
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
vi /app/hadoop/etc/hadoop/slaves
dd
dd1
dd2
dd3
vi /app/hadoop/etc/hadoop/hadoop-env.sh
vi /app/hadoop/etc/hadoop/yarn-env.sh
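The contents of these two env files are not shown above; at minimum JAVA_HOME usually has to be set explicitly in both (the JDK path below is an assumption, adjust to the actual install):
# in hadoop-env.sh and yarn-env.sh
export JAVA_HOME=/app/jdk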