[Hadoop] Hadoop Cluster and HA

#!/bin/bash

 

# Time server (NTP)

yum install -y ntp

systemctl enable ntpd

systemctl start ntpd
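
# Optional check (a sketch): confirm NTP peers are reachable; a synced peer is marked with an asterisk in the first column.

ntpq -p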

 

# Passwordless SSH login (sshfence in hdfs-site.xml below expects the
# hadoop user's RSA key, so generate an RSA pair rather than DSA)

ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa

cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys

chmod 0600 ~/.ssh/authorized_keys

ssh-copy-id -i /home/hadoop/.ssh/id_rsa.pub $USER@$HOSTNAME
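
# Optional check (a sketch): confirm key-based login works non-interactively before sshfence depends on it.

ssh -o BatchMode=yes $USER@$HOSTNAME true && echo "ssh key login OK"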

 

# Cluster host addresses

zookeeper01=10.211.55.101

zookeeper02=10.211.55.102

zookeeper03=10.211.55.103

journalnode01=10.211.55.101

journalnode02=10.211.55.102

journalnode03=10.211.55.103

namenode01=10.211.55.101

namenode02=10.211.55.102

datanode01=10.211.55.101

datanode02=10.211.55.102

datanode03=10.211.55.103

resourcemanager01=10.211.55.101

resourcemanager02=10.211.55.102

 

# /etc/hosts entries (append, so the existing localhost entries are preserved)

cat <<EOF | tee -a /etc/hosts

$zookeeper01    zookeeper01

$zookeeper02    zookeeper02

$zookeeper03    zookeeper03

$journalnode01    journalnode01

$journalnode02    journalnode02

$journalnode03    journalnode03

$namenode01    namenode01

$namenode02    namenode02

$datanode01    datanode01

$datanode02    datanode02

$datanode03    datanode03

$resourcemanager01    resourcemanager01

$resourcemanager02    resourcemanager02

EOF

 

# JAVA JDK

rpm -ivh /tmp/jdk-8u131-linux-x64.rpm

 

# Java environment variables

echo 'JAVA_HOME=/usr/java/default' >> /etc/profile

echo 'JRE_HOME=$JAVA_HOME/jre' >> /etc/profile

echo 'CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib' >> /etc/profile

echo 'PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin' >> /etc/profile

echo 'export JAVA_HOME JRE_HOME PATH CLASSPATH' >> /etc/profile

source /etc/profile
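
# Optional check: confirm the JDK is on PATH.

java -version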

 

# Shorten the JVM DNS cache TTL (by default successful lookups are cached forever)

sed -i 's/#networkaddress.cache.ttl=-1/networkaddress.cache.ttl=60/' $JRE_HOME/lib/security/java.security
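
# Optional check (a sketch): confirm the TTL setting is now active.

grep '^networkaddress.cache.ttl' $JRE_HOME/lib/security/java.security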

 

# ZooKeeper download URL (CN mirror)

# ZOOKEEPER_WEB_FILE=https://mirrors.tuna.tsinghua.edu.cn/apache/zookeeper/zookeeper-3.4.10/zookeeper-3.4.10.tar.gz

# Optionally download the ZooKeeper tarball

# wget -P /tmp $ZOOKEEPER_WEB_FILE

 

# ZooKeeper install tarball (referenced unconditionally below, so set it here)

ZOOKEEPER_INSTALL_FILE=/tmp/zookeeper-3.4.10.tar.gz

# ZooKeeper directories

ZOOKEEPER_INSTALL_DIR=/opt/zookeeper-3.4.10

ZOOKEEPER_HOME=/opt/zookeeper

 

# Install ZooKeeper

tar -C /opt -xf $ZOOKEEPER_INSTALL_FILE

ln -s $ZOOKEEPER_INSTALL_DIR $ZOOKEEPER_HOME

 

# Create group and user

groupadd zookeeper

useradd -g zookeeper zookeeper -s /sbin/nologin

 

# Data and log directories

mkdir -p /mnt/zookeeper/data

mkdir -p /var/log/zookeeper

chown zookeeper:zookeeper -R /mnt/zookeeper/data

chown zookeeper:zookeeper -R /var/log/zookeeper

 

# Create the configuration file

cat <<EOF | tee $ZOOKEEPER_HOME/conf/zoo.cfg

tickTime=2000

initLimit=10

syncLimit=5

dataDir=/mnt/zookeeper/data

dataLogDir=/var/log/zookeeper

clientPort=2181

server.1=zookeeper01:2888:3888

server.2=zookeeper02:2888:3888

server.3=zookeeper03:2888:3888

EOF

 

chown zookeeper:zookeeper -R $ZOOKEEPER_INSTALL_DIR

chown -h zookeeper:zookeeper $ZOOKEEPER_HOME

 

# systemd unit for boot-time startup of ZooKeeper

cat <<EOF | tee /usr/lib/systemd/system/zookeeper.service 

[Unit]

Description=zookeeper

After=syslog.target network.target

 

[Service]

Type=forking

Environment=ZOO_LOG_DIR=/var/log/zookeeper

# Environment=ZOO_LOG4J_PROP="INFO,ROLLINGFILE"

ExecStart=/opt/zookeeper/bin/zkServer.sh start

ExecStop=/opt/zookeeper/bin/zkServer.sh stop

Restart=always

User=zookeeper

Group=zookeeper

 

[Install]

WantedBy=multi-user.target

EOF

 

# Each ZooKeeper server needs a unique id; run exactly one of these per host (or use the sketch below)

# echo "1" > /mnt/zookeeper/data/myid

# echo "2" > /mnt/zookeeper/data/myid

# echo "3" > /mnt/zookeeper/data/myid

 

systemctl enable zookeeper

systemctl start zookeeper

systemctl status zookeeper

 

firewall-cmd --zone=public --add-port=2181/tcp --permanent

firewall-cmd --zone=public --add-port=2888/tcp --permanent

firewall-cmd --zone=public --add-port=3888/tcp --permanent

firewall-cmd --reload

 

# Check availability (ZooKeeper four-letter-word command over the client port)

#echo conf | nc $HOSTNAME 2181
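
# A broader sketch: ask each ensemble member "ruok"; a healthy server answers "imok" (assumes nc is installed).

# for zk in zookeeper01 zookeeper02 zookeeper03; do echo ruok | nc $zk 2181; echo; done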

 

 

# Hadoop download URL (CN mirror)

# HADOOP_WEB_FILE=https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz

# HADOOP_WEB_FILE=https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-2.8.0/hadoop-2.8.0.tar.gz

# Optionally download the Hadoop tarball

# wget -P /tmp $HADOOP_WEB_FILE

 

# Hadoop install tarball (referenced unconditionally below, so set it here)

HADOOP_INSTALL_FILE=/tmp/hadoop-2.8.0.tar.gz

# Hadoop directories

HADOOP_INSTALL_DIR=/opt/hadoop-2.8.0

HADOOP_HOME=/opt/hadoop

HADOOP_WORK_DIR=/mnt/hadoop

 

# Install dependencies

# OpenSSL

yum install -y openssl-devel

 

# Install Hadoop

if [ -f "$HADOOP_INSTALL_FILE" ]; then

    tar -C /opt -xf "$HADOOP_INSTALL_FILE"

    ln -s "$HADOOP_INSTALL_DIR" /opt/hadoop

else

    echo 'Hadoop install file required' >&2

    exit 1

fi

 

# Create group and user

groupadd hadoop

useradd -g hadoop hadoop -s /bin/bash

 

# Data directories

mkdir -p $HADOOP_WORK_DIR/name

mkdir -p $HADOOP_WORK_DIR/data

mkdir -p $HADOOP_WORK_DIR/journal

mkdir -p $HADOOP_WORK_DIR/tmp

mkdir -p $HADOOP_HOME/run

chown hadoop:hadoop -R $HADOOP_INSTALL_DIR

chown hadoop:hadoop -R $HADOOP_HOME

chown hadoop:hadoop -R $HADOOP_WORK_DIR

 

 

# System environment variables

if [ -z "$HADOOP_PREFIX" ]; then

    echo "HADOOP_HOME=/opt/hadoop" >> /etc/profile

    echo 'HADOOP_PREFIX=$HADOOP_HOME' >> /etc/profile

    echo 'HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop' >> /etc/profile

    echo 'PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin' >> /etc/profile

    echo 'HADOOP_PID_DIR=$HADOOP_PREFIX/run' >> /etc/profile

    echo 'YARN_PID_DIR=$HADOOP_PREFIX/run' >> /etc/profile

    echo "export HADOOP_HOME HADOOP_PREFIX HADOOP_CONF_DIR HADOOP_PID_DIR YARN_PID_DIR PATH" >> /etc/profile

    source /etc/profile

else

    echo "HADOOP_PREFIX=$HADOOP_PREFIX"

fi
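
# Optional check: confirm the Hadoop binaries resolve on PATH.

hadoop version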

 

# core-site.xml initial configuration

cat <<EOF | tee $HADOOP_HOME/etc/hadoop/core-site.xml

<?xml version="1.0" encoding="UTF-8"?>

<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

    <property>

        <name>fs.defaultFS</name>

        <value>hdfs://nameservices</value>

    </property>

    <property>

        <name>hadoop.tmp.dir</name>

        <value>/mnt/hadoop/tmp</value>

    </property>

    <property>

        <name>dfs.journalnode.edits.dir</name>

        <value>/mnt/hadoop/journal</value>

    </property>

    <property>

        <name>ha.zookeeper.quorum</name>

        <value>zookeeper01:2181,zookeeper02:2181,zookeeper03:2181</value>

    </property>

</configuration>

EOF

 

# hdfs-site.xml initial configuration

cat <<EOF | tee $HADOOP_HOME/etc/hadoop/hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>

<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

    <property>

        <name>dfs.namenode.name.dir</name>

        <value>/mnt/hadoop/name</value>

    </property>

    <property>

        <name>dfs.datanode.data.dir</name>

        <value>/mnt/hadoop/data</value>

    </property>

    <property>

        <name>dfs.replication</name>

        <value>3</value>

    </property>

    <property>

        <name>dfs.nameservices</name>

        <value>nameservices</value>

    </property>

    <property>

        <name>dfs.ha.namenodes.nameservices</name>

        <value>nn1,nn2</value>

    </property>

    <property>

        <name>dfs.namenode.rpc-address.nameservices.nn1</name>

        <value>namenode01:8020</value>

    </property>

    <property>

        <name>dfs.namenode.rpc-address.nameservices.nn2</name>

        <value>namenode02:8020</value>

    </property>

    <property>

        <name>dfs.namenode.http-address.nameservices.nn1</name>

        <value>namenode01:50070</value>

    </property>

    <property>

        <name>dfs.namenode.http-address.nameservices.nn2</name>

        <value>namenode02:50070</value>

    </property>

    <property>

        <name>dfs.namenode.shared.edits.dir</name>

        <value>qjournal://journalnode01:8485;journalnode02:8485;journalnode03:8485/nameservices</value>

    </property>

    <property>

        <name>dfs.client.failover.proxy.provider.nameservices</name>

        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>

    </property>

    <property>

        <name>dfs.ha.fencing.methods</name>

        <value>sshfence</value>

    </property>

    <property>

        <name>dfs.ha.fencing.ssh.private-key-files</name>

        <value>/home/hadoop/.ssh/id_rsa</value>

    </property>

     <property>

        <name>dfs.ha.automatic-failover.enabled</name>

        <value>true</value>

     </property>

</configuration>

EOF

 

# mapred-site.xml initial configuration

cat <<EOF | tee $HADOOP_HOME/etc/hadoop/mapred-site.xml

<configuration>

    <property>

        <name>mapreduce.framework.name</name>

        <value>yarn</value>

    </property>

</configuration>

EOF

 

# yarn-site.xml initial configuration

cat <<EOF | tee $HADOOP_HOME/etc/hadoop/yarn-site.xml

<configuration>

    <property>

        <name>yarn.nodemanager.aux-services</name>

        <value>mapreduce_shuffle</value>

    </property>

    <property>

        <name>yarn.resourcemanager.ha.enabled</name>

        <value>true</value>

    </property>

    <property>

        <name>yarn.resourcemanager.cluster-id</name>

        <value>resourceCluster</value>

    </property>

    <property>

        <name>yarn.resourcemanager.ha.rm-ids</name>

        <value>rm1,rm2</value>

    </property>

    <property>

        <name>yarn.resourcemanager.hostname.rm1</name>

        <value>resourcemanager01</value>

    </property>

    <property>

        <name>yarn.resourcemanager.hostname.rm2</name>

        <value>resourcemanager02</value>

    </property>

    <property>

        <name>yarn.resourcemanager.webapp.address.rm1</name>

        <value>resourcemanager01:8088</value>

    </property>

    <property>

        <name>yarn.resourcemanager.webapp.address.rm2</name>

        <value>resourcemanager02:8088</value>

    </property>

    <property>

        <name>yarn.resourcemanager.zk-address</name>

        <value>zookeeper01:2181,zookeeper02:2181,zookeeper03:2181</value>

    </property>

</configuration>

EOF

 

# systemd unit for the JournalNode

cat <<EOF | tee /usr/lib/systemd/system/hdfs-journalnode.service

[Unit]

Description=Hadoop DFS Journal Node

After=syslog.target network.target remote-fs.target nss-lookup.target network-online.target

Requires=network-online.target

 

[Service]

User=hadoop

Group=hadoop

Type=forking

ExecStart=/opt/hadoop/sbin/hadoop-daemon.sh --script hdfs start journalnode

ExecStop=/opt/hadoop/sbin/hadoop-daemon.sh --script hdfs stop journalnode

Environment=JAVA_HOME=$JAVA_HOME

Environment=HADOOP_HOME=$HADOOP_HOME

Environment=HADOOP_PREFIX=$HADOOP_PREFIX

Environment=HADOOP_PID_DIR=$HADOOP_PID_DIR

Restart=on-failure

 

[Install]

WantedBy=multi-user.target

EOF

 

firewall-cmd --zone=public --add-port=8485/tcp --permanent

firewall-cmd --zone=public --add-port=8480/tcp --permanent

firewall-cmd --reload

 

systemctl enable hdfs-journalnode

systemctl start hdfs-journalnode

systemctl status hdfs-journalnode
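
# Optional check (a sketch): confirm the JournalNode RPC (8485) and HTTP (8480) ports are listening.

ss -tln | grep -E ':(8485|8480)'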

 

# systemd unit for the ZKFC (ZooKeeper Failover Controller)

cat <<EOF | tee /usr/lib/systemd/system/hdfs-zkfc.service

[Unit]

Description=Hadoop DFS HA state in ZooKeeper

After=syslog.target network.target remote-fs.target nss-lookup.target network-online.target

Requires=network-online.target

 

[Service]

User=hadoop

Group=hadoop

Type=forking

ExecStart=/opt/hadoop/sbin/hadoop-daemon.sh --script hdfs start zkfc

ExecStop=/opt/hadoop/sbin/hadoop-daemon.sh --script hdfs stop zkfc

Environment=JAVA_HOME=$JAVA_HOME

Environment=HADOOP_HOME=$HADOOP_HOME

Environment=HADOOP_PREFIX=$HADOOP_PREFIX

Environment=HADOOP_PID_DIR=$HADOOP_PID_DIR

Restart=on-failure

 

[Install]

WantedBy=multi-user.target

EOF

 

firewall-cmd --zone=public --add-port=8019/tcp --permanent

firewall-cmd --reload

 

su hadoop - -c "hdfs zkfc -formatZK"

 

systemctl enable hdfs-zkfc

systemctl start hdfs-zkfc

systemctl status hdfs-zkfc
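
# Optional check (a sketch): the ZKFCs register under /hadoop-ha in ZooKeeper (path assumes the default root).

# /opt/zookeeper/bin/zkCli.sh -server zookeeper01:2181 ls /hadoop-ha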

 

# systemd unit for the NameNode

cat <<EOF | tee /usr/lib/systemd/system/hdfs-namenode.service

[Unit]

Description=Hadoop DFS Name Node

After=syslog.target network.target remote-fs.target nss-lookup.target network-online.target

Requires=network-online.target

 

[Service]

User=hadoop

Group=hadoop

Type=forking

ExecStart=/opt/hadoop/sbin/hadoop-daemon.sh --script hdfs start namenode

ExecStop=/opt/hadoop/sbin/hadoop-daemon.sh --script hdfs stop namenode

Environment=JAVA_HOME=$JAVA_HOME

Environment=HADOOP_HOME=$HADOOP_HOME

Environment=HADOOP_PREFIX=$HADOOP_PREFIX

Environment=HADOOP_PID_DIR=$HADOOP_PID_DIR

Restart=on-failure

 

[Install]

WantedBy=multi-user.target

EOF

 

firewall-cmd --zone=public --add-port=8020/tcp --permanent

firewall-cmd --zone=public --add-port=50070/tcp --permanent

firewall-cmd --reload

 

# Format HDFS on the FIRST NameNode only; formatting both would create mismatched cluster IDs

su - hadoop -c "hdfs namenode -format"

# On the SECOND NameNode, copy the formatted metadata instead (after the first NameNode is running):

# su - hadoop -c "hdfs namenode -bootstrapStandby"

 

systemctl enable hdfs-namenode

systemctl start hdfs-namenode

systemctl status hdfs-namenode

 

su hadoop - -c "hdfs haadmin -getServiceState nn1"

su hadoop - -c "hdfs haadmin -getServiceState nn2"

 

 

# systemd unit for the DataNode

cat <<EOF | tee /usr/lib/systemd/system/hdfs-datanode.service

[Unit]

Description=Hadoop DFS Data Node

After=syslog.target network.target remote-fs.target nss-lookup.target network-online.target

Requires=network-online.target

 

[Service]

User=hadoop

Group=hadoop

Type=forking

ExecStart=/opt/hadoop/sbin/hadoop-daemon.sh --script hdfs start datanode

ExecStop=/opt/hadoop/sbin/hadoop-daemon.sh --script hdfs stop datanode

Environment=JAVA_HOME=$JAVA_HOME

Environment=HADOOP_HOME=$HADOOP_HOME

Environment=HADOOP_PREFIX=$HADOOP_PREFIX

Environment=HADOOP_PID_DIR=$HADOOP_PID_DIR

Restart=on-failure

 

[Install]

WantedBy=multi-user.target

EOF

 

firewall-cmd --zone=public --add-port=50010/tcp --permanent

firewall-cmd --zone=public --add-port=50020/tcp --permanent

firewall-cmd --zone=public --add-port=50075/tcp --permanent

firewall-cmd --reload

 

systemctl enable hdfs-datanode

systemctl start hdfs-datanode

systemctl status hdfs-datanode
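
# Optional check: confirm the DataNodes registered with the active NameNode,
# then do a small write/read through the nameservice URI (a sketch; paths are arbitrary).

su - hadoop -c "hdfs dfsadmin -report"

su - hadoop -c "hdfs dfs -mkdir -p /tmp/smoke && hdfs dfs -put /etc/hosts /tmp/smoke/ && hdfs dfs -cat /tmp/smoke/hosts"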

 

# systemd unit for the ResourceManager

cat <<EOF | tee /usr/lib/systemd/system/yarn-resourcemanager.service

[Unit]

Description=Hadoop YARN Resource Manager

After=syslog.target network.target remote-fs.target nss-lookup.target network-online.target

Requires=network-online.target

 

[Service]

User=hadoop

Group=hadoop

Type=forking

ExecStart=/opt/hadoop/sbin/yarn-daemon.sh start resourcemanager

ExecStop=/opt/hadoop/sbin/yarn-daemon.sh stop resourcemanager

Environment=JAVA_HOME=$JAVA_HOME

Environment=HADOOP_HOME=$HADOOP_HOME

Environment=HADOOP_PREFIX=$HADOOP_PREFIX

Environment=YARN_PID_DIR=$YARN_PID_DIR

Restart=on-failure

 

[Install]

WantedBy=multi-user.target

EOF

 

firewall-cmd --zone=public --add-port=8030/tcp --permanent

firewall-cmd --zone=public --add-port=8031/tcp --permanent

firewall-cmd --zone=public --add-port=8032/tcp --permanent

firewall-cmd --zone=public --add-port=8033/tcp --permanent

firewall-cmd --zone=public --add-port=8088/tcp --permanent

firewall-cmd --reload

 

systemctl enable yarn-resourcemanager

systemctl start yarn-resourcemanager

systemctl status yarn-resourcemanager
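
# Check which ResourceManager is active (rm ids from yarn-site.xml above):

su - hadoop -c "yarn rmadmin -getServiceState rm1"

su - hadoop -c "yarn rmadmin -getServiceState rm2"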

 

# systemd unit for the NodeManager

cat <<EOF | tee /usr/lib/systemd/system/yarn-nodemanager.service

[Unit]

Description=Hadoop YARN Node Manager

After=syslog.target network.target remote-fs.target nss-lookup.target network-online.target

Requires=network-online.target

 

[Service]

User=hadoop

Group=hadoop

Type=forking

ExecStart=/opt/hadoop/sbin/yarn-daemon.sh start nodemanager

ExecStop=/opt/hadoop/sbin/yarn-daemon.sh stop nodemanager

Environment=JAVA_HOME=$JAVA_HOME

Environment=HADOOP_HOME=$HADOOP_HOME

Environment=HADOOP_PREFIX=$HADOOP_PREFIX

Environment=YARN_PID_DIR=$YARN_PID_DIR

Restart=on-failure

 

[Install]

WantedBy=multi-user.target

EOF

 

firewall-cmd --zone=public --add-port=8040/tcp --permanent

firewall-cmd --zone=public --add-port=8041/tcp --permanent

firewall-cmd --zone=public --add-port=8042/tcp --permanent

firewall-cmd --reload

 

systemctl enable yarn-nodemanager

systemctl start yarn-nodemanager

systemctl status yarn-nodemanager
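
# Confirm the NodeManagers registered with the active ResourceManager:

su - hadoop -c "yarn node -list"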
