hadoop的集群配置


集群搭建

1 配置yum:

mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
curl -o /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-7.repo
sed -i -e '/mirrors.cloud.aliyuncs.com/d' -e '/mirrors.aliyuncs.com/d' /etc/yum.repos.d/CentOS-Base.repo
yum makecache

#配置epel镜像
yum install -y epel-release
mv /etc/yum.repos.d/epel.repo /etc/yum.repos.d/epel.repo.backup
mv /etc/yum.repos.d/epel-testing.repo /etc/yum.repos.d/epel-testing.repo.backup
curl -o /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo
yum makecache

2 安装相应的工具

yum install -y psmisc nc net-tools rsync vim lrzsz ntp libzstd openssl-static tree iotop git

3修改静态ip 主机名 和关闭防火墙

/etc/sysconfig/network-scripts/ifcfg-ens33

        TYPE="Ethernet"   #网络类型(通常是Ethernet)
PROXY_METHOD="none"
BROWSER_ONLY="no"
BOOTPROTO="static" #IP的配置方法[none|static|bootp|dhcp](引导时不 使用协议|静态分配IP|BOOTP协议|DHCP协议)
DEFROUTE="yes"
IPV4_FAILURE_FATAL="no"
IPV6INIT="yes"
      IPV6_AUTOCONF="yes"
IPV6_DEFROUTE="yes"
IPV6_FAILURE_FATAL="no"
IPV6_ADDR_GEN_MODE="stable-privacy"
NAME="ens33"  
UUID="e83804c1-3257-4584-81bb-660665ac22f6" #随机id
DEVICE="ens33" #接口名(设备,网卡)
ONBOOT="yes" #系统启动的时候网络接口是否有效(yes/no)
#IP地址
IPADDR=192.168.1.100
#网关
GATEWAY=192.168.1.2  
#域名解析器
DNS1=192.168.1.2

vim /etc/hosts

192.168.1.100 hadoop100
192.168.1.101 hadoop101
192.168.1.102 hadoop102
192.168.1.103 hadoop103
192.168.1.104 hadoop104
192.168.1.105 hadoop105

vi /etc/hostname #修改自己的主机名 或者 hostnamectl set-hostname <新主机名>

systemctl stop firewalld #关闭防火墙 但是重新启动防火墙又开了

systemctl disable firewalld #永久关闭防火墙(开机不再自动启动)

4添加用户并设置具有root权限

useradd sun

passwd sun

vim /etc/sudoers

## Allow root to run any commands anywhere
root   ALL=(ALL)     ALL
sun   ALL=(ALL)     NOPASSWD:ALL

5配置jdk和hadoop的环境变量

#JAVA_HOME
export JAVA_HOME=/opt/module/jdk1.8.0_212
export PATH=$PATH:$JAVA_HOME/bin
#HADOOP_HOME
export HADOOP_HOME=/opt/module/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

6配置集群同步脚本

#!/bin/bash
# xsync: distribute the given files/directories to every cluster node via rsync.
# Usage: xsync <file-or-dir> [more files...]
if [ $# -lt 1 ]
then
  echo "Not Enough Argument!"
  exit
fi
for host in hadoop102 hadoop103 hadoop104
do
  echo "=================== $host =================="
  # quote "$@" so paths containing spaces stay intact
  for file in "$@"
  do
    if [ -e "$file" ]
    then
      # resolve the physical parent directory (-P follows symlinks to the real path)
      pdir=$(cd -P "$(dirname "$file")"; pwd)
      fname=$(basename "$file")
      # ensure the same directory layout exists on the remote host
      ssh "$host" "mkdir -p $pdir"
      rsync -av "$pdir/$fname" "$host:$pdir"
    else
      echo "$file does not exist!"
    fi
  done
done

7免密登录配置

1.在hadoop102生成秘钥对

ssh-keygen -t ecdsa

2.将公钥安装到本机

ssh-copy-id hadoop102

3.将本机的/home/sun/.ssh同步到集群

xsync /home/sun/.ssh

8配置hadoop

配置/opt/module/hadoop-3.1.3/etc/hadoop/core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
   <!-- 指定namenode的地址 -->
   <property>
       <name>fs.defaultFS</name>
       <value>hdfs://hadoop102:9820</value>
   </property>
   <!-- 指定hadoop的存储目录 -->
   <property>
       <name>hadoop.tmp.dir</name>
       <value>/opt/module/hadoop-3.1.3/data</value>
   </property>
   <!-- 配置HDFS网页登录使用的静态用户为atguigu -->
   <property>
       <name>hadoop.http.staticuser.user</name>
       <value>atguigu</value>
   </property>
   <!-- 配置该atguigu(superUser)允许通过代理访问的主机节点 -->
   <property>
       <name>hadoop.proxyuser.atguigu.hosts</name>
       <value>*</value>
   </property>
   <!-- 配置该atguigu(superUser)允许通过代理用户所属组 -->
   <property>
       <name>hadoop.proxyuser.atguigu.groups</name>
       <value>*</value>
   </property>
</configuration>

配置/opt/module/hadoop-3.1.3/etc/hadoop/hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
 <!-- nn web端访问地址-->
 <property>
   <name>dfs.namenode.http-address</name>
   <value>hadoop102:9870</value>
 </property>
 <!-- 2nn web端访问地址-->
 <property>
   <name>dfs.namenode.secondary.http-address</name>
   <value>hadoop104:9868</value>
 </property>
</configuration>

配置/opt/module/hadoop-3.1.3/etc/hadoop/yarn-site.xml

<?xml version="1.0"?>
<configuration>

<!-- 指定MR走shuffle -->
<property>
 <name>yarn.nodemanager.aux-services</name>
 <value>mapreduce_shuffle</value>
</property>
<!-- 指定ResourceManager的地址-->
<property>
 <name>yarn.resourcemanager.hostname</name>
 <value>hadoop103</value>
</property>
<!-- 环境变量的继承 -->
<property>
 <name>yarn.nodemanager.env-whitelist</name>
 <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<!-- yarn容器允许分配的最大最小内存 -->
<property>
 <name>yarn.scheduler.minimum-allocation-mb</name>
 <value>512</value>
</property>
<property>
 <name>yarn.scheduler.maximum-allocation-mb</name>
 <value>4096</value>
</property>
<!-- yarn容器允许管理的物理内存大小 -->
<property>
 <name>yarn.nodemanager.resource.memory-mb</name>
 <value>4096</value>
</property>
<!-- 关闭yarn对物理内存和虚拟内存的限制检查 -->
<property>
 <name>yarn.nodemanager.pmem-check-enabled</name>
 <value>false</value>
</property>
<property>
 <name>yarn.nodemanager.vmem-check-enabled</name>
 <value>false</value>
</property>
</configuration>

配置/opt/module/hadoop-3.1.3/etc/hadoop/mapred-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- 指定MapReduce程序运行在Yarn上 -->
 <property>
   <name>mapreduce.framework.name</name>
   <value>yarn</value>
 </property>
</configuration>

配置/opt/module/hadoop-3.1.3/etc/hadoop/workers

hadoop102
hadoop104
hadoop103

在集群内部同步配置文件

xsync /opt/module/hadoop-3.1.3/etc

9历史服务器和日志聚集

历史服务器:修改mapred-site.xml,追加如下配置

<!-- 历史服务器端地址 -->
<property>
   <name>mapreduce.jobhistory.address</name>
   <value>hadoop102:10020</value>
</property>

<!-- 历史服务器web端地址 -->
<property>
   <name>mapreduce.jobhistory.webapp.address</name>
   <value>hadoop102:19888</value>
</property>

日志聚集:修改yarn-site.xml,追加如下配置

<!-- 开启日志聚集功能 -->
<property>
   <name>yarn.log-aggregation-enable</name>
   <value>true</value>
</property>
<!-- 设置日志聚集服务器地址 -->
<property>  
   <name>yarn.log.server.url</name>  
   <value>http://hadoop102:19888/jobhistory/logs</value>
</property>
<!-- 设置日志保留时间为7天 -->
<property>
   <name>yarn.log-aggregation.retain-seconds</name>
   <value>604800</value>
</property>

同步配置文件

xsync $HADOOP_HOME/etc/hadoop/yarn-site.xml

10编写hadoop集群常用脚本

1 查看三台服务器java进程脚本 jpsall

查看

#!/bin/bash
# jpsall: list the running Java processes on every cluster node,
# filtering out the Jps process itself.
for host in hadoop102 hadoop103 hadoop104
do
  echo "============== $host ================"
  # quote "$host"/"$@" so hostnames and extra jps args are passed safely;
  # grep runs locally on the ssh output
  ssh "$host" jps "$@" | grep -v Jps
done

保存后退出,然后赋予脚本执行权限

2 hadoop集群启停脚本

#!/bin/bash
# Cluster start/stop helper: hdfs on hadoop102, yarn on hadoop103,
# and the MapReduce history server on hadoop102.
# Usage: myhadoop.sh start|stop

if [ $# -lt 1 ]
then
  echo "NO Args Input..."
  exit
fi

# install prefix is identical on every node, so expand it locally
hadoop_home=/opt/module/hadoop-3.1.3

case "$1" in
"start")
  echo "============== 启动hadoop集群 =============="

  echo "--------------- 启动 hdfs ---------------"
  ssh hadoop102 "$hadoop_home/sbin/start-dfs.sh"
  echo " --------------- 启动 yarn ---------------"
  ssh hadoop103 "$hadoop_home/sbin/start-yarn.sh"
  echo " --------------- 启动 historyserver ---------------"
  ssh hadoop102 "$hadoop_home/bin/mapred --daemon start historyserver"
;;
"stop")
  echo " =================== 关闭 hadoop集群 ==================="

  # stop in reverse order of startup
  echo " --------------- 关闭 historyserver ---------------"
  ssh hadoop102 "$hadoop_home/bin/mapred --daemon stop historyserver"
  echo " --------------- 关闭 yarn ---------------"
  ssh hadoop103 "$hadoop_home/sbin/stop-yarn.sh"
  echo " --------------- 关闭 hdfs ---------------"
  ssh hadoop102 "$hadoop_home/sbin/stop-dfs.sh"
;;
*)
  echo "Input Args Error..."
;;
esac



posted @ 2020-12-11 23:00  Stk1  阅读(249)  评论(0)    收藏  举报