Hadoop/HBase/Spark Cluster Deployment
1. Environment Preparation
1.1 Hosts and Operating System
1.1.1 Operating System and Java
- A CentOS 8-compatible distribution (given the current state of CentOS 8, AlmaLinux 8.8 or Rocky Linux 8.8 is recommended)
- OpenJDK 8u372-b07 (Eclipse Temurin)
1.1.2 Software versions:
- ZooKeeper 3.8.1 or 3.8.2
- Hadoop 2.10.2 (use at least 2.7.3; do not use 3.x for now)
- HBase 2.5.3 to 2.5.5 (use at least 2.2.3)
- Spark 2.1.0 (do not use any other version; otherwise the driver-side pom file has to be modified and the project rebuilt)
1.2 DNS, Hostname Resolution, and Passwordless SSH
- If a DNS server is available, register the nodes in DNS and let them resolve hostnames through it.
- If there is no DNS server, configure hostname resolution in /etc/hosts on every node. This guide assumes you do not have a DNS server.
# root on the master node
$ mkdir ~/sparkDeploy && cd ~/sparkDeploy
$ mkdir envSet && cd envSet
$ cat <<'EOF' > setHosts.sh
cat >> /etc/hosts <<EOF1
MASTER_NODE_IP master
WORKER1_IP worker1
WORKER2_IP worker2
# Add more lines here if you have additional workers
EOF1
EOF
$ bash -x setHosts.sh
# root on the master node
# If available, you can also use sshpass to script these passwordless-login steps
$ cd ~
$ ssh-keygen -t rsa # rsa is the key algorithm used for login; if you have Chinese national cryptography (SM) requirements, switch to the corresponding SM algorithm yourself. Otherwise just press Enter through the prompts
$ ssh-copy-id root@master # enter yes and then the target server's password
$ ssh-copy-id root@worker1 # repeat for each additional worker node, adjusting worker[N] accordingly
# root on the master node
$ cd ~/sparkDeploy/envSet
$ hostnamectl set-hostname master
# Define how many worker nodes you have; here there are 3 workers (i.e. 3 workers in addition to master)
$ export WORKER_COUNT=3
$ for i in $(seq 1 $WORKER_COUNT); do \
ssh "worker"$i "mkdir -p /root/sparkDeploy/envSet"; \
scp setHosts.sh "worker"$i:/root/sparkDeploy/envSet; \
ssh "worker"$i "bash -x /root/sparkDeploy/envSet/setHosts.sh"; \
ssh "worker"$i "rm -fr ~/sparkDeploy"; \
ssh "hostnamectl set-hostname worker$i"; \
done
$ rm -fr ~/sparkDeploy/
1.3 Create the hadoop User
# root on the master node
$ useradd hadoop
$ echo "hadoop" | passwd --stdin hadoop
$ for i in $(seq 1 $WORKER_COUNT); do \
ssh "worker"$i "useradd hadoop"; \
ssh "worker"$i 'echo "hadoop" | passwd --stdin hadoop'; \
done
$ echo "hadoop ALL=(ALL) ALL" >> /etc/sudoers
$ su hadoop
$ ssh-keygen -t rsa # accept the default key location /home/hadoop/.ssh/
$ ssh-copy-id hadoop@master # enter yes and then the hadoop user's password: hadoop
$ ssh-copy-id hadoop@worker1 # repeat for each additional worker node, adjusting worker[N] accordingly
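As an optional sanity check (a sketch, not part of the original steps; assumes WORKER_COUNT is still set in the current shell), verify that the hadoop user can reach every worker without a password prompt:
# hadoop user on the master node -- optional check
$ for i in $(seq 1 $WORKER_COUNT); do \
    ssh -o BatchMode=yes hadoop@worker$i "hostname"; \
  done
# each worker should print its hostname without asking for a password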
1.4 Install fontconfig
gskernel's libjavaport.so depends on libfontconfig.so.1, which compatible distributions such as Rocky Linux 8 / AlmaLinux 8 may not install by default:
# root on the master node
$ dnf install fontconfig -y
$ for i in $(seq 1 $WORKER_COUNT); do \
ssh worker$i "dnf install fontconfig tar -y"; \
done
1.5 Programs and Installation Packages
Upload ClusterDeploy.tgz to the /root/ directory on the master server.
# root on the master node
$ mkdir -p /home/hadoop/sparkDeploy/pakage/ && cd /home/hadoop/sparkDeploy/pakage/
$ mv ~/ClusterDeploy.tgz /home/hadoop/sparkDeploy/pakage/
$ tar xvf /home/hadoop/sparkDeploy/pakage/ClusterDeploy.tgz -C /home/hadoop/sparkDeploy/pakage/ && chown -R hadoop:hadoop /home/hadoop/
# After extraction the files look like this:
#.
#├── ClusterDeploy
#│ ├── GsKernel
#│ │ ├── GsKernel_r47653_with_jdk8u372.tgz
#│ │ └── data.tgz
#│ ├── Hadoop
#│ │ └── hadoop-2.10.2.tgz
#│ ├── Hbase
#│ │ ├── geomesa-hbase-distributed-runtime-hbase2_2.11-3.5.1.jar
#│ │ └── hbase-2.5.5-bin.tgz
#│ ├── Hbase+Spark集群搭建指南.docx
#│ ├── JDK
#│ │ └── OpenJDK8U-jdk_x64_linux_hotspot_8u372b07.tgz
#│ ├── Spark
#│ │ └── spark-2.1.0-bin-hadoop2.7.tgz
#│ ├── ZooKeeper
#│ │ └── apache-zookeeper-3.8.2-bin.tgz
#│ └── spark依赖jar
#│ ├── QLExpress-3.2.0.jar
#│ ├── com.bea.core.datasource-1.6.0.0.jar
#│ ├── geomesa-hbase-spark-runtime-hbase2_2.11-3.0.0.jar
#│ ├── ggserver-core-8.0.jar
#│ ├── ggserver-core-analyst-8.0.jar
#│ ├── ggserver-core-gbase-8.0.jar
#│ ├── ggserver-core-highgo-8.0.jar
#│ ├── ggserver-core-kingbase-8.0.jar
#│ ├── ggserver-core-overlap-v2-8.0.jar
#│ ├── ggserver-core-postgis-8.0.jar
#│ ├── lucene-core-4.0.0.jar
#│ └── 依赖包说明.txt
1.6 Configure Environment Variables
# switch to the hadoop user
$ su hadoop
# Define how many worker nodes you have; here there are 3 workers (i.e. 3 workers in addition to master). This variable matters for the rest of the deployment
$ export WORKER_COUNT=3
$ export DEPLOY_DIR=/home/hadoop/sparkDeploy/pakage
$ mkdir /home/hadoop/.bashrc.d
$ touch /home/hadoop/.bashrc.d/hadoop_env.sh
# quote the heredoc delimiter so the $VAR references below are written literally instead of being expanded now
$ cat >> /home/hadoop/.bashrc.d/hadoop_env.sh <<'EOF'
export JAVA_HOME=/home/hadoop/jdk
export GEOSMARTER_HOME=/home/hadoop/GeoSmarter
export GEOGLOBESERVER_HOME=/home/hadoop/GeoSmarter
export HADOOP_HOME=/home/hadoop/hadoop
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
export YARN_HOME=$HADOOP_HOME
export HBASE_HOME=/home/hadoop/hbase
export ZOOKEEPER_HOME=/home/hadoop/zookeeper
export SPARK_HOME=/home/hadoop/spark
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/bin:$ZOOKEEPER_HOME/bin:$HBASE_HOME/bin:$SPARK_HOME/sbin:$SPARK_HOME/bin:/home/hadoop/GeoSmarter/support/native
export CLASSPATH=$CLASSPATH:$HADOOP_HOME/share/hadoop/common/lib:$SPARK_HOME/jars
export LD_LIBRARY_PATH=$JAVA_HOME/jre/lib/amd64/server:$HADOOP_HOME/lib/native:/home/hadoop/GeoSmarter/support/native:$LD_LIBRARY_PATH
EOF
$ source /home/hadoop/.bashrc.d/hadoop_env.sh
$ for i in $(seq 1 $WORKER_COUNT); do \
scp -r /home/hadoop/.bashrc.d/ hadoop@worker$i:/home/hadoop/; \
ssh hadoop@worker$i "source /home/hadoop/.bashrc.d/hadoop_env.sh"; \
done
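Optionally verify that the file was copied and loads correctly on a worker (a sketch, not part of the original steps; note the remote source in the loop above only affects that single ssh session, so the variables persist for later logins only if the distribution's default ~/.bashrc sources ~/.bashrc.d/*, otherwise add a source line to ~/.bashrc yourself):
$ ssh hadoop@worker1 'source /home/hadoop/.bashrc.d/hadoop_env.sh && echo $JAVA_HOME $HADOOP_HOME'
# expected output: /home/hadoop/jdk /home/hadoop/hadoop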
1.7 Disable the Firewall and SELinux
# root on the master node
$ systemctl stop firewalld &&
systemctl disable firewalld &&
setenforce 0 &&
sed -i 's/SELINUX=enforcing/SELINUX=permissive/g' /etc/selinux/config
$ for i in $(seq 1 $WORKER_COUNT); do \
ssh worker$i "systemctl stop firewalld && systemctl disable firewalld && setenforce 0 && sed -i 's/SELINUX=enforcing/SELINUX=permissive/g' /etc/selinux/config"; \
done
1.8 Configure NTP
# root on the master node
# The ZooKeeper cluster may need time synchronization across nodes
$ dnf install -y chrony
$ sed -i 's/pool/#pool/g' /etc/chrony.conf
$ echo "allow all" >> /etc/chrony.conf
$ echo "local stratum 10" >> /etc/chrony.conf
$ systemctl enable chronyd && systemctl restart chronyd
$ for i in $(seq 1 $WORKER_COUNT); do \
ssh worker$i "dnf install -y chrony"; \
ssh worker$i "sed -i 's/pool/#pool/g' /etc/chrony.conf"; \
ssh worker$i "echo 'server master iburst' >> /etc/chrony.conf "; \
ssh worker$i "systemctl enable chronyd && systemctl restart chronyd"; \
ssh worker$i "chronyc sources"; \
done
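If you also want a check on the master itself (the chronyc sources call in the loop above already covers the workers), the following is an optional sketch:
# root on the master node -- optional check
$ chronyc tracking   # confirms chronyd is running locally and shows the reference and stratum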
1.9 Set Up the JDK
# root on the master node
$ dnf remove -y java-*
$ for i in $(seq 1 $WORKER_COUNT);do
ssh worker$i "dnf remove -y java-*";
done
# hadoop user on the master node
$ su hadoop
$ mkdir /home/hadoop/jdk
$ tar xvf $DEPLOY_DIR/JDK/OpenJDK8U-jdk_x64_linux_hotspot_8u372b07.tgz --strip-components=1 -C /home/hadoop/jdk
$ tar zcvf /home/hadoop/jdk.tgz /home/hadoop/jdk
$ for i in $(seq 1 $WORKER_COUNT);do \
scp -r /home/hadoop/jdk.tgz worker$i:/home/hadoop/; \
ssh worker$i "tar xvf /home/hadoop/jdk.tgz --strip-components=2 -C /home/hadoop/"; \
ssh worker$i "rm -f /home/hadoop/jdk.tgz"; \
done
$ rm -f /home/hadoop/jdk.tgz
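A quick check that the JDK was extracted and distributed correctly (an optional sketch; it uses explicit paths so it does not depend on the PATH set in section 1.6):
# hadoop user on the master node -- optional check
$ /home/hadoop/jdk/bin/java -version
$ for i in $(seq 1 $WORKER_COUNT); do \
    ssh hadoop@worker$i "/home/hadoop/jdk/bin/java -version"; \
  done
# every node should report OpenJDK 1.8.0_372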
2. Hadoop Cluster Deployment and Configuration
2.1 Extract the Hadoop Installation Package
# hadoop user on the master node
$ tar xvf $DEPLOY_DIR/Hadoop/hadoop-2.10.2.tgz -C /home/hadoop/
$ ln -s /home/hadoop/hadoop-2.10.2 /home/hadoop/hadoop
$ mkdir -p /home/hadoop/hadoop/hdfs/name
$ mkdir -p /home/hadoop/hadoop/hdfs/data
2.2 Modify the Hadoop Configuration
# hadoop user on the master node
$ mv $HADOOP_HOME/etc/hadoop/core-site.xml $HADOOP_HOME/etc/hadoop/core-site.xml_bak
$ mv $HADOOP_HOME/etc/hadoop/hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml_bak
$ mv $HADOOP_HOME/etc/hadoop/mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml_bak # ignore the error if this file does not exist
$ mv $HADOOP_HOME/etc/hadoop/yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml_bak
$ touch $HADOOP_HOME/etc/hadoop/core-site.xml
$ touch $HADOOP_HOME/etc/hadoop/hdfs-site.xml
$ touch $HADOOP_HOME/etc/hadoop/mapred-site.xml
$ touch $HADOOP_HOME/etc/hadoop/yarn-site.xml
2.2.1 core-site.xml:
# hadoop user on the master node
$ cat >> $HADOOP_HOME/etc/hadoop/core-site.xml <<EOF
<configuration>
<!-- The filesystem scheme (URI) used by Hadoop. Replace master with the master node's hostname, and likewise below -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://master:9000</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>65536</value>
</property>
<!-- Storage directory for files Hadoop generates at runtime -->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/hadoop/tmp</value>
</property>
<property>
<!-- Skip hostname resolution checks when datanodes register with the namenode -->
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
<value>false</value>
</property>
<property>
<!-- File operation permission checking -->
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
EOF
2.2.2 hdfs-site.xml:
# hadoop user on the master node
$ cat >> $HADOOP_HOME/etc/hadoop/hdfs-site.xml <<EOF
<configuration>
<!-- Address of the SecondaryNameNode. Replace master with the master node's hostname, and likewise below -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:50090</value>
</property>
<!-- HDFS replication factor (how many copies of each block are stored) -->
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/home/hadoop/hadoop/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/home/hadoop/hadoop/hdfs/data</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
EOF
2.2.3 mapred-site.xml:
# hadoop user on the master node
$ cat >> $HADOOP_HOME/etc/hadoop/mapred-site.xml <<EOF
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- Replace master with the master node's hostname, and likewise below -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
</configuration>
EOF
2.2.4 yarn-site.xml:
# hadoop user on the master node
$ cat >> $HADOOP_HOME/etc/hadoop/yarn-site.xml <<EOF
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Replace master with the master node's hostname, and likewise below -->
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
</configuration>
EOF
2.2.5 masters && slaves
# hadoop user on the master node
$ echo "master" > $HADOOP_HOME/etc/hadoop/masters
$ echo "master" > $HADOOP_HOME/etc/hadoop/slaves
$ for i in $(seq 1 $WORKER_COUNT); do
echo "worker$i" >> $HADOOP_HOME/etc/hadoop/slaves;
done
2.3 Copy the Hadoop Files to the Worker Nodes
# hadoop user on the master node
$ tar zcvf /home/hadoop/hadoop.tgz /home/hadoop/hadoop*
$ for i in $(seq 1 $WORKER_COUNT); do
scp /home/hadoop/hadoop.tgz worker$i:/home/hadoop/; \
ssh hadoop@worker$i "tar xvf /home/hadoop/hadoop.tgz --strip-components=2 -C /home/hadoop/"; \
ssh hadoop@worker$i "rm -f /home/hadoop/hadoop.tgz"; \
done
$ rm -f /home/hadoop/hadoop.tgz
2.4 Start the Hadoop Cluster
# Format the NameNode; hadoop user on the master node
$ hdfs namenode -format
# Start Hadoop
$ mv /home/hadoop/hadoop/sbin/start-all.sh /home/hadoop/hadoop/sbin/start-hadoop-all.sh
$ mv /home/hadoop/hadoop/sbin/stop-all.sh /home/hadoop/hadoop/sbin/stop-hadoop-all.sh
$ /home/hadoop/hadoop/sbin/start-hadoop-all.sh
# If you only need HDFS, you can run start-dfs.sh instead
To verify, visit the HDFS web UI at http://master:50070 (when DNS is not configured, replace master with the master node's IP address; otherwise modern browsers such as Firefox and Chromium will refuse to load the page for security reasons).
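A command-line check works as well (an optional sketch, not part of the original steps):
# hadoop user on the master node -- optional check
$ jps                    # should list NameNode, SecondaryNameNode and ResourceManager (plus DataNode/NodeManager, since master is also listed in slaves)
$ hdfs dfsadmin -report  # the number of live datanodes should match the number of lines in the slaves file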
3. ZooKeeper Cluster Deployment
3.1 Extract the ZooKeeper Program
# hadoop user on the master node
$ mkdir /home/hadoop/zookeeper
$ tar xvf $DEPLOY_DIR/ZooKeeper/apache-zookeeper-3.8.2-bin.tgz --strip-components=1 -C /home/hadoop/zookeeper
$ mkdir /home/hadoop/zookeeper/data
$ mkdir /home/hadoop/zookeeper/logs
3.2 Modify the ZooKeeper Configuration
3.2.1 myid
# hadoop user on the master node
$ echo "1" > /home/hadoop/zookeeper/data/myid
3.2.2 zoo.cfg
# hadoop user on the master node
# Rewrite the ZooKeeper configuration
$ cp /home/hadoop/zookeeper/conf/zoo_sample.cfg /home/hadoop/zookeeper/conf/zoo.cfg
$ cat > /home/hadoop/zookeeper/conf/zoo.cfg <<EOF
tickTime=2000
clientPort=2181
dataDir=/home/hadoop/zookeeper/data
dataLogDir=/home/hadoop/zookeeper/logs
initLimit=10
syncLimit=5
server.1=master:2888:3888
EOF
$ for i in $(seq 1 $WORKER_COUNT); do \
t=$((i+1)); \
echo "server.$t=worker$i:2888:3888" >> /home/hadoop/zookeeper/conf/zoo.cfg; \
done
3.3 Copy ZooKeeper to the Worker Nodes
# hadoop user on the master node
# Compress for transfer
$ tar zcvf /home/hadoop/zookeeper.tgz /home/hadoop/zookeeper
# Copy the archive to each worker node
$ for i in $(seq 1 $WORKER_COUNT); do \
t=$((i+1)); \
scp /home/hadoop/zookeeper.tgz worker$i:/home/hadoop/; \
ssh worker$i "tar xvf /home/hadoop/zookeeper.tgz --strip-components=2 -C /home/hadoop/"; \
ssh worker$i 'echo '"$t"' > /home/hadoop/zookeeper/data/myid'; \
ssh worker$i "rm -f /home/hadoop/zookeeper.tgz"; \
done
# Remove the archive on the master node
$ rm -f /home/hadoop/zookeeper.tgz
3.4 Start the ZooKeeper Cluster
# hadoop user on the master node
# Start ZooKeeper
$ /home/hadoop/zookeeper/bin/zkServer.sh start
$ for i in $(seq 1 $WORKER_COUNT); do \
ssh worker$i "/home/hadoop/zookeeper/bin/zkServer.sh start"; \
done
Run /home/hadoop/zookeeper/bin/zkServer.sh status on each node to check whether ZooKeeper is running normally.
The Mode shown in the last line of the output will be either leader or follower; this is decided by the distributed leader election, so the master node is not necessarily the leader.
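For example, to run the check described above on all nodes from the master in one pass (assuming WORKER_COUNT is still set):
# hadoop user on the master node
$ /home/hadoop/zookeeper/bin/zkServer.sh status
$ for i in $(seq 1 $WORKER_COUNT); do \
    ssh worker$i "/home/hadoop/zookeeper/bin/zkServer.sh status"; \
  done
# exactly one node should report Mode: leader, the others Mode: follower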
4. HBase Cluster Deployment
4.1 Extract the HBase Program
Extract the installation package to /home/hadoop, which creates the hbase-2.5.5 directory, then create a symbolic link named hbase pointing to hbase-2.5.5:
# hadoop user on the master node
$ tar xvf $DEPLOY_DIR/Hbase/hbase-2.5.5-bin.tgz -C /home/hadoop
$ ln -s /home/hadoop/hbase-2.5.5 /home/hadoop/hbase
$ mv $DEPLOY_DIR/Hbase/geomesa-hbase-distributed-runtime-hbase2_2.11-3.5.1.jar /home/hadoop/hbase/lib
$ cp $HADOOP_HOME/etc/hadoop/core-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml /home/hadoop/hbase/conf/
$ mv /home/hadoop/hbase/conf/hbase-site.xml /home/hadoop/hbase/conf/hbase-site.xml_bak
$ touch /home/hadoop/hbase/conf/hbase-site.xml
4.2 Modify the HBase Configuration
4.2.1 hbase-site.xml
# hadoop user on the master node
$ cat >> /home/hadoop/hbase/conf/hbase-site.xml <<EOF
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Path where HBase stores its data on HDFS -->
<property>
<name>hbase.rootdir</name>
<value>hdfs://master:9000/hbase</value>
</property>
<!-- Run HBase in distributed mode -->
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<!-- ZooKeeper quorum addresses, separated by "," -->
<property>
<name>hbase.zookeeper.quorum</name>
<value>master</value>
</property>
<!-- ZooKeeper dataDir -->
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/home/hadoop/zookeeper/data</value>
</property>
<property>
<name>hbase.master.port</name>
<value>16000</value>
</property>
<!-- Port the HBase master web UI binds to -->
<property>
<name>hbase.master.info.port</name>
<value>16010</value>
</property>
<!-- Port of the regionserver info web UI -->
<property>
<name>hbase.regionserver.info.port</name>
<value>16030</value>
</property>
<property>
<name>hbase.coprocessor.user.region.classes</name>
<value>org.locationtech.geomesa.hbase.server.coprocessor.GeoMesaCoprocessor</value>
</property>
</configuration>
EOF
$ ZKADDR="master"
$ for i in $(seq 1 $WORKER_COUNT);do \
ZKADDR="${ZKADDR},worker${i}"; \
done
$ sed -i "s|<value>master</value>|<value>$ZKADDR</value>|" /home/hadoop/hbase/conf/hbase-site.xml
4.2.2 regionservers
Edit the regionservers file so that its content matches Hadoop's slaves file, i.e. one hostname per line: the master node followed by each worker node.
# hadoop user on the master node
$ cp /home/hadoop/hbase/conf/regionservers /home/hadoop/hbase/conf/regionservers_bak
$ cp $HADOOP_HOME/etc/hadoop/slaves /home/hadoop/hbase/conf/regionservers
4.3 Copy the HBase Files to the Worker Nodes
# hadoop user on the master node
$ tar zcvf /home/hadoop/hbase.tgz /home/hadoop/hbase*
$ for i in $(seq 1 $WORKER_COUNT);do \
scp /home/hadoop/hbase.tgz worker$i:/home/hadoop/; \
ssh worker$i "tar xvf /home/hadoop/hbase.tgz --strip-components=2 -C /home/hadoop/"; \
ssh worker$i "rm -f /home/hadoop/hbase.tgz"; \
done
$ rm -f /home/hadoop/hbase.tgz
4.4 Start the HBase Cluster
# hadoop user on the master node
$ /home/hadoop/hbase/bin/start-hbase.sh
Open a browser and visit http://master:16010/ to check whether HBase started correctly. When DNS is not configured, replace master with the master node's IP address, as in the Hadoop case.
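A command-line check is also possible (an optional sketch, not part of the original steps):
# hadoop user on the master node -- optional check
$ echo "status" | /home/hadoop/hbase/bin/hbase shell
# should report 1 active master and as many live region servers as there are lines in the regionservers file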
5. Spark Cluster Deployment
5.1 Extract the Spark Program
# hadoop user on the master node
$ mkdir -p /home/hadoop/spark
$ tar xvf $DEPLOY_DIR/Spark/spark-2.1.0-bin-hadoop2.7.tgz --strip-components=1 -C /home/hadoop/spark
5.2 Modify the Spark Configuration
5.2.1 spark-defaults.conf
# hadoop user on the master node
$ touch /home/hadoop/spark/conf/spark-defaults.conf
$ cat >> /home/hadoop/spark/conf/spark-defaults.conf <<EOF
spark.eventLog.enabled true
spark.shuffle.service.enabled true
EOF
5.2.2 spark-env.sh
# hadoop user on the master node
$ touch /home/hadoop/spark/conf/spark-env.sh
# quote the heredoc delimiter so the $VAR references below are written literally and expanded when spark-env.sh is sourced
$ cat >> /home/hadoop/spark/conf/spark-env.sh <<'EOF'
export JAVA_HOME=/home/hadoop/jdk
export CLASSPATH=$CLASSPATH:$SPARK_HOME/jars
export HADOOP_HOME=/home/hadoop/hadoop
export GEOSMARTER_HOME=/home/hadoop/GeoSmarter
export GEOGLOBESERVER_HOME=/home/hadoop/GeoSmarter
export LD_LIBRARY_PATH=$JAVA_HOME/jre/lib/amd64/server:$HADOOP_HOME/lib/native:$GEOSMARTER_HOME/support/native:$LD_LIBRARY_PATH
export SPARK_JAVA_OPTS="-XX:+UseG1GC -XX:NewRatio=1 -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark -XX:InitiatingHeapOccupancyPercent=35 -XX:+DisableExplicitGC"
export SPARK_PID_DIR=/home/hadoop/spark/pids
export SPARK_WORKER_OPTS="-Dspark.worker.cleanup.enabled=true -Dspark.worker.cleanup.interval=1800 -Dspark.worker.cleanup.appDataTtl=86400"
EOF
$ source /home/hadoop/spark/conf/spark-env.sh
5.2.3 slaves
# hadoop user on the master node
$ cp /home/hadoop/hadoop/etc/hadoop/slaves /home/hadoop/spark/conf/
5.3 Import the Extension JARs and gskernel
The geomesa and gskernel related JARs are all under /home/hadoop/sparkDeploy/pakage/spark依赖jar and need to be copied into /home/hadoop/spark/jars.
# hadoop user on the master node
$ cp -r $DEPLOY_DIR/spark依赖jar/*.jar /home/hadoop/spark/jars/
# hadoop user on the master node
$ mkdir -p /home/hadoop/GeoSmarter
$ tar xvf $DEPLOY_DIR/GsKernel/GsKernel_r47653_with_jdk8u372.tgz --strip-components=1 -C /home/hadoop/GeoSmarter
$ tar xvf $DEPLOY_DIR/GsKernel/data.tgz -C /home/hadoop/GeoSmarter/support/native
5.4 Copy the Spark Files to the Worker Nodes
# hadoop user on the master node
$ tar zcvf /home/hadoop/spark.tgz /home/hadoop/spark
$ for i in $(seq 1 $WORKER_COUNT);do \
scp /home/hadoop/spark.tgz worker$i:/home/hadoop/; \
ssh worker$i "tar xvf /home/hadoop/spark.tgz --strip-components=2 -C /home/hadoop/"; \
ssh worker$i "rm -f /home/hadoop/spark.tgz"; \
done
$ rm -f /home/hadoop/spark.tgz
5.5 Start the Spark Cluster
# hadoop user on the master node
# Rename the scripts to avoid clashing with Hadoop's start-all.sh/stop-all.sh and causing confusion
$ mv /home/hadoop/spark/sbin/start-all.sh /home/hadoop/spark/sbin/start-spark-all.sh
$ mv /home/hadoop/spark/sbin/stop-all.sh /home/hadoop/spark/sbin/stop-spark-all.sh
# Start Spark
$ /home/hadoop/spark/sbin/start-spark-all.sh
Open the Spark web UI at http://master:8080 to check the cluster. The default UI port is 8080, but if 8080 is already in use Spark automatically moves to another port; check the startup log for the actual one.
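As an optional end-to-end check (a sketch, not part of the original steps), you can submit the SparkPi example shipped with the distribution to the standalone master; this assumes the default master port 7077 and the examples jar name used by spark-2.1.0-bin-hadoop2.7. Because spark-defaults.conf enables spark.eventLog.enabled without setting spark.eventLog.dir, the default event log directory should exist first:
# hadoop user on the master node -- optional check
$ mkdir -p /tmp/spark-events    # default spark.eventLog.dir when none is configured
$ spark-submit --master spark://master:7077 \
    --class org.apache.spark.examples.SparkPi \
    /home/hadoop/spark/examples/jars/spark-examples_2.11-2.1.0.jar 100
# the driver output should contain a line similar to "Pi is roughly 3.14..."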
6. Clean Up Deployment Environment Variables
# hadoop user on the master node
$ unset DEPLOY_DIR
$ unset WORKER_COUNT
$ unset ZKADDR