# Extract the installation package.
tar -zxvf /export/server/spark-2.4.5-bin-hadoop2.7.tgz
# Create a symlink so a future upgrade only needs to repoint it.
ln -s /export/server/spark-2.4.5-bin-hadoop2.7 /export/server/spark
# If you hit permission problems, switch ownership to root to make the
# learning environment easier; in production use the user and permissions
# assigned by your ops team instead.
# A single chown with user:group replaces the original separate
# `chown -R root` + `chgrp -R root` (one recursive pass instead of two).
chown -R root:root /export/server/spark-2.4.5-bin-hadoop2.7
cd /export/server/spark/conf
# Activate the config file by renaming the template.
mv spark-env.sh.template spark-env.sh
# Edit the config file and append the lines below.
vim /export/server/spark/conf/spark-env.sh
# Point Spark at the conf dir of the Hadoop install that is actually
# running. The rest of this guide edits /export/server/hadoop-3.3.0, and
# the JobHistory deprecation WARNINGs further below are Hadoop 3.x output,
# so 3.3.0 is the live install — the original hadoop-2.7.5 path here was
# a copy-paste leftover and would make Spark read the wrong (or missing)
# cluster configuration.
HADOOP_CONF_DIR=/export/server/hadoop-3.3.0/etc/hadoop
YARN_CONF_DIR=/export/server/hadoop-3.3.0/etc/hadoop
# Adjust YARN's container-memory settings in yarn-site.xml.
cd /export/server/hadoop-3.3.0/etc/hadoop
vim /export/server/hadoop-3.3.0/etc/hadoop/yarn-site.xml
<!-- Total physical memory (MB) a NodeManager may hand out to containers. -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>20480</value>
</property>
<!-- Minimum container allocation (MB); requests are rounded up to a multiple of this. -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>2048</value>
</property>
<!-- Allowed virtual-to-physical memory ratio per container before YARN kills it. -->
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>2.1</value>
</property>
cd /export/server/spark/conf
# Activate the config file by renaming the template.
mv spark-defaults.conf.template spark-defaults.conf
# Edit the file and append the lines below: write (compressed) application
# event logs to HDFS so the history server can replay finished apps.
vim spark-defaults.conf
spark.eventLog.enabled true
spark.eventLog.dir hdfs://node1:8020/user/spark/log/
spark.eventLog.compress true
# Address the YARN web UI links to for completed Spark applications.
spark.yarn.historyServer.address node1:18080
# NOTE: the event-log directory must be created manually (the original
# comment called it "sparklog", but the path actually configured above
# is /user/spark/log/).
hdfs dfs -mkdir -p /user/spark/log/
cd /export/server/spark/conf
# Edit the file and append the line below.
vim spark-env.sh
## Spark history server settings: read event logs from the same HDFS
## directory configured in spark-defaults.conf, and enable the periodic
## cleaner for old logs.
## NOTE(review): no `export` is used here — Spark's load-spark-env.sh
## appears to source this file with `set -a` (auto-export); confirm for
## your Spark version.
SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=hdfs://node1:8020/user/spark/log/ -Dspark.history.fs.cleaner.enabled=true"
# Create an HDFS directory to hold Spark's jars.
hdfs dfs -mkdir -p /user/spark/jars/
# Upload every jar from $SPARK_HOME/jars so YARN can fetch them from HDFS
# instead of uploading them on each spark-submit.
hdfs dfs -put /export/server/spark/jars/* /user/spark/jars/
vim /export/server/spark/conf/spark-defaults.conf
# Append the line below (points spark.yarn.jars at the HDFS copy).
spark.yarn.jars hdfs://node1:8020/user/spark/jars/*
# Start the whole Hadoop cluster; run on node1.
start-all.sh
# Start the MapReduce JobHistory server; run on node1.
# The old `mr-jobhistory-daemon.sh start historyserver` is deprecated in
# Hadoop 3.x — it printed the warnings below and delegated to the
# replacement command, so we invoke the replacement directly:
#   WARNING: Use of this script to start the MR JobHistory daemon is deprecated.
#   WARNING: Attempting to execute replacement "mapred --daemon start" instead.
mapred --daemon start historyserver
# Start the Spark HistoryServer service; run on node1.
/export/server/spark/sbin/start-history-server.sh
# starting org.apache.spark.deploy.history.HistoryServer, logging to /export/server/spark/logs/spark-root-org.apache.spark.deploy.history.HistoryServer-1-node1.itcast.cn.out
# 测试:在浏览器中访问以下页面
http://node1:9870 (HDFS NameNode UI)
http://node1:18080 (Spark HistoryServer)
http://node1:19888/jobhistory (MapReduce JobHistory)
http://node1:8088/cluster (YARN ResourceManager)