1.Hadoop(HDFS)环境搭建
使用的Hadoop相关的版本:CDH
CDH相关资源下载地址 https://archive.cloudera.com/cdh5/cdh/5/
Hadoop版本:hadoop-2.6.0-cdh5.16.2
Hadoop下载: wget https://archive.cloudera.com/cdh5/cdh/5/hadoop-2.6.0-cdh5.16.2.tar.gz
2.Hadoop安装前置要求
3.创建文件夹
mkdir -p /usr/local/hadoop2.6/data  # -p 会自动创建父目录 /usr/local/hadoop2.6
cd /usr/local/hadoop2.6
4.下载hadoop
wget https://archive.cloudera.com/cdh5/cdh/5/hadoop-2.6.0-cdh5.16.2.tar.gz
5.解压
tar -zvxf hadoop-2.6.0-cdh5.16.2.tar.gz
hadoop软件包常见目录说明
bin: hadoop客户端名单
etc/hadoop: hadoop相关的配置文件存放目录
sbin: 启动hadoop相关进程脚本
share: 常用的例子
6.修改配置文件
vim /etc/profile
export HADOOP_HOME=/usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
source /etc/profile
7.修改配置文件
cd /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop
vim hadoop-env.sh yarn-env.sh mapred-env.sh  # 三个文件分别修改(vim 中用 :n 切换下一个文件)
将 export JAVA_HOME=${JAVA_HOME} 修改为 export JAVA_HOME=/usr/local/java1.8/jdk1.8.0_241
vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/core-site.xml
加入以下配置(注意:严格来说只有 fs.defaultFS 和 hadoop.tmp.dir 属于 core-site.xml;mapreduce.* 开头的属性应放入 mapred-site.xml,yarn.* 开头的属性应放入 yarn-site.xml)
<property>
<name>fs.defaultFS</name>
<value>hdfs://192.168.107.216:8020</value>
</property>
<property>
<name>mapreduce.reduce.input.buffer.percent</name>
<value>0.10</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.memory.limit.percent</name>
<value>0.10</value>
</property>
<property>
<name>mapreduce.reduce.shuffle.input.buffer.percent</name>
<value>0.10</value>
</property>
<property>
<name>mapred.child.java.opts</name>
<value>-Xmx2048m</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop2.6/data</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>${hadoop.tmp.dir}/nm-local-dir</value>
</property>
vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>1</value> <!-- 单机版只有一个节点;XML 中的注释必须用这种形式,不能用 # -->
</property>
注意:下面的 yarn.nodemanager.local-dirs 属于 yarn-site.xml 的配置,不应放在 hdfs-site.xml 中
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>${hadoop.tmp.dir}/nm-local-dir</value>
</property>
vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.cluster.local.dir</name>
<value>${hadoop.tmp.dir}/mapred/local</value>
</property>
vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/yarn-site.xml
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>${hadoop.tmp.dir}/nm-local-dir</value>
</property>
vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/slaves  # slaves 是文件不是目录,路径末尾不能带 /
默认是localhost 可以不改,也可以改成本地ip
8.启动HDFS
第一次执行启动的时候一定要格式化文件系统,后面不要重复执行
hdfs namenode -format  # 格式化文件系统(hadoop namenode -format 是已过时的等价写法,二者执行其一即可,切勿重复格式化)
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/start-dfs.sh
推荐这样启动
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/hadoop-daemon.sh start namenode  # 先启动namenode(停止时把 start 换成 stop)
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/hadoop-daemon.sh start datanode  # 然后启动datanode
9.验证
jps # 查看hadoop进程 出现下面三个证明启动成功
5191 SecondaryNameNode
3003 NameNode
3278 DataNode
systemctl stop firewalld # 关闭防火墙
http://192.168.107.216:50070/ # 直接访问 能访问证明启动成功
10.查看启动日志
cd /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/logs
tail -n 100 /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/logs/yarn-root-nodemanager-localhost.localdomain.log
tail -n 100 /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/logs/hadoop-root-datanode-localhost.localdomain.log
tail -n 100 /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/logs/hadoop-root-namenode-localhost.localdomain.log
11.停止HDFS
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/stop-dfs.sh
推荐这样停止
bash hadoop-daemon.sh stop datanode # 先停止datanode
bash hadoop-daemon.sh stop namenode # 然后停止namenode
12.hadoop 常用命令
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/start-dfs.sh
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/stop-dfs.sh
jps
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/start-yarn.sh
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/stop-yarn.sh
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/stop-yarn.sh && bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/stop-dfs.sh && bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/start-dfs.sh && bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/start-yarn.sh && jps && bash /home/hadoop/lib/pv.sh
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/stop-yarn.sh && jps
hdfs namenode -format
hadoop namenode -format
bash /home/hadoop/lib/pv.sh
cd /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/logs
tail -n 100 /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/logs/yarn-root-nodemanager-localhost.localdomain.log
tail -n 100 /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/logs/hadoop-root-datanode-localhost.localdomain.log
tail -n 100 /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/logs/hadoop-root-namenode-localhost.localdomain.log
cd /usr/local/hadoop2.6/data
vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/core-site.xml
vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/yarn-site.xml
vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/mapred-site.xml
vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/hdfs-site.xml
job.getConfiguration().setStrings("mapreduce.reduce.shuffle.memory.limit.percent", "0.1");
set mapreduce.reduce.shuffle.memory.limit.percent=0.1;