Hadoop Deployment

Hadoop

Environment

g@Mint-19 (user g on Linux Mint 19)

Installation

Scala

Install Scala from the system's software manager.
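
Alternatively, a sketch of installing it from the distro repositories (assumes Mint/Ubuntu's scala package, whose version may lag behind the latest release):

sudo apt install scala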

SSH

sudo apt install ssh openssh-server pdsh
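
To confirm the SSH daemon is up before continuing:

sudo systemctl status ssh   # should report active (running)
ssh localhost               # prompts for a password until key-based login is configured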

JDK
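
No commands were recorded for this step; a minimal sketch, assuming the JDK tarball was downloaded manually (the archive name is hypothetical, chosen to match the JAVA_HOME paths below):

mkdir -p ~/App/SOURCE
tar -xzf jdk-11.0.5_linux-x64_bin.tar.gz -C ~/App/SOURCE   # hypothetical archive name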

Hadoop
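
Likewise, a sketch of unpacking the Hadoop 3.2.1 binary release into the directory that HADOOP_HOME points at below:

mkdir -p ~/App/BINARY
tar -xzf hadoop-3.2.1.tar.gz -C ~/App/BINARY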

Configure environment variables

vim ~/.bashrc

export JAVA_HOME=/home/g/App/SOURCE/jdk-11.0.5
export J=$JAVA_HOME
export HADOOP_HOME=/home/g/App/BINARY/hadoop-3.2.1
export H=$HADOOP_HOME
export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
# SPARK_HOME=/home/g/App/BINARY/spark-3.0.0-preview2-bin-hadoop3.2
# S=$SPARK_HOME
# PATH=$PATH:$SPARK_HOME/sbin:$SPARK_HOME/bin
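
Reload the shell configuration and check that both toolchains resolve:

source ~/.bashrc
java -version      # should print the JDK version
hadoop version     # should print Hadoop 3.2.1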

Pseudo-distributed configuration[1][2]

Note: although the shell defaults to JDK 11 above, hadoop-env.sh and yarn-env.sh pin the daemons to JDK 8; Hadoop 3.2 only supports running on Java 8 (Java 11 runtime support arrived in 3.3), which is why the JDK 11 lines below are left commented out.

hadoop-env.sh

export JAVA_HOME=/home/g/App/SOURCE/jdk1.8.0_241
export HADOOP_HOME=/home/g/App/BINARY/hadoop-3.2.1
export HADOOP_HEAPSIZE_MAX=1024

yarn-env.sh

export JAVA_HOME=/home/g/App/SOURCE/jdk1.8.0_241
# export JAVA_HOME=/home/g/App/SOURCE/jdk-11.0.5
# export YARN_RESOURCEMANAGER_OPTS="--add-modules=ALL-SYSTEM"
# export YARN_NODEMANAGER_OPTS="--add-modules=ALL-SYSTEM"

core-site.xml

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/tmp/hadoop</value>
        <description>Local Hadoop temporary directory on the namenode</description>
    </property>
</configuration>
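
Note that /tmp is typically cleared on reboot, and the HDFS directories in hdfs-site.xml below also live under /tmp/hadoop, so a reboot can wipe HDFS state and force a reformat (see Troubleshooting). Point these paths somewhere persistent for anything beyond experimentation.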

hdfs-site.xml

<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>

    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/tmp/hadoop/hdfs</value>
        <description>Where the namenode stores HDFS namespace metadata</description>
    </property>

    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/tmp/hadoop/hdfs/data</value>
        <description>Physical storage location of data blocks on the datanode</description>
    </property>
    <property>
        <name>dfs.namenode.http-address</name>
        <value>0.0.0.0:50070</value>
        <description>Rebinds the namenode web UI to the legacy pre-3.x port; omit to keep the 3.x default of 9870</description>
    </property>
</configuration>

mapred-site.xml

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.application.classpath</name>
        <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
    </property>
    
    <!-- The two properties below are Hadoop 1.x JobTracker/TaskTracker
         settings; they have no effect under YARN in Hadoop 3.x and can
         safely be removed. -->
    <property>
      <name>mapred.job.tracker.http.address</name>
      <value>0.0.0.0:50030</value>
    </property>
    <property>
      <name>mapred.task.tracker.http.address</name>
      <value>0.0.0.0:50060</value>
    </property>
</configuration>

yarn-site.xml

<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.env-whitelist</name>
        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
    </property>
    
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>localhost:8088</value>
        <description>Address of the ResourceManager web UI</description>
    </property>
</configuration>

Pseudo-distributed startup

Do not use start-all.sh unless you have a specific reason to: it is deprecated and merely chains the two per-service scripts below, which makes failures harder to pin down. Start HDFS and YARN separately.

hdfs namenode -format   # first run only; reformatting wipes existing HDFS metadata
start-dfs.sh
start-yarn.sh
# $H/sbin/start-all.sh
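
Check that all five daemons came up:

jps   # expect NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager (plus Jps itself)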

Shutdown

stop-dfs.sh
stop-yarn.sh
# $H/sbin/stop-all.sh

Hadoop console: port 9870

Port 50070 was the default in releases before 3.x (and is only relevant here if the dfs.namenode.http-address override in hdfs-site.xml is kept).

http://localhost:9870

Troubleshooting

Startup fails

Point pdsh at ssh as its rcmd backend:

echo "ssh" | sudo tee /etc/pdsh/rcmd_default
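
Alternatively, the backend can be chosen per shell via pdsh's environment variable instead of the system-wide file:

export PDSH_RCMD_TYPE=ssh   # e.g. add to ~/.bashrc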

Add the following variables at the top of start-dfs.sh and stop-dfs.sh:

HDFS_DATANODE_USER=g
HADOOP_SECURE_DN_USER=hdfs
HDFS_NAMENODE_USER=g
HDFS_SECONDARYNAMENODE_USER=g

Add the following variables at the top of start-yarn.sh and stop-yarn.sh:

YARN_RESOURCEMANAGER_USER=g
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=g

Sometimes the datanode fails to start, typically because of a clusterID mismatch after the namenode has been reformatted; delete the datanode's local storage directory:

rm -rf /tmp/hadoop/hdfs/data
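
After clearing the directory, reformat and restart HDFS (this discards everything stored in HDFS):

hdfs namenode -format
start-dfs.sh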

Console not reachable

The YARN monitoring UI on port 8088 does not load [not necessarily needed on a single machine]; allow the port through the firewall:

sudo /sbin/iptables -I INPUT -p tcp --dport 8088 -j ACCEPT

Standalone operation

Run the bundled grep example on local files (executed from inside $HADOOP_HOME):

mkdir input
cp etc/hadoop/*.xml input
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.1.jar grep input output 'dfs[a-z.]+'
cat output/*
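
In pseudo-distributed mode the same example runs against HDFS instead of the local filesystem; a sketch following the single-cluster guide[1], assuming the HDFS user directory matches the local user g:

hdfs dfs -mkdir -p /user/g
hdfs dfs -mkdir input
hdfs dfs -put etc/hadoop/*.xml input
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.1.jar grep input output 'dfs[a-z.]+'
hdfs dfs -cat output/*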

Distributed

Passwordless SSH login

In /etc/ssh/sshd_config, uncomment (or add) the following lines. RSAAuthentication is a protocol-1 option that modern OpenSSH ignores; PubkeyAuthentication is the one that matters:

RSAAuthentication yes
PubkeyAuthentication yes
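
Then generate a key and authorize it for localhost, the standard recipe from the single-cluster guide[1]:

ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys
ssh localhost   # should now log in without a password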

Common HDFS commands

# list everything directly under /
# hadoop fs -ls /
$H/bin/hdfs dfs -ls /

# recursively list all directories and files (-lsr is the deprecated spelling)
hadoop fs -ls -R /

# create the /user/hadoop directory (-p creates missing parents)
hadoop fs -mkdir -p /user/hadoop

# upload a.txt into /user/hadoop/ on the cluster
hadoop fs -put a.txt /user/hadoop/

# fetch /user/hadoop/a.txt from the cluster into the local / directory
hadoop fs -get /user/hadoop/a.txt /

# copy a file within the cluster
hadoop fs -cp src dst

# move a file within the cluster
hadoop fs -mv src dst

# print the contents of /user/hadoop/a.txt
hadoop fs -cat /user/hadoop/a.txt

# delete the file /user/hadoop/a.txt
hadoop fs -rm /user/hadoop/a.txt

# delete a directory and everything under it (-rmr is the deprecated spelling)
hadoop fs -rm -r /user/hadoop

# same effect as hadoop fs -put
hadoop fs -copyFromLocal localsrc dst

# upload a local file to HDFS, deleting the local copy
hadoop fs -moveFromLocal localsrc dst

  1. https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html ↩︎

  2. https://blog.csdn.net/CoffeeAndIce/article/details/78879151 ↩︎
