hadoop docker
1.ubuntu
docker pull ubuntu:16.04
docker run -it ubuntu:16.04
2.升级apt-get apt-get update
4.安装vim apt-get install vim
5.替换apt-get镜像源 vim /etc/apt/sources.list
全部替换为如下内容
deb-src http://archive.ubuntu.com/ubuntu xenial main restricted #Added by software-properties
deb http://mirrors.aliyun.com/ubuntu/ xenial main restricted
deb-src http://mirrors.aliyun.com/ubuntu/ xenial main restricted multiverse universe #Added by software-properties
deb http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted multiverse universe #Added by software-properties
deb http://mirrors.aliyun.com/ubuntu/ xenial universe
deb http://mirrors.aliyun.com/ubuntu/ xenial-updates universe
deb http://mirrors.aliyun.com/ubuntu/ xenial multiverse
deb http://mirrors.aliyun.com/ubuntu/ xenial-updates multiverse
deb http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted universe multiverse #Added by software-properties
deb http://archive.canonical.com/ubuntu xenial partner
deb-src http://archive.canonical.com/ubuntu xenial partner
deb http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted multiverse universe #Added by software-properties
deb http://mirrors.aliyun.com/ubuntu/ xenial-security universe
deb http://mirrors.aliyun.com/ubuntu/ xenial-security multiverse
6.重新升级apt-get apt-get update
7.安装 openjdk-8-jdk
sudo apt-get install openjdk-8-jdk
update-alternatives --config java
8.安装wget apt-get install wget
9.创建hadoop目录 mkdir -p /soft/apache/hadoop/
10.进入hadoop目录 cd /soft/apache/hadoop
11.下载hadoop wget http://mirrors.sonic.net/apache/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz
12.解压hadoop tar -xvzf hadoop-2.7.7.tar.gz
13.配置环境变量 vim ~/.bashrc
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export HADOOP_HOME=/soft/apache/hadoop/hadoop-2.7.7
export HADOOP_CONFIG_HOME=$HADOOP_HOME/etc/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
14.重启配置文件 source ~/.bashrc
15.创建文件夹
cd $HADOOP_HOME
mkdir tmp
mkdir namenode
mkdir datanode
16.修改配置文件
cd $HADOOP_CONFIG_HOME
vim core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/soft/apache/hadoop/hadoop-2.7.7/tmp</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://master:9000</value>
<final>true</final>
</property>
</configuration>
vim hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
<final>true</final>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/soft/apache/hadoop/hadoop-2.7.7/namenode</value>
<final>true</final>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/soft/apache/hadoop/hadoop-2.7.7/datanode</value>
<final>true</final>
</property>
</configuration>
cp mapred-site.xml.template mapred-site.xml
vim mapred-site.xml
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>master:9001</value>
</property>
</configuration>
17.修改hadoop环境变量
vim hadoop-env.sh
在该文件最后加入:
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
18.格式化namenode
hadoop namenode -format
19.安装ssh
apt-get install ssh
20.并将ssh自启动写入配置文件
vim ~/.bashrc
在该文件最后写入
/usr/sbin/sshd
创建sshd运行目录(sshd要求该目录为/var/run/sshd)
mkdir -p /var/run/sshd
21.生成访问密钥
cd ~/
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cd .ssh
cat id_rsa.pub >> authorized_keys
22.修改ssh配置
ssh_config文件
vim /etc/ssh/ssh_config
StrictHostKeyChecking no #将ask改为no
sshd_config文件
vim /etc/ssh/sshd_config
#禁用密码验证
PasswordAuthentication no
#启用密钥验证
RSAAuthentication yes
PubkeyAuthentication yes
AuthorizedKeysFile .ssh/authorized_keys
23.退出容器,进入docker控制台,保存当前容器
docker commit xxx ubuntu:hadoop
docker run -ti --name ubuntu-hadoop-master -p 50070:50070 -p 8088:8088 -h master ubuntu:hadoop
其中xxx为容器id
24.启动master、slave1、slave2,三个容器
docker run -ti --name ubuntu-hadoop-master -p 50070:50070 -p 8088:8088 -h master ubuntu:hadoop
docker run -ti --name ubuntu-hadoop-slave1 -h slave1 ubuntu:hadoop
docker run -ti --name ubuntu-hadoop-slave2 -h slave2 ubuntu:hadoop
25.修改每个容器的hosts文件
对master、slave1、slave2里的hosts文件,分别加入其他两个容器的ip
vim /etc/hosts
127.0.0.1 localhost
::1 localhost ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
172.17.0.2 master
172.17.0.3 slave1
172.17.0.4 slave2
26.修改master的slaves配置文件
cd $HADOOP_CONFIG_HOME/
vim slaves
将两个slave的name写入slaves配置文件
slave1
slave2
27.启动hadoop!
在master节点启动hadoop
start-all.sh
28.查看各节点状态
在master节点输入
hdfs dfsadmin -report
web界面的地址:
HDFS页面:50070
YARN的管理界面:8088
HistoryServer的管理界面:19888
Zookeeper的服务端口号:2181
Mysql的服务端口号:3306
HiveServer2的服务端口号:10000
Kafka的服务端口号:9092
azkaban界面:8443
Hbase界面:16010,60010
Spark的界面:8080
Spark的URL:7077

浙公网安备 33010602011771号