Installing HDFS with Docker

1. The Hadoop Distributed File System (HDFS) is a distributed file system designed to run on commodity hardware. It has much in common with existing distributed file systems, but the differences are also significant: HDFS is highly fault-tolerant, is designed to be deployed on low-cost machines, and provides high-throughput access to data, which makes it well suited to applications with large data sets. HDFS relaxes a few POSIX requirements to enable streaming access to file system data. It was originally built as infrastructure for the Apache Nutch search engine project and is part of the Apache Hadoop Core project.

2. Installation (standalone NameNode and DataNode, using gradiant/hdfs:3.2.2):

version: '3.8'

x-hdfs-common: &hdfs-common
  image: gradiant/hdfs:3.2.2
  restart: always
  privileged: true
  user: root
  network_mode: host
  extra_hosts:
    - hdfs-namenode:127.0.0.1
    - hdfs-datanode:127.0.0.1

services:
  hdfs-namenode:
    <<: *hdfs-common
    container_name: hdfs-namenode
    hostname: hdfs-namenode
    environment:
      - TZ=Asia/Shanghai
      - CORE_CONF_fs_defaultFS=hdfs://hdfs-namenode:8020
      - HDFS_CONF_dfs_replication=1
      - HDFS_CONF_dfs_permissions_enabled=false
    volumes:
      - /opt/apps/data/hdfs/name:/dfs/name
    command: namenode
  hdfs-datanode:
    <<: *hdfs-common
    container_name: hdfs-datanode
    hostname: hdfs-datanode
    environment:
      - TZ=Asia/Shanghai
      - CORE_CONF_fs_defaultFS=hdfs://hdfs-namenode:8020
      - HDFS_CONF_dfs_replication=1
    volumes:
      - /opt/apps/data/hdfs/data:/dfs/data
    command: datanode
    depends_on:
      - hdfs-namenode
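
To start the stack and smoke-test it, a minimal sketch is shown below, assuming the compose file above is saved as docker-compose.yml, the bind-mount paths from the volumes section, and that the hdfs CLI is on the container's PATH (as is usual for this image):

# create the host directories referenced by the bind mounts
mkdir -p /opt/apps/data/hdfs/name /opt/apps/data/hdfs/data

# start NameNode and DataNode and check container status
docker compose up -d
docker compose ps

# verify the DataNode registered and the filesystem is writable
docker exec hdfs-namenode hdfs dfsadmin -report
docker exec hdfs-namenode hdfs dfs -mkdir -p /tmp/smoke
docker exec hdfs-namenode hdfs dfs -ls /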

3. Add hosts entries on the machine that will access HDFS (replace 127.0.0.1 with the Docker host's IP when the client is a different machine):

127.0.0.1 hdfs-namenode
127.0.0.1 hdfs-datanode
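
To confirm the names resolve and the cluster is reachable from the client, a quick check, assuming default ports (the NameNode web UI listens on 9870 in Hadoop 3.x) and, for the second command, a Hadoop client installed locally:

curl -s http://hdfs-namenode:9870/ | head
hdfs dfs -ls hdfs://hdfs-namenode:8020/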

4. Host network mode is recommended unless the client is attached to the same Docker network as the containers, because HDFS clients connect to DataNodes directly using the addresses the NameNode hands out.

5. HA cluster installation using apache/hadoop:3.3.6: three ZooKeeper nodes, three JournalNodes, two NameNodes with automatic failover (ZKFC), and three DataNodes.

version: "3.8"

x-zk-common: &zk-common
  image: zookeeper:3.8
  restart: always
  privileged: true
  networks:
    - xbd

x-hadoop-common: &hadoop-common
  image: apache/hadoop:3.3.6
  restart: always
  user: root
  privileged: true
  env_file:
    - hdfs.conf
  networks:
    - xbd

services:
  xbd-zk-1:
    <<: *zk-common
    container_name: xbd-zk-1
    hostname: xbd-zk-1
    ports:
      - 2181:2181
    environment:
      - ZOO_MY_ID=1
      - ZOO_CLIENT_PORT=2181
      - ZOO_ADMINSERVER_ENABLED=false
      - ZOO_SERVERS=server.1=xbd-zk-1:2888:3888;2181 server.2=xbd-zk-2:2889:3889;2182 server.3=xbd-zk-3:2890:3890;2183
    volumes:
      - /opt/apps/data/zookeeper/zk-1:/data
  xbd-zk-2:
    <<: *zk-common
    container_name: xbd-zk-2
    hostname: xbd-zk-2
    ports:
      - 2182:2182
    environment:
      - ZOO_MY_ID=2
      - ZOO_CLIENT_PORT=2182
      - ZOO_ADMINSERVER_ENABLED=false
      - ZOO_SERVERS=server.1=xbd-zk-1:2888:3888;2181 server.2=xbd-zk-2:2889:3889;2182 server.3=xbd-zk-3:2890:3890;2183
    volumes:
      - /opt/apps/data/zookeeper/zk-2:/data
  xbd-zk-3:
    <<: *zk-common
    container_name: xbd-zk-3
    hostname: xbd-zk-3
    ports:
      - 2183:2183
    environment:
      - ZOO_MY_ID=3
      - ZOO_CLIENT_PORT=2183
      - ZOO_ADMINSERVER_ENABLED=false
      - ZOO_SERVERS=server.1=xbd-zk-1:2888:3888;2181 server.2=xbd-zk-2:2889:3889;2182 server.3=xbd-zk-3:2890:3890;2183
    volumes:
      - /opt/apps/data/zookeeper/zk-3:/data
  xbd-jn-1:
    <<: *hadoop-common
    container_name: xbd-jn-1
    hostname: xbd-jn-1
    environment:
      - HDFS-SITE.XML_dfs.journalnode.rpc-address=0.0.0.0:8485
      - HDFS-SITE.XML_dfs.journalnode.http-address=0.0.0.0:8480
      - HDFS-SITE.XML_dfs.journalnode.edits.dir=/data
    volumes:
      - /opt/apps/data/hdfs/jn-1:/data
    command:
      - /bin/sh
      - -c
      - |
        hdfs journalnode
  xbd-jn-2:
    <<: *hadoop-common
    container_name: xbd-jn-2
    hostname: xbd-jn-2
    environment:
      - HDFS-SITE.XML_dfs.journalnode.rpc-address=0.0.0.0:8486
      - HDFS-SITE.XML_dfs.journalnode.http-address=0.0.0.0:8481
      - HDFS-SITE.XML_dfs.journalnode.edits.dir=/data
    volumes:
      - /opt/apps/data/hdfs/jn-2:/data
    command:
      - /bin/sh
      - -c
      - |
        hdfs journalnode
  xbd-jn-3:
    <<: *hadoop-common
    container_name: xbd-jn-3
    hostname: xbd-jn-3
    environment:
      - HDFS-SITE.XML_dfs.journalnode.rpc-address=0.0.0.0:8487
      - HDFS-SITE.XML_dfs.journalnode.http-address=0.0.0.0:8482
      - HDFS-SITE.XML_dfs.journalnode.edits.dir=/data
    volumes:
      - /opt/apps/data/hdfs/jn-3:/data
    command:
      - /bin/sh
      - -c
      - |
        hdfs journalnode
  xbd-nn-1:
    <<: *hadoop-common
    container_name: xbd-nn-1
    hostname: xbd-nn-1
    ports:
      - 9870:9870
    environment:
      - HDFS-SITE.XML_dfs.namenode.rpc-address=0.0.0.0:8020
      - HDFS-SITE.XML_dfs.namenode.http-address=0.0.0.0:9870
      - HDFS-SITE.XML_dfs.namenode.name.dir=/data
    volumes:
      - /opt/apps/data/hdfs/nn-1:/data
    command:
      - /bin/sh
      - -c
      - |
        # First run: no marker file yet, so wait for ZooKeeper and the JournalNodes,
        # format the NameNode and the ZKFC znode, create the marker, then exit;
        # "restart: always" relaunches the container, which then takes the first
        # branch and starts ZKFC plus the NameNode.
        if [[ -e '/data/complete' ]]; then
          /opt/hadoop/sbin/hadoop-daemon.sh start zkfc
          hdfs namenode
        else
          sleep 120s
          hdfs namenode -format
          hdfs zkfc -formatZK
          mkdir /data -p && touch /data/complete
        fi

  xbd-nn-2:
    <<: *hadoop-common
    container_name: xbd-nn-2
    hostname: xbd-nn-2
    ports:
      - 9871:9871
    environment:
      - HDFS-SITE.XML_dfs.namenode.rpc-address=0.0.0.0:8021
      - HDFS-SITE.XML_dfs.namenode.http-address=0.0.0.0:9871
      - HDFS-SITE.XML_dfs.namenode.name.dir=/data
    volumes:
      - /opt/apps/data/hdfs/nn-2:/data
    command:
      - /bin/sh
      - -c
      - |
        # First run: wait until xbd-nn-1 has finished formatting, copy its metadata
        # with -bootstrapStandby, create the marker, then exit; the restart then
        # brings up ZKFC and the standby NameNode.
        if [[ -e '/data/complete' ]]; then
          /opt/hadoop/sbin/hadoop-daemon.sh start zkfc
          hdfs namenode
        else
          sleep 240s
          hdfs namenode -bootstrapStandby
          mkdir /data -p && touch /data/complete
        fi

  xbd-dn-1:
    <<: *hadoop-common
    container_name: xbd-dn-1
    hostname: xbd-dn-1
    environment:
      - HDFS-SITE.XML_dfs.datanode.http.address=0.0.0.0:9864
      - HDFS-SITE.XML_dfs.datanode.address=0.0.0.0:9866
      - HDFS-SITE.XML_dfs.datanode.ipc.address=0.0.0.0:9867
      - HDFS-SITE.XML_dfs.datanode.data.dir=/data
    volumes:
      - /opt/apps/data/hdfs/dn-1:/data
    command:
      - /bin/sh
      - -c
      - |
        hdfs datanode
  xbd-dn-2:
    <<: *hadoop-common
    container_name: xbd-dn-2
    hostname: xbd-dn-2
    environment:
      - HDFS-SITE.XML_dfs.datanode.http.address=0.0.0.0:9874
      - HDFS-SITE.XML_dfs.datanode.address=0.0.0.0:9876
      - HDFS-SITE.XML_dfs.datanode.ipc.address=0.0.0.0:9877
      - HDFS-SITE.XML_dfs.datanode.data.dir=/data
    volumes:
      - /opt/apps/data/hdfs/dn-2:/data
    command:
      - /bin/sh
      - -c
      - |
        hdfs datanode
  xbd-dn-3:
    <<: *hadoop-common
    container_name: xbd-dn-3
    hostname: xbd-dn-3
    environment:
      - HDFS-SITE.XML_dfs.datanode.http.address=0.0.0.0:9884
      - HDFS-SITE.XML_dfs.datanode.address=0.0.0.0:9886
      - HDFS-SITE.XML_dfs.datanode.ipc.address=0.0.0.0:9887
      - HDFS-SITE.XML_dfs.datanode.data.dir=/data
    volumes:
      - /opt/apps/data/hdfs/dn-3:/data
    command:
      - /bin/sh
      - -c
      - |
        hdfs datanode

networks:
  xbd:
    external: true
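
The compose file declares the xbd network as external, so it must be created before the stack is started. A bring-up sketch (bash; the directory paths are taken from the volume mounts above):

docker network create xbd
mkdir -p /opt/apps/data/zookeeper/zk-{1,2,3}
mkdir -p /opt/apps/data/hdfs/{jn-1,jn-2,jn-3,nn-1,nn-2,dn-1,dn-2,dn-3}
docker compose up -d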

Shared configuration file hdfs.conf (the common environment variables pulled in through env_file):

CORE-SITE.XML_fs.defaultFS=hdfs://hdfs-cluster
CORE-SITE.XML_ha.zookeeper.quorum=xbd-zk-1:2181,xbd-zk-2:2182,xbd-zk-3:2183
HDFS-SITE.XML_dfs.nameservices=hdfs-cluster
HDFS-SITE.XML_dfs.ha.namenodes.hdfs-cluster=xbd-nn-1,xbd-nn-2
HDFS-SITE.XML_dfs.namenode.rpc-address.hdfs-cluster.xbd-nn-1=xbd-nn-1:8020
HDFS-SITE.XML_dfs.namenode.http-address.hdfs-cluster.xbd-nn-1=xbd-nn-1:9870
HDFS-SITE.XML_dfs.namenode.rpc-address.hdfs-cluster.xbd-nn-2=xbd-nn-2:8021
HDFS-SITE.XML_dfs.namenode.http-address.hdfs-cluster.xbd-nn-2=xbd-nn-2:9871
HDFS-SITE.XML_dfs.namenode.shared.edits.dir=qjournal://xbd-jn-1:8485;xbd-jn-2:8486;xbd-jn-3:8487/hdfs-cluster
HDFS-SITE.XML_dfs.ha.fencing.methods=shell(/bin/true)
HDFS-SITE.XML_dfs.permissions.enabled=false
HDFS-SITE.XML_dfs.ha.automatic-failover.enabled=true
HDFS-SITE.XML_dfs.client.failover.proxy.provider.hdfs-cluster=org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider
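
Once everything is up (allow a few minutes for the first-run formatting described above), HA can be checked with hdfs haadmin using the NameNode IDs from dfs.ha.namenodes.hdfs-cluster; the web UIs are published on the host at 9870 (xbd-nn-1) and 9871 (xbd-nn-2). A quick check, assuming one NameNode has become active:

docker exec xbd-nn-1 hdfs haadmin -getServiceState xbd-nn-1
docker exec xbd-nn-1 hdfs haadmin -getServiceState xbd-nn-2
docker exec xbd-nn-1 hdfs dfs -mkdir -p /tmp/smoke
docker exec xbd-nn-1 hdfs dfsadmin -report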