Docker compose 基于官方的镜像部署Apache Hadoop 2.10

923 阅读1分钟

Docker Hub

先创建 docker 网络 bigdata,后续部署 kafka、flink、spark、mysql 时都可以直接加入这个网络进行通信:
# Create the shared bridge network up front; the compose file declares it as external.
docker network create --driver bridge bigdata

docker-compose.yml

# Compose file format version. Legacy docker-compose (v1) needs it to accept
# the top-level `networks` key; Compose v2 treats it as obsolete and ignores it.
version: '3'

# Pre-existing network shared with other stacks. Create it once beforehand:
#   docker network create --driver bridge bigdata
networks:
  bigdata:
    # An external network is never created by Compose, so no `driver` is
    # declared here: the driver was fixed when the network was created, and
    # the Compose spec rejects extra attributes on external networks.
    external: true

services:
  # HDFS NameNode. Container port 50070 is published on the same host port.
  namenode:
    image: apache/hadoop:2.10.2
    hostname: namenode
    restart: always
    command:
      - hdfs
      - namenode
    ports:
      - '50070:50070'
    volumes:
      # Host directory backing /data, which holds the NameNode metadata dir.
      - ~/hadoop2/data/:/data
    env_file:
      - ./config
    environment:
      # Per the accompanying notes, this makes the image format the NameNode
      # directory on startup.
      - ENSURE_NAMENODE_DIR=/data/dfs/name
    networks:
      - bigdata
  # First HDFS DataNode; started after the namenode container.
  datanode1:
    image: apache/hadoop:2.10.2
    hostname: datanode1
    depends_on:
      - namenode
    command:
      - hdfs
      - datanode
    volumes:
      # Dedicated host directory mounted at the DataNode data dir.
      - ~/hadoop2/data/dfs/data1:/data/dfs/data
    env_file:
      - ./config
    networks:
      - bigdata
  # Second HDFS DataNode; same setup as datanode1 with its own host directory.
  datanode2:
    image: apache/hadoop:2.10.2
    hostname: datanode2
    depends_on:
      - namenode
    command:
      - hdfs
      - datanode
    volumes:
      # Dedicated host directory mounted at the DataNode data dir.
      - ~/hadoop2/data/dfs/data2:/data/dfs/data
    env_file:
      - ./config
    networks:
      - bigdata
  # YARN ResourceManager. Container port 8088 is published on the same host port.
  resourcemanager:
    image: apache/hadoop:2.10.2
    hostname: resourcemanager
    command:
      - yarn
      - resourcemanager
    ports:
      - '8088:8088'
    env_file:
      - ./config
    networks:
      - bigdata
  # First YARN NodeManager; started after the resourcemanager container.
  # Indented with the same 2-space step as every other service in this file.
  nodemanager1:
    image: apache/hadoop:2.10.2
    command: [ "yarn", "nodemanager" ]
    hostname: nodemanager1
    depends_on:
      - resourcemanager
    env_file:
      - ./config
    networks:
      - bigdata
  # Second YARN NodeManager; started after the resourcemanager container.
  nodemanager2:
    image: apache/hadoop:2.10.2
    hostname: nodemanager2
    depends_on:
      - resourcemanager
    command:
      - yarn
      - nodemanager
    env_file:
      - ./config
    networks:
      - bigdata

config文件

# Hadoop settings shared by every service through `env_file: ./config`.
# Entries are named <TARGET-FILE>_<property>=<value>; presumably the image's
# startup script writes each one into the matching Hadoop XML config file
# (confirm against the apache/hadoop image documentation).
#
# core-site.xml: default filesystem URI, given under both the legacy
# (fs.default.name) and the current (fs.defaultFS) property name.
CORE-SITE.XML_fs.default.name=hdfs://namenode
CORE-SITE.XML_fs.defaultFS=hdfs://namenode
# hdfs-site.xml: NameNode RPC endpoint, replication factor, and storage
# directories. The directories live under /data, which the compose services
# bind-mount from the host.
HDFS-SITE.XML_dfs.namenode.rpc-address=namenode:8020
HDFS-SITE.XML_dfs.replication=1
HDFS-SITE.XML_dfs.namenode.name.dir=/data/dfs/name
HDFS-SITE.XML_dfs.datanode.data.dir=/data/dfs/data
# mapred-site.xml: run MapReduce on YARN. $HADOOP_HOME must resolve to the
# Hadoop install path inside the container; the accompanying instructions
# export HADOOP_HOME=/opt/hadoop before starting the services.
MAPRED-SITE.XML_mapreduce.framework.name=yarn
MAPRED-SITE.XML_yarn.app.mapreduce.am.env=HADOOP_MAPRED_HOME=$HADOOP_HOME
MAPRED-SITE.XML_mapreduce.map.env=HADOOP_MAPRED_HOME=$HADOOP_HOME
MAPRED-SITE.XML_mapreduce.reduce.env=HADOOP_MAPRED_HOME=$HADOOP_HOME
# yarn-site.xml: ResourceManager host (matches the compose service hostname),
# physical/virtual memory checks switched off, and the MapReduce shuffle
# auxiliary service enabled on the NodeManagers.
YARN-SITE.XML_yarn.resourcemanager.hostname=resourcemanager
YARN-SITE.XML_yarn.nodemanager.pmem-check-enabled=false
YARN-SITE.XML_yarn.nodemanager.delete.debug-delay-sec=600
YARN-SITE.XML_yarn.nodemanager.vmem-check-enabled=false
YARN-SITE.XML_yarn.nodemanager.aux-services=mapreduce_shuffle
# capacity-scheduler.xml: a single `default` queue under root holding 100% of
# the capacity, open to all users (`*`) for submission and administration.
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-applications=10000
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-am-resource-percent=0.1
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.resource-calculator=org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.queues=default
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.capacity=100
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.user-limit-factor=1
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.maximum-capacity=100
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.state=RUNNING
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_submit_applications=*
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_administer_queue=*
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.node-locality-delay=40
# Intentionally empty: no queue mappings are defined.
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings=
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings-override.enable=false

ENSURE_NAMENODE_DIR 配置在 namenode 服务的 environment 中,表示当该目录不存在时,容器启动时会先格式化 namenode 目录(相当于执行 hdfs namenode -format -force)。如果同时指定了 ENSURE_NAMENODE_CLUSTERID,还可以自定义 clusterid。

hadoop的一些参数:hadoop.apache.org/docs/r2.10.…

启动服务之前,先设置 HADOOP_HOME 环境变量,config 文件中的 $HADOOP_HOME 会用到它。注意:这里填的不是本机的路径,而是 docker 容器内 hadoop 的 home 路径:
# Hadoop install path inside the container (not a host path); the config env file refers to it as $HADOOP_HOME.
export HADOOP_HOME=/opt/hadoop
启动服务
# Start all services in the background under the compose project name "hadoop2".
docker-compose -p hadoop2 up -d
删除服务
# Stop and remove the containers of the "hadoop2" project.
docker-compose -p hadoop2 down

如果是安装 hadoop 3.x,理论上把镜像修改为 apache/hadoop:3.3.6 即可(注意:Hadoop 3.x 的 NameNode Web UI 默认端口由 50070 改为 9870,namenode 的端口映射需要相应调整),请参考 hadoop/docker-compose.yaml at docker-hadoop-3 · apache/hadoop · GitHub