先创建 Docker 网络 bigdata,后续部署 Kafka、Flink、Spark、MySQL 时都可以直接使用这个网络进行通信:
# Create the shared "bigdata" bridge network only if it does not exist yet,
# so this step can be re-run without failing on "network already exists".
docker network inspect bigdata >/dev/null 2>&1 || docker network create --driver bridge bigdata
docker-compose.yml
version: '3'

# The "bigdata" network is created outside of Compose (docker network create),
# so it is declared as external. An external network must not carry other
# settings such as `driver`; Compose rejects that combination as conflicting.
networks:
  bigdata:
    external: true
# All services use the same apache/hadoop:2.10.2 image, load their Hadoop
# settings from ./config and are attached to the external "bigdata" network.
services:
  # HDFS NameNode; container port 50070 is published on the host.
  namenode:
    image: apache/hadoop:2.10.2
    hostname: namenode
    restart: always
    command: ["hdfs", "namenode"]
    ports:
      - "50070:50070"
    volumes:
      - ~/hadoop2/data/:/data
    environment:
      # Same path as dfs.namenode.name.dir in ./config.
      ENSURE_NAMENODE_DIR: "/data/dfs/name"
    env_file:
      - ./config
    networks:
      - bigdata

  # HDFS DataNodes; each one gets its own host directory mounted at the
  # container path configured as dfs.datanode.data.dir in ./config.
  datanode1:
    image: apache/hadoop:2.10.2
    hostname: datanode1
    command: ["hdfs", "datanode"]
    depends_on:
      - namenode
    volumes:
      - ~/hadoop2/data/dfs/data1:/data/dfs/data
    env_file:
      - ./config
    networks:
      - bigdata

  datanode2:
    image: apache/hadoop:2.10.2
    hostname: datanode2
    command: ["hdfs", "datanode"]
    depends_on:
      - namenode
    volumes:
      - ~/hadoop2/data/dfs/data2:/data/dfs/data
    env_file:
      - ./config
    networks:
      - bigdata

  # YARN ResourceManager; container port 8088 is published on the host.
  resourcemanager:
    image: apache/hadoop:2.10.2
    hostname: resourcemanager
    command: ["yarn", "resourcemanager"]
    ports:
      - "8088:8088"
    env_file:
      - ./config
    networks:
      - bigdata

  # YARN NodeManagers; started after the ResourceManager container.
  nodemanager1:
    image: apache/hadoop:2.10.2
    hostname: nodemanager1
    command: ["yarn", "nodemanager"]
    depends_on:
      - resourcemanager
    env_file:
      - ./config
    networks:
      - bigdata

  nodemanager2:
    image: apache/hadoop:2.10.2
    hostname: nodemanager2
    command: ["yarn", "nodemanager"]
    depends_on:
      - resourcemanager
    env_file:
      - ./config
    networks:
      - bigdata
config 文件(与 docker-compose.yml 放在同一目录,由各服务通过 env_file 加载):
# Environment file shared by all services (referenced as ./config via env_file).
# NOTE(review): keys follow the pattern <FILE>_<property>=<value>; presumably the
# apache/hadoop image writes each entry into the named Hadoop config file at
# container start - confirm against the image documentation.
#
# core-site.xml: default filesystem points at the "namenode" service host.
# NOTE(review): fs.default.name appears to be the older name of fs.defaultFS; both are set here.
CORE-SITE.XML_fs.default.name=hdfs://namenode
CORE-SITE.XML_fs.defaultFS=hdfs://namenode
# hdfs-site.xml: NameNode RPC endpoint, replication factor and storage directories.
# The directories are container paths and match the volumes in docker-compose.yml.
HDFS-SITE.XML_dfs.namenode.rpc-address=namenode:8020
HDFS-SITE.XML_dfs.replication=1
HDFS-SITE.XML_dfs.namenode.name.dir=/data/dfs/name
HDFS-SITE.XML_dfs.datanode.data.dir=/data/dfs/data
# mapred-site.xml: run MapReduce on YARN; AM/map/reduce tasks get HADOOP_MAPRED_HOME
# set from $HADOOP_HOME.
MAPRED-SITE.XML_mapreduce.framework.name=yarn
MAPRED-SITE.XML_yarn.app.mapreduce.am.env=HADOOP_MAPRED_HOME=$HADOOP_HOME
MAPRED-SITE.XML_mapreduce.map.env=HADOOP_MAPRED_HOME=$HADOOP_HOME
MAPRED-SITE.XML_mapreduce.reduce.env=HADOOP_MAPRED_HOME=$HADOOP_HOME
# yarn-site.xml: ResourceManager host is the "resourcemanager" service; physical and
# virtual memory checks are disabled for NodeManagers.
YARN-SITE.XML_yarn.resourcemanager.hostname=resourcemanager
YARN-SITE.XML_yarn.nodemanager.pmem-check-enabled=false
YARN-SITE.XML_yarn.nodemanager.delete.debug-delay-sec=600
YARN-SITE.XML_yarn.nodemanager.vmem-check-enabled=false
YARN-SITE.XML_yarn.nodemanager.aux-services=mapreduce_shuffle
# capacity-scheduler.xml: a single "default" queue holding 100% of the capacity.
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-applications=10000
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-am-resource-percent=0.1
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.resource-calculator=org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.queues=default
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.capacity=100
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.user-limit-factor=1
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.maximum-capacity=100
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.state=RUNNING
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_submit_applications=*
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_administer_queue=*
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.node-locality-delay=40
# Intentionally left empty: no queue mappings are defined.
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings=
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings-override.enable=false
ENSURE_NAMENODE_DIR 配置在 namenode 服务的 environment 中,表示格式化 namenode 目录(hdfs namenode -format -force)。如果同时指定了 ENSURE_NAMENODE_CLUSTERID,还可以自定义 clusterid。
Hadoop 的一些参数说明可参考官方文档:https://hadoop.apache.org/docs/r2.10.2/
设置 HADOOP_HOME 环境变量(config 文件中会用到)。注意它不是本机的路径,而是 Docker 容器内 Hadoop 的安装路径:
# Hadoop home inside the container (not a host path); the config file
# references it as $HADOOP_HOME in the HADOOP_MAPRED_HOME entries.
export HADOOP_HOME=/opt/hadoop
启动服务
# Start every service in the background under the Compose project name "hadoop2".
docker-compose --project-name hadoop2 up --detach
停止并删除服务
# Stop and remove the containers of the Compose project "hadoop2".
docker-compose --project-name hadoop2 down
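如果要安装 Hadoop 3.x,理论上把镜像改为 apache/hadoop:3.3.6 即可(注意 Hadoop 3.x 中 NameNode Web UI 的默认端口由 50070 变为 9870,端口映射需要相应调整)。可参考 apache/hadoop 仓库 docker-hadoop-3 分支中的 docker-compose.yaml:https://github.com/apache/hadoop/blob/docker-hadoop-3/docker-compose.yaml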