需求
使用 Docker 快速部署 Hadoop,并将数据持久化存储。
准备配置文件
准备文件compose.yaml
注意:
image 可替换为自定义构建的镜像,也可以直接使用 Docker Hub 上已有的镜像。各服务中的 build: . 需要当前目录存在 Dockerfile;若只使用已有镜像,可删除 build: . 配置。
# Compose project for a small Hadoop cluster (HDFS NameNode + DataNode,
# YARN ResourceManager + NodeManager). HDFS data lives in named volumes so
# it survives container re-creation.

# Named volumes holding the NameNode metadata and the DataNode blocks.
volumes:
  hadoop-nn-data: {}
  hadoop-dn-data: {}

services:
  namenode:
    build: .
    image: zhuyifeiruichuang/hadoop:3.1.1
    hostname: namenode
    # Prepare the metadata directory, format HDFS only when no `current`
    # directory exists yet (i.e. on first start with an empty volume), then
    # run the NameNode in the foreground.
    command:
      - sh
      - "-c"
      - >-
        sudo mkdir -p /opt/hadoop/data/nn
        && sudo chown -R hadoop:hadoop /opt/hadoop/data
        && if [ ! -d /opt/hadoop/data/nn/current ]; then
        echo 'Formatting NameNode...' && hdfs namenode -format; fi
        && hdfs namenode
    ports:
      # NameNode web UI (Hadoop 3 default HTTP port).
      - "9870:9870"
      # NameNode RPC. Must match dfs.namenode.rpc-address (namenode:8020) in
      # ./config; the previously published 9000 had no listener behind it.
      - "8020:8020"
    env_file:
      - ./config
    environment:
      # NOTE(review): presumably consumed by the image's start-up script;
      # confirm against the image before relying on it.
      ENSURE_NAMENODE_DIR: "/opt/hadoop/data/nn"
    volumes:
      - hadoop-nn-data:/opt/hadoop/data/nn
    restart: unless-stopped

  datanode:
    build: .
    image: zhuyifeiruichuang/hadoop:3.1.1
    # Prepare the block storage directory, then run the DataNode in the
    # foreground.
    command:
      - sh
      - "-c"
      - >-
        sudo mkdir -p /opt/hadoop/data/dn
        && sudo chown -R hadoop:hadoop /opt/hadoop/data
        && hdfs datanode
    env_file:
      - ./config
    ports:
      # DataNode data-transfer port (Hadoop 3 default).
      - "9866:9866"
    volumes:
      - hadoop-dn-data:/opt/hadoop/data/dn
    depends_on:
      - namenode
    restart: unless-stopped

  resourcemanager:
    build: .
    image: zhuyifeiruichuang/hadoop:3.1.1
    # Hostname must match yarn.resourcemanager.hostname in ./config.
    hostname: resourcemanager
    command: ["yarn", "resourcemanager"]
    ports:
      # ResourceManager web UI.
      - "8088:8088"
      # ResourceManager client RPC.
      - "8032:8032"
    env_file:
      - ./config
    depends_on:
      - namenode
    restart: unless-stopped

  nodemanager:
    build: .
    image: zhuyifeiruichuang/hadoop:3.1.1
    hostname: nodemanager
    command: ["yarn", "nodemanager"]
    env_file:
      - ./config
    ports:
      # NodeManager web UI.
      - "8042:8042"
    depends_on:
      - resourcemanager
      - namenode
    restart: unless-stopped
准备文件config
# Hadoop settings injected into every container through `env_file: ./config`.
# NOTE(review): the `<FILE>.XML_<property>=<value>` naming presumably makes the
# image write each entry into the matching Hadoop XML file; confirm against
# the image's start-up script.
#
# core-site.xml: default filesystem URI. `fs.default.name` is the deprecated
# name of `fs.defaultFS`; both point at the `namenode` service.
CORE-SITE.XML_fs.default.name=hdfs://namenode
CORE-SITE.XML_fs.defaultFS=hdfs://namenode
# hdfs-site.xml: storage directories (these are the paths mounted as volumes
# in compose.yaml), the NameNode RPC endpoint, and a replication factor of 1.
HDFS-SITE.XML_dfs.namenode.name.dir=/opt/hadoop/data/nn
HDFS-SITE.XML_dfs.datanode.data.dir=/opt/hadoop/data/dn
HDFS-SITE.XML_dfs.namenode.rpc-address=namenode:8020
HDFS-SITE.XML_dfs.replication=1
# mapred-site.xml: run MapReduce on YARN and tell the AM, map and reduce tasks
# where the MapReduce installation lives.
MAPRED-SITE.XML_mapreduce.framework.name=yarn
MAPRED-SITE.XML_yarn.app.mapreduce.am.env=HADOOP_MAPRED_HOME=/opt/hadoop
MAPRED-SITE.XML_mapreduce.map.env=HADOOP_MAPRED_HOME=/opt/hadoop
MAPRED-SITE.XML_mapreduce.reduce.env=HADOOP_MAPRED_HOME=/opt/hadoop
# yarn-site.xml: ResourceManager host (the compose service hostname), physical
# and virtual memory checks disabled, and the MapReduce shuffle aux-service.
YARN-SITE.XML_yarn.resourcemanager.hostname=resourcemanager
YARN-SITE.XML_yarn.nodemanager.pmem-check-enabled=false
YARN-SITE.XML_yarn.nodemanager.delete.debug-delay-sec=600
YARN-SITE.XML_yarn.nodemanager.vmem-check-enabled=false
YARN-SITE.XML_yarn.nodemanager.aux-services=mapreduce_shuffle
# capacity-scheduler.xml: a single `default` queue holding 100% of capacity,
# open to all users for submission and administration.
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-applications=10000
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.maximum-am-resource-percent=0.1
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.resource-calculator=org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.queues=default
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.capacity=100
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.user-limit-factor=1
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.maximum-capacity=100
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.state=RUNNING
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_submit_applications=*
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.root.default.acl_administer_queue=*
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.node-locality-delay=40
# Intentionally empty: no queue mappings are defined.
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings=
CAPACITY-SCHEDULER.XML_yarn.scheduler.capacity.queue-mappings-override.enable=false
部署容器
# Create and start every service defined in compose.yaml in the background.
docker compose --file compose.yaml up --detach