需求
在 Kubernetes(k8s)集群中部署一个指定版本的 Hadoop 集群,具体步骤见下列清单文件与部署命令。
准备容器镜像
容器镜像的构建方法参考相关文档;本文直接使用已构建好的镜像 zhuyifeiruichuang/hadoop:3.1.1。
准备文件
文件hadoop-configmap.yaml
# Shared Hadoop configuration, mounted into every pod via subPath so the
# image's other files under /opt/hadoop/etc/hadoop stay intact.
apiVersion: v1
kind: ConfigMap
metadata:
  name: hadoop-config
  namespace: bigdata4
data:
  core-site.xml: |
    <?xml version="1.0"?>
    <configuration>
      <!-- fs.default.name is the deprecated alias of fs.defaultFS; kept only
           for older clients. fs.defaultFS below takes precedence. -->
      <property>
        <name>fs.default.name</name>
        <value>hdfs://namenode-0.namenode.bigdata4:9000</value>
      </property>
      <property>
        <name>fs.defaultFS</name>
        <value>hdfs://namenode-0.namenode.bigdata4:9000</value>
      </property>
    </configuration>
  hdfs-site.xml: |
    <?xml version="1.0"?>
    <configuration>
      <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/hadoop/data/nn</value>
      </property>
      <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/hadoop/data/dn</value>
      </property>
      <property>
        <name>dfs.namenode.rpc-address</name>
        <value>namenode-0.namenode.bigdata4:9000</value>
      </property>
      <!-- Bind to 0.0.0.0 to fix "Unresolved Address" (the pod's own DNS
           record may not resolve yet at startup) -->
      <property>
        <name>dfs.namenode.rpc-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>dfs.namenode.http-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <!-- Single-DataNode cluster: one replica is sufficient -->
      <property>
        <name>dfs.replication</name>
        <value>1</value>
      </property>
      <!-- Clients reach DataNodes by hostname rather than (possibly
           unroutable) pod IP -->
      <property>
        <name>dfs.client.use.datanode.hostname</name>
        <value>true</value>
      </property>
    </configuration>
  yarn-site.xml: |
    <?xml version="1.0"?>
    <configuration>
      <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>resourcemanager-0.resourcemanager.bigdata4</value>
      </property>
      <!-- Bind RM/NM daemons to all interfaces inside their pods -->
      <property>
        <name>yarn.resourcemanager.bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>yarn.nodemanager.bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <!-- Disable physical/virtual memory checks: container memory accounting
           inside k8s pods tends to kill YARN containers spuriously -->
      <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
      </property>
      <!-- Keep finished-container logs/dirs for 600 s to ease debugging -->
      <property>
        <name>yarn.nodemanager.delete.debug-delay-sec</name>
        <value>600</value>
      </property>
      <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
      </property>
      <!-- Required for MapReduce shuffle phase -->
      <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
      </property>
    </configuration>
  mapred-site.xml: |
    <?xml version="1.0"?>
    <configuration>
      <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
      </property>
      <!-- Point AM/map/reduce environments at the Hadoop install dir -->
      <property>
        <name>yarn.app.mapreduce.am.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
      </property>
      <property>
        <name>mapreduce.map.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
      </property>
      <property>
        <name>mapreduce.reduce.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
      </property>
    </configuration>
  capacity-scheduler.xml: |
    <?xml version="1.0"?>
    <configuration>
      <property>
        <name>yarn.scheduler.capacity.maximum-applications</name>
        <value>10000</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
        <value>0.1</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.resource-calculator</name>
        <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
      </property>
      <!-- Single "default" queue that owns 100% of cluster capacity -->
      <property>
        <name>yarn.scheduler.capacity.root.queues</name>
        <value>default</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.capacity</name>
        <value>100</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
        <value>1</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
        <value>100</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.state</name>
        <value>RUNNING</value>
      </property>
      <!-- Open ACLs: any user may submit to / administer the default queue -->
      <property>
        <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
        <value>*</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
        <value>*</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.node-locality-delay</name>
        <value>40</value>
      </property>
    </configuration>
文件hadoop-namenode.yaml
# Headless Service: gives the StatefulSet pod its stable DNS name
# namenode-0.namenode.bigdata4, which the configs above reference.
apiVersion: v1
kind: Service
metadata:
  name: namenode
  namespace: bigdata4
  labels:
    app: hadoop-namenode
spec:
  ports:
    - port: 9870
      name: web
    - port: 9000
      name: rpc
    # - port: 8020
    #   name: ipc
  clusterIP: None  # must stay headless; StatefulSet pod DNS depends on it
  selector:
    app: hadoop-namenode
---
apiVersion: v1
kind: Service
metadata:
  name: namenode-external  # dedicated Service for access from outside the cluster
  namespace: bigdata4
  labels:
    app: hadoop-namenode
spec:
  type: NodePort
  ports:
    - port: 9870
      name: web
      targetPort: 9870
    - port: 9000
      name: rpc
      targetPort: 9000
  selector:
    app: hadoop-namenode
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: namenode
  namespace: bigdata4
spec:
  serviceName: namenode  # must point at the headless Service above
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-namenode
  template:
    metadata:
      labels:
        app: hadoop-namenode
    spec:
      containers:
        - name: namenode
          image: zhuyifeiruichuang/hadoop:3.1.1
          imagePullPolicy: IfNotPresent
          command: ["sh", "-c"]
          # Format the NameNode only on first start (no "current" dir on the
          # persistent volume yet), then run the daemon in the foreground.
          args:
            - |
              sudo mkdir -p /opt/hadoop/data/nn && \
              sudo chown -R hadoop:hadoop /opt/hadoop/data && \
              if [ ! -d /opt/hadoop/data/nn/current ]; then \
                echo 'Formatting NameNode...' && \
                hdfs namenode -format; \
              fi && \
              hdfs namenode
          env:
            - name: ENSURE_NAMENODE_DIR
              value: "/opt/hadoop/data/nn"
            - name: HADOOP_CONF_DIR
              value: "/opt/hadoop/etc/hadoop"
          ports:
            - containerPort: 9870
              name: web
            - containerPort: 9000
              name: rpc
          volumeMounts:
            - name: hadoop-nn-data
              mountPath: /opt/hadoop/data/nn
            # subPath mounts overlay single files, leaving the image's other
            # config files in etc/hadoop untouched
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
      volumes:
        - name: hadoop-config-volume
          configMap:
            name: hadoop-config
  volumeClaimTemplates:
    - metadata:
        name: hadoop-nn-data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: "local"
        resources:
          requests:
            storage: 10Gi
文件hadoop-datanode.yaml
# Headless Service for stable DataNode pod DNS
# (datanode-0.datanode.bigdata4).
apiVersion: v1
kind: Service
metadata:
  name: datanode
  namespace: bigdata4
  labels:
    app: hadoop-datanode
spec:
  ports:
    - port: 9864
      name: web
    - port: 9866
      name: data
  clusterIP: None
  selector:
    app: hadoop-datanode
---
apiVersion: v1
kind: Service
metadata:
  name: datanode-external  # NodePort Service for access from outside the cluster
  namespace: bigdata4
  labels:
    app: hadoop-datanode
spec:
  type: NodePort
  ports:
    - port: 9864
      name: web
      targetPort: 9864
    - port: 9866
      name: data
      targetPort: 9866
  selector:
    app: hadoop-datanode
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: datanode
  namespace: bigdata4
spec:
  serviceName: datanode  # must point at the headless Service above
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-datanode
  template:
    metadata:
      labels:
        app: hadoop-datanode
    spec:
      containers:
        - name: datanode
          image: zhuyifeiruichuang/hadoop:3.1.1
          imagePullPolicy: IfNotPresent
          # Prepare the data dir ownership, then run the DataNode daemon
          # in the foreground.
          command: ["sh", "-c", "sudo mkdir -p /opt/hadoop/data/dn && sudo chown -R hadoop:hadoop /opt/hadoop/data && hdfs datanode"]
          env:
            - name: HADOOP_CONF_DIR
              value: "/opt/hadoop/etc/hadoop"
          ports:
            - containerPort: 9864
              name: web
            - containerPort: 9866
              name: data
          volumeMounts:
            - name: hadoop-dn-data
              mountPath: /opt/hadoop/data/dn
            # subPath mounts overlay single config files only
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
      volumes:
        - name: hadoop-config-volume
          configMap:
            name: hadoop-config
  volumeClaimTemplates:
    - metadata:
        name: hadoop-dn-data
      spec:
        accessModes: ["ReadWriteOnce"]
        storageClassName: "local"
        resources:
          requests:
            storage: 20Gi
文件hadoop-resourcemanager.yaml
# Headless Service for the ResourceManager's stable pod DNS name
# (resourcemanager-0.resourcemanager.bigdata4, referenced by yarn-site.xml).
apiVersion: v1
kind: Service
metadata:
  name: resourcemanager
  namespace: bigdata4
  labels:
    app: hadoop-resourcemanager
spec:
  ports:
    - port: 8088
      name: web
    - port: 8032
      name: rpc
  clusterIP: None
  selector:
    app: hadoop-resourcemanager
---
apiVersion: v1
kind: Service
metadata:
  name: resourcemanager-external  # NodePort Service for external access
  namespace: bigdata4
  labels:
    app: hadoop-resourcemanager
spec:
  type: NodePort
  ports:
    - port: 8088
      name: web
      targetPort: 8088
    - port: 8032
      name: rpc
      targetPort: 8032
  selector:
    app: hadoop-resourcemanager
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: resourcemanager
  namespace: bigdata4
spec:
  serviceName: resourcemanager  # must point at the headless Service above
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-resourcemanager
  template:
    metadata:
      labels:
        app: hadoop-resourcemanager
    spec:
      containers:
        - name: resourcemanager
          image: zhuyifeiruichuang/hadoop:3.1.1
          imagePullPolicy: IfNotPresent
          command: ["yarn", "resourcemanager"]
          env:
            - name: HADOOP_CONF_DIR
              value: "/opt/hadoop/etc/hadoop"
          ports:
            - containerPort: 8088
            - containerPort: 8032
          # RM is stateless here, so no PVC; only config file overlays
          volumeMounts:
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/capacity-scheduler.xml
              subPath: capacity-scheduler.xml
      volumes:
        - name: hadoop-config-volume
          configMap:
            name: hadoop-config
文件hadoop-nodemanager.yaml
# Headless Service for the NodeManager's stable pod DNS name.
apiVersion: v1
kind: Service
metadata:
  name: nodemanager
  namespace: bigdata4
  labels:
    app: hadoop-nodemanager
spec:
  ports:
    - port: 8042
      name: web
  clusterIP: None
  selector:
    app: hadoop-nodemanager
---
apiVersion: v1
kind: Service
metadata:
  name: nodemanager-external  # NodePort Service for external access
  namespace: bigdata4
  labels:
    app: hadoop-nodemanager
spec:
  type: NodePort
  ports:
    - port: 8042
      name: web
      targetPort: 8042
  selector:
    app: hadoop-nodemanager
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: nodemanager
  namespace: bigdata4
spec:
  serviceName: nodemanager  # must point at the headless Service above
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-nodemanager
  template:
    metadata:
      labels:
        app: hadoop-nodemanager
    spec:
      containers:
        - name: nodemanager
          image: zhuyifeiruichuang/hadoop:3.1.1
          imagePullPolicy: IfNotPresent
          command: ["yarn", "nodemanager"]
          env:
            - name: HADOOP_CONF_DIR
              value: "/opt/hadoop/etc/hadoop"
          ports:
            - containerPort: 8042
          # NM needs the MR config too so launched containers inherit it
          volumeMounts:
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
      volumes:
        - name: hadoop-config-volume
          configMap:
            name: hadoop-config
部署
# Create the namespace; the dry-run | apply form is idempotent, so re-running
# the script does not fail with "AlreadyExists" as plain `kubectl create` would.
kubectl create namespace bigdata4 --dry-run=client -o yaml | kubectl apply -f -
# Apply the ConfigMap first: every pod below mounts it via subPath and
# would stay in ContainerCreating until it exists.
kubectl apply -f hadoop-configmap.yaml
kubectl apply -f hadoop-namenode.yaml
kubectl apply -f hadoop-datanode.yaml
kubectl apply -f hadoop-resourcemanager.yaml
kubectl apply -f hadoop-nodemanager.yaml