软件部署-使用k8s部署Hadoop任意版本

阅读量 40 · 预计阅读时长 3 分钟

需求

在 k8s 中部署指定版本的 Hadoop 集群（可参考相关官方文档）。

准备容器镜像

参考文档

准备文件

文件hadoop-configmap.yaml

---
# Shared Hadoop configuration, mounted file-by-file (via subPath) into every
# Hadoop pod in the bigdata4 namespace.
apiVersion: v1
kind: ConfigMap
metadata:
  name: hadoop-config
  namespace: bigdata4
data:
  core-site.xml: |
    <?xml version="1.0"?>
    <configuration>
      <!-- fs.defaultFS supersedes the deprecated fs.default.name (Hadoop 2+);
           the old alias carried the identical value and has been dropped. -->
      <property>
        <name>fs.defaultFS</name>
        <value>hdfs://namenode-0.namenode.bigdata4:9000</value>
      </property>
    </configuration>

  hdfs-site.xml: |
    <?xml version="1.0"?>
    <configuration>
      <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/hadoop/data/nn</value>
      </property>
      <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/hadoop/data/dn</value>
      </property>
      <property>
        <name>dfs.namenode.rpc-address</name>
        <value>namenode-0.namenode.bigdata4:9000</value>
      </property>
      <!-- Bind to 0.0.0.0 to avoid "Unresolved Address" errors in pods -->
      <property>
        <name>dfs.namenode.rpc-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>dfs.namenode.http-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <!-- Single-datanode cluster: one replica per block -->
      <property>
        <name>dfs.replication</name>
        <value>1</value>
      </property>
      <!-- Clients reach datanodes by hostname (pod DNS) rather than pod IP -->
      <property>
        <name>dfs.client.use.datanode.hostname</name>
        <value>true</value>
      </property>
    </configuration>

  yarn-site.xml: |
    <?xml version="1.0"?>
    <configuration>
      <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>resourcemanager-0.resourcemanager.bigdata4</value>
      </property>
      <!-- Bind RM/NM daemons to all interfaces inside their pods -->
      <property>
        <name>yarn.resourcemanager.bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>yarn.nodemanager.bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <!-- Disable physical/virtual memory enforcement; container limits are
           managed by Kubernetes instead of YARN in this setup -->
      <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
      </property>
      <!-- Keep finished-container logs/dirs for 600s to ease debugging -->
      <property>
        <name>yarn.nodemanager.delete.debug-delay-sec</name>
        <value>600</value>
      </property>
      <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
      </property>
      <!-- Required for MapReduce shuffle between map and reduce tasks -->
      <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
      </property>
    </configuration>

  mapred-site.xml: |
    <?xml version="1.0"?>
    <configuration>
      <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
      </property>
      <!-- AM/map/reduce processes need HADOOP_MAPRED_HOME to locate the
           MapReduce framework jars inside the image -->
      <property>
        <name>yarn.app.mapreduce.am.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
      </property>
      <property>
        <name>mapreduce.map.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
      </property>
      <property>
        <name>mapreduce.reduce.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
      </property>
    </configuration>

  capacity-scheduler.xml: |
    <?xml version="1.0"?>
    <configuration>
      <property>
        <name>yarn.scheduler.capacity.maximum-applications</name>
        <value>10000</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
        <value>0.1</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.resource-calculator</name>
        <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
      </property>
      <!-- Single "default" queue owning 100% of cluster capacity -->
      <property>
        <name>yarn.scheduler.capacity.root.queues</name>
        <value>default</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.capacity</name>
        <value>100</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
        <value>1</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
        <value>100</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.state</name>
        <value>RUNNING</value>
      </property>
      <!-- Open ACLs: any user may submit to / administer the default queue -->
      <property>
        <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
        <value>*</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
        <value>*</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.node-locality-delay</name>
        <value>40</value>
      </property>
    </configuration>

文件hadoop-namenode.yaml

---
# Headless Service: gives the StatefulSet pod the stable DNS name
# namenode-0.namenode.bigdata4 referenced throughout the Hadoop config.
apiVersion: v1
kind: Service
metadata:
  name: namenode
  namespace: bigdata4
  labels:
    app: hadoop-namenode
spec:
  ports:
    - port: 9870
      name: web
    - port: 9000
      name: rpc
    # - port: 8020
    #   name: ipc
  clusterIP: None  # must stay headless so StatefulSet pod DNS works
  selector:
    app: hadoop-namenode
---
# NodePort Service for access from outside the cluster (web UI / RPC).
apiVersion: v1
kind: Service
metadata:
  name: namenode-external
  namespace: bigdata4
  labels:
    app: hadoop-namenode
spec:
  type: NodePort
  ports:
    - port: 9870
      name: web
      targetPort: 9870
    - port: 9000
      name: rpc
      targetPort: 9000
  selector:
    app: hadoop-namenode
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: namenode
  namespace: bigdata4
spec:
  serviceName: namenode  # must reference the headless Service above
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-namenode
  template:
    metadata:
      labels:
        app: hadoop-namenode
    spec:
      containers:
      - name: namenode
        image: zhuyifeiruichuang/hadoop:3.1.1
        imagePullPolicy: IfNotPresent
        command: ["sh", "-c"]
        # Format HDFS metadata only on first start (no "current" dir yet),
        # then run the NameNode in the foreground.
        # -nonInteractive prevents the format prompt from hanging the pod.
        args:
          - |
            sudo mkdir -p /opt/hadoop/data/nn &&
            sudo chown -R hadoop:hadoop /opt/hadoop/data &&
            if [ ! -d /opt/hadoop/data/nn/current ]; then
              echo 'Formatting NameNode...' &&
              hdfs namenode -format -nonInteractive;
            fi &&
            hdfs namenode
        env:
          - name: ENSURE_NAMENODE_DIR
            value: "/opt/hadoop/data/nn"
          - name: HADOOP_CONF_DIR
            value: "/opt/hadoop/etc/hadoop"
        ports:
          - containerPort: 9870
            name: web
          - containerPort: 9000
            name: rpc
        volumeMounts:
          - name: hadoop-nn-data
            mountPath: /opt/hadoop/data/nn
          # subPath overlays single files so the rest of the image's
          # etc/hadoop directory stays intact.
          - name: hadoop-config-volume
            mountPath: /opt/hadoop/etc/hadoop/core-site.xml
            subPath: core-site.xml
          - name: hadoop-config-volume
            mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
            subPath: hdfs-site.xml
      volumes:
        - name: hadoop-config-volume
          configMap:
            name: hadoop-config
  volumeClaimTemplates:
  - metadata:
      name: hadoop-nn-data
    spec:
      accessModes: [ "ReadWriteOnce" ]
      storageClassName: "local"
      resources:
        requests:
          storage: 10Gi

文件hadoop-datanode.yaml

---
# Headless Service for stable DataNode pod DNS names.
apiVersion: v1
kind: Service
metadata:
  name: datanode
  namespace: bigdata4
  labels:
    app: hadoop-datanode
spec:
  ports:
    - port: 9864
      name: web
    - port: 9866
      name: data
  clusterIP: None
  selector:
    app: hadoop-datanode
---
# NodePort Service for access from outside the cluster.
apiVersion: v1
kind: Service
metadata:
  name: datanode-external
  namespace: bigdata4
  labels:
    app: hadoop-datanode
spec:
  type: NodePort
  ports:
    - port: 9864
      name: web
      targetPort: 9864
    - port: 9866
      name: data
      targetPort: 9866
  selector:
    app: hadoop-datanode
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: datanode
  namespace: bigdata4
spec:
  serviceName: datanode
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-datanode
  template:
    metadata:
      labels:
        app: hadoop-datanode
    spec:
      containers:
      - name: datanode
        image: zhuyifeiruichuang/hadoop:3.1.1
        imagePullPolicy: IfNotPresent
        command: ["sh", "-c"]
        # Prepare the data directory, then run the DataNode in the
        # foreground (same startup style as the namenode manifest).
        args:
          - |
            sudo mkdir -p /opt/hadoop/data/dn &&
            sudo chown -R hadoop:hadoop /opt/hadoop/data &&
            hdfs datanode
        env:
          - name: HADOOP_CONF_DIR
            value: "/opt/hadoop/etc/hadoop"
        ports:
          - containerPort: 9864
            name: web
          - containerPort: 9866
            name: data
        volumeMounts:
          - name: hadoop-dn-data
            mountPath: /opt/hadoop/data/dn
          # subPath overlays single config files without hiding the rest
          # of the image's etc/hadoop directory.
          - name: hadoop-config-volume
            mountPath: /opt/hadoop/etc/hadoop/core-site.xml
            subPath: core-site.xml
          - name: hadoop-config-volume
            mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
            subPath: hdfs-site.xml
          - name: hadoop-config-volume
            mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
            subPath: yarn-site.xml
      volumes:
        - name: hadoop-config-volume
          configMap:
            name: hadoop-config
  volumeClaimTemplates:
  - metadata:
      name: hadoop-dn-data
    spec:
      accessModes: [ "ReadWriteOnce" ]
      storageClassName: "local"
      resources:
        requests:
          storage: 20Gi

文件hadoop-resourcemanager.yaml

# Headless Service: gives the RM pod the stable DNS name
# resourcemanager-0.resourcemanager.bigdata4 used in yarn-site.xml.
apiVersion: v1
kind: Service
metadata:
  name: resourcemanager
  namespace: bigdata4
  labels:
    app: hadoop-resourcemanager
spec:
  ports:
    - port: 8088
      name: web
    - port: 8032
      name: rpc
  clusterIP: None
  selector:
    app: hadoop-resourcemanager
---
# NodePort Service for access from outside the cluster (YARN web UI / RPC).
apiVersion: v1
kind: Service
metadata:
  name: resourcemanager-external
  namespace: bigdata4
  labels:
    app: hadoop-resourcemanager
spec:
  type: NodePort
  ports:
    - port: 8088
      name: web
      targetPort: 8088
    - port: 8032
      name: rpc
      targetPort: 8032
  selector:
    app: hadoop-resourcemanager
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: resourcemanager
  namespace: bigdata4
spec:
  serviceName: resourcemanager  # must reference the headless Service above
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-resourcemanager
  template:
    metadata:
      labels:
        app: hadoop-resourcemanager
    spec:
      containers:
      - name: resourcemanager
        image: zhuyifeiruichuang/hadoop:3.1.1
        imagePullPolicy: IfNotPresent
        command: ["yarn", "resourcemanager"]
        env:
          - name: HADOOP_CONF_DIR
            value: "/opt/hadoop/etc/hadoop"
        ports:
          - containerPort: 8088
          - containerPort: 8032
        volumeMounts:
          # subPath overlays single config files without hiding the rest
          # of the image's etc/hadoop directory.
          - name: hadoop-config-volume
            mountPath: /opt/hadoop/etc/hadoop/core-site.xml
            subPath: core-site.xml
          - name: hadoop-config-volume
            mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
            subPath: hdfs-site.xml
          - name: hadoop-config-volume
            mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
            subPath: yarn-site.xml
          - name: hadoop-config-volume
            mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
            subPath: mapred-site.xml
          # Scheduler config is only needed by the ResourceManager.
          - name: hadoop-config-volume
            mountPath: /opt/hadoop/etc/hadoop/capacity-scheduler.xml
            subPath: capacity-scheduler.xml
      volumes:
        - name: hadoop-config-volume
          configMap:
            name: hadoop-config

文件hadoop-nodemanager.yaml

# Headless Service for stable NodeManager pod DNS names.
apiVersion: v1
kind: Service
metadata:
  name: nodemanager
  namespace: bigdata4
  labels:
    app: hadoop-nodemanager
spec:
  ports:
    - port: 8042
      name: web
  clusterIP: None
  selector:
    app: hadoop-nodemanager
---
# NodePort Service for access from outside the cluster (NM web UI).
apiVersion: v1
kind: Service
metadata:
  name: nodemanager-external
  namespace: bigdata4
  labels:
    app: hadoop-nodemanager
spec:
  type: NodePort
  ports:
    - port: 8042
      name: web
      targetPort: 8042
  selector:
    app: hadoop-nodemanager
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: nodemanager
  namespace: bigdata4
spec:
  serviceName: nodemanager  # must reference the headless Service above
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-nodemanager
  template:
    metadata:
      labels:
        app: hadoop-nodemanager
    spec:
      containers:
      - name: nodemanager
        image: zhuyifeiruichuang/hadoop:3.1.1
        imagePullPolicy: IfNotPresent
        command: ["yarn", "nodemanager"]
        env:
          - name: HADOOP_CONF_DIR
            value: "/opt/hadoop/etc/hadoop"
        ports:
          - containerPort: 8042
        volumeMounts:
          # subPath overlays single config files without hiding the rest
          # of the image's etc/hadoop directory.
          - name: hadoop-config-volume
            mountPath: /opt/hadoop/etc/hadoop/core-site.xml
            subPath: core-site.xml
          - name: hadoop-config-volume
            mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
            subPath: hdfs-site.xml
          - name: hadoop-config-volume
            mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
            subPath: yarn-site.xml
          - name: hadoop-config-volume
            mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
            subPath: mapred-site.xml
      volumes:
        - name: hadoop-config-volume
          configMap:
            name: hadoop-config

部署

# Create the namespace first; every manifest targets namespace bigdata4.
kubectl create namespace bigdata4
# Apply the shared ConfigMap before the workloads that mount it.
kubectl apply -f hadoop-configmap.yaml
# HDFS daemons: NameNode must be reachable before DataNodes register.
kubectl apply -f hadoop-namenode.yaml
kubectl apply -f hadoop-datanode.yaml
# YARN daemons: ResourceManager, then NodeManagers that register with it.
kubectl apply -f hadoop-resourcemanager.yaml
kubectl apply -f hadoop-nodemanager.yaml