kubesphere部署和Devops实战

1,369 阅读4分钟

kubesphere

KubeSphere 是在 Kubernetes 之上构建的面向云原生应用的分布式操作系统,完全开源,支持多云与多集群管理,提供全栈的 IT 自动化运维能力,简化企业的 DevOps 工作流。作为全栈的多租户容器平台,KubeSphere 提供了运维友好的向导式操作界面,帮助企业快速构建一个强大和功能丰富的容器云平台。KubeSphere 为用户提供构建企业级 Kubernetes 环境所需的多项功能,例如多云与多集群管理、Kubernetes 资源管理、DevOps、应用生命周期管理、微服务治理(服务网格)、日志查询与收集、服务与网络、多租户管理、监控告警、事件与审计查询、存储管理、访问权限控制、GPU 支持、网络策略、镜像仓库管理以及安全管理等。

高可用本地仓库部署

安装包

# Install dependencies required by kube-proxy (IPVS mode) and conntrack-based networking
apt-get install -y conntrack ipvsadm ipset

节点需要固化内核参数

# Persist the kernel parameters required by KubeSphere/Kubernetes.
# NOTE: 'tee -a' appends, so re-running this snippet duplicates the
# entries; drop -a (or delete the file first) to re-apply from scratch.
cat <<EOF | sudo tee -a /etc/sysctl.d/90-kubesphere.conf
vm.max_map_count = 262144
fs.may_detach_mounts = 1
net.ipv4.ip_forward = 1
vm.swappiness=1
kernel.pid_max =1000000
fs.inotify.max_user_instances=524288
EOF
# BUGFIX: 'sysctl -p' with no argument only reads /etc/sysctl.conf and
# would NOT load the file we just wrote under /etc/sysctl.d/.
# '--system' processes all standard locations, so the new settings
# actually take effect (same as restarting systemd-sysctl.service).
sudo sysctl --system

$ systemctl status systemd-sysctl.service
● systemd-sysctl.service - Apply Kernel Variables
     Loaded: loaded (/lib/systemd/system/systemd-sysctl.service; static)
     Active: active (exited) since Sat 2024-08-31 08:22:36 CST; 2s ago
       Docs: man:systemd-sysctl.service(8)
             man:sysctl.d(5)
    Process: 10188 ExecStart=/lib/systemd/systemd-sysctl (code=exited, status=0/SUCCESS)
   Main PID: 10188 (code=exited, status=0/SUCCESS)
        CPU: 11ms

Aug 31 08:22:36 xxx systemd[1]: Starting Apply Kernel Variables...
Aug 31 08:22:36 xxx systemd-sysctl[10188]: Couldn't write '4194304' to 'net/ipv4/netfilter/ip_conntrack_max', ignoring: No such file or d>
Aug 31 08:22:36 xxx systemd[1]: Finished Apply Kernel Variables.

镜像

下载镜像(直接用我的脚本上传镜像更好)

# Fetch the image list; the aliyuncs mirror variant can be used instead
curl -L -O https://github.com/kubesphere/ks-installer/releases/download/v3.4.1/images-list.txt
# Fetch the offline download helper script
curl -L -O https://github.com/kubesphere/ks-installer/releases/download/v3.4.1/offline-installation-tool.sh
chmod +x offline-installation-tool.sh
# Pull the images and save them locally (-s) under the -d directory
./offline-installation-tool.sh -s -l images-list.txt -d ./kubesphere-images
# Push the saved images to the private registry given by -r
./offline-installation-tool.sh -l images-list.txt -d ./kubesphere-images -r harbor.XXX.com/sre

注:images-list.txt添加kubernetes的镜像,一起下载上传到私有仓库

##coredns-images
registry.cn-beijing.aliyuncs.com/kubesphereio/coredns:1.8.6
##calico-images
registry.cn-beijing.aliyuncs.com/kubesphereio/kube-controllers:v3.26.1
registry.cn-beijing.aliyuncs.com/kubesphereio/cni:v3.26.1
registry.cn-beijing.aliyuncs.com/kubesphereio/node:v3.26.1
registry.cn-beijing.aliyuncs.com/kubesphereio/pod2daemon-flexvol:v3.26.1
##openebs-images
registry.cn-beijing.aliyuncs.com/kubesphereio/provisioner-localpv:3.3.0
registry.cn-beijing.aliyuncs.com/kubesphereio/linux-utils:3.3.0
##kubesphere-images
registry.cn-beijing.aliyuncs.com/kubesphereio/kube-apiserver:v1.23.17
registry.cn-beijing.aliyuncs.com/kubesphereio/kube-proxy:v1.23.17
registry.cn-beijing.aliyuncs.com/kubesphereio/kube-controller-manager:v1.23.17
registry.cn-beijing.aliyuncs.com/kubesphereio/kube-scheduler:v1.23.17
registry.cn-beijing.aliyuncs.com/kubesphereio/pause:3.6
registry.cn-beijing.aliyuncs.com/kubesphereio/k8s-dns-node-cache:1.15.12

国内镜像修改为原始镜像再传到自己的仓库

#!/bin/bash
# Re-tag images pulled from a CN mirror back to their ORIGINAL upstream
# names and push them to a private registry.
#   images-list.txt     - original (upstream) image references
#   images-list-al.txt  - CN mirror (aliyuncs) image references
#   images-list-add.txt - extra images, grouped under "##<project>-images" headers
# Lines starting with "##" are group headers; other non-empty lines are images.

reg="harbor.XXX.com/sre"

# Normalize an upstream image reference into the path used under $reg:
# add the implicit "library/" namespace for bare Docker Hub images and
# strip the quay.io/ or ghcr.io/ registry prefix.
# (The original script tested a LITERAL string here — `[ x"echo $line |
# grep '/'" == "x" ]` — so "library/" was never prepended; fixed.)
normalize_image() {
    local name=$1
    # bare Docker Hub image (no '/') lives under the implicit "library/" namespace
    if [[ "$name" != */* ]]; then
        name="library/$name"
    fi
    # drop the registry prefix, keep the repository path
    name=${name#quay.io/}
    name=${name#ghcr.io/}
    printf '%s\n' "$name"
}

# 1. Pull everything from the CN mirror list.
echo "====== pull aliyun images ======"
if [[ -f images-list-al.txt ]]; then
    while IFS= read -r line; do
        [[ -z "$line" || "$line" == \#\#* ]] && continue
        docker pull "$line"
    done < images-list-al.txt
else
    echo "warn: images-list-al.txt not found, skipping pull" >&2
fi

# 2. Re-tag the mirrored images with their original names and push.
echo "====== push harbor images ======"
if [[ -f images-list.txt ]]; then
    while IFS= read -r line; do
        [[ -z "$line" || "$line" == \#\#* ]] && continue
        # last path component (name:tag) is used to find the mirror counterpart
        imagename=${line##*/}
        # -F: match the name literally (dots in tags are not regex wildcards)
        al_image=$(grep -F -- "$imagename" images-list-al.txt | head -n 1)
        target=$(normalize_image "$line")
        docker tag "$al_image" "$reg/$target"
        docker push "$reg/$target"
    done < images-list.txt
else
    echo "warn: images-list.txt not found, skipping push" >&2
fi

# 3. Push the additional images under $reg/<project>/, where <project>
#    comes from the most recent "##<project>-images" header line.
echo "====== push harbor images add ======"
if [[ -f images-list-add.txt ]]; then
    pro=""
    while IFS= read -r line; do
        if [[ "$line" == \#\#* ]]; then
            # "##coredns-images" -> project "coredns"
            pro=${line##\#\#}
            pro=${pro%-images}
            continue
        fi
        [[ -z "$line" ]] && continue
        image=${line##*/}
        docker pull "$line"
        docker tag "$line" "$reg/$pro/$image"
        docker push "$reg/$pro/$image"
    done < images-list-add.txt
else
    echo "warn: images-list-add.txt not found, skipping add" >&2
fi

kubekey构建集群

下载kk

# Use the CN download zone for the kk binary
export KKZONE=cn
curl -sfL https://get-kk.kubesphere.io | VERSION=v3.0.13 sh -
chmod +x kk
# Generate the default config file (config-sample.yaml)
./kk create config --with-kubesphere v3.4.1 --with-kubernetes v1.23.17

修改config-sample.yaml

  controlPlaneEndpoint:
    ## Internal loadbalancer for apiservers
    # internalLoadbalancer: haproxy # not needed when an external LB is already deployed
    domain: lb.kubesphere.local
    address: 10.x.23.6 # change to the VIP of the haproxy that is already deployed
    port: 6443
# local registry; once configured, every image gets the $privateRegistry/ prefix
  registry:
    privateRegistry: "harbor.xxx.com/sre"
……
  name: ks-installer
  local_registry: "harbor.xxx.com/sre"
# add-ons
  openpitrix:
    store:
      enabled: true # can be enabled at install time, or turned on after deployment

构建k8s和kubesphere

# Create the Kubernetes cluster and install KubeSphere from the config file
./kk create cluster -f config-sample.yaml

部署完成

$ kubectl logs -n kubesphere-system $(kubectl get pod -n kubesphere-system -l 'app in (ks-install, ks-installer)' -o jsonpath='{.items[0].metadata.name}') -f
Console: http://10.x.1.95:30880
Account: admin
Password: P@88w0rd
$ cat /etc/hosts
# kubekey hosts BEGIN
10.x.1.98  betagz-kvm-kubesphere-node1.gy.ntes.cluster.local betagz-kvm-kubesphere-node1.gy.ntes
10.x.1.99  betagz-kvm-kubesphere-node2.gy.ntes.cluster.local betagz-kvm-kubesphere-node2.gy.ntes
10.x.1.94  betagz-kvm-kubesphere-node3.gy.ntes.cluster.local betagz-kvm-kubesphere-node3.gy.ntes
10.x.1.95  betagz-kvm-kubesphere-master1.gy.ntes.cluster.local betagz-kvm-kubesphere-master1.gy.ntes
10.x.1.96  betagz-kvm-kubesphere-master2.gy.ntes.cluster.local betagz-kvm-kubesphere-master2.gy.ntes
10.x.1.97  betagz-kvm-kubesphere-master3.gy.ntes.cluster.local betagz-kvm-kubesphere-master3.gy.ntes
10.x.23.6  lb.kubesphere.local
# kubekey hosts END

部署问题

问题一

部署完成后可能会出现pod无法访问kubernetes ClusterIP 10.233.0.1的情况,例如

$ kubectl logs  -n kube-system calico-kube-controllers-96847b8b5-7vtt8
2024-08-31 09:04:51.543 [INFO][1] main.go 138: Failed to initialize datastore error=Get "https://10.233.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": dial tcp 10.233.0.1:443: i/o timeout

抓包分析

# 异常pod宿主机上抓包
$ sudo ipvsadm -L -n |grep -A3 10.233.0.1
TCP  10.233.0.1:443 rr
  -> 10.x.1.95:6443              Masq    1      43         0
  -> 10.x.1.96:6443              Masq    1      43         0
  -> 10.x.1.97:6443              Masq    1      41         0
$ sudo tcpdump -eni any port 443 and src host 10.233.99.11

# 进入异常pod去telnet
$ sudo docker inspect 30303dbcafc3 |grep Pid
$ sudo nsenter --target  15889 --net

# 再去10.x.1.95抓包,会发现宿主机正常将包发到了lvs
17:00:46.551939 calidb226d66e00 In  ifindex 24 ae:e5:2e:4c:08:f4 ethertype IPv4 (0x0800), length 80: 10.233.99.11.50076 > 10.233.0.1.443: Flags [S], seq 87517820, win 64800, options [mss 1440,sackOK,TS val 3547890089 ecr 0,nop,wscale 10], length 0

# 排查原因,发现是puppet将net.ipv4.ip_forward设置成了0,没有保持为1
$ sudo sysctl -p
net.ipv4.ip_forward = 0
sysctl: cannot stat /proc/sys/net/ipv4/netfilter/ip_conntrack_max: No such file or directory
net.netfilter.nf_conntrack_max = 4194304
net.ipv4.conf.eth0.arp_ignore = 1
# 手工修改恢复
$ sudo sysctl net.ipv4.ip_forward=1

问题二

validate-devops-kubesphere-io-v1alpha1-s2ibuildertemplate 证书过期,相关反馈:ask.kubesphere.io/forum/d/232…

$ kubectl logs -n kubesphere-system ks-installer-d46d775f-7vzx5
          "item": {
            "kind": "S2iBuilderTemplate",
            "ns": "kubesphere-devops-system",
            "release": "devops",
            "resource": "python"
          },
          "msg": "non-zero return code",
          "stderr_lines": [
            "Error from server (InternalError): Internal error occurred: failed calling webhook "s2ibuildertemplate.kb.io": failed to call webhook: Post "https://webhook-server-service.kubesphere-devops-system.svc:443/validate-devops-kubesphere-io-v1alpha1-s2ibuildertemplate?timeout=10s": x509: certificate has expired or is not yet valid: current time 2024-08-31T09:48:57Z is after 2024-02-14T06:08:48Z",
            "Error from server (InternalError): Internal error occurred: failed calling webhook "s2ibuildertemplate.kb.io": failed to call webhook: Post "https://webhook-server-service.kubesphere-devops-system.svc:443/validate-devops-kubesphere-io-v1alpha1-s2ibuildertemplate?timeout=10s": x509: certificate has expired or is not yet valid: current time 2024-08-31T09:48:57Z is after 2024-02-14T06:08:48Z",
            "Error from server (InternalError): Internal error occurred: failed calling webhook "s2ibuildertemplate.kb.io": failed to call webhook: Post "https://webhook-server-service.kubesphere-devops-system.svc:443/validate-devops-kubesphere-io-v1alpha1-s2ibuildertemplate?timeout=10s": x509: certificate has expired or is not yet valid: current time 2024-08-31T09:48:57Z is after 2024-02-14T06:08:48Z"
          ],

$ kubectl logs -n kubesphere-devops-system s2ioperator-0
2024/09/03 15:31:02 http: TLS handshake error from 10.233.77.0:58370: remote error: tls: bad certificate

更新证书

# 根据tar来更新证书即可
$ openssl x509 -in ca.crt -noout -dates -subject
notBefore=Feb 20 04:28:31 2024 GMT
notAfter=Jul  8 04:28:31 2051 GMT
subject=C = CN, ST = HB, O = QC, CN = webhook-server-service
$ openssl x509 -in server.crt -noout -dates -subject
notBefore=Feb 20 04:28:31 2024 GMT
notAfter=Jul  8 04:28:31 2051 GMT
subject=C = CN, ST = HB, O = QC, CN = webhook-server-service.kubesphere-devops-system.svc

插件

接下来就可以启用插件,见:www.kubesphere.io/zh/docs/v3.…

权限

接下来在“平台管理-访问控制”中,先创建用户,再创建企业空间,再在企业空间中创建项目

添加、删除节点

添加

# Edit config-sample.yaml first and add the new node entries
./kk add nodes -f config-sample.yaml

删除

# The node must still be present in config-sample.yaml; remove its entry
# from the config file only after the deletion has completed
./kk delete node <nodeName> -f config-sample.yaml

添加应用仓库

使用企业空间管理员账号登录,到https://artifacthub.io/搜索镜像,点击install里面有仓库的地址,在“应用管理-应用仓库”中添加对应的仓库

Devops

问题:-n kubesphere-devops-system deploy/devops-controller 需要为1副本,否则会重复触发流水线?

podTemplate

KubeSphere 内置了 4 种类型的 podTemplate:base、nodejs、maven、go,用户无需编写 YAML 文件。参考:www.kubesphere.io/zh/docs/v3.…

自定义podTemplate

pipeline {
  agent {
    kubernetes {
      //cloud 'kubernetes'
      label 'mypod'
      // Ad-hoc pod template: the build runs inside this maven container
      yaml """
apiVersion: v1
kind: Pod
spec:
  containers:
  - name: maven-3.3.9
    image: maven:3.3.9-jdk-8-alpine
    command: ['cat']
    tty: true
"""
    }
  }
  stages {
    stage('Run maven') {
      steps {
        // execute the step inside the container declared above
        container('maven-3.3.9') {
          sh 'mvn -version'
        }
      }
    }
  }
}

自定义 Jenkins Agent

定义后就和自带的 base、nodejs、maven、go 一样了

  1. 使用admin用户登录
  2. 点击平台管理,选择集群管理,然后在左侧导航栏点击配置下的配置字典,其实就是kubectl get cm -n kubesphere-devops-system jenkins-casc-config
  3. 编辑jenkins-casc-config
  4. 在data.jenkins_user.yaml:jenkins.clouds.kubernetes.templates添加,这里也可以修改资源的默认值
              # Custom agent template: inherits the built-in "base" pod
              # template and adds a nodejs container with resource limits.
              - name: "nodejs-19.1.0"
                label: "nodejs-19.1.0"
                inheritFrom: "base"
                containers:
                - name: "nodejs19"
                  image: "harbor.xxx/sre/node:19.1.0"
                  command: "cat"
                  ttyEnabled: true
                  resourceRequestCpu: "100m"
                  resourceLimitCpu: "4000m"
                  resourceRequestMemory: "100Mi"
                  resourceLimitMemory: "8192Mi"

使用自定义Jenkins Agent的pipeline示例

pipeline {
  agent {
    kubernetes {
      // Use the custom agent template defined in jenkins-casc-config
      inheritFrom 'nodejs-19.1.0'
      // steps without an explicit container() run in 'base'
      defaultContainer 'base'
    }

  }
  stages {
    stage('Git clone repository') {
      agent none
      steps {
        git(url: 'https://g.xxx.com/xxx/xx.git', credentialsId: 'git-token', branch: 'master', changelog: true, poll: false)
      }
    }

    stage('Yarn install and build') {
      agent none
      steps {
        container('nodejs19') {
          sh 'npm config set registry https://registry.npmmirror.com'
          sh 'yarn config set registry https://registry.npmmirror.com'
          sh 'yarn install'
          // BUGFIX: the original 'NODE_OPTIONS=... && yarn build' only set a
          // non-exported shell variable for the empty command before '&&',
          // so node never saw NODE_OPTIONS. Prefixing the command applies
          // the variable to yarn's environment.
          sh 'NODE_OPTIONS=--max_old_space_size=$NODE_MAX_MEM yarn build'
        }

      }
    }

    stage('Docker build and push') {
      agent none
      steps {
        sh 'cat Dockerfile'
        // --password-stdin keeps the secret out of the process argument list
        withCredentials([usernamePassword(credentialsId: 'docker-token', passwordVariable: 'DOCKER_PASSWORD', usernameVariable: 'DOCKER_USERNAME')]) {
          sh 'echo "$DOCKER_PASSWORD" | docker login $REGISTRY -u "$DOCKER_USERNAME" --password-stdin'
        }
        sh 'docker build -t $REGISTRY/$DOCKERHUB_NAMESPACE/$APP_NAME:v1.$BUILD_NUMBER .'
        sh 'docker push $REGISTRY/$DOCKERHUB_NAMESPACE/$APP_NAME:v1.$BUILD_NUMBER'
      }
    }

    stage('Kubernetes apply') {
      agent none
      steps {
        // install the kubeconfig credential for kubectl
        withCredentials([kubeconfigContent(credentialsId: 'kubeconfig', variable: 'KUBECONFIG_CONFIG')]) {
          sh 'mkdir -p ~/.kube/'
          sh 'echo "$KUBECONFIG_CONFIG" > ~/.kube/config'
        }
        // render the manifest template: substitute image, tag, name, namespace, replicas
        sh "sed -e 's#{IMAGE_URL}#${REGISTRY}/${APP_NAME}#g;s#{IMAGE_TAG}#v1.${BUILD_NUMBER}#g;s#{APP_NAME}#${APP_NAME}#g;s#{NAMESPACE}#${NAMESPACE}#g;s#{REPLICAS_NUM}#${REPLICAS_NUM}#g' k8s-deployment-tpl.yaml > k8s-deployment.yml"
        sh 'cat k8s-deployment.yml'
        sh 'kubectl apply -f k8s-deployment.yml'
      }
    }

  }
  environment {
    NODE_MAX_MEM = '8192'
    REGISTRY = 'harbor.xxx.com'
    DOCKERHUB_NAMESPACE = 'sre'
    APP_NAME = 'xxx'
    NAMESPACE = 'xxx-project'
  }
}

DevOps ns说明

DevOps 项目是一个独立的命名空间,其中定义了一组流水线。用户可以按照自己的方式对流水线进行分组(例如:项目类型、组织类型)。

xxx-project                      Active   16d   # 项目的ns
xxx-react7csf9                   Active   4d18h # devops项目的ns

如果使用默认的kubeconfig,想要发布到别的ns(例如项目的ns)下面,需要对当前用户的kubeconfig授权

# Grant user "xxx" (the identity in the default kubeconfig) admin rights
# in the target project's namespace so the pipeline can deploy there.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: xxx-admin
  namespace: yyy-project # the target project's namespace
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: admin
subjects:
- apiGroup: rbac.authorization.k8s.io
  kind: User
  name: xxx

多分支流水线

前面我们是使用普通流水线,没有Webhook功能,就是gitlab自动无法触发流水线,所以想要使用webhook我们就需要创建多分支流水线。

  1. DevOps项目 中左侧的 代码仓库 添加对应的git代码仓库的http地址和对应的凭证
  2. 新建 多分支流水线,选择对应的代码仓库创建,正则过滤 建议开启,例如就使用"master"(这样就不会扫描其他分支)
  3. 将之前的pipeline写入Jenkinsfile
  4. 左侧的更多操作中选择 扫描仓库,就会对匹配分支检查是否存在Jenkinsfile,就会开始构建
  5. 构建成功后复制webhook url到gitlab,Trigger选择 Push events 和 Merge request events,这样我们merge到master就会自动触发流水线
  6. 在gitlab上点击test测试效果,会发现自动触发了流水线