Trying out Rollout (this section can be skipped)
Prerequisites: prepare a Kubernetes cluster and the corresponding YAML files. Example files are shown below.
echoserver.yaml
This file creates the Deployment, Service, and Ingress.
apiVersion: apps/v1
kind: Deployment
metadata:
name: echoserver
labels:
app: echoserver
spec:
replicas: 6
selector:
matchLabels:
app: echoserver
template:
metadata:
labels:
app: echoserver
spec:
containers:
- name: echoserver
# On Mac M1 (arm64), choose an image that supports arm64, such as e2eteam/echoserver:2.2-linux-arm64
image: openkruise-registry.cn-shanghai.cr.aliyuncs.com/openkruise/demo:1.10.2
imagePullPolicy: IfNotPresent
ports:
- containerPort: 8080
env:
- name: NODE_NAME
value: version1
- name: PORT
value: '8080'
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
---
apiVersion: v1
kind: Service
metadata:
name: echoserver
labels:
app: echoserver
spec:
ports:
- port: 80
targetPort: 8080
protocol: TCP
name: http
selector:
app: echoserver
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: echoserver
annotations:
kubernetes.io/ingress.class: nginx
spec:
rules:
- http:
paths:
- backend:
service:
name: echoserver
port:
number: 80
path: /apis/echo
pathType: Exact
rollout.yaml
# Save the following content to a file named rollout.yaml.
apiVersion: rollouts.kruise.io/v1alpha1
kind: Rollout
metadata:
name: rollouts-demo
annotations:
rollouts.kruise.io/rolling-style: partition
spec:
objectRef:
workloadRef:
apiVersion: apps/v1
kind: Deployment
# Deployment Name
name: echoserver
strategy:
canary:
steps:
# This batch routes 20% of the traffic to the canary.
- weight: 20
replicas: 20%
# Pause for 60s, then automatically move to the next batch; for manual confirmation, configure pause: {}.
pause: {duration: 60}
- weight: 50
replicas: 50%
# Pause for 120s, then automatically move to the next batch; for manual confirmation, configure pause: {}.
pause: {duration: 120}
- weight: 100
replicas: 100%
pause: {duration: 300}
trafficRoutings:
# Service Name
- service: echoserver
ingress:
# Ingress Name
name: echoserver
Deploying the YAML above
Apply echoserver.yaml first, then rollout.yaml; the following resources (CRs) are created automatically:
deployment
replicaSet
service
ingress
rollout
Updating the Deployment
deployment
After the update you can see that the Deployment was modified: paused: true was added, and this change is very important.
The CRs with noticeable changes are:
replicaSet
one more revision (ReplicaSet) appears
rollout
starts to take effect
batchRelease
a CR created by the Rollout
service
the Rollout automatically creates a canary Service with an extra selector, as shown below:
echoserver-canary.yaml
apiVersion: v1
kind: Service
metadata:
creationTimestamp: "2023-11-16T03:55:25Z"
name: echoserver-canary
namespace: real-name
ownerReferences:
- apiVersion: rollouts.kruise.io/v1alpha1
blockOwnerDeletion: true
controller: true
kind: Rollout
name: rollouts-demo
uid: e7a61b26-f640-4f15-80a3-402012fbe3a5
resourceVersion: "121090625"
uid: 9de0a780-201d-4b84-b271-8de656dcd898
spec:
clusterIP: 1.1.1.1
clusterIPs:
- 1.1.1.1
internalTrafficPolicy: Cluster
ipFamilies:
- IPv4
ipFamilyPolicy: SingleStack
ports:
- name: http
port: 80
protocol: TCP
targetPort: 8080
selector:
app: echoserver
# this is the hash of the new pod revision; the newly created ReplicaSet carries the same pod-template-hash value
pod-template-hash: dd4845c7c
sessionAffinity: None
type: ClusterIP
status:
loadBalancer: {}
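For intuition, here is a rough sketch of how such a canary Service can be derived from the stable one: copy it, rename it, hand ownership to the Rollout, and pin the selector to the new revision's pod-template-hash. It produces an object of the same shape as the one above, but it is not the literal Kruise implementation.
package demo

import (
    corev1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// newCanaryService derives a canary Service from the stable Service.
func newCanaryService(stable *corev1.Service, podTemplateHash string, owner metav1.OwnerReference) *corev1.Service {
    canary := stable.DeepCopy()
    canary.ObjectMeta = metav1.ObjectMeta{
        Name:            stable.Name + "-canary",
        Namespace:       stable.Namespace,
        OwnerReferences: []metav1.OwnerReference{owner}, // owned by the Rollout
    }
    if canary.Spec.Selector == nil {
        canary.Spec.Selector = map[string]string{}
    }
    // only pods of the new ReplicaSet carry this hash, so the canary
    // Service selects exactly the canary pods
    canary.Spec.Selector["pod-template-hash"] = podTemplateHash
    // clear the allocated IPs so the API server assigns fresh ones
    canary.Spec.ClusterIP = ""
    canary.Spec.ClusterIPs = nil
    return canary
}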
Rough flow of key CR status updates
The diagram above is divided by vertical lines into four parts. The first three are controlled by the rollout module and are explained in detail in the source-code walkthrough; the fourth is native Kubernetes behavior and is not covered here.
Source code walkthrough
Rollout supports multiple workload types (native Kubernetes: Deployment, ReplicaSet, StatefulSet; Kruise CRDs: CloneSet, Advanced DaemonSet, Advanced StatefulSet). The walkthrough below is based on the most familiar one, Deployment.
Before walking through the three parts, we need to start from the webhook in the rollout module; it is the key to how Rollout implements canary releases.
Webhook logic for Deployment
openkruise/rollouts/pkg/webhook/workload/mutating/workload_update_handler.go # Handle() intercepts update operations on the Deployment:
- it decides whether this Deployment update needs to be controlled by a Rollout;
- if so, it automatically adds the annotations
rollouts.kruise.io/in-progressing
rollouts.kruise.io/deployment-strategy
to record the rollout state, and also sets paused: true, so that native Kubernetes no longer operates on this Deployment;
// Handle handles admission requests.
func (h *WorkloadHandler) Handle(ctx context.Context, req admission.Request) admission.Response {
...
// Because kruise Rollout is a bypassed approach, needs to be determined in the webhook if the workload meet to enter the rollout progressing:
// 1. Traffic Routing, all the following conditions must be met
// a. PodTemplateSpec is changed
// b. Workload must only contain one version of Pods
// 2. No Traffic Routing, Only Release in batches
// a. No RolloutId
// - PodTemplateSpec is changed
// b. Configure RolloutId
// - RolloutId and PodTemplateSpec change, enter the rollout progressing.
// - RolloutId changes and PodTemplateSpec no change, enter the rollout progressing
// - RolloutId no change and PodTemplateSpec change, do not enter the rollout progressing
switch req.Kind.Group {
// kruise cloneSet
case kruiseappsv1alpha1.GroupVersion.Group:
switch req.Kind.Kind {
case util.ControllerKruiseKindCS.Kind:
// check cloneset
...
case util.ControllerKruiseKindDS.Kind:
// check daemonset
...
// native k8s deployment
case apps.SchemeGroupVersion.Group:
switch req.Kind.Kind {
case util.ControllerKindDep.Kind:
// check deployment
...
// handle the deployment
changed, err := h.handleDeployment(newObjClone, oldObj)
if err != nil {
return admission.Errored(http.StatusBadRequest, err)
}
...
// patch the object, adding the annotations and the paused field set in handleDeployment
return admission.PatchResponseFromRaw(original, marshalled)
}
}
rollouts.kruise.io/deployment-strategy
This annotation is used later when the Deployment controller calculates the replica count of each ReplicaSet.
func (h *WorkloadHandler) handleDeployment(newObj, oldObj *apps.Deployment) (bool, error) {
// in rollout progressing
if newObj.Annotations[util.InRolloutProgressingAnnotation] != "" {
modified := false
if !newObj.Spec.Paused {
modified = true
newObj.Spec.Paused = true
}
//
strategy := util.GetDeploymentStrategy(newObj)
switch strings.ToLower(string(strategy.RollingStyle)) {
case strings.ToLower(string(appsv1alpha1.PartitionRollingStyle)):
// Make sure it is always Recreate to disable native controller
if newObj.Spec.Strategy.Type == apps.RollingUpdateDeploymentStrategyType {
modified = true
newObj.Spec.Strategy.Type = apps.RecreateDeploymentStrategyType
}
if newObj.Spec.Strategy.RollingUpdate != nil {
modified = true
// Allow to modify RollingUpdate config during rolling
strategy.RollingUpdate = newObj.Spec.Strategy.RollingUpdate
newObj.Spec.Strategy.RollingUpdate = nil
}
if isEffectiveDeploymentRevisionChange(oldObj, newObj) {
modified = true
strategy.Paused = true
}
setDeploymentStrategyAnnotation(strategy, newObj)
default:
// Do not allow to modify strategy as Recreate during rolling
if newObj.Spec.Strategy.Type == apps.RecreateDeploymentStrategyType {
modified = true
newObj.Spec.Strategy = oldObj.Spec.Strategy
klog.Warningf("")
}
}
return modified, nil
}
...
// if isEffectiveDeploymentRevisionChange returns false, this Deployment update is not controlled by the rollout
// see the function below for details
if !isEffectiveDeploymentRevisionChange(oldObj, newObj) {
return false, nil
}
// if no Rollout's workloadRef matches this workload, return directly: this Deployment will not be handled by a Rollout
rollout, err := h.fetchMatchedRollout(newObj)
if err != nil {
return false, err
} else if rollout == nil || rollout.Spec.Strategy.Canary == nil {
return false, nil
}
....
// need to set workload paused = true ------ the key point: this is what blocks the Deployment's own rolling update
newObj.Spec.Paused = true
state := &util.RolloutState{RolloutName: rollout.Name}
by, _ := json.Marshal(state)
if newObj.Annotations == nil {
newObj.Annotations = map[string]string{}
}
// add an annotation indicating which Rollout is currently operating on this Deployment
newObj.Annotations[util.InRolloutProgressingAnnotation] = string(by)
klog.Infof("Deployment(%s/%s) will be released incrementally based on Rollout(%s)", newObj.Namespace, newObj.Name, rollout.Name)
return true, nil
}
So when the Deployment is updated, the webhook intercepts the operation and performs this preprocessing: it sets the Deployment's paused field to true. The Deployment's own update is therefore paused, and control passes to the rollout logic.
func isEffectiveDeploymentRevisionChange(oldObj, newObj *apps.Deployment) bool {
if newObj.Annotations[appsv1alpha1.RolloutIDLabel] != "" &&
oldObj.Annotations[appsv1alpha1.RolloutIDLabel] == newObj.Annotations[appsv1alpha1.RolloutIDLabel] {
return false
} else if newObj.Annotations[appsv1alpha1.RolloutIDLabel] == "" &&
util.EqualIgnoreHash(&oldObj.Spec.Template, &newObj.Spec.Template) {
// if only the Deployment's replicas field is changed, return false, because replicas is not part of the template
return false
}
return true
}
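isEffectiveDeploymentRevisionChange relies on EqualIgnoreHash to decide whether the pod template really changed. As a reference, here is a minimal sketch of what such a comparison typically does, mirroring the upstream Kubernetes deployment util; the helper used by Kruise may differ in detail.
package demo

import (
    appsv1 "k8s.io/api/apps/v1"
    corev1 "k8s.io/api/core/v1"
    apiequality "k8s.io/apimachinery/pkg/api/equality"
)

// equalIgnoreHash compares two pod templates while ignoring the pod-template-hash
// label, so only "real" template changes count as a new revision.
func equalIgnoreHash(t1, t2 *corev1.PodTemplateSpec) bool {
    t1Copy := t1.DeepCopy()
    t2Copy := t2.DeepCopy()
    delete(t1Copy.Labels, appsv1.DefaultDeploymentUniqueLabelKey) // "pod-template-hash"
    delete(t2Copy.Labels, appsv1.DefaultDeploymentUniqueLabelKey)
    return apiequality.Semantic.DeepEqual(t1Copy, t2Copy)
}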
// set the strategy annotation
func setDeploymentStrategyAnnotation(strategy appsv1alpha1.DeploymentStrategy, d *apps.Deployment) {
strategyAnno, _ := json.Marshal(&strategy)
d.Annotations[appsv1alpha1.DeploymentStrategyAnnotation] = string(strategyAnno)
}
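For intuition, the snippet below produces an annotation value of the same shape as the rollouts.kruise.io/deployment-strategy annotation that later shows up on the Deployment (see the Deployment YAML further below). The deploymentStrategy struct here is a hypothetical mirror of the fields visible in that annotation value, not the real appsv1alpha1.DeploymentStrategy type.
package main

import (
    "encoding/json"
    "fmt"

    apps "k8s.io/api/apps/v1"
    "k8s.io/apimachinery/pkg/util/intstr"
)

// deploymentStrategy mirrors the fields visible in the annotation value (hypothetical).
type deploymentStrategy struct {
    RollingStyle  string                        `json:"rollingStyle,omitempty"`
    RollingUpdate *apps.RollingUpdateDeployment `json:"rollingUpdate,omitempty"`
    Partition     intstr.IntOrString            `json:"partition,omitempty"`
    Paused        bool                          `json:"paused,omitempty"`
}

func main() {
    maxUnavailable := intstr.FromString("25%")
    maxSurge := intstr.FromString("25%")
    s := deploymentStrategy{
        RollingStyle:  "Partition",
        RollingUpdate: &apps.RollingUpdateDeployment{MaxUnavailable: &maxUnavailable, MaxSurge: &maxSurge},
        Partition:     intstr.FromString("20%"),
    }
    b, _ := json.Marshal(&s)
    fmt.Println(string(b))
    // {"rollingStyle":"Partition","rollingUpdate":{"maxUnavailable":"25%","maxSurge":"25%"},"partition":"20%"}
}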
Summary
For a Deployment whose pods are all of one revision, the changes made are:
Annotations:
rollouts.kruise.io/stable-revision
spec: Paused = true
For a Deployment that contains pods of two revisions, the changes made are:
Annotations:
rollouts.kruise.io/stable-revision
rollouts.kruise.io/in-progressing
spec: Paused = true
If the Deployment is already under Rollout control (it carries rollouts.kruise.io/in-progressing), the changes made are:
Annotations:
rollouts.kruise.io/deployment-strategy
spec:
- Paused = true
- Strategy.Type: for the partition rolling style, switched from RollingUpdate to Recreate (the RollingUpdate config is moved into the strategy annotation)
How the Rollout updates the BatchRelease
First, a few notes on rollout.status. The Rollout's Reconcile logic is essentially the handling of state transitions within status. Here we only cover the case where Status.Phase=Progressing with cond.type=Progressing && cond.Reason=InRolling, because it is in this state that the BatchRelease gets updated, which is the core of how the whole canary release keeps progressing.
Note: this article leaves traffic routing aside; it will be covered separately in a follow-up article.
apiVersion: rollouts.kruise.io/v1alpha1
kind: Rollout
metadata:
annotations:
kubectl.kubernetes.io/last-applied-configuration: |
{"apiVersion":"rollouts.kruise.io/v1alpha1","kind":"Rollout","metadata":{"annotations":{"rollouts.kruise.io/rolling-style":"partition"},"name":"rollouts-demo","namespace":"test"},"spec":{"objectRef":{"workloadRef":{"apiVersion":"apps/v1","kind":"Deployment","name":"echoserver"}},"strategy":{"canary":{"steps":[{"pause":{"duration":600},"replicas":"50%","weight":50},{"pause":{"duration":600},"replicas":"100%","weight":100}],"trafficRoutings":[{"ingress":{"name":"echoserver"},"service":"echoserver"}]}}}}
rollouts.kruise.io/hash: 4d5dx25f76vbb498v9w9w9b6z6f9fw9w4z54bvxdfw664wbfd96dxzb7zxvf4zd8
rollouts.kruise.io/rolling-style: partition
creationTimestamp: "2023-10-24T03:13:19Z"
finalizers:
- rollouts.kruise.io/rollout
generation: 1
name: rollouts-demo
namespace: test
resourceVersion: "104658052"
uid: 2eb27368-22a2-4b9a-a3da-68d0eeffd5d0
spec:
objectRef:
workloadRef:
apiVersion: apps/v1
kind: Deployment
name: echoserver
strategy:
canary:
steps:
- pause:
duration: 600
replicas: 50%
weight: 50
- pause:
duration: 600
replicas: 100%
weight: 100
trafficRoutings:
- ingress:
name: echoserver
service: echoserver
status:
# record of executing the batches defined in spec.strategy.canary
canaryStatus:
canaryReadyReplicas: 3
canaryReplicas: 3
canaryRevision: 645754b49f # the revision being canaried, identical to the new podTemplateHash
currentStepIndex: 1 # the batch currently being executed
currentStepState: StepPaused # state of the current step (StepUpgrade, StepTrafficRouting, StepMetricsAnalysis, StepPaused, StepReady, Completed)
lastUpdateTime: "2023-10-24T03:44:45Z"
message: BatchRelease is at state Ready, rollout-id , step 1
observedWorkloadGeneration: 10
podTemplateHash: 645754b49f
rolloutHash: 4d5dx25f76vbb498v9w9w9b6z6f9fw9w4z54bvxdfw664wbfd96dxzb7zxvf4zd8
stableRevision: 54455bd6bd # the stable revision, identical to the old podTemplateHash
conditions:
- lastTransitionTime: "2023-10-24T03:44:31Z"
lastUpdateTime: "2023-10-24T03:44:31Z"
message: Rollout is in Progressing
reason: InRolling # possible values: Initializing, InRolling, Finalising, Completed, Cancelling, Paused
status: "True"
type: Progressing
message: Rollout is in step(1/2), and wait duration(600 seconds) to enter the next
step
observedGeneration: 1
phase: Progressing # phase of the Rollout (Initial, Healthy, Progressing, Terminating, Disabled, Disabling)
Core code flow
github.com/openkruise/rollouts/pkg/controller/rollout/rollout_controller.go # Reconcile (entry point for reconciling the Rollout) ->
github.com/openkruise/rollouts/pkg/controller/rollout/rollout_progressing.go # reconcileRolloutProgressing (drives cond.Reason when status.condition.type=Progressing) ->
github.com/openkruise/rollouts/pkg/controller/rollout/rollout_progressing.go # doProgressingInRolling ->
github.com/openkruise/rollouts/pkg/controller/rollout/rollout_progressing.go # handleNormalRolling ->
github.com/openkruise/rollouts/pkg/controller/rollout/rollout_canary.go # runCanary (the core: drives status.canaryStatus.CurrentStepState; see the sketch after this list) ->
github.com/openkruise/rollouts/pkg/controller/rollout/rollout_canary.go # doCanaryUpgrade ->
github.com/openkruise/rollouts/pkg/controller/rollout/rollout_canary.go # runBatchRelease
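To make the CurrentStepState flow easier to follow, here is a simplified, self-contained sketch of how runCanary moves a step through its states. The state values are the ones listed in the Rollout status comment above; the real controller also performs the actual BatchRelease updates, traffic routing, metrics analysis and pause handling, which are only hinted at in comments here.
package main

import "fmt"

type stepState string

const (
    stepUpgrade         stepState = "StepUpgrade"
    stepTrafficRouting  stepState = "StepTrafficRouting"
    stepMetricsAnalysis stepState = "StepMetricsAnalysis"
    stepPaused          stepState = "StepPaused"
    stepReady           stepState = "StepReady"
    stepCompleted       stepState = "Completed"
)

// nextState returns the following state and step index for one reconcile pass.
func nextState(state stepState, currentStep, totalSteps int) (stepState, int) {
    switch state {
    case stepUpgrade:
        // doCanaryUpgrade: create or patch the BatchRelease for this step
        return stepTrafficRouting, currentStep
    case stepTrafficRouting:
        // adjust the canary Service/Ingress weight
        return stepMetricsAnalysis, currentStep
    case stepMetricsAnalysis:
        return stepPaused, currentStep
    case stepPaused:
        // wait for pause.duration (or manual confirmation)
        return stepReady, currentStep
    case stepReady:
        if currentStep < totalSteps {
            // move to the next batch and start upgrading again
            return stepUpgrade, currentStep + 1
        }
        return stepCompleted, currentStep
    }
    return state, currentStep
}

func main() {
    state, step, total := stepUpgrade, 1, 3
    for state != stepCompleted {
        next, nextStep := nextState(state, step, total)
        fmt.Printf("step %d: %s -> %s\n", step, state, next)
        state, step = next, nextStep
    }
}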
Key code snippets
github.com/openkruise/rollouts/pkg/controller/rollout/rollout_canary.go # doCanaryUpgrade
func (m *canaryReleaseManager) doCanaryUpgrade(c *RolloutContext) (bool, error) {
// verify whether batchRelease configuration is the latest
steps := len(c.Rollout.Spec.Strategy.Canary.Steps)
canaryStatus := c.NewStatus.CanaryStatus
cond := util.GetRolloutCondition(*c.NewStatus, v1alpha1.RolloutConditionProgressing)
cond.Message = fmt.Sprintf("Rollout is in step(%d/%d), and upgrade workload to new version", canaryStatus.CurrentStepIndex, steps)
c.NewStatus.Message = cond.Message
// run batch release to upgrade the workloads
// canaryStatus.CurrentStepIndex is first set in reconcileRolloutProgressing and initialized to 1
// afterwards, runCanary reassigns it when canaryStatus.CurrentStepState=StepReady: if the state moves to StepUpgrade, CurrentStepIndex++; if it moves to Completed, it is no longer updated
done, br, err := m.runBatchRelease(c.Rollout, getRolloutID(c.Workload), canaryStatus.CurrentStepIndex, c.Workload.IsInRollback)
if err != nil {
return false, err
} else if !done {
return false, nil
}
...
}
github.com/openkruise/rollouts/pkg/controller/rollout/rollout_canary.go # runBatchRelease
func (m *canaryReleaseManager) runBatchRelease(rollout *v1alpha1.Rollout, rolloutId string, batch int32, isRollback bool) (bool, *v1alpha1.BatchRelease, error) {
batch = batch - 1
br, err := m.fetchBatchRelease(rollout.Namespace, rollout.Name)
if errors.IsNotFound(err) {
// create new BatchRelease Crd
br = createBatchRelease(rollout, rolloutId, batch, isRollback)
if err = m.Create(context.TODO(), br); err != nil && !errors.IsAlreadyExists(err) {
klog.Errorf("rollout(%s/%s) create BatchRelease failed: %s", rollout.Namespace, rollout.Name, err.Error())
return false, nil, err
}
klog.Infof("rollout(%s/%s) create BatchRelease(%s) success", rollout.Namespace, rollout.Name, util.DumpJSON(br))
return false, br, nil
} else if err != nil {
klog.Errorf("rollout(%s/%s) fetch BatchRelease failed: %s", rollout.Namespace, rollout.Name, err.Error())
return false, nil, err
}
// check whether batchRelease configuration is the latest
newBr := createBatchRelease(rollout, rolloutId, batch, isRollback)
if reflect.DeepEqual(br.Spec, newBr.Spec) && reflect.DeepEqual(br.Annotations, newBr.Annotations) {
klog.Infof("rollout(%s/%s) do batchRelease batch(%d) success", rollout.Namespace, rollout.Name, batch+1)
return true, br, nil
}
// update batchRelease to the latest version
if err = retry.RetryOnConflict(retry.DefaultBackoff, func() error {
if err = m.Get(context.TODO(), client.ObjectKey{Namespace: newBr.Namespace, Name: newBr.Name}, br); err != nil {
klog.Errorf("error getting BatchRelease(%s/%s) from client", newBr.Namespace, newBr.Name)
return err
}
br.Spec = newBr.Spec
br.Annotations = newBr.Annotations
// update the batchRelease; through the watch mechanism the BatchRelease Reconcile observes the change and handles it
return m.Client.Update(context.TODO(), br)
}); err != nil {
klog.Errorf("rollout(%s/%s) update batchRelease failed: %s", rollout.Namespace, rollout.Name, err.Error())
return false, nil, err
}
klog.Infof("rollout(%s/%s) update batchRelease(%s) configuration to latest", rollout.Namespace, rollout.Name, util.DumpJSON(br))
return false, br, nil
}
github.com/openkruise/rollouts/pkg/controller/rollout/rollout_canary.go # createBatchRelease
// build the BatchRelease object
func createBatchRelease(rollout *v1alpha1.Rollout, rolloutID string, batch int32, isRollback bool) *v1alpha1.BatchRelease {
var batches []v1alpha1.ReleaseBatch
// build the BatchRelease batches from the canary steps
for _, step := range rollout.Spec.Strategy.Canary.Steps {
if step.Replicas == nil {
batches = append(batches, v1alpha1.ReleaseBatch{CanaryReplicas: intstr.FromString(strconv.Itoa(int(*step.Weight)) + "%")})
} else {
batches = append(batches, v1alpha1.ReleaseBatch{CanaryReplicas: *step.Replicas})
}
}
br := &v1alpha1.BatchRelease{
ObjectMeta: metav1.ObjectMeta{
Namespace: rollout.Namespace,
Name: rollout.Name,
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(rollout, rolloutControllerKind)},
},
Spec: v1alpha1.BatchReleaseSpec{
TargetRef: v1alpha1.ObjectRef{
WorkloadRef: &v1alpha1.WorkloadRef{
APIVersion: rollout.Spec.ObjectRef.WorkloadRef.APIVersion,
Kind: rollout.Spec.ObjectRef.WorkloadRef.Kind,
Name: rollout.Spec.ObjectRef.WorkloadRef.Name,
},
},
ReleasePlan: v1alpha1.ReleasePlan{
Batches: batches,
RolloutID: rolloutID,
// set the batch to execute now; the BatchRelease later uses this field when computing currentBatch in its status
BatchPartition: utilpointer.Int32Ptr(batch),
FailureThreshold: rollout.Spec.Strategy.Canary.FailureThreshold,
PatchPodTemplateMetadata: rollout.Spec.Strategy.Canary.PatchPodTemplateMetadata,
},
},
}
annotations := map[string]string{}
if isRollback {
annotations[v1alpha1.RollbackInBatchAnnotation] = rollout.Annotations[v1alpha1.RollbackInBatchAnnotation]
}
if style, ok := rollout.Annotations[v1alpha1.RolloutStyleAnnotation]; ok {
annotations[v1alpha1.RolloutStyleAnnotation] = style
}
if len(annotations) > 0 {
br.Annotations = annotations
}
return br
}
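A tiny worked example of the loop above: it shows how canary steps turn into releasePlan.batches. When a step sets replicas, that value is used directly; when only weight is set, the weight is converted into a percentage string. This is a self-contained illustration, not Kruise code.
package main

import (
    "fmt"
    "strconv"

    "k8s.io/apimachinery/pkg/util/intstr"
)

type step struct {
    weight   int32
    replicas *intstr.IntOrString // nil means "derive the batch size from weight"
}

func main() {
    r50 := intstr.FromString("50%")
    r100 := intstr.FromString("100%")
    steps := []step{
        {weight: 20},                   // no replicas -> batch becomes "20%"
        {weight: 50, replicas: &r50},   // replicas set -> batch is "50%"
        {weight: 100, replicas: &r100}, // replicas set -> batch is "100%"
    }
    var batches []intstr.IntOrString
    for _, s := range steps {
        if s.replicas == nil {
            batches = append(batches, intstr.FromString(strconv.Itoa(int(s.weight))+"%"))
        } else {
            batches = append(batches, *s.replicas)
        }
    }
    for i, b := range batches {
        fmt.Printf("batch %d: canaryReplicas=%s\n", i, b.String())
    }
    // batch 0: canaryReplicas=20%
    // batch 1: canaryReplicas=50%
    // batch 2: canaryReplicas=100%
}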
Summary: what the Rollout controller does
- manages the Rollout's own lifecycle
- manages and updates the BatchRelease
- controls traffic routing (not covered in this article)
How the BatchRelease updates the Deployment
apiVersion: rollouts.kruise.io/v1alpha1
kind: BatchRelease
metadata:
annotations:
rollouts.kruise.io/rolling-style: partition
creationTimestamp: "2023-10-25T09:19:16Z"
finalizers:
- rollouts.kruise.io/batch-release-finalizer
generation: 1
name: rollouts-demo
namespace: test
ownerReferences:
- apiVersion: rollouts.kruise.io/v1alpha1
blockOwnerDeletion: true
controller: true
kind: Rollout
name: rollouts-demo
uid: 6745a9a7-52c6-4ce8-a090-1d26977a12d3
resourceVersion: "105406988"
uid: f01ed932-1473-4499-af39-be8594e29093
# everything in spec is initialized by the Rollout controller
spec:
releasePlan:
# the batch to execute
batchPartition: 0
batches:
# initialized from the batches (steps) defined in the Rollout
- canaryReplicas: 50%
- canaryReplicas: 100%
targetReference:
workloadRef:
apiVersion: apps/v1
kind: Deployment
name: echoserver
status:
canaryStatus:
batchReadyTime: "2023-10-25T09:19:20Z"
batchState: Ready # state flow: Upgrading -> Verifying -> Ready
currentBatch: 0 # derived from spec.releasePlan.batchPartition
updatedReadyReplicas: 3
updatedReplicas: 3
observedGeneration: 1
observedReleasePlanHash: 41ed02abe4a3a620a5967efc58407ae1e1ad1bbbd70ee993363d98bdc1864fcb
observedWorkloadReplicas: 6
phase: Progressing # phase flow: Preparing -> Progressing -> Finalizing -> Completed
stableRevision: 54455bd6bd # the old (stable) revision
updateRevision: 645754b49f # the podTemplateHash to update to
Core code flow
github.com/openkruise/rollouts/pkg/controller/batchrelease/batchrelease_controller.go # Reconcile ->
github.com/openkruise/rollouts/pkg/controller/batchrelease/batchrelease_executor.go # Do (two things: 1. syncStatusBeforeExecuting initializes the status and decides whether the release is paused; 2. executeBatchReleasePlan performs the update) ->
github.com/openkruise/rollouts/pkg/controller/batchrelease/batchrelease_executor.go # executeBatchReleasePlan (drives newStatus.Phase) ->
github.com/openkruise/rollouts/pkg/controller/batchrelease/batchrelease_executor.go # progressBatches (drives newStatus.CanaryStatus.CurrentBatchState; see the sketch after this list) ->
github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/control_plane.go # UpgradeBatch (uses CalculateBatchContext to compute the information used to update the BatchRelease status) ->
github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go # UpgradeBatch (updates the Deployment annotation rollouts.kruise.io/deployment-strategy with the partition percentage of the current batch)
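To make the CurrentBatchState flow easier to follow, here is a simplified, self-contained sketch of how progressBatches moves one batch through its states. The state names come from the BatchRelease status shown above; the workload patch and the exact readiness check of the real controller are only assumed here for illustration.
package main

import "fmt"

// nextBatchState returns the next batch state for one reconcile pass.
func nextBatchState(state string, updatedReady, planned int32) string {
    switch state {
    case "Upgrading":
        // UpgradeBatch patches the partition of the workload
        // (for Deployment: the rollouts.kruise.io/deployment-strategy annotation)
        return "Verifying"
    case "Verifying":
        // assumed readiness check: wait until enough new-revision pods are ready
        if updatedReady >= planned {
            return "Ready"
        }
        return "Verifying"
    case "Ready":
        // stays Ready until the Rollout bumps spec.releasePlan.batchPartition,
        // which starts the next batch with state Upgrading again
        return "Ready"
    }
    return state
}

func main() {
    fmt.Println(nextBatchState("Upgrading", 0, 3)) // Verifying
    fmt.Println(nextBatchState("Verifying", 1, 3)) // Verifying
    fmt.Println(nextBatchState("Verifying", 3, 3)) // Ready
}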
Key code snippets
github.com/openkruise/rollouts/pkg/controller/batchrelease/batchrelease_status.go # signalRecalculate
func signalRecalculate(release *v1alpha1.BatchRelease, newStatus *v1alpha1.BatchReleaseStatus) {
...
if release.Spec.ReleasePlan.BatchPartition != nil && release.Spec.ReleasePlan.RolloutID == observedRolloutID {
// initialize currentBatch in the status from batchPartition
currentBatch = integer.Int32Min(*release.Spec.ReleasePlan.BatchPartition, int32(len(release.Spec.ReleasePlan.Batches)-1))
}
klog.Infof("BatchRelease(%v) canary batch changed from %v to %v when the release plan changed, observed-rollout-id: %s, current-rollout-id: %s",
client.ObjectKeyFromObject(release), newStatus.CanaryStatus.CurrentBatch, currentBatch, observedRolloutID, release.Spec.ReleasePlan.RolloutID)
newStatus.CanaryStatus.BatchReadyTime = nil
newStatus.CanaryStatus.CurrentBatch = currentBatch
newStatus.ObservedRolloutID = release.Spec.ReleasePlan.RolloutID
newStatus.CanaryStatus.CurrentBatchState = v1alpha1.UpgradingBatchState
newStatus.ObservedReleasePlanHash = util.HashReleasePlanBatches(&release.Spec.ReleasePlan)
}
github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go # CalculateBatchContext
func (rc *realController) CalculateBatchContext(release *v1alpha1.BatchRelease) (*batchcontext.BatchContext, error) {
rolloutID := release.Spec.ReleasePlan.RolloutID
if rolloutID != "" {
// if rollout-id is set, the pod will be patched batch label,
// so we have to list pod here.
if _, err := rc.ListOwnedPods(); err != nil {
return nil, err
}
}
currentBatch := release.Status.CanaryStatus.CurrentBatch
// get the partition percentage to apply for the current batch
desiredPartition := release.Spec.ReleasePlan.Batches[currentBatch].CanaryReplicas
PlannedUpdatedReplicas := deploymentutil.NewRSReplicasLimit(desiredPartition, rc.object)
return &batchcontext.BatchContext{
Pods: rc.pods,
RolloutID: rolloutID,
CurrentBatch: currentBatch,
UpdateRevision: release.Status.UpdateRevision,
DesiredPartition: desiredPartition,
FailureThreshold: release.Spec.ReleasePlan.FailureThreshold,
Replicas: rc.Replicas,
UpdatedReplicas: rc.Status.UpdatedReplicas,
UpdatedReadyReplicas: rc.Status.UpdatedReadyReplicas,
PlannedUpdatedReplicas: PlannedUpdatedReplicas,
DesiredUpdatedReplicas: PlannedUpdatedReplicas,
}, nil
}
github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go # UpgradeBatch
func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error {
...
strategy := util.GetDeploymentStrategy(rc.object)
if control.IsCurrentMoreThanOrEqualToDesired(strategy.Partition, ctx.DesiredPartition) {
return nil // Satisfied, no need patch again.
}
d := rc.object.DeepCopy()
// DesiredPartition was set in CalculateBatchContext
strategy.Partition = ctx.DesiredPartition
patchData := patch.NewDeploymentPatch()
// v1alpha1.DeploymentStrategyAnnotation=rollouts.kruise.io/deployment-strategy
patchData.InsertAnnotation(v1alpha1.DeploymentStrategyAnnotation, util.DumpJSON(&strategy))
// patch the Deployment
return rc.client.Patch(context.TODO(), d, patchData)
}
Summary: what the BatchRelease controller does
- manages the BatchRelease lifecycle
- updates the Deployment's annotations
How the Deployment updates the ReplicaSet
A Deployment can be updated with one of two rollingStyle options:
- Partition: does not create a new Deployment
- Canary: rolls out in canary fashion and creates an additional canary Deployment
This article uses the Partition style as the example.
apiVersion: apps/v1
kind: Deployment
metadata:
annotations:
batchrelease.rollouts.kruise.io/control-info: '{"apiVersion":"rollouts.kruise.io/v1alpha1","kind":"BatchRelease","name":"rollouts-demo","uid":"2cf90da0-bb70-4aca-abef-9a0632377147","controller":true,"blockOwnerDeletion":true}'
deployment.kubernetes.io/revision: "4"
kubectl.kubernetes.io/last-applied-configuration: |
{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"labels":{"app":"echoserver"},"name":"echoserver","namespace":"ghx-test"},"spec":{"replicas":6,"selector":{"matchLabels":{"app":"echoserver"}},"template":{"metadata":{"labels":{"app":"echoserver"}},"spec":{"containers":[{"env":[{"name":"NODE_NAME","value":"version1"},{"name":"PORT","value":"8080"},{"name":"POD_NAME","valueFrom":{"fieldRef":{"fieldPath":"metadata.name"}}},{"name":"POD_NAMESPACE","valueFrom":{"fieldRef":{"fieldPath":"metadata.namespace"}}},{"name":"POD_IP","valueFrom":{"fieldRef":{"fieldPath":"status.podIP"}}}],"image":"openkruise-registry.cn-shanghai.cr.aliyuncs.com/openkruise/demo:1.10.2","imagePullPolicy":"IfNotPresent","name":"echoserver","ports":[{"containerPort":8080}]}]}}}}
rollouts.kruise.io/deployment-extra-status: '{"updatedReadyReplicas":3,"expectedUpdatedReplicas":3}'
# the BatchRelease updates the partition for the batch to be released and also records the rolling style
rollouts.kruise.io/deployment-strategy: '{"rollingStyle":"Partition","rollingUpdate":{"maxUnavailable":"25%","maxSurge":"25%"},"partition":"20%"}'
rollouts.kruise.io/in-progressing: '{"rolloutName":"rollouts-demo"}'
creationTimestamp: "2023-10-24T02:27:30Z"
generation: 25
labels:
app: echoserver
rollouts.kruise.io/controlled-by-advanced-deployment-controller: "true"
rollouts.kruise.io/stable-revision: 645754b49f
name: echoserver
namespace: ghx-test
resourceVersion: "104752031"
uid: 2328acab-51c4-4bc2-87c1-501bd65838b8
spec:
# this field was set by the webhook
paused: true
...
As explained in the section on the Deployment webhook logic, paused=true is set on the Deployment, so native Kubernetes no longer performs any operation on it; the rollout module therefore implements its own Deployment Reconcile.
Core code flow
github.com/openkruise/rollouts/pkg/controller/deployment/controller.go # Reconcile - >
github.com/openkruise/rollouts/pkg/controller/deployment/deployment_controller.go # syncDeployment ->
github.com/openkruise/rollouts/pkg/controller/deployment/rolling.go # rolloutRolling ->
github.com/openkruise/rollouts/pkg/controller/deployment/rolling.go # reconcileNewReplicaSet (computes the replica count of the new revision) ->
github.com/openkruise/rollouts/pkg/controller/deployment/sync.go # scaleReplicaSetAndRecordEvent->
github.com/openkruise/rollouts/pkg/controller/deployment/sync.go # scaleReplicaSet
Key code snippets
// compute the new revision's replica count from the partition in the Deployment annotation rollouts.kruise.io/deployment-strategy
func NewRSReplicasLimit(partition intstrutil.IntOrString, deployment *apps.Deployment) int32 {
replicas := int(*deployment.Spec.Replicas)
replicaLimit, _ := intstrutil.GetScaledValueFromIntOrPercent(&partition, replicas, true)
replicaLimit = integer.IntMax(integer.IntMin(replicaLimit, replicas), 0)
if replicas > 1 && partition.Type == intstrutil.String && partition.String() != "100%" {
replicaLimit = integer.IntMin(replicaLimit, replicas-1)
}
return int32(replicaLimit)
}
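For intuition, here is a small self-contained program that copies the function above and evaluates it for this article's Deployment (replicas: 6) at the three partitions used in the rollout steps. The local copy exists only for illustration.
package main

import (
    "fmt"

    intstrutil "k8s.io/apimachinery/pkg/util/intstr"
    "k8s.io/utils/integer"
)

// newRSReplicasLimit is a local copy of the NewRSReplicasLimit logic above.
func newRSReplicasLimit(partition intstrutil.IntOrString, replicas int) int32 {
    replicaLimit, _ := intstrutil.GetScaledValueFromIntOrPercent(&partition, replicas, true)
    replicaLimit = integer.IntMax(integer.IntMin(replicaLimit, replicas), 0)
    if replicas > 1 && partition.Type == intstrutil.String && partition.String() != "100%" {
        // a percentage partition below 100% never updates every pod
        replicaLimit = integer.IntMin(replicaLimit, replicas-1)
    }
    return int32(replicaLimit)
}

func main() {
    for _, p := range []string{"20%", "50%", "100%"} {
        fmt.Printf("partition %s of 6 replicas -> the new ReplicaSet gets %d pods\n",
            p, newRSReplicasLimit(intstrutil.FromString(p), 6))
    }
    // partition 20% of 6 replicas -> the new ReplicaSet gets 2 pods
    // partition 50% of 6 replicas -> the new ReplicaSet gets 3 pods
    // partition 100% of 6 replicas -> the new ReplicaSet gets 6 pods
}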
github.com/openkruise/rollouts/pkg/controller/deployment/sync.go # scaleReplicaSet
// update the ReplicaSet replica count
func (dc *DeploymentController) scaleReplicaSet(ctx context.Context, rs *apps.ReplicaSet, newScale int32, deployment *apps.Deployment, scalingOperation string) (bool, *apps.ReplicaSet, error) {
sizeNeedsUpdate := *(rs.Spec.Replicas) != newScale
annotationsNeedUpdate := deploymentutil.ReplicasAnnotationsNeedUpdate(rs, *(deployment.Spec.Replicas), *(deployment.Spec.Replicas)+deploymentutil.MaxSurge(deployment, &dc.strategy))
scaled := false
var err error
if sizeNeedsUpdate || annotationsNeedUpdate {
oldScale := *(rs.Spec.Replicas)
rsCopy := rs.DeepCopy()
*(rsCopy.Spec.Replicas) = newScale
deploymentutil.SetReplicasAnnotations(rsCopy, *(deployment.Spec.Replicas), *(deployment.Spec.Replicas)+deploymentutil.MaxSurge(deployment, &dc.strategy))
rs, err = dc.client.AppsV1().ReplicaSets(rsCopy.Namespace).Update(ctx, rsCopy, metav1.UpdateOptions{})
if err == nil && sizeNeedsUpdate {
scaled = true
dc.eventRecorder.Eventf(deployment, v1.EventTypeNormal, "ScalingReplicaSet", "Scaled %s replica set %s to %d from %d", scalingOperation, rs.Name, newScale, oldScale)
}
}
return scaled, rs, err
}
github.com/openkruise/rollouts/pkg/controller/deployment/sync.go # getNewReplicaSet
// if the new-revision ReplicaSet does not exist, create it and return; if it exists, update it
func (dc *DeploymentController) getNewReplicaSet(ctx context.Context, d *apps.Deployment, rsList, oldRSs []*apps.ReplicaSet, createIfNotExisted bool) (*apps.ReplicaSet, error) {
// return the ReplicaSet that matches the current template
existingNewRS := deploymentutil.FindNewReplicaSet(d, rsList)
// Calculate the max revision number among all old RSes
maxOldRevision := deploymentutil.MaxRevision(oldRSs)
// Calculate revision number for this new replica set
newRevision := strconv.FormatInt(maxOldRevision+1, 10)
// Latest replica set exists. We need to sync its annotations (includes copying all but
// annotationsToSkip from the parent deployment, and update revision, desiredReplicas,
// and maxReplicas) and also update the revision annotation in the deployment with the
// latest revision.
if existingNewRS != nil {
rsCopy := existingNewRS.DeepCopy()
// Set existing new replica set's annotation
annotationsUpdated := deploymentutil.SetNewReplicaSetAnnotations(d, rsCopy, &dc.strategy, newRevision, true, maxRevHistoryLengthInChars)
minReadySecondsNeedsUpdate := rsCopy.Spec.MinReadySeconds != d.Spec.MinReadySeconds
if annotationsUpdated || minReadySecondsNeedsUpdate {
rsCopy.Spec.MinReadySeconds = d.Spec.MinReadySeconds
return dc.client.AppsV1().ReplicaSets(rsCopy.ObjectMeta.Namespace).Update(ctx, rsCopy, metav1.UpdateOptions{})
}
// Should use the revision in existingNewRS's annotation, since it set by before
needsUpdate := deploymentutil.SetDeploymentRevision(d, rsCopy.Annotations[deploymentutil.RevisionAnnotation])
// If no other Progressing condition has been recorded and we need to estimate the progress
// of this deployment then it is likely that old users started caring about progress. In that
// case we need to take into account the first time we noticed their new replica set.
cond := deploymentutil.GetDeploymentCondition(d.Status, apps.DeploymentProgressing)
if deploymentutil.HasProgressDeadline(d) && cond == nil {
msg := fmt.Sprintf("Found new replica set %q", rsCopy.Name)
condition := deploymentutil.NewDeploymentCondition(apps.DeploymentProgressing, v1.ConditionTrue, deploymentutil.FoundNewRSReason, msg)
deploymentutil.SetDeploymentCondition(&d.Status, *condition)
needsUpdate = true
}
...
return rsCopy, nil
}
// new ReplicaSet does not exist, create one.
newRSTemplate := *d.Spec.Template.DeepCopy()
podTemplateSpecHash := util.ComputeHash(&newRSTemplate, d.Status.CollisionCount)
newRSTemplate.Labels = labelsutil.CloneAndAddLabel(d.Spec.Template.Labels, apps.DefaultDeploymentUniqueLabelKey, podTemplateSpecHash)
// Add podTemplateHash label to selector.
newRSSelector := labelsutil.CloneSelectorAndAddLabel(d.Spec.Selector, apps.DefaultDeploymentUniqueLabelKey, podTemplateSpecHash)
// Create new ReplicaSet
newRS := apps.ReplicaSet{
ObjectMeta: metav1.ObjectMeta{
// Make the name deterministic, to ensure idempotence
Name: d.Name + "-" + podTemplateSpecHash,
Namespace: d.Namespace,
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(d, controllerKind)},
Labels: newRSTemplate.Labels,
},
Spec: apps.ReplicaSetSpec{
Replicas: new(int32),
MinReadySeconds: d.Spec.MinReadySeconds,
Selector: newRSSelector,
Template: newRSTemplate,
},
}
allRSs := append(oldRSs, &newRS)
newReplicasCount, err := deploymentutil.NewRSNewReplicas(d, allRSs, &newRS, &dc.strategy)
if err != nil {
return nil, err
}
// We ensure that newReplicasLowerBound is greater than 0 unless deployment is 0,
// this is because if we set new replicas as 0, the native deployment controller
// will flight with ours.
newReplicasLowerBound := deploymentutil.NewRSReplicasLowerBound(d, &dc.strategy)
*(newRS.Spec.Replicas) = integer.Int32Max(newReplicasCount, newReplicasLowerBound)
// Set new replica set's annotation
deploymentutil.SetNewReplicaSetAnnotations(d, &newRS, &dc.strategy, newRevision, false, maxRevHistoryLengthInChars)
// Create the new ReplicaSet. If it already exists, then we need to check for possible
// hash collisions. If there is any other error, we need to report it in the status of
// the Deployment.
alreadyExists := false
createdRS, err := dc.client.AppsV1().ReplicaSets(d.Namespace).Create(ctx, &newRS, metav1.CreateOptions{})
switch {
// We may end up hitting this due to a slow cache or a fast resync of the Deployment.
case errors.IsAlreadyExists(err):
alreadyExists = true
// Fetch a copy of the ReplicaSet.
rs, rsErr := dc.rsLister.ReplicaSets(newRS.Namespace).Get(newRS.Name)
if rsErr != nil {
return nil, rsErr
}
// If the Deployment owns the ReplicaSet and the ReplicaSet's PodTemplateSpec is semantically
// deep equal to the PodTemplateSpec of the Deployment, it's the Deployment's new ReplicaSet.
// Otherwise, this is a hash collision and we need to increment the collisionCount field in
// the status of the Deployment and requeue to try the creation in the next sync.
controllerRef := metav1.GetControllerOf(rs)
if controllerRef != nil && controllerRef.UID == d.UID && deploymentutil.EqualIgnoreHash(&d.Spec.Template, &rs.Spec.Template) {
createdRS = rs
err = nil
break
}
// Matching ReplicaSet is not equal - increment the collisionCount in the DeploymentStatus
// and requeue the Deployment.
if d.Status.CollisionCount == nil {
d.Status.CollisionCount = new(int32)
}
preCollisionCount := *d.Status.CollisionCount
*d.Status.CollisionCount++
// Update the collisionCount for the Deployment and let it requeue by returning the original
// error.
_, dErr := dc.client.AppsV1().Deployments(d.Namespace).UpdateStatus(ctx, d, metav1.UpdateOptions{})
if dErr == nil {
klog.V(2).Infof("Found a hash collision for deployment %q - bumping collisionCount (%d->%d) to resolve it", d.Name, preCollisionCount, *d.Status.CollisionCount)
}
return nil, err
case errors.HasStatusCause(err, v1.NamespaceTerminatingCause):
// if the namespace is terminating, all subsequent creates will fail and we can safely do nothing
return nil, err
case err != nil:
msg := fmt.Sprintf("Failed to create new replica set %q: %v", newRS.Name, err)
if deploymentutil.HasProgressDeadline(d) {
cond := deploymentutil.NewDeploymentCondition(apps.DeploymentProgressing, v1.ConditionFalse, deploymentutil.FailedRSCreateReason, msg)
deploymentutil.SetDeploymentCondition(&d.Status, *cond)
// We don't really care about this error at this point, since we have a bigger issue to report.
// TODO: Identify which errors are permanent and switch DeploymentIsFailed to take into account
// these reasons as well. Related issue: https://github.com/kubernetes/kubernetes/issues/18568
_, _ = dc.client.AppsV1().Deployments(d.Namespace).UpdateStatus(ctx, d, metav1.UpdateOptions{})
}
dc.eventRecorder.Eventf(d, v1.EventTypeWarning, deploymentutil.FailedRSCreateReason, msg)
return nil, err
}
if !alreadyExists && newReplicasCount > 0 {
dc.eventRecorder.Eventf(d, v1.EventTypeNormal, "ScalingReplicaSet", "Scaled up replica set %s to %d", createdRS.Name, newReplicasCount)
}
needsUpdate := deploymentutil.SetDeploymentRevision(d, newRevision)
if !alreadyExists && deploymentutil.HasProgressDeadline(d) {
msg := fmt.Sprintf("Created new replica set %q", createdRS.Name)
condition := deploymentutil.NewDeploymentCondition(apps.DeploymentProgressing, v1.ConditionTrue, deploymentutil.NewReplicaSetReason, msg)
deploymentutil.SetDeploymentCondition(&d.Status, *condition)
needsUpdate = true
}
if needsUpdate {
_, err = dc.client.AppsV1().Deployments(d.Namespace).UpdateStatus(ctx, d, metav1.UpdateOptions{})
}
return createdRS, err
}
Summary: what this Deployment controller does
- creates, updates, and manages ReplicaSets
Overall summary
From the source-code analysis above, it is clear that each CRD focuses on its own job while updating the CRs it is related to. The logic is clean, and the summaries above capture the core steps of each section.
Bonus
How the Rollout Reconcile is triggered and does its work when the Deployment changes
EventHandler
In Kubernetes, an EventHandler is an interface for handling and responding to events that occur in the cluster. It defines a set of methods for different event types, such as object creation, update, and deletion.
The EventHandler interface is usually implemented by controllers. A controller is a component that manages and coordinates Kubernetes resources: it watches the state of objects in the cluster and takes action when needed.
The common methods of the EventHandler interface are:
- Create(event.CreateEvent, workqueue.RateLimitingInterface): called when a new object is added to the cluster; the event carries the added object.
- Update(event.UpdateEvent, workqueue.RateLimitingInterface): called when an existing object is updated; the event carries both the old and the new object.
- Delete(event.DeleteEvent, workqueue.RateLimitingInterface): called when an object is removed from the cluster; the event carries the deleted object.
When a controller implements the EventHandler interface, it can handle the different event types as needed. For example, when a new Pod is added to the cluster, the controller can react in the Create method and kick off the related processing.
By implementing the EventHandler interface, a controller can respond to events happening in the cluster and take the corresponding actions, keeping the objects' actual state in line with the desired state.
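As a minimal, self-contained illustration (assuming the pre-context controller-runtime API used by the code excerpts in this article; newer versions add a context.Context parameter to these methods), an EventHandler that simply enqueues a reconcile request for the object itself could look like the sketch below. It roughly reproduces what handler.EnqueueRequestForObject already provides and is not part of the Kruise code base.
package handlerdemo

import (
    "k8s.io/apimachinery/pkg/types"
    "k8s.io/client-go/util/workqueue"
    "k8s.io/klog/v2"
    "sigs.k8s.io/controller-runtime/pkg/event"
    "sigs.k8s.io/controller-runtime/pkg/handler"
    "sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// enqueueSelf enqueues a reconcile.Request for the object that triggered the event.
type enqueueSelf struct{}

var _ handler.EventHandler = &enqueueSelf{}

func (e *enqueueSelf) Create(evt event.CreateEvent, q workqueue.RateLimitingInterface) {
    e.enqueue(q, evt.Object.GetNamespace(), evt.Object.GetName())
}

func (e *enqueueSelf) Update(evt event.UpdateEvent, q workqueue.RateLimitingInterface) {
    e.enqueue(q, evt.ObjectNew.GetNamespace(), evt.ObjectNew.GetName())
}

func (e *enqueueSelf) Delete(evt event.DeleteEvent, q workqueue.RateLimitingInterface) {
    e.enqueue(q, evt.Object.GetNamespace(), evt.Object.GetName())
}

func (e *enqueueSelf) Generic(evt event.GenericEvent, q workqueue.RateLimitingInterface) {
    e.enqueue(q, evt.Object.GetNamespace(), evt.Object.GetName())
}

func (e *enqueueSelf) enqueue(q workqueue.RateLimitingInterface, ns, name string) {
    klog.Infof("enqueueing %s/%s", ns, name)
    q.Add(reconcile.Request{NamespacedName: types.NamespacedName{Namespace: ns, Name: name}})
}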
Implementation logic
github.com/openkruise/rollouts/main.go # main ->
github.com/openkruise/rollouts/pkg/controller/rollout/rollout_controller.go # SetupWithManager ->
github.com/openkruise/rollouts/pkg/controller/rollout/rollout_event_handler.go # enqueueRequestForWorkload
Core code
github.com/openkruise/rollouts/pkg/controller/rollout/rollout_controller.go # SetupWithManager
// SetupWithManager sets up the controller with the Manager.
func (r *RolloutReconciler) SetupWithManager(mgr ctrl.Manager) error {
// Create a new controller
c, err := controller.New("rollout-controller", mgr, controller.Options{
Reconciler: r, MaxConcurrentReconciles: concurrentReconciles})
if err != nil {
return err
}
// Watch for changes to rollout
if err = c.Watch(&source.Kind{Type: &v1alpha1.Rollout{}}, &handler.EnqueueRequestForObject{}); err != nil {
return err
}
// Watch for changes to batchRelease
if err = c.Watch(&source.Kind{Type: &v1alpha1.BatchRelease{}}, &enqueueRequestForBatchRelease{reader: mgr.GetCache()}); err != nil {
return err
}
runtimeController = c
// watch for workload events
workloadHandler = &enqueueRequestForWorkload{reader: mgr.GetCache(), scheme: r.Scheme}
if err = util.AddWorkloadWatcher(c, workloadHandler); err != nil {
return err
}
r.finder = util.NewControllerFinder(mgr.GetClient())
r.trafficRoutingManager = trafficrouting.NewTrafficRoutingManager(mgr.GetClient())
r.canaryManager = &canaryReleaseManager{
Client: mgr.GetClient(),
trafficRoutingManager: r.trafficRoutingManager,
recorder: r.Recorder,
}
return nil
}
github.com/openkruise/rollouts/pkg/controller/rollout/rollout_event_handler.go # handleEvent
// handle watched workload events
var _ handler.EventHandler = &enqueueRequestForWorkload{}
func (w *enqueueRequestForWorkload) Create(evt event.CreateEvent, q workqueue.RateLimitingInterface) {
w.handleEvent(q, evt.Object)
}
func (w *enqueueRequestForWorkload) Delete(evt event.DeleteEvent, q workqueue.RateLimitingInterface) {
w.handleEvent(q, evt.Object)
}
func (w *enqueueRequestForWorkload) Update(evt event.UpdateEvent, q workqueue.RateLimitingInterface) {
w.handleEvent(q, evt.ObjectNew)
}
func (w *enqueueRequestForWorkload) handleEvent(q workqueue.RateLimitingInterface, obj client.Object) {
key := types.NamespacedName{
Namespace: obj.GetNamespace(),
Name: obj.GetName(),
}
kinds, _, err := w.scheme.ObjectKinds(obj)
if err != nil {
klog.Errorf("scheme ObjectKinds key(%s) failed: %s", key.String(), err.Error())
return
}
gvk := kinds[0]
rollout, err := w.getRolloutForWorkload(key, gvk)
if err != nil {
klog.Errorf("unable to get Rollout related with %s (%s/%s), err: %v", gvk.Kind, key.Namespace, key.Name, err)
return
}
if rollout != nil {
klog.Infof("workload(%s/%s) and reconcile Rollout (%s/%s)", key.Namespace, key.Name, rollout.Namespace, rollout.Name)
nsn := types.NamespacedName{Namespace: rollout.GetNamespace(), Name: rollout.GetName()}
// enqueue the Rollout so that it gets reconciled
q.Add(reconcile.Request{NamespacedName: nsn})
}
}
The difference between ctrl.Result{RequeueAfter: time.Until(recheckTime)} and ctrl.Result{}
ctrl.Result{RequeueAfter: time.Until(*recheckTime)} means the controller wants the request to be re-queued and reconciled again after a certain delay. The RequeueAfter field specifies that delay as a time.Duration, counted from now; the concrete wait time is determined by the recheckTime variable.
RequeueAfter lets a controller process the custom resource again after a while, which is useful in some situations. For example, when the resource's state has changed but the next action can only happen after some time, RequeueAfter can be used to delay the next reconciliation.
Note that besides the ctrl.Result object, Reconcile can also return an error. If a non-nil error is returned, the controller records it and the current Reconcile ends; in that case the RequeueAfter field is ignored.
In short, ctrl.Result{RequeueAfter: time.Until(*recheckTime)} tells the controller to run Reconcile again after the given duration so that processing of the custom resource can continue, while an empty ctrl.Result{} simply waits for the next watch event.
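A minimal sketch of the two return styles. DemoReconciler and its sync helper are hypothetical names used only for this illustration.
package demo

import (
    "context"
    "time"

    ctrl "sigs.k8s.io/controller-runtime"
)

// DemoReconciler is a stand-in reconciler used only for this illustration.
type DemoReconciler struct{}

// sync is a hypothetical helper: it reports whether the work is done and,
// if not, when the controller should look again (e.g. when a pause expires).
func (r *DemoReconciler) sync(ctx context.Context, req ctrl.Request) (time.Time, bool, error) {
    return time.Now().Add(60 * time.Second), false, nil
}

func (r *DemoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
    recheckTime, done, err := r.sync(ctx, req)
    if err != nil {
        // a non-nil error re-queues the request with backoff; RequeueAfter is ignored
        return ctrl.Result{}, err
    }
    if !done {
        // come back once the remaining pause duration has elapsed
        return ctrl.Result{RequeueAfter: time.Until(recheckTime)}, nil
    }
    // empty Result: nothing scheduled, wait for the next watch event
    return ctrl.Result{}, nil
}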