Kubernetes HPA 源码阅读

1,535 阅读4分钟

HPA(Horizontal Pod Autoscaling) 依据资源变化进行 POD 数弹性伸缩。

主要方法

1、computeReplicasForMetrics

依据资源计算当前预期 POD 数的方法, 针对不同资源类型调用不同的方法

  • computeStatusForObjectMetric
  • computeStatusForPodsMetric
  • computeStatusForResourceMetric
  • computeStatusForExternalMetric

核心计算公式(稳定状态下):

当前资源使用率与目标使用率(用户设定的扩缩容阈值)的比值: usageRatio = float64(currentUtilization) / float64(targetUtilization)

预期 POD 数:replicaCountProposal = math.Ceil(usageRatio * float64(readyPodCount))

注意点:

  • horizontal-pod-autoscaler-tolerance 参数(默认值 0.1)设定了伸缩容忍度:当 usageRatio 与 1.0 的偏差不超过该值时,不进行扩缩容操作
  • horizontal-pod-autoscaler-cpu-initialization-period 参数(默认值: 5min)用于标记刚启动一定时间内的 Pod 为 ignoredPod;实时获取不到信息的 Pod 被标记为 missingPod。针对扩容和缩容的场景,两种 Pod 资源使用率置为不同的值加入计算,但新计算的结果不能改变扩缩容的趋势
// computeReplicasForMetrics computes the desired number of replicas for the metric specifications listed in the HPA,
// returning the maximum of the computed replica counts, a description of the associated metric, and the statuses of
// all metrics computed.
//
// NOTE: this listing is an excerpt. The code elided at "// ......" (which introduces metricSpec, i, selector,
// currentReplicas and the *Proposal variables used below) is not shown here.
//
// Every failure path returns zero values for all results together with a non-nil error.
func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.HorizontalPodAutoscaler, scale *autoscalingv1.Scale,
	metricSpecs []autoscalingv2.MetricSpec) (replicas int32, metric string, statuses []autoscalingv2.MetricStatus, timestamp time.Time, err error) {
	// ......
	// Dispatch on the metric source type; each case delegates to the matching computeStatusFor* method.
	switch metricSpec.Type {
	case autoscalingv2.ObjectMetricSourceType:
		// The label selector must be converted before the object metric can be evaluated.
		metricSelector, err := metav1.LabelSelectorAsSelector(metricSpec.Object.Metric.Selector)
		if err != nil {
			a.eventRecorder.Event(hpa, v1.EventTypeWarning, "FailedGetObjectMetric", err.Error())
			setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "FailedGetObjectMetric", "the HPA was unable to compute the replica count: %v", err)
			return 0, "", nil, time.Time{}, fmt.Errorf("failed to get object metric value: %v", err)
		}
		replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForObjectMetric(currentReplicas, metricSpec, hpa, selector, &statuses[i], metricSelector)
		if err != nil {
			return 0, "", nil, time.Time{}, fmt.Errorf("failed to get object metric value: %v", err)
		}
	case autoscalingv2.PodsMetricSourceType:
		metricSelector, err := metav1.LabelSelectorAsSelector(metricSpec.Pods.Metric.Selector)
		if err != nil {
			a.eventRecorder.Event(hpa, v1.EventTypeWarning, "FailedGetPodsMetric", err.Error())
			setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "FailedGetPodsMetric", "the HPA was unable to compute the replica count: %v", err)
			return 0, "", nil, time.Time{}, fmt.Errorf("failed to get pods metric value: %v", err)
		}
		replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForPodsMetric(currentReplicas, metricSpec, hpa, selector, &statuses[i], metricSelector)
		if err != nil {
			// Report this as a pods-metric failure (the message previously said "object").
			return 0, "", nil, time.Time{}, fmt.Errorf("failed to get pods metric value: %v", err)
		}
	case autoscalingv2.ResourceMetricSourceType:
		replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForResourceMetric(currentReplicas, metricSpec, hpa, selector, &statuses[i])
		if err != nil {
			return 0, "", nil, time.Time{}, err
		}
	case autoscalingv2.ExternalMetricSourceType:
		replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForExternalMetric(currentReplicas, metricSpec, hpa, selector, &statuses[i])
		if err != nil {
			return 0, "", nil, time.Time{}, err
		}
	default:
		errMsg := fmt.Sprintf("unknown metric source type %q", string(metricSpec.Type))
		a.eventRecorder.Event(hpa, v1.EventTypeWarning, "InvalidMetricSourceType", errMsg)
		setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "InvalidMetricSourceType", "the HPA was unable to compute the replica count: %s", errMsg)
		// errMsg embeds the metric type, so it must not be used as a format string:
		// a '%' in it would be interpreted as a formatting verb.
		return 0, "", nil, time.Time{}, fmt.Errorf("%s", errMsg)
	}

	setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionTrue, "ValidMetricFound", "the HPA was able to successfully calculate a replica count from %s", metric)
	return replicas, metric, statuses, timestamp, nil
}

2、stabilizeRecommendation

依据资源计算出当前预期 POD 数后,都会调用 stabilizeRecommendation 方法对结果进行“标准化”,使目标 POD 数更加合理

“标准化”后 POD 数为稳定窗口内依据资源计算的最大值,可通过 horizontal-pod-autoscaler-downscale-stabilization 参数调整缩容稳定窗口(默认5min)

// file: pkg/controller/podautoscaler/horizontal.go
// stabilizeRecommendation stores the newest recommendation for key and returns
// the largest recommendation that is not older than
// downscaleStabilisationWindow, the new one included.
//
// When the history for key holds an entry older than the window, the new
// sample overwrites it (the last such entry found) so the slice does not keep
// growing; otherwise the new sample is appended.
func (a *HorizontalController) stabilizeRecommendation(key string, prenormalizedDesiredReplicas int32) int32 {
	// Samples with a timestamp before windowStart fall outside the stabilisation window.
	windowStart := time.Now().Add(-a.downscaleStabilisationWindow)
	history := a.recommendations[key]

	highest := prenormalizedDesiredReplicas
	staleIdx := -1 // index of the last out-of-window sample, or -1 if none was seen
	for idx := range history {
		sample := history[idx]
		if sample.timestamp.Before(windowStart) {
			// Out-of-window samples do not count towards the maximum; remember the slot for reuse.
			staleIdx = idx
			continue
		}
		if sample.recommendation > highest {
			highest = sample.recommendation
		}
	}

	latest := timestampedRecommendation{prenormalizedDesiredReplicas, time.Now()}
	if staleIdx >= 0 {
		// Reuse the stale slot instead of growing the history.
		history[staleIdx] = latest
	} else {
		a.recommendations[key] = append(history, latest)
	}
	return highest
}

3、convertDesiredReplicasWithRules

目标 POD 数要满足一定规则,通过 convertDesiredReplicasWithRules 方法进行约束

依据规则转换后 POD 数:

  • 在 hpaMinReplicas~hpaMaxReplicas 之间
  • 大于等于1
  • 小于等于 Min(hpaMaxReplicas, Max(current*2, 4))
// convertDesiredReplicasWithRules performs the actual normalization, without depending on `HorizontalController` or
// `HorizontalPodAutoscaler`. It clamps desiredReplicas into the allowed range and reports which limit, if any, applied.
//
// The lower bound is hpaMinReplicas, or 1 when hpaMinReplicas is 0. The upper bound is the smaller of hpaMaxReplicas
// and calculateScaleUpLimit(currentReplicas).
//
// It returns the resulting replica count, a condition name ("TooFewReplicas", "ScaleUpLimit", "TooManyReplicas" or
// "DesiredWithinRange") and a human-readable reason that matches that condition.
func convertDesiredReplicasWithRules(currentReplicas, desiredReplicas, hpaMinReplicas, hpaMaxReplicas int32) (int32, string, string) {
	// Lower bound: never allow fewer than one replica. The reason for hitting the lower bound is kept in its own
	// variable so that the upper-bound bookkeeping below cannot overwrite it.
	minimumAllowedReplicas := hpaMinReplicas
	tooFewReason := "the desired replica count is less than the minimum replica count"
	if hpaMinReplicas == 0 {
		minimumAllowedReplicas = 1
		tooFewReason = "the desired replica count is zero"
	}

	// Do not upscale too much to prevent incorrect rapid increase of the number of master replicas caused by
	// bogus CPU usage report from heapster/kubelet (like in issue #32304).
	// Per the original annotation, calculateScaleUpLimit evaluates
	// int32(math.Max(scaleUpLimitFactor*float64(currentReplicas), scaleUpLimitMinimum))
	// with scaleUpLimitFactor = 2.0 and scaleUpLimitMinimum = 4.0.
	scaleUpLimit := calculateScaleUpLimit(currentReplicas)

	// Upper bound: the smaller of the configured maximum and the per-step scale-up limit.
	maximumAllowedReplicas := hpaMaxReplicas
	tooManyCondition := "TooManyReplicas"
	tooManyReason := "the desired replica count is more than the maximum replica count"
	if hpaMaxReplicas > scaleUpLimit {
		maximumAllowedReplicas = scaleUpLimit
		tooManyCondition = "ScaleUpLimit"
		tooManyReason = "the desired replica count is increasing faster than the maximum scale rate"
	}

	// Clamp into [minimumAllowedReplicas, maximumAllowedReplicas]; the lower bound is checked first.
	switch {
	case desiredReplicas < minimumAllowedReplicas:
		return minimumAllowedReplicas, "TooFewReplicas", tooFewReason
	case desiredReplicas > maximumAllowedReplicas:
		return maximumAllowedReplicas, tooManyCondition, tooManyReason
	}

	return desiredReplicas, "DesiredWithinRange", "the desired count is within the acceptable range"
}