HPA(Horizontal Pod Autoscaling) 依据资源变化进行 POD 数弹性伸缩。
主要方法
1、computeReplicasForMetrics
依据各项指标计算当前预期 POD 数的方法,针对不同的指标类型(Object / Pods / Resource / External)调用不同的方法
- computeStatusForObjectMetric
- computeStatusForPodsMetric
- computeStatusForResourceMetric
- computeStatusForExternalMetric
核心计算公式(稳定状态下):
当前资源使用率与用户设定的目标使用率的比值: usageRatio = float64(currentUtilization) / float64(targetUtilization)
预期 POD 数:replicaCountProposal = math.Ceil(usageRatio * float64(readyPodCount))
注意点:
- horizontal-pod-autoscaler-tolerance 参数(默认值 0.1)限制了伸缩容忍度,在一定变化率内不进行扩缩容操作
- horizontal-pod-autoscaler-cpu-initialization-period 参数(默认值: 5min)用于标记刚启动一定时间内的 Pod 为 ignoredPod;实时获取不到信息的 Pod 被标记为 missingPod。针对扩容和缩容的场景,两种 Pod 资源使用率置为不同的值加入计算,但新计算的结果不能改变扩缩容的趋势
// computeReplicasForMetrics computes the desired number of replicas for the metric specifications listed in the HPA,
// returning the maximum of the computed replica counts, a description of the associated metric, and the statuses of
// all metrics computed.
//
// Each metric source type is dispatched to its own computeStatusFor*Metric helper:
//   - Object and Pods metrics first convert the metric label selector; a conversion failure is recorded as a
//     warning event and a ScalingActive=False condition before the error is returned.
//   - Resource and External metrics return the helper's error unchanged.
//   - Any other source type is reported as "InvalidMetricSourceType".
//
// On every error path the non-error results are zero values.
func (a *HorizontalController) computeReplicasForMetrics(hpa *autoscalingv2.HorizontalPodAutoscaler, scale *autoscalingv1.Scale,
	metricSpecs []autoscalingv2.MetricSpec) (replicas int32, metric string, statuses []autoscalingv2.MetricStatus, timestamp time.Time, err error) {
	// ......
	// NOTE(review): code is elided here in this excerpt; presumably it defines metricSpec, i,
	// currentReplicas and selector used below — confirm against the full source.
	switch metricSpec.Type {
	case autoscalingv2.ObjectMetricSourceType:
		metricSelector, err := metav1.LabelSelectorAsSelector(metricSpec.Object.Metric.Selector)
		if err != nil {
			a.eventRecorder.Event(hpa, v1.EventTypeWarning, "FailedGetObjectMetric", err.Error())
			setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "FailedGetObjectMetric", "the HPA was unable to compute the replica count: %v", err)
			return 0, "", nil, time.Time{}, fmt.Errorf("failed to get object metric value: %v", err)
		}
		replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForObjectMetric(currentReplicas, metricSpec, hpa, selector, &statuses[i], metricSelector)
		if err != nil {
			return 0, "", nil, time.Time{}, fmt.Errorf("failed to get object metric value: %v", err)
		}
	case autoscalingv2.PodsMetricSourceType:
		metricSelector, err := metav1.LabelSelectorAsSelector(metricSpec.Pods.Metric.Selector)
		if err != nil {
			a.eventRecorder.Event(hpa, v1.EventTypeWarning, "FailedGetPodsMetric", err.Error())
			setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "FailedGetPodsMetric", "the HPA was unable to compute the replica count: %v", err)
			return 0, "", nil, time.Time{}, fmt.Errorf("failed to get pods metric value: %v", err)
		}
		replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForPodsMetric(currentReplicas, metricSpec, hpa, selector, &statuses[i], metricSelector)
		if err != nil {
			// Report the failure as a pods-metric failure; this message previously said "object metric".
			return 0, "", nil, time.Time{}, fmt.Errorf("failed to get pods metric value: %v", err)
		}
	case autoscalingv2.ResourceMetricSourceType:
		replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForResourceMetric(currentReplicas, metricSpec, hpa, selector, &statuses[i])
		if err != nil {
			return 0, "", nil, time.Time{}, err
		}
	case autoscalingv2.ExternalMetricSourceType:
		replicaCountProposal, timestampProposal, metricNameProposal, err = a.computeStatusForExternalMetric(currentReplicas, metricSpec, hpa, selector, &statuses[i])
		if err != nil {
			return 0, "", nil, time.Time{}, err
		}
	default:
		errMsg := fmt.Sprintf("unknown metric source type %q", string(metricSpec.Type))
		a.eventRecorder.Event(hpa, v1.EventTypeWarning, "InvalidMetricSourceType", errMsg)
		setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionFalse, "InvalidMetricSourceType", "the HPA was unable to compute the replica count: %s", errMsg)
		// errMsg embeds the user-supplied type name, so it must be passed as an argument and never
		// used as the format string itself (a '%' in it would be interpreted as a verb).
		return 0, "", nil, time.Time{}, fmt.Errorf("%s", errMsg)
	}
	setCondition(hpa, autoscalingv2.ScalingActive, v1.ConditionTrue, "ValidMetricFound", "the HPA was able to successfully calculate a replica count from %s", metric)
	return replicas, metric, statuses, timestamp, nil
}
2、stabilizeRecommendation
依据资源计算出当前预期 POD 数后,都会调用 stabilizeRecommendation 方法对结果进行“标准化”,使目标 POD 数更加合理
“标准化”后 POD 数为稳定窗口内依据资源计算的最大值,可通过 horizontal-pod-autoscaler-downscale-stabilization 参数调整缩容稳定窗口(默认5min)
// file: pkg/controller/podautoscaler/horizontal.go
// stabilizeRecommendation records prenormalizedDesiredReplicas as the newest recommendation for key and
// returns the largest recommendation that is not older than downscaleStabilisationWindow, the new
// recommendation included.
//
// To keep a.recommendations[key] from growing on every call, the new sample overwrites a sample that has
// already fallen out of the window when one exists; only otherwise is it appended.
func (a *HorizontalController) stabilizeRecommendation(key string, prenormalizedDesiredReplicas int32) int32 {
	// Samples stamped before windowStart are outside the downscale stabilization window.
	windowStart := time.Now().Add(-a.downscaleStabilisationWindow)
	history := a.recommendations[key]

	best := prenormalizedDesiredReplicas
	staleIdx := -1 // index of the last out-of-window sample seen, or -1 when there is none
	for idx := range history {
		sample := history[idx]
		switch {
		case sample.timestamp.Before(windowStart):
			// Expired sample: excluded from the maximum, remembered as a slot to reuse.
			staleIdx = idx
		case sample.recommendation > best:
			// In-window sample with a higher recommendation than anything seen so far.
			best = sample.recommendation
		}
	}

	entry := timestampedRecommendation{recommendation: prenormalizedDesiredReplicas, timestamp: time.Now()}
	if staleIdx < 0 {
		a.recommendations[key] = append(history, entry)
		return best
	}
	// Reuse the expired slot instead of growing the slice.
	history[staleIdx] = entry
	return best
}
3、convertDesiredReplicasWithRules
目标 POD 数要满足一定规则,通过 convertDesiredReplicasWithRules 方法进行约束
依据规则转换后 POD 数:
- 在 hpaMinReplicas~hpaMaxReplicas 之间
- 大于等于1
- 小于等于 Min(hpaMaxReplicas, Max(current*2, 4))
// convertDesiredReplicasWithRules performs the actual normalization, without depending on `HorizontalController` or `HorizontalPodAutoscaler`.
//
// It clamps desiredReplicas into the range [minimumAllowedReplicas, maximumAllowedReplicas], where
//   - minimumAllowedReplicas is hpaMinReplicas, or 1 when hpaMinReplicas is 0;
//   - maximumAllowedReplicas is the smaller of hpaMaxReplicas and calculateScaleUpLimit(currentReplicas).
//
// It returns the resulting replica count, the name of the condition that limited it
// ("TooFewReplicas", "ScaleUpLimit", "TooManyReplicas" or "DesiredWithinRange"), and a human-readable
// reason matching that condition. The lower bound is checked before the upper bound.
func convertDesiredReplicasWithRules(currentReplicas, desiredReplicas, hpaMinReplicas, hpaMaxReplicas int32) (int32, string, string) {
	// Lower bound: never go below one replica, even when the configured minimum is zero.
	// The reason is kept in its own variable so the upper-bound logic below cannot overwrite it.
	minimumAllowedReplicas := hpaMinReplicas
	tooFewReason := "the desired replica count is less than the minimum replica count"
	if hpaMinReplicas == 0 {
		minimumAllowedReplicas = 1
		tooFewReason = "the desired replica count is zero"
	}

	// Do not upscale too much to prevent incorrect rapid increase of the number of master replicas caused by
	// bogus CPU usage report from heapster/kubelet (like in issue #32304).
	// Per the accompanying notes, calculateScaleUpLimit evaluates
	// int32(math.Max(scaleUpLimitFactor*float64(currentReplicas), scaleUpLimitMinimum))
	// with scaleUpLimitFactor = 2.0 and scaleUpLimitMinimum = 4.0 — confirm against its definition.
	scaleUpLimit := calculateScaleUpLimit(currentReplicas)

	// Upper bound: the smaller of the configured maximum and the per-step scale-up limit.
	maximumAllowedReplicas := hpaMaxReplicas
	tooManyCondition := "TooManyReplicas"
	tooManyReason := "the desired replica count is more than the maximum replica count"
	if hpaMaxReplicas > scaleUpLimit {
		maximumAllowedReplicas = scaleUpLimit
		tooManyCondition = "ScaleUpLimit"
		tooManyReason = "the desired replica count is increasing faster than the maximum scale rate"
	}

	switch {
	case desiredReplicas < minimumAllowedReplicas:
		return minimumAllowedReplicas, "TooFewReplicas", tooFewReason
	case desiredReplicas > maximumAllowedReplicas:
		return maximumAllowedReplicas, tooManyCondition, tooManyReason
	}
	return desiredReplicas, "DesiredWithinRange", "the desired count is within the acceptable range"
}