Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions vertical-pod-autoscaler/docs/features.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
- [In-Place Updates (<code>InPlaceOrRecreate</code>)](#in-place-updates-inplaceorrecreate)
- [Usage](#usage)
- [Behavior](#behavior)
- [Skipping Disruption Budget for Non-Disruptive Updates](#skipping-disruption-budget-for-non-disruptive-updates)
- [When Disruption Budgets Are Still Respected](#when-disruption-budgets-are-still-respected)
- [Requirements:](#requirements)
- [Configuration](#configuration)
- [Limitations](#limitations)
Expand Down Expand Up @@ -89,7 +91,7 @@ To enable this feature, set the `--round-memory-bytes` flag when running the VPA

## In-Place Updates (`InPlaceOrRecreate`)

> [!WARNING]
> [!WARNING]
> FEATURE STATE: VPA v1.4.0 [alpha]
> FEATURE STATE: VPA v1.5.0 [beta]

Expand Down Expand Up @@ -123,6 +125,19 @@ Important Notes

* Memory Limit Downscaling: In the beta version, memory limit downscaling is not supported for pods with resizePolicy: PreferNoRestart. In such cases, VPA will fall back to pod recreation.

### Skipping Disruption Budget for Non-Disruptive Updates

By default, VPA respects disruption budgets (eviction tolerance, min replicas) even for in-place updates. However, when an in-place update doesn't require container restarts, it's truly non-disruptive and these checks may be unnecessarily restrictive.

The `--in-place-skip-disruption-budget` flag (default: `false`) allows VPA to skip disruption budget checks for an in-place update when every container in the pod either has a `NotRequired` resize policy for both CPU and memory or defines no resize policy at all.

#### When Disruption Budgets Are Still Respected

Even with this flag enabled, disruption budgets are still enforced when:
* Any container has a `RestartContainer` resize policy for any resource
* The update would result in pod eviction/recreation (fallback scenarios)


### Requirements:

* Kubernetes 1.33+ with `InPlacePodVerticalScaling` feature gate enabled
Expand All @@ -134,7 +149,7 @@ Enable the feature by setting the following flags in VPA components ( for both u

```bash
--feature-gates=InPlaceOrRecreate=true
```
```

### Limitations

Expand Down
2 changes: 1 addition & 1 deletion vertical-pod-autoscaler/docs/flags.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ This document is auto-generated from the flag definitions in the VPA updater cod
| `eviction-tolerance` | float | 0.5 | Fraction of replica count that can be evicted for update, if more than one pod can be evicted. |
| `feature-gates` | mapStringBool | | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:<br>AllAlpha=true\|false (ALPHA - default=false)<br>AllBeta=true\|false (BETA - default=false)<br>InPlaceOrRecreate=true\|false (BETA - default=true)<br>PerVPAConfig=true\|false (ALPHA - default=false) |
| `ignored-vpa-object-namespaces` | string | | A comma-separated list of namespaces to ignore when searching for VPA objects. Leave empty to avoid ignoring any namespaces. These namespaces will not be cleaned by the garbage collector. |
| `in-place-skip-disruption-budget` | | | [ALPHA] If true, VPA updater skips disruption budget checks for in-place pod updates when all containers have NotRequired resize policy (or no policy defined) for both CPU and memory resources. Disruption budgets are still respected when any container has RestartContainer resize policy for any resource. |
| `in-recommendation-bounds-eviction-lifetime-threshold` | | 12h0m0s | duration Pods that live for at least that long can be evicted even if their request is within the [MinRecommended...MaxRecommended] range |
| `kube-api-burst` | float | 100 | QPS burst limit when making requests to Kubernetes apiserver |
| `kube-api-qps` | float | 50 | QPS limit when making requests to Kubernetes apiserver |
Expand Down Expand Up @@ -180,4 +181,3 @@ This document is auto-generated from the flag definitions in the VPA updater cod
| `v,` | | : 4 | , --v Level set the log level verbosity (default 4) |
| `vmodule` | moduleSpec | | comma-separated list of pattern=N settings for file-filtered logging |
| `vpa-object-namespace` | string | | Specifies the namespace to search for VPA objects. Leave empty to include all namespaces. If provided, the garbage collector will only clean this namespace. |

2 changes: 2 additions & 0 deletions vertical-pod-autoscaler/pkg/updater/logic/updater.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ func NewUpdater(
evictionRateBurst int,
evictionToleranceFraction float64,
useAdmissionControllerStatus bool,
inPlaceSkipDisruptionBudget bool,
statusNamespace string,
recommendationProcessor vpa_api_util.RecommendationProcessor,
evictionAdmission priority.PodEvictionAdmission,
Expand All @@ -111,6 +112,7 @@ func NewUpdater(
minReplicasForEviction,
evictionToleranceFraction,
patchCalculators,
inPlaceSkipDisruptionBudget,
)
if err != nil {
return nil, fmt.Errorf("failed to create restriction factory: %v", err)
Expand Down
8 changes: 8 additions & 0 deletions vertical-pod-autoscaler/pkg/updater/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,13 @@ var (
useAdmissionControllerStatus = flag.Bool("use-admission-controller-status", true,
"If true, updater will only evict pods when admission controller status is valid.")

inPlaceSkipDisruptionBudget = flag.Bool(
"in-place-skip-disruption-budget",
false,
"[ALPHA] If true, VPA updater skips disruption budget checks for in-place pod updates when all containers have NotRequired resize policy (or no policy defined) for both CPU and memory resources. "+
"Disruption budgets are still respected when any container has RestartContainer resize policy for any resource.",
)

Comment thread
omerap12 marked this conversation as resolved.
namespace = os.Getenv("NAMESPACE")
)

Expand Down Expand Up @@ -217,6 +224,7 @@ func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) {
*evictionRateBurst,
*evictionToleranceFraction,
*useAdmissionControllerStatus,
*inPlaceSkipDisruptionBudget,
admissionControllerStatusNamespace,
vpa_api_util.NewCappingRecommendationProcessor(limitRangeCalculator),
priority.NewScalingDirectionPodEvictionAdmission(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func TestEvictTooFewReplicas(t *testing.T) {
}

basicVpa := getBasicVpa()
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 10, 0.5, nil, nil, nil)
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 10, 0.5, nil, nil, nil, false)
assert.NoError(t, err)
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := factory.GetCreatorMaps(pods, basicVpa)
assert.NoError(t, err)
Expand Down Expand Up @@ -94,7 +94,7 @@ func TestEvictionTolerance(t *testing.T) {
}

basicVpa := getBasicVpa()
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2 /*minReplicas*/, tolerance, nil, nil, nil)
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2 /*minReplicas*/, tolerance, nil, nil, nil, false)
assert.NoError(t, err)
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := factory.GetCreatorMaps(pods, basicVpa)
assert.NoError(t, err)
Expand Down Expand Up @@ -138,7 +138,7 @@ func TestEvictAtLeastOne(t *testing.T) {
}

basicVpa := getBasicVpa()
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2, tolerance, nil, nil, nil)
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2, tolerance, nil, nil, nil, false)
assert.NoError(t, err)
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := factory.GetCreatorMaps(pods, basicVpa)
assert.NoError(t, err)
Expand Down Expand Up @@ -230,7 +230,7 @@ func TestEvictEmitEvent(t *testing.T) {
pods = append(pods, p.pod)
}
clock := baseclocktest.NewFakeClock(time.Time{})
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2, testCase.evictionTolerance, clock, map[string]time.Time{}, nil)
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2, testCase.evictionTolerance, clock, map[string]time.Time{}, nil, false)
assert.NoError(t, err)
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := factory.GetCreatorMaps(pods, testCase.vpa)
assert.NoError(t, err)
Expand All @@ -257,3 +257,45 @@ func TestEvictEmitEvent(t *testing.T) {
}
}
}

// TestEvictTooFewReplicasWithInPlaceSkipDisruptionBudget guards the eviction
// path against the in-place-skip-disruption-budget option: the option is meant
// for in-place updates only, so a restriction factory built with it switched on
// must still refuse to evict when the eviction limits are not satisfied.
//
// The scenario is five pods owned by one ReplicationController while the
// factory is created with a minimum replica count of 10; every pod is expected
// to be reported as non-evictable and every eviction attempt to fail.
func TestEvictTooFewReplicasWithInPlaceSkipDisruptionBudget(t *testing.T) {
	const podCount = 5
	desiredReplicas := int32(podCount)

	// Owning controller shared by all pods in this test.
	var controller apiv1.ReplicationController
	controller.TypeMeta = metav1.TypeMeta{Kind: "ReplicationController"}
	controller.ObjectMeta = metav1.ObjectMeta{Name: "rc", Namespace: "default"}
	controller.Spec = apiv1.ReplicationControllerSpec{Replicas: &desiredReplicas}

	ownedPods := make([]*apiv1.Pod, 0, podCount)
	for idx := 0; idx < podCount; idx++ {
		builder := test.Pod().WithName(getTestPodName(idx))
		builder = builder.WithCreator(&controller.ObjectMeta, &controller.TypeMeta)
		ownedPods = append(ownedPods, builder.Get())
	}

	vpa := getBasicVpa()

	// The trailing `true` turns inPlaceSkipDisruptionBudget on for this factory.
	restrictionFactory, factoryErr := getRestrictionFactory(&controller, nil, nil, nil, 10, 0.5, nil, nil, nil, true)
	assert.NoError(t, factoryErr)

	groupStats, podToCreator, mapsErr := restrictionFactory.GetCreatorMaps(ownedPods, vpa)
	assert.NoError(t, mapsErr)

	restriction := restrictionFactory.NewPodsEvictionRestriction(groupStats, podToCreator)

	// First pass: no pod may be reported as evictable.
	for i := range ownedPods {
		assert.False(t, restriction.CanEvict(ownedPods[i]))
	}

	// Second pass: an explicit eviction attempt must be rejected for each pod.
	for i := range ownedPods {
		evictErr := restriction.Evict(ownedPods[i], vpa, test.FakeEventRecorder())
		assert.Error(t, evictErr, "Error expected")
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ type PodsInPlaceRestrictionImpl struct {
patchCalculators []patch.Calculator
clock clock.Clock
lastInPlaceAttemptTimeMap map[string]time.Time
inPlaceSkipDisruptionBudget bool
}

// CanInPlaceUpdate checks if pod can be safely updated
Expand All @@ -89,6 +90,13 @@ func (ip *PodsInPlaceRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) utils.InP
}
return utils.InPlaceDeferred
}
if ip.inPlaceSkipDisruptionBudget {
if utils.IsNonDisruptiveResize(pod) {
klog.V(4).InfoS("in-place-skip-disruption-budget enabled, skipping disruption budget check for in-place update")
return utils.InPlaceApproved
}
klog.V(4).InfoS("in-place-skip-disruption-budget enabled, but pod has RestartContainer resize policy", "pod", klog.KObj(pod))
}
if singleGroupStats.isPodDisruptable() {
return utils.InPlaceApproved
}
Expand Down
Loading