Skip to content

Commit 67b0869

Browse files
authored
Merge pull request #8987 from omerap12/issue-8980
Add --in-place-skip-disruption-budget flag
2 parents a383bdd + cb86e2b commit 67b0869

13 files changed

Lines changed: 521 additions & 68 deletions

vertical-pod-autoscaler/docs/features.md

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
- [In-Place Updates (<code>InPlaceOrRecreate</code>)](#in-place-updates-inplaceorrecreate)
1111
- [Usage](#usage)
1212
- [Behavior](#behavior)
13+
- [Skipping Disruption Budget for Non-Disruptive Updates](#skipping-disruption-budget-for-non-disruptive-updates)
14+
- [When Disruption Budgets Are Still Respected](#when-disruption-budgets-are-still-respected)
1315
- [Requirements:](#requirements)
1416
- [Configuration](#configuration)
1517
- [Limitations](#limitations)
@@ -89,7 +91,7 @@ To enable this feature, set the `--round-memory-bytes` flag when running the VPA
8991

9092
## In-Place Updates (`InPlaceOrRecreate`)
9193

92-
> [!WARNING]
94+
> [!WARNING]
9395
> FEATURE STATE: VPA v1.4.0 [alpha]
9496
> FEATURE STATE: VPA v1.5.0 [beta]
9597
@@ -123,6 +125,19 @@ Important Notes
123125

124126
* Memory Limit Downscaling: In the beta version, memory limit downscaling is not supported for pods with resizePolicy: PreferNoRestart. In such cases, VPA will fall back to pod recreation.
125127

128+
### Skipping Disruption Budget for Non-Disruptive Updates
129+
130+
By default, VPA respects disruption budgets (eviction tolerance, min replicas) even for in-place updates. However, when an in-place update doesn't require container restarts, it's truly non-disruptive and these checks may be unnecessarily restrictive.
131+
132+
The `--in-place-skip-disruption-budget` flag (default: `false`) allows VPA to skip disruption budget checks for in-place updates when all containers in the pod have `NotRequired` resize policy for both CPU and memory or no resize policy is defined.
133+
134+
#### When Disruption Budgets Are Still Respected
135+
136+
Even with this flag enabled, disruption budgets are enforced when:
137+
* Any container has `RestartContainer` resize policy for any resource
138+
* The update would result in pod eviction/recreation (fallback scenarios)
139+
140+
126141
### Requirements:
127142

128143
* Kubernetes 1.33+ with `InPlacePodVerticalScaling` feature gate enabled
@@ -134,7 +149,7 @@ Enable the feature by setting the following flags in VPA components ( for both u
134149

135150
```bash
136151
--feature-gates=InPlaceOrRecreate=true
137-
```
152+
```
138153

139154
### Limitations
140155

vertical-pod-autoscaler/docs/flags.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ This document is auto-generated from the flag definitions in the VPA updater cod
152152
| `eviction-tolerance` | float | 0.5 | Fraction of replica count that can be evicted for update, if more than one pod can be evicted. |
153153
| `feature-gates` | mapStringBool | | A set of key=value pairs that describe feature gates for alpha/experimental features. Options are:<br>AllAlpha=true\|false (ALPHA - default=false)<br>AllBeta=true\|false (BETA - default=false)<br>InPlaceOrRecreate=true\|false (BETA - default=true)<br>PerVPAConfig=true\|false (ALPHA - default=false) |
154154
| `ignored-vpa-object-namespaces` | string | | A comma-separated list of namespaces to ignore when searching for VPA objects. Leave empty to avoid ignoring any namespaces. These namespaces will not be cleaned by the garbage collector. |
155+
| `in-place-skip-disruption-budget` | | | [ALPHA] If true, VPA updater skips disruption budget checks for in-place pod updates when all containers have NotRequired resize policy (or no policy defined) for both CPU and memory resources. Disruption budgets are still respected when any container has RestartContainer resize policy for any resource. |
155156
| `in-recommendation-bounds-eviction-lifetime-threshold` | | 12h0m0s | duration Pods that live for at least that long can be evicted even if their request is within the [MinRecommended...MaxRecommended] range |
156157
| `kube-api-burst` | float | 100 | QPS burst limit when making requests to Kubernetes apiserver |
157158
| `kube-api-qps` | float | 50 | QPS limit when making requests to Kubernetes apiserver |

vertical-pod-autoscaler/pkg/updater/logic/updater.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ func NewUpdater(
9393
evictionRateBurst int,
9494
evictionToleranceFraction float64,
9595
useAdmissionControllerStatus bool,
96+
inPlaceSkipDisruptionBudget bool,
9697
statusNamespace string,
9798
recommendationProcessor vpa_api_util.RecommendationProcessor,
9899
evictionAdmission priority.PodEvictionAdmission,
@@ -111,6 +112,7 @@ func NewUpdater(
111112
minReplicasForEviction,
112113
evictionToleranceFraction,
113114
patchCalculators,
115+
inPlaceSkipDisruptionBudget,
114116
)
115117
if err != nil {
116118
return nil, fmt.Errorf("failed to create restriction factory: %v", err)

vertical-pod-autoscaler/pkg/updater/main.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,13 @@ var (
7575
useAdmissionControllerStatus = flag.Bool("use-admission-controller-status", true,
7676
"If true, updater will only evict pods when admission controller status is valid.")
7777

78+
inPlaceSkipDisruptionBudget = flag.Bool(
79+
"in-place-skip-disruption-budget",
80+
false,
81+
"[ALPHA] If true, VPA updater skips disruption budget checks for in-place pod updates when all containers have NotRequired resize policy (or no policy defined) for both CPU and memory resources. "+
82+
"Disruption budgets are still respected when any container has RestartContainer resize policy for any resource.",
83+
)
84+
7885
namespace = os.Getenv("NAMESPACE")
7986
)
8087

@@ -217,6 +224,7 @@ func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) {
217224
*evictionRateBurst,
218225
*evictionToleranceFraction,
219226
*useAdmissionControllerStatus,
227+
*inPlaceSkipDisruptionBudget,
220228
admissionControllerStatusNamespace,
221229
vpa_api_util.NewCappingRecommendationProcessor(limitRangeCalculator),
222230
priority.NewScalingDirectionPodEvictionAdmission(),

vertical-pod-autoscaler/pkg/updater/restriction/pods_eviction_restriction_test.go

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ func TestEvictTooFewReplicas(t *testing.T) {
5454
}
5555

5656
basicVpa := getBasicVpa()
57-
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 10, 0.5, nil, nil, nil)
57+
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 10, 0.5, nil, nil, nil, false)
5858
assert.NoError(t, err)
5959
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := factory.GetCreatorMaps(pods, basicVpa)
6060
assert.NoError(t, err)
@@ -94,7 +94,7 @@ func TestEvictionTolerance(t *testing.T) {
9494
}
9595

9696
basicVpa := getBasicVpa()
97-
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2 /*minReplicas*/, tolerance, nil, nil, nil)
97+
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2 /*minReplicas*/, tolerance, nil, nil, nil, false)
9898
assert.NoError(t, err)
9999
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := factory.GetCreatorMaps(pods, basicVpa)
100100
assert.NoError(t, err)
@@ -138,7 +138,7 @@ func TestEvictAtLeastOne(t *testing.T) {
138138
}
139139

140140
basicVpa := getBasicVpa()
141-
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2, tolerance, nil, nil, nil)
141+
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2, tolerance, nil, nil, nil, false)
142142
assert.NoError(t, err)
143143
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := factory.GetCreatorMaps(pods, basicVpa)
144144
assert.NoError(t, err)
@@ -230,7 +230,7 @@ func TestEvictEmitEvent(t *testing.T) {
230230
pods = append(pods, p.pod)
231231
}
232232
clock := baseclocktest.NewFakeClock(time.Time{})
233-
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2, testCase.evictionTolerance, clock, map[string]time.Time{}, nil)
233+
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 2, testCase.evictionTolerance, clock, map[string]time.Time{}, nil, false)
234234
assert.NoError(t, err)
235235
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := factory.GetCreatorMaps(pods, testCase.vpa)
236236
assert.NoError(t, err)
@@ -257,3 +257,45 @@ func TestEvictEmitEvent(t *testing.T) {
257257
}
258258
}
259259
}
260+
261+
// This test ensures that in-place-skip-disruption-budget only affects in-place
262+
// updates and does not bypass eviction tolerance when performing pod evictions.
263+
func TestEvictTooFewReplicasWithInPlaceSkipDisruptionBudget(t *testing.T) {
264+
replicas := int32(5)
265+
livePods := 5
266+
267+
rc := apiv1.ReplicationController{
268+
ObjectMeta: metav1.ObjectMeta{
269+
Name: "rc",
270+
Namespace: "default",
271+
},
272+
TypeMeta: metav1.TypeMeta{
273+
Kind: "ReplicationController",
274+
},
275+
Spec: apiv1.ReplicationControllerSpec{
276+
Replicas: &replicas,
277+
},
278+
}
279+
280+
pods := make([]*apiv1.Pod, livePods)
281+
for i := range pods {
282+
pods[i] = test.Pod().WithName(getTestPodName(i)).WithCreator(&rc.ObjectMeta, &rc.TypeMeta).Get()
283+
}
284+
285+
basicVpa := getBasicVpa()
286+
// factory with inPlaceSkipDisruptionBudget on
287+
factory, err := getRestrictionFactory(&rc, nil, nil, nil, 10, 0.5, nil, nil, nil, true)
288+
assert.NoError(t, err)
289+
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := factory.GetCreatorMaps(pods, basicVpa)
290+
assert.NoError(t, err)
291+
eviction := factory.NewPodsEvictionRestriction(creatorToSingleGroupStatsMap, podToReplicaCreatorMap)
292+
293+
for _, pod := range pods {
294+
assert.False(t, eviction.CanEvict(pod))
295+
}
296+
297+
for _, pod := range pods {
298+
err := eviction.Evict(pod, basicVpa, test.FakeEventRecorder())
299+
assert.Error(t, err, "Error expected")
300+
}
301+
}

vertical-pod-autoscaler/pkg/updater/restriction/pods_inplace_restriction.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ type PodsInPlaceRestrictionImpl struct {
6767
patchCalculators []patch.Calculator
6868
clock clock.Clock
6969
lastInPlaceAttemptTimeMap map[string]time.Time
70+
inPlaceSkipDisruptionBudget bool
7071
}
7172

7273
// CanInPlaceUpdate checks if pod can be safely updated
@@ -89,6 +90,13 @@ func (ip *PodsInPlaceRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) utils.InP
8990
}
9091
return utils.InPlaceDeferred
9192
}
93+
if ip.inPlaceSkipDisruptionBudget {
94+
if utils.IsNonDisruptiveResize(pod) {
95+
klog.V(4).InfoS("in-place-skip-disruption-budget enabled, skipping disruption budget check for in-place update")
96+
return utils.InPlaceApproved
97+
}
98+
klog.V(4).InfoS("in-place-skip-disruption-budget enabled, but pod has RestartContainer resize policy", "pod", klog.KObj(pod))
99+
}
92100
if singleGroupStats.isPodDisruptable() {
93101
return utils.InPlaceApproved
94102
}

0 commit comments

Comments
 (0)