Skip to content

Commit 3ef7a60

Browse files
author
Abhishek Bansal
committed
fix: unexpected downtime in rollouts
Signed-off-by: Abhishek Bansal <[email protected]>
1 parent 53c4f12 commit 3ef7a60

2 files changed

Lines changed: 89 additions & 0 deletions

File tree

rollout/trafficrouting.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
replicasetutil "github.com/argoproj/argo-rollouts/utils/replicaset"
2828
rolloututil "github.com/argoproj/argo-rollouts/utils/rollout"
2929
"github.com/argoproj/argo-rollouts/utils/weightutil"
30+
appsv1 "k8s.io/api/apps/v1"
3031
)
3132

3233
// NewTrafficRoutingReconciler identifies return the TrafficRouting Plugin that the rollout wants to modify
@@ -133,6 +134,23 @@ func (c *Controller) NewTrafficRoutingReconciler(roCtx *rolloutContext) ([]traff
133134
return nil, nil
134135
}
135136

137+
func (c *rolloutContext) checkReplicasAvailable(rs *appsv1.ReplicaSet, desiredWeight int32) bool {
138+
if rs == nil {
139+
return false
140+
}
141+
availableReplicas := rs.Status.AvailableReplicas
142+
totalReplicas := *c.rollout.Spec.Replicas
143+
144+
desiredReplicas := (desiredWeight * totalReplicas) / 100
145+
if availableReplicas < desiredReplicas {
146+
c.log.Infof("ReplicaSet '%s' has %d available replicas, waiting for %d", rs.Name, availableReplicas, desiredReplicas)
147+
return false
148+
}
149+
150+
return true
151+
152+
}
153+
136154
// this currently only be used in the canary strategy
137155
func (c *rolloutContext) reconcileTrafficRouting() error {
138156
reconcilers, err := c.newTrafficRoutingReconciler(c)
@@ -243,6 +261,10 @@ func (c *rolloutContext) reconcileTrafficRouting() error {
243261
desiredWeight = weightutil.MaxTrafficWeight(c.rollout)
244262
}
245263
}
264+
265+
if !c.checkReplicasAvailable(c.stableRS, weightutil.MaxTrafficWeight(c.rollout)-desiredWeight) {
266+
return nil
267+
}
246268
// We need to check for revision > 1 because when we first install the rollout we run step 0 this prevents that.
247269
// There is a bigger fix needed for the reasons on why we run step 0 on rollout install, that needs to be explored.
248270
revision, revisionFound := annotations.GetRevisionAnnotation(c.rollout)

rollout/trafficrouting_test.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1431,3 +1431,70 @@ func TestDontWeightOrHaveManagedRoutesDuringInterruptedUpdate(t *testing.T) {
14311431
f.fakeTrafficRouting.AssertCalled(t, "RemoveManagedRoutes", mock.Anything, mock.Anything)
14321432

14331433
}
1434+
1435+
// This test verifies that if we are shifting traffic to stable replicaset without the stable replicaset being available proportional to the weight, the traffic shouldn't be switched immediately to the stable replicaset.
1436+
func TestCheckReplicaSetAvailable(t *testing.T) {
1437+
fix := newFixture(t)
1438+
defer fix.Close()
1439+
1440+
steps := []v1alpha1.CanaryStep{
1441+
{
1442+
SetWeight: pointer.Int32(60),
1443+
},
1444+
{
1445+
Pause: &v1alpha1.RolloutPause{},
1446+
},
1447+
}
1448+
1449+
rollout1 := newCanaryRollout("test-rollout", 10, nil, steps, pointer.Int32(1), intstr.FromInt(1), intstr.FromInt(1))
1450+
rollout1.Spec.Strategy.Canary.DynamicStableScale = true
1451+
rollout1.Spec.Strategy.Canary.TrafficRouting = &v1alpha1.RolloutTrafficRouting{
1452+
SMI: &v1alpha1.SMITrafficRouting{},
1453+
}
1454+
rollout1.Spec.Strategy.Canary.CanaryService = "canary-service"
1455+
rollout1.Spec.Strategy.Canary.StableService = "stable-service"
1456+
rollout1.Status.ReadyReplicas = 10
1457+
rollout1.Status.AvailableReplicas = 10
1458+
1459+
rollout2 := bumpVersion(rollout1)
1460+
1461+
replicaSet1 := newReplicaSetWithStatus(rollout1, 1, 1)
1462+
replicaSet2 := newReplicaSetWithStatus(rollout2, 9, 9)
1463+
1464+
replicaSet1Hash := replicaSet1.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]
1465+
replicaSet2Hash := replicaSet2.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]
1466+
canarySelector := map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: replicaSet2Hash}
1467+
stableSelector := map[string]string{v1alpha1.DefaultRolloutUniqueLabelKey: replicaSet1Hash}
1468+
canarySvc := newService("canary-service", 80, canarySelector, rollout1)
1469+
stableSvc := newService("stable-service", 80, stableSelector, rollout1)
1470+
1471+
rollout2.Spec = rollout1.Spec
1472+
rollout2.Status.StableRS = replicaSet1Hash
1473+
rollout2.Status.CurrentPodHash = replicaSet1Hash
1474+
rollout2.Status.Canary.Weights = &v1alpha1.TrafficWeights{
1475+
Canary: v1alpha1.WeightDestination{
1476+
Weight: 10,
1477+
ServiceName: "canary-service",
1478+
PodTemplateHash: replicaSet2Hash,
1479+
},
1480+
Stable: v1alpha1.WeightDestination{
1481+
Weight: 90,
1482+
ServiceName: "stable-service",
1483+
PodTemplateHash: replicaSet1Hash,
1484+
},
1485+
}
1486+
1487+
fix.kubeobjects = append(fix.kubeobjects, replicaSet1, replicaSet2, canarySvc, stableSvc)
1488+
fix.replicaSetLister = append(fix.replicaSetLister, replicaSet1, replicaSet2)
1489+
1490+
fix.rolloutLister = append(fix.rolloutLister, rollout2)
1491+
fix.objects = append(fix.objects, rollout2)
1492+
1493+
fix.expectUpdateReplicaSetAction(replicaSet1)
1494+
fix.expectUpdateRolloutAction(rollout2)
1495+
fix.expectUpdateReplicaSetAction(replicaSet1)
1496+
fix.expectPatchRolloutAction(rollout2)
1497+
fix.fakeTrafficRouting = newUnmockedFakeTrafficRoutingReconciler()
1498+
fix.fakeTrafficRouting.On("RemoveManagedRoutes", mock.Anything, mock.Anything, mock.Anything).Return(nil)
1499+
fix.run(getKey(rollout1, t))
1500+
}

0 commit comments

Comments
 (0)