Skip to content

Commit 761c084

Browse files
authored
xds/ringhash: cache connectivity state of subchannels inside picker (#6351)
1 parent 1b66663 commit 761c084

File tree

4 files changed

+22
-32
lines changed

4 files changed

+22
-32
lines changed

xds/internal/balancer/ringhash/e2e/ringhash_balancer_test.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,7 @@ func (s) TestRingHash_ReconnectToMoveOutOfTransientFailure(t *testing.T) {
119119
// Make an RPC to get the ring_hash LB policy to reconnect and thereby move
120120
// to TRANSIENT_FAILURE upon connection failure.
121121
client.EmptyCall(ctx, &testpb.Empty{})
122-
for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) {
123-
if cc.GetState() == connectivity.TransientFailure {
124-
break
125-
}
122+
for state := cc.GetState(); state != connectivity.TransientFailure && cc.WaitForStateChange(ctx, state); state = cc.GetState() {
126123
}
127124
if err := ctx.Err(); err != nil {
128125
t.Fatalf("Timeout waiting for channel to reach %q after server shutdown: %v", connectivity.TransientFailure, err)

xds/internal/balancer/ringhash/picker.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,17 @@ import (
2929
)
3030

3131
type picker struct {
32-
ring *ring
33-
logger *grpclog.PrefixLogger
32+
ring *ring
33+
logger *grpclog.PrefixLogger
34+
subConnStates map[*subConn]connectivity.State
3435
}
3536

3637
func newPicker(ring *ring, logger *grpclog.PrefixLogger) *picker {
37-
return &picker{ring: ring, logger: logger}
38+
states := make(map[*subConn]connectivity.State)
39+
for _, e := range ring.items {
40+
states[e.sc] = e.sc.effectiveState()
41+
}
42+
return &picker{ring: ring, logger: logger, subConnStates: states}
3843
}
3944

4045
// handleRICSResult is the return type of handleRICS. It's needed to wrap the
@@ -54,7 +59,7 @@ type handleRICSResult struct {
5459
// or Shutdown. If it's true, the PickResult and error should be returned from
5560
// Pick() as is.
5661
func (p *picker) handleRICS(e *ringEntry) (handleRICSResult, bool) {
57-
switch state := e.sc.effectiveState(); state {
62+
switch state := p.subConnStates[e.sc]; state {
5863
case connectivity.Ready:
5964
return handleRICSResult{pr: balancer.PickResult{SubConn: e.sc.sc}}, true
6065
case connectivity.Idle:
@@ -118,7 +123,7 @@ func (p *picker) handleTransientFailure(e *ringEntry) (balancer.PickResult, erro
118123
// but don't not trigger Connect() on the other SubConns.
119124
var firstNonFailedFound bool
120125
for ee := nextSkippingDuplicates(p.ring, e2); ee != e; ee = nextSkippingDuplicates(p.ring, ee) {
121-
scState := ee.sc.effectiveState()
126+
scState := p.subConnStates[ee.sc]
122127
if scState == connectivity.Ready {
123128
return balancer.PickResult{SubConn: ee.sc.sc}, nil
124129
}

xds/internal/balancer/ringhash/picker_test.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ import (
2626
"github.com/google/go-cmp/cmp"
2727
"google.golang.org/grpc/balancer"
2828
"google.golang.org/grpc/connectivity"
29+
"google.golang.org/grpc/grpclog"
30+
igrpclog "google.golang.org/grpc/internal/grpclog"
2931
"google.golang.org/grpc/internal/testutils"
3032
)
3133

@@ -96,7 +98,7 @@ func (s) TestPickerPickFirstTwo(t *testing.T) {
9698
}
9799
for _, tt := range tests {
98100
t.Run(tt.name, func(t *testing.T) {
99-
p := &picker{ring: tt.ring}
101+
p := newPicker(tt.ring, igrpclog.NewPrefixLogger(grpclog.Component("xds"), "rh_test"))
100102
got, err := p.Pick(balancer.PickInfo{
101103
Ctx: SetRequestHash(context.Background(), tt.hash),
102104
})
@@ -126,7 +128,7 @@ func (s) TestPickerPickTriggerTFConnect(t *testing.T) {
126128
connectivity.TransientFailure, connectivity.TransientFailure, connectivity.TransientFailure, connectivity.TransientFailure,
127129
connectivity.Idle, connectivity.TransientFailure, connectivity.TransientFailure, connectivity.TransientFailure,
128130
})
129-
p := &picker{ring: ring}
131+
p := newPicker(ring, igrpclog.NewPrefixLogger(grpclog.Component("xds"), "rh_test"))
130132
_, err := p.Pick(balancer.PickInfo{Ctx: SetRequestHash(context.Background(), 5)})
131133
if err == nil {
132134
t.Fatalf("Pick() error = %v, want non-nil", err)
@@ -156,7 +158,7 @@ func (s) TestPickerPickTriggerTFReturnReady(t *testing.T) {
156158
ring := newTestRing([]connectivity.State{
157159
connectivity.TransientFailure, connectivity.TransientFailure, connectivity.TransientFailure, connectivity.Ready,
158160
})
159-
p := &picker{ring: ring}
161+
p := newPicker(ring, igrpclog.NewPrefixLogger(grpclog.Component("xds"), "rh_test"))
160162
pr, err := p.Pick(balancer.PickInfo{Ctx: SetRequestHash(context.Background(), 5)})
161163
if err != nil {
162164
t.Fatalf("Pick() error = %v, want nil", err)
@@ -182,7 +184,7 @@ func (s) TestPickerPickTriggerTFWithIdle(t *testing.T) {
182184
ring := newTestRing([]connectivity.State{
183185
connectivity.TransientFailure, connectivity.TransientFailure, connectivity.Idle, connectivity.TransientFailure, connectivity.TransientFailure,
184186
})
185-
p := &picker{ring: ring}
187+
p := newPicker(ring, igrpclog.NewPrefixLogger(grpclog.Component("xds"), "rh_test"))
186188
_, err := p.Pick(balancer.PickInfo{Ctx: SetRequestHash(context.Background(), 5)})
187189
if err == balancer.ErrNoSubConnAvailable {
188190
t.Fatalf("Pick() error = %v, want %v", err, balancer.ErrNoSubConnAvailable)

xds/internal/balancer/ringhash/ringhash.go

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -347,37 +347,23 @@ func (b *ringhashBalancer) UpdateSubConnState(sc balancer.SubConn, state balance
347347
newSCState := scs.effectiveState()
348348
b.logger.Infof("SubConn's effective old state was: %v, new state is %v", oldSCState, newSCState)
349349

350-
var sendUpdate bool
351-
oldBalancerState := b.state
352350
b.state = b.csEvltr.recordTransition(oldSCState, newSCState)
353-
if oldBalancerState != b.state {
354-
sendUpdate = true
355-
}
356351

357352
switch s {
358-
case connectivity.Idle:
359-
// No need to send an update. No queued RPC can be unblocked. If the
360-
// overall state changed because of this, sendUpdate is already true.
361-
case connectivity.Connecting:
362-
// No need to send an update. No queued RPC can be unblocked. If the
363-
// overall state changed because of this, sendUpdate is already true.
364-
case connectivity.Ready:
365-
// We need to regenerate the picker even if the ring has not changed
366-
// because we could be moving from TRANSIENT_FAILURE to READY, in which
367-
// case, we need to update the error picker returned earlier.
368-
b.regeneratePicker()
369-
sendUpdate = true
370353
case connectivity.TransientFailure:
371354
// Save error to be reported via picker.
372355
b.connErr = state.ConnectionError
373-
b.regeneratePicker()
374356
case connectivity.Shutdown:
375357
// When an address was removed by resolver, b called RemoveSubConn but
376358
// kept the sc's state in scStates. Remove state for this sc here.
377359
delete(b.scStates, sc)
378360
}
379361

380-
if sendUpdate {
362+
if oldSCState != newSCState {
363+
// Because the picker caches the state of the subconns, we always
364+
// regenerate and update the picker when the effective SubConn state
365+
// changes.
366+
b.regeneratePicker()
381367
b.logger.Infof("Pushing new state %v and picker %p", b.state, b.picker)
382368
b.cc.UpdateState(balancer.State{ConnectivityState: b.state, Picker: b.picker})
383369
}

0 commit comments

Comments
 (0)