Skip to content

Commit 03fee09

Browse files
authored
balancer: fix connectivity state aggregation algorithm to follow the spec (#5473)
1 parent 0d04c6f commit 03fee09

File tree

3 files changed

+315
-53
lines changed

3 files changed

+315
-53
lines changed

balancer/balancer.go

Lines changed: 0 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -371,56 +371,3 @@ type ClientConnState struct {
371371
// ErrBadResolverState may be returned by UpdateClientConnState to indicate a
372372
// problem with the provided name resolver data.
373373
var ErrBadResolverState = errors.New("bad resolver state")
374-
375-
// ConnectivityStateEvaluator takes the connectivity states of multiple SubConns
376-
// and returns one aggregated connectivity state.
377-
//
378-
// It's not thread safe.
379-
type ConnectivityStateEvaluator struct {
380-
numReady uint64 // Number of addrConns in ready state.
381-
numConnecting uint64 // Number of addrConns in connecting state.
382-
numTransientFailure uint64 // Number of addrConns in transient failure state.
383-
numIdle uint64 // Number of addrConns in idle state.
384-
}
385-
386-
// RecordTransition records state change happening in subConn and based on that
387-
// it evaluates what aggregated state should be.
388-
//
389-
// - If at least one SubConn in Ready, the aggregated state is Ready;
390-
// - Else if at least one SubConn in Connecting, the aggregated state is Connecting;
391-
// - Else if at least one SubConn is TransientFailure, the aggregated state is Transient Failure;
392-
// - Else if at least one SubConn is Idle, the aggregated state is Idle;
393-
// - Else there are no subconns and the aggregated state is Transient Failure
394-
//
395-
// Shutdown is not considered.
396-
func (cse *ConnectivityStateEvaluator) RecordTransition(oldState, newState connectivity.State) connectivity.State {
397-
// Update counters.
398-
for idx, state := range []connectivity.State{oldState, newState} {
399-
updateVal := 2*uint64(idx) - 1 // -1 for oldState and +1 for new.
400-
switch state {
401-
case connectivity.Ready:
402-
cse.numReady += updateVal
403-
case connectivity.Connecting:
404-
cse.numConnecting += updateVal
405-
case connectivity.TransientFailure:
406-
cse.numTransientFailure += updateVal
407-
case connectivity.Idle:
408-
cse.numIdle += updateVal
409-
}
410-
}
411-
412-
// Evaluate.
413-
if cse.numReady > 0 {
414-
return connectivity.Ready
415-
}
416-
if cse.numConnecting > 0 {
417-
return connectivity.Connecting
418-
}
419-
if cse.numTransientFailure > 0 {
420-
return connectivity.TransientFailure
421-
}
422-
if cse.numIdle > 0 {
423-
return connectivity.Idle
424-
}
425-
return connectivity.TransientFailure
426-
}

balancer/conn_state_evaluator.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
*
3+
* Copyright 2022 gRPC authors.
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*
17+
*/
18+
19+
package balancer
20+
21+
import "google.golang.org/grpc/connectivity"
22+
23+
// ConnectivityStateEvaluator takes the connectivity states of multiple SubConns
24+
// and returns one aggregated connectivity state.
25+
//
26+
// It's not thread safe.
27+
type ConnectivityStateEvaluator struct {
28+
numReady uint64 // Number of addrConns in ready state.
29+
numConnecting uint64 // Number of addrConns in connecting state.
30+
numTransientFailure uint64 // Number of addrConns in transient failure state.
31+
numIdle uint64 // Number of addrConns in idle state.
32+
}
33+
34+
// RecordTransition records state change happening in subConn and based on that
35+
// it evaluates what aggregated state should be.
36+
//
37+
// - If at least one SubConn in Ready, the aggregated state is Ready;
38+
// - Else if at least one SubConn in Connecting, the aggregated state is Connecting;
39+
// - Else if at least one SubConn is Idle, the aggregated state is Idle;
40+
// - Else if at least one SubConn is TransientFailure (or there are no SubConns), the aggregated state is Transient Failure.
41+
//
42+
// Shutdown is not considered.
43+
func (cse *ConnectivityStateEvaluator) RecordTransition(oldState, newState connectivity.State) connectivity.State {
44+
// Update counters.
45+
for idx, state := range []connectivity.State{oldState, newState} {
46+
updateVal := 2*uint64(idx) - 1 // -1 for oldState and +1 for new.
47+
switch state {
48+
case connectivity.Ready:
49+
cse.numReady += updateVal
50+
case connectivity.Connecting:
51+
cse.numConnecting += updateVal
52+
case connectivity.TransientFailure:
53+
cse.numTransientFailure += updateVal
54+
case connectivity.Idle:
55+
cse.numIdle += updateVal
56+
}
57+
}
58+
59+
// Evaluate.
60+
if cse.numReady > 0 {
61+
return connectivity.Ready
62+
}
63+
if cse.numConnecting > 0 {
64+
return connectivity.Connecting
65+
}
66+
if cse.numIdle > 0 {
67+
return connectivity.Idle
68+
}
69+
return connectivity.TransientFailure
70+
}
Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
/*
2+
*
3+
* Copyright 2022 gRPC authors.
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*
17+
*/
18+
19+
package balancer
20+
21+
import (
22+
"testing"
23+
24+
"google.golang.org/grpc/connectivity"
25+
"google.golang.org/grpc/internal/grpctest"
26+
)
27+
28+
type s struct {
29+
grpctest.Tester
30+
}
31+
32+
func Test(t *testing.T) {
33+
grpctest.RunSubTests(t, s{})
34+
}
35+
36+
// TestRecordTransition_FirstStateChange tests the first call to
37+
// RecordTransition where the `oldState` is usually set to `Shutdown` (a state
38+
// that the ConnectivityStateEvaluator is set to ignore).
39+
func (s) TestRecordTransition_FirstStateChange(t *testing.T) {
40+
tests := []struct {
41+
newState connectivity.State
42+
wantState connectivity.State
43+
}{
44+
{
45+
newState: connectivity.Idle,
46+
wantState: connectivity.Idle,
47+
},
48+
{
49+
newState: connectivity.Connecting,
50+
wantState: connectivity.Connecting,
51+
},
52+
{
53+
newState: connectivity.Ready,
54+
wantState: connectivity.Ready,
55+
},
56+
{
57+
newState: connectivity.TransientFailure,
58+
wantState: connectivity.TransientFailure,
59+
},
60+
{
61+
newState: connectivity.Shutdown,
62+
wantState: connectivity.TransientFailure,
63+
},
64+
}
65+
for _, test := range tests {
66+
cse := &ConnectivityStateEvaluator{}
67+
if gotState := cse.RecordTransition(connectivity.Shutdown, test.newState); gotState != test.wantState {
68+
t.Fatalf("RecordTransition(%v, %v) = %v, want %v", connectivity.Shutdown, test.newState, gotState, test.wantState)
69+
}
70+
}
71+
}
72+
73+
// TestRecordTransition_SameState tests the scenario where state transitions to
74+
// the same state are recorded multiple times.
75+
func (s) TestRecordTransition_SameState(t *testing.T) {
76+
tests := []struct {
77+
newState connectivity.State
78+
wantState connectivity.State
79+
}{
80+
{
81+
newState: connectivity.Idle,
82+
wantState: connectivity.Idle,
83+
},
84+
{
85+
newState: connectivity.Connecting,
86+
wantState: connectivity.Connecting,
87+
},
88+
{
89+
newState: connectivity.Ready,
90+
wantState: connectivity.Ready,
91+
},
92+
{
93+
newState: connectivity.TransientFailure,
94+
wantState: connectivity.TransientFailure,
95+
},
96+
{
97+
newState: connectivity.Shutdown,
98+
wantState: connectivity.TransientFailure,
99+
},
100+
}
101+
const numStateChanges = 5
102+
for _, test := range tests {
103+
cse := &ConnectivityStateEvaluator{}
104+
var prevState, gotState connectivity.State
105+
prevState = connectivity.Shutdown
106+
for i := 0; i < numStateChanges; i++ {
107+
gotState = cse.RecordTransition(prevState, test.newState)
108+
prevState = test.newState
109+
}
110+
if gotState != test.wantState {
111+
t.Fatalf("RecordTransition() = %v, want %v", gotState, test.wantState)
112+
}
113+
}
114+
}
115+
116+
// TestRecordTransition_SingleSubConn_DifferentStates tests some common
117+
// connectivity state change scenarios, on a single subConn.
118+
func (s) TestRecordTransition_SingleSubConn_DifferentStates(t *testing.T) {
119+
tests := []struct {
120+
name string
121+
states []connectivity.State
122+
wantState connectivity.State
123+
}{
124+
{
125+
name: "regular transition to ready",
126+
states: []connectivity.State{connectivity.Idle, connectivity.Connecting, connectivity.Ready},
127+
wantState: connectivity.Ready,
128+
},
129+
{
130+
name: "regular transition to transient failure",
131+
states: []connectivity.State{connectivity.Idle, connectivity.Connecting, connectivity.TransientFailure},
132+
wantState: connectivity.TransientFailure,
133+
},
134+
{
135+
name: "regular transition to ready",
136+
states: []connectivity.State{connectivity.Idle, connectivity.Connecting, connectivity.Ready, connectivity.Idle},
137+
wantState: connectivity.Idle,
138+
},
139+
{
140+
name: "transition from ready to transient failure",
141+
states: []connectivity.State{connectivity.Idle, connectivity.Connecting, connectivity.Ready, connectivity.TransientFailure},
142+
wantState: connectivity.TransientFailure,
143+
},
144+
{
145+
name: "transition from transient failure back to ready",
146+
states: []connectivity.State{connectivity.Idle, connectivity.Connecting, connectivity.Ready, connectivity.TransientFailure, connectivity.Ready},
147+
wantState: connectivity.Ready,
148+
},
149+
{
150+
// This state transition is usually suppressed at the LB policy level, by
151+
// not calling RecordTransition.
152+
name: "transition from transient failure back to idle",
153+
states: []connectivity.State{connectivity.Idle, connectivity.Connecting, connectivity.Ready, connectivity.TransientFailure, connectivity.Idle},
154+
wantState: connectivity.Idle,
155+
},
156+
{
157+
// This state transition is usually suppressed at the LB policy level, by
158+
// not calling RecordTransition.
159+
name: "transition from transient failure back to connecting",
160+
states: []connectivity.State{connectivity.Idle, connectivity.Connecting, connectivity.Ready, connectivity.TransientFailure, connectivity.Connecting},
161+
wantState: connectivity.Connecting,
162+
},
163+
}
164+
165+
for _, test := range tests {
166+
t.Run(test.name, func(t *testing.T) {
167+
cse := &ConnectivityStateEvaluator{}
168+
var prevState, gotState connectivity.State
169+
prevState = connectivity.Shutdown
170+
for _, newState := range test.states {
171+
gotState = cse.RecordTransition(prevState, newState)
172+
prevState = newState
173+
}
174+
if gotState != test.wantState {
175+
t.Fatalf("RecordTransition() = %v, want %v", gotState, test.wantState)
176+
}
177+
})
178+
}
179+
}
180+
181+
// TestRecordTransition_MultipleSubConns_DifferentStates tests state transitions
182+
// among multiple subConns, and verifies that the connectivity state aggregation
183+
// algorithm produces the expected aggregate connectivity state.
184+
func (s) TestRecordTransition_MultipleSubConns_DifferentStates(t *testing.T) {
185+
tests := []struct {
186+
name string
187+
// Each entry in this slice corresponds to the state changes happening on an
188+
// individual subConn.
189+
subConnStates [][]connectivity.State
190+
wantState connectivity.State
191+
}{
192+
{
193+
name: "atleast one ready",
194+
subConnStates: [][]connectivity.State{
195+
{connectivity.Idle, connectivity.Connecting, connectivity.Ready},
196+
{connectivity.Idle},
197+
{connectivity.Idle, connectivity.Connecting},
198+
{connectivity.Idle, connectivity.Connecting, connectivity.TransientFailure},
199+
},
200+
wantState: connectivity.Ready,
201+
},
202+
{
203+
name: "atleast one connecting",
204+
subConnStates: [][]connectivity.State{
205+
{connectivity.Idle, connectivity.Connecting, connectivity.Ready, connectivity.Connecting},
206+
{connectivity.Idle},
207+
{connectivity.Idle, connectivity.Connecting, connectivity.TransientFailure},
208+
},
209+
wantState: connectivity.Connecting,
210+
},
211+
{
212+
name: "atleast one idle",
213+
subConnStates: [][]connectivity.State{
214+
{connectivity.Idle, connectivity.Connecting, connectivity.Ready, connectivity.Idle},
215+
{connectivity.Idle, connectivity.Connecting, connectivity.TransientFailure},
216+
},
217+
wantState: connectivity.Idle,
218+
},
219+
{
220+
name: "atleast one transient failure",
221+
subConnStates: [][]connectivity.State{
222+
{connectivity.Idle, connectivity.Connecting, connectivity.Ready, connectivity.TransientFailure},
223+
{connectivity.TransientFailure},
224+
},
225+
wantState: connectivity.TransientFailure,
226+
},
227+
}
228+
229+
for _, test := range tests {
230+
t.Run(test.name, func(t *testing.T) {
231+
cse := &ConnectivityStateEvaluator{}
232+
var prevState, gotState connectivity.State
233+
for _, scStates := range test.subConnStates {
234+
prevState = connectivity.Shutdown
235+
for _, newState := range scStates {
236+
gotState = cse.RecordTransition(prevState, newState)
237+
prevState = newState
238+
}
239+
}
240+
if gotState != test.wantState {
241+
t.Fatalf("RecordTransition() = %v, want %v", gotState, test.wantState)
242+
}
243+
})
244+
}
245+
}

0 commit comments

Comments
 (0)