Skip to content

Commit d8e42c2

Browse files
committed
e2e test and bug fix
1 parent 7b70e67 commit d8e42c2

File tree

3 files changed

+179
-131
lines changed

3 files changed

+179
-131
lines changed

test/xds/xds_client_outlier_detection_test.go

Lines changed: 142 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,31 @@ package xds_test
2020

2121
import (
	"context"
	"errors"
	"fmt"
	"strings"
	"sync/atomic"
	"testing"
	"time"

	v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
	v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
	v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
	v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/internal"
	"google.golang.org/grpc/internal/envconfig"
	"google.golang.org/grpc/internal/stubserver"
	"google.golang.org/grpc/internal/testutils/xds/e2e"
	testgrpc "google.golang.org/grpc/test/grpc_testing"
	testpb "google.golang.org/grpc/test/grpc_testing"
	"google.golang.org/protobuf/types/known/durationpb"
	"google.golang.org/protobuf/types/known/wrapperspb"
)
3544

45+
// TestOutlierDetection tests an xDS configured ClientConn with an Outlier
46+
// Detection balancer present in the system that is a logical no-op. An RPC
47+
// should proceed as normal.
3648
func (s) TestOutlierDetection(t *testing.T) {
3749
oldOD := envconfig.XDSOutlierDetection
3850
envconfig.XDSOutlierDetection = true
@@ -74,3 +86,132 @@ func (s) TestOutlierDetection(t *testing.T) {
7486
t.Fatalf("rpc EmptyCall() failed: %v", err)
7587
}
7688
}
89+
90+
// defaultClientResourcesSpecifyingMultipleBackendsAndOutlierDetection returns
91+
// xDS resources which correspond to multiple upstreams, corresponding different
92+
// backends listening on different localhost:port combinations. The resources
93+
// also configure an Outlier Detection Balancer set up with Failure Percentage
94+
// Algorithm, which ejects endpoints based on failure rate.
95+
func defaultClientResourcesSpecifyingMultipleBackendsAndOutlierDetection(params e2e.ResourceParams, ports []uint32) e2e.UpdateOptions {
96+
routeConfigName := "route-" + params.DialTarget
97+
clusterName := "cluster-" + params.DialTarget
98+
endpointsName := "endpoints-" + params.DialTarget
99+
return e2e.UpdateOptions{
100+
NodeID: params.NodeID,
101+
Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(params.DialTarget, routeConfigName)},
102+
Routes: []*v3routepb.RouteConfiguration{e2e.DefaultRouteConfig(routeConfigName, params.DialTarget, clusterName)},
103+
Clusters: []*v3clusterpb.Cluster{defaultClusterWithOutlierDetection(clusterName, endpointsName, params.SecLevel)},
104+
Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(endpointsName, params.Host, ports)},
105+
}
106+
}
107+
108+
func defaultClusterWithOutlierDetection(clusterName, edsServiceName string, secLevel e2e.SecurityLevel) *v3clusterpb.Cluster {
109+
cluster := e2e.DefaultCluster(clusterName, edsServiceName, secLevel)
110+
cluster.OutlierDetection = &v3clusterpb.OutlierDetection{
111+
Interval: &durationpb.Duration{
112+
Nanos: 500000000,
113+
},
114+
BaseEjectionTime: &durationpb.Duration{Seconds: 30},
115+
MaxEjectionTime: &durationpb.Duration{Seconds: 300},
116+
MaxEjectionPercent: &wrapperspb.UInt32Value{Value: 1},
117+
FailurePercentageThreshold: &wrapperspb.UInt32Value{Value: 50},
118+
EnforcingFailurePercentage: &wrapperspb.UInt32Value{Value: 100},
119+
FailurePercentageRequestVolume: &wrapperspb.UInt32Value{Value: 1},
120+
FailurePercentageMinimumHosts: &wrapperspb.UInt32Value{Value: 1},
121+
}
122+
return cluster
123+
}
124+
125+
// TestOutlierDetectionWithOutlier tests the Outlier Detection Balancer e2e. It
126+
// spins up three backends, one which consistently errors, and configures the
127+
// ClientConn using xDS to connect to all three of those backends. The Outlier
128+
// Detection Balancer should eject the connection to the backend which
129+
// constantly errors, and thus RPC's should mainly go to backend 1 and 2.
130+
func (s) TestOutlierDetectionWithOutlier(t *testing.T) {
131+
oldOD := envconfig.XDSOutlierDetection
132+
envconfig.XDSOutlierDetection = true
133+
internal.RegisterOutlierDetectionBalancerForTesting()
134+
defer func() {
135+
envconfig.XDSOutlierDetection = oldOD
136+
internal.UnregisterOutlierDetectionBalancerForTesting()
137+
}()
138+
139+
managementServer, nodeID, _, resolver, cleanup := e2e.SetupManagementServer(t)
140+
defer cleanup()
141+
142+
// counters for how many times backends got called
143+
var count1, count2, count3 int
144+
145+
// Working backend 1.
146+
port1, cleanup1 := startTestService(t, &stubserver.StubServer{
147+
EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
148+
count1++
149+
return &testpb.Empty{}, nil
150+
},
151+
Address: "localhost:0",
152+
})
153+
defer cleanup1()
154+
155+
// Working backend 2.
156+
port2, cleanup2 := startTestService(t, &stubserver.StubServer{
157+
EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
158+
count2++
159+
return &testpb.Empty{}, nil
160+
},
161+
Address: "localhost:0",
162+
})
163+
defer cleanup2()
164+
// Backend 3 that will always return an error and eventually ejected.
165+
port3, cleanup3 := startTestService(t, &stubserver.StubServer{
166+
EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
167+
count3++
168+
return nil, errors.New("some error")
169+
},
170+
Address: "localhost:0",
171+
})
172+
defer cleanup3()
173+
174+
const serviceName = "my-service-client-side-xds"
175+
resources := defaultClientResourcesSpecifyingMultipleBackendsAndOutlierDetection(e2e.ResourceParams{
176+
DialTarget: serviceName,
177+
NodeID: nodeID,
178+
Host: "localhost",
179+
SecLevel: e2e.SecurityLevelNone,
180+
}, []uint32{port1, port2, port3})
181+
ctx, cancel := context.WithTimeout(context.Background(), time.Second*100)
182+
defer cancel()
183+
if err := managementServer.Update(ctx, resources); err != nil {
184+
t.Fatal(err)
185+
}
186+
187+
cc, err := grpc.Dial(fmt.Sprintf("xds:///%s", serviceName), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(resolver))
188+
if err != nil {
189+
t.Fatalf("failed to dial local test server: %v", err)
190+
}
191+
defer cc.Close()
192+
193+
client := testgrpc.NewTestServiceClient(cc)
194+
for i := 0; i < 2000; i++ {
195+
// Can either error or not depending on the backend called.
196+
if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil && !strings.Contains(err.Error(), "some error") {
197+
t.Fatalf("rpc EmptyCall() failed: %v", err)
198+
}
199+
time.Sleep(time.Millisecond)
200+
}
201+
202+
// Backend 1 should've gotten more than 1/3rd of the load as backend 3
203+
// should get ejected, leaving only 1 and 2.
204+
if count1 < 700 {
205+
t.Fatalf("backend 1 should've gotten more than 1/3rd of the load")
206+
}
207+
// Backend 2 should've gotten more than 1/3rd of the load as backend 3
208+
// should get ejected, leaving only 1 and 2.
209+
if count2 < 700 {
210+
t.Fatalf("backend 2 should've gotten more than 1/3rd of the load")
211+
}
212+
// Backend 3 should've gotten less than 1/3rd of the load since it gets
213+
// ejected.
214+
if count3 > 650 {
215+
t.Fatalf("backend 1 should've gotten more than 1/3rd of the load")
216+
}
217+
}

0 commit comments

Comments
 (0)