Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 60 additions & 4 deletions balancer_wrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,10 +277,17 @@ type healthData struct {
// to the LB policy. This is stored to avoid sending updates when the
// SubConn has already exited connectivity state READY.
connectivityState connectivity.State
// closeHealthProducer stores function to close the ref counted health
// producer. The health producer is automatically closed when the SubConn
// state changes.
closeHealthProducer func()
}

func newHealthData(s connectivity.State) *healthData {
return &healthData{connectivityState: s}
return &healthData{
connectivityState: s,
closeHealthProducer: func() {},
}
}

// updateState is invoked by grpc to push a subConn state update to the
Expand Down Expand Up @@ -413,6 +420,34 @@ func (acbw *acBalancerWrapper) closeProducers() {
}
}

// healthProducerRegisterFn is a type alias for the health producer's function
// for registering listeners.
type healthProducerRegisterFn = func(context.Context, balancer.SubConn, string, func(balancer.SubConnState)) func()

// healthServiceOpts returns the options for client side health checking.
// It returns a nil registerHealthListenerFn if client side health checks are
// disabled.
// Client side health checking is enabled when all the following
// conditions are satisfied:
// 1. Health checking is not disabled using the dial option.
// 2. The health package is imported.
// 3. The health check config is present in the service config.
func (acbw *acBalancerWrapper) healthServiceOpts() (string, healthProducerRegisterFn) {
if acbw.ccb.cc.dopts.disableHealthCheck {
return "", nil
}
regHealthLisFn := internal.RegisterClientHealthCheckListener
if regHealthLisFn == nil {
// The health package is not imported.
return "", nil
}
cfg := acbw.ac.cc.healthCheckConfig()
if cfg == nil {
return "", nil
}
return cfg.ServiceName, regHealthLisFn.(healthProducerRegisterFn)
}

// RegisterHealthListener accepts a health listener from the LB policy. It sends
// updates to the health listener as long as the SubConn's connectivity state
// doesn't change and a new health listener is not registered. To invalidate
Expand All @@ -421,6 +456,7 @@ func (acbw *acBalancerWrapper) closeProducers() {
func (acbw *acBalancerWrapper) RegisterHealthListener(listener func(balancer.SubConnState)) {
acbw.healthMu.Lock()
defer acbw.healthMu.Unlock()
acbw.healthData.closeHealthProducer()
// listeners should not be registered when the connectivity state
// isn't Ready. This may happen when the balancer registers a listener
// after the connectivityState is updated, but before it is notified
Expand All @@ -436,17 +472,37 @@ func (acbw *acBalancerWrapper) RegisterHealthListener(listener func(balancer.Sub
return
}

serviceName, registerFn := acbw.healthServiceOpts()
acbw.ccb.serializer.TrySchedule(func(ctx context.Context) {
if ctx.Err() != nil || acbw.ccb.balancer == nil {
return
}
// Don't send updates if a new listener is registered.
acbw.healthMu.Lock()
defer acbw.healthMu.Unlock()
curHD := acbw.healthData
if curHD != hd {
if acbw.healthData != hd {
return
}
if registerFn == nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to implement a nopHealthListener that is actually a real implementation that only reports Ready, once, to avoid special-case handling here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Refactored to make this method return either the heallh producer's register listener function or a no-op register listener function.

listener(balancer.SubConnState{ConnectivityState: connectivity.Ready})
return
}
listener(balancer.SubConnState{ConnectivityState: connectivity.Ready})
// Serialize the health updates from the health producer with
// other calls into the LB policy.
listenerWrapper := func(scs balancer.SubConnState) {
acbw.ccb.serializer.TrySchedule(func(ctx context.Context) {
if ctx.Err() != nil || acbw.ccb.balancer == nil {
return
}
acbw.healthMu.Lock()
defer acbw.healthMu.Unlock()
if acbw.healthData != hd {
return
}
listener(scs)
})
}

hd.closeHealthProducer = registerFn(ctx, acbw, serviceName, listenerWrapper)
})
}
116 changes: 116 additions & 0 deletions health/producer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
*
* Copyright 2024 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package health

import (
"context"
"sync"

"google.golang.org/grpc"
"google.golang.org/grpc/balancer"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/internal"
"google.golang.org/grpc/internal/grpcsync"
"google.golang.org/grpc/status"
)

func init() {
producerBuilderSingleton = &producerBuilder{}
internal.RegisterClientHealthCheckListener = registerClientSideHealthCheckListener
}

type producerBuilder struct{}

var producerBuilderSingleton *producerBuilder

// Build constructs and returns a producer and its cleanup function.
func (*producerBuilder) Build(cci any) (balancer.Producer, func()) {
doneCh := make(chan struct{})
p := &healthServiceProducer{
cc: cci.(grpc.ClientConnInterface),
cancelDone: doneCh,
cancel: grpcsync.OnceFunc(func() {
close(doneCh)
}),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider a grpcsync.Event instead of having two fields here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was using the two fields, one to trigger the cancellation of the context that is passed to the health client and the second to wait till the health check client to return. Waiting for the client close isn't required. The health checking in addrConn also cancels the context and lets the client shut down asynchronously. I got rid of the channel to wait for the client to return here.

grpc-go/clientconn.go

Lines 1342 to 1354 in c1b6b37

onClose := func(r transport.GoAwayReason) {
ac.mu.Lock()
defer ac.mu.Unlock()
// adjust params based on GoAwayReason
ac.adjustParams(r)
if ctx.Err() != nil {
// Already shut down or connection attempt canceled. tearDown() or
// updateAddrs() already cleared the transport and canceled hctx
// via ac.ctx, and we expected this connection to be closed, so do
// nothing here.
return
}
hcancel()

}
return p, func() {
p.mu.Lock()
defer p.mu.Unlock()
p.cancel()
<-p.cancelDone
}
}

type healthServiceProducer struct {
// The following fields are initialized at build time and read-only after
// that and therefore do not need to be guarded by a mutex.
cc grpc.ClientConnInterface

mu sync.Mutex
cancel func()
cancelDone chan (struct{})
}

// registerClientSideHealthCheckListener accepts a listener to provide server
// health state via the health service.
func registerClientSideHealthCheckListener(ctx context.Context, sc balancer.SubConn, serviceName string, listener func(balancer.SubConnState)) func() {
pr, closeFn := sc.GetOrBuildProducer(producerBuilderSingleton)
p := pr.(*healthServiceProducer)
p.mu.Lock()
defer p.mu.Unlock()
p.cancel()
<-p.cancelDone
if listener == nil {
return closeFn
}

Check warning on line 82 in health/producer.go

View check run for this annotation

Codecov / codecov/patch

health/producer.go#L81-L82

Added lines #L81 - L82 were not covered by tests

p.cancelDone = make(chan struct{})
ctx, cancel := context.WithCancel(ctx)
p.cancel = cancel

go p.startHealthCheck(ctx, sc, serviceName, listener, p.cancelDone)
return closeFn
}

func (p *healthServiceProducer) startHealthCheck(ctx context.Context, sc balancer.SubConn, serviceName string, listener func(balancer.SubConnState), closeCh chan struct{}) {
defer close(closeCh)
newStream := func(method string) (any, error) {
return p.cc.NewStream(ctx, &grpc.StreamDesc{ServerStreams: true}, method)
}

setConnectivityState := func(state connectivity.State, err error) {
listener(balancer.SubConnState{
ConnectivityState: state,
ConnectionError: err,
})
}

// Call the function through the internal variable as tests use it for
// mocking.
err := internal.HealthCheckFunc(ctx, newStream, setConnectivityState, serviceName)
if err == nil {
return
}
if status.Code(err) == codes.Unimplemented {
logger.Errorf("Subchannel health check is unimplemented at server side, thus health check is disabled for SubConn %p", sc)
} else {
logger.Errorf("Health checking failed for SubConn %p: %v", sc, err)
}

Check warning on line 115 in health/producer.go

View check run for this annotation

Codecov / codecov/patch

health/producer.go#L114-L115

Added lines #L114 - L115 were not covered by tests
}
4 changes: 4 additions & 0 deletions internal/internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ import (
var (
// HealthCheckFunc is used to provide client-side LB channel health checking
HealthCheckFunc HealthChecker
// RegisterClientHealthCheckListener is used to provide a listener for
// updates from the client-side health checking service. It returns a
// function that can be called to stop the health producer.
RegisterClientHealthCheckListener any // func(ctx context.Context, sc balancer.SubConn, serviceName string, listener func(balancer.SubConnState)) func()
// BalancerUnregister is exported by package balancer to unregister a balancer.
BalancerUnregister func(name string)
// KeepaliveMinPingTime is the minimum ping interval. This must be 10s by
Expand Down
Loading
Loading