diff --git a/balancer/balancer.go b/balancer/balancer.go index d79560a2e268..b1ef32f11622 100644 --- a/balancer/balancer.go +++ b/balancer/balancer.go @@ -232,8 +232,8 @@ type BuildOptions struct { // implementations which do not communicate with a remote load balancer // server can ignore this field. Authority string - // ChannelzParentID is the parent ClientConn's channelz ID. - ChannelzParentID *channelz.Identifier + // ChannelzParent is the parent ClientConn's channelz channel. + ChannelzParent channelz.Identifier // CustomUserAgent is the custom user agent set on the parent ClientConn. // The balancer should set the same custom user agent if it creates a // ClientConn. diff --git a/balancer/grpclb/grpclb_remote_balancer.go b/balancer/grpclb/grpclb_remote_balancer.go index f8b5229c3536..506fae0d4e2d 100644 --- a/balancer/grpclb/grpclb_remote_balancer.go +++ b/balancer/grpclb/grpclb_remote_balancer.go @@ -246,7 +246,7 @@ func (lb *lbBalancer) newRemoteBalancerCCWrapper() error { // Explicitly set pickfirst as the balancer. dopts = append(dopts, grpc.WithDefaultServiceConfig(`{"loadBalancingPolicy":"pick_first"}`)) dopts = append(dopts, grpc.WithResolvers(lb.manualResolver)) - dopts = append(dopts, grpc.WithChannelzParentID(lb.opt.ChannelzParentID)) + dopts = append(dopts, grpc.WithChannelzParentID(lb.opt.ChannelzParent)) // Enable Keepalive for grpclb client. 
dopts = append(dopts, grpc.WithKeepaliveParams(keepalive.ClientParameters{ diff --git a/balancer_wrapper.go b/balancer_wrapper.go index b5e30cff0215..9ab7feab4110 100644 --- a/balancer_wrapper.go +++ b/balancer_wrapper.go @@ -72,13 +72,13 @@ func newCCBalancerWrapper(cc *ClientConn) *ccBalancerWrapper { ccb := &ccBalancerWrapper{ cc: cc, opts: balancer.BuildOptions{ - DialCreds: cc.dopts.copts.TransportCredentials, - CredsBundle: cc.dopts.copts.CredsBundle, - Dialer: cc.dopts.copts.Dialer, - Authority: cc.authority, - CustomUserAgent: cc.dopts.copts.UserAgent, - ChannelzParentID: cc.channelzID, - Target: cc.parsedTarget, + DialCreds: cc.dopts.copts.TransportCredentials, + CredsBundle: cc.dopts.copts.CredsBundle, + Dialer: cc.dopts.copts.Dialer, + Authority: cc.authority, + CustomUserAgent: cc.dopts.copts.UserAgent, + ChannelzParent: cc.channelz, + Target: cc.parsedTarget, }, serializer: grpcsync.NewCallbackSerializer(ctx), serializerCancel: cancel, @@ -155,14 +155,14 @@ func (ccb *ccBalancerWrapper) switchTo(name string) { func (ccb *ccBalancerWrapper) buildLoadBalancingPolicy(name string) { builder := balancer.Get(name) if builder == nil { - channelz.Warningf(logger, ccb.cc.channelzID, "Channel switches to new LB policy %q, since the specified LB policy %q was not registered", PickFirstBalancerName, name) + channelz.Warningf(logger, ccb.cc.channelz, "Channel switches to new LB policy %q, since the specified LB policy %q was not registered", PickFirstBalancerName, name) builder = newPickfirstBuilder() } else { - channelz.Infof(logger, ccb.cc.channelzID, "Channel switches to new LB policy %q", name) + channelz.Infof(logger, ccb.cc.channelz, "Channel switches to new LB policy %q", name) } if err := ccb.balancer.SwitchTo(builder); err != nil { - channelz.Errorf(logger, ccb.cc.channelzID, "Channel failed to build new LB policy %q: %v", name, err) + channelz.Errorf(logger, ccb.cc.channelz, "Channel failed to build new LB policy %q: %v", name, err) return } 
ccb.curBalancerName = builder.Name() @@ -175,7 +175,7 @@ func (ccb *ccBalancerWrapper) close() { ccb.mu.Lock() ccb.closed = true ccb.mu.Unlock() - channelz.Info(logger, ccb.cc.channelzID, "ccBalancerWrapper: closing") + channelz.Info(logger, ccb.cc.channelz, "ccBalancerWrapper: closing") ccb.serializer.Schedule(func(context.Context) { if ccb.balancer == nil { return @@ -212,7 +212,7 @@ func (ccb *ccBalancerWrapper) NewSubConn(addrs []resolver.Address, opts balancer } ac, err := ccb.cc.newAddrConnLocked(addrs, opts) if err != nil { - channelz.Warningf(logger, ccb.cc.channelzID, "acBalancerWrapper: NewSubConn: failed to newAddrConn: %v", err) + channelz.Warningf(logger, ccb.cc.channelz, "acBalancerWrapper: NewSubConn: failed to newAddrConn: %v", err) return nil, err } acbw := &acBalancerWrapper{ @@ -304,7 +304,7 @@ func (acbw *acBalancerWrapper) updateState(s connectivity.State, err error) { } func (acbw *acBalancerWrapper) String() string { - return fmt.Sprintf("SubConn(id:%d)", acbw.ac.channelzID.Int()) + return fmt.Sprintf("SubConn(id:%d)", acbw.ac.channelz.ID) } func (acbw *acBalancerWrapper) UpdateAddresses(addrs []resolver.Address) { diff --git a/channelz/service/func_linux.go b/channelz/service/func_linux.go index 0873603c8520..7523a52813dc 100644 --- a/channelz/service/func_linux.go +++ b/channelz/service/func_linux.go @@ -33,6 +33,9 @@ func convertToPtypesDuration(sec int64, usec int64) *durpb.Duration { } func sockoptToProto(skopts *channelz.SocketOptionData) []*channelzpb.SocketOption { + if skopts == nil { + return nil + } var opts []*channelzpb.SocketOption if skopts.Linger != nil { additional, err := anypb.New(&channelzpb.SocketOptionLinger{ diff --git a/channelz/service/service.go b/channelz/service/service.go index ae19ed3792ea..51a0a33f433a 100644 --- a/channelz/service/service.go +++ b/channelz/service/service.go @@ -22,6 +22,7 @@ package service import ( "context" "net" + "time" "github.com/golang/protobuf/ptypes" wrpb 
"github.com/golang/protobuf/ptypes/wrappers" @@ -62,8 +63,11 @@ type serverImpl struct { channelzgrpc.UnimplementedChannelzServer } -func connectivityStateToProto(s connectivity.State) *channelzpb.ChannelConnectivityState { - switch s { +func connectivityStateToProto(s *connectivity.State) *channelzpb.ChannelConnectivityState { + if s == nil { + return &channelzpb.ChannelConnectivityState{State: channelzpb.ChannelConnectivityState_UNKNOWN} + } + switch *s { case connectivity.Idle: return &channelzpb.ChannelConnectivityState{State: channelzpb.ChannelConnectivityState_IDLE} case connectivity.Connecting: @@ -81,6 +85,9 @@ func connectivityStateToProto(s connectivity.State) *channelzpb.ChannelConnectiv func channelTraceToProto(ct *channelz.ChannelTrace) *channelzpb.ChannelTrace { pbt := &channelzpb.ChannelTrace{} + if ct == nil { + return pbt + } pbt.NumEventsLogged = ct.EventNum if ts, err := ptypes.TimestampProto(ct.CreationTime); err == nil { pbt.CreationTimestamp = ts @@ -108,73 +115,67 @@ func channelTraceToProto(ct *channelz.ChannelTrace) *channelzpb.ChannelTrace { return pbt } -func channelMetricToProto(cm *channelz.ChannelMetric) *channelzpb.Channel { +func strFromPointer(s *string) string { + if s == nil { + return "" + } + return *s +} + +func channelMetricToProto(cm *channelz.Channel) *channelzpb.Channel { c := &channelzpb.Channel{} c.Ref = &channelzpb.ChannelRef{ChannelId: cm.ID, Name: cm.RefName} c.Data = &channelzpb.ChannelData{ - State: connectivityStateToProto(cm.ChannelData.State), - Target: cm.ChannelData.Target, - CallsStarted: cm.ChannelData.CallsStarted, - CallsSucceeded: cm.ChannelData.CallsSucceeded, - CallsFailed: cm.ChannelData.CallsFailed, + State: connectivityStateToProto(cm.ChannelMetrics.State.Load()), + Target: strFromPointer(cm.ChannelMetrics.Target.Load()), + CallsStarted: cm.ChannelMetrics.CallsStarted.Load(), + CallsSucceeded: cm.ChannelMetrics.CallsSucceeded.Load(), + CallsFailed: cm.ChannelMetrics.CallsFailed.Load(), } - if ts, err 
:= ptypes.TimestampProto(cm.ChannelData.LastCallStartedTimestamp); err == nil { + if ts, err := ptypes.TimestampProto(time.Unix(0, cm.ChannelMetrics.LastCallStartedTimestamp.Load())); err == nil { c.Data.LastCallStartedTimestamp = ts } - nestedChans := make([]*channelzpb.ChannelRef, 0, len(cm.NestedChans)) - for id, ref := range cm.NestedChans { + ncs := cm.NestedChans() + nestedChans := make([]*channelzpb.ChannelRef, 0, len(ncs)) + for id, ref := range ncs { nestedChans = append(nestedChans, &channelzpb.ChannelRef{ChannelId: id, Name: ref}) } c.ChannelRef = nestedChans - subChans := make([]*channelzpb.SubchannelRef, 0, len(cm.SubChans)) - for id, ref := range cm.SubChans { + scs := cm.SubChans() + subChans := make([]*channelzpb.SubchannelRef, 0, len(scs)) + for id, ref := range scs { subChans = append(subChans, &channelzpb.SubchannelRef{SubchannelId: id, Name: ref}) } c.SubchannelRef = subChans - sockets := make([]*channelzpb.SocketRef, 0, len(cm.Sockets)) - for id, ref := range cm.Sockets { - sockets = append(sockets, &channelzpb.SocketRef{SocketId: id, Name: ref}) - } - c.SocketRef = sockets - c.Data.Trace = channelTraceToProto(cm.Trace) + c.Data.Trace = channelTraceToProto(cm.Trace()) return c } -func subChannelMetricToProto(cm *channelz.SubChannelMetric) *channelzpb.Subchannel { +func subChannelMetricToProto(cm *channelz.SubChannel) *channelzpb.Subchannel { sc := &channelzpb.Subchannel{} sc.Ref = &channelzpb.SubchannelRef{SubchannelId: cm.ID, Name: cm.RefName} sc.Data = &channelzpb.ChannelData{ - State: connectivityStateToProto(cm.ChannelData.State), - Target: cm.ChannelData.Target, - CallsStarted: cm.ChannelData.CallsStarted, - CallsSucceeded: cm.ChannelData.CallsSucceeded, - CallsFailed: cm.ChannelData.CallsFailed, + State: connectivityStateToProto(cm.ChannelMetrics.State.Load()), + Target: strFromPointer(cm.ChannelMetrics.Target.Load()), + CallsStarted: cm.ChannelMetrics.CallsStarted.Load(), + CallsSucceeded: cm.ChannelMetrics.CallsSucceeded.Load(), + 
CallsFailed: cm.ChannelMetrics.CallsFailed.Load(), } - if ts, err := ptypes.TimestampProto(cm.ChannelData.LastCallStartedTimestamp); err == nil { + if ts, err := ptypes.TimestampProto(time.Unix(0, cm.ChannelMetrics.LastCallStartedTimestamp.Load())); err == nil { sc.Data.LastCallStartedTimestamp = ts } - nestedChans := make([]*channelzpb.ChannelRef, 0, len(cm.NestedChans)) - for id, ref := range cm.NestedChans { - nestedChans = append(nestedChans, &channelzpb.ChannelRef{ChannelId: id, Name: ref}) - } - sc.ChannelRef = nestedChans - subChans := make([]*channelzpb.SubchannelRef, 0, len(cm.SubChans)) - for id, ref := range cm.SubChans { - subChans = append(subChans, &channelzpb.SubchannelRef{SubchannelId: id, Name: ref}) - } - sc.SubchannelRef = subChans - - sockets := make([]*channelzpb.SocketRef, 0, len(cm.Sockets)) - for id, ref := range cm.Sockets { + skts := cm.Sockets() + sockets := make([]*channelzpb.SocketRef, 0, len(skts)) + for id, ref := range skts { sockets = append(sockets, &channelzpb.SocketRef{SocketId: id, Name: ref}) } sc.SocketRef = sockets - sc.Data.Trace = channelTraceToProto(cm.Trace) + sc.Data.Trace = channelTraceToProto(cm.Trace()) return sc } @@ -199,6 +200,9 @@ func securityToProto(se credentials.ChannelzSecurityValue) *channelzpb.Security } func addrToProto(a net.Addr) *channelzpb.Address { + if a == nil { + return nil + } switch a.Network() { case "udp": // TODO: Address_OtherAddress{}. Need proto def for Value. 
@@ -218,75 +222,70 @@ func addrToProto(a net.Addr) *channelzpb.Address { return &channelzpb.Address{} } -func socketMetricToProto(sm *channelz.SocketMetric) *channelzpb.Socket { +func socketMetricToProto(skt *channelz.Socket) *channelzpb.Socket { s := &channelzpb.Socket{} - s.Ref = &channelzpb.SocketRef{SocketId: sm.ID, Name: sm.RefName} + s.Ref = &channelzpb.SocketRef{SocketId: skt.ID, Name: skt.RefName} s.Data = &channelzpb.SocketData{ - StreamsStarted: sm.SocketData.StreamsStarted, - StreamsSucceeded: sm.SocketData.StreamsSucceeded, - StreamsFailed: sm.SocketData.StreamsFailed, - MessagesSent: sm.SocketData.MessagesSent, - MessagesReceived: sm.SocketData.MessagesReceived, - KeepAlivesSent: sm.SocketData.KeepAlivesSent, - } - if ts, err := ptypes.TimestampProto(sm.SocketData.LastLocalStreamCreatedTimestamp); err == nil { + StreamsStarted: skt.SocketMetrics.StreamsStarted.Load(), + StreamsSucceeded: skt.SocketMetrics.StreamsSucceeded.Load(), + StreamsFailed: skt.SocketMetrics.StreamsFailed.Load(), + MessagesSent: skt.SocketMetrics.MessagesSent.Load(), + MessagesReceived: skt.SocketMetrics.MessagesReceived.Load(), + KeepAlivesSent: skt.SocketMetrics.KeepAlivesSent.Load(), + } + if ts, err := ptypes.TimestampProto(time.Unix(0, skt.SocketMetrics.LastLocalStreamCreatedTimestamp.Load())); err == nil { s.Data.LastLocalStreamCreatedTimestamp = ts } - if ts, err := ptypes.TimestampProto(sm.SocketData.LastRemoteStreamCreatedTimestamp); err == nil { + if ts, err := ptypes.TimestampProto(time.Unix(0, skt.SocketMetrics.LastRemoteStreamCreatedTimestamp.Load())); err == nil { s.Data.LastRemoteStreamCreatedTimestamp = ts } - if ts, err := ptypes.TimestampProto(sm.SocketData.LastMessageSentTimestamp); err == nil { + if ts, err := ptypes.TimestampProto(time.Unix(0, skt.SocketMetrics.LastMessageSentTimestamp.Load())); err == nil { s.Data.LastMessageSentTimestamp = ts } - if ts, err := ptypes.TimestampProto(sm.SocketData.LastMessageReceivedTimestamp); err == nil { + if ts, err := 
ptypes.TimestampProto(time.Unix(0, skt.SocketMetrics.LastMessageReceivedTimestamp.Load())); err == nil { s.Data.LastMessageReceivedTimestamp = ts } - s.Data.LocalFlowControlWindow = &wrpb.Int64Value{Value: sm.SocketData.LocalFlowControlWindow} - s.Data.RemoteFlowControlWindow = &wrpb.Int64Value{Value: sm.SocketData.RemoteFlowControlWindow} - - if sm.SocketData.SocketOptions != nil { - s.Data.Option = sockoptToProto(sm.SocketData.SocketOptions) - } - if sm.SocketData.Security != nil { - s.Security = securityToProto(sm.SocketData.Security) + if skt.EphemeralMetrics != nil { + e := skt.EphemeralMetrics() + s.Data.LocalFlowControlWindow = &wrpb.Int64Value{Value: e.LocalFlowControlWindow} + s.Data.RemoteFlowControlWindow = &wrpb.Int64Value{Value: e.RemoteFlowControlWindow} } - if sm.SocketData.LocalAddr != nil { - s.Local = addrToProto(sm.SocketData.LocalAddr) - } - if sm.SocketData.RemoteAddr != nil { - s.Remote = addrToProto(sm.SocketData.RemoteAddr) - } - s.RemoteName = sm.SocketData.RemoteName + s.Data.Option = sockoptToProto(skt.SocketOptions) + s.Security = securityToProto(skt.Security) + s.Local = addrToProto(skt.LocalAddr) + s.Remote = addrToProto(skt.RemoteAddr) + s.RemoteName = skt.RemoteName return s } func (s *serverImpl) GetTopChannels(ctx context.Context, req *channelzpb.GetTopChannelsRequest) (*channelzpb.GetTopChannelsResponse, error) { - metrics, end := channelz.GetTopChannels(req.GetStartChannelId(), req.GetMaxResults()) + chans, end := channelz.GetTopChannels(req.GetStartChannelId(), int(req.GetMaxResults())) resp := &channelzpb.GetTopChannelsResponse{} - for _, m := range metrics { - resp.Channel = append(resp.Channel, channelMetricToProto(m)) + for _, ch := range chans { + resp.Channel = append(resp.Channel, channelMetricToProto(ch)) } resp.End = end return resp, nil } -func serverMetricToProto(sm *channelz.ServerMetric) *channelzpb.Server { +func serverMetricToProto(sm *channelz.Server) *channelzpb.Server { s := &channelzpb.Server{} s.Ref = 
&channelzpb.ServerRef{ServerId: sm.ID, Name: sm.RefName} s.Data = &channelzpb.ServerData{ - CallsStarted: sm.ServerData.CallsStarted, - CallsSucceeded: sm.ServerData.CallsSucceeded, - CallsFailed: sm.ServerData.CallsFailed, + CallsStarted: sm.ServerMetrics.CallsStarted.Load(), + CallsSucceeded: sm.ServerMetrics.CallsSucceeded.Load(), + CallsFailed: sm.ServerMetrics.CallsFailed.Load(), } - if ts, err := ptypes.TimestampProto(sm.ServerData.LastCallStartedTimestamp); err == nil { + if ts, err := ptypes.TimestampProto(time.Unix(0, sm.ServerMetrics.LastCallStartedTimestamp.Load())); err == nil { s.Data.LastCallStartedTimestamp = ts } - sockets := make([]*channelzpb.SocketRef, 0, len(sm.ListenSockets)) - for id, ref := range sm.ListenSockets { + lss := sm.ListenSockets() + sockets := make([]*channelzpb.SocketRef, 0, len(lss)) + for id, ref := range lss { sockets = append(sockets, &channelzpb.SocketRef{SocketId: id, Name: ref}) } s.ListenSocket = sockets @@ -294,7 +293,7 @@ func serverMetricToProto(sm *channelz.ServerMetric) *channelzpb.Server { } func (s *serverImpl) GetServers(ctx context.Context, req *channelzpb.GetServersRequest) (*channelzpb.GetServersResponse, error) { - metrics, end := channelz.GetServers(req.GetStartServerId(), req.GetMaxResults()) + metrics, end := channelz.GetServers(req.GetStartServerId(), int(req.GetMaxResults())) resp := &channelzpb.GetServersResponse{} for _, m := range metrics { resp.Server = append(resp.Server, serverMetricToProto(m)) @@ -304,9 +303,9 @@ func (s *serverImpl) GetServers(ctx context.Context, req *channelzpb.GetServersR } func (s *serverImpl) GetServerSockets(ctx context.Context, req *channelzpb.GetServerSocketsRequest) (*channelzpb.GetServerSocketsResponse, error) { - metrics, end := channelz.GetServerSockets(req.GetServerId(), req.GetStartSocketId(), req.GetMaxResults()) + skts, end := channelz.GetServerSockets(req.GetServerId(), req.GetStartSocketId(), int(req.GetMaxResults())) resp := 
&channelzpb.GetServerSocketsResponse{} - for _, m := range metrics { + for _, m := range skts { resp.SocketRef = append(resp.SocketRef, &channelzpb.SocketRef{SocketId: m.ID, Name: m.RefName}) } resp.End = end @@ -314,25 +313,25 @@ func (s *serverImpl) GetServerSockets(ctx context.Context, req *channelzpb.GetSe } func (s *serverImpl) GetChannel(ctx context.Context, req *channelzpb.GetChannelRequest) (*channelzpb.GetChannelResponse, error) { - var metric *channelz.ChannelMetric - if metric = channelz.GetChannel(req.GetChannelId()); metric == nil { + ch := channelz.GetChannel(req.GetChannelId()) + if ch == nil { return nil, status.Errorf(codes.NotFound, "requested channel %d not found", req.GetChannelId()) } - resp := &channelzpb.GetChannelResponse{Channel: channelMetricToProto(metric)} + resp := &channelzpb.GetChannelResponse{Channel: channelMetricToProto(ch)} return resp, nil } func (s *serverImpl) GetSubchannel(ctx context.Context, req *channelzpb.GetSubchannelRequest) (*channelzpb.GetSubchannelResponse, error) { - var metric *channelz.SubChannelMetric - if metric = channelz.GetSubChannel(req.GetSubchannelId()); metric == nil { + subChan := channelz.GetSubChannel(req.GetSubchannelId()) + if subChan == nil { return nil, status.Errorf(codes.NotFound, "requested sub channel %d not found", req.GetSubchannelId()) } - resp := &channelzpb.GetSubchannelResponse{Subchannel: subChannelMetricToProto(metric)} + resp := &channelzpb.GetSubchannelResponse{Subchannel: subChannelMetricToProto(subChan)} return resp, nil } func (s *serverImpl) GetSocket(ctx context.Context, req *channelzpb.GetSocketRequest) (*channelzpb.GetSocketResponse, error) { - var metric *channelz.SocketMetric + var metric *channelz.Socket if metric = channelz.GetSocket(req.GetSocketId()); metric == nil { return nil, status.Errorf(codes.NotFound, "requested socket %d not found", req.GetSocketId()) } @@ -341,8 +340,8 @@ func (s *serverImpl) GetSocket(ctx context.Context, req *channelzpb.GetSocketReq } func (s 
*serverImpl) GetServer(ctx context.Context, req *channelzpb.GetServerRequest) (*channelzpb.GetServerResponse, error) { - var metric *channelz.ServerMetric - if metric = channelz.GetServer(req.GetServerId()); metric == nil { + metric := channelz.GetServer(req.GetServerId()) + if metric == nil { return nil, status.Errorf(codes.NotFound, "requested server %d not found", req.GetServerId()) } resp := &channelzpb.GetServerResponse{Server: serverMetricToProto(metric)} diff --git a/channelz/service/service_sktopt_test.go b/channelz/service/service_sktopt_test.go index 1da38aa7fbf3..8c6473ed7b61 100644 --- a/channelz/service/service_sktopt_test.go +++ b/channelz/service/service_sktopt_test.go @@ -28,138 +28,71 @@ package service import ( "context" - "strconv" "testing" + "time" - "github.com/golang/protobuf/ptypes" "github.com/google/go-cmp/cmp" "golang.org/x/sys/unix" "google.golang.org/grpc/internal/channelz" + "google.golang.org/grpc/internal/testutils" "google.golang.org/protobuf/testing/protocmp" + "google.golang.org/protobuf/types/known/durationpb" - durpb "github.com/golang/protobuf/ptypes/duration" channelzpb "google.golang.org/grpc/channelz/grpc_channelz_v1" ) -func init() { - // Assign protoToSocketOption to protoToSocketOpt in order to enable socket option - // data conversion from proto message to channelz defined struct. 
- protoToSocketOpt = protoToSocketOption -} - -func convertToDuration(d *durpb.Duration) (sec int64, usec int64) { - if d != nil { - if dur, err := ptypes.Duration(d); err == nil { - sec = int64(int64(dur) / 1e9) - usec = (int64(dur) - sec*1e9) / 1e3 - } - } - return -} - -func protoToLinger(protoLinger *channelzpb.SocketOptionLinger) *unix.Linger { - linger := &unix.Linger{} - if protoLinger.GetActive() { - linger.Onoff = 1 - } - lv, _ := convertToDuration(protoLinger.GetDuration()) - linger.Linger = int32(lv) - return linger -} - -func protoToSocketOption(skopts []*channelzpb.SocketOption) *channelz.SocketOptionData { - skdata := &channelz.SocketOptionData{} - for _, opt := range skopts { - switch opt.GetName() { - case "SO_LINGER": - protoLinger := &channelzpb.SocketOptionLinger{} - err := ptypes.UnmarshalAny(opt.GetAdditional(), protoLinger) - if err == nil { - skdata.Linger = protoToLinger(protoLinger) - } - case "SO_RCVTIMEO": - protoTimeout := &channelzpb.SocketOptionTimeout{} - err := ptypes.UnmarshalAny(opt.GetAdditional(), protoTimeout) - if err == nil { - skdata.RecvTimeout = protoToTime(protoTimeout) - } - case "SO_SNDTIMEO": - protoTimeout := &channelzpb.SocketOptionTimeout{} - err := ptypes.UnmarshalAny(opt.GetAdditional(), protoTimeout) - if err == nil { - skdata.SendTimeout = protoToTime(protoTimeout) - } - case "TCP_INFO": - tcpi := &channelzpb.SocketOptionTcpInfo{} - err := ptypes.UnmarshalAny(opt.GetAdditional(), tcpi) - if err == nil { - skdata.TCPInfo = &unix.TCPInfo{ - State: uint8(tcpi.TcpiState), - Ca_state: uint8(tcpi.TcpiCaState), - Retransmits: uint8(tcpi.TcpiRetransmits), - Probes: uint8(tcpi.TcpiProbes), - Backoff: uint8(tcpi.TcpiBackoff), - Options: uint8(tcpi.TcpiOptions), - Rto: tcpi.TcpiRto, - Ato: tcpi.TcpiAto, - Snd_mss: tcpi.TcpiSndMss, - Rcv_mss: tcpi.TcpiRcvMss, - Unacked: tcpi.TcpiUnacked, - Sacked: tcpi.TcpiSacked, - Lost: tcpi.TcpiLost, - Retrans: tcpi.TcpiRetrans, - Fackets: tcpi.TcpiFackets, - Last_data_sent: 
tcpi.TcpiLastDataSent, - Last_ack_sent: tcpi.TcpiLastAckSent, - Last_data_recv: tcpi.TcpiLastDataRecv, - Last_ack_recv: tcpi.TcpiLastAckRecv, - Pmtu: tcpi.TcpiPmtu, - Rcv_ssthresh: tcpi.TcpiRcvSsthresh, - Rtt: tcpi.TcpiRtt, - Rttvar: tcpi.TcpiRttvar, - Snd_ssthresh: tcpi.TcpiSndSsthresh, - Snd_cwnd: tcpi.TcpiSndCwnd, - Advmss: tcpi.TcpiAdvmss, - Reordering: tcpi.TcpiReordering} - } - } - } - return skdata -} - func (s) TestGetSocketOptions(t *testing.T) { - ss := []*dummySocket{ - { - socketOptions: &channelz.SocketOptionData{ - Linger: &unix.Linger{Onoff: 1, Linger: 2}, - RecvTimeout: &unix.Timeval{Sec: 10, Usec: 1}, - SendTimeout: &unix.Timeval{}, - TCPInfo: &unix.TCPInfo{State: 1}, - }, + ss := &channelz.Socket{ + SocketOptions: &channelz.SocketOptionData{ + Linger: &unix.Linger{Onoff: 1, Linger: 2}, + RecvTimeout: &unix.Timeval{Sec: 10, Usec: 1}, + SendTimeout: &unix.Timeval{}, + TCPInfo: &unix.TCPInfo{State: 1}, }, } svr := newCZServer() - ids := make([]*channelz.Identifier, len(ss)) - svrID := channelz.RegisterServer(&dummyServer{}, "") - defer channelz.RemoveEntry(svrID) - for i, s := range ss { - ids[i], _ = channelz.RegisterNormalSocket(s, svrID, strconv.Itoa(i)) - defer channelz.RemoveEntry(ids[i]) - } + czServer := channelz.RegisterServer("test svr") + defer channelz.RemoveEntry(czServer.ID) + id := channelz.RegisterSocket(&channelz.Socket{SocketType: channelz.SocketTypeNormal, RefName: "0", Parent: czServer, SocketOptions: ss.SocketOptions}) + defer channelz.RemoveEntry(id.ID) ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() - for i, s := range ss { - resp, _ := svr.GetSocket(ctx, &channelzpb.GetSocketRequest{SocketId: ids[i].Int()}) - got, want := resp.GetSocket().GetRef(), &channelzpb.SocketRef{SocketId: ids[i].Int(), Name: strconv.Itoa(i)} - if !cmp.Equal(got, want, protocmp.Transform()) { - t.Fatalf("resp.GetSocket() returned metrics.GetRef() = %#v, want %#v", got, want) + resp, _ := svr.GetSocket(ctx, 
&channelzpb.GetSocketRequest{SocketId: id.ID}) + { + got, want := resp.GetSocket().GetRef(), &channelzpb.SocketRef{SocketId: id.ID, Name: "0"} + if diff := cmp.Diff(got, want, protocmp.Transform()); diff != "" { + t.Fatal("resp.GetSocket() ref (-got +want): ", diff) } - socket, err := socketProtoToStruct(resp.GetSocket()) - if err != nil { - t.Fatal(err) - } - if diff := cmp.Diff(s, socket, protocmp.Transform(), cmp.AllowUnexported(dummySocket{})); diff != "" { - t.Fatalf("unexpected socket, diff (-want +got):\n%s", diff) + } + { + got := resp.GetSocket().GetData().GetOption() + want := []*channelzpb.SocketOption{{ + Name: "SO_LINGER", + Additional: testutils.MarshalAny( + t, + &channelzpb.SocketOptionLinger{Active: true, Duration: durationpb.New(2 * time.Second)}, + ), + }, { + Name: "SO_RCVTIMEO", + Additional: testutils.MarshalAny( + t, + &channelzpb.SocketOptionTimeout{Duration: durationpb.New(10*time.Second + time.Microsecond)}, + ), + }, { + Name: "SO_SNDTIMEO", + Additional: testutils.MarshalAny( + t, + &channelzpb.SocketOptionTimeout{Duration: durationpb.New(0)}, + ), + }, { + Name: "TCP_INFO", + Additional: testutils.MarshalAny( + t, + &channelzpb.SocketOptionTcpInfo{TcpiState: 1}, + ), + }} + if diff := cmp.Diff(got, want, protocmp.Transform()); diff != "" { + t.Fatal("resp.GetSocket() options (-got +want): ", diff) } } } diff --git a/channelz/service/service_test.go b/channelz/service/service_test.go index 38b1f7dda7d8..4e4154226bbb 100644 --- a/channelz/service/service_test.go +++ b/channelz/service/service_test.go @@ -35,6 +35,7 @@ import ( "google.golang.org/grpc/internal/channelz" "google.golang.org/grpc/internal/grpctest" "google.golang.org/protobuf/testing/protocmp" + "google.golang.org/protobuf/types/known/timestamppb" channelzpb "google.golang.org/grpc/channelz/grpc_channelz_v1" ) @@ -51,209 +52,37 @@ func Test(t *testing.T) { grpctest.RunSubTests(t, s{}) } -type protoToSocketOptFunc func([]*channelzpb.SocketOption) *channelz.SocketOptionData - 
-// protoToSocketOpt is used in function socketProtoToStruct to extract socket option -// data from unmarshaled proto message. -// It is only defined under linux environment on x86 architecture. -var protoToSocketOpt protoToSocketOptFunc - const defaultTestTimeout = 10 * time.Second -type dummyChannel struct { - state connectivity.State - target string - callsStarted int64 - callsSucceeded int64 - callsFailed int64 - lastCallStartedTimestamp time.Time -} - -func (d *dummyChannel) ChannelzMetric() *channelz.ChannelInternalMetric { - return &channelz.ChannelInternalMetric{ - State: d.state, - Target: d.target, - CallsStarted: d.callsStarted, - CallsSucceeded: d.callsSucceeded, - CallsFailed: d.callsFailed, - LastCallStartedTimestamp: d.lastCallStartedTimestamp, - } -} - -type dummyServer struct { - callsStarted int64 - callsSucceeded int64 - callsFailed int64 - lastCallStartedTimestamp time.Time -} - -func (d *dummyServer) ChannelzMetric() *channelz.ServerInternalMetric { - return &channelz.ServerInternalMetric{ - CallsStarted: d.callsStarted, - CallsSucceeded: d.callsSucceeded, - CallsFailed: d.callsFailed, - LastCallStartedTimestamp: d.lastCallStartedTimestamp, - } -} - -type dummySocket struct { - streamsStarted int64 - streamsSucceeded int64 - streamsFailed int64 - messagesSent int64 - messagesReceived int64 - keepAlivesSent int64 - lastLocalStreamCreatedTimestamp time.Time - lastRemoteStreamCreatedTimestamp time.Time - lastMessageSentTimestamp time.Time - lastMessageReceivedTimestamp time.Time - localFlowControlWindow int64 - remoteFlowControlWindow int64 - socketOptions *channelz.SocketOptionData - localAddr net.Addr - remoteAddr net.Addr - security credentials.ChannelzSecurityValue - remoteName string -} - -func (d *dummySocket) ChannelzMetric() *channelz.SocketInternalMetric { - return &channelz.SocketInternalMetric{ - StreamsStarted: d.streamsStarted, - StreamsSucceeded: d.streamsSucceeded, - StreamsFailed: d.streamsFailed, - MessagesSent: d.messagesSent, - 
MessagesReceived: d.messagesReceived, - KeepAlivesSent: d.keepAlivesSent, - LastLocalStreamCreatedTimestamp: d.lastLocalStreamCreatedTimestamp, - LastRemoteStreamCreatedTimestamp: d.lastRemoteStreamCreatedTimestamp, - LastMessageSentTimestamp: d.lastMessageSentTimestamp, - LastMessageReceivedTimestamp: d.lastMessageReceivedTimestamp, - LocalFlowControlWindow: d.localFlowControlWindow, - RemoteFlowControlWindow: d.remoteFlowControlWindow, - SocketOptions: d.socketOptions, - LocalAddr: d.localAddr, - RemoteAddr: d.remoteAddr, - Security: d.security, - RemoteName: d.remoteName, - } -} - -func channelProtoToStruct(c *channelzpb.Channel) (*dummyChannel, error) { - dc := &dummyChannel{} +func channelProtoToStruct(c *channelzpb.Channel) (*channelz.ChannelMetrics, error) { + cm := &channelz.ChannelMetrics{} pdata := c.GetData() + var s connectivity.State switch pdata.GetState().GetState() { case channelzpb.ChannelConnectivityState_UNKNOWN: // TODO: what should we set here? case channelzpb.ChannelConnectivityState_IDLE: - dc.state = connectivity.Idle + s = connectivity.Idle case channelzpb.ChannelConnectivityState_CONNECTING: - dc.state = connectivity.Connecting + s = connectivity.Connecting case channelzpb.ChannelConnectivityState_READY: - dc.state = connectivity.Ready + s = connectivity.Ready case channelzpb.ChannelConnectivityState_TRANSIENT_FAILURE: - dc.state = connectivity.TransientFailure + s = connectivity.TransientFailure case channelzpb.ChannelConnectivityState_SHUTDOWN: - dc.state = connectivity.Shutdown - } - dc.target = pdata.GetTarget() - dc.callsStarted = pdata.CallsStarted - dc.callsSucceeded = pdata.CallsSucceeded - dc.callsFailed = pdata.CallsFailed + s = connectivity.Shutdown + } + cm.State.Store(&s) + tgt := pdata.GetTarget() + cm.Target.Store(&tgt) + cm.CallsStarted.Store(pdata.CallsStarted) + cm.CallsSucceeded.Store(pdata.CallsSucceeded) + cm.CallsFailed.Store(pdata.CallsFailed) if err := pdata.GetLastCallStartedTimestamp().CheckValid(); err != nil { 
return nil, err } - dc.lastCallStartedTimestamp = pdata.GetLastCallStartedTimestamp().AsTime() - return dc, nil -} - -func serverProtoToStruct(s *channelzpb.Server) (*dummyServer, error) { - ds := &dummyServer{} - pdata := s.GetData() - ds.callsStarted = pdata.CallsStarted - ds.callsSucceeded = pdata.CallsSucceeded - ds.callsFailed = pdata.CallsFailed - if err := pdata.GetLastCallStartedTimestamp().CheckValid(); err != nil { - return nil, err - } - ds.lastCallStartedTimestamp = pdata.GetLastCallStartedTimestamp().AsTime() - return ds, nil -} - -func socketProtoToStruct(s *channelzpb.Socket) (*dummySocket, error) { - ds := &dummySocket{} - pdata := s.GetData() - ds.streamsStarted = pdata.GetStreamsStarted() - ds.streamsSucceeded = pdata.GetStreamsSucceeded() - ds.streamsFailed = pdata.GetStreamsFailed() - ds.messagesSent = pdata.GetMessagesSent() - ds.messagesReceived = pdata.GetMessagesReceived() - ds.keepAlivesSent = pdata.GetKeepAlivesSent() - if err := pdata.GetLastLocalStreamCreatedTimestamp().CheckValid(); err != nil { - return nil, err - } - ds.lastLocalStreamCreatedTimestamp = pdata.GetLastLocalStreamCreatedTimestamp().AsTime() - if err := pdata.GetLastRemoteStreamCreatedTimestamp().CheckValid(); err != nil { - return nil, err - } - ds.lastRemoteStreamCreatedTimestamp = pdata.GetLastRemoteStreamCreatedTimestamp().AsTime() - if err := pdata.GetLastMessageSentTimestamp().CheckValid(); err != nil { - return nil, err - } - ds.lastMessageSentTimestamp = pdata.GetLastMessageSentTimestamp().AsTime() - if err := pdata.GetLastMessageReceivedTimestamp().CheckValid(); err != nil { - return nil, err - } - ds.lastMessageReceivedTimestamp = pdata.GetLastMessageReceivedTimestamp().AsTime() - if v := pdata.GetLocalFlowControlWindow(); v != nil { - ds.localFlowControlWindow = v.Value - } - if v := pdata.GetRemoteFlowControlWindow(); v != nil { - ds.remoteFlowControlWindow = v.Value - } - if v := pdata.GetOption(); v != nil && protoToSocketOpt != nil { - ds.socketOptions = 
protoToSocketOpt(v) - } - if v := s.GetSecurity(); v != nil { - ds.security = protoToSecurity(v) - } - if local := s.GetLocal(); local != nil { - ds.localAddr = protoToAddr(local) - } - if remote := s.GetRemote(); remote != nil { - ds.remoteAddr = protoToAddr(remote) - } - ds.remoteName = s.GetRemoteName() - return ds, nil -} - -func protoToSecurity(protoSecurity *channelzpb.Security) credentials.ChannelzSecurityValue { - switch v := protoSecurity.Model.(type) { - case *channelzpb.Security_Tls_: - return &credentials.TLSChannelzSecurityValue{StandardName: v.Tls.GetStandardName(), LocalCertificate: v.Tls.GetLocalCertificate(), RemoteCertificate: v.Tls.GetRemoteCertificate()} - case *channelzpb.Security_Other: - sv := &credentials.OtherChannelzSecurityValue{Name: v.Other.GetName()} - var x ptypes.DynamicAny - if err := ptypes.UnmarshalAny(v.Other.GetValue(), &x); err == nil { - sv.Value = x.Message - } - return sv - } - return nil -} - -func protoToAddr(a *channelzpb.Address) net.Addr { - switch v := a.Address.(type) { - case *channelzpb.Address_TcpipAddress: - if port := v.TcpipAddress.GetPort(); port != 0 { - return &net.TCPAddr{IP: v.TcpipAddress.GetIpAddress(), Port: int(port)} - } - return &net.IPAddr{IP: v.TcpipAddress.GetIpAddress()} - case *channelzpb.Address_UdsAddress_: - return &net.UnixAddr{Name: v.UdsAddress.GetFilename(), Net: "unix"} - case *channelzpb.Address_OtherAddress_: - // TODO: - } - return nil + cm.LastCallStartedTimestamp.Store(int64(pdata.GetLastCallStartedTimestamp().AsTime().UnixNano())) + return cm, nil } func convertSocketRefSliceToMap(sktRefs []*channelzpb.SocketRef) map[int64]string { @@ -279,36 +108,37 @@ func init() { } func (s) TestGetTopChannels(t *testing.T) { - tcs := []*dummyChannel{ - { - state: connectivity.Connecting, - target: "test.channelz:1234", - callsStarted: 6, - callsSucceeded: 2, - callsFailed: 3, - lastCallStartedTimestamp: time.Now().UTC(), - }, - { - state: connectivity.Connecting, - target: "test.channelz:1234", 
- callsStarted: 1, - callsSucceeded: 2, - callsFailed: 3, - lastCallStartedTimestamp: time.Now().UTC(), - }, - { - state: connectivity.Shutdown, - target: "test.channelz:8888", - callsStarted: 0, - callsSucceeded: 0, - callsFailed: 0, - }, - {}, + tcs := []*channelz.ChannelMetrics{ + channelz.NewChannelMetricForTesting( + connectivity.Connecting, + "test.channelz:1234", + 6, + 2, + 3, + time.Now().UTC().UnixNano(), + ), + channelz.NewChannelMetricForTesting( + connectivity.Connecting, + "test.channelz:1234", + 1, + 2, + 3, + time.Now().UTC().UnixNano(), + ), + channelz.NewChannelMetricForTesting( + connectivity.Shutdown, + "test.channelz:8888", + 0, + 0, + 0, + 0, + ), } for _, c := range tcs { - id := channelz.RegisterChannel(c, nil, "") - defer channelz.RemoveEntry(id) + cz := channelz.RegisterChannel(nil, "test channel") + cz.ChannelMetrics.CopyFrom(c) + defer channelz.RemoveEntry(cz.ID) } s := newCZServer() ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) @@ -322,13 +152,13 @@ func (s) TestGetTopChannels(t *testing.T) { if err != nil { t.Fatal(err) } - if diff := cmp.Diff(tcs[i], channel, protocmp.Transform(), cmp.AllowUnexported(dummyChannel{})); diff != "" { + if diff := cmp.Diff(tcs[i], channel, protocmp.Transform()); diff != "" { t.Fatalf("unexpected channel, diff (-want +got):\n%s", diff) } } for i := 0; i < 50; i++ { - id := channelz.RegisterChannel(tcs[0], nil, "") - defer channelz.RemoveEntry(id) + cz := channelz.RegisterChannel(nil, "") + defer channelz.RemoveEntry(cz.ID) } resp, _ = s.GetTopChannels(ctx, &channelzpb.GetTopChannelsRequest{StartChannelId: 0}) if resp.GetEnd() { @@ -337,30 +167,35 @@ func (s) TestGetTopChannels(t *testing.T) { } func (s) TestGetServers(t *testing.T) { - ss := []*dummyServer{ - { - callsStarted: 6, - callsSucceeded: 2, - callsFailed: 3, - lastCallStartedTimestamp: time.Now().UTC(), - }, - { - callsStarted: 1, - callsSucceeded: 2, - callsFailed: 3, - lastCallStartedTimestamp: time.Now().UTC(), - 
}, - { - callsStarted: 1, - callsSucceeded: 0, - callsFailed: 0, - lastCallStartedTimestamp: time.Now().UTC(), - }, - } - - for _, s := range ss { - id := channelz.RegisterServer(s, "") - defer channelz.RemoveEntry(id) + ss := []*channelz.ServerMetrics{ + channelz.NewServerMetricsForTesting( + 6, + 2, + 3, + time.Now().UnixNano(), + ), + channelz.NewServerMetricsForTesting( + 1, + 2, + 3, + time.Now().UnixNano(), + ), + channelz.NewServerMetricsForTesting( + 1, + 0, + 0, + time.Now().UnixNano(), + ), + } + + firstID := int64(0) + for i, s := range ss { + svr := channelz.RegisterServer("") + if i == 0 { + firstID = svr.ID + } + svr.ServerMetrics.CopyFrom(s) + defer channelz.RemoveEntry(svr.ID) } svr := newCZServer() ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) @@ -369,17 +204,40 @@ func (s) TestGetServers(t *testing.T) { if !resp.GetEnd() { t.Fatalf("resp.GetEnd() want true, got %v", resp.GetEnd()) } - for i, s := range resp.GetServer() { - server, err := serverProtoToStruct(s) - if err != nil { - t.Fatal(err) - } - if diff := cmp.Diff(ss[i], server, protocmp.Transform(), cmp.AllowUnexported(dummyServer{})); diff != "" { - t.Fatalf("unexpected server, diff (-want +got):\n%s", diff) - } + serversWant := []*channelzpb.Server{ + { + Ref: &channelzpb.ServerRef{ServerId: firstID, Name: ""}, + Data: &channelzpb.ServerData{ + CallsStarted: 6, + CallsSucceeded: 2, + CallsFailed: 3, + LastCallStartedTimestamp: timestamppb.New(time.Unix(0, ss[0].LastCallStartedTimestamp.Load())), + }, + }, + { + Ref: &channelzpb.ServerRef{ServerId: firstID + 1, Name: ""}, + Data: &channelzpb.ServerData{ + CallsStarted: 1, + CallsSucceeded: 2, + CallsFailed: 3, + LastCallStartedTimestamp: timestamppb.New(time.Unix(0, ss[1].LastCallStartedTimestamp.Load())), + }, + }, + { + Ref: &channelzpb.ServerRef{ServerId: firstID + 2, Name: ""}, + Data: &channelzpb.ServerData{ + CallsStarted: 1, + CallsSucceeded: 0, + CallsFailed: 0, + LastCallStartedTimestamp: 
timestamppb.New(time.Unix(0, ss[2].LastCallStartedTimestamp.Load())), + }, + }, + } + if diff := cmp.Diff(serversWant, resp.GetServer(), protocmp.Transform()); diff != "" { + t.Fatalf("unexpected server, diff (-want +got):\n%s", diff) } for i := 0; i < 50; i++ { - id := channelz.RegisterServer(ss[0], "") + id := channelz.RegisterServer("").ID defer channelz.RemoveEntry(id) } resp, _ = svr.GetServers(ctx, &channelzpb.GetServersRequest{StartServerId: 0}) @@ -389,37 +247,37 @@ func (s) TestGetServers(t *testing.T) { } func (s) TestGetServerSockets(t *testing.T) { - svrID := channelz.RegisterServer(&dummyServer{}, "") - defer channelz.RemoveEntry(svrID) + svrID := channelz.RegisterServer("") + defer channelz.RemoveEntry(svrID.ID) refNames := []string{"listen socket 1", "normal socket 1", "normal socket 2"} - ids := make([]*channelz.Identifier, 3) - ids[0], _ = channelz.RegisterListenSocket(&dummySocket{}, svrID, refNames[0]) - ids[1], _ = channelz.RegisterNormalSocket(&dummySocket{}, svrID, refNames[1]) - ids[2], _ = channelz.RegisterNormalSocket(&dummySocket{}, svrID, refNames[2]) + ids := make([]int64, 3) + ids[0] = channelz.RegisterSocket(&channelz.Socket{SocketType: channelz.SocketTypeListen, Parent: svrID, RefName: refNames[0]}).ID + ids[1] = channelz.RegisterSocket(&channelz.Socket{SocketType: channelz.SocketTypeNormal, Parent: svrID, RefName: refNames[1]}).ID + ids[2] = channelz.RegisterSocket(&channelz.Socket{SocketType: channelz.SocketTypeNormal, Parent: svrID, RefName: refNames[2]}).ID for _, id := range ids { defer channelz.RemoveEntry(id) } svr := newCZServer() ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() - resp, _ := svr.GetServerSockets(ctx, &channelzpb.GetServerSocketsRequest{ServerId: svrID.Int(), StartSocketId: 0}) + resp, _ := svr.GetServerSockets(ctx, &channelzpb.GetServerSocketsRequest{ServerId: svrID.ID, StartSocketId: 0}) if !resp.GetEnd() { t.Fatalf("resp.GetEnd() want: true, got: %v", resp.GetEnd()) 
} // GetServerSockets only return normal sockets. want := map[int64]string{ - ids[1].Int(): refNames[1], - ids[2].Int(): refNames[2], + ids[1]: refNames[1], + ids[2]: refNames[2], } - if !cmp.Equal(convertSocketRefSliceToMap(resp.GetSocketRef()), want) { - t.Fatalf("GetServerSockets want: %#v, got: %#v", want, resp.GetSocketRef()) + if got := convertSocketRefSliceToMap(resp.GetSocketRef()); !cmp.Equal(got, want) { + t.Fatalf("GetServerSockets want: %#v, got: %#v (resp=%v)", want, got, proto.MarshalTextString(resp)) } for i := 0; i < 50; i++ { - id, _ := channelz.RegisterNormalSocket(&dummySocket{}, svrID, "") - defer channelz.RemoveEntry(id) + id := channelz.RegisterSocket(&channelz.Socket{SocketType: channelz.SocketTypeNormal, Parent: svrID}) + defer channelz.RemoveEntry(id.ID) } - resp, _ = svr.GetServerSockets(ctx, &channelzpb.GetServerSocketsRequest{ServerId: svrID.Int(), StartSocketId: 0}) + resp, _ = svr.GetServerSockets(ctx, &channelzpb.GetServerSocketsRequest{ServerId: svrID.ID, StartSocketId: 0}) if resp.GetEnd() { t.Fatalf("resp.GetEnd() want false, got %v", resp.GetEnd()) } @@ -428,13 +286,13 @@ func (s) TestGetServerSockets(t *testing.T) { // This test makes a GetServerSockets with a non-zero start ID, and expect only // sockets with ID >= the given start ID. 
func (s) TestGetServerSocketsNonZeroStartID(t *testing.T) { - svrID := channelz.RegisterServer(&dummyServer{}, "") - defer channelz.RemoveEntry(svrID) + svrID := channelz.RegisterServer("test server") + defer channelz.RemoveEntry(svrID.ID) refNames := []string{"listen socket 1", "normal socket 1", "normal socket 2"} - ids := make([]*channelz.Identifier, 3) - ids[0], _ = channelz.RegisterListenSocket(&dummySocket{}, svrID, refNames[0]) - ids[1], _ = channelz.RegisterNormalSocket(&dummySocket{}, svrID, refNames[1]) - ids[2], _ = channelz.RegisterNormalSocket(&dummySocket{}, svrID, refNames[2]) + ids := make([]int64, 3) + ids[0] = channelz.RegisterSocket(&channelz.Socket{SocketType: channelz.SocketTypeListen, Parent: svrID, RefName: refNames[0]}).ID + ids[1] = channelz.RegisterSocket(&channelz.Socket{SocketType: channelz.SocketTypeNormal, Parent: svrID, RefName: refNames[1]}).ID + ids[2] = channelz.RegisterSocket(&channelz.Socket{SocketType: channelz.SocketTypeNormal, Parent: svrID, RefName: refNames[2]}).ID for _, id := range ids { defer channelz.RemoveEntry(id) } @@ -443,14 +301,14 @@ func (s) TestGetServerSocketsNonZeroStartID(t *testing.T) { defer cancel() // Make GetServerSockets with startID = ids[1]+1, so socket-1 won't be // included in the response. - resp, _ := svr.GetServerSockets(ctx, &channelzpb.GetServerSocketsRequest{ServerId: svrID.Int(), StartSocketId: ids[1].Int() + 1}) + resp, _ := svr.GetServerSockets(ctx, &channelzpb.GetServerSocketsRequest{ServerId: svrID.ID, StartSocketId: ids[1] + 1}) if !resp.GetEnd() { t.Fatalf("resp.GetEnd() want: true, got: %v", resp.GetEnd()) } // GetServerSockets only return normal socket-2, socket-1 should be // filtered by start ID. 
want := map[int64]string{ - ids[2].Int(): refNames[2], + ids[2]: refNames[2], } if !cmp.Equal(convertSocketRefSliceToMap(resp.GetSocketRef()), want) { t.Fatalf("GetServerSockets want: %#v, got: %#v", want, resp.GetSocketRef()) @@ -459,71 +317,68 @@ func (s) TestGetServerSocketsNonZeroStartID(t *testing.T) { func (s) TestGetChannel(t *testing.T) { refNames := []string{"top channel 1", "nested channel 1", "sub channel 2", "nested channel 3"} - ids := make([]*channelz.Identifier, 4) - ids[0] = channelz.RegisterChannel(&dummyChannel{}, nil, refNames[0]) - channelz.AddTraceEvent(logger, ids[0], 0, &channelz.TraceEventDesc{ + cids := make([]*channelz.Channel, 3) + cids[0] = channelz.RegisterChannel(nil, refNames[0]) + channelz.AddTraceEvent(logger, cids[0], 0, &channelz.TraceEvent{ Desc: "Channel Created", Severity: channelz.CtInfo, }) - ids[1] = channelz.RegisterChannel(&dummyChannel{}, ids[0], refNames[1]) - channelz.AddTraceEvent(logger, ids[1], 0, &channelz.TraceEventDesc{ + cids[1] = channelz.RegisterChannel(cids[0], refNames[1]) + channelz.AddTraceEvent(logger, cids[1], 0, &channelz.TraceEvent{ Desc: "Channel Created", Severity: channelz.CtInfo, - Parent: &channelz.TraceEventDesc{ - Desc: fmt.Sprintf("Nested Channel(id:%d) created", ids[1].Int()), + Parent: &channelz.TraceEvent{ + Desc: fmt.Sprintf("Nested Channel(id:%d) created", cids[1].ID), Severity: channelz.CtInfo, }, }) - var err error - ids[2], err = channelz.RegisterSubChannel(&dummyChannel{}, ids[0], refNames[2]) - if err != nil { - t.Fatalf("channelz.RegisterSubChannel() failed: %v", err) - } - channelz.AddTraceEvent(logger, ids[2], 0, &channelz.TraceEventDesc{ + subChan := channelz.RegisterSubChannel(cids[0].ID, refNames[2]) + channelz.AddTraceEvent(logger, subChan, 0, &channelz.TraceEvent{ Desc: "SubChannel Created", Severity: channelz.CtInfo, - Parent: &channelz.TraceEventDesc{ - Desc: fmt.Sprintf("SubChannel(id:%d) created", ids[2].Int()), + Parent: &channelz.TraceEvent{ + Desc: 
fmt.Sprintf("SubChannel(id:%d) created", subChan.ID), Severity: channelz.CtInfo, }, }) + defer channelz.RemoveEntry(subChan.ID) - ids[3] = channelz.RegisterChannel(&dummyChannel{}, ids[1], refNames[3]) - channelz.AddTraceEvent(logger, ids[3], 0, &channelz.TraceEventDesc{ + cids[2] = channelz.RegisterChannel(cids[1], refNames[3]) + channelz.AddTraceEvent(logger, cids[2], 0, &channelz.TraceEvent{ Desc: "Channel Created", Severity: channelz.CtInfo, - Parent: &channelz.TraceEventDesc{ - Desc: fmt.Sprintf("Nested Channel(id:%d) created", ids[3].Int()), + Parent: &channelz.TraceEvent{ + Desc: fmt.Sprintf("Nested Channel(id:%d) created", cids[2].ID), Severity: channelz.CtInfo, }, }) - channelz.AddTraceEvent(logger, ids[0], 0, &channelz.TraceEventDesc{ + channelz.AddTraceEvent(logger, cids[0], 0, &channelz.TraceEvent{ Desc: fmt.Sprintf("Channel Connectivity change to %v", connectivity.Ready), Severity: channelz.CtInfo, }) - channelz.AddTraceEvent(logger, ids[0], 0, &channelz.TraceEventDesc{ + channelz.AddTraceEvent(logger, cids[0], 0, &channelz.TraceEvent{ Desc: "Resolver returns an empty address list", Severity: channelz.CtWarning, }) - for _, id := range ids { - defer channelz.RemoveEntry(id) + for _, id := range cids { + defer channelz.RemoveEntry(id.ID) } svr := newCZServer() ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() - resp, _ := svr.GetChannel(ctx, &channelzpb.GetChannelRequest{ChannelId: ids[0].Int()}) + resp, _ := svr.GetChannel(ctx, &channelzpb.GetChannelRequest{ChannelId: cids[0].ID}) metrics := resp.GetChannel() subChans := metrics.GetSubchannelRef() - if len(subChans) != 1 || subChans[0].GetName() != refNames[2] || subChans[0].GetSubchannelId() != ids[2].Int() { - t.Fatalf("metrics.GetSubChannelRef() want %#v, got %#v", []*channelzpb.SubchannelRef{{SubchannelId: ids[2].Int(), Name: refNames[2]}}, subChans) + if len(subChans) != 1 || subChans[0].GetName() != refNames[2] || subChans[0].GetSubchannelId() != 
subChan.ID { + t.Fatalf("metrics.GetSubChannelRef() want %#v, got %#v", []*channelzpb.SubchannelRef{{SubchannelId: subChan.ID, Name: refNames[2]}}, subChans) } nestedChans := metrics.GetChannelRef() - if len(nestedChans) != 1 || nestedChans[0].GetName() != refNames[1] || nestedChans[0].GetChannelId() != ids[1].Int() { - t.Fatalf("metrics.GetChannelRef() want %#v, got %#v", []*channelzpb.ChannelRef{{ChannelId: ids[1].Int(), Name: refNames[1]}}, nestedChans) + if len(nestedChans) != 1 || nestedChans[0].GetName() != refNames[1] || nestedChans[0].GetChannelId() != cids[1].ID { + t.Fatalf("metrics.GetChannelRef() want %#v, got %#v", []*channelzpb.ChannelRef{{ChannelId: cids[1].ID, Name: refNames[1]}}, nestedChans) } trace := metrics.GetData().GetTrace() want := []struct { @@ -533,8 +388,8 @@ func (s) TestGetChannel(t *testing.T) { childRef string }{ {desc: "Channel Created", severity: channelzpb.ChannelTraceEvent_CT_INFO}, - {desc: fmt.Sprintf("Nested Channel(id:%d) created", ids[1].Int()), severity: channelzpb.ChannelTraceEvent_CT_INFO, childID: ids[1].Int(), childRef: refNames[1]}, - {desc: fmt.Sprintf("SubChannel(id:%d) created", ids[2].Int()), severity: channelzpb.ChannelTraceEvent_CT_INFO, childID: ids[2].Int(), childRef: refNames[2]}, + {desc: fmt.Sprintf("Nested Channel(id:%d) created", cids[1].ID), severity: channelzpb.ChannelTraceEvent_CT_INFO, childID: cids[1].ID, childRef: refNames[1]}, + {desc: fmt.Sprintf("SubChannel(id:%d) created", subChan.ID), severity: channelzpb.ChannelTraceEvent_CT_INFO, childID: subChan.ID, childRef: refNames[2]}, {desc: fmt.Sprintf("Channel Connectivity change to %v", connectivity.Ready), severity: channelzpb.ChannelTraceEvent_CT_INFO}, {desc: "Resolver returns an empty address list", severity: channelzpb.ChannelTraceEvent_CT_WARNING}, } @@ -555,11 +410,11 @@ func (s) TestGetChannel(t *testing.T) { } } } - resp, _ = svr.GetChannel(ctx, &channelzpb.GetChannelRequest{ChannelId: ids[1].Int()}) + resp, _ = svr.GetChannel(ctx, 
&channelzpb.GetChannelRequest{ChannelId: cids[1].ID}) metrics = resp.GetChannel() nestedChans = metrics.GetChannelRef() - if len(nestedChans) != 1 || nestedChans[0].GetName() != refNames[3] || nestedChans[0].GetChannelId() != ids[3].Int() { - t.Fatalf("metrics.GetChannelRef() want %#v, got %#v", []*channelzpb.ChannelRef{{ChannelId: ids[3].Int(), Name: refNames[3]}}, nestedChans) + if len(nestedChans) != 1 || nestedChans[0].GetName() != refNames[3] || nestedChans[0].GetChannelId() != cids[2].ID { + t.Fatalf("metrics.GetChannelRef() want %#v, got %#v", []*channelzpb.ChannelRef{{ChannelId: cids[2].ID, Name: refNames[3]}}, nestedChans) } } @@ -571,46 +426,42 @@ func (s) TestGetSubChannel(t *testing.T) { ) refNames := []string{"top channel 1", "sub channel 1", "socket 1", "socket 2"} - ids := make([]*channelz.Identifier, 4) - ids[0] = channelz.RegisterChannel(&dummyChannel{}, nil, refNames[0]) - channelz.AddTraceEvent(logger, ids[0], 0, &channelz.TraceEventDesc{ + chann := channelz.RegisterChannel(nil, refNames[0]) + defer channelz.RemoveEntry(chann.ID) + channelz.AddTraceEvent(logger, chann, 0, &channelz.TraceEvent{ Desc: "Channel Created", Severity: channelz.CtInfo, }) - var err error - ids[1], err = channelz.RegisterSubChannel(&dummyChannel{}, ids[0], refNames[1]) - if err != nil { - t.Fatalf("channelz.RegisterSubChannel() failed: %v", err) - } - channelz.AddTraceEvent(logger, ids[1], 0, &channelz.TraceEventDesc{ + subChan := channelz.RegisterSubChannel(chann.ID, refNames[1]) + defer channelz.RemoveEntry(subChan.ID) + channelz.AddTraceEvent(logger, subChan, 0, &channelz.TraceEvent{ Desc: subchanCreated, Severity: channelz.CtInfo, - Parent: &channelz.TraceEventDesc{ - Desc: fmt.Sprintf("Nested Channel(id:%d) created", ids[0].Int()), + Parent: &channelz.TraceEvent{ + Desc: fmt.Sprintf("Nested Channel(id:%d) created", chann.ID), Severity: channelz.CtInfo, }, }) - ids[2], _ = channelz.RegisterNormalSocket(&dummySocket{}, ids[1], refNames[2]) - ids[3], _ = 
channelz.RegisterNormalSocket(&dummySocket{}, ids[1], refNames[3]) - channelz.AddTraceEvent(logger, ids[1], 0, &channelz.TraceEventDesc{ + skt1 := channelz.RegisterSocket(&channelz.Socket{SocketType: channelz.SocketTypeNormal, Parent: subChan, RefName: refNames[2]}) + defer channelz.RemoveEntry(skt1.ID) + skt2 := channelz.RegisterSocket(&channelz.Socket{SocketType: channelz.SocketTypeNormal, Parent: subChan, RefName: refNames[3]}) + defer channelz.RemoveEntry(skt2.ID) + channelz.AddTraceEvent(logger, subChan, 0, &channelz.TraceEvent{ Desc: subchanConnectivityChange, Severity: channelz.CtInfo, }) - channelz.AddTraceEvent(logger, ids[1], 0, &channelz.TraceEventDesc{ + channelz.AddTraceEvent(logger, subChan, 0, &channelz.TraceEvent{ Desc: subChanPickNewAddress, Severity: channelz.CtInfo, }) - for _, id := range ids { - defer channelz.RemoveEntry(id) - } svr := newCZServer() ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() - resp, _ := svr.GetSubchannel(ctx, &channelzpb.GetSubchannelRequest{SubchannelId: ids[1].Int()}) + resp, _ := svr.GetSubchannel(ctx, &channelzpb.GetSubchannelRequest{SubchannelId: subChan.ID}) metrics := resp.GetSubchannel() want := map[int64]string{ - ids[2].Int(): refNames[2], - ids[3].Int(): refNames[3], + skt1.ID: refNames[2], + skt2.ID: refNames[3], } if !cmp.Equal(convertSocketRefSliceToMap(metrics.GetSocketRef()), want) { t.Fatalf("metrics.GetSocketRef() want %#v: got: %#v", want, metrics.GetSocketRef()) @@ -645,100 +496,270 @@ func (s) TestGetSubChannel(t *testing.T) { } } +type czSocket struct { + streamsStarted int64 + streamsSucceeded int64 + streamsFailed int64 + messagesSent int64 + messagesReceived int64 + keepAlivesSent int64 + lastLocalStreamCreatedTimestamp time.Time + lastRemoteStreamCreatedTimestamp time.Time + lastMessageSentTimestamp time.Time + lastMessageReceivedTimestamp time.Time + localFlowControlWindow int64 + remoteFlowControlWindow int64 + + localAddr net.Addr + remoteAddr 
net.Addr + remoteName string + socketOptions *channelz.SocketOptionData + security credentials.ChannelzSecurityValue +} + +func newSocket(cs czSocket) *channelz.Socket { + if cs.lastLocalStreamCreatedTimestamp.IsZero() { + cs.lastLocalStreamCreatedTimestamp = time.Unix(0, 0) + } + if cs.lastRemoteStreamCreatedTimestamp.IsZero() { + cs.lastRemoteStreamCreatedTimestamp = time.Unix(0, 0) + } + if cs.lastMessageSentTimestamp.IsZero() { + cs.lastMessageSentTimestamp = time.Unix(0, 0) + } + if cs.lastMessageReceivedTimestamp.IsZero() { + cs.lastMessageReceivedTimestamp = time.Unix(0, 0) + } + + s := &channelz.Socket{ + LocalAddr: cs.localAddr, + RemoteAddr: cs.remoteAddr, + RemoteName: cs.remoteName, + SocketOptions: cs.socketOptions, + Security: cs.security, + } + s.SocketMetrics.StreamsStarted.Store(cs.streamsStarted) + s.SocketMetrics.StreamsSucceeded.Store(cs.streamsSucceeded) + s.SocketMetrics.StreamsFailed.Store(cs.streamsFailed) + s.SocketMetrics.MessagesSent.Store(cs.messagesSent) + s.SocketMetrics.MessagesReceived.Store(cs.messagesReceived) + s.SocketMetrics.KeepAlivesSent.Store(cs.keepAlivesSent) + s.SocketMetrics.LastLocalStreamCreatedTimestamp.Store(cs.lastLocalStreamCreatedTimestamp.UnixNano()) + s.SocketMetrics.LastRemoteStreamCreatedTimestamp.Store(cs.lastRemoteStreamCreatedTimestamp.UnixNano()) + s.SocketMetrics.LastMessageSentTimestamp.Store(cs.lastMessageSentTimestamp.UnixNano()) + s.SocketMetrics.LastMessageReceivedTimestamp.Store(cs.lastMessageReceivedTimestamp.UnixNano()) + s.EphemeralMetrics = func() *channelz.EphemeralSocketMetrics { + return &channelz.EphemeralSocketMetrics{ + LocalFlowControlWindow: cs.localFlowControlWindow, + RemoteFlowControlWindow: cs.remoteFlowControlWindow, + } + } + return s +} + func (s) TestGetSocket(t *testing.T) { - ss := []*dummySocket{ - { - streamsStarted: 10, - streamsSucceeded: 2, - streamsFailed: 3, - messagesSent: 20, - messagesReceived: 10, - keepAlivesSent: 2, - lastLocalStreamCreatedTimestamp: 
time.Now().UTC(), - lastRemoteStreamCreatedTimestamp: time.Now().UTC(), - lastMessageSentTimestamp: time.Now().UTC(), - lastMessageReceivedTimestamp: time.Now().UTC(), - localFlowControlWindow: 65536, - remoteFlowControlWindow: 1024, - localAddr: &net.TCPAddr{IP: net.ParseIP("1.0.0.1"), Port: 10001}, - remoteAddr: &net.TCPAddr{IP: net.ParseIP("12.0.0.1"), Port: 10002}, - remoteName: "remote.remote", + ss := []*channelz.Socket{newSocket(czSocket{ + streamsStarted: 10, + streamsSucceeded: 2, + streamsFailed: 3, + messagesSent: 20, + messagesReceived: 10, + keepAlivesSent: 2, + lastLocalStreamCreatedTimestamp: time.Unix(0, 0), + lastRemoteStreamCreatedTimestamp: time.Unix(1, 0), + lastMessageSentTimestamp: time.Unix(2, 0), + lastMessageReceivedTimestamp: time.Unix(3, 0), + localFlowControlWindow: 65536, + remoteFlowControlWindow: 1024, + localAddr: &net.TCPAddr{IP: net.ParseIP("1.0.0.1"), Port: 10001}, + remoteAddr: &net.TCPAddr{IP: net.ParseIP("12.0.0.1"), Port: 10002}, + remoteName: "remote.remote", + }), newSocket(czSocket{ + streamsStarted: 10, + streamsSucceeded: 2, + streamsFailed: 3, + messagesSent: 20, + messagesReceived: 10, + keepAlivesSent: 2, + lastLocalStreamCreatedTimestamp: time.Unix(0, 0), + lastRemoteStreamCreatedTimestamp: time.Unix(5, 0), + lastMessageSentTimestamp: time.Unix(6, 0), + lastMessageReceivedTimestamp: time.Unix(7, 0), + localFlowControlWindow: 65536, + remoteFlowControlWindow: 1024, + localAddr: &net.UnixAddr{Name: "file.path", Net: "unix"}, + remoteAddr: &net.UnixAddr{Name: "another.path", Net: "unix"}, + remoteName: "remote.remote", + }), newSocket(czSocket{ + streamsStarted: 5, + streamsSucceeded: 2, + streamsFailed: 3, + messagesSent: 20, + messagesReceived: 10, + keepAlivesSent: 2, + lastLocalStreamCreatedTimestamp: time.Unix(10, 10), + lastRemoteStreamCreatedTimestamp: time.Unix(0, 0), + lastMessageSentTimestamp: time.Unix(0, 0), + lastMessageReceivedTimestamp: time.Unix(0, 0), + localFlowControlWindow: 65536, + 
remoteFlowControlWindow: 10240, + localAddr: &net.IPAddr{IP: net.ParseIP("1.0.0.1")}, + remoteAddr: &net.IPAddr{IP: net.ParseIP("9.0.0.1")}, + remoteName: "", + }), newSocket(czSocket{ + localAddr: &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 10001}, + }), newSocket(czSocket{ + security: &credentials.TLSChannelzSecurityValue{ + StandardName: "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", + RemoteCertificate: []byte{48, 130, 2, 156, 48, 130, 2, 5, 160}, }, - { - streamsStarted: 10, - streamsSucceeded: 2, - streamsFailed: 3, - messagesSent: 20, - messagesReceived: 10, - keepAlivesSent: 2, - lastRemoteStreamCreatedTimestamp: time.Now().UTC(), - lastMessageSentTimestamp: time.Now().UTC(), - lastMessageReceivedTimestamp: time.Now().UTC(), - localFlowControlWindow: 65536, - remoteFlowControlWindow: 1024, - localAddr: &net.UnixAddr{Name: "file.path", Net: "unix"}, - remoteAddr: &net.UnixAddr{Name: "another.path", Net: "unix"}, - remoteName: "remote.remote", - }, - { - streamsStarted: 5, - streamsSucceeded: 2, - streamsFailed: 3, - messagesSent: 20, - messagesReceived: 10, - keepAlivesSent: 2, - lastLocalStreamCreatedTimestamp: time.Now().UTC(), - lastMessageSentTimestamp: time.Now().UTC(), - lastMessageReceivedTimestamp: time.Now().UTC(), - localFlowControlWindow: 65536, - remoteFlowControlWindow: 10240, - localAddr: &net.IPAddr{IP: net.ParseIP("1.0.0.1")}, - remoteAddr: &net.IPAddr{IP: net.ParseIP("9.0.0.1")}, - remoteName: "", + }), newSocket(czSocket{ + security: &credentials.OtherChannelzSecurityValue{ + Name: "XXXX", }, - { - localAddr: &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 10001}, - }, - { - security: &credentials.TLSChannelzSecurityValue{ - StandardName: "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", - RemoteCertificate: []byte{48, 130, 2, 156, 48, 130, 2, 5, 160}, - }, - }, - { - security: &credentials.OtherChannelzSecurityValue{ - Name: "XXXX", - }, - }, - { - security: &credentials.OtherChannelzSecurityValue{ - Name: "YYYY", - Value: 
&OtherSecurityValue{LocalCertificate: []byte{1, 2, 3}, RemoteCertificate: []byte{4, 5, 6}}, - }, + }), newSocket(czSocket{ + security: &credentials.OtherChannelzSecurityValue{ + Name: "YYYY", + Value: &OtherSecurityValue{LocalCertificate: []byte{1, 2, 3}, RemoteCertificate: []byte{4, 5, 6}}, }, + }), + } + otherSecVal, err := ptypes.MarshalAny(ss[6].Security.(*credentials.OtherChannelzSecurityValue).Value) + if err != nil { + t.Fatal("Error marshalling proto:", err) } + svr := newCZServer() - ids := make([]*channelz.Identifier, len(ss)) - svrID := channelz.RegisterServer(&dummyServer{}, "") - defer channelz.RemoveEntry(svrID) + skts := make([]*channelz.Socket, len(ss)) + svrID := channelz.RegisterServer("") + defer channelz.RemoveEntry(svrID.ID) for i, s := range ss { - ids[i], _ = channelz.RegisterNormalSocket(s, svrID, strconv.Itoa(i)) - defer channelz.RemoveEntry(ids[i]) + s.Parent = svrID + s.RefName = strconv.Itoa(i) + skts[i] = channelz.RegisterSocket(s) + defer channelz.RemoveEntry(skts[i].ID) } ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() - for i, s := range ss { - resp, _ := svr.GetSocket(ctx, &channelzpb.GetSocketRequest{SocketId: ids[i].Int()}) - got, want := resp.GetSocket().GetRef(), &channelzpb.SocketRef{SocketId: ids[i].Int(), Name: strconv.Itoa(i)} - if !cmp.Equal(got, want, protocmp.Transform()) { - t.Fatalf("resp.GetSocket() returned metrics.GetRef() = %#v, want %#v", got, want) + + emptyData := `data: { + last_local_stream_created_timestamp: {seconds: 0 nanos: 0} + last_remote_stream_created_timestamp: {seconds: 0 nanos: 0} + last_message_sent_timestamp: {seconds: 0 nanos: 0} + last_message_received_timestamp: {seconds: 0 nanos: 0} + local_flow_control_window: { value: 0 } + remote_flow_control_window: { value: 0 } + }` + want := []string{` + ref: {socket_id: ` + fmt.Sprint(skts[0].ID) + ` name: "0" } + data: { + streams_started: 10 + streams_succeeded: 2 + streams_failed: 3 + messages_sent: 20 + 
messages_received: 10 + keep_alives_sent: 2 + last_local_stream_created_timestamp: {seconds: 0 nanos: 0} + last_remote_stream_created_timestamp: {seconds: 1 nanos: 0} + last_message_sent_timestamp: {seconds: 2 nanos: 0} + last_message_received_timestamp: {seconds: 3 nanos: 0} + local_flow_control_window: { value: 65536 } + remote_flow_control_window: { value: 1024 } } - socket, err := socketProtoToStruct(resp.GetSocket()) - if err != nil { - t.Fatal(err) + local: { tcpip_address: { ip_address: "` + addr(skts[0].LocalAddr) + `" port: 10001 } } + remote: { tcpip_address: { ip_address: "` + addr(skts[0].RemoteAddr) + `" port: 10002 } } + remote_name: "remote.remote"`, + ` + ref: {socket_id: ` + fmt.Sprint(skts[1].ID) + ` name: "1" } + data: { + streams_started: 10 + streams_succeeded: 2 + streams_failed: 3 + messages_sent: 20 + messages_received: 10 + keep_alives_sent: 2 + last_local_stream_created_timestamp: {seconds: 0 nanos: 0} + last_remote_stream_created_timestamp: {seconds: 5 nanos: 0} + last_message_sent_timestamp: {seconds: 6 nanos: 0} + last_message_received_timestamp: {seconds: 7 nanos: 0} + local_flow_control_window: { value: 65536 } + remote_flow_control_window: { value: 1024 } } - if diff := cmp.Diff(s, socket, protocmp.Transform(), cmp.AllowUnexported(dummySocket{})); diff != "" { - t.Fatalf("unexpected socket, diff (-want +got):\n%s", diff) + local: { uds_address { filename: "file.path" } } + remote: { uds_address { filename: "another.path" } } + remote_name: "remote.remote"`, + ` + ref: {socket_id: ` + fmt.Sprint(skts[2].ID) + ` name: "2" } + data: { + streams_started: 5 + streams_succeeded: 2 + streams_failed: 3 + messages_sent: 20 + messages_received: 10 + keep_alives_sent: 2 + last_local_stream_created_timestamp: {seconds: 10 nanos: 10} + last_remote_stream_created_timestamp: {seconds: 0 nanos: 0} + last_message_sent_timestamp: {seconds: 0 nanos: 0} + last_message_received_timestamp: {seconds: 0 nanos: 0} + local_flow_control_window: { value: 65536 
} + remote_flow_control_window: { value: 10240 } } + local: { tcpip_address: { ip_address: "` + addr(skts[2].LocalAddr) + `" } } + remote: { tcpip_address: { ip_address: "` + addr(skts[2].RemoteAddr) + `" } } + remote_name: ""`, + ` + ref: {socket_id: ` + fmt.Sprint(skts[3].ID) + ` name: "3" } + local: { tcpip_address: { ip_address: "` + addr(skts[3].LocalAddr) + `" port: 10001 } } + ` + emptyData, + ` + ref: {socket_id: ` + fmt.Sprint(skts[4].ID) + ` name: "4" } + security: { tls: { + standard_name: "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256" + remote_certificate: "\x30\x82\x02\x9c\x30\x82\x02\x05\xa0" + } } + ` + emptyData, + ` + ref: {socket_id: ` + fmt.Sprint(skts[5].ID) + ` name: "5" } + security: { other: { name: "XXXX" } } + ` + emptyData, + ` + ref: {socket_id: ` + fmt.Sprint(skts[6].ID) + ` name: "6" } + security: { other: { + name: "YYYY" + value: { + type_url: "type.googleapis.com/grpc.credentials.OtherChannelzSecurityValue" + value: "` + escape(otherSecVal.Value) + `" + } + } } + ` + emptyData, + } + + for i := range ss { + resp, _ := svr.GetSocket(ctx, &channelzpb.GetSocketRequest{SocketId: skts[i].ID}) + w := &channelzpb.Socket{} + if err := proto.UnmarshalText(want[i], w); err != nil { + t.Fatalf("Error unmarshalling %q: %v", want[i], err) + } + if diff := cmp.Diff(resp.GetSocket(), w, protocmp.Transform()); diff != "" { + t.Fatalf("Socket %v did not match expected. 
-got +want: %v", i, diff) + } + } +} + +func escape(bs []byte) string { + ret := "" + for _, b := range bs { + ret += fmt.Sprintf("\\x%02x", b) + } + return ret +} + +func addr(a net.Addr) string { + switch a := a.(type) { + case *net.TCPAddr: + return string(a.IP) + case *net.IPAddr: + return string(a.IP) } + return "" } diff --git a/channelz/service/util_sktopt_386_test.go b/channelz/service/util_sktopt_386_test.go deleted file mode 100644 index 3ba3dc96e7c6..000000000000 --- a/channelz/service/util_sktopt_386_test.go +++ /dev/null @@ -1,34 +0,0 @@ -//go:build 386 && linux -// +build 386,linux - -/* - * - * Copyright 2018 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package service - -import ( - "golang.org/x/sys/unix" - channelzpb "google.golang.org/grpc/channelz/grpc_channelz_v1" -) - -func protoToTime(protoTime *channelzpb.SocketOptionTimeout) *unix.Timeval { - timeout := &unix.Timeval{} - sec, usec := convertToDuration(protoTime.GetDuration()) - timeout.Sec, timeout.Usec = int32(sec), int32(usec) - return timeout -} diff --git a/channelz/service/util_sktopt_amd64_test.go b/channelz/service/util_sktopt_amd64_test.go deleted file mode 100644 index 124d7b758199..000000000000 --- a/channelz/service/util_sktopt_amd64_test.go +++ /dev/null @@ -1,33 +0,0 @@ -//go:build amd64 && linux -// +build amd64,linux - -/* - * - * Copyright 2018 gRPC authors. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package service - -import ( - "golang.org/x/sys/unix" - channelzpb "google.golang.org/grpc/channelz/grpc_channelz_v1" -) - -func protoToTime(protoTime *channelzpb.SocketOptionTimeout) *unix.Timeval { - timeout := &unix.Timeval{} - timeout.Sec, timeout.Usec = convertToDuration(protoTime.GetDuration()) - return timeout -} diff --git a/clientconn.go b/clientconn.go index 626ea20ff6e8..1565f3b11a9e 100644 --- a/clientconn.go +++ b/clientconn.go @@ -128,7 +128,6 @@ func NewClient(target string, opts ...DialOption) (conn *ClientConn, err error) target: target, conns: make(map[*addrConn]struct{}), dopts: defaultDialOptions(), - czData: new(channelzData), } cc.retryThrottler.Store((*retryThrottler)(nil)) @@ -180,15 +179,15 @@ func NewClient(target string, opts ...DialOption) (conn *ClientConn, err error) // Determine the resolver to use. 
if err := cc.parseTargetAndFindResolver(); err != nil { - channelz.RemoveEntry(cc.channelzID) + channelz.RemoveEntry(cc.channelz.ID) return nil, err } if err = cc.determineAuthority(); err != nil { - channelz.RemoveEntry(cc.channelzID) + channelz.RemoveEntry(cc.channelz.ID) return nil, err } - cc.csMgr = newConnectivityStateManager(cc.ctx, cc.channelzID) + cc.csMgr = newConnectivityStateManager(cc.ctx, cc.channelz) cc.pickerWrapper = newPickerWrapper(cc.dopts.copts.StatsHandlers) cc.initIdleStateLocked() // Safe to call without the lock, since nothing else has a reference to cc. @@ -293,17 +292,17 @@ func DialContext(ctx context.Context, target string, opts ...DialOption) (conn * // addTraceEvent is a helper method to add a trace event on the channel. If the // channel is a nested one, the same event is also added on the parent channel. func (cc *ClientConn) addTraceEvent(msg string) { - ted := &channelz.TraceEventDesc{ + ted := &channelz.TraceEvent{ Desc: fmt.Sprintf("Channel %s", msg), Severity: channelz.CtInfo, } - if cc.dopts.channelzParentID != nil { - ted.Parent = &channelz.TraceEventDesc{ - Desc: fmt.Sprintf("Nested channel(id:%d) %s", cc.channelzID.Int(), msg), + if cc.dopts.channelzParent != nil { + ted.Parent = &channelz.TraceEvent{ + Desc: fmt.Sprintf("Nested channel(id:%d) %s", cc.channelz.ID, msg), Severity: channelz.CtInfo, } } - channelz.AddTraceEvent(logger, cc.channelzID, 0, ted) + channelz.AddTraceEvent(logger, cc.channelz, 0, ted) } type idler ClientConn @@ -420,14 +419,15 @@ func (cc *ClientConn) validateTransportCredentials() error { } // channelzRegistration registers the newly created ClientConn with channelz and -// stores the returned identifier in `cc.channelzID` and `cc.csMgr.channelzID`. -// A channelz trace event is emitted for ClientConn creation. If the newly -// created ClientConn is a nested one, i.e a valid parent ClientConn ID is -// specified via a dial option, the trace event is also added to the parent. 
+// stores the returned identifier in `cc.channelz`. A channelz trace event is +// emitted for ClientConn creation. If the newly created ClientConn is a nested +// one, i.e a valid parent ClientConn ID is specified via a dial option, the +// trace event is also added to the parent. // // Doesn't grab cc.mu as this method is expected to be called only at Dial time. func (cc *ClientConn) channelzRegistration(target string) { - cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, cc.dopts.channelzParentID, target) + parentChannel, _ := cc.dopts.channelzParent.(*channelz.Channel) + cc.channelz = channelz.RegisterChannel(parentChannel, target) cc.addTraceEvent("created") } @@ -494,11 +494,11 @@ func getChainStreamer(interceptors []StreamClientInterceptor, curr int, finalStr } // newConnectivityStateManager creates an connectivityStateManager with -// the specified id. -func newConnectivityStateManager(ctx context.Context, id *channelz.Identifier) *connectivityStateManager { +// the specified channel. +func newConnectivityStateManager(ctx context.Context, channel *channelz.Channel) *connectivityStateManager { return &connectivityStateManager{ - channelzID: id, - pubSub: grpcsync.NewPubSub(ctx), + channelz: channel, + pubSub: grpcsync.NewPubSub(ctx), } } @@ -512,7 +512,7 @@ type connectivityStateManager struct { mu sync.Mutex state connectivity.State notifyChan chan struct{} - channelzID *channelz.Identifier + channelz *channelz.Channel pubSub *grpcsync.PubSub } @@ -531,7 +531,7 @@ func (csm *connectivityStateManager) updateState(state connectivity.State) { csm.state = state csm.pubSub.Publish(state) - channelz.Infof(logger, csm.channelzID, "Channel Connectivity change to %v", state) + channelz.Infof(logger, csm.channelz, "Channel Connectivity change to %v", state) if csm.notifyChan != nil { // There are other goroutines waiting on this channel. close(csm.notifyChan) @@ -585,12 +585,12 @@ type ClientConn struct { cancel context.CancelFunc // Cancelled on close. 
// The following are initialized at dial time, and are read-only after that. - target string // User's dial target. - parsedTarget resolver.Target // See parseTargetAndFindResolver(). - authority string // See determineAuthority(). - dopts dialOptions // Default and user specified dial options. - channelzID *channelz.Identifier // Channelz identifier for the channel. - resolverBuilder resolver.Builder // See parseTargetAndFindResolver(). + target string // User's dial target. + parsedTarget resolver.Target // See parseTargetAndFindResolver(). + authority string // See determineAuthority(). + dopts dialOptions // Default and user specified dial options. + channelz *channelz.Channel // Channelz object. + resolverBuilder resolver.Builder // See parseTargetAndFindResolver(). idlenessMgr *idle.Manager // The following provide their own synchronization, and therefore don't @@ -598,7 +598,6 @@ type ClientConn struct { csMgr *connectivityStateManager pickerWrapper *pickerWrapper safeConfigSelector iresolver.SafeConfigSelector - czData *channelzData retryThrottler atomic.Value // Updated from service config. // mu protects the following fields. 
@@ -746,7 +745,7 @@ func (cc *ClientConn) updateResolverStateAndUnlock(s resolver.State, err error) var ret error if cc.dopts.disableServiceConfig { - channelz.Infof(logger, cc.channelzID, "ignoring service config from resolver (%v) and applying the default because service config is disabled", s.ServiceConfig) + channelz.Infof(logger, cc.channelz, "ignoring service config from resolver (%v) and applying the default because service config is disabled", s.ServiceConfig) cc.maybeApplyDefaultServiceConfig() } else if s.ServiceConfig == nil { cc.maybeApplyDefaultServiceConfig() @@ -757,7 +756,7 @@ func (cc *ClientConn) updateResolverStateAndUnlock(s resolver.State, err error) configSelector := iresolver.GetConfigSelector(s) if configSelector != nil { if len(s.ServiceConfig.Config.(*ServiceConfig).Methods) != 0 { - channelz.Infof(logger, cc.channelzID, "method configs in service config will be ignored due to presence of config selector") + channelz.Infof(logger, cc.channelz, "method configs in service config will be ignored due to presence of config selector") } } else { configSelector = &defaultConfigSelector{sc} @@ -836,22 +835,17 @@ func (cc *ClientConn) newAddrConnLocked(addrs []resolver.Address, opts balancer. 
addrs: copyAddressesWithoutBalancerAttributes(addrs), scopts: opts, dopts: cc.dopts, - czData: new(channelzData), + channelz: channelz.RegisterSubChannel(cc.channelz.ID, ""), resetBackoff: make(chan struct{}), stateChan: make(chan struct{}), } ac.ctx, ac.cancel = context.WithCancel(cc.ctx) - var err error - ac.channelzID, err = channelz.RegisterSubChannel(ac, cc.channelzID, "") - if err != nil { - return nil, err - } - channelz.AddTraceEvent(logger, ac.channelzID, 0, &channelz.TraceEventDesc{ + channelz.AddTraceEvent(logger, ac.channelz, 0, &channelz.TraceEvent{ Desc: "Subchannel created", Severity: channelz.CtInfo, - Parent: &channelz.TraceEventDesc{ - Desc: fmt.Sprintf("Subchannel(id:%d) created", ac.channelzID.Int()), + Parent: &channelz.TraceEvent{ + Desc: fmt.Sprintf("Subchannel(id:%d) created", ac.channelz.ID), Severity: channelz.CtInfo, }, }) @@ -874,17 +868,6 @@ func (cc *ClientConn) removeAddrConn(ac *addrConn, err error) { ac.tearDown(err) } -func (cc *ClientConn) channelzMetric() *channelz.ChannelInternalMetric { - return &channelz.ChannelInternalMetric{ - State: cc.GetState(), - Target: cc.target, - CallsStarted: atomic.LoadInt64(&cc.czData.callsStarted), - CallsSucceeded: atomic.LoadInt64(&cc.czData.callsSucceeded), - CallsFailed: atomic.LoadInt64(&cc.czData.callsFailed), - LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&cc.czData.lastCallStartedTime)), - } -} - // Target returns the target string of the ClientConn. 
func (cc *ClientConn) Target() string { return cc.target @@ -896,16 +879,16 @@ func (cc *ClientConn) CanonicalTarget() string { } func (cc *ClientConn) incrCallsStarted() { - atomic.AddInt64(&cc.czData.callsStarted, 1) - atomic.StoreInt64(&cc.czData.lastCallStartedTime, time.Now().UnixNano()) + cc.channelz.ChannelMetrics.CallsStarted.Add(1) + cc.channelz.ChannelMetrics.LastCallStartedTimestamp.Store(time.Now().UnixNano()) } func (cc *ClientConn) incrCallsSucceeded() { - atomic.AddInt64(&cc.czData.callsSucceeded, 1) + cc.channelz.ChannelMetrics.CallsSucceeded.Add(1) } func (cc *ClientConn) incrCallsFailed() { - atomic.AddInt64(&cc.czData.callsFailed, 1) + cc.channelz.ChannelMetrics.CallsFailed.Add(1) } // connect starts creating a transport. @@ -949,7 +932,7 @@ func equalAddresses(a, b []resolver.Address) bool { // connections or connection attempts. func (ac *addrConn) updateAddrs(addrs []resolver.Address) { ac.mu.Lock() - channelz.Infof(logger, ac.channelzID, "addrConn: updateAddrs curAddr: %v, addrs: %v", pretty.ToJSON(ac.curAddr), pretty.ToJSON(addrs)) + channelz.Infof(logger, ac.channelz, "addrConn: updateAddrs curAddr: %v, addrs: %v", pretty.ToJSON(ac.curAddr), pretty.ToJSON(addrs)) addrs = copyAddressesWithoutBalancerAttributes(addrs) if equalAddresses(ac.addrs, addrs) { @@ -1176,7 +1159,7 @@ func (cc *ClientConn) Close() error { // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add // trace reference to the entity being deleted, and thus prevent it from being // deleted right away. - channelz.RemoveEntry(cc.channelzID) + channelz.RemoveEntry(cc.channelz.ID) return nil } @@ -1208,8 +1191,7 @@ type addrConn struct { backoffIdx int // Needs to be stateful for resetConnectBackoff. resetBackoff chan struct{} - channelzID *channelz.Identifier - czData *channelzData + channelz *channelz.SubChannel } // Note: this requires a lock on ac.mu. 
@@ -1222,9 +1204,9 @@ func (ac *addrConn) updateConnectivityState(s connectivity.State, lastErr error) ac.stateChan = make(chan struct{}) ac.state = s if lastErr == nil { - channelz.Infof(logger, ac.channelzID, "Subchannel Connectivity change to %v", s) + channelz.Infof(logger, ac.channelz, "Subchannel Connectivity change to %v", s) } else { - channelz.Infof(logger, ac.channelzID, "Subchannel Connectivity change to %v, last error: %s", s, lastErr) + channelz.Infof(logger, ac.channelz, "Subchannel Connectivity change to %v, last error: %s", s, lastErr) } ac.acbw.updateState(s, lastErr) } @@ -1337,7 +1319,7 @@ func (ac *addrConn) tryAllAddrs(ctx context.Context, addrs []resolver.Address, c } ac.mu.Unlock() - channelz.Infof(logger, ac.channelzID, "Subchannel picks a new address %q to connect", addr.Addr) + channelz.Infof(logger, ac.channelz, "Subchannel picks a new address %q to connect", addr.Addr) err := ac.createTransport(ctx, addr, copts, connectDeadline) if err == nil { @@ -1390,7 +1372,7 @@ func (ac *addrConn) createTransport(ctx context.Context, addr resolver.Address, connectCtx, cancel := context.WithDeadline(ctx, connectDeadline) defer cancel() - copts.ChannelzParentID = ac.channelzID + copts.ChannelzParent = ac.channelz newTr, err := transport.NewClientTransport(connectCtx, ac.cc.ctx, addr, copts, onClose) if err != nil { @@ -1399,7 +1381,7 @@ func (ac *addrConn) createTransport(ctx context.Context, addr resolver.Address, } // newTr is either nil, or closed. hcancel() - channelz.Warningf(logger, ac.channelzID, "grpc: addrConn.createTransport failed to connect to %s. Err: %v", addr, err) + channelz.Warningf(logger, ac.channelz, "grpc: addrConn.createTransport failed to connect to %s. Err: %v", addr, err) return err } @@ -1471,7 +1453,7 @@ func (ac *addrConn) startHealthCheck(ctx context.Context) { // The health package is not imported to set health check function. // // TODO: add a link to the health check doc in the error message. 
- channelz.Error(logger, ac.channelzID, "Health check is requested but health check function is not set.") + channelz.Error(logger, ac.channelz, "Health check is requested but health check function is not set.") return } @@ -1501,9 +1483,9 @@ func (ac *addrConn) startHealthCheck(ctx context.Context) { err := ac.cc.dopts.healthCheckFunc(ctx, newStream, setConnectivityState, healthCheckConfig.ServiceName) if err != nil { if status.Code(err) == codes.Unimplemented { - channelz.Error(logger, ac.channelzID, "Subchannel health check is unimplemented at server side, thus health check is disabled") + channelz.Error(logger, ac.channelz, "Subchannel health check is unimplemented at server side, thus health check is disabled") } else { - channelz.Errorf(logger, ac.channelzID, "Health checking failed: %v", err) + channelz.Errorf(logger, ac.channelz, "Health checking failed: %v", err) } } }() @@ -1568,18 +1550,18 @@ func (ac *addrConn) tearDown(err error) { ac.cancel() ac.curAddr = resolver.Address{} - channelz.AddTraceEvent(logger, ac.channelzID, 0, &channelz.TraceEventDesc{ + channelz.AddTraceEvent(logger, ac.channelz, 0, &channelz.TraceEvent{ Desc: "Subchannel deleted", Severity: channelz.CtInfo, - Parent: &channelz.TraceEventDesc{ - Desc: fmt.Sprintf("Subchannel(id:%d) deleted", ac.channelzID.Int()), + Parent: &channelz.TraceEvent{ + Desc: fmt.Sprintf("Subchannel(id:%d) deleted", ac.channelz.ID), Severity: channelz.CtInfo, }, }) // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add // trace reference to the entity being deleted, and thus prevent it from // being deleted right away. 
- channelz.RemoveEntry(ac.channelzID) + channelz.RemoveEntry(ac.channelz.ID) ac.mu.Unlock() // We have to release the lock before the call to GracefulClose/Close here @@ -1606,39 +1588,6 @@ func (ac *addrConn) tearDown(err error) { } } -func (ac *addrConn) getState() connectivity.State { - ac.mu.Lock() - defer ac.mu.Unlock() - return ac.state -} - -func (ac *addrConn) ChannelzMetric() *channelz.ChannelInternalMetric { - ac.mu.Lock() - addr := ac.curAddr.Addr - ac.mu.Unlock() - return &channelz.ChannelInternalMetric{ - State: ac.getState(), - Target: addr, - CallsStarted: atomic.LoadInt64(&ac.czData.callsStarted), - CallsSucceeded: atomic.LoadInt64(&ac.czData.callsSucceeded), - CallsFailed: atomic.LoadInt64(&ac.czData.callsFailed), - LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&ac.czData.lastCallStartedTime)), - } -} - -func (ac *addrConn) incrCallsStarted() { - atomic.AddInt64(&ac.czData.callsStarted, 1) - atomic.StoreInt64(&ac.czData.lastCallStartedTime, time.Now().UnixNano()) -} - -func (ac *addrConn) incrCallsSucceeded() { - atomic.AddInt64(&ac.czData.callsSucceeded, 1) -} - -func (ac *addrConn) incrCallsFailed() { - atomic.AddInt64(&ac.czData.callsFailed, 1) -} - type retryThrottler struct { max float64 thresh float64 @@ -1676,12 +1625,17 @@ func (rt *retryThrottler) successfulRPC() { } } -type channelzChannel struct { - cc *ClientConn +func (ac *addrConn) incrCallsStarted() { + ac.channelz.ChannelMetrics.CallsStarted.Add(1) + ac.channelz.ChannelMetrics.LastCallStartedTimestamp.Store(time.Now().UnixNano()) } -func (c *channelzChannel) ChannelzMetric() *channelz.ChannelInternalMetric { - return c.cc.channelzMetric() +func (ac *addrConn) incrCallsSucceeded() { + ac.channelz.ChannelMetrics.CallsSucceeded.Add(1) +} + +func (ac *addrConn) incrCallsFailed() { + ac.channelz.ChannelMetrics.CallsFailed.Add(1) } // ErrClientConnTimeout indicates that the ClientConn cannot establish the @@ -1723,14 +1677,14 @@ func (cc *ClientConn) connectionError() error { // 
// Doesn't grab cc.mu as this method is expected to be called only at Dial time. func (cc *ClientConn) parseTargetAndFindResolver() error { - channelz.Infof(logger, cc.channelzID, "original dial target is: %q", cc.target) + channelz.Infof(logger, cc.channelz, "original dial target is: %q", cc.target) var rb resolver.Builder parsedTarget, err := parseTarget(cc.target) if err != nil { - channelz.Infof(logger, cc.channelzID, "dial target %q parse failed: %v", cc.target, err) + channelz.Infof(logger, cc.channelz, "dial target %q parse failed: %v", cc.target, err) } else { - channelz.Infof(logger, cc.channelzID, "parsed dial target is: %#v", parsedTarget) + channelz.Infof(logger, cc.channelz, "parsed dial target is: %#v", parsedTarget) rb = cc.getResolver(parsedTarget.URL.Scheme) if rb != nil { cc.parsedTarget = parsedTarget @@ -1749,15 +1703,15 @@ func (cc *ClientConn) parseTargetAndFindResolver() error { defScheme = resolver.GetDefaultScheme() } - channelz.Infof(logger, cc.channelzID, "fallback to scheme %q", defScheme) + channelz.Infof(logger, cc.channelz, "fallback to scheme %q", defScheme) canonicalTarget := defScheme + ":///" + cc.target parsedTarget, err = parseTarget(canonicalTarget) if err != nil { - channelz.Infof(logger, cc.channelzID, "dial target %q parse failed: %v", canonicalTarget, err) + channelz.Infof(logger, cc.channelz, "dial target %q parse failed: %v", canonicalTarget, err) return err } - channelz.Infof(logger, cc.channelzID, "parsed dial target is: %+v", parsedTarget) + channelz.Infof(logger, cc.channelz, "parsed dial target is: %+v", parsedTarget) rb = cc.getResolver(parsedTarget.URL.Scheme) if rb == nil { return fmt.Errorf("could not get resolver for default scheme: %q", parsedTarget.URL.Scheme) @@ -1880,6 +1834,6 @@ func (cc *ClientConn) determineAuthority() error { } else { cc.authority = encodeAuthority(endpoint) } - channelz.Infof(logger, cc.channelzID, "Channel authority set to %q", cc.authority) + channelz.Infof(logger, cc.channelz, 
"Channel authority set to %q", cc.authority) return nil } diff --git a/dialoptions.go b/dialoptions.go index 667f5396445b..402493224e06 100644 --- a/dialoptions.go +++ b/dialoptions.go @@ -68,7 +68,7 @@ type dialOptions struct { binaryLogger binarylog.Logger copts transport.ConnectOptions callOptions []CallOption - channelzParentID *channelz.Identifier + channelzParent channelz.Identifier disableServiceConfig bool disableRetry bool disableHealthCheck bool @@ -554,9 +554,9 @@ func WithAuthority(a string) DialOption { // // Notice: This API is EXPERIMENTAL and may be changed or removed in a // later release. -func WithChannelzParentID(id *channelz.Identifier) DialOption { +func WithChannelzParentID(c channelz.Identifier) DialOption { return newFuncDialOption(func(o *dialOptions) { - o.channelzParentID = id + o.channelzParent = c }) } diff --git a/internal/balancergroup/balancergroup_test.go b/internal/balancergroup/balancergroup_test.go index 8daab7eeba72..dfe52f3a6b5d 100644 --- a/internal/balancergroup/balancergroup_test.go +++ b/internal/balancergroup/balancergroup_test.go @@ -22,7 +22,6 @@ import ( "testing" "time" - "github.com/google/go-cmp/cmp" "google.golang.org/grpc" "google.golang.org/grpc/balancer" "google.golang.org/grpc/balancer/roundrobin" @@ -491,13 +490,13 @@ func (s) TestBalancerGroupBuildOptions(t *testing.T) { // Setup the stub balancer such that we can read the build options passed to // it in the UpdateClientConnState method. 
bOpts := balancer.BuildOptions{ - DialCreds: insecure.NewCredentials(), - ChannelzParentID: channelz.NewIdentifierForTesting(channelz.RefChannel, 1234, nil), - CustomUserAgent: userAgent, + DialCreds: insecure.NewCredentials(), + ChannelzParent: channelz.RegisterChannel(nil, "test channel"), + CustomUserAgent: userAgent, } stub.Register(balancerName, stub.BalancerFuncs{ UpdateClientConnState: func(bd *stub.BalancerData, _ balancer.ClientConnState) error { - if !cmp.Equal(bd.BuildOptions, bOpts) { + if bd.BuildOptions.DialCreds != bOpts.DialCreds || bd.BuildOptions.ChannelzParent != bOpts.ChannelzParent || bd.BuildOptions.CustomUserAgent != bOpts.CustomUserAgent { return fmt.Errorf("buildOptions in child balancer: %v, want %v", bd, bOpts) } return nil diff --git a/internal/channelz/channel.go b/internal/channelz/channel.go new file mode 100644 index 000000000000..d7e9e1d54ecb --- /dev/null +++ b/internal/channelz/channel.go @@ -0,0 +1,255 @@ +/* + * + * Copyright 2024 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package channelz + +import ( + "fmt" + "sync/atomic" + + "google.golang.org/grpc/connectivity" +) + +// Channel represents a channel within channelz, which includes metrics and +// internal channelz data, such as channelz id, child list, etc. +type Channel struct { + Entity + // ID is the channelz id of this channel. + ID int64 + // RefName is the human readable reference string of this channel. 
+ RefName string + + closeCalled bool + nestedChans map[int64]string + subChans map[int64]string + Parent *Channel + trace *ChannelTrace + // traceRefCount is the number of trace events that reference this channel. + // Non-zero traceRefCount means the trace of this channel cannot be deleted. + traceRefCount int32 + + ChannelMetrics ChannelMetrics +} + +// Implemented to make Channel implement the Identifier interface used for +// nesting. +func (c *Channel) channelzIdentifier() {} + +func (c *Channel) String() string { + if c.Parent == nil { + return fmt.Sprintf("Channel #%d", c.ID) + } + return fmt.Sprintf("%s Channel #%d", c.Parent, c.ID) +} + +func (c *Channel) id() int64 { + return c.ID +} + +func (c *Channel) SubChans() map[int64]string { + db.mu.RLock() + defer db.mu.RUnlock() + return copyMap(c.subChans) +} + +func (c *Channel) NestedChans() map[int64]string { + db.mu.RLock() + defer db.mu.RUnlock() + return copyMap(c.nestedChans) +} + +func (c *Channel) Trace() *ChannelTrace { + db.mu.RLock() + defer db.mu.RUnlock() + return c.trace.copy() +} + +type ChannelMetrics struct { + // The current connectivity state of the channel. + State atomic.Pointer[connectivity.State] + // The target this channel originally tried to connect to. May be absent + Target atomic.Pointer[string] + // The number of calls started on the channel. + CallsStarted atomic.Int64 + // The number of calls that have completed with an OK status. + CallsSucceeded atomic.Int64 + // The number of calls that have a completed with a non-OK status. + CallsFailed atomic.Int64 + // The last time a call was started on the channel. + LastCallStartedTimestamp atomic.Int64 +} + +// CopyFrom copies the metrics in o to c. For testing only. 
+func (c *ChannelMetrics) CopyFrom(o *ChannelMetrics) { + c.State.Store(o.State.Load()) + c.Target.Store(o.Target.Load()) + c.CallsStarted.Store(o.CallsStarted.Load()) + c.CallsSucceeded.Store(o.CallsSucceeded.Load()) + c.CallsFailed.Store(o.CallsFailed.Load()) + c.LastCallStartedTimestamp.Store(o.LastCallStartedTimestamp.Load()) +} + +// Equal returns true iff the metrics of c are the same as the metrics of o. +// For testing only. +func (c *ChannelMetrics) Equal(o any) bool { + oc, ok := o.(*ChannelMetrics) + if !ok { + return false + } + if (c.State.Load() == nil) != (oc.State.Load() == nil) { + return false + } + if c.State.Load() != nil && *c.State.Load() != *oc.State.Load() { + return false + } + if (c.Target.Load() == nil) != (oc.Target.Load() == nil) { + return false + } + if c.Target.Load() != nil && *c.Target.Load() != *oc.Target.Load() { + return false + } + return c.CallsStarted.Load() == oc.CallsStarted.Load() && + c.CallsFailed.Load() == oc.CallsFailed.Load() && + c.CallsSucceeded.Load() == oc.CallsSucceeded.Load() && + c.LastCallStartedTimestamp.Load() == oc.LastCallStartedTimestamp.Load() +} + +func strFromPointer(s *string) string { + if s == nil { + return "" + } + return *s +} + +func (c *ChannelMetrics) String() string { + return fmt.Sprintf("State: %v, Target: %s, CallsStarted: %v, CallsSucceeded: %v, CallsFailed: %v, LastCallStartedTimestamp: %v", + c.State.Load(), strFromPointer(c.Target.Load()), c.CallsStarted.Load(), c.CallsSucceeded.Load(), c.CallsFailed.Load(), c.LastCallStartedTimestamp.Load(), + ) +} + +func NewChannelMetricForTesting(state connectivity.State, target string, started, succeeded, failed, timestamp int64) *ChannelMetrics { + c := &ChannelMetrics{} + c.State.Store(&state) + c.Target.Store(&target) + c.CallsStarted.Store(started) + c.CallsSucceeded.Store(succeeded) + c.CallsFailed.Store(failed) + c.LastCallStartedTimestamp.Store(timestamp) + return c +} + +func (c *Channel) addChild(id int64, e entry) { + switch v := 
e.(type) { + case *SubChannel: + c.subChans[id] = v.RefName + case *Channel: + c.nestedChans[id] = v.RefName + default: + logger.Errorf("cannot add a child (id = %d) of type %T to a channel", id, e) + } +} + +func (c *Channel) deleteChild(id int64) { + delete(c.subChans, id) + delete(c.nestedChans, id) + c.deleteSelfIfReady() +} + +func (c *Channel) triggerDelete() { + c.closeCalled = true + c.deleteSelfIfReady() +} + +func (c *Channel) getParentID() int64 { + if c.Parent == nil { + return -1 + } + return c.Parent.ID +} + +// deleteSelfFromTree tries to delete the channel from the channelz entry relation tree, which means +// deleting the channel reference from its parent's child list. +// +// In order for a channel to be deleted from the tree, it must meet the criteria that, removal of the +// corresponding grpc object has been invoked, and the channel does not have any children left. +// +// The returned boolean value indicates whether the channel has been successfully deleted from tree. +func (c *Channel) deleteSelfFromTree() (deleted bool) { + if !c.closeCalled || len(c.subChans)+len(c.nestedChans) != 0 { + return false + } + // not top channel + if c.Parent != nil { + c.Parent.deleteChild(c.ID) + } + return true +} + +// deleteSelfFromMap checks whether it is valid to delete the channel from the map, which means +// deleting the channel from channelz's tracking entirely. Users can no longer use id to query the +// channel, and its memory will be garbage collected. +// +// The trace reference count of the channel must be 0 in order to be deleted from the map. This is +// specified in the channel tracing gRFC that as long as some other trace has reference to an entity, +// the trace of the referenced entity must not be deleted. In order to release the resource allocated +// by grpc, the reference to the grpc object is reset to a dummy object. +// +// deleteSelfFromMap must be called after deleteSelfFromTree returns true. 
+// +// It returns a bool to indicate whether the channel can be safely deleted from map. +func (c *Channel) deleteSelfFromMap() (delete bool) { + return c.getTraceRefCount() == 0 +} + +// deleteSelfIfReady tries to delete the channel itself from the channelz database. +// The delete process includes two steps: +// 1. delete the channel from the entry relation tree, i.e. delete the channel reference from its +// parent's child list. +// 2. delete the channel from the map, i.e. delete the channel entirely from channelz. Lookup by id +// will return entry not found error. +func (c *Channel) deleteSelfIfReady() { + if !c.deleteSelfFromTree() { + return + } + if !c.deleteSelfFromMap() { + return + } + db.deleteEntry(c.ID) + c.trace.clear() +} + +func (c *Channel) getChannelTrace() *ChannelTrace { + return c.trace +} + +func (c *Channel) incrTraceRefCount() { + atomic.AddInt32(&c.traceRefCount, 1) +} + +func (c *Channel) decrTraceRefCount() { + atomic.AddInt32(&c.traceRefCount, -1) +} + +func (c *Channel) getTraceRefCount() int { + i := atomic.LoadInt32(&c.traceRefCount) + return int(i) +} + +func (c *Channel) getRefName() string { + return c.RefName +} diff --git a/internal/channelz/channelmap.go b/internal/channelz/channelmap.go new file mode 100644 index 000000000000..dfe18b08925d --- /dev/null +++ b/internal/channelz/channelmap.go @@ -0,0 +1,402 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package channelz + +import ( + "fmt" + "sort" + "sync" + "time" +) + +// entry represents a node in the channelz database. +type entry interface { + // addChild adds a child e, whose channelz id is id to child list + addChild(id int64, e entry) + // deleteChild deletes a child with channelz id to be id from child list + deleteChild(id int64) + // triggerDelete tries to delete self from channelz database. However, if + // child list is not empty, then deletion from the database is on hold until + // the last child is deleted from database. + triggerDelete() + // deleteSelfIfReady check whether triggerDelete() has been called before, + // and whether child list is now empty. If both conditions are met, then + // delete self from database. + deleteSelfIfReady() + // getParentID returns parent ID of the entry. 0 value parent ID means no parent. + getParentID() int64 + Entity +} + +// channelMap is the storage data structure for channelz. +// +// Methods of channelMap can be divided in two two categories with respect to +// locking. +// +// 1. Methods acquire the global lock. +// 2. Methods that can only be called when global lock is held. +// +// A second type of method need always to be called inside a first type of method. 
+type channelMap struct {
+	// mu is the global lock; every method below takes it before touching the
+	// maps.
+	mu sync.RWMutex
+	// topLevelChannels is the set of IDs of channels added with isTopChannel
+	// set.
+	topLevelChannels map[int64]struct{}
+	// channels, subChannels, sockets and servers index the registered
+	// entities by channelz ID.
+	channels    map[int64]*Channel
+	subChannels map[int64]*SubChannel
+	sockets     map[int64]*Socket
+	servers     map[int64]*Server
+}
+
+// newChannelMap returns an empty channelMap with every index map allocated.
+func newChannelMap() *channelMap {
+	return &channelMap{
+		topLevelChannels: make(map[int64]struct{}),
+		channels:         make(map[int64]*Channel),
+		subChannels:      make(map[int64]*SubChannel),
+		sockets:          make(map[int64]*Socket),
+		servers:          make(map[int64]*Server),
+	}
+}
+
+// addServer stores s under id and points s.cm back at this map.
+func (c *channelMap) addServer(id int64, s *Server) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	s.cm = c
+	c.servers[id] = s
+}
+
+// addChannel stores cn under id. A top-level channel is also recorded in
+// topLevelChannels; any other channel is added to the child list of the
+// channel registered under pid. An unknown pid is only logged.
+func (c *channelMap) addChannel(id int64, cn *Channel, isTopChannel bool, pid int64) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	cn.trace.cm = c
+	c.channels[id] = cn
+	if isTopChannel {
+		c.topLevelChannels[id] = struct{}{}
+	} else if p := c.channels[pid]; p != nil {
+		p.addChild(id, cn)
+	} else {
+		logger.Infof("channel %d references invalid parent ID %d", id, pid)
+	}
+}
+
+// addSubChannel stores sc under id and adds it to the child list of the
+// channel registered under pid. An unknown pid is only logged.
+func (c *channelMap) addSubChannel(id int64, sc *SubChannel, pid int64) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	sc.trace.cm = c
+	c.subChannels[id] = sc
+	if p := c.channels[pid]; p != nil {
+		p.addChild(id, sc)
+	} else {
+		logger.Infof("subchannel %d references invalid parent ID %d", id, pid)
+	}
+}
+
+// addSocket stores s under s.ID and adds it to the child list of s.Parent.
+//
+// A socket whose Parent is nil is still stored, so lookups by ID succeed, but
+// it is not attached to any child list.
+func (c *channelMap) addSocket(s *Socket) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	s.cm = c
+	c.sockets[s.ID] = s
+	if s.Parent == nil {
+		logger.Infof("normal socket %d has no parent", s.ID)
+		// Return here: a type assertion on a nil interface value panics, so
+		// falling through to the assertion below would crash the caller
+		// right after logging the condition.
+		return
+	}
+	s.Parent.(entry).addChild(s.ID, s)
+}
+
+// removeEntry triggers the removal of an entry, which may not indeed delete the
+// entry, if it has to wait on the deletion of its children and until no other
+// entity's channel trace references it. It may lead to a chain of entry
+// deletion. For example, deleting the last socket of a gracefully shutting down
+// server will lead to the server being also deleted.
+func (c *channelMap) removeEntry(id int64) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	// findEntry never returns nil: an unknown id yields a dummyEntry.
+	target := c.findEntry(id)
+	target.triggerDelete()
+}
+
+// tracedChannel is the set of tracing operations shared by channels and
+// subChannels.
+type tracedChannel interface {
+	getChannelTrace() *ChannelTrace
+	incrTraceRefCount()
+	decrTraceRefCount()
+	getRefName() string
+}
+
+// decrTraceRefCount decrements the trace reference count of the entry with
+// the given id and then lets that entry delete itself if it is ready. Entries
+// that do not implement tracedChannel are left untouched.
+//
+// c.mu must be held by the caller.
+func (c *channelMap) decrTraceRefCount(id int64) {
+	found := c.findEntry(id)
+	traced, isTraced := found.(tracedChannel)
+	if !isTraced {
+		return
+	}
+	traced.decrTraceRefCount()
+	found.deleteSelfIfReady()
+}
+
+// findEntry looks id up among channels, subChannels, servers and sockets, in
+// that order, and returns the first match. When no map holds id it returns a
+// dummyEntry carrying the missing id.
+//
+// c.mu must be held by the caller.
+func (c *channelMap) findEntry(id int64) entry {
+	if ch, present := c.channels[id]; present {
+		return ch
+	}
+	if sub, present := c.subChannels[id]; present {
+		return sub
+	}
+	if srv, present := c.servers[id]; present {
+		return srv
+	}
+	if skt, present := c.sockets[id]; present {
+		return skt
+	}
+	return &dummyEntry{idNotFound: id}
+}
+
+// c.mu must be held by the caller
+//
+// deleteEntry deletes an entry from the channelMap. Before calling this method,
+// caller must check this entry is ready to be deleted, i.e removeEntry() has
+// been called on it, and no children still exist.
+func (c *channelMap) deleteEntry(id int64) entry { + if v, ok := c.sockets[id]; ok { + delete(c.sockets, id) + return v + } + if v, ok := c.subChannels[id]; ok { + delete(c.subChannels, id) + return v + } + if v, ok := c.channels[id]; ok { + delete(c.channels, id) + delete(c.topLevelChannels, id) + return v + } + if v, ok := c.servers[id]; ok { + delete(c.servers, id) + return v + } + return &dummyEntry{idNotFound: id} +} + +func (c *channelMap) traceEvent(id int64, desc *TraceEvent) { + c.mu.Lock() + defer c.mu.Unlock() + child := c.findEntry(id) + childTC, ok := child.(tracedChannel) + if !ok { + return + } + childTC.getChannelTrace().append(&traceEvent{Desc: desc.Desc, Severity: desc.Severity, Timestamp: time.Now()}) + if desc.Parent != nil { + parent := c.findEntry(child.getParentID()) + var chanType RefChannelType + switch child.(type) { + case *Channel: + chanType = RefChannel + case *SubChannel: + chanType = RefSubChannel + } + if parentTC, ok := parent.(tracedChannel); ok { + parentTC.getChannelTrace().append(&traceEvent{ + Desc: desc.Parent.Desc, + Severity: desc.Parent.Severity, + Timestamp: time.Now(), + RefID: id, + RefName: childTC.getRefName(), + RefType: chanType, + }) + childTC.incrTraceRefCount() + } + } +} + +type int64Slice []int64 + +func (s int64Slice) Len() int { return len(s) } +func (s int64Slice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s int64Slice) Less(i, j int) bool { return s[i] < s[j] } + +func copyMap(m map[int64]string) map[int64]string { + n := make(map[int64]string) + for k, v := range m { + n[k] = v + } + return n +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +func (c *channelMap) getTopChannels(id int64, maxResults int) ([]*Channel, bool) { + if maxResults <= 0 { + maxResults = EntriesPerPage + } + c.mu.RLock() + defer c.mu.RUnlock() + l := int64(len(c.topLevelChannels)) + ids := make([]int64, 0, l) + + for k := range c.topLevelChannels { + ids = append(ids, k) + } + 
sort.Sort(int64Slice(ids)) + idx := sort.Search(len(ids), func(i int) bool { return ids[i] >= id }) + end := true + var t []*Channel + for _, v := range ids[idx:] { + if len(t) == maxResults { + end = false + break + } + if cn, ok := c.channels[v]; ok { + t = append(t, cn) + } + } + return t, end +} + +func (c *channelMap) getServers(id int64, maxResults int) ([]*Server, bool) { + if maxResults <= 0 { + maxResults = EntriesPerPage + } + c.mu.RLock() + defer c.mu.RUnlock() + ids := make([]int64, 0, len(c.servers)) + for k := range c.servers { + ids = append(ids, k) + } + sort.Sort(int64Slice(ids)) + idx := sort.Search(len(ids), func(i int) bool { return ids[i] >= id }) + end := true + var s []*Server + for _, v := range ids[idx:] { + if len(s) == maxResults { + end = false + break + } + if svr, ok := c.servers[v]; ok { + s = append(s, svr) + } + } + return s, end +} + +func (c *channelMap) getServerSockets(id int64, startID int64, maxResults int) ([]*Socket, bool) { + if maxResults <= 0 { + maxResults = EntriesPerPage + } + c.mu.RLock() + defer c.mu.RUnlock() + svr, ok := c.servers[id] + if !ok { + // server with id doesn't exist. 
+ return nil, true + } + svrskts := svr.sockets + ids := make([]int64, 0, len(svrskts)) + sks := make([]*Socket, 0, min(len(svrskts), maxResults)) + for k := range svrskts { + ids = append(ids, k) + } + sort.Sort(int64Slice(ids)) + idx := sort.Search(len(ids), func(i int) bool { return ids[i] >= startID }) + end := true + for _, v := range ids[idx:] { + if len(sks) == maxResults { + end = false + break + } + if ns, ok := c.sockets[v]; ok { + sks = append(sks, ns) + } + } + return sks, end +} + +func (c *channelMap) getChannel(id int64) *Channel { + c.mu.RLock() + defer c.mu.RUnlock() + return c.channels[id] +} + +func (c *channelMap) getSubChannel(id int64) *SubChannel { + c.mu.RLock() + defer c.mu.RUnlock() + return c.subChannels[id] +} + +func (c *channelMap) getSocket(id int64) *Socket { + c.mu.RLock() + defer c.mu.RUnlock() + return c.sockets[id] +} + +func (c *channelMap) getServer(id int64) *Server { + c.mu.RLock() + defer c.mu.RUnlock() + return c.servers[id] +} + +type dummyEntry struct { + // dummyEntry is a fake entry to handle entry not found case. + idNotFound int64 + Entity +} + +func (d *dummyEntry) String() string { + return fmt.Sprintf("non-existent entity #%d", d.idNotFound) +} + +func (d *dummyEntry) ID() int64 { return d.idNotFound } + +func (d *dummyEntry) addChild(id int64, e entry) { + // Note: It is possible for a normal program to reach here under race + // condition. For example, there could be a race between ClientConn.Close() + // info being propagated to addrConn and http2Client. ClientConn.Close() + // cancel the context and result in http2Client to error. The error info is + // then caught by transport monitor and before addrConn.tearDown() is called + // in side ClientConn.Close(). Therefore, the addrConn will create a new + // transport. And when registering the new transport in channelz, its parent + // addrConn could have already been torn down and deleted from channelz + // tracking, and thus reach the code here. 
+ logger.Infof("attempt to add child of type %T with id %d to a parent (id=%d) that doesn't currently exist", e, id, d.idNotFound) +} + +func (d *dummyEntry) deleteChild(id int64) { + // It is possible for a normal program to reach here under race condition. + // Refer to the example described in addChild(). + logger.Infof("attempt to delete child with id %d from a parent (id=%d) that doesn't currently exist", id, d.idNotFound) +} + +func (d *dummyEntry) triggerDelete() { + logger.Warningf("attempt to delete an entry (id=%d) that doesn't currently exist", d.idNotFound) +} + +func (*dummyEntry) deleteSelfIfReady() { + // code should not reach here. deleteSelfIfReady is always called on an existing entry. +} + +func (*dummyEntry) getParentID() int64 { + return 0 +} + +// Entity is implemented by all channelz types. +type Entity interface { + isEntity() + fmt.Stringer + id() int64 +} diff --git a/internal/channelz/funcs.go b/internal/channelz/funcs.go index fc094f3441b8..baf0fd92f39f 100644 --- a/internal/channelz/funcs.go +++ b/internal/channelz/funcs.go @@ -16,47 +16,32 @@ * */ -// Package channelz defines APIs for enabling channelz service, entry +// Package channelz defines internal APIs for enabling channelz service, entry // registration/deletion, and accessing channelz data. It also defines channelz // metric struct formats. -// -// All APIs in this package are experimental. package channelz import ( - "errors" - "sort" - "sync" "sync/atomic" "time" - "google.golang.org/grpc/grpclog" "google.golang.org/grpc/internal" ) -const ( - defaultMaxTraceEntry int32 = 30 -) - var ( // IDGen is the global channelz entity ID generator. It should not be used // outside this package except by tests. IDGen IDGenerator - db dbWrapper - // EntryPerPage defines the number of channelz entries to be shown on a web page. 
- EntryPerPage = int64(50) - curState int32 - maxTraceEntry = defaultMaxTraceEntry + db *channelMap = newChannelMap() + // EntriesPerPage defines the number of channelz entries to be shown on a web page. + EntriesPerPage = 50 + curState int32 ) // TurnOn turns on channelz data collection. func TurnOn() { - if !IsOn() { - db.set(newChannelMap()) - IDGen.Reset() - atomic.StoreInt32(&curState, 1) - } + atomic.StoreInt32(&curState, 1) } func init() { @@ -70,49 +55,15 @@ func IsOn() bool { return atomic.LoadInt32(&curState) == 1 } -// SetMaxTraceEntry sets maximum number of trace entry per entity (i.e. channel/subchannel). -// Setting it to 0 will disable channel tracing. -func SetMaxTraceEntry(i int32) { - atomic.StoreInt32(&maxTraceEntry, i) -} - -// ResetMaxTraceEntryToDefault resets the maximum number of trace entry per entity to default. -func ResetMaxTraceEntryToDefault() { - atomic.StoreInt32(&maxTraceEntry, defaultMaxTraceEntry) -} - -func getMaxTraceEntry() int { - i := atomic.LoadInt32(&maxTraceEntry) - return int(i) -} - -// dbWarpper wraps around a reference to internal channelz data storage, and -// provide synchronized functionality to set and get the reference. -type dbWrapper struct { - mu sync.RWMutex - DB *channelMap -} - -func (d *dbWrapper) set(db *channelMap) { - d.mu.Lock() - d.DB = db - d.mu.Unlock() -} - -func (d *dbWrapper) get() *channelMap { - d.mu.RLock() - defer d.mu.RUnlock() - return d.DB -} - // GetTopChannels returns a slice of top channel's ChannelMetric, along with a // boolean indicating whether there's more top channels to be queried for. // -// The arg id specifies that only top channel with id at or above it will be included -// in the result. The returned slice is up to a length of the arg maxResults or -// EntryPerPage if maxResults is zero, and is sorted in ascending id order. 
-func GetTopChannels(id int64, maxResults int64) ([]*ChannelMetric, bool) { - return db.get().GetTopChannels(id, maxResults) +// The arg id specifies that only top channel with id at or above it will be +// included in the result. The returned slice is up to a length of the arg +// maxResults or EntriesPerPage if maxResults is zero, and is sorted in ascending +// id order. +func GetTopChannels(id int64, maxResults int) ([]*Channel, bool) { + return db.getTopChannels(id, maxResults) } // GetServers returns a slice of server's ServerMetric, along with a @@ -120,73 +71,68 @@ func GetTopChannels(id int64, maxResults int64) ([]*ChannelMetric, bool) { // // The arg id specifies that only server with id at or above it will be included // in the result. The returned slice is up to a length of the arg maxResults or -// EntryPerPage if maxResults is zero, and is sorted in ascending id order. -func GetServers(id int64, maxResults int64) ([]*ServerMetric, bool) { - return db.get().GetServers(id, maxResults) +// EntriesPerPage if maxResults is zero, and is sorted in ascending id order. +func GetServers(id int64, maxResults int) ([]*Server, bool) { + return db.getServers(id, maxResults) } // GetServerSockets returns a slice of server's (identified by id) normal socket's -// SocketMetric, along with a boolean indicating whether there's more sockets to +// SocketMetrics, along with a boolean indicating whether there's more sockets to // be queried for. // // The arg startID specifies that only sockets with id at or above it will be // included in the result. The returned slice is up to a length of the arg maxResults -// or EntryPerPage if maxResults is zero, and is sorted in ascending id order. -func GetServerSockets(id int64, startID int64, maxResults int64) ([]*SocketMetric, bool) { - return db.get().GetServerSockets(id, startID, maxResults) +// or EntriesPerPage if maxResults is zero, and is sorted in ascending id order. 
+func GetServerSockets(id int64, startID int64, maxResults int) ([]*Socket, bool) { + return db.getServerSockets(id, startID, maxResults) } -// GetChannel returns the ChannelMetric for the channel (identified by id). -func GetChannel(id int64) *ChannelMetric { - return db.get().GetChannel(id) +// GetChannel returns the Channel for the channel (identified by id). +func GetChannel(id int64) *Channel { + return db.getChannel(id) } -// GetSubChannel returns the SubChannelMetric for the subchannel (identified by id). -func GetSubChannel(id int64) *SubChannelMetric { - return db.get().GetSubChannel(id) +// GetSubChannel returns the SubChannel for the subchannel (identified by id). +func GetSubChannel(id int64) *SubChannel { + return db.getSubChannel(id) } -// GetSocket returns the SocketInternalMetric for the socket (identified by id). -func GetSocket(id int64) *SocketMetric { - return db.get().GetSocket(id) +// GetSocket returns the Socket for the socket (identified by id). +func GetSocket(id int64) *Socket { + return db.getSocket(id) } // GetServer returns the ServerMetric for the server (identified by id). -func GetServer(id int64) *ServerMetric { - return db.get().GetServer(id) +func GetServer(id int64) *Server { + return db.getServer(id) } // RegisterChannel registers the given channel c in the channelz database with -// ref as its reference name, and adds it to the child list of its parent -// (identified by pid). pid == nil means no parent. +// ref as its reference name, and adds it to the child list of its parent. +// parent == nil means no parent. // // Returns a unique channelz identifier assigned to this channel. // // If channelz is not turned ON, the channelz database is not mutated. 
-func RegisterChannel(c Channel, pid *Identifier, ref string) *Identifier { +func RegisterChannel(parent *Channel, ref string) *Channel { id := IDGen.genID() - var parent int64 - isTopChannel := true - if pid != nil { - isTopChannel = false - parent = pid.Int() - } if !IsOn() { - return newIdentifer(RefChannel, id, pid) + return &Channel{ID: id} } - cn := &channel{ - refName: ref, - c: c, - subChans: make(map[int64]string), + isTopChannel := parent == nil + + cn := &Channel{ + ID: id, + RefName: ref, nestedChans: make(map[int64]string), - id: id, - pid: parent, - trace: &channelTrace{createdTime: time.Now(), events: make([]*TraceEvent, 0, getMaxTraceEntry())}, + subChans: make(map[int64]string), + Parent: parent, + trace: &ChannelTrace{CreationTime: time.Now(), Events: make([]*traceEvent, 0, getMaxTraceEntry())}, } - db.get().addChannel(id, cn, isTopChannel, parent) - return newIdentifer(RefChannel, id, pid) + db.addChannel(id, cn, isTopChannel, cn.getParentID()) + return cn } // RegisterSubChannel registers the given subChannel c in the channelz database @@ -196,555 +142,66 @@ func RegisterChannel(c Channel, pid *Identifier, ref string) *Identifier { // Returns a unique channelz identifier assigned to this subChannel. // // If channelz is not turned ON, the channelz database is not mutated. 
-func RegisterSubChannel(c Channel, pid *Identifier, ref string) (*Identifier, error) { - if pid == nil { - return nil, errors.New("a SubChannel's parent id cannot be nil") - } +func RegisterSubChannel(pid int64, ref string) *SubChannel { id := IDGen.genID() if !IsOn() { - return newIdentifer(RefSubChannel, id, pid), nil + return &SubChannel{ID: id} } - sc := &subChannel{ - refName: ref, - c: c, + sc := &SubChannel{ + RefName: ref, + ID: id, sockets: make(map[int64]string), - id: id, - pid: pid.Int(), - trace: &channelTrace{createdTime: time.Now(), events: make([]*TraceEvent, 0, getMaxTraceEntry())}, + parent: db.channels[pid], + trace: &ChannelTrace{CreationTime: time.Now(), Events: make([]*traceEvent, 0, getMaxTraceEntry())}, } - db.get().addSubChannel(id, sc, pid.Int()) - return newIdentifer(RefSubChannel, id, pid), nil + db.addSubChannel(id, sc, pid) + return sc } // RegisterServer registers the given server s in channelz database. It returns // the unique channelz tracking id assigned to this server. // // If channelz is not turned ON, the channelz database is not mutated. -func RegisterServer(s Server, ref string) *Identifier { +func RegisterServer(ref string) *Server { id := IDGen.genID() if !IsOn() { - return newIdentifer(RefServer, id, nil) + return &Server{ID: id} } - svr := &server{ - refName: ref, - s: s, + svr := &Server{ + RefName: ref, sockets: make(map[int64]string), listenSockets: make(map[int64]string), - id: id, - } - db.get().addServer(id, svr) - return newIdentifer(RefServer, id, nil) -} - -// RegisterListenSocket registers the given listen socket s in channelz database -// with ref as its reference name, and add it to the child list of its parent -// (identified by pid). It returns the unique channelz tracking id assigned to -// this listen socket. -// -// If channelz is not turned ON, the channelz database is not mutated. 
-func RegisterListenSocket(s Socket, pid *Identifier, ref string) (*Identifier, error) { - if pid == nil { - return nil, errors.New("a ListenSocket's parent id cannot be 0") + ID: id, } - id := IDGen.genID() - if !IsOn() { - return newIdentifer(RefListenSocket, id, pid), nil - } - - ls := &listenSocket{refName: ref, s: s, id: id, pid: pid.Int()} - db.get().addListenSocket(id, ls, pid.Int()) - return newIdentifer(RefListenSocket, id, pid), nil + db.addServer(id, svr) + return svr } -// RegisterNormalSocket registers the given normal socket s in channelz database +// RegisterSocket registers the given normal socket s in channelz database // with ref as its reference name, and adds it to the child list of its parent -// (identified by pid). It returns the unique channelz tracking id assigned to -// this normal socket. +// (identified by skt.Parent, which must be set). It returns the unique channelz +// tracking id assigned to this normal socket. // // If channelz is not turned ON, the channelz database is not mutated. -func RegisterNormalSocket(s Socket, pid *Identifier, ref string) (*Identifier, error) { - if pid == nil { - return nil, errors.New("a NormalSocket's parent id cannot be 0") - } - id := IDGen.genID() - if !IsOn() { - return newIdentifer(RefNormalSocket, id, pid), nil +func RegisterSocket(skt *Socket) *Socket { + skt.ID = IDGen.genID() + if IsOn() { + db.addSocket(skt) } - - ns := &normalSocket{refName: ref, s: s, id: id, pid: pid.Int()} - db.get().addNormalSocket(id, ns, pid.Int()) - return newIdentifer(RefNormalSocket, id, pid), nil + return skt } // RemoveEntry removes an entry with unique channelz tracking id to be id from // channelz database. // // If channelz is not turned ON, this function is a no-op. 
-func RemoveEntry(id *Identifier) { +func RemoveEntry(id int64) { if !IsOn() { return } - db.get().removeEntry(id.Int()) -} - -// TraceEventDesc is what the caller of AddTraceEvent should provide to describe -// the event to be added to the channel trace. -// -// The Parent field is optional. It is used for an event that will be recorded -// in the entity's parent trace. -type TraceEventDesc struct { - Desc string - Severity Severity - Parent *TraceEventDesc -} - -// AddTraceEvent adds trace related to the entity with specified id, using the -// provided TraceEventDesc. -// -// If channelz is not turned ON, this will simply log the event descriptions. -func AddTraceEvent(l grpclog.DepthLoggerV2, id *Identifier, depth int, desc *TraceEventDesc) { - // Log only the trace description associated with the bottom most entity. - switch desc.Severity { - case CtUnknown, CtInfo: - l.InfoDepth(depth+1, withParens(id)+desc.Desc) - case CtWarning: - l.WarningDepth(depth+1, withParens(id)+desc.Desc) - case CtError: - l.ErrorDepth(depth+1, withParens(id)+desc.Desc) - } - - if getMaxTraceEntry() == 0 { - return - } - if IsOn() { - db.get().traceEvent(id.Int(), desc) - } -} - -// channelMap is the storage data structure for channelz. -// Methods of channelMap can be divided in two two categories with respect to locking. -// 1. Methods acquire the global lock. -// 2. Methods that can only be called when global lock is held. -// A second type of method need always to be called inside a first type of method. 
-type channelMap struct { - mu sync.RWMutex - topLevelChannels map[int64]struct{} - servers map[int64]*server - channels map[int64]*channel - subChannels map[int64]*subChannel - listenSockets map[int64]*listenSocket - normalSockets map[int64]*normalSocket -} - -func newChannelMap() *channelMap { - return &channelMap{ - topLevelChannels: make(map[int64]struct{}), - channels: make(map[int64]*channel), - listenSockets: make(map[int64]*listenSocket), - normalSockets: make(map[int64]*normalSocket), - servers: make(map[int64]*server), - subChannels: make(map[int64]*subChannel), - } -} - -func (c *channelMap) addServer(id int64, s *server) { - c.mu.Lock() - s.cm = c - c.servers[id] = s - c.mu.Unlock() -} - -func (c *channelMap) addChannel(id int64, cn *channel, isTopChannel bool, pid int64) { - c.mu.Lock() - cn.cm = c - cn.trace.cm = c - c.channels[id] = cn - if isTopChannel { - c.topLevelChannels[id] = struct{}{} - } else { - c.findEntry(pid).addChild(id, cn) - } - c.mu.Unlock() -} - -func (c *channelMap) addSubChannel(id int64, sc *subChannel, pid int64) { - c.mu.Lock() - sc.cm = c - sc.trace.cm = c - c.subChannels[id] = sc - c.findEntry(pid).addChild(id, sc) - c.mu.Unlock() -} - -func (c *channelMap) addListenSocket(id int64, ls *listenSocket, pid int64) { - c.mu.Lock() - ls.cm = c - c.listenSockets[id] = ls - c.findEntry(pid).addChild(id, ls) - c.mu.Unlock() -} - -func (c *channelMap) addNormalSocket(id int64, ns *normalSocket, pid int64) { - c.mu.Lock() - ns.cm = c - c.normalSockets[id] = ns - c.findEntry(pid).addChild(id, ns) - c.mu.Unlock() -} - -// removeEntry triggers the removal of an entry, which may not indeed delete the entry, if it has to -// wait on the deletion of its children and until no other entity's channel trace references it. -// It may lead to a chain of entry deletion. For example, deleting the last socket of a gracefully -// shutting down server will lead to the server being also deleted. 
-func (c *channelMap) removeEntry(id int64) { - c.mu.Lock() - c.findEntry(id).triggerDelete() - c.mu.Unlock() -} - -// c.mu must be held by the caller -func (c *channelMap) decrTraceRefCount(id int64) { - e := c.findEntry(id) - if v, ok := e.(tracedChannel); ok { - v.decrTraceRefCount() - e.deleteSelfIfReady() - } -} - -// c.mu must be held by the caller. -func (c *channelMap) findEntry(id int64) entry { - var v entry - var ok bool - if v, ok = c.channels[id]; ok { - return v - } - if v, ok = c.subChannels[id]; ok { - return v - } - if v, ok = c.servers[id]; ok { - return v - } - if v, ok = c.listenSockets[id]; ok { - return v - } - if v, ok = c.normalSockets[id]; ok { - return v - } - return &dummyEntry{idNotFound: id} -} - -// c.mu must be held by the caller -// deleteEntry simply deletes an entry from the channelMap. Before calling this -// method, caller must check this entry is ready to be deleted, i.e removeEntry() -// has been called on it, and no children still exist. -// Conditionals are ordered by the expected frequency of deletion of each entity -// type, in order to optimize performance. 
-func (c *channelMap) deleteEntry(id int64) { - var ok bool - if _, ok = c.normalSockets[id]; ok { - delete(c.normalSockets, id) - return - } - if _, ok = c.subChannels[id]; ok { - delete(c.subChannels, id) - return - } - if _, ok = c.channels[id]; ok { - delete(c.channels, id) - delete(c.topLevelChannels, id) - return - } - if _, ok = c.listenSockets[id]; ok { - delete(c.listenSockets, id) - return - } - if _, ok = c.servers[id]; ok { - delete(c.servers, id) - return - } -} - -func (c *channelMap) traceEvent(id int64, desc *TraceEventDesc) { - c.mu.Lock() - child := c.findEntry(id) - childTC, ok := child.(tracedChannel) - if !ok { - c.mu.Unlock() - return - } - childTC.getChannelTrace().append(&TraceEvent{Desc: desc.Desc, Severity: desc.Severity, Timestamp: time.Now()}) - if desc.Parent != nil { - parent := c.findEntry(child.getParentID()) - var chanType RefChannelType - switch child.(type) { - case *channel: - chanType = RefChannel - case *subChannel: - chanType = RefSubChannel - } - if parentTC, ok := parent.(tracedChannel); ok { - parentTC.getChannelTrace().append(&TraceEvent{ - Desc: desc.Parent.Desc, - Severity: desc.Parent.Severity, - Timestamp: time.Now(), - RefID: id, - RefName: childTC.getRefName(), - RefType: chanType, - }) - childTC.incrTraceRefCount() - } - } - c.mu.Unlock() -} - -type int64Slice []int64 - -func (s int64Slice) Len() int { return len(s) } -func (s int64Slice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s int64Slice) Less(i, j int) bool { return s[i] < s[j] } - -func copyMap(m map[int64]string) map[int64]string { - n := make(map[int64]string) - for k, v := range m { - n[k] = v - } - return n -} - -func min(a, b int64) int64 { - if a < b { - return a - } - return b -} - -func (c *channelMap) GetTopChannels(id int64, maxResults int64) ([]*ChannelMetric, bool) { - if maxResults <= 0 { - maxResults = EntryPerPage - } - c.mu.RLock() - l := int64(len(c.topLevelChannels)) - ids := make([]int64, 0, l) - cns := make([]*channel, 0, min(l, 
maxResults)) - - for k := range c.topLevelChannels { - ids = append(ids, k) - } - sort.Sort(int64Slice(ids)) - idx := sort.Search(len(ids), func(i int) bool { return ids[i] >= id }) - count := int64(0) - var end bool - var t []*ChannelMetric - for i, v := range ids[idx:] { - if count == maxResults { - break - } - if cn, ok := c.channels[v]; ok { - cns = append(cns, cn) - t = append(t, &ChannelMetric{ - NestedChans: copyMap(cn.nestedChans), - SubChans: copyMap(cn.subChans), - }) - count++ - } - if i == len(ids[idx:])-1 { - end = true - break - } - } - c.mu.RUnlock() - if count == 0 { - end = true - } - - for i, cn := range cns { - t[i].ChannelData = cn.c.ChannelzMetric() - t[i].ID = cn.id - t[i].RefName = cn.refName - t[i].Trace = cn.trace.dumpData() - } - return t, end -} - -func (c *channelMap) GetServers(id, maxResults int64) ([]*ServerMetric, bool) { - if maxResults <= 0 { - maxResults = EntryPerPage - } - c.mu.RLock() - l := int64(len(c.servers)) - ids := make([]int64, 0, l) - ss := make([]*server, 0, min(l, maxResults)) - for k := range c.servers { - ids = append(ids, k) - } - sort.Sort(int64Slice(ids)) - idx := sort.Search(len(ids), func(i int) bool { return ids[i] >= id }) - count := int64(0) - var end bool - var s []*ServerMetric - for i, v := range ids[idx:] { - if count == maxResults { - break - } - if svr, ok := c.servers[v]; ok { - ss = append(ss, svr) - s = append(s, &ServerMetric{ - ListenSockets: copyMap(svr.listenSockets), - }) - count++ - } - if i == len(ids[idx:])-1 { - end = true - break - } - } - c.mu.RUnlock() - if count == 0 { - end = true - } - - for i, svr := range ss { - s[i].ServerData = svr.s.ChannelzMetric() - s[i].ID = svr.id - s[i].RefName = svr.refName - } - return s, end -} - -func (c *channelMap) GetServerSockets(id int64, startID int64, maxResults int64) ([]*SocketMetric, bool) { - if maxResults <= 0 { - maxResults = EntryPerPage - } - var svr *server - var ok bool - c.mu.RLock() - if svr, ok = c.servers[id]; !ok { - // server with 
id doesn't exist. - c.mu.RUnlock() - return nil, true - } - svrskts := svr.sockets - l := int64(len(svrskts)) - ids := make([]int64, 0, l) - sks := make([]*normalSocket, 0, min(l, maxResults)) - for k := range svrskts { - ids = append(ids, k) - } - sort.Sort(int64Slice(ids)) - idx := sort.Search(len(ids), func(i int) bool { return ids[i] >= startID }) - count := int64(0) - var end bool - for i, v := range ids[idx:] { - if count == maxResults { - break - } - if ns, ok := c.normalSockets[v]; ok { - sks = append(sks, ns) - count++ - } - if i == len(ids[idx:])-1 { - end = true - break - } - } - c.mu.RUnlock() - if count == 0 { - end = true - } - s := make([]*SocketMetric, 0, len(sks)) - for _, ns := range sks { - sm := &SocketMetric{} - sm.SocketData = ns.s.ChannelzMetric() - sm.ID = ns.id - sm.RefName = ns.refName - s = append(s, sm) - } - return s, end -} - -func (c *channelMap) GetChannel(id int64) *ChannelMetric { - cm := &ChannelMetric{} - var cn *channel - var ok bool - c.mu.RLock() - if cn, ok = c.channels[id]; !ok { - // channel with id doesn't exist. - c.mu.RUnlock() - return nil - } - cm.NestedChans = copyMap(cn.nestedChans) - cm.SubChans = copyMap(cn.subChans) - // cn.c can be set to &dummyChannel{} when deleteSelfFromMap is called. Save a copy of cn.c when - // holding the lock to prevent potential data race. - chanCopy := cn.c - c.mu.RUnlock() - cm.ChannelData = chanCopy.ChannelzMetric() - cm.ID = cn.id - cm.RefName = cn.refName - cm.Trace = cn.trace.dumpData() - return cm -} - -func (c *channelMap) GetSubChannel(id int64) *SubChannelMetric { - cm := &SubChannelMetric{} - var sc *subChannel - var ok bool - c.mu.RLock() - if sc, ok = c.subChannels[id]; !ok { - // subchannel with id doesn't exist. - c.mu.RUnlock() - return nil - } - cm.Sockets = copyMap(sc.sockets) - // sc.c can be set to &dummyChannel{} when deleteSelfFromMap is called. Save a copy of sc.c when - // holding the lock to prevent potential data race. 
- chanCopy := sc.c - c.mu.RUnlock() - cm.ChannelData = chanCopy.ChannelzMetric() - cm.ID = sc.id - cm.RefName = sc.refName - cm.Trace = sc.trace.dumpData() - return cm -} - -func (c *channelMap) GetSocket(id int64) *SocketMetric { - sm := &SocketMetric{} - c.mu.RLock() - if ls, ok := c.listenSockets[id]; ok { - c.mu.RUnlock() - sm.SocketData = ls.s.ChannelzMetric() - sm.ID = ls.id - sm.RefName = ls.refName - return sm - } - if ns, ok := c.normalSockets[id]; ok { - c.mu.RUnlock() - sm.SocketData = ns.s.ChannelzMetric() - sm.ID = ns.id - sm.RefName = ns.refName - return sm - } - c.mu.RUnlock() - return nil -} - -func (c *channelMap) GetServer(id int64) *ServerMetric { - sm := &ServerMetric{} - var svr *server - var ok bool - c.mu.RLock() - if svr, ok = c.servers[id]; !ok { - c.mu.RUnlock() - return nil - } - sm.ListenSockets = copyMap(svr.listenSockets) - c.mu.RUnlock() - sm.ID = svr.id - sm.RefName = svr.refName - sm.ServerData = svr.s.ChannelzMetric() - return sm + db.removeEntry(id) } // IDGenerator is an incrementing atomic that tracks IDs for channelz entities. @@ -761,3 +218,11 @@ func (i *IDGenerator) Reset() { func (i *IDGenerator) genID() int64 { return atomic.AddInt64(&i.id, 1) } + +// Identifier is an opaque channelz identifier used to expose channelz symbols +// outside of grpc. Currently only implemented by Channel since no other +// types require exposure outside grpc. +type Identifier interface { + Entity + channelzIdentifier() +} diff --git a/internal/channelz/id.go b/internal/channelz/id.go deleted file mode 100644 index c9a27acd3710..000000000000 --- a/internal/channelz/id.go +++ /dev/null @@ -1,75 +0,0 @@ -/* - * - * Copyright 2022 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package channelz - -import "fmt" - -// Identifier is an opaque identifier which uniquely identifies an entity in the -// channelz database. -type Identifier struct { - typ RefChannelType - id int64 - str string - pid *Identifier -} - -// Type returns the entity type corresponding to id. -func (id *Identifier) Type() RefChannelType { - return id.typ -} - -// Int returns the integer identifier corresponding to id. -func (id *Identifier) Int() int64 { - return id.id -} - -// String returns a string representation of the entity corresponding to id. -// -// This includes some information about the parent as well. Examples: -// Top-level channel: [Channel #channel-number] -// Nested channel: [Channel #parent-channel-number Channel #channel-number] -// Sub channel: [Channel #parent-channel SubChannel #subchannel-number] -func (id *Identifier) String() string { - return id.str -} - -// Equal returns true if other is the same as id. -func (id *Identifier) Equal(other *Identifier) bool { - if (id != nil) != (other != nil) { - return false - } - if id == nil && other == nil { - return true - } - return id.typ == other.typ && id.id == other.id && id.pid == other.pid -} - -// NewIdentifierForTesting returns a new opaque identifier to be used only for -// testing purposes. 
-func NewIdentifierForTesting(typ RefChannelType, id int64, pid *Identifier) *Identifier { - return newIdentifer(typ, id, pid) -} - -func newIdentifer(typ RefChannelType, id int64, pid *Identifier) *Identifier { - str := fmt.Sprintf("%s #%d", typ, id) - if pid != nil { - str = fmt.Sprintf("%s %s", pid, str) - } - return &Identifier{typ: typ, id: id, str: str, pid: pid} -} diff --git a/internal/channelz/logging.go b/internal/channelz/logging.go index f89e6f77bbd0..ee4d72125805 100644 --- a/internal/channelz/logging.go +++ b/internal/channelz/logging.go @@ -26,53 +26,49 @@ import ( var logger = grpclog.Component("channelz") -func withParens(id *Identifier) string { - return "[" + id.String() + "] " -} - // Info logs and adds a trace event if channelz is on. -func Info(l grpclog.DepthLoggerV2, id *Identifier, args ...any) { - AddTraceEvent(l, id, 1, &TraceEventDesc{ +func Info(l grpclog.DepthLoggerV2, e Entity, args ...any) { + AddTraceEvent(l, e, 1, &TraceEvent{ Desc: fmt.Sprint(args...), Severity: CtInfo, }) } // Infof logs and adds a trace event if channelz is on. -func Infof(l grpclog.DepthLoggerV2, id *Identifier, format string, args ...any) { - AddTraceEvent(l, id, 1, &TraceEventDesc{ +func Infof(l grpclog.DepthLoggerV2, e Entity, format string, args ...any) { + AddTraceEvent(l, e, 1, &TraceEvent{ Desc: fmt.Sprintf(format, args...), Severity: CtInfo, }) } // Warning logs and adds a trace event if channelz is on. -func Warning(l grpclog.DepthLoggerV2, id *Identifier, args ...any) { - AddTraceEvent(l, id, 1, &TraceEventDesc{ +func Warning(l grpclog.DepthLoggerV2, e Entity, args ...any) { + AddTraceEvent(l, e, 1, &TraceEvent{ Desc: fmt.Sprint(args...), Severity: CtWarning, }) } // Warningf logs and adds a trace event if channelz is on. 
-func Warningf(l grpclog.DepthLoggerV2, id *Identifier, format string, args ...any) { - AddTraceEvent(l, id, 1, &TraceEventDesc{ +func Warningf(l grpclog.DepthLoggerV2, e Entity, format string, args ...any) { + AddTraceEvent(l, e, 1, &TraceEvent{ Desc: fmt.Sprintf(format, args...), Severity: CtWarning, }) } // Error logs and adds a trace event if channelz is on. -func Error(l grpclog.DepthLoggerV2, id *Identifier, args ...any) { - AddTraceEvent(l, id, 1, &TraceEventDesc{ +func Error(l grpclog.DepthLoggerV2, e Entity, args ...any) { + AddTraceEvent(l, e, 1, &TraceEvent{ Desc: fmt.Sprint(args...), Severity: CtError, }) } // Errorf logs and adds a trace event if channelz is on. -func Errorf(l grpclog.DepthLoggerV2, id *Identifier, format string, args ...any) { - AddTraceEvent(l, id, 1, &TraceEventDesc{ +func Errorf(l grpclog.DepthLoggerV2, e Entity, format string, args ...any) { + AddTraceEvent(l, e, 1, &TraceEvent{ Desc: fmt.Sprintf(format, args...), Severity: CtError, }) diff --git a/internal/channelz/server.go b/internal/channelz/server.go new file mode 100644 index 000000000000..cdfc49d6eacc --- /dev/null +++ b/internal/channelz/server.go @@ -0,0 +1,119 @@ +/* + * + * Copyright 2024 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package channelz + +import ( + "fmt" + "sync/atomic" +) + +// Server is the channelz representation of a server. 
+type Server struct { + Entity + ID int64 + RefName string + + ServerMetrics ServerMetrics + + closeCalled bool + sockets map[int64]string + listenSockets map[int64]string + cm *channelMap +} + +// ServerMetrics defines a struct containing metrics for servers. +type ServerMetrics struct { + // The number of incoming calls started on the server. + CallsStarted atomic.Int64 + // The number of incoming calls that have completed with an OK status. + CallsSucceeded atomic.Int64 + // The number of incoming calls that have a completed with a non-OK status. + CallsFailed atomic.Int64 + // The last time a call was started on the server. + LastCallStartedTimestamp atomic.Int64 +} + +// NewServerMetricsForTesting returns an initialized ServerMetrics. +func NewServerMetricsForTesting(started, succeeded, failed, timestamp int64) *ServerMetrics { + sm := &ServerMetrics{} + sm.CallsStarted.Store(started) + sm.CallsSucceeded.Store(succeeded) + sm.CallsFailed.Store(failed) + sm.LastCallStartedTimestamp.Store(timestamp) + return sm +} + +func (sm *ServerMetrics) CopyFrom(o *ServerMetrics) { + sm.CallsStarted.Store(o.CallsStarted.Load()) + sm.CallsSucceeded.Store(o.CallsSucceeded.Load()) + sm.CallsFailed.Store(o.CallsFailed.Load()) + sm.LastCallStartedTimestamp.Store(o.LastCallStartedTimestamp.Load()) +} + +// ListenSockets returns the listening sockets for s. +func (s *Server) ListenSockets() map[int64]string { + db.mu.RLock() + defer db.mu.RUnlock() + return copyMap(s.listenSockets) +} + +// String returns a printable description of s. 
+func (s *Server) String() string { + return fmt.Sprintf("Server #%d", s.ID) +} + +func (s *Server) id() int64 { + return s.ID +} + +func (s *Server) addChild(id int64, e entry) { + switch v := e.(type) { + case *Socket: + switch v.SocketType { + case SocketTypeNormal: + s.sockets[id] = v.RefName + case SocketTypeListen: + s.listenSockets[id] = v.RefName + } + default: + logger.Errorf("cannot add a child (id = %d) of type %T to a server", id, e) + } +} + +func (s *Server) deleteChild(id int64) { + delete(s.sockets, id) + delete(s.listenSockets, id) + s.deleteSelfIfReady() +} + +func (s *Server) triggerDelete() { + s.closeCalled = true + s.deleteSelfIfReady() +} + +func (s *Server) deleteSelfIfReady() { + if !s.closeCalled || len(s.sockets)+len(s.listenSockets) != 0 { + return + } + s.cm.deleteEntry(s.ID) +} + +func (s *Server) getParentID() int64 { + return 0 +} diff --git a/internal/channelz/socket.go b/internal/channelz/socket.go new file mode 100644 index 000000000000..fa64834b25d0 --- /dev/null +++ b/internal/channelz/socket.go @@ -0,0 +1,130 @@ +/* + * + * Copyright 2024 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package channelz + +import ( + "fmt" + "net" + "sync/atomic" + + "google.golang.org/grpc/credentials" +) + +// SocketMetrics defines the struct that the implementor of Socket interface +// should return from ChannelzMetric(). +type SocketMetrics struct { + // The number of streams that have been started. 
+ StreamsStarted atomic.Int64 + // The number of streams that have ended successfully: + // On client side, receiving frame with eos bit set. + // On server side, sending frame with eos bit set. + StreamsSucceeded atomic.Int64 + // The number of streams that have ended unsuccessfully: + // On client side, termination without receiving frame with eos bit set. + // On server side, termination without sending frame with eos bit set. + StreamsFailed atomic.Int64 + // The number of messages successfully sent on this socket. + MessagesSent atomic.Int64 + MessagesReceived atomic.Int64 + // The number of keep alives sent. This is typically implemented with HTTP/2 + // ping messages. + KeepAlivesSent atomic.Int64 + // The last time a stream was created by this endpoint. Usually unset for + // servers. + LastLocalStreamCreatedTimestamp atomic.Int64 + // The last time a stream was created by the remote endpoint. Usually unset + // for clients. + LastRemoteStreamCreatedTimestamp atomic.Int64 + // The last time a message was sent by this endpoint. + LastMessageSentTimestamp atomic.Int64 + // The last time a message was received by this endpoint. + LastMessageReceivedTimestamp atomic.Int64 +} + +// EphemeralSocketMetrics are metrics that change rapidly and are tracked +// outside of channelz. +type EphemeralSocketMetrics struct { + // The amount of window, granted to the local endpoint by the remote endpoint. + // This may be slightly out of date due to network latency. This does NOT + // include stream level or TCP level flow control info. + LocalFlowControlWindow int64 + // The amount of window, granted to the remote endpoint by the local endpoint. + // This may be slightly out of date due to network latency. This does NOT + // include stream level or TCP level flow control info. 
+ RemoteFlowControlWindow int64 +} + +type SocketType string + +const ( + SocketTypeNormal = "NormalSocket" + SocketTypeListen = "ListenSocket" +) + +type Socket struct { + Entity + SocketType SocketType + ID int64 + Parent Entity + cm *channelMap + SocketMetrics SocketMetrics + EphemeralMetrics func() *EphemeralSocketMetrics + + RefName string + // The locally bound address. Immutable. + LocalAddr net.Addr + // The remote bound address. May be absent. Immutable. + RemoteAddr net.Addr + // Optional, represents the name of the remote endpoint, if different than + // the original target name. Immutable. + RemoteName string + // Immutable. + SocketOptions *SocketOptionData + // Immutable. + Security credentials.ChannelzSecurityValue +} + +func (ls *Socket) String() string { + return fmt.Sprintf("%s %s #%d", ls.Parent, ls.SocketType, ls.ID) +} + +func (ls *Socket) id() int64 { + return ls.ID +} + +func (ls *Socket) addChild(id int64, e entry) { + logger.Errorf("cannot add a child (id = %d) of type %T to a listen socket", id, e) +} + +func (ls *Socket) deleteChild(id int64) { + logger.Errorf("cannot delete a child (id = %d) from a listen socket", id) +} + +func (ls *Socket) triggerDelete() { + ls.cm.deleteEntry(ls.ID) + ls.Parent.(entry).deleteChild(ls.ID) +} + +func (ls *Socket) deleteSelfIfReady() { + logger.Errorf("cannot call deleteSelfIfReady on a listen socket") +} + +func (ls *Socket) getParentID() int64 { + return ls.Parent.id() +} diff --git a/internal/channelz/subchannel.go b/internal/channelz/subchannel.go new file mode 100644 index 000000000000..3b88e4cba8e1 --- /dev/null +++ b/internal/channelz/subchannel.go @@ -0,0 +1,151 @@ +/* + * + * Copyright 2024 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package channelz + +import ( + "fmt" + "sync/atomic" +) + +// SubChannel is the channelz representation of a subchannel. +type SubChannel struct { + Entity + // ID is the channelz id of this subchannel. + ID int64 + // RefName is the human readable reference string of this subchannel. + RefName string + closeCalled bool + sockets map[int64]string + parent *Channel + trace *ChannelTrace + traceRefCount int32 + + ChannelMetrics ChannelMetrics +} + +func (sc *SubChannel) String() string { + return fmt.Sprintf("%s SubChannel #%d", sc.parent, sc.ID) +} + +func (sc *SubChannel) id() int64 { + return sc.ID +} + +func (sc *SubChannel) Sockets() map[int64]string { + db.mu.RLock() + defer db.mu.RUnlock() + return copyMap(sc.sockets) +} + +func (sc *SubChannel) Trace() *ChannelTrace { + db.mu.RLock() + defer db.mu.RUnlock() + return sc.trace.copy() +} + +func (sc *SubChannel) addChild(id int64, e entry) { + if v, ok := e.(*Socket); ok && v.SocketType == SocketTypeNormal { + sc.sockets[id] = v.RefName + } else { + logger.Errorf("cannot add a child (id = %d) of type %T to a subChannel", id, e) + } +} + +func (sc *SubChannel) deleteChild(id int64) { + delete(sc.sockets, id) + sc.deleteSelfIfReady() +} + +func (sc *SubChannel) triggerDelete() { + sc.closeCalled = true + sc.deleteSelfIfReady() +} + +func (sc *SubChannel) getParentID() int64 { + return sc.parent.ID +} + +// deleteSelfFromTree tries to delete the subchannel from the channelz entry relation tree, which +// means deleting the subchannel reference from its parent's child list. 
+// +// In order for a subchannel to be deleted from the tree, it must meet the criteria that, removal of +// the corresponding grpc object has been invoked, and the subchannel does not have any children left. +// +// The returned boolean value indicates whether the channel has been successfully deleted from tree. +func (sc *SubChannel) deleteSelfFromTree() (deleted bool) { + if !sc.closeCalled || len(sc.sockets) != 0 { + return false + } + sc.parent.deleteChild(sc.ID) + return true +} + +// deleteSelfFromMap checks whether it is valid to delete the subchannel from the map, which means +// deleting the subchannel from channelz's tracking entirely. Users can no longer use id to query +// the subchannel, and its memory will be garbage collected. +// +// The trace reference count of the subchannel must be 0 in order to be deleted from the map. This is +// specified in the channel tracing gRFC that as long as some other trace has reference to an entity, +// the trace of the referenced entity must not be deleted. In order to release the resource allocated +// by grpc, the reference to the grpc object is reset to a dummy object. +// +// deleteSelfFromMap must be called after deleteSelfFromTree returns true. +// +// It returns a bool to indicate whether the channel can be safely deleted from map. +func (sc *SubChannel) deleteSelfFromMap() (delete bool) { + return sc.getTraceRefCount() == 0 +} + +// deleteSelfIfReady tries to delete the subchannel itself from the channelz database. +// The delete process includes two steps: +// 1. delete the subchannel from the entry relation tree, i.e. delete the subchannel reference from +// its parent's child list. +// 2. delete the subchannel from the map, i.e. delete the subchannel entirely from channelz. Lookup +// by id will return entry not found error. 
+func (sc *SubChannel) deleteSelfIfReady() { + if !sc.deleteSelfFromTree() { + return + } + if !sc.deleteSelfFromMap() { + return + } + db.deleteEntry(sc.ID) + sc.trace.clear() +} + +func (sc *SubChannel) getChannelTrace() *ChannelTrace { + return sc.trace +} + +func (sc *SubChannel) incrTraceRefCount() { + atomic.AddInt32(&sc.traceRefCount, 1) +} + +func (sc *SubChannel) decrTraceRefCount() { + atomic.AddInt32(&sc.traceRefCount, -1) +} + +func (sc *SubChannel) getTraceRefCount() int { + i := atomic.LoadInt32(&sc.traceRefCount) + return int(i) +} + +func (sc *SubChannel) getRefName() string { + return sc.RefName +} diff --git a/internal/channelz/types_linux.go b/internal/channelz/syscall_linux.go similarity index 83% rename from internal/channelz/types_linux.go rename to internal/channelz/syscall_linux.go index 1b1c4cce34a9..5ac73ff83396 100644 --- a/internal/channelz/types_linux.go +++ b/internal/channelz/syscall_linux.go @@ -49,3 +49,17 @@ func (s *SocketOptionData) Getsockopt(fd uintptr) { s.TCPInfo = v } } + +// GetSocketOption gets the socket option info of the conn. +func GetSocketOption(socket any) *SocketOptionData { + c, ok := socket.(syscall.Conn) + if !ok { + return nil + } + data := &SocketOptionData{} + if rawConn, err := c.SyscallConn(); err == nil { + rawConn.Control(data.Getsockopt) + return data + } + return nil +} diff --git a/internal/channelz/types_nonlinux.go b/internal/channelz/syscall_nonlinux.go similarity index 90% rename from internal/channelz/types_nonlinux.go rename to internal/channelz/syscall_nonlinux.go index 8b06eed1ab8b..d1ed8df6a518 100644 --- a/internal/channelz/types_nonlinux.go +++ b/internal/channelz/syscall_nonlinux.go @@ -1,5 +1,4 @@ //go:build !linux -// +build !linux /* * @@ -41,3 +40,8 @@ func (s *SocketOptionData) Getsockopt(fd uintptr) { logger.Warning("Channelz: socket options are not supported on non-linux environments") }) } + +// GetSocketOption gets the socket option info of the conn. 
+func GetSocketOption(c any) *SocketOptionData { + return nil +} diff --git a/internal/channelz/util_test.go b/internal/channelz/syscall_test.go similarity index 99% rename from internal/channelz/util_test.go rename to internal/channelz/syscall_test.go index da0fd30b1905..cd0e1fcb04b2 100644 --- a/internal/channelz/util_test.go +++ b/internal/channelz/syscall_test.go @@ -1,5 +1,4 @@ //go:build linux -// +build linux /* * diff --git a/internal/channelz/trace.go b/internal/channelz/trace.go new file mode 100644 index 000000000000..36b867403230 --- /dev/null +++ b/internal/channelz/trace.go @@ -0,0 +1,204 @@ +/* + * + * Copyright 2018 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package channelz + +import ( + "fmt" + "sync" + "sync/atomic" + "time" + + "google.golang.org/grpc/grpclog" +) + +const ( + defaultMaxTraceEntry int32 = 30 +) + +var maxTraceEntry = defaultMaxTraceEntry + +// SetMaxTraceEntry sets maximum number of trace entries per entity (i.e. +// channel/subchannel). Setting it to 0 will disable channel tracing. +func SetMaxTraceEntry(i int32) { + atomic.StoreInt32(&maxTraceEntry, i) +} + +// ResetMaxTraceEntryToDefault resets the maximum number of trace entries per +// entity to default. 
+func ResetMaxTraceEntryToDefault() { + atomic.StoreInt32(&maxTraceEntry, defaultMaxTraceEntry) +} + +func getMaxTraceEntry() int { + i := atomic.LoadInt32(&maxTraceEntry) + return int(i) +} + +// traceEvent is an internal representation of a single trace event +type traceEvent struct { + // Desc is a simple description of the trace event. + Desc string + // Severity states the severity of this trace event. + Severity Severity + // Timestamp is the event time. + Timestamp time.Time + // RefID is the id of the entity that gets referenced in the event. RefID is 0 if no other entity is + // involved in this event. + // e.g. SubChannel (id: 4[]) Created. --> RefID = 4, RefName = "" (inside []) + RefID int64 + // RefName is the reference name for the entity that gets referenced in the event. + RefName string + // RefType indicates the referenced entity type, i.e Channel or SubChannel. + RefType RefChannelType +} + +// TraceEvent is what the caller of AddTraceEvent should provide to describe the +// event to be added to the channel trace. +// +// The Parent field is optional. It is used for an event that will be recorded +// in the entity's parent trace. +type TraceEvent struct { + Desc string + Severity Severity + Parent *TraceEvent +} + +type ChannelTrace struct { + cm *channelMap + clearCalled bool + CreationTime time.Time + EventNum int64 + mu sync.Mutex + Events []*traceEvent +} + +func (c *ChannelTrace) copy() *ChannelTrace { + return &ChannelTrace{ + CreationTime: c.CreationTime, + EventNum: c.EventNum, + Events: append(([]*traceEvent)(nil), c.Events...), + } +} + +func (c *ChannelTrace) append(e *traceEvent) { + c.mu.Lock() + if len(c.Events) == getMaxTraceEntry() { + del := c.Events[0] + c.Events = c.Events[1:] + if del.RefID != 0 { + // start recursive cleanup in a goroutine to not block the call originated from grpc. + go func() { + // need to acquire c.cm.mu lock to call the unlocked attemptCleanup func. 
+ c.cm.mu.Lock() + c.cm.decrTraceRefCount(del.RefID) + c.cm.mu.Unlock() + }() + } + } + e.Timestamp = time.Now() + c.Events = append(c.Events, e) + c.EventNum++ + c.mu.Unlock() +} + +func (c *ChannelTrace) clear() { + if c.clearCalled { + return + } + c.clearCalled = true + c.mu.Lock() + for _, e := range c.Events { + if e.RefID != 0 { + // caller should have already held the c.cm.mu lock. + c.cm.decrTraceRefCount(e.RefID) + } + } + c.mu.Unlock() +} + +// Severity is the severity level of a trace event. +// The canonical enumeration of all valid values is here: +// https://github.com/grpc/grpc-proto/blob/9b13d199cc0d4703c7ea26c9c330ba695866eb23/grpc/channelz/v1/channelz.proto#L126. +type Severity int + +const ( + // CtUnknown indicates unknown severity of a trace event. + CtUnknown Severity = iota + // CtInfo indicates info level severity of a trace event. + CtInfo + // CtWarning indicates warning level severity of a trace event. + CtWarning + // CtError indicates error level severity of a trace event. + CtError +) + +// RefChannelType is the type of the entity being referenced in a trace event. +type RefChannelType int + +const ( + // RefUnknown indicates an unknown entity type, the zero value for this type. + RefUnknown RefChannelType = iota + // RefChannel indicates the referenced entity is a Channel. + RefChannel + // RefSubChannel indicates the referenced entity is a SubChannel. + RefSubChannel + // RefServer indicates the referenced entity is a Server. + RefServer + // RefListenSocket indicates the referenced entity is a ListenSocket. + RefListenSocket + // RefNormalSocket indicates the referenced entity is a NormalSocket. 
+ RefNormalSocket +) + +var refChannelTypeToString = map[RefChannelType]string{ + RefUnknown: "Unknown", + RefChannel: "Channel", + RefSubChannel: "SubChannel", + RefServer: "Server", + RefListenSocket: "ListenSocket", + RefNormalSocket: "NormalSocket", +} + +func (r RefChannelType) String() string { + return refChannelTypeToString[r] +} + +// AddTraceEvent adds trace related to the entity with specified id, using the +// provided TraceEventDesc. +// +// If channelz is not turned ON, this will simply log the event descriptions. +func AddTraceEvent(l grpclog.DepthLoggerV2, e Entity, depth int, desc *TraceEvent) { + // Log only the trace description associated with the bottom most entity. + d := fmt.Sprintf("[%s]%s", e, desc.Desc) + switch desc.Severity { + case CtUnknown, CtInfo: + l.InfoDepth(depth+1, d) + case CtWarning: + l.WarningDepth(depth+1, d) + case CtError: + l.ErrorDepth(depth+1, d) + } + + if getMaxTraceEntry() == 0 { + return + } + if IsOn() { + db.traceEvent(e.id(), desc) + } +} diff --git a/internal/channelz/types.go b/internal/channelz/types.go deleted file mode 100644 index 1d4020f53795..000000000000 --- a/internal/channelz/types.go +++ /dev/null @@ -1,727 +0,0 @@ -/* - * - * Copyright 2018 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ - -package channelz - -import ( - "net" - "sync" - "sync/atomic" - "time" - - "google.golang.org/grpc/connectivity" - "google.golang.org/grpc/credentials" -) - -// entry represents a node in the channelz database. -type entry interface { - // addChild adds a child e, whose channelz id is id to child list - addChild(id int64, e entry) - // deleteChild deletes a child with channelz id to be id from child list - deleteChild(id int64) - // triggerDelete tries to delete self from channelz database. However, if child - // list is not empty, then deletion from the database is on hold until the last - // child is deleted from database. - triggerDelete() - // deleteSelfIfReady check whether triggerDelete() has been called before, and whether child - // list is now empty. If both conditions are met, then delete self from database. - deleteSelfIfReady() - // getParentID returns parent ID of the entry. 0 value parent ID means no parent. - getParentID() int64 -} - -// dummyEntry is a fake entry to handle entry not found case. -type dummyEntry struct { - idNotFound int64 -} - -func (d *dummyEntry) addChild(id int64, e entry) { - // Note: It is possible for a normal program to reach here under race condition. - // For example, there could be a race between ClientConn.Close() info being propagated - // to addrConn and http2Client. ClientConn.Close() cancel the context and result - // in http2Client to error. The error info is then caught by transport monitor - // and before addrConn.tearDown() is called in side ClientConn.Close(). Therefore, - // the addrConn will create a new transport. And when registering the new transport in - // channelz, its parent addrConn could have already been torn down and deleted - // from channelz tracking, and thus reach the code here. 
- logger.Infof("attempt to add child of type %T with id %d to a parent (id=%d) that doesn't currently exist", e, id, d.idNotFound) -} - -func (d *dummyEntry) deleteChild(id int64) { - // It is possible for a normal program to reach here under race condition. - // Refer to the example described in addChild(). - logger.Infof("attempt to delete child with id %d from a parent (id=%d) that doesn't currently exist", id, d.idNotFound) -} - -func (d *dummyEntry) triggerDelete() { - logger.Warningf("attempt to delete an entry (id=%d) that doesn't currently exist", d.idNotFound) -} - -func (*dummyEntry) deleteSelfIfReady() { - // code should not reach here. deleteSelfIfReady is always called on an existing entry. -} - -func (*dummyEntry) getParentID() int64 { - return 0 -} - -// ChannelMetric defines the info channelz provides for a specific Channel, which -// includes ChannelInternalMetric and channelz-specific data, such as channelz id, -// child list, etc. -type ChannelMetric struct { - // ID is the channelz id of this channel. - ID int64 - // RefName is the human readable reference string of this channel. - RefName string - // ChannelData contains channel internal metric reported by the channel through - // ChannelzMetric(). - ChannelData *ChannelInternalMetric - // NestedChans tracks the nested channel type children of this channel in the format of - // a map from nested channel channelz id to corresponding reference string. - NestedChans map[int64]string - // SubChans tracks the subchannel type children of this channel in the format of a - // map from subchannel channelz id to corresponding reference string. - SubChans map[int64]string - // Sockets tracks the socket type children of this channel in the format of a map - // from socket channelz id to corresponding reference string. - // Note current grpc implementation doesn't allow channel having sockets directly, - // therefore, this is field is unused. 
- Sockets map[int64]string - // Trace contains the most recent traced events. - Trace *ChannelTrace -} - -// SubChannelMetric defines the info channelz provides for a specific SubChannel, -// which includes ChannelInternalMetric and channelz-specific data, such as -// channelz id, child list, etc. -type SubChannelMetric struct { - // ID is the channelz id of this subchannel. - ID int64 - // RefName is the human readable reference string of this subchannel. - RefName string - // ChannelData contains subchannel internal metric reported by the subchannel - // through ChannelzMetric(). - ChannelData *ChannelInternalMetric - // NestedChans tracks the nested channel type children of this subchannel in the format of - // a map from nested channel channelz id to corresponding reference string. - // Note current grpc implementation doesn't allow subchannel to have nested channels - // as children, therefore, this field is unused. - NestedChans map[int64]string - // SubChans tracks the subchannel type children of this subchannel in the format of a - // map from subchannel channelz id to corresponding reference string. - // Note current grpc implementation doesn't allow subchannel to have subchannels - // as children, therefore, this field is unused. - SubChans map[int64]string - // Sockets tracks the socket type children of this subchannel in the format of a map - // from socket channelz id to corresponding reference string. - Sockets map[int64]string - // Trace contains the most recent traced events. - Trace *ChannelTrace -} - -// ChannelInternalMetric defines the struct that the implementor of Channel interface -// should return from ChannelzMetric(). -type ChannelInternalMetric struct { - // current connectivity state of the channel. - State connectivity.State - // The target this channel originally tried to connect to. May be absent - Target string - // The number of calls started on the channel. 
- CallsStarted int64 - // The number of calls that have completed with an OK status. - CallsSucceeded int64 - // The number of calls that have a completed with a non-OK status. - CallsFailed int64 - // The last time a call was started on the channel. - LastCallStartedTimestamp time.Time -} - -// ChannelTrace stores traced events on a channel/subchannel and related info. -type ChannelTrace struct { - // EventNum is the number of events that ever got traced (i.e. including those that have been deleted) - EventNum int64 - // CreationTime is the creation time of the trace. - CreationTime time.Time - // Events stores the most recent trace events (up to $maxTraceEntry, newer event will overwrite the - // oldest one) - Events []*TraceEvent -} - -// TraceEvent represent a single trace event -type TraceEvent struct { - // Desc is a simple description of the trace event. - Desc string - // Severity states the severity of this trace event. - Severity Severity - // Timestamp is the event time. - Timestamp time.Time - // RefID is the id of the entity that gets referenced in the event. RefID is 0 if no other entity is - // involved in this event. - // e.g. SubChannel (id: 4[]) Created. --> RefID = 4, RefName = "" (inside []) - RefID int64 - // RefName is the reference name for the entity that gets referenced in the event. - RefName string - // RefType indicates the referenced entity type, i.e Channel or SubChannel. - RefType RefChannelType -} - -// Channel is the interface that should be satisfied in order to be tracked by -// channelz as Channel or SubChannel. 
-type Channel interface { - ChannelzMetric() *ChannelInternalMetric -} - -type dummyChannel struct{} - -func (d *dummyChannel) ChannelzMetric() *ChannelInternalMetric { - return &ChannelInternalMetric{} -} - -type channel struct { - refName string - c Channel - closeCalled bool - nestedChans map[int64]string - subChans map[int64]string - id int64 - pid int64 - cm *channelMap - trace *channelTrace - // traceRefCount is the number of trace events that reference this channel. - // Non-zero traceRefCount means the trace of this channel cannot be deleted. - traceRefCount int32 -} - -func (c *channel) addChild(id int64, e entry) { - switch v := e.(type) { - case *subChannel: - c.subChans[id] = v.refName - case *channel: - c.nestedChans[id] = v.refName - default: - logger.Errorf("cannot add a child (id = %d) of type %T to a channel", id, e) - } -} - -func (c *channel) deleteChild(id int64) { - delete(c.subChans, id) - delete(c.nestedChans, id) - c.deleteSelfIfReady() -} - -func (c *channel) triggerDelete() { - c.closeCalled = true - c.deleteSelfIfReady() -} - -func (c *channel) getParentID() int64 { - return c.pid -} - -// deleteSelfFromTree tries to delete the channel from the channelz entry relation tree, which means -// deleting the channel reference from its parent's child list. -// -// In order for a channel to be deleted from the tree, it must meet the criteria that, removal of the -// corresponding grpc object has been invoked, and the channel does not have any children left. -// -// The returned boolean value indicates whether the channel has been successfully deleted from tree. 
-func (c *channel) deleteSelfFromTree() (deleted bool) { - if !c.closeCalled || len(c.subChans)+len(c.nestedChans) != 0 { - return false - } - // not top channel - if c.pid != 0 { - c.cm.findEntry(c.pid).deleteChild(c.id) - } - return true -} - -// deleteSelfFromMap checks whether it is valid to delete the channel from the map, which means -// deleting the channel from channelz's tracking entirely. Users can no longer use id to query the -// channel, and its memory will be garbage collected. -// -// The trace reference count of the channel must be 0 in order to be deleted from the map. This is -// specified in the channel tracing gRFC that as long as some other trace has reference to an entity, -// the trace of the referenced entity must not be deleted. In order to release the resource allocated -// by grpc, the reference to the grpc object is reset to a dummy object. -// -// deleteSelfFromMap must be called after deleteSelfFromTree returns true. -// -// It returns a bool to indicate whether the channel can be safely deleted from map. -func (c *channel) deleteSelfFromMap() (delete bool) { - if c.getTraceRefCount() != 0 { - c.c = &dummyChannel{} - return false - } - return true -} - -// deleteSelfIfReady tries to delete the channel itself from the channelz database. -// The delete process includes two steps: -// 1. delete the channel from the entry relation tree, i.e. delete the channel reference from its -// parent's child list. -// 2. delete the channel from the map, i.e. delete the channel entirely from channelz. Lookup by id -// will return entry not found error. 
-func (c *channel) deleteSelfIfReady() { - if !c.deleteSelfFromTree() { - return - } - if !c.deleteSelfFromMap() { - return - } - c.cm.deleteEntry(c.id) - c.trace.clear() -} - -func (c *channel) getChannelTrace() *channelTrace { - return c.trace -} - -func (c *channel) incrTraceRefCount() { - atomic.AddInt32(&c.traceRefCount, 1) -} - -func (c *channel) decrTraceRefCount() { - atomic.AddInt32(&c.traceRefCount, -1) -} - -func (c *channel) getTraceRefCount() int { - i := atomic.LoadInt32(&c.traceRefCount) - return int(i) -} - -func (c *channel) getRefName() string { - return c.refName -} - -type subChannel struct { - refName string - c Channel - closeCalled bool - sockets map[int64]string - id int64 - pid int64 - cm *channelMap - trace *channelTrace - traceRefCount int32 -} - -func (sc *subChannel) addChild(id int64, e entry) { - if v, ok := e.(*normalSocket); ok { - sc.sockets[id] = v.refName - } else { - logger.Errorf("cannot add a child (id = %d) of type %T to a subChannel", id, e) - } -} - -func (sc *subChannel) deleteChild(id int64) { - delete(sc.sockets, id) - sc.deleteSelfIfReady() -} - -func (sc *subChannel) triggerDelete() { - sc.closeCalled = true - sc.deleteSelfIfReady() -} - -func (sc *subChannel) getParentID() int64 { - return sc.pid -} - -// deleteSelfFromTree tries to delete the subchannel from the channelz entry relation tree, which -// means deleting the subchannel reference from its parent's child list. -// -// In order for a subchannel to be deleted from the tree, it must meet the criteria that, removal of -// the corresponding grpc object has been invoked, and the subchannel does not have any children left. -// -// The returned boolean value indicates whether the channel has been successfully deleted from tree. 
-func (sc *subChannel) deleteSelfFromTree() (deleted bool) { - if !sc.closeCalled || len(sc.sockets) != 0 { - return false - } - sc.cm.findEntry(sc.pid).deleteChild(sc.id) - return true -} - -// deleteSelfFromMap checks whether it is valid to delete the subchannel from the map, which means -// deleting the subchannel from channelz's tracking entirely. Users can no longer use id to query -// the subchannel, and its memory will be garbage collected. -// -// The trace reference count of the subchannel must be 0 in order to be deleted from the map. This is -// specified in the channel tracing gRFC that as long as some other trace has reference to an entity, -// the trace of the referenced entity must not be deleted. In order to release the resource allocated -// by grpc, the reference to the grpc object is reset to a dummy object. -// -// deleteSelfFromMap must be called after deleteSelfFromTree returns true. -// -// It returns a bool to indicate whether the channel can be safely deleted from map. -func (sc *subChannel) deleteSelfFromMap() (delete bool) { - if sc.getTraceRefCount() != 0 { - // free the grpc struct (i.e. addrConn) - sc.c = &dummyChannel{} - return false - } - return true -} - -// deleteSelfIfReady tries to delete the subchannel itself from the channelz database. -// The delete process includes two steps: -// 1. delete the subchannel from the entry relation tree, i.e. delete the subchannel reference from -// its parent's child list. -// 2. delete the subchannel from the map, i.e. delete the subchannel entirely from channelz. Lookup -// by id will return entry not found error. 
-func (sc *subChannel) deleteSelfIfReady() { - if !sc.deleteSelfFromTree() { - return - } - if !sc.deleteSelfFromMap() { - return - } - sc.cm.deleteEntry(sc.id) - sc.trace.clear() -} - -func (sc *subChannel) getChannelTrace() *channelTrace { - return sc.trace -} - -func (sc *subChannel) incrTraceRefCount() { - atomic.AddInt32(&sc.traceRefCount, 1) -} - -func (sc *subChannel) decrTraceRefCount() { - atomic.AddInt32(&sc.traceRefCount, -1) -} - -func (sc *subChannel) getTraceRefCount() int { - i := atomic.LoadInt32(&sc.traceRefCount) - return int(i) -} - -func (sc *subChannel) getRefName() string { - return sc.refName -} - -// SocketMetric defines the info channelz provides for a specific Socket, which -// includes SocketInternalMetric and channelz-specific data, such as channelz id, etc. -type SocketMetric struct { - // ID is the channelz id of this socket. - ID int64 - // RefName is the human readable reference string of this socket. - RefName string - // SocketData contains socket internal metric reported by the socket through - // ChannelzMetric(). - SocketData *SocketInternalMetric -} - -// SocketInternalMetric defines the struct that the implementor of Socket interface -// should return from ChannelzMetric(). -type SocketInternalMetric struct { - // The number of streams that have been started. - StreamsStarted int64 - // The number of streams that have ended successfully: - // On client side, receiving frame with eos bit set. - // On server side, sending frame with eos bit set. - StreamsSucceeded int64 - // The number of streams that have ended unsuccessfully: - // On client side, termination without receiving frame with eos bit set. - // On server side, termination without sending frame with eos bit set. - StreamsFailed int64 - // The number of messages successfully sent on this socket. - MessagesSent int64 - MessagesReceived int64 - // The number of keep alives sent. This is typically implemented with HTTP/2 - // ping messages. 
- KeepAlivesSent int64 - // The last time a stream was created by this endpoint. Usually unset for - // servers. - LastLocalStreamCreatedTimestamp time.Time - // The last time a stream was created by the remote endpoint. Usually unset - // for clients. - LastRemoteStreamCreatedTimestamp time.Time - // The last time a message was sent by this endpoint. - LastMessageSentTimestamp time.Time - // The last time a message was received by this endpoint. - LastMessageReceivedTimestamp time.Time - // The amount of window, granted to the local endpoint by the remote endpoint. - // This may be slightly out of date due to network latency. This does NOT - // include stream level or TCP level flow control info. - LocalFlowControlWindow int64 - // The amount of window, granted to the remote endpoint by the local endpoint. - // This may be slightly out of date due to network latency. This does NOT - // include stream level or TCP level flow control info. - RemoteFlowControlWindow int64 - // The locally bound address. - LocalAddr net.Addr - // The remote bound address. May be absent. - RemoteAddr net.Addr - // Optional, represents the name of the remote endpoint, if different than - // the original target name. - RemoteName string - SocketOptions *SocketOptionData - Security credentials.ChannelzSecurityValue -} - -// Socket is the interface that should be satisfied in order to be tracked by -// channelz as Socket. 
-type Socket interface { - ChannelzMetric() *SocketInternalMetric -} - -type listenSocket struct { - refName string - s Socket - id int64 - pid int64 - cm *channelMap -} - -func (ls *listenSocket) addChild(id int64, e entry) { - logger.Errorf("cannot add a child (id = %d) of type %T to a listen socket", id, e) -} - -func (ls *listenSocket) deleteChild(id int64) { - logger.Errorf("cannot delete a child (id = %d) from a listen socket", id) -} - -func (ls *listenSocket) triggerDelete() { - ls.cm.deleteEntry(ls.id) - ls.cm.findEntry(ls.pid).deleteChild(ls.id) -} - -func (ls *listenSocket) deleteSelfIfReady() { - logger.Errorf("cannot call deleteSelfIfReady on a listen socket") -} - -func (ls *listenSocket) getParentID() int64 { - return ls.pid -} - -type normalSocket struct { - refName string - s Socket - id int64 - pid int64 - cm *channelMap -} - -func (ns *normalSocket) addChild(id int64, e entry) { - logger.Errorf("cannot add a child (id = %d) of type %T to a normal socket", id, e) -} - -func (ns *normalSocket) deleteChild(id int64) { - logger.Errorf("cannot delete a child (id = %d) from a normal socket", id) -} - -func (ns *normalSocket) triggerDelete() { - ns.cm.deleteEntry(ns.id) - ns.cm.findEntry(ns.pid).deleteChild(ns.id) -} - -func (ns *normalSocket) deleteSelfIfReady() { - logger.Errorf("cannot call deleteSelfIfReady on a normal socket") -} - -func (ns *normalSocket) getParentID() int64 { - return ns.pid -} - -// ServerMetric defines the info channelz provides for a specific Server, which -// includes ServerInternalMetric and channelz-specific data, such as channelz id, -// child list, etc. -type ServerMetric struct { - // ID is the channelz id of this server. - ID int64 - // RefName is the human readable reference string of this server. - RefName string - // ServerData contains server internal metric reported by the server through - // ChannelzMetric(). 
- ServerData *ServerInternalMetric - // ListenSockets tracks the listener socket type children of this server in the - // format of a map from socket channelz id to corresponding reference string. - ListenSockets map[int64]string -} - -// ServerInternalMetric defines the struct that the implementor of Server interface -// should return from ChannelzMetric(). -type ServerInternalMetric struct { - // The number of incoming calls started on the server. - CallsStarted int64 - // The number of incoming calls that have completed with an OK status. - CallsSucceeded int64 - // The number of incoming calls that have a completed with a non-OK status. - CallsFailed int64 - // The last time a call was started on the server. - LastCallStartedTimestamp time.Time -} - -// Server is the interface to be satisfied in order to be tracked by channelz as -// Server. -type Server interface { - ChannelzMetric() *ServerInternalMetric -} - -type server struct { - refName string - s Server - closeCalled bool - sockets map[int64]string - listenSockets map[int64]string - id int64 - cm *channelMap -} - -func (s *server) addChild(id int64, e entry) { - switch v := e.(type) { - case *normalSocket: - s.sockets[id] = v.refName - case *listenSocket: - s.listenSockets[id] = v.refName - default: - logger.Errorf("cannot add a child (id = %d) of type %T to a server", id, e) - } -} - -func (s *server) deleteChild(id int64) { - delete(s.sockets, id) - delete(s.listenSockets, id) - s.deleteSelfIfReady() -} - -func (s *server) triggerDelete() { - s.closeCalled = true - s.deleteSelfIfReady() -} - -func (s *server) deleteSelfIfReady() { - if !s.closeCalled || len(s.sockets)+len(s.listenSockets) != 0 { - return - } - s.cm.deleteEntry(s.id) -} - -func (s *server) getParentID() int64 { - return 0 -} - -type tracedChannel interface { - getChannelTrace() *channelTrace - incrTraceRefCount() - decrTraceRefCount() - getRefName() string -} - -type channelTrace struct { - cm *channelMap - clearCalled bool - 
createdTime time.Time - eventCount int64 - mu sync.Mutex - events []*TraceEvent -} - -func (c *channelTrace) append(e *TraceEvent) { - c.mu.Lock() - if len(c.events) == getMaxTraceEntry() { - del := c.events[0] - c.events = c.events[1:] - if del.RefID != 0 { - // start recursive cleanup in a goroutine to not block the call originated from grpc. - go func() { - // need to acquire c.cm.mu lock to call the unlocked attemptCleanup func. - c.cm.mu.Lock() - c.cm.decrTraceRefCount(del.RefID) - c.cm.mu.Unlock() - }() - } - } - e.Timestamp = time.Now() - c.events = append(c.events, e) - c.eventCount++ - c.mu.Unlock() -} - -func (c *channelTrace) clear() { - if c.clearCalled { - return - } - c.clearCalled = true - c.mu.Lock() - for _, e := range c.events { - if e.RefID != 0 { - // caller should have already held the c.cm.mu lock. - c.cm.decrTraceRefCount(e.RefID) - } - } - c.mu.Unlock() -} - -// Severity is the severity level of a trace event. -// The canonical enumeration of all valid values is here: -// https://github.com/grpc/grpc-proto/blob/9b13d199cc0d4703c7ea26c9c330ba695866eb23/grpc/channelz/v1/channelz.proto#L126. -type Severity int - -const ( - // CtUnknown indicates unknown severity of a trace event. - CtUnknown Severity = iota - // CtInfo indicates info level severity of a trace event. - CtInfo - // CtWarning indicates warning level severity of a trace event. - CtWarning - // CtError indicates error level severity of a trace event. - CtError -) - -// RefChannelType is the type of the entity being referenced in a trace event. -type RefChannelType int - -const ( - // RefUnknown indicates an unknown entity type, the zero value for this type. - RefUnknown RefChannelType = iota - // RefChannel indicates the referenced entity is a Channel. - RefChannel - // RefSubChannel indicates the referenced entity is a SubChannel. - RefSubChannel - // RefServer indicates the referenced entity is a Server. 
- RefServer - // RefListenSocket indicates the referenced entity is a ListenSocket. - RefListenSocket - // RefNormalSocket indicates the referenced entity is a NormalSocket. - RefNormalSocket -) - -var refChannelTypeToString = map[RefChannelType]string{ - RefUnknown: "Unknown", - RefChannel: "Channel", - RefSubChannel: "SubChannel", - RefServer: "Server", - RefListenSocket: "ListenSocket", - RefNormalSocket: "NormalSocket", -} - -func (r RefChannelType) String() string { - return refChannelTypeToString[r] -} - -func (c *channelTrace) dumpData() *ChannelTrace { - c.mu.Lock() - ct := &ChannelTrace{EventNum: c.eventCount, CreationTime: c.createdTime} - ct.Events = c.events[:len(c.events)] - c.mu.Unlock() - return ct -} diff --git a/internal/channelz/util_linux.go b/internal/channelz/util_linux.go deleted file mode 100644 index 98288c3f866f..000000000000 --- a/internal/channelz/util_linux.go +++ /dev/null @@ -1,37 +0,0 @@ -/* - * - * Copyright 2018 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package channelz - -import ( - "syscall" -) - -// GetSocketOption gets the socket option info of the conn. 
-func GetSocketOption(socket any) *SocketOptionData { - c, ok := socket.(syscall.Conn) - if !ok { - return nil - } - data := &SocketOptionData{} - if rawConn, err := c.SyscallConn(); err == nil { - rawConn.Control(data.Getsockopt) - return data - } - return nil -} diff --git a/internal/channelz/util_nonlinux.go b/internal/channelz/util_nonlinux.go deleted file mode 100644 index b5568b22e208..000000000000 --- a/internal/channelz/util_nonlinux.go +++ /dev/null @@ -1,27 +0,0 @@ -//go:build !linux -// +build !linux - -/* - * - * Copyright 2018 gRPC authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package channelz - -// GetSocketOption gets the socket option info of the conn. 
-func GetSocketOption(c any) *SocketOptionData { - return nil -} diff --git a/internal/idle/idle_e2e_test.go b/internal/idle/idle_e2e_test.go index da98c09420dd..dbd804ea76dd 100644 --- a/internal/idle/idle_e2e_test.go +++ b/internal/idle/idle_e2e_test.go @@ -74,11 +74,12 @@ func channelzTraceEventFound(ctx context.Context, wantDesc string) error { if l := len(tcs); l != 1 { return fmt.Errorf("when looking for channelz trace event with description %q, found %d top-level channels, want 1", wantDesc, l) } - if tcs[0].Trace == nil { + trace := tcs[0].Trace() + if trace == nil { return fmt.Errorf("when looking for channelz trace event with description %q, no trace events found for top-level channel", wantDesc) } - for _, e := range tcs[0].Trace.Events { + for _, e := range trace.Events { if strings.Contains(e.Desc, wantDesc) { return nil } diff --git a/internal/transport/http2_client.go b/internal/transport/http2_client.go index eff8799640c6..deba0c4d9ef4 100644 --- a/internal/transport/http2_client.go +++ b/internal/transport/http2_client.go @@ -140,9 +140,7 @@ type http2Client struct { // variable. kpDormant bool - // Fields below are for channelz metric collection. - channelzID *channelz.Identifier - czData *channelzData + channelz *channelz.Socket onClose func(GoAwayReason) @@ -319,6 +317,7 @@ func newHTTP2Client(connectCtx, ctx context.Context, addr resolver.Address, opts if opts.MaxHeaderListSize != nil { maxHeaderListSize = *opts.MaxHeaderListSize } + t := &http2Client{ ctx: ctx, ctxDone: ctx.Done(), // Cache Done chan. 
@@ -346,11 +345,25 @@ func newHTTP2Client(connectCtx, ctx context.Context, addr resolver.Address, opts maxConcurrentStreams: defaultMaxStreamsClient, streamQuota: defaultMaxStreamsClient, streamsQuotaAvailable: make(chan struct{}, 1), - czData: new(channelzData), keepaliveEnabled: keepaliveEnabled, bufferPool: newBufferPool(), onClose: onClose, } + var czSecurity credentials.ChannelzSecurityValue + if au, ok := authInfo.(credentials.ChannelzSecurityInfo); ok { + czSecurity = au.GetSecurityValue() + } + t.channelz = channelz.RegisterSocket( + &channelz.Socket{ + SocketType: channelz.SocketTypeNormal, + Parent: opts.ChannelzParent, + SocketMetrics: channelz.SocketMetrics{}, + EphemeralMetrics: t.socketMetrics, + LocalAddr: t.localAddr, + RemoteAddr: t.remoteAddr, + SocketOptions: channelz.GetSocketOption(t.conn), + Security: czSecurity, + }) t.logger = prefixLoggerForClientTransport(t) // Add peer information to the http2client context. t.ctx = peer.NewContext(t.ctx, t.getPeer()) @@ -381,10 +394,6 @@ func newHTTP2Client(connectCtx, ctx context.Context, addr resolver.Address, opts } sh.HandleConn(t.ctx, connBegin) } - t.channelzID, err = channelz.RegisterNormalSocket(t, opts.ChannelzParentID, fmt.Sprintf("%s -> %s", t.localAddr, t.remoteAddr)) - if err != nil { - return nil, err - } if t.keepaliveEnabled { t.kpDormancyCond = sync.NewCond(&t.mu) go t.keepalive() @@ -756,8 +765,8 @@ func (t *http2Client) NewStream(ctx context.Context, callHdr *CallHdr) (*Stream, return ErrConnClosing } if channelz.IsOn() { - atomic.AddInt64(&t.czData.streamsStarted, 1) - atomic.StoreInt64(&t.czData.lastStreamCreatedTime, time.Now().UnixNano()) + t.channelz.SocketMetrics.StreamsStarted.Add(1) + t.channelz.SocketMetrics.LastLocalStreamCreatedTimestamp.Store(time.Now().UnixNano()) } // If the keepalive goroutine has gone dormant, wake it up. if t.kpDormant { @@ -928,9 +937,9 @@ func (t *http2Client) closeStream(s *Stream, err error, rst bool, rstCode http2. 
t.mu.Unlock() if channelz.IsOn() { if eosReceived { - atomic.AddInt64(&t.czData.streamsSucceeded, 1) + t.channelz.SocketMetrics.StreamsSucceeded.Add(1) } else { - atomic.AddInt64(&t.czData.streamsFailed, 1) + t.channelz.SocketMetrics.StreamsFailed.Add(1) } } }, @@ -985,7 +994,7 @@ func (t *http2Client) Close(err error) { t.controlBuf.finish() t.cancel() t.conn.Close() - channelz.RemoveEntry(t.channelzID) + channelz.RemoveEntry(t.channelz.ID) // Append info about previous goaways if there were any, since this may be important // for understanding the root cause for this connection to be closed. _, goAwayDebugMessage := t.GetGoAwayReason() @@ -1708,7 +1717,7 @@ func (t *http2Client) keepalive() { // keepalive timer expired. In both cases, we need to send a ping. if !outstandingPing { if channelz.IsOn() { - atomic.AddInt64(&t.czData.kpCount, 1) + t.channelz.SocketMetrics.KeepAlivesSent.Add(1) } t.controlBuf.put(p) timeoutLeft = t.kp.Timeout @@ -1738,40 +1747,23 @@ func (t *http2Client) GoAway() <-chan struct{} { return t.goAway } -func (t *http2Client) ChannelzMetric() *channelz.SocketInternalMetric { - s := channelz.SocketInternalMetric{ - StreamsStarted: atomic.LoadInt64(&t.czData.streamsStarted), - StreamsSucceeded: atomic.LoadInt64(&t.czData.streamsSucceeded), - StreamsFailed: atomic.LoadInt64(&t.czData.streamsFailed), - MessagesSent: atomic.LoadInt64(&t.czData.msgSent), - MessagesReceived: atomic.LoadInt64(&t.czData.msgRecv), - KeepAlivesSent: atomic.LoadInt64(&t.czData.kpCount), - LastLocalStreamCreatedTimestamp: time.Unix(0, atomic.LoadInt64(&t.czData.lastStreamCreatedTime)), - LastMessageSentTimestamp: time.Unix(0, atomic.LoadInt64(&t.czData.lastMsgSentTime)), - LastMessageReceivedTimestamp: time.Unix(0, atomic.LoadInt64(&t.czData.lastMsgRecvTime)), - LocalFlowControlWindow: int64(t.fc.getSize()), - SocketOptions: channelz.GetSocketOption(t.conn), - LocalAddr: t.localAddr, - RemoteAddr: t.remoteAddr, - // RemoteName : - } - if au, ok := 
t.authInfo.(credentials.ChannelzSecurityInfo); ok { - s.Security = au.GetSecurityValue() - } - s.RemoteFlowControlWindow = t.getOutFlowWindow() - return &s +func (t *http2Client) socketMetrics() *channelz.EphemeralSocketMetrics { + return &channelz.EphemeralSocketMetrics{ + LocalFlowControlWindow: int64(t.fc.getSize()), + RemoteFlowControlWindow: t.getOutFlowWindow(), + } } func (t *http2Client) RemoteAddr() net.Addr { return t.remoteAddr } func (t *http2Client) IncrMsgSent() { - atomic.AddInt64(&t.czData.msgSent, 1) - atomic.StoreInt64(&t.czData.lastMsgSentTime, time.Now().UnixNano()) + t.channelz.SocketMetrics.MessagesSent.Add(1) + t.channelz.SocketMetrics.LastMessageSentTimestamp.Store(time.Now().UnixNano()) } func (t *http2Client) IncrMsgRecv() { - atomic.AddInt64(&t.czData.msgRecv, 1) - atomic.StoreInt64(&t.czData.lastMsgRecvTime, time.Now().UnixNano()) + t.channelz.SocketMetrics.MessagesReceived.Add(1) + t.channelz.SocketMetrics.LastMessageReceivedTimestamp.Store(time.Now().UnixNano()) } func (t *http2Client) getOutFlowWindow() int64 { diff --git a/internal/transport/http2_server.go b/internal/transport/http2_server.go index b13113c3e33b..d582e0471094 100644 --- a/internal/transport/http2_server.go +++ b/internal/transport/http2_server.go @@ -118,8 +118,7 @@ type http2Server struct { idle time.Time // Fields below are for channelz metric collection. 
- channelzID *channelz.Identifier - czData *channelzData + channelz *channelz.Socket bufferPool *bufferPool connectionID uint64 @@ -262,9 +261,24 @@ func NewServerTransport(conn net.Conn, config *ServerConfig) (_ ServerTransport, idle: time.Now(), kep: kep, initialWindowSize: iwz, - czData: new(channelzData), bufferPool: newBufferPool(), } + var czSecurity credentials.ChannelzSecurityValue + if au, ok := authInfo.(credentials.ChannelzSecurityInfo); ok { + czSecurity = au.GetSecurityValue() + } + t.channelz = channelz.RegisterSocket( + &channelz.Socket{ + SocketType: channelz.SocketTypeNormal, + Parent: config.ChannelzParent, + SocketMetrics: channelz.SocketMetrics{}, + EphemeralMetrics: t.socketMetrics, + LocalAddr: t.peer.LocalAddr, + RemoteAddr: t.peer.Addr, + SocketOptions: channelz.GetSocketOption(t.conn), + Security: czSecurity, + }, + ) t.logger = prefixLoggerForServerTransport(t) t.controlBuf = newControlBuffer(t.done) @@ -274,10 +288,6 @@ func NewServerTransport(conn net.Conn, config *ServerConfig) (_ ServerTransport, updateFlowControl: t.updateFlowControl, } } - t.channelzID, err = channelz.RegisterNormalSocket(t, config.ChannelzParentID, fmt.Sprintf("%s -> %s", t.peer.Addr, t.peer.LocalAddr)) - if err != nil { - return nil, err - } t.connectionID = atomic.AddUint64(&serverConnectionCounter, 1) t.framer.writer.Flush() @@ -594,8 +604,8 @@ func (t *http2Server) operateHeaders(ctx context.Context, frame *http2.MetaHeade } t.mu.Unlock() if channelz.IsOn() { - atomic.AddInt64(&t.czData.streamsStarted, 1) - atomic.StoreInt64(&t.czData.lastStreamCreatedTime, time.Now().UnixNano()) + t.channelz.SocketMetrics.StreamsStarted.Add(1) + t.channelz.SocketMetrics.LastRemoteStreamCreatedTimestamp.Store(time.Now().UnixNano()) } s.requestRead = func(n int) { t.adjustWindow(s, uint32(n)) @@ -1203,7 +1213,7 @@ func (t *http2Server) keepalive() { } if !outstandingPing { if channelz.IsOn() { - atomic.AddInt64(&t.czData.kpCount, 1) + 
t.channelz.SocketMetrics.KeepAlivesSent.Add(1) } t.controlBuf.put(p) kpTimeoutLeft = t.kp.Timeout @@ -1243,7 +1253,7 @@ func (t *http2Server) Close(err error) { if err := t.conn.Close(); err != nil && t.logger.V(logLevel) { t.logger.Infof("Error closing underlying net.Conn during Close: %v", err) } - channelz.RemoveEntry(t.channelzID) + channelz.RemoveEntry(t.channelz.ID) // Cancel all active streams. for _, s := range streams { s.cancel() @@ -1264,9 +1274,9 @@ func (t *http2Server) deleteStream(s *Stream, eosReceived bool) { if channelz.IsOn() { if eosReceived { - atomic.AddInt64(&t.czData.streamsSucceeded, 1) + t.channelz.SocketMetrics.StreamsSucceeded.Add(1) } else { - atomic.AddInt64(&t.czData.streamsFailed, 1) + t.channelz.SocketMetrics.StreamsFailed.Add(1) } } } @@ -1383,38 +1393,21 @@ func (t *http2Server) outgoingGoAwayHandler(g *goAway) (bool, error) { return false, nil } -func (t *http2Server) ChannelzMetric() *channelz.SocketInternalMetric { - s := channelz.SocketInternalMetric{ - StreamsStarted: atomic.LoadInt64(&t.czData.streamsStarted), - StreamsSucceeded: atomic.LoadInt64(&t.czData.streamsSucceeded), - StreamsFailed: atomic.LoadInt64(&t.czData.streamsFailed), - MessagesSent: atomic.LoadInt64(&t.czData.msgSent), - MessagesReceived: atomic.LoadInt64(&t.czData.msgRecv), - KeepAlivesSent: atomic.LoadInt64(&t.czData.kpCount), - LastRemoteStreamCreatedTimestamp: time.Unix(0, atomic.LoadInt64(&t.czData.lastStreamCreatedTime)), - LastMessageSentTimestamp: time.Unix(0, atomic.LoadInt64(&t.czData.lastMsgSentTime)), - LastMessageReceivedTimestamp: time.Unix(0, atomic.LoadInt64(&t.czData.lastMsgRecvTime)), - LocalFlowControlWindow: int64(t.fc.getSize()), - SocketOptions: channelz.GetSocketOption(t.conn), - LocalAddr: t.peer.LocalAddr, - RemoteAddr: t.peer.Addr, - // RemoteName : - } - if au, ok := t.peer.AuthInfo.(credentials.ChannelzSecurityInfo); ok { - s.Security = au.GetSecurityValue() - } - s.RemoteFlowControlWindow = t.getOutFlowWindow() - return &s +func 
(t *http2Server) socketMetrics() *channelz.EphemeralSocketMetrics { + return &channelz.EphemeralSocketMetrics{ + LocalFlowControlWindow: int64(t.fc.getSize()), + RemoteFlowControlWindow: t.getOutFlowWindow(), + } } func (t *http2Server) IncrMsgSent() { - atomic.AddInt64(&t.czData.msgSent, 1) - atomic.StoreInt64(&t.czData.lastMsgSentTime, time.Now().UnixNano()) + t.channelz.SocketMetrics.MessagesSent.Add(1) + t.channelz.SocketMetrics.LastMessageSentTimestamp.Store(time.Now().UnixNano()) } func (t *http2Server) IncrMsgRecv() { - atomic.AddInt64(&t.czData.msgRecv, 1) - atomic.StoreInt64(&t.czData.lastMsgRecvTime, time.Now().UnixNano()) + t.channelz.SocketMetrics.MessagesReceived.Add(1) + t.channelz.SocketMetrics.LastMessageReceivedTimestamp.Store(time.Now().UnixNano()) } func (t *http2Server) getOutFlowWindow() int64 { diff --git a/internal/transport/keepalive_test.go b/internal/transport/keepalive_test.go index 8144277fb6c1..3fafc38918dd 100644 --- a/internal/transport/keepalive_test.go +++ b/internal/transport/keepalive_test.go @@ -257,13 +257,14 @@ func (s) TestKeepaliveServerWithResponsiveClient(t *testing.T) { func (s) TestKeepaliveClientClosesUnresponsiveServer(t *testing.T) { connCh := make(chan net.Conn, 1) copts := ConnectOptions{ - ChannelzParentID: channelz.NewIdentifierForTesting(channelz.RefSubChannel, time.Now().Unix(), nil), + ChannelzParent: channelz.RegisterSubChannel(-1, "test subchan"), KeepaliveParams: keepalive.ClientParameters{ Time: 10 * time.Millisecond, Timeout: 10 * time.Millisecond, PermitWithoutStream: true, }, } + defer channelz.RemoveEntry(copts.ChannelzParent.ID) client, cancel := setUpWithNoPingServer(t, copts, connCh) defer cancel() defer client.Close(fmt.Errorf("closed manually by test")) @@ -287,12 +288,13 @@ func (s) TestKeepaliveClientClosesUnresponsiveServer(t *testing.T) { func (s) TestKeepaliveClientOpenWithUnresponsiveServer(t *testing.T) { connCh := make(chan net.Conn, 1) copts := ConnectOptions{ - ChannelzParentID:
channelz.NewIdentifierForTesting(channelz.RefSubChannel, time.Now().Unix(), nil), + ChannelzParent: channelz.RegisterSubChannel(-1, "test subchan"), KeepaliveParams: keepalive.ClientParameters{ Time: 10 * time.Millisecond, Timeout: 10 * time.Millisecond, }, } + defer channelz.RemoveEntry(copts.ChannelzParent.ID) client, cancel := setUpWithNoPingServer(t, copts, connCh) defer cancel() defer client.Close(fmt.Errorf("closed manually by test")) @@ -317,12 +319,13 @@ func (s) TestKeepaliveClientOpenWithUnresponsiveServer(t *testing.T) { func (s) TestKeepaliveClientClosesWithActiveStreams(t *testing.T) { connCh := make(chan net.Conn, 1) copts := ConnectOptions{ - ChannelzParentID: channelz.NewIdentifierForTesting(channelz.RefSubChannel, time.Now().Unix(), nil), + ChannelzParent: channelz.RegisterSubChannel(-1, "test subchan"), KeepaliveParams: keepalive.ClientParameters{ Time: 500 * time.Millisecond, Timeout: 500 * time.Millisecond, }, } + defer channelz.RemoveEntry(copts.ChannelzParent.ID) // TODO(i/6099): Setup a server which can ping and no-ping based on a flag to // reduce the flakiness in this test. client, cancel := setUpWithNoPingServer(t, copts, connCh) diff --git a/internal/transport/transport.go b/internal/transport/transport.go index d3796c256e2f..0d2a6e47f671 100644 --- a/internal/transport/transport.go +++ b/internal/transport/transport.go @@ -571,7 +571,7 @@ type ServerConfig struct { WriteBufferSize int ReadBufferSize int SharedWriteBuffer bool - ChannelzParentID *channelz.Identifier + ChannelzParent *channelz.Server MaxHeaderListSize *uint32 HeaderTableSize *uint32 } @@ -606,8 +606,8 @@ type ConnectOptions struct { ReadBufferSize int // SharedWriteBuffer indicates whether connections should reuse write buffer SharedWriteBuffer bool - // ChannelzParentID sets the addrConn id which initiate the creation of this client transport. 
- ChannelzParentID *channelz.Identifier + // ChannelzParent is the channelz SubChannel of the addrConn which initiated the creation of this client transport. + ChannelzParent *channelz.SubChannel // MaxHeaderListSize sets the max (uncompressed) size of header list that is prepared to be received. MaxHeaderListSize *uint32 // UseProxy specifies if a proxy should be used. @@ -820,30 +820,6 @@ const ( GoAwayTooManyPings GoAwayReason = 2 ) -// channelzData is used to store channelz related data for http2Client and http2Server. -// These fields cannot be embedded in the original structs (e.g. http2Client), since to do atomic -// operation on int64 variable on 32-bit machine, user is responsible to enforce memory alignment. -// Here, by grouping those int64 fields inside a struct, we are enforcing the alignment. -type channelzData struct { - kpCount int64 - // The number of streams that have started, including already finished ones. - streamsStarted int64 - // Client side: The number of streams that have ended successfully by receiving - // EoS bit set frame from server. - // Server side: The number of streams that have ended successfully by sending - // frame with EoS bit set. - streamsSucceeded int64 - streamsFailed int64 - // lastStreamCreatedTime stores the timestamp that the last stream gets created. It is of int64 type - // instead of time.Time since it's more costly to atomically update time.Time variable than int64 - // variable. The same goes for lastMsgSentTime and lastMsgRecvTime. - lastStreamCreatedTime int64 - msgSent int64 - msgRecv int64 - lastMsgSentTime int64 - lastMsgRecvTime int64 -} - // ContextErr converts the error from context package into a status error.
func ContextErr(err error) error { switch err { diff --git a/internal/transport/transport_test.go b/internal/transport/transport_test.go index ff27678294f1..90ce78f42f7a 100644 --- a/internal/transport/transport_test.go +++ b/internal/transport/transport_test.go @@ -299,14 +299,14 @@ type server struct { conns map[ServerTransport]net.Conn h *testStreamHandler ready chan struct{} - channelzID *channelz.Identifier + channelz *channelz.Server } func newTestServer() *server { return &server{ startedErr: make(chan error, 1), ready: make(chan struct{}), - channelzID: channelz.NewIdentifierForTesting(channelz.RefServer, time.Now().Unix(), nil), + channelz: channelz.RegisterServer("test server"), } } @@ -421,7 +421,7 @@ func (s *server) addr() string { func setUpServerOnly(t *testing.T, port int, sc *ServerConfig, ht hType) *server { server := newTestServer() - sc.ChannelzParentID = server.channelzID + sc.ChannelzParent = server.channelz go server.start(t, port, sc, ht) server.wait(t, 2*time.Second) return server @@ -434,7 +434,8 @@ func setUp(t *testing.T, port int, ht hType) (*server, *http2Client, func()) { func setUpWithOptions(t *testing.T, port int, sc *ServerConfig, ht hType, copts ConnectOptions) (*server, *http2Client, func()) { server := setUpServerOnly(t, port, sc, ht) addr := resolver.Address{Addr: "localhost:" + server.port} - copts.ChannelzParentID = channelz.NewIdentifierForTesting(channelz.RefSubChannel, time.Now().Unix(), nil) + copts.ChannelzParent = channelz.RegisterSubChannel(-1, "test channel") + t.Cleanup(func() { channelz.RemoveEntry(copts.ChannelzParent.ID) }) connectCtx, cancel := context.WithDeadline(context.Background(), time.Now().Add(2*time.Second)) ct, connErr := NewClientTransport(connectCtx, context.Background(), addr, copts, func(GoAwayReason) {}) @@ -1320,7 +1321,9 @@ func (s) TestClientHonorsConnectContext(t *testing.T) { connectCtx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 
time.AfterFunc(100*time.Millisecond, cancel) - copts := ConnectOptions{ChannelzParentID: channelz.NewIdentifierForTesting(channelz.RefSubChannel, time.Now().Unix(), nil)} + parent := channelz.RegisterSubChannel(-1, "test channel") + copts := ConnectOptions{ChannelzParent: parent} + defer channelz.RemoveEntry(parent.ID) _, err = NewClientTransport(connectCtx, context.Background(), resolver.Address{Addr: lis.Addr().String()}, copts, func(GoAwayReason) {}) if err == nil { t.Fatalf("NewClientTransport() returned successfully; wanted error") @@ -1411,7 +1414,9 @@ func (s) TestClientWithMisbehavedServer(t *testing.T) { connectCtx, cancel := context.WithDeadline(context.Background(), time.Now().Add(2*time.Second)) defer cancel() - copts := ConnectOptions{ChannelzParentID: channelz.NewIdentifierForTesting(channelz.RefSubChannel, time.Now().Unix(), nil)} + parent := channelz.RegisterSubChannel(-1, "test channel") + defer channelz.RemoveEntry(parent.ID) + copts := ConnectOptions{ChannelzParent: parent} ct, err := NewClientTransport(connectCtx, context.Background(), resolver.Address{Addr: lis.Addr().String()}, copts, func(GoAwayReason) {}) if err != nil { t.Fatalf("Error while creating client transport: %v", err) @@ -2420,8 +2425,9 @@ func (s) TestClientHandshakeInfo(t *testing.T) { copts := ConnectOptions{ TransportCredentials: creds, - ChannelzParentID: channelz.NewIdentifierForTesting(channelz.RefSubChannel, time.Now().Unix(), nil), + ChannelzParent: channelz.RegisterSubChannel(-1, "test subchannel"), } + defer channelz.RemoveEntry(copts.ChannelzParent.ID) tr, err := NewClientTransport(ctx, context.Background(), addr, copts, func(GoAwayReason) {}) if err != nil { t.Fatalf("NewClientTransport(): %v", err) @@ -2460,9 +2466,10 @@ func (s) TestClientHandshakeInfoDialer(t *testing.T) { } copts := ConnectOptions{ - Dialer: dialer, - ChannelzParentID: channelz.NewIdentifierForTesting(channelz.RefSubChannel, time.Now().Unix(), nil), + Dialer: dialer, + ChannelzParent: 
channelz.RegisterSubChannel(-1, "test subchannel"), } + defer channelz.RemoveEntry(copts.ChannelzParent.ID) tr, err := NewClientTransport(ctx, context.Background(), addr, copts, func(GoAwayReason) {}) if err != nil { t.Fatalf("NewClientTransport(): %v", err) diff --git a/resolver_balancer_ext_test.go b/resolver_balancer_ext_test.go index aebc6652fa4b..045946d3e6e1 100644 --- a/resolver_balancer_ext_test.go +++ b/resolver_balancer_ext_test.go @@ -257,8 +257,8 @@ func (s) TestEnterIdleDuringBalancerNewSubConn(t *testing.T) { if len(tcs) != 1 { t.Fatalf("Found channels: %v; expected 1 entry", tcs) } - if len(tcs[0].SubChans) != 0 { - t.Fatalf("Found subchannels: %v; expected 0 entries", tcs[0].SubChans) + if got := tcs[0].SubChans(); len(got) != 0 { + t.Fatalf("Found subchannels: %v; expected 0 entries", got) } cc.Connect() } diff --git a/resolver_wrapper.go b/resolver_wrapper.go index f845ac95893b..9dcc9780f891 100644 --- a/resolver_wrapper.go +++ b/resolver_wrapper.go @@ -97,7 +97,7 @@ func (ccr *ccResolverWrapper) resolveNow(o resolver.ResolveNowOptions) { // finished shutting down, the channel should block on ccr.serializer.Done() // without cc.mu held. 
func (ccr *ccResolverWrapper) close() { - channelz.Info(logger, ccr.cc.channelzID, "Closing the name resolver") + channelz.Info(logger, ccr.cc.channelz, "Closing the name resolver") ccr.mu.Lock() ccr.closed = true ccr.mu.Unlock() @@ -147,7 +147,7 @@ func (ccr *ccResolverWrapper) ReportError(err error) { return } ccr.mu.Unlock() - channelz.Warningf(logger, ccr.cc.channelzID, "ccResolverWrapper: reporting error to cc: %v", err) + channelz.Warningf(logger, ccr.cc.channelz, "ccResolverWrapper: reporting error to cc: %v", err) ccr.cc.updateResolverStateAndUnlock(resolver.State{}, err) } @@ -194,5 +194,5 @@ func (ccr *ccResolverWrapper) addChannelzTraceEvent(s resolver.State) { } else if len(ccr.curState.Addresses) == 0 && len(s.Addresses) > 0 { updates = append(updates, "resolver returned new addresses") } - channelz.Infof(logger, ccr.cc.channelzID, "Resolver state updated: %s (%v)", pretty.ToJSON(s), strings.Join(updates, "; ")) + channelz.Infof(logger, ccr.cc.channelz, "Resolver state updated: %s (%v)", pretty.ToJSON(s), strings.Join(updates, "; ")) } diff --git a/rpc_util.go b/rpc_util.go index 82493d237bcf..998e251ddc41 100644 --- a/rpc_util.go +++ b/rpc_util.go @@ -962,19 +962,6 @@ func setCallInfoCodec(c *callInfo) error { return nil } -// channelzData is used to store channelz related data for ClientConn, addrConn and Server. -// These fields cannot be embedded in the original structs (e.g. ClientConn), since to do atomic -// operation on int64 variable on 32-bit machine, user is responsible to enforce memory alignment. -// Here, by grouping those int64 fields inside a struct, we are enforcing the alignment. -type channelzData struct { - callsStarted int64 - callsFailed int64 - callsSucceeded int64 - // lastCallStartedTime stores the timestamp that last call starts. It is of int64 type instead of - // time.Time since it's more costly to atomically update time.Time variable than int64 variable. 
- lastCallStartedTime int64 -} - // The SupportPackageIsVersion variables are referenced from generated protocol // buffer files to ensure compatibility with the gRPC version used. The latest // support package version is 7. diff --git a/server.go b/server.go index ef81661ae04c..2babb4f9b4c1 100644 --- a/server.go +++ b/server.go @@ -137,8 +137,7 @@ type Server struct { serveWG sync.WaitGroup // counts active Serve goroutines for Stop/GracefulStop handlersWG sync.WaitGroup // counts active method handler goroutines - channelzID *channelz.Identifier - czData *channelzData + channelz *channelz.Server serverWorkerChannel chan func() serverWorkerChannelClose func() @@ -659,7 +658,7 @@ func NewServer(opt ...ServerOption) *Server { services: make(map[string]*serviceInfo), quit: grpcsync.NewEvent(), done: grpcsync.NewEvent(), - czData: new(channelzData), + channelz: channelz.RegisterServer(""), } chainUnaryServerInterceptors(s) chainStreamServerInterceptors(s) @@ -673,8 +672,7 @@ func NewServer(opt ...ServerOption) *Server { s.initServerWorkers() } - s.channelzID = channelz.RegisterServer(&channelzServer{s}, "") - channelz.Info(logger, s.channelzID, "Server created") + channelz.Info(logger, s.channelz, "Server created") return s } @@ -800,20 +798,13 @@ var ErrServerStopped = errors.New("grpc: the server has been stopped") type listenSocket struct { net.Listener - channelzID *channelz.Identifier -} - -func (l *listenSocket) ChannelzMetric() *channelz.SocketInternalMetric { - return &channelz.SocketInternalMetric{ - SocketOptions: channelz.GetSocketOption(l.Listener), - LocalAddr: l.Listener.Addr(), - } + channelz *channelz.Socket } func (l *listenSocket) Close() error { err := l.Listener.Close() - channelz.RemoveEntry(l.channelzID) - channelz.Info(logger, l.channelzID, "ListenSocket deleted") + channelz.RemoveEntry(l.channelz.ID) + channelz.Info(logger, l.channelz, "ListenSocket deleted") return err } @@ -855,7 +846,10 @@ func (s *Server) Serve(lis net.Listener) error { } 
}() - ls := &listenSocket{Listener: lis} + ls := &listenSocket{ + Listener: lis, + channelz: channelz.RegisterSocket(&channelz.Socket{SocketType: channelz.SocketTypeListen, Parent: s.channelz, RefName: lis.Addr().String(), SocketOptions: channelz.GetSocketOption(lis)}), + } s.lis[ls] = true defer func() { @@ -867,14 +861,8 @@ func (s *Server) Serve(lis net.Listener) error { s.mu.Unlock() }() - var err error - ls.channelzID, err = channelz.RegisterListenSocket(ls, s.channelzID, lis.Addr().String()) - if err != nil { - s.mu.Unlock() - return err - } s.mu.Unlock() - channelz.Info(logger, ls.channelzID, "ListenSocket created") + channelz.Info(logger, ls.channelz, "ListenSocket created") var tempDelay time.Duration // how long to sleep on accept failure for { @@ -973,7 +961,7 @@ func (s *Server) newHTTP2Transport(c net.Conn) transport.ServerTransport { WriteBufferSize: s.opts.writeBufferSize, ReadBufferSize: s.opts.readBufferSize, SharedWriteBuffer: s.opts.sharedWriteBuffer, - ChannelzParentID: s.channelzID, + ChannelzParent: s.channelz, MaxHeaderListSize: s.opts.maxHeaderListSize, HeaderTableSize: s.opts.headerTableSize, } @@ -987,7 +975,7 @@ func (s *Server) newHTTP2Transport(c net.Conn) transport.ServerTransport { if err != credentials.ErrConnDispatched { // Don't log on ErrConnDispatched and io.EOF to prevent log spam. 
if err != io.EOF { - channelz.Info(logger, s.channelzID, "grpc: Server.Serve failed to create ServerTransport: ", err) + channelz.Info(logger, s.channelz, "grpc: Server.Serve failed to create ServerTransport: ", err) } c.Close() } @@ -1119,37 +1107,28 @@ func (s *Server) removeConn(addr string, st transport.ServerTransport) { } } -func (s *Server) channelzMetric() *channelz.ServerInternalMetric { - return &channelz.ServerInternalMetric{ - CallsStarted: atomic.LoadInt64(&s.czData.callsStarted), - CallsSucceeded: atomic.LoadInt64(&s.czData.callsSucceeded), - CallsFailed: atomic.LoadInt64(&s.czData.callsFailed), - LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&s.czData.lastCallStartedTime)), - } -} - func (s *Server) incrCallsStarted() { - atomic.AddInt64(&s.czData.callsStarted, 1) - atomic.StoreInt64(&s.czData.lastCallStartedTime, time.Now().UnixNano()) + s.channelz.ServerMetrics.CallsStarted.Add(1) + s.channelz.ServerMetrics.LastCallStartedTimestamp.Store(time.Now().UnixNano()) } func (s *Server) incrCallsSucceeded() { - atomic.AddInt64(&s.czData.callsSucceeded, 1) + s.channelz.ServerMetrics.CallsSucceeded.Add(1) } func (s *Server) incrCallsFailed() { - atomic.AddInt64(&s.czData.callsFailed, 1) + s.channelz.ServerMetrics.CallsFailed.Add(1) } func (s *Server) sendResponse(ctx context.Context, t transport.ServerTransport, stream *transport.Stream, msg any, cp Compressor, opts *transport.Options, comp encoding.Compressor) error { data, err := encode(s.getCodec(stream.ContentSubtype()), msg) if err != nil { - channelz.Error(logger, s.channelzID, "grpc: server failed to encode response: ", err) + channelz.Error(logger, s.channelz, "grpc: server failed to encode response: ", err) return err } compData, err := compress(data, cp, comp) if err != nil { - channelz.Error(logger, s.channelzID, "grpc: server failed to compress response: ", err) + channelz.Error(logger, s.channelz, "grpc: server failed to compress response: ", err) return err } hdr, payload := 
msgHeader(data, compData) @@ -1344,7 +1323,7 @@ func (s *Server) processUnaryRPC(ctx context.Context, t transport.ServerTranspor d, cancel, err := recvAndDecompress(&parser{r: stream, recvBufferPool: s.opts.recvBufferPool}, stream, dc, s.opts.maxReceiveMessageSize, payInfo, decomp) if err != nil { if e := t.WriteStatus(stream, status.Convert(err)); e != nil { - channelz.Warningf(logger, s.channelzID, "grpc: Server.processUnaryRPC failed to write status: %v", e) + channelz.Warningf(logger, s.channelz, "grpc: Server.processUnaryRPC failed to write status: %v", e) } return err } @@ -1395,7 +1374,7 @@ func (s *Server) processUnaryRPC(ctx context.Context, t transport.ServerTranspor trInfo.tr.SetError() } if e := t.WriteStatus(stream, appStatus); e != nil { - channelz.Warningf(logger, s.channelzID, "grpc: Server.processUnaryRPC failed to write status: %v", e) + channelz.Warningf(logger, s.channelz, "grpc: Server.processUnaryRPC failed to write status: %v", e) } if len(binlogs) != 0 { if h, _ := stream.Header(); h.Len() > 0 { @@ -1435,7 +1414,7 @@ func (s *Server) processUnaryRPC(ctx context.Context, t transport.ServerTranspor } if sts, ok := status.FromError(err); ok { if e := t.WriteStatus(stream, sts); e != nil { - channelz.Warningf(logger, s.channelzID, "grpc: Server.processUnaryRPC failed to write status: %v", e) + channelz.Warningf(logger, s.channelz, "grpc: Server.processUnaryRPC failed to write status: %v", e) } } else { switch st := err.(type) { @@ -1763,7 +1742,7 @@ func (s *Server) handleStream(t transport.ServerTransport, stream *transport.Str ti.tr.LazyLog(&fmtStringer{"%v", []any{err}}, true) ti.tr.SetError() } - channelz.Warningf(logger, s.channelzID, "grpc: Server.handleStream failed to write status: %v", err) + channelz.Warningf(logger, s.channelz, "grpc: Server.handleStream failed to write status: %v", err) } if ti != nil { ti.tr.Finish() @@ -1820,7 +1799,7 @@ func (s *Server) handleStream(t transport.ServerTransport, stream *transport.Str 
ti.tr.LazyLog(&fmtStringer{"%v", []any{err}}, true) ti.tr.SetError() } - channelz.Warningf(logger, s.channelzID, "grpc: Server.handleStream failed to write status: %v", err) + channelz.Warningf(logger, s.channelz, "grpc: Server.handleStream failed to write status: %v", err) } if ti != nil { ti.tr.Finish() @@ -1892,8 +1871,7 @@ func (s *Server) stop(graceful bool) { s.quit.Fire() defer s.done.Fire() - s.channelzRemoveOnce.Do(func() { channelz.RemoveEntry(s.channelzID) }) - + s.channelzRemoveOnce.Do(func() { channelz.RemoveEntry(s.channelz.ID) }) s.mu.Lock() s.closeListenersLocked() // Wait for serving threads to be ready to exit. Only then can we be sure no @@ -2148,14 +2126,6 @@ func Method(ctx context.Context) (string, bool) { return s.Method(), true } -type channelzServer struct { - s *Server -} - -func (c *channelzServer) ChannelzMetric() *channelz.ServerInternalMetric { - return c.s.channelzMetric() -} - // validateSendCompressor returns an error when given compressor name cannot be // handled by the server or the client based on the advertised compressors. func validateSendCompressor(name string, clientCompressors []string) error { diff --git a/stream.go b/stream.go index 814e998354ae..d939ffc63489 100644 --- a/stream.go +++ b/stream.go @@ -655,13 +655,13 @@ func (a *csAttempt) shouldRetry(err error) (bool, error) { if len(sps) == 1 { var e error if pushback, e = strconv.Atoi(sps[0]); e != nil || pushback < 0 { - channelz.Infof(logger, cs.cc.channelzID, "Server retry pushback specified to abort (%q).", sps[0]) + channelz.Infof(logger, cs.cc.channelz, "Server retry pushback specified to abort (%q).", sps[0]) cs.retryThrottler.throttle() // This counts as a failure for throttling. 
return false, err } hasPushback = true } else if len(sps) > 1 { - channelz.Warningf(logger, cs.cc.channelzID, "Server retry pushback specified multiple values (%q); not retrying.", sps) + channelz.Warningf(logger, cs.cc.channelz, "Server retry pushback specified multiple values (%q); not retrying.", sps) cs.retryThrottler.throttle() // This counts as a failure for throttling. return false, err } diff --git a/test/channelz_linux_test.go b/test/channelz_linux_test.go index d5b691c1d83e..1be9e683679e 100644 --- a/test/channelz_linux_test.go +++ b/test/channelz_linux_test.go @@ -47,12 +47,13 @@ func testCZSocketMetricsSocketOption(t *testing.T, e env) { if len(ss) != 1 { t.Fatalf("There should be one server, not %d", len(ss)) } - if len(ss[0].ListenSockets) != 1 { - t.Fatalf("There should be one listen socket, not %d", len(ss[0].ListenSockets)) + skts := ss[0].ListenSockets() + if len(skts) != 1 { + t.Fatalf("There should be one listen socket, not %d", len(skts)) } - for id := range ss[0].ListenSockets { + for id := range skts { sm := channelz.GetSocket(id) - if sm == nil || sm.SocketData == nil || sm.SocketData.SocketOptions == nil { + if sm == nil || sm.SocketOptions == nil { t.Fatalf("Unable to get server listen socket options") } } @@ -60,7 +61,7 @@ func testCZSocketMetricsSocketOption(t *testing.T, e env) { if len(ns) != 1 { t.Fatalf("There should be one server normal socket, not %d", len(ns)) } - if ns[0] == nil || ns[0].SocketData == nil || ns[0].SocketData.SocketOptions == nil { + if ns[0] == nil || ns[0].SocketOptions == nil { t.Fatalf("Unable to get server normal socket options") } @@ -68,25 +69,27 @@ func testCZSocketMetricsSocketOption(t *testing.T, e env) { if len(tchan) != 1 { t.Fatalf("There should only be one top channel, not %d", len(tchan)) } - if len(tchan[0].SubChans) != 1 { - t.Fatalf("There should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans)) + subChans := tchan[0].SubChans() + if len(subChans) != 1 { + 
t.Fatalf("There should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(subChans)) } var id int64 - for id = range tchan[0].SubChans { + for id = range subChans { break } sc := channelz.GetSubChannel(id) if sc == nil { t.Fatalf("There should only be one socket under subchannel %d, not 0", id) } - if len(sc.Sockets) != 1 { - t.Fatalf("There should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets)) + skts = sc.Sockets() + if len(skts) != 1 { + t.Fatalf("There should only be one socket under subchannel %d, not %d", sc.ID, len(skts)) } - for id = range sc.Sockets { + for id = range skts { break } skt := channelz.GetSocket(id) - if skt == nil || skt.SocketData == nil || skt.SocketData.SocketOptions == nil { + if skt == nil || skt.SocketOptions == nil { t.Fatalf("Unable to get client normal socket options") } } diff --git a/test/channelz_test.go b/test/channelz_test.go index b23acf4bdc1d..1709ab723035 100644 --- a/test/channelz_test.go +++ b/test/channelz_test.go @@ -68,16 +68,16 @@ func (s) TestCZServerRegistrationAndDeletion(t *testing.T) { testcases := []struct { total int start int64 - max int64 - length int64 + max int + length int end bool }{ - {total: int(channelz.EntryPerPage), start: 0, max: 0, length: channelz.EntryPerPage, end: true}, - {total: int(channelz.EntryPerPage) - 1, start: 0, max: 0, length: channelz.EntryPerPage - 1, end: true}, - {total: int(channelz.EntryPerPage) + 1, start: 0, max: 0, length: channelz.EntryPerPage, end: false}, - {total: int(channelz.EntryPerPage) + 1, start: int64(2*(channelz.EntryPerPage+1) + 1), max: 0, length: 0, end: true}, - {total: int(channelz.EntryPerPage), start: 0, max: 1, length: 1, end: false}, - {total: int(channelz.EntryPerPage), start: 0, max: channelz.EntryPerPage - 1, length: channelz.EntryPerPage - 1, end: false}, + {total: int(channelz.EntriesPerPage), start: 0, max: 0, length: channelz.EntriesPerPage, end: true}, + {total: int(channelz.EntriesPerPage) - 1, start: 0, 
max: 0, length: channelz.EntriesPerPage - 1, end: true}, + {total: int(channelz.EntriesPerPage) + 1, start: 0, max: 0, length: channelz.EntriesPerPage, end: false}, + {total: int(channelz.EntriesPerPage) + 1, start: int64(2*(channelz.EntriesPerPage+1) + 1), max: 0, length: 0, end: true}, + {total: int(channelz.EntriesPerPage), start: 0, max: 1, length: 1, end: false}, + {total: int(channelz.EntriesPerPage), start: 0, max: channelz.EntriesPerPage - 1, length: channelz.EntriesPerPage - 1, end: false}, } for i, c := range testcases { @@ -89,7 +89,7 @@ func (s) TestCZServerRegistrationAndDeletion(t *testing.T) { te.startServers(&testServer{security: e.security}, c.total) ss, end := channelz.GetServers(c.start, c.max) - if int64(len(ss)) != c.length || end != c.end { + if len(ss) != c.length || end != c.end { t.Fatalf("%d: GetServers(%d) = %+v (len of which: %d), end: %+v, want len(GetServers(%d)) = %d, end: %+v", i, c.start, ss, len(ss), end, c.start, c.length, c.end) } te.tearDown() @@ -138,16 +138,16 @@ func (s) TestCZTopChannelRegistrationAndDeletion(t *testing.T) { testcases := []struct { total int start int64 - max int64 - length int64 + max int + length int end bool }{ - {total: int(channelz.EntryPerPage), start: 0, max: 0, length: channelz.EntryPerPage, end: true}, - {total: int(channelz.EntryPerPage) - 1, start: 0, max: 0, length: channelz.EntryPerPage - 1, end: true}, - {total: int(channelz.EntryPerPage) + 1, start: 0, max: 0, length: channelz.EntryPerPage, end: false}, - {total: int(channelz.EntryPerPage) + 1, start: int64(2*(channelz.EntryPerPage+1) + 1), max: 0, length: 0, end: true}, - {total: int(channelz.EntryPerPage), start: 0, max: 1, length: 1, end: false}, - {total: int(channelz.EntryPerPage), start: 0, max: channelz.EntryPerPage - 1, length: channelz.EntryPerPage - 1, end: false}, + {total: int(channelz.EntriesPerPage), start: 0, max: 0, length: channelz.EntriesPerPage, end: true}, + {total: int(channelz.EntriesPerPage) - 1, start: 0, max: 0, 
length: channelz.EntriesPerPage - 1, end: true}, + {total: int(channelz.EntriesPerPage) + 1, start: 0, max: 0, length: channelz.EntriesPerPage, end: false}, + {total: int(channelz.EntriesPerPage) + 1, start: int64(2*(channelz.EntriesPerPage+1) + 1), max: 0, length: 0, end: true}, + {total: int(channelz.EntriesPerPage), start: 0, max: 1, length: 1, end: false}, + {total: int(channelz.EntriesPerPage), start: 0, max: channelz.EntriesPerPage - 1, length: channelz.EntriesPerPage - 1, end: false}, } for _, c := range testcases { @@ -165,7 +165,7 @@ func (s) TestCZTopChannelRegistrationAndDeletion(t *testing.T) { ccs = append(ccs, cc) } if err := verifyResultWithDelay(func() (bool, error) { - if tcs, end := channelz.GetTopChannels(c.start, c.max); int64(len(tcs)) != c.length || end != c.end { + if tcs, end := channelz.GetTopChannels(c.start, c.max); len(tcs) != c.length || end != c.end { return false, fmt.Errorf("getTopChannels(%d) = %+v (len of which: %d), end: %+v, want len(GetTopChannels(%d)) = %d, end: %+v", c.start, tcs, len(tcs), end, c.start, c.length, c.end) } return true, nil @@ -218,8 +218,8 @@ func (s) TestCZNestedChannelRegistrationAndDeletion(t *testing.T) { if len(tcs) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } - if len(tcs[0].NestedChans) != 1 { - return false, fmt.Errorf("there should be one nested channel from grpclb, not %d", len(tcs[0].NestedChans)) + if nestedChans := tcs[0].NestedChans(); len(nestedChans) != 1 { + return false, fmt.Errorf("there should be one nested channel from grpclb, not %d", len(nestedChans)) } return true, nil }); err != nil { @@ -237,8 +237,8 @@ func (s) TestCZNestedChannelRegistrationAndDeletion(t *testing.T) { if len(tcs) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } - if len(tcs[0].NestedChans) != 0 { - return false, fmt.Errorf("there should be 0 nested channel from grpclb, not %d", len(tcs[0].NestedChans)) + if nestedChans := 
tcs[0].NestedChans(); len(nestedChans) != 0 { + return false, fmt.Errorf("there should be 0 nested channel from grpclb, not %d", len(nestedChans)) } return true, nil }); err != nil { @@ -267,16 +267,17 @@ func (s) TestCZClientSubChannelSocketRegistrationAndDeletion(t *testing.T) { if len(tcs) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } - if len(tcs[0].SubChans) != num { - return false, fmt.Errorf("there should be %d subchannel not %d", num, len(tcs[0].SubChans)) + subChans := tcs[0].SubChans() + if len(subChans) != num { + return false, fmt.Errorf("there should be %d subchannel not %d", num, len(subChans)) } count := 0 - for k := range tcs[0].SubChans { + for k := range subChans { sc := channelz.GetSubChannel(k) if sc == nil { return false, fmt.Errorf("got subchannel") } - count += len(sc.Sockets) + count += len(sc.Sockets()) } if count != num { return false, fmt.Errorf("there should be %d sockets not %d", num, count) @@ -294,16 +295,17 @@ func (s) TestCZClientSubChannelSocketRegistrationAndDeletion(t *testing.T) { if len(tcs) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } - if len(tcs[0].SubChans) != num-1 { - return false, fmt.Errorf("there should be %d subchannel not %d", num-1, len(tcs[0].SubChans)) + subChans := tcs[0].SubChans() + if len(subChans) != num-1 { + return false, fmt.Errorf("there should be %d subchannel not %d", num-1, len(subChans)) } count := 0 - for k := range tcs[0].SubChans { + for k := range subChans { sc := channelz.GetSubChannel(k) if sc == nil { return false, fmt.Errorf("got subchannel") } - count += len(sc.Sockets) + count += len(sc.Sockets()) } if count != num-1 { return false, fmt.Errorf("there should be %d sockets not %d", num-1, count) @@ -319,17 +321,17 @@ func (s) TestCZServerSocketRegistrationAndDeletion(t *testing.T) { testcases := []struct { total int start int64 - max int64 - length int64 + max int + length int end bool }{ - {total: 
int(channelz.EntryPerPage), start: 0, max: 0, length: channelz.EntryPerPage, end: true}, - {total: int(channelz.EntryPerPage) - 1, start: 0, max: 0, length: channelz.EntryPerPage - 1, end: true}, - {total: int(channelz.EntryPerPage) + 1, start: 0, max: 0, length: channelz.EntryPerPage, end: false}, - {total: int(channelz.EntryPerPage), start: 1, max: 0, length: channelz.EntryPerPage - 1, end: true}, - {total: int(channelz.EntryPerPage) + 1, start: channelz.EntryPerPage + 1, max: 0, length: 0, end: true}, - {total: int(channelz.EntryPerPage), start: 0, max: 1, length: 1, end: false}, - {total: int(channelz.EntryPerPage), start: 0, max: channelz.EntryPerPage - 1, length: channelz.EntryPerPage - 1, end: false}, + {total: int(channelz.EntriesPerPage), start: 0, max: 0, length: channelz.EntriesPerPage, end: true}, + {total: int(channelz.EntriesPerPage) - 1, start: 0, max: 0, length: channelz.EntriesPerPage - 1, end: true}, + {total: int(channelz.EntriesPerPage) + 1, start: 0, max: 0, length: channelz.EntriesPerPage, end: false}, + {total: int(channelz.EntriesPerPage), start: 1, max: 0, length: channelz.EntriesPerPage - 1, end: true}, + {total: int(channelz.EntriesPerPage) + 1, start: int64(channelz.EntriesPerPage) + 1, max: 0, length: 0, end: true}, + {total: int(channelz.EntriesPerPage), start: 0, max: 1, length: 1, end: false}, + {total: int(channelz.EntriesPerPage), start: 0, max: channelz.EntriesPerPage - 1, length: channelz.EntriesPerPage - 1, end: false}, } for _, c := range testcases { @@ -352,13 +354,13 @@ func (s) TestCZServerSocketRegistrationAndDeletion(t *testing.T) { if len(ss) != 1 { return false, fmt.Errorf("there should only be one server, not %d", len(ss)) } - if len(ss[0].ListenSockets) != 1 { - return false, fmt.Errorf("there should only be one server listen socket, not %d", len(ss[0].ListenSockets)) + if got := len(ss[0].ListenSockets()); got != 1 { + return false, fmt.Errorf("there should only be one server listen socket, not %d", got) } startID := 
c.start if startID != 0 { - ns, _ := channelz.GetServerSockets(ss[0].ID, 0, int64(c.total)) + ns, _ := channelz.GetServerSockets(ss[0].ID, 0, c.total) if int64(len(ns)) < c.start { return false, fmt.Errorf("there should more than %d sockets, not %d", len(ns), c.start) } @@ -366,7 +368,7 @@ func (s) TestCZServerSocketRegistrationAndDeletion(t *testing.T) { } ns, end := channelz.GetServerSockets(ss[0].ID, startID, c.max) - if int64(len(ns)) != c.length || end != c.end { + if len(ns) != c.length || end != c.end { return false, fmt.Errorf("GetServerSockets(%d) = %+v (len of which: %d), end: %+v, want len(GetServerSockets(%d)) = %d, end: %+v", c.start, ns, len(ns), end, c.start, c.length, c.end) } @@ -405,8 +407,9 @@ func (s) TestCZServerListenSocketDeletion(t *testing.T) { if len(ss) != 1 { return false, fmt.Errorf("there should only be one server, not %d", len(ss)) } - if len(ss[0].ListenSockets) != 1 { - return false, fmt.Errorf("there should only be one server listen socket, not %d", len(ss[0].ListenSockets)) + skts := ss[0].ListenSockets() + if len(skts) != 1 { + return false, fmt.Errorf("there should only be one server listen socket, not %v", skts) } return true, nil }); err != nil { @@ -419,8 +422,9 @@ func (s) TestCZServerListenSocketDeletion(t *testing.T) { if len(ss) != 1 { return false, fmt.Errorf("there should be 1 server, not %d", len(ss)) } - if len(ss[0].ListenSockets) != 0 { - return false, fmt.Errorf("there should only be %d server listen socket, not %d", 0, len(ss[0].ListenSockets)) + skts := ss[0].ListenSockets() + if len(skts) != 0 { + return false, fmt.Errorf("there should only be %d server listen socket, not %v", 0, skts) } return true, nil }); err != nil { @@ -429,18 +433,6 @@ func (s) TestCZServerListenSocketDeletion(t *testing.T) { s.Stop() } -type dummyChannel struct{} - -func (d *dummyChannel) ChannelzMetric() *channelz.ChannelInternalMetric { - return &channelz.ChannelInternalMetric{} -} - -type dummySocket struct{} - -func (d *dummySocket) 
ChannelzMetric() *channelz.SocketInternalMetric { - return &channelz.SocketInternalMetric{} -} - func (s) TestCZRecusivelyDeletionOfEntry(t *testing.T) { // +--+TopChan+---+ // | | @@ -450,42 +442,42 @@ func (s) TestCZRecusivelyDeletionOfEntry(t *testing.T) { // v v // Socket1 Socket2 - topChanID := channelz.RegisterChannel(&dummyChannel{}, nil, "") - subChanID1, _ := channelz.RegisterSubChannel(&dummyChannel{}, topChanID, "") - subChanID2, _ := channelz.RegisterSubChannel(&dummyChannel{}, topChanID, "") - sktID1, _ := channelz.RegisterNormalSocket(&dummySocket{}, subChanID1, "") - sktID2, _ := channelz.RegisterNormalSocket(&dummySocket{}, subChanID1, "") + topChan := channelz.RegisterChannel(nil, "") + subChan1 := channelz.RegisterSubChannel(topChan.ID, "") + subChan2 := channelz.RegisterSubChannel(topChan.ID, "") + skt1 := channelz.RegisterSocket(&channelz.Socket{SocketType: channelz.SocketTypeNormal, Parent: subChan1}) + skt2 := channelz.RegisterSocket(&channelz.Socket{SocketType: channelz.SocketTypeNormal, Parent: subChan1}) tcs, _ := channelz.GetTopChannels(0, 0) if tcs == nil || len(tcs) != 1 { t.Fatalf("There should be one TopChannel entry") } - if len(tcs[0].SubChans) != 2 { + if len(tcs[0].SubChans()) != 2 { t.Fatalf("There should be two SubChannel entries") } - sc := channelz.GetSubChannel(subChanID1.Int()) - if sc == nil || len(sc.Sockets) != 2 { + sc := channelz.GetSubChannel(subChan1.ID) + if sc == nil || len(sc.Sockets()) != 2 { t.Fatalf("There should be two Socket entries") } - channelz.RemoveEntry(topChanID) + channelz.RemoveEntry(topChan.ID) tcs, _ = channelz.GetTopChannels(0, 0) if tcs == nil || len(tcs) != 1 { t.Fatalf("There should be one TopChannel entry") } - channelz.RemoveEntry(subChanID1) - channelz.RemoveEntry(subChanID2) + channelz.RemoveEntry(subChan1.ID) + channelz.RemoveEntry(subChan2.ID) tcs, _ = channelz.GetTopChannels(0, 0) if tcs == nil || len(tcs) != 1 { t.Fatalf("There should be one TopChannel entry") } - if len(tcs[0].SubChans) 
!= 1 { + if len(tcs[0].SubChans()) != 1 { t.Fatalf("There should be one SubChannel entry") } - channelz.RemoveEntry(sktID1) - channelz.RemoveEntry(sktID2) + channelz.RemoveEntry(skt1.ID) + channelz.RemoveEntry(skt2.ID) tcs, _ = channelz.GetTopChannels(0, 0) if tcs != nil { t.Fatalf("There should be no TopChannel entry") @@ -543,18 +535,19 @@ func (s) TestCZChannelMetrics(t *testing.T) { if len(tcs) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } - if len(tcs[0].SubChans) != num { - return false, fmt.Errorf("there should be %d subchannel not %d", num, len(tcs[0].SubChans)) + subChans := tcs[0].SubChans() + if len(subChans) != num { + return false, fmt.Errorf("there should be %d subchannel not %d", num, len(subChans)) } var cst, csu, cf int64 - for k := range tcs[0].SubChans { + for k := range subChans { sc := channelz.GetSubChannel(k) if sc == nil { return false, fmt.Errorf("got subchannel") } - cst += sc.ChannelData.CallsStarted - csu += sc.ChannelData.CallsSucceeded - cf += sc.ChannelData.CallsFailed + cst += sc.ChannelMetrics.CallsStarted.Load() + csu += sc.ChannelMetrics.CallsSucceeded.Load() + cf += sc.ChannelMetrics.CallsFailed.Load() } if cst != 3 { return false, fmt.Errorf("there should be 3 CallsStarted not %d", cst) @@ -565,14 +558,14 @@ func (s) TestCZChannelMetrics(t *testing.T) { if cf != 1 { return false, fmt.Errorf("there should be 1 CallsFailed not %d", cf) } - if tcs[0].ChannelData.CallsStarted != 3 { - return false, fmt.Errorf("there should be 3 CallsStarted not %d", tcs[0].ChannelData.CallsStarted) + if got := tcs[0].ChannelMetrics.CallsStarted.Load(); got != 3 { + return false, fmt.Errorf("there should be 3 CallsStarted not %d", got) } - if tcs[0].ChannelData.CallsSucceeded != 1 { - return false, fmt.Errorf("there should be 1 CallsSucceeded not %d", tcs[0].ChannelData.CallsSucceeded) + if got := tcs[0].ChannelMetrics.CallsSucceeded.Load(); got != 1 { + return false, fmt.Errorf("there should be 1 
CallsSucceeded not %d", got) } - if tcs[0].ChannelData.CallsFailed != 1 { - return false, fmt.Errorf("there should be 1 CallsFailed not %d", tcs[0].ChannelData.CallsFailed) + if got := tcs[0].ChannelMetrics.CallsFailed.Load(); got != 1 { + return false, fmt.Errorf("there should be 1 CallsFailed not %d", got) } return true, nil }); err != nil { @@ -621,14 +614,14 @@ func (s) TestCZServerMetrics(t *testing.T) { if len(ss) != 1 { return false, fmt.Errorf("there should only be one server, not %d", len(ss)) } - if ss[0].ServerData.CallsStarted != 3 { - return false, fmt.Errorf("there should be 3 CallsStarted not %d", ss[0].ServerData.CallsStarted) + if cs := ss[0].ServerMetrics.CallsStarted.Load(); cs != 3 { + return false, fmt.Errorf("there should be 3 CallsStarted not %d", cs) } - if ss[0].ServerData.CallsSucceeded != 1 { - return false, fmt.Errorf("there should be 1 CallsSucceeded not %d", ss[0].ServerData.CallsSucceeded) + if cs := ss[0].ServerMetrics.CallsSucceeded.Load(); cs != 1 { + return false, fmt.Errorf("there should be 1 CallsSucceeded not %d", cs) } - if ss[0].ServerData.CallsFailed != 1 { - return false, fmt.Errorf("there should be 1 CallsFailed not %d", ss[0].ServerData.CallsFailed) + if cf := ss[0].ServerMetrics.CallsFailed.Load(); cf != 1 { + return false, fmt.Errorf("there should be 1 CallsFailed not %d", cf) } return true, nil }); err != nil { @@ -867,27 +860,29 @@ func (s) TestCZClientSocketMetricsStreamsAndMessagesCount(t *testing.T) { if len(tchan) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan)) } - if len(tchan[0].SubChans) != 1 { - return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans)) + subChans := tchan[0].SubChans() + if len(subChans) != 1 { + return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(subChans)) } - for scID = range tchan[0].SubChans { + for scID = range 
subChans { break } sc := channelz.GetSubChannel(scID) if sc == nil { return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", scID) } - if len(sc.Sockets) != 1 { - return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets)) + skts := sc.Sockets() + if len(skts) != 1 { + return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(skts)) } - for skID = range sc.Sockets { + for skID = range skts { break } skt := channelz.GetSocket(skID) - sktData := skt.SocketData - if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 1 || sktData.MessagesSent != 1 || sktData.MessagesReceived != 1 { - return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, MessagesSent, MessagesReceived) = (1, 1, 1, 1), got (%d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.MessagesSent, sktData.MessagesReceived) + sktData := &skt.SocketMetrics + if sktData.StreamsStarted.Load() != 1 || sktData.StreamsSucceeded.Load() != 1 || sktData.MessagesSent.Load() != 1 || sktData.MessagesReceived.Load() != 1 { + return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted.Load(), StreamsSucceeded.Load(), MessagesSent.Load(), MessagesReceived.Load()) = (1, 1, 1, 1), got (%d, %d, %d, %d)", skt.ID, sktData.StreamsStarted.Load(), sktData.StreamsSucceeded.Load(), sktData.MessagesSent.Load(), sktData.MessagesReceived.Load()) } return true, nil }); err != nil { @@ -897,9 +892,9 @@ func (s) TestCZClientSocketMetricsStreamsAndMessagesCount(t *testing.T) { doServerSideFailedUnaryCall(tc, t) if err := verifyResultWithDelay(func() (bool, error) { skt := channelz.GetSocket(skID) - sktData := skt.SocketData - if sktData.StreamsStarted != 2 || sktData.StreamsSucceeded != 2 || sktData.MessagesSent != 2 || sktData.MessagesReceived != 1 { - return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, 
StreamsSucceeded, MessagesSent, MessagesReceived) = (2, 2, 2, 1), got (%d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.MessagesSent, sktData.MessagesReceived) + sktData := &skt.SocketMetrics + if sktData.StreamsStarted.Load() != 2 || sktData.StreamsSucceeded.Load() != 2 || sktData.MessagesSent.Load() != 2 || sktData.MessagesReceived.Load() != 1 { + return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted.Load(), StreamsSucceeded.Load(), MessagesSent.Load(), MessagesReceived.Load()) = (2, 2, 2, 1), got (%d, %d, %d, %d)", skt.ID, sktData.StreamsStarted.Load(), sktData.StreamsSucceeded.Load(), sktData.MessagesSent.Load(), sktData.MessagesReceived.Load()) } return true, nil }); err != nil { @@ -909,9 +904,9 @@ func (s) TestCZClientSocketMetricsStreamsAndMessagesCount(t *testing.T) { doClientSideInitiatedFailedStream(tc, t) if err := verifyResultWithDelay(func() (bool, error) { skt := channelz.GetSocket(skID) - sktData := skt.SocketData - if sktData.StreamsStarted != 3 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 1 || sktData.MessagesSent != 3 || sktData.MessagesReceived != 2 { - return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (3, 2, 1, 3, 2), got (%d, %d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived) + sktData := &skt.SocketMetrics + if sktData.StreamsStarted.Load() != 3 || sktData.StreamsSucceeded.Load() != 2 || sktData.StreamsFailed.Load() != 1 || sktData.MessagesSent.Load() != 3 || sktData.MessagesReceived.Load() != 2 { + return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted.Load(), StreamsSucceeded.Load(), StreamsFailed.Load(), MessagesSent.Load(), MessagesReceived.Load()) = (3, 2, 1, 3, 2), got (%d, %d, %d, %d, %d)", skt.ID, sktData.StreamsStarted.Load(), sktData.StreamsSucceeded.Load(), 
sktData.StreamsFailed.Load(), sktData.MessagesSent.Load(), sktData.MessagesReceived.Load()) } return true, nil }); err != nil { @@ -921,9 +916,9 @@ func (s) TestCZClientSocketMetricsStreamsAndMessagesCount(t *testing.T) { doServerSideInitiatedFailedStreamWithRSTStream(tc, t, rcw) if err := verifyResultWithDelay(func() (bool, error) { skt := channelz.GetSocket(skID) - sktData := skt.SocketData - if sktData.StreamsStarted != 4 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 2 || sktData.MessagesSent != 4 || sktData.MessagesReceived != 3 { - return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (4, 2, 2, 4, 3), got (%d, %d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived) + sktData := &skt.SocketMetrics + if sktData.StreamsStarted.Load() != 4 || sktData.StreamsSucceeded.Load() != 2 || sktData.StreamsFailed.Load() != 2 || sktData.MessagesSent.Load() != 4 || sktData.MessagesReceived.Load() != 3 { + return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted.Load(), StreamsSucceeded.Load(), StreamsFailed.Load(), MessagesSent.Load(), MessagesReceived.Load()) = (4, 2, 2, 4, 3), got (%d, %d, %d, %d, %d)", skt.ID, sktData.StreamsStarted.Load(), sktData.StreamsSucceeded.Load(), sktData.StreamsFailed.Load(), sktData.MessagesSent.Load(), sktData.MessagesReceived.Load()) } return true, nil }); err != nil { @@ -933,9 +928,9 @@ func (s) TestCZClientSocketMetricsStreamsAndMessagesCount(t *testing.T) { doServerSideInitiatedFailedStreamWithGoAway(ctx, tc, t, rcw) if err := verifyResultWithDelay(func() (bool, error) { skt := channelz.GetSocket(skID) - sktData := skt.SocketData - if sktData.StreamsStarted != 6 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 3 || sktData.MessagesSent != 6 || sktData.MessagesReceived != 5 { - return false, fmt.Errorf("channelz.GetSocket(%d), 
want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (6, 2, 3, 6, 5), got (%d, %d, %d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived) + sktData := &skt.SocketMetrics + if sktData.StreamsStarted.Load() != 6 || sktData.StreamsSucceeded.Load() != 2 || sktData.StreamsFailed.Load() != 3 || sktData.MessagesSent.Load() != 6 || sktData.MessagesReceived.Load() != 5 { + return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted.Load(), StreamsSucceeded.Load(), StreamsFailed.Load(), MessagesSent.Load(), MessagesReceived.Load()) = (6, 2, 3, 6, 5), got (%d, %d, %d, %d, %d)", skt.ID, sktData.StreamsStarted.Load(), sktData.StreamsSucceeded.Load(), sktData.StreamsFailed.Load(), sktData.MessagesSent.Load(), sktData.MessagesReceived.Load()) } return true, nil }); err != nil { @@ -989,27 +984,29 @@ func (s) TestCZClientAndServerSocketMetricsStreamsCountFlowControlRSTStream(t *t if len(tchan) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan)) } - if len(tchan[0].SubChans) != 1 { - return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans)) + subChans := tchan[0].SubChans() + if len(subChans) != 1 { + return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(subChans)) } var id int64 - for id = range tchan[0].SubChans { + for id = range subChans { break } sc := channelz.GetSubChannel(id) if sc == nil { return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id) } - if len(sc.Sockets) != 1 { - return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets)) + skts := sc.Sockets() + if len(skts) != 1 { + return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(skts)) } - 
for id = range sc.Sockets { + for id = range skts { break } skt := channelz.GetSocket(id) - sktData := skt.SocketData - if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 0 || sktData.StreamsFailed != 1 { - return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted, StreamsSucceeded, StreamsFailed) = (1, 0, 1), got (%d, %d, %d)", skt.ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed) + sktData := &skt.SocketMetrics + if sktData.StreamsStarted.Load() != 1 || sktData.StreamsSucceeded.Load() != 0 || sktData.StreamsFailed.Load() != 1 { + return false, fmt.Errorf("channelz.GetSocket(%d), want (StreamsStarted.Load(), StreamsSucceeded.Load(), StreamsFailed.Load()) = (1, 0, 1), got (%d, %d, %d)", skt.ID, sktData.StreamsStarted.Load(), sktData.StreamsSucceeded.Load(), sktData.StreamsFailed.Load()) } ss, _ := channelz.GetServers(0, 0) if len(ss) != 1 { @@ -1020,9 +1017,9 @@ func (s) TestCZClientAndServerSocketMetricsStreamsCountFlowControlRSTStream(t *t if len(ns) != 1 { return false, fmt.Errorf("there should be one server normal socket, not %d", len(ns)) } - sktData = ns[0].SocketData - if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 0 || sktData.StreamsFailed != 1 { - return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, StreamsFailed) = (1, 0, 1), got (%d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed) + sktData = &ns[0].SocketMetrics + if sktData.StreamsStarted.Load() != 1 || sktData.StreamsSucceeded.Load() != 0 || sktData.StreamsFailed.Load() != 1 { + return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted.Load(), StreamsSucceeded.Load(), StreamsFailed.Load()) = (1, 0, 1), got (%d, %d, %d)", ns[0].ID, sktData.StreamsStarted.Load(), sktData.StreamsSucceeded.Load(), sktData.StreamsFailed.Load()) } return true, nil }); err != nil { @@ -1053,25 +1050,27 @@ func (s) 
TestCZClientAndServerSocketMetricsFlowControl(t *testing.T) { if len(tchan) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan)) } - if len(tchan[0].SubChans) != 1 { - return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans)) + subChans := tchan[0].SubChans() + if len(subChans) != 1 { + return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(subChans)) } var id int64 - for id = range tchan[0].SubChans { + for id = range subChans { break } sc := channelz.GetSubChannel(id) if sc == nil { return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id) } - if len(sc.Sockets) != 1 { - return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets)) + skts := sc.Sockets() + if len(skts) != 1 { + return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(skts)) } - for id = range sc.Sockets { + for id = range skts { break } skt := channelz.GetSocket(id) - sktData := skt.SocketData + sktData := skt.EphemeralMetrics() // 65536 - 5 (Length-Prefixed-Message size) * 10 = 65486 if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 65486 { return false, fmt.Errorf("client: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65536, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow) @@ -1081,7 +1080,7 @@ func (s) TestCZClientAndServerSocketMetricsFlowControl(t *testing.T) { return false, fmt.Errorf("there should only be one server, not %d", len(ss)) } ns, _ := channelz.GetServerSockets(ss[0].ID, 0, 0) - sktData = ns[0].SocketData + sktData = ns[0].EphemeralMetrics() if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 65486 { return false, fmt.Errorf("server: (LocalFlowControlWindow, 
RemoteFlowControlWindow) size should be (65536, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow) } @@ -1095,7 +1094,7 @@ func (s) TestCZClientAndServerSocketMetricsFlowControl(t *testing.T) { if err := verifyResultWithDelay(func() (bool, error) { skt := channelz.GetSocket(cliSktID) - sktData := skt.SocketData + sktData := skt.EphemeralMetrics() // Local: 65536 - 5 (Length-Prefixed-Message size) * 10 = 65486 // Remote: 65536 - 5 (Length-Prefixed-Message size) * 10 - 10011 = 55475 if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 55475 { @@ -1106,7 +1105,7 @@ func (s) TestCZClientAndServerSocketMetricsFlowControl(t *testing.T) { return false, fmt.Errorf("there should only be one server, not %d", len(ss)) } ns, _ := channelz.GetServerSockets(svrSktID, 0, 0) - sktData = ns[0].SocketData + sktData = ns[0].EphemeralMetrics() if sktData.LocalFlowControlWindow != 55475 || sktData.RemoteFlowControlWindow != 65486 { return false, fmt.Errorf("server: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (55475, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow) } @@ -1120,7 +1119,7 @@ func (s) TestCZClientAndServerSocketMetricsFlowControl(t *testing.T) { doStreamingInputCallWithLargePayload(tc, t) if err := verifyResultWithDelay(func() (bool, error) { skt := channelz.GetSocket(cliSktID) - sktData := skt.SocketData + sktData := skt.EphemeralMetrics() // Local: 65536 - 5 (Length-Prefixed-Message size) * 10 = 65486 // Remote: 65536 if sktData.LocalFlowControlWindow != 65486 || sktData.RemoteFlowControlWindow != 65536 { @@ -1131,7 +1130,7 @@ func (s) TestCZClientAndServerSocketMetricsFlowControl(t *testing.T) { return false, fmt.Errorf("there should only be one server, not %d", len(ss)) } ns, _ := channelz.GetServerSockets(svrSktID, 0, 0) - sktData = ns[0].SocketData + sktData = ns[0].EphemeralMetrics() if sktData.LocalFlowControlWindow != 65536 || 
sktData.RemoteFlowControlWindow != 65486 { return false, fmt.Errorf("server: (LocalFlowControlWindow, RemoteFlowControlWindow) size should be (65536, 65486), not (%d, %d)", sktData.LocalFlowControlWindow, sktData.RemoteFlowControlWindow) } @@ -1172,27 +1171,29 @@ func (s) TestCZClientSocketMetricsKeepAlive(t *testing.T) { if len(tchan) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan)) } - if len(tchan[0].SubChans) != 1 { - return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans)) + subChans := tchan[0].SubChans() + if len(subChans) != 1 { + return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(subChans)) } var id int64 - for id = range tchan[0].SubChans { + for id = range subChans { break } sc := channelz.GetSubChannel(id) if sc == nil { return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id) } - if len(sc.Sockets) != 1 { - return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets)) + skts := sc.Sockets() + if len(skts) != 1 { + return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(skts)) } - for id = range sc.Sockets { + for id = range skts { break } skt := channelz.GetSocket(id) want := int64(time.Since(start) / keepaliveRate) - if skt.SocketData.KeepAlivesSent != want { - return false, fmt.Errorf("there should be %v KeepAlives sent, not %d", want, skt.SocketData.KeepAlivesSent) + if got := skt.SocketMetrics.KeepAlivesSent.Load(); got != want { + return false, fmt.Errorf("there should be %v KeepAlives sent, not %d", want, got) } return true, nil }); err != nil { @@ -1225,9 +1226,9 @@ func (s) TestCZServerSocketMetricsStreamsAndMessagesCount(t *testing.T) { doSuccessfulUnaryCall(tc, t) if err := verifyResultWithDelay(func() (bool, error) { ns, _ := 
channelz.GetServerSockets(svrID, 0, 0) - sktData := ns[0].SocketData - if sktData.StreamsStarted != 1 || sktData.StreamsSucceeded != 1 || sktData.StreamsFailed != 0 || sktData.MessagesSent != 1 || sktData.MessagesReceived != 1 { - return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, MessagesSent, MessagesReceived) = (1, 1, 1, 1), got (%d, %d, %d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived) + sktData := &ns[0].SocketMetrics + if sktData.StreamsStarted.Load() != 1 || sktData.StreamsSucceeded.Load() != 1 || sktData.StreamsFailed.Load() != 0 || sktData.MessagesSent.Load() != 1 || sktData.MessagesReceived.Load() != 1 { + return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted.Load(), StreamsSucceeded.Load(), MessagesSent.Load(), MessagesReceived.Load()) = (1, 1, 1, 1), got (%d, %d, %d, %d, %d)", ns[0].ID, sktData.StreamsStarted.Load(), sktData.StreamsSucceeded.Load(), sktData.StreamsFailed.Load(), sktData.MessagesSent.Load(), sktData.MessagesReceived.Load()) } return true, nil }); err != nil { @@ -1237,9 +1238,9 @@ func (s) TestCZServerSocketMetricsStreamsAndMessagesCount(t *testing.T) { doServerSideFailedUnaryCall(tc, t) if err := verifyResultWithDelay(func() (bool, error) { ns, _ := channelz.GetServerSockets(svrID, 0, 0) - sktData := ns[0].SocketData - if sktData.StreamsStarted != 2 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 0 || sktData.MessagesSent != 1 || sktData.MessagesReceived != 1 { - return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (2, 2, 0, 1, 1), got (%d, %d, %d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived) + sktData := &ns[0].SocketMetrics + if sktData.StreamsStarted.Load() != 2 || 
sktData.StreamsSucceeded.Load() != 2 || sktData.StreamsFailed.Load() != 0 || sktData.MessagesSent.Load() != 1 || sktData.MessagesReceived.Load() != 1 { + return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted.Load(), StreamsSucceeded.Load(), StreamsFailed.Load(), MessagesSent.Load(), MessagesReceived.Load()) = (2, 2, 0, 1, 1), got (%d, %d, %d, %d, %d)", ns[0].ID, sktData.StreamsStarted.Load(), sktData.StreamsSucceeded.Load(), sktData.StreamsFailed.Load(), sktData.MessagesSent.Load(), sktData.MessagesReceived.Load()) } return true, nil }); err != nil { @@ -1249,9 +1250,9 @@ func (s) TestCZServerSocketMetricsStreamsAndMessagesCount(t *testing.T) { doClientSideInitiatedFailedStream(tc, t) if err := verifyResultWithDelay(func() (bool, error) { ns, _ := channelz.GetServerSockets(svrID, 0, 0) - sktData := ns[0].SocketData - if sktData.StreamsStarted != 3 || sktData.StreamsSucceeded != 2 || sktData.StreamsFailed != 1 || sktData.MessagesSent != 2 || sktData.MessagesReceived != 2 { - return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted, StreamsSucceeded, StreamsFailed, MessagesSent, MessagesReceived) = (3, 2, 1, 2, 2), got (%d, %d, %d, %d, %d)", ns[0].ID, sktData.StreamsStarted, sktData.StreamsSucceeded, sktData.StreamsFailed, sktData.MessagesSent, sktData.MessagesReceived) + sktData := &ns[0].SocketMetrics + if sktData.StreamsStarted.Load() != 3 || sktData.StreamsSucceeded.Load() != 2 || sktData.StreamsFailed.Load() != 1 || sktData.MessagesSent.Load() != 2 || sktData.MessagesReceived.Load() != 2 { + return false, fmt.Errorf("server socket metric with ID %d, want (StreamsStarted.Load(), StreamsSucceeded.Load(), StreamsFailed.Load(), MessagesSent.Load(), MessagesReceived.Load()) = (3, 2, 1, 2, 2), got (%d, %d, %d, %d, %d)", ns[0].ID, sktData.StreamsStarted.Load(), sktData.StreamsSucceeded.Load(), sktData.StreamsFailed.Load(), sktData.MessagesSent.Load(), sktData.MessagesReceived.Load()) } return true, nil }); err != nil { 
@@ -1296,7 +1297,7 @@ func (s) TestCZServerSocketMetricsKeepAlive(t *testing.T) { t.Fatalf("there should be one server normal socket, not %d", len(ns)) } const wantMin, wantMax = 3, 7 - if got := ns[0].SocketData.KeepAlivesSent; got < wantMin || got > wantMax { + if got := ns[0].SocketMetrics.KeepAlivesSent.Load(); got < wantMin || got > wantMax { t.Fatalf("got keepalivesCount: %v, want keepalivesCount: [%v,%v]", got, wantMin, wantMax) } } @@ -1341,38 +1342,40 @@ func (s) TestCZSocketGetSecurityValueTLS(t *testing.T) { if len(tchan) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tchan)) } - if len(tchan[0].SubChans) != 1 { - return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(tchan[0].SubChans)) + subChans := tchan[0].SubChans() + if len(subChans) != 1 { + return false, fmt.Errorf("there should only be one subchannel under top channel %d, not %d", tchan[0].ID, len(subChans)) } var id int64 - for id = range tchan[0].SubChans { + for id = range subChans { break } sc := channelz.GetSubChannel(id) if sc == nil { return false, fmt.Errorf("there should only be one socket under subchannel %d, not 0", id) } - if len(sc.Sockets) != 1 { - return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(sc.Sockets)) + skts := sc.Sockets() + if len(skts) != 1 { + return false, fmt.Errorf("there should only be one socket under subchannel %d, not %d", sc.ID, len(skts)) } - for id = range sc.Sockets { + for id = range skts { break } skt := channelz.GetSocket(id) cert, _ := tls.LoadX509KeyPair(testdata.Path("x509/server1_cert.pem"), testdata.Path("x509/server1_key.pem")) - securityVal, ok := skt.SocketData.Security.(*credentials.TLSChannelzSecurityValue) + securityVal, ok := skt.Security.(*credentials.TLSChannelzSecurityValue) if !ok { - return false, fmt.Errorf("the SocketData.Security is of type: %T, want: *credentials.TLSChannelzSecurityValue", 
skt.SocketData.Security) + return false, fmt.Errorf("the Security is of type: %T, want: *credentials.TLSChannelzSecurityValue", skt.Security) } if !cmp.Equal(securityVal.RemoteCertificate, cert.Certificate[0]) { - return false, fmt.Errorf("SocketData.Security.RemoteCertificate got: %v, want: %v", securityVal.RemoteCertificate, cert.Certificate[0]) + return false, fmt.Errorf("Security.RemoteCertificate got: %v, want: %v", securityVal.RemoteCertificate, cert.Certificate[0]) } for _, v := range cipherSuites { if v == securityVal.StandardName { return true, nil } } - return false, fmt.Errorf("SocketData.Security.StandardName got: %v, want it to be one of %v", securityVal.StandardName, cipherSuites) + return false, fmt.Errorf("Security.StandardName got: %v, want it to be one of %v", securityVal.StandardName, cipherSuites) }); err != nil { t.Fatal(err) } @@ -1397,27 +1400,30 @@ func (s) TestCZChannelTraceCreationDeletion(t *testing.T) { if len(tcs) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } - if len(tcs[0].NestedChans) != 1 { - return false, fmt.Errorf("there should be one nested channel from grpclb, not %d", len(tcs[0].NestedChans)) + nestedChans := tcs[0].NestedChans() + if len(nestedChans) != 1 { + return false, fmt.Errorf("there should be one nested channel from grpclb, not %d", len(nestedChans)) } - for k := range tcs[0].NestedChans { + for k := range nestedChans { nestedConn = k } - for _, e := range tcs[0].Trace.Events { + trace := tcs[0].Trace() + for _, e := range trace.Events { if e.RefID == nestedConn && e.RefType != channelz.RefChannel { return false, fmt.Errorf("nested channel trace event shoud have RefChannel as RefType") } } ncm := channelz.GetChannel(nestedConn) - if ncm.Trace == nil { + ncmTrace := ncm.Trace() + if ncmTrace == nil { return false, fmt.Errorf("trace for nested channel should not be empty") } - if len(ncm.Trace.Events) == 0 { + if len(ncmTrace.Events) == 0 { return false, fmt.Errorf("there 
should be at least one trace event for nested channel not 0") } pattern := `Channel created` - if ok, _ := regexp.MatchString(pattern, ncm.Trace.Events[0].Desc); !ok { - return false, fmt.Errorf("the first trace event should be %q, not %q", pattern, ncm.Trace.Events[0].Desc) + if ok, _ := regexp.MatchString(pattern, ncmTrace.Events[0].Desc); !ok { + return false, fmt.Errorf("the first trace event should be %q, not %q", pattern, ncmTrace.Events[0].Desc) } return true, nil }); err != nil { @@ -1435,22 +1441,24 @@ func (s) TestCZChannelTraceCreationDeletion(t *testing.T) { if len(tcs) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } - if len(tcs[0].NestedChans) != 0 { - return false, fmt.Errorf("there should be 0 nested channel from grpclb, not %d", len(tcs[0].NestedChans)) + nestedChans := tcs[0].NestedChans() + if len(nestedChans) != 0 { + return false, fmt.Errorf("there should be 0 nested channel from grpclb, not %d", len(nestedChans)) } ncm := channelz.GetChannel(nestedConn) if ncm == nil { return false, fmt.Errorf("nested channel should still exist due to parent's trace reference") } - if ncm.Trace == nil { + trace := ncm.Trace() + if trace == nil { return false, fmt.Errorf("trace for nested channel should not be empty") } - if len(ncm.Trace.Events) == 0 { + if len(trace.Events) == 0 { return false, fmt.Errorf("there should be at least one trace event for nested channel not 0") } pattern := `Channel created` - if ok, _ := regexp.MatchString(pattern, ncm.Trace.Events[0].Desc); !ok { - return false, fmt.Errorf("the first trace event should be %q, not %q", pattern, ncm.Trace.Events[0].Desc) + if ok, _ := regexp.MatchString(pattern, trace.Events[0].Desc); !ok { + return false, fmt.Errorf("the first trace event should be %q, not %q", pattern, trace.Events[0].Desc) } return true, nil }); err != nil { @@ -1475,13 +1483,15 @@ func (s) TestCZSubChannelTraceCreationDeletion(t *testing.T) { if len(tcs) != 1 { return false, 
fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } - if len(tcs[0].SubChans) != 1 { - return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) + subChans := tcs[0].SubChans() + if len(subChans) != 1 { + return false, fmt.Errorf("there should be 1 subchannel not %d", len(subChans)) } - for k := range tcs[0].SubChans { + for k := range subChans { subConn = k } - for _, e := range tcs[0].Trace.Events { + trace := tcs[0].Trace() + for _, e := range trace.Events { if e.RefID == subConn && e.RefType != channelz.RefSubChannel { return false, fmt.Errorf("subchannel trace event shoud have RefType to be RefSubChannel") } @@ -1490,15 +1500,16 @@ func (s) TestCZSubChannelTraceCreationDeletion(t *testing.T) { if scm == nil { return false, fmt.Errorf("subChannel does not exist") } - if scm.Trace == nil { + scTrace := scm.Trace() + if scTrace == nil { return false, fmt.Errorf("trace for subChannel should not be empty") } - if len(scm.Trace.Events) == 0 { + if len(scTrace.Events) == 0 { return false, fmt.Errorf("there should be at least one trace event for subChannel not 0") } pattern := `Subchannel created` - if ok, _ := regexp.MatchString(pattern, scm.Trace.Events[0].Desc); !ok { - return false, fmt.Errorf("the first trace event should be %q, not %q", pattern, scm.Trace.Events[0].Desc) + if ok, _ := regexp.MatchString(pattern, scTrace.Events[0].Desc); !ok { + return false, fmt.Errorf("the first trace event should be %q, not %q", pattern, scTrace.Events[0].Desc) } return true, nil }); err != nil { @@ -1516,22 +1527,24 @@ func (s) TestCZSubChannelTraceCreationDeletion(t *testing.T) { if len(tcs) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } - if len(tcs[0].SubChans) != 1 { - return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) + subChans := tcs[0].SubChans() + if len(subChans) != 1 { + return false, fmt.Errorf("there should be 1 subchannel not %d", 
len(subChans)) } scm := channelz.GetSubChannel(subConn) if scm == nil { return false, fmt.Errorf("subChannel should still exist due to parent's trace reference") } - if scm.Trace == nil { + trace := scm.Trace() + if trace == nil { return false, fmt.Errorf("trace for SubChannel should not be empty") } - if len(scm.Trace.Events) == 0 { + if len(trace.Events) == 0 { return false, fmt.Errorf("there should be at least one trace event for subChannel not 0") } pattern := `Subchannel deleted` - desc := scm.Trace.Events[len(scm.Trace.Events)-1].Desc + desc := trace.Events[len(trace.Events)-1].Desc if ok, _ := regexp.MatchString(pattern, desc); !ok { return false, fmt.Errorf("the last trace event should be %q, not %q", pattern, desc) } @@ -1561,12 +1574,13 @@ func (s) TestCZChannelAddressResolutionChange(t *testing.T) { return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } cid = tcs[0].ID - for i := len(tcs[0].Trace.Events) - 1; i >= 0; i-- { - if strings.Contains(tcs[0].Trace.Events[i].Desc, "resolver returned new addresses") { + trace := tcs[0].Trace() + for i := len(trace.Events) - 1; i >= 0; i-- { + if strings.Contains(trace.Events[i].Desc, "resolver returned new addresses") { break } if i == 0 { - return false, fmt.Errorf("events do not contain expected address resolution from empty address state. Got: %+v", tcs[0].Trace.Events) + return false, fmt.Errorf("events do not contain expected address resolution from empty address state. 
Got: %+v", trace.Events) } } return true, nil @@ -1580,8 +1594,9 @@ func (s) TestCZChannelAddressResolutionChange(t *testing.T) { if err := verifyResultWithDelay(func() (bool, error) { cm := channelz.GetChannel(cid) - for i := len(cm.Trace.Events) - 1; i >= 0; i-- { - if strings.Contains(cm.Trace.Events[i].Desc, fmt.Sprintf("Channel switches to new LB policy %q", roundrobin.Name)) { + trace := cm.Trace() + for i := len(trace.Events) - 1; i >= 0; i-- { + if strings.Contains(trace.Events[i].Desc, fmt.Sprintf("Channel switches to new LB policy %q", roundrobin.Name)) { break } if i == 0 { @@ -1613,11 +1628,12 @@ func (s) TestCZChannelAddressResolutionChange(t *testing.T) { cm := channelz.GetChannel(cid) var es []string - for i := len(cm.Trace.Events) - 1; i >= 0; i-- { - if strings.Contains(cm.Trace.Events[i].Desc, "service config updated") { + trace := cm.Trace() + for i := len(trace.Events) - 1; i >= 0; i-- { + if strings.Contains(trace.Events[i].Desc, "service config updated") { break } - es = append(es, cm.Trace.Events[i].Desc) + es = append(es, trace.Events[i].Desc) if i == 0 { return false, fmt.Errorf("events do not contain expected address resolution of new service config\n Events:\n%v", strings.Join(es, "\n")) } @@ -1631,8 +1647,9 @@ func (s) TestCZChannelAddressResolutionChange(t *testing.T) { if err := verifyResultWithDelay(func() (bool, error) { cm := channelz.GetChannel(cid) - for i := len(cm.Trace.Events) - 1; i >= 0; i-- { - if strings.Contains(cm.Trace.Events[i].Desc, "resolver returned an empty address list") { + trace := cm.Trace() + for i := len(trace.Events) - 1; i >= 0; i-- { + if strings.Contains(trace.Events[i].Desc, "resolver returned an empty address list") { break } if i == 0 { @@ -1687,22 +1704,24 @@ func (s) TestCZSubChannelPickedNewAddress(t *testing.T) { if len(tcs) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } - if len(tcs[0].SubChans) != 1 { - return false, fmt.Errorf("there should be 1 
subchannel not %d", len(tcs[0].SubChans)) + subChans := tcs[0].SubChans() + if len(subChans) != 1 { + return false, fmt.Errorf("there should be 1 subchannel not %d", len(subChans)) } var subConn int64 - for k := range tcs[0].SubChans { + for k := range subChans { subConn = k } scm := channelz.GetSubChannel(subConn) - if scm.Trace == nil { + trace := scm.Trace() + if trace == nil { return false, fmt.Errorf("trace for SubChannel should not be empty") } - if len(scm.Trace.Events) == 0 { + if len(trace.Events) == 0 { return false, fmt.Errorf("there should be at least one trace event for subChannel not 0") } - for i := len(scm.Trace.Events) - 1; i >= 0; i-- { - if strings.Contains(scm.Trace.Events[i].Desc, fmt.Sprintf("Subchannel picks a new address %q to connect", te.srvAddrs[2])) { + for i := len(trace.Events) - 1; i >= 0; i-- { + if strings.Contains(trace.Events[i].Desc, fmt.Sprintf("Subchannel picks a new address %q to connect", te.srvAddrs[2])) { break } if i == 0 { @@ -1742,10 +1761,11 @@ func (s) TestCZSubChannelConnectivityState(t *testing.T) { if len(tcs) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } - if len(tcs[0].SubChans) != 1 { - return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) + subChans := tcs[0].SubChans() + if len(subChans) != 1 { + return false, fmt.Errorf("there should be 1 subchannel not %d", len(subChans)) } - for k := range tcs[0].SubChans { + for k := range subChans { // get the SubChannel id for further trace inquiry. 
subConn = k t.Logf("SubChannel Id is %d", subConn) @@ -1755,15 +1775,16 @@ func (s) TestCZSubChannelConnectivityState(t *testing.T) { if scm == nil { return false, fmt.Errorf("subChannel should still exist due to parent's trace reference") } - if scm.Trace == nil { + trace := scm.Trace() + if trace == nil { return false, fmt.Errorf("trace for SubChannel should not be empty") } - if len(scm.Trace.Events) == 0 { + if len(trace.Events) == 0 { return false, fmt.Errorf("there should be at least one trace event for subChannel not 0") } var ready, connecting, transient, shutdown int t.Log("SubChannel trace events seen so far...") - for _, e := range scm.Trace.Events { + for _, e := range trace.Events { t.Log(e.Desc) if strings.Contains(e.Desc, fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.TransientFailure)) { transient++ @@ -1777,7 +1798,7 @@ func (s) TestCZSubChannelConnectivityState(t *testing.T) { transient = 0 r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: "fake address"}}}) t.Log("SubChannel trace events seen so far...") - for _, e := range scm.Trace.Events { + for _, e := range trace.Events { t.Log(e.Desc) if strings.Contains(e.Desc, fmt.Sprintf("Subchannel Connectivity change to %v", connectivity.Ready)) { ready++ @@ -1838,7 +1859,7 @@ func (s) TestCZChannelConnectivityState(t *testing.T) { var ready, connecting, transient int t.Log("Channel trace events seen so far...") - for _, e := range tcs[0].Trace.Events { + for _, e := range tcs[0].Trace().Events { t.Log(e.Desc) if strings.Contains(e.Desc, fmt.Sprintf("Channel Connectivity change to %v", connectivity.Ready)) { ready++ @@ -1888,10 +1909,11 @@ func (s) TestCZTraceOverwriteChannelDeletion(t *testing.T) { if len(tcs) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } - if len(tcs[0].NestedChans) != 1 { - return false, fmt.Errorf("there should be one nested channel from grpclb, not %d", len(tcs[0].NestedChans)) + nestedChans := 
tcs[0].NestedChans() + if len(nestedChans) != 1 { + return false, fmt.Errorf("there should be one nested channel from grpclb, not %d", len(nestedChans)) } - for k := range tcs[0].NestedChans { + for k := range nestedChans { nestedConn = k } return true, nil @@ -1910,8 +1932,9 @@ func (s) TestCZTraceOverwriteChannelDeletion(t *testing.T) { if len(tcs) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } - if len(tcs[0].NestedChans) != 0 { - return false, fmt.Errorf("there should be 0 nested channel from grpclb, not %d", len(tcs[0].NestedChans)) + + if nestedChans := tcs[0].NestedChans(); len(nestedChans) != 0 { + return false, fmt.Errorf("there should be 0 nested channel from grpclb, not %d", len(nestedChans)) } return true, nil }); err != nil { @@ -1956,10 +1979,11 @@ func (s) TestCZTraceOverwriteSubChannelDeletion(t *testing.T) { if len(tcs) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } - if len(tcs[0].SubChans) != 1 { - return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) + subChans := tcs[0].SubChans() + if len(subChans) != 1 { + return false, fmt.Errorf("there should be 1 subchannel not %d", len(subChans)) } - for k := range tcs[0].SubChans { + for k := range subChans { subConn = k } return true, nil @@ -2001,10 +2025,11 @@ func (s) TestCZTraceTopChannelDeletionTraceClear(t *testing.T) { if len(tcs) != 1 { return false, fmt.Errorf("there should only be one top channel, not %d", len(tcs)) } - if len(tcs[0].SubChans) != 1 { - return false, fmt.Errorf("there should be 1 subchannel not %d", len(tcs[0].SubChans)) + subChans := tcs[0].SubChans() + if len(subChans) != 1 { + return false, fmt.Errorf("there should be 1 subchannel not %d", len(subChans)) } - for k := range tcs[0].SubChans { + for k := range subChans { subConn = k } return true, nil diff --git a/test/clientconn_test.go b/test/clientconn_test.go index 4432701fb3d1..d34fea9133ec 100644 --- 
a/test/clientconn_test.go +++ b/test/clientconn_test.go @@ -71,8 +71,8 @@ func (s) TestClientConnClose_WithPendingRPC(t *testing.T) { if len(tcs) != 1 { t.Fatalf("there should only be one top channel, not %d", len(tcs)) } - started := tcs[0].ChannelData.CallsStarted - completed := tcs[0].ChannelData.CallsSucceeded + tcs[0].ChannelData.CallsFailed + started := tcs[0].ChannelMetrics.CallsStarted.Load() + completed := tcs[0].ChannelMetrics.CallsSucceeded.Load() + tcs[0].ChannelMetrics.CallsFailed.Load() if (started - completed) == 1 { break } diff --git a/test/healthcheck_test.go b/test/healthcheck_test.go index 2b5b5a82d93c..b03c47a31426 100644 --- a/test/healthcheck_test.go +++ b/test/healthcheck_test.go @@ -787,23 +787,25 @@ func (s) TestHealthCheckChannelzCountingCallSuccess(t *testing.T) { if len(cm) == 0 { return false, errors.New("channelz.GetTopChannels return 0 top channel") } - if len(cm[0].SubChans) == 0 { + subChans := cm[0].SubChans() + if len(subChans) == 0 { return false, errors.New("there is 0 subchannel") } var id int64 - for k := range cm[0].SubChans { + for k := range subChans { id = k break } scm := channelz.GetSubChannel(id) - if scm == nil || scm.ChannelData == nil { - return false, errors.New("nil subchannel metric or nil subchannel metric ChannelData returned") + if scm == nil { + return false, errors.New("nil subchannel returned") } // exponential backoff retry may result in more than one health check call. 
- if scm.ChannelData.CallsStarted > 0 && scm.ChannelData.CallsSucceeded > 0 && scm.ChannelData.CallsFailed == 0 { + cstart, csucc, cfail := scm.ChannelMetrics.CallsStarted.Load(), scm.ChannelMetrics.CallsSucceeded.Load(), scm.ChannelMetrics.CallsFailed.Load() + if cstart > 0 && csucc > 0 && cfail == 0 { return true, nil } - return false, fmt.Errorf("got %d CallsStarted, %d CallsSucceeded, want >0 >0", scm.ChannelData.CallsStarted, scm.ChannelData.CallsSucceeded) + return false, fmt.Errorf("got %d CallsStarted, %d CallsSucceeded %d CallsFailed, want >0 >0 =0", cstart, csucc, cfail) }); err != nil { t.Fatal(err) } @@ -834,23 +836,25 @@ func (s) TestHealthCheckChannelzCountingCallFailure(t *testing.T) { if len(cm) == 0 { return false, errors.New("channelz.GetTopChannels return 0 top channel") } - if len(cm[0].SubChans) == 0 { + subChans := cm[0].SubChans() + if len(subChans) == 0 { return false, errors.New("there is 0 subchannel") } var id int64 - for k := range cm[0].SubChans { + for k := range subChans { id = k break } scm := channelz.GetSubChannel(id) - if scm == nil || scm.ChannelData == nil { - return false, errors.New("nil subchannel metric or nil subchannel metric ChannelData returned") + if scm == nil { + return false, errors.New("nil subchannel returned") } // exponential backoff retry may result in more than one health check call. 
- if scm.ChannelData.CallsStarted > 0 && scm.ChannelData.CallsFailed > 0 && scm.ChannelData.CallsSucceeded == 0 { + cstart, cfail, csucc := scm.ChannelMetrics.CallsStarted.Load(), scm.ChannelMetrics.CallsFailed.Load(), scm.ChannelMetrics.CallsSucceeded.Load() + if cstart > 0 && cfail > 0 && csucc == 0 { return true, nil } - return false, fmt.Errorf("got %d CallsStarted, %d CallsFailed, want >0, >0", scm.ChannelData.CallsStarted, scm.ChannelData.CallsFailed) + return false, fmt.Errorf("got %d CallsStarted, %d CallsFailed, %d CallsSucceeded, want >0, >0", cstart, cfail, csucc) }); err != nil { t.Fatal(err) } diff --git a/test/pickfirst_test.go b/test/pickfirst_test.go index a762831a2e0c..171d40d29d7a 100644 --- a/test/pickfirst_test.go +++ b/test/pickfirst_test.go @@ -279,8 +279,8 @@ func (s) TestPickFirst_NewAddressWhileBlocking(t *testing.T) { if len(tcs) != 1 { t.Fatalf("there should only be one top channel, not %d", len(tcs)) } - started := tcs[0].ChannelData.CallsStarted - completed := tcs[0].ChannelData.CallsSucceeded + tcs[0].ChannelData.CallsFailed + started := tcs[0].ChannelMetrics.CallsStarted.Load() + completed := tcs[0].ChannelMetrics.CallsSucceeded.Load() + tcs[0].ChannelMetrics.CallsFailed.Load() if (started - completed) == 1 { break } diff --git a/test/roundrobin_test.go b/test/roundrobin_test.go index b4b17895b053..980b6bd4f1b4 100644 --- a/test/roundrobin_test.go +++ b/test/roundrobin_test.go @@ -128,7 +128,7 @@ func (s) TestRoundRobin_AddressesRemoved(t *testing.T) { // TestRoundRobin_NewAddressWhileBlocking tests the case where round_robin is // configured on a channel, things are working as expected and then a resolver // updates removes all addresses. An RPC attempted at this point in time will be -// blocked because there are no valid backends. This test verifies that when new +// blocked because there are no valid ¡ds. This test verifies that when new // backends are added, the RPC is able to complete. 
func (s) TestRoundRobin_NewAddressWhileBlocking(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) @@ -163,8 +163,8 @@ func (s) TestRoundRobin_NewAddressWhileBlocking(t *testing.T) { if len(tcs) != 1 { t.Fatalf("there should only be one top channel, not %d", len(tcs)) } - started := tcs[0].ChannelData.CallsStarted - completed := tcs[0].ChannelData.CallsSucceeded + tcs[0].ChannelData.CallsFailed + started := tcs[0].ChannelMetrics.CallsStarted.Load() + completed := tcs[0].ChannelMetrics.CallsSucceeded.Load() + tcs[0].ChannelMetrics.CallsFailed.Load() if (started - completed) == 1 { break } diff --git a/xds/internal/balancer/clustermanager/clustermanager_test.go b/xds/internal/balancer/clustermanager/clustermanager_test.go index a00a2836060a..b998c1b35f29 100644 --- a/xds/internal/balancer/clustermanager/clustermanager_test.go +++ b/xds/internal/balancer/clustermanager/clustermanager_test.go @@ -31,7 +31,6 @@ import ( "google.golang.org/grpc/connectivity" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/internal/balancer/stub" - "google.golang.org/grpc/internal/channelz" "google.golang.org/grpc/internal/grpctest" "google.golang.org/grpc/internal/hierarchy" "google.golang.org/grpc/internal/testutils" @@ -482,9 +481,8 @@ func TestClusterManagerForwardsBalancerBuildOptions(t *testing.T) { // it in the UpdateClientConnState method. 
ccsCh := testutils.NewChannel() bOpts := balancer.BuildOptions{ - DialCreds: insecure.NewCredentials(), - ChannelzParentID: channelz.NewIdentifierForTesting(channelz.RefChannel, 1234, nil), - CustomUserAgent: userAgent, + DialCreds: insecure.NewCredentials(), + CustomUserAgent: userAgent, } stub.Register(t.Name(), stub.BalancerFuncs{ UpdateClientConnState: func(bd *stub.BalancerData, _ balancer.ClientConnState) error { diff --git a/xds/internal/balancer/outlierdetection/balancer.go b/xds/internal/balancer/outlierdetection/balancer.go index 9e577c521d5a..34604318c31f 100644 --- a/xds/internal/balancer/outlierdetection/balancer.go +++ b/xds/internal/balancer/outlierdetection/balancer.go @@ -61,14 +61,14 @@ type bb struct{} func (bb) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Balancer { b := &outlierDetectionBalancer{ - cc: cc, - closed: grpcsync.NewEvent(), - done: grpcsync.NewEvent(), - addrs: make(map[string]*addressInfo), - scWrappers: make(map[balancer.SubConn]*subConnWrapper), - scUpdateCh: buffer.NewUnbounded(), - pickerUpdateCh: buffer.NewUnbounded(), - channelzParentID: bOpts.ChannelzParentID, + cc: cc, + closed: grpcsync.NewEvent(), + done: grpcsync.NewEvent(), + addrs: make(map[string]*addressInfo), + scWrappers: make(map[balancer.SubConn]*subConnWrapper), + scUpdateCh: buffer.NewUnbounded(), + pickerUpdateCh: buffer.NewUnbounded(), + channelzParent: bOpts.ChannelzParent, } b.logger = prefixLogger(b) b.logger.Infof("Created") @@ -164,11 +164,11 @@ type outlierDetectionBalancer struct { // to suppress redundant picker updates. recentPickerNoop bool - closed *grpcsync.Event - done *grpcsync.Event - cc balancer.ClientConn - logger *grpclog.PrefixLogger - channelzParentID *channelz.Identifier + closed *grpcsync.Event + done *grpcsync.Event + cc balancer.ClientConn + logger *grpclog.PrefixLogger + channelzParent channelz.Identifier // childMu guards calls into child (to uphold the balancer.Balancer API // guarantee of synchronous calls). 
@@ -837,7 +837,7 @@ func (b *outlierDetectionBalancer) successRateAlgorithm() { successRate := float64(bucket.numSuccesses) / float64(bucket.numSuccesses+bucket.numFailures) requiredSuccessRate := mean - stddev*(float64(ejectionCfg.StdevFactor)/1000) if successRate < requiredSuccessRate { - channelz.Infof(logger, b.channelzParentID, "SuccessRate algorithm detected outlier: %s. Parameters: successRate=%f, mean=%f, stddev=%f, requiredSuccessRate=%f", addrInfo, successRate, mean, stddev, requiredSuccessRate) + channelz.Infof(logger, b.channelzParent, "SuccessRate algorithm detected outlier: %s. Parameters: successRate=%f, mean=%f, stddev=%f, requiredSuccessRate=%f", addrInfo, successRate, mean, stddev, requiredSuccessRate) if uint32(grpcrand.Int31n(100)) < ejectionCfg.EnforcementPercentage { b.ejectAddress(addrInfo) } @@ -864,7 +864,7 @@ func (b *outlierDetectionBalancer) failurePercentageAlgorithm() { } failurePercentage := (float64(bucket.numFailures) / float64(bucket.numSuccesses+bucket.numFailures)) * 100 if failurePercentage > float64(b.cfg.FailurePercentageEjection.Threshold) { - channelz.Infof(logger, b.channelzParentID, "FailurePercentage algorithm detected outlier: %s, failurePercentage=%f", addrInfo, failurePercentage) + channelz.Infof(logger, b.channelzParent, "FailurePercentage algorithm detected outlier: %s, failurePercentage=%f", addrInfo, failurePercentage) if uint32(grpcrand.Int31n(100)) < ejectionCfg.EnforcementPercentage { b.ejectAddress(addrInfo) } @@ -879,7 +879,7 @@ func (b *outlierDetectionBalancer) ejectAddress(addrInfo *addressInfo) { addrInfo.ejectionTimeMultiplier++ for _, sbw := range addrInfo.sws { sbw.eject() - channelz.Infof(logger, b.channelzParentID, "Subchannel ejected: %s", sbw) + channelz.Infof(logger, b.channelzParent, "Subchannel ejected: %s", sbw) } } @@ -890,7 +890,7 @@ func (b *outlierDetectionBalancer) unejectAddress(addrInfo *addressInfo) { addrInfo.latestEjectionTimestamp = time.Time{} for _, sbw := range addrInfo.sws { 
sbw.uneject() - channelz.Infof(logger, b.channelzParentID, "Subchannel unejected: %s", sbw) + channelz.Infof(logger, b.channelzParent, "Subchannel unejected: %s", sbw) } } diff --git a/xds/internal/balancer/outlierdetection/balancer_test.go b/xds/internal/balancer/outlierdetection/balancer_test.go index 32c3a378d7ea..54eefaa34c1a 100644 --- a/xds/internal/balancer/outlierdetection/balancer_test.go +++ b/xds/internal/balancer/outlierdetection/balancer_test.go @@ -550,7 +550,9 @@ func setup(t *testing.T) (*outlierDetectionBalancer, *testutils.BalancerClientCo t.Fatalf("balancer.Get(%q) returned nil", Name) } tcc := testutils.NewBalancerClientConn(t) - odB := builder.Build(tcc, balancer.BuildOptions{ChannelzParentID: channelz.NewIdentifierForTesting(channelz.RefChannel, time.Now().Unix(), nil)}) + ch := channelz.RegisterChannel(nil, "test channel") + t.Cleanup(func() { channelz.RemoveEntry(ch.ID) }) + odB := builder.Build(tcc, balancer.BuildOptions{ChannelzParent: ch}) return odB.(*outlierDetectionBalancer), tcc, odB.Close }