redis · ofekshenawa · Mar 16, 2026 · Mar 9, 2026 · Mar 9, 2026 · Mar 9, 2026
diff --git a/extra/redisotel-native/config.go b/extra/redisotel-native/config.go
@@ -156,22 +156,27 @@ type Config struct {
 // NewConfig creates a new Config with default values.
 // Default configuration:
 // - Enabled: false (must explicitly enable)
-// - MetricGroups: connection-basic + resiliency
+// - MetricGroups: all metric groups (command, connection, resiliency, pubsub, stream)
 // - HistogramAggregation: explicit bucket
 // - Buckets: 0.1ms to 10s (suitable for Redis operations)
 //
 // Example:
 //
 //	config := redisotel.NewConfig().
+//	    WithEnabled(true)
+//
+// To enable only a subset of metric groups, pass the desired flags to WithMetricGroups:
+//
+//	config := redisotel.NewConfig().
 //	    WithEnabled(true).
-//	    WithMetricGroups(redisotel.MetricGroupAll)
+//	    WithMetricGroups(redisotel.MetricGroupFlagConnectionBasic | redisotel.MetricGroupFlagResiliency)
 func NewConfig() *Config {
 	return &Config{
 		Enabled:       false,
 		MeterProvider: nil, // Will use global otel.GetMeterProvider() if nil
 
-		// Default metric groups: connection-basic + resiliency
-		MetricGroups: MetricGroupFlagConnectionBasic | MetricGroupFlagResiliency,
+		// Default metric groups: all groups enabled for comprehensive observability
+		MetricGroups: MetricGroupAll,
 
 		// No command filtering by default
 		IncludeCommands: nil,

diff --git a/extra/redisotel-native/go.mod b/extra/redisotel-native/go.mod
@@ -2,23 +2,24 @@ module github.com/redis/go-redis/extra/redisotel-native/v9
 
 go 1.24.0
 
-toolchain go1.24.4
-
 replace github.com/redis/go-redis/v9 => ../..
 
 require (
 	github.com/redis/go-redis/v9 v9.18.0
-	go.opentelemetry.io/otel v1.39.0
-	go.opentelemetry.io/otel/metric v1.39.0
+	go.opentelemetry.io/otel v1.40.0
+	go.opentelemetry.io/otel/metric v1.40.0
+	go.opentelemetry.io/otel/sdk/metric v1.40.0
 )
 
 require (
 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
 	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
 	github.com/go-logr/logr v1.4.3 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/google/uuid v1.6.0 // indirect
 	go.opentelemetry.io/auto/sdk v1.2.1 // indirect
-	go.opentelemetry.io/otel/trace v1.39.0 // indirect
+	go.opentelemetry.io/otel/sdk v1.40.0 // indirect
+	go.opentelemetry.io/otel/trace v1.40.0 // indirect
 	go.uber.org/atomic v1.11.0 // indirect
-	golang.org/x/sys v0.39.0 // indirect
+	golang.org/x/sys v0.40.0 // indirect
 )
diff --git a/extra/redisotel-native/go.sum b/extra/redisotel-native/go.sum
@@ -15,6 +15,8 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
 github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
 github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
 github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/klauspost/cpuid/v2 v2.2.10 h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE=
 github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
@@ -25,15 +27,19 @@ github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs=
 github.com/zeebo/xxh3 v1.1.0/go.mod h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s=
 go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
 go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
-go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48=
-go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8=
-go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0=
-go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs=
-go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI=
-go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA=
+go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms=
+go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g=
+go.opentelemetry.io/otel/metric v1.40.0 h1:rcZe317KPftE2rstWIBitCdVp89A2HqjkxR3c11+p9g=
+go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc=
+go.opentelemetry.io/otel/sdk v1.40.0 h1:KHW/jUzgo6wsPh9At46+h4upjtccTmuZCFAc9OJ71f8=
+go.opentelemetry.io/otel/sdk v1.40.0/go.mod h1:Ph7EFdYvxq72Y8Li9q8KebuYUr2KoeyHx0DRMKrYBUE=
+go.opentelemetry.io/otel/sdk/metric v1.40.0 h1:mtmdVqgQkeRxHgRv4qhyJduP3fYJRMX4AtAlbuWdCYw=
+go.opentelemetry.io/otel/sdk/metric v1.40.0/go.mod h1:4Z2bGMf0KSK3uRjlczMOeMhKU2rhUqdWNoKcYrtcBPg=
+go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw=
+go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA=
 go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
 go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
-golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
-golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
+golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/extra/redisotel-native/metrics_definitions_test.go b/extra/redisotel-native/metrics_definitions_test.go
@@ -0,0 +1,61 @@
+package redisotel
+
+import "testing"
+
+// Expected metric names per OTel semantic conventions, hand-written here as the reference values for the test below.
+// Reference: https://opentelemetry.io/docs/specs/semconv/database/database-metrics/
+const (
+	semconvOperationDuration    = "db.client.operation.duration"
+	semconvConnectionCount      = "db.client.connection.count"
+	semconvConnectionCreateTime = "db.client.connection.create_time"
+	semconvConnectionWaitTime   = "db.client.connection.wait_time"
+	semconvConnectionPending    = "db.client.connection.pending_requests"
+)
+
+// TestMetricDefinitionsMatchSemconv verifies metric names match OTel semantic conventions.
+func TestMetricDefinitionsMatchSemconv(t *testing.T) {
+	tests := []struct {
+		name     string
+		got      string
+		expected string
+	}{
+		{"db.client.operation.duration", MetricOperationDuration, semconvOperationDuration},
+		{"db.client.connection.count", MetricConnectionCount, semconvConnectionCount},
+		{"db.client.connection.create_time", MetricConnectionCreateTime, semconvConnectionCreateTime},
+		{"db.client.connection.wait_time", MetricConnectionWaitTime, semconvConnectionWaitTime},
+		{"db.client.connection.pending_requests", MetricConnectionPendingReqs, semconvConnectionPending},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if tt.got != tt.expected {
+				t.Errorf("got %q, want %q", tt.got, tt.expected)
+			}
+		})
+	}
+}
+
+// TestSemconvMetricTypes documents expected metric instrument types.
+// The subtests only carry that documentation in their names: each body is
+// empty, so nothing is asserted and they always pass.
+// Semconv specifies UpDownCounter for connection.count and pending_requests,
+// but this implementation uses ObservableGauge (known deviation, see issue #3730).
+func TestSemconvMetricTypes(t *testing.T) {
+	documented := []string{
+		// Known deviation: using ObservableGauge instead of UpDownCounter
+		"connection.count uses Gauge (semconv specifies UpDownCounter)",
+		// Known deviation: using ObservableGauge instead of UpDownCounter
+		"pending_requests uses Gauge (semconv specifies UpDownCounter)",
+		// Matches semconv: Float64Histogram
+		"operation.duration uses Histogram (correct)",
+		// Matches semconv: Float64Histogram
+		"connection.create_time uses Histogram (correct)",
+		// Matches semconv: Float64Histogram
+		"connection.wait_time uses Histogram (correct)",
+	}
+
+	for _, title := range documented {
+		// Placeholder subtest: registers the name, checks nothing.
+		t.Run(title, func(t *testing.T) {})
+	}
+}
diff --git a/extra/redisotel-native/metrics_stress_test.go b/extra/redisotel-native/metrics_stress_test.go
@@ -0,0 +1,220 @@
+package redisotel
+
+import (
+	"context"
+	"fmt"
+	"math/rand"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/redis/go-redis/v9"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/sdk/metric"
+	"go.opentelemetry.io/otel/sdk/metric/metricdata"
+)
+
+const (
+	stressTestDuration       = 30 * time.Second       // how long the workers keep issuing commands
+	stressTestConcurrency    = 50                     // number of worker goroutines; also the client PoolSize
+	stressTestMinDelay       = 10 * time.Millisecond  // shortest pause between worker iterations
+	stressTestMaxDelay       = 100 * time.Millisecond // exclusive upper bound of that pause
+	stressTestStatusInterval = 5 * time.Second        // period of the progress log
+)
+
+// TestMetricsUnderStress validates metrics recording under concurrent load.
+// It runs stressTestConcurrency workers issuing SET/GET pairs against a local
+// Redis for stressTestDuration, then collects through a manual reader and checks
+// that the required metrics were produced. Skipped when Redis is unreachable.
+func TestMetricsUnderStress(t *testing.T) {
+	ctx := context.Background()
+	testClient := redis.NewClient(&redis.Options{Addr: "localhost:6379"})
+	if err := testClient.Ping(ctx).Err(); err != nil {
+		testClient.Close()
+		t.Skip("Redis not available at localhost:6379")
+	}
+	testClient.Close()
+
+	reader := metric.NewManualReader()
+	meterProvider := metric.NewMeterProvider(metric.WithReader(reader))
+	defer func() { _ = meterProvider.Shutdown(ctx) }() // best-effort cleanup; a shutdown error is not a test failure
+
+	otel.SetMeterProvider(meterProvider)
+	resetObservabilityForTest()
+	otelInstance := GetObservabilityInstance()
+	config := NewConfig().
+		WithEnabled(true).
+		WithMeterProvider(meterProvider).
+		WithMetricGroups(MetricGroupAll)
+
+	if err := otelInstance.Init(config); err != nil {
+		t.Fatalf("Failed to initialize OTel: %v", err)
+	}
+	defer otelInstance.Shutdown()
+
+	rdb := redis.NewClient(&redis.Options{
+		Addr:         "localhost:6379",
+		PoolSize:     stressTestConcurrency,
+		MinIdleConns: 10,
+	})
+	defer rdb.Close()
+
+	var opsCompleted, opsErrors atomic.Int64
+	startTime := time.Now()
+	deadline := startTime.Add(stressTestDuration)
+
+	statusTicker := time.NewTicker(stressTestStatusInterval)
+	defer statusTicker.Stop()
+	done := make(chan struct{})
+	statusExited := make(chan struct{}) // closed once the status goroutine has returned
+
+	go func() {
+		defer close(statusExited)
+		for {
+			select {
+			case <-statusTicker.C:
+				elapsed := time.Since(startTime).Seconds()
+				ops := opsCompleted.Load()
+				t.Logf("[%.0fs] %d ops, %.1f ops/sec", elapsed, ops, float64(ops)/elapsed)
+			case <-done:
+				return
+			}
+		}
+	}()
+
+	var wg sync.WaitGroup
+	for i := 0; i < stressTestConcurrency; i++ {
+		wg.Add(1)
+		go func(workerID int) {
+			defer wg.Done()
+			for time.Now().Before(deadline) {
+				key := fmt.Sprintf("stress_test_key_%d_%d", workerID, rand.Int63())
+				value := fmt.Sprintf("value_%d", time.Now().UnixNano())
+				if err := rdb.Set(ctx, key, value, time.Minute).Err(); err != nil {
+					opsErrors.Add(1)
+				} else {
+					opsCompleted.Add(1)
+				}
+				if _, err := rdb.Get(ctx, key).Result(); err != nil && err != redis.Nil {
+					opsErrors.Add(1)
+				} else {
+					opsCompleted.Add(1)
+				}
+
+				delay := stressTestMinDelay + time.Duration(rand.Int63n(int64(stressTestMaxDelay-stressTestMinDelay)))
+				time.Sleep(delay)
+			}
+		}(i)
+	}
+
+	wg.Wait()
+	close(done)
+	<-statusExited // t.Logf must not run after the test returns, so wait for the reporter to exit
+
+	totalOps := opsCompleted.Load()
+	totalErrors := opsErrors.Load()
+	elapsed := time.Since(startTime)
+	t.Logf("Completed: %d ops in %v (%.1f ops/sec), %d errors",
+		totalOps, elapsed.Round(time.Second), float64(totalOps)/elapsed.Seconds(), totalErrors)
+
+	var rm metricdata.ResourceMetrics
+	if err := reader.Collect(ctx, &rm); err != nil {
+		t.Fatalf("Failed to collect metrics: %v", err)
+	}
+
+	validateMetrics(t, rm)
+}
+
+// validateMetrics reports a test error for every required metric name that is
+// missing from the collected rm. Only presence is checked, not recorded values.
+func validateMetrics(t *testing.T, rm metricdata.ResourceMetrics) {
+	t.Helper() // attribute failures to the calling test's line
+
+	// Index every metric name found in any scope.
+	metricsFound := make(map[string]bool)
+	for _, sm := range rm.ScopeMetrics {
+		for _, m := range sm.Metrics {
+			metricsFound[m.Name] = true
+		}
+	}
+
+	required := []string{MetricConnectionCount, MetricConnectionCreateTime, MetricOperationDuration}
+	for _, name := range required {
+		if !metricsFound[name] {
+			t.Errorf("Required metric not found: %s", name)
+		}
+	}
+}
+
+// resetObservabilityForTest nils observabilityInstance and swaps in a fresh sync.Once, with no locking around either write.
+func resetObservabilityForTest() {
+	observabilityInstance, observabilityInstanceOnce = nil, sync.Once{}
+}
+
+// TestTracingAndMetricsCompatibility is meant to show that redisotel (tracing)
+// and redisotel-native (metrics) can be used together: tracing uses AddHook
+// (per-client), metrics uses SetOTelRecorder (global). Only metrics are set up here.
+func TestTracingAndMetricsCompatibility(t *testing.T) {
+	ctx := context.Background()
+	probe := redis.NewClient(&redis.Options{Addr: "localhost:6379"})
+	pingErr := probe.Ping(ctx).Err()
+	probe.Close()
+	if pingErr != nil {
+		t.Skip("Redis not available at localhost:6379")
+	}
+
+	manualReader := metric.NewManualReader()
+	provider := metric.NewMeterProvider(metric.WithReader(manualReader))
+	defer provider.Shutdown(ctx)
+
+	otel.SetMeterProvider(provider)
+	resetObservabilityForTest()
+
+	instance := GetObservabilityInstance()
+	cfg := NewConfig().
+		WithEnabled(true).
+		WithMeterProvider(provider).
+		WithMetricGroups(MetricGroupAll)
+
+	if initErr := instance.Init(cfg); initErr != nil {
+		t.Fatalf("Failed to initialize OTel metrics: %v", initErr)
+	}
+	defer instance.Shutdown()
+
+	client := redis.NewClient(&redis.Options{
+		Addr:     "localhost:6379",
+		PoolSize: 5,
+	})
+	defer client.Close()
+
+	// Tracing is not attached in this test; production code would also call redisotel.InstrumentTracing(client).
+
+	for n := 0; n < 10; n++ {
+		key := fmt.Sprintf("compat-test-%d", n)
+		if setErr := client.Set(ctx, key, "value", time.Minute).Err(); setErr != nil {
+			t.Fatalf("SET failed: %v", setErr)
+		}
+		if _, getErr := client.Get(ctx, key).Result(); getErr != nil {
+			t.Fatalf("GET failed: %v", getErr)
+		}
+	}
+
+	var collected metricdata.ResourceMetrics
+	if collectErr := manualReader.Collect(ctx, &collected); collectErr != nil {
+		t.Fatalf("Failed to collect metrics: %v", collectErr)
+	}
+
+	// Scan every scope for the operation-duration metric.
+	sawDuration := false
+	for _, scope := range collected.ScopeMetrics {
+		for _, item := range scope.Metrics {
+			sawDuration = sawDuration || item.Name == MetricOperationDuration
+		}
+	}
+
+	if sawDuration {
+		return
+	}
+	t.Error("Expected to find db.client.operation.duration metric")
+}
diff --git a/internal/pool/pool.go b/internal/pool/pool.go
@@ -1234,6 +1234,12 @@ func (p *ConnPool) removeConnInternal(ctx context.Context, cn *Conn, reason erro
 
 func (p *ConnPool) CloseConn(cn *Conn) error {
 	p.removeConnWithLock(cn)
+
+	// Notify the connection-closed metric callback, if getMetricConnectionClosedCallback returns one, before p.closeConn runs.
+	if cb := getMetricConnectionClosedCallback(); cb != nil {
+		cb(context.Background(), cn, "stale", nil) // reason is always "stale"; CloseConn receives no ctx, so Background is passed
+	}
+
 	return p.closeConn(cn)
 }