From 28825cc7ead8d5a0c8dce7510bb17c2ebda95bb0 Mon Sep 17 00:00:00 2001 From: Joseph Heyburn Date: Tue, 14 Oct 2025 08:21:30 +0100 Subject: [PATCH 1/3] Have dual-channel-repl use replica-announce-ip - Dual channel replication does not use replica-announce-ip - This causes stale replicas in Sentinel - This change fixes that to use replica-announce-ip if it is set Signed-off-by: Joseph Heyburn --- src/replication.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/replication.c b/src/replication.c index 00c3ad5fbd..590a4483de 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2973,6 +2973,14 @@ static int dualChannelReplHandleHandshake(connection *conn, sds *err) { return C_ERR; } + if (server.replica_announce_ip) { + *err = sendCommand(conn, "REPLCONF", "ip-address", server.replica_announce_ip, NULL); + if (*err) { + dualChannelServerLog(LL_WARNING, "Sending command to primary in dual channel replication handshake: %s", *err); + return C_ERR; + } + } + if (connSetReadHandler(conn, dualChannelFullSyncWithPrimary) == C_ERR) { char conninfo[CONN_INFO_LEN]; dualChannelServerLog(LL_WARNING, "Can't create readable event for SYNC: %s (%s)", strerror(errno), From cd4be747f11e78e3b075c45f315da90e42ff3df0 Mon Sep 17 00:00:00 2001 From: Joseph Heyburn Date: Fri, 14 Nov 2025 13:24:52 +0000 Subject: [PATCH 2/3] Add tests for dual-channel-replication announce-ip Signed-off-by: Joseph Heyburn --- .../19-dual-channel-replica-announce-ip.tcl | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 tests/sentinel/tests/19-dual-channel-replica-announce-ip.tcl diff --git a/tests/sentinel/tests/19-dual-channel-replica-announce-ip.tcl b/tests/sentinel/tests/19-dual-channel-replica-announce-ip.tcl new file mode 100644 index 0000000000..11df16a498 --- /dev/null +++ b/tests/sentinel/tests/19-dual-channel-replica-announce-ip.tcl @@ -0,0 +1,112 @@ +# Test dual-channel replication with replica-announce-ip +# This test verifies that when dual-channel 
replication is enabled, +# the rdb/aof channel correctly announces its IP address to Sentinel +# using the replica-announce-ip configuration. + +source "../tests/includes/utils.tcl" + +# Configure replica-announce-ip before initialization +test "(pre-init) Configure replica-announce-ip" { + restart_killed_instances + + # Set replica-announce-ip on all replicas (non-primary instances) + # We'll use 127.0.0.1 as the announced IP (must be reachable for Sentinel) + set ::announced_ip "127.0.0.1" + for {set id 1} {$id < $::instances_count} {incr id} { + R $id config set replica-announce-ip $::announced_ip + } +} + +source "../tests/includes/init-tests.tcl" + +# Enable dual-channel replication after initialization is complete +test "Enable dual-channel replication and verify replica-announce-ip" { + # Enable dual-channel replication on all instances + foreach_valkey_id id { + R $id config set dual-channel-replication-enabled yes + R $id config set repl-diskless-sync yes + } + + # Give Sentinel time to update its view of replicas + after 1000 +} + +proc verify_replica_announced_ip {expected_ip} { + foreach_sentinel_id id { + # Check that replicas are reported with the announced IP + set replicas [S $id SENTINEL REPLICAS mymaster] + foreach replica $replicas { + set replica_ip [dict get $replica ip] + if {$replica_ip ne $expected_ip} { + return 0 + } + } + } + return 1 +} + +test "Sentinel reports replicas with announced IP in dual-channel replication" { + # Wait for replicas to sync with the primary + wait_for_condition 1000 50 { + [verify_replica_announced_ip $::announced_ip] + } else { + fail "Sentinel did not report replicas with the announced IP ($::announced_ip)" + } +} + +test "Verify replica is actually connected and syncing" { + # Verify that the replica is actually online and replicating + # by setting a value on the primary and checking it propagates + set test_key "test_key_for_dual_channel" + set test_value "test_value_[randomInt 1000000]" + + R $master_id set 
$test_key $test_value + + # Wait for the value to propagate to all replicas + for {set replica_id 1} {$replica_id < $::instances_count} {incr replica_id} { + wait_for_condition 1000 50 { + [R $replica_id get $test_key] eq $test_value + } else { + fail "Value did not propagate to replica $replica_id" + } + } +} + +test "Sentinel failover works with dual-channel replication and announced IP" { + # Verify that failover still works correctly with dual-channel replication + set old_primary_id $master_id + set old_primary_addr [S 0 SENTINEL GET-PRIMARY-ADDR-BY-NAME mymaster] + + # Kill the primary + kill_instance valkey $old_primary_id + + # Wait for Sentinel to detect the failure and perform failover + wait_for_condition 10000 50 { + [lindex [S 0 SENTINEL GET-PRIMARY-ADDR-BY-NAME mymaster] 1] != [lindex $old_primary_addr 1] + } else { + fail "Sentinel did not perform failover" + } + + # Verify new primary is elected + set new_primary_addr [S 0 SENTINEL GET-PRIMARY-ADDR-BY-NAME mymaster] + assert {$new_primary_addr ne $old_primary_addr} + + # Restart the old primary + restart_instance valkey $old_primary_id + + # Wait for the old primary to become a replica + wait_for_condition 1000 50 { + [RI $old_primary_id role] eq {slave} + } else { + fail "Old primary did not become a replica" + } +} + +# Cleanup: revert any special configuration +test "(post-cleanup) Reset dual-channel replication and replica-announce-ip" { + foreach_valkey_id id { + catch {R $id config set dual-channel-replication-enabled no} + catch {R $id config set replica-announce-ip ""} + catch {R $id config set repl-diskless-sync no} + } +} From 6a2d1e1a9b8dbe512b28bdc36468f322591b3a9e Mon Sep 17 00:00:00 2001 From: Joseph Heyburn Date: Fri, 14 Nov 2025 15:41:56 +0000 Subject: [PATCH 3/3] Read REPLCONF ip-address reply in dual-channel handshake --- src/replication.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/replication.c b/src/replication.c index 3e076e951f..ecdb9791f5 100644 --- a/src/replication.c +++ 
b/src/replication.c @@ -3016,6 +3016,22 @@ static int dualChannelReplHandleReplconfReply(connection *conn, sds *err) { *err); return C_ERR; } + + /* If replica-announce-ip is configured, we sent an additional REPLCONF ip-address command. + * We need to read its response as well. */ + if (server.replica_announce_ip) { + sdsfree(*err); + *err = receiveSynchronousResponse(conn); + if (*err == NULL) { + dualChannelServerLog(LL_WARNING, "Primary did not respond to REPLCONF ip-address command during SYNC handshake"); + return C_ERR; + } + if ((*err)[0] == '-') { + dualChannelServerLog(LL_WARNING, "Primary rejected REPLCONF ip-address: %s", *err); + return C_ERR; + } + } + if (connSyncWrite(conn, "SYNC\r\n", 6, server.repl_syncio_timeout * 1000) == -1) { dualChannelServerLog(LL_WARNING, "I/O error writing to Primary: %s", connGetLastError(conn)); return C_ERR;