@@ -64,3 +64,172 @@ start_cluster 3 4 {tags {external:skip cluster} overrides {cluster-ping-interval
}

} ;# start_cluster
+
+start_cluster 7 3 {tags {external:skip cluster} overrides {cluster-ping-interval 1000 cluster-node-timeout 5000}} {
+    test "Primaries will not time out when they are elected in the same epoch" {
+        # Because of the election delay, these nodes may not initiate the
+        # election at the same time (in the same epoch). But if they do, we
+        # make sure there is no failover timeout.
+
+        # Kill three of the primary nodes.
+        pause_process [srv 0 pid]
+        pause_process [srv -1 pid]
+        pause_process [srv -2 pid]
+
+        # Wait for the failover.
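+        # srv -7, -8 and -9 are the three replicas (expected to serve the
+        # three paused primaries), so they should all get promoted.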
+        wait_for_condition 1000 50 {
+            [s -7 role] == "master" &&
+            [s -8 role] == "master" &&
+            [s -9 role] == "master"
+        } else {
+            fail "No failover detected"
+        }
+
+        # Make sure no election was started for the bogus epoch 0.
+        verify_no_log_message -7 "*Failover election in progress for epoch 0*" 0
+        verify_no_log_message -8 "*Failover election in progress for epoch 0*" 0
+        verify_no_log_message -9 "*Failover election in progress for epoch 0*" 0
+
+        # Make sure there is no failover timeout.
+        verify_no_log_message -7 "*Failover attempt expired*" 0
+        verify_no_log_message -8 "*Failover attempt expired*" 0
+        verify_no_log_message -9 "*Failover attempt expired*" 0
+
+        # Resume the paused primaries to speed up the shutdown.
+        resume_process [srv 0 pid]
+        resume_process [srv -1 pid]
+        resume_process [srv -2 pid]
+    }
+} ;# start_cluster
+
+run_solo {cluster} {
+    start_cluster 32 15 {tags {external:skip cluster} overrides {cluster-ping-interval 1000 cluster-node-timeout 15000}} {
+        test "Multiple primary nodes are down, rank them based on the failed primary" {
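+            # With 15 of the 32 primaries paused at once, the test only
+            # asserts that a failover still happens and that every running
+            # node eventually reports cluster_state ok.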
+            # Kill 15 of the 32 primary nodes.
+            for {set j 0} {$j < 15} {incr j} {
+                pause_process [srv -$j pid]
+            }
+
+            # Make sure that a node starts the failover.
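+            # srv -40 is expected to be a replica of one of the paused
+            # primaries, so its promotion indicates the failover has begun.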
+            wait_for_condition 1000 100 {
+                [s -40 role] == "master"
+            } else {
+                fail "No failover detected"
+            }
+
+            # Wait for the cluster state to become ok.
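+            # Skip the paused nodes since they cannot answer CLUSTER INFO.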
+            for {set j 0} {$j < [llength $::servers]} {incr j} {
+                if {[process_is_paused [srv -$j pid]]} continue
+                wait_for_condition 1000 100 {
+                    [CI $j cluster_state] eq "ok"
+                } else {
+                    fail "Cluster node $j cluster_state:[CI $j cluster_state]"
+                }
+            }
+
+            # Resume the paused primaries to speed up the shutdown.
+            for {set j 0} {$j < 15} {incr j} {
+                resume_process [srv -$j pid]
+            }
+        }
+    } ;# start_cluster
+} ;# run_solo
+
+# Needs to run in the body of
+# start_cluster 3 1 {tags {external:skip cluster} overrides {cluster-replica-validity-factor 0}}
+proc test_replica_config_epoch_failover {type} {
+    test "Replica can update the config epoch when triggering the failover - $type" {
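+        # Sentinel values for DEBUG DROP-CLUSTER-PACKET-FILTER; as the names
+        # suggest, -1 drops nothing and -2 drops every cluster packet type.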
+        set CLUSTER_PACKET_TYPE_NONE -1
+        set CLUSTER_PACKET_TYPE_ALL -2
+
+        if {$type == "automatic"} {
+            R 3 CONFIG SET cluster-replica-no-failover no
+        } elseif {$type == "manual"} {
+            R 3 CONFIG SET cluster-replica-no-failover yes
+        }
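+        # Drop all cluster bus packets on the replica (R 3) so that it will
+        # not observe the epoch bump performed below.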
+        R 3 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_ALL
+
+        set R0_nodeid [R 0 cluster myid]
+
+        # R 0 is the first node, so we expect its config epoch to be the
+        # smallest; bumpepoch must therefore succeed and its config epoch
+        # will change.
+        set res [R 0 cluster bumpepoch]
+        assert_match {BUMPED *} $res
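+        # The reply has the form "BUMPED <new-epoch>"; grab the new epoch.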
+        set R0_config_epoch [lindex $res 1]
+
+        # Wait for the config epoch to propagate to the other primaries.
+        wait_for_condition 1000 10 {
+            $R0_config_epoch == [dict get [cluster_get_node_by_id 1 $R0_nodeid] config_epoch] &&
+            $R0_config_epoch == [dict get [cluster_get_node_by_id 2 $R0_nodeid] config_epoch]
+        } else {
+            fail "The other primaries did not update the config epoch"
+        }
+        # Make sure the replica has not updated the config epoch, since its
+        # cluster bus packets are being dropped.
+        assert_not_equal $R0_config_epoch [dict get [cluster_get_node_by_id 3 $R0_nodeid] config_epoch]
+
+        # Pause R 0 and wait for the cluster to go down.
+        pause_process [srv 0 pid]
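+        # Re-enable cluster bus traffic on the replica; it still holds the
+        # stale (pre-bump) config epoch for R 0.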
+        R 3 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE
+        wait_for_condition 1000 50 {
+            [CI 1 cluster_state] == "fail" &&
+            [CI 2 cluster_state] == "fail" &&
+            [CI 3 cluster_state] == "fail"
+        } else {
+            fail "Cluster does not fail"
+        }
+
+        # Make sure both the automatic and the manual failover fail the first
+        # time, since the replica still holds a stale config epoch for R 0.
+        if {$type == "automatic"} {
+            wait_for_log_messages -3 {"*Failover attempt expired*"} 0 1000 10
+        } elseif {$type == "manual"} {
+            R 3 cluster failover force
+            wait_for_log_messages -3 {"*Manual failover timed out*"} 0 1000 10
+        }
+
+        # Make sure the primaries print the relevant logs: they deny the vote
+        # because the replica's reqConfigEpoch is stale, and they send an
+        # UPDATE message so the replica can refresh its view.
+        wait_for_log_messages -1 {"*Failover auth denied to* epoch * > reqConfigEpoch*"} 0 1000 10
+        wait_for_log_messages -1 {"*has old slots configuration, sending an UPDATE message about*"} 0 1000 10
+        wait_for_log_messages -2 {"*Failover auth denied to* epoch * > reqConfigEpoch*"} 0 1000 10
+        wait_for_log_messages -2 {"*has old slots configuration, sending an UPDATE message about*"} 0 1000 10
+
+        # Make sure the replica has updated the config epoch.
+        wait_for_condition 1000 10 {
+            $R0_config_epoch == [dict get [cluster_get_node_by_id 3 $R0_nodeid] config_epoch]
+        } else {
+            fail "The replica does not update the config epoch"
+        }
+
+        if {$type == "manual"} {
+            # The second manual failover will succeed because the config epoch
+            # has already propagated to the replica.
+            R 3 cluster failover force
+        }
+
+        # Wait for the failover to succeed.
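+        # srv -3 / R 3 is the replica; it should now report itself as a
+        # primary and every remaining node should see the cluster as ok.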
+        wait_for_condition 1000 50 {
+            [s -3 role] == "master" &&
+            [CI 1 cluster_state] == "ok" &&
+            [CI 2 cluster_state] == "ok" &&
+            [CI 3 cluster_state] == "ok"
+        } else {
+            fail "Failover does not happen"
+        }
+
+        # Restore the old primary and make sure it converts into a replica.
+        resume_process [srv 0 pid]
+        wait_for_condition 1000 50 {
+            [s 0 role] == "slave" &&
+            [CI 0 cluster_state] == "ok"
+        } else {
+            fail "The old primary was not converted into a replica"
+        }
+    }
+}
+
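+# Run the scenario once for each failover type.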
+start_cluster 3 1 {tags {external:skip cluster} overrides {cluster-replica-validity-factor 0}} {
+    test_replica_config_epoch_failover "automatic"
+}
+
+start_cluster 3 1 {tags {external:skip cluster} overrides {cluster-replica-validity-factor 0}} {
+    test_replica_config_epoch_failover "manual"
+}