Skip to content

Commit 58285a6

Browse files
authored
Fix WAITAOF mix-use last_offset and last_numreplicas (#11922)
There be a situation that satisfies WAIT, and then wrongly unblock WAITAOF because we mix-use last_offset and last_numreplicas. We update last_offset and last_numreplicas only when the condition matches. i.e. output of either replicationCountAOFAcksByOffset or replicationCountAcksByOffset is right. In this case, we need to have separate last_ variables for each of them. Added a last_aof_offset and last_aof_numreplicas for WAITAOF. WAITAOF was added in #11713. Found while coding #11917. A Test was added to validate that case.
1 parent 72f5aad commit 58285a6

File tree

3 files changed

+86
-5
lines changed

3 files changed

+86
-5
lines changed

src/replication.c

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3612,7 +3612,9 @@ void unblockClientWaitingReplicas(client *c) {
36123612
* since we received enough ACKs from slaves. */
36133613
void processClientsWaitingReplicas(void) {
36143614
long long last_offset = 0;
3615+
long long last_aof_offset = 0;
36153616
int last_numreplicas = 0;
3617+
int last_aof_numreplicas = 0;
36163618

36173619
listIter li;
36183620
listNode *ln;
@@ -3628,18 +3630,22 @@ void processClientsWaitingReplicas(void) {
36283630
if (is_wait_aof && c->bstate.numlocal && !server.aof_enabled) {
36293631
addReplyError(c, "WAITAOF cannot be used when numlocal is set but appendonly is disabled.");
36303632
unblockClient(c);
3631-
return;
3633+
continue;
36323634
}
36333635

36343636
/* Every time we find a client that is satisfied for a given
36353637
* offset and number of replicas, we remember it so the next client
36363638
* may be unblocked without calling replicationCountAcksByOffset()
36373639
* or calling replicationCountAOFAcksByOffset()
36383640
* if the requested offset / replicas were equal or less. */
3639-
if (last_offset && last_offset >= c->bstate.reploffset &&
3641+
if (!is_wait_aof && last_offset && last_offset >= c->bstate.reploffset &&
36403642
last_numreplicas >= c->bstate.numreplicas)
36413643
{
36423644
numreplicas = last_numreplicas;
3645+
} else if (is_wait_aof && last_aof_offset && last_aof_offset >= c->bstate.reploffset &&
3646+
last_aof_numreplicas >= c->bstate.numreplicas)
3647+
{
3648+
numreplicas = last_aof_numreplicas;
36433649
} else {
36443650
numreplicas = is_wait_aof ?
36453651
replicationCountAOFAcksByOffset(c->bstate.reploffset) :
@@ -3648,8 +3654,13 @@ void processClientsWaitingReplicas(void) {
36483654
/* Check if the number of replicas is satisfied. */
36493655
if (numreplicas < c->bstate.numreplicas) continue;
36503656

3651-
last_offset = c->bstate.reploffset;
3652-
last_numreplicas = numreplicas;
3657+
if (is_wait_aof) {
3658+
last_aof_offset = c->bstate.reploffset;
3659+
last_aof_numreplicas = numreplicas;
3660+
} else {
3661+
last_offset = c->bstate.reploffset;
3662+
last_numreplicas = numreplicas;
3663+
}
36533664
}
36543665

36553666
/* Check if the local constraint of WAITAOF is served */

src/server.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1009,7 +1009,7 @@ typedef struct blockingState {
10091009
/* BLOCKED_LIST, BLOCKED_ZSET and BLOCKED_STREAM or any other Keys related blocking */
10101010
dict *keys; /* The keys we are blocked on */
10111011

1012-
/* BLOCKED_WAIT */
1012+
/* BLOCKED_WAIT and BLOCKED_WAITAOF */
10131013
int numreplicas; /* Number of replicas we are waiting for ACK. */
10141014
int numlocal; /* Indication if WAITAOF is waiting for local fsync. */
10151015
long long reploffset; /* Replication offset to reach. */

tests/unit/wait.tcl

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,3 +360,73 @@ tags {"wait aof network external:skip"} {
360360
}
361361
}
362362
}
363+
364+
start_server {tags {"failover external:skip"}} {
365+
start_server {} {
366+
start_server {} {
367+
set master [srv 0 client]
368+
set master_host [srv 0 host]
369+
set master_port [srv 0 port]
370+
371+
set replica1 [srv -1 client]
372+
set replica1_pid [srv -1 pid]
373+
374+
set replica2 [srv -2 client]
375+
376+
test {setup replication for following tests} {
377+
$replica1 replicaof $master_host $master_port
378+
$replica2 replicaof $master_host $master_port
379+
wait_for_sync $replica1
380+
wait_for_sync $replica2
381+
}
382+
383+
test {WAIT and WAITAOF replica multiple clients unblock - reuse last result} {
384+
set rd [redis_deferring_client]
385+
set rd2 [redis_deferring_client]
386+
387+
$master config set appendonly yes
388+
$replica1 config set appendonly yes
389+
$replica2 config set appendonly yes
390+
391+
$master config set appendfsync always
392+
$replica1 config set appendfsync no
393+
$replica2 config set appendfsync no
394+
395+
waitForBgrewriteaof $master
396+
waitForBgrewriteaof $replica1
397+
waitForBgrewriteaof $replica2
398+
399+
exec kill -SIGSTOP $replica1_pid
400+
401+
$rd incr foo
402+
$rd read
403+
$rd waitaof 0 1 0
404+
405+
# rd2 has a newer repl_offset
406+
$rd2 incr foo
407+
$rd2 read
408+
$rd2 wait 2 0
409+
410+
wait_for_blocked_clients_count 2
411+
412+
exec kill -SIGCONT $replica1_pid
413+
414+
# WAIT will unblock the client first.
415+
assert_equal [$rd2 read] {2}
416+
417+
# Make $replica1 catch up the repl_aof_off, then WAITAOF will unblock the client.
418+
$replica1 config set appendfsync always
419+
$master incr foo
420+
assert_equal [$rd read] {1 1}
421+
422+
$rd ping
423+
assert_equal [$rd read] {PONG}
424+
$rd2 ping
425+
assert_equal [$rd2 read] {PONG}
426+
427+
$rd close
428+
$rd2 close
429+
}
430+
}
431+
}
432+
}

0 commit comments

Comments
 (0)