Skip to content

Commit 7c530fb

Browse files
oranagramadolson
authored andcommitted
diskless master, avoid bgsave child hung when fork parent crashes (redis#11463)
During a diskless sync, if the master main process crashes, the child would have hung in `write`. This fix closes the read fd on the child side, so that if the parent crashes, the child will get a write error and exit. This change also fixes disk-based replication, BGSAVE and AOFRW. In that case the child wouldn't have been hang, it would have just kept running until done which may be pointless. There is a certain degree of risk here. in case there's a BGSAVE child that could maybe succeed and the parent dies for some reason, the old code would have let the child keep running and maybe succeed and avoid data loss. On the other hand, if the parent is restarted, it would have loaded an old rdb file (or none), and then the child could reach the end and rename the rdb file (data conflicting with what the parent has), or also have a race with another BGSAVE child that the new parent started. Note that i removed a comment saying a write error will be ignored in the child and handled by the parent (this comment was very old and i don't think relevant).
1 parent dca8e4d commit 7c530fb

File tree

5 files changed

+59
-2
lines changed

5 files changed

+59
-2
lines changed

src/childinfo.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,9 @@ void sendChildInfoGeneric(childInfoType info_type, size_t keys, double progress,
112112
ssize_t wlen = sizeof(data);
113113

114114
if (write(server.child_info_pipe[1], &data, wlen) != wlen) {
115-
/* Nothing to do on error, this will be detected by the other side. */
115+
/* Failed writing to parent, it could have been killed, exit. */
116+
serverLog(LL_WARNING,"Child failed reporting info to parent, exiting. %s", strerror(errno));
117+
exit(1);
116118
}
117119
}
118120

src/rdb.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3399,6 +3399,10 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
33993399

34003400
rioInitWithFd(&rdb,rdb_pipe_write);
34013401

3402+
/* Close the reading part, so that if the parent crashes, the child will
3403+
* get a write error and exit. */
3404+
close(server.rdb_pipe_read);
3405+
34023406
redisSetProcTitle("redis-rdb-to-slaves");
34033407
redisSetCpuAffinity(server.bgsave_cpulist);
34043408

src/server.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6400,6 +6400,10 @@ int redisFork(int purpose) {
64006400
setOOMScoreAdj(CONFIG_OOM_BGCHILD);
64016401
dismissMemoryInChild();
64026402
closeChildUnusedResourceAfterFork();
6403+
/* Close the reading part, so that if the parent crashes, the child will
6404+
* get a write error and exit. */
6405+
if (server.child_info_pipe[0] != -1)
6406+
close(server.child_info_pipe[0]);
64036407
} else {
64046408
/* Parent */
64056409
if (childpid == -1) {

tests/integration/replication.tcl

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -992,7 +992,7 @@ test "diskless replication child being killed is collected" {
992992
# wait for the replicas to start reading the rdb
993993
wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 800 10
994994

995-
# wait to be sure the eplica is hung and the master is blocked on write
995+
# wait to be sure the replica is hung and the master is blocked on write
996996
after 500
997997

998998
# simulate the OOM killer or anyone else kills the child
@@ -1012,6 +1012,45 @@ test "diskless replication child being killed is collected" {
10121012
}
10131013
} {} {external:skip}
10141014

1015+
foreach mdl {yes no} {
1016+
test "replication dies when parent is killed - diskless: $mdl" {
1017+
# when master is killed, make sure the fork child can detect that and exit
1018+
start_server {tags {"repl"}} {
1019+
set master [srv 0 client]
1020+
set master_host [srv 0 host]
1021+
set master_port [srv 0 port]
1022+
set master_pid [srv 0 pid]
1023+
$master config set repl-diskless-sync $mdl
1024+
$master config set repl-diskless-sync-delay 0
1025+
# create keys that will take 10 seconds to save
1026+
$master config set rdb-key-save-delay 1000
1027+
$master debug populate 10000
1028+
start_server {} {
1029+
set replica [srv 0 client]
1030+
$replica replicaof $master_host $master_port
1031+
1032+
# wait for rdb child to start
1033+
wait_for_condition 5000 10 {
1034+
[s -1 rdb_bgsave_in_progress] == 1
1035+
} else {
1036+
fail "rdb child didn't start"
1037+
}
1038+
set fork_child_pid [get_child_pid -1]
1039+
1040+
# simulate the OOM killer or anyone else kills the parent
1041+
exec kill -9 $master_pid
1042+
1043+
# wait for the child to notice the parent died have exited
1044+
wait_for_condition 500 10 {
1045+
[process_is_alive $fork_child_pid] == 0
1046+
} else {
1047+
fail "rdb child didn't terminate"
1048+
}
1049+
}
1050+
}
1051+
} {} {external:skip}
1052+
}
1053+
10151054
test "diskless replication read pipe cleanup" {
10161055
# In diskless replication, we create a read pipe for the RDB, between the child and the parent.
10171056
# When we close this pipe (fd), the read handler also needs to be removed from the event loop (if it still registered).

tests/support/util.tcl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,14 @@ proc get_child_pid {idx} {
627627
return $child_pid
628628
}
629629

630+
proc process_is_alive pid {
631+
if {[catch {exec ps -p $pid} err]} {
632+
return 0
633+
} else {
634+
return 1
635+
}
636+
}
637+
630638
proc cmdrstat {cmd r} {
631639
if {[regexp "\r\ncmdstat_$cmd:(.*?)\r\n" [$r info commandstats] _ value]} {
632640
set _ $value

0 commit comments

Comments
 (0)