Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added bigfile
Binary file not shown.
24 changes: 22 additions & 2 deletions src/cluster_legacy.c
Original file line number Diff line number Diff line change
Expand Up @@ -1049,13 +1049,22 @@ int clusterSaveConfig(int do_fsync) {
return retval;
}

/* Save the cluster configuration file. If the save fails, exit the process. */
void clusterSaveConfigOrDie(int do_fsync) {
if (clusterSaveConfig(do_fsync) == C_ERR) {
serverLog(LL_WARNING, "Fatal: can't update cluster config file.");
exit(1);
}
}

/* Save the cluster configuration file. If the save fails, print the log. */
void clusterSaveConfigOrLog(int do_fsync) {
if (clusterSaveConfig(do_fsync) == C_ERR) {
serverLog(LL_WARNING, "Cluster config file is applying a change even though "
"it is unable to write to disk.");
}
}

/* Lock the cluster config using flock(), and retain the file descriptor used to
* acquire the lock so that the file will be locked as long as the process is up.
*
Expand Down Expand Up @@ -1321,6 +1330,7 @@ void clusterInit(void) {
server.cluster->currentEpoch = 0;
server.cluster->state = CLUSTER_FAIL;
server.cluster->fail_reason = CLUSTER_FAIL_NONE;
server.cluster->safe_to_join = 0;
server.cluster->size = 0;
server.cluster->todo_before_sleep = 0;
server.cluster->nodes = dictCreate(&clusterNodesDictType);
Expand Down Expand Up @@ -5013,6 +5023,12 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
* size + 1 */
if (!clusterNodeIsVotingPrimary(myself)) return;

if (!server.cluster->safe_to_join) {
serverLog(LL_WARNING, "Failover auth denied to %.40s (%s): it is not safe to vote in this moment)",
node->name, node->human_nodename);
return;
}

/* Request epoch must be >= our currentEpoch.
* Note that it is impossible for it to actually be greater since
* our currentEpoch was updated as a side effect of receiving this
Expand Down Expand Up @@ -6023,7 +6039,7 @@ void clusterBeforeSleep(void) {
/* Save the config, possibly using fsync. */
if (flags & CLUSTER_TODO_SAVE_CONFIG) {
int fsync = flags & CLUSTER_TODO_FSYNC_CONFIG;
clusterSaveConfigOrDie(fsync);
clusterSaveConfigOrLog(fsync);
}

if (flags & CLUSTER_TODO_BROADCAST_ALL) {
Expand Down Expand Up @@ -6267,8 +6283,12 @@ void clusterUpdateState(void) {
* to not count the DB loading time. */
if (first_call_time == 0) first_call_time = mstime();
if (clusterNodeIsPrimary(myself) && server.cluster->state == CLUSTER_FAIL &&
mstime() - first_call_time < CLUSTER_WRITABLE_DELAY)
mstime() - first_call_time < CLUSTER_WRITABLE_DELAY) {
server.cluster->safe_to_join = 0;
return;
} else {
server.cluster->safe_to_join = 1;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ca we Document and codify the exact criteria to set safe_to_join=1 (e.g., “after nodes.conf successfully written + fsync’ed, clusterState=ok, links stable for N ms”). Also specify when it returns to 0 (e.g., config write failure, epoch mismatch, handshake reset)

}

/* Start assuming the state is OK. We'll turn it into FAIL if there
* are the right conditions. */
Expand Down
1 change: 1 addition & 0 deletions src/cluster_legacy.h
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ struct clusterState {
uint64_t currentEpoch;
int state; /* CLUSTER_OK, CLUSTER_FAIL, ... */
int fail_reason; /* Why the cluster state changes to fail. */
int safe_to_join; /* Can the restarted node safely join the cluster? */
int size; /* Num of primary nodes with at least one slot */
dict *nodes; /* Hash table of name -> clusterNode structures */
dict *shards; /* Hash table of shard_id -> list (of nodes) structures */
Expand Down
31 changes: 31 additions & 0 deletions tests/unit/cluster/misc.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,34 @@ start_cluster 1 1 {tags {external:skip cluster}} {
}
}

# Create a folder called "nodes.conf" to trigger temp nodes.conf rename
# failure and it will cause cluster config file save to fail at the rename.
proc create_nodes_conf_folder {srv_idx} {
set dir [lindex [R $srv_idx config get dir] 1]
set cluster_conf [lindex [R $srv_idx config get cluster-config-file] 1]
set cluster_conf_path [file join $dir $cluster_conf]
if {[file exists $cluster_conf_path]} { exec rm -f $cluster_conf_path }
exec mkdir -p $cluster_conf_path
}

start_cluster 1 1 {tags {external:skip cluster}} {
test {Fail to save the cluster configuration file will not exit the process} {
# Create folder that can cause the rename fail.
create_nodes_conf_folder 0
create_nodes_conf_folder 1

# Trigger a takeover so that cluster will need to update the config file.
R 1 cluster failover takeover

assert_equal {PONG} [R 0 ping]
assert_equal {PONG} [R 1 ping]
assert_equal 1 [process_is_alive [srv 0 pid]]
assert_equal 1 [process_is_alive [srv -1 pid]]

# Make sure relevant logs are printed.
verify_log_message 0 "*Could not rename tmp cluster config file*" 0
verify_log_message -1 "*Could not rename tmp cluster config file*" 0
verify_log_message 0 "*Cluster config file is applying a change even though it is unable to write to disk*" 0
verify_log_message -1 "*Cluster config file is applying a change even though it is unable to write to disk*" 0
}
}
2 changes: 1 addition & 1 deletion tests/unit/latency-monitor.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ start_cluster 1 1 {tags {"latency-monitor cluster external:skip needs:latency"}
# We don't assert anything since we can't be sure whether it will be counted.
R 0 cluster saveconfig
R 1 cluster saveconfig
R 1 cluster failover force
R 1 cluster failover takeover
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason for changing it is that in the previous test case, force does not have enough votes to finish the election.

R 0 latency latest
R 1 latency latest
}
Expand Down
Loading