-
Notifications
You must be signed in to change notification settings - Fork 955
Trigger manual failover on SIGTERM / shutdown to cluster primary #1091
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6ab8888
4b49f03
f9ca731
df0ef8d
594fd5a
519eb2a
32043dd
e7b33fa
d6649e5
64831c9
b06a8c4
5f7b429
e56a360
0ccc4e4
c9bfd69
c8037a1
d70036b
7d55db6
4d5da8a
a1f957c
37147e8
bf60ed6
27b6f6d
61dd999
8423921
ed8c9bb
9e00910
8cba555
ade48cb
6b5cf7f
e3fdb7c
9c3d47e
9521f5f
c367470
5e88fd3
2cd1832
d2bf07f
533e6a6
3b18c45
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1254,8 +1254,68 @@ void clusterInitLast(void) { | |
| } | ||
| } | ||
|
|
||
| void clusterAutoFailoverOnShutdown(void) { | ||
| if (!nodeIsPrimary(myself) || !server.auto_failover_on_shutdown) return; | ||
|
|
||
| /* Find the best replica, that is, an online replica whose acknowledged offset has caught up with ours. */ | ||
| int legacy_replica = 0; | ||
| client *best_replica = NULL; | ||
| listIter replicas_iter; | ||
| listNode *replicas_list_node; | ||
| listRewind(server.replicas, &replicas_iter); | ||
| while ((replicas_list_node = listNext(&replicas_iter)) != NULL) { | ||
| client *replica = listNodeValue(replicas_list_node); | ||
| /* The failover is only attempted when the replica offset is caught up, to avoid data loss. | ||
| * 0x90000 means version 9.0.0; replicas reporting an older version do not support this feature. */ | ||
| if (replica->repl_data->replica_version < 0x90000) { | ||
| legacy_replica = 1; | ||
| best_replica = NULL; | ||
| break; | ||
| } | ||
| if (replica->repl_data->repl_state == REPLICA_STATE_ONLINE && | ||
| replica->repl_data->repl_ack_off == server.primary_repl_offset && | ||
| replica->repl_data->replica_nodeid && sdslen(replica->repl_data->replica_nodeid) == CLUSTER_NAMELEN) { | ||
| best_replica = replica; | ||
| } | ||
| } | ||
|
|
||
| /* We are not able to find the replica to do the auto failover. */ | ||
| if (best_replica == NULL) { | ||
| if (legacy_replica) { | ||
| serverLog(LL_NOTICE, "Unable to perform auto failover on shutdown since there are legacy replicas."); | ||
| } else { | ||
| serverLog(LL_NOTICE, "Unable to find a replica to perform the auto failover on shutdown."); | ||
| } | ||
| return; | ||
| } | ||
|
|
||
| /* Send the CLUSTER FAILOVER FORCE REPLICAID node-id to all replicas since | ||
| * it is a shared replication buffer, but only the replica with the matching | ||
| * node-id will execute it. The caller will call flushReplicasOutputBuffers, | ||
| * so in here it is a best effort. */ | ||
| char buf[128]; | ||
| size_t buflen = snprintf(buf, sizeof(buf), | ||
| "*5\r\n$7\r\nCLUSTER\r\n" | ||
| "$8\r\nFAILOVER\r\n" | ||
| "$5\r\nFORCE\r\n" | ||
| "$9\r\nREPLICAID\r\n" | ||
| "$%d\r\n%.*s\r\n", | ||
| CLUSTER_NAMELEN, | ||
| CLUSTER_NAMELEN, | ||
| best_replica->repl_data->replica_nodeid); | ||
| serverAssert(buflen <= 128); | ||
| /* Must install write handler for all replicas first before feeding | ||
enjoy-binbin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| * replication stream. */ | ||
| prepareReplicasToWrite(); | ||
| feedReplicationBuffer(buf, buflen); | ||
| serverLog(LL_NOTICE, "Perform auto failover to replica %s on shutdown.", best_replica->repl_data->replica_nodeid); | ||
| } | ||
|
|
||
| /* Called when a cluster node receives SHUTDOWN. */ | ||
| void clusterHandleServerShutdown(void) { | ||
| /* Check if we are able to do the auto failover on shutdown. */ | ||
| clusterAutoFailoverOnShutdown(); | ||
|
|
||
| /* The error logs have been logged in the save function if the save fails. */ | ||
| serverLog(LL_NOTICE, "Saving the cluster configuration file before exiting."); | ||
| clusterSaveConfig(1); | ||
|
|
@@ -7103,22 +7163,37 @@ int clusterCommandSpecial(client *c) { | |
| } else { | ||
| addReplyLongLong(c, clusterNodeFailureReportsCount(n)); | ||
| } | ||
| } else if (!strcasecmp(c->argv[1]->ptr, "failover") && (c->argc == 2 || c->argc == 3)) { | ||
| /* CLUSTER FAILOVER [FORCE|TAKEOVER] */ | ||
| } else if (!strcasecmp(c->argv[1]->ptr, "failover") && (c->argc >= 2)) { | ||
| /* CLUSTER FAILOVER [FORCE|TAKEOVER] [REPLICAID <NODE ID>] | ||
| * REPLICAID is currently for internal use only, so we won't | ||
| * put it into the JSON file. */ | ||
|
Comment on lines
+7168
to
+7169
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think we should just not document it. Someone will find it and use it, so I think removing it becomes a breaking change. All other arguments, including other internal commands, are documented in the JSON files.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we document it, I think it will mostly be confusing to users. Users might think they can send it to the primary like with standalone FAILOVER TO, but it's not replicated so it doesn't work. I agree though that even if we don't document it, we probably need to keep it around.
REPLCONF has no arguments in the JSON file. The page just says "Usage: REPLCONF". So this will not be the only undocumented argument. |
||
| int force = 0, takeover = 0; | ||
| robj *replicaid = NULL; | ||
|
|
||
| if (c->argc == 3) { | ||
| if (!strcasecmp(c->argv[2]->ptr, "force")) { | ||
| for (int j = 2; j < c->argc; j++) { | ||
| int moreargs = (c->argc - 1) - j; | ||
| if (!strcasecmp(c->argv[j]->ptr, "force")) { | ||
| force = 1; | ||
| } else if (!strcasecmp(c->argv[2]->ptr, "takeover")) { | ||
| } else if (!strcasecmp(c->argv[j]->ptr, "takeover")) { | ||
| takeover = 1; | ||
| force = 1; /* Takeover also implies force. */ | ||
| } else if (c == server.primary && !strcasecmp(c->argv[j]->ptr, "replicaid") && moreargs) { | ||
| /* This option is currently accepted only when the command comes from our primary. */ | ||
| j++; | ||
| replicaid = c->argv[j]; | ||
| } else { | ||
| addReplyErrorObject(c, shared.syntaxerr); | ||
| return 1; | ||
| } | ||
| } | ||
|
|
||
| /* Check if it should be executed by myself. */ | ||
| if (replicaid != NULL && memcmp(replicaid->ptr, myself->name, CLUSTER_NAMELEN) != 0) { | ||
| /* The command targets another replica: ignore it, skipping both the sanity checks and the failover itself. */ | ||
| addReply(c, shared.ok); | ||
| return 1; | ||
| } | ||
|
|
||
| /* Check preconditions. */ | ||
| if (clusterNodeIsPrimary(myself)) { | ||
| addReplyError(c, "You should send CLUSTER FAILOVER to a replica"); | ||
|
|
@@ -7127,8 +7202,7 @@ int clusterCommandSpecial(client *c) { | |
| addReplyError(c, "I'm a replica but my master is unknown to me"); | ||
| return 1; | ||
| } else if (!force && (nodeFailed(myself->replicaof) || myself->replicaof->link == NULL)) { | ||
| addReplyError(c, "Master is down or failed, " | ||
| "please use CLUSTER FAILOVER FORCE"); | ||
| addReplyError(c, "Master is down or failed, please use CLUSTER FAILOVER FORCE"); | ||
| return 1; | ||
| } | ||
| resetManualFailover(); | ||
|
|
@@ -7147,7 +7221,11 @@ int clusterCommandSpecial(client *c) { | |
| /* If this is a forced failover, we don't need to talk with our | ||
| * primary to agree about the offset. We just failover taking over | ||
| * it without coordination. */ | ||
| serverLog(LL_NOTICE, "Forced failover user request accepted (user request from '%s').", client); | ||
| if (c == server.primary) { | ||
| serverLog(LL_NOTICE, "Forced failover primary request accepted (primary request from '%s').", client); | ||
| } else { | ||
| serverLog(LL_NOTICE, "Forced failover user request accepted (user request from '%s').", client); | ||
| } | ||
| manualFailoverCanStart(); | ||
| /* We can start a manual failover as soon as possible, setting a flag | ||
| * here so that we don't need to wait for the cron to kick in. */ | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.