Skip to content

Commit b8f2eaa

Browse files
max cluster cron failed connections
Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>
1 parent 73696bf commit b8f2eaa

2 files changed

Lines changed: 12 additions & 2 deletions

File tree

src/cluster_legacy.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1632,6 +1632,7 @@ clusterNode *createClusterNode(char *nodename, int flags) {
16321632
node->replicaof = NULL;
16331633
node->last_in_ping_gossip = 0;
16341634
node->ping_sent = node->pong_received = 0;
1635+
node->last_connection_attempt = 0;
16351636
node->data_received = 0;
16361637
node->meet_sent = 0;
16371638
node->fail_time = 0;
@@ -5282,10 +5283,12 @@ static int nodeExceedsHandshakeTimeout(clusterNode *node, mstime_t now) {
52825283
return now - node->ctime > getHandshakeTimeout() ? 1 : 0;
52835284
}
52845285

5286+
/* Threshold for the per-clusterCron() counter of new link connection
 * attempts. clusterNodeCronHandleReconnect() skips creating a new link for a
 * node once that counter is greater than this value.
 * NOTE(review): the counter starts at 0 and the check uses '>', so up to
 * MAX_CONNECTION_ATTEMPTS_PER_CRON + 1 attempts can be started in a single
 * pass -- confirm whether '>=' was intended. */
#define MAX_CONNECTION_ATTEMPTS_PER_CRON 10
5287+
52855288
/* Check if the node is disconnected and re-establish the connection.
52865289
* Also update a few stats while we are here, that can be used to make
52875290
* better decisions in other parts of the code. */
5288-
static int clusterNodeCronHandleReconnect(clusterNode *node, mstime_t now) {
5291+
static int clusterNodeCronHandleReconnect(clusterNode *node, mstime_t now, int *cluster_conn_attempts) {
52895292
/* Not interested in reconnecting the link with myself or nodes
52905293
* for which we have no address. */
52915294
if (node->flags & (CLUSTER_NODE_MYSELF | CLUSTER_NODE_NOADDR)) return 1;
@@ -5314,6 +5317,11 @@ static int clusterNodeCronHandleReconnect(clusterNode *node, mstime_t now) {
53145317
}
53155318

53165319
if (node->link == NULL) {
5320+
if ((now - node->last_connection_attempt) <= 1000 || *cluster_conn_attempts > MAX_CONNECTION_ATTEMPTS_PER_CRON) {
5321+
return 0;
5322+
}
5323+
node->last_connection_attempt = now;
5324+
(*cluster_conn_attempts)++;
53175325
clusterLink *link = createClusterLink(node);
53185326
link->conn = connCreate(connTypeOfCluster());
53195327
connSetPrivateData(link->conn, link);
@@ -5370,6 +5378,7 @@ void clusterCron(void) {
53705378
mstime_t min_pong = 0, now = mstime();
53715379
clusterNode *min_pong_node = NULL;
53725380
static unsigned long long iteration = 0;
5381+
int cluster_conn_attempts = 0;
53735382

53745383
iteration++; /* Number of times this function was called so far. */
53755384

@@ -5387,7 +5396,7 @@ void clusterCron(void) {
53875396
/* The protocol is that function(s) below return non-zero if the node was
53885397
* terminated.
53895398
*/
5390-
if (clusterNodeCronHandleReconnect(node, now)) continue;
5399+
if (clusterNodeCronHandleReconnect(node, now, &cluster_conn_attempts)) continue;
53915400
}
53925401
dictReleaseIterator(di);
53935402

src/cluster_legacy.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@ struct _clusterNode {
352352
mstime_t fail_time; /* Unix time when FAIL flag was set */
353353
mstime_t repl_offset_time; /* Unix time we received offset for this node */
354354
mstime_t orphaned_time; /* Starting time of orphaned primary condition */
355+
mstime_t last_connection_attempt; /* Unix time we last tried to establish a link */
355356
mstime_t inbound_link_freed_time; /* Last time we freed the inbound link for this node.
356357
If it was never freed, it is the same as ctime */
357358
long long repl_offset; /* Last known repl offset for this node. */

0 commit comments

Comments
 (0)