Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion common/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ libswsscommon_la_SOURCES = \
warm_restart.cpp \
luatable.cpp \
countertable.cpp \
redisutility.cpp
redisutility.cpp \
restart_waiter.cpp

libswsscommon_la_CXXFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(LIBNL_CFLAGS) $(CODE_COVERAGE_CXXFLAGS)
libswsscommon_la_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(LIBNL_CPPFLAGS) $(CODE_COVERAGE_CPPFLAGS)
Expand Down
158 changes: 158 additions & 0 deletions common/restart_waiter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#include "restart_waiter.h"
#include "redispipeline.h"
#include "select.h"
#include "schema.h"
#include "subscriberstatetable.h"
#include "table.h"
#include <boost/algorithm/string.hpp>
#include <cerrno>
#include <chrono>
#include <cstring>
#include <limits>
#include <string>

using namespace swss;

// Database name handed to DBConnector by every wait* entry point in this file.
static const std::string STATE_DB_NAME = "STATE_DB";
// Separator placed between a table name and a key when building a full key string.
static const std::string STATE_DB_SEPARATOR = "|";
// Key inspected in both the warm-restart-enable table and the FAST_REBOOT table.
static const std::string RESTART_KEY = "system";
// Field of the warm-restart-enable entry; "true" means a restart is in progress,
// "false" means it has finished.
static const std::string RESTART_ENABLE_FIELD = "enable";
// Table whose RESTART_KEY entry is checked for existence to detect a fast restart.
static const std::string FAST_REBOOT_TABLE_NAME = "FAST_REBOOT";

// Waits for a system warm or fast restart to finish.
//
// maxWaitSec - upper bound, in seconds, for the wait.
// dbTimeout  - timeout forwarded to the DBConnector constructor.
// isTcpConn  - connection-type flag forwarded to the DBConnector constructor.
//
// Returns true immediately when no restart is flagged as in progress;
// otherwise returns whatever doWait() reports.
bool RestartWaiter::waitRestartDone(
    unsigned int maxWaitSec,
    unsigned int dbTimeout,
    bool isTcpConn)
{
    DBConnector stateDb(STATE_DB_NAME, dbTimeout, isTcpConn);

    if (!isWarmOrFastRestartInProgress(stateDb))
    {
        // Nothing to wait for.
        return true;
    }

    return doWait(stateDb, maxWaitSec);
}

// Waits for a system warm restart to finish.
//
// maxWaitSec - upper bound, in seconds, for the wait.
// dbTimeout  - timeout forwarded to the DBConnector constructor.
// isTcpConn  - connection-type flag forwarded to the DBConnector constructor.
//
// Returns true immediately when a fast restart is in progress (this call only
// cares about warm restart) or when no restart is flagged at all; otherwise
// returns whatever doWait() reports.
bool RestartWaiter::waitWarmRestartDone(unsigned int maxWaitSec,
                                        unsigned int dbTimeout,
                                        bool isTcpConn)
{
    DBConnector stateDb(STATE_DB_NAME, dbTimeout, isTcpConn);

    // The fast-restart check comes first; the enable flag is consulted only
    // when this is not a fast boot.
    const bool nothingToWaitFor =
        isFastRestartInProgress(stateDb) || !isWarmOrFastRestartInProgress(stateDb);
    if (nothingToWaitFor)
    {
        return true;
    }

    return doWait(stateDb, maxWaitSec);
}

// Waits for a system fast restart to finish.
//
// maxWaitSec - upper bound, in seconds, for the wait.
// dbTimeout  - timeout forwarded to the DBConnector constructor.
// isTcpConn  - connection-type flag forwarded to the DBConnector constructor.
//
// Returns true immediately when no fast restart is in progress or when the
// restart-enable flag is not set; otherwise returns whatever doWait() reports.
bool RestartWaiter::waitFastRestartDone(unsigned int maxWaitSec,
                                        unsigned int dbTimeout,
                                        bool isTcpConn)
{
    DBConnector stateDb(STATE_DB_NAME, dbTimeout, isTcpConn);

    // Fast boot must be in progress AND the enable flag must be set,
    // checked in that order, before there is anything to wait for.
    const bool mustWait =
        isFastRestartInProgress(stateDb) && isWarmOrFastRestartInProgress(stateDb);
    if (!mustWait)
    {
        return true;
    }

    return doWait(stateDb, maxWaitSec);
}

bool RestartWaiter::doWait(DBConnector &stateDb,
unsigned int maxWaitSec)
{
if (maxWaitSec == 0)
{
SWSS_LOG_ERROR("Error: invalid maxWaitSec value 0, must be larger than 0");
return false;
}

int selectTimeout = static_cast<int>(maxWaitSec) * 1000;

SubscriberStateTable restartEnableTable(&stateDb, STATE_WARM_RESTART_ENABLE_TABLE_NAME);
Select s;
s.addSelectable(&restartEnableTable);

auto start = std::chrono::steady_clock::now();
while (1)
{
Selectable *sel = NULL;
int ret = s.select(&sel, selectTimeout, true);

if (ret == Select::OBJECT)
{
KeyOpFieldsValuesTuple kco;
restartEnableTable.pop(kco);
auto &op = kfvOp(kco);
if (op == SET_COMMAND)
{
auto &key = kfvKey(kco);
if (key == RESTART_KEY)
{
auto& values = kfvFieldsValues(kco);
for (auto& fvt: values)
{
auto& field = fvField(fvt);

if (field == RESTART_ENABLE_FIELD)
{
// During system warm/fast restart, STATE_DB WARM_RESTART_ENABLE_TABLE|system enable
// field will be set to "true", it indicates warm/fast restart is in progress.
// After warm/fast restart done, warm reboot finalizer set the field back to false,
// it indicates warm/fast restart is done. So, we wait for this field here.
std::string value = fvValue(fvt);
boost::to_lower(value);
if (value == "false")
{
return true;
}
else
{
break;
}
}
}
}
}
}
else if (ret == Select::ERROR)
{
SWSS_LOG_NOTICE("Error: wait restart done got error - %s!", strerror(errno));
}
else if (ret == Select::TIMEOUT)
{
SWSS_LOG_INFO("Timeout: wait restart done got select timeout");
}
else if (ret == Select::SIGNALINT)
{
return false;
}

auto end = std::chrono::steady_clock::now();
int delay = static_cast<int>(
std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count());

if (delay >= static_cast<int>(maxWaitSec) * 1000)
{
return false;
}

selectTimeout -= delay;
Copy link
Copy Markdown
Contributor

@qiluo-msft qiluo-msft Oct 9, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

delay

Suspecting a bug: delay is measured from a fixed start time, you should not decrease by delay in a loop. #Closed

Copy link
Copy Markdown
Collaborator Author

@Junchao-Mellanox Junchao-Mellanox Oct 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I understand. Let's say maxWaitTime is 90 seconds, thus initial selectTimeout is 90000 ms. For first iteration, restart is not done, and delay is 10000 ms. To make sure maxWaitTime is 90, the selectTimeout should be adjusted (90000 - 10000) = 80000 seconds because we have already waited 10 seconds.

}
}

// Reports whether the restart-enable flag is currently set.
//
// Reads the "enable" field of the "system" entry in the warm-restart-enable
// table and returns true only when the field exists and equals "true"
// (case-insensitive). A missing field yields false.
bool RestartWaiter::isWarmOrFastRestartInProgress(DBConnector &stateDb)
{
    const auto enableFlag = stateDb.hget(
        STATE_WARM_RESTART_ENABLE_TABLE_NAME + STATE_DB_SEPARATOR + RESTART_KEY,
        RESTART_ENABLE_FIELD);

    if (!enableFlag)
    {
        return false;
    }

    std::string lowered = *enableFlag.get();
    boost::to_lower(lowered);
    return lowered == "true";
}

// Reports whether a fast restart is in progress, i.e. whether the
// FAST_REBOOT "system" key currently exists in the given database.
bool RestartWaiter::isFastRestartInProgress(DBConnector &stateDb)
{
    const std::string fastRebootKey = FAST_REBOOT_TABLE_NAME + STATE_DB_SEPARATOR + RESTART_KEY;
    const auto stored = stateDb.get(fastRebootKey);
    return stored.get() != nullptr;
}

// Reports whether a warm (not fast) restart is in progress: the restart-enable
// flag must be set and the fast-restart key must be absent.
bool RestartWaiter::isWarmRestartInProgress(swss::DBConnector &stateDb)
{
    if (!isWarmOrFastRestartInProgress(stateDb))
    {
        return false;
    }

    // The enable flag is set; it is a warm restart unless fast boot is flagged.
    return !isFastRestartInProgress(stateDb);
}
33 changes: 33 additions & 0 deletions common/restart_waiter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#pragma once

#include "dbconnector.h"

namespace swss
{

// Helper class to wait for a system warm/fast reboot to finish.
// All members are static; the wait* functions open their own DBConnector to STATE_DB.
class RestartWaiter
{
public:
// Waits until a warm or fast restart is done.
// maxWaitSec: maximum time to wait, in seconds (0 is rejected and yields false).
// dbTimeout / isTcpConn: forwarded to the DBConnector constructor.
// Returns true if no restart is in progress or the restart finished within
// maxWaitSec; false if the wait timed out or was interrupted by a signal.
static bool waitRestartDone(unsigned int maxWaitSec = 180,
unsigned int dbTimeout = 0,
bool isTcpConn = false);

// Same as waitRestartDone, but returns true immediately when a fast restart
// is in progress (only a warm restart is waited for).
static bool waitWarmRestartDone(unsigned int maxWaitSec = 180,
unsigned int dbTimeout = 0,
bool isTcpConn = false);

// Same as waitRestartDone, but returns true immediately when no fast restart
// is in progress (only a fast restart is waited for).
static bool waitFastRestartDone(unsigned int maxWaitSec = 180,
unsigned int dbTimeout = 0,
bool isTcpConn = false);

// True when the "enable" field of the warm-restart-enable "system" entry is "true".
static bool isWarmOrFastRestartInProgress(swss::DBConnector &stateDb);
// True when the FAST_REBOOT "system" key exists.
static bool isFastRestartInProgress(swss::DBConnector &stateDb);
// True when the enable flag is set and the FAST_REBOOT key does not exist.
static bool isWarmRestartInProgress(swss::DBConnector &stateDb);

private:
// Subscribes to the warm-restart-enable table and blocks until the enable flag
// becomes "false" (returns true) or maxWaitSec elapses (returns false).
static bool doWait(swss::DBConnector &stateDb,
unsigned int maxWaitSec);
};

}
2 changes: 2 additions & 0 deletions pyext/swsscommon.i
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "events.h"
#include "configdb.h"
#include "status_code_util.h"
#include "restart_waiter.h"
%}

%include <std_string.i>
Expand Down Expand Up @@ -219,3 +220,4 @@ T castSelectableObj(swss::Selectable *temp)
%include "events.h"

%include "status_code_util.h"
%include "restart_waiter.h"
1 change: 1 addition & 0 deletions tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ tests_SOURCES = redis_ut.cpp \
events_common_ut.cpp \
events_service_ut.cpp \
events_ut.cpp \
restart_waiter_ut.cpp \
main.cpp

tests_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_GTEST) $(LIBNL_CFLAGS)
Expand Down
113 changes: 113 additions & 0 deletions tests/restart_waiter_ut.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#include <gtest/gtest.h>
#include <thread>
#include <string>
#include <unistd.h>
#include <vector>

#include "common/dbconnector.h"
#include "common/restart_waiter.h"
#include "common/schema.h"
#include "common/table.h"

using namespace swss;
using namespace std;

// Full STATE_DB key that FastBootHelper sets and deletes to simulate a fast boot.
static const string FAST_REBOOT_KEY = "FAST_REBOOT|system";

// Writes `status` into the "enable" field of the "system" entry of the
// warm-restart-enable table in STATE_DB. When `delay` is positive, sleeps
// that many seconds first, so the function can be run on a background thread
// to flip the flag while a test is blocked waiting.
static void set_reboot_status(string status, int delay = 0)
{
    const bool shouldSleep = delay > 0;
    if (shouldSleep)
    {
        sleep(static_cast<unsigned int>(delay));
    }

    DBConnector stateDb("STATE_DB", 0);
    Table enableTable(&stateDb, STATE_WARM_RESTART_ENABLE_TABLE_NAME);
    enableTable.hset("system", "enable", status);
}

// Scope guard that simulates a fast boot: the constructor sets the
// FAST_REBOOT|system key in STATE_DB and the destructor deletes it again.
//
// Copying is disabled because each copy's destructor would delete the key,
// removing it while the original guard is still in scope.
class FastBootHelper
{
public:
    FastBootHelper(): db("STATE_DB", 0)
    {
        db.set(FAST_REBOOT_KEY, "1");
    }

    ~FastBootHelper()
    {
        db.del({FAST_REBOOT_KEY});
    }

    FastBootHelper(const FastBootHelper &) = delete;
    FastBootHelper &operator=(const FastBootHelper &) = delete;

private:
    // Connection used for both the set in the constructor and the delete in the destructor.
    DBConnector db;
};

// waitRestartDone() must return true when the enable flag is cleared
// (after 3 seconds, by a background thread) before the default timeout.
TEST(RestartWaiter, success)
{
    // Start with a restart flagged as in progress.
    set_reboot_status("true");

    // Clear the flag from another thread while the main thread is waiting.
    thread finisher([] { set_reboot_status("false", 3); });

    EXPECT_TRUE(RestartWaiter::waitRestartDone());

    finisher.join();
}

// waitWarmRestartDone() must return true when the enable flag is cleared
// (after 3 seconds, by a background thread) and no fast boot key is set by this test.
TEST(RestartWaiter, successWarmRestart)
{
    // Start with a restart flagged as in progress.
    set_reboot_status("true");

    // Clear the flag from another thread while the main thread is waiting.
    thread finisher([] { set_reboot_status("false", 3); });

    EXPECT_TRUE(RestartWaiter::waitWarmRestartDone());

    finisher.join();
}

// waitFastRestartDone() must return true when a fast boot is flagged and the
// enable flag is cleared (after 3 seconds, by a background thread).
TEST(RestartWaiter, successFastRestart)
{
    // Keeps FAST_REBOOT|system present for the duration of the test.
    FastBootHelper fastBootGuard;

    // Start with a restart flagged as in progress.
    set_reboot_status("true");

    // Clear the flag from another thread while the main thread is waiting.
    thread finisher([] { set_reboot_status("false", 3); });

    EXPECT_TRUE(RestartWaiter::waitFastRestartDone());

    finisher.join();
}

// Each wait API must give up and return false when the enable flag stays "true"
// for longer than the 1-second budget passed in.
TEST(RestartWaiter, timeout)
{
// Flag a restart as in progress; nothing clears it while the waits below run.
set_reboot_status("true");
EXPECT_FALSE(RestartWaiter::waitRestartDone(1));
// Called before FastBootHelper exists, so this test does not set the fast boot key here.
EXPECT_FALSE(RestartWaiter::waitWarmRestartDone(1));

// From here on FAST_REBOOT|system exists, so waitFastRestartDone really waits.
FastBootHelper helper;
EXPECT_FALSE(RestartWaiter::waitFastRestartDone(1));

// Reset the flag so it is not left as "true" in the database.
set_reboot_status("false");
}

// Each wait API must return true right away when the enable flag is already "false".
TEST(RestartWaiter, successNoDelay)
{
set_reboot_status("false");
EXPECT_TRUE(RestartWaiter::waitRestartDone());
EXPECT_TRUE(RestartWaiter::waitWarmRestartDone());

// With the fast boot key present, waitFastRestartDone goes on to check the enable
// flag, which is "false", so it still returns true.
FastBootHelper helper;
EXPECT_TRUE(RestartWaiter::waitFastRestartDone());
}

// Each wait API must return true right away when the warm-restart-enable
// "system" entry does not exist at all.
TEST(RestartWaiter, successNoKey)
{
DBConnector db("STATE_DB", 0);
// Remove the whole entry so the "enable" field lookup finds nothing.
string key = string(STATE_WARM_RESTART_ENABLE_TABLE_NAME) + string("|system");
db.del({key});
EXPECT_TRUE(RestartWaiter::waitRestartDone());
EXPECT_TRUE(RestartWaiter::waitWarmRestartDone());

// Fast boot key present, enable entry still absent: nothing to wait for.
FastBootHelper helper;
EXPECT_TRUE(RestartWaiter::waitFastRestartDone());
}

// waitWarmRestartDone() must return true immediately when a fast boot is in
// progress, because it only waits for warm restart.
TEST(RestartWaiter, waitWarmButFastInProgress)
{
// Sets FAST_REBOOT|system for the lifetime of this test.
FastBootHelper helper;
EXPECT_TRUE(RestartWaiter::waitWarmRestartDone());
}

// waitFastRestartDone() must return true immediately when no fast boot is in progress.
// NOTE(review): relies on FAST_REBOOT|system not being left in STATE_DB by an
// earlier test or by the environment - confirm.
TEST(RestartWaiter, waitFastButFastNotInProgress)
{
EXPECT_TRUE(RestartWaiter::waitFastRestartDone());
}