From e5683d51696890b3e44517cb48512ff5d0238242 Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Sun, 13 Oct 2024 11:49:25 -0700 Subject: [PATCH 1/8] Add a delay between killing teamd processes When killing 10 or more teamd processes, add a delay of 0.1 seconds after every 10 kill signals/proceses. This is because in the LAG scale tests (in `ecmp/inner_hashing/test_inner_hashing_lag.py` in sonic-mgmt), it may create 100 LAGs, and when destroying them all, some of those LAGs may fail to be properly destroyed, leaving some stale port channels around. This seems to be because the netlink socket buffers on which the teamd processes get notifications become full with events of the other port channels/interfaces going down. As a workaround, add some delays in killing the teamd processes, so that the netlink buffers don't become full, causing messages to get dropped. This delay was randomly chosen, and it seems to work well with 100 LAGs on a KVM. It can probably made to be a bit more aggressive if needed (i.e. maybe 0.05 seconds every 20 processes). Signed-off-by: Saikrishna Arcot --- cfgmgr/teammgr.cpp | 54 ++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 31 deletions(-) diff --git a/cfgmgr/teammgr.cpp b/cfgmgr/teammgr.cpp index d72b522047e..4f4474cad20 100644 --- a/cfgmgr/teammgr.cpp +++ b/cfgmgr/teammgr.cpp @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include @@ -172,17 +174,29 @@ void TeamMgr::cleanTeamProcesses() SWSS_LOG_NOTICE("Cleaning up LAGs during shutdown..."); std::unordered_map aliasPidMap; + uint32_t sleepCounter = 0; for (const auto& alias: m_lagList) { - std::string res; pid_t pid; + if (++sleepCounter % 10 == 0) { + // Sleep for 100 milliseconds so as to not overwhelm the netlink + // socket buffers with events about interfaces going down + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + sleepCounter = 0; + } try { - std::stringstream cmd; - cmd << "cat " << shellquote("/var/run/teamd/" + alias + ".pid"); - EXEC_WITH_ERROR_THROW(cmd.str(), res); + ifstream pidFile("/var/run/teamd/" + alias + ".pid"); + if (pidFile.is_open()) { + pidFile >> pid; + aliasPidMap[alias] = pid; + SWSS_LOG_INFO("Read port channel %s pid %d", alias.c_str(), pid); + } else { + SWSS_LOG_NOTICE("Unable to read pid file for %s, skipping...", alias.c_str()); + continue; + } } catch (const std::exception &e) { @@ -191,31 +205,11 @@ void TeamMgr::cleanTeamProcesses() continue; } - try - { - pid = static_cast(std::stoul(res, nullptr, 10)); - aliasPidMap[alias] = pid; - - SWSS_LOG_INFO("Read port channel %s pid %d", alias.c_str(), pid); - } - catch (const std::exception &e) - { - SWSS_LOG_ERROR("Failed to read port channel %s pid: %s", alias.c_str(), e.what()); - continue; - } - - try - { - std::stringstream cmd; - cmd << "kill -TERM " << pid; - EXEC_WITH_ERROR_THROW(cmd.str(), res); - - SWSS_LOG_NOTICE("Sent SIGTERM to port channel %s pid %d", alias.c_str(), pid); - } - catch (const std::exception &e) - { - SWSS_LOG_ERROR("Failed to send SIGTERM to port channel %s pid %d: %s", alias.c_str(), pid, e.what()); + if (kill(pid, SIGTERM)) { + SWSS_LOG_ERROR("Failed to send SIGTERM to port channel %s pid %d: %s", alias.c_str(), pid, strerror(errno)); aliasPidMap.erase(alias); + } else { + SWSS_LOG_NOTICE("Sent SIGTERM to port channel %s pid %d", alias.c_str(), pid); } } @@ -228,9 +222,7 @@ void TeamMgr::cleanTeamProcesses() std::string res; SWSS_LOG_NOTICE("Waiting for port channel %s pid %d to stop...", alias.c_str(), pid); - - cmd << "tail -f --pid=" << pid << " /dev/null"; - EXEC_WITH_ERROR_THROW(cmd.str(), res); + waitpid(pid, NULL, 0); } SWSS_LOG_NOTICE("LAGs cleanup is done"); From f4fd3abcc31b0257256552b42967b06eb99cfd37 Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Mon, 21 Oct 2024 17:45:48 -0700 Subject: [PATCH 2/8] Update LAG removal code to use the same logic as cleaning up all LAGs Signed-off-by: Saikrishna Arcot --- cfgmgr/teammgr.cpp | 39 ++++++++++----------------------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/cfgmgr/teammgr.cpp b/cfgmgr/teammgr.cpp index 4f4474cad20..45c2b023844 100644 --- a/cfgmgr/teammgr.cpp +++ b/cfgmgr/teammgr.cpp @@ -654,38 +654,19 @@ bool TeamMgr::removeLag(const string &alias) string res; pid_t pid; - try { - std::stringstream cmd; - cmd << "cat " << shellquote("/var/run/teamd/" + alias + ".pid"); - EXEC_WITH_ERROR_THROW(cmd.str(), res); - } - catch (const std::exception &e) - { - SWSS_LOG_NOTICE("Failed to remove non-existent port channel %s pid...", alias.c_str()); - return false; - } - - try - { - pid = static_cast(std::stoul(res, nullptr, 10)); - SWSS_LOG_INFO("Read port channel %s pid %d", alias.c_str(), pid); - } - catch (const std::exception &e) - { - SWSS_LOG_ERROR("Failed to read port channel %s pid: %s", alias.c_str(), e.what()); - return false; + ifstream pidfile("/var/run/teamd/" + alias + ".pid"); + if (pidfile.is_open()) { + pidfile >> pid; + SWSS_LOG_INFO("Read port channel %s pid %d", alias.c_str(), pid); + } else { + SWSS_LOG_NOTICE("Failed to remove non-existent port channel %s pid...", alias.c_str()); + return false; + } } - try - { - std::stringstream cmd; - cmd << "kill -TERM " << pid; - EXEC_WITH_ERROR_THROW(cmd.str(), res); - } - catch (const std::exception &e) - { - SWSS_LOG_ERROR("Failed to send SIGTERM to port channel %s pid %d: %s", alias.c_str(), pid, e.what()); + if (kill(pid, SIGTERM)) { + SWSS_LOG_ERROR("Failed to send SIGTERM to port channel %s pid %d: %s", alias.c_str(), pid, strerror(errno)); return false; } From 7b6fc536551453092da459168c60a7a3afb5556c Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Mon, 21 Oct 2024 17:46:22 -0700 Subject: [PATCH 3/8] Update tests to test LAG cleanup and to test with the new code This requires overriding some libc functions and capturing information about kill signals sent or intercepting file open operations. Signe -off-by: Saikrishna Arcot --- tests/mock_tests/Makefile.am | 4 +- tests/mock_tests/teammgrd/teammgr_ut.cpp | 124 +++++++++++++++++++++-- 2 files changed, 120 insertions(+), 8 deletions(-) diff --git a/tests/mock_tests/Makefile.am b/tests/mock_tests/Makefile.am index 0f5afa44866..fc56cde5079 100644 --- a/tests/mock_tests/Makefile.am +++ b/tests/mock_tests/Makefile.am @@ -225,8 +225,8 @@ tests_teammgrd_SOURCES = teammgrd/teammgr_ut.cpp \ tests_teammgrd_INCLUDES = $(tests_INCLUDES) -I$(top_srcdir)/cfgmgr -I$(top_srcdir)/lib tests_teammgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_GTEST) $(CFLAGS_SAI) tests_teammgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_GTEST) $(CFLAGS_SAI) $(tests_teammgrd_INCLUDES) -tests_teammgrd_LDADD = $(LDADD_GTEST) $(LDADD_SAI) -lnl-genl-3 -lhiredis -lhiredis \ - -lswsscommon -lswsscommon -lgtest -lgtest_main -lzmq -lnl-3 -lnl-route-3 -lpthread -lgmock -lgmock_main +tests_teammgrd_LDADD = $(LDADD_GTEST) $(LDADD_SAI) -ldl -lhiredis \ + -lswsscommon -lgtest -lgtest_main -lzmq -lpthread -lgmock -lgmock_main ## fpmsyncd unit tests diff --git a/tests/mock_tests/teammgrd/teammgr_ut.cpp b/tests/mock_tests/teammgrd/teammgr_ut.cpp index 32f064f5526..0420c336952 100644 --- a/tests/mock_tests/teammgrd/teammgr_ut.cpp +++ b/tests/mock_tests/teammgrd/teammgr_ut.cpp @@ -1,20 +1,87 @@ #include "gtest/gtest.h" #include "../mock_table.h" #include "teammgr.h" +#include extern int (*callback)(const std::string &cmd, std::string &stdout); extern std::vector mockCallArgs; +static std::vector< std::pair > mockKillCommands; +static std::map pidFiles; + +static int (*callback_kill)(pid_t pid, int sig) = NULL; +static FILE* (*callback_fopen)(const char *pathname, const char *mode) = NULL; + +static int cb_kill(pid_t pid, int sig) +{ + mockKillCommands.push_back(std::make_pair(pid, sig)); + return 0; +} + +int kill(pid_t pid, int sig) +{ + if (callback_kill) { + return callback_kill(pid, sig); + } + int (*realfunc)(pid_t, int) = + (int(*)(pid_t, int))(dlsym (RTLD_NEXT, "kill")); + return realfunc(pid, sig); +} + +static FILE* cb_fopen(const char *pathname, const char *mode) +{ + auto pidFileSearch = pidFiles.find(pathname); + if (pidFileSearch != pidFiles.end()) { + return pidFileSearch->second; + } else { + return NULL; + } +} + +FILE* fopen(const char *pathname, const char *mode) +{ + if (callback_fopen) { + FILE *fd = callback_fopen(pathname, mode); + if (fd) { + return fd; + } + } + FILE* (*realfunc)(const char *, const char *) = + (FILE* (*)(const char *, const char *))(dlsym (RTLD_NEXT, "fopen")); + return realfunc(pathname, mode); +} + +FILE* fopen64(const char *pathname, const char *mode) +{ + if (callback_fopen) { + FILE *fd = callback_fopen(pathname, mode); + if (fd) { + return fd; + } + } + FILE* (*realfunc)(const char *, const char *) = + (FILE* (*)(const char *, const char *))(dlsym (RTLD_NEXT, "fopen64")); + return realfunc(pathname, mode); +} int cb(const std::string &cmd, std::string &stdout) { mockCallArgs.push_back(cmd); - if (cmd.find("/usr/bin/teamd -r -t PortChannel1") != std::string::npos) + if (cmd.find("/usr/bin/teamd -r -t PortChannel382") != std::string::npos) { + mkdir("/var/run/teamd", 0755); + std::FILE* pidFile = std::tmpfile(); + std::fputs("1234", pidFile); + std::rewind(pidFile); + pidFiles["/var/run/teamd/PortChannel382.pid"] = pidFile; return 1; } - else if (cmd.find("cat \"/var/run/teamd/PortChannel1.pid\"") != std::string::npos) + else if (cmd.find("/usr/bin/teamd -r -t PortChannel495") != std::string::npos) { - stdout = "1234"; + mkdir("/var/run/teamd", 0755); + std::FILE* pidFile = std::tmpfile(); + std::fputs("5678", pidFile); + std::rewind(pidFile); + pidFiles["/var/run/teamd/PortChannel495.pid"] = pidFile; return 0; } return 0; @@ -53,7 +120,18 @@ namespace teammgr_ut cfg_lag_tables = tables; mockCallArgs.clear(); + mockKillCommands.clear(); + pidFiles.clear(); callback = cb; + callback_kill = cb_kill; + callback_fopen = cb_fopen; + } + + virtual void TearDown() override + { + callback = NULL; + callback_kill = NULL; + callback_fopen = NULL; } }; @@ -61,18 +139,52 @@ namespace teammgr_ut { swss::TeamMgr teammgr(m_config_db.get(), m_app_db.get(), m_state_db.get(), cfg_lag_tables); swss::Table cfg_lag_table = swss::Table(m_config_db.get(), CFG_LAG_TABLE_NAME); - cfg_lag_table.set("PortChannel1", { { "admin_status", "up" }, + cfg_lag_table.set("PortChannel382", { { "admin_status", "up" }, { "mtu", "9100" }, { "lacp_key", "auto" }, { "min_links", "2" } }); teammgr.addExistingData(&cfg_lag_table); teammgr.doTask(); + int exec_cmd_called = 0; + for (auto cmd : mockCallArgs){ + if (cmd.find("/usr/bin/teamd -r -t PortChannel382") != std::string::npos) { + exec_cmd_called++; + } + } + ASSERT_EQ(exec_cmd_called, 1); int kill_cmd_called = 0; + for (auto killedProcess : mockKillCommands) { + if (killedProcess.first == 1234) { + kill_cmd_called++; + } + } + ASSERT_EQ(kill_cmd_called, 1); + } + + TEST_F(TeamMgrTest, testProcessCleanupAfterAddLag) + { + swss::TeamMgr teammgr(m_config_db.get(), m_app_db.get(), m_state_db.get(), cfg_lag_tables); + swss::Table cfg_lag_table = swss::Table(m_config_db.get(), CFG_LAG_TABLE_NAME); + cfg_lag_table.set("PortChannel495", { { "admin_status", "up" }, + { "mtu", "9100" }, + { "lacp_key", "auto" }, + { "min_links", "2" } }); + teammgr.addExistingData(&cfg_lag_table); + teammgr.doTask(); + int exec_cmd_called = 0; for (auto cmd : mockCallArgs){ - if (cmd.find("kill -TERM 1234") != std::string::npos){ + if (cmd.find("/usr/bin/teamd -r -t PortChannel495") != std::string::npos) { + exec_cmd_called++; + } + } + ASSERT_EQ(exec_cmd_called, 1); + teammgr.cleanTeamProcesses(); + int kill_cmd_called = 0; + for (auto killedProcess : mockKillCommands) { + if (killedProcess.first == 5678) { kill_cmd_called++; } } ASSERT_EQ(kill_cmd_called, 1); } -} \ No newline at end of file +} From c5d84cfd3ccf5904cfcdfcc9606299b2bd56e8ca Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Tue, 22 Oct 2024 21:00:19 -0700 Subject: [PATCH 4/8] Add more tests to cover more cases Signed-off-by: Saikrishna Arcot --- tests/mock_tests/teammgrd/teammgr_ut.cpp | 131 ++++++++++++++++------- 1 file changed, 94 insertions(+), 37 deletions(-) diff --git a/tests/mock_tests/teammgrd/teammgr_ut.cpp b/tests/mock_tests/teammgrd/teammgr_ut.cpp index 0420c336952..ee9c5528e4f 100644 --- a/tests/mock_tests/teammgrd/teammgr_ut.cpp +++ b/tests/mock_tests/teammgrd/teammgr_ut.cpp @@ -9,7 +9,7 @@ static std::vector< std::pair > mockKillCommands; static std::map pidFiles; static int (*callback_kill)(pid_t pid, int sig) = NULL; -static FILE* (*callback_fopen)(const char *pathname, const char *mode) = NULL; +static std::pair (*callback_fopen)(const char *pathname, const char *mode) = NULL; static int cb_kill(pid_t pid, int sig) { @@ -27,22 +27,25 @@ int kill(pid_t pid, int sig) return realfunc(pid, sig); } -static FILE* cb_fopen(const char *pathname, const char *mode) +static std::pair cb_fopen(const char *pathname, const char *mode) { auto pidFileSearch = pidFiles.find(pathname); if (pidFileSearch != pidFiles.end()) { - return pidFileSearch->second; + if (!pidFileSearch->second) { + errno = ENOENT; + } + return std::make_pair(true, pidFileSearch->second); } else { - return NULL; + return std::make_pair(false, (FILE*)NULL); } } FILE* fopen(const char *pathname, const char *mode) { if (callback_fopen) { - FILE *fd = callback_fopen(pathname, mode); - if (fd) { - return fd; + std::pair callback_fd = callback_fopen(pathname, mode); + if (callback_fd.first) { + return callback_fd.second; } } FILE* (*realfunc)(const char *, const char *) = @@ -53,9 +56,9 @@ FILE* fopen(const char *pathname, const char *mode) FILE* fopen64(const char *pathname, const char *mode) { if (callback_fopen) { - FILE *fd = callback_fopen(pathname, mode); - if (fd) { - return fd; + std::pair callback_fd = callback_fopen(pathname, mode); + if (callback_fd.first) { + return callback_fd.second; } } FILE* (*realfunc)(const char *, const char *) = @@ -75,6 +78,11 @@ int cb(const std::string &cmd, std::string &stdout) pidFiles["/var/run/teamd/PortChannel382.pid"] = pidFile; return 1; } + else if (cmd.find("/usr/bin/teamd -r -t PortChannel812") != std::string::npos) + { + pidFiles["/var/run/teamd/PortChannel812.pid"] = NULL; + return 1; + } else if (cmd.find("/usr/bin/teamd -r -t PortChannel495") != std::string::npos) { mkdir("/var/run/teamd", 0755); @@ -84,6 +92,19 @@ int cb(const std::string &cmd, std::string &stdout) pidFiles["/var/run/teamd/PortChannel495.pid"] = pidFile; return 0; } + else if (cmd.find("/usr/bin/teamd -r -t PortChannel198") != std::string::npos) + { + pidFiles["/var/run/teamd/PortChannel198.pid"] = NULL; + } + else { + for (int i = 600; i < 620; i++) + { + if (cmd.find(std::string("/usr/bin/teamd -r -t PortChannel") + std::to_string(i)) != std::string::npos) + { + pidFiles[std::string("/var/run/teamd/PortChannel") + std::to_string(i) + std::string(".pid")] = NULL; + } + } + } return 0; } @@ -145,20 +166,28 @@ namespace teammgr_ut { "min_links", "2" } }); teammgr.addExistingData(&cfg_lag_table); teammgr.doTask(); - int exec_cmd_called = 0; - for (auto cmd : mockCallArgs){ - if (cmd.find("/usr/bin/teamd -r -t PortChannel382") != std::string::npos) { - exec_cmd_called++; - } - } - ASSERT_EQ(exec_cmd_called, 1); - int kill_cmd_called = 0; - for (auto killedProcess : mockKillCommands) { - if (killedProcess.first == 1234) { - kill_cmd_called++; - } - } - ASSERT_EQ(kill_cmd_called, 1); + ASSERT_NE(mockCallArgs.size(), 0); + EXPECT_NE(mockCallArgs.front().find("/usr/bin/teamd -r -t PortChannel382"), std::string::npos); + EXPECT_EQ(mockCallArgs.size(), 1); + EXPECT_EQ(mockKillCommands.front().first, 1234); + EXPECT_EQ(mockKillCommands.size(), 1); + } + + TEST_F(TeamMgrTest, testProcessPidFileMissingAfterAddLagFailure) + { + swss::TeamMgr teammgr(m_config_db.get(), m_app_db.get(), m_state_db.get(), cfg_lag_tables); + swss::Table cfg_lag_table = swss::Table(m_config_db.get(), CFG_LAG_TABLE_NAME); + cfg_lag_table.set("PortChannel812", { { "admin_status", "up" }, + { "mtu", "9100" }, + { "fallback", "true" }, + { "lacp_key", "auto" }, + { "min_links", "1" } }); + teammgr.addExistingData(&cfg_lag_table); + teammgr.doTask(); + ASSERT_NE(mockCallArgs.size(), 0); + EXPECT_NE(mockCallArgs.front().find("/usr/bin/teamd -r -t PortChannel812"), std::string::npos); + EXPECT_EQ(mockCallArgs.size(), 1); + EXPECT_EQ(mockKillCommands.size(), 0); } TEST_F(TeamMgrTest, testProcessCleanupAfterAddLag) @@ -171,20 +200,48 @@ namespace teammgr_ut { "min_links", "2" } }); teammgr.addExistingData(&cfg_lag_table); teammgr.doTask(); - int exec_cmd_called = 0; - for (auto cmd : mockCallArgs){ - if (cmd.find("/usr/bin/teamd -r -t PortChannel495") != std::string::npos) { - exec_cmd_called++; - } - } - ASSERT_EQ(exec_cmd_called, 1); + ASSERT_EQ(mockCallArgs.size(), 3); + ASSERT_NE(mockCallArgs.front().find("/usr/bin/teamd -r -t PortChannel495"), std::string::npos); teammgr.cleanTeamProcesses(); - int kill_cmd_called = 0; - for (auto killedProcess : mockKillCommands) { - if (killedProcess.first == 5678) { - kill_cmd_called++; - } + EXPECT_EQ(mockKillCommands.size(), 1); + EXPECT_EQ(mockKillCommands.front().first, 5678); + } + + TEST_F(TeamMgrTest, testProcessPidFileMissingDuringCleanup) + { + swss::TeamMgr teammgr(m_config_db.get(), m_app_db.get(), m_state_db.get(), cfg_lag_tables); + swss::Table cfg_lag_table = swss::Table(m_config_db.get(), CFG_LAG_TABLE_NAME); + cfg_lag_table.set("PortChannel198", { { "admin_status", "up" }, + { "mtu", "9100" }, + { "fallback", "true" }, + { "lacp_key", "auto" }, + { "min_links", "1" } }); + teammgr.addExistingData(&cfg_lag_table); + teammgr.doTask(); + ASSERT_NE(mockCallArgs.size(), 0); + EXPECT_NE(mockCallArgs.front().find("/usr/bin/teamd -r -t PortChannel198"), std::string::npos); + EXPECT_EQ(mockCallArgs.size(), 3); + teammgr.cleanTeamProcesses(); + EXPECT_EQ(mockKillCommands.size(), 0); + } + + TEST_F(TeamMgrTest, testSleepDuringCleanup) + { + swss::TeamMgr teammgr(m_config_db.get(), m_app_db.get(), m_state_db.get(), cfg_lag_tables); + swss::Table cfg_lag_table = swss::Table(m_config_db.get(), CFG_LAG_TABLE_NAME); + for (int i = 600; i < 620; i++) + { + cfg_lag_table.set(std::string("PortChannel") + std::to_string(i), { { "admin_status", "up" }, + { "mtu", "9100" }, + { "lacp_key", "auto" } }); } - ASSERT_EQ(kill_cmd_called, 1); + teammgr.addExistingData(&cfg_lag_table); + teammgr.doTask(); + ASSERT_EQ(mockCallArgs.size(), 60); + std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); + teammgr.cleanTeamProcesses(); + std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); + EXPECT_EQ(mockKillCommands.size(), 0); + EXPECT_GE(std::chrono::duration_cast(end - begin).count(), 200); } } From e7ce08dfcc24c213f37356d5f5f685e60d9aec67 Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Wed, 13 Nov 2024 14:16:58 -0800 Subject: [PATCH 5/8] Wait 200ms instead of 100ms, and fix teamd wait code 100ms might not be enough on slow systems for the teamd shutdown sequence to actually be staggered. Signed-off-by: Saikrishna Arcot --- cfgmgr/teammgr.cpp | 14 ++++++-------- tests/mock_tests/teammgrd/teammgr_ut.cpp | 15 +++++++++++---- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/cfgmgr/teammgr.cpp b/cfgmgr/teammgr.cpp index 45c2b023844..00f2dcae912 100644 --- a/cfgmgr/teammgr.cpp +++ b/cfgmgr/teammgr.cpp @@ -173,7 +173,7 @@ void TeamMgr::cleanTeamProcesses() SWSS_LOG_ENTER(); SWSS_LOG_NOTICE("Cleaning up LAGs during shutdown..."); - std::unordered_map aliasPidMap; + std::unordered_map aliasPidMap; uint32_t sleepCounter = 0; for (const auto& alias: m_lagList) @@ -182,7 +182,7 @@ void TeamMgr::cleanTeamProcesses() if (++sleepCounter % 10 == 0) { // Sleep for 100 milliseconds so as to not overwhelm the netlink // socket buffers with events about interfaces going down - std::this_thread::sleep_for(std::chrono::milliseconds(100)); + std::this_thread::sleep_for(std::chrono::milliseconds(200)); sleepCounter = 0; } @@ -218,11 +218,11 @@ void TeamMgr::cleanTeamProcesses() const auto &alias = cit.first; const auto &pid = cit.second; - std::stringstream cmd; - std::string res; - SWSS_LOG_NOTICE("Waiting for port channel %s pid %d to stop...", alias.c_str(), pid); - waitpid(pid, NULL, 0); + + while (!kill(pid, 0)) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } } SWSS_LOG_NOTICE("LAGs cleanup is done"); @@ -650,8 +650,6 @@ bool TeamMgr::removeLag(const string &alias) { SWSS_LOG_ENTER(); - stringstream cmd; - string res; pid_t pid; { diff --git a/tests/mock_tests/teammgrd/teammgr_ut.cpp b/tests/mock_tests/teammgrd/teammgr_ut.cpp index ee9c5528e4f..c34c31ac795 100644 --- a/tests/mock_tests/teammgrd/teammgr_ut.cpp +++ b/tests/mock_tests/teammgrd/teammgr_ut.cpp @@ -14,7 +14,12 @@ static std::pair (*callback_fopen)(const char *pathname, const char static int cb_kill(pid_t pid, int sig) { mockKillCommands.push_back(std::make_pair(pid, sig)); - return 0; + if (!sig) { + errno = ESRCH; + return -1; + } else { + return 0; + } } int kill(pid_t pid, int sig) @@ -169,8 +174,9 @@ namespace teammgr_ut ASSERT_NE(mockCallArgs.size(), 0); EXPECT_NE(mockCallArgs.front().find("/usr/bin/teamd -r -t PortChannel382"), std::string::npos); EXPECT_EQ(mockCallArgs.size(), 1); - EXPECT_EQ(mockKillCommands.front().first, 1234); EXPECT_EQ(mockKillCommands.size(), 1); + EXPECT_EQ(mockKillCommands.front().first, 1234); + EXPECT_EQ(mockKillCommands.front().second, SIGTERM); } TEST_F(TeamMgrTest, testProcessPidFileMissingAfterAddLagFailure) @@ -203,8 +209,9 @@ namespace teammgr_ut ASSERT_EQ(mockCallArgs.size(), 3); ASSERT_NE(mockCallArgs.front().find("/usr/bin/teamd -r -t PortChannel495"), std::string::npos); teammgr.cleanTeamProcesses(); - EXPECT_EQ(mockKillCommands.size(), 1); + EXPECT_EQ(mockKillCommands.size(), 2); EXPECT_EQ(mockKillCommands.front().first, 5678); + EXPECT_EQ(mockKillCommands.front().second, SIGTERM); } TEST_F(TeamMgrTest, testProcessPidFileMissingDuringCleanup) @@ -242,6 +249,6 @@ namespace teammgr_ut teammgr.cleanTeamProcesses(); std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); EXPECT_EQ(mockKillCommands.size(), 0); - EXPECT_GE(std::chrono::duration_cast(end - begin).count(), 200); + EXPECT_GE(std::chrono::duration_cast(end - begin).count(), 400); } } From 9f6ab64960929cc1abf534cdfeb0a31801bb025f Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Thu, 2 Jan 2025 19:45:49 -0800 Subject: [PATCH 6/8] Try 10ms sleep Signed-off-by: Saikrishna Arcot --- cfgmgr/teammgr.cpp | 10 +++------- tests/mock_tests/teammgrd/teammgr_ut.cpp | 2 +- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/cfgmgr/teammgr.cpp b/cfgmgr/teammgr.cpp index 00f2dcae912..0c1f99edf55 100644 --- a/cfgmgr/teammgr.cpp +++ b/cfgmgr/teammgr.cpp @@ -174,17 +174,13 @@ void TeamMgr::cleanTeamProcesses() SWSS_LOG_NOTICE("Cleaning up LAGs during shutdown..."); std::unordered_map aliasPidMap; - uint32_t sleepCounter = 0; for (const auto& alias: m_lagList) { pid_t pid; - if (++sleepCounter % 10 == 0) { - // Sleep for 100 milliseconds so as to not overwhelm the netlink - // socket buffers with events about interfaces going down - std::this_thread::sleep_for(std::chrono::milliseconds(200)); - sleepCounter = 0; - } + // Sleep for 10 milliseconds so as to not overwhelm the netlink + // socket buffers with events about interfaces going down + std::this_thread::sleep_for(std::chrono::milliseconds(10)); try { diff --git a/tests/mock_tests/teammgrd/teammgr_ut.cpp b/tests/mock_tests/teammgrd/teammgr_ut.cpp index c34c31ac795..f41b9577adb 100644 --- a/tests/mock_tests/teammgrd/teammgr_ut.cpp +++ b/tests/mock_tests/teammgrd/teammgr_ut.cpp @@ -249,6 +249,6 @@ namespace teammgr_ut teammgr.cleanTeamProcesses(); std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); EXPECT_EQ(mockKillCommands.size(), 0); - EXPECT_GE(std::chrono::duration_cast(end - begin).count(), 400); + EXPECT_GE(std::chrono::duration_cast(end - begin).count(), 200); } } From 702ffda59627523274bb96a58ddc4bda237b2f89 Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Wed, 8 Jan 2025 12:03:25 -0800 Subject: [PATCH 7/8] Fix code style Signed-off-by: Saikrishna Arcot --- cfgmgr/teammgr.cpp | 17 +++++++++---- tests/mock_tests/teammgrd/teammgr_ut.cpp | 32 ++++++++++++++++-------- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/cfgmgr/teammgr.cpp b/cfgmgr/teammgr.cpp index 0c1f99edf55..d29a8fed89a 100644 --- a/cfgmgr/teammgr.cpp +++ b/cfgmgr/teammgr.cpp @@ -185,11 +185,14 @@ void TeamMgr::cleanTeamProcesses() try { ifstream pidFile("/var/run/teamd/" + alias + ".pid"); - if (pidFile.is_open()) { + if (pidFile.is_open()) + { pidFile >> pid; aliasPidMap[alias] = pid; SWSS_LOG_INFO("Read port channel %s pid %d", alias.c_str(), pid); - } else { + } + else + { SWSS_LOG_NOTICE("Unable to read pid file for %s, skipping...", alias.c_str()); continue; } @@ -201,10 +204,13 @@ void TeamMgr::cleanTeamProcesses() continue; } - if (kill(pid, SIGTERM)) { + if (kill(pid, SIGTERM)) + { SWSS_LOG_ERROR("Failed to send SIGTERM to port channel %s pid %d: %s", alias.c_str(), pid, strerror(errno)); aliasPidMap.erase(alias); - } else { + } + else + { SWSS_LOG_NOTICE("Sent SIGTERM to port channel %s pid %d", alias.c_str(), pid); } } @@ -216,7 +222,8 @@ void TeamMgr::cleanTeamProcesses() SWSS_LOG_NOTICE("Waiting for port channel %s pid %d to stop...", alias.c_str(), pid); - while (!kill(pid, 0)) { + while (!kill(pid, 0)) + { std::this_thread::sleep_for(std::chrono::milliseconds(10)); } } diff --git a/tests/mock_tests/teammgrd/teammgr_ut.cpp b/tests/mock_tests/teammgrd/teammgr_ut.cpp index f41b9577adb..ca81f25ac6d 100644 --- a/tests/mock_tests/teammgrd/teammgr_ut.cpp +++ b/tests/mock_tests/teammgrd/teammgr_ut.cpp @@ -14,17 +14,21 @@ static std::pair (*callback_fopen)(const char *pathname, const char static int cb_kill(pid_t pid, int sig) { mockKillCommands.push_back(std::make_pair(pid, sig)); - if (!sig) { + if (!sig) + { errno = ESRCH; return -1; - } else { + } + else + { return 0; } } int kill(pid_t pid, int sig) { - if (callback_kill) { + if (callback_kill) + { return callback_kill(pid, sig); } int (*realfunc)(pid_t, int) = @@ -35,21 +39,27 @@ int kill(pid_t pid, int sig) static std::pair cb_fopen(const char *pathname, const char *mode) { auto pidFileSearch = pidFiles.find(pathname); - if (pidFileSearch != pidFiles.end()) { - if (!pidFileSearch->second) { + if (pidFileSearch != pidFiles.end()) + { + if (!pidFileSearch->second) + { errno = ENOENT; } return std::make_pair(true, pidFileSearch->second); - } else { + } + else + { return std::make_pair(false, (FILE*)NULL); } } FILE* fopen(const char *pathname, const char *mode) { - if (callback_fopen) { + if (callback_fopen) + { std::pair callback_fd = callback_fopen(pathname, mode); - if (callback_fd.first) { + if (callback_fd.first) + { return callback_fd.second; } } @@ -60,9 +70,11 @@ FILE* fopen(const char *pathname, const char *mode) FILE* fopen64(const char *pathname, const char *mode) { - if (callback_fopen) { + if (callback_fopen) + { std::pair callback_fd = callback_fopen(pathname, mode); - if (callback_fd.first) { + if (callback_fd.first) + { return callback_fd.second; } } From e4b838b5b1ef239771e60b241a084b24f1bb4c6b Mon Sep 17 00:00:00 2001 From: Saikrishna Arcot Date: Thu, 9 Jan 2025 17:27:18 -0800 Subject: [PATCH 8/8] Some more coding style fixes Signed-off-by: Saikrishna Arcot --- cfgmgr/teammgr.cpp | 10 +++++++--- tests/mock_tests/teammgrd/teammgr_ut.cpp | 3 ++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cfgmgr/teammgr.cpp b/cfgmgr/teammgr.cpp index d29a8fed89a..f6c6394cdb2 100644 --- a/cfgmgr/teammgr.cpp +++ b/cfgmgr/teammgr.cpp @@ -657,16 +657,20 @@ bool TeamMgr::removeLag(const string &alias) { ifstream pidfile("/var/run/teamd/" + alias + ".pid"); - if (pidfile.is_open()) { + if (pidfile.is_open()) + { pidfile >> pid; SWSS_LOG_INFO("Read port channel %s pid %d", alias.c_str(), pid); - } else { + } + else + { SWSS_LOG_NOTICE("Failed to remove non-existent port channel %s pid...", alias.c_str()); return false; } } - if (kill(pid, SIGTERM)) { + if (kill(pid, SIGTERM)) + { SWSS_LOG_ERROR("Failed to send SIGTERM to port channel %s pid %d: %s", alias.c_str(), pid, strerror(errno)); return false; } diff --git a/tests/mock_tests/teammgrd/teammgr_ut.cpp b/tests/mock_tests/teammgrd/teammgr_ut.cpp index ca81f25ac6d..a40f39f4841 100644 --- a/tests/mock_tests/teammgrd/teammgr_ut.cpp +++ b/tests/mock_tests/teammgrd/teammgr_ut.cpp @@ -113,7 +113,8 @@ int cb(const std::string &cmd, std::string &stdout) { pidFiles["/var/run/teamd/PortChannel198.pid"] = NULL; } - else { + else + { for (int i = 600; i < 620; i++) { if (cmd.find(std::string("/usr/bin/teamd -r -t PortChannel") + std::to_string(i)) != std::string::npos)