Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions syncd/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ endif

noinst_LIBRARIES = libSyncd.a libSyncdRequestShutdown.a
libSyncd_a_SOURCES = \
WatchdogScope.cpp \
SaiSwitchInterface.cpp \
ZeroMQSelectableChannel.cpp \
RedisSelectableChannel.cpp \
Expand Down
29 changes: 17 additions & 12 deletions syncd/Syncd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#include "RedisSelectableChannel.h"
#include "ZeroMQSelectableChannel.h"
#include "PerformanceIntervalTimer.h"
#include "TimerWatchdog.h"
#include "WatchdogScope.h"

#include "sairediscommon.h"

Expand Down Expand Up @@ -50,7 +50,8 @@ Syncd::Syncd(
m_asicInitViewMode(false), // by default we are in APPLY view mode
m_vendorSai(vendorSai),
m_veryFirstRun(false),
m_enableSyncMode(false)
m_enableSyncMode(false),
m_timerWatchdog(30 * 1000000) // watch for executions over 30 seconds
{
SWSS_LOG_ENTER();

Expand Down Expand Up @@ -317,6 +318,8 @@ sai_status_t Syncd::processSingleEvent(
return SAI_STATUS_SUCCESS;
}

WatchdogScope ws(m_timerWatchdog, op + ":" + key);

if (op == REDIS_ASIC_STATE_COMMAND_CREATE)
return processQuadEvent(SAI_COMMON_API_CREATE, kco);

Expand Down Expand Up @@ -1950,6 +1953,8 @@ void Syncd::processFlexCounterGroupEvent( // TODO must be moved to go via ASIC c
auto& op = kfvOp(kco);
auto& values = kfvFieldsValues(kco);

WatchdogScope ws(m_timerWatchdog, op + ":" + groupName);

if (op == SET_COMMAND)
{
m_manager->addCounterPlugin(groupName, values);
Expand Down Expand Up @@ -1978,6 +1983,8 @@ void Syncd::processFlexCounterEvent( // TODO must be moved to go via ASIC channe
auto& key = kfvKey(kco);
auto& op = kfvOp(kco);

WatchdogScope ws(m_timerWatchdog, op + ":" + key);

auto delimiter = key.find_first_of(":");

if (delimiter == std::string::npos)
Expand Down Expand Up @@ -4366,20 +4373,16 @@ void Syncd::run()
runMainLoop = false;
}

TimerWatchdog twd(30 * 1000000); // watch for executions over 30 seconds
m_timerWatchdog.setCallback(timerWatchdogCallback);

twd.setCallback(timerWatchdogCallback);

while(runMainLoop)
while (runMainLoop)
{
try
{
swss::Selectable *sel = NULL;

int result = s->select(&sel);

twd.setStartTime();

if (sel == m_restartQuery.get())
{
/*
Expand All @@ -4399,6 +4402,8 @@ void Syncd::run()

SWSS_LOG_NOTICE("drained queue");

WatchdogScope ws(m_timerWatchdog, "restart query");

shutdownType = handleRestartQuery(*m_restartQuery);

if (shutdownType != SYNCD_RESTART_TYPE_PRE_SHUTDOWN)
Expand Down Expand Up @@ -4468,8 +4473,6 @@ void Syncd::run()
{
SWSS_LOG_ERROR("select failed: %d", result);
}

twd.setEndTime();
}
catch(const std::exception &e)
{
Expand All @@ -4486,11 +4489,11 @@ void Syncd::run()

// make sure that if second exception will arise, then we break the loop
m_commandLineOptions->m_disableExitSleep = true;

twd.setEndTime();
}
}

WatchdogScope ws(m_timerWatchdog, "shutting down syncd");

if (shutdownType == SYNCD_RESTART_TYPE_WARM)
{
const char *warmBootWriteFile = profileGetValue(0, SAI_KEY_WARM_BOOT_WRITE_FILE);
Expand Down Expand Up @@ -4566,6 +4569,8 @@ syncd_restart_type_t Syncd::handleRestartQuery(

restartQuery.pop(op, data, values);

m_timerWatchdog.setEventName(op + ":" + data);

SWSS_LOG_NOTICE("received %s switch shutdown event", op.c_str());

return RequestShutdownCommandLineOptions::stringToRestartType(op);
Expand Down
3 changes: 3 additions & 0 deletions syncd/Syncd.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "BreakConfig.h"
#include "NotificationProducerBase.h"
#include "SelectableChannel.h"
#include "TimerWatchdog.h"

#include "meta/SaiAttributeList.h"

Expand Down Expand Up @@ -489,5 +490,7 @@ namespace syncd
std::shared_ptr<sairedis::ContextConfig> m_contextConfig;

std::shared_ptr<BreakConfig> m_breakConfig;

TimerWatchdog m_timerWatchdog;
};
}
39 changes: 31 additions & 8 deletions syncd/TimerWatchdog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#include <chrono>

#define MUTEX std::lock_guard<std::mutex> _lock(m_mutex);

using namespace syncd;

TimerWatchdog::TimerWatchdog(
Expand Down Expand Up @@ -35,30 +37,54 @@ TimerWatchdog::~TimerWatchdog()

void TimerWatchdog::setStartTime()
{
MUTEX;
SWSS_LOG_ENTER();

do
{
m_startTimestamp = getTimeSinceEpoch();
}
while (m_startTimestamp <= m_endTimestamp); // make sure new start time is always past last end time

}

void TimerWatchdog::setEndTime()
{
MUTEX;
SWSS_LOG_ENTER();

do
{
m_endTimestamp = getTimeSinceEpoch();
}
while (m_endTimestamp <= m_startTimestamp); // make sure new end time is always past last start time

auto span = m_endTimestamp - m_startTimestamp;

if (span > m_warnTimespan)
{
SWSS_LOG_ERROR("event '%s' took %ld ms to execute", m_eventName.c_str(), span/1000);
}
else if (span > 50*1000)
{
SWSS_LOG_WARN("event '%s' took %ld ms to execute", m_eventName.c_str(), span/1000);
}

m_eventName = "";
}

void TimerWatchdog::setEventName(
_In_ const std::string& eventName)
{
MUTEX;
SWSS_LOG_ENTER();

m_eventName = eventName;
}

void TimerWatchdog::setCallback(
_In_ std::function<void(uint64_t)> callback)
{
MUTEX;
SWSS_LOG_ENTER();

m_callback = callback;
Expand All @@ -74,6 +100,8 @@ void TimerWatchdog::threadFunction()
{
std::this_thread::sleep_for(std::chrono::seconds(1));

MUTEX; // to protect event name

// we make local copies, since executing functions can be so fast that
// when we will read second time start timestamp it can be different
// than previous one
Expand All @@ -94,7 +122,7 @@ void TimerWatchdog::threadFunction()

span = now - start; // this must be always non negative

SWSS_LOG_NOTICE("time span %ld ms", span/1000);
SWSS_LOG_NOTICE("time span %ld ms for '%s'", span/1000, m_eventName.c_str()); // TODO remove this

if (span < 0)
SWSS_LOG_THROW("negative span 'now - start': %ld - %ld", now, start);
Expand All @@ -105,12 +133,7 @@ void TimerWatchdog::threadFunction()

// function probably hanged

SWSS_LOG_WARN("timespan WD exceeded %ld ms", span/1000);

auto callback = m_callback;

if (callback)
callback(span);
SWSS_LOG_ERROR("time span WD exceeded %ld ms for %s", span/1000, m_eventName.c_str());
}

continue;
Expand Down
8 changes: 8 additions & 0 deletions syncd/TimerWatchdog.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <thread>
#include <atomic>
#include <functional>
#include <mutex>

namespace syncd
{
Expand All @@ -23,6 +24,9 @@ namespace syncd

void setEndTime();

void setEventName(
_In_ const std::string& eventName);

void setCallback(
_In_ std::function<void(uint64_t)> callback);

Expand Down Expand Up @@ -51,5 +55,9 @@ namespace syncd
std::shared_ptr<std::thread> m_thread;

std::function<void(uint64_t)> m_callback;

std::string m_eventName;

std::mutex m_mutex;
};
}
22 changes: 22 additions & 0 deletions syncd/WatchdogScope.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#include "WatchdogScope.h"

using namespace syncd;

WatchdogScope::WatchdogScope(
_In_ TimerWatchdog& tw,
_In_ const std::string eventName):
m_tw(tw)
{
// SWSS_LOG_ENTER(); // disabled

m_tw.setStartTime();

m_tw.setEventName(eventName);
}

WatchdogScope::~WatchdogScope()
{
// SWSS_LOG_ENTER(); // disabled

m_tw.setEndTime();
}
21 changes: 21 additions & 0 deletions syncd/WatchdogScope.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#pragma once

#include "TimerWatchdog.h"

namespace syncd
{
class WatchdogScope
{
public:

WatchdogScope(
_In_ TimerWatchdog& tw,
_In_ const std::string eventName);

~WatchdogScope();

private:

TimerWatchdog& m_tw;
};
}