diff --git a/orchagent/Makefile.am b/orchagent/Makefile.am index d431557b5c8..3723c764e61 100644 --- a/orchagent/Makefile.am +++ b/orchagent/Makefile.am @@ -56,7 +56,8 @@ orchagent_SOURCES = \ sfloworch.cpp \ chassisorch.cpp \ debugcounterorch.cpp \ - natorch.cpp + natorch.cpp \ + txmonorch.cpp orchagent_SOURCES += flex_counter/flex_counter_manager.cpp flex_counter/flex_counter_stat_manager.cpp orchagent_SOURCES += debug_counter/debug_counter.cpp debug_counter/drop_counter.cpp diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index 38b7e406854..1767748bd42 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -36,6 +36,7 @@ BufferOrch *gBufferOrch; SwitchOrch *gSwitchOrch; Directory gDirectory; NatOrch *gNatOrch; +TxMonOrch *gTxMonOrch; bool gIsNatSupported = false; @@ -75,6 +76,10 @@ bool OrchDaemon::init() gSwitchOrch = new SwitchOrch(m_applDb, APP_SWITCH_TABLE_NAME, stateDbSwitchTable); + TableConnector stateDbTxErr(m_stateDb, STATE_TX_ERROR_TABLE_NAME); + TableConnector confDbTxErr(m_configDb, CFG_TX_ERROR_TABLE_NAME); + gTxMonOrch = new TxMonOrch(confDbTxErr, stateDbTxErr); + const int portsorch_base_pri = 40; vector ports_tables = { @@ -227,7 +232,9 @@ bool OrchDaemon::init() * when iterating ConsumerMap. This is ensured implicitly by the order of keys in ordered map. * For cases when Orch has to process tables in specific order, like PortsOrch during warm start, it has to override Orch::doTask() */ - m_orchList = { gSwitchOrch, gCrmOrch, gPortsOrch, gBufferOrch, gIntfsOrch, gNeighOrch, gRouteOrch, copp_orch, tunnel_decap_orch, qos_orch, wm_orch, policer_orch, sflow_orch, debug_counter_orch}; + m_orchList = { gSwitchOrch, gCrmOrch, gPortsOrch, gBufferOrch, gIntfsOrch, gNeighOrch, + gRouteOrch, copp_orch, tunnel_decap_orch, qos_orch, wm_orch, policer_orch, + sflow_orch, debug_counter_orch, gTxMonOrch}; bool initialize_dtel = false; if (platform == BFN_PLATFORM_SUBSTRING || platform == VS_PLATFORM_SUBSTRING) diff --git a/orchagent/orchdaemon.h b/orchagent/orchdaemon.h index 3094692df69..9f891003c5a 100644 --- a/orchagent/orchdaemon.h +++ b/orchagent/orchdaemon.h @@ -28,6 +28,7 @@ #include "watermarkorch.h" #include "policerorch.h" #include "sfloworch.h" +#include "txmonorch.h" #include "debugcounterorch.h" #include "directory.h" #include "natorch.h" diff --git a/orchagent/txmonorch.cpp b/orchagent/txmonorch.cpp new file mode 100644 index 00000000000..29eb55d3af8 --- /dev/null +++ b/orchagent/txmonorch.cpp @@ -0,0 +1,230 @@ +#include "txmonorch.h" +#include "schema.h" +#include "logger.h" +#include "timer.h" +#include "converter.h" +#include "portsorch.h" + +extern PortsOrch* gPortsOrch; + +TxMonOrch::TxMonOrch(TableConnector confDbConnector, TableConnector stateDbConnector) : + Orch(confDbConnector.first, confDbConnector.second), + m_countersDb(COUNTERS_DB, DBConnector::DEFAULT_UNIXSOCKET, 0), + m_countersTable(&m_countersDb, COUNTERS_TABLE), + m_countersPortNameTable(&m_countersDb, COUNTERS_PORT_NAME_MAP), + m_portsStateTable(stateDbConnector.first, stateDbConnector.second), + m_pollPeriodSec(DEFAULT_POLLING_PERIOD), + m_thresholdPackets(DEFAULT_THRESHOLD) +{ + SWSS_LOG_ENTER(); + auto interv = timespec { .tv_sec = m_pollPeriodSec, .tv_nsec = 0 }; + m_timer = new SelectableTimer(interv); + auto executor = new ExecutableTimer(m_timer, this, "POLLING_TIMER"); + Orch::addExecutor(executor); + m_timer->start(); +} + +TxMonOrch::~TxMonOrch() +{ + SWSS_LOG_ENTER(); +} + +void TxMonOrch::setTimer(uint32_t interval) +{ + SWSS_LOG_ENTER(); + auto interv = timespec { .tv_sec = interval, .tv_nsec = 0 }; + + m_timer->setInterval(interv); + m_timer->reset(); + m_pollPeriodSec = interval; + + SWSS_LOG_DEBUG("m_pollPeriod set [%u]", m_pollPeriodSec); +} + +void TxMonOrch::handlePeriodUpdate(const vector& data) +{ + SWSS_LOG_ENTER(); + uint32_t periodToSet = 0; + + for (auto element : data) + { + const auto &field = fvField(element); + const auto &value = fvValue(element); + + if (field == "value") + { + periodToSet = stoi(value); + + if(periodToSet != m_pollPeriodSec) + { + setTimer(periodToSet); + } + } + else + { + SWSS_LOG_ERROR("Unexpected field %s", field.c_str()); + } + } +} + +void TxMonOrch::handleThresholdUpdate(const vector& data) +{ + SWSS_LOG_ENTER(); + for (auto element : data) + { + const auto &field = fvField(element); + const auto &value = fvValue(element); + + if (field == "value") + { + m_thresholdPackets = stoi(value); + SWSS_LOG_DEBUG("m_threshold set [%u]", m_thresholdPackets); + } + else + { + SWSS_LOG_ERROR("Unexpected field %s", field.c_str()); + } + } +} + +void TxMonOrch::createPortsMap() +{ + SWSS_LOG_ENTER(); + map &portsList = gPortsOrch->getAllPorts(); + for (auto &entry : portsList) + { + string name = entry.first; + Port p = entry.second; + string oidStr; + + if (p.m_type != Port::PHY) + { + continue; + } + + if (!m_countersPortNameTable.hget("", name, oidStr)) + { + SWSS_LOG_THROW("Failed to get port oid from counters DB"); + } + m_portsMap.emplace(p.m_alias, oidStr); + } +} + +void TxMonOrch::setPortsStateDb(string portAlias, PortState state) +{ + SWSS_LOG_ENTER(); + vector fieldValuesVector; + fieldValuesVector.emplace_back("port_state", stateNames[state]); + m_portsStateTable.set(portAlias, fieldValuesVector); +} + +void TxMonOrch::getTxErrCounters() +{ + SWSS_LOG_ENTER(); + string tx_err_count; + + for (auto &entry : m_portsMap) + { + string portAlias = entry.first; + string portOid = m_portsMap.find(portAlias)->second; + + if (!m_countersTable.hget(portOid, SAI_PORT_TX_ERR_STAT, tx_err_count)) + { + setPortsStateDb(portAlias, UNKNOWN); + SWSS_LOG_THROW("Could not get tx error counters of port %s", portAlias.c_str()); + } + + m_currTxErrCounters[portAlias] = stoul(tx_err_count); + } +} + +void TxMonOrch::updatePortsStateDb() +{ + SWSS_LOG_ENTER(); + for (auto &entry : m_currTxErrCounters) + { + PortState portState = OK; + string portAlias = entry.first; + uint64_t curr = entry.second; + uint64_t prev = 0; + + prev = m_prevTxErrCounters[portAlias]; + + /* save data for the next update */ + m_prevTxErrCounters[portAlias] = curr; + + if ((curr - prev) >= m_thresholdPackets) + { + portState = NOT_OK; + } + setPortsStateDb(portAlias, portState); + } +} + +void TxMonOrch::doTask(Consumer& consumer) +{ + SWSS_LOG_ENTER(); + try + { + auto it = consumer.m_toSync.begin(); + while (it != consumer.m_toSync.end()) + { + KeyOpFieldsValuesTuple t = it->second; + + string key = kfvKey(t); + string op = kfvOp(t); + vector fvs = kfvFieldsValues(t); + + if (op == SET_COMMAND) + { + if (key == "polling_period") + { + handlePeriodUpdate(fvs); + } + else if (key == "threshold") + { + handleThresholdUpdate(fvs); + } + else + { + SWSS_LOG_ERROR("Unexpected key %s", key.c_str()); + } + } + else + { + SWSS_LOG_ERROR("Unexpected operation %s", op.c_str()); + } + + consumer.m_toSync.erase(it++); + } + } + catch (const std::exception& e) + { + SWSS_LOG_ERROR("Runtime error: %s", e.what()); + } +} + +void TxMonOrch::doTask(SelectableTimer &timer) +{ + SWSS_LOG_ENTER(); + try + { + if (!gPortsOrch->allPortsReady()) { + SWSS_LOG_NOTICE("Ports are not ready yet"); + return; + } + + if (!isPortsMapInitialized) + { + createPortsMap(); + isPortsMapInitialized = true; + } + + getTxErrCounters(); + updatePortsStateDb(); + } + catch (const std::exception& e) + { + SWSS_LOG_ERROR("Runtime error: %s", e.what()); + } +} diff --git a/orchagent/txmonorch.h b/orchagent/txmonorch.h new file mode 100644 index 00000000000..bb1b58b1aef --- /dev/null +++ b/orchagent/txmonorch.h @@ -0,0 +1,58 @@ +#ifndef __TXMONORCH__ +#define __TXMONORCH__ + +#include "table.h" +#include "orch.h" +#include "selectabletimer.h" + +using namespace swss; +using namespace std; + +#define DEFAULT_POLLING_PERIOD 30 +#define DEFAULT_THRESHOLD 10 +#define STATES_NUMBER 3 +#define SAI_PORT_TX_ERR_STAT "SAI_PORT_STAT_IF_OUT_ERRORS" + +enum PortState {OK, NOT_OK, UNKNOWN}; +static const array stateNames = {"OK", "NOT_OK", "UNKNOWN"}; + +class TxMonOrch: public Orch +{ +public: + TxMonOrch(TableConnector confDbConnector, TableConnector stateDbConnector); + ~TxMonOrch(void); + void doTask(Consumer &consumer); + void doTask(SelectableTimer &timer); + +private: + DBConnector m_countersDb; + Table m_countersTable; + Table m_countersPortNameTable; + + /* port alias to port state */ + Table m_portsStateTable; + + SelectableTimer *m_timer = nullptr; + /* time in seconds to wait between counter samples */ + uint32_t m_pollPeriodSec; + uint32_t m_thresholdPackets; + + bool isPortsMapInitialized = false; + /* port alias to port oid */ + map m_portsMap; + + /* port alias to port tx-error stats */ + map m_currTxErrCounters; + map m_prevTxErrCounters; + + void handlePeriodUpdate(const vector& data); + void handleThresholdUpdate(const vector& data); + void setTimer(uint32_t interval); + void createPortsMap(); + void getTxErrCounters(); + void setPortsStateDb(string portAlias, PortState state); + void updatePortsStateDb(); +}; + +#endif +