diff --git a/orchagent/Makefile.am b/orchagent/Makefile.am
index 9524a61a196..2b744f5a3e2 100644
--- a/orchagent/Makefile.am
+++ b/orchagent/Makefile.am
@@ -94,7 +94,8 @@ orchagent_SOURCES = \
             bfdorch.cpp \
             srv6orch.cpp \
             response_publisher.cpp \
-            nvgreorch.cpp
+            nvgreorch.cpp \
+            txmonitororch.cpp
 
 orchagent_SOURCES += flex_counter/flex_counter_manager.cpp flex_counter/flex_counter_stat_manager.cpp flex_counter/flow_counter_handler.cpp flex_counter/flowcounterrouteorch.cpp
 orchagent_SOURCES += debug_counter/debug_counter.cpp debug_counter/drop_counter.cpp
diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp
index 147c87459c6..0d1aef98099 100644
--- a/orchagent/orchdaemon.cpp
+++ b/orchagent/orchdaemon.cpp
@@ -54,6 +54,7 @@ BfdOrch *gBfdOrch;
 Srv6Orch *gSrv6Orch;
 FlowCounterRouteOrch *gFlowCounterRouteOrch;
 DebugCounterOrch *gDebugCounterOrch;
+TxMonitorOrch *gTxMonitorOrch;
 
 bool gIsNatSupported = false;
 
@@ -331,6 +332,10 @@ bool OrchDaemon::init()
 
     gNhgMapOrch = new NhgMapOrch(m_applDb, APP_FC_TO_NHG_INDEX_MAP_TABLE_NAME);
 
+    TableConnector txMonitoringConfig(m_configDb, CFG_TX_ERRORS_MONITORING_TABLE_NAME);
+    TableConnector txErrorsStatus(m_stateDb, STATE_TX_ERRORS_STATUS_TABLE_NAME);
+    gTxMonitorOrch = &TxMonitorOrch::getInstance(txMonitoringConfig, txErrorsStatus);
+
     /*
      * The order of the orch list is important for state restore of warm start and
      * the queued processing in m_toSync map after gPortsOrch->allPortsReady() is set.
@@ -339,7 +344,13 @@ bool OrchDaemon::init()
      * when iterating ConsumerMap. This is ensured implicitly by the order of keys in ordered map.
      * For cases when Orch has to process tables in specific order, like PortsOrch during warm start, it has to override Orch::doTask()
      */
-    m_orchList = { gSwitchOrch, gCrmOrch, gPortsOrch, gBufferOrch, gFlowCounterRouteOrch, mux_orch, mux_cb_orch, gIntfsOrch, gNeighOrch, gNhgMapOrch, gNhgOrch, gCbfNhgOrch, gRouteOrch, gCoppOrch, gQosOrch, wm_orch, policer_orch, tunnel_decap_orch, sflow_orch, gDebugCounterOrch, gMacsecOrch, gBfdOrch, gSrv6Orch};
+
+    m_orchList = { gSwitchOrch, gCrmOrch, gPortsOrch, gBufferOrch,
+                   gFlowCounterRouteOrch, mux_orch, mux_cb_orch, gIntfsOrch,
+                   gNeighOrch, gNhgMapOrch, gNhgOrch, gCbfNhgOrch, gRouteOrch,
+                   gCoppOrch, gQosOrch, wm_orch, policer_orch, tunnel_decap_orch,
+                   sflow_orch, gDebugCounterOrch, gMacsecOrch, gBfdOrch,
+                   gSrv6Orch, gTxMonitorOrch};
 
     bool initialize_dtel = false;
     if (platform == BFN_PLATFORM_SUBSTRING || platform == VS_PLATFORM_SUBSTRING)
diff --git a/orchagent/orchdaemon.h b/orchagent/orchdaemon.h
index def4b786294..024ddab0503 100644
--- a/orchagent/orchdaemon.h
+++ b/orchagent/orchdaemon.h
@@ -45,6 +45,7 @@
 #include "bfdorch.h"
 #include "srv6orch.h"
 #include "nvgreorch.h"
+#include "txmonitororch.h"
 
 using namespace swss;
 
diff --git a/orchagent/txmonitororch.cpp b/orchagent/txmonitororch.cpp
new file mode 100644
index 00000000000..336b99fdcd4
--- /dev/null
+++ b/orchagent/txmonitororch.cpp
@@ -0,0 +1,319 @@
+#include "txmonitororch.h"
+#include "sai_serialize.h"
+
+#include <limits>
+#include <stdexcept>
+
+extern sai_port_api_t *sai_port_api;
+
+extern PortsOrch *gPortsOrch;
+
+using namespace std;
+using namespace swss;
+
+/*
+ * Returns the process-wide TxMonitorOrch, creating it on the first call.
+ * The connectors are only consumed by that first call.
+ */
+TxMonitorOrch& TxMonitorOrch::getInstance(TableConnector txMonitoringConfig,
+                                          TableConnector txErrorsStatus)
+{
+    SWSS_LOG_ENTER();
+
+    static TxMonitorOrch *instance = new TxMonitorOrch
+        (txMonitoringConfig, txErrorsStatus);
+    return *instance;
+}
+
+/*
+ * Hands the configuration table to the Orch base class, opens the counters
+ * DB, sets up the polling timer and writes the default configuration.
+ */
+TxMonitorOrch::TxMonitorOrch(TableConnector txMonitoringConfig,
+                             TableConnector txErrorsStatus) :
+    Orch(txMonitoringConfig.first, txMonitoringConfig.second),
+    m_threshold(DEFAULT_THRESHOLD),
+    m_pollingPeriod(DEFAULT_POLLING_PERIOD),
+    m_portMapReady(false),
+    m_countersDb(make_unique<DBConnector>("COUNTERS_DB", 0)),
+    m_countersTable(make_unique<Table>(m_countersDb.get(), COUNTERS_TABLE)),
+    m_cfgTable(make_unique<Table>(txMonitoringConfig.first, txMonitoringConfig.second)),
+    m_stateTable(make_unique<Table>(txErrorsStatus.first, txErrorsStatus.second)),
+    // setTimer() compares m_timer with nullptr, so it must start out null.
+    m_timer(nullptr)
+{
+    SWSS_LOG_ENTER();
+
+    setTimer();
+    initCfgTable();
+    // ports might not be ready so it will be initialised when needed and ready
+
+    SWSS_LOG_NOTICE("TxMonitorOrch initalised.");
+}
+
+TxMonitorOrch::~TxMonitorOrch(void)
+{
+    SWSS_LOG_ENTER();
+
+    m_portsMap.clear();
+}
+
+/*
+ * Writes the current threshold and polling period under the "Config" key.
+ * NOTE(review): the field names written here ("Threshold", "Polling period")
+ * differ from the ones configUpdate() parses - confirm which set is intended.
+ */
+void TxMonitorOrch::initCfgTable()
+{
+    SWSS_LOG_ENTER();
+
+    vector<FieldValueTuple> fvs;
+    fvs.emplace_back("Threshold", to_string(m_threshold));
+    fvs.emplace_back("Polling period", to_string(m_pollingPeriod));
+    m_cfgTable->set("Config", fvs);
+
+    SWSS_LOG_NOTICE("Configuration initalised with default threshold and polling period");
+    SWSS_LOG_NOTICE("Threshold is set to %" PRIu64 " and default polling period is set to %u seconds.",
+                    m_threshold, m_pollingPeriod);
+}
+
+/*
+ * Builds the interface-name -> PortTxInfo map from the physical ports and
+ * writes an initial "OK" status for each of them. Does nothing until
+ * PortsOrch reports that all ports are ready.
+ */
+void TxMonitorOrch::initPortsMap()
+{
+    SWSS_LOG_ENTER();
+    SWSS_LOG_NOTICE("Initalising port map and status table");
+
+    if (!gPortsOrch->allPortsReady())
+    {
+        SWSS_LOG_NOTICE("Ports not ready yet.");
+        return;
+    }
+
+    m_fvs.push_back(okStatus);
+
+    map<string, Port>& ports = gPortsOrch->getAllPorts();
+    for (auto const &currPort : ports)
+    {
+        if (currPort.second.m_type != Port::Type::PHY)
+        {
+            continue;
+        }
+        uint64_t portId = currPort.second.m_port_id;
+        string oid = sai_serialize_object_id(portId);
+        string ifaceName = currPort.first;
+
+        m_portsMap.emplace(ifaceName, PortTxInfo(oid));
+
+        // init status table
+        m_stateTable->set(ifaceName, m_fvs);
+    }
+    m_fvs.pop_back();
+
+    m_portMapReady = true;
+    SWSS_LOG_NOTICE("Ports map ready");
+}
+
+/*
+ * Creates and starts the polling timer on the first call; on later calls
+ * applies the current polling period to the existing timer and resets it.
+ */
+void TxMonitorOrch::setTimer()
+{
+    SWSS_LOG_ENTER();
+
+    auto interval = timespec { .tv_sec = m_pollingPeriod, .tv_nsec = 0 };
+    if (m_timer == nullptr)
+    {
+        m_timer = new SelectableTimer(interval);
+        auto executor = new ExecutableTimer(m_timer, this, "TX_ERRORS_COUNTERS_POLL");
+        Orch::addExecutor(executor);
+        m_timer->start();
+    }
+    else
+    {
+        m_timer->setInterval(interval);
+        m_timer->reset();
+    }
+}
+
+/* Timer handler: builds the ports map if it is still empty, then checks the counters. */
+void TxMonitorOrch::doTask(SelectableTimer &timer)
+{
+    SWSS_LOG_ENTER();
+    if (m_portsMap.empty())
+    {
+        initPortsMap(); // also initiates state table
+    }
+    if (!m_portMapReady) return;
+    txErrorsCheck();
+}
+
+/* Applies SET operations on the "Config" key; any other entry is logged and dropped. */
+void TxMonitorOrch::doTask(Consumer &consumer)
+{
+    SWSS_LOG_ENTER();
+
+    auto it = consumer.m_toSync.begin();
+    while (it != consumer.m_toSync.end())
+    {
+        const KeyOpFieldsValuesTuple &updates = it->second;
+        const std::string & key = kfvKey(updates);
+        const std::string & op = kfvOp(updates);
+
+        if (key == "Config" && op == SET_COMMAND)
+        {
+            configUpdate(kfvFieldsValues(updates));
+        }
+        else
+        {
+            SWSS_LOG_ERROR("Unknown operation!");
+        }
+        it = consumer.m_toSync.erase(it);
+    }
+}
+
+/*
+ * Parses the "threshold" and "polling_period" fields of a configuration
+ * update. A value that is not a number or does not fit its type is logged
+ * and skipped instead of letting the std::sto* exception escape.
+ */
+void TxMonitorOrch::configUpdate(const vector<FieldValueTuple> &fvs)
+{
+    SWSS_LOG_ENTER();
+
+    for (const auto &fv : fvs)
+    {
+        const string &field = fvField(fv);
+        const string &value = fvValue(fv);
+
+        try
+        {
+            if (field == "threshold")
+            {
+                setThreshold(stoull(value));
+            }
+            else if (field == "polling_period")
+            {
+                const unsigned long long period = stoull(value);
+                if (period > numeric_limits<uint32_t>::max())
+                {
+                    throw out_of_range("polling_period exceeds 32 bits");
+                }
+                setPollingPeriod(static_cast<uint32_t>(period));
+            }
+            else
+            {
+                SWSS_LOG_ERROR("Unknown field!");
+                SWSS_LOG_ERROR("field = %s", field.c_str());
+            }
+        }
+        catch (const exception &e)
+        {
+            SWSS_LOG_ERROR("Invalid value '%s' for field %s: %s",
+                           value.c_str(), field.c_str(), e.what());
+        }
+    }
+}
+
+void TxMonitorOrch::setPollingPeriod(uint32_t newPollingPeriod)
+{
+    SWSS_LOG_ENTER();
+
+    if (m_pollingPeriod == newPollingPeriod) return;
+
+    m_pollingPeriod = newPollingPeriod;
+    setTimer();
+
+    SWSS_LOG_NOTICE("Polling period is now set to %u seconds.", m_pollingPeriod);
+}
+
+/*
+ * Compares the TX error growth of every monitored port since the previous
+ * poll against the threshold and updates the port status accordingly.
+ */
+void TxMonitorOrch::txErrorsCheck()
+{
+    SWSS_LOG_ENTER();
+    SWSS_LOG_NOTICE("Polling TX error counters and updating status.");
+
+    for (auto &currPort : m_portsMap)
+    {
+        const string &ifaceName = currPort.first;
+        PortTxInfo& currPortInfo = currPort.second;
+        string oid = currPortInfo.getOid();
+        uint64_t prevTxErrCnt = currPortInfo.getTxErrsCnt();
+        uint64_t newTxErrCnt = getTxErrCnt(oid, prevTxErrCnt);
+
+        // A sample below the previous one means the counter was cleared or
+        // restarted. Subtracting would wrap around and flag a healthy port,
+        // so in that case only the errors counted since then are used.
+        uint64_t txErrDelta = (newTxErrCnt >= prevTxErrCnt) ?
+            newTxErrCnt - prevTxErrCnt : newTxErrCnt;
+        bool newStatus = (txErrDelta <= m_threshold);
+
+        updateStatus(currPortInfo, ifaceName, newStatus);
+        currPortInfo.setTxErrsCnt(newTxErrCnt);
+    }
+}
+
+/*
+ * Reads SAI_PORT_STAT_IF_OUT_ERRORS for the given port OID from the counters
+ * table. Returns prevTxErrCnt when the entry is missing or not a number.
+ */
+uint64_t TxMonitorOrch::getTxErrCnt(string oid, uint64_t prevTxErrCnt)
+{
+    SWSS_LOG_ENTER();
+
+    string newTxErrCntStr;
+    if (!m_countersTable->hget(oid, "SAI_PORT_STAT_IF_OUT_ERRORS", newTxErrCntStr))
+    {
+        return prevTxErrCnt;
+    }
+
+    try
+    {
+        return stoull(newTxErrCntStr);
+    }
+    catch (const exception &e)
+    {
+        SWSS_LOG_ERROR("Invalid TX error counter '%s' for port %s: %s",
+                       newTxErrCntStr.c_str(), oid.c_str(), e.what());
+        return prevTxErrCnt;
+    }
+}
+
+/*
+ * Stores the new status of a port and writes it to the state table, but only
+ * when it differs from the status recorded for that port.
+ */
+void TxMonitorOrch::updateStatus(PortTxInfo &portInfo, string ifaceName, bool newStatus)
+{
+    SWSS_LOG_ENTER();
+
+    bool prevStatus = portInfo.getStatus();
+
+    // Status will be updated only if it has changed.
+    if (newStatus != prevStatus)
+    {
+        SWSS_LOG_NOTICE("status updated for interface %s", ifaceName.c_str());
+        portInfo.setStatuts(newStatus);
+        if (newStatus) // meaning that the status is OK
+        {
+            m_fvs.push_back(okStatus);
+        }
+        else
+        {
+            m_fvs.push_back(notOkStatus);
+        }
+        m_stateTable->set(ifaceName, m_fvs);
+        m_fvs.pop_back();
+    }
+    else
+    {
+        SWSS_LOG_NOTICE("status not updated for interface %s", ifaceName.c_str());
+    }
+}
diff --git a/orchagent/txmonitororch.h b/orchagent/txmonitororch.h
new file mode 100644
index 00000000000..3fdb3e82fe4
--- /dev/null
+++ b/orchagent/txmonitororch.h
@@ -0,0 +1,103 @@
+#pragma once
+
+
+#include "orch.h"
+#include "port.h"
+#include "timer.h"
+#include "portsorch.h"
+#include "countercheckorch.h"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+
+extern "C" {
+#include "sai.h"
+}
+
+#define DEFAULT_THRESHOLD 10
+#define DEFAULT_POLLING_PERIOD 30
+
+static const swss::FieldValueTuple okStatus{"Status", "OK"};
+static const swss::FieldValueTuple notOkStatus{"Status", "Not OK"};
+
+/* Per-port bookkeeping: serialized port OID, last sampled TX error count, OK flag. */
+class PortTxInfo
+{
+    public:
+        PortTxInfo(std::string oid = "", uint64_t txErrsCnt = 0, bool isOK = true) :
+            m_oid(oid), m_txErrsCnt(txErrsCnt), m_isOK(isOK) {}
+
+        void setOid(std::string oid) { m_oid = oid; }
+        std::string getOid() const { return m_oid; }
+
+        void setTxErrsCnt(uint64_t txErrsCnt) { m_txErrsCnt = txErrsCnt; }
+        uint64_t getTxErrsCnt() const { return m_txErrsCnt; }
+
+        void setStatuts(bool isOK) { m_isOK = isOK; }
+        bool getStatus() const { return m_isOK; }
+
+    private:
+        std::string m_oid;
+        uint64_t m_txErrsCnt;
+        bool m_isOK;
+};
+
+
+/* Polls per-port TX error counters on a timer and writes an OK / Not OK status per port. */
+class TxMonitorOrch : public Orch
+{
+    public:
+        static TxMonitorOrch& getInstance(TableConnector txMonitoringConfig,
+                                          TableConnector txErrorsStatus);
+        virtual void doTask(swss::SelectableTimer &timer);
+        virtual void doTask(Consumer &consumer);
+
+    private:
+        TxMonitorOrch(TableConnector txMonitoringConfig,
+                      TableConnector txErrorsStatus);
+        virtual ~TxMonitorOrch();
+
+        void initPortsMap();
+        void initCfgTable();
+        void setTimer();
+
+        void txErrorsCheck();
+        uint64_t getTxErrCnt(std::string oid, uint64_t prevTxErrCnt);
+        void updateStatus(PortTxInfo& portInfo, std::string ifaceName, bool newStatus);
+
+        void setThreshold(uint64_t newThreshold)
+        {
+            SWSS_LOG_ENTER();
+            if (newThreshold == m_threshold) return;
+            m_threshold = newThreshold;
+            SWSS_LOG_NOTICE("Threshold is set to %" PRIu64 ".", m_threshold);
+        }
+        uint64_t getThreshold()
+        {
+            SWSS_LOG_ENTER();
+            return m_threshold;
+        }
+
+        void setPollingPeriod(uint32_t newPollingPeriod);
+        uint32_t getPollingPeriod()
+        {
+            SWSS_LOG_ENTER();
+            return m_pollingPeriod;
+        }
+
+        void configUpdate(const std::vector<swss::FieldValueTuple> &fvs);
+
+
+        uint64_t m_threshold;
+        uint32_t m_pollingPeriod;
+        bool m_portMapReady;
+        std::unique_ptr<swss::DBConnector> m_countersDb;
+        std::unique_ptr<swss::Table> m_countersTable;
+        std::unique_ptr<swss::Table> m_cfgTable;
+        std::unique_ptr<swss::Table> m_stateTable;
+        SelectableTimer* m_timer;
+        std::map<std::string, PortTxInfo> m_portsMap; // maps interface name to port info
+        std::vector<swss::FieldValueTuple> m_fvs;
+};
diff --git a/tests/mock_tests/Makefile.am b/tests/mock_tests/Makefile.am
index 761aba19c66..9361524ba46 100644
--- a/tests/mock_tests/Makefile.am
+++ b/tests/mock_tests/Makefile.am
@@ -92,7 +92,8 @@ tests_SOURCES = aclorch_ut.cpp \
                 $(top_srcdir)/orchagent/lagid.cpp \
                 $(top_srcdir)/orchagent/bfdorch.cpp \
                 $(top_srcdir)/orchagent/srv6orch.cpp \
-                $(top_srcdir)/orchagent/nvgreorch.cpp
+                $(top_srcdir)/orchagent/nvgreorch.cpp \
+                $(top_srcdir)/orchagent/txmonitororch.cpp
 
 tests_SOURCES += $(FLEX_CTR_DIR)/flex_counter_manager.cpp $(FLEX_CTR_DIR)/flex_counter_stat_manager.cpp $(FLEX_CTR_DIR)/flow_counter_handler.cpp $(FLEX_CTR_DIR)/flowcounterrouteorch.cpp
 tests_SOURCES += $(DEBUG_CTR_DIR)/debug_counter.cpp $(DEBUG_CTR_DIR)/drop_counter.cpp