diff --git a/cfgmgr/fabricmgr.h b/cfgmgr/fabricmgr.h index dbe2fd0d897..1fd399fef9c 100644 --- a/cfgmgr/fabricmgr.h +++ b/cfgmgr/fabricmgr.h @@ -21,7 +21,7 @@ class FabricMgr : public Orch Table m_cfgFabricMonitorTable; Table m_cfgFabricPortTable; Table m_appFabricMonitorTable; - Table m_appFabricPortTable; + ProducerStateTable m_appFabricPortTable; void doTask(Consumer &consumer); bool writeConfigToAppDb(const std::string &alias, const std::string &field, const std::string &value); diff --git a/orchagent/fabricportsorch.cpp b/orchagent/fabricportsorch.cpp index b47f61a635c..b46fcede092 100644 --- a/orchagent/fabricportsorch.cpp +++ b/orchagent/fabricportsorch.cpp @@ -22,6 +22,8 @@ #define FABRIC_QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP "FABRIC_QUEUE_STAT_COUNTER" #define FABRIC_QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS 100000 #define FABRIC_DEBUG_POLLING_INTERVAL_DEFAULT (60) +#define FABRIC_MONITOR_DATA "FABRIC_MONITOR_DATA" +#define APPL_FABRIC_PORT_PREFIX "Fabric" // constants for link monitoring #define MAX_SKIP_CRCERR_ON_LNKUP_POLLS 20 @@ -84,6 +86,7 @@ FabricPortsOrch::FabricPortsOrch(DBConnector *appl_db, vector(new ProducerTable(m_flex_db.get(), APP_FABRIC_PORT_TABLE_NAME)); m_appl_db = shared_ptr(new DBConnector("APPL_DB", 0)); m_applTable = unique_ptr(new Table(m_appl_db.get(), APP_FABRIC_MONITOR_PORT_TABLE_NAME)); + m_applMonitorConstTable = unique_ptr
(new Table(m_appl_db.get(), APP_FABRIC_MONITOR_DATA_TABLE_NAME)); m_fabricPortStatEnabled = fabricPortStatEnabled; m_fabricQueueStatEnabled = fabricQueueStatEnabled; @@ -379,9 +382,51 @@ void FabricPortsOrch::updateFabricDebugCounters() int recoveryPollsCfg = RECOVERY_POLLS_CFG; // monPollThreshRecovery int errorRateCrcCellsCfg = ERROR_RATE_CRC_CELLS_CFG; // monErrThreshCrcCells int errorRateRxCellsCfg = ERROR_RATE_RX_CELLS_CFG; // monErrThreshRxCells + string applConstKey = FABRIC_MONITOR_DATA; std::vector constValues; SWSS_LOG_INFO("updateFabricDebugCounters"); + bool setCfgVal = m_applMonitorConstTable->get("FABRIC_MONITOR_DATA", constValues); + if (!setCfgVal) + { + SWSS_LOG_INFO("applConstKey %s default values not set", applConstKey.c_str()); + } + else + { + SWSS_LOG_INFO("applConstKey %s default values get set", applConstKey.c_str()); + } + string configVal = "1"; + for (auto cv : constValues) + { + configVal = fvValue(cv); + if (fvField(cv) == "monErrThreshCrcCells") + { + errorRateCrcCellsCfg = stoi(configVal); + SWSS_LOG_INFO("monErrThreshCrcCells: %s %s", configVal.c_str(), fvField(cv).c_str()); + continue; + } + if (fvField(cv) == "monErrThreshRxCells") + { + errorRateRxCellsCfg = stoi(configVal); + SWSS_LOG_INFO("monErrThreshRxCells: %s %s", configVal.c_str(), fvField(cv).c_str()); + continue; + } + if (fvField(cv) == "monPollThreshIsolation") + { + fecIsolatedPolls = stoi(configVal); + isolationPollsCfg = stoi(configVal); + SWSS_LOG_INFO("monPollThreshIsolation: %s %s", configVal.c_str(), fvField(cv).c_str()); + continue; + } + if (fvField(cv) == "monPollThreshRecovery") + { + fecUnisolatePolls = stoi(configVal); + recoveryPollsCfg = stoi(configVal); + SWSS_LOG_INFO("monPollThreshRecovery: %s", configVal.c_str()); + continue; + } + } + // Get debug countesrs (e.g. 
# of cells with crc errors, # of cells) for (auto p : m_fabricLanePortMap) { @@ -449,6 +494,8 @@ void FabricPortsOrch::updateFabricDebugCounters() // skipCrcErrorsOnLinkupCount SKIP_CRC_ERR_ON_LNKUP_CNT // skipFecErrorsOnLinkupCount SKIP_FEC_ERR_ON_LNKUP_CNT // removeProblemLinkCount RM_PROBLEM_LNK_CNT -- this is for feature of remove a flaky link permanently + // + // cfgIsolated CONFIG_ISOLATED int consecutivePollsWithErrors = 0; int consecutivePollsWithNoErrors = 0; @@ -465,13 +512,45 @@ void FabricPortsOrch::updateFabricDebugCounters() uint64_t testCodeErrors = 0; int autoIsolated = 0; + int cfgIsolated = 0; + int isolated = 0; string lnkStatus = "down"; string testState = "product"; + // Get appl_db values, and update state_db later with other attributes + string applKey = APPL_FABRIC_PORT_PREFIX + to_string(lane); + std::vector applValues; + string applResult = "False"; + bool exist = m_applTable->get(applKey, applValues); + if (!exist) + { + SWSS_LOG_NOTICE("No app infor for port %s", applKey.c_str()); + } + else + { + for (auto v : applValues) + { + applResult = fvValue(v); + if (fvField(v) == "isolateStatus") + { + if (applResult == "True") + { + cfgIsolated = 1; + } + else + { + cfgIsolated = 0; + } + SWSS_LOG_INFO("Port %s isolateStatus: %s %d", + applKey.c_str(), applResult.c_str(), cfgIsolated); + } + } + } + // Get the consecutive polls from the state db std::vector values; string valuePt; - bool exist = m_stateTable->get(key, values); + exist = m_stateTable->get(key, values); if (!exist) { SWSS_LOG_INFO("No state infor for port %s", key.c_str()); @@ -675,7 +754,6 @@ void FabricPortsOrch::updateFabricDebugCounters() valuePt = to_string(autoIsolated); m_stateTable->hset(key, "AUTO_ISOLATED", valuePt); SWSS_LOG_NOTICE("port %s set AUTO_ISOLATED %s", key.c_str(), valuePt.c_str()); - // Call SAI api here to actually isolated the link } else if (autoIsolated == 1 && consecutivePollsWithNoErrors >= recoveryPollsCfg && consecutivePollsWithNoFecErrs >= 
fecUnisolatePolls) @@ -685,9 +763,28 @@ void FabricPortsOrch::updateFabricDebugCounters() autoIsolated = 0; valuePt = to_string(autoIsolated); m_stateTable->hset(key, "AUTO_ISOLATED", valuePt); - SWSS_LOG_NOTICE("port %s set AUTO_ISOLATED %s", key.c_str(), valuePt.c_str()); - // Can we call SAI api here to unisolate the link? + SWSS_LOG_INFO("port %s set AUTO_ISOLATED %s", key.c_str(), valuePt.c_str()); + } + if (cfgIsolated == 1) + { + isolated = 1; + SWSS_LOG_INFO("port %s keep isolated due to configuation",key.c_str()); + } + else + { + if (autoIsolated == 1) + { + isolated = 1; + SWSS_LOG_INFO("port %s keep isolated due to autoisolation",key.c_str()); + } + else + { + isolated = 0; + SWSS_LOG_INFO("port %s unisolated",key.c_str()); + } } + // if "ISOLATED" is true, Call SAI api here to actually isolated the link + // if "ISOLATED" is false, Call SAP api to actually unisolate the link } else { @@ -726,6 +823,16 @@ void FabricPortsOrch::updateFabricDebugCounters() m_stateTable->hset(key, "CODE_ERRORS", valuePt.c_str()); SWSS_LOG_INFO("port %s set CODE_ERRORS %s", key.c_str(), valuePt.c_str()); + + valuePt = to_string(cfgIsolated); + m_stateTable->hset(key, "CONFIG_ISOLATED", valuePt.c_str()); + SWSS_LOG_INFO("port %s set CONFIG_ISOLATED %s", + key.c_str(), valuePt.c_str()); + + valuePt = to_string(isolated); + m_stateTable->hset(key, "ISOLATED", valuePt.c_str()); + SWSS_LOG_INFO("port %s set ISOLATED %s", + key.c_str(), valuePt.c_str()); } } @@ -733,8 +840,184 @@ void FabricPortsOrch::doTask() { }
+// Consume the pending entries of the fabric port table.
+// For a SET whose isolateStatus is "False", a forceUnisolateStatus that differs
+// from FORCE_UN_ISOLATE in the state DB is stored there and the port's
+// isolation related state fields are reset to their defaults.
+void FabricPortsOrch::doFabricPortTask(Consumer &consumer)
+{
+    SWSS_LOG_NOTICE("FabricPortsOrch::doFabricPortTask");
+
+    // stoi() throws on empty, non-numeric or out-of-range text. The counters
+    // parsed below come from the DB, so a bad value is reported to the caller
+    // as 'false' instead of escaping as an uncaught exception.
+    auto parseCount = [](const string &text, int &out) -> bool
+    {
+        try
+        {
+            out = stoi(text);
+            return true;
+        }
+        catch (const std::exception &)
+        {
+            return false;
+        }
+    };
+
+    auto it = consumer.m_toSync.begin();
+    while (it != consumer.m_toSync.end())
+    {
+        // 't' refers to the pending entry itself; it is not used after erase().
+        const KeyOpFieldsValuesTuple &t = it->second;
+        const string key = kfvKey(t);
+
+        if (kfvOp(t) != SET_COMMAND)
+        {
+            // Only SET is acted upon; any other operation is just consumed.
+            it = consumer.m_toSync.erase(it);
+            continue;
+        }
+
+        string alias, lanes, isolateStatus;
+        int forceUnisolateCnt = 0;
+        bool validEntry = true;
+
+        for (const auto &fv : kfvFieldsValues(t))
+        {
+            const string &field = fvField(fv);
+            if (field == "alias")
+            {
+                alias = fvValue(fv);
+            }
+            else if (field == "lanes")
+            {
+                lanes = fvValue(fv);
+            }
+            else if (field == "isolateStatus")
+            {
+                isolateStatus = fvValue(fv);
+            }
+            else if (field == "forceUnisolateStatus" &&
+                     !parseCount(fvValue(fv), forceUnisolateCnt))
+            {
+                SWSS_LOG_ERROR("Invalid forceUnisolateStatus '%s' for key %s",
+                               fvValue(fv).c_str(), key.c_str());
+                validEntry = false;
+            }
+        }
+        if (!validEntry)
+        {
+            // Drop the entry: acting on a counter we could not parse is unsafe.
+            it = consumer.m_toSync.erase(it);
+            continue;
+        }
+        // NOTE(review): unlike the three fields below, a forceUnisolateStatus
+        // that is absent from this SET is not read back from APPL_DB and is
+        // taken as 0 - confirm that this is intended.
+
+        // This method may be called with only some fields included.
+        // In that case read in the missing field data.
+        struct
+        {
+            const char *name;
+            string *value;
+        } required[] = {
+            { "alias", &alias },
+            { "lanes", &lanes },
+            { "isolateStatus", &isolateStatus },
+        };
+        for (auto &req : required)
+        {
+            if (!req.value->empty())
+            {
+                continue;
+            }
+            SWSS_LOG_NOTICE("%s is NULL, key: %s", req.name, key.c_str());
+            string stored;
+            if (m_applTable->hget(key, req.name, stored))
+            {
+                *req.value = stored;
+                SWSS_LOG_NOTICE("read new_%s, key: '%s', value: '%s'",
+                                req.name, key.c_str(), stored.c_str());
+            }
+            else
+            {
+                SWSS_LOG_NOTICE("hget failed for key: %s, %s", key.c_str(), req.name);
+            }
+        }
+        // Do not process if some data is still missing.
+        if (alias.empty() || lanes.empty() || isolateStatus.empty())
+        {
+            SWSS_LOG_NOTICE("NULL values, skipping %s", key.c_str());
+            it = consumer.m_toSync.erase(it);
+            continue;
+        }
+        SWSS_LOG_NOTICE("key %s alias %s isolateStatus %s lanes %s",
+                        key.c_str(), alias.c_str(), isolateStatus.c_str(), lanes.c_str());
+        // TODO: call the SAI API here to isolate the link when isolateStatus
+        // is True and to unisolate it when isolateStatus is False.
+
+        if (isolateStatus == "False")
+        {
+            // Compare the configured force-unisolate counter with the one kept
+            // in the state DB (FORCE_UN_ISOLATE). If they differ:
+            // 1) store the configured counter in the state DB
+            // 2) reset all isolation related fields in the state DB
+            std::vector<FieldValueTuple> values;
+            const string state_key = FABRIC_PORT_PREFIX + lanes;
+            if (!m_stateTable->get(state_key, values))
+            {
+                SWSS_LOG_NOTICE("React to unshut No state infor for port %s", state_key.c_str());
+            }
+            else
+            {
+                SWSS_LOG_NOTICE("React to unshut port %s", state_key.c_str());
+            }
+
+            int curVal = 0;
+            bool curValid = true;
+            for (const auto &val : values)
+            {
+                if (fvField(val) == "FORCE_UN_ISOLATE" &&
+                    !parseCount(fvValue(val), curVal))
+                {
+                    // An unreadable stored counter is overwritten below.
+                    SWSS_LOG_NOTICE("port %s invalid FORCE_UN_ISOLATE '%s'",
+                                    state_key.c_str(), fvValue(val).c_str());
+                    curValid = false;
+                }
+            }
+            SWSS_LOG_INFO("Current %d Config %d", curVal, forceUnisolateCnt);
+            if (!curValid || curVal != forceUnisolateCnt)
+            {
+                const string value_update = to_string(forceUnisolateCnt);
+                m_stateTable->hset(state_key, "FORCE_UN_ISOLATE", value_update);
+                SWSS_LOG_NOTICE("port %s set FORCE_UN_ISOLATE %s", state_key.c_str(), value_update.c_str());
+
+                // Put every isolation related field back to its default.
+                m_stateTable->hset(state_key, "POLL_WITH_ERRORS", m_defaultPollWithErrors);
+                m_stateTable->hset(state_key, "POLL_WITH_NO_ERRORS", m_defaultPollWithNoErrors);
+                m_stateTable->hset(state_key, "POLL_WITH_FEC_ERRORS", m_defaultPollWithFecErrors);
+                m_stateTable->hset(state_key, "POLL_WITH_NOFEC_ERRORS", m_defaultPollWithNoFecErrors);
+                m_stateTable->hset(state_key, "CONFIG_ISOLATED", m_defaultConfigIsolated);
+                m_stateTable->hset(state_key, "ISOLATED", m_defaultIsolated);
+                m_stateTable->hset(state_key, "AUTO_ISOLATED", m_defaultAutoIsolated);
+            }
+        }
+        it = consumer.m_toSync.erase(it);
+    }
+}
+
 void
FabricPortsOrch::doTask(Consumer &consumer)
 {
+    SWSS_LOG_NOTICE("doTask from FabricPortsOrch");
+
+    // Entries that do not come from the fabric port table are left untouched.
+    if (consumer.getTableName() != APP_FABRIC_MONITOR_PORT_TABLE_NAME)
+    {
+        return;
+    }
+    doFabricPortTask(consumer);
 }
 
 void FabricPortsOrch::doTask(swss::SelectableTimer &timer) @@ -760,7 +1034,7 @@ void FabricPortsOrch::doTask(swss::SelectableTimer &timer) // Skip collecting debug information // as we don't have all fabric ports yet. return; - } + } if (m_getFabricPortListDone) { diff --git a/orchagent/fabricportsorch.h b/orchagent/fabricportsorch.h index 4c274cba000..0d637dec43b 100644 --- a/orchagent/fabricportsorch.h +++ b/orchagent/fabricportsorch.h @@ -31,6 +31,7 @@ class FabricPortsOrch : public Orch, public Subject unique_ptr
m_portNamePortCounterTable; unique_ptr
m_fabricCounterTable; unique_ptr
m_applTable; + unique_ptr
m_applMonitorConstTable; unique_ptr m_flexCounterTable; swss::SelectableTimer *m_timer = nullptr; @@ -47,6 +48,15 @@ class FabricPortsOrch : public Orch, public Subject bool m_getFabricPortListDone = false; bool m_isQueueStatsGenerated = false; + + string m_defaultPollWithErrors = "0"; + string m_defaultPollWithNoErrors = "8"; + string m_defaultPollWithFecErrors = "0"; + string m_defaultPollWithNoFecErrors = "8"; + string m_defaultConfigIsolated = "0"; + string m_defaultIsolated = "0"; + string m_defaultAutoIsolated = "0"; + int getFabricPortList(); void generatePortStats(); void updateFabricPortState(); @@ -54,6 +64,7 @@ class FabricPortsOrch : public Orch, public Subject void doTask() override; void doTask(Consumer &consumer); + void doFabricPortTask(Consumer &consumer); void doTask(swss::SelectableTimer &timer); }; diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index 63fd037fa64..05e58c6ae92 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -19,6 +19,8 @@ using namespace swss; #define SELECT_TIMEOUT 1000 #define PFC_WD_POLL_MSECS 100 +#define APP_FABRIC_MONITOR_PORT_TABLE_NAME "FABRIC_PORT_TABLE" + /* orchagent heart beat message interval */ #define HEART_BEAT_INTERVAL_MSECS 10 * 1000 @@ -513,8 +515,10 @@ bool OrchDaemon::init() if (m_fabricEnabled) { + // register APP_FABRIC_MONITOR_PORT_TABLE_NAME table + const int fabric_portsorch_base_pri = 30; vector fabric_port_tables = { - // empty for now + { APP_FABRIC_MONITOR_PORT_TABLE_NAME, fabric_portsorch_base_pri } }; gFabricPortsOrch = new FabricPortsOrch(m_applDb, fabric_port_tables, m_fabricPortStatEnabled, m_fabricQueueStatEnabled); m_orchList.push_back(gFabricPortsOrch); @@ -1072,8 +1076,9 @@ bool FabricOrchDaemon::init() SWSS_LOG_ENTER(); SWSS_LOG_NOTICE("FabricOrchDaemon init"); + const int fabric_portsorch_base_pri = 30; vector fabric_port_tables = { - // empty for now, I don't consume anything yet + { APP_FABRIC_MONITOR_PORT_TABLE_NAME, fabric_portsorch_base_pri } 
}; gFabricPortsOrch = new FabricPortsOrch(m_applDb, fabric_port_tables); addOrchList(gFabricPortsOrch); diff --git a/tests/test_fabric.py b/tests/test_fabric.py index 2d1ea8c2930..72ad8287905 100644 --- a/tests/test_fabric.py +++ b/tests/test_fabric.py @@ -73,6 +73,14 @@ def test_voq_switch(self, vst): port_counters_stat_keys = flex_db.get_keys("FLEX_COUNTER_TABLE:" + meta_data['group_name']) for port_stat in port_counters_stat_keys: assert port_stat in dict(port_counters_keys.items()).values(), "Non port created on PORT_STAT_COUNTER group: {}".format(port_stat) + + # update some config_db entries + cfg_db = swsscommon.DBConnector(swsscommon.CONFIG_DB, dvs.redis_sock, 0) + tb = swsscommon.Table(cfg_db, "FABRIC_PORT") + fvs = swsscommon.FieldValuePairs([("isolateStatus","True")]) + tb.set("FABRIC_PORT|Fabric0", fvs ) + fvs = swsscommon.FieldValuePairs([("forceUnisolateStatus", "1")]) + tb.set("FABRIC_PORT|Fabric0", fvs ) else: print( "We do not check switch type:", cfg_switch_type ) diff --git a/tests/virtual_chassis/8/default_config.json b/tests/virtual_chassis/8/default_config.json index b50c86ffffa..6f77a1ade2b 100644 --- a/tests/virtual_chassis/8/default_config.json +++ b/tests/virtual_chassis/8/default_config.json @@ -10,85 +10,109 @@ "comment" : "default_config for a vs that runs chassis_db" } }, + "FABRIC_MONITOR": { + "FABRIC_MONITOR_DATA": { + "monErrThreshCrcCells": "1", + "monErrThreshRxCells": "61035156", + "monPollThreshRecovery": "8", + "monPollThreshIsolation": "1" + } + }, "FABRIC_PORT": { "Fabric0": { "alias": "Fabric0", "isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "0" }, "Fabric1": { "alias": "Fabric1", "isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "1" }, "Fabric2": { "alias": "Fabric2", "isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "2" }, "Fabric3": { "alias": "Fabric3", "isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "3" }, "Fabric4": { "alias": "Fabric4", 
"isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "4" }, "Fabric5": { "alias": "Fabric5", "isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "5" }, "Fabric6": { "alias": "Fabric6", "isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "6" }, "Fabric7": { "alias": "Fabric7", "isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "7" }, "Fabric8": { "alias": "Fabric8", "isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "8" }, "Fabric9": { "alias": "Fabric9", "isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "9" }, "Fabric10": { "alias": "Fabric10", "isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "10" }, "Fabric11": { "alias": "Fabric11", "isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "11" }, "Fabric12": { "alias": "Fabric12", "isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "12" }, "Fabric13": { "alias": "Fabric13", "isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "13" }, "Fabric14": { "alias": "Fabric14", "isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "14" }, "Fabric15": { "alias": "Fabric15", "isolateStatus": "False", + "forceUnisolateStatus": "0", "lanes": "15" } }