Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 182 additions & 0 deletions orchagent/fabricportsorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#define RECOVERY_POLLS_CFG 8
#define ERROR_RATE_CRC_CELLS_CFG 1
#define ERROR_RATE_RX_CELLS_CFG 61035156
#define FABRIC_LINK_RATE 44316

extern sai_object_id_t gSwitchId;
extern sai_switch_api_t *sai_switch_api;
Expand Down Expand Up @@ -76,6 +77,7 @@ FabricPortsOrch::FabricPortsOrch(DBConnector *appl_db, vector<table_name_with_pr

m_state_db = shared_ptr<DBConnector>(new DBConnector("STATE_DB", 0));
m_stateTable = unique_ptr<Table>(new Table(m_state_db.get(), APP_FABRIC_PORT_TABLE_NAME));
m_fabricCapacityTable = unique_ptr<Table>(new Table(m_state_db.get(), STATE_FABRIC_CAPACITY_TABLE_NAME));

m_counter_db = shared_ptr<DBConnector>(new DBConnector("COUNTERS_DB", 0));
m_portNameQueueCounterTable = unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_FABRIC_QUEUE_NAME_MAP));
Expand Down Expand Up @@ -836,6 +838,185 @@ void FabricPortsOrch::updateFabricDebugCounters()
}
}

void FabricPortsOrch::updateFabricCapacity()
{
// Init value for fabric capacity monitoring
int capacity = 0;
int downCapacity = 0;
string lnkStatus = "down";
string configIsolated = "0";
string isolated = "0";
string autoIsolated = "0";
int operating_links = 0;
int total_links = 0;
int threshold = 100;
std::vector<FieldValueTuple> constValues;
string applKey = FABRIC_MONITOR_DATA;

// Get capacity warning threshold from APPL_DB table FABRIC_MONITOR_DATA
// By default, this threshold is 100 (percentage).
bool cfgVal = m_applMonitorConstTable->get("FABRIC_MONITOR_DATA", constValues);
if(!cfgVal)
{
SWSS_LOG_INFO("%s default values not set", applKey.c_str());
}
else
{
SWSS_LOG_INFO("%s has default values", applKey.c_str());
}
string configVal = "1";
for (auto cv : constValues)
{
configVal = fvValue(cv);
if (fvField(cv) == "monCapacityThreshWarn")
{
threshold = stoi(configVal);
SWSS_LOG_INFO("monCapacityThreshWarn: %s %s", configVal.c_str(), fvField(cv).c_str());
continue;
}
}

// Check fabric capacity.
SWSS_LOG_INFO("FabricPortsOrch::updateFabricCapacity start");
for (auto p : m_fabricLanePortMap)
{
int lane = p.first;
string key = FABRIC_PORT_PREFIX + to_string(lane);
std::vector<FieldValueTuple> values;
string valuePt;

// Get fabric serdes link status from STATE_DB
bool exist = m_stateTable->get(key, values);
if (!exist)
{
SWSS_LOG_INFO("No state infor for port %s", key.c_str());
return;
}
for (auto val : values)
{
valuePt = fvValue(val);
if (fvField(val) == "STATUS")
{
lnkStatus = valuePt;
continue;
}
if (fvField(val) == "CONFIG_ISOLATED")
{
configIsolated = valuePt;
continue;
}
if (fvField(val) == "ISOLATED")
{
isolated = valuePt;
continue;
}
if (fvField(val) == "AUTO_ISOLATED")
{
autoIsolated = valuePt;
continue;
}
}
// Calculate total number of serdes link, number of operational links,
// total fabric capacity.
bool linkIssue = false;
if (configIsolated == "1" || isolated == "1" || autoIsolated == "1")
{
linkIssue = true;
}

if (lnkStatus == "down" || linkIssue == true)
{
downCapacity += FABRIC_LINK_RATE;
}
else
{
capacity += FABRIC_LINK_RATE;
operating_links += 1;
}
total_links += 1;
}

SWSS_LOG_INFO("Capacity: %d Missing %d", capacity, downCapacity);

// Get LAST_EVENT from STATE_DB

// Calculate the current capacity to see if
// it is lower or higher than the threshold
string cur_event = "None";
string event = "None";
int expect_links = total_links * threshold / 100;
if (expect_links > operating_links)
{
cur_event = "Lower";
}
else
{
cur_event = "Higher";
}

SWSS_LOG_NOTICE(" total link %d expected link %d oper link %d event %s", total_links, expect_links, operating_links, cur_event.c_str());

// Update the capacity data in this poll to STATE_DB
SWSS_LOG_INFO("Capacity: %d Missing %d", capacity, downCapacity);

string lastEvent = "None";
string lastTime = "Never";
// Get the last event and time that event happend from STATE_DB
bool capacity_data = m_fabricCapacityTable->get("FABRIC_CAPACITY_DATA", constValues);
if (capacity_data)
{
for (auto cv : constValues)
{
if(fvField(cv) == "last_event")
{
lastEvent = fvValue(cv);
continue;
}
if(fvField(cv) == "last_event_time")
{
lastTime = fvValue(cv);
continue;
}
}
}

auto now = std::chrono::system_clock::now();
auto now_s = std::chrono::time_point_cast<std::chrono::seconds>(now);
auto nse = now_s.time_since_epoch();

// If last event is None or higher, but the capacity is lower in this poll,
// update the STATE_DB with the event (lower) and the time.
// If the last event is lower, and the capacity is back to higher than the threshold,
// update the STATE_DB with the event (higher) and the time.
event = lastEvent;
if (cur_event == "Lower")
{
if (lastEvent == "None" || lastEvent == "Higher")
{
event = "Lower";
lastTime = to_string(nse.count());
}
}
else if (cur_event == "Higher")
{
if (lastEvent == "Lower")
{
event = "Higher";
lastTime = to_string(nse.count());
}
}

// Update STATE_DB
SWSS_LOG_INFO("FabricPortsOrch::updateFabricCapacity now update STATE_DB");
m_fabricCapacityTable->hset("FABRIC_CAPACITY_DATA", "fabric_capacity", to_string(capacity));
m_fabricCapacityTable->hset("FABRIC_CAPACITY_DATA", "missing_capacity", to_string(downCapacity));
m_fabricCapacityTable->hset("FABRIC_CAPACITY_DATA", "operating_links", to_string(operating_links));
m_fabricCapacityTable->hset("FABRIC_CAPACITY_DATA", "number_of_links", to_string(total_links));
m_fabricCapacityTable->hset("FABRIC_CAPACITY_DATA", "warning_threshold", to_string(threshold));
m_fabricCapacityTable->hset("FABRIC_CAPACITY_DATA", "last_event", event);
m_fabricCapacityTable->hset("FABRIC_CAPACITY_DATA", "last_event_time", lastTime);
}

// Parameterless doTask() (declared `override` in the class header).
// The body is empty, so calling it has no effect.
void FabricPortsOrch::doTask()
{
}
Expand Down Expand Up @@ -1039,6 +1220,7 @@ void FabricPortsOrch::doTask(swss::SelectableTimer &timer)
if (m_getFabricPortListDone)
{
updateFabricDebugCounters();
updateFabricCapacity();
}
}
}
5 changes: 5 additions & 0 deletions orchagent/fabricportsorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#include "producertable.h"
#include "flex_counter_manager.h"

#define STATE_FABRIC_CAPACITY_TABLE_NAME "FABRIC_CAPACITY_TABLE"
#define STATE_PORT_CAPACITY_TABLE_NAME "PORT_CAPACITY_TABLE"

class FabricPortsOrch : public Orch, public Subject
{
public:
Expand All @@ -31,6 +34,7 @@ class FabricPortsOrch : public Orch, public Subject
unique_ptr<Table> m_portNamePortCounterTable;
unique_ptr<Table> m_fabricCounterTable;
unique_ptr<Table> m_applTable;
unique_ptr<Table> m_fabricCapacityTable;
unique_ptr<Table> m_applMonitorConstTable;
unique_ptr<ProducerTable> m_flexCounterTable;

Expand Down Expand Up @@ -61,6 +65,7 @@ class FabricPortsOrch : public Orch, public Subject
void generatePortStats();
void updateFabricPortState();
void updateFabricDebugCounters();
void updateFabricCapacity();

void doTask() override;
void doTask(Consumer &consumer);
Expand Down
72 changes: 72 additions & 0 deletions tests/test_fabric_capacity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import random
from dvslib.dvs_database import DVSDatabase
from dvslib.dvs_common import PollingConfig


class TestVirtualChassis(object):
def test_voq_switch_fabric_capacity(self, vst):
"""Test basic fabric capacity infrastructure in VOQ switchs.

This test validates that when fabric links get isolated, the fabric capacity
get updated in the state_db.
When the link get unisolated, the fabric capacity get set back as well.
"""

dvss = vst.dvss
for name in dvss.keys():
dvs = dvss[name]
# Get the config information and choose a linecard or fabric card to test.
config_db = dvs.get_config_db()
metatbl = config_db.get_entry("DEVICE_METADATA", "localhost")

cfg_switch_type = metatbl.get("switch_type")
if cfg_switch_type == "fabric":

# get state_db infor
sdb = dvs.get_state_db()
# There are 16 fabric ports in the test environment.
# Choose one link to test.
portNum = random.randint(1, 16)
cdb_port = "Fabric"+str(portNum)
sdb_port = "PORT"+str(portNum)

max_poll = PollingConfig(polling_interval=60, timeout=600, strict=True)

# setup test environment
sdb.update_entry("FABRIC_PORT_TABLE", sdb_port, {"TEST": "TEST"})

# get current fabric capacity
capacity = sdb.get_entry("FABRIC_CAPACITY_TABLE", "FABRIC_CAPACITY_DATA")['operating_links']
if sdb.get_entry("FABRIC_PORT_TABLE", sdb_port)['STATUS'] == 'up':
try:
# clean up the testing port.
# set TEST_CRC_ERRORS to 0
# set TEST_CODE_ERRORS to 0
sdb.update_entry("FABRIC_PORT_TABLE", sdb_port, {"TEST_CRC_ERRORS":"0"})
sdb.update_entry("FABRIC_PORT_TABLE", sdb_port, {"TEST_CODE_ERRORS": "0"})

# isolate the link from config_db
config_db.update_entry("FABRIC_PORT", cdb_port, {"isolateStatus": "True"})
sdb.wait_for_field_match("FABRIC_PORT_TABLE", sdb_port, {"ISOLATED": "1"}, polling_config=max_poll)
# check if capacity reduced
sdb.wait_for_field_negative_match("FABRIC_CAPACITY_TABLE", "FABRIC_CAPACITY_DATA", {'operating_links': capacity}, polling_config=max_poll)
# unisolate the link from config_db
config_db.update_entry("FABRIC_PORT", cdb_port, {"isolateStatus": "False"})
sdb.wait_for_field_match("FABRIC_PORT_TABLE", sdb_port, {"ISOLATED": "0"}, polling_config=max_poll)
sdb.wait_for_field_match("FABRIC_CAPACITY_TABLE", "FABRIC_CAPACITY_DATA", {'operating_links': capacity}, polling_config=max_poll)
finally:
# cleanup
sdb.update_entry("FABRIC_PORT_TABLE", sdb_port, {"TEST_CRC_ERRORS": "0"})
sdb.update_entry("FABRIC_PORT_TABLE", sdb_port, {"TEST_CODE_ERRORS": "0"})
sdb.update_entry("FABRIC_PORT_TABLE", sdb_port, {"TEST": "product"})
else:
print("The link ", port, " is down")
else:
print("We do not check switch type:", cfg_switch_type)


# Add a dummy always-pass test at the end as a workaround
# for the issue where, when Flaky fails on the final test, it invokes module tear-down before retrying
def test_nonflaky_dummy():
    """Placeholder test that performs no checks and always passes."""

3 changes: 1 addition & 2 deletions tests/test_fabric_port_isolation.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,16 @@ def test_voq_switch_fabric_link(self, vst):
portNum = random.randint(1, 16)
port = "PORT"+str(portNum)
# wait for link monitoring algorithm skips init pollings
sdb.update_entry("FABRIC_PORT_TABLE", port, {"TEST": "TEST"})
max_poll = PollingConfig(polling_interval=60, timeout=1200, strict=True)
if sdb.get_entry("FABRIC_PORT_TABLE", port)['STATUS'] == 'up':
sdb.wait_for_field_match("FABRIC_PORT_TABLE", port, {"SKIP_FEC_ERR_ON_LNKUP_CNT": "2"}, polling_config=max_poll)
try:
# clean up the system for the testing port.
# set TEST_CRC_ERRORS to 0
# set TEST_CODE_ERRORS to 0
# set TEST to "TEST"
sdb.update_entry("FABRIC_PORT_TABLE", port, {"TEST_CRC_ERRORS":"0"})
sdb.update_entry("FABRIC_PORT_TABLE", port, {"TEST_CODE_ERRORS": "0"})
sdb.update_entry("FABRIC_PORT_TABLE", port, {"TEST": "TEST"})
# inject testing errors and wait for link get isolated.
sdb.update_entry("FABRIC_PORT_TABLE", port, {"TEST_CRC_ERRORS": "2"})
sdb.wait_for_field_match("FABRIC_PORT_TABLE", port, {"AUTO_ISOLATED": "1"}, polling_config=max_poll)
Expand Down