Skip to content

Commit 4d39712

Browse files
authored
[DPB]: Fix stale queue counter maps in COUNTERS_DB after port breakout (sonic-net#3982)
What I did Added cleanup of COUNTERS_*_NAME_MAP entries for a port during its deinit phase, and regenerated the NAME_MAP tables with fresh OIDs during port init when queue flex counters are already enabled. Why I did it After dynamic port breakout of a port, queue name map tables in the COUNTERS_DB table are not regenerated leaving stale entries resulting in CLI crash
1 parent e2cc8ce commit 4d39712

4 files changed

Lines changed: 147 additions & 11 deletions

File tree

orchagent/high_frequency_telemetry/counternameupdater.cpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,18 +40,15 @@ void CounterNameMapUpdater::setCounterNameMap(const std::vector<swss::FieldValue
4040
{
4141
SWSS_LOG_ENTER();
4242

43-
if (gHFTOrch)
43+
for (const auto& map : counter_name_maps)
4444
{
45-
for (const auto& map : counter_name_maps)
45+
const std::string& counter_name = fvField(map);
46+
sai_object_id_t oid = SAI_NULL_OBJECT_ID;
47+
if (!fvValue(map).empty())
4648
{
47-
const std::string& counter_name = fvField(map);
48-
sai_object_id_t oid = SAI_NULL_OBJECT_ID;
49-
if (!fvValue(map).empty())
50-
{
51-
sai_deserialize_object_id(fvValue(map), oid);
52-
}
53-
setCounterNameMap(counter_name, oid);
49+
sai_deserialize_object_id(fvValue(map), oid);
5450
}
51+
setCounterNameMap(counter_name, oid);
5552
}
5653
}
5754

orchagent/portsorch.cpp

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4033,6 +4033,40 @@ void PortsOrch::registerPort(Port &p)
40334033
wred_port_stat_manager.setCounterIdList(p.m_port_id, CounterType::PORT, wred_port_stats);
40344034
}
40354035

4036+
// If queue-related flex counters are already enabled, generate queue maps
4037+
// for the newly added port so that usecases like dynamic port breakout works.
4038+
bool queueFcEnabled = flex_counters_orch->getQueueCountersState() ||
4039+
flex_counters_orch->getQueueWatermarkCountersState() ||
4040+
flex_counters_orch->getWredQueueCountersState();
4041+
if (queueFcEnabled && !p.m_queue_ids.empty())
4042+
{
4043+
auto queuesStateVector = flex_counters_orch->getQueueConfigurations();
4044+
4045+
auto maxQueueNumber = getNumberOfPortSupportedQueueCounters(p.m_alias);
4046+
FlexCounterQueueStates flexCounterQueueState(maxQueueNumber);
4047+
4048+
if (queuesStateVector.count(p.m_alias))
4049+
{
4050+
flexCounterQueueState = queuesStateVector.at(p.m_alias);
4051+
}
4052+
else if (queuesStateVector.count(createAllAvailableBuffersStr))
4053+
{
4054+
if (maxQueueNumber > 0)
4055+
{
4056+
flexCounterQueueState.enableQueueCounters(0, maxQueueNumber - 1);
4057+
}
4058+
}
4059+
else
4060+
{
4061+
if (p.m_host_tx_queue_configured && p.m_host_tx_queue < maxQueueNumber)
4062+
{
4063+
flexCounterQueueState.enableQueueCounter(p.m_host_tx_queue);
4064+
}
4065+
}
4066+
4067+
generateQueueMapPerPort(p, flexCounterQueueState, false);
4068+
}
4069+
40364070
PortUpdate update = { p, true };
40374071
notify(SUBJECT_TYPE_PORT_CHANGE, static_cast<void *>(&update));
40384072

@@ -4056,9 +4090,12 @@ void PortsOrch::deInitPort(string alias, sai_object_id_t port_id)
40564090
return;
40574091
}
40584092

4059-
if (p.m_host_tx_queue_configured && p.m_queue_ids.size() > p.m_host_tx_queue)
4093+
if (!p.m_queue_ids.empty())
40604094
{
4061-
removePortBufferQueueCounters(p, to_string(p.m_host_tx_queue), false);
4095+
auto skip_host_tx_queue = p.m_host_tx_queue_configured && (p.m_queue_ids.size() > p.m_host_tx_queue);
4096+
// Remove all queue counters and mappings for this port to avoid stale entries
4097+
std::string range = "0-" + to_string(p.m_queue_ids.size() - 1);
4098+
removePortBufferQueueCounters(p, range, skip_host_tx_queue);
40624099
}
40634100

40644101
/* remove port from flex_counter_table for updating counters */

tests/mock_tests/mock_table.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,35 @@ namespace swss
126126
table->second.erase(key);
127127
}
128128
}
129+
130+
void Table::hdel(const std::string &key, const std::string &field, const std::string &op, const std::string &prefix)
131+
{
132+
auto &table = gDB[m_pipe->getDbId()][getTableName()];
133+
auto key_iter = table.find(key);
134+
if (key_iter == table.end())
135+
{
136+
return;
137+
}
138+
139+
auto &attrs = key_iter->second;
140+
std::vector<FieldValueTuple> new_attrs;
141+
for (const auto &attr : attrs)
142+
{
143+
if (attr.first != field)
144+
{
145+
new_attrs.push_back(attr);
146+
}
147+
}
148+
149+
if (new_attrs.empty())
150+
{
151+
table.erase(key);
152+
}
153+
else
154+
{
155+
table[key] = new_attrs;
156+
}
157+
}
129158

130159
void ProducerStateTable::set(const std::string &key,
131160
const std::vector<FieldValueTuple> &values,

tests/mock_tests/portsorch_ut.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1493,6 +1493,79 @@ namespace portsorch_test
14931493
_unhook_sai_queue_api();
14941494
}
14951495

1496+
TEST_F(PortsOrchTest, PortDeleteQueueCountersCleanup)
1497+
{
1498+
Table portTable = Table(m_app_db.get(), APP_PORT_TABLE_NAME);
1499+
std::deque<KeyOpFieldsValuesTuple> entries;
1500+
1501+
auto &ports = defaultPortList;
1502+
ASSERT_TRUE(!ports.empty());
1503+
1504+
// Create ports
1505+
for (const auto &it : ports)
1506+
{
1507+
portTable.set(it.first, it.second);
1508+
}
1509+
1510+
portTable.set("PortConfigDone", { { "count", to_string(ports.size()) } });
1511+
portTable.set("PortInitDone", { { "lanes", "0" } });
1512+
1513+
gPortsOrch->addExistingData(&portTable);
1514+
static_cast<Orch *>(gPortsOrch)->doTask();
1515+
1516+
Port port;
1517+
auto testQueueIdx = 0;
1518+
string portName = "Ethernet0";
1519+
ASSERT_TRUE(gPortsOrch->getPort(portName, port));
1520+
ASSERT_NE(port.m_port_id, SAI_NULL_OBJECT_ID);
1521+
ASSERT_GT(port.m_queue_ids.size(), 0u);
1522+
1523+
// Enable Queue flex counters
1524+
Table flexCounterCfg = Table(m_config_db.get(), CFG_FLEX_COUNTER_TABLE_NAME);
1525+
const std::vector<FieldValueTuple> enable({ {FLEX_COUNTER_STATUS_FIELD, "enable"} });
1526+
flexCounterCfg.set("QUEUE_WATERMARK", enable);
1527+
flexCounterCfg.set("QUEUE", enable);
1528+
1529+
auto flexCounterOrch = gDirectory.get<FlexCounterOrch*>();
1530+
flexCounterOrch->addExistingData(&flexCounterCfg);
1531+
static_cast<Orch *>(flexCounterOrch)->doTask();
1532+
1533+
sai_object_id_t targetQueueOid = port.m_queue_ids[testQueueIdx];
1534+
1535+
// Delete the port
1536+
entries.push_back({portName, "DEL", { { } }});
1537+
auto consumer = dynamic_cast<Consumer *>(gPortsOrch->getExecutor(APP_PORT_TABLE_NAME));
1538+
consumer->addToSync(entries);
1539+
static_cast<Orch *>(gPortsOrch)->doTask();
1540+
entries.clear();
1541+
1542+
Table countersQueueNameMap(m_counters_db.get(), "COUNTERS_QUEUE_NAME_MAP");
1543+
Table countersQueuePortMap(m_counters_db.get(), "COUNTERS_QUEUE_PORT_MAP");
1544+
1545+
// Verify specific alias:idx field is gone from name maps
1546+
string dummy;
1547+
ASSERT_FALSE(countersQueueNameMap.hget("", portName + ":" + to_string(testQueueIdx), dummy));
1548+
ASSERT_FALSE(countersQueuePortMap.hget("", sai_serialize_object_id(targetQueueOid), dummy));
1549+
1550+
// Re-add the same port
1551+
auto it = ports.find(portName);
1552+
entries.push_back({portName, "SET", it->second});
1553+
consumer->addToSync(entries);
1554+
static_cast<Orch *>(gPortsOrch)->doTask();
1555+
entries.clear();
1556+
1557+
// Fetch the re-added port and verify COUNTERS_DB entries exist for the queue index
1558+
Port readded;
1559+
ASSERT_TRUE(gPortsOrch->getPort(portName, readded));
1560+
ASSERT_GT(readded.m_queue_ids.size(), 0u);
1561+
sai_object_id_t readdedQ1 = readded.m_queue_ids[testQueueIdx];
1562+
1563+
// Name-map should contain alias:idx as a field
1564+
ASSERT_TRUE(countersQueueNameMap.hget("", portName + ":" + to_string(testQueueIdx), dummy));
1565+
// Port-map should contain queue OID -> port OID mapping as a field
1566+
ASSERT_TRUE(countersQueuePortMap.hget("", sai_serialize_object_id(readdedQ1), dummy));
1567+
}
1568+
14961569
TEST_F(PortsOrchTest, PortPTConfigDefaultTimestampTemplate)
14971570
{
14981571
auto portTable = Table(m_app_db.get(), APP_PORT_TABLE_NAME);

0 commit comments

Comments
 (0)