Skip to content

Commit 26a5a1c

Browse files
authored
Fix flex counter out-of-order issue by notifying counter operations using SelectableChannel (#3076)
* Fix flow counter sequence issue using redis SAI switch API enable counter polling and create counter group SAI switch API is invoked to enable counter polling and create counter group CLI option to choose whether the new infra should be used
1 parent 2f8bd9c commit 26a5a1c

30 files changed

Lines changed: 1482 additions & 254 deletions

orchagent/bufferorch.cpp

Lines changed: 18 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,6 @@ extern string gMySwitchType;
2323
extern string gMyHostName;
2424
extern string gMyAsicName;
2525

26-
#define BUFFER_POOL_WATERMARK_FLEX_STAT_COUNTER_POLL_MSECS "60000"
27-
28-
2926
static const vector<sai_buffer_pool_stat_t> bufferPoolWatermarkStatIds =
3027
{
3128
SAI_BUFFER_POOL_STAT_WATERMARK_BYTES,
@@ -52,9 +49,6 @@ std::map<string, std::map<size_t, string>> queue_port_flags;
5249

5350
BufferOrch::BufferOrch(DBConnector *applDb, DBConnector *confDb, DBConnector *stateDb, vector<string> &tableNames) :
5451
Orch(applDb, tableNames),
55-
m_flexCounterDb(new DBConnector("FLEX_COUNTER_DB", 0)),
56-
m_flexCounterTable(new ProducerTable(m_flexCounterDb.get(), FLEX_COUNTER_TABLE)),
57-
m_flexCounterGroupTable(new ProducerTable(m_flexCounterDb.get(), FLEX_COUNTER_GROUP_TABLE)),
5852
m_countersDb(new DBConnector("COUNTERS_DB", 0)),
5953
m_stateBufferMaximumValueTable(stateDb, STATE_BUFFER_MAXIMUM_VALUE_TABLE)
6054
{
@@ -229,22 +223,23 @@ void BufferOrch::initBufferConstants()
229223
void BufferOrch::initFlexCounterGroupTable(void)
230224
{
231225
string bufferPoolWmPluginName = "watermark_bufferpool.lua";
226+
string bufferPoolWmSha;
232227

233228
try
234229
{
235230
string bufferPoolLuaScript = swss::loadLuaScript(bufferPoolWmPluginName);
236-
string bufferPoolWmSha = swss::loadRedisScript(m_countersDb.get(), bufferPoolLuaScript);
237-
238-
vector<FieldValueTuple> fvTuples;
239-
fvTuples.emplace_back(BUFFER_POOL_PLUGIN_FIELD, bufferPoolWmSha);
240-
fvTuples.emplace_back(POLL_INTERVAL_FIELD, BUFFER_POOL_WATERMARK_FLEX_STAT_COUNTER_POLL_MSECS);
241-
242-
m_flexCounterGroupTable->set(BUFFER_POOL_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP, fvTuples);
231+
bufferPoolWmSha = swss::loadRedisScript(m_countersDb.get(), bufferPoolLuaScript);
243232
}
244233
catch (const runtime_error &e)
245234
{
246235
SWSS_LOG_ERROR("Buffer pool watermark lua script and/or flex counter group not set successfully. Runtime error: %s", e.what());
247236
}
237+
238+
setFlexCounterGroupParameter(BUFFER_POOL_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP,
239+
BUFFER_POOL_WATERMARK_FLEX_STAT_COUNTER_POLL_MSECS,
240+
"", // do not touch stats_mode
241+
BUFFER_POOL_PLUGIN_FIELD,
242+
bufferPoolWmSha);
248243
}
249244

250245
bool BufferOrch::isPortReady(const std::string& port_name) const
@@ -275,7 +270,7 @@ void BufferOrch::clearBufferPoolWatermarkCounterIdList(const sai_object_id_t obj
275270
if (m_isBufferPoolWatermarkCounterIdListGenerated)
276271
{
277272
string key = BUFFER_POOL_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP ":" + sai_serialize_object_id(object_id);
278-
m_flexCounterTable->del(key);
273+
stopFlexCounterPolling(gSwitchId, key);
279274
}
280275
}
281276

@@ -326,37 +321,32 @@ void BufferOrch::generateBufferPoolWatermarkCounterIdList(void)
326321

327322
if (!noWmClrCapability)
328323
{
329-
vector<FieldValueTuple> fvs;
330-
331-
fvs.emplace_back(STATS_MODE_FIELD, STATS_MODE_READ_AND_CLEAR);
332-
m_flexCounterGroupTable->set(BUFFER_POOL_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP, fvs);
324+
setFlexCounterGroupStatsMode(BUFFER_POOL_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP,
325+
STATS_MODE_READ_AND_CLEAR);
333326
}
334327

335328
// Push buffer pool watermark COUNTER_ID_LIST to FLEX_COUNTER_TABLE on a per buffer pool basis
336-
vector<FieldValueTuple> fvTuples;
337-
fvTuples.emplace_back(BUFFER_POOL_COUNTER_ID_LIST, statList);
329+
string stats_mode;
330+
338331
bitMask = 1;
332+
339333
for (const auto &it : *(m_buffer_type_maps[APP_BUFFER_POOL_TABLE_NAME]))
340334
{
341335
string key = BUFFER_POOL_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP ":" + sai_serialize_object_id(it.second.m_saiObjectId);
342336

337+
stats_mode = "";
338+
343339
if (noWmClrCapability)
344340
{
345-
string stats_mode = STATS_MODE_READ_AND_CLEAR;
346341
if (noWmClrCapability & bitMask)
347342
{
348343
stats_mode = STATS_MODE_READ;
349344
}
350-
fvTuples.emplace_back(STATS_MODE_FIELD, stats_mode);
351345

352-
m_flexCounterTable->set(key, fvTuples);
353-
fvTuples.pop_back();
354346
bitMask <<= 1;
355347
}
356-
else
357-
{
358-
m_flexCounterTable->set(key, fvTuples);
359-
}
348+
349+
startFlexCounterPolling(gSwitchId, key, statList, BUFFER_POOL_COUNTER_ID_LIST, stats_mode);
360350
}
361351

362352
m_isBufferPoolWatermarkCounterIdListGenerated = true;

orchagent/bufferorch.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "redisapi.h"
1010

1111
#define BUFFER_POOL_WATERMARK_STAT_COUNTER_FLEX_COUNTER_GROUP "BUFFER_POOL_WATERMARK_STAT_COUNTER"
12+
#define BUFFER_POOL_WATERMARK_FLEX_STAT_COUNTER_POLL_MSECS "60000"
1213

1314
const string buffer_size_field_name = "size";
1415
const string buffer_pool_type_field_name = "type";
@@ -63,10 +64,6 @@ class BufferOrch : public Orch
6364
std::unordered_map<std::string, bool> m_ready_list;
6465
std::unordered_map<std::string, std::vector<std::string>> m_port_ready_list_ref;
6566

66-
unique_ptr<DBConnector> m_flexCounterDb;
67-
unique_ptr<ProducerTable> m_flexCounterGroupTable;
68-
unique_ptr<ProducerTable> m_flexCounterTable;
69-
7067
Table m_stateBufferMaximumValueTable;
7168

7269
unique_ptr<DBConnector> m_countersDb;

orchagent/copporch.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ extern sai_object_id_t gSwitchId;
2626
extern PortsOrch* gPortsOrch;
2727
extern Directory<Orch*> gDirectory;
2828
extern bool gIsNatSupported;
29+
extern bool gTraditionalFlexCounter;
2930

3031
#define FLEX_COUNTER_UPD_INTERVAL 1
3132

@@ -126,11 +127,9 @@ const uint HOSTIF_TRAP_COUNTER_POLLING_INTERVAL_MS = 10000;
126127
CoppOrch::CoppOrch(DBConnector* db, string tableName) :
127128
Orch(db, tableName),
128129
m_counter_db(std::shared_ptr<DBConnector>(new DBConnector("COUNTERS_DB", 0))),
129-
m_flex_db(std::shared_ptr<DBConnector>(new DBConnector("FLEX_COUNTER_DB", 0))),
130130
m_asic_db(std::shared_ptr<DBConnector>(new DBConnector("ASIC_DB", 0))),
131131
m_counter_table(std::unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_TRAP_NAME_MAP))),
132132
m_vidToRidTable(std::unique_ptr<Table>(new Table(m_asic_db.get(), "VIDTORID"))),
133-
m_flex_counter_group_table(std::unique_ptr<ProducerTable>(new ProducerTable(m_flex_db.get(), FLEX_COUNTER_GROUP_TABLE))),
134133
m_trap_counter_manager(HOSTIF_TRAP_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, HOSTIF_TRAP_COUNTER_POLLING_INTERVAL_MS, false)
135134
{
136135
SWSS_LOG_ENTER();
@@ -772,7 +771,7 @@ void CoppOrch::doTask(SelectableTimer &timer)
772771
for (auto it = m_pendingAddToFlexCntr.begin(); it != m_pendingAddToFlexCntr.end(); )
773772
{
774773
const auto id = sai_serialize_object_id(it->first);
775-
if (m_vidToRidTable->hget("", id, value))
774+
if (!gTraditionalFlexCounter || m_vidToRidTable->hget("", id, value))
776775
{
777776
SWSS_LOG_INFO("Registering %s, id %s", it->second.c_str(), id.c_str());
778777

@@ -1205,20 +1204,22 @@ void CoppOrch::initTrapRatePlugin()
12051204
}
12061205

12071206
std::string trapRatePluginName = "trap_rates.lua";
1207+
std::string trapSha;
12081208
try
12091209
{
12101210
std::string trapLuaScript = swss::loadLuaScript(trapRatePluginName);
1211-
std::string trapSha = swss::loadRedisScript(m_counter_db.get(), trapLuaScript);
1212-
1213-
vector<FieldValueTuple> fieldValues;
1214-
fieldValues.emplace_back(FLOW_COUNTER_PLUGIN_FIELD, trapSha);
1215-
fieldValues.emplace_back(STATS_MODE_FIELD, STATS_MODE_READ);
1216-
m_flex_counter_group_table->set(HOSTIF_TRAP_COUNTER_FLEX_COUNTER_GROUP, fieldValues);
1211+
trapSha = swss::loadRedisScript(m_counter_db.get(), trapLuaScript);
12171212
}
12181213
catch (const runtime_error &e)
12191214
{
12201215
SWSS_LOG_ERROR("Trap flex counter groups were not set successfully: %s", e.what());
12211216
}
1217+
1218+
setFlexCounterGroupParameter(HOSTIF_TRAP_COUNTER_FLEX_COUNTER_GROUP,
1219+
"", // Do not touch poll interval
1220+
STATS_MODE_READ,
1221+
FLOW_COUNTER_PLUGIN_FIELD,
1222+
trapSha);
12221223
m_trap_rate_plugin_loaded = true;
12231224
}
12241225

orchagent/copporch.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,9 @@ class CoppOrch : public Orch
9898
std::map<sai_object_id_t, std::string> m_pendingAddToFlexCntr;
9999

100100
std::shared_ptr<DBConnector> m_counter_db;
101-
std::shared_ptr<DBConnector> m_flex_db;
102101
std::shared_ptr<DBConnector> m_asic_db;
103102
std::unique_ptr<Table> m_counter_table;
104103
std::unique_ptr<Table> m_vidToRidTable;
105-
std::unique_ptr<ProducerTable> m_flex_counter_group_table;
106104

107105
FlexCounterManager m_trap_counter_manager;
108106

orchagent/fabricportsorch.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,6 @@ FabricPortsOrch::FabricPortsOrch(DBConnector *appl_db, vector<table_name_with_pr
8484
m_portNamePortCounterTable = unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_FABRIC_PORT_NAME_MAP));
8585
m_fabricCounterTable = unique_ptr<Table>(new Table(m_counter_db.get(), COUNTERS_TABLE));
8686

87-
m_flex_db = shared_ptr<DBConnector>(new DBConnector("FLEX_COUNTER_DB", 0));
88-
m_flexCounterTable = unique_ptr<ProducerTable>(new ProducerTable(m_flex_db.get(), APP_FABRIC_PORT_TABLE_NAME));
8987
m_appl_db = shared_ptr<DBConnector>(new DBConnector("APPL_DB", 0));
9088
m_applTable = unique_ptr<Table>(new Table(m_appl_db.get(), APP_FABRIC_MONITOR_PORT_TABLE_NAME));
9189
m_applMonitorConstTable = unique_ptr<Table>(new Table(m_appl_db.get(), APP_FABRIC_MONITOR_DATA_TABLE_NAME));

orchagent/fabricportsorch.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ class FabricPortsOrch : public Orch, public Subject
2626

2727
shared_ptr<DBConnector> m_state_db;
2828
shared_ptr<DBConnector> m_counter_db;
29-
shared_ptr<DBConnector> m_flex_db;
3029
shared_ptr<DBConnector> m_appl_db;
3130

3231
unique_ptr<Table> m_stateTable;

orchagent/flex_counter/flex_counter_manager.cpp

Lines changed: 30 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ using swss::DBConnector;
1818
using swss::FieldValueTuple;
1919
using swss::ProducerTable;
2020

21+
extern sai_switch_api_t *sai_switch_api;
22+
23+
extern sai_object_id_t gSwitchId;
24+
2125
const string FLEX_COUNTER_ENABLE("enable");
2226
const string FLEX_COUNTER_DISABLE("disable");
2327

@@ -89,13 +93,13 @@ FlexCounterManager::FlexCounterManager(
8993
const uint polling_interval,
9094
const bool enabled,
9195
FieldValueTuple fv_plugin) :
92-
FlexCounterManager("FLEX_COUNTER_DB", group_name, stats_mode,
96+
FlexCounterManager(false, group_name, stats_mode,
9397
polling_interval, enabled, fv_plugin)
9498
{
9599
}
96100

97101
FlexCounterManager::FlexCounterManager(
98-
const string& db_name,
102+
const bool is_gearbox,
99103
const string& group_name,
100104
const StatsMode stats_mode,
101105
const uint polling_interval,
@@ -106,11 +110,7 @@ FlexCounterManager::FlexCounterManager(
106110
polling_interval(polling_interval),
107111
enabled(enabled),
108112
fv_plugin(fv_plugin),
109-
flex_counter_db(new DBConnector(db_name, 0)),
110-
flex_counter_group_table(new ProducerTable(flex_counter_db.get(),
111-
FLEX_COUNTER_GROUP_TABLE)),
112-
flex_counter_table(new ProducerTable(flex_counter_db.get(),
113-
FLEX_COUNTER_TABLE))
113+
is_gearbox(is_gearbox)
114114
{
115115
SWSS_LOG_ENTER();
116116

@@ -125,13 +125,10 @@ FlexCounterManager::~FlexCounterManager()
125125

126126
for (const auto& counter: installed_counters)
127127
{
128-
flex_counter_table->del(getFlexCounterTableKey(group_name, counter));
128+
stopFlexCounterPolling(counter.second, getFlexCounterTableKey(group_name, counter.first));
129129
}
130130

131-
if (flex_counter_group_table != nullptr)
132-
{
133-
flex_counter_group_table->del(group_name);
134-
}
131+
delFlexCounterGroup(group_name, is_gearbox);
135132

136133
SWSS_LOG_DEBUG("Deleted flex counter group '%s'.", group_name.c_str());
137134
}
@@ -140,31 +137,21 @@ void FlexCounterManager::applyGroupConfiguration()
140137
{
141138
SWSS_LOG_ENTER();
142139

143-
vector<FieldValueTuple> field_values =
144-
{
145-
FieldValueTuple(STATS_MODE_FIELD, stats_mode_lookup.at(stats_mode)),
146-
FieldValueTuple(POLL_INTERVAL_FIELD, std::to_string(polling_interval)),
147-
FieldValueTuple(FLEX_COUNTER_STATUS_FIELD, status_lookup.at(enabled))
148-
};
149-
150-
if (!fvField(fv_plugin).empty())
151-
{
152-
field_values.emplace_back(fv_plugin);
153-
}
154-
155-
flex_counter_group_table->set(group_name, field_values);
140+
setFlexCounterGroupParameter(group_name,
141+
std::to_string(polling_interval),
142+
stats_mode_lookup.at(stats_mode),
143+
fvField(fv_plugin),
144+
fvValue(fv_plugin),
145+
status_lookup.at(enabled),
146+
is_gearbox);
156147
}
157148

158149
void FlexCounterManager::updateGroupPollingInterval(
159150
const uint polling_interval)
160151
{
161152
SWSS_LOG_ENTER();
162153

163-
vector<FieldValueTuple> field_values =
164-
{
165-
FieldValueTuple(POLL_INTERVAL_FIELD, std::to_string(polling_interval))
166-
};
167-
flex_counter_group_table->set(group_name, field_values);
154+
setFlexCounterGroupPollInterval(group_name, std::to_string(polling_interval), is_gearbox);
168155

169156
SWSS_LOG_DEBUG("Set polling interval for flex counter group '%s' to %d ms.",
170157
group_name.c_str(), polling_interval);
@@ -181,11 +168,7 @@ void FlexCounterManager::enableFlexCounterGroup()
181168
return;
182169
}
183170

184-
vector<FieldValueTuple> field_values =
185-
{
186-
FieldValueTuple(FLEX_COUNTER_STATUS_FIELD, FLEX_COUNTER_ENABLE)
187-
};
188-
flex_counter_group_table->set(group_name, field_values);
171+
setFlexCounterGroupOperation(group_name, FLEX_COUNTER_ENABLE, is_gearbox);
189172
enabled = true;
190173

191174
SWSS_LOG_DEBUG("Enabling flex counters for group '%s'.",
@@ -203,11 +186,7 @@ void FlexCounterManager::disableFlexCounterGroup()
203186
return;
204187
}
205188

206-
vector<FieldValueTuple> field_values =
207-
{
208-
FieldValueTuple(FLEX_COUNTER_STATUS_FIELD, FLEX_COUNTER_DISABLE)
209-
};
210-
flex_counter_group_table->set(group_name, field_values);
189+
setFlexCounterGroupOperation(group_name, FLEX_COUNTER_DISABLE, is_gearbox);
211190
enabled = false;
212191

213192
SWSS_LOG_DEBUG("Disabling flex counters for group '%s'.",
@@ -219,7 +198,8 @@ void FlexCounterManager::disableFlexCounterGroup()
219198
void FlexCounterManager::setCounterIdList(
220199
const sai_object_id_t object_id,
221200
const CounterType counter_type,
222-
const unordered_set<string>& counter_stats)
201+
const unordered_set<string>& counter_stats,
202+
const sai_object_id_t switch_id)
223203
{
224204
SWSS_LOG_ENTER();
225205

@@ -231,12 +211,12 @@ void FlexCounterManager::setCounterIdList(
231211
return;
232212
}
233213

234-
std::vector<swss::FieldValueTuple> field_values =
235-
{
236-
FieldValueTuple(counter_type_it->second, serializeCounterStats(counter_stats))
237-
};
238-
flex_counter_table->set(getFlexCounterTableKey(group_name, object_id), field_values);
239-
installed_counters.insert(object_id);
214+
auto key = getFlexCounterTableKey(group_name, object_id);
215+
auto counter_ids = serializeCounterStats(counter_stats);
216+
auto effective_switch_id = switch_id == SAI_NULL_OBJECT_ID ? gSwitchId : switch_id;
217+
218+
startFlexCounterPolling(effective_switch_id, key, counter_ids, counter_type_it->second);
219+
installed_counters[object_id] = effective_switch_id;
240220

241221
SWSS_LOG_DEBUG("Updated flex counter id list for object '%" PRIu64 "' in group '%s'.",
242222
object_id,
@@ -258,7 +238,8 @@ void FlexCounterManager::clearCounterIdList(const sai_object_id_t object_id)
258238
return;
259239
}
260240

261-
flex_counter_table->del(getFlexCounterTableKey(group_name, object_id));
241+
auto key = getFlexCounterTableKey(group_name, object_id);
242+
stopFlexCounterPolling(installed_counters[object_id], key);
262243
installed_counters.erase(counter_it);
263244

264245
SWSS_LOG_DEBUG("Cleared flex counter id list for object '%" PRIu64 "' in group '%s'.",
@@ -272,7 +253,7 @@ string FlexCounterManager::getFlexCounterTableKey(
272253
{
273254
SWSS_LOG_ENTER();
274255

275-
return group_name + flex_counter_table->getTableNameSeparator() + sai_serialize_object_id(object_id);
256+
return group_name + ":" + sai_serialize_object_id(object_id);
276257
}
277258

278259
// serializeCounterStats turns a set of stats into a format suitable for FLEX_COUNTER_DB.

0 commit comments

Comments
 (0)