Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
5b25540
Survive PFC watchdog and storm action in warm-reboot
wendani Feb 16, 2019
1840d85
Remove logs used for debugging
wendani Feb 16, 2019
83ae4bf
Add queue index check before taking storm action during warm-reboot
wendani Feb 16, 2019
a925131
Correct log message
wendani Feb 16, 2019
6550073
Log storm event for all storm actions not only drop action
wendani Feb 16, 2019
383209e
Address review comments
wendani Feb 17, 2019
90f6db3
Address the situation that stoi() may throw an exception
wendani Feb 19, 2019
fb88541
Merge remote-tracking branch 'public/master' into pfcwd_wb_master
wendani Feb 28, 2019
722bc5f
Fine-gran handling of stoi exceptions
wendani Feb 26, 2019
8264eda
Shift temporarily to STATE_DB
wendani Feb 27, 2019
a5bc06a
Add debugging symbols
wendani Feb 27, 2019
d8e92a8
Revert "Shift temporarily to STATE_DB"
wendani Feb 27, 2019
ea7ba0d
Orthogonalize pfc wd table names
wendani Feb 28, 2019
8fce358
Implement doTask for the new Consumer, which subscribes to APPL_DB
wendani Feb 28, 2019
e3e789b
Clean up and touch-ups
wendani Feb 28, 2019
4500dae
Delete multiple fields in one hdel call
wendani Mar 1, 2019
bd53d49
Refactor codes with multi-fields hdel
wendani Mar 1, 2019
52df92d
Address comments: remove unnecessary catch blocks for stoi() call
wendani Mar 2, 2019
057fe50
Use RedisClient to do hset (previous through Table hset)
wendani Mar 2, 2019
5e0ff53
Remove debugging symbols
wendani Mar 1, 2019
36609e9
Address review comments: Replace PfcWdSwOrch<DropHandler, ForwardHand…
wendani Mar 23, 2019
cf597a6
Merge remote-tracking branch 'public/master' into pfc_wb_master
wendani Apr 1, 2019
2de7de5
Address review comments: Refactor existing codes to replace PfcWdSwOr…
wendani Apr 1, 2019
8b8e06c
Remove unused variable to correct compile error
wendani Apr 1, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 142 additions & 8 deletions orchagent/pfcwdorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@
#include "select.h"
#include "notifier.h"
#include "redisclient.h"
#include "schema.h"
#include "tokenize.h"

#define PFC_WD_GLOBAL "GLOBAL"
#define PFC_WD_ACTION "action"
#define PFC_WD_DETECTION_TIME "detection_time"
#define PFC_WD_RESTORATION_TIME "restoration_time"
#define BIG_RED_SWITCH_FIELD "BIG_RED_SWITCH"
#define PFC_WD_IN_STORM "storm"

#define PFC_WD_DETECTION_TIME_MAX (5 * 1000)
#define PFC_WD_DETECTION_TIME_MIN 100
Expand Down Expand Up @@ -154,6 +157,7 @@ void PfcWdOrch<DropHandler, ForwardHandler>::createEntry(const string& key,
uint32_t restorationTime = 0;
// According to requirements, drop action is default
PfcWdAction action = PfcWdAction::PFC_WD_ACTION_DROP;
vector<string> queues;

Port port;
if (!gPortsOrch->getPort(key, port))
Expand Down Expand Up @@ -197,6 +201,11 @@ void PfcWdOrch<DropHandler, ForwardHandler>::createEntry(const string& key,
return;
}
}
else if (field == PFC_WD_IN_STORM)
{
SWSS_LOG_NOTICE("In-storm queues %s on port %s", value.c_str(), key.c_str());
queues = tokenize(value, comma);
}
else
{
SWSS_LOG_ERROR(
Expand Down Expand Up @@ -239,6 +248,35 @@ void PfcWdOrch<DropHandler, ForwardHandler>::createEntry(const string& key,
}

SWSS_LOG_NOTICE("Started PFC Watchdog on port %s", port.m_alias.c_str());

// Start PFC storm action on queues of port
if (!queues.empty())
{
for (const auto &q : queues)
{
int qIdx = -1;
try
{
qIdx = stoi(q);
}
catch (...)
{
SWSS_LOG_ERROR("Invalid conversion to int from string %s", q.c_str());
continue;
}

if ((qIdx < 0) || (static_cast<unsigned int>(qIdx) >= port.m_queue_ids.size()))
{
SWSS_LOG_ERROR("Invalid queue index %d on port %s", qIdx, key.c_str());
continue;
}
if (!startWdActionOnQueue(PFC_WD_IN_STORM, port.m_queue_ids[qIdx]))
{
SWSS_LOG_ERROR("Failed to start PFC watchdog %s event action on port %s queue %d", PFC_WD_IN_STORM, key.c_str(), qIdx);
continue;
}
}
}
}

template <typename DropHandler, typename ForwardHandler>
Expand Down Expand Up @@ -624,7 +662,10 @@ PfcWdSwOrch<DropHandler, ForwardHandler>::PfcWdSwOrch(
c_portStatIds(portStatIds),
c_queueStatIds(queueStatIds),
c_queueAttrIds(queueAttrIds),
m_pollInterval(pollInterval)
m_pollInterval(pollInterval),
m_applDb(make_shared<DBConnector>(APPL_DB, DBConnector::DEFAULT_UNIXSOCKET, 0)),
m_applTable(make_shared<Table>(m_applDb.get(), APP_PFC_WD_TABLE_NAME)),
m_applDbRedisClient(m_applDb.get())
{
SWSS_LOG_ENTER();

Expand Down Expand Up @@ -728,11 +769,35 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer
sai_object_id_t queueId = SAI_NULL_OBJECT_ID;
sai_deserialize_object_id(queueIdStr, queueId);

if (!startWdActionOnQueue(event, queueId))
{
SWSS_LOG_ERROR("Failed to start PFC watchdog %s event action on queue %s", event.c_str(), queueIdStr.c_str());
}
}

// Timer-driven task: walks every registered queue entry and asks each entry
// that currently owns a storm handler to commit its counters.
// The `timer` argument is not used by this body.
template <typename DropHandler, typename ForwardHandler>
void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(SelectableTimer &timer)
{
    SWSS_LOG_ENTER();

    for (auto& queueEntry : m_entryMap)
    {
        auto& activeHandler = queueEntry.second.handler;

        // Entries without a handler have nothing to commit.
        if (activeHandler == nullptr)
        {
            continue;
        }

        // NOTE(review): the `true` argument presumably marks this as a
        // periodic commit - confirm against the handler's commitCounters().
        activeHandler->commitCounters(true);
    }
}

template <typename DropHandler, typename ForwardHandler>
bool PfcWdSwOrch<DropHandler, ForwardHandler>::startWdActionOnQueue(const string &event, sai_object_id_t queueId)
{
auto entry = m_entryMap.find(queueId);
if (entry == m_entryMap.end())
{
SWSS_LOG_ERROR("Queue %s is not registered", queueIdStr.c_str());
return;
SWSS_LOG_ERROR("Queue 0x%lx is not registered", queueId);
return false;
}

SWSS_LOG_NOTICE("Receive notification, %s", event.c_str());
Expand Down Expand Up @@ -760,6 +825,8 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer
entry->second.index,
PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable());
entry->second.handler->initCounters();
// Log storm event to APPL_DB for warm-reboot purpose
m_applTable->hset(entry->second.portAlias, to_string(entry->second.index), "storm");
}
}
else if (entry->second.action == PfcWdAction::PFC_WD_ACTION_DROP)
Expand All @@ -779,6 +846,8 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer
entry->second.index,
PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable());
entry->second.handler->initCounters();
// Log storm event to APPL_DB for warm-reboot purpose
m_applTable->hset(entry->second.portAlias, to_string(entry->second.index), "storm");
}
}
else if (entry->second.action == PfcWdAction::PFC_WD_ACTION_FORWARD)
Expand All @@ -798,11 +867,14 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer
entry->second.index,
PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable());
entry->second.handler->initCounters();
// Log storm event to APPL_DB for warm-reboot purpose
m_applTable->hset(entry->second.portAlias, to_string(entry->second.index), "storm");
}
}
else
{
SWSS_LOG_ERROR("Unknown PFC WD action");
return false;
}
}
else if (event == "restore")
Expand All @@ -818,27 +890,89 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer

entry->second.handler->commitCounters();
entry->second.handler = nullptr;
// Remove storm status in APPL_DB for warm-reboot purpose
string key = APP_PFC_WD_TABLE_NAME ":" + entry->second.portAlias;
m_applDbRedisClient.hdel(key, to_string(entry->second.index));
}
}
else
{
SWSS_LOG_ERROR("Received unknown event from plugin, %s", event.c_str());
return false;
}

return true;
}

/**
 * Warm-reboot preparation for the software PFC watchdog.
 *
 * Performs the following steps, in order:
 *  1. Deletes every field whose name contains "_last" from each entry of the
 *     counters table, so the detect/restore state machine starts from the
 *     same initial state as after a cold reboot.
 *  2. For each key of the APP_PFC_WD_TABLE_NAME table, collects the fields
 *     whose value is "storm" and writes them as a single "storm" field
 *     (field names joined with list_item_delimiter) into a temporary table
 *     named "_" APP_PFC_WD_TABLE_NAME.
 *  3. Calls Orch::bake(), then refills the CFG_PFC_WD_TABLE_NAME consumer
 *     from the temporary table so the in-storm queue list is processed
 *     together with the watchdog configuration.
 *  4. Deletes the temporary table entries, on both the success and the
 *     failure path.
 *
 * @return false when no Consumer is registered for CFG_PFC_WD_TABLE_NAME,
 *         true otherwise.
 */
template <typename DropHandler, typename ForwardHandler>
bool PfcWdSwOrch<DropHandler, ForwardHandler>::bake()
{
    // clean all *_last fields in COUNTERS_TABLE
    // to allow warm-reboot pfc detect & restore state machine to enter the same init state as cold-reboot
    RedisClient redisClient(PfcWdOrch<DropHandler, ForwardHandler>::getCountersDb().get());

    vector<string> cKeys;
    PfcWdSwOrch<DropHandler, ForwardHandler>::getCountersTable()->getKeys(cKeys);
    for (const auto &key : cKeys)
    {
        vector<FieldValueTuple> fvTuples;
        PfcWdSwOrch<DropHandler, ForwardHandler>::getCountersTable()->get(key, fvTuples);
        for (const auto &fv : fvTuples)
        {
            if (fvField(fv).find("_last") != string::npos)
            {
                redisClient.hdel(COUNTERS_TABLE ":" + key, fvField(fv));
            }
        }
    }

    // Re-organize the field-value to "storm" : <queue list>, where the list is
    // the in-storm queue fields joined with list_item_delimiter
    Table table(m_applDb.get(), "_" APP_PFC_WD_TABLE_NAME);

    vector<string> aKeys;
    m_applTable->getKeys(aKeys);

    // Removes whatever this function wrote to the temporary table; used on
    // every exit path so a failed bake does not leave stale entries behind.
    auto removeTempEntries = [&table, &aKeys]()
    {
        for (const auto &key : aKeys)
        {
            table.del(key);
        }
    };

    for (const auto &key : aKeys)
    {
        vector<FieldValueTuple> oldFvTuples;
        m_applTable->get(key, oldFvTuples);
        string qList;
        for (const auto &fv : oldFvTuples)
        {
            if (fvValue(fv) != "storm")
            {
                SWSS_LOG_ERROR("%s:%s, field %s value != \"storm\"", APP_PFC_WD_TABLE_NAME, key.c_str(), fvField(fv).c_str());
                continue;
            }
            qList += (fvField(fv) + list_item_delimiter);
        }
        if (!qList.empty())
        {
            // Drop the trailing delimiter appended after the last queue
            qList.pop_back();

            vector<FieldValueTuple> newFvTuples;
            newFvTuples.emplace_back("storm", qList);
            table.set(key, newFvTuples);
        }
    }

    // NOTE(review): the result of Orch::bake() is not checked here - confirm
    // whether a failure of the base implementation should be propagated.
    Orch::bake();

    // Piggyback in-storm queue info to the processing logic that handles pfcwd entries in CONFIG_DB
    Consumer *consumer = dynamic_cast<Consumer *>(PfcWdSwOrch<DropHandler, ForwardHandler>::getExecutor(CFG_PFC_WD_TABLE_NAME));
    if (consumer == nullptr)
    {
        SWSS_LOG_ERROR("No consumer %s in Orch", CFG_PFC_WD_TABLE_NAME);
        removeTempEntries();
        return false;
    }

    size_t refilled = consumer->refillToSync(&table);
    // refilled is a size_t, so it must be printed with %zu (not %zd)
    SWSS_LOG_NOTICE("Add warm input PFC watchdog State: %s, %zu", APP_PFC_WD_TABLE_NAME, refilled);

    removeTempEntries();
    return true;
}

// Trick to keep member functions in a separate file
Expand Down
15 changes: 15 additions & 0 deletions orchagent/pfcwdorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "producertable.h"
#include "notificationconsumer.h"
#include "timer.h"
#include "redisclient.h"

extern "C" {
#include "sai.h"
Expand Down Expand Up @@ -49,6 +50,10 @@ class PfcWdOrch: public Orch

virtual void createEntry(const string& key, const vector<FieldValueTuple>& data);
void deleteEntry(const string& name);

protected:
virtual bool startWdActionOnQueue(const string &event, sai_object_id_t queueId) = 0;

private:

shared_ptr<DBConnector> m_countersDb = nullptr;
Expand All @@ -75,6 +80,12 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>
void createEntry(const string& key, const vector<FieldValueTuple>& data);
virtual void doTask(SelectableTimer &timer);
//XXX Add port/queue state change event handlers

bool bake() override;

protected:
bool startWdActionOnQueue(const string &event, sai_object_id_t queueId) override;

private:
struct PfcWdQueueEntry
{
Expand Down Expand Up @@ -118,6 +129,10 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>

bool m_bigRedSwitchFlag = false;
int m_pollInterval;

shared_ptr<DBConnector> m_applDb = nullptr;
shared_ptr<Table> m_applTable = nullptr;
RedisClient m_applDbRedisClient;
};

#endif