-
Notifications
You must be signed in to change notification settings - Fork 692
[pfcwd]: support BIG_RED_SWITCH mode #467
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,7 @@ | |
| #define PFC_WD_ACTION "action" | ||
| #define PFC_WD_DETECTION_TIME "detection_time" | ||
| #define PFC_WD_RESTORATION_TIME "restoration_time" | ||
| #define BIG_RED_SWITCH_FIELD "BIG_RED_SWITCH" | ||
|
|
||
| #define PFC_WD_DETECTION_TIME_MAX (5 * 1000) | ||
| #define PFC_WD_DETECTION_TIME_MIN 100 | ||
|
|
@@ -261,6 +262,8 @@ template <typename DropHandler, typename ForwardHandler> | |
| void PfcWdSwOrch<DropHandler, ForwardHandler>::createEntry(const string& key, | ||
| const vector<FieldValueTuple>& data) | ||
| { | ||
| SWSS_LOG_ENTER(); | ||
|
|
||
| if (key == PFC_WD_GLOBAL) | ||
| { | ||
| for (auto valuePair: data) | ||
|
|
@@ -274,6 +277,11 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::createEntry(const string& key, | |
| fieldValues.emplace_back(POLL_INTERVAL_FIELD, value); | ||
| m_flexCounterGroupTable->set(PFC_WD_FLEX_COUNTER_GROUP, fieldValues); | ||
| } | ||
| else if (field == BIG_RED_SWITCH_FIELD) | ||
| { | ||
| SWSS_LOG_NOTICE("Recieve brs mode set, %s", value.c_str()); | ||
| setBigRedSwitchMode(value); | ||
| } | ||
| } | ||
| } | ||
| else | ||
|
|
@@ -282,6 +290,166 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::createEntry(const string& key, | |
| } | ||
| } | ||
|
|
||
| template <typename DropHandler, typename ForwardHandler> | ||
| void PfcWdSwOrch<DropHandler, ForwardHandler>::setBigRedSwitchMode(const string value) | ||
| { | ||
| SWSS_LOG_ENTER(); | ||
|
|
||
| if (value == "enable") | ||
| { | ||
| // When BIG_RED_SWITCH mode is enabled, pfcwd is automatically disabled | ||
| enableBigRedSwitchMode(); | ||
| } | ||
| else if (value == "disable") | ||
| { | ||
| disableBigRedSwitchMode(); | ||
| } | ||
| else | ||
| { | ||
| SWSS_LOG_NOTICE("Unsupported BIG_RED_SWITCH mode set input, please use enable or disable"); | ||
| } | ||
|
|
||
| } | ||
|
|
||
| template <typename DropHandler, typename ForwardHandler> | ||
| void PfcWdSwOrch<DropHandler, ForwardHandler>::disableBigRedSwitchMode() | ||
| { | ||
| SWSS_LOG_ENTER(); | ||
|
|
||
| m_bigRedSwitchFlag = false; | ||
| // Disable pfcwdaction hanlder on each queue if exists. | ||
| for (auto &entry : m_brsEntryMap) | ||
| { | ||
|
|
||
| if (entry.second.handler != nullptr) | ||
| { | ||
| SWSS_LOG_NOTICE( | ||
| "PFC Watchdog BIG_RED_SWITCH mode disabled on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.", | ||
| entry.second.portAlias.c_str(), | ||
| entry.second.index, | ||
| entry.first, | ||
| entry.second.portId); | ||
|
|
||
| entry.second.handler->commitCounters(); | ||
| entry.second.handler = nullptr; | ||
| } | ||
|
|
||
| auto queueId = entry.first; | ||
| RedisClient redisClient(PfcWdOrch<DropHandler, ForwardHandler>::getCountersDb().get()); | ||
| string countersKey = COUNTERS_TABLE ":" + sai_serialize_object_id(queueId); | ||
| redisClient.hdel(countersKey, "BIG_RED_SWITCH_MODE"); | ||
| } | ||
|
|
||
| m_brsEntryMap.clear(); | ||
| } | ||
|
|
||
| template <typename DropHandler, typename ForwardHandler> | ||
| void PfcWdSwOrch<DropHandler, ForwardHandler>::enableBigRedSwitchMode() | ||
| { | ||
| SWSS_LOG_ENTER(); | ||
|
|
||
| m_bigRedSwitchFlag = true; | ||
| // Write to database that each queue enables BIG_RED_SWITCH | ||
| auto allPorts = gPortsOrch->getAllPorts(); | ||
| sai_attribute_t attr; | ||
| attr.id = SAI_PORT_ATTR_PRIORITY_FLOW_CONTROL; | ||
|
|
||
| for (auto &it: allPorts) | ||
| { | ||
| Port port = it.second; | ||
|
|
||
| if (port.m_type != Port::PHY) | ||
| { | ||
| SWSS_LOG_INFO("Skip non-phy port %s", port.m_alias.c_str()); | ||
| continue; | ||
| } | ||
|
|
||
| // use portorch api to get lossless tc in future. | ||
| sai_status_t status = sai_port_api->get_port_attribute(port.m_port_id, 1, &attr); | ||
| if (status != SAI_STATUS_SUCCESS) | ||
| { | ||
| SWSS_LOG_ERROR("Failed to get PFC mask on port %s: %d", port.m_alias.c_str(), status); | ||
| return; | ||
| } | ||
|
|
||
| uint8_t pfcMask = attr.value.u8; | ||
| for (uint8_t i = 0; i < PFC_WD_TC_MAX; i++) | ||
| { | ||
| sai_object_id_t queueId = port.m_queue_ids[i]; | ||
| if ((pfcMask & (1 << i)) == 0 && m_entryMap.find(queueId) == m_entryMap.end()) | ||
| { | ||
| continue; | ||
| } | ||
|
|
||
| string queueIdStr = sai_serialize_object_id(queueId); | ||
|
|
||
| vector<FieldValueTuple> countersFieldValues; | ||
| countersFieldValues.emplace_back("BIG_RED_SWITCH_MODE", "enable"); | ||
| PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable()->set(queueIdStr, countersFieldValues); | ||
| } | ||
| } | ||
|
|
||
| // Disable pfcwdaction handler on each queue if exists. | ||
| for (auto & entry: m_entryMap) | ||
| { | ||
| if (entry.second.handler != nullptr) | ||
| { | ||
| entry.second.handler->commitCounters(); | ||
| entry.second.handler = nullptr; | ||
| } | ||
| } | ||
|
|
||
| // Create pfcwdaction hanlder on all the ports. | ||
| for (auto & it: allPorts) | ||
| { | ||
| Port port = it.second; | ||
| if (port.m_type != Port::PHY) | ||
| { | ||
| SWSS_LOG_INFO("Skip non-phy port %s", port.m_alias.c_str()); | ||
| continue; | ||
| } | ||
|
|
||
| // use portorch api to get lossless tc in future after asym PFC is available. | ||
| sai_status_t status = sai_port_api->get_port_attribute(port.m_port_id, 1, &attr); | ||
| if (status != SAI_STATUS_SUCCESS) | ||
| { | ||
| SWSS_LOG_ERROR("Failed to get PFC mask on port %s: %d", port.m_alias.c_str(), status); | ||
| return; | ||
| } | ||
|
|
||
| uint8_t pfcMask = attr.value.u8; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not aligned with asymmetric PFC
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This need to be updated once asym PFC is supported. There was one comment above this part of the code to remind us to update this part once asy is enabled.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, I thought it already was. My bad. |
||
| for (uint8_t i = 0; i < PFC_WD_TC_MAX; i++) | ||
| { | ||
| if ((pfcMask & (1 << i)) == 0) | ||
| { | ||
| continue; | ||
| } | ||
|
|
||
| sai_object_id_t queueId = port.m_queue_ids[i]; | ||
| string queueIdStr = sai_serialize_object_id(queueId); | ||
|
|
||
| auto entry = m_brsEntryMap.emplace(queueId, PfcWdQueueEntry(PfcWdAction::PFC_WD_ACTION_DROP, port.m_port_id, i, port.m_alias)).first; | ||
|
|
||
| if (entry->second.handler== nullptr) | ||
| { | ||
| SWSS_LOG_NOTICE( | ||
| "PFC Watchdog BIG_RED_SWITCH mode enabled on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.", | ||
| entry->second.portAlias.c_str(), | ||
| entry->second.index, | ||
| entry->first, | ||
| entry->second.portId); | ||
|
|
||
| entry->second.handler = make_shared<DropHandler>( | ||
| entry->second.portId, | ||
| entry->first, | ||
| entry->second.index, | ||
| PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable()); | ||
| entry->second.handler->initCounters(); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| template <typename DropHandler, typename ForwardHandler> | ||
| void PfcWdSwOrch<DropHandler, ForwardHandler>::registerInWdDb(const Port& port, | ||
| uint32_t detectionTime, uint32_t restorationTime, PfcWdAction action) | ||
|
|
@@ -355,7 +523,7 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::registerInWdDb(const Port& port, | |
| } | ||
|
|
||
| // Create internal entry | ||
| m_entryMap.emplace(queueId, PfcWdQueueEntry(action, port.m_port_id, i)); | ||
| m_entryMap.emplace(queueId, PfcWdQueueEntry(action, port.m_port_id, i, port.m_alias)); | ||
|
|
||
| string key = getFlexCounterTableKey(queueIdStr); | ||
| m_flexCounterTable->set(key, queueFieldValues); | ||
|
|
@@ -513,10 +681,11 @@ PfcWdSwOrch<DropHandler, ForwardHandler>::~PfcWdSwOrch(void) | |
|
|
||
| template <typename DropHandler, typename ForwardHandler> | ||
| PfcWdSwOrch<DropHandler, ForwardHandler>::PfcWdQueueEntry::PfcWdQueueEntry( | ||
| PfcWdAction action, sai_object_id_t port, uint8_t idx): | ||
| PfcWdAction action, sai_object_id_t port, uint8_t idx, string alias): | ||
| action(action), | ||
| portId(port), | ||
| index(idx) | ||
| index(idx), | ||
| portAlias(alias) | ||
| { | ||
| SWSS_LOG_ENTER(); | ||
| } | ||
|
|
@@ -564,12 +733,24 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer | |
| } | ||
|
|
||
| SWSS_LOG_NOTICE("Receive notification, %s", event.c_str()); | ||
| if (event == "storm") | ||
|
|
||
| if (m_bigRedSwitchFlag) | ||
| { | ||
| SWSS_LOG_NOTICE("Big_RED_SWITCH mode is on, ingore syncd pfc watchdog notification"); | ||
| } | ||
| else if (event == "storm") | ||
| { | ||
| if (entry->second.action == PfcWdAction::PFC_WD_ACTION_ALERT) | ||
| { | ||
| if (entry->second.handler == nullptr) | ||
| { | ||
| SWSS_LOG_NOTICE( | ||
| "PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.", | ||
| entry->second.portAlias.c_str(), | ||
| entry->second.index, | ||
| entry->first, | ||
| entry->second.portId); | ||
|
|
||
| entry->second.handler = make_shared<PfcWdActionHandler>( | ||
| entry->second.portId, | ||
| entry->first, | ||
|
|
@@ -582,6 +763,13 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer | |
| { | ||
| if (entry->second.handler == nullptr) | ||
| { | ||
| SWSS_LOG_NOTICE( | ||
| "PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.", | ||
| entry->second.portAlias.c_str(), | ||
| entry->second.index, | ||
| entry->first, | ||
| entry->second.portId); | ||
|
|
||
| entry->second.handler = make_shared<DropHandler>( | ||
| entry->second.portId, | ||
| entry->first, | ||
|
|
@@ -594,6 +782,13 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer | |
| { | ||
| if (entry->second.handler == nullptr) | ||
| { | ||
| SWSS_LOG_NOTICE( | ||
| "PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.", | ||
| entry->second.portAlias.c_str(), | ||
| entry->second.index, | ||
| entry->first, | ||
| entry->second.portId); | ||
|
|
||
| entry->second.handler = make_shared<ForwardHandler>( | ||
| entry->second.portId, | ||
| entry->first, | ||
|
|
@@ -604,13 +799,20 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer | |
| } | ||
| else | ||
| { | ||
| throw runtime_error("Unknown PFC WD action"); | ||
| SWSS_LOG_ERROR("Unknown PFC WD action"); | ||
| } | ||
| } | ||
| else if (event == "restore") | ||
| { | ||
| if (entry->second.handler != nullptr) | ||
| { | ||
| SWSS_LOG_NOTICE( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why move logs from handler to this orch?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since in big-red-switch mode, we don't want to have log relates to storm detect/restore. Moving log out of handler class provides flexibility for users to input various logs depending on the use case when they create action handler
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok |
||
| "PFC Watchdog storm restored on port %s, queue index %d, queue id 0x%lx and port id 0x%lx.", | ||
| entry->second.portAlias.c_str(), | ||
| entry->second.index, | ||
| entry->first, | ||
| entry->second.portId); | ||
|
|
||
| entry->second.handler->commitCounters(); | ||
| entry->second.handler = nullptr; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -78,11 +78,13 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler> | |
| PfcWdQueueEntry( | ||
| PfcWdAction action, | ||
| sai_object_id_t port, | ||
| uint8_t idx); | ||
| uint8_t idx, | ||
| string alias); | ||
|
|
||
| PfcWdAction action = PfcWdAction::PFC_WD_ACTION_UNKNOWN; | ||
| sai_object_id_t portId = SAI_NULL_OBJECT_ID; | ||
| uint8_t index = 0; | ||
| string portAlias; | ||
| shared_ptr<PfcWdActionHandler> handler = { nullptr }; | ||
| }; | ||
|
|
||
|
|
@@ -95,7 +97,13 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler> | |
|
|
||
| string filterPfcCounters(string counters, set<uint8_t>& losslessTc); | ||
| string getFlexCounterTableKey(string s); | ||
|
|
||
| void disableBigRedSwitchMode(); | ||
| void enableBigRedSwitchMode(); | ||
| void setBigRedSwitchMode(string value); | ||
|
|
||
| map<sai_object_id_t, PfcWdQueueEntry> m_entryMap; | ||
| map<sai_object_id_t, PfcWdQueueEntry> m_brsEntryMap; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why separate map for BRS?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After disable big-red-switch, we need to keep the PFC WD original configuration. This information actually is stored in m_entryMap. It would be better if we don't mix this information with brs.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In line 398 in .cpp file entry is removed from the regular map when BRS is enabled, but it is not added back when BRS is disabled.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That’s true. The previous active handlers mark the storm happens when BRS was enabled. After BRS runs for a while, the previous state is out of date so it’s unecessary to recreate those handlers. Instead we just need to recover the PWCWD configuration and let it detect storm again. And the configuration comes from entrymap
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok |
||
|
|
||
| const vector<sai_port_stat_t> c_portStatIds; | ||
| const vector<sai_queue_stat_t> c_queueStatIds; | ||
|
|
@@ -105,6 +113,7 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler> | |
| shared_ptr<ProducerTable> m_flexCounterTable = nullptr; | ||
| shared_ptr<ProducerTable> m_flexCounterGroupTable = nullptr; | ||
|
|
||
| bool m_bigRedSwitchFlag = false; | ||
| int m_pollInterval; | ||
| }; | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think that plugins should know about BRS. Since you chose to keep them running, you might as well ignore notifications from plugins by checking a flag in orch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would prefer to let plugins aware of BRS. Otherwise, syncd will runplugin and keeps sending notification every 100ms. Even though swss can ingore, it still consumes a lot unnecessary resources.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe then it is better to unregister plugins or have some field in DB along with poll interval that will globally turn plugins on/off?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think both method are mostly identical, and the only difference is how to skip plugins. The benefit to achieve this in lua script is it's lightweight and only affect pfcwd, also it don't need modification of syncd and makes syncd logic simpler. If you would prefer to change the logic to syncd, I can add it to backlog and change it later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Have added it to backlog. sonic-net/sonic-sairedis#314
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok