3 changes: 2 additions & 1 deletion orchagent/Makefile.am
@@ -6,7 +6,8 @@ swssdir = $(datadir)/swss

dist_swss_DATA = \
pfc_detect_mellanox.lua \
pfc_restore_mellanox.lua
pfc_detect_broadcom.lua \
pfc_restore.lua

bin_PROGRAMS = orchagent routeresync

1 change: 1 addition & 0 deletions orchagent/orch.h
@@ -25,6 +25,7 @@ const char comma = ',';
const char range_specifier = '-';

#define MLNX_PLATFORM_SUBSTRING "mellanox"
#define BRCM_PLATFORM_SUBSTRING "broadcom"

typedef enum
{
45 changes: 44 additions & 1 deletion orchagent/orchdaemon.cpp
@@ -124,11 +124,54 @@ bool OrchDaemon::init()
SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES,
};

static const vector<sai_queue_attr_t> queueAttrIds;

m_orchList.push_back(new PfcWdSwOrch<PfcWdZeroBufferHandler, PfcWdLossyHandler>(
m_applDb,
pfc_wd_tables,
portStatIds,
queueStatIds));
queueStatIds,
queueAttrIds));
}
else if (platform == BRCM_PLATFORM_SUBSTRING)
{
static const vector<sai_port_stat_t> portStatIds =
{
SAI_PORT_STAT_PFC_0_RX_PKTS,
SAI_PORT_STAT_PFC_1_RX_PKTS,
SAI_PORT_STAT_PFC_2_RX_PKTS,
SAI_PORT_STAT_PFC_3_RX_PKTS,
SAI_PORT_STAT_PFC_4_RX_PKTS,
SAI_PORT_STAT_PFC_5_RX_PKTS,
SAI_PORT_STAT_PFC_6_RX_PKTS,
SAI_PORT_STAT_PFC_7_RX_PKTS,
SAI_PORT_STAT_PFC_0_ON2OFF_RX_PKTS,
SAI_PORT_STAT_PFC_1_ON2OFF_RX_PKTS,
SAI_PORT_STAT_PFC_2_ON2OFF_RX_PKTS,
SAI_PORT_STAT_PFC_3_ON2OFF_RX_PKTS,
SAI_PORT_STAT_PFC_4_ON2OFF_RX_PKTS,
SAI_PORT_STAT_PFC_5_ON2OFF_RX_PKTS,
SAI_PORT_STAT_PFC_6_ON2OFF_RX_PKTS,
SAI_PORT_STAT_PFC_7_ON2OFF_RX_PKTS,
};

static const vector<sai_queue_stat_t> queueStatIds =
{
SAI_QUEUE_STAT_PACKETS,
SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES,
};

static const vector<sai_queue_attr_t> queueAttrIds =
{
SAI_QUEUE_ATTR_PAUSE_STATUS,
};

m_orchList.push_back(new PfcWdSwOrch<PfcWdActionHandler, PfcWdActionHandler>(
Collaborator:
I think you can reuse forward handler
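
(For illustration only, not part of this PR: "reusing the forward handler" would presumably mean instantiating the Broadcom watchdog with the existing PfcWdLossyHandler as the forward handler, instead of the PfcWdActionHandler base class for both roles; a matching explicit template instantiation would also be needed at the bottom of pfcwdorch.cpp.)

// Hypothetical sketch of the suggestion above, not the code in this PR.
m_orchList.push_back(new PfcWdSwOrch<PfcWdActionHandler, PfcWdLossyHandler>(
    m_applDb,
    pfc_wd_tables,
    portStatIds,
    queueStatIds,
    queueAttrIds));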

@sihuihan88 (Contributor Author), Oct 11, 2017:
Currently both drop and forward use the base class for simplicity. I agree we might reuse the forward handler, but this PR is for alert mode, so I would prefer to update the drop/forward handlers together in a separate PR.

Collaborator:
ok

m_applDb,
pfc_wd_tables,
portStatIds,
queueStatIds,
queueAttrIds));
}

return true;
91 changes: 91 additions & 0 deletions orchagent/pfc_detect_broadcom.lua
@@ -0,0 +1,91 @@
-- KEYS - queue IDs
-- ARGV[1] - counters db index
-- ARGV[2] - counters table name
-- ARGV[3] - poll time interval
-- return queue Ids that satisfy criteria

local counters_db = ARGV[1]
local counters_table_name = ARGV[2]
local poll_time = tonumber(ARGV[3])

local rets = {}

redis.call('SELECT', counters_db)

-- Iterate through each queue
local n = table.getn(KEYS)
for i = n, 1, -1 do
local counter_keys = redis.call('HKEYS', counters_table_name .. ':' .. KEYS[i])
local counter_num = 0
local old_counter_num = 0
local is_deadlock = false
local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
if pfc_wd_status == 'operational' or pfc_wd_action == 'alert' then
local detection_time = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME'))
local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT')
if not time_left then
time_left = detection_time
else
time_left = tonumber(time_left)
end

local queue_index = redis.call('HGET', 'COUNTERS_QUEUE_INDEX_MAP', KEYS[i])
local port_id = redis.call('HGET', 'COUNTERS_QUEUE_PORT_MAP', KEYS[i])
local pfc_rx_pkt_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PKTS'
local pfc_on2off_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_ON2OFF_RX_PKTS'


-- Get all counters
local occupancy_bytes = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES'))
local packets = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS'))
local pfc_rx_packets = tonumber(redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key))
local pfc_on2off = tonumber(redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_on2off_key))
local queue_pause_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS')

local packets_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last')
local pfc_rx_packets_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
local pfc_on2off_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_on2off_key .. '_last')
local queue_pause_status_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS_last')

-- DEBUG CODE START. Uncomment to enable
local debug_storm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'DEBUG_STORM')
-- DEBUG CODE END.

-- If this is not a first run, then we have last values available
if packets_last and pfc_rx_packets_last and pfc_on2off_last and queue_pause_status_last then
packets_last = tonumber(packets_last)
pfc_rx_packets_last = tonumber(pfc_rx_packets_last)
pfc_on2off_last = tonumber(pfc_on2off_last)

-- Check actual condition of queue being in PFC storm
if (occupancy_bytes > 0 and packets - packets_last == 0 and pfc_rx_packets - pfc_rx_packets_last > 0) or
-- DEBUG CODE START. Uncomment to enable
(debug_storm == "enabled") or
-- DEBUG CODE END.
(occupancy_bytes == 0 and pfc_rx_packets - pfc_rx_packets_last > 0 and pfc_on2off - pfc_on2off_last == 0 and queue_pause_status_last == 'true' and queue_pause_status == 'true') then
if time_left <= poll_time then
redis.call('PUBLISH', 'PFC_WD', '["' .. KEYS[i] .. '","storm"]')
is_deadlock = true
time_left = detection_time
else
time_left = time_left - poll_time
end
else
if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
Collaborator:
Plugins do not distinguish between actions. The two types of notifications are "storm" and "restore"; orchagent decides what to do based on configuration.

Contributor Author:
Currently, the detect plugin checks queues in operational status and the restore plugin checks queues in stormed status. This is not true for alert mode: in alert mode the detect plugin must check all queues regardless of queue status, and the restore plugin never checks anything. Because the logic of when and which queues to check currently lives in the plugin itself, the plugin also needs to know the action type to decide which queues to check in alert mode versus the normal case. In #342, the restore plugin still runs for stormed queues in alert mode, which is not correct.

Collaborator:
According to the document, the definition of alert mode is the following:
PFC watchdog normally logs when it takes the action of dropping packets as a pause storm occurs and of resuming forwarding traffic when the pause storm stops. With the alert mode, PFC watchdog only needs to log that a pause storm was detected and that it stopped.

So as I understand it, alert mode is the same as the other actions, except that it doesn't apply any configuration and only logs events.

@lguohan can you please take a look? Maybe I'm missing something?

@sihuihan88 (Contributor Author), Oct 12, 2017:
The restore plugin's criterion is that the stormed queue receives no more PFC. This criterion relies on the fact that we have taken the recovery action: after the recovery action, no more PFC should be received if the queue has recovered. In alert mode, however, we don't take any action but only log the events. In that case the restore criterion is too strong, since it is OK for the queue to occasionally receive PFC when no recovery action has been taken.

The detection plugin's criterion determines whether the queue is stormed when no recovery action has been taken. So in alert mode, we use the detection plugin to determine whether the queue storm has stopped.
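
(Illustrative condensation of the flow described above, not part of the PR: field names follow this PR's detect plugin, storm_detected stands in for the full counter comparison in the script, and the detection-time countdown is omitted.)

-- Condensed alert-mode logic in the detect plugin (sketch only)
local key    = counters_table_name .. ':' .. KEYS[i]
local status = redis.call('HGET', key, 'PFC_WD_STATUS')
local action = redis.call('HGET', key, 'PFC_WD_ACTION')
if status == 'operational' or action == 'alert' then
    if storm_detected then
        redis.call('PUBLISH', 'PFC_WD', '["' .. KEYS[i] .. '","storm"]')
    elseif action == 'alert' and status ~= 'operational' then
        -- In alert mode the detect plugin also signals recovery,
        -- because the restore plugin skips alert-mode queues entirely.
        redis.call('PUBLISH', 'PFC_WD', '["' .. KEYS[i] .. '","restore"]')
    end
end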

Collaborator:
ok

redis.call('PUBLISH', 'PFC_WD', '["' .. KEYS[i] .. '","restore"]')
end
time_left = detection_time
end
end

-- Save values for next run
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS_last', queue_pause_status)
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last', packets)
redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT', time_left)
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last', pfc_rx_packets)
redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_on2off_key .. '_last', pfc_on2off)
end
end

return rets
6 changes: 5 additions & 1 deletion orchagent/pfc_detect_mellanox.lua
@@ -20,7 +20,8 @@ for i = n, 1, -1 do
local old_counter_num = 0
local is_deadlock = false
local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
if pfc_wd_status == 'operational' then
local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
if pfc_wd_status == 'operational' or pfc_wd_action == 'alert' then
local detection_time = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME'))
local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT')
if not time_left then
@@ -67,6 +68,9 @@ for i = n, 1, -1 do
time_left = time_left - poll_time
end
else
if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
redis.call('PUBLISH', 'PFC_WD', '["' .. KEYS[i] .. '","restore"]')
end
time_left = detection_time
end
end
orchagent/pfc_restore.lua (renamed from pfc_restore_mellanox.lua)
@@ -18,7 +18,8 @@ for i = n, 1, -1 do
local counter_keys = redis.call('HKEYS', counters_table_name .. ':' .. KEYS[i])
local pfc_rx_pkt_key = ''
local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS')
if pfc_wd_status ~= 'operational' then
local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION')
if pfc_wd_status ~= 'operational' and pfc_wd_action ~= 'alert' then
local restoration_time = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_RESTORATION_TIME'))
local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_RESTORATION_TIME_LEFT')
if time_left == nil then
63 changes: 49 additions & 14 deletions orchagent/pfcwdorch.cpp
@@ -108,6 +108,27 @@ PfcWdAction PfcWdOrch<DropHandler, ForwardHandler>::deserializeAction(const stri
return actionMap.at(key);
}

template <typename DropHandler, typename ForwardHandler>
string PfcWdOrch<DropHandler, ForwardHandler>::serializeAction(const PfcWdAction& action)
Collaborator:
Plugins don't have to know which action type it is; it's orchagent's responsibility to make that decision.

Contributor Author:
As in the comments above, the detect plugin needs to detect stormed queues in alert mode instead of the restore plugin, and based on the current implementation this logic needs to be added to the Lua script.

{
SWSS_LOG_ENTER();

const map<PfcWdAction, string> actionMap =
{
{ PfcWdAction::PFC_WD_ACTION_FORWARD, "forward" },
{ PfcWdAction::PFC_WD_ACTION_DROP, "drop" },
{ PfcWdAction::PFC_WD_ACTION_ALERT, "alert" },
};

if (actionMap.find(action) == actionMap.end())
{
return "unknown";
}

return actionMap.at(action);
}


template <typename DropHandler, typename ForwardHandler>
void PfcWdOrch<DropHandler, ForwardHandler>::createEntry(const string& key,
const vector<FieldValueTuple>& data)
@@ -270,6 +291,8 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::registerInWdDb(const Port& port,
vector<FieldValueTuple> countersFieldValues;
countersFieldValues.emplace_back("PFC_WD_DETECTION_TIME", to_string(detectionTime * 1000));
countersFieldValues.emplace_back("PFC_WD_RESTORATION_TIME", to_string(restorationTime * 1000));
countersFieldValues.emplace_back("PFC_WD_ACTION", this->serializeAction(action));

PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable()->set(queueIdStr, countersFieldValues);

// We register our queues in PFC_WD table so that syncd will know that it must poll them
queueFieldValues.emplace_back(PFC_WD_QUEUE_COUNTER_ID_LIST, str);
}

if (!c_queueAttrIds.empty())
{
string str = counterIdsToStr(c_queueAttrIds, sai_serialize_queue_attr);
queueFieldValues.emplace_back(PFC_WD_QUEUE_ATTR_ID_LIST, str);
}

// Create internal entry
m_entryMap.emplace(queueId, PfcWdQueueEntry(action, port.m_port_id, i));

@@ -313,18 +342,19 @@ PfcWdSwOrch<DropHandler, ForwardHandler>::PfcWdSwOrch(
DBConnector *db,
vector<string> &tableNames,
vector<sai_port_stat_t> portStatIds,
vector<sai_queue_stat_t> queueStatIds):
vector<sai_queue_stat_t> queueStatIds,
vector<sai_queue_attr_t> queueAttrIds):
PfcWdOrch<DropHandler,
ForwardHandler>(db, tableNames),
m_pfcWdDb(new DBConnector(PFC_WD_DB, DBConnector::DEFAULT_UNIXSOCKET, 0)),
m_pfcWdTable(new ProducerStateTable(m_pfcWdDb.get(), PFC_WD_STATE_TABLE)),
c_portStatIds(portStatIds),
c_queueStatIds(queueStatIds)
c_queueStatIds(queueStatIds),
c_queueAttrIds(queueAttrIds)
{
SWSS_LOG_ENTER();

string platform = getenv("platform") ? getenv("platform") : "";

if (platform == "")
{
SWSS_LOG_ERROR("Platform environment variable is not defined");

string detectSha, restoreSha;
string detectPluginName = "pfc_detect_" + platform + ".lua";
string restorePluginName = "pfc_restore_" + platform + ".lua";
string restorePluginName = "pfc_restore.lua";

try
{
@@ -428,9 +458,21 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::handleWdNotification(swss::Notifi
return;
}

SWSS_LOG_NOTICE("Receive notification, %s", event.c_str());
if (event == "storm")
{
if (entry->second.action == PfcWdAction::PFC_WD_ACTION_DROP)
if (entry->second.action == PfcWdAction::PFC_WD_ACTION_ALERT)
{
if (entry->second.handler == nullptr)
{
entry->second.handler = make_shared<PfcWdActionHandler>(
entry->second.portId,
entry->first,
entry->second.index,
PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable());
}
}
else if (entry->second.action == PfcWdAction::PFC_WD_ACTION_DROP)
{
entry->second.handler = make_shared<DropHandler>(
entry->second.portId,
entry->second.index,
PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable());
}
else if (entry->second.action == PfcWdAction::PFC_WD_ACTION_ALERT)
{
entry->second.handler = make_shared<PfcWdActionHandler>(
entry->second.portId,
entry->first,
entry->second.index,
PfcWdOrch<DropHandler, ForwardHandler>::getCountersTable());
}
else
{
throw runtime_error("Unknown PFC WD action");
}
else
{
SWSS_LOG_ERROR("Received unknown event from plugin");
SWSS_LOG_ERROR("Received unknown event from plugin, %s", event.c_str());
}
}

@@ -551,3 +585,4 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::endWatchdogThread(void)

// Trick to keep member functions in a separate file
template class PfcWdSwOrch<PfcWdZeroBufferHandler, PfcWdLossyHandler>;
template class PfcWdSwOrch<PfcWdActionHandler, PfcWdActionHandler>;
9 changes: 6 additions & 3 deletions orchagent/pfcwdorch.h
@@ -43,9 +43,10 @@ class PfcWdOrch: public Orch
return m_countersDb;
}

private:
static PfcWdAction deserializeAction(const string& key);
void createEntry(const string& key, const vector<FieldValueTuple>& data);
static string serializeAction(const PfcWdAction &action);
private:
void createEntry(const string& key, const vector<FieldValueTuple>& data);
Contributor:
four spaces

void deleteEntry(const string& name);

shared_ptr<DBConnector> m_countersDb = nullptr;
@@ -60,7 +61,8 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>
DBConnector *db,
vector<string> &tableNames,
vector<sai_port_stat_t> portStatIds,
vector<sai_queue_stat_t> queueStatIds);
vector<sai_queue_stat_t> queueStatIds,
vector<sai_queue_attr_t> queueAttrIds);
virtual ~PfcWdSwOrch(void);

virtual bool startWdOnPort(const Port& port,
@@ -97,6 +99,7 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>

const vector<sai_port_stat_t> c_portStatIds;
const vector<sai_queue_stat_t> c_queueStatIds;
const vector<sai_queue_attr_t> c_queueAttrIds;

shared_ptr<DBConnector> m_pfcWdDb = nullptr;
shared_ptr<ProducerStateTable> m_pfcWdTable = nullptr;