diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index fe57ccf1559..998343716b1 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -214,7 +214,7 @@ bool OrchDaemon::init() CFG_PFC_WD_TABLE_NAME }; - if (platform == MLNX_PLATFORM_SUBSTRING) + if ((platform == MLNX_PLATFORM_SUBSTRING) || (platform == BFN_PLATFORM_SUBSTRING)) { static const vector portStatIds = @@ -245,13 +245,26 @@ bool OrchDaemon::init() static const vector queueAttrIds; - m_orchList.push_back(new PfcWdSwOrch( - m_configDb, - pfc_wd_tables, - portStatIds, - queueStatIds, - queueAttrIds, - PFC_WD_POLL_MSECS)); + if (platform == MLNX_PLATFORM_SUBSTRING) + { + m_orchList.push_back(new PfcWdSwOrch( + m_configDb, + pfc_wd_tables, + portStatIds, + queueStatIds, + queueAttrIds, + PFC_WD_POLL_MSECS)); + } + else if (platform == BFN_PLATFORM_SUBSTRING) + { + m_orchList.push_back(new PfcWdSwOrch( + m_configDb, + pfc_wd_tables, + portStatIds, + queueStatIds, + queueAttrIds, + PFC_WD_POLL_MSECS)); + } } else if (platform == BRCM_PLATFORM_SUBSTRING) { diff --git a/orchagent/pfc_detect_barefoot.lua b/orchagent/pfc_detect_barefoot.lua index fd8304362d8..e77137b2879 100644 --- a/orchagent/pfc_detect_barefoot.lua +++ b/orchagent/pfc_detect_barefoot.lua @@ -21,70 +21,81 @@ for i = n, 1, -1 do local is_deadlock = false local pfc_wd_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_STATUS') local pfc_wd_action = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_ACTION') - if pfc_wd_status == 'operational' or pfc_wd_action == 'alert' then - local detection_time = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME')) - local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT') - if not time_left then - time_left = detection_time - else - time_left = tonumber(time_left) - end - local queue_index = redis.call('HGET', 'COUNTERS_QUEUE_INDEX_MAP', KEYS[i]) - local port_id = redis.call('HGET', 'COUNTERS_QUEUE_PORT_MAP', KEYS[i]) - local pfc_rx_pkt_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PKTS' - local pfc_on2off_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_ON2OFF_RX_PKTS' - + local big_red_switch_mode = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'BIG_RED_SWITCH_MODE') + if not big_red_switch_mode and (pfc_wd_status == 'operational' or pfc_wd_action == 'alert') then + local detection_time = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME') + if detection_time then + detection_time = tonumber(detection_time) + local time_left = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT') + if not time_left then + time_left = detection_time + else + time_left = tonumber(time_left) + end - -- Get all counters - local occupancy_bytes = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES')) - local packets = tonumber(redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS')) - local pfc_rx_packets = tonumber(redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key)) - local pfc_on2off = tonumber(redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_on2off_key)) - local queue_pause_status = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS') - - local packets_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last') - local pfc_rx_packets_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last') - local pfc_on2off_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_on2off_key .. '_last') - local queue_pause_status_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS_last') + local queue_index = redis.call('HGET', 'COUNTERS_QUEUE_INDEX_MAP', KEYS[i]) + local port_id = redis.call('HGET', 'COUNTERS_QUEUE_PORT_MAP', KEYS[i]) + local pfc_rx_pkt_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PKTS' + local pfc_duration_key = 'SAI_PORT_STAT_PFC_' .. queue_index .. '_RX_PAUSE_DURATION' - -- DEBUG CODE START. Uncomment to enable - local debug_storm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'DEBUG_STORM') - -- DEBUG CODE END. + -- Get all counters + local occupancy_bytes = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_CURR_OCCUPANCY_BYTES') + local packets = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS') + local pfc_rx_packets = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key) + local pfc_duration = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_duration_key) - -- If this is not a first run, then we have last values available - if packets_last and pfc_rx_packets_last and pfc_on2off_last and queue_pause_status_last then - packets_last = tonumber(packets_last) - pfc_rx_packets_last = tonumber(pfc_rx_packets_last) - pfc_on2off_last = tonumber(pfc_on2off_last) + if occupancy_bytes and packets and pfc_rx_packets and pfc_duration then + occupancy_bytes = tonumber(occupancy_bytes) + packets = tonumber(packets) + pfc_rx_packets = tonumber(pfc_rx_packets) + pfc_duration = tonumber(pfc_duration) - -- Check actual condition of queue being in PFC storm - if (occupancy_bytes > 0 and packets - packets_last == 0 and pfc_rx_packets - pfc_rx_packets_last > 0) or + local packets_last = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last') + local pfc_rx_packets_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last') + local pfc_duration_last = redis.call('HGET', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last') -- DEBUG CODE START. Uncomment to enable - (debug_storm == "enabled") or + local debug_storm = redis.call('HGET', counters_table_name .. ':' .. KEYS[i], 'DEBUG_STORM') -- DEBUG CODE END. - (occupancy_bytes == 0 and pfc_rx_packets - pfc_rx_packets_last > 0 and pfc_on2off - pfc_on2off_last == 0 and queue_pause_status_last == 'true' and queue_pause_status == 'true') then - if time_left <= poll_time then - redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]') - is_deadlock = true - time_left = detection_time - else - time_left = time_left - poll_time + + -- If this is not a first run, then we have last values available + if packets_last and pfc_rx_packets_last and pfc_duration_last then + packets_last = tonumber(packets_last) + pfc_rx_packets_last = tonumber(pfc_rx_packets_last) + pfc_duration_last = tonumber(pfc_duration_last) + + -- Check actual condition of queue being in PFC storm + if (occupancy_bytes > 0 and packets - packets_last == 0 and pfc_rx_packets - pfc_rx_packets_last > 0) or + -- DEBUG CODE START. Uncomment to enable + (debug_storm == "enabled") or + -- DEBUG CODE END. + (occupancy_bytes == 0 and packets - packets_last == 0 and (pfc_duration - pfc_duration_last) > poll_time * 0.8) then + if time_left <= poll_time then + redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last') + redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last') + redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]') + is_deadlock = true + time_left = detection_time + else + time_left = time_left - poll_time + end + else + if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then + redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]') + end + time_left = detection_time + end end - else - if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then - redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]') + + -- Save values for next run + redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last', packets) + redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT', time_left) + if is_deadlock == false then + redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last', pfc_rx_packets) + redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last', pfc_duration) end - time_left = detection_time end end - - -- Save values for next run - redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_ATTR_PAUSE_STATUS_last', queue_pause_status) - redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'SAI_QUEUE_STAT_PACKETS_last', packets) - redis.call('HSET', counters_table_name .. ':' .. KEYS[i], 'PFC_WD_DETECTION_TIME_LEFT', time_left) - redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last', pfc_rx_packets) - redis.call('HSET', counters_table_name .. ':' .. port_id, pfc_on2off_key .. '_last', pfc_on2off) end end