From 51660d5ab6c58c91d3b7171382fa2fca3263d102 Mon Sep 17 00:00:00 2001 From: bingwang Date: Thu, 5 Sep 2024 02:08:57 +0000 Subject: [PATCH 1/2] Stabilize PFC watchdog test --- tests/common/helpers/pfc_gen.py | 13 ------------- tests/pfcwd/files/pfcwd_helper.py | 17 +++++++++++++++++ tests/pfcwd/test_pfcwd_function.py | 11 +++++++++-- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/tests/common/helpers/pfc_gen.py b/tests/common/helpers/pfc_gen.py index dfbda38d338..de87f7305c7 100755 --- a/tests/common/helpers/pfc_gen.py +++ b/tests/common/helpers/pfc_gen.py @@ -17,15 +17,9 @@ logger = logging.getLogger('MyLogger') logger.setLevel(logging.DEBUG) -# Minimum number of processes to be created -MIN_PROCESS_NUM = 2 - # Maximum number of processes to be created MAX_PROCESS_NUM = 4 -# Minimum number of packets for enabling multiple processes -MIN_PACKET_NUM_MP = 10000 - class PacketSender(): """ @@ -180,13 +174,6 @@ def main(): pre_str = 'GLOBAL_PF' if options.global_pf else 'PFC' logger.debug(pre_str + '_STORM_START') - # Send PFC pause with multiple processes even if only one interface is provided - # if packet number is smaller than the threshold, then it's not necessary to use multiple processes - if options.num >= MIN_PACKET_NUM_MP: - while len(interfaces) < MIN_PROCESS_NUM: - interfaces.extend(interfaces) - options.num /= 2 - # Start sending PFC pause frames senders = [] interface_slices = [[] for i in range(MAX_PROCESS_NUM)] diff --git a/tests/pfcwd/files/pfcwd_helper.py b/tests/pfcwd/files/pfcwd_helper.py index 8f6a57b04b5..687de832836 100644 --- a/tests/pfcwd/files/pfcwd_helper.py +++ b/tests/pfcwd/files/pfcwd_helper.py @@ -5,6 +5,7 @@ import pytest import contextlib import time +import logging from tests.ptf_runner import ptf_runner from tests.common import constants @@ -24,6 +25,8 @@ EXPECT_PFC_WD_RESTORE_RE = ".*storm restored.*" +logger = logging.getLogger(__name__) + class TrafficPorts(object): """ Generate a list of ports needed for the PFC 
Watchdog test""" @@ -562,3 +565,17 @@ def has_neighbor_device(setup_pfc_test): (not details.get('rx_port_id') or None in details['rx_port_id']): return False # neighbor devices are not present return True + + +def check_pfc_storm_state(dut, port, queue): + """ + Helper function to check if PFC storm is detected/restored on a given queue + """ + pfcwd_stats = dut.show_and_parse("show pfcwd stats") + queue_name = str(port) + ":" + str(queue) + for entry in pfcwd_stats: + if entry["queue"] == queue_name: + logger.info("PFCWD status on queue {} stats: {}".format(queue_name, entry)) + return entry['storm detected/restored'] + logger.info("PFCWD not triggered on queue {}".format(queue_name)) + return None diff --git a/tests/pfcwd/test_pfcwd_function.py b/tests/pfcwd/test_pfcwd_function.py index 97d3d4b22c3..3e49851fd3b 100644 --- a/tests/pfcwd/test_pfcwd_function.py +++ b/tests/pfcwd/test_pfcwd_function.py @@ -18,7 +18,8 @@ from tests.common import constants from tests.common.dualtor.dual_tor_utils import is_tunnel_qos_remap_enabled, dualtor_ports # noqa F401 from tests.common.dualtor.mux_simulator_control import toggle_all_simulator_ports_to_enum_rand_one_per_hwsku_frontend_host_m # noqa F401, E501 -from .files.pfcwd_helper import send_background_traffic +from .files.pfcwd_helper import send_background_traffic, check_pfc_storm_state +from tests.common.utilities import wait_until PTF_PORT_MAPPING_MODE = 'use_orig_interface' @@ -717,6 +718,9 @@ def storm_detect_path(self, dut, port, action): test_ports_info = {self.pfc_wd['rx_port'][0]: self.pfc_wd} queues = [self.storm_hndle.pfc_queue_idx] + PFC_STORM_TIMEOUT = 30 + pfcwd_stats_before_test = check_pfc_storm_state(dut, port, self.storm_hndle.pfc_queue_idx) + with send_background_traffic(dut, self.ptf, queues, selected_test_ports, test_ports_info): if action != "dontcare": start_wd_on_ports(dut, port, restore_time, detect_time, action) @@ -733,7 +737,10 @@ def storm_detect_path(self, dut, port, action): if 
self.pfc_wd['fake_storm']: PfcCmd.set_storm_status(dut, self.queue_oid, "enabled") - time.sleep(5) + # Wait until PFC storm state changes + pytest_assert(wait_until(PFC_STORM_TIMEOUT, 2, 0, + lambda: check_pfc_storm_state(dut, port, self.storm_hndle.pfc_queue_idx) != pfcwd_stats_before_test), # noqa: E501 + "PFC storm state did not change as expected") # noqa: E127 # storm detect logger.info("Verify if PFC storm is detected on port {}".format(port)) From 7e8ae4142b8b7ad78bccc1db5bc4de45596dc73b Mon Sep 17 00:00:00 2001 From: bingwang Date: Thu, 5 Sep 2024 19:04:25 +0000 Subject: [PATCH 2/2] Limit the change to Mellanox --- tests/pfcwd/test_pfcwd_function.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tests/pfcwd/test_pfcwd_function.py b/tests/pfcwd/test_pfcwd_function.py index 3e49851fd3b..4f77a8f4121 100644 --- a/tests/pfcwd/test_pfcwd_function.py +++ b/tests/pfcwd/test_pfcwd_function.py @@ -718,8 +718,9 @@ def storm_detect_path(self, dut, port, action): test_ports_info = {self.pfc_wd['rx_port'][0]: self.pfc_wd} queues = [self.storm_hndle.pfc_queue_idx] - PFC_STORM_TIMEOUT = 30 - pfcwd_stats_before_test = check_pfc_storm_state(dut, port, self.storm_hndle.pfc_queue_idx) + if dut.facts['asic_type'] == "mellanox": + PFC_STORM_TIMEOUT = 30 + pfcwd_stats_before_test = check_pfc_storm_state(dut, port, self.storm_hndle.pfc_queue_idx) with send_background_traffic(dut, self.ptf, queues, selected_test_ports, test_ports_info): if action != "dontcare": @@ -737,10 +738,14 @@ def storm_detect_path(self, dut, port, action): if self.pfc_wd['fake_storm']: PfcCmd.set_storm_status(dut, self.queue_oid, "enabled") - # Wait until PFC storm state changes - pytest_assert(wait_until(PFC_STORM_TIMEOUT, 2, 0, - lambda: check_pfc_storm_state(dut, port, self.storm_hndle.pfc_queue_idx) != pfcwd_stats_before_test), # noqa: E501 - "PFC storm state did not change as expected") # noqa: E127 + if dut.facts['asic_type'] == "mellanox": + # On Mellanox 
platforms, more time is required for a PFC storm to be triggered + # because PFC pause frames sent from a non-Mellanox leaf fanout are sometimes not continuous. + pytest_assert(wait_until(PFC_STORM_TIMEOUT, 2, 0, + lambda: check_pfc_storm_state(dut, port, self.storm_hndle.pfc_queue_idx) != pfcwd_stats_before_test), # noqa: E501, E128 + "PFC storm state did not change as expected") # noqa: E127 + else: + time.sleep(5) # storm detect logger.info("Verify if PFC storm is detected on port {}".format(port))