diff --git a/tests/common/plugins/conditional_mark/tests_mark_conditions.yaml b/tests/common/plugins/conditional_mark/tests_mark_conditions.yaml
index bd9b5c1c955..151b6471a7a 100644
--- a/tests/common/plugins/conditional_mark/tests_mark_conditions.yaml
+++ b/tests/common/plugins/conditional_mark/tests_mark_conditions.yaml
@@ -2121,6 +2121,12 @@ qos/test_tunnel_qos_remap.py::test_pfc_watermark_extra_lossless_standby:
       - "asic_type in ['broadcom']"
       - https://github.com/sonic-net/sonic-mgmt/issues/11271
 
+qos/test_voq_watchdog.py:
+  skip:
+    reason: "These tests only apply to cisco 8000 platforms."
+    conditions:
+      - "asic_type not in ['cisco-8000']"
+
 #######################################
 #####            radv             #####
 #######################################
diff --git a/tests/qos/qos_sai_base.py b/tests/qos/qos_sai_base.py
index 411e498787f..f7f5afefc0b 100644
--- a/tests/qos/qos_sai_base.py
+++ b/tests/qos/qos_sai_base.py
@@ -11,6 +11,7 @@ import copy
 import time
 import collections
 
+from contextlib import contextmanager
 from tests.common.fixtures.ptfhost_utils import ptf_portmap_file  # noqa F401
 from tests.common.helpers.assertions import pytest_assert, pytest_require
 
@@ -2809,7 +2810,22 @@ def set_cir_change(self, get_src_dst_asic_and_duts, dutConfig):
         yield
         return
 
-    @pytest.fixture(scope='class', autouse=True)
+    def voq_watchdog_enabled(self, get_src_dst_asic_and_duts):
+        """Return True if the VOQ watchdog is reported as enabled on the destination DUT.
+
+        Only cisco-8000 DUTs are queried; any other asic type yields False.
+        """
+        dst_dut = get_src_dst_asic_and_duts['dst_dut']
+        if dst_dut.facts['asic_type'] != "cisco-8000":
+            return False
+        namespace_option = "-n asic0" if dst_dut.facts.get("modular_chassis") else ""
+        show_command = "show platform npu global {}".format(namespace_option)
+        result = dst_dut.command(show_command)
+        pattern = r"voq_watchdog_enabled +: +True"
+        # Return a real bool (not a re.Match/None) so both return paths have the same type.
+        return re.search(pattern, result["stdout"]) is not None
+
+    @contextmanager
     def disable_voq_watchdog(self, duthosts, get_src_dst_asic_and_duts, dutConfig):
         dst_dut = get_src_dst_asic_and_duts['dst_dut']
         dst_asic = get_src_dst_asic_and_duts['dst_asic']
@@ -2827,7 +2843,9 @@ def disable_voq_watchdog(self, duthosts, get_src_dst_asic_and_duts, dutConfig):
                 dut_list.append(rp_dut)
                 asic_index_list.append(asic.asic_index)
 
-        if dst_dut.facts['asic_type'] != "cisco-8000" or not dst_dut.sonichost.is_multi_asic:
+        # Skip if voq watchdog is not enabled.
+        if not self.voq_watchdog_enabled(get_src_dst_asic_and_duts):
+            logger.info("voq_watchdog is not enabled, skipping disable voq watchdog")
             yield
             return
 
@@ -2855,4 +2873,14 @@ def disable_voq_watchdog(self, duthosts, get_src_dst_asic_and_duts, dutConfig):
                 cmd_opt = ""
             dut.shell("sudo show platform npu script {} -s set_voq_watchdog.py".format(cmd_opt))
 
-        return
+    @pytest.fixture(scope='function')
+    def disable_voq_watchdog_function_scope(self, duthosts, get_src_dst_asic_and_duts, dutConfig):
+        """Function-scoped fixture that wraps the disable_voq_watchdog context manager."""
+        with self.disable_voq_watchdog(duthosts, get_src_dst_asic_and_duts, dutConfig) as result:
+            yield result
+
+    @pytest.fixture(scope='class')
+    def disable_voq_watchdog_class_scope(self, duthosts, get_src_dst_asic_and_duts, dutConfig):
+        """Class-scoped fixture that wraps the disable_voq_watchdog context manager."""
+        with self.disable_voq_watchdog(duthosts, get_src_dst_asic_and_duts, dutConfig) as result:
+            yield result
diff --git a/tests/qos/test_qos_sai.py b/tests/qos/test_qos_sai.py
index 7e01f55f80d..01b9124860a 100644
--- a/tests/qos/test_qos_sai.py
+++ b/tests/qos/test_qos_sai.py
@@ -201,6 +201,11 @@ class TestQosSai(QosSaiBase):
         'Arista-7050CX3-32S-D48C8'
     ]
 
+    @pytest.fixture(scope="class", autouse=True)
+    def setup(self, disable_voq_watchdog_class_scope):
+        """Pull in the class-scoped disable_voq_watchdog fixture for every test in this class."""
+        return
+
     @pytest.fixture(scope='function')
     def change_port_speed(
             self, request, ptfhost, duthosts, dutTestParams, fanouthosts, dutConfig, tbinfo,
diff --git a/tests/qos/test_voq_watchdog.py b/tests/qos/test_voq_watchdog.py
new file mode 100644
index 00000000000..1a18231b997
--- /dev/null
+++ b/tests/qos/test_voq_watchdog.py
@@ -0,0 +1,128 @@
+"""SAI thrift-based tests for the VOQ watchdog feature in SONiC.
+
+This set of test cases verifies VOQ watchdog behavior. These are dataplane
+tests that depend on the SAI thrift library in order to pause ports and read
+drop counters.
+
+Parameters:
+    --ptf_portmap (str): file name of port index to DUT interface alias map. Default is None.
+        In case a filename is not provided, a file containing a port indices to aliases map will be generated.
+
+    --qos_swap_syncd (bool): Used to install the RPC syncd image before running the tests. Default is True.
+
+    --qos_dst_ports (list) Indices of available DUT test ports to serve as destination ports. Note: This is not port
+        index on DUT, rather an index into filtered (excludes lag member ports) DUT ports. Plan is to randomize port
+        selection. Default is [0, 1, 3].
+
+    --qos_src_ports (list) Indices of available DUT test ports to serve as source port. Similar note as in
+        qos_dst_ports applies. Default is [2].
+"""
+
+import logging
+import pytest
+
+from tests.common.fixtures.duthost_utils import dut_qos_maps, \
+    separated_dscp_to_tc_map_on_uplink  # noqa F401
+from tests.common.fixtures.ptfhost_utils import copy_ptftests_directory  # noqa F401
+from tests.common.fixtures.ptfhost_utils import copy_saitests_directory  # noqa F401
+from tests.common.fixtures.ptfhost_utils import change_mac_addresses  # noqa F401
+from .qos_sai_base import QosSaiBase
+
+logger = logging.getLogger(__name__)
+
+pytestmark = [
+    pytest.mark.topology('any')
+]
+
+PKTS_NUM = 100
+
+
+@pytest.fixture(scope="function")
+def ignore_log_voq_watchdog(duthosts, loganalyzer):
+    """Make loganalyzer ignore the log lines that a VOQ watchdog trigger produces."""
+    if not loganalyzer:
+        yield
+        return
+    # Patterns end in ".*": a bare trailing "*" would only quantify the preceding letter.
+    ignore_list = [r".*HARDWARE_WATCHDOG.*", r".*soft_reset.*", r".*VOQ Appears to be stuck.*"]
+    for dut in duthosts:
+        loganalyzer[dut.hostname].ignore_regex.extend(ignore_list)
+    yield
+
+
+class TestVoqWatchdog(QosSaiBase):
+    """TestVoqWatchdog derives from QosSaiBase and contains collection of VOQ watchdog test cases.
+    """
+    @pytest.fixture(scope="class", autouse=True)
+    def check_skip_voq_watchdog_test(self, get_src_dst_asic_and_duts):
+        """Skip every test in this class when voq_watchdog_enabled() reports False."""
+        if not self.voq_watchdog_enabled(get_src_dst_asic_and_duts):
+            pytest.skip("Voq watchdog test is skipped since voq watchdog is not enabled.")
+
+    def _run_voq_watchdog_test(self, ptfhost, dutTestParams, dutConfig, voq_watchdog_enabled):
+        """Run the sai_qos_tests.VoqWatchdogTest PTF case.
+
+        Args:
+            ptfhost (AnsibleHost): Packet Test Framework (PTF)
+            dutTestParams (Fixture, dict): DUT host test params
+            dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs,
+                and test ports
+            voq_watchdog_enabled (bool): forwarded to the PTF test as the "voq_watchdog_enabled" parameter
+        Raises:
+            RunAnsibleModuleFail if ptf test fails
+        """
+        test_ports = dutConfig["testPorts"]
+        testParams = dict(dutTestParams["basicParams"])
+        testParams.update({
+            "dscp": 8,
+            "dst_port_id": test_ports["dst_port_id"],
+            "dst_port_ip": test_ports["dst_port_ip"],
+            "src_port_id": test_ports["src_port_id"],
+            "src_port_ip": test_ports["src_port_ip"],
+            "src_port_vlan": test_ports["src_port_vlan"],
+            "packet_size": 1350,
+            "pkts_num": PKTS_NUM,
+            "voq_watchdog_enabled": voq_watchdog_enabled,
+        })
+
+        self.runPtfTest(
+            ptfhost, testCase="sai_qos_tests.VoqWatchdogTest",
+            testParams=testParams)
+
+    def testVoqWatchdog(
+        self, ptfhost, dutTestParams, dutConfig, dutQosConfig,
+        get_src_dst_asic_and_duts, ignore_log_voq_watchdog
+    ):
+        """
+        Test VOQ watchdog while it is enabled
+        Args:
+            ptfhost (AnsibleHost): Packet Test Framework (PTF)
+            dutTestParams (Fixture, dict): DUT host test params
+            dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs,
+                and test ports
+            dutQosConfig (Fixture, dict): Map containing DUT host QoS configuration
+        Returns:
+            None
+        Raises:
+            RunAnsibleModuleFail if ptf test fails
+        """
+        self._run_voq_watchdog_test(ptfhost, dutTestParams, dutConfig, voq_watchdog_enabled=True)
+
+    def testVoqWatchdogDisable(
+        self, ptfhost, dutTestParams, dutConfig, dutQosConfig,
+        get_src_dst_asic_and_duts, disable_voq_watchdog_function_scope
+    ):
+        """
+        Test VOQ watchdog with the disable_voq_watchdog_function_scope fixture applied
+        Args:
+            ptfhost (AnsibleHost): Packet Test Framework (PTF)
+            dutTestParams (Fixture, dict): DUT host test params
+            dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs,
+                and test ports
+            dutQosConfig (Fixture, dict): Map containing DUT host QoS configuration
+        Returns:
+            None
+        Raises:
+            RunAnsibleModuleFail if ptf test fails
+        """
+        self._run_voq_watchdog_test(ptfhost, dutTestParams, dutConfig, voq_watchdog_enabled=False)
diff --git a/tests/saitests/py3/sai_qos_tests.py b/tests/saitests/py3/sai_qos_tests.py
index 1b38a3a8784..5cd749caf7e 100755
--- a/tests/saitests/py3/sai_qos_tests.py
+++ b/tests/saitests/py3/sai_qos_tests.py
@@ -125,6 +125,13 @@ DEFAULT_PKT_COUNT = 10
 
 PG_TOLERANCE = 2
 
+# Constants for voq watchdog test
+VOQ_WATCHDOG_TIMEOUT_SECONDS = 60
+SAI_LOG_TO_CHECK = ["HARDWARE_WATCHDOG", "soft_reset"]
+SDK_LOG_TO_CHECK = ["VOQ Appears to be stuck"]
+SAI_LOG = "/var/log/sai.log"
+SDK_LOG = "/var/log/syslog"
+
 
 def log_message(message, level='info', to_stderr=False):
     if to_stderr:
@@ -6622,3 +6629,162 @@ def runTest(self):
 
         finally:
             self.sai_thrift_port_tx_enable(self.dst_client, asic_type, uniq_dst_ports)
+
+
+class VoqWatchdogTest(sai_base_test.ThriftInterfaceDataPlane):
+    """Verify VOQ watchdog behavior while traffic is held behind a tx-disabled port."""
+
+    def init_log_check(self):
+        """Return the current size in bytes of SAI_LOG and SDK_LOG on the DUT, in that order."""
+        pre_offsets = []
+        for logfile in [SAI_LOG, SDK_LOG]:
+            offset_cmd = "stat -c %s {}".format(logfile)
+            stdout, err, ret = self.exec_cmd_on_dut(
+                self.dst_server_ip,
+                self.test_params['dut_username'],
+                self.test_params['dut_password'],
+                offset_cmd)
+            pre_offsets.append(int(stdout[0]))
+        return pre_offsets
+
+    def verify_log(self, pre_offsets, voq_watchdog_enabled=True):
+        """Assert the watchdog log strings did (or did not) appear after pre_offsets.
+
+        Args:
+            pre_offsets (list): log sizes returned by init_log_check()
+            voq_watchdog_enabled (bool): True requires every string to be found, False requires none
+        """
+        found_list = []
+        for pre_offset, logfile, str_to_check in zip(pre_offsets, [SAI_LOG, SDK_LOG],
+                                                     [SAI_LOG_TO_CHECK, SDK_LOG_TO_CHECK]):
+            egrep_str = '|'.join(str_to_check)
+            # only scan what was appended after the recorded size (tail -c +N starts at byte N)
+            check_cmd = "sudo tail -c +{} {} | egrep '{}' || true".format(pre_offset + 1, logfile, egrep_str)
+            stdout, err, ret = self.exec_cmd_on_dut(
+                self.dst_server_ip,
+                self.test_params['dut_username'],
+                self.test_params['dut_password'],
+                check_cmd)
+            log_message("Log for {}: {}".format(egrep_str, stdout))
+            new_log = "".join(stdout)
+            found_list.extend(string in new_log for string in str_to_check)
+        if voq_watchdog_enabled:
+            qos_test_assert(self, all(found_list),
+                            "VOQ watchdog trigger not detected")
+        else:
+            qos_test_assert(self, not any(found_list),
+                            "unexpected VOQ watchdog trigger")
+
+    def runTest(self):
+        """Send traffic to a tx-disabled port, check the logs, then check counters once tx is re-enabled."""
+        switch_init(self.clients)
+
+        # Parse input parameters
+        dscp = int(self.test_params['dscp'])
+        router_mac = self.test_params['router_mac']
+        sonic_version = self.test_params['sonic_version']
+        dst_port_id = int(self.test_params['dst_port_id'])
+        dst_port_ip = self.test_params['dst_port_ip']
+        dst_port_mac = self.dataplane.get_mac(0, dst_port_id)
+        src_port_id = int(self.test_params['src_port_id'])
+        src_port_ip = self.test_params['src_port_ip']
+        src_port_vlan = self.test_params['src_port_vlan']
+        src_port_mac = self.dataplane.get_mac(0, src_port_id)
+        voq_watchdog_enabled = self.test_params['voq_watchdog_enabled']
+        asic_type = self.test_params['sonic_asic_type']
+        pkts_num = int(self.test_params['pkts_num'])
+
+        pkt_dst_mac = router_mac if router_mac != '' else dst_port_mac
+        # get counter names to query
+        ingress_counters, egress_counters = get_counter_names(sonic_version)
+
+        # Prepare IP packet data
+        ttl = 64
+        packet_length = int(self.test_params.get('packet_size', 64))
+
+        is_dualtor = self.test_params.get('is_dualtor', False)
+        def_vlan_mac = self.test_params.get('def_vlan_mac', None)
+        if is_dualtor and def_vlan_mac is not None:
+            pkt_dst_mac = def_vlan_mac
+
+        pkt = construct_ip_pkt(packet_length,
+                               pkt_dst_mac,
+                               src_port_mac,
+                               src_port_ip,
+                               dst_port_ip,
+                               dscp,
+                               src_port_vlan,
+                               ttl=ttl)
+
+        log_message("test dst_port_id: {}, src_port_id: {}, src_vlan: {}".format(
+            dst_port_id, src_port_id, src_port_vlan), to_stderr=True)
+        # in case dst_port_id is part of LAG, find out the actual dst port
+        # for given IP parameters
+        dst_port_id = get_rx_port(
+            self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, src_port_vlan
+        )
+        log_message("actual dst_port_id: {}".format(dst_port_id), to_stderr=True)
+
+        # record log sizes before pausing the port, so a failure here cannot leave tx disabled
+        pre_offsets = self.init_log_check()
+        self.sai_thrift_port_tx_disable(self.dst_client, asic_type, [dst_port_id])
+
+        try:
+            # send packets
+            send_packet(self, src_port_id, pkt, pkts_num)
+
+            # allow enough time to trigger voq watchdog
+            time.sleep(VOQ_WATCHDOG_TIMEOUT_SECONDS * 1.3)
+
+            # verify voq watchdog is triggered
+            self.verify_log(pre_offsets, voq_watchdog_enabled)
+
+            self.sai_thrift_port_tx_enable(self.dst_client, asic_type, [dst_port_id])
+
+            # allow enough time for the dut to sync up the counter values in counters_db
+            time.sleep(8)
+            # get a snapshot of counter values at recv and transmit ports
+            recv_counters_base, _ = sai_thrift_read_port_counters(
+                self.src_client, asic_type, port_list['src'][src_port_id])
+            xmit_counters_base, queue_counters_base = sai_thrift_read_port_counters(
+                self.dst_client, asic_type, port_list['dst'][dst_port_id])
+            if voq_watchdog_enabled:
+                # queue counters should be cleared after soft reset
+                qos_test_assert(
+                    self, queue_counters_base[0] == 0,
+                    'queue counters are not cleared, soft reset is not triggered')
+
+            # send packets
+            send_packet(self, src_port_id, pkt, pkts_num)
+            # allow enough time for the dut to sync up the counter values in counters_db
+            time.sleep(8)
+
+            # get a snapshot of counter values at recv and transmit ports
+            recv_counters, _ = sai_thrift_read_port_counters(
+                self.src_client, asic_type, port_list['src'][src_port_id])
+            xmit_counters, queue_counters = sai_thrift_read_port_counters(
+                self.dst_client, asic_type, port_list['dst'][dst_port_id])
+            log_message(
+                '\trecv_counters {}\n\trecv_counters_base {}\n\t'
+                'xmit_counters {}\n\txmit_counters_base {}\n\t'
+                'queue_counters {}\n\tqueue_counters_base {}\n'.format(
+                    recv_counters, recv_counters_base, xmit_counters, xmit_counters_base,
+                    queue_counters, queue_counters_base), to_stderr=True)
+            # recv port no ingress drop
+            for cntr in ingress_counters:
+                qos_test_assert(
+                    self, recv_counters[cntr] == recv_counters_base[cntr],
+                    'unexpectedly RX drop counter increase')
+            # xmit port no egress drop
+            for cntr in egress_counters:
+                qos_test_assert(
+                    self, xmit_counters[cntr] == xmit_counters_base[cntr],
+                    'unexpectedly TX drop counter increase')
+            # queue counters increased by pkts_num
+            qos_test_assert(
+                self, queue_counters[0] == queue_counters_base[0] + pkts_num,
+                'queue counter not matched, expected {}, got {}'.format(
+                    queue_counters_base[0] + pkts_num, queue_counters[0]))
+
+        finally:
+            self.sai_thrift_port_tx_enable(self.dst_client, asic_type, [dst_port_id])