Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2121,6 +2121,12 @@ qos/test_tunnel_qos_remap.py::test_pfc_watermark_extra_lossless_standby:
- "asic_type in ['broadcom']"
- https://github.com/sonic-net/sonic-mgmt/issues/11271

qos/test_voq_watchdog.py:
skip:
reason: "These tests only apply to cisco 8000 platforms."
conditions:
- "asic_type not in ['cisco-8000']"

#######################################
##### radv #####
#######################################
Expand Down
28 changes: 25 additions & 3 deletions tests/qos/qos_sai_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import copy
import time
import collections
from contextlib import contextmanager

from tests.common.fixtures.ptfhost_utils import ptf_portmap_file # noqa F401
from tests.common.helpers.assertions import pytest_assert, pytest_require
Expand Down Expand Up @@ -2809,7 +2810,18 @@ def set_cir_change(self, get_src_dst_asic_and_duts, dutConfig):
yield
return

@pytest.fixture(scope='class', autouse=True)
def voq_watchdog_enabled(self, get_src_dst_asic_and_duts):
    """Report whether the VOQ watchdog is enabled on the destination DUT.

    Args:
        get_src_dst_asic_and_duts (dict): mapping whose 'dst_dut' entry is the
            destination DUT host object.

    Returns:
        bool: False for any asic type other than "cisco-8000"; otherwise True
        only when the output of `show platform npu global` contains
        "voq_watchdog_enabled : True".
    """
    dst_dut = get_src_dst_asic_and_duts['dst_dut']
    if dst_dut.facts['asic_type'] != "cisco-8000":
        return False
    # On a modular chassis the query is directed at the asic0 namespace.
    namespace_option = "-n asic0" if dst_dut.facts.get("modular_chassis") else ""
    show_command = "show platform npu global {}".format(namespace_option)
    result = dst_dut.command(show_command)
    # Return a real bool on every path instead of leaking a re.Match/None object.
    return re.search(r"voq_watchdog_enabled +: +True", result["stdout"]) is not None

@contextmanager
def disable_voq_watchdog(self, duthosts, get_src_dst_asic_and_duts, dutConfig):
dst_dut = get_src_dst_asic_and_duts['dst_dut']
dst_asic = get_src_dst_asic_and_duts['dst_asic']
Expand All @@ -2827,7 +2839,9 @@ def disable_voq_watchdog(self, duthosts, get_src_dst_asic_and_duts, dutConfig):
dut_list.append(rp_dut)
asic_index_list.append(asic.asic_index)

if dst_dut.facts['asic_type'] != "cisco-8000" or not dst_dut.sonichost.is_multi_asic:
# Skip if voq watchdog is not enabled.
if not self.voq_watchdog_enabled(get_src_dst_asic_and_duts):
logger.info("voq_watchdog is not enabled, skipping disable voq watchdog")
yield
return

Expand Down Expand Up @@ -2855,4 +2869,12 @@ def disable_voq_watchdog(self, duthosts, get_src_dst_asic_and_duts, dutConfig):
cmd_opt = ""
dut.shell("sudo show platform npu script {} -s set_voq_watchdog.py".format(cmd_opt))

return
@pytest.fixture(scope='function')
def disable_voq_watchdog_function_scope(self, duthosts, get_src_dst_asic_and_duts, dutConfig):
    """Function-scoped fixture wrapping the disable_voq_watchdog context manager.

    The context is entered before the test body runs and exited when the
    fixture is torn down; whatever the context manager yields is handed to
    the test.
    """
    watchdog_ctx = self.disable_voq_watchdog(duthosts, get_src_dst_asic_and_duts, dutConfig)
    with watchdog_ctx as entered:
        yield entered

@pytest.fixture(scope='class')
def disable_voq_watchdog_class_scope(self, duthosts, get_src_dst_asic_and_duts, dutConfig):
    """Class-scoped fixture wrapping the disable_voq_watchdog context manager.

    The context is entered once for the requesting test class and exited
    when the class-level fixture is torn down; whatever the context manager
    yields is handed to the requester.
    """
    watchdog_ctx = self.disable_voq_watchdog(duthosts, get_src_dst_asic_and_duts, dutConfig)
    with watchdog_ctx as entered:
        yield entered
4 changes: 4 additions & 0 deletions tests/qos/test_qos_sai.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,10 @@ class TestQosSai(QosSaiBase):
'Arista-7050CX3-32S-D48C8'
]

@pytest.fixture(scope="class", autouse=True)
def setup(self, disable_voq_watchdog_class_scope):
    """Autouse class-scoped hook that only requests disable_voq_watchdog_class_scope.

    The fixture does no work of its own; listing the dependency is what
    causes it to be set up for every test in the class.
    """
    return None

@pytest.fixture(scope='function')
def change_port_speed(
self, request, ptfhost, duthosts, dutTestParams, fanouthosts, dutConfig, tbinfo,
Expand Down
131 changes: 131 additions & 0 deletions tests/qos/test_voq_watchdog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
"""SAI thrift-based tests for the VOQ watchdog feature in SONiC.

This set of test cases verifies VOQ watchdog behavior. These are dataplane
tests that depend on the SAI thrift library in order to pause ports and read
drop counters.

Parameters:
--ptf_portmap <filename> (str): File name of the port-index-to-DUT-interface-alias map. Default is None.
If a filename is not provided, a file containing a port-index-to-alias map will be generated.

--qos_swap_syncd (bool): Used to install the RPC syncd image before running the tests. Default is True.

--qos_dst_ports (list): Indices of available DUT test ports to serve as destination ports. Note: this is not the
port index on the DUT, but an index into the filtered (LAG member ports excluded) list of DUT ports. The plan is
to randomize port selection. Default is [0, 1, 3].

--qos_src_ports (list): Indices of available DUT test ports to serve as source ports. The same note as for
qos_dst_ports applies. Default is [2].
"""

import logging
import pytest

from tests.common.fixtures.duthost_utils import dut_qos_maps, \
separated_dscp_to_tc_map_on_uplink # noqa F401
from tests.common.fixtures.ptfhost_utils import copy_ptftests_directory # noqa F401
from tests.common.fixtures.ptfhost_utils import copy_saitests_directory # noqa F401
from tests.common.fixtures.ptfhost_utils import change_mac_addresses # noqa F401
from .qos_sai_base import QosSaiBase

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# pytest marks applied to every test in this module: topology 'any'.
pytestmark = [
pytest.mark.topology('any')
]

# Packet count passed to the PTF test as the "pkts_num" parameter.
PKTS_NUM = 100


@pytest.fixture(scope="function")
def ignore_log_voq_watchdog(duthosts, loganalyzer):
    """Register loganalyzer ignore patterns for VOQ watchdog log lines.

    When a loganalyzer is active, the patterns are appended to the
    ignore_regex list of every DUT in `duthosts` before the test runs.
    When loganalyzer is falsy the fixture is a no-op.

    Args:
        duthosts: iterable of DUT host objects exposing `.hostname`.
        loganalyzer: mapping of hostname to per-DUT analyzer, or a falsy
            value when log analysis is not in use.
    """
    if loganalyzer:
        # Each pattern ends in ".*"; a bare trailing "*" would only quantify
        # the final letter of the keyword instead of matching the rest of the line.
        ignore_list = [r".*HARDWARE_WATCHDOG.*", r".*soft_reset.*", r".*VOQ Appears to be stuck.*"]
        for dut in duthosts:
            loganalyzer[dut.hostname].ignore_regex.extend(ignore_list)
    yield


class TestVoqWatchdog(QosSaiBase):
    """TestVoqWatchdog derives from QosSaiBase and contains collection of VOQ watchdog test cases.
    """

    @pytest.fixture(scope="class", autouse=True)
    def check_skip_voq_watchdog_test(self, get_src_dst_asic_and_duts):
        """Skip every test in this class when the VOQ watchdog is not enabled."""
        if not self.voq_watchdog_enabled(get_src_dst_asic_and_duts):
            pytest.skip("Voq watchdog test is skipped since voq watchdog is not enabled.")

    def _run_voq_watchdog_ptf_test(self, ptfhost, dutTestParams, dutConfig, voq_watchdog_enabled):
        """Build the PTF parameters and run sai_qos_tests.VoqWatchdogTest.

        Args:
            ptfhost (AnsibleHost): Packet Test Framework (PTF)
            dutTestParams (Fixture, dict): DUT host test params
            dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs,
                and test ports
            voq_watchdog_enabled (bool): value forwarded to the PTF test as "voq_watchdog_enabled"
        Returns:
            None
        Raises:
            RunAnsibleModuleFail if ptf test fails
        """
        test_ports = dutConfig["testPorts"]
        testParams = dict()
        testParams.update(dutTestParams["basicParams"])
        testParams.update({
            "dscp": 8,
            "dst_port_id": test_ports["dst_port_id"],
            "dst_port_ip": test_ports["dst_port_ip"],
            "src_port_id": test_ports["src_port_id"],
            "src_port_ip": test_ports["src_port_ip"],
            "src_port_vlan": test_ports["src_port_vlan"],
            "packet_size": 1350,
            "pkts_num": PKTS_NUM,
            "voq_watchdog_enabled": voq_watchdog_enabled,
        })

        self.runPtfTest(
            ptfhost, testCase="sai_qos_tests.VoqWatchdogTest",
            testParams=testParams)

    def testVoqWatchdog(
        self, ptfhost, dutTestParams, dutConfig, dutQosConfig,
        get_src_dst_asic_and_duts, ignore_log_voq_watchdog
    ):
        """
            Test VOQ watchdog with the watchdog left enabled
            Args:
                ptfhost (AnsibleHost): Packet Test Framework (PTF)
                dutTestParams (Fixture, dict): DUT host test params
                dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs,
                    and test ports
                dutQosConfig (Fixture, dict): Map containing DUT host QoS configuration
                get_src_dst_asic_and_duts (Fixture, dict): requested as a fixture, not read by this test body
                ignore_log_voq_watchdog (Fixture): registers loganalyzer ignore patterns for watchdog log lines
            Returns:
                None
            Raises:
                RunAnsibleModuleFail if ptf test fails
        """
        self._run_voq_watchdog_ptf_test(ptfhost, dutTestParams, dutConfig, voq_watchdog_enabled=True)

    def testVoqWatchdogDisable(
        self, ptfhost, dutTestParams, dutConfig, dutQosConfig,
        get_src_dst_asic_and_duts, disable_voq_watchdog_function_scope
    ):
        """
            Test VOQ watchdog while the disable_voq_watchdog_function_scope fixture is active
            Args:
                ptfhost (AnsibleHost): Packet Test Framework (PTF)
                dutTestParams (Fixture, dict): DUT host test params
                dutConfig (Fixture, dict): Map of DUT config containing dut interfaces, test port IDs, test port IPs,
                    and test ports
                dutQosConfig (Fixture, dict): Map containing DUT host QoS configuration
                get_src_dst_asic_and_duts (Fixture, dict): requested as a fixture, not read by this test body
                disable_voq_watchdog_function_scope (Fixture): wraps the disable_voq_watchdog context manager
                    for the duration of this test
            Returns:
                None
            Raises:
                RunAnsibleModuleFail if ptf test fails
        """
        self._run_voq_watchdog_ptf_test(ptfhost, dutTestParams, dutConfig, voq_watchdog_enabled=False)
160 changes: 160 additions & 0 deletions tests/saitests/py3/sai_qos_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,13 @@
DEFAULT_PKT_COUNT = 10
PG_TOLERANCE = 2

# Constants for voq watchdog test
# Base wait in seconds; VoqWatchdogTest.runTest sleeps 1.3x this value before checking the logs.
VOQ_WATCHDOG_TIMEOUT_SECONDS = 60
# Substrings that VoqWatchdogTest.verify_log searches for in SAI_LOG.
SAI_LOG_TO_CHECK = ["HARDWARE_WATCHDOG", "soft_reset"]
# Substrings that VoqWatchdogTest.verify_log searches for in SDK_LOG.
SDK_LOG_TO_CHECK = ["VOQ Appears to be stuck"]
# Log files on the DUT that are scanned for the substrings above.
SAI_LOG = "/var/log/sai.log"
SDK_LOG = "/var/log/syslog"


def log_message(message, level='info', to_stderr=False):
if to_stderr:
Expand Down Expand Up @@ -6622,3 +6629,156 @@ def runTest(self):

finally:
self.sai_thrift_port_tx_enable(self.dst_client, asic_type, uniq_dst_ports)


class VoqWatchdogTest(sai_base_test.ThriftInterfaceDataPlane):
    """PTF test that blocks TX on the destination port and checks DUT logs for VOQ watchdog activity.

    The expected outcome is selected by the 'voq_watchdog_enabled' test parameter:
    every watched log string must appear when it is true, and none may appear
    when it is false.
    """

    def init_log_check(self):
        """Record the current size in bytes of each watched log file on the DUT.

        Returns:
            list[int]: sizes of SAI_LOG and SDK_LOG, in that order, as reported
            by `stat -c %s`.
        """
        pre_offsets = []
        for logfile in [SAI_LOG, SDK_LOG]:
            offset_cmd = "stat -c %s {}".format(logfile)
            stdout, _err, _ret = self.exec_cmd_on_dut(
                self.dst_server_ip,
                self.test_params['dut_username'],
                self.test_params['dut_password'],
                offset_cmd)
            pre_offsets.append(int(stdout[0]))
        return pre_offsets

    def verify_log(self, pre_offsets, voq_watchdog_enabled=True):
        """Check the log content written after `pre_offsets` for the watchdog strings.

        Args:
            pre_offsets (list[int]): byte offsets for SAI_LOG and SDK_LOG, as
                returned by init_log_check().
            voq_watchdog_enabled (bool): when True every watched string must be
                present; when False none of them may be present.
        """
        found_list = []
        for pre_offset, logfile, str_to_check in zip(pre_offsets, [SAI_LOG, SDK_LOG],
                                                     [SAI_LOG_TO_CHECK, SDK_LOG_TO_CHECK]):
            egrep_str = '|'.join(str_to_check)
            # tail -c +N starts at byte N (1-based), so +1 skips everything already
            # present when the offset was recorded; "|| true" keeps a no-match grep
            # from being reported as a command failure.
            check_cmd = "sudo tail -c +{} {} | egrep '{}' || true".format(pre_offset + 1, logfile, egrep_str)
            stdout, _err, _ret = self.exec_cmd_on_dut(
                self.dst_server_ip,
                self.test_params['dut_username'],
                self.test_params['dut_password'],
                check_cmd)
            log_message("Log for {}: {}".format(egrep_str, stdout))
            new_log_text = "".join(stdout)
            found_list.extend(string in new_log_text for string in str_to_check)
        if voq_watchdog_enabled:
            qos_test_assert(self, all(found_list),
                            "VOQ watchdog trigger not detected")
        else:
            qos_test_assert(self, not any(found_list),
                            "unexpected VOQ watchdog trigger")

    def runTest(self):
        """Run the VOQ watchdog scenario.

        Steps: disable TX on the destination port, send packets, wait 1.3x
        VOQ_WATCHDOG_TIMEOUT_SECONDS, verify the logs, re-enable TX, then send
        packets again and verify that no drop counters moved and that the queue
        counter grew by exactly `pkts_num`. TX is re-enabled in `finally` on
        every exit path.
        """
        switch_init(self.clients)

        # Parse input parameters
        dscp = int(self.test_params['dscp'])
        router_mac = self.test_params['router_mac']
        sonic_version = self.test_params['sonic_version']
        dst_port_id = int(self.test_params['dst_port_id'])
        dst_port_ip = self.test_params['dst_port_ip']
        dst_port_mac = self.dataplane.get_mac(0, dst_port_id)
        src_port_id = int(self.test_params['src_port_id'])
        src_port_ip = self.test_params['src_port_ip']
        src_port_vlan = self.test_params['src_port_vlan']
        src_port_mac = self.dataplane.get_mac(0, src_port_id)
        voq_watchdog_enabled = self.test_params['voq_watchdog_enabled']
        asic_type = self.test_params['sonic_asic_type']
        pkts_num = int(self.test_params['pkts_num'])

        pkt_dst_mac = router_mac if router_mac != '' else dst_port_mac
        # get counter names to query
        ingress_counters, egress_counters = get_counter_names(sonic_version)

        # Prepare IP packet data
        ttl = 64
        packet_length = int(self.test_params.get('packet_size', 64))

        is_dualtor = self.test_params.get('is_dualtor', False)
        def_vlan_mac = self.test_params.get('def_vlan_mac', None)
        if is_dualtor and def_vlan_mac is not None:
            pkt_dst_mac = def_vlan_mac

        pkt = construct_ip_pkt(packet_length,
                               pkt_dst_mac,
                               src_port_mac,
                               src_port_ip,
                               dst_port_ip,
                               dscp,
                               src_port_vlan,
                               ttl=ttl)

        log_message("test dst_port_id: {}, src_port_id: {}, src_vlan: {}".format(
            dst_port_id, src_port_id, src_port_vlan), to_stderr=True)
        # in case dst_port_id is part of LAG, find out the actual dst port
        # for given IP parameters
        dst_port_id = get_rx_port(
            self, 0, src_port_id, pkt_dst_mac, dst_port_ip, src_port_ip, src_port_vlan
        )
        log_message("actual dst_port_id: {}".format(dst_port_id), to_stderr=True)

        self.sai_thrift_port_tx_disable(self.dst_client, asic_type, [dst_port_id])

        try:
            # Record log offsets inside the try block so that a failure here
            # still reaches the finally clause and re-enables TX on the port.
            pre_offsets = self.init_log_check()

            # send packets
            send_packet(self, src_port_id, pkt, pkts_num)

            # allow enough time to trigger voq watchdog
            time.sleep(VOQ_WATCHDOG_TIMEOUT_SECONDS * 1.3)

            # verify voq watchdog is triggered (or, when disabled, that it is not)
            self.verify_log(pre_offsets, voq_watchdog_enabled)

            self.sai_thrift_port_tx_enable(self.dst_client, asic_type, [dst_port_id])

            # allow enough time for the dut to sync up the counter values in counters_db
            time.sleep(8)
            # get a snapshot of counter values at recv and transmit ports
            recv_counters_base, _ = sai_thrift_read_port_counters(
                self.src_client, asic_type, port_list['src'][src_port_id])
            xmit_counters_base, queue_counters_base = sai_thrift_read_port_counters(
                self.dst_client, asic_type, port_list['dst'][dst_port_id])
            if voq_watchdog_enabled:
                # queue counters should be cleared after soft reset
                qos_test_assert(
                    self, queue_counters_base[0] == 0,
                    'queue counters are not cleared, soft reset is not triggered')

            # send packets
            send_packet(self, src_port_id, pkt, pkts_num)
            # allow enough time for the dut to sync up the counter values in counters_db
            time.sleep(8)

            # get a snapshot of counter values at recv and transmit ports
            recv_counters, _ = sai_thrift_read_port_counters(
                self.src_client, asic_type, port_list['src'][src_port_id])
            xmit_counters, queue_counters = sai_thrift_read_port_counters(
                self.dst_client, asic_type, port_list['dst'][dst_port_id])
            log_message(
                '\trecv_counters {}\n\trecv_counters_base {}\n\t'
                'xmit_counters {}\n\txmit_counters_base {}\n\t'
                'queue_counters {}\n\tqueue_counters_base {}\n'.format(
                    recv_counters, recv_counters_base, xmit_counters, xmit_counters_base,
                    queue_counters, queue_counters_base), to_stderr=True)
            # recv port no ingress drop
            for cntr in ingress_counters:
                qos_test_assert(
                    self, recv_counters[cntr] == recv_counters_base[cntr],
                    'unexpectedly RX drop counter increase')
            # xmit port no egress drop
            for cntr in egress_counters:
                qos_test_assert(
                    self, xmit_counters[cntr] == xmit_counters_base[cntr],
                    'unexpectedly TX drop counter increase')
            # queue counters increased by pkts_num
            qos_test_assert(
                self, queue_counters[0] == queue_counters_base[0] + pkts_num,
                'queue counter not matched, expected {}, got {}'.format(
                    queue_counters_base[0] + pkts_num, queue_counters[0]))

        finally:
            # Safety net: always leave the destination port with TX enabled.
            self.sai_thrift_port_tx_enable(self.dst_client, asic_type, [dst_port_id])
Loading