Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 95 additions & 80 deletions tests/snappi_tests/pfcwd/files/pfcwd_multi_node_helper.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
import time
from math import ceil
import logging
import random

from tests.common.helpers.assertions import pytest_assert, pytest_require
from tests.common.fixtures.conn_graph_facts import conn_graph_facts,\
fanout_graph_facts # noqa F401
from tests.common.snappi_tests.snappi_helpers import get_dut_port_id
from tests.common.snappi_tests.common_helpers import pfc_class_enable_vector,\
start_pfcwd, enable_packet_aging, get_pfcwd_poll_interval, get_pfcwd_detect_time, sec_to_nanosec
from tests.common.snappi_tests.port import select_ports
from tests.common.snappi_tests.snappi_helpers import wait_for_arp
from tests.common.fixtures.conn_graph_facts import conn_graph_facts, fanout_graph_facts # noqa: F401
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file is shared by test_pfcwd_m2o_with_snappi.py. Can we include it in this change too, to avoid a regression?

from tests.common.snappi_tests.snappi_helpers import get_dut_port_id # noqa: F401
from tests.common.snappi_tests.common_helpers import pfc_class_enable_vector, \
start_pfcwd, enable_packet_aging, get_pfcwd_poll_interval, get_pfcwd_detect_time, \
sec_to_nanosec # noqa: F401
from tests.common.snappi_tests.port import select_ports # noqa: F401
from tests.common.snappi_tests.snappi_helpers import wait_for_arp # noqa: F401
from tests.common.snappi_tests.snappi_test_params import SnappiTestParams
from tests.common.snappi_tests.variables import pfcQueueGroupSize, pfcQueueValueDict

logger = logging.getLogger(__name__)

PAUSE_FLOW_NAME = 'Pause Storm'
WARM_UP_TRAFFIC_NAME = "Warm Up Traffic"
TEST_FLOW_NAME = 'Test Flow'
TEST_FLOW_AGGR_RATE_PERCENT = 45
BG_FLOW_NAME = 'Background Flow'
BG_FLOW_AGGR_RATE_PERCENT = 45
WARM_UP_TRAFFIC_DUR = 1
DATA_PKT_SIZE = 1024
SNAPPI_POLL_DELAY_SEC = 2
Expand All @@ -31,31 +31,31 @@ def run_pfcwd_multi_node_test(api,
port_config_list,
conn_data,
fanout_data,
duthost,
dut_port,
pause_prio_list,
test_prio_list,
bg_prio_list,
prio_dscp_map,
trigger_pfcwd,
pattern):
pattern,
snappi_extra_params=None):
"""
Run PFC watchdog test in a multi-node (>=3) topoology
Run multidut PFC watchdog test in a multi-node (>=3) topoology

Args:
api (obj): SNAPPI session
testbed_config (obj): testbed L1/L2/L3 configuration
port_config_list (list): list of port configuration
conn_data (dict): the dictionary returned by conn_graph_fact.
fanout_data (dict): the dictionary returned by fanout_graph_fact.
duthost (Ansible host instance): device under test
dut_port (str): DUT port to test
pause_prio_list (list): priorities to pause for PFC pause storm
test_prio_list (list): priorities of test flows
bg_prio_list (list): priorities of background flows
prio_dscp_map (dict): Priority vs. DSCP map (key = priority).
trigger_pfcwd (bool): if PFC watchdog is expected to be triggered
pattern (str): traffic pattern
snappi_extra_params (SnappiTestParams obj): additional parameters for Snappi traffic
Returns:
N/A
"""
Expand All @@ -64,33 +64,58 @@ def run_pfcwd_multi_node_test(api,
raise ValueError('invalid traffic pattern passed in "{}", must be {}'.format(
pattern, ' or '.join(['"{}"'.format(src) for src in patterns])))

pytest_assert(testbed_config is not None,
'Fail to get L2/3 testbed config')
num_ports = len(port_config_list)
pytest_require(num_ports >= 3, "This test requires at least 3 ports")
if snappi_extra_params is None:
snappi_extra_params = SnappiTestParams()

# Traffic flow:
# tx_port (TGEN) --- ingress DUT --- egress DUT --- rx_port (TGEN)

# initialize the (duthost, port) set.
# The final list will have all the asics which needs to be configured for PFC
pfcwd_to_be_configured = set()

start_pfcwd(duthost)
enable_packet_aging(duthost)
rx_port = snappi_extra_params.multi_dut_params.multi_dut_ports[0]
rx_port_id_list = [rx_port["port_id"]]
egress_duthost = rx_port['duthost']
# Add the port to the set of ports to be configured for PFC
pfcwd_to_be_configured.add((egress_duthost, rx_port['asic_value']))

""" Get the ID of the port to test """
port_id = get_dut_port_id(dut_hostname=duthost.hostname,
dut_port=dut_port,
conn_data=conn_data,
fanout_data=fanout_data)
tx_port = [snappi_extra_params.multi_dut_params.multi_dut_ports[1],
snappi_extra_params.multi_dut_params.multi_dut_ports[2]]
tx_port_id_list = [tx_port[0]["port_id"], tx_port[1]["port_id"]]
# add ingress DUT into the set
pfcwd_to_be_configured.add((tx_port[0]['duthost'], tx_port[0]['asic_value']))
pfcwd_to_be_configured.add((tx_port[1]['duthost'], tx_port[1]['asic_value']))

pytest_assert(testbed_config is not None, 'Fail to get L2/3 testbed config')
num_ports = len(port_config_list)
pytest_require(num_ports >= 3, "This test requires at least 3 ports")

pytest_assert(port_id is not None,
'Fail to get ID for port {}'.format(dut_port))
# Enable PFC watchdog on the rx side and tx side of the DUT without duplication.
for duthost, asic in pfcwd_to_be_configured:
start_pfcwd(duthost, asic)
enable_packet_aging(duthost)

poll_interval_sec = get_pfcwd_poll_interval(duthost) / 1000.0
detect_time_sec = get_pfcwd_detect_time(
host_ans=duthost, intf=dut_port) / 1000.0
poll_interval_sec = get_pfcwd_poll_interval(egress_duthost, rx_port['asic_value']) / 1000.0
detect_time_sec = get_pfcwd_detect_time(host_ans=egress_duthost, intf=rx_port['peer_port'],
asic_value=rx_port['asic_value']) / 1000.0

if trigger_pfcwd:
pfc_storm_dur_sec = poll_interval_sec + detect_time_sec
else:
pfc_storm_dur_sec = 0.5 * detect_time_sec

exp_dur_sec = ceil(pfc_storm_dur_sec + 1)
cisco_platform = "Cisco" in egress_duthost.facts['hwsku']

speed_str = testbed_config.layer1[0].speed
speed_gbps = int(speed_str.split('_')[1])
TEST_FLOW_AGGR_RATE_PERCENT = 45
BG_FLOW_AGGR_RATE_PERCENT = 45
# Backplane is 200G in Cisco platforms.
if speed_gbps > 200 and cisco_platform:
TEST_FLOW_AGGR_RATE_PERCENT = TEST_FLOW_AGGR_RATE_PERCENT * 200 / speed_gbps
BG_FLOW_AGGR_RATE_PERCENT = BG_FLOW_AGGR_RATE_PERCENT * 200 / speed_gbps

""" Generate traffic config """
test_flow_rate_percent = int(TEST_FLOW_AGGR_RATE_PERCENT /
Expand All @@ -103,7 +128,8 @@ def run_pfcwd_multi_node_test(api,

__gen_traffic(testbed_config=testbed_config,
port_config_list=port_config_list,
port_id=port_id,
rx_port_id_list=rx_port_id_list,
tx_port_id_list=tx_port_id_list,
pause_flow_name=PAUSE_FLOW_NAME,
pause_prio_list=pause_prio_list,
test_flow_name=TEST_FLOW_NAME,
Expand All @@ -127,13 +153,6 @@ def run_pfcwd_multi_node_test(api,
all_flow_names=all_flow_names,
exp_dur_sec=exp_dur_sec)

speed_str = testbed_config.layer1[0].speed
speed_gbps = int(speed_str.split('_')[1])

""" Retrieve ASIC information for DUT """
asic_type = duthost.facts['asic_type']
rx_tx_tol_thrhlds = [0.0001, 0.0002] # Maintain a 0.01% and 0.02% deviation between tx and rx frames
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like multidut is missing the #9567 fix.


__verify_results(rows=flow_stats,
speed_gbps=speed_gbps,
pause_flow_name=PAUSE_FLOW_NAME,
Expand All @@ -144,10 +163,8 @@ def run_pfcwd_multi_node_test(api,
data_flow_dur_sec=exp_dur_sec,
data_pkt_size=DATA_PKT_SIZE,
trigger_pfcwd=trigger_pfcwd,
pause_port_id=port_id,
rx_deviation=TOLERANCE_THRESHOLD,
rx_tx_deviations=rx_tx_tol_thrhlds,
asic_type=asic_type)
pause_port_id=rx_port_id_list[0],
tolerance=TOLERANCE_THRESHOLD)


def __data_flow_name(name_prefix, src_id, dst_id, prio):
Expand Down Expand Up @@ -178,7 +195,7 @@ def __data_flow_src(flow_name):
"""
words = flow_name.split()
index = words.index('->')
return int(words[index-1])
return int(words[index - 1])


def __data_flow_dst(flow_name):
Expand All @@ -193,12 +210,13 @@ def __data_flow_dst(flow_name):
"""
words = flow_name.split()
index = words.index('->')
return int(words[index+1])
return int(words[index + 1])


def __gen_traffic(testbed_config,
port_config_list,
port_id,
rx_port_id_list,
tx_port_id_list,
pause_flow_name,
pause_prio_list,
test_flow_name,
Expand Down Expand Up @@ -239,10 +257,6 @@ def __gen_traffic(testbed_config,
N/A
"""

tx_port_id_list, rx_port_id_list = select_ports(port_config_list=port_config_list,
pattern=traffic_pattern,
rx_port_id=port_id)

""" Warm up traffic is initially sent before any other traffic to prevent pfcwd
fake alerts caused by idle links (non-incremented packet counters) during pfcwd detection periods """
warm_up_traffic_dur_sec = WARM_UP_TRAFFIC_DUR
Expand All @@ -264,7 +278,7 @@ def __gen_traffic(testbed_config,
prio_dscp_map=prio_dscp_map)

""" Generate a PFC pause storm """
pause_port_id = port_id
pause_port_id = rx_port_id_list[0]
__gen_pause_flow(testbed_config=testbed_config,
port_config_list=port_config_list,
src_port_id=pause_port_id,
Expand Down Expand Up @@ -323,7 +337,7 @@ def __gen_data_flows(testbed_config,
flow_prio_list (list): priorities of data flows
flow_rate_percent (int): rate percentage for each flow
flow_dur_sec (int): duration of each flow in second
flow_delay_sec (int): delay before starting all flows in second
flow_delay_sec (int): delay before starting pause flow in second
data_pkt_size (int): packet size of data flows in byte
prio_dscp_map (dict): Priority vs. DSCP map (key = priority).

Expand Down Expand Up @@ -373,17 +387,15 @@ def __gen_data_flow(testbed_config,
flow_prio_list (list): priorities of the flow
flow_rate_percent (int): rate percentage for the flow
flow_dur_sec (int): duration of the flow in second
flow_delay_sec (int): delay before starting flow in second
flow_delay_sec (int): delay before starting pause flow in second
data_pkt_size (int): packet size of the flow in byte
prio_dscp_map (dict): Priority vs. DSCP map (key = priority).

Returns:
N/A
"""
tx_port_config = next(
(x for x in port_config_list if x.id == src_port_id), None)
rx_port_config = next(
(x for x in port_config_list if x.id == dst_port_id), None)
tx_port_config = next((x for x in port_config_list if x.id == src_port_id), None)
rx_port_config = next((x for x in port_config_list if x.id == dst_port_id), None)

tx_mac = tx_port_config.mac
if tx_port_config.gateway == rx_port_config.gateway and \
Expand All @@ -403,7 +415,12 @@ def __gen_data_flow(testbed_config,
flow.tx_rx.port.tx_name = testbed_config.ports[src_port_id].name
flow.tx_rx.port.rx_name = testbed_config.ports[dst_port_id].name

eth, ipv4 = flow.packet.ethernet().ipv4()
eth, ipv4, udp = flow.packet.ethernet().ipv4().udp()
src_port = random.randint(5000, 6000)
udp.src_port.increment.start = src_port
udp.src_port.increment.step = 1
udp.src_port.increment.count = 1

eth.src.value = tx_mac
eth.dst.value = rx_mac
if pfcQueueGroupSize == 8:
Expand Down Expand Up @@ -491,6 +508,7 @@ def __gen_pause_flow(testbed_config,
pause_flow.rate.pps = pps
pause_flow.size.fixed = 64
pause_flow.duration.fixed_packets.packets = int(pkt_cnt)
pause_flow.duration.fixed_packets.delay.nanoseconds = 0
pause_flow.duration.fixed_packets.delay.nanoseconds = int(
sec_to_nanosec(flow_delay_sec))

Expand All @@ -514,7 +532,7 @@ def __run_traffic(api, config, all_flow_names, exp_dur_sec):
api.set_config(config)

logger.info('Wait for Arp to Resolve ...')
wait_for_arp(api, max_attempts=30, poll_interval_sec=2)
wait_for_arp(api, max_attempts=10, poll_interval_sec=2)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like we are missing the #7370 fix in multidut?


logger.info('Starting transmit on all flows ...')
ts = api.transmit_state()
Expand Down Expand Up @@ -568,9 +586,7 @@ def __verify_results(rows,
data_pkt_size,
trigger_pfcwd,
pause_port_id,
rx_deviation,
rx_tx_deviations,
asic_type):
tolerance):
"""
Verify if we get expected experiment results

Expand All @@ -586,36 +602,34 @@ def __verify_results(rows,
test_flow_pause (bool): if test flows are expected to be paused
trigger_pfcwd (bool): if PFC watchdog is expected to be triggered
pause_port_id (int): ID of the port to send PFC pause frames
rx_deviation (float): maximum allowable deviation for rx_frames relative to theoretical value
rx_tx_deviations (list of floats): maximum allowable % deviation for rx_frames relative to tx_frames
asic_type (str): asic_type information for DUT
tolerance (float): maximum allowable deviation

Returns:
N/A
"""

""" Check for whether DUT is a Mellanox device """
is_mlnx_device = True if "mellanox" in asic_type.lower() else False

for row in rows:
flow_name = row.name
tx_frames = row.frames_tx
rx_frames = row.frames_rx

logger.info('Flow Name : {} , Tx Frames : {}, Rx Frames : {}'.format(flow_name, tx_frames, rx_frames))

if pause_flow_name in flow_name:
""" PFC pause storm """
logger.info('PFC pause storm expected to be dropped')
pytest_assert(tx_frames > 0 and rx_frames == 0,
"All the PFC packets should be dropped")

elif bg_flow_name in flow_name:
""" Background flows """
logger.info('Background flows expected not to have any dropped packets')
pytest_assert(tx_frames == rx_frames,
'{} should not have any dropped packet'.format(flow_name))

exp_bg_flow_rx_pkts = bg_flow_rate_percent / 100.0 * speed_gbps \
* 1e9 * data_flow_dur_sec / 8.0 / data_pkt_size
deviation = (rx_frames - exp_bg_flow_rx_pkts) / float(exp_bg_flow_rx_pkts)
pytest_assert(abs(deviation) < rx_deviation,
pytest_assert(abs(deviation) < tolerance,
'{} should receive {} packets (actual {})'.
format(flow_name, exp_bg_flow_rx_pkts, rx_frames))

Expand All @@ -627,32 +641,33 @@ def __verify_results(rows,
exp_test_flow_rx_pkts = test_flow_rate_percent / 100.0 * speed_gbps \
* 1e9 * data_flow_dur_sec / 8.0 / data_pkt_size

if trigger_pfcwd and dst_port_id == pause_port_id:
if trigger_pfcwd and\
(src_port_id == pause_port_id or dst_port_id == pause_port_id):
""" Once PFC watchdog is triggered, it will impact bi-directional traffic """
logger.info('Once PFC watchdog is triggered, it will impact bi-directional traffic')
logger.info('Tx and Rx should have dropped packets')
pytest_assert(tx_frames > rx_frames,
'{} should have dropped packets'.format(flow_name))

elif trigger_pfcwd and src_port_id == pause_port_id:
if is_mlnx_device:
""" During a pfc storm with pfcwd triggered, Mellanox devices do not drop Rx packets """
pytest_assert(tx_frames == rx_frames,
'{} should not have dropped packets for Mellanox device'.format(flow_name))

elif not trigger_pfcwd and dst_port_id == pause_port_id:
""" This test flow is delayed by PFC storm """
logger.info('This test flow is delayed by PFC storm')
logger.info('Tx and Rx should not have any dropped packet')
pytest_assert(tx_frames == rx_frames,
'{} should not have any dropped packet'.format(flow_name))
pytest_assert(rx_frames < exp_test_flow_rx_pkts,
'{} shoudl receive less than {} packets (actual {})'.
format(flow_name, exp_test_flow_rx_pkts, rx_frames))

else:
for dev_pct in rx_tx_deviations:
""" Otherwise, the test flow is not impacted by PFC storm """
pytest_assert(abs(tx_frames - rx_frames)/float(tx_frames) < dev_pct,
'{} should be within {} percent deviation'.format(flow_name, dev_pct*100))
""" Otherwise, the test flow is not impacted by PFC storm """
logger.info('the test flow is not impacted by PFC storm')
logger.info('Tx and Rx should not have any dropped packet')

pytest_assert(tx_frames == rx_frames,
'{} should not have any dropped packet'.format(flow_name))

deviation = (rx_frames - exp_test_flow_rx_pkts) / float(exp_test_flow_rx_pkts)
pytest_assert(abs(deviation) < rx_deviation,
pytest_assert(abs(deviation) < tolerance,
'{} should receive {} packets (actual {})'.
format(flow_name, exp_test_flow_rx_pkts, rx_frames))
Loading