Skip to content

Commit 7c31e46

Browse files
lolyu authored and mssonicbld committed
[dualtor_io] Allow duplications for link down downstream I/O (#17909)
What is the motivation for this PR? The following two link failure cases are failing on Cisco/MLNX: test_active_link_down_downstream_active and test_active_link_down_downstream_active_soc. The reason is that, after link down, between the FDB flush and the tunnel route add (due to the mux toggle-to-standby), the ASIC has no L2 information for the server/SoC neighbors, so downstream traffic is flooded to all VLAN member ports on Cisco/MLNX platforms. Those two testcases have no tolerance for packet duplication because, on Broadcom platforms, traffic to neighbors with no L2 information is simply dropped. Let's adapt to the Cisco/MLNX platforms by allowing packet duplication for those two testcases. How did you verify/test it? dualtor_io/test_link_failure.py::test_active_link_down_downstream_active[active-active] PASSED [100%] dualtor_io/test_link_failure.py::test_active_link_down_downstream_active_soc[active-active] PASSED [100%] Signed-off-by: Longxiang Lyu <lolv@microsoft.com>
1 parent a7f50c6 commit 7c31e46

3 files changed

Lines changed: 49 additions & 17 deletions

File tree

tests/common/dualtor/data_plane_utils.py

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ def arp_setup(ptfhost):
4141

4242

4343
def validate_traffic_results(tor_IO, allowed_disruption, delay,
44-
allow_disruption_before_traffic=False):
44+
allow_disruption_before_traffic=False,
45+
allowed_duplication=None):
4546
"""
4647
Generates a report (dictionary) of I/O metrics that were calculated as part
4748
of the dataplane test. This report is to be used by testcases to verify the
@@ -107,7 +108,12 @@ def validate_traffic_results(tor_IO, allowed_disruption, delay,
107108
"Maximum allowed disruption: {}s"
108109
.format(server_ip, longest_disruption, delay))
109110

110-
if total_duplications > allowed_disruption:
111+
# NOTE: Not all testcases set the allowed duplication threshold, and the duplication check
112+
# historically used the allowed disruption threshold here. So let's fall back to the
113+
# allowed disruption threshold if the allowed duplication is not provided.
114+
if allowed_duplication is None:
115+
allowed_duplication = allowed_disruption
116+
if total_duplications > allowed_duplication:
111117
failures.append("Traffic to server {} was duplicated {} times. "
112118
"Allowed number of duplications: {}"
113119
.format(server_ip, total_duplications, allowed_disruption))
@@ -150,11 +156,12 @@ def _validate_long_disruption(disruptions, allowed_disruption, delay):
150156

151157

152158
def verify_and_report(tor_IO, verify, delay, allowed_disruption,
153-
allow_disruption_before_traffic=False):
159+
allow_disruption_before_traffic=False, allowed_duplication=None):
154160
# Wait for the IO to complete before doing checks
155161
if verify:
156162
validate_traffic_results(tor_IO, allowed_disruption=allowed_disruption, delay=delay,
157-
allow_disruption_before_traffic=allow_disruption_before_traffic)
163+
allow_disruption_before_traffic=allow_disruption_before_traffic,
164+
allowed_duplication=allowed_duplication)
158165
return tor_IO.get_test_results()
159166

160167

@@ -267,7 +274,8 @@ def send_t1_to_server_with_action(duthosts, ptfhost, ptfadapter, tbinfo,
267274

268275
def t1_to_server_io_test(activehost, tor_vlan_port=None,
269276
delay=0, allowed_disruption=0, action=None, verify=False, send_interval=0.1,
270-
stop_after=None, allow_disruption_before_traffic=False):
277+
stop_after=None, allow_disruption_before_traffic=False,
278+
allowed_duplication=None):
271279
"""
272280
Helper method for `send_t1_to_server_with_action`.
273281
Starts sender and sniffer before performing the action on the tor host.
@@ -302,7 +310,8 @@ def t1_to_server_io_test(activehost, tor_vlan_port=None,
302310
if delay and not allowed_disruption:
303311
allowed_disruption = 1
304312

305-
return verify_and_report(tor_IO, verify, delay, allowed_disruption, allow_disruption_before_traffic)
313+
return verify_and_report(tor_IO, verify, delay, allowed_disruption, allow_disruption_before_traffic,
314+
allowed_duplication=allowed_duplication)
306315

307316
yield t1_to_server_io_test
308317

@@ -416,7 +425,7 @@ def send_t1_to_soc_with_action(duthosts, ptfhost, ptfadapter, tbinfo,
416425

417426
def t1_to_soc_io_test(activehost, tor_vlan_port=None,
418427
delay=0, allowed_disruption=0, action=None, verify=False, send_interval=0.01,
419-
stop_after=None):
428+
stop_after=None, allowed_duplication=None):
420429

421430
tor_IO = run_test(duthosts, activehost, ptfhost, ptfadapter, vmhost,
422431
action, tbinfo, tor_vlan_port, send_interval,
@@ -432,7 +441,8 @@ def t1_to_soc_io_test(activehost, tor_vlan_port=None,
432441
if asic_type == "vs":
433442
logging.info("Skipping verify on VS platform")
434443
return
435-
return verify_and_report(tor_IO, verify, delay, allowed_disruption)
444+
return verify_and_report(tor_IO, verify, delay, allowed_disruption,
445+
allowed_duplication=allowed_duplication)
436446

437447
yield t1_to_soc_io_test
438448

tests/common/dualtor/dual_tor_io.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import six
1212
import scapy.all as scapyall
1313
import ptf.testutils as testutils
14-
from operator import itemgetter
1514
from itertools import groupby
1615

1716
from tests.common.dualtor.dual_tor_common import CableType
@@ -793,16 +792,37 @@ def examine_each_packet(self, server_ip, packets):
793792
logger.error("Sniffer failed to filter any traffic from DUT")
794793
else:
795794
# Find ranges of consecutive packets that have been duplicated
796-
# All packets within the same consecutive range will have the same
797-
# difference between the packet index and the sequence number
798-
for _, grouper in groupby(enumerate(duplicate_packet_list), lambda t: t[0] - t[1][0]):
799-
group = list(map(itemgetter(1), grouper))
800-
duplicate_start, duplicate_end = group[0], group[-1]
795+
# All consecutive packets with the same payload will be grouped as one
796+
# duplication group.
797+
# For example, for the duplication list as the following:
798+
# [(70, 1744253633.499116), (70, 1744253633.499151), (70, 1744253633.499186),
799+
# (81, 1744253635.49922), (81, 1744253635.499255)]
800+
# two duplications will be reported:
801+
# "duplications": [
802+
# {
803+
# "start_time": 1744253633.499116,
804+
# "end_time": 1744253633.499186,
805+
# "start_id": 70,
806+
# "end_id": 70,
807+
# "duplication_count": 3
808+
# },
809+
# {
810+
# "start_time": 1744253635.49922,
811+
# "end_time": 1744253635.499255,
812+
# "start_id": 81,
813+
# "end_id": 81,
814+
# "duplication_count": 2
815+
# }
816+
# ]
817+
for _, grouper in groupby(duplicate_packet_list, lambda d: d[0]):
818+
duplicates = list(grouper)
819+
duplicate_start, duplicate_end = duplicates[0], duplicates[-1]
801820
duplicate_dict = {
802821
'start_time': duplicate_start[1],
803822
'end_time': duplicate_end[1],
804823
'start_id': duplicate_start[0],
805-
'end_id': duplicate_end[0]
824+
'end_id': duplicate_end[0],
825+
'duplication_count': len(duplicates)
806826
}
807827
duplicate_ranges.append(duplicate_dict)
808828

tests/dualtor_io/test_link_failure.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ def test_active_link_down_downstream_active(
8484
if cable_type == CableType.active_active:
8585
send_t1_to_server_with_action(
8686
upper_tor_host, verify=True, delay=MUX_SIM_ALLOWED_DISRUPTION_SEC,
87-
allowed_disruption=1, action=shutdown_fanout_upper_tor_intfs
87+
allowed_disruption=1, allowed_duplication=1,
88+
action=shutdown_fanout_upper_tor_intfs
8889
)
8990
verify_tor_states(
9091
expected_active_host=lower_tor_host,
@@ -332,7 +333,8 @@ def test_active_link_down_downstream_active_soc(
332333
if cable_type == CableType.active_active:
333334
send_t1_to_soc_with_action(
334335
upper_tor_host, verify=True, delay=MUX_SIM_ALLOWED_DISRUPTION_SEC,
335-
allowed_disruption=1, action=shutdown_fanout_upper_tor_intfs
336+
allowed_disruption=1, allowed_duplication=1,
337+
action=shutdown_fanout_upper_tor_intfs
336338
)
337339
verify_tor_states(
338340
expected_active_host=lower_tor_host,

0 commit comments

Comments
 (0)