Skip to content

Commit 01e7ddb

Browse files
committed
Refactor test_acl.py with multi-thread for multi-dut scenario (sonic-net#14545)
Description of PR Summary: Refactor test_acl.py to better fit for chassis test. It reboot/config_reload the dut one by one, on Cisco Chassis, test_acl spend 12 hours. Use multi-thread to speed up the test. Approach What is the motivation for this PR? Refactor test_acl.py to better fit for chassis test. It reboot/config_reload the dut one by one, on Cisco Chassis, test_acl spend 12 hours. Use multi-thread to speed up the test. How did you do it? Use multi-thread to speed up the test. How did you verify/test it? Run on physical testbed co-authorized by: [email protected]
1 parent 9867113 commit 01e7ddb

5 files changed

Lines changed: 148 additions & 87 deletions

File tree

tests/acl/test_acl.py

Lines changed: 114 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from tests.common import reboot, port_toggle
1717
from tests.common.helpers.assertions import pytest_require, pytest_assert
18+
from tests.common.helpers.multi_thread_utils import SafeThreadPoolExecutor
1819
from tests.common.plugins.loganalyzer.loganalyzer import LogAnalyzer, LogAnalyzerError
1920
from tests.common.config_reload import config_reload
2021
from tests.common.fixtures.ptfhost_utils import copy_arp_responder_py, run_garp_service, change_mac_addresses # noqa F401
@@ -148,25 +149,31 @@ def remove_dataacl_table(duthosts):
148149
The change is written to configdb as we don't want DATAACL recovered after reboot
149150
"""
150151
TABLE_NAME = "DATAACL"
151-
for duthost in duthosts:
152-
lines = duthost.shell(cmd="show acl table {}".format(TABLE_NAME))['stdout_lines']
153-
data_acl_existing = False
154-
for line in lines:
155-
if TABLE_NAME in line:
156-
data_acl_existing = True
157-
break
158-
if data_acl_existing:
159-
# Remove DATAACL
160-
logger.info("Removing ACL table {}".format(TABLE_NAME))
161-
cmds = [
162-
"config acl remove table {}".format(TABLE_NAME),
163-
"config save -y"
164-
]
165-
duthost.shell_cmds(cmds=cmds)
152+
with SafeThreadPoolExecutor(max_workers=8) as executor:
153+
for duthost in duthosts:
154+
executor.submit(remove_dataacl_table_single_dut, TABLE_NAME, duthost)
166155
yield
167-
# Recover DUT by reloading minigraph
168-
for duthost in duthosts:
169-
config_reload(duthost, config_source="minigraph")
156+
with SafeThreadPoolExecutor(max_workers=8) as executor:
157+
# Recover DUT by reloading minigraph
158+
for duthost in duthosts:
159+
executor.submit(config_reload, duthost, config_source="minigraph", safe_reload=True)
160+
161+
162+
def remove_dataacl_table_single_dut(table_name, duthost):
163+
lines = duthost.shell(cmd="show acl table {}".format(table_name))['stdout_lines']
164+
data_acl_existing = False
165+
for line in lines:
166+
if table_name in line:
167+
data_acl_existing = True
168+
break
169+
if data_acl_existing:
170+
# Remove DATAACL
171+
logger.info("{} Removing ACL table {}".format(duthost.hostname, table_name))
172+
cmds = [
173+
"config acl remove table {}".format(table_name),
174+
"config save -y"
175+
]
176+
duthost.shell_cmds(cmds=cmds)
170177

171178

172179
def get_t2_info(duthosts, tbinfo):
@@ -427,7 +434,6 @@ def populate_vlan_arp_entries(setup, ptfhost, duthosts, rand_one_dut_hostname, i
427434
global DOWNSTREAM_IP_PORT_MAP
428435
# For m0 topo, need to refresh this constant for two different scenario
429436
DOWNSTREAM_IP_PORT_MAP = {}
430-
duthost = duthosts[rand_one_dut_hostname]
431437
if setup["topo"] not in ["t0", "mx", "m0_vlan"]:
432438
def noop():
433439
pass
@@ -466,7 +472,8 @@ def populate_arp_table():
466472
dut.command("sonic-clear arp")
467473
dut.command("sonic-clear ndp")
468474
# Wait some time to ensure the async call of clear is completed
469-
time.sleep(20)
475+
time.sleep(20)
476+
for dut in duthosts:
470477
for addr in addr_list:
471478
dut.command("ping {} -c 3".format(addr), module_ignore_errors=True)
472479

@@ -477,9 +484,10 @@ def populate_arp_table():
477484
logging.info("Stopping ARP responder")
478485
ptfhost.shell("supervisorctl stop arp_responder", module_ignore_errors=True)
479486

480-
duthost.command("sonic-clear fdb all")
481-
duthost.command("sonic-clear arp")
482-
duthost.command("sonic-clear ndp")
487+
for dut in duthosts:
488+
dut.command("sonic-clear fdb all")
489+
dut.command("sonic-clear arp")
490+
dut.command("sonic-clear ndp")
483491

484492

485493
@pytest.fixture(scope="module", params=["ingress", "egress"])
@@ -565,34 +573,42 @@ def acl_table(duthosts, rand_one_dut_hostname, setup, stage, ip_version, tbinfo,
565573

566574
dut_to_analyzer_map = {}
567575

568-
for duthost in duthosts:
569-
if duthost.is_supervisor_node():
570-
continue
571-
loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="acl")
572-
loganalyzer.load_common_config()
573-
dut_to_analyzer_map[duthost] = loganalyzer
574-
575-
try:
576-
loganalyzer.expect_regex = [LOG_EXPECT_ACL_TABLE_CREATE_RE]
577-
# Ignore any other errors to reduce noise
578-
loganalyzer.ignore_regex = [r".*"]
579-
with loganalyzer:
580-
create_or_remove_acl_table(duthost, acl_table_config, setup, "add", topo)
581-
wait_until(300, 20, 0, check_msg_in_syslog,
582-
duthost, LOG_EXPECT_ACL_TABLE_CREATE_RE)
583-
except LogAnalyzerError as err:
584-
# Cleanup Config DB if table creation failed
585-
logger.error("ACL table creation failed, attempting to clean-up...")
586-
create_or_remove_acl_table(duthost, acl_table_config, setup, "remove", topo)
587-
raise err
588-
576+
with SafeThreadPoolExecutor(max_workers=8) as executor:
577+
for duthost in duthosts:
578+
executor.submit(set_up_acl_table_single_dut, acl_table_config, dut_to_analyzer_map, duthost, setup, topo)
589579
try:
590580
yield acl_table_config
591581
finally:
592-
for duthost, loganalyzer in list(dut_to_analyzer_map.items()):
593-
loganalyzer.expect_regex = [LOG_EXPECT_ACL_TABLE_REMOVE_RE]
594-
with loganalyzer:
595-
create_or_remove_acl_table(duthost, acl_table_config, setup, "remove", topo)
582+
with SafeThreadPoolExecutor(max_workers=8) as executor:
583+
for duthost, loganalyzer in list(dut_to_analyzer_map.items()):
584+
executor.submit(tear_down_acl_table_single_dut, acl_table_config, duthost, loganalyzer, setup, topo)
585+
586+
587+
def tear_down_acl_table_single_dut(acl_table_config, duthost, loganalyzer, setup, topo):
588+
loganalyzer.expect_regex = [LOG_EXPECT_ACL_TABLE_REMOVE_RE]
589+
with loganalyzer:
590+
create_or_remove_acl_table(duthost, acl_table_config, setup, "remove", topo)
591+
592+
593+
def set_up_acl_table_single_dut(acl_table_config, dut_to_analyzer_map, duthost, setup, topo):
594+
if duthost.is_supervisor_node():
595+
return
596+
loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="acl")
597+
loganalyzer.load_common_config()
598+
dut_to_analyzer_map[duthost] = loganalyzer
599+
try:
600+
loganalyzer.expect_regex = [LOG_EXPECT_ACL_TABLE_CREATE_RE]
601+
# Ignore any other errors to reduce noise
602+
loganalyzer.ignore_regex = [r".*"]
603+
with loganalyzer:
604+
create_or_remove_acl_table(duthost, acl_table_config, setup, "add", topo)
605+
wait_until(300, 20, 0, check_msg_in_syslog,
606+
duthost, LOG_EXPECT_ACL_TABLE_CREATE_RE)
607+
except LogAnalyzerError as err:
608+
# Cleanup Config DB if table creation failed
609+
logger.error("ACL table creation failed, attempting to clean-up...")
610+
create_or_remove_acl_table(duthost, acl_table_config, setup, "remove", topo)
611+
raise err
596612

597613

598614
class BaseAclTest(six.with_metaclass(ABCMeta, object)):
@@ -660,43 +676,60 @@ def acl_rules(self, duthosts, localhost, setup, acl_table, populate_vlan_arp_ent
660676
661677
"""
662678
dut_to_analyzer_map = {}
663-
for duthost in duthosts:
664-
if duthost.is_supervisor_node():
665-
continue
666-
loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="acl_rules")
667-
loganalyzer.load_common_config()
668-
dut_to_analyzer_map[duthost] = loganalyzer
669-
670-
try:
671-
loganalyzer.expect_regex = [LOG_EXPECT_ACL_RULE_CREATE_RE]
672-
# Ignore any other errors to reduce noise
673-
loganalyzer.ignore_regex = [r".*"]
674-
with loganalyzer:
675-
self.setup_rules(duthost, acl_table, ip_version)
676-
# Give the dut some time for the ACL rules to be applied and LOG message generated
677-
wait_until(300, 20, 0, check_msg_in_syslog,
678-
duthost, LOG_EXPECT_ACL_RULE_CREATE_RE)
679-
680-
self.post_setup_hook(duthost, localhost, populate_vlan_arp_entries, tbinfo, conn_graph_facts)
681-
682-
assert self.check_rule_counters(duthost), "Rule counters should be ready!"
683-
684-
except LogAnalyzerError as err:
685-
# Cleanup Config DB if rule creation failed
686-
logger.error("ACL rule application failed, attempting to clean-up...")
687-
self.teardown_rules(duthost)
688-
raise err
679+
680+
with SafeThreadPoolExecutor(max_workers=8) as executor:
681+
for duthost in duthosts:
682+
executor.submit(self.set_up_acl_rules_single_dut, acl_table, conn_graph_facts,
683+
dut_to_analyzer_map, duthost, ip_version, localhost,
684+
populate_vlan_arp_entries, tbinfo)
685+
logger.info("Set up acl_rules finished")
689686

690687
try:
691688
yield
692689
finally:
693-
for duthost, loganalyzer in list(dut_to_analyzer_map.items()):
694-
if duthost.is_supervisor_node():
695-
continue
696-
loganalyzer.expect_regex = [LOG_EXPECT_ACL_RULE_REMOVE_RE]
697-
with loganalyzer:
698-
logger.info("Removing ACL rules")
699-
self.teardown_rules(duthost)
690+
with SafeThreadPoolExecutor(max_workers=8) as executor:
691+
for duthost, loganalyzer in list(dut_to_analyzer_map.items()):
692+
executor.submit(self.tear_down_acl_rule_single_dut, duthost, loganalyzer)
693+
logger.info("Tear down acl_rules finished")
694+
695+
def tear_down_acl_rule_single_dut(self, duthost, loganalyzer):
696+
if duthost.is_supervisor_node():
697+
return
698+
loganalyzer.expect_regex = [LOG_EXPECT_ACL_RULE_REMOVE_RE]
699+
with loganalyzer:
700+
logger.info("Removing ACL rules")
701+
self.teardown_rules(duthost)
702+
703+
def set_up_acl_rules_single_dut(self, acl_table,
704+
conn_graph_facts, dut_to_analyzer_map, duthost, # noqa F811
705+
ip_version, localhost,
706+
populate_vlan_arp_entries, tbinfo):
707+
logger.info("{}: ACL rule application started".format(duthost.hostname))
708+
if duthost.is_supervisor_node():
709+
return
710+
loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix="acl_rules")
711+
loganalyzer.load_common_config()
712+
dut_to_analyzer_map[duthost] = loganalyzer
713+
try:
714+
loganalyzer.expect_regex = [LOG_EXPECT_ACL_RULE_CREATE_RE]
715+
# Ignore any other errors to reduce noise
716+
loganalyzer.ignore_regex = [r".*"]
717+
with loganalyzer:
718+
self.setup_rules(duthost, acl_table, ip_version)
719+
# Give the dut some time for the ACL rules to be applied and LOG message generated
720+
wait_until(300, 20, 0, check_msg_in_syslog,
721+
duthost, LOG_EXPECT_ACL_RULE_CREATE_RE)
722+
723+
self.post_setup_hook(duthost, localhost, populate_vlan_arp_entries, tbinfo, conn_graph_facts)
724+
725+
assert self.check_rule_counters(duthost), "Rule counters should be ready!"
726+
727+
except LogAnalyzerError as err:
728+
# Cleanup Config DB if rule creation failed
729+
logger.error("ACL rule application failed, attempting to clean-up...")
730+
self.teardown_rules(duthost)
731+
raise err
732+
logger.info("{}: ACL rule application finished".format(duthost.hostname))
700733

701734
@pytest.yield_fixture(scope="class", autouse=True)
702735
def counters_sanity_check(self, duthosts, acl_rules, acl_table):

tests/common/config_reload.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,9 +203,10 @@ def _config_reload_cmd_wrapper(cmd, executable):
203203
wait_critical_processes(sonic_host)
204204
# PFCWD feature does not enable on some topology, for example M0
205205
if config_source == 'minigraph' and pfcwd_feature_enabled(sonic_host):
206-
pytest_assert(wait_until(wait + 300, 20, 0, chk_for_pfc_wd, sonic_host),
207-
"PFC_WD is missing in CONFIG-DB")
208-
206+
# Supervisor node doesn't have PFC_WD
207+
if not sonic_host.is_supervisor_node():
208+
pytest_assert(wait_until(wait + 300, 20, 0, chk_for_pfc_wd, sonic_host),
209+
"PFC_WD is missing in CONFIG-DB")
209210
if check_intf_up_ports:
210211
pytest_assert(wait_until(wait + 300, 20, 0, check_interface_status_of_up_ports, sonic_host),
211212
"Not all ports that are admin up on are operationally up")
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from concurrent.futures import Future, as_completed
2+
from concurrent.futures.thread import ThreadPoolExecutor
3+
from typing import Optional, List
4+
5+
6+
class SafeThreadPoolExecutor(ThreadPoolExecutor):
7+
"""An enhanced thread pool executor
8+
9+
Everytime we submit a task, it will store the feature in self.features
10+
On the __exit__ function, it will wait all the tasks to be finished,
11+
And check any exceptions that are raised during the task executing
12+
"""
13+
def __init__(self, *args, **kwargs):
14+
super().__init__(*args, **kwargs)
15+
self.features: Optional[List[Future]] = []
16+
17+
def submit(self, __fn, *args, **kwargs):
18+
f = super().submit(__fn, *args, **kwargs)
19+
self.features.append(f)
20+
return f
21+
22+
def __exit__(self, exc_type, exc_val, exc_tb):
23+
for future in as_completed(self.features):
24+
# if exception caught in the sub-thread, .result() will raise it in the main thread
25+
_ = future.result()
26+
self.shutdown(wait=True)
27+
return False

tests/common/port_toggle.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def __get_down_ports(expect_up=True):
8080

8181
if not startup_ok:
8282
down_ports = __get_down_ports()
83-
startup_err_msg = "Some ports did not come up as expected: {}".format(str(down_ports))
83+
startup_err_msg = "{}: Some ports did not come up as expected: {}".format(duthost.hostname, str(down_ports))
8484
except Exception as e:
8585
startup_err_msg = "Startup ports failed with exception: {}".format(repr(e))
8686

tests/common/reboot.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,12 +285,12 @@ def reboot(duthost, localhost, reboot_type='cold', delay=10,
285285
# minutes to the maximum wait time. If it's ready sooner, then the
286286
# function will return sooner.
287287
pytest_assert(wait_until(wait + 400, 20, 0, duthost.critical_services_fully_started),
288-
"All critical services should be fully started!")
288+
"{}: All critical services should be fully started!".format(hostname))
289289
wait_critical_processes(duthost)
290290

291291
if check_intf_up_ports:
292292
pytest_assert(wait_until(wait + 300, 20, 0, check_interface_status_of_up_ports, duthost),
293-
"Not all ports that are admin up on are operationally up")
293+
"{}: Not all ports that are admin up on are operationally up".format(hostname))
294294
else:
295295
time.sleep(wait)
296296

0 commit comments

Comments
 (0)