From 2312ac2750df42e274004aa3df9fa49019420d6f Mon Sep 17 00:00:00 2001
From: Chenyang Wang <49756587+cyw233@users.noreply.github.com>
Date: Mon, 13 Jan 2025 13:31:07 +1100
Subject: [PATCH] refactor: optimize qos sai test (#16428)

Description of PR
Optimize the qos/test_qos_sai.py test to reduce the running time.

Summary:
Fixes # (issue) Microsoft ADO 30056122

Type of change
 Bug fix
 Testbed and Framework(new/improvement)
 New Test case
 Skipped for non-supported platforms
 Add ownership here(Microsft required only)
 Test case improvement

Approach
What is the motivation for this PR?
The running time of the qos/test_qos_sai.py test is too long (~9h) on T2 chassis so we wanted to reduce the running time. With this implementation, the running time will be reduced to (~7.5h)

How did you do it?
How did you verify/test it?
I ran the updated code and can confirm it's working well on T2 chassis: Elastictest link. The 2 DWRR failures are expected, which will be fixed after #16199

I also ran a T1 regression test to confirm: Elastictest link

co-authorized by: jianquanye@microsoft.com
---
 tests/qos/qos_sai_base.py | 48 +++++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 15 deletions(-)

diff --git a/tests/qos/qos_sai_base.py b/tests/qos/qos_sai_base.py
index 62694a1bfea..570a768a735 100644
--- a/tests/qos/qos_sai_base.py
+++ b/tests/qos/qos_sai_base.py
@@ -14,6 +14,7 @@
 
 from tests.common.fixtures.ptfhost_utils import ptf_portmap_file  # noqa F401
 from tests.common.helpers.assertions import pytest_assert, pytest_require
+from tests.common.helpers.multi_thread_utils import SafeThreadPoolExecutor
 from tests.common.mellanox_data import is_mellanox_device as isMellanoxDevice
 from tests.common.cisco_data import is_cisco_device
 from tests.common.dualtor.dual_tor_utils import upper_tor_host, lower_tor_host, dualtor_ports, is_tunnel_qos_remap_enabled  # noqa F401
@@ -590,13 +591,18 @@ def swapSyncd_on_selected_duts(self, request, duthosts, creds, tbinfo, lower_tor
                     new_creds['docker_registry_password'] = ''
                 else:
                     new_creds = creds
-                for duthost in dut_list:
-                    docker.swap_syncd(duthost, new_creds)
+
+                with SafeThreadPoolExecutor(max_workers=8) as executor:
+                    for duthost in dut_list:
+                        executor.submit(docker.swap_syncd, duthost, new_creds)
+
             yield
+
         finally:
             if swapSyncd:
-                for duthost in dut_list:
-                    docker.restore_default_syncd(duthost, new_creds)
+                with SafeThreadPoolExecutor(max_workers=8) as executor:
+                    for duthost in dut_list:
+                        executor.submit(docker.restore_default_syncd, duthost, new_creds)
 
     @pytest.fixture(scope='class', name="select_src_dst_dut_and_asic",
                     params=["single_asic", "single_dut_multi_asic",
@@ -1411,23 +1417,31 @@ def updateDockerService(host, docker="", action="", service=""):  # noqa: F811
                 upper_tor_host, testcase="test_qos_sai", feature_list=feature_list)
 
         disable_container_autorestart(src_dut, testcase="test_qos_sai", feature_list=feature_list)
-        for service in src_services:
-            updateDockerService(src_dut, action="stop", **service)
+        with SafeThreadPoolExecutor(max_workers=8) as executor:
+            for service in src_services:
+                executor.submit(updateDockerService, src_dut, action="stop", **service)
+
         src_dut.shell("sudo config bgp shutdown all")
         if src_asic != dst_asic:
             disable_container_autorestart(dst_dut, testcase="test_qos_sai", feature_list=feature_list)
-            for service in dst_services:
-                updateDockerService(dst_dut, action="stop", **service)
+            with SafeThreadPoolExecutor(max_workers=8) as executor:
+                for service in dst_services:
+                    executor.submit(updateDockerService, dst_dut, action="stop", **service)
+
             dst_dut.shell("sudo config bgp shutdown all")
 
         yield
 
-        for service in src_services:
-            updateDockerService(src_dut, action="start", **service)
+        with SafeThreadPoolExecutor(max_workers=8) as executor:
+            for service in src_services:
+                executor.submit(updateDockerService, src_dut, action="start", **service)
+
         src_dut.shell("sudo config bgp start all")
         if src_asic != dst_asic:
-            for service in dst_services:
-                updateDockerService(dst_dut, action="start", **service)
+            with SafeThreadPoolExecutor(max_workers=8) as executor:
+                for service in dst_services:
+                    executor.submit(updateDockerService, dst_dut, action="start", **service)
+
             dst_dut.shell("sudo config bgp start all")
 
         """ Start mux conatiner for dual ToR """
@@ -1906,9 +1920,13 @@ def dut_disable_ipv6(self, duthosts, tbinfo, lower_tor_host, swapSyncd_on_select
                 logger.info("Adding docker0's IPv6 address since it was removed when disabing IPv6")
                 duthost.shell("ip -6 addr add {} dev docker0".format(all_docker0_ipv6_addrs[duthost.hostname]))
 
-        # TODO: parallelize this step.. Do we really need this ?
-        for duthost in dut_list:
-            config_reload(duthost, config_source='config_db', safe_reload=True, check_intf_up_ports=True)
+        # TODO: Do we really need this ?
+        with SafeThreadPoolExecutor(max_workers=8) as executor:
+            for duthost in dut_list:
+                executor.submit(
+                    config_reload,
+                    duthost, config_source='config_db', safe_reload=True, check_intf_up_ports=True,
+                )
 
     @pytest.fixture(scope='class', autouse=True)
     def sharedHeadroomPoolSize(