From 87cd7d003baa80caa6f7c0c80c7a498078a59e03 Mon Sep 17 00:00:00 2001 From: judyjoseph <53951155+judyjoseph@users.noreply.github.com> Date: Mon, 23 Mar 2026 13:28:29 -0700 Subject: [PATCH] Revert "Add test plan and tests for liquid cooling leakage detection (#20792)" This reverts commit b865f3b26712407a9be0c931fb5a2bf5c9b3e1c4. --- ...uid-Cooling-leakage-detection-test-plan.md | 106 ------- .../liquid_leakage_control_test_helper.py | 260 ------------------ ...anox_liquid_leakage_control_test_helper.py | 113 -------- .../mellanox_sensor_control_test_helper.py | 152 ---------- .../mellanox_thermal_control_test_helper.py | 143 +++++++++- .../platform_api/liquid_cooling_leakage.py | 56 ---- .../helpers/sensor_control_test_helper.py | 114 -------- .../helpers/thermal_control_test_helper.py | 114 +++++++- .../api/test_liquid_cooling_leakage.py | 68 ----- tests/platform_tests/mellanox/check_sysfs.py | 41 --- .../mellanox/test_psu_power_threshold.py | 2 +- .../mellanox/test_reboot_cause.py | 2 +- .../test_liquid_cooling_leakage_detection.py | 119 -------- tests/platform_tests/test_platform_info.py | 3 +- tests/snmp/test_snmp_phy_entity.py | 2 +- 15 files changed, 255 insertions(+), 1040 deletions(-) delete mode 100644 docs/testplan/bmc/Liquid-Cooling-leakage-detection-test-plan.md delete mode 100644 tests/common/helpers/liquid_leakage_control_test_helper.py delete mode 100644 tests/common/helpers/mellanox_liquid_leakage_control_test_helper.py delete mode 100644 tests/common/helpers/mellanox_sensor_control_test_helper.py delete mode 100644 tests/common/helpers/platform_api/liquid_cooling_leakage.py delete mode 100644 tests/common/helpers/sensor_control_test_helper.py delete mode 100644 tests/platform_tests/api/test_liquid_cooling_leakage.py delete mode 100644 tests/platform_tests/test_liquid_cooling_leakage_detection.py diff --git a/docs/testplan/bmc/Liquid-Cooling-leakage-detection-test-plan.md b/docs/testplan/bmc/Liquid-Cooling-leakage-detection-test-plan.md deleted file mode 100644 index e613f3926fc..00000000000 --- a/docs/testplan/bmc/Liquid-Cooling-leakage-detection-test-plan.md +++ /dev/null @@ -1,106 +0,0 @@ -# Liquid cooling leakage detection test plan - -* [Overview](#Overview) - * [HLD](#HLD) - * [Scope](#Scope) - * [Testbed](#Testbed) - * [Setup configuration](#Setup%20configuration) -* [Test](#Test) -* [TODO](#TODO) -* [Open questions](#Open%20questions) - -## Overview -Liquid cooling technology has become essential for efficiently cooling equipment and ensuring its proper operation. To address the potential dangers associated with liquid cooling leakage, it is crucial to implement a monitoring mechanism that can instantly alert the system when such an event occurs.The purpose of this test is to verify the functionality of leakage detection. - -### HLD -- Feature HLD: https://github.com/sonic-net/SONiC/pull/2032/ - -### Scope -The test is targeting on the verification of the functionality of leakage detection on device has liquid cooling system. - -### Testbed -Any - -### Setup configuration -Common tests configuration: -- Check whether the device has liquid cooling system. If yes, do the following tests, else skip them. - - When device has liquid cooling system: The key of enable_liquid_cooling exsits in pmon_daemon_control.json and the value is true - -Common tests cleanup: -- No. - - -## Test -### Test case #1 test_verify_liquid_senors_number_and_status -#### Test objective -Verify the number of the liquid sensors equals the configured number and the corresponding status is ok -#### Test steps -* Verify the number of the liquid sensors equals the configured number -* Verify there are no leaks - * Check that the status of all leak sensors is 'NO' in the output of the 'show platform leakage status' command - * Check that the status of all leak sensors is 'OK' in the output of the 'show system-health detail' command - -### Test case #2 test_mock_liquid_leak_event -#### Test objective -1. Mock liquid leak event and verify the dut has the correct response -2. Mock liquid leak event is fixed and verify the dut has the correct response -#### Test steps -* Randomly select one or serveral sensors to mock leak event. Take leakage1 as example: - * Save the value of /var/run/hw-management/system/leakage1 and unlink it - * Create a file /var/run/hw-management/system/leakage1 - * Echo 0 to /var/run/hw-management/system/leakage1 to mock leak event -* sleep liguid_cooling_update_interval (The default value is 0.5s) -* Verify state db has been updated to 'YES' for the mocked sensors -* Verify syslog has the corresponding GNMI event log indicating the liquid leakage event occurs, and msg has been sent out -* Verify there are leaks for the mocked sensors - * Check that the status of the mocked sensors is 'Yes' in the output of the 'show platform leakage status' command - * Check that the status of the mocked sensors is 'Not OK' in the output of the 'show system-health detail' command -* Restore the liquid sensor -* sleep liguid_cooling_update_interval -* Verify state db has been updated to 'No' for the mocked sensors -* Verify syslog has the corresponding GNMI event log indicating liquid leakgae event has been fixed -* Verify the leaks for the mocked sensors has been fixed - * Check that the status of the mocked sensors is 'NO' in the output of the 'show platform leakage status' command - * Check that the status of the mocked sensors is 'OK' in the output of the 'show system-health detail' command - -### Test case #3 Extend check_sysfs -#### Test objective - Extend check_sysfs so that when dut do reboot and config reload, the liquid cooling leakage sysfs can be verified -#### Test steps -* Extend the function of check_sysfs to check the sysfs related to liquid cooling leakage - -### Test case #4 Platfform API get_name -#### Test objective - Verify get_name gets the correct value -#### Test steps -* Call get_name, and verify it returns the correct value like leakage1,leakage2... - -### Test case #5 Platfform API is_leak -#### Test objective - Verify is_leak gets the correct value -#### Test steps -* Call is_leak, and verify it returns Flase - -### Test case #6 Platfform API get_leak_sensor_status -#### Test objective - Verify get_leak_sensor_status gets the correct value -#### Test steps -* Call get_leak_sensor_status, and verify it return the emtpy list - -### Test case #7 Platfform API get_num_leak_sensors -#### Test objective - Verify get_num_leak_sensors gets the correct value -#### Test steps -* Call get_num_leak_sensors, and verify the return vlaue equals to the leak sensros number defined in pltform.json - -### Test case #8 Platfform API get_all_leak_sensors -#### Test objective - Verify get_all_leak_sensors gets the correct value -#### Test steps -* Call get_all_leak_sensors, and verify the return vlaue equals to the leak sensros number defined in pltform.json - - -## TODO - - -## Open questions diff --git a/tests/common/helpers/liquid_leakage_control_test_helper.py b/tests/common/helpers/liquid_leakage_control_test_helper.py deleted file mode 100644 index c181118a8db..00000000000 --- a/tests/common/helpers/liquid_leakage_control_test_helper.py +++ /dev/null @@ -1,260 +0,0 @@ -import logging -import json -import os -import re -import pytest -import ast -from tests.common.helpers.sensor_control_test_helper import BaseMocker -from tests.common.helpers.assertions import pytest_require as pyrequire -from tests.common.helpers.dut_utils import check_container_state -from tests.common.helpers.gnmi_utils import gnmi_container -from tests.common import config_reload -# The interval of EVENT_PUBLISHED is 60 seconds by default. -# To left some buffer, the timeout for gnmi LD event is set to 90 seconds -WAIT_GNMI_LD_EVENT_TIMEOUT = 90 -# To left some buffer for the thread timeout,the timeout for gnmi event is set to 120 seconds -WAIT_GNMI_EVENT_TIMEOUT = WAIT_GNMI_LD_EVENT_TIMEOUT + 30 - - -class LiquidLeakageMocker(BaseMocker): - """ - Liquid leakage mocker. Vendor should implement this class to provide a liquid leakage mocker. - This class could mock liquid leakage detection status. - """ - - def mock_leakage(self): - """ - Change the mocked liquid leakage detection status to 'Leakage'. - :return: - """ - pass - - def mock_no_leakage(self): - """ - Change the mocked liquid leakage detection status to 'No Leakage'. - :return: - """ - pass - - def verify_leakage(self): - """ - Verify the leakage status of the DUT. - :return: - """ - pass - - def verify_no_leakage(self): - """ - Verify the leakage status of the DUT. - :return: - """ - pass - - -def get_leakage_status(dut): - """ - Get the leakage status of the DUT. - :param dut: DUT object representing a SONiC switch under test. - :return: The leakage status of the DUT. - """ - return dut.show_and_parse("show platform leakage status") - - -def get_leakage_status_in_health_system(dut): - """ - Get the health system status of the DUT. - :param dut: DUT object representing a SONiC switch under test. - :return: The health system status of the DUT. - """ - system_health_status = dut.show_and_parse("sudo show system-health detail") - system_health_leakage_status_list = [] - for status in system_health_status: - if status['name'].startswith('leakage'): - system_health_leakage_status_list.append(status) - logging.info(f"System health leakage status list: {system_health_leakage_status_list}") - return system_health_leakage_status_list - - -def get_state_db(dut): - return ast.literal_eval(dut.shell('sonic-db-dump -n STATE_DB -y')['stdout']) - - -def verify_leakage_status(dut, leakage_index_list, expected_status): - """ - Verify the leak status of the DUT. - :param dut: DUT object representing a SONiC switch under test. - :param expected_status: Expected status of the DUT. - :return: - """ - logging.info(f"Verify leakage status of {leakage_index_list} is : {expected_status}") - leakage_status_list = get_leakage_status(dut) - failed_leakage_list = [] - success_leakage_list = [] - for index in leakage_index_list: - for leak_status in leakage_status_list: - if leak_status['name'] == f"leakage{index}": - if leak_status['leak'].lower() != expected_status.lower(): - failed_leakage_list.append(index) - logging.info(f"Leakage status is not as expected: {leak_status}") - else: - success_leakage_list.append(index) - logging.info(f"Leakage status is as expected: {leak_status}") - assert len(failed_leakage_list) == 0, f"Leakage status is not as expected: {failed_leakage_list}" - assert len(success_leakage_list) == len(leakage_index_list), \ - f"Not all leakage status are detected: test leakage index list: {leakage_index_list}, " \ - f"success leakage index list: {success_leakage_list}" - return True - - -def verify_leakage_status_in_health_system(dut, leakage_index_list, expected_status): - """ - Verify the leakage status in health system of the DUT. - :param dut: DUT object representing a SONiC switch under test. - :param expected_status: Expected status of the DUT. - :return: - """ - logging.info(f"Verify leakage status in health system of {leakage_index_list} is: {expected_status}") - health_system_leakage_status_list = get_leakage_status_in_health_system(dut) - failed_leakage_list = [] - success_leakage_list = [] - for index in leakage_index_list: - for leak_status in health_system_leakage_status_list: - if f"leakage{index}" == leak_status['name']: - if leak_status['status'].lower() != expected_status.lower(): - failed_leakage_list.append(index) - logging.info(f"Leakage status in health system is not as expected: {leak_status}") - else: - success_leakage_list.append(index) - logging.info(f"Leakage status in health system is as expected: {leak_status}") - assert len(failed_leakage_list) == 0, f"Leakage status is not as expected: {failed_leakage_list}" - assert len(success_leakage_list) == len(leakage_index_list), \ - f"Not all leakage status are detected: test leakage index list: {leakage_index_list}, " \ - f"success leakage index list: {success_leakage_list}" - return True - - -def verify_leakage_status_in_state_db(dut, leakage_index_list, expected_status): - """ - Verify the leakage status in state db of the DUT. - :param dut: DUT object representing a SONiC switch under test. - :param expected_status: Expected status of the DUT. - :return: - """ - logging.info(f"Verify leakage status in state db of {leakage_index_list} is: {expected_status}") - state_db = get_state_db(dut) - failed_leakage_list = [] - success_leakage_list = [] - for index in leakage_index_list: - leak_status = state_db.get(f"LIQUID_COOLING_INFO|leakage{index}", {}).get("value", {}).get("leak_status") - if leak_status != expected_status: - failed_leakage_list.append(index) - logging.info(f"Leakage status in state db is not as expected: {leak_status}") - else: - success_leakage_list.append(index) - logging.info(f"Leakage status in state db is as expected: {leak_status}") - assert len(failed_leakage_list) == 0, f"Leakage status is not as expected: {failed_leakage_list}" - assert len(success_leakage_list) == len(leakage_index_list), \ - f"Not all leakage status are detected: test leakage index list: {leakage_index_list}, " \ - f"success leakage index list: {success_leakage_list}" - return True - - -def verify_gnmi_msg_is_sent(leakage_index_list, gnmi_result, msg_type): - """ - Verify the gnmi msg of the DUT. - :param dut: DUT object representing a SONiC switch under test. - :param gnmi_result: gnmi result of the DUT. - :return: - """ - logging.info( - f"Verify gnmi msg is sent for {leakage_index_list} with type: {msg_type} \n gnmi result: {gnmi_result}") - msg_common_prefix = "sonic-events-host:liquid-cooling-leak" - for index in leakage_index_list: - if msg_type == "leaking": - expected_msg_regex = f".*{msg_common_prefix}.*sensor report leaking event.*leakage{index}.*" - else: - expected_msg_regex = f".*{msg_common_prefix}.*leaking sensor report recoveried.*leakage{index}.*" - assert re.search(expected_msg_regex, gnmi_result), f"Gnmi msg is not as expected: {gnmi_result}" - return True - - -def startmonitor_gnmi_event(duthost, ptfhost): - """ - Monitor the gnmi event of the DUT. - :param dut: DUT object representing a SONiC switch under test. - :param ptfhost: PTF object representing a PTF switch under test. - :param result_queue: Queue object to store the result. - :return: - """ - dut_mgmt_ip = duthost.mgmt_ip - timeout = WAIT_GNMI_LD_EVENT_TIMEOUT - gnmi_subscribe_cmd = f"python /root/gnxi/gnmi_cli_py/py_gnmicli.py -g -t {dut_mgmt_ip} -p 50052 -m subscribe \ - -x all[heartbeat=2] -xt EVENTS -o ndastreamingservertest --subscribe_mode 0 --submode 1 --interval 0 \ - --update_count 0 --create_connections 1 --filter_event_regex sonic-events-host --timeout {timeout} " - result = ptfhost.shell(gnmi_subscribe_cmd, module_ignore_errors=True)['stdout'] - logging.info(f"gnmi subscribe cmd: {gnmi_subscribe_cmd} \n gnmi event result: {result}") - return result - - -def get_pmon_daemon_control_dict(dut): - """ - Get the pmon daemon control dict of the DUT. - :param dut: DUT object representing a SONiC switch under test. - :return: The pmon daemon control dict of the DUT. - """ - pmon_daemon_control_file_path = os.path.join( - "/usr/share/sonic/device", dut.facts["platform"], "pmon_daemon_control.json") - return json.loads(dut.shell(f"cat {pmon_daemon_control_file_path} ")['stdout']) - - -def is_liquid_cooling_system_supported(dut): - """ - Check if the liquid cooling system is supported on the DUT. - :param dut: DUT object representing a SONiC switch under test. - :return: True if the liquid cooling system is supported, False otherwise. - """ - pmon_daemon_control_dict = get_pmon_daemon_control_dict(dut) - if pmon_daemon_control_dict.get("enable_liquid_cooling"): - logging.info("Liquid cooling system is supported") - return True - else: - logging.info("Liquid cooling system is not supported") - return False - - -def get_liquid_cooling_update_interval(dut): - """ - Get the liquid cooling update interval of the DUT. - :param dut: DUT object representing a SONiC switch under test. - :return: The liquid cooling update interval of the DUT. - """ - pmon_daemon_control_dict = get_pmon_daemon_control_dict(dut) - return pmon_daemon_control_dict.get("liquid_cooling_update_interval") - - -@pytest.fixture(scope="function") -def setup_gnmi_server(duthosts, rand_one_dut_hostname, localhost, ptfhost): - ''' - Setup GNMI server with client certificates - ''' - duthost = duthosts[rand_one_dut_hostname] - - # Check if GNMI is enabled on the device - pyrequire( - check_container_state(duthost, gnmi_container(duthost), should_be_running=True), - "Test was not supported on devices which do not support GNMI!") - duthost.shell("sonic-db-cli CONFIG_DB hset 'GNMI|gnmi' port 50052") - duthost.shell("sonic-db-cli CONFIG_DB hset 'GNMI|gnmi' client_auth true") - duthost.shell("sonic-db-cli CONFIG_DB hset 'GNMI|certs' ca_crt /etc/sonic/telemetry/dsmsroot.cer") - duthost.shell( - "sonic-db-cli CONFIG_DB hset 'GNMI|certs' server_crt /etc/sonic/telemetry/streamingtelemetryserver.cer") - duthost.shell( - "sonic-db-cli CONFIG_DB hset 'GNMI|certs' server_key /etc/sonic/telemetry/streamingtelemetryserver.key") - duthost.shell('sonic-db-cli CONFIG_DB HSET "GNMI|gnmi" "client_auth" "false"') - duthost.shell('sudo systemctl reset-failed gnmi') - duthost.shell('sudo service gnmi restart') - - yield - - logging.info("Recover gnmi config") - config_reload(duthost, safe_reload=True) diff --git a/tests/common/helpers/mellanox_liquid_leakage_control_test_helper.py b/tests/common/helpers/mellanox_liquid_leakage_control_test_helper.py deleted file mode 100644 index 852630e4cb7..00000000000 --- a/tests/common/helpers/mellanox_liquid_leakage_control_test_helper.py +++ /dev/null @@ -1,113 +0,0 @@ -import logging -import random -from tests.common.helpers.sensor_control_test_helper import mocker -from tests.common.helpers.liquid_leakage_control_test_helper import LiquidLeakageMocker -from tests.common.helpers.mellanox_sensor_control_test_helper import MockerBaseHelper -from tests.common.helpers.liquid_leakage_control_test_helper import verify_leakage_status, \ - verify_leakage_status_in_health_system, verify_leakage_status_in_state_db - - -LEAKAGE_STATUS_PATH = '/var/run/hw-management/system/' - - -class MockerHelper(MockerBaseHelper): - """ - Mellanox specified mocker helper. - """ - - def __init__(self, dut): - """ - Constructor of mocker helper. - :param dut: DUT object representing a SONiC switch under test. - """ - super().__init__(dut) - self._extract_num_of_leakage_detection() - - def _extract_num_of_leakage_detection(self): - """ - Get leakage number for this DUT. - :return: - """ - get_leakage_num_cmd = f'ls {LEAKAGE_STATUS_PATH}/leakage* | wc -l' - output = self.dut.shell(get_leakage_num_cmd) - content = output['stdout'].strip() - if not content: - return - self.LEAKAGE_NUM = 2 - - -@mocker('LiquidLeakageMocker') -class MlxLiquidLeakageMocker(LiquidLeakageMocker): - """ - Mocker class to help generate liquid cooling leakage detection status and check it with actual data. - """ - - def __init__(self, dut): - """ - Constructor of LiquidLeakageMocker. - :param dut: DUT object representing a SONiC switch under test. - """ - LiquidLeakageMocker.__init__(self, dut) - self.mock_helper = MockerHelper(dut) - self.test_leakage_num = random.randint(1, self.mock_helper.LEAKAGE_NUM) - self.test_leakage_index_list = random.sample( - list(range(1, self.mock_helper.LEAKAGE_NUM + 1)), k=self.test_leakage_num) - logging.info( - f"Test leakage num: {self.test_leakage_num}, test leakage index list: {self.test_leakage_index_list}") - - def deinit(self): - """ - Destructor of LiquidLeakageMocker. - :return: - """ - self.mock_helper.deinit() - - def mock_leakage(self): - """ - Change the mocked liquid leakage detection status to 'Leakage'. - :return: - """ - liquid_leak = 0 - for index in self.test_leakage_index_list: - self.mock_helper.mock_value(f"{LEAKAGE_STATUS_PATH}/leakage{index}", liquid_leak) - - def mock_no_leakage(self): - """ - Change the mocked liquid leakage detection status to 'No Leakage'. - :return: - """ - no_liquid_leak = 1 - for index in self.test_leakage_index_list: - self.mock_helper.mock_value(f"{LEAKAGE_STATUS_PATH}/leakage{index}", no_liquid_leak) - - def check_result(self, actual_data): - """ - Check the result of liquid cooling leakage detection. - :param actual_data: Actual data of liquid cooling leakage detection. - :return: - """ - return self.mock_helper.read_value(self.LEAKAGE_STATUS_FILE) == 1 - - def verify_leakage(self): - """ - Verify the leakage status of the DUT. - :param expected_status: Expected status of the DUT. - :return: - """ - verify_leakage_status_in_state_db(self.dut, self.test_leakage_index_list, "Yes") - verify_leakage_status(self.dut, self.test_leakage_index_list, "YES") - verify_leakage_status_in_health_system(self.dut, self.test_leakage_index_list, "Not OK") - - return True - - def verify_no_leakage(self): - """ - Verify the leakage status of the DUT. - :param expected_status: Expected status of the DUT. - :return: - """ - verify_leakage_status_in_state_db(self.dut, self.test_leakage_index_list, "No") - verify_leakage_status(self.dut, self.test_leakage_index_list, "NO") - verify_leakage_status_in_health_system(self.dut, self.test_leakage_index_list, "OK") - - return True diff --git a/tests/common/helpers/mellanox_sensor_control_test_helper.py b/tests/common/helpers/mellanox_sensor_control_test_helper.py deleted file mode 100644 index d27155b0083..00000000000 --- a/tests/common/helpers/mellanox_sensor_control_test_helper.py +++ /dev/null @@ -1,152 +0,0 @@ -import logging -import time -from pkg_resources import parse_version - - -class SysfsNotExistError(Exception): - """ - Exception when sys fs not exist. - """ - pass - - -class MockerBaseHelper: - """ - Mellanox specified mocker helper. - """ - unlink_file_list = {} - regular_file_list = {} - - def __init__(self, dut): - """ - Constructor of mocker helper. - :param dut: DUT object representing a SONiC switch under test. - """ - self.dut = dut - self.deinit_retry = 5 - - def mock_value(self, file_path, value, force=False): - """ - Unlink existing sys fs file and replace it with a new one. Write given value to the new file. - :param file_path: Sys fs file path. - :param value: Value to write to sys fs file. - :param force: Force mock even if the file does not exist. - :return: - """ - if file_path not in self.regular_file_list and file_path not in self.unlink_file_list: - out = self.dut.stat(path=file_path) - exist = True - if not out['stat']['exists']: - if force: - exist = False - else: - raise SysfsNotExistError('{} not exist'.format(file_path)) - if exist and out['stat']['islnk']: - self._unlink(file_path) - else: - self._cache_file_value(file_path, force) - self.dut.shell('echo \'{}\' > {}'.format(value, file_path)) - - def read_value(self, file_path): - """ - Read sys fs file content. - :param file_path: Sys fs file path. - :return: Content of sys fs file. - """ - out = self.dut.stat(path=file_path) - if not out['stat']['exists']: - raise SysfsNotExistError('{} not exist'.format(file_path)) - try: - output = self.dut.command("cat %s" % file_path) - value = output["stdout"] - return value.strip() - except Exception as e: - assert 0, "Get content from %s failed, exception: %s" % ( - file_path, repr(e)) - - def _cache_file_value(self, file_path, may_nexist=False): - """ - Cache file value for regular file. - :param file_path: Regular file path. - :return: - """ - try: - output = self.dut.command("cat %s" % file_path) - value = output["stdout"] - self.regular_file_list[file_path] = value.strip() - except Exception as e: - if may_nexist: - self.regular_file_list[file_path] = None - else: - assert 0, "Get content from %s failed, exception: %s" % ( - file_path, repr(e)) - - def _unlink(self, file_path): - """ - Unlink given sys fs file, record its soft link target. - :param file_path: Sys fs file path. - :return: - """ - readlink_output = self.dut.command('readlink {}'.format(file_path)) - self.unlink_file_list[file_path] = readlink_output["stdout"] - self.dut.command('unlink {}'.format(file_path)) - self.dut.command('touch {}'.format(file_path)) - self.dut.command('chown admin {}'.format(file_path)) - - def deinit(self): - """ - Destructor of MockerHelper. Re-link all sys fs files. - :return: - """ - failed_recover_links = {} - for file_path, link_target in list(self.unlink_file_list.items()): - try: - self.dut.command( - 'ln -f -s {} {}'.format(link_target, file_path)) - except Exception: - # Catch any exception for later retry - failed_recover_links[file_path] = link_target - - failed_recover_files = {} - for file_path, value in list(self.regular_file_list.items()): - try: - if value is None: - self.dut.shell('rm -f {}'.format(file_path)) - else: - self.dut.shell('echo \'{}\' > {}'.format(value, file_path)) - except Exception: - # Catch any exception for later retry - failed_recover_files[file_path] = value - - self.unlink_file_list.clear() - self.regular_file_list.clear() - # If there is any failed recover files, retry it - if failed_recover_links or failed_recover_files: - self.deinit_retry -= 1 - if self.deinit_retry > 0: - self.unlink_file_list = failed_recover_links - self.regular_file_list = failed_recover_files - # The failed files might be used by other sonic daemons, delay 1 second - # here to avoid conflict - time.sleep(1) - self.deinit() - else: - # We don't want to retry it infinite, and 5 times retry - # is enough, so if it still fails after the retry, it - # means there is probably an issue with our sysfs, we need - # mark it fail here - failed_recover_files.update(failed_recover_links) - error_message = "Failed to recover all files, failed files: {}".format( - failed_recover_files) - logging.error(error_message) - raise RuntimeError(error_message) - - def is_201911(self): - """ - Workaround to make thermal control test cases compatible with 201911 and master - :return: - """ - if parse_version(self.dut.kernel_version) > parse_version('4.9.0'): - return False - else: - return True diff --git a/tests/common/helpers/mellanox_thermal_control_test_helper.py b/tests/common/helpers/mellanox_thermal_control_test_helper.py index 226371ee070..786ce5352c9 100644 --- a/tests/common/helpers/mellanox_thermal_control_test_helper.py +++ b/tests/common/helpers/mellanox_thermal_control_test_helper.py @@ -3,15 +3,14 @@ import re import random import logging +import time from pkg_resources import parse_version -from tests.common.helpers.sensor_control_test_helper import mocker -from tests.common.helpers.thermal_control_test_helper import FanStatusMocker, ThermalStatusMocker, \ +from tests.common.helpers.thermal_control_test_helper import mocker, FanStatusMocker, ThermalStatusMocker, \ SingleFanMocker from tests.common.mellanox_data import get_hw_management_version, get_platform_data from tests.common.helpers.minimum_table import get_min_table from tests.common.utilities import wait_until from tests.common.helpers.assertions import pytest_assert -from tests.common.helpers.mellanox_sensor_control_test_helper import MockerBaseHelper, SysfsNotExistError NOT_AVAILABLE = 'N/A' @@ -166,7 +165,14 @@ def patch_thermal_rule(mock_helper): SUSPEND_FILE_PATH = "/var/run/hw-management/config/suspend" -class MockerHelper(MockerBaseHelper): +class SysfsNotExistError(Exception): + """ + Exception when sys fs not exist. + """ + pass + + +class MockerHelper: """ Mellanox specified mocker helper. """ @@ -200,8 +206,9 @@ def __init__(self, dut): Constructor of mocker helper. :param dut: DUT object representing a SONiC switch under test. """ - super().__init__(dut) + self.dut = dut self._extract_num_of_fans_and_fan_drawers() + self.deinit_retry = 5 def _extract_num_of_fans_and_fan_drawers(self): """ @@ -254,6 +261,28 @@ def mock_led_value(self, file_path, value): file_path = os.path.join(MockerHelper.LED_PATH, file_path) self.mock_value(file_path, value) + def mock_value(self, file_path, value, force=False): + """ + Unlink existing sys fs file and replace it with a new one. Write given value to the new file. + :param file_path: Sys fs file path. + :param value: Value to write to sys fs file. + :param force: Force mock even if the file does not exist. + :return: + """ + if file_path not in self.regular_file_list and file_path not in self.unlink_file_list: + out = self.dut.stat(path=file_path) + exist = True + if not out['stat']['exists']: + if force: + exist = False + else: + raise SysfsNotExistError('{} not exist'.format(file_path)) + if exist and out['stat']['islnk']: + self._unlink(file_path) + else: + self._cache_file_value(file_path, force) + self.dut.shell('echo \'{}\' > {}'.format(value, file_path)) + def read_thermal_value(self, file_path): """ Read thermal related sys fs file content. @@ -272,6 +301,110 @@ def read_led_value(self, file_path): file_path = os.path.join(MockerHelper.LED_PATH, file_path) return self.read_value(file_path) + def read_value(self, file_path): + """ + Read sys fs file content. + :param file_path: Sys fs file path. + :return: Content of sys fs file. + """ + out = self.dut.stat(path=file_path) + if not out['stat']['exists']: + raise SysfsNotExistError('{} not exist'.format(file_path)) + try: + output = self.dut.command("cat %s" % file_path) + value = output["stdout"] + return value.strip() + except Exception as e: + assert 0, "Get content from %s failed, exception: %s" % ( + file_path, repr(e)) + + def _cache_file_value(self, file_path, may_nexist=False): + """ + Cache file value for regular file. + :param file_path: Regular file path. + :return: + """ + try: + output = self.dut.command("cat %s" % file_path) + value = output["stdout"] + self.regular_file_list[file_path] = value.strip() + except Exception as e: + if may_nexist: + self.regular_file_list[file_path] = None + else: + assert 0, "Get content from %s failed, exception: %s" % ( + file_path, repr(e)) + + def _unlink(self, file_path): + """ + Unlink given sys fs file, record its soft link target. + :param file_path: Sys fs file path. + :return: + """ + readlink_output = self.dut.command('readlink {}'.format(file_path)) + self.unlink_file_list[file_path] = readlink_output["stdout"] + self.dut.command('unlink {}'.format(file_path)) + self.dut.command('touch {}'.format(file_path)) + self.dut.command('chown admin {}'.format(file_path)) + + def deinit(self): + """ + Destructor of MockerHelper. Re-link all sys fs files. + :return: + """ + failed_recover_links = {} + for file_path, link_target in list(self.unlink_file_list.items()): + try: + self.dut.command( + 'ln -f -s {} {}'.format(link_target, file_path)) + except Exception: + # Catch any exception for later retry + failed_recover_links[file_path] = link_target + + failed_recover_files = {} + for file_path, value in list(self.regular_file_list.items()): + try: + if value is None: + self.dut.shell('rm -f {}'.format(file_path)) + else: + self.dut.shell('echo \'{}\' > {}'.format(value, file_path)) + except Exception: + # Catch any exception for later retry + failed_recover_files[file_path] = value + + self.unlink_file_list.clear() + self.regular_file_list.clear() + # If there is any failed recover files, retry it + if failed_recover_links or failed_recover_files: + self.deinit_retry -= 1 + if self.deinit_retry > 0: + self.unlink_file_list = failed_recover_links + self.regular_file_list = failed_recover_files + # The failed files might be used by other sonic daemons, delay 1 second + # here to avoid conflict + time.sleep(1) + self.deinit() + else: + # We don't want to retry it infinite, and 5 times retry + # is enough, so if it still fails after the retry, it + # means there is probably an issue with our sysfs, we need + # mark it fail here + failed_recover_files.update(failed_recover_links) + error_message = "Failed to recover all files, failed files: {}".format( + failed_recover_files) + logging.error(error_message) + raise RuntimeError(error_message) + + def is_201911(self): + """ + Workaround to make thermal control test cases compatible with 201911 and master + :return: + """ + if parse_version(self.dut.kernel_version) > parse_version('4.9.0'): + return False + else: + return True + def has_thermal_updater(self): cmd = 'python3 -c "from sonic_platform import thermal_updater"' out = self.dut.shell(cmd, module_ignore_errors=True) diff --git a/tests/common/helpers/platform_api/liquid_cooling_leakage.py b/tests/common/helpers/platform_api/liquid_cooling_leakage.py deleted file mode 100644 index ad4dbfc8404..00000000000 --- a/tests/common/helpers/platform_api/liquid_cooling_leakage.py +++ /dev/null @@ -1,56 +0,0 @@ -""" -This module provides an interface to remotely interact with the liquid cooling leakage -of the DUT via platform API -""" - -import json -import logging - -logger = logging.getLogger(__name__) - - -def liquid_cooling_leakage_api(conn, name, args=None): - if args is None: - args = [] - conn.request( - 'POST', - f'/platform/chassis/liquid_cooling/{name}', - json.dumps({'args': args})) - resp = conn.getresponse() - res = json.loads(resp.read())['res'] - logger.info(f'Executing liquid cooling leakage API: "{name}", arguments: "{args}", result: "{res}"') - return res - - -def single_liquid_cooling_leakage_api(conn, leak_sensor_id, name, args=None): - if args is None: - args = [] - conn.request( - 'POST', - f'/platform/chassis/liquid_cooling/leak_sensor/{leak_sensor_id}/{name}', - json.dumps({'args': args})) - resp = conn.getresponse() - res = json.loads(resp.read())['res'] - logger.info(f'Executing liquid cooling leakage API: "{name}", \ - leak_sensor_id: "{leak_sensor_id}", arguments: "{args}", result: "{res}"') - return res - - -def get_name(conn, leak_sensor_id): - return single_liquid_cooling_leakage_api(conn, leak_sensor_id, 'get_name') - - -def is_leak(conn, leak_sensor_id): - return single_liquid_cooling_leakage_api(conn, leak_sensor_id, 'is_leak') - - -def get_num_leak_sensors(conn): - return liquid_cooling_leakage_api(conn, 'get_num_leak_sensors') - - -def get_leak_sensor_status(conn): - return liquid_cooling_leakage_api(conn, 'get_leak_sensor_status') - - -def get_all_leak_sensors(conn): - return liquid_cooling_leakage_api(conn, 'get_all_leak_sensors') diff --git a/tests/common/helpers/sensor_control_test_helper.py b/tests/common/helpers/sensor_control_test_helper.py deleted file mode 100644 index 7e846c08fc5..00000000000 --- a/tests/common/helpers/sensor_control_test_helper.py +++ /dev/null @@ -1,114 +0,0 @@ -import pytest -from tests.common.reboot import reboot - - -class BaseMocker: - """ - @summary: Base class for sensor control data mocker - - This base class defines the basic interface to be provided by base mocker. Mockers implemented by each - vendor must be a subclass of this base class. - """ - # Mocker type dictionary. Vendor must register their concrete mocker class to this dictionary. - _mocker_type_dict = {} - - def __init__(self, dut): - """ - Constructor of a mocker. - :param dut: DUT object representing a SONiC switch under test. - """ - self.dut = dut - - def mock_data(self): - """ - Generate mock data. - :return: - """ - pass - - def check_result(self, actual_data): - """ - Check actual data with mocked data. - :param actual_data: A dictionary contains actual command line data. Key of the dictionary is the unique id - of a line of command line data. For 'show platform fan', the key is FAN name. Value - of the dictionary is a list of field values for a line. - :return: True if actual data match mocked data else False - """ - pass - - def deinit(self): - """ - Destructor. Vendor specific clean up work should do here. - :return: - """ - pass - - @classmethod - def register_mocker_type(cls, name, mocker_type): - """ - Register mocker type with its name. - :param name: Name of a mocker type. For example: FanStatusMocker. - :param mocker_type: Class of a mocker. - :return: - """ - cls._mocker_type_dict[name] = mocker_type - - @classmethod - def get_mocker_type(cls, name): - """ - Get mocker type by its name. - :param name: Name of a mocker type. For example: FanStatusMocker. - :return: Class of a mocker. - """ - return cls._mocker_type_dict[name] if name in cls._mocker_type_dict else None - - -def mocker(type_name): - """ - Decorator for register mocker type. - :param type_name: Name of a mocker type. - :return: - """ - def wrapper(object_type): - BaseMocker.register_mocker_type(type_name, object_type) - return object_type - return wrapper - - -@pytest.fixture -def mocker_factory(localhost, duthosts, enum_rand_one_per_hwsku_hostname): - """ - Fixture for thermal control data mocker factory. - :return: A function for creating thermal control related data mocker. - """ - mockers = [] - duthost = duthosts[enum_rand_one_per_hwsku_hostname] - - def _create_mocker(dut, mocker_name): - """ - Create vendor specified mocker object by mocker name. - :param dut: DUT object representing a SONiC switch under test. - :param mocker_name: Name of a mocker type. - :return: Created mocker instance. - """ - platform = dut.facts['platform'] - mocker_object = None - - if 'mlnx' in platform or 'nvidia' in platform: - mocker_type = BaseMocker.get_mocker_type(mocker_name) - if mocker_type: - mocker_object = mocker_type(dut) - mockers.append(mocker_object) - else: - pytest.skip("No mocker defined for this platform {}".format(platform)) - return mocker_object - - yield _create_mocker - - try: - for m in mockers: - m.deinit() - except Exception as e: - reboot(duthost, localhost) - assert 0, "Caught exception while recovering from mock - {}".format( - repr(e)) diff --git a/tests/common/helpers/thermal_control_test_helper.py b/tests/common/helpers/thermal_control_test_helper.py index a42e0d2e4b2..d747c1246d5 100644 --- a/tests/common/helpers/thermal_control_test_helper.py +++ b/tests/common/helpers/thermal_control_test_helper.py @@ -7,8 +7,8 @@ from tests.common.utilities import wait_until from tests.common.helpers.assertions import pytest_assert from tests.common.config_reload import config_reload +from tests.common.reboot import reboot from tests.common.devices.sonic import SonicHost -from tests.common.helpers.sensor_control_test_helper import BaseMocker DUT_THERMAL_POLICY_FILE = '/usr/share/sonic/device/{}/thermal_policy.json' DUT_THERMAL_POLICY_BACKUP_FILE = '/usr/share/sonic/device/{}/thermal_policy.json.bak' @@ -21,6 +21,118 @@ expected_stopped_status = "STOPPED" +class BaseMocker: + """ + @summary: Base class for thermal control data mocker + + This base class defines the basic interface to be provided by base mocker. Mockers implemented by each + vendor must be a subclass of this base class. + """ + # Mocker type dictionary. Vendor must register their concrete mocker class to this dictionary. + _mocker_type_dict = {} + + def __init__(self, dut): + """ + Constructor of a mocker. + :param dut: DUT object representing a SONiC switch under test. + """ + self.dut = dut + + def mock_data(self): + """ + Generate mock data. + :return: + """ + pass + + def check_result(self, actual_data): + """ + Check actual data with mocked data. + :param actual_data: A dictionary contains actual command line data. Key of the dictionary is the unique id + of a line of command line data. For 'show platform fan', the key is FAN name. Value + of the dictionary is a list of field values for a line. + :return: True if actual data match mocked data else False + """ + pass + + def deinit(self): + """ + Destructor. Vendor specific clean up work should do here. + :return: + """ + pass + + @classmethod + def register_mocker_type(cls, name, mocker_type): + """ + Register mocker type with its name. + :param name: Name of a mocker type. For example: FanStatusMocker. + :param mocker_type: Class of a mocker. + :return: + """ + cls._mocker_type_dict[name] = mocker_type + + @classmethod + def get_mocker_type(cls, name): + """ + Get mocker type by its name. + :param name: Name of a mocker type. For example: FanStatusMocker. + :return: Class of a mocker. + """ + return cls._mocker_type_dict[name] if name in cls._mocker_type_dict else None + + +def mocker(type_name): + """ + Decorator for register mocker type. + :param type_name: Name of a mocker type. + :return: + """ + def wrapper(object_type): + BaseMocker.register_mocker_type(type_name, object_type) + return object_type + return wrapper + + +@pytest.fixture +def mocker_factory(localhost, duthosts, enum_rand_one_per_hwsku_hostname): + """ + Fixture for thermal control data mocker factory. + :return: A function for creating thermal control related data mocker. + """ + mockers = [] + duthost = duthosts[enum_rand_one_per_hwsku_hostname] + + def _create_mocker(dut, mocker_name): + """ + Create vendor specified mocker object by mocker name. + :param dut: DUT object representing a SONiC switch under test. + :param mocker_name: Name of a mocker type. + :return: Created mocker instance. + """ + platform = dut.facts['platform'] + mocker_object = None + + if 'mlnx' in platform or 'nvidia' in platform: + mocker_type = BaseMocker.get_mocker_type(mocker_name) + if mocker_type: + mocker_object = mocker_type(dut) + mockers.append(mocker_object) + else: + pytest.skip("No mocker defined for this platform {}".format(platform)) + return mocker_object + + yield _create_mocker + + try: + for m in mockers: + m.deinit() + except Exception as e: + reboot(duthost, localhost) + assert 0, "Caught exception while recovering from mock - {}".format( + repr(e)) + + class FanStatusMocker(BaseMocker): """ Fan status mocker. Vendor should implement this class to provide a FAN mocker. diff --git a/tests/platform_tests/api/test_liquid_cooling_leakage.py b/tests/platform_tests/api/test_liquid_cooling_leakage.py deleted file mode 100644 index 9b4dd083f9b..00000000000 --- a/tests/platform_tests/api/test_liquid_cooling_leakage.py +++ /dev/null @@ -1,68 +0,0 @@ -import logging -import pytest - - -from tests.common.helpers.platform_api import liquid_cooling_leakage -from .platform_api_test_base import PlatformApiTestBase -from tests.common.platform.device_utils import platform_api_conn # noqa: F401 -from tests.common.platform.device_utils import start_platform_api_service # noqa: F401 -from tests.common.mellanox_data import get_platform_data - -logger = logging.getLogger(__name__) - -pytestmark = [ - pytest.mark.topology('any'), - pytest.mark.device_type('physical') -] - - -class TestLiquidCoolingLeakage(PlatformApiTestBase): - ''' Platform API test cases for the Liquid Cooling Leakage class''' - - leak_sensors_num = None - chassis_facts = None - - @pytest.fixture(scope="function", autouse=True) - def setup(self, duthosts, enum_rand_one_per_hwsku_hostname): # noqa: F811 - duthost = duthosts[enum_rand_one_per_hwsku_hostname] - platform_data = get_platform_data(duthost) - self.leak_sensors_num = platform_data['leak_sensors']['number'] - - def test_get_name(self, platform_api_conn): # noqa: F811 - for leak_sensor_id in range(0, self.leak_sensors_num): - name = liquid_cooling_leakage.get_name(platform_api_conn, leak_sensor_id) - if self.expect(name is not None, f"Unable to retrieve liquid cooling leakage {leak_sensor_id} name"): - self.expect( - isinstance(name, str), f"The value type of leakage{leak_sensor_id} is not str") - if name != f"leakage{leak_sensor_id + 1}": - self.expect(False, f"leakage{leak_sensor_id} name is incorrect, \ - expected: leakage{leak_sensor_id + 1}, actual: {name}") - self.assert_expectations() - - def test_is_leak(self, platform_api_conn): # noqa: F811 - for leak_sensor_id in range(0, self.leak_sensors_num): - is_leak = liquid_cooling_leakage.is_leak(platform_api_conn, leak_sensor_id) - if self.expect(is_leak is not None, f"Unable to retrieve liquid cooling leakage {leak_sensor_id} is leak"): - self.expect( - isinstance(is_leak, bool), f"The value type of leakage{leak_sensor_id} is not bool") - if is_leak: - self.expect(False, f"leakage{leak_sensor_id} is incorrect, \ - expected: False, actual: {is_leak}") - self.assert_expectations() - - def test_get_leak_sensor_status(self, platform_api_conn): # noqa: F811 - leak_sensor_status_list = liquid_cooling_leakage.get_leak_sensor_status(platform_api_conn) - if leak_sensor_status_list: - self.expect(False, f"There is a leak sensor with status {leak_sensor_status_list}") - self.assert_expectations() - - def test_get_num_leak_sensors(self, platform_api_conn): # noqa: F811 - api_leak_sensor_num = liquid_cooling_leakage.get_num_leak_sensors(platform_api_conn) - assert api_leak_sensor_num == self.leak_sensors_num, \ - f"Leak sensor number mismatch, expected: {self.leak_sensors_num}, actual: {api_leak_sensor_num}" - - def test_get_all_leak_sensors(self, platform_api_conn): # noqa: F811 - api_leak_sensor_list = liquid_cooling_leakage.get_all_leak_sensors(platform_api_conn) - logger.info(f"Leak sensor list: {api_leak_sensor_list}") - assert len(api_leak_sensor_list) == self.leak_sensors_num, \ - f"Leak sensor list length mismatch, expected: {self.leak_sensors_num}, actual: {len(api_leak_sensor_list)}" diff --git a/tests/platform_tests/mellanox/check_sysfs.py b/tests/platform_tests/mellanox/check_sysfs.py index 9c7a516bc56..4999695056c 100644 --- a/tests/platform_tests/mellanox/check_sysfs.py +++ b/tests/platform_tests/mellanox/check_sysfs.py @@ -179,10 +179,6 @@ def check_sysfs(dut): assert sfp_temp_crit < sfp_temp_emergency, "Wrong SFP critical temp or emergency temp, " \ "critical temp: {} emergency temp: {}".format( str(sfp_temp_crit), str(sfp_temp_emergency)) - - logging.info("Check liquid cooling leakage related sysfs") - check_liquid_cooling_leakage_sysfs(dut, sysfs_facts) - logging.info("Finish checking sysfs") @@ -267,8 +263,6 @@ def generate_sysfs_config(dut, platform_data): if platform_data['psus']['hot_swappable']: config.append(generate_sysfs_psu_config(dut, platform_data)) config.append(generate_sysfs_sfp_config(platform_data)) - if 'liquid_cooling_leakage' in platform_data and platform_data['leak_sensors']['number'] > 0: - config.append(generate_sysfs_leakage_config(platform_data)) return config @@ -446,38 +440,3 @@ def generate_sysfs_sfp_config(platform_data): } ] } - - -def generate_sysfs_leakage_config(platform_data): - return { - 'name': 'leakage_info', - 'start': 1, - 'count': platform_data['leak_sensors']['number'], - 'type': 'increment', - 'properties': [ - { - 'name': 'status', - 'cmd_pattern': 'cat /var/run/hw-management/system/leakage{}', - } - ] - } - - -def check_liquid_cooling_leakage_sysfs(dut, sysfs_facts): - """ - @summary: Check liquid cooling leakage related sysfs under /var/run/hw-management/system/leakage - """ - leak_sensors_num = sysfs_facts['leakage_info']['count'] if 'leakage_info' in sysfs_facts else 0 - if leak_sensors_num == 0: - logging.info("Skip checking leakage related sysfs because no liquid cooling leakage sensors found on device") - return - actual_leak_sensors_num = int(len( - dut.command("ls /var/run/hw-management/system/leakage* |wc -l")['stdout'])) - assert leak_sensors_num <= actual_leak_sensors_num, \ - f"liquid cooling leakage sensors number mismatch, \ - expected: {leak_sensors_num}, actual: {actual_leak_sensors_num}" - - logging.info("Check liquid cooling leakage should be ok") - for i in range(leak_sensors_num): - leak_sensor_status = sysfs_facts['leakage_info']['status'][i] - assert leak_sensor_status == "1", f"Leak sensor {i} is not leak. leak_sensor_status: {leak_sensor_status}" diff --git a/tests/platform_tests/mellanox/test_psu_power_threshold.py b/tests/platform_tests/mellanox/test_psu_power_threshold.py index f81bbfd782a..611e35dc92b 100644 --- a/tests/platform_tests/mellanox/test_psu_power_threshold.py +++ b/tests/platform_tests/mellanox/test_psu_power_threshold.py @@ -6,7 +6,7 @@ from tests.common.helpers.assertions import pytest_assert from tests.common.mellanox_data import get_platform_data from tests.common.utilities import wait_until # noqa: F401 -from tests.common.helpers.sensor_control_test_helper import mocker_factory # noqa: F401 +from tests.common.helpers.thermal_control_test_helper import mocker_factory # noqa: F401 from tests.common.helpers.mellanox_thermal_control_test_helper import MockerHelper, PsuPowerThresholdMocker # noqa:F401 pytestmark = [ diff --git a/tests/platform_tests/mellanox/test_reboot_cause.py b/tests/platform_tests/mellanox/test_reboot_cause.py index 5a69ff9d9ab..04c8701cbb4 100644 --- a/tests/platform_tests/mellanox/test_reboot_cause.py +++ b/tests/platform_tests/mellanox/test_reboot_cause.py @@ -2,7 +2,7 @@ import logging import pytest from tests.common.reboot import REBOOT_TYPE_BIOS, REBOOT_TYPE_ASIC, check_reboot_cause -from tests.common.helpers.sensor_control_test_helper import mocker_factory # noqa: F401 +from tests.common.helpers.thermal_control_test_helper import mocker_factory # noqa: F401 pytestmark = [ pytest.mark.asic('mellanox'), diff --git a/tests/platform_tests/test_liquid_cooling_leakage_detection.py b/tests/platform_tests/test_liquid_cooling_leakage_detection.py deleted file mode 100644 index b9a9a21f1ee..00000000000 --- a/tests/platform_tests/test_liquid_cooling_leakage_detection.py +++ /dev/null @@ -1,119 +0,0 @@ -import pytest -import logging -import time -from tests.common.helpers.assertions import pytest_require -from tests.common.helpers.sensor_control_test_helper import mocker_factory # noqa: F401 -from tests.common.helpers.liquid_leakage_control_test_helper import verify_leakage_status, \ - verify_leakage_status_in_health_system, get_liquid_cooling_update_interval, is_liquid_cooling_system_supported, \ - startmonitor_gnmi_event, verify_gnmi_msg_is_sent, setup_gnmi_server, WAIT_GNMI_EVENT_TIMEOUT # noqa: F401 -from tests.common.mellanox_data import get_platform_data -from tests.common.helpers.mellanox_liquid_leakage_control_test_helper import MlxLiquidLeakageMocker # noqa: F401 -from tests.common.plugins.loganalyzer.loganalyzer import LogAnalyzer, get_bughandler_instance -from concurrent.futures import ThreadPoolExecutor - - -pytestmark = [ - pytest.mark.topology('any'), - pytest.mark.device_type('physical') -] - -logger = logging.getLogger(__name__) - - -@pytest.fixture(scope="module", autouse=True) -def skip_when_no_liquid_cooling_system(duthosts, enum_rand_one_per_hwsku_hostname): - duthost = duthosts[enum_rand_one_per_hwsku_hostname] - if not is_liquid_cooling_system_supported(duthost): - pytest.skip("No liquid cooling leakage sensors found on device") - - -def test_verify_liquid_senors_number_and_status(duthosts, enum_rand_one_per_hwsku_hostname): - """ - Verify the liquid sensors number and status. - """ - duthost = duthosts[enum_rand_one_per_hwsku_hostname] - platform_data = get_platform_data(duthost) - leak_sensors_num = platform_data['leak_sensors']['number'] - actual_leak_sensors_num = int(duthost.shell("ls /var/run/hw-management/system/leakage* |wc -l")['stdout']) - assert leak_sensors_num <= actual_leak_sensors_num, \ - f"liquid cooling leakage sensors number mismatch, \ - expected: {leak_sensors_num}, actual: {actual_leak_sensors_num}" - leak_sensor_index_list = list(range(1, leak_sensors_num + 1)) - verify_leakage_status(duthost, leak_sensor_index_list, 'No') - verify_leakage_status_in_health_system(duthost, leak_sensor_index_list, "OK") - - return 0 - - -@pytest.mark.disable_loganalyzer -def test_mock_liquid_leak_event( - duthosts, enum_rand_one_per_hwsku_hostname, mocker_factory, ptfhost, setup_gnmi_server): # noqa: F811 - """ - 1. Mock liquid leak event and verify the dut has the correct response. - 2. Mock liquid leak event is fixed and verify the dut has the correct response. - """ - duthost = duthosts[enum_rand_one_per_hwsku_hostname] - - logging.info("Start to mock liquid leak event.") - mocker = mocker_factory(duthost, 'LiquidLeakageMocker') - pytest_require(mocker, "No LiquidLeakageMocker for %s, skip rest of the testing in this case" % - duthost.facts['asic_type']) - - logging.info("mock liquid leak event") - loganalyzer = LogAnalyzer(ansible_host=duthost, - marker_prefix="test_mock_liquid_leak_event_mock_leak", - bughandler=get_bughandler_instance({"type": "default"})) - marker = loganalyzer.init() - loganalyzer.match_regex = [] - - with ThreadPoolExecutor(max_workers=1) as executor: - future = executor.submit(startmonitor_gnmi_event, duthost, ptfhost) - - logging.info('Mock liquid leakage event.') - mocker.mock_leakage() - logging.info('Wait and check actual data with mocked liquid leakage data...') - liquid_cooling_update_interval = get_liquid_cooling_update_interval(duthost) - time.sleep(liquid_cooling_update_interval) - mocker.verify_leakage() - loganalyzer.expect_regex = [] - expected_log_messages = [] - for index in mocker.test_leakage_index_list: - expected_log_messages.append(f".*Liquid cooling leakage sensor leakage{index} reported leaking.*") - loganalyzer.expect_regex.extend(expected_log_messages) - - loganalyzer.analyze(marker) - try: - logging.info("Wait for gnmi event result...") - result = future.result(timeout=WAIT_GNMI_EVENT_TIMEOUT) - verify_gnmi_msg_is_sent(mocker.test_leakage_index_list, result, "leaking") - except Exception as e: - logging.error(f"gNMI monitoring thread failed: {e}") - raise Exception(f"gNMI monitoring thread failed for mocking liquid leak event: {e}") - - logging.info("Mock liquid leak event is fixed.") - marker = loganalyzer.update_marker_prefix("test_mock_liquid_leak_event_mock_no_leak") - loganalyzer.match_regex = [] - - with ThreadPoolExecutor(max_workers=1) as executor: - future = executor.submit(startmonitor_gnmi_event, duthost, ptfhost) - - logging.info('Mock liquid leak event is fixed.') - mocker.mock_no_leakage() - logging.info('Wait and check actual data with mocked liquid leakage data...') - time.sleep(liquid_cooling_update_interval) - mocker.verify_no_leakage() - loganalyzer.match_regex = [] - expected_log_messages = [] - loganalyzer.expect_regex = [] - for index in mocker.test_leakage_index_list: - expected_log_messages.append(f".*Liquid cooling leakage sensor leakage{index} recovered from leaking.*") - loganalyzer.expect_regex.extend(expected_log_messages) - - loganalyzer.analyze(marker) - try: - logging.info("Wait for gnmi event result...") - result = future.result(timeout=WAIT_GNMI_EVENT_TIMEOUT) - verify_gnmi_msg_is_sent(mocker.test_leakage_index_list, result, "recovered") - except Exception as e: - logging.error(f"gNMI monitoring thread failed: {e}") - raise Exception(f"gNMI monitoring thread failed for mocking liquid leak event is fixed: {e}") diff --git a/tests/platform_tests/test_platform_info.py b/tests/platform_tests/test_platform_info.py index e24e8dd30c9..bdadf5a3cfc 100644 --- a/tests/platform_tests/test_platform_info.py +++ b/tests/platform_tests/test_platform_info.py @@ -16,10 +16,9 @@ from tests.common.utilities import wait_until, get_sup_node_or_random_node from tests.common.platform.device_utils import get_dut_psu_line_pattern from tests.platform_tests.cli.util import get_skip_mod_list -from tests.common.helpers.sensor_control_test_helper import mocker_factory # noqa: F401 from tests.common.helpers.thermal_control_test_helper import ThermalPolicyFileContext,\ check_cli_output_with_mocker, restart_thermal_control_daemon, check_thermal_algorithm_status, \ - disable_thermal_policy # noqa: F401 + mocker_factory, disable_thermal_policy # noqa: F401 pytestmark = [ pytest.mark.topology('any'), diff --git a/tests/snmp/test_snmp_phy_entity.py b/tests/snmp/test_snmp_phy_entity.py index 053ae0b3876..5eabc7cca0e 100644 --- a/tests/snmp/test_snmp_phy_entity.py +++ b/tests/snmp/test_snmp_phy_entity.py @@ -10,7 +10,7 @@ from tests.common.helpers.snmp_helpers import get_snmp_facts from tests.common.helpers.assertions import pytest_assert from tests.common.helpers.psu_helpers import turn_on_all_outlets, check_outlet_status, get_grouped_pdus_by_psu -from tests.common.helpers.sensor_control_test_helper import mocker_factory # noqa: F401 +from tests.common.helpers.thermal_control_test_helper import mocker_factory # noqa F401 pytestmark = [ pytest.mark.topology('any'),