diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/smartswitch_thermal_updater.py b/platform/mellanox/mlnx-platform-api/sonic_platform/smartswitch_thermal_updater.py new file mode 100644 index 00000000000..da2253e77c3 --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/smartswitch_thermal_updater.py @@ -0,0 +1,152 @@ +# +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from . 
class SmartswitchThermalUpdater(ThermalUpdater):
    """Thermal updater that also pushes DPU temperature data to hw-management.

    On top of the base ThermalUpdater behaviour, this class periodically reads
    the temperature dictionary of every DPU in ``dpu_list`` and forwards the
    CPU, NVME and DDR readings through the setter functions registered in
    ``dpu_func_dict``.
    """

    def __init__(self, sfp_list, dpu_list, is_host_mgmt_mode=True):
        """
        :param sfp_list: SFP objects, passed through to ThermalUpdater.
        :param dpu_list: DPU module objects to poll.
        :param is_host_mgmt_mode: when True, start()/stop() delegate to the
            base class; when False only the timer is started/stopped.
        """
        super().__init__(sfp_list=sfp_list)
        self._dpu_list = dpu_list
        # Last recorded oper status per DPU name, used to detect transitions.
        self._dpu_status = {}
        self.host_mgmt_mode = is_host_mgmt_mode

    def load_tc_config_dpu(self):
        """Schedule the periodic DPU update on the timer.

        The interval is half of the 'poll_time' configured for the DPU module
        entry in TC_CONFIG_FILE. The default of 3 is used when the file yields
        no data, when 'poll_time' is missing/zero, or when it is not numeric.
        """
        dpu_poll_interval = 3
        data = utils.load_json_file(TC_CONFIG_FILE, log_func=None)
        if data:
            dev_parameters = data.get('dev_parameters') or {}
            # The config uses the literal pattern string as its key; this is a
            # plain dictionary lookup, not a regex match.
            dpu_parameter = dev_parameters.get('dpu\\d+_module') or {}
            dpu_poll_interval_config = dpu_parameter.get('poll_time')
            if dpu_poll_interval_config:
                try:
                    dpu_poll_interval = int(dpu_poll_interval_config) / 2
                except (TypeError, ValueError):
                    # A malformed value must not prevent the updater from starting.
                    logger.log_error(f'Invalid DPU poll_time {dpu_poll_interval_config} in {TC_CONFIG_FILE}, '
                                     f'use default polling interval')
        else:
            logger.log_notice(f'{TC_CONFIG_FILE} does not exist, use default polling interval')
        logger.log_notice(f'DPU polling interval: {dpu_poll_interval}')
        self._timer.schedule(dpu_poll_interval, self.update_dpu)

    def start(self):
        """Clear stale DPU data, schedule DPU polling and start updating."""
        self.clean_thermal_data_dpu()
        self.load_tc_config_dpu()
        if self.host_mgmt_mode:
            super().start()
        else:
            # Base class start() is skipped, so the timer is started here.
            self._timer.start()

    def stop(self):
        """Stop updating; mirrors the mode handling of start()."""
        if self.host_mgmt_mode:
            super().stop()
        else:
            self._timer.stop()

    def clean_thermal_data_dpu(self):
        """Clear the hw-management thermal data of every DPU in the list."""
        for dpu in self._dpu_list:
            self.thermal_data_dpu_clear(dpu.get_hw_mgmt_id())

    def thermal_data_dpu_clear(self, dpu_index):
        """Clear CPU core, DDR and drive thermal data for one DPU index."""
        hw_management_dpu_thermal_update.thermal_data_dpu_cpu_core_clear(dpu_index)
        hw_management_dpu_thermal_update.thermal_data_dpu_ddr_clear(dpu_index)
        hw_management_dpu_thermal_update.thermal_data_dpu_drive_clear(dpu_index)

    def get_dpu_temperature_data_from_dict_obj(self, dpu_component_temperature_data, field_name):
        """Read one field of a component dictionary as an integer.

        :param dpu_component_temperature_data: dictionary of one component.
        :param field_name: key to read from that dictionary.
        :return: tuple ``(int_value, fault_state)``; ``(0, True)`` when the
            field is missing, empty or cannot be converted to a number.
        """
        value = dpu_component_temperature_data.get(field_name)
        # Only a missing or empty field counts as "no data"; a numeric 0 is a
        # valid reading and must not be reported as a fault.
        if value is None or value == '':
            return 0, True
        try:
            # Values may be formatted as floats (e.g. '75.0'), hence float() first.
            return int(float(value)), False
        except (ValueError, TypeError, OverflowError):
            # TypeError: non-scalar value; OverflowError: 'inf'; ValueError: 'N/A', 'nan'.
            logger.log_error(f"Unable to obtain temperature data for DPU {field_name}: {value}")
            return 0, True

    def get_dpu_component_temperature_data(self, dpu_temperature_data, component_name):
        """Collect temperature and thresholds of one DPU component.

        :param dpu_temperature_data: dictionary keyed by component name.
        :param component_name: component key to look up.
        :return: tuple ``(temperature, high_threshold, critical_threshold,
            fault_value)`` where fault_value is ERROR_READ_THERMAL_DATA if any
            of the three fields could not be read, otherwise 0.
        """
        component_data = dpu_temperature_data.get(component_name) or {}
        readings = {}
        any_fault = False
        for field in (TEMPERATURE_DATA, HIGH_THRESH, CRIT_THRESH):
            readings[field], fault_state = self.get_dpu_temperature_data_from_dict_obj(component_data, field)
            any_fault = any_fault or fault_state
        fault_value = ERROR_READ_THERMAL_DATA if any_fault else 0
        return readings[TEMPERATURE_DATA], readings[HIGH_THRESH], readings[CRIT_THRESH], fault_value

    def update_dpu_temperature(self, dpu, fault_state=False):
        """Push the readings of every registered component for one DPU.

        :param dpu: DPU module object.
        :param fault_state: when True the DPU is not queried and every
            component is reported with zero values and the fault code.
        """
        dpu_temperature_data = {} if fault_state else dpu.get_temperature_dict()
        hw_mgmt_id = dpu.get_hw_mgmt_id()
        for key, func in dpu_func_dict.items():
            temp_data, temp_thresh, temp_crit_thresh, fault_val = \
                self.get_dpu_component_temperature_data(dpu_temperature_data, key)
            return_val = func(hw_mgmt_id, temp_data, temp_thresh, temp_crit_thresh, fault_val)
            if not return_val:
                logger.log_error(f"Unable to update Temperature data to hw-mgmt for {key} for {dpu.get_name()}")

    def update_single_dpu(self, dpu):
        """Update or clear the thermal data of one DPU based on its oper status.

        Online DPUs get their temperatures pushed on every call. For any other
        status the data is cleared once, on the transition. Any failure is
        logged and the DPU is reported as faulty instead of raising.
        """
        try:
            dpu_oper_status = dpu.get_oper_status()
            pre_oper_status = self._dpu_status.get(dpu.get_name())
            status_changed = pre_oper_status != dpu_oper_status
            if dpu_oper_status == DPU_STATUS_ONLINE:
                self.update_dpu_temperature(dpu)
            elif status_changed:
                # If dpu is shutdown from previous execution
                self.thermal_data_dpu_clear(dpu.get_hw_mgmt_id())
            if status_changed:
                # If there is a change in oper_status (irrespective of type of change)
                self._dpu_status[dpu.get_name()] = dpu_oper_status
        except Exception as e:
            # The fault report itself touches the DPU object and hw-management;
            # guard it so one broken DPU cannot abort the update of the others.
            try:
                logger.log_error(f'Failed to update DPU {dpu.get_hw_mgmt_id()} thermal data - {e}')
                self.update_dpu_temperature(dpu, fault_state=True)
            except Exception as fault_err:
                logger.log_error(f'Failed to report thermal fault for DPU - {fault_err}')

    def update_dpu(self):
        """Timer callback: refresh the thermal data of every DPU."""
        for dpu in self._dpu_list:
            self.update_single_dpu(dpu)
import smartswitch_thermal_updater from .device_data import DeviceDataManager @@ -33,12 +34,21 @@ def initialize(cls): and any other vendor specific initialization. :return: """ - if DeviceDataManager.is_module_host_management_mode(): + dpus_present = DeviceDataManager.get_platform_dpus_data() + host_mgmt_mode = DeviceDataManager.is_module_host_management_mode() + if not dpus_present and host_mgmt_mode: + # Non smart switch behaviour has highest priority from .chassis import Chassis - cls.thermal_updater_task = thermal_updater.ThermalUpdater(Chassis.chassis_instance.get_all_sfps()) + cls.thermal_updater_task = thermal_updater.ThermalUpdater(sfp_list=Chassis.chassis_instance.get_all_sfps()) + elif dpus_present: + from .chassis import Chassis + dpus = Chassis.chassis_instance.get_all_modules() + cls.thermal_updater_task = smartswitch_thermal_updater.SmartswitchThermalUpdater(sfp_list=Chassis.chassis_instance.get_all_sfps(), + dpu_list=dpus, + is_host_mgmt_mode=host_mgmt_mode) + if cls.thermal_updater_task: cls.thermal_updater_task.start() - @classmethod def deinitialize(cls): """ @@ -46,5 +56,5 @@ def deinitialize(cls): is a no-op. :return: """ - if DeviceDataManager.is_module_host_management_mode() and cls.thermal_updater_task: + if cls.thermal_updater_task: cls.thermal_updater_task.stop() diff --git a/platform/mellanox/mlnx-platform-api/tests/test_smartswsitch_thermal_updater.py b/platform/mellanox/mlnx-platform-api/tests/test_smartswsitch_thermal_updater.py new file mode 100644 index 00000000000..ce87afa6ddd --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/tests/test_smartswsitch_thermal_updater.py @@ -0,0 +1,220 @@ +# +# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class TestSmartSwitchThermalUpdater:
    """Unit tests for SmartswitchThermalUpdater start/stop and DPU updates."""

    @mock.patch('sonic_platform.utils.write_file')
    def test_configuration(self, mock_write):
        """Check start()/stop() in host management mode and in DPU-only mode."""
        dpu = mock.MagicMock()
        mock_sfp = mock.MagicMock()
        mock_sfp.sdk_index = 1
        self.reset_hw_mgmt_mocks()
        updater = SmartswitchThermalUpdater([mock_sfp], dpu_list=[dpu])
        # Expectation on start - clean is called for DPU, asic and module,
        # suspend is written as 0 and three polls are scheduled on the timer
        updater._timer = mock.MagicMock()
        mock_os_open = mock.mock_open(read_data=mock_tc_config)
        with mock.patch('sonic_platform.utils.open', mock_os_open):
            updater.start()
        hw_management_dpu_thermal_update.thermal_data_dpu_cpu_core_clear.assert_called_once_with(dpu.get_hw_mgmt_id())
        hw_management_dpu_thermal_update.thermal_data_dpu_ddr_clear.assert_called_once_with(dpu.get_hw_mgmt_id())
        hw_management_dpu_thermal_update.thermal_data_dpu_drive_clear.assert_called_once_with(dpu.get_hw_mgmt_id())
        hw_management_independent_mode_update.thermal_data_clean_asic.assert_called_once()
        hw_management_independent_mode_update.thermal_data_clean_module.assert_called_once()
        mock_write.assert_called_once_with('/run/hw-management/config/suspend', 0)
        assert updater._timer.schedule.call_count == 3
        # Called for DPU with time 24/2 = 12
        assert updater._timer.schedule.call_args_list[0][0][0] == 12
        # Expectation on stop - timer stop and suspend = 1
        mock_write.reset_mock()
        updater.stop()
        updater._timer.stop.assert_called_once()
        mock_write.assert_called_once_with('/run/hw-management/config/suspend', 1)
        mock_write.reset_mock()
        self.reset_hw_mgmt_mocks()
        updater = SmartswitchThermalUpdater(None, dpu_list=[dpu], is_host_mgmt_mode=False)
        # Expectation on start - clean is called for DPU only, nothing is
        # written to the suspend file and the asic/module data is untouched
        updater._timer = mock.MagicMock()
        updater.start()
        mock_write.assert_not_called()
        hw_management_dpu_thermal_update.thermal_data_dpu_cpu_core_clear.assert_called_once_with(dpu.get_hw_mgmt_id())
        hw_management_dpu_thermal_update.thermal_data_dpu_ddr_clear.assert_called_once_with(dpu.get_hw_mgmt_id())
        hw_management_dpu_thermal_update.thermal_data_dpu_drive_clear.assert_called_once_with(dpu.get_hw_mgmt_id())
        hw_management_independent_mode_update.thermal_data_clean_asic.assert_not_called()
        hw_management_independent_mode_update.thermal_data_clean_module.assert_not_called()
        # Expectation on stop - timer stop
        updater.stop()
        updater._timer.stop.assert_called_once()
        mock_write.assert_not_called()

    def test_update_dpu(self):
        """Check the values pushed to hw-management for valid, missing and invalid data."""
        self.reset_hw_mgmt_mocks()
        mock_dpu = mock.MagicMock()
        mock_dpu.get_hw_mgmt_id = mock.MagicMock(return_value=1)
        mock_dpu.get_name = mock.MagicMock(return_value="DPU0")
        mock_dpu.get_oper_status = mock.MagicMock(return_value="Online")
        temp_data = {
            "DDR": {'temperature': '75.0', 'high_threshold': '95', 'critical_high_threshold': '100'},
            "CPU": {'temperature': '82.0', 'high_threshold': '90', 'critical_high_threshold': '100'},
            "NVME": {'temperature': '91', 'high_threshold': '95', 'critical_high_threshold': '98'}
        }
        mock_dpu.get_temperature_dict = mock.MagicMock(return_value=temp_data)
        updater = SmartswitchThermalUpdater(sfp_list=None, dpu_list=[mock_dpu], is_host_mgmt_mode=False)
        updater.update_dpu()
        hw_management_dpu_thermal_update.thermal_data_dpu_ddr_set.assert_called_once_with(1, 75, 95, 100, 0)
        hw_management_dpu_thermal_update.thermal_data_dpu_cpu_core_set.assert_called_once_with(1, 82, 90, 100, 0)
        hw_management_dpu_thermal_update.thermal_data_dpu_drive_set.assert_called_once_with(1, 91, 95, 98, 0)
        # No temperature data at all - every component is reported as faulty
        mock_dpu.get_temperature_dict = mock.MagicMock(return_value={})
        self.reset_hw_mgmt_mocks()
        updater.update_dpu()
        hw_management_dpu_thermal_update.thermal_data_dpu_ddr_set.assert_called_once_with(1, 0, 0, 0, ERROR_READ_THERMAL_DATA)
        hw_management_dpu_thermal_update.thermal_data_dpu_cpu_core_set.assert_called_once_with(1, 0, 0, 0, ERROR_READ_THERMAL_DATA)
        hw_management_dpu_thermal_update.thermal_data_dpu_drive_set.assert_called_once_with(1, 0, 0, 0, ERROR_READ_THERMAL_DATA)
        func_dict = {
            "DDR": hw_management_dpu_thermal_update.thermal_data_dpu_ddr_set,
            "CPU": hw_management_dpu_thermal_update.thermal_data_dpu_cpu_core_set,
            "NVME": hw_management_dpu_thermal_update.thermal_data_dpu_drive_set,
        }
        for value in ["DDR", "CPU", "NVME"]:
            temp_data_without_entry = copy.deepcopy(temp_data)
            # One of the values in DDR, CPU and NVME is set to empty
            temp_data_without_entry[value] = {}
            mock_dpu.get_temperature_dict = mock.MagicMock(return_value=temp_data_without_entry)
            self.reset_hw_mgmt_mocks()
            updater.update_dpu()
            for key, func in func_dict.items():
                if key == value:
                    func.assert_called_once_with(1, 0, 0, 0, ERROR_READ_THERMAL_DATA)
                else:
                    func.assert_called_once_with(
                        1,
                        int(float(temp_data[key]['temperature'])),
                        int(float(temp_data[key]['high_threshold'])),
                        int(float(temp_data[key]['critical_high_threshold'])),
                        0)
            # One of the values in DDR, CPU and NVME is set to a string, can not convert to integer
            for field in ["temperature", "high_threshold", "critical_high_threshold"]:
                temp_data_invalid = copy.deepcopy(temp_data)
                temp_data_orig = copy.deepcopy(temp_data)
                temp_data_invalid[value][field] = "N/A"
                # The invalid field is expected to be reported as 0
                temp_data_orig[value][field] = 0
                mock_dpu.get_temperature_dict = mock.MagicMock(return_value=temp_data_invalid)
                self.reset_hw_mgmt_mocks()
                updater.update_dpu()
                for key, func in func_dict.items():
                    func.assert_called_once_with(
                        1,
                        int(float(temp_data_orig[key]['temperature'])),
                        int(float(temp_data_orig[key]['high_threshold'])),
                        int(float(temp_data_orig[key]['critical_high_threshold'])),
                        ERROR_READ_THERMAL_DATA if value == key else 0)
        # DPU goes offline - data is cleared and nothing is set
        self.reset_hw_mgmt_mocks()
        mock_dpu.get_oper_status = mock.MagicMock(return_value="Offline")
        updater.update_dpu()
        hw_management_dpu_thermal_update.thermal_data_dpu_cpu_core_clear.assert_called_once_with(1)
        hw_management_dpu_thermal_update.thermal_data_dpu_ddr_clear.assert_called_once_with(1)
        hw_management_dpu_thermal_update.thermal_data_dpu_drive_clear.assert_called_once_with(1)
        hw_management_dpu_thermal_update.thermal_data_dpu_ddr_set.assert_not_called()
        hw_management_dpu_thermal_update.thermal_data_dpu_cpu_core_set.assert_not_called()
        hw_management_dpu_thermal_update.thermal_data_dpu_drive_set.assert_not_called()
        # Clear is called only once
        updater.update_dpu()
        updater.update_dpu()
        hw_management_dpu_thermal_update.thermal_data_dpu_cpu_core_clear.assert_called_once_with(1)
        hw_management_dpu_thermal_update.thermal_data_dpu_ddr_clear.assert_called_once_with(1)
        hw_management_dpu_thermal_update.thermal_data_dpu_drive_clear.assert_called_once_with(1)
        # DPU comes back online - data is set again
        self.reset_hw_mgmt_mocks()
        mock_dpu.get_oper_status = mock.MagicMock(return_value="Online")
        mock_dpu.get_temperature_dict = mock.MagicMock(return_value=temp_data)
        updater.update_dpu()
        hw_management_dpu_thermal_update.thermal_data_dpu_ddr_set.assert_called_once_with(1, 75, 95, 100, 0)
        hw_management_dpu_thermal_update.thermal_data_dpu_cpu_core_set.assert_called_once_with(1, 82, 90, 100, 0)
        hw_management_dpu_thermal_update.thermal_data_dpu_drive_set.assert_called_once_with(1, 91, 95, 98, 0)
        # Multiple dpus
        mock_dpu1 = mock.MagicMock()
        mock_dpu1.get_hw_mgmt_id = mock.MagicMock(return_value=2)
        mock_dpu1.get_name = mock.MagicMock(return_value="DPU1")
        mock_dpu1.get_oper_status = mock.MagicMock(return_value="Online")
        temp_data_1 = copy.deepcopy(temp_data)
        temp_data_1["DDR"]["temperature"] = "52.0"
        temp_data_1["CPU"]["temperature"] = "20.0"
        temp_data_1["NVME"]["temperature"] = "100.0"
        mock_dpu1.get_temperature_dict = mock.MagicMock(return_value=temp_data_1)
        updater = SmartswitchThermalUpdater(sfp_list=None, dpu_list=[mock_dpu, mock_dpu1], is_host_mgmt_mode=False)
        self.reset_hw_mgmt_mocks()
        updater.update_dpu()
        assert hw_management_dpu_thermal_update.thermal_data_dpu_ddr_set.call_count == 2
        assert hw_management_dpu_thermal_update.thermal_data_dpu_cpu_core_set.call_count == 2
        assert hw_management_dpu_thermal_update.thermal_data_dpu_drive_set.call_count == 2
        assert hw_management_dpu_thermal_update.thermal_data_dpu_ddr_set.call_args_list \
            == [mock.call(1, 75, 95, 100, 0), mock.call(2, 52, 95, 100, 0)]
        assert hw_management_dpu_thermal_update.thermal_data_dpu_cpu_core_set.call_args_list \
            == [mock.call(1, 82, 90, 100, 0), mock.call(2, 20, 90, 100, 0)]
        assert hw_management_dpu_thermal_update.thermal_data_dpu_drive_set.call_args_list \
            == [mock.call(1, 91, 95, 98, 0), mock.call(2, 100, 95, 98, 0)]

    def reset_hw_mgmt_mocks(self):
        """Reset call records of every hw-management function the tests assert on."""
        hw_management_independent_mode_update.reset_mock()
        hw_management_independent_mode_update.thermal_data_clean_module.reset_mock()
        hw_management_independent_mode_update.thermal_data_clean_asic.reset_mock()
        hw_management_independent_mode_update.module_data_set_module_counter.reset_mock()
        hw_management_independent_mode_update.thermal_data_set_asic.reset_mock()
        hw_management_independent_mode_update.thermal_data_set_module.reset_mock()
        hw_management_dpu_thermal_update.reset_mock()
        # All three clear functions and all three set functions of the DPU module
        hw_management_dpu_thermal_update.thermal_data_dpu_ddr_clear.reset_mock()
        hw_management_dpu_thermal_update.thermal_data_dpu_drive_clear.reset_mock()
        hw_management_dpu_thermal_update.thermal_data_dpu_cpu_core_clear.reset_mock()
        hw_management_dpu_thermal_update.thermal_data_dpu_ddr_set.reset_mock()
        hw_management_dpu_thermal_update.thermal_data_dpu_cpu_core_set.reset_mock()
        hw_management_dpu_thermal_update.thermal_data_dpu_drive_set.reset_mock()
class TestThermalManager:
    """Unit tests for the thermal updater selection done by ThermalManager."""

    @mock.patch('sonic_platform.chassis.Chassis.chassis_instance', new_callable=mock.MagicMock)
    @mock.patch('sonic_platform.device_data.DeviceDataManager.is_module_host_management_mode')
    @mock.patch('sonic_platform.device_data.DeviceDataManager.get_platform_dpus_data')
    def test_updater_init(self, mock_dpus_data, mock_management_mode, mock_chassis_instance):
        """Walk through the DPU / host-management-mode combinations in sequence."""
        # Chassis hands out two SFPs and two DPU modules
        get_sfps = mock.MagicMock()
        get_modules = mock.MagicMock()
        get_sfps.return_value = ['sfp1', 'sfp2']
        get_modules.return_value = ['dpu1', 'dpu2']
        mock_chassis_instance.get_all_sfps = get_sfps
        mock_chassis_instance.get_all_modules = get_modules

        # Initial platform state: no DPUs, host management mode enabled
        mock_management_mode.return_value = True
        mock_dpus_data.return_value = {}

        thermal_patch = mock.patch('sonic_platform.thermal_updater.ThermalUpdater')
        sm_thermal_patch = mock.patch('sonic_platform.smartswitch_thermal_updater.SmartswitchThermalUpdater')
        with thermal_patch as mock_thermal, sm_thermal_patch as mock_sm_thermal:
            manager = ThermalManager()

            # Host mgmt mode, no DPUs are used for init
            manager.initialize()
            mock_thermal.assert_called_once_with(sfp_list=['sfp1', 'sfp2'])
            manager.deinitialize()
            manager.thermal_updater_task.stop.assert_called_once()

            # Not initialized if no DPUs and not in host mgmt mode
            mock_management_mode.return_value = False
            mock_thermal.reset_mock()
            manager.initialize()
            mock_thermal.assert_not_called()
            manager.deinitialize()
            manager.thermal_updater_task.stop.assert_called_once()

            # Initialized with DPUs if DPUs are present
            mock_dpus_data.return_value = {'DPUS': 'dpu1'}
            mock_thermal.reset_mock()
            manager.initialize()
            mock_sm_thermal.assert_called_once_with(
                sfp_list=['sfp1', 'sfp2'],
                dpu_list=['dpu1', 'dpu2'],
                is_host_mgmt_mode=False)
            manager.deinitialize()
            manager.thermal_updater_task.stop.assert_called_once()

            # Host mgmt mode, with DPUS
            mock_thermal.reset_mock()
            mock_sm_thermal.reset_mock()
            mock_management_mode.return_value = True
            manager.initialize()
            mock_sm_thermal.assert_called_once_with(
                sfp_list=['sfp1', 'sfp2'],
                dpu_list=['dpu1', 'dpu2'],
                is_host_mgmt_mode=True)
            manager.deinitialize()
            manager.thermal_updater_task.stop.assert_called_once()