diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py index 950de0e681..49324bef49 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py @@ -439,6 +439,7 @@ def __init__(self, sfp_index, sfp_type=None, slot_id=0, linecard_port_count=0, l self.sn = None self.temp_high_threshold = None self.temp_critical_threshold = None + self.retry_read_threshold = 5 self.retry_read_vendor = 5 self.manufacturer = None self.part_number = None @@ -890,8 +891,10 @@ def reinit_if_sn_changed(self): self.temp_critical_threshold = None self.sn = self._get_serial() if self.sn is not None: + self.retry_read_threshold = 5 self.retry_read_vendor = 5 else: + self.retry_read_threshold = 0 self.retry_read_vendor = 0 return True return False @@ -965,7 +968,7 @@ def get_temperature_info(self): if not sw_control: return sw_control, None, None, None - sn_changed = self.reinit_if_sn_changed() + self.reinit_if_sn_changed() # software control, read from EEPROM temperature = super().get_temperature() if temperature is None: @@ -975,27 +978,44 @@ def get_temperature_info(self): # Temperature is not supported, no need read threshold return sw_control, 0.0, 0.0, 0.0 else: - if not sn_changed and self.temp_high_threshold is not None and self.temp_critical_threshold is not None: - return sw_control, temperature, self.temp_high_threshold, self.temp_critical_threshold - else: - # Read threshold from EEPROM - api = self.get_xcvr_api() - thresh_support = api.get_transceiver_thresholds_support() - if thresh_support is None: - # Failed to read threshold support field, no need read threshold - return sw_control, temperature, None, None - if thresh_support: - # Read threshold from EEPROM - self.temp_high_threshold = api.xcvr_eeprom.read(consts.TEMP_HIGH_WARNING_FIELD) - self.temp_critical_threshold = api.xcvr_eeprom.read(consts.TEMP_HIGH_ALARM_FIELD) - return sw_control, temperature, self.temp_high_threshold, self.temp_critical_threshold - else: - # No threshold support, use default threshold - return sw_control, temperature, 0.0, 0.0 + self._update_temperature_threshold(sw_control) + return sw_control, temperature, self.temp_high_threshold, self.temp_critical_threshold except: # module under initialization, return as temperature not supported return False, None, None, None + def _update_temperature_threshold(self, sw_control): + """Update temperature threshold + + Args: + sw_control (bool): True if software control, False if firmware control + """ + if self.retry_read_threshold <= 0: + return + self.temp_high_threshold = None + self.temp_critical_threshold = None + if sw_control: + api = self.get_xcvr_api() + if api: + thresh_support = api.get_transceiver_thresholds_support() + if thresh_support: + self.temp_high_threshold = api.xcvr_eeprom.read(consts.TEMP_HIGH_WARNING_FIELD) + self.temp_critical_threshold = api.xcvr_eeprom.read(consts.TEMP_HIGH_ALARM_FIELD) + else: + threshold_hi_file = f'/sys/module/sx_core/asic0/module{self.sdk_index}/temperature/threshold_hi' + threshold_critical_file = f'/sys/module/sx_core/asic0/module{self.sdk_index}/temperature/threshold_critical_hi' + + self.temp_high_threshold = utils.read_int_from_file(threshold_hi_file, log_func=None) + self.temp_high_threshold = self.temp_high_threshold / SFP_TEMPERATURE_SCALE + + self.temp_critical_threshold = utils.read_int_from_file(threshold_critical_file, log_func=None) + self.temp_critical_threshold = self.temp_critical_threshold / SFP_TEMPERATURE_SCALE + + if not self.temp_high_threshold or not self.temp_critical_threshold: + self.retry_read_threshold -= 1 + else: + self.retry_read_threshold = 0 + def get_temperature(self): """Get SFP temperature @@ -1028,11 +1048,12 @@ def get_temperature_warning_threshold(self): other float value if warning threshold is available """ try: - self.is_sw_control() + sw_control = self.is_sw_control() except: return 0.0 - self.temp_high_threshold = self._get_temperature_threshold(consts.TEMP_HIGH_WARNING_FIELD) + self.reinit_if_sn_changed() + self._update_temperature_threshold(sw_control) return self.temp_high_threshold def get_temperature_critical_threshold(self): @@ -1044,37 +1065,14 @@ def get_temperature_critical_threshold(self): other float value if critical threshold is available """ try: - self.is_sw_control() + sw_control = self.is_sw_control() except: return 0.0 - self.temp_critical_threshold = self._get_temperature_threshold(consts.TEMP_HIGH_ALARM_FIELD) + self.reinit_if_sn_changed() + self._update_temperature_threshold(sw_control) return self.temp_critical_threshold - def _get_temperature_threshold(self, thresh_field): - """Get temperature thresholds data from EEPROM - - Args: - thresh_field (str): threshold field name - - Returns: - float: temperature threshold - """ - sn_changed = self.reinit_if_sn_changed() - if not sn_changed: - if thresh_field == consts.TEMP_HIGH_WARNING_FIELD and self.temp_high_threshold is not None: - return self.temp_high_threshold - elif thresh_field == consts.TEMP_HIGH_ALARM_FIELD and self.temp_critical_threshold is not None: - return self.temp_critical_threshold - api = self.get_xcvr_api() - if not api: - return None - - thresh_support = api.get_transceiver_thresholds_support() - if thresh_support is None: - return None - return api.xcvr_eeprom.read(thresh_field) if thresh_support else 0.0 - def get_xcvr_api(self): """ Retrieves the XcvrApi associated with this SFP diff --git a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py index c4f67c83f7..09f2f6cd96 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py @@ -361,38 +361,71 @@ def test_get_temperature(self, mock_read, mock_exists): mock_read.return_value = 448 assert sfp.get_temperature() == 56.0 - def test_get_temperature_threshold(self): + @mock.patch('sonic_platform.utils.read_int_from_file') + def test_get_temperature_threshold(self, mock_read_int): sfp = SFP(0) - sfp.reinit_if_sn_changed = mock.MagicMock(return_value=True) sfp.is_sw_control = mock.MagicMock(return_value=True) - + mock_serial = 'some serial' mock_api = mock.MagicMock() - mock_api.get_transceiver_thresholds_support = mock.MagicMock(return_value=False) - sfp.get_xcvr_api = mock.MagicMock(return_value=None) - assert sfp.get_temperature_warning_threshold() is None - assert sfp.get_temperature_critical_threshold() is None - - sfp.get_xcvr_api.return_value = mock_api - assert sfp.get_temperature_warning_threshold() == 0.0 - assert sfp.get_temperature_critical_threshold() == 0.0 - - from sonic_platform_base.sonic_xcvr.fields import consts - mock_api.get_transceiver_thresholds_support.return_value = True mock_api.xcvr_eeprom = mock.MagicMock() - + from sonic_platform_base.sonic_xcvr.fields import consts def mock_read(field): if field == consts.TEMP_HIGH_ALARM_FIELD: return 85.0 elif field == consts.TEMP_HIGH_WARNING_FIELD: return 75.0 - + elif field == consts.VENDOR_SERIAL_NO_FIELD: + return mock_serial mock_api.xcvr_eeprom.read = mock.MagicMock(side_effect=mock_read) - assert sfp.get_temperature_warning_threshold() == 75.0 - assert sfp.get_temperature_critical_threshold() == 85.0 + + # No api object, means no access to EEPROM, expect None and retry_read_threshold - 1 + sfp.get_xcvr_api = mock.MagicMock(return_value=None) + retry = sfp.retry_read_threshold + assert sfp.get_temperature_warning_threshold() == None + assert sfp.retry_read_threshold == retry - 1 + retry = sfp.retry_read_threshold + assert sfp.get_temperature_critical_threshold() == None + assert sfp.retry_read_threshold == retry - 1 + + # No threshold support, expect None + mock_serial = 'some other serial' + mock_api.get_transceiver_thresholds_support = mock.MagicMock(return_value=False) + sfp.get_xcvr_api.return_value = mock_api + assert sfp.get_temperature_warning_threshold() == None + assert sfp.get_temperature_critical_threshold() == None + + # Threshold support, expect threshold values + mock_api.get_transceiver_thresholds_support.return_value = True - sfp.reinit_if_sn_changed.return_value = False assert sfp.get_temperature_warning_threshold() == 75.0 assert sfp.get_temperature_critical_threshold() == 85.0 + assert sfp.retry_read_threshold == 0 + + # No serial number, expect None and retry_read_threshold = 0 + mock_serial = None + assert sfp.get_temperature_warning_threshold() == None + assert sfp.get_temperature_critical_threshold() == None + assert sfp.retry_read_threshold == 0 + + # Firmware control, expect threshold values from sysfs + sfp.is_sw_control.return_value = False + mock_serial = "some serial" + def mock_read_int_side_effect(file_path, *args, **kwargs): + if 'threshold_hi' in file_path: + return 448 # 56.0 * 8.0 + elif 'threshold_critical_hi' in file_path: + return 480 # 60.0 * 8.0 + return None + + mock_read_int.side_effect = mock_read_int_side_effect + assert sfp.get_temperature_warning_threshold() == 56.0 + assert sfp.get_temperature_critical_threshold() == 60.0 + assert sfp.retry_read_threshold == 0 + + # Exception in is_sw_control, expect 0.0 + sfp.is_sw_control.side_effect = Exception('') + assert sfp.get_temperature_warning_threshold() == 0.0 + assert sfp.get_temperature_critical_threshold() == 0.0 @mock.patch('sonic_platform.utils.read_int_from_file') @mock.patch('sonic_platform.device_data.DeviceDataManager.is_module_host_management_mode') @@ -573,40 +606,24 @@ def test_set_lpmode(self, mock_read_int, mock_write): mock_write.assert_called_with('/sys/module/sx_core/asic0/module0/power_mode_policy', '3') @mock.patch('sonic_platform.sfp.SfpOptoeBase.get_temperature') - @mock.patch('sonic_platform.utils.read_int_from_file') - def test_get_temperature_info(self, mock_read_int, mock_super_get_temperature): + def test_get_temperature_info(self, mock_super_get_temperature): sfp = SFP(0) - sfp.reinit_if_sn_changed = mock.MagicMock(return_value=True) - sfp.is_sw_control = mock.MagicMock(return_value=False) - mock_api = mock.MagicMock() - mock_api.get_transceiver_thresholds_support = mock.MagicMock(return_value=True) - mock_api.xcvr_eeprom = mock.MagicMock() - from sonic_platform_base.sonic_xcvr.fields import consts - - def mock_read(field): - if field == consts.TEMP_HIGH_ALARM_FIELD: - return 85.0 - elif field == consts.TEMP_HIGH_WARNING_FIELD: - return 75.0 - mock_api.xcvr_eeprom.read = mock.MagicMock(side_effect=mock_read) - sfp.get_xcvr_api = mock.MagicMock(return_value=mock_api) + sfp.is_sw_control = mock.MagicMock(return_value=False) assert sfp.get_temperature_info() == (False, None, None, None) sfp.is_sw_control.return_value = True + sfp._update_temperature_threshold = mock.MagicMock() + sfp.temp_high_threshold = 75.0 + sfp.temp_critical_threshold = 85.0 mock_super_get_temperature.return_value = 58.0 assert sfp.get_temperature_info() == (True, 58.0, 75.0, 85.0) - mock_api.get_transceiver_thresholds_support.return_value = None - assert sfp.get_temperature_info() == (True, 58.0, None, None) - - mock_api.get_transceiver_thresholds_support.return_value = False - assert sfp.get_temperature_info() == (True, 58.0, 0.0, 0.0) + mock_super_get_temperature.return_value = None + assert sfp.get_temperature_info() == (True, None, None, None) - sfp.reinit_if_sn_changed.return_value = False - assert sfp.get_temperature_info() == (True, 58.0, 75.0, 85.0) - sfp.is_sw_control.side_effect = Exception('') - assert sfp.get_temperature_info() == (False, None, None, None) + mock_super_get_temperature.return_value = 0.0 + assert sfp.get_temperature_info() == (True, 0.0, 0.0, 0.0) @mock.patch('time.sleep', mock.MagicMock()) def test_get_temperature_info_vendor_retry_loop(self):