Skip to content
Merged
90 changes: 44 additions & 46 deletions platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,7 @@ def __init__(self, sfp_index, sfp_type=None, slot_id=0, linecard_port_count=0, l
self.sn = None
self.temp_high_threshold = None
self.temp_critical_threshold = None
self.retry_read_threshold = 5
self.retry_read_vendor = 5
self.manufacturer = None
self.part_number = None
Expand Down Expand Up @@ -890,8 +891,10 @@ def reinit_if_sn_changed(self):
self.temp_critical_threshold = None
self.sn = self._get_serial()
if self.sn is not None:
self.retry_read_threshold = 5
self.retry_read_vendor = 5
else:
self.retry_read_threshold = 0
self.retry_read_vendor = 0
return True
return False
Expand Down Expand Up @@ -965,7 +968,7 @@ def get_temperature_info(self):
if not sw_control:
return sw_control, None, None, None

sn_changed = self.reinit_if_sn_changed()
self.reinit_if_sn_changed()
# software control, read from EEPROM
temperature = super().get_temperature()
if temperature is None:
Expand All @@ -975,27 +978,44 @@ def get_temperature_info(self):
# Temperature is not supported, no need read threshold
return sw_control, 0.0, 0.0, 0.0
else:
if not sn_changed and self.temp_high_threshold is not None and self.temp_critical_threshold is not None:
return sw_control, temperature, self.temp_high_threshold, self.temp_critical_threshold
else:
# Read threshold from EEPROM
api = self.get_xcvr_api()
thresh_support = api.get_transceiver_thresholds_support()
if thresh_support is None:
# Failed to read threshold support field, no need read threshold
return sw_control, temperature, None, None
if thresh_support:
# Read threshold from EEPROM
self.temp_high_threshold = api.xcvr_eeprom.read(consts.TEMP_HIGH_WARNING_FIELD)
self.temp_critical_threshold = api.xcvr_eeprom.read(consts.TEMP_HIGH_ALARM_FIELD)
return sw_control, temperature, self.temp_high_threshold, self.temp_critical_threshold
else:
# No threshold support, use default threshold
return sw_control, temperature, 0.0, 0.0
self._update_temperature_threshold(sw_control)
return sw_control, temperature, self.temp_high_threshold, self.temp_critical_threshold
except:
# module under initialization, return as temperature not supported
return False, None, None, None

def _update_temperature_threshold(self, sw_control):
"""Update temperature threshold

Args:
sw_control (bool): True if software control, False if firmware control
"""
if self.retry_read_threshold <= 0:
return
self.temp_high_threshold = None
self.temp_critical_threshold = None
if sw_control:
api = self.get_xcvr_api()
if api:
thresh_support = api.get_transceiver_thresholds_support()
if thresh_support:
self.temp_high_threshold = api.xcvr_eeprom.read(consts.TEMP_HIGH_WARNING_FIELD)
self.temp_critical_threshold = api.xcvr_eeprom.read(consts.TEMP_HIGH_ALARM_FIELD)
else:
threshold_hi_file = f'/sys/module/sx_core/asic0/module{self.sdk_index}/temperature/threshold_hi'
threshold_critical_file = f'/sys/module/sx_core/asic0/module{self.sdk_index}/temperature/threshold_critical_hi'

self.temp_high_threshold = utils.read_int_from_file(threshold_hi_file, log_func=None)
self.temp_high_threshold = self.temp_high_threshold / SFP_TEMPERATURE_SCALE

self.temp_critical_threshold = utils.read_int_from_file(threshold_critical_file, log_func=None)
self.temp_critical_threshold = self.temp_critical_threshold / SFP_TEMPERATURE_SCALE

if not self.temp_high_threshold or not self.temp_critical_threshold:
self.retry_read_threshold -= 1
else:
self.retry_read_threshold = 0

def get_temperature(self):
"""Get SFP temperature

Expand Down Expand Up @@ -1028,11 +1048,12 @@ def get_temperature_warning_threshold(self):
other float value if warning threshold is available
"""
try:
self.is_sw_control()
sw_control = self.is_sw_control()
except:
return 0.0

self.temp_high_threshold = self._get_temperature_threshold(consts.TEMP_HIGH_WARNING_FIELD)
self.reinit_if_sn_changed()
self._update_temperature_threshold(sw_control)
return self.temp_high_threshold

def get_temperature_critical_threshold(self):
Expand All @@ -1044,37 +1065,14 @@ def get_temperature_critical_threshold(self):
other float value if critical threshold is available
"""
try:
self.is_sw_control()
sw_control = self.is_sw_control()
except:
return 0.0

self.temp_critical_threshold = self._get_temperature_threshold(consts.TEMP_HIGH_ALARM_FIELD)
self.reinit_if_sn_changed()
self._update_temperature_threshold(sw_control)
return self.temp_critical_threshold

def _get_temperature_threshold(self, thresh_field):
"""Get temperature thresholds data from EEPROM

Args:
thresh_field (str): threshold field name

Returns:
float: temperature threshold
"""
sn_changed = self.reinit_if_sn_changed()
if not sn_changed:
if thresh_field == consts.TEMP_HIGH_WARNING_FIELD and self.temp_high_threshold is not None:
return self.temp_high_threshold
elif thresh_field == consts.TEMP_HIGH_ALARM_FIELD and self.temp_critical_threshold is not None:
return self.temp_critical_threshold
api = self.get_xcvr_api()
if not api:
return None

thresh_support = api.get_transceiver_thresholds_support()
if thresh_support is None:
return None
return api.xcvr_eeprom.read(thresh_field) if thresh_support else 0.0

def get_xcvr_api(self):
"""
Retrieves the XcvrApi associated with this SFP
Expand Down
105 changes: 61 additions & 44 deletions platform/mellanox/mlnx-platform-api/tests/test_sfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,38 +361,71 @@ def test_get_temperature(self, mock_read, mock_exists):
mock_read.return_value = 448
assert sfp.get_temperature() == 56.0

def test_get_temperature_threshold(self):
@mock.patch('sonic_platform.utils.read_int_from_file')
def test_get_temperature_threshold(self, mock_read_int):
sfp = SFP(0)
sfp.reinit_if_sn_changed = mock.MagicMock(return_value=True)
sfp.is_sw_control = mock.MagicMock(return_value=True)

mock_serial = 'some serial'
mock_api = mock.MagicMock()
mock_api.get_transceiver_thresholds_support = mock.MagicMock(return_value=False)
sfp.get_xcvr_api = mock.MagicMock(return_value=None)
assert sfp.get_temperature_warning_threshold() is None
assert sfp.get_temperature_critical_threshold() is None

sfp.get_xcvr_api.return_value = mock_api
assert sfp.get_temperature_warning_threshold() == 0.0
assert sfp.get_temperature_critical_threshold() == 0.0

from sonic_platform_base.sonic_xcvr.fields import consts
mock_api.get_transceiver_thresholds_support.return_value = True
mock_api.xcvr_eeprom = mock.MagicMock()

from sonic_platform_base.sonic_xcvr.fields import consts
def mock_read(field):
if field == consts.TEMP_HIGH_ALARM_FIELD:
return 85.0
elif field == consts.TEMP_HIGH_WARNING_FIELD:
return 75.0

elif field == consts.VENDOR_SERIAL_NO_FIELD:
return mock_serial
mock_api.xcvr_eeprom.read = mock.MagicMock(side_effect=mock_read)
assert sfp.get_temperature_warning_threshold() == 75.0
assert sfp.get_temperature_critical_threshold() == 85.0

# No api object, means no access to EEPROM, expect None and retry_read_threshold - 1
sfp.get_xcvr_api = mock.MagicMock(return_value=None)
retry = sfp.retry_read_threshold
assert sfp.get_temperature_warning_threshold() == None
assert sfp.retry_read_threshold == retry - 1
retry = sfp.retry_read_threshold
assert sfp.get_temperature_critical_threshold() == None
assert sfp.retry_read_threshold == retry - 1

# No threshold support, expect None
mock_serial = 'some other serial'
mock_api.get_transceiver_thresholds_support = mock.MagicMock(return_value=False)
sfp.get_xcvr_api.return_value = mock_api
assert sfp.get_temperature_warning_threshold() == None
assert sfp.get_temperature_critical_threshold() == None

# Threshold support, expect threshold values
mock_api.get_transceiver_thresholds_support.return_value = True

sfp.reinit_if_sn_changed.return_value = False
assert sfp.get_temperature_warning_threshold() == 75.0
assert sfp.get_temperature_critical_threshold() == 85.0
assert sfp.retry_read_threshold == 0

# No serial number, expect None and retry_read_threshold = 0
mock_serial = None
assert sfp.get_temperature_warning_threshold() == None
assert sfp.get_temperature_critical_threshold() == None
assert sfp.retry_read_threshold == 0

# Firmware control, expect threshold values from sysfs
sfp.is_sw_control.return_value = False
mock_serial = "some serial"
def mock_read_int_side_effect(file_path, *args, **kwargs):
if 'threshold_hi' in file_path:
return 448 # 56.0 * 8.0
elif 'threshold_critical_hi' in file_path:
return 480 # 60.0 * 8.0
return None

mock_read_int.side_effect = mock_read_int_side_effect
assert sfp.get_temperature_warning_threshold() == 56.0
assert sfp.get_temperature_critical_threshold() == 60.0
assert sfp.retry_read_threshold == 0

# Exception in is_sw_control, expect 0.0
sfp.is_sw_control.side_effect = Exception('')
assert sfp.get_temperature_warning_threshold() == 0.0
assert sfp.get_temperature_critical_threshold() == 0.0

@mock.patch('sonic_platform.utils.read_int_from_file')
@mock.patch('sonic_platform.device_data.DeviceDataManager.is_module_host_management_mode')
Expand Down Expand Up @@ -573,40 +606,24 @@ def test_set_lpmode(self, mock_read_int, mock_write):
mock_write.assert_called_with('/sys/module/sx_core/asic0/module0/power_mode_policy', '3')

@mock.patch('sonic_platform.sfp.SfpOptoeBase.get_temperature')
@mock.patch('sonic_platform.utils.read_int_from_file')
def test_get_temperature_info(self, mock_read_int, mock_super_get_temperature):
def test_get_temperature_info(self, mock_super_get_temperature):
sfp = SFP(0)
sfp.reinit_if_sn_changed = mock.MagicMock(return_value=True)
sfp.is_sw_control = mock.MagicMock(return_value=False)
mock_api = mock.MagicMock()
mock_api.get_transceiver_thresholds_support = mock.MagicMock(return_value=True)
mock_api.xcvr_eeprom = mock.MagicMock()
from sonic_platform_base.sonic_xcvr.fields import consts

def mock_read(field):
if field == consts.TEMP_HIGH_ALARM_FIELD:
return 85.0
elif field == consts.TEMP_HIGH_WARNING_FIELD:
return 75.0

mock_api.xcvr_eeprom.read = mock.MagicMock(side_effect=mock_read)
sfp.get_xcvr_api = mock.MagicMock(return_value=mock_api)
sfp.is_sw_control = mock.MagicMock(return_value=False)
assert sfp.get_temperature_info() == (False, None, None, None)

sfp.is_sw_control.return_value = True
sfp._update_temperature_threshold = mock.MagicMock()
sfp.temp_high_threshold = 75.0
sfp.temp_critical_threshold = 85.0
mock_super_get_temperature.return_value = 58.0
assert sfp.get_temperature_info() == (True, 58.0, 75.0, 85.0)

mock_api.get_transceiver_thresholds_support.return_value = None
assert sfp.get_temperature_info() == (True, 58.0, None, None)

mock_api.get_transceiver_thresholds_support.return_value = False
assert sfp.get_temperature_info() == (True, 58.0, 0.0, 0.0)
mock_super_get_temperature.return_value = None
assert sfp.get_temperature_info() == (True, None, None, None)

sfp.reinit_if_sn_changed.return_value = False
assert sfp.get_temperature_info() == (True, 58.0, 75.0, 85.0)
sfp.is_sw_control.side_effect = Exception('')
assert sfp.get_temperature_info() == (False, None, None, None)
mock_super_get_temperature.return_value = 0.0
assert sfp.get_temperature_info() == (True, 0.0, 0.0, 0.0)

@mock.patch('time.sleep', mock.MagicMock())
def test_get_temperature_info_vendor_retry_loop(self):
Expand Down
Loading