Skip to content

Commit 5d45759

Browse files
authored
[Mellanox] Support PSU power threshold checking (#11863)
* Support power threshold
  Signed-off-by: Stephen Sun <[email protected]>
* get_psu_power_warning_threshold => get_psu_power_warning_suppress_threshold
  Signed-off-by: Stephen Sun <[email protected]>
* Fix comments
  Signed-off-by: Stephen Sun <[email protected]>

Signed-off-by: Stephen Sun <[email protected]>
1 parent f402e6b commit 5d45759

File tree

2 files changed

+112
-0
lines changed

2 files changed

+112
-0
lines changed

platform/mellanox/mlnx-platform-api/sonic_platform/psu.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,11 @@ class Psu(FixedPsu):
216216
PSU_VPD = "eeprom/psu{}_vpd"
217217
PSU_CURRENT_IN = "power/psu{}_curr_in"
218218
PSU_VOLT_IN = "power/psu{}_volt_in"
219+
PORT_AMBIENT_TEMP = os.path.join(PSU_PATH, "thermal/port_amb")
220+
FAN_AMBIENT_TEMP = os.path.join(PSU_PATH, "thermal/fan_amb")
221+
AMBIENT_TEMP_CRITICAL_THRESHOLD = os.path.join(PSU_PATH, "config/amb_tmp_crit_limit")
222+
AMBIENT_TEMP_WARNING_THRESHOLD = os.path.join(PSU_PATH, "config/amb_tmp_warn_limit")
223+
PSU_POWER_SLOPE = os.path.join(PSU_PATH, "config/psu_power_slope")
219224

220225
shared_led = None
221226

@@ -235,6 +240,8 @@ def __init__(self, psu_index):
235240
self.psu_power_max = self.psu_power + "_max"
236241
self.psu_presence = os.path.join(PSU_PATH, "thermal/psu{}_status".format(self.index))
237242

243+
self.psu_power_max_capacity = os.path.join(PSU_PATH, "config/psu{}_power_capacity".format(self.index))
244+
238245
self.psu_temp = os.path.join(PSU_PATH, 'thermal/psu{}_temp'.format(self.index))
239246
self.psu_temp_threshold = os.path.join(PSU_PATH, 'thermal/psu{}_temp_max'.format(self.index))
240247

@@ -505,6 +512,56 @@ def get_input_current(self):
505512
return float(amperes) / 1000
506513
return None
507514

515+
def _get_psu_power_threshold(self, temp_threshold_path):
    """
    Derive the power threshold of this PSU from its maximum power capacity,
    de-rated by how far the ambient temperature exceeds a temperature limit.

        ambient = min(fan_amb, port_amb)
        if ambient < limit:
            threshold = max capacity
        else:
            threshold = max capacity - slope * (ambient - limit)

    A result that is zero or below is logged as a warning and clamped to 0.

    Args:
        temp_threshold_path: path of the file holding the ambient temperature
            limit to compare against (the warning or the critical limit).

    Returns:
        The threshold as a float, i.e. the raw computed value divided by
        1,000,000 (presumably micro-watts converted to watts, which is what
        the public getters document -- confirm against the sysfs units).
        None when the PSU power is not good or when the power capacity file
        of this PSU does not exist.
    """
    # No threshold can be reported for a PSU that is not delivering good power.
    if not self.get_powergood_status():
        return None

    # A missing capacity file means the threshold cannot be computed at all.
    if not os.path.exists(self.psu_power_max_capacity):
        return None

    capacity = utils.read_int_from_file(self.psu_power_max_capacity)
    limit_temp = utils.read_int_from_file(temp_threshold_path)
    fan_temp = utils.read_int_from_file(Psu.FAN_AMBIENT_TEMP)
    port_temp = utils.read_int_from_file(Psu.PORT_AMBIENT_TEMP)

    # The cooler of the two ambient sensors drives the de-rating.
    coolest = min(fan_temp, port_temp)

    if coolest < limit_temp:
        threshold = capacity
    else:
        # The slope is only needed (and only read) once the limit is reached.
        slope = utils.read_int_from_file(Psu.PSU_POWER_SLOPE)
        threshold = capacity - (coolest - limit_temp) * slope

    if threshold <= 0:
        logger.log_warning('Got negative PSU power threshold {} for {}'.format(threshold, self.get_name()))
        threshold = 0

    return float(threshold) / 1000000
542+
543+
def get_psu_power_warning_suppress_threshold(self):
    """
    Return the power level used as the warning suppress threshold of this PSU.

    On Mellanox platforms this value is translated from the `warning threshold`:
    it is computed against the ambient temperature warning limit. The result
    is volatile, so callers should query it every time instead of caching it.

    Returns:
        A float number, the warning suppress threshold of the PSU in watts,
        or None when the threshold cannot be determined.
    """
    warning_limit_path = Psu.AMBIENT_TEMP_WARNING_THRESHOLD
    return self._get_psu_power_threshold(warning_limit_path)
553+
554+
def get_psu_power_critical_threshold(self):
    """
    Return the critical power threshold of this PSU.

    The value is computed against the ambient temperature critical limit.
    The result is volatile, so callers should query it every time instead
    of caching it.

    Returns:
        A float number, the critical threshold of the PSU in watts,
        or None when the threshold cannot be determined.
    """
    critical_limit_path = Psu.AMBIENT_TEMP_CRITICAL_THRESHOLD
    return self._get_psu_power_threshold(critical_limit_path)
563+
564+
508565
class InvalidPsuVolWA:
509566
"""This class is created as a workaround for a known hardware issue that the PSU voltage threshold could be a
510567
invalid value 127998. Once we read a voltage threshold value equal to 127998, we should do following:

platform/mellanox/mlnx-platform-api/tests/test_psu.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,3 +161,58 @@ def get_entry_value(key):
161161
vpd_info[InvalidPsuVolWA.CAPACITY_FIELD] = InvalidPsuVolWA.EXPECT_CAPACITY
162162
assert InvalidPsuVolWA.run(psu, InvalidPsuVolWA.INVALID_VOLTAGE_VALUE, '') == 9999
163163
mock_run_command.assert_called_with(['sensors', '-s'])
164+
165+
@mock.patch('os.path.exists', mock.MagicMock(return_value=True))
@mock.patch('sonic_platform.utils.read_int_from_file')
def test_psu_power_threshold(self, mock_read_int_from_file):
    """
    Check the warning suppress / critical power thresholds in three ambient
    temperature scenarios: below both limits, past the warning limit only,
    and past both limits.
    """
    Psu.all_psus_support_power_threshold = True
    psu = Psu(0)

    # Values that stay the same in every scenario, keyed by the file path read.
    common_info = {
        psu.psu_oper_status: 1,
        psu.psu_power_max_capacity: 100000000,
        psu.AMBIENT_TEMP_CRITICAL_THRESHOLD: 65000,
        psu.AMBIENT_TEMP_WARNING_THRESHOLD: 55000,
        psu.PSU_POWER_SLOPE: 2000
    }

    # Per-scenario sensor readings plus the thresholds expected for them.
    normal_data = {
        psu.PORT_AMBIENT_TEMP: 55000,
        psu.FAN_AMBIENT_TEMP: 50000,
        'warning_threshold': 100.0,
        'critical_threshold': 100.0
    }
    warning_data = {
        psu.PORT_AMBIENT_TEMP: 65000,
        psu.FAN_AMBIENT_TEMP: 60000,
        'warning_threshold': 90.0,
        'critical_threshold': 100.0
    }
    critical_data = {
        psu.PORT_AMBIENT_TEMP: 70000,
        psu.FAN_AMBIENT_TEMP: 75000,
        'warning_threshold': 70.0,
        'critical_threshold': 90.0
    }

    test_data = {}

    def mock_side_effect(path):
        # Shared values win; anything else comes from the active scenario,
        # which is looked up at call time through the enclosing `test_data`.
        return common_info[path] if path in common_info else test_data[path]

    mock_read_int_from_file.side_effect = mock_side_effect

    for scenario in (normal_data, warning_data, critical_data):
        test_data = scenario
        assert psu.get_psu_power_warning_suppress_threshold() == scenario['warning_threshold']
        assert psu.get_psu_power_critical_threshold() == scenario['critical_threshold']
214+
215+
def test_psu_not_support_power_threshold(self):
    """Both threshold getters must report None when no threshold is available."""
    psu = Psu(0)

    warning_threshold = psu.get_psu_power_warning_suppress_threshold()
    assert warning_threshold is None

    critical_threshold = psu.get_psu_power_critical_threshold()
    assert critical_threshold is None

0 commit comments

Comments
 (0)