Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
cf98224
Add make support for thermal control feature
Junchao-Mellanox Nov 28, 2019
f428c1c
[sonic_platform]fix issues found in thermal ctrl:
Nov 28, 2019
2b4089d
[sonic_platform]fix issues found in thermal ctrl:
Nov 28, 2019
25a3aef
[sonic_platform]add get_name for fan and psu
Nov 28, 2019
325840c
[sonic_platform]fix divide-by-zero error due to max speed for psu fan…
Dec 2, 2019
952393f
[thermal.py]handle get_high_critical_threshold
Dec 4, 2019
1d83717
Move task_base from sonic-platform-daemon to sonic-daemon-base so tha…
Junchao-Mellanox Dec 6, 2019
77a1d4b
Merge branch 'thermal-control' of https://github.com/Junchao-Mellanox…
Junchao-Mellanox Dec 12, 2019
ff562c9
Merge from stephen to fix thermal related issues
Junchao-Mellanox Dec 13, 2019
6f1e2c3
[fan.py]fix error in get_target_speed for PSU fan
Dec 13, 2019
fa8273b
Add thermal policy JSON file for mellanox devices
Junchao-Mellanox Dec 16, 2019
aeee985
Fix issue found by manual test
Junchao-Mellanox Dec 16, 2019
d27f67b
add default thermal control implementation to mellanox platform api a…
Junchao-Mellanox Dec 18, 2019
5773bb2
add unit test support for sonic thermalctld
Junchao-Mellanox Dec 18, 2019
9772e30
rename some functions to make it more readable according to Kebo comm…
Junchao-Mellanox Dec 19, 2019
d4de5d8
add check for speed value in thermal actions to avoid load invalid value
Junchao-Mellanox Dec 19, 2019
183a42b
add unit test to cover thermal conditions and actions
Junchao-Mellanox Dec 20, 2019
27803f3
Change according to stepan comment
Junchao-Mellanox Dec 23, 2019
af996c7
1. Add base class for FanConditions and PsuConditions; 2. Add default…
Junchao-Mellanox Dec 24, 2019
704228c
remove thermalctld from critical_processes
Junchao-Mellanox Dec 24, 2019
3d9bf8a
add default thermal manager getter in chassis.py
Junchao-Mellanox Dec 24, 2019
e479f4e
add implementation for start and stop thermal algorithm
Junchao-Mellanox Dec 25, 2019
c865626
add unit test for thermal algorithm related JSON config
Junchao-Mellanox Dec 26, 2019
dcc232e
adjust thermal_policy.json to make it compatible with current platfor…
Junchao-Mellanox Dec 26, 2019
bef6699
Change PSU FAN naming rule
Junchao-Mellanox Jan 7, 2020
7521051
Merge branch 'master' into thermal-policy
Junchao-Mellanox Jan 19, 2020
fbee0b0
Add SONIC_CONFIG_ENGINE as dependency to fix build issue in master br…
Junchao-Mellanox Feb 1, 2020
0606fec
Merge branch 'thermal-policy' of https://github.com/Junchao-Mellanox/…
Junchao-Mellanox Feb 1, 2020
0c271cf
Change policy.json to JSON policy file
Junchao-Mellanox Feb 1, 2020
71c4249
update submodule for thermal control feature
Junchao-Mellanox Feb 3, 2020
d3558df
Update submodule to fix conflict
Junchao-Mellanox Feb 4, 2020
0829793
fix build issue by change submodule pointer
Junchao-Mellanox Feb 4, 2020
0d10d75
Fix review comment: add a new line at the end of file
Junchao-Mellanox Feb 12, 2020
6ad3cbb
[Mellanox/hw-mgmt]Add patch which disables thermal policy in hw-mgmt
Feb 14, 2020
8e97863
[Mellanox/hw-mgmt]Update the patch of disabling thermal policy for hw…
Feb 25, 2020
93f521b
Merge remote-tracking branch 'origin/master' into thermal-policy
Junchao-Mellanox Feb 27, 2020
eda8e85
[thermal control] update submodule sonic-platform-daemons
Junchao-Mellanox Feb 27, 2020
6872831
[thermal control] update pointer for sonic-platform-common
Junchao-Mellanox Feb 28, 2020
8fad575
Merge remote-tracking branch 'stephen/disable-hw-mgmt-thermal-policy'…
Junchao-Mellanox Feb 28, 2020
3382e3d
Merge remote-tracking branch 'origin/master' into thermal-policy
Junchao-Mellanox Mar 5, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn2010-r0/thermal_policy.json
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn2100-r0/thermal_policy.json
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn2410-r0/thermal_policy.json
72 changes: 72 additions & 0 deletions device/mellanox/x86_64-mlnx_msn2700-r0/thermal_policy.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
{
"thermal_control_algorithm": {
"run_at_boot_up": "false",
"fan_speed_when_suspend": "60"
},
"info_types": [
{
"type": "fan_info"
},
{
"type": "psu_info"
},
{
"type": "chassis_info"
}
],
"policies": [
{
"name": "any fan absence",
"conditions": [
{
"type": "fan.any.absence"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
},
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "any psu absence",
"conditions": [
{
"type": "psu.any.absence"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
},
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "all fan and psu presence",
"conditions": [
{
"type": "fan.all.presence"
},
{
"type": "psu.all.presence"
}
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "60"
}
]
}
]
}
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn2740-r0/thermal_policy.json
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn3700-r0/thermal_policy.json
1 change: 1 addition & 0 deletions device/mellanox/x86_64-mlnx_msn3800-r0/thermal_policy.json
11 changes: 11 additions & 0 deletions dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,14 @@ stdout_logfile=syslog
stderr_logfile=syslog
startsecs=0
{% endif %}

{% if not skip_thermalctld %}
; Thermal control daemon; the whole section is omitted from the rendered
; config when skip_thermalctld is set in the template context.
; autostart is off, so supervisord does not launch the daemon by itself:
; start.sh.j2 runs "supervisorctl start thermalctld" explicitly.
[program:thermalctld]
command=/usr/bin/thermalctld
priority=9
autostart=false
autorestart=true
stdout_logfile=syslog
stderr_logfile=syslog
startsecs=0
{% endif %}
4 changes: 4 additions & 0 deletions dockers/docker-platform-monitor/start.sh.j2
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,7 @@ supervisorctl start psud
supervisorctl start syseepromd
{% endif %}

{% if not skip_thermalctld %}
supervisorctl start thermalctld
{% endif %}

Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
From 76b02916794be2e2558fcff1d11609a594f633d7 Mon Sep 17 00:00:00 2001
From: Stephen Sun <[email protected]>
Date: Fri, 14 Feb 2020 13:48:00 +0800
Subject: [PATCH] Disable thermal policy running in hw-mgmt service SONiC
thermal control algorithm has been supported.

Signed-off-by: Stephen Sun <[email protected]>
---
usr/usr/bin/hw-management.sh | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/usr/usr/bin/hw-management.sh b/usr/usr/bin/hw-management.sh
index 2cdbfb2..48b41d5 100755
--- a/usr/usr/bin/hw-management.sh
+++ b/usr/usr/bin/hw-management.sh
@@ -799,7 +799,11 @@ do_start()
#disabled for leopard chipless bringup.
echo 1 > $config_path/suspend

- $THERMAL_CONTROL $thermal_type $max_tachos $max_psus&
+#
+# Disable the thermal control algorithm in the hw-management service
+# because SONiC now runs its own thermal control algorithm.
+#
+# $THERMAL_CONTROL $thermal_type $max_tachos $max_psus&
}

do_stop()
--
1.9.1

1 change: 1 addition & 0 deletions platform/mellanox/mlnx-platform-api.mk
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
SONIC_PLATFORM_API_PY2 = mlnx_platform_api-1.0-py2-none-any.whl
$(SONIC_PLATFORM_API_PY2)_SRC_PATH = $(PLATFORM_PATH)/mlnx-platform-api
$(SONIC_PLATFORM_API_PY2)_PYTHON_VERSION = 2
$(SONIC_PLATFORM_API_PY2)_DEPENDS = $(SONIC_PLATFORM_COMMON_PY2) $(SONIC_DAEMON_BASE_PY2) $(SONIC_CONFIG_ENGINE)
SONIC_PYTHON_WHEELS += $(SONIC_PLATFORM_API_PY2)

export mlnx_platform_api_py2_wheel_path="$(addprefix $(PYTHON_WHEELS_PATH)/,$(SONIC_PLATFORM_API_PY2))"
2 changes: 2 additions & 0 deletions platform/mellanox/mlnx-platform-api/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.pyc
.cache/
3 changes: 3 additions & 0 deletions platform/mellanox/mlnx-platform-api/pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[pytest]
filterwarnings =
ignore::DeprecationWarning
2 changes: 2 additions & 0 deletions platform/mellanox/mlnx-platform-api/setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[aliases]
test=pytest
9 changes: 9 additions & 0 deletions platform/mellanox/mlnx-platform-api/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@
maintainer_email='[email protected]',
packages=[
'sonic_platform',
'tests'
],
setup_requires= [
'pytest-runner'
],
tests_require = [
'pytest',
'mock>=2.0.0'
],
classifiers=[
'Development Status :: 3 - Alpha',
Expand All @@ -26,5 +34,6 @@
'Topic :: Utilities',
],
keywords='sonic SONiC platform PLATFORM',
test_suite='setup.get_test_suite'
)

Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__all__ = ["platform", "chassis"]
from sonic_platform import *
from sonic_platform import *
Original file line number Diff line number Diff line change
Expand Up @@ -470,3 +470,8 @@ def get_change_event(self, timeout=0):
return True, {'sfp':port_dict}
else:
return True, {'sfp':{}}

def get_thermal_manager(self):
    """
    Retrieves the thermal manager class

    The import is done inside the method body, so the thermal_manager
    module is only loaded when this method is actually called.

    Returns:
        The ThermalManager class itself (not an instance of it)
    """
    from .thermal_manager import ThermalManager as manager_cls
    return manager_cls

18 changes: 13 additions & 5 deletions platform/mellanox/mlnx-platform-api/sonic_platform/fan.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,12 @@ def __init__(self, has_fan_dir, fan_index, drawer_index = 1, psu_fan = False):
self.fan_speed_set_path = "fan{}_speed_set".format(self.index)
self.fan_presence_path = "fan{}_status".format(self.drawer_index)
self.fan_max_speed_path = "fan{}_max".format(self.index)
self._name = "fan{}".format(fan_index + 1)
else:
self.fan_speed_get_path = "psu{}_fan1_speed_get".format(self.index)
self.fan_presence_path = "psu{}_fan1_speed_get".format(self.index)
self.fan_max_speed_path = "psu{}_max".format(self.index)
self._name = 'psu_{}_fan_{}'.format(self.index, fan_index)
self.fan_max_speed_path = None
self.fan_status_path = "fan{}_fault".format(self.index)
self.fan_green_led_path = "led_fan{}_green".format(self.drawer_index)
self.fan_red_led_path = "led_fan{}_red".format(self.drawer_index)
Expand Down Expand Up @@ -93,6 +95,9 @@ def get_direction(self):
raise RuntimeError("Failed to read fan direction status to {}".format(repr(e)))


def get_name(self):
    """
    Retrieves the name of this fan

    Returns:
        string: the name stored in self._name by the constructor, built
        there from either the "fan{}" or the "psu_{}_fan_{}" format
    """
    fan_name = self._name
    return fan_name

def get_status(self):
"""
Retrieves the operational status of fan
Expand Down Expand Up @@ -171,7 +176,11 @@ def get_speed(self):
speed_in_rpm = int(fan_curr_speed.read())
except (ValueError, IOError):
speed_in_rpm = 0


if self.fan_max_speed_path is None:
# in case of max speed unsupported, we just return speed in unit of RPM.
return speed_in_rpm

max_speed_in_rpm = self._get_max_speed_in_rpm()
speed = 100*speed_in_rpm/max_speed_in_rpm

Expand All @@ -185,11 +194,10 @@ def get_target_speed(self):
Returns:
int: percentage of the max fan speed
"""
speed = 0

if self.is_psu_fan:
# Not like system fan, psu fan speed can not be modified, so target speed is N/A
return speed
return self.get_speed()

try:
with open(os.path.join(FAN_PATH, self.fan_speed_set_path), 'r') as fan_pwm:
pwm = int(fan_pwm.read())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(self):
self._chassis.initialize_psu()
self._chassis.initialize_fan()
self._chassis.initialize_eeprom()
self._chassis.initialize_thermals()

def _is_host(self):
"""
Expand Down
6 changes: 5 additions & 1 deletion platform/mellanox/mlnx-platform-api/sonic_platform/psu.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def __init__(self, psu_index, sku):
psu_oper_status = "thermal/psu{}_pwr_status".format(self.index)
#psu_oper_status should always be present for all SKUs
self.psu_oper_status = os.path.join(self.psu_path, psu_oper_status)
self._name = "PSU{}".format(psu_index + 1)

if sku in hwsku_dict_psu:
filemap = psu_profile_list[hwsku_dict_psu[sku]]
Expand Down Expand Up @@ -100,7 +101,10 @@ def __init__(self, psu_index, sku):

fan = Fan(sku, psu_index, psu_index, True)
if fan.get_presence():
self._fan = fan
self._fan_list.append(fan)

def get_name(self):
    """
    Retrieves the name of this PSU

    Returns:
        string: the name stored in self._name by the constructor, built
        there as "PSU{}".format(psu_index + 1)
    """
    psu_name = self._name
    return psu_name

self.psu_green_led_path = "led_psu_green"
self.psu_red_led_path = "led_psu_red"
Expand Down
Loading