Skip to content

Commit 1a94141

Browse files
[Mellanox] Adjust log level to avoid too many thermal logs (sonic-net#4631)
* Trigger thermal action log only if thermal condition changes
* Test file existence before reading file content
* Fix error when setting PSU fan speed
* Remove logs because they are printed too frequently
1 parent 1eecae2 commit 1a94141

4 files changed

Lines changed: 19 additions & 13 deletions

File tree

platform/mellanox/mlnx-platform-api/sonic_platform/fan.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,8 @@ def set_speed(self, speed):
241241
status = True
242242

243243
if self.is_psu_fan:
244+
if not self.get_presence():
245+
return False
244246
from .thermal import logger
245247
try:
246248
with open(self.psu_i2c_bus_path, 'r') as f:

platform/mellanox/mlnx-platform-api/sonic_platform/psu.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ def _read_generic_file(self, filename, len):
112112
"""
113113
result = 0
114114
try:
115+
if not os.path.exists(filename):
116+
return result
115117
with open(filename, 'r') as fileobj:
116118
result = int(fileobj.read().strip())
117119
except Exception as e:

platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -495,12 +495,15 @@ def set_thermal_algorithm_status(cls, status, force=True):
495495
We usually disable the algorithm when we want to set a fixed speed. E.g., when
496496
a fan unit is removed from system, we will set fan speed to 100% and disable
497497
the algorithm to prevent it from adjusting the speed.
498+
499+
Returns:
500+
True if thermal algorithm status changed.
498501
"""
499502
if not cls.thermal_profile:
500503
raise Exception("Fail to get thermal profile for this switch")
501504

502505
if not force and cls.thermal_algorithm_status == status:
503-
return
506+
return False
504507

505508
cls.thermal_algorithm_status = status
506509
content = "enabled" if status else "disabled"
@@ -521,6 +524,7 @@ def set_thermal_algorithm_status(cls, status, force=True):
521524
for index in range(count):
522525
cls._write_generic_file(join(THERMAL_ZONE_GEARBOX_PATH.format(start + index), THERMAL_ZONE_MODE), content)
523526
cls._write_generic_file(join(THERMAL_ZONE_GEARBOX_PATH.format(start + index), THERMAL_ZONE_POLICY), policy)
527+
return True
524528

525529
@classmethod
526530
def check_thermal_zone_temperature(cls):

platform/mellanox/mlnx-platform-api/sonic_platform/thermal_actions.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,6 @@ def set_psu_fan_speed(cls, thermal_info_dict, speed):
6666
for psu_fan in psu.get_all_fans():
6767
psu_fan.set_speed(speed)
6868

69-
logger.log_info('Updated PSU FAN speed to {}%'.format(speed))
70-
71-
7269

7370
@thermal_json_object('fan.all.check_and_set_speed')
7471
class CheckAndSetAllFanSpeedAction(SetAllFanSpeedAction):
@@ -131,14 +128,17 @@ def execute(self, thermal_info_dict):
131128
from .thermal import Thermal
132129
from .thermal_conditions import UpdateCoolingLevelToMinCondition
133130
from .fan import Fan
134-
Thermal.set_thermal_algorithm_status(self.status, False)
135-
if self.status:
136-
# Check thermal zone temperature, if all thermal zone temperature
137-
# back to normal, set it to minimum allowed speed to
138-
# save power
139-
UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum(thermal_info_dict)
131+
status_changed = Thermal.set_thermal_algorithm_status(self.status, False)
140132

141-
logger.log_info('Changed thermal algorithm status to {}'.format(self.status))
133+
# Only update cooling level if thermal algorithm status changed
134+
if status_changed:
135+
if self.status:
136+
# Check thermal zone temperature, if all thermal zone temperature
137+
# back to normal, set it to minimum allowed speed to
138+
# save power
139+
UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum(thermal_info_dict)
140+
141+
logger.log_info('Changed thermal algorithm status to {}'.format(self.status))
142142

143143

144144
class ChangeMinCoolingLevelAction(ThermalPolicyActionBase):
@@ -174,8 +174,6 @@ def execute(self, thermal_info_dict):
174174
Fan.set_cooling_level(Fan.min_cooling_level, current_cooling_level)
175175
UpdateCoolingLevelToMinAction.update_cooling_level_to_minimum(thermal_info_dict)
176176

177-
logger.log_info('Changed minimum cooling level to {}'.format(Fan.min_cooling_level))
178-
179177

180178
class UpdatePsuFanSpeedAction(ThermalPolicyActionBase):
181179
def execute(self, thermal_info_dict):

0 commit comments

Comments
 (0)