diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py b/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py index f9875a296d3..4f5b6fe8e3c 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py @@ -16,6 +16,7 @@ from sonic_platform.fan import Fan from sonic_platform.fan import FAN_PATH from sonic_platform.sfp import SFP + from sonic_platform.thermal import Thermal, initialize_thermals from sonic_platform.watchdog import get_watchdog from sonic_daemon_base.daemon_base import Logger from eeprom import Eeprom @@ -69,7 +70,7 @@ # magic code defnition for port number, qsfp port position of each hwsku # port_position_tuple = (PORT_START, QSFP_PORT_START, PORT_END, PORT_IN_BLOCK, EEPROM_OFFSET) -hwsku_dict = {'ACS-MSN2700': 0, "LS-SN2700":0, 'ACS-MSN2740': 0, 'ACS-MSN2100': 1, 'ACS-MSN2410': 2, 'ACS-MSN2010': 3, 'ACS-MSN3700': 0, 'ACS-MSN3700C': 0, 'Mellanox-SN2700': 0, 'Mellanox-SN2700-D48C8': 0} +hwsku_dict_port = {'ACS-MSN2700': 0, "LS-SN2700":0, 'ACS-MSN2740': 0, 'ACS-MSN2100': 1, 'ACS-MSN2410': 2, 'ACS-MSN2010': 3, 'ACS-MSN3700': 0, 'ACS-MSN3700C': 0, 'Mellanox-SN2700': 0, 'Mellanox-SN2700-D48C8': 0} port_position_tuple_list = [(0, 0, 31, 32, 1), (0, 0, 15, 16, 1), (0, 48, 55, 56, 1),(0, 18, 21, 22, 1)] class Chassis(ChassisBase): @@ -98,8 +99,11 @@ def __init__(self): fan = Fan(index, index) self._fan_list.append(fan) + # Initialize SKU name + self.sku = self._get_sku_name() + # Initialize SFP list - port_position_tuple = self._get_port_position_tuple_by_sku_name() + port_position_tuple = self._get_port_position_tuple_by_sku_name(self.sku) self.PORT_START = port_position_tuple[0] self.QSFP_PORT_START = port_position_tuple[1] self.PORT_END = port_position_tuple[2] @@ -112,6 +116,9 @@ def __init__(self): sfp_module = SFP(index, 'SFP') self._sfp_list.append(sfp_module) + # Initialize thermals + initialize_thermals(self.sku, self._thermal_list, self._psu_list) + # Initialize EEPROM self.eeprom = Eeprom() @@ -137,10 +144,13 @@ def _extract_num_of_fans_and_fan_drawers(self): return num_of_fan, num_of_drawer - def _get_port_position_tuple_by_sku_name(self): + def _get_sku_name(self): p = subprocess.Popen(GET_HWSKU_CMD, shell=True, stdout=subprocess.PIPE) out, err = p.communicate() - position_tuple = port_position_tuple_list[hwsku_dict[out.rstrip('\n')]] + return out.rstrip('\n') + + def _get_port_position_tuple_by_sku_name(self, sku): + position_tuple = port_position_tuple_list[hwsku_dict_port[self.sku]] return position_tuple def get_base_mac(self): diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py b/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py index bcbd643eb00..1ea3d7a13f9 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/psu.py @@ -12,10 +12,15 @@ try: from sonic_platform_base.psu_base import PsuBase + from sonic_daemon_base.daemon_base import Logger from sonic_platform.fan import Fan except ImportError as e: raise ImportError (str(e) + "- required module not found") +# Global logger class instance +SYSLOG_IDENTIFIER = "mlnx-psu" +logger = Logger(SYSLOG_IDENTIFIER) + psu_list = [] class Psu(PsuBase): @@ -26,30 +31,53 @@ def __init__(self, psu_index): # PSU is 1-based on Mellanox platform self.index = psu_index + 1 psu_list.append(self.index) - self.psu_path = "/var/run/hw-management/thermal/" - self.psu_oper_status = "psu{}_pwr_status".format(self.index) - self.psu_presence = "psu{}_status".format(self.index) - if os.path.exists(os.path.join(self.psu_path, self.psu_presence)): - self.presence_file_exists = True + self.psu_path = "/var/run/hw-management/" + self.psu_oper_status = "thermal/psu{}_pwr_status".format(self.index) + psu_voltage = "power/psu{}_volt".format(self.index) + if os.path.exists(os.path.join(self.psu_path, psu_voltage)): + self.psu_voltage = psu_voltage + else: + self.psu_voltage = None + psu_curr = "power/psu{}_curr".format(self.index) + if os.path.exists(os.path.join(self.psu_path, psu_curr)): + self.psu_current = psu_curr + else: + self.psu_current = None + psu_power = "power/psu{}_power".format(self.index) + if os.path.exists(os.path.join(self.psu_path, psu_power)): + self.psu_power = psu_power + else: + self.psu_power = None + psu_presence = "thermal/psu{}_status".format(self.index) + if os.path.exists(os.path.join(self.psu_path, psu_presence)): + self.psu_presence = psu_presence else: - self.presence_file_exists = False + self.psu_presence = None fan = Fan(psu_index, psu_index, True) if fan.get_presence(): self._fan = fan - def get_status(self): + def _read_generic_file(self, filename, len): + """ + Read a generic file, returns the contents of the file + """ + result = 0 + try: + with open(filename, 'r') as fileobj: + result = int(fileobj.read()) + except: + logger.log_info("Fail to read file {}, maybe it doesn't exist".format(filename)) + result = 0 + return result + + def get_powergood_status(self): """ Retrieves the operational status of power supply unit (PSU) defined Returns: bool: True if PSU is operating properly, False if not """ - status = 0 - try: - with open(os.path.join(self.psu_path, self.psu_oper_status), 'r') as power_status: - status = int(power_status.read()) - except (ValueError, IOError): - status = 0 + status = self._read_generic_file(os.path.join(self.psu_path, self.psu_oper_status), 0) return status == 1 @@ -60,15 +88,48 @@ def get_presence(self): Returns: bool: True if PSU is present, False if not """ - status = 0 - if self.presence_file_exists: - try: - with open(os.path.join(self.psu_path, self.psu_presence), 'r') as presence_status: - status = int(presence_status.read()) - except (ValueError, IOError): - status = 0 + if self.psu_presence is not None: + status = self._read_generic_file(os.path.join(self.psu_path, self.psu_presence), 0) + return status == 1 else: - status = self.index in psu_list + return True - return status == 1 + def get_voltage(self): + """ + Retrieves current PSU voltage output + + Returns: + A float number, the output voltage in volts, + e.g. 12.1 + """ + if self.psu_voltage is not None: + voltage = self._read_generic_file(os.path.join(self.psu_path, self.psu_voltage), 0) + return float(voltage) / 1000 + else: + return None + + def get_current(self): + """ + Retrieves present electric current supplied by PSU + Returns: + A float number, the electric current in amperes, e.g 15.4 + """ + if self.psu_current is not None: + amperes = self._read_generic_file(os.path.join(self.psu_path, self.psu_current), 0) + return float(amperes) / 1000 + else: + return None + + def get_power(self): + """ + Retrieves current energy supplied by PSU + + Returns: + A float number, the power in watts, e.g. 302.6 + """ + if self.psu_power is not None: + power = self._read_generic_file(os.path.join(self.psu_path, self.psu_power), 0) + return float(power) / 1000000 + else: + return None diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py new file mode 100644 index 00000000000..3492765c850 --- /dev/null +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py @@ -0,0 +1,351 @@ +#!/usr/bin/env python + +############################################################################# +# Mellanox +# +# Module contains an implementation of SONiC Platform Base API and +# provides the thermals data which are available in the platform +# +############################################################################# + +import os.path + +try: + from sonic_platform_base.thermal_base import ThermalBase + from sonic_daemon_base.daemon_base import Logger + from os import listdir + from os.path import isfile, join + import io +except ImportError as e: + raise ImportError (str(e) + "- required module not found") + +# Global logger class instance +SYSLOG_IDENTIFIER = "mlnx-thermal" +logger = Logger(SYSLOG_IDENTIFIER) + +THERMAL_DEV_CATEGORY_CPU_CORE = "cpu_core" +THERMAL_DEV_CATEGORY_CPU_PACK = "cpu_pack" +THERMAL_DEV_CATEGORY_MODULE = "module" +THERMAL_DEV_CATEGORY_PSU = "psu" +THERMAL_DEV_CATEGORY_GEARBOX = "gearbox" +THERMAL_DEV_CATEGORY_AMBIENT = "ambient" + +THERMAL_DEV_ASIC_AMBIENT = "asic_amb" +THERMAL_DEV_FAN_AMBIENT = "fan_amb" +THERMAL_DEV_PORT_AMBIENT = "port_amb" +THERMAL_DEV_COMEX_AMBIENT = "comex_amb" +THERMAL_DEV_BOARD_AMBIENT = "board_amb" + +THERMAL_API_GET_TEMPERATURE = "get_temperature" +THERMAL_API_GET_HIGH_THRESHOLD = "get_high_threshold" + +HW_MGMT_THERMAL_ROOT = "/var/run/hw-management/thermal/" + +thermal_api_handler_cpu_core = { + THERMAL_API_GET_TEMPERATURE:"cpu_core{}", + THERMAL_API_GET_HIGH_THRESHOLD:"cpu_core{}_max" +} +thermal_api_handler_cpu_pack = { + THERMAL_API_GET_TEMPERATURE:"cpu_pack", + THERMAL_API_GET_HIGH_THRESHOLD:"cpu_pack_max" +} +thermal_api_handler_module = { + THERMAL_API_GET_TEMPERATURE:"temp_input_module{}", + THERMAL_API_GET_HIGH_THRESHOLD:"temp_crit_module{}" +} +thermal_api_handler_psu = { + THERMAL_API_GET_TEMPERATURE:"psu{}", + THERMAL_API_GET_HIGH_THRESHOLD:"psu{}_max" +} +thermal_api_handler_gearbox = { + THERMAL_API_GET_TEMPERATURE:None, + THERMAL_API_GET_HIGH_THRESHOLD:None +} +thermal_ambient_apis = { + THERMAL_DEV_ASIC_AMBIENT : "asic", + THERMAL_DEV_PORT_AMBIENT : "port_amb", + THERMAL_DEV_FAN_AMBIENT : "fan_amb", + THERMAL_DEV_COMEX_AMBIENT : "comex_amb", + THERMAL_DEV_BOARD_AMBIENT : "board_amb" +} +thermal_ambient_name = { + THERMAL_DEV_ASIC_AMBIENT : "Ambient ASIC Temp", + THERMAL_DEV_PORT_AMBIENT : "Ambient Port Side Temp", + THERMAL_DEV_FAN_AMBIENT : "Ambient Fan Side Temp", + THERMAL_DEV_COMEX_AMBIENT : "Ambient COMEX Temp", + THERMAL_DEV_BOARD_AMBIENT : "Ambient Board Temp" +} +thermal_api_handlers = { + THERMAL_DEV_CATEGORY_CPU_CORE : thermal_api_handler_cpu_core, + THERMAL_DEV_CATEGORY_CPU_PACK : thermal_api_handler_cpu_pack, + THERMAL_DEV_CATEGORY_MODULE : thermal_api_handler_module, + THERMAL_DEV_CATEGORY_PSU : thermal_api_handler_psu, + THERMAL_DEV_CATEGORY_GEARBOX : thermal_api_handler_gearbox +} +thermal_name = { + THERMAL_DEV_CATEGORY_CPU_CORE : "CPU Core {} Temp", + THERMAL_DEV_CATEGORY_CPU_PACK : "CPU Pack Temp", + THERMAL_DEV_CATEGORY_MODULE : "xSFP module {} Temp", + THERMAL_DEV_CATEGORY_PSU : "PSU-{} Temp", + THERMAL_DEV_CATEGORY_GEARBOX : "Gearbox {} Temp" +} + +thermal_device_categories_all = [ + THERMAL_DEV_CATEGORY_CPU_CORE, + THERMAL_DEV_CATEGORY_CPU_PACK, + THERMAL_DEV_CATEGORY_MODULE, + THERMAL_DEV_CATEGORY_PSU, + THERMAL_DEV_CATEGORY_AMBIENT, + THERMAL_DEV_CATEGORY_GEARBOX +] + +thermal_device_categories_singleton = [ + THERMAL_DEV_CATEGORY_CPU_PACK, + THERMAL_DEV_CATEGORY_AMBIENT +] +thermal_api_names = [ + THERMAL_API_GET_TEMPERATURE, + THERMAL_API_GET_HIGH_THRESHOLD +] + +hwsku_dict_thermal = {'ACS-MSN2700': 0, "LS-SN2700":0, 'ACS-MSN2740': 3, 'ACS-MSN2100': 1, 'ACS-MSN2410': 2, 'ACS-MSN2010': 4, 'ACS-MSN3700': 5, 'ACS-MSN3700C': 6, 'Mellanox-SN2700': 0, 'Mellanox-SN2700-D48C8': 0, 'ACS-MSN3800': 7} +thermal_profile_list = [ + # 2700 + { + THERMAL_DEV_CATEGORY_CPU_CORE:(0, 2), + THERMAL_DEV_CATEGORY_MODULE:(1, 32), + THERMAL_DEV_CATEGORY_PSU:(1, 2), + THERMAL_DEV_CATEGORY_CPU_PACK:(0,1), + THERMAL_DEV_CATEGORY_GEARBOX:(0,0), + THERMAL_DEV_CATEGORY_AMBIENT:(0, + [ + THERMAL_DEV_ASIC_AMBIENT, + THERMAL_DEV_PORT_AMBIENT, + THERMAL_DEV_FAN_AMBIENT + ] + ) + }, + # 2100 + { + THERMAL_DEV_CATEGORY_CPU_CORE:(0, 4), + THERMAL_DEV_CATEGORY_MODULE:(1, 16), + THERMAL_DEV_CATEGORY_PSU:(0, 0), + THERMAL_DEV_CATEGORY_CPU_PACK:(0,0), + THERMAL_DEV_CATEGORY_GEARBOX:(0,0), + THERMAL_DEV_CATEGORY_AMBIENT:(0, + [ + THERMAL_DEV_ASIC_AMBIENT, + THERMAL_DEV_PORT_AMBIENT, + THERMAL_DEV_FAN_AMBIENT, + ] + ) + }, + # 2410 + { + THERMAL_DEV_CATEGORY_CPU_CORE:(0, 2), + THERMAL_DEV_CATEGORY_MODULE:(1, 56), + THERMAL_DEV_CATEGORY_PSU:(1, 2), + THERMAL_DEV_CATEGORY_CPU_PACK:(0,1), + THERMAL_DEV_CATEGORY_GEARBOX:(0,0), + THERMAL_DEV_CATEGORY_AMBIENT:(0, + [ + THERMAL_DEV_ASIC_AMBIENT, + THERMAL_DEV_PORT_AMBIENT, + THERMAL_DEV_FAN_AMBIENT, + ] + ) + }, + # 2740 + { + THERMAL_DEV_CATEGORY_CPU_CORE:(0, 4), + THERMAL_DEV_CATEGORY_MODULE:(1, 32), + THERMAL_DEV_CATEGORY_PSU:(1, 2), + THERMAL_DEV_CATEGORY_CPU_PACK:(0,0), + THERMAL_DEV_CATEGORY_GEARBOX:(0,0), + THERMAL_DEV_CATEGORY_AMBIENT:(0, + [ + THERMAL_DEV_ASIC_AMBIENT, + THERMAL_DEV_PORT_AMBIENT, + THERMAL_DEV_FAN_AMBIENT, + ] + ) + }, + # 2010 + { + THERMAL_DEV_CATEGORY_CPU_CORE:(0, 4), + THERMAL_DEV_CATEGORY_MODULE:(1, 22), + THERMAL_DEV_CATEGORY_PSU:(0, 0), + THERMAL_DEV_CATEGORY_CPU_PACK:(0,0), + THERMAL_DEV_CATEGORY_GEARBOX:(0,0), + THERMAL_DEV_CATEGORY_AMBIENT:(0, + [ + THERMAL_DEV_ASIC_AMBIENT, + THERMAL_DEV_PORT_AMBIENT, + THERMAL_DEV_FAN_AMBIENT, + ] + ) + }, + # 3700 + { + THERMAL_DEV_CATEGORY_CPU_CORE:(0, 4), + THERMAL_DEV_CATEGORY_MODULE:(1, 32), + THERMAL_DEV_CATEGORY_PSU:(1, 2), + THERMAL_DEV_CATEGORY_CPU_PACK:(0,1), + THERMAL_DEV_CATEGORY_GEARBOX:(0,0), + THERMAL_DEV_CATEGORY_AMBIENT:(0, + [ + THERMAL_DEV_ASIC_AMBIENT, + THERMAL_DEV_COMEX_AMBIENT, + THERMAL_DEV_PORT_AMBIENT, + THERMAL_DEV_FAN_AMBIENT + ] + ) + }, + # 3700c + { + THERMAL_DEV_CATEGORY_CPU_CORE:(0, 2), + THERMAL_DEV_CATEGORY_MODULE:(1, 32), + THERMAL_DEV_CATEGORY_PSU:(1, 2), + THERMAL_DEV_CATEGORY_CPU_PACK:(0,1), + THERMAL_DEV_CATEGORY_GEARBOX:(0,0), + THERMAL_DEV_CATEGORY_AMBIENT:(0, + [ + THERMAL_DEV_ASIC_AMBIENT, + THERMAL_DEV_COMEX_AMBIENT, + THERMAL_DEV_PORT_AMBIENT, + THERMAL_DEV_FAN_AMBIENT + ] + ) + }, + # 3800 + { + THERMAL_DEV_CATEGORY_CPU_CORE:(0, 4), + THERMAL_DEV_CATEGORY_MODULE:(1, 64), + THERMAL_DEV_CATEGORY_PSU:(1, 2), + THERMAL_DEV_CATEGORY_CPU_PACK:(0,1), + THERMAL_DEV_CATEGORY_GEARBOX:(0,0), + THERMAL_DEV_CATEGORY_AMBIENT:(0, + [ + THERMAL_DEV_ASIC_AMBIENT, + THERMAL_DEV_COMEX_AMBIENT, + THERMAL_DEV_PORT_AMBIENT, + THERMAL_DEV_FAN_AMBIENT + ] + ) + }, +] + +def initialize_thermals(sku, thermal_list, psu_list): + tp_index = hwsku_dict_thermal[sku] + thermal_profile = thermal_profile_list[tp_index] + for category in thermal_device_categories_all: + if category == THERMAL_DEV_CATEGORY_AMBIENT: + count, ambient_list = thermal_profile[category] + for ambient in ambient_list: + thermal = Thermal(category, ambient, True) + thermal_list.append(thermal) + else: + start, count = 0, 0 + if category in thermal_profile: + start, count = thermal_profile[category] + if count == 0: + continue + if count == 1: + thermal = Thermal(category, 0, False) + thermal_list.append(thermal) + else: + if category == THERMAL_DEV_CATEGORY_PSU: + for index in range(count): + thermal = Thermal(category, start + index, True, psu_list[index]) + thermal_list.append(thermal) + else: + for index in range(count): + thermal = Thermal(category, start + index, True) + thermal_list.append(thermal) + +class Thermal(ThermalBase): + def __init__(self, category, index, has_index, dependency = None): + """ + index should be a string for category ambient and int for other categories + """ + if category == THERMAL_DEV_CATEGORY_AMBIENT: + self.name = thermal_ambient_name[index] + self.index = index + elif has_index: + self.name = thermal_name[category].format(index) + self.index = index + else: + self.name = thermal_name[category] + self.index = 0 + + self.category = category + self.temperature = self._get_file_from_api(THERMAL_API_GET_TEMPERATURE) + self.high_threshold = self._get_file_from_api(THERMAL_API_GET_HIGH_THRESHOLD) + self.dependency = dependency + + def get_name(self): + """ + Retrieves the name of the device + + Returns: + string: The name of the device + """ + return self.name + + def _read_generic_file(self, filename, len): + """ + Read a generic file, returns the contents of the file + """ + result = None + try: + with open(filename, 'r') as fileobj: + result = fileobj.read() + except: + if self.dependency is None or self.dependency.get_powergood_status(): + logger.log_warning("Fail to read file {}, maybe it doesn't exist".format(filename)) + result = None + return result + + def _get_file_from_api(self, api_name): + if self.category == THERMAL_DEV_CATEGORY_AMBIENT: + if api_name == THERMAL_API_GET_TEMPERATURE: + filename = thermal_ambient_apis[self.index] + else: + return None + else: + handler = thermal_api_handlers[self.category][api_name] + if self.category in thermal_device_categories_singleton: + filename = handler + else: + filename = handler.format(self.index) + return join(HW_MGMT_THERMAL_ROOT, filename) + + def get_temperature(self): + """ + Retrieves current temperature reading from thermal + + Returns: + A float number of current temperature in Celsius up to nearest thousandth + of one degree Celsius, e.g. 30.125 + """ + value_str = self._read_generic_file(self.temperature, 0) + if value_str is None: + return None + value_float = float(value_str) + return value_float / 1000.0 + + def get_high_threshold(self): + """ + Retrieves the high threshold temperature of thermal + + Returns: + A float number, the high threshold temperature of thermal in Celsius + up to nearest thousandth of one degree Celsius, e.g. 30.125 + """ + if self.high_threshold is None: + return None + value_str = self._read_generic_file(self.high_threshold, 0) + if value_str is None: + return None + value_float = float(value_str) + return value_float / 1000.0