Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 163 additions & 1 deletion sonic_platform_base/module_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,18 @@
"""

import sys
import os
import fcntl
from . import device_base
import json
import threading
import contextlib
import shutil

# PCI state database constants
PCIE_DETACH_INFO_TABLE = "PCIE_DETACH_INFO"
PCIE_OPERATION_DETACHING = "detaching"
PCIE_OPERATION_ATTACHING = "attaching"

class ModuleBase(device_base.DeviceBase):
"""
Expand All @@ -16,6 +26,7 @@ class ModuleBase(device_base.DeviceBase):
"""
# Device type definition. Note, this is a constant.
DEVICE_TYPE = "module"
PCI_OPERATION_LOCK_FILE_PATH = "/var/lock/{}_pci.lock"

# Possible card types for modular chassis
MODULE_TYPE_SUPERVISOR = "SUPERVISOR"
Expand Down Expand Up @@ -73,6 +84,8 @@ def __init__(self):
self._thermal_list = []
self._voltage_sensor_list = []
self._current_sensor_list = []
self.state_db_connector = None
self.pci_bus_info = None

# List of SfpBase-derived objects representing all sfps
# available on the module
Expand All @@ -81,6 +94,17 @@ def __init__(self):
# List of ASIC-derived objects representing all ASICs
# visible in PCI domain on the module
self._asic_list = []

@contextlib.contextmanager
def _pci_operation_lock(self):
    """
    Context manager guarding PCI operations with a file-based flock.

    The lock file path is PCI_OPERATION_LOCK_FILE_PATH formatted with the
    module name. The file is opened for writing, an exclusive flock is
    taken before the ``with`` body runs, and the flock is released when
    the body finishes (normally or with an exception).
    """
    path = self.PCI_OPERATION_LOCK_FILE_PATH.format(self.get_name())
    with open(path, 'w') as lock_handle:
        try:
            fcntl.flock(lock_handle.fileno(), fcntl.LOCK_EX)
            yield
        finally:
            # Always attempt the unlock, whatever happened in the body
            fcntl.flock(lock_handle.fileno(), fcntl.LOCK_UN)

def get_base_mac(self):
"""
Expand Down Expand Up @@ -271,10 +295,70 @@ def get_pci_bus_info(self):
Retrieves the bus information.

Returns:
Returns the PCI bus information in BDF format like "[DDDD:]BB:SS:F"
Returns the PCI bus information in list of BDF format like "[DDDD:]BB:SS:F"
"""
raise NotImplementedError

def handle_pci_removal(self):
    """
    Handles PCI device removal by updating state database and detaching device.

    While holding the per-module PCI operation lock, every bus address
    reported by get_pci_bus_info() is recorded in the state database with
    the "detaching" state, and pci_detach() is then invoked.

    Returns:
        The value returned by pci_detach() (expected to be a bool), or
        False if any step raised an exception
    """
    try:
        bus_info_list = self.get_pci_bus_info()
        # Tolerate a platform that reports a single BDF string rather than
        # a list: iterating a bare string would otherwise create one
        # state database entry per character.
        if isinstance(bus_info_list, str):
            bus_info_list = [bus_info_list]
        with self._pci_operation_lock():
            for bus in bus_info_list:
                self.pci_entry_state_db(bus, PCIE_OPERATION_DETACHING)
            return self.pci_detach()
    except Exception as e:
        sys.stderr.write("Failed to handle PCI removal: {}\n".format(str(e)))
        return False

def pci_entry_state_db(self, pcie_string, operation):
    """
    Generic function to handle PCI device state database entry.

    For the "attaching" operation the device's PCIE_DETACH_INFO entry is
    deleted. For any other operation the entry's "bus_info" and
    "dpu_state" fields are written. The STATE_DB connector is created on
    first use and cached in self.state_db_connector.

    This is a best-effort operation: any failure (including a failed
    database connection) is reported on stderr and is NOT propagated to
    the caller.

    Args:
        pcie_string (str): The PCI bus string to be written to state database
        operation (str): The operation being performed ("detaching" or "attaching")
    """
    try:
        PCIE_DETACH_INFO_TABLE_KEY = PCIE_DETACH_INFO_TABLE+"|"+pcie_string
        if not self.state_db_connector:
            # Import swsscommon only when a connection really has to be
            # created, so an already-provided connector works even where
            # swsscommon is unavailable.
            import swsscommon
            self.state_db_connector = swsscommon.swsscommon.DBConnector("STATE_DB", 0)
        if operation == PCIE_OPERATION_ATTACHING:
            # Device is back: drop the detach record entirely
            self.state_db_connector.delete(PCIE_DETACH_INFO_TABLE_KEY)
            return
        self.state_db_connector.hset(PCIE_DETACH_INFO_TABLE_KEY, "bus_info", pcie_string)
        self.state_db_connector.hset(PCIE_DETACH_INFO_TABLE_KEY, "dpu_state", operation)
    except Exception as e:
        sys.stderr.write("Failed to write pcie bus info to state database: {}\n".format(str(e)))

def handle_pci_rescan(self):
    """
    Handles PCI device rescan by updating state database and reattaching device.

    While holding the per-module PCI operation lock, pci_reattach() is
    invoked first and then every bus address reported by
    get_pci_bus_info() is passed to pci_entry_state_db() with the
    "attaching" operation.

    Returns:
        The value returned by pci_reattach() (expected to be a bool), or
        False if any step raised an exception
    """
    try:
        bus_info_list = self.get_pci_bus_info()
        # Tolerate a platform that reports a single BDF string rather than
        # a list: iterating a bare string would otherwise touch one
        # state database entry per character.
        if isinstance(bus_info_list, str):
            bus_info_list = [bus_info_list]
        with self._pci_operation_lock():
            return_value = self.pci_reattach()
            for bus in bus_info_list:
                self.pci_entry_state_db(bus, PCIE_OPERATION_ATTACHING)
            return return_value
    except Exception as e:
        sys.stderr.write("Failed to handle PCI rescan: {}\n".format(str(e)))
        return False

def pci_detach(self):
"""
Detaches the PCI device.
Expand Down Expand Up @@ -687,3 +771,81 @@ def get_all_asics(self):
And '0000:05:00.0' is its PCI address.
"""
return self._asic_list

def handle_sensor_removal(self):
    """
    Installs the module's sensor ignore configuration, if one is provided.

    When the platform folder contains an ignore_sensors_<module>.conf file,
    it is copied into /etc/sensors.d and sensord is restarted. When no such
    file exists nothing is done and the call still counts as successful.

    Returns:
        bool: True if no exception occurred, False otherwise
    """
    try:
        module_name = self.get_name()
        source_file = f"/usr/share/sonic/platform/module_sensors_ignore_conf/ignore_sensors_{module_name}.conf"
        target_file = f"/etc/sensors.d/ignore_sensors_{module_name}.conf"

        # Copy and restart only when the platform ships an ignore file
        if os.path.exists(source_file):
            shutil.copy2(source_file, target_file)
            os.system("service sensord restart")

        return True
    except Exception as err:
        sys.stderr.write("Failed to handle sensor removal: {}\n".format(str(err)))
        return False

def handle_sensor_addition(self):
    """
    Removes the module's sensor ignore configuration, if it is installed.

    When /etc/sensors.d contains the ignore_sensors_<module>.conf file, the
    file is deleted and sensord is restarted. When the file is absent
    nothing is done and the call still counts as successful.

    Returns:
        bool: True if no exception occurred, False otherwise
    """
    try:
        module_name = self.get_name()
        target_file = f"/etc/sensors.d/ignore_sensors_{module_name}.conf"

        # Delete and restart only when an ignore file is actually installed
        if os.path.exists(target_file):
            os.remove(target_file)
            os.system("service sensord restart")

        return True
    except Exception as err:
        sys.stderr.write("Failed to handle sensor addition: {}\n".format(str(err)))
        return False

def module_pre_shutdown(self):
    """
    Runs the pre-shutdown steps for a module: sensor removal followed by
    PCI removal. Intended to be called before the module is shut down.

    Both steps are always executed, even if the first one fails.

    Returns:
        The PCI removal result if it is falsy, otherwise the sensor removal
        result (True only when both steps succeeded)
    """
    sensors_ok = self.handle_sensor_removal()
    pci_ok = self.handle_pci_removal()
    if not pci_ok:
        return pci_ok
    return sensors_ok

def module_post_startup(self):
    """
    Runs the post-startup steps for a module: PCI rescan followed by
    sensor addition. Intended to be called after the module has started.

    Both steps are always executed, even if the first one fails.

    Returns:
        The PCI rescan result if it is falsy, otherwise the sensor addition
        result (True only when both steps succeeded)
    """
    pci_ok = self.handle_pci_rescan()
    sensors_ok = self.handle_sensor_addition()
    if not pci_ok:
        return pci_ok
    return sensors_ok
180 changes: 180 additions & 0 deletions tests/module_base_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,35 @@
from sonic_platform_base.module_base import ModuleBase
import pytest
import json
import os
import fcntl
from unittest.mock import patch, MagicMock, call
from io import StringIO
import shutil

class MockFile:
    """
    Minimal stand-in for a file object used by the lock tests.

    Records what was written, whether the context manager was exited and
    whether fileno() was queried.
    """

    def __init__(self, data=None):
        # Content handed back by read()
        self.data = data
        # Last payload passed to write()
        self.written_data = None
        # Flags flipped by __exit__() and fileno() respectively
        self.closed, self.fileno_called = False, False

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.closed = True

    def read(self):
        return self.data

    def write(self, data):
        self.written_data = data

    def fileno(self):
        # Fixed fake descriptor so tests can assert on it
        self.fileno_called = True
        return 123


class TestModuleBase:

Expand Down Expand Up @@ -39,3 +70,152 @@ def test_sensors(self):
assert(module.get_all_current_sensors() == ["s1"])
assert(module.get_current_sensor(0) == "s1")

def test_pci_entry_state_db(self):
    """pci_entry_state_db() writes, deletes and tolerates DB failures."""
    bdf = "0000:00:00.0"
    db_key = "PCIE_DETACH_INFO|" + bdf
    connector = MagicMock()
    module = ModuleBase()
    module.state_db_connector = connector

    # Detaching records both the bus info and the state
    module.pci_entry_state_db(bdf, "detaching")
    expected_writes = [
        call(db_key, "bus_info", bdf),
        call(db_key, "dpu_state", "detaching"),
    ]
    connector.hset.assert_has_calls(expected_writes)

    # Attaching removes the record
    module.pci_entry_state_db(bdf, "attaching")
    connector.delete.assert_called_with(db_key)

    # A failing database write must not raise out of the method
    connector.hset.side_effect = Exception("DB Error")
    module.pci_entry_state_db(bdf, "detaching")

def test_pci_operation_lock(self):
    """
    _pci_operation_lock() must open the per-module lock file, hold an
    exclusive flock while the body runs, then unlock and close the file.
    """
    module = ModuleBase()
    mock_file = MockFile()

    with patch('builtins.open', return_value=mock_file) as mock_file_open, \
         patch('fcntl.flock') as mock_flock, \
         patch.object(module, 'get_name', return_value="DPU0"):

        with module._pci_operation_lock():
            # Inside the body the most recent flock call is the acquire
            mock_flock.assert_called_with(123, fcntl.LOCK_EX)

        # The lock file path is derived from the module name
        mock_file_open.assert_called_once_with("/var/lock/DPU0_pci.lock", 'w')
        mock_flock.assert_has_calls([
            call(123, fcntl.LOCK_EX),
            call(123, fcntl.LOCK_UN)
        ])
        assert mock_file.fileno_called
        # Leaving the context manager must also close the lock file
        assert mock_file.closed

def test_handle_pci_removal(self):
    """handle_pci_removal() marks the bus as detaching under the lock."""
    bdf = "0000:00:00.0"
    module = ModuleBase()

    # Happy path: state DB updated, lock taken once, detach result returned
    with patch.object(module, 'get_pci_bus_info', return_value=[bdf]), \
         patch.object(module, 'pci_entry_state_db') as state_db_mock, \
         patch.object(module, 'pci_detach', return_value=True), \
         patch.object(module, '_pci_operation_lock') as lock_mock, \
         patch.object(module, 'get_name', return_value="DPU0"):
        outcome = module.handle_pci_removal()
        assert outcome is True
        state_db_mock.assert_called_with(bdf, "detaching")
        lock_mock.assert_called_once()

    # Any exception while gathering bus info is reported as failure
    with patch.object(module, 'get_pci_bus_info', side_effect=Exception()):
        outcome = module.handle_pci_removal()
        assert outcome is False

def test_handle_pci_rescan(self):
    """handle_pci_rescan() reattaches and clears the bus record under the lock."""
    bdf = "0000:00:00.0"
    module = ModuleBase()

    # Happy path: reattach result returned, state DB updated, lock taken once
    with patch.object(module, 'get_pci_bus_info', return_value=[bdf]), \
         patch.object(module, 'pci_entry_state_db') as state_db_mock, \
         patch.object(module, 'pci_reattach', return_value=True), \
         patch.object(module, '_pci_operation_lock') as lock_mock, \
         patch.object(module, 'get_name', return_value="DPU0"):
        outcome = module.handle_pci_rescan()
        assert outcome is True
        state_db_mock.assert_called_with(bdf, "attaching")
        lock_mock.assert_called_once()

    # Any exception while gathering bus info is reported as failure
    with patch.object(module, 'get_pci_bus_info', side_effect=Exception()):
        outcome = module.handle_pci_rescan()
        assert outcome is False

def test_handle_sensor_removal(self):
    """handle_sensor_removal() copies the ignore file and restarts sensord."""
    source_conf = "/usr/share/sonic/platform/module_sensors_ignore_conf/ignore_sensors_DPU0.conf"
    target_conf = "/etc/sensors.d/ignore_sensors_DPU0.conf"
    module = ModuleBase()

    # Ignore file present: it is copied and sensord is restarted
    with patch.object(module, 'get_name', return_value="DPU0"), \
         patch('os.path.exists', return_value=True), \
         patch('shutil.copy2') as copy_mock, \
         patch('os.system') as system_mock:
        assert module.handle_sensor_removal() is True
        copy_mock.assert_called_once_with(source_conf, target_conf)
        system_mock.assert_called_once_with("service sensord restart")

    # Ignore file absent: nothing to do, still a success
    with patch.object(module, 'get_name', return_value="DPU0"), \
         patch('os.path.exists', return_value=False), \
         patch('shutil.copy2') as copy_mock, \
         patch('os.system') as system_mock:
        assert module.handle_sensor_removal() is True
        copy_mock.assert_not_called()
        system_mock.assert_not_called()

    # Copy failure is reported as False
    with patch.object(module, 'get_name', return_value="DPU0"), \
         patch('os.path.exists', return_value=True), \
         patch('shutil.copy2', side_effect=Exception("Copy failed")):
        assert module.handle_sensor_removal() is False

def test_handle_sensor_addition(self):
    """handle_sensor_addition() removes the ignore file and restarts sensord."""
    target_conf = "/etc/sensors.d/ignore_sensors_DPU0.conf"
    module = ModuleBase()

    # Ignore file installed: it is removed and sensord is restarted
    with patch.object(module, 'get_name', return_value="DPU0"), \
         patch('os.path.exists', return_value=True), \
         patch('os.remove') as remove_mock, \
         patch('os.system') as system_mock:
        assert module.handle_sensor_addition() is True
        remove_mock.assert_called_once_with(target_conf)
        system_mock.assert_called_once_with("service sensord restart")

    # Ignore file not installed: nothing to do, still a success
    with patch.object(module, 'get_name', return_value="DPU0"), \
         patch('os.path.exists', return_value=False), \
         patch('os.remove') as remove_mock, \
         patch('os.system') as system_mock:
        assert module.handle_sensor_addition() is True
        remove_mock.assert_not_called()
        system_mock.assert_not_called()

    # Removal failure is reported as False
    with patch.object(module, 'get_name', return_value="DPU0"), \
         patch('os.path.exists', return_value=True), \
         patch('os.remove', side_effect=Exception("Remove failed")):
        assert module.handle_sensor_addition() is False

def test_module_pre_shutdown(self):
    """module_pre_shutdown() succeeds only when both sub-steps succeed."""
    module = ModuleBase()

    # (handle_pci_removal result, handle_sensor_removal result, expected)
    scenarios = [
        (True, True, True),    # successful case
        (False, True, False),  # PCI removal failure
        (True, False, False),  # sensor removal failure
    ]
    for pci_ok, sensor_ok, expected in scenarios:
        with patch.object(module, 'handle_pci_removal', return_value=pci_ok), \
             patch.object(module, 'handle_sensor_removal', return_value=sensor_ok):
            assert module.module_pre_shutdown() is expected

def test_module_post_startup(self):
    """module_post_startup() succeeds only when both sub-steps succeed."""
    module = ModuleBase()

    # (handle_pci_rescan result, handle_sensor_addition result, expected)
    scenarios = [
        (True, True, True),    # successful case
        (False, True, False),  # PCI rescan failure
        (True, False, False),  # sensor addition failure
    ]
    for pci_ok, sensor_ok, expected in scenarios:
        with patch.object(module, 'handle_pci_rescan', return_value=pci_ok), \
             patch.object(module, 'handle_sensor_addition', return_value=sensor_ok):
            assert module.module_post_startup() is expected
Loading