diff --git a/device/mellanox/x86_64-nvidia_sn4280-r0/installer.conf b/device/mellanox/x86_64-nvidia_sn4280-r0/installer.conf index 776e1f6aa4d..c8322e67b15 100644 --- a/device/mellanox/x86_64-nvidia_sn4280-r0/installer.conf +++ b/device/mellanox/x86_64-nvidia_sn4280-r0/installer.conf @@ -1 +1 @@ -ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="libata.force=noncq module_blacklist=mlx5_ib,mlx5_core ima_hash=sha384 amd_iommu=off cpufreq.default_governor=performance" +ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="libata.force=noncq ima_hash=sha384 amd_iommu=off cpufreq.default_governor=performance" diff --git a/device/mellanox/x86_64-nvidia_sn4280-r0/platform.json b/device/mellanox/x86_64-nvidia_sn4280-r0/platform.json index c9370bd6d7f..b7cd3c881f0 100644 --- a/device/mellanox/x86_64-nvidia_sn4280-r0/platform.json +++ b/device/mellanox/x86_64-nvidia_sn4280-r0/platform.json @@ -658,6 +658,7 @@ "baud-rate": "115200" }, "rshim_info": "rshim0", + "rshim_bus_info": "0000:08:00.1", "bus_info": "0000:08:00.0" }, "dpu1": { @@ -670,6 +671,7 @@ "baud-rate": "115200" }, "rshim_info": "rshim1", + "rshim_bus_info": "0000:07:00.1", "bus_info": "0000:07:00.0" }, "dpu2": { @@ -682,6 +684,7 @@ "baud-rate": "115200" }, "rshim_info": "rshim2", + "rshim_bus_info": "0000:01:00.1", "bus_info": "0000:01:00.0" }, "dpu3": { @@ -694,6 +697,7 @@ "baud-rate": "115200" }, "rshim_info": "rshim3", + "rshim_bus_info": "0000:02:00.1", "bus_info": "0000:02:00.0" } } diff --git a/files/build_templates/sonic_debian_extension.j2 b/files/build_templates/sonic_debian_extension.j2 index 579c165d9ce..ff62efd1909 100644 --- a/files/build_templates/sonic_debian_extension.j2 +++ b/files/build_templates/sonic_debian_extension.j2 @@ -1115,6 +1115,14 @@ sudo cp platform/mellanox/rshim/files/rshim@.service $FILESYSTEM_ROOT_USR_LIB_SY # Install dpumap script sudo install -m 755 platform/mellanox/smartswitch/dpumap.sh $FILESYSTEM_ROOT/usr/bin/dpumap.sh + +# Install dpu-udev-manager script +sudo install -m 755 platform/mellanox/smartswitch/dpu-udev-manager/dpu-udev-manager.sh $FILESYSTEM_ROOT/usr/bin/dpu-udev-manager.sh + +# Install dpu-udev-manager service +sudo install -m 644 platform/mellanox/smartswitch/dpu-udev-manager/dpu-udev-manager.service $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM/ +sudo LANG=C chroot $FILESYSTEM_ROOT systemctl enable dpu-udev-manager + {% endif %} {% if sonic_asic_platform == "nvidia-bluefield" %} diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py b/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py index d53e60c9f81..a758d08e3d2 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py @@ -32,6 +32,7 @@ class DpuInterfaceEnum(Enum): MIDPLANE_INT = "midplane_interface" RSHIM_INT = "rshim_info" PCIE_INT = "bus_info" + RSHIM_PCIE_INT = "rshim_bus_info" dpu_interface_values = [item.value for item in DpuInterfaceEnum] diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/dpuctlplat.py b/platform/mellanox/mlnx-platform-api/sonic_platform/dpuctlplat.py index 941c1aa6d92..6c42efc312e 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/dpuctlplat.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/dpuctlplat.py @@ -43,7 +43,6 @@ WAIT_FOR_SHTDN = 120 WAIT_FOR_DPU_READY = 180 -WAIT_FOR_PCI_DEV = 60 class OperationType(Enum): @@ -104,10 +103,7 @@ def __init__(self, dpu_name): self.shtdn_state = None self.dpu_ready_state = None self.setup_logger() - self.pci_dev_path = None - self.rshim_interface = None - # Use systemd dbus to execute start and stop rshim service - os.environ['DBUS_SESSION_BUS_ADDRESS'] = 'unix:path=/run/dbus/system_bus_socket' + self.pci_dev_path = [] self.verbosity = False def setup_logger(self, use_print=False): @@ -141,45 +137,11 @@ def run_cmd_output(self, cmd, raise_exception=True): def dpu_pre_shutdown(self): """Method to execute shutdown activities for the DPU""" - rshim_op = self.dpu_rshim_service_control("stop") - pci_rem_op = self.dpu_pci_remove() - return rshim_op and pci_rem_op + return self.dpu_pci_remove() def dpu_post_startup(self): """Method to execute all post startup activities for the DPU""" - pci_scan_op = self.dpu_pci_scan() - rshim_op = None - if self.wait_for_pci(): - rshim_op = self.dpu_rshim_service_control("start") - if rshim_op and pci_scan_op: - return True - return False - - def get_rshim_interface(self): - """Parse the rshim interface from platform.json, raise Runtime error if the device id is not available""" - if not self.rshim_interface: - interface_name = DeviceDataManager.get_dpu_interface(self.dpu_name, DpuInterfaceEnum.RSHIM_INT.value) - if not interface_name: - raise RuntimeError(f"Unable to Parse rshim information for {self.dpu_name} from Platform.json") - # rshim1 -> rshim@1 - self.rshim_interface = interface_name[:5] + "@" + interface_name[5:] - return self.rshim_interface - - def dpu_rshim_service_control(self, op): - """Start/Stop the RSHIM service for the current DPU""" - try: - rshim_cmd = ["dbus-send", "--dest=org.freedesktop.systemd1", "--type=method_call", - "--print-reply", "--reply-timeout=2000", - "/org/freedesktop/systemd1", - f"org.freedesktop.systemd1.Manager.{op.capitalize()}Unit", - f"string:{self.get_rshim_interface()}.service", - "string:replace"] - self.run_cmd_output(rshim_cmd) - # If command fails execution exception is raised , return true if control is still in try block - return True - except Exception as e: - self.log_error(f"Failed to {op} rshim!: {e}") - return False + return self.dpu_pci_scan() @contextmanager def get_open_fd(self, path, flag): @@ -190,31 +152,19 @@ def get_open_fd(self, path, flag): os.close(fd) def get_pci_dev_path(self): - """Parse the PCIE device ID from platform.json, raise Runtime error if the device id is not available""" - if not self.pci_dev_path: - pci_dev_id = DeviceDataManager.get_dpu_interface(self.dpu_name, DpuInterfaceEnum.PCIE_INT.value) - if not pci_dev_id: - raise RuntimeError(f"Unable to obtain pci device id for {self.dpu_name} from platform.json") - self.pci_dev_path = os.path.join(PCI_DEV_BASE, pci_dev_id, "remove") - return self.pci_dev_path + """Parse the PCIE devices ID from platform.json, raise Runtime error if the device id is not available""" + if self.pci_dev_path: + return self.pci_dev_path + + pci_dev_id = DeviceDataManager.get_dpu_interface(self.dpu_name, DpuInterfaceEnum.PCIE_INT.value) + rshim_pci_dev_id = DeviceDataManager.get_dpu_interface(self.dpu_name, DpuInterfaceEnum.RSHIM_PCIE_INT.value) + if not pci_dev_id or not rshim_pci_dev_id: + raise RuntimeError(f"Unable to obtain PCI device IDs for {self.dpu_name} from platform.json") + + self.pci_dev_path = [os.path.join(PCI_DEV_BASE, pci_dev_id), + os.path.join(PCI_DEV_BASE, rshim_pci_dev_id)] - def wait_for_pci(self): - """Wait for the PCI device folder in the PCI Path, required before starting rshim""" - try: - with self.get_open_fd(PCI_DEV_BASE, os.O_RDONLY) as dir_fd: - if os.path.exists(os.path.dirname(self.get_pci_dev_path())): - return True - poll_obj = poll() - poll_obj.register(dir_fd, POLLIN) - start = time.monotonic() - while (time.monotonic() - start) < WAIT_FOR_PCI_DEV: - events = poll_obj.poll(WAIT_FOR_PCI_DEV * 1000) - if events: - if os.path.exists(os.path.dirname(self.get_pci_dev_path())): - return True - return os.path.exists(os.path.dirname(self.get_pci_dev_path())) - except Exception as e: - self.log_error(f"Unable to wait for PCI device:{e}") + return self.pci_dev_path def write_file(self, file_name, content_towrite): """Write given value to file only if file exists""" @@ -297,7 +247,10 @@ def _power_on(self): def dpu_pci_remove(self): """Per DPU PCI remove API""" try: - self.write_file(self.get_pci_dev_path(), OperationType.SET.value) + for pci_dev_path in self.get_pci_dev_path(): + remove_path = os.path.join(pci_dev_path, "remove") + if os.path.exists(remove_path): + self.write_file(remove_path, OperationType.SET.value) return True except Exception: self.log_info(f"Failed PCI Removal!") diff --git a/platform/mellanox/mlnx-platform-api/tests/test_device_data.py b/platform/mellanox/mlnx-platform-api/tests/test_device_data.py index f34cb4e471e..76c370d0dd0 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_device_data.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_device_data.py @@ -120,7 +120,8 @@ def test_dpu_interface_data(self, mock_load_json): "Ethernet224": "Ethernet0" }, "rshim_info": "rshim0", - "bus_info": "0000:08:00.0" + "bus_info": "0000:08:00.0", + "rshim_bus_info": "0000:08:00.1" }, "dpu1": { "midplane_interface": "dpu1", @@ -128,7 +129,8 @@ def test_dpu_interface_data(self, mock_load_json): "Ethernet232": "Ethernet0" }, "rshim_info": "rshim1", - "bus_info": "0000:07:00.0" + "bus_info": "0000:07:00.0", + "rshim_bus_info": "0000:07:00.1" }, "dpu2": { "midplane_interface": "dpu2", @@ -136,7 +138,8 @@ def test_dpu_interface_data(self, mock_load_json): "Ethernet240": "Ethernet0" }, "rshim_info": "rshim2", - "bus_info": "0000:01:00.0" + "bus_info": "0000:01:00.0", + "rshim_bus_info": "0000:01:00.1" }, "dpu3": { "midplane_interface": "dpu3", @@ -144,7 +147,8 @@ def test_dpu_interface_data(self, mock_load_json): "Ethernet248": "Ethernet0" }, "rshim_info": "rshim3", - "bus_info": "0000:02:00.0" + "bus_info": "0000:02:00.0", + "rshim_bus_info": "0000:02:00.1" } } mock_load_json.return_value = mock_value diff --git a/platform/mellanox/mlnx-platform-api/tests/test_dpuctlplat.py b/platform/mellanox/mlnx-platform-api/tests/test_dpuctlplat.py index c31f810ecf7..325a1ed5016 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_dpuctlplat.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_dpuctlplat.py @@ -16,540 +16,345 @@ # limitations under the License. # -"""dpuctlplat Tests Implementation""" +"""Tests for dpuctlplat Platform API Wrapper""" import os import sys +import time import pytest -from sonic_platform.dpuctlplat import DpuCtlPlat, BootProgEnum, PCI_DEV_BASE, OperationType - +import subprocess from unittest.mock import MagicMock, patch, Mock, call +from sonic_platform.dpuctlplat import ( + DpuCtlPlat, BootProgEnum, PCI_DEV_BASE, OperationType, + WAIT_FOR_SHTDN, WAIT_FOR_DPU_READY +) + test_path = os.path.dirname(os.path.abspath(__file__)) modules_path = os.path.dirname(test_path) sys.path.insert(0, modules_path) scripts_path = os.path.join(modules_path, "scripts") +# Test data +TEST_DPU_LIST = ['dpu0', 'dpu1', 'dpu2', 'dpu3'] +TEST_PCI_PATH = os.path.join(PCI_DEV_BASE, "0000:08:00.0") +TEST_RSHIM_PCI_PATH = os.path.join(PCI_DEV_BASE, "0000:08:00.1") +TEST_PCI_REMOVE_PATH = os.path.join(TEST_PCI_PATH, "remove") +TEST_RSHIM_PCI_REMOVE_PATH = os.path.join(TEST_RSHIM_PCI_PATH, "remove") + +@pytest.fixture +def dpuctl_obj(): + """Fixture to create a DpuCtlPlat object for testing""" + obj = DpuCtlPlat('dpu0') + obj.setup_logger(True) + obj.pci_dev_path = [TEST_PCI_PATH, TEST_RSHIM_PCI_PATH] + return obj -def create_dpu_list(): - """Create dpu object list for Function calls""" - existing_dpu_list = ['dpu0', 'dpu1', 'dpu2', 'dpu3'] - dpuctl_list = [] - for dpu_name in existing_dpu_list: - dpuctl_obj = DpuCtlPlat(dpu_name) +class TestDpuCtlPlatInit: + """Tests for DpuCtlPlat initialization""" + + def test_init(self, dpuctl_obj): + """Test initialization of DpuCtlPlat object""" + assert dpuctl_obj.dpu_name == 'dpu0' + assert dpuctl_obj.dpu_id == 0 + assert dpuctl_obj._name == 'dpu1' # hwmgmt name is dpu index + 1 + assert dpuctl_obj.verbosity is False + assert isinstance(dpuctl_obj.boot_prog_map, dict) + assert len(dpuctl_obj.boot_prog_map) > 0 + assert len(dpuctl_obj.pci_dev_path) == 2 # Both PCI and RSHIM paths + + def test_setup_logger(self, dpuctl_obj): + """Test logger setup""" + # Test with print mode dpuctl_obj.setup_logger(True) - dpuctl_list.append(dpuctl_obj) - context = { - "dpuctl_list": dpuctl_list, - } - return context + assert dpuctl_obj.logger_info == print + assert dpuctl_obj.logger_error == print + assert dpuctl_obj.logger_debug == print + + # Test with syslogger mode + dpuctl_obj.setup_logger(False) + assert dpuctl_obj.logger_info != print + assert dpuctl_obj.logger_error != print + assert dpuctl_obj.logger_debug != print + def test_get_pci_dev_path(self, dpuctl_obj): + """Test PCI device path retrieval""" + # Test with both PCI and RSHIM paths + with patch('sonic_platform.device_data.DeviceDataManager.get_dpu_interface') as mock_get: + mock_get.side_effect = ["0000:08:00.0", "0000:08:00.1"] + paths = dpuctl_obj.get_pci_dev_path() + assert len(paths) == 2 + assert paths[0].endswith("0000:08:00.0") + assert paths[1].endswith("0000:08:00.1") -obj = create_dpu_list() + # Test with missing PCI path + with patch('sonic_platform.device_data.DeviceDataManager.get_dpu_interface') as mock_get: + mock_get.side_effect = [None, "0000:08:00.1"] + dpuctl_obj.pci_dev_path = [] + with pytest.raises(RuntimeError) as exc: + dpuctl_obj.get_pci_dev_path() + assert "Unable to obtain PCI device IDs" in str(exc.value) -rshim_interface = "rshim@0" -pci_dev_path = os.path.join(PCI_DEV_BASE, "0000:08:00.0", 'remove') + # Test with missing RSHIM path + with patch('sonic_platform.device_data.DeviceDataManager.get_dpu_interface') as mock_get: + mock_get.side_effect = ["0000:08:00.0", None] + with pytest.raises(RuntimeError) as exc: + dpuctl_obj.get_pci_dev_path() + assert "Unable to obtain PCI device IDs" in str(exc.value) +class TestDpuCtlPlatPCI: + """Tests for PCI-related functionality""" + + def test_pci_operations(self, dpuctl_obj): + """Test PCI remove and scan operations""" + written_data = [] + def mock_write_file(file_name, content_towrite): + written_data.append({"file": file_name, "data": content_towrite}) + return True + + # Test PCI remove - should remove both devices + with patch.object(dpuctl_obj, 'write_file', wraps=mock_write_file), \ + patch('os.path.exists', return_value=True): + assert dpuctl_obj.dpu_pci_remove() + assert len(written_data) == 2 + assert written_data[0]["file"] == TEST_PCI_REMOVE_PATH + assert written_data[0]["data"] == "1" + assert written_data[1]["file"] == TEST_RSHIM_PCI_REMOVE_PATH + assert written_data[1]["data"] == "1" -class TestDpuClass: - """Tests for dpuctl Platform API Wrapper""" - @classmethod - def setup_class(cls): - """Setup function for all tests for dpuctl implementation""" - os.environ["PATH"] += os.pathsep + scripts_path - os.environ["MLNX_PLATFORM_API_DPUCTL_UNIT_TESTING"] = "2" - dpuctl_obj = obj["dpuctl_list"][0] - dpuctl_obj.rshim_interface = rshim_interface - dpuctl_obj.pci_dev_path = pci_dev_path + # Test PCI scan - should scan devices + written_data.clear() + with patch.object(dpuctl_obj, 'write_file', wraps=mock_write_file): + assert dpuctl_obj.dpu_pci_scan() + assert written_data[0]["file"].endswith("rescan") + assert written_data[0]["data"] == "1" + +class TestDpuCtlPlatPower: + """Tests for power management functionality""" @patch('os.path.exists', MagicMock(return_value=True)) - @patch('multiprocessing.Process.start', MagicMock(return_value=True)) - @patch('multiprocessing.Process.is_alive', MagicMock(return_value=False)) @patch('sonic_platform.inotify_helper.InotifyHelper.wait_watch') @patch('sonic_platform.inotify_helper.InotifyHelper.__init__') - @patch('subprocess.check_output', MagicMock(return_value=True)) - def test_power_off(self, mock_inotify, mock_add_watch): - """Tests for Per DPU Power Off function""" - dpuctl_obj = obj["dpuctl_list"][0] - mock_inotify.return_value = None - mock_add_watch.return_value = True + def test_power_off(self, mock_inotify_init, mock_wait_watch, dpuctl_obj): + """Test power off functionality""" + mock_inotify_init.return_value = None + mock_wait_watch.return_value = True written_data = [] def mock_write_file(file_name, content_towrite): - written_data.append({"file": file_name, - "data": content_towrite}) + written_data.append({"file": file_name, "data": content_towrite}) return True + + # Test force power off with patch.object(dpuctl_obj, 'write_file', wraps=mock_write_file), \ - patch.object(dpuctl_obj, 'read_boot_prog', MagicMock(return_value=BootProgEnum.OS_RUN.value)): + patch.object(dpuctl_obj, 'read_boot_prog', return_value=BootProgEnum.OS_RUN.value): assert dpuctl_obj.dpu_power_off(True) - print(f"{written_data}") - assert written_data[0]["file"].endswith(f"{pci_dev_path}") - assert "1" == written_data[0]["data"] - assert "0" == written_data[1]["data"] - assert written_data[1]["file"].endswith( - f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "0" == written_data[2]["data"] - assert written_data[2]["file"].endswith( - f"{dpuctl_obj.get_hwmgmt_name()}_pwr_force") - written_data = [] - assert dpuctl_obj.dpu_power_off(False) - assert mock_inotify.call_args.args[0].endswith( - f"{dpuctl_obj.get_hwmgmt_name()}_shtdn_ready") - assert written_data[0]["file"].endswith( - f"{pci_dev_path}") - assert "1" == written_data[0]["data"] - assert written_data[1]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "0" == written_data[1]["data"] - assert written_data[2]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_pwr") - assert "0" == written_data[2]["data"] - written_data = [] - mock_add_watch.return_value = None + assert len(written_data) == 4 # Both PCI and RSHIM removals + rst + pwr_force + assert written_data[0]["file"] == TEST_PCI_REMOVE_PATH + assert written_data[0]["data"] == "1" + assert written_data[1]["file"] == TEST_RSHIM_PCI_REMOVE_PATH + assert written_data[1]["data"] == "1" + assert written_data[2]["data"] == "0" # rst + assert written_data[3]["data"] == "0" # pwr_force + + # Test normal power off + written_data.clear() + with patch.object(dpuctl_obj, 'write_file', wraps=mock_write_file), \ + patch.object(dpuctl_obj, 'read_boot_prog', return_value=BootProgEnum.OS_RUN.value): assert dpuctl_obj.dpu_power_off(False) - assert mock_inotify.call_args.args[0].endswith( - f"{dpuctl_obj.get_hwmgmt_name()}_shtdn_ready") - assert written_data[0]["file"].endswith( - f"{pci_dev_path}") - assert "1" == written_data[0]["data"] - assert written_data[1]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "0" == written_data[1]["data"] - assert written_data[2]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "0" == written_data[2]["data"] - assert written_data[3]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_pwr_force") - assert "0" == written_data[3]["data"] - # Test whether value of boot_progress skips power off - with patch.object(dpuctl_obj, 'read_boot_prog') as mock_boot_prog, \ - patch.object(dpuctl_obj, 'write_file', wraps=mock_write_file), \ - patch.object(dpuctl_obj, '_power_off_force') as mock_power_off_force, \ - patch.object(dpuctl_obj, '_power_off') as mock_power_off, \ - patch.object(dpuctl_obj, 'log_info') as mock_obj: - mock_boot_prog.return_value = BootProgEnum.RST.value - mock_add_watch.return_value = True + assert len(written_data) == 4 # Both PCI and RSHIM removals + rst + pwr + assert written_data[0]["file"] == TEST_PCI_REMOVE_PATH + assert written_data[1]["file"] == TEST_RSHIM_PCI_REMOVE_PATH + assert written_data[2]["file"].endswith("_rst") + assert written_data[3]["file"].endswith("_pwr") + + # Test power off when already off + with patch.object(dpuctl_obj, 'read_boot_prog', return_value=BootProgEnum.RST.value), \ + patch.object(dpuctl_obj, 'log_info') as mock_log: assert dpuctl_obj.dpu_power_off(False) - assert mock_obj.call_args_list[1].args[0] == "Skipping DPU power off as DPU is already powered off" + assert "Skipping DPU power off as DPU is already powered off" in mock_log.call_args_list[-1].args[0] @patch('os.path.exists', MagicMock(return_value=True)) - @patch('multiprocessing.Process.start', MagicMock(return_value=True)) - @patch('multiprocessing.Process.is_alive', MagicMock(return_value=False)) @patch('sonic_platform.inotify_helper.InotifyHelper.wait_watch') @patch('sonic_platform.inotify_helper.InotifyHelper.__init__') - @patch('subprocess.check_output', MagicMock(return_value=True)) - def test_power_on(self, mock_inotify, mock_add_watch): - """Tests for Per DPU Power On function""" - dpuctl_obj = obj["dpuctl_list"][0] - mock_inotify.return_value = None - mock_add_watch.return_value = True + def test_power_on(self, mock_inotify_init, mock_wait_watch, dpuctl_obj): + """Test power on functionality""" + mock_inotify_init.return_value = None + mock_wait_watch.return_value = True written_data = [] def mock_write_file(file_name, content_towrite): - written_data.append({"file": file_name, - "data": content_towrite}) + written_data.append({"file": file_name, "data": content_towrite}) return True + + # Test force power on with patch.object(dpuctl_obj, 'write_file', wraps=mock_write_file), \ - patch.object(dpuctl_obj, 'wait_for_pci', wraps=MagicMock(return_value=None)), \ - patch.object(dpuctl_obj, 'dpu_rshim_service_control', wraps=MagicMock(return_value=None)), \ - patch.object(dpuctl_obj, 'read_boot_prog', wraps=MagicMock(return_value=BootProgEnum.RST.value)), \ - patch.object(dpuctl_obj, 'read_force_power_path') as mock_pwr_force_read: - mock_pwr_force_read.return_value = 1 + patch.object(dpuctl_obj, 'read_boot_prog', return_value=BootProgEnum.RST.value), \ + patch.object(dpuctl_obj, 'read_force_power_path', return_value=1): assert dpuctl_obj.dpu_power_on(True) - assert mock_inotify.call_args.args[0].endswith( - f"{dpuctl_obj.get_hwmgmt_name()}_ready") - assert written_data[0]["file"].endswith( - f"{dpuctl_obj.get_hwmgmt_name()}_pwr_force") - assert "1" == written_data[0]["data"] - assert written_data[1]["file"].endswith( - f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "1" == written_data[1]["data"] - written_data = [] - assert dpuctl_obj.dpu_power_on(False) - assert written_data[0]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_pwr") - assert "1" == written_data[0]["data"] - assert written_data[1]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "1" == written_data[1]["data"] - assert written_data[2]["file"].endswith(f"rescan") - assert "1" == written_data[2]["data"] - written_data = [] - mock_add_watch.return_value = None - assert not dpuctl_obj.dpu_power_on(False) - assert len(written_data) == 19 - assert written_data[0]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_pwr") - assert "1" == written_data[0]["data"] - assert written_data[1]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "1" == written_data[1]["data"] - for i in range(4): - assert written_data[2 + 4 * i]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "0" == written_data[2 + 4 * i]["data"] - assert written_data[3 + 4 * i]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_pwr_force") - assert "0" == written_data[3 + 4 * i]["data"] - assert written_data[4 + 4 * i]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_pwr_force") - assert "1" == written_data[4 + 4 * i]["data"] - assert written_data[5 + 4 * i]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "1" == written_data[5 + 4 * i]["data"] - assert written_data[18]["file"].endswith(f"rescan") - assert "1" == written_data[18]["data"] - written_data = [] - mock_add_watch.return_value = True - mock_pwr_force_read.return_value = 0 - mock_inotify.reset_mock() + assert len(written_data) == 3 # pwr_force + rst + rescan + assert written_data[0]["file"].endswith("_pwr_force") + assert written_data[0]["data"] == "1" + assert written_data[1]["file"].endswith("_rst") + assert written_data[1]["data"] == "1" + assert written_data[2]["file"].endswith("rescan") + assert written_data[2]["data"] == "1" + + # Test normal power on + written_data.clear() + with patch.object(dpuctl_obj, 'write_file', wraps=mock_write_file), \ + patch.object(dpuctl_obj, 'read_boot_prog', return_value=BootProgEnum.RST.value), \ + patch.object(dpuctl_obj, 'read_force_power_path', return_value=1): assert dpuctl_obj.dpu_power_on(False) - assert mock_inotify.call_args.args[0].endswith( - f"{dpuctl_obj.get_hwmgmt_name()}_ready") - assert written_data[0]["file"].endswith( - f"{dpuctl_obj.get_hwmgmt_name()}_pwr_force") - assert "1" == written_data[0]["data"] - assert written_data[1]["file"].endswith( - f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "1" == written_data[1]["data"] + assert len(written_data) == 3 # pwr + rst + rescan + assert written_data[0]["file"].endswith("_pwr") + assert written_data[1]["file"].endswith("_rst") + assert written_data[2]["file"].endswith("rescan") + +class TestDpuCtlPlatReboot: + """Tests for reboot functionality""" @patch('os.path.exists', MagicMock(return_value=True)) - @patch('multiprocessing.Process.start', MagicMock(return_value=None)) - @patch('multiprocessing.Process.is_alive', MagicMock(return_value=False)) @patch('sonic_platform.inotify_helper.InotifyHelper.wait_watch') @patch('sonic_platform.inotify_helper.InotifyHelper.__init__') - def test_dpu_reset(self, mock_inotify, mock_add_watch): - """Tests for Per DPU Reset function""" - dpuctl_obj = obj["dpuctl_list"][0] - mock_inotify.return_value = None - mock_add_watch.return_value = True + def test_reboot(self, mock_inotify_init, mock_wait_watch, dpuctl_obj): + """Test reboot functionality""" + mock_inotify_init.return_value = None + mock_wait_watch.return_value = True written_data = [] def mock_write_file(file_name, content_towrite): - written_data.append({"file": file_name, - "data": content_towrite}) + written_data.append({"file": file_name, "data": content_towrite}) return True + + # Test normal reboot with patch.object(dpuctl_obj, 'write_file', wraps=mock_write_file), \ - patch.object(dpuctl_obj, 'read_boot_prog', MagicMock(return_value=BootProgEnum.OS_RUN.value)), \ - patch.object(dpuctl_obj, 'dpu_rshim_service_control', wraps=MagicMock(return_value=None)): - dpuctl_obj.write_file = mock_write_file + patch.object(dpuctl_obj, 'read_boot_prog', return_value=BootProgEnum.OS_RUN.value): assert dpuctl_obj.dpu_reboot(False) - assert len(written_data) == 4 - assert written_data[0]["file"].endswith(f"{pci_dev_path}") - assert "1" == written_data[0]["data"] - assert written_data[1]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "0" == written_data[1]["data"] - assert written_data[2]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "1" == written_data[2]["data"] - assert written_data[3]["file"].endswith(f"rescan") - assert "1" == written_data[3]["data"] - assert mock_inotify.call_args.args[0].endswith( - f"{dpuctl_obj.get_hwmgmt_name()}_ready") - mock_add_watch.return_value = None - written_data = [] - assert not dpuctl_obj.dpu_reboot() - assert len(written_data) == 22 - assert written_data[0]["file"].endswith(f"{pci_dev_path}") - assert "1" == written_data[0]["data"] - assert written_data[1]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "0" == written_data[1]["data"] - assert written_data[2]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "0" == written_data[2]["data"] - assert written_data[3]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_pwr_force") - assert "0" == written_data[3]["data"] - assert written_data[4]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "1" == written_data[4]["data"] - for i in range(4): - assert written_data[5 + 4 * i]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "0" == written_data[5 + 4 * i]["data"] - assert written_data[6 + 4 * i]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_pwr_force") - assert "0" == written_data[6 + 4 * i]["data"] - assert written_data[7 + 4 * i]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_pwr_force") - assert "1" == written_data[7 + 4 * i]["data"] - assert written_data[8 + 4 * i]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "1" == written_data[8 + 4 * i]["data"] - assert written_data[21]["file"].endswith(f"rescan") - assert "1" == written_data[21]["data"] - # Force Reboot - mock_inotify.reset_mock() - mock_add_watch.return_value = True - mock_inotify.return_value = None - written_data = [] + assert len(written_data) == 5 # Both PCI removals + rst + rst + rescan + assert written_data[0]["file"] == TEST_PCI_REMOVE_PATH + assert written_data[1]["file"] == TEST_RSHIM_PCI_REMOVE_PATH + assert written_data[2]["file"].endswith("_rst") + assert written_data[3]["file"].endswith("_rst") + assert written_data[4]["file"].endswith("rescan") + + # Test force reboot + written_data.clear() with patch.object(dpuctl_obj, 'write_file', wraps=mock_write_file), \ - patch.object(dpuctl_obj, 'read_boot_prog', MagicMock(return_value=BootProgEnum.OS_RUN.value)), \ - patch.object(dpuctl_obj, 'dpu_rshim_service_control', wraps=MagicMock(return_value=None)): - dpuctl_obj.write_file = mock_write_file + patch.object(dpuctl_obj, 'read_boot_prog', return_value=BootProgEnum.OS_RUN.value): assert dpuctl_obj.dpu_reboot(True) - mock_add_watch.return_value = None - assert len(written_data) == 6 - assert written_data[0]["file"].endswith(f"{pci_dev_path}") - assert "1" == written_data[0]["data"] - assert written_data[1]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "0" == written_data[1]["data"] - assert written_data[2]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_pwr_force") - assert "0" == written_data[2]["data"] - assert written_data[3]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_pwr_force") - assert "1" == written_data[3]["data"] - assert written_data[4]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "1" == written_data[4]["data"] - assert written_data[5]["file"].endswith(f"rescan") - assert "1" == written_data[5]["data"] - assert mock_inotify.call_args.args[0].endswith( - f"{dpuctl_obj.get_hwmgmt_name()}_ready") - mock_add_watch.return_value = None - written_data = [] - assert not dpuctl_obj.dpu_reboot(True) - assert len(written_data) == 18 - assert written_data[0]["file"].endswith(f"{pci_dev_path}") - assert "1" == written_data[0]["data"] - assert written_data[1]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "0" == written_data[1]["data"] - assert written_data[2]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_pwr_force") - assert "0" == written_data[2]["data"] - assert written_data[3]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_pwr_force") - assert "1" == written_data[3]["data"] - assert written_data[4]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "1" == written_data[4]["data"] - for i in range(3): - assert written_data[5 + 4 * i]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "0" == written_data[5 + 4 * i]["data"] - assert written_data[6 + 4 * i]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_pwr_force") - assert "0" == written_data[6 + 4 * i]["data"] - assert written_data[7 + 4 * i]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_pwr_force") - assert "1" == written_data[7 + 4 * i]["data"] - assert written_data[8 + 4 * i]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "1" == written_data[8 + 4 * i]["data"] - assert written_data[17]["file"].endswith(f"rescan") - assert "1" == written_data[17]["data"] - written_data = [] - mock_inotify.reset_mock() - mock_add_watch.reset_mock() - mock_inotify.return_value = None - mock_add_watch.return_value = True + assert len(written_data) == 7 # Both PCI removals + rst + pwr_force + pwr_force + rst + rescan + assert written_data[0]["file"] == TEST_PCI_REMOVE_PATH + assert written_data[1]["file"] == TEST_RSHIM_PCI_REMOVE_PATH + assert written_data[2]["file"].endswith("_rst") + assert written_data[3]["file"].endswith("_pwr_force") + assert written_data[4]["file"].endswith("_pwr_force") + assert written_data[5]["file"].endswith("_rst") + assert written_data[6]["file"].endswith("rescan") + + # Test no-wait reboot + written_data.clear() with patch.object(dpuctl_obj, 'write_file', wraps=mock_write_file), \ - patch.object(dpuctl_obj, 'read_boot_prog', MagicMock(return_value=BootProgEnum.OS_RUN.value)), \ - patch.object(dpuctl_obj, 'dpu_rshim_service_control') as mock_rshim: - assert dpuctl_obj.dpu_reboot(forced=False, no_wait=True) - # Rshim service is only stopped and not started - mock_rshim.assert_called_once() - mock_rshim.call_args.args[0] == "stop" - assert written_data[0]["file"].endswith(f"{pci_dev_path}") - assert "1" == written_data[0]["data"] - assert written_data[1]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "0" == written_data[1]["data"] - assert written_data[2]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "1" == written_data[2]["data"] - mock_inotify.called_once() - mock_add_watch.called_once() - # Skip pre startup and post shutdown - written_data = [] + patch.object(dpuctl_obj, 'read_boot_prog', return_value=BootProgEnum.OS_RUN.value): + assert dpuctl_obj.dpu_reboot(no_wait=True) + assert len(written_data) == 4 # Both PCI removals + rst + rst + assert written_data[0]["file"] == TEST_PCI_REMOVE_PATH + assert written_data[1]["file"] == TEST_RSHIM_PCI_REMOVE_PATH + assert written_data[2]["file"].endswith("_rst") + assert written_data[3]["file"].endswith("_rst") + + # Test reboot with skip_pre_post=True + written_data.clear() with patch.object(dpuctl_obj, 'write_file', wraps=mock_write_file), \ - patch.object(dpuctl_obj, 'read_boot_prog', MagicMock(return_value=BootProgEnum.OS_START.value)), \ - patch.object(dpuctl_obj, 'dpu_rshim_service_control') as mock_rshim: + patch.object(dpuctl_obj, 'read_boot_prog', return_value=BootProgEnum.OS_RUN.value): assert dpuctl_obj.dpu_reboot(skip_pre_post=True) - mock_rshim.assert_not_called() - # We skip writing PCI data - assert written_data[0]["file"].endswith(f"{dpuctl_obj.get_hwmgmt_name()}_rst") - assert "0" == written_data[0]["data"] - assert not written_data[-1]["file"].endswith("rescan") - - def test_prog_update(self): - dpuctl_obj = obj["dpuctl_list"][0] + assert len(written_data) == 2 # Only rst operations + assert all("_rst" in data["file"] for data in written_data) + assert not any("remove" in data["file"] for data in written_data) + assert not any("rescan" in data["file"] for data in written_data) + +class TestDpuCtlPlatUtils: + """Tests for utility functions""" + + def test_run_cmd_output(self, dpuctl_obj): + """Test command execution and error handling""" + # Test successful command + with patch('subprocess.check_output') as mock_cmd: + mock_cmd.return_value = b"success\n" + assert dpuctl_obj.run_cmd_output(["test"]) == "success" + + # Test failed command with exception + with patch('subprocess.check_output') as mock_cmd: + mock_cmd.side_effect = subprocess.CalledProcessError(1, "test") + with pytest.raises(subprocess.CalledProcessError): + dpuctl_obj.run_cmd_output(["test"]) + + # Test failed command without exception + with patch('subprocess.check_output') as mock_cmd: + mock_cmd.side_effect = subprocess.CalledProcessError(1, "test") + assert dpuctl_obj.run_cmd_output(["test"], raise_exception=False) is None + + def test_write_file(self, dpuctl_obj): + """Test file writing functionality""" + with patch('sonic_platform.utils.write_file') as mock_write: + mock_write.return_value = True + assert dpuctl_obj.write_file("test_file", "test_content") + mock_write.assert_called_once_with("test_file", "test_content", raise_exception=True) + + mock_write.side_effect = Exception("Write error") + with pytest.raises(Exception) as exc: + dpuctl_obj.write_file("test_file", "test_content") + assert "Write error" in str(exc.value) + + def test_get_hwmgmt_name(self, dpuctl_obj): + """Test hardware management name generation""" + assert dpuctl_obj.get_hwmgmt_name() == "dpu1" # dpu0 -> dpu1 + dpuctl_obj.dpu_name = "dpu1" + dpuctl_obj.dpu_id = 1 + assert dpuctl_obj.get_hwmgmt_name() == "dpu2" # dpu1 -> dpu2 + +class TestDpuCtlPlatStatus: + """Tests for status monitoring functionality""" + + def test_boot_progress(self, dpuctl_obj): + """Test boot progress monitoring""" dpuctl_obj.boot_prog_path = os.path.join(test_path, 'mock_dpu_boot_prog') - class Dummy: + class DummyPoller: def poll(self): return True - dummy_obj = Dummy() - mock_file_path = "mock_dpu_boot_prog" - mock_val = 0 - boot_prog_map = dpuctl_obj.boot_prog_map - - def mock_read_int_from_file(file_path, default=0, raise_exception=False, log_func=None): - if file_path.endswith(mock_file_path): - return mock_val - else: - return 0 - with patch("sonic_platform.utils.read_int_from_file", wraps=mock_read_int_from_file), \ - patch.object(dpuctl_obj, 'wait_for_pci', wraps=MagicMock(return_value=None)), \ - patch.object(dpuctl_obj, 'dpu_rshim_service_control', wraps=MagicMock(return_value=None)): - for key_val in boot_prog_map.keys(): - mock_val = key_val - dpuctl_obj.update_boot_prog_once(dummy_obj) - assert dpuctl_obj.boot_prog_state == key_val - assert dpuctl_obj.boot_prog_indication == f"{key_val} - {boot_prog_map.get(key_val)}" - mock_val = 25 - dpuctl_obj.update_boot_prog_once(dummy_obj) - assert dpuctl_obj.boot_prog_state == 25 - assert dpuctl_obj.boot_prog_indication == "25 - N/A" - mock_val = 36 - dpuctl_obj.update_boot_prog_once(dummy_obj) - assert dpuctl_obj.boot_prog_state == 36 - assert dpuctl_obj.boot_prog_indication == "36 - N/A" - mock_file_path = "dpu1_ready" - mock_val = 1 - dpuctl_obj.dpu_status_update() - assert dpuctl_obj.boot_prog_state == 0 - assert dpuctl_obj.boot_prog_indication == f"0 - {boot_prog_map.get(0)}" + + with patch("sonic_platform.utils.read_int_from_file") as mock_read: + # Test known boot progress states + for state in BootProgEnum: + mock_read.return_value = state.value + dpuctl_obj.update_boot_prog_once(DummyPoller()) + assert dpuctl_obj.boot_prog_state == state.value + assert dpuctl_obj.boot_prog_indication == f"{state.value} - {dpuctl_obj.boot_prog_map[state.value]}" + + # Test unknown boot progress state + mock_read.return_value = 99 + dpuctl_obj.update_boot_prog_once(DummyPoller()) + assert dpuctl_obj.boot_prog_state == 99 + assert dpuctl_obj.boot_prog_indication == "99 - N/A" + + def test_status_updates(self, dpuctl_obj): + """Test DPU status updates""" + with patch("sonic_platform.utils.read_int_from_file") as mock_read: + # Test ready state + mock_read.return_value = 1 + dpuctl_obj.dpu_ready_update() assert dpuctl_obj.dpu_ready_state == 1 - assert dpuctl_obj.dpu_ready_indication == f"True" - assert dpuctl_obj.dpu_shtdn_ready_state == 0 - assert dpuctl_obj.dpu_shtdn_ready_indication == f"False" - mock_file_path = "dpu1_shtdn_ready" - dpuctl_obj.dpu_status_update() - assert dpuctl_obj.boot_prog_state == 0 - assert dpuctl_obj.boot_prog_indication == f"0 - {boot_prog_map.get(0)}" - assert dpuctl_obj.dpu_ready_state == 0 - assert dpuctl_obj.dpu_ready_indication == "False" + assert dpuctl_obj.dpu_ready_indication == "True" + + # Test shutdown ready state + mock_read.return_value = 1 + dpuctl_obj.dpu_shtdn_ready_update() assert dpuctl_obj.dpu_shtdn_ready_state == 1 assert dpuctl_obj.dpu_shtdn_ready_indication == "True" - mock_file_path = "dpu1_shtdn_ready" - mock_val = 25 - dpuctl_obj.dpu_status_update() + + # Test invalid states + mock_read.return_value = 25 + dpuctl_obj.dpu_ready_update() + assert dpuctl_obj.dpu_ready_indication == "25 - N/A" + dpuctl_obj.dpu_shtdn_ready_update() assert dpuctl_obj.dpu_shtdn_ready_indication == "25 - N/A" - mock_file_path = "dpu1_ready" - mock_val = 50 - dpuctl_obj.dpu_status_update() - assert dpuctl_obj.dpu_ready_indication == "50 - N/A" - - @patch('os.path.exists') - @patch('os.open', MagicMock(return_value=-1)) - @patch('os.close', MagicMock(return_value=None)) - @patch('sonic_platform.dpuctlplat.poll') - def test_pci_func(self, m1, mock_exists): - dpuctl_obj = obj["dpuctl_list"][0] - mock_exists.return_value = False - mock_obj = Mock() - mock_obj.register.return_value = None - mock_obj.poll.return_value = None - m1.return_value = mock_obj - timeout_val = 45 - - def mock_time_diff(): - mock_time_diff.counter += 1 - return mock_time_diff.counter * timeout_val - mock_time_diff.counter = 0 - with patch("time.monotonic", wraps=mock_time_diff): - # PCI Device is not recognized - assert not dpuctl_obj.wait_for_pci() - pci_parent_path = os.path.dirname(pci_dev_path) - assert pci_parent_path == mock_exists.call_args.args[0] - mock_obj.register.assert_called_once() - mock_obj.poll.assert_called_once() - # PCI device is recognized immediately - mock_obj.reset_mock() - mock_exists.reset_mock() - mock_exists.return_value = True - assert dpuctl_obj.wait_for_pci() - assert pci_parent_path == mock_exists.call_args.args[0] - mock_obj.register.assert_not_called() - mock_obj.poll.assert_not_called() - # PCI device is added later (Detected in Loop) - timeout_val = 20 - mock_exists.reset_mock() - mock_obj.reset_mock() - mock_exists.side_effect = [False, True] - mock_obj.poll.return_value = True - assert dpuctl_obj.wait_for_pci() - mock_obj.register.assert_called_once() - mock_obj.poll.assert_called_once() - # PCI device is added later (Detected at the end) - timeout_val = 80 - mock_exists.reset_mock() - mock_obj.reset_mock() - mock_exists.side_effect = [False, True] - assert dpuctl_obj.wait_for_pci() - mock_obj.register.assert_called_once() - mock_obj.poll.assert_not_called() - with patch.object(dpuctl_obj, 'pci_dev_path', None), \ - patch('sonic_platform.device_data.DeviceDataManager.get_dpu_interface') as mock_int,\ - patch.object(dpuctl_obj, 'log_error') as mock_obj: - mock_int.return_value = None - dpuctl_obj.wait_for_pci() - mock_obj.assert_called_once_with("Unable to wait for PCI device:Unable to obtain pci device id for dpu0 from platform.json") - new_pci_dev_id = "0000:05:00.0" - mock_int.return_value = new_pci_dev_id - dpuctl_obj.wait_for_pci() - assert dpuctl_obj.pci_dev_path.endswith(f"{new_pci_dev_id}/remove") - # pci dev_path is cached - mock_int.reset_mock() - mock_int.return_value = "None" - dpuctl_obj.wait_for_pci() - mock_int.assert_not_called() - assert dpuctl_obj.pci_dev_path.endswith(f"{new_pci_dev_id}/remove") - - def test_rshim_service(self): - dpuctl_obj = obj["dpuctl_list"][0] - with patch.object(dpuctl_obj, 'run_cmd_output') as mock_method: - dpuctl_obj.dpu_rshim_service_control('start') - mock_method.assert_called_once() - cmd_string = ' '.join(mock_method.call_args.args[0]) - service_name = rshim_interface - operation = "Start" - assert (operation in cmd_string) and (service_name in cmd_string) - mock_method.reset_mock() - operation = "Stop" - dpuctl_obj.dpu_rshim_service_control('stop') - cmd_string = ' '.join(mock_method.call_args.args[0]) - assert (operation in cmd_string) and (service_name in cmd_string) - mock_method.assert_called_once() - with pytest.raises(TypeError): - dpuctl_obj.dpu_rshim_service_control() - with patch.object(dpuctl_obj, 'rshim_interface', None), \ - patch('sonic_platform.device_data.DeviceDataManager.get_dpu_interface') as mock_int,\ - patch.object(dpuctl_obj, 'log_error') as mock_obj: - mock_int.return_value = None - dpuctl_obj.dpu_rshim_service_control('start') - mock_obj.assert_called_once_with("Failed to start rshim!: Unable to Parse rshim information for dpu0 from Platform.json") - mock_int.return_value = "rshim1" - dpuctl_obj.dpu_rshim_service_control('start') - assert dpuctl_obj.rshim_interface == "rshim@1" - mock_int.reset_mock() - mock_int.return_value = "rshim20" - dpuctl_obj.dpu_rshim_service_control('start') - # Rshim name is cached - mock_int.assert_not_called() - assert dpuctl_obj.rshim_interface == "rshim@1" - - def test_pre_and_post(self): - dpuctl_obj = obj["dpuctl_list"][0] - with patch.object(dpuctl_obj, 'dpu_rshim_service_control') as mock_rshim, patch.object(dpuctl_obj, 'write_file') as mock_write: - manager_mock = Mock() - manager_mock.attach_mock(mock_rshim, 'rshim') - manager_mock.attach_mock(mock_write, 'write') - mock_rshim.return_value = True - mock_write.return_value = True - assert dpuctl_obj.dpu_pre_shutdown() - mock_rshim.assert_called_once() - mock_write.assert_called_once() - # Confirm the order of calls and the parameters - manager_mock.mock_calls[0] == call.rshim('stop') - manager_mock.mock_calls[1] == call.rshim(dpuctl_obj.pci_dev_path, '1') - mock_rshim.return_value = False - assert not dpuctl_obj.dpu_pre_shutdown() - mock_rshim.return_value = True - # Test post startup - mock_rshim.reset_mock() - mock_write.reset_mock() - manager_mock.reset_mock() - with patch.object(dpuctl_obj, 'wait_for_pci') as mock_pci: - manager_mock.attach_mock(mock_rshim, 'rshim') - manager_mock.attach_mock(mock_write, 'write') - manager_mock.attach_mock(mock_pci, 'pci') - dpuctl_obj.dpu_post_startup() - mock_rshim.assert_called_once() - mock_write.assert_called_once() - mock_pci.assert_called_once() - # Confirm the order of calls and the parameters - manager_mock.mock_calls[0] == call.rshim('/sys/bus/pci/rescan', '1') - manager_mock.mock_calls[1] == call.pci() - manager_mock.mock_calls[2] == call.rshim('start') - mock_rshim.return_value = False - assert not dpuctl_obj.dpu_post_startup() - with patch.object(dpuctl_obj, 'write_file', side_effect=Exception("Mock")), \ - patch.object(dpuctl_obj, 'run_cmd_output', MagicMock(return_value=True)): - assert not dpuctl_obj.dpu_pre_shutdown() - with patch.object(dpuctl_obj, 'run_cmd_output', side_effect=Exception("Mock")), \ - patch.object(dpuctl_obj, 'dpu_pci_remove', MagicMock(return_value=True)): - assert not dpuctl_obj.dpu_pre_shutdown() - with patch.object(dpuctl_obj, 'write_file', side_effect=Exception("Mock")), \ - patch.object(dpuctl_obj, 'wait_for_pci', MagicMock(return_value=True)), \ - patch.object(dpuctl_obj, 'run_cmd_output', MagicMock(return_value=True)): - assert not dpuctl_obj.dpu_post_startup() - with patch.object(dpuctl_obj, 'run_cmd_output', side_effect=Exception("Mock")), \ - patch.object(dpuctl_obj, 'wait_for_pci', MagicMock(return_value=True)), \ - patch.object(dpuctl_obj, 'dpu_pci_scan', MagicMock(return_value=True)): - assert not dpuctl_obj.dpu_post_startup() - - @classmethod - def teardown_class(cls): - """Teardown function for all tests for dpuctl implementation""" - os.environ["MLNX_PLATFORM_API_DPUCTL_UNIT_TESTING"] = "0" - os.environ["PATH"] = os.pathsep.join( - os.environ["PATH"].split(os.pathsep)[:-1]) diff --git a/platform/mellanox/rshim/Makefile b/platform/mellanox/rshim/Makefile index 8df6534b9d0..fb80246cf1c 100644 --- a/platform/mellanox/rshim/Makefile +++ b/platform/mellanox/rshim/Makefile @@ -31,7 +31,6 @@ $(addprefix $(DEST)/, $(MLNX_RSHIM)): $(DEST)/% : tar xf rshim-${MLNX_RSHIM_DRIVER_VERSION}.tar.gz -C rshim_src --strip-components=1 pushd rshim_src - patch -p1 < ../rename_tmfifo_to_dpu.patch # disable default systemd service sed -i 's/ dh_auto_configure/ dh_auto_configure -- --with-systemdsystemunitdir=no/' debian/rules diff --git a/platform/mellanox/rshim/files/rshim.sh b/platform/mellanox/rshim/files/rshim.sh index caa049ffaef..9074b420161 100644 --- a/platform/mellanox/rshim/files/rshim.sh +++ b/platform/mellanox/rshim/files/rshim.sh @@ -21,8 +21,9 @@ if [ $# -eq 0 ]; then echo "Usage: $0 " exit 1 fi - rshim_name="rshim$1" + +# First try rshim_bus_info pcie=$(dpumap.sh rshim2pcie $rshim_name) if [ $? -ne 0 ]; then @@ -30,10 +31,20 @@ if [ $? -ne 0 ]; then exit 1 fi - +# Check if rshim_bus_info exists in PCI tree if ! lspci -D | grep $pcie > /dev/null; then - echo "PCIE device $pcie is not available" - exit 1 + # If not found, try getting bus_info + dpu=$(dpumap.sh rshim2dpu $rshim_name) + if [ $? -ne 0 ]; then + echo "Error: Could not find DPU for rshim$1" + exit 1 + fi + + pcie=$(dpumap.sh dpu2pcie $dpu) + if [ $? -ne 0 ] || ! lspci -D | grep $pcie > /dev/null; then + echo "PCIE device not found under rshim_bus_info or bus_info" + exit 1 + fi fi /usr/sbin/rshim -i $1 -d pcie-$pcie diff --git a/platform/mellanox/rshim/rename_tmfifo_to_dpu.patch b/platform/mellanox/rshim/rename_tmfifo_to_dpu.patch deleted file mode 100644 index f01880d0c1f..00000000000 --- a/platform/mellanox/rshim/rename_tmfifo_to_dpu.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/src/rshim_net.c b/src/rshim_net.c -index a9e8663..7214df2 100644 ---- a/src/rshim_net.c -+++ b/src/rshim_net.c -@@ -263,7 +263,7 @@ int rshim_net_init(rshim_backend_t *bd) - char ifname[IFNAMSIZ]; - int rc, fd[2]; - -- snprintf(ifname, sizeof(ifname), "tmfifo_net%d", bd->index); -+ snprintf(ifname, sizeof(ifname), "dpu%d", bd->index); - bd->net_fd = rshim_if_open(ifname, bd->index); - - if (bd->net_fd < 0) diff --git a/platform/mellanox/smartswitch/dpu-udev-manager/dpu-udev-manager.service b/platform/mellanox/smartswitch/dpu-udev-manager/dpu-udev-manager.service new file mode 100644 index 00000000000..b251f13eae4 --- /dev/null +++ b/platform/mellanox/smartswitch/dpu-udev-manager/dpu-udev-manager.service @@ -0,0 +1,12 @@ +[Unit] +Description=Manage the DPU udev rules +After=systemd-udevd.service +Requires=systemd-udevd.service + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/usr/bin/dpu-udev-manager.sh start + +[Install] +WantedBy=multi-user.target diff --git a/platform/mellanox/smartswitch/dpu-udev-manager/dpu-udev-manager.sh b/platform/mellanox/smartswitch/dpu-udev-manager/dpu-udev-manager.sh new file mode 100755 index 00000000000..91c06d89d31 --- /dev/null +++ b/platform/mellanox/smartswitch/dpu-udev-manager/dpu-udev-manager.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +# Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +declare -r udev_file="/etc/udev/rules.d/92-midplane-intf.rules" +declare -r platform=$(grep 'onie_platform=' /host/machine.conf | cut -d '=' -f 2) +declare -r platform_json="/usr/share/sonic/device/$platform/platform.json" + +declare -r query='.DPUS | to_entries[] | "\(.key) \(.value.bus_info)"' + +do_start() { + jq -r "$query" $platform_json | while read -r dpu bus_info; do + echo SUBSYSTEM==\"net\", ACTION==\"add\", KERNELS==\"$bus_info\", NAME=\"$dpu\" + done > $udev_file +} + +case "$1" in + start) + do_start + ;; + *) + echo "Error: Invalid argument." + echo "Usage: $0 {start}" + exit 1 + ;; +esac diff --git a/platform/mellanox/smartswitch/dpumap.sh b/platform/mellanox/smartswitch/dpumap.sh index 34ed184190d..267ac3df1bd 100755 --- a/platform/mellanox/smartswitch/dpumap.sh +++ b/platform/mellanox/smartswitch/dpumap.sh @@ -23,7 +23,7 @@ PLATFORM=${PLATFORM:-`sonic-db-cli CONFIG_DB HGET 'DEVICE_METADATA|localhost' pl PLATFORM_JSON=/usr/share/sonic/device/$PLATFORM/platform.json usage(){ - echo "Usage: $0 {dpu2pcie|dpu2rshim|rshim2dpu|pcie2dpu|rshim2pcie|pcie2rshim} name" + echo "Usage: $0 {dpu2pcie|dpu2rshim|rshim2dpu|pcie2dpu|rshim2pcie|pcie2rshim|listdpus|listrshims} [name]" } validate_platform(){ @@ -57,18 +57,35 @@ case $1 in var="rshim" ;; "rshim2pcie") - jq_query='.DPUS | to_entries[] | select(.value.rshim_info == $rshim) | .value.bus_info' + jq_query='.DPUS | to_entries[] | select(.value.rshim_info == $rshim) | .value.rshim_bus_info' var="rshim" ;; + "listdpus") + jq_query='.DPUS | keys[]' + var="" + ;; + "listrshims") + jq_query='.DPUS | to_entries[] | .value.rshim_info' + var="" + ;; *) echo "Invalid usage of script!" usage exit 1 esac -IFS=',' read -r -a identifier_array <<< "$2" +if [[ -n "$2" ]]; then + IFS=',' read -r -a identifier_array <<< "$2" +else + identifier_array=("") +fi + for identifier in "${identifier_array[@]}"; do - op=$(jq -r --arg "$var" "$identifier" "$jq_query" "$PLATFORM_JSON") + if [[ -z "$var" ]]; then + op=$(jq -r "$jq_query" "$PLATFORM_JSON") + else + op=$(jq -r --arg "$var" "$identifier" "$jq_query" "$PLATFORM_JSON") + fi if [[ "$op" != "null" ]]; then echo "$op" else diff --git a/platform/mellanox/sonic-bfb-installer.sh b/platform/mellanox/sonic-bfb-installer.sh index 39c5bfe4a04..5bb1d0b4c83 100755 --- a/platform/mellanox/sonic-bfb-installer.sh +++ b/platform/mellanox/sonic-bfb-installer.sh @@ -20,61 +20,126 @@ declare -A rshim2dpu command_name="sonic-bfb-installer.sh" usage(){ - echo "Syntax: $command_name -b|--bfb --rshim|-r --verbose|-v --config|-c --help|h" + echo "Syntax: $command_name -b|--bfb --rshim|-r --dpu|-d --verbose|-v --config|-c --help|-h" echo "Arguments:" - echo "-b Provide custom path for bfb image" - echo "-r Install only on DPUs connected to rshim interfaces provided, mention all if installation is requried on all connected DPUs" - echo "-v Verbose installation result output" - echo "-c Config file" - echo "-h Help" + echo "-b|--bfb Provide custom path for bfb image" + echo "-r|--rshim Install only on DPUs connected to rshim interfaces provided, mention all if installation is required on all connected DPUs" + echo "-d|--dpu Install on specified DPUs, mention all if installation is required on all connected DPUs" + echo "-v|--verbose Verbose installation result output" + echo "-c|--config Config file" + echo "-h|--help Help" } WORK_DIR=`mktemp -d -p "$DIR"` -bfb_install_call(){ - #Example:sudo bfb-install -b -r rshim - local appendix=$4 +wait_for_rshim_boot() { local -r rshim=$1 - local dpu=$2 - local bfb=$3 - local result_file=$(mktemp "${WORK_DIR}/result_file.XXXXX") - if [ -z "$appendix" ]; then - local cmd="timeout 600s bfb-install -b $bfb -r $1" - else - local cmd="timeout 600s bfb-install -b $bfb -r $1 -c $appendix" + local timeout=10 + + while [ ! -e "/dev/${rshim}/boot" ] && [ $timeout -gt 0 ]; do + sleep 1 + ((timeout--)) + done + + if [ ! -e "/dev/${rshim}/boot" ]; then + echo "$rshim: Error: Boot file did not appear after 10 seconds" + return 1 fi - echo "Installing bfb image on DPU connected to $rshim using $cmd" - local indicator="$rshim:" - trap 'kill_ch_procs' SIGINT SIGTERM SIGHUP - eval "$cmd" > >(while IFS= read -r line; do echo "$indicator $line"; done >> "$result_file") 2>&1 & - cmd_pid=$! - local total_time=600 + return 0 +} + +remove_pci_device() { + local -r rshim=$1 + local -r dpu=$2 + + # Get bus_id and rshim_bus_id for this DPU + local bus_id=$(dpumap.sh dpu2pcie $dpu) + local rshim_bus_id=$(dpumap.sh rshim2pcie $rshim) + + # Check if both bus_id and rshim_bus_id devices exist + if [ -n "$bus_id" ] && [ -n "$rshim_bus_id" ]; then + if lspci -D | grep -q "$bus_id" && lspci -D | grep -q "$rshim_bus_id"; then + echo "$rshim: Removing PCI device $bus_id" + echo 1 > /sys/bus/pci/devices/$bus_id/remove + fi + fi +} + +monitor_installation() { + local -r rid=$1 + local -r pid=$2 + local -r total_time=$3 local elapsed=0 - # Interval is selected at random so all the processes can print to same line + + # Random interval between 3-10 seconds for progress updates local interval=$(($RANDOM%(10-3+1)+3)) - while kill -0 $cmd_pid 2>/dev/null; do + + while kill -0 $pid 2>/dev/null; do sleep $interval elapsed=$((elapsed + interval)) - echo -ne "\r$indicator Installing... $elapsed/$total_time seconds elapsed" + echo -ne "\r$rid: Installing... $elapsed/$total_time seconds elapsed" if [ $elapsed -ge $total_time ]; then break fi done + echo +} + +bfb_install_call() { + local -r rshim=$1 + local -r dpu=$2 + local -r bfb=$3 + local -r appendix=$4 + local -r rid=${rshim#rshim} + local -r result_file=$(mktemp "${WORK_DIR}/result_file.XXXXX") + local -r timeout_secs=1200 + + # Start rshim service and ensure it's stopped on exit + systemctl start rshim@${rid}.service + trap "systemctl stop rshim@${rid}.service" EXIT + + # Wait for boot file and remove PCI device + if ! wait_for_rshim_boot "$rshim"; then + exit 1 + fi + remove_pci_device "$rshim" "$dpu" + + # Construct bfb-install command + local cmd="timeout ${timeout_secs}s bfb-install -b $bfb -r $rshim" + if [ -n "$appendix" ]; then + cmd="$cmd -c $appendix" + fi + echo "Installing bfb image on DPU connected to $rshim using $cmd" + + # Run installation with progress monitoring + trap 'kill_ch_procs' SIGINT SIGTERM SIGHUP + eval "$cmd" > >(while IFS= read -r line; do echo "$rid: $line"; done >> "$result_file") 2>&1 & + local cmd_pid=$! + + monitor_installation "$rid" $cmd_pid $timeout_secs + + # Check installation result wait $cmd_pid local exit_status=$? - if [ $exit_status -ne 0 ]; then - echo "$rshim: Error: Installation failed on connected DPU!" + if [ $exit_status -ne 0 ]; then + echo "$rid: Error: Installation failed on connected DPU!" else - echo "$rshim: Installation Successful" + echo "$rid: Installation Successful" fi - if [ $exit_status -ne 0 ] ||[ $verbose = true ]; then + + # Show detailed output if verbose or error + if [ $exit_status -ne 0 ] || [ $verbose = true ]; then cat "$result_file" fi - echo "$rshim: Resetting DPU $dpu" - cmd="dpuctl dpu-reset --force $dpu" + + # Stop rshim service and reset DPU + systemctl stop rshim@${rid}.service + echo "$rid: Resetting DPU $dpu" + + local reset_cmd="dpuctl dpu-reset --force $dpu" if [[ $verbose == true ]]; then - cmd="$cmd -v" + reset_cmd="$reset_cmd -v" fi - eval $cmd + eval $reset_cmd } file_cleanup(){ @@ -146,116 +211,170 @@ check_for_root(){ fi } -main(){ +detect_rshims_from_pci(){ + # Get list of supported DPUs from dpumap.sh + local dpu_list=$(dpumap.sh listdpus 2>/dev/null) + if [ $? -ne 0 ] || [ -z "$dpu_list" ]; then + echo "No supported DPUs found" + return 1 + fi + + # For each DPU, check if its PCI exists and get corresponding rshim + local detected_rshims=() + while read -r dpu; do + local bus_info=$(dpumap.sh dpu2pcie "$dpu" 2>/dev/null) + if [ $? -eq 0 ] && [ ! -z "$bus_info" ] && [ -e "/sys/bus/pci/devices/$bus_info" ]; then + local rshim=$(dpumap.sh dpu2rshim "$dpu" 2>/dev/null) + if [ $? -eq 0 ] && [ ! -z "$rshim" ]; then + detected_rshims+=("$rshim") + fi + fi + done <<< "$dpu_list" + + if [ ${#detected_rshims[@]} -eq 0 ]; then + echo "No rshim devices detected" + return 1 + fi + + # Return unique sorted list of detected rshim devices + printf '%s\n' "${detected_rshims[@]}" | sort -u + return 0 +} + +main() { check_for_root - local config= - while [ "$1" != "--" ] && [ -n "$1" ]; do - case $1 in - --help|-h) - usage; - exit 0 - ;; - --bfb|-b) - shift; - bfb=$1 - ;; - --rshim|-r) - shift; - rshim_dev=$1 - ;; - --dpu|-d) - shift; - dpus=$1 - ;; - --config|-c) - shift; - config=$1 - ;; - --verbose|-v) - verbose=true - ;; - esac - shift - done + + # Parse command line arguments + local config= bfb= rshim_dev= dpus= verbose=false + parse_arguments "$@" + + # Validate BFB image if [ -z "$bfb" ]; then - echo "Error : bfb image is not provided." + echo "Error: bfb image is not provided." usage exit 1 - else - is_url $bfb fi + is_url "$bfb" + trap "file_cleanup" EXIT - dev_names_det+=($( - ls -d /dev/rshim? | awk -F'/' '{print $NF}' - )) + + # Detect available rshim interfaces + local dev_names_det=($(detect_rshims_from_pci)) if [ "${#dev_names_det[@]}" -eq 0 ]; then - echo "No rshim interfaces detected! Make sure to run the $command_name script from the host device/ switch!" + echo "No rshim interfaces detected! Make sure to run the $command_name script from the host device/switch!" exit 1 fi + + # Handle rshim/dpu selection + local dev_names=() if [ -z "$rshim_dev" ]; then if [ -z "$dpus" ]; then - echo "No rshim interfaces provided!" - usage - exit 1 - fi - if [ "$dpus" = "all" ]; then - rshim_dev="$dpus" - else + echo "No rshim interfaces provided!" + usage + exit 1 + fi + if [ "$dpus" = "all" ]; then + rshim_dev="all" + else IFS=',' read -ra dpu_names <<< "$dpus" - validate_dpus ${dpu_names[@]} - fi + validate_dpus "${dpu_names[@]}" + fi fi - if [ "$rshim_dev" = "all" ]; then - dev_names=("${dev_names_det[@]}") - echo "${#dev_names_det[@]} rshim interfaces detected:" - echo "${dev_names_det[@]}" - else - if [ ${#dev_names[@]} -eq 0 ]; then - # If the list is not empty, the list is obtained from the DPUs - IFS=',' read -ra dev_names <<< "$rshim_dev" - fi - validate_rshim ${dev_names[@]} + dev_names=("${dev_names_det[@]}") + echo "${#dev_names_det[@]} rshim interfaces detected:" + echo "${dev_names_det[@]}" + else + if [ ${#dev_names[@]} -eq 0 ]; then + IFS=',' read -ra dev_names <<< "$rshim_dev" + fi + validate_rshim "${dev_names[@]}" fi + if [ ${#rshim2dpu[@]} -eq 0 ]; then - get_mapping ${dev_names[@]} + get_mapping "${dev_names[@]}" fi - # Sort list of rshim interfaces so that config is applied in a known order - sorted_devs=($(for i in "${dev_names[@]}"; do echo $i; done | sort)) - if [ ! -z ${config} ]; then + + # Sort devices and handle config files + local sorted_devs=($(printf '%s\n' "${dev_names[@]}" | sort)) + local arr=() + + if [ -n "$config" ]; then echo "Using ${config} file/s" if [[ "$config" == *","* ]]; then IFS=',' read -r -a arr <<< "$config" else - arr=() - for ((i=0; i<${#dev_names[@]}; i++)); do + arr=("$config") + for ((i=1; i<${#dev_names[@]}; i++)); do arr+=("$config") done fi - if [ ${#arr[@]} -ne ${#sorted_devs[@]} ]; then - echo "Length of config file list does not match the devices selected: ${sorted_devs[@]} and ${arr[@]}" - exit 1 - fi - for i in "${!arr[@]}" - do - if [ ! -f ${arr[$i]} ]; then - echo "Config provided ${arr[$i]} is not a file! Please check" - exit 1 - fi - done + + validate_config_files "${sorted_devs[@]}" "${arr[@]}" fi + + # Install BFB on each device trap 'kill_ch_procs' SIGINT SIGTERM SIGHUP - for i in "${!sorted_devs[@]}" - do - : + + for i in "${!sorted_devs[@]}"; do rshim_name=${sorted_devs[$i]} dpu_name=${rshim2dpu[$rshim_name]} - bfb_install_call ${rshim_name} ${dpu_name} $bfb ${arr[$i]} & + bfb_install_call "$rshim_name" "$dpu_name" "$bfb" "${arr[$i]}" & done wait } +# Helper function to parse command line arguments +parse_arguments() { + while [ "$1" != "--" ] && [ -n "$1" ]; do + case $1 in + --help|-h) + usage + exit 0 + ;; + --bfb|-b) + shift + bfb=$1 + ;; + --rshim|-r) + shift + rshim_dev=$1 + ;; + --dpu|-d) + shift + dpus=$1 + ;; + --config|-c) + shift + config=$1 + ;; + --verbose|-v) + verbose=true + ;; + esac + shift + done +} + +# Helper function to validate config files +validate_config_files() { + local -a sorted_devs=("${@:1:${#sorted_devs[@]}}") + local -a arr=("${@:$((${#sorted_devs[@]}+1))}") + + if [ ${#arr[@]} -ne ${#sorted_devs[@]} ]; then + echo "Length of config file list does not match the devices selected: ${sorted_devs[*]} and ${arr[*]}" + exit 1 + fi + + for config_file in "${arr[@]}"; do + if [ ! -f "$config_file" ]; then + echo "Config provided $config_file is not a file! Please check" + exit 1 + fi + done +} + kill_all_descendant_procs() { local pid="$1" local self_kill="${2:-false}" @@ -269,7 +388,6 @@ kill_all_descendant_procs() { fi } - kill_ch_procs(){ echo "" echo "Installation Interrupted.. killing All child procs"