Skip to content
1 change: 1 addition & 0 deletions tests/common/fixtures/grpc_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ def _configure_gnoi_tls_server(duthost):
duthost.shell('sonic-db-cli CONFIG_DB hset "GNMI|gnmi" port 50052')
duthost.shell('sonic-db-cli CONFIG_DB hset "GNMI|gnmi" client_auth true')
duthost.shell('sonic-db-cli CONFIG_DB hset "GNMI|gnmi" log_level 2')
duthost.shell('sonic-db-cli CONFIG_DB hset "GNMI|gnmi" user_auth cert')

# Configure certificate paths using centralized config
config_db_settings = grpc_config.get_config_db_cert_settings()
Expand Down
91 changes: 87 additions & 4 deletions tests/common/helpers/upgrade_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
from six.moves.urllib.parse import urlparse
import tests.common.fixtures.grpc_fixtures # noqa: F401
from tests.common.helpers.assertions import pytest_assert
from tests.common.reboot import reboot, get_reboot_cause, reboot_ctrl_dict
from tests.common import reboot
from tests.common.helpers.multi_thread_utils import SafeThreadPoolExecutor
from tests.common.reboot import get_reboot_cause, reboot_ctrl_dict
from tests.common.reboot import REBOOT_TYPE_WARM, REBOOT_TYPE_COLD
from tests.common.utilities import wait_until, setup_ferret
from tests.common.platform.device_utils import check_neighbors
from typing import Optional, Dict

from typing import Optional, Sequence, Tuple, Union, Dict
SYSTEM_STABILIZE_MAX_TIME = 300
logger = logging.getLogger(__name__)

Expand All @@ -21,6 +22,10 @@
TMP_PORTS_FILE = '/tmp/ports.json'
TMP_PEER_INFO_FILE = "/tmp/peer_dev_info.json"
TMP_PEER_PORT_INFO_FILE = "/tmp/neigh_port_info.json"
SS_TARGET_TYPE_HDR = "x-sonic-ss-target-type"
SS_TARGET_INDEX_HDR = "x-sonic-ss-target-index"

GrpcMetadata = Union[Dict[str, str], Sequence[Tuple[str, str]]]


@dataclass(frozen=True)
Expand All @@ -31,6 +36,9 @@ class GnoiUpgradeConfig:
protocol: str = "HTTP"
allow_fail: bool = False
to_version: Optional[str] = None # Optional expected version string to validate after upgrade
ss_target_type: Optional[str] = None # e.g. "dpu"
ss_target_index: Optional[int] = None # e.g. 3
metadata: Optional[GrpcMetadata] = None
ss_reboot_ready_timeout: int = 1200
ss_reboot_message: str = "Rebooting DPU for maintenance"

Expand Down Expand Up @@ -395,4 +403,79 @@ def perform_gnoi_upgrade(
f"Current image mismatch after reboot. current={images.get('current')} expected={cfg.to_version}. full={images}"
)

return {"transfer_resp": transfer_resp, "setpkg_resp": setpkg_resp}
return {"transfer_resp": transfer_resp, "setpkg_resp": setpkg_resp, "reboot_resp": reboot_resp}


def _wait_gnoi_time_ready(ptf_gnoi, metadata, cfg: GnoiUpgradeConfig, timeout=None, interval: int = 10) -> bool:
timeout = timeout or cfg.ss_reboot_ready_timeout
return wait_until(timeout, interval, 0, ptf_gnoi.system_time, metadata=metadata)


def _upgrade_one_dpu_via_gnoi(duthost, tbinfo, ptf_gnoi, cfg: GnoiUpgradeConfig) -> Dict:
if cfg.metadata is None:
raise ValueError("cfg.metadata must be provided for SmartSwitch DPU upgrade")

md = cfg.metadata

ptf_gnoi.system_time(metadata=md)

transfer_resp = ptf_gnoi.file_transfer_to_remote(
url=cfg.to_image,
local_path=cfg.dut_image_path,
protocol=cfg.protocol,
metadata=md,
)

setpkg_resp = ptf_gnoi.system_set_package(
local_path=cfg.dut_image_path,
version=cfg.to_version,
activate=True,
metadata=md,
)

method = str(cfg.upgrade_type).upper()
try:
reboot_resp = ptf_gnoi.system_reboot(
method=method,
delay=0,
message=cfg.ss_reboot_message,
metadata=md,
)
except Exception as e:
logger.info("Reboot raised (often expected): %s", e)
reboot_resp = None

if cfg.allow_fail:
return {"transfer_resp": transfer_resp, "setpkg_resp": setpkg_resp, "reboot_resp": reboot_resp}

ok = _wait_gnoi_time_ready(ptf_gnoi, md, cfg)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@v-cshekar , as a next step from here, please include your changes (generic perform_reboot call in this section)

pytest_assert(ok, f"gNOI Time not reachable within {cfg.ss_reboot_ready_timeout}s after reboot")

return {"transfer_resp": transfer_resp, "setpkg_resp": setpkg_resp, "reboot_resp": reboot_resp}


def perform_gnoi_upgrade_smartswitch_dpu(duthost, tbinfo, ptf_gnoi, cfg: GnoiUpgradeConfig) -> Dict:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Optional - Considering the concurrent call has a name perform_gnoi_upgrade_smartswitch_dpus_parallel, a better name for this function should be perform_gnoi_upgrade_smartswitch_single_dpu?

return _upgrade_one_dpu_via_gnoi(duthost, tbinfo, ptf_gnoi, cfg)


def perform_gnoi_upgrade_smartswitch_dpus_parallel(
duthost, tbinfo,
ptf_gnoi,
cfgs: Sequence[GnoiUpgradeConfig],
max_workers: Optional[int] = None,
) -> Dict[int, Dict]:
if not cfgs:
raise ValueError("cfgs is empty")

workers = max_workers or len(cfgs)
results: Dict[int, Dict] = {}

with SafeThreadPoolExecutor(max_workers=workers) as executor:
futs = {}
for i, cfg in enumerate(cfgs):
futs[i] = executor.submit(_upgrade_one_dpu_via_gnoi, duthost, tbinfo, ptf_gnoi, cfg)

for i, fut in futs.items():
results[i] = fut.result()

return results
22 changes: 14 additions & 8 deletions tests/common/ptf_gnoi.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(self, grpc_client):
self.grpc_client = grpc_client
logger.info(f"Initialized PtfGnoi wrapper with {grpc_client}")

def system_time(self) -> Dict:
def system_time(self, metadata=None) -> Dict:
"""
Get the current system time from the device.

Expand All @@ -47,7 +47,7 @@ def system_time(self) -> Dict:
logger.debug("Getting system time via gNOI System.Time")

# Make the low-level gRPC call
response = self.grpc_client.call_unary("gnoi.system.System", "Time")
response = self.grpc_client.call_unary("gnoi.system.System", "Time", metadata=metadata)

# Convert time string to int for consistency
if "time" in response:
Expand All @@ -63,7 +63,7 @@ def system_time(self) -> Dict:
# TODO: Add file_get(), file_put(), file_remove() methods
# These are left for future implementation when gNOI File service is stable

def file_stat(self, remote_file: str) -> Dict:
def file_stat(self, remote_file: str, metadata=None) -> Dict:
"""
Get file statistics from the device.

Expand All @@ -84,7 +84,7 @@ def file_stat(self, remote_file: str) -> Dict:
request = {"path": remote_file}

try:
response = self.grpc_client.call_unary("gnoi.file.File", "Stat", request)
response = self.grpc_client.call_unary("gnoi.file.File", "Stat", request, metadata=metadata)

# Convert numeric strings to proper types for consistency
if "stats" in response and isinstance(response["stats"], list):
Expand Down Expand Up @@ -117,6 +117,7 @@ def file_transfer_to_remote(
local_path: str,
protocol: Optional[str] = None,
credentials: Optional[Dict[str, str]] = None,
metadata=None
) -> Dict:
"""
Download a remote artifact to the DUT using gNOI File.TransferToRemote.
Expand Down Expand Up @@ -182,11 +183,11 @@ def file_transfer_to_remote(
remote_download["credentials"] = credentials

request = {
"localPath": local_path,
"remoteDownload": remote_download,
"local_path": local_path,
"remote_download": remote_download,
}

response = self.grpc_client.call_unary("gnoi.file.File", "TransferToRemote", request)
response = self.grpc_client.call_unary("gnoi.file.File", "TransferToRemote", request, metadata=metadata)
logger.info("TransferToRemote completed: %s -> %s", url, local_path)
return response

Expand All @@ -195,6 +196,7 @@ def system_set_package(
local_path: str,
version: Optional[str] = None,
activate: bool = True,
metadata=None,
) -> Dict:
"""
Set the upgrade package on the DUT using gNOI System.SetPackage (client-streaming RPC).
Expand All @@ -221,6 +223,8 @@ def system_set_package(
"SetPackage via gNOI System.SetPackage (streaming): filename=%s version=%s activate=%s",
local_path, version, activate,
)
self.grpc_client.configure_max_time(3600) # allow long SetPackage
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

grpc client and server connections dropped when "set_package" which is image installation. It complains about too many pings and "ENHANCE_YOUR_CALM" and disconnected, considering it's client/server side session time out issue, added the max_time and keep_alive parameters in the grpc command.

self.grpc_client.configure_keepalive_time(300) # 5 min keepalive (less aggressive
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here - why is this step really needed?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We really need to understand this step clearly - if this is client side need, or server's limitation -- @hdwhdw

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is client side need, we need to update the MoP that we have for our clients.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We maybe need some changes on server side to determine when the installation is completed.


pkg: Dict[str, object] = {
"filename": local_path,
Expand All @@ -233,6 +237,7 @@ def system_set_package(
"gnoi.system.System",
"SetPackage",
[{"package": pkg}],
metadata=metadata,
)

logger.info("SetPackage completed: %s", local_path)
Expand All @@ -244,6 +249,7 @@ def system_reboot(
delay: Optional[int] = None,
message: Optional[str] = None,
force: bool = False,
metadata=None,
) -> Dict:
"""
Reboot the DUT using gNOI System.Reboot.
Expand Down Expand Up @@ -290,6 +296,6 @@ def system_reboot(

logger.debug("Reboot via gNOI System.Reboot: %s", request)

response = self.grpc_client.call_unary("gnoi.system.System", "Reboot", request)
response = self.grpc_client.call_unary("gnoi.system.System", "Reboot", request, metadata=metadata)
logger.info("Reboot request sent: method=%s delay=%s force=%s", method, delay, force)
return response
Loading
Loading