diff --git a/.azure-pipelines/recover_testbed/common.py b/.azure-pipelines/recover_testbed/common.py index 06613ff8e6d..58208887334 100644 --- a/.azure-pipelines/recover_testbed/common.py +++ b/.azure-pipelines/recover_testbed/common.py @@ -8,6 +8,7 @@ import time import pexpect import ipaddress +from constants import OS_VERSION_IN_GRUB, ONIE_ENTRY_IN_GRUB, INSTALL_OS_IN_ONIE, ONIE_START_TO_DISCOVERY, SONIC_PROMPT _self_dir = os.path.dirname(os.path.abspath(__file__)) base_path = os.path.realpath(os.path.join(_self_dir, "../..")) @@ -46,9 +47,9 @@ def get_pdu_managers(sonichosts, conn_graph_facts): return pdu_managers -def posix_shell_onie(dut_console, mgmt_ip, image_url): +def posix_shell_onie(dut_console, mgmt_ip, image_url, is_nexus=False): oldtty = termios.tcgetattr(sys.stdin) - enter_onie_flag = 0 + enter_onie_flag = True gw_ip = list(ipaddress.ip_interface(mgmt_ip).network.hosts())[0] try: tty.setraw(sys.stdin.fileno()) @@ -66,20 +67,21 @@ def posix_shell_onie(dut_console, mgmt_ip, image_url): x = x.decode('ISO-8859-9') - if "GNU GRUB" in x: - enter_onie_flag += 1 + if is_nexus and "loader" in x and ">" in x: + dut_console.remote_conn.send('reboot\n') continue - if "SONiC-OS-" in x and enter_onie_flag == 1: + if OS_VERSION_IN_GRUB in x and enter_onie_flag is True: # Send arrow key "down" here. 
dut_console.remote_conn.send(b'\x1b[B') continue - if "*ONIE" in x and "Install OS" not in x: + if ONIE_ENTRY_IN_GRUB in x and INSTALL_OS_IN_ONIE not in x: dut_console.remote_conn.send("\n") - enter_onie_flag += 1 + enter_onie_flag = False - if "ONIE: Starting ONIE Service Discovery" in x: + # "ONIE: Starting ONIE Service Discovery" + if ONIE_START_TO_DISCOVERY in x: # TODO: Define a function to send command here for i in range(5): dut_console.remote_conn.send('onie-discovery-stop\n') @@ -102,7 +104,7 @@ def posix_shell_onie(dut_console, mgmt_ip, image_url): if "ETA" in x: break - if "sonic login:" in x: + if SONIC_PROMPT in x: dut_console.remote_conn.close() sys.stdout.write(x) diff --git a/.azure-pipelines/recover_testbed/constants.py b/.azure-pipelines/recover_testbed/constants.py new file mode 100644 index 00000000000..d0cc7e6542e --- /dev/null +++ b/.azure-pipelines/recover_testbed/constants.py @@ -0,0 +1,48 @@ +RC_SSH_SUCCESS = 0 +RC_SSH_FAILED = 1 +RC_PASSWORD_FAILED = 2 + +# Here we will get a screen like +# +# GNU GRUB version 2.02 +# +# +----------------------------------------------------------------------------+ +# |*SONiC-OS-20220531.48 | +# | ONIE | +# | | +# | | +# +----------------------------------------------------------------------------+ +# Use the ^ and v keys to select which entry is highlighted. +# Press enter to boot the selected OS, `e' to edit the commands +# before booting or `c' for a command-line. 
+ +# The buffer may be small and cannot hold all characters, +# so we select typical characters +OS_VERSION_IN_GRUB = "-OS-" +ONIE_ENTRY_IN_GRUB = "*ONIE" + +# After entering ONIE, we get a screen like +# +# GNU GRUB version 2.02 +# +# +----------------------------------------------------------------------------+ +# |*ONIE: Install OS | +# | ONIE: Rescue | +# | ONIE: Uninstall OS | +# | ONIE: Update ONIE | +# | ONIE: Embed ONIE | +# | | +# | | +# +----------------------------------------------------------------------------+ +# Use the ^ and v keys to select which entry is highlighted. +# Press enter to boot the selected OS, `e' to edit the commands +# before booting or `c' for a command-line. + +INSTALL_OS_IN_ONIE = "Install OS" + +# After entering the installation in ONIE, it will discover some configuration +# And finally, we will get the string "ONIE: Starting ONIE Service Discovery" +ONIE_START_TO_DISCOVERY = "Discovery" + +# Finally, if the installation succeeds in ONIE, we will get the login prompt +SONIC_PROMPT = "sonic login:" diff --git a/.azure-pipelines/recover_testbed/dut_connection.py b/.azure-pipelines/recover_testbed/dut_connection.py index fd9436f6696..a6ed5e0b643 100644 --- a/.azure-pipelines/recover_testbed/dut_connection.py +++ b/.azure-pipelines/recover_testbed/dut_connection.py @@ -4,13 +4,13 @@ import os import sys import paramiko -import socket import glob import re import yaml import jinja2 from tests.common.connections.console_host import ConsoleHost from paramiko.ssh_exception import AuthenticationException +from constants import RC_SSH_FAILED, RC_PASSWORD_FAILED _self_dir = os.path.dirname(os.path.abspath(__file__)) base_path = os.path.realpath(os.path.join(_self_dir, "../..")) @@ -22,10 +22,6 @@ logger = logging.getLogger(__name__) -RC_SSH_SUCCESS = 0 -RC_SOCKET_TIMEOUT = 1 -RC_PASSWORD_FAILED = 2 - def creds_on_dut(sonichost): groups = sonichost.im.get_host(sonichost.hostname).get_vars()['group_names'] @@ -137,7 +133,8 @@ def 
duthost_ssh(sonichost): return sonic_username, password, sonic_ip except AuthenticationException: continue - except socket.timeout as e: + # Any other error, such as a timeout or connection failure + except Exception as e: logger.info("Cannot access DUT {} via ssh, error: {}".format(sonichost.hostname, e)) - return RC_SOCKET_TIMEOUT + return RC_SSH_FAILED return RC_PASSWORD_FAILED diff --git a/.azure-pipelines/recover_testbed/recover_testbed.py b/.azure-pipelines/recover_testbed/recover_testbed.py index 2705f4eddbe..a765f3be70a 100644 --- a/.azure-pipelines/recover_testbed/recover_testbed.py +++ b/.azure-pipelines/recover_testbed/recover_testbed.py @@ -5,6 +5,7 @@ import os import sys from common import do_power_cycle, check_sonic_installer, posix_shell_aboot, posix_shell_onie +from constants import RC_SSH_FAILED _self_dir = os.path.dirname(os.path.abspath(__file__)) base_path = os.path.realpath(os.path.join(_self_dir, "../..")) @@ -36,13 +37,13 @@ def recover_via_console(sonichost, conn_graph_facts, localhost, mgmt_ip, image_u do_power_cycle(sonichost, conn_graph_facts, localhost) - type = hwsku.split('-')[0].lower() + device_type = hwsku.split('-')[0].lower() - if type in ["arista"]: + if device_type in ["arista"]: posix_shell_aboot(dut_console, mgmt_ip, image_url) - # elif type in ["Cisco"]: - # return - elif type in ["mellanox", "nexus", "acs"]: + elif device_type in ["nexus"]: + posix_shell_onie(dut_console, mgmt_ip, image_url, is_nexus=True) + elif device_type in ["mellanox", "cisco", "acs"]: posix_shell_onie(dut_console, mgmt_ip, image_url) else: return @@ -73,8 +74,7 @@ def recover_testbed(sonichosts, conn_graph_facts, localhost, image_url, hwsku): except Exception as e: logger.info("Exception caught while executing cmd. Error message: {}".format(e)) need_to_recover = True - # TODO: Define the return message like RC_SOCKET_TIMEOUT in common file - elif dut_ssh == 1: + elif dut_ssh == RC_SSH_FAILED: # Do power cycle need_to_recover = True else: