From b440c548ef2982c3adf96f106a0e0136ed35006c Mon Sep 17 00:00:00 2001 From: Matthew Soulsby Date: Wed, 30 Oct 2024 13:27:52 +1100 Subject: [PATCH] tests-common: Handle PortInUseException for SSHConsoleConn (#15174) What is the motivation for this PR? This PR is intended to add more resilience to the dut_console tests, as if a testbed with a blocked port was selected for a nightly test, it would cause the dut_console tests to fail during setup. This provides a method for auto-recovery for these cases. How did you do it? By completing the following: Add connection types for each of the configuration menu types (Digi, Cisco, and Sonic) to allow for different command sequences to be accounted for Add function to clear a DUT's console port Add retry logic to add resilience to main DUT connection set up How did you verify/test it? This process was conducted on 5 testbeds, with at least one of each config menu type. Steps conducted during testing SSH into testbed using the console IP and console port - simulating a blocked port In a separate terminal, run any dut_console test individually During test setup, the connection from step 1 was terminated successfully The test then runs successfully Example test output Successful port reset (Digi config): Screenshot 2024-10-25 165601 Successful port reset (Sonic config): Screenshot 2024-10-25 183646 Sample failure Simulating config menu type is not defined (causing the port clear function to exit early), and a more descriptive error message is thrown regarding why the connection to the DUT is failing: Screenshot 2024-10-25 153008 Any platform specific information? N/A - this is a generalised solution which accounts for as many types of configuration menus as possible. --- ansible/files/sonic_lab_console_links.csv | 10 +- ansible/library/conn_graph_facts.py | 1 + tests/common/connections/base_console_conn.py | 6 + tests/common/connections/console_host.py | 14 ++- tests/common/connections/ssh_console_conn.py | 43 +++++-- tests/conftest.py | 109 ++++++++++++++++-- 6 files changed, 161 insertions(+), 22 deletions(-) diff --git a/ansible/files/sonic_lab_console_links.csv b/ansible/files/sonic_lab_console_links.csv index 75b8e386604..16926ee064b 100644 --- a/ansible/files/sonic_lab_console_links.csv +++ b/ansible/files/sonic_lab_console_links.csv @@ -1,5 +1,5 @@ -StartDevice,StartPort,EndDevice,Console_type,Proxy,BaudRate -console-1,10,str-msn2700-01,ssh,,9600 -console-2,11,str-7260-10,ssh,,9600 -console-1,12,str-7260-11,ssh,, -management-1,13,str-acs-serv-01,ssh,,9600 +StartDevice,StartPort,EndDevice,Console_type,Console_menu_type,Proxy,BaudRate +console-1,10,str-msn2700-01,ssh,,,9600 +console-2,11,str-7260-10,ssh,,,9600 +console-1,12,str-7260-11,ssh,,, +management-1,13,str-acs-serv-01,ssh,,,9600 diff --git a/ansible/library/conn_graph_facts.py b/ansible/library/conn_graph_facts.py index abc6dfbe16e..2663e94114c 100755 --- a/ansible/library/conn_graph_facts.py +++ b/ansible/library/conn_graph_facts.py @@ -349,6 +349,7 @@ def csv_to_graph_facts(self): "peerport": entry["StartPort"], "proxy": entry["Proxy"], "type": entry["Console_type"], + "menu_type": entry["Console_menu_type"], } } self.graph_facts["console_links"] = console_links diff --git a/tests/common/connections/base_console_conn.py b/tests/common/connections/base_console_conn.py index ec01696e2ab..c82297262c3 100644 --- a/tests/common/connections/base_console_conn.py +++ b/tests/common/connections/base_console_conn.py @@ -21,6 +21,12 @@ CONSOLE_SSH = "console_ssh" # Console login via SSH, then login to devices by 'menu ports' CONSOLE_SSH_MENU_PORTS = "console_ssh_menu_ports" +# Console login via SSH, no stage 2 login (Digi Config Menu) +CONSOLE_SSH_DIGI_CONFIG = "console_ssh_digi_config" +# Console login via SSH, no stage 2 login (SONiC switch config) +CONSOLE_SSH_SONIC_CONFIG = "console_ssh_sonic_config" +# Console login via SSH, no stage 2 login (Cisco switch config) +CONSOLE_SSH_CISCO_CONFIG = "console_ssh_cisco_config" class BaseConsoleConn(CiscoBaseConnection): diff --git a/tests/common/connections/console_host.py b/tests/common/connections/console_host.py index d02f40f0b27..313371e652d 100644 --- a/tests/common/connections/console_host.py +++ b/tests/common/connections/console_host.py @@ -1,11 +1,21 @@ -from .base_console_conn import CONSOLE_SSH, CONSOLE_SSH_MENU_PORTS, CONSOLE_TELNET +from .base_console_conn import ( + CONSOLE_SSH, + CONSOLE_SSH_CISCO_CONFIG, + CONSOLE_SSH_MENU_PORTS, + CONSOLE_TELNET, + CONSOLE_SSH_DIGI_CONFIG, + CONSOLE_SSH_SONIC_CONFIG +) from .telnet_console_conn import TelnetConsoleConn from .ssh_console_conn import SSHConsoleConn ConsoleTypeMapper = { CONSOLE_TELNET: TelnetConsoleConn, CONSOLE_SSH: SSHConsoleConn, - CONSOLE_SSH_MENU_PORTS: SSHConsoleConn + CONSOLE_SSH_MENU_PORTS: SSHConsoleConn, + CONSOLE_SSH_DIGI_CONFIG: SSHConsoleConn, + CONSOLE_SSH_SONIC_CONFIG: SSHConsoleConn, + CONSOLE_SSH_CISCO_CONFIG: SSHConsoleConn, } diff --git a/tests/common/connections/ssh_console_conn.py b/tests/common/connections/ssh_console_conn.py index ecc93b090a9..be53b050d97 100644 --- a/tests/common/connections/ssh_console_conn.py +++ b/tests/common/connections/ssh_console_conn.py @@ -1,6 +1,6 @@ import time import re -from .base_console_conn import BaseConsoleConn, CONSOLE_SSH +from .base_console_conn import CONSOLE_SSH_DIGI_CONFIG, BaseConsoleConn, CONSOLE_SSH from netmiko.ssh_exception import NetMikoAuthenticationException from paramiko.ssh_exception import SSHException @@ -15,10 +15,18 @@ def __init__(self, **kwargs): self.sonic_username = kwargs['sonic_username'] self.sonic_password = kwargs['sonic_password'] - if kwargs['console_type'] == CONSOLE_SSH: + # Store console type for later use + self.console_type = kwargs['console_type'] + + if self.console_type == CONSOLE_SSH: + # Login requires port to be provided kwargs['username'] = kwargs['console_username'] + r':' + str(kwargs['console_port']) self.menu_port = None + elif self.console_type.endswith("config"): + # Login to config menu only requires username + kwargs['username'] = kwargs['console_username'] else: + # Login requires menu port kwargs['username'] = kwargs['console_username'] self.menu_port = kwargs['console_port'] kwargs['password'] = kwargs['console_password'] @@ -30,10 +38,19 @@ def session_preparation(self): session_init_msg = self._test_channel_read() self.logger.debug(session_init_msg) - if re.search(r"Port is in use. Closing connection...", session_init_msg, flags=re.M): + if re.search( + r"(Port is in use. Closing connection...|Cannot connect: line \[\d{2}\] is busy)", + session_init_msg, + flags=re.M + ): console_port = self.username.split(':')[-1] raise PortInUseException(f"Host closed connection, as console port '{console_port}' is currently occupied.") + if self.console_type.endswith("config"): + # We can skip stage 2 login for config menu connections + self.session_preparation_finalise() + return + if (self.menu_port): # For devices logining via menu port, 2 additional login are needed # Since we have attempted all passwords in __init__ of base class until successful login @@ -54,7 +71,18 @@ def session_preparation(self): else: break - self.set_base_prompt() + self.session_preparation_finalise() + + def session_preparation_finalise(self): + """ + Helper function to handle final stages of session preparation. + """ + # Digi config menu has a unique prompt terminator (----->) + if self.console_type == CONSOLE_SSH_DIGI_CONFIG: + self.set_base_prompt(">") + else: + self.set_base_prompt() + # Clear the read buffer time.sleep(0.3 * self.global_delay_factor) self.clear_buffer() @@ -151,9 +179,10 @@ def login_stage_2(self, raise NetMikoAuthenticationException(msg) def cleanup(self): - # Send an exit to logout from SONiC - self.send_command(command_string="exit", - expect_string="login:") + # If we are in SONiC, send an exit to logout + if not self.console_type.endswith("config"): + self.send_command(command_string="exit", + expect_string="login:") # remote_conn must be closed, or the SSH session will be kept on Digi, # and any other login is prevented self.remote_conn.close() diff --git a/tests/conftest.py b/tests/conftest.py index d42870d0e3c..2642f847ded 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,6 +15,11 @@ from datetime import datetime from ipaddress import ip_interface, IPv4Interface +from tests.common.connections.base_console_conn import ( + CONSOLE_SSH_CISCO_CONFIG, + CONSOLE_SSH_DIGI_CONFIG, + CONSOLE_SSH_SONIC_CONFIG +) from tests.common.fixtures.conn_graph_facts import conn_graph_facts # noqa F401 from tests.common.devices.local import Localhost from tests.common.devices.ptf import PTFHost @@ -1821,24 +1826,112 @@ def duthost_console(duthosts, enum_supervisor_dut_hostname, localhost, conn_grap console_host = console_host.split("/")[0] console_port = conn_graph_facts['device_console_link'][dut_hostname]['ConsolePort']['peerport'] console_type = conn_graph_facts['device_console_link'][dut_hostname]['ConsolePort']['type'] + console_menu_type = conn_graph_facts['device_console_link'][dut_hostname]['ConsolePort']['menu_type'] console_username = conn_graph_facts['device_console_link'][dut_hostname]['ConsolePort']['proxy'] - console_type = "console_" + console_type + console_type = f"console_{console_type}" + console_menu_type = f"{console_type}_{console_menu_type}" # console password and sonic_password are lists, which may contain more than one password sonicadmin_alt_password = localhost.host.options['variable_manager']._hostvars[dut_hostname].get( "ansible_altpassword") - host = ConsoleHost(console_type=console_type, - console_host=console_host, - console_port=console_port, - sonic_username=creds['sonicadmin_user'], - sonic_password=[creds['sonicadmin_password'], sonicadmin_alt_password], - console_username=console_username, - console_password=creds['console_password'][console_type]) + sonic_password = [creds['sonicadmin_password'], sonicadmin_alt_password] + + # Attempt to clear the console port + try: + duthost_clear_console_port( + menu_type=console_menu_type, + console_host=console_host, + console_port=console_port, + console_username=console_username, + console_password=creds['console_password'][console_type] + ) + except Exception as e: + logger.warning(f"Issue trying to clear console port: {e}") + + # Set up console host + host = None + for attempt in range(1, 4): + try: + host = ConsoleHost(console_type=console_type, + console_host=console_host, + console_port=console_port, + sonic_username=creds['sonicadmin_user'], + sonic_password=sonic_password, + console_username=console_username, + console_password=creds['console_password'][console_type]) + break + except Exception as e: + logger.warning(f"Attempt {attempt}/3 failed: {e}") + continue + else: + raise Exception("Failed to set up connection to console port. See warning logs for details.") + yield host host.disconnect() +def duthost_clear_console_port( + menu_type: str, + console_host: str, + console_port: str, + console_username: str, + console_password: str +): + """ + Helper function to clear the console port for a given DUT. + Useful when a device has an occupied console port, preventing dut_console tests from running. + + Parameters: + menu_type: Connection type for the console's config menu (as expected by the ConsoleTypeMapper) + console_host: DUT host's console IP address + console_port: DUT host's console port, to be cleared + console_username: Username for the console account (overridden for Digi console) + console_password: Password for the console account + """ + if menu_type == "console_ssh_": + raise Exception("Device does not have a defined Console_menu_type.") + + # Override console user if the configuration menu is Digi, as this requires admin login + console_user = 'admin' if menu_type == CONSOLE_SSH_DIGI_CONFIG else console_username + + duthost_config_menu = ConsoleHost( + console_type=menu_type, + console_host=console_host, + console_port=console_port, + console_username=console_user, + console_password=console_password, + sonic_username=None, + sonic_password=None + ) + + # Command lists for each config menu type + # List of tuples, containing a command to execute, and an optional pattern to wait for + command_list = { + CONSOLE_SSH_DIGI_CONFIG: [ + ('2', None), # Enter serial port config + (console_port, None), # Choose DUT console port + ('a', None), # Enter port management + ('1', f'Port #{console_port} has been reset successfully.') # Reset chosen port + ], + CONSOLE_SSH_SONIC_CONFIG: [ + (f'sudo sonic-clear line {console_port}', None) # Clear DUT console port (requires sudo) + ], + CONSOLE_SSH_CISCO_CONFIG: [ + (f'clear line tty {console_port}', '[confirm]'), # Clear DUT console port + ('', '[OK]') # Confirm selection + ], + } + + for command, wait_for_pattern in command_list[menu_type]: + duthost_config_menu.write_channel(command + duthost_config_menu.RETURN) + duthost_config_menu.read_until_prompt_or_pattern(wait_for_pattern) + + duthost_config_menu.disconnect() + logger.info(f"Successfully cleared console port {console_port}, sleeping for 5 seconds") + time.sleep(5) + + @pytest.fixture(scope='session') def cleanup_cache_for_session(request): """