diff --git a/tests/common/dualtor/dual_tor_utils.py b/tests/common/dualtor/dual_tor_utils.py
index 4bea108f434..bb028a1144c 100644
--- a/tests/common/dualtor/dual_tor_utils.py
+++ b/tests/common/dualtor/dual_tor_utils.py
@@ -564,6 +564,80 @@ def shutdown(dut_intfs=None):
     fanout_intfs_to_recover.clear()
 
 
+@pytest.fixture
+def fanout_upper_tor_port_control(upper_tor_host, upper_tor_fanouthosts, tbinfo,
+                                  cable_type, active_active_ports, active_standby_ports):  # noqa F811
+    """
+    Fixture returns methods to shutdown and restart all fanout ports connected to
+    the upper_tor_host.
+
+    Yields a (shutdown, restart) tuple. shutdown(dut_intfs=None) hands dut_intfs
+    (the mux ports matching cable_type when omitted) to _shutdown_fanout_tor_intfs;
+    restart() calls no_shutdown for every entry of the module-level
+    fanout_intfs_to_recover and clears it. restart() also runs at teardown so a
+    failing test cannot leave fanout ports shut down.
+    """
+    fanout_intfs_to_recover.clear()
+
+    mux_ports = active_active_ports if cable_type == CableType.active_active else active_standby_ports
+
+    def shutdown(dut_intfs=None):
+        logger.info('Shutdown fanout ports connected to upper_tor')
+        if dut_intfs is None:
+            dut_intfs = mux_ports
+        _shutdown_fanout_tor_intfs(upper_tor_host, upper_tor_fanouthosts, tbinfo, dut_intfs)
+
+    def restart():
+        # NOTE(review): fanout_intfs_to_recover is shared module state, presumably
+        # filled by _shutdown_fanout_tor_intfs, so this recovers every recorded
+        # port and not only the upper ToR ones - confirm this is intended.
+        for fanout_host, intf_list in list(fanout_intfs_to_recover.items()):
+            fanout_host.no_shutdown(intf_list)
+        fanout_intfs_to_recover.clear()
+
+    yield shutdown, restart
+
+    # Teardown: a no-op when the test already called restart().
+    restart()
+
+
+@pytest.fixture
+def fanout_lower_tor_port_control(lower_tor_host, lower_tor_fanouthosts, tbinfo,
+                                  cable_type, active_active_ports, active_standby_ports):  # noqa F811
+    """
+    Fixture returns methods to shutdown and restart all fanout ports connected to
+    the lower_tor_host.
+
+    Yields a (shutdown, restart) tuple. shutdown(dut_intfs=None) hands dut_intfs
+    (the mux ports matching cable_type when omitted) to _shutdown_fanout_tor_intfs;
+    restart() calls no_shutdown for every entry of the module-level
+    fanout_intfs_to_recover and clears it. restart() also runs at teardown so a
+    failing test cannot leave fanout ports shut down.
+    """
+    fanout_intfs_to_recover.clear()
+
+    mux_ports = active_active_ports if cable_type == CableType.active_active else active_standby_ports
+
+    def shutdown(dut_intfs=None):
+        logger.info('Shutdown fanout ports connected to lower_tor')
+        if dut_intfs is None:
+            dut_intfs = mux_ports
+        _shutdown_fanout_tor_intfs(lower_tor_host, lower_tor_fanouthosts, tbinfo, dut_intfs)
+
+    def restart():
+        # NOTE(review): fanout_intfs_to_recover is shared module state, presumably
+        # filled by _shutdown_fanout_tor_intfs, so this recovers every recorded
+        # port and not only the lower ToR ones - confirm this is intended.
+        for fanout_host, intf_list in list(fanout_intfs_to_recover.items()):
+            fanout_host.no_shutdown(intf_list)
+        fanout_intfs_to_recover.clear()
+
+    yield shutdown, restart
+
+    # Teardown: a no-op when the test already called restart().
+    restart()
+
+
 @pytest.fixture
 def shutdown_fanout_tor_intfs(upper_tor_host, upper_tor_fanouthosts, lower_tor_host, lower_tor_fanouthosts,
                               tbinfo, cable_type, active_active_ports, active_standby_ports):  # noqa F811
diff --git a/tests/common/dualtor/icmp_responder_control.py b/tests/common/dualtor/icmp_responder_control.py
index 9ce9ea14951..639b84125ee 100644
--- a/tests/common/dualtor/icmp_responder_control.py
+++ b/tests/common/dualtor/icmp_responder_control.py
@@ -38,3 +38,69 @@ def _pause_icmp_respond(mux_ports):
     yield _pause_icmp_respond
 
     ptfhost.shell("supervisorctl restart icmp_responder", module_ignore_errors=True)
+
+
+def set_supervisorctl_status_icmp_responder(ptfhost, cmd, status):
+    """
+    Run "supervisorctl <cmd> icmp_responder" on the PTF host and check the result.
+
+    Args:
+        ptfhost: PTF host object; only its shell() method is used.
+        cmd: supervisorctl sub-command to run, e.g. 'stop' or 'start'.
+        status: text expected in the "supervisorctl status" output afterwards,
+            e.g. 'STOPPED' or 'RUNNING'.
+
+    Raises:
+        RuntimeError: if the status output already contains status before cmd
+            is run, or does not contain it afterwards.
+    """
+    icmp_responder_status = ptfhost.shell("supervisorctl status icmp_responder",
+                                          module_ignore_errors=True)["stdout"]
+    if status in icmp_responder_status:
+        raise RuntimeError(f"icmp_responder is already in {status} state")
+
+    ptfhost.shell(f'supervisorctl {cmd} icmp_responder', module_ignore_errors=True)
+
+    icmp_responder_status = ptfhost.shell("supervisorctl status icmp_responder",
+                                          module_ignore_errors=True)["stdout"]
+    if status not in icmp_responder_status:
+        raise RuntimeError(f"could not set icmp_responder to {status} state")
+
+
+@pytest.fixture
+def shutdown_icmp_responder(ptfhost):          # noqa F811
+    """
+    Fixture returns a method that stops icmp_responder on the PTF host.
+
+    If the method stopped the responder and it is not RUNNING at teardown, it is
+    started again so that a failing test does not leave it stopped.
+    """
+    stopped = []
+
+    def _shutdown_icmp_responder():
+        cmd = 'stop'
+        status = 'STOPPED'
+        set_supervisorctl_status_icmp_responder(ptfhost, cmd, status)
+        stopped.append(True)
+
+    yield _shutdown_icmp_responder
+
+    if stopped:
+        current = ptfhost.shell("supervisorctl status icmp_responder",
+                                module_ignore_errors=True)["stdout"]
+        if 'RUNNING' not in current:
+            ptfhost.shell("supervisorctl start icmp_responder", module_ignore_errors=True)
+
+
+@pytest.fixture
+def start_icmp_responder(ptfhost):          # noqa F811
+    """
+    Fixture returns a method that starts icmp_responder on the PTF host.
+    """
+
+    def _start_icmp_responder():
+        cmd = 'start'
+        status = 'RUNNING'
+        set_supervisorctl_status_icmp_responder(ptfhost, cmd, status)
+
+    yield _start_icmp_responder
diff --git a/tests/common/platform/interface_utils.py b/tests/common/platform/interface_utils.py
index 55d9f8e4941..abc27626bd7 100644
--- a/tests/common/platform/interface_utils.py
+++ b/tests/common/platform/interface_utils.py
@@ -61,6 +61,22 @@ def check_interface_status_of_up_ports(duthost):
     return True
 
 
+def expect_interface_status(dut, interface_name, expected_op_status):
+    """
+    Compare the operational status of a given interface name to an
+    expected value, return True if they are equal False otherwise.
+    Raises Exception if given interface name does not exist.
+    """
+    output = dut.command("show interface description")
+    # NOTE(review): the first two output lines are skipped, presumably the table
+    # header and its separator - confirm against the CLI output.
+    intf_status = parse_intf_status(output["stdout_lines"][2:])
+    status = intf_status.get(interface_name)
+    if status is None:
+        raise Exception(f'interface name {interface_name} does not exist')
+    return status['oper'] == expected_op_status
+
+
 def check_interface_status(dut, asic_index, interfaces, xcvr_skip_list):
     """
     @summary: Check the admin and oper status of the specified interfaces on DUT.
diff --git a/tests/common/plugins/conditional_mark/tests_mark_conditions.yaml b/tests/common/plugins/conditional_mark/tests_mark_conditions.yaml
index 03994e5d341..898dd44095e 100644
--- a/tests/common/plugins/conditional_mark/tests_mark_conditions.yaml
+++ b/tests/common/plugins/conditional_mark/tests_mark_conditions.yaml
@@ -570,6 +570,12 @@ dualtor_mgmt/test_dualtor_bgp_update_delay.py:
       - asic_type in ['vs']
       - https://github.com/sonic-net/sonic-mgmt/issues/14996
 
+dualtor_mgmt/test_server_failure.py::test_server_reboot:
+  skip:
+    reason: "KVM testbed does not have fanout hosts"
+    conditions:
+      - "asic_type in ['vs']"
+
 #######################################
 #####         dut_console         #####
 #######################################
diff --git a/tests/dualtor_mgmt/test_server_failure.py b/tests/dualtor_mgmt/test_server_failure.py
index 2bbdc2b0146..201c971fd8f 100644
--- a/tests/dualtor_mgmt/test_server_failure.py
+++ b/tests/dualtor_mgmt/test_server_failure.py
@@ -3,7 +3,8 @@
 import random
 
 from tests.common.dualtor.mux_simulator_control import toggle_simulator_port_to_upper_tor, \
-    simulator_flap_counter, simulator_server_down  # noqa F401
+    simulator_flap_counter, simulator_server_down, \
+    toggle_all_simulator_ports  # noqa F401
 from tests.common.helpers.assertions import pytest_assert
 from tests.common.dualtor.dual_tor_utils import show_muxcable_status  # noqa: F401
 from tests.common.dualtor.dual_tor_common import active_active_ports  # noqa F401
@@ -13,11 +14,17 @@
 from tests.common.dualtor.dual_tor_utils import validate_active_active_dualtor_setup  # noqa F401
 from tests.common.dualtor.dual_tor_utils import upper_tor_host  # noqa F401
 from tests.common.dualtor.dual_tor_utils import lower_tor_host  # noqa F401
+from tests.common.dualtor.dual_tor_utils import lower_tor_fanouthosts, fanout_lower_tor_port_control  # noqa F401
+from tests.common.dualtor.dual_tor_utils import upper_tor_fanouthosts, fanout_upper_tor_port_control  # noqa F401
 from tests.common.dualtor.nic_simulator_control import simulator_server_down_active_active  # noqa F401
 from tests.common.fixtures.ptfhost_utils import change_mac_addresses, run_garp_service, \
     run_icmp_responder  # noqa: F401
 from tests.common.utilities import wait_until
-
+from tests.common.dualtor.icmp_responder_control import shutdown_icmp_responder  # noqa: F401
+from tests.common.dualtor.icmp_responder_control import start_icmp_responder  # noqa: F401
+from tests.common.dualtor.control_plane_utils import verify_tor_states
+from tests.common.platform.interface_utils import expect_interface_status
+from tests.common.dualtor.constants import UPPER_TOR
 
 pytestmark = [
     pytest.mark.topology('dualtor'),
@@ -90,3 +97,91 @@ def lower_tor_mux_state_verfication(state, health):
                       "mux_cable status is unexpected. Should be (standby, unhealthy)")
         pytest_assert(wait_until(30, 1, 0, lower_tor_mux_state_verfication, 'standby', 'unhealthy'),
                       "mux_cable status is unexpected. Should be (standby, unhealthy)")
+
+
+def _reboot_server_via_fanout(upper_tor, lower_tor, interface_name,
+                              upper_port_control, lower_port_control):
+    """
+    Simulate a server reboot by shutting down and then restarting the fanout
+    ports of both ToRs, asserting that interface_name goes oper down and then
+    oper up again on each ToR (each wait is wait_until(30, 1, 0, ...)).
+    """
+    shutdown_upper, restart_upper = upper_port_control
+    shutdown_lower, restart_lower = lower_port_control
+    tors = (('upper', upper_tor), ('lower', lower_tor))
+
+    # simulate server reboot by turning off all fanout ports on both the ToRs
+    shutdown_upper()
+    shutdown_lower()
+    for side, tor in tors:
+        pytest_assert(wait_until(30, 1, 0, expect_interface_status, tor, interface_name, 'down'),
+                      f'{interface_name} on {side} ToR is expected to be down after server shutdown')
+    restart_upper()
+    restart_lower()
+
+    # fanout ports are back on
+    for side, tor in tors:
+        pytest_assert(wait_until(30, 1, 0, expect_interface_status, tor, interface_name, 'up'),
+                      f'{interface_name} on {side} ToR is expected to be up after server restart')
+
+
+@pytest.mark.enable_active_active
+def test_server_reboot(request, cable_type, tbinfo,                     # noqa: F811
+                       start_icmp_responder, shutdown_icmp_responder,   # noqa: F811
+                       active_standby_ports, active_active_ports,       # noqa: F811
+                       upper_tor_host, lower_tor_host,                  # noqa: F811
+                       toggle_all_simulator_ports,                      # noqa: F811
+                       fanout_upper_tor_port_control,                   # noqa: F811
+                       fanout_lower_tor_port_control):                  # noqa: F811
+
+    """
+    Test verifies that TOR health returns back to healthy status after a server reboot.
+
+    The server reboot is simulated by stopping icmp_responder and flapping the
+    fanout ports of both ToRs; icmp_responder is then started again and the mux
+    state of both ToRs is verified.
+    """
+    if cable_type == CableType.active_standby:
+        interface_name = random.choice(active_standby_ports)
+        # Set upper_tor as active
+        toggle_all_simulator_ports(UPPER_TOR)
+        verify_tor_states(expected_active_host=upper_tor_host,
+                          expected_standby_host=lower_tor_host, cable_type=cable_type)
+
+        pytest_assert(expect_interface_status(upper_tor_host, interface_name, 'up'),
+                      f'{interface_name} on upper ToR must be up')
+        pytest_assert(expect_interface_status(lower_tor_host, interface_name, 'up'),
+                      f'{interface_name} on lower ToR must be up')
+        shutdown_icmp_responder()
+
+        _reboot_server_via_fanout(upper_tor_host, lower_tor_host, interface_name,
+                                  fanout_upper_tor_port_control, fanout_lower_tor_port_control)
+
+        start_icmp_responder()
+        # The ToRs must then reconcile to a consistent state
+        # Upper ToR switches to standby and Lower to active.
+        verify_tor_states(expected_active_host=lower_tor_host,
+                          expected_standby_host=upper_tor_host, cable_type=cable_type)
+    elif cable_type == CableType.active_active:
+        interface_name = random.choice(active_active_ports)
+
+        verify_tor_states(expected_active_host=[upper_tor_host, lower_tor_host],
+                          expected_standby_host=None, cable_type=cable_type)
+
+        pytest_assert(expect_interface_status(upper_tor_host, interface_name, 'up'),
+                      f'{interface_name} on upper ToR must be up')
+        pytest_assert(expect_interface_status(lower_tor_host, interface_name, 'up'),
+                      f'{interface_name} on lower ToR must be up')
+        shutdown_icmp_responder()
+
+        # NOTE(review): cable_type is passed like in every other check of this test - confirm.
+        verify_tor_states(expected_active_host=None,
+                          expected_standby_host=[upper_tor_host, lower_tor_host],
+                          expected_standby_health='unhealthy', cable_type=cable_type)
+
+        _reboot_server_via_fanout(upper_tor_host, lower_tor_host, interface_name,
+                                  fanout_upper_tor_port_control, fanout_lower_tor_port_control)
+
+        start_icmp_responder()
+        verify_tor_states(expected_active_host=[upper_tor_host, lower_tor_host],
+                          expected_standby_host=None, cable_type=cable_type)