diff --git a/tests/common/plugins/pdu_controller/__init__.py b/tests/common/plugins/pdu_controller/__init__.py
index 594b21d937f..14689728e2d 100644
--- a/tests/common/plugins/pdu_controller/__init__.py
+++ b/tests/common/plugins/pdu_controller/__init__.py
@@ -2,7 +2,7 @@
 import pytest
 
 from .pdu_manager import pdu_manager_factory
-from tests.common.utilities import get_host_visible_vars
+from tests.common.utilities import get_host_visible_vars, get_sup_node_or_random_node
 
 logger = logging.getLogger(__name__)
 
@@ -31,14 +31,14 @@ def get_pdu_visible_vars(inventories, pdu_hostnames):
 
 
 @pytest.fixture(scope="module")
-def pdu_controller(duthosts, enum_rand_one_per_hwsku_hostname, conn_graph_facts):
+def pdu_controller(duthosts, conn_graph_facts):
     """
     @summary: Fixture for controlling power supply to PSUs of DUT
     @param duthost: Fixture duthost defined in sonic-mgmt/tests/conftest.py
     @returns: Returns a pdu controller object implementing the BasePduController interface defined in
               controller_base.py.
     """
-    duthost = duthosts[enum_rand_one_per_hwsku_hostname]
+    duthost = get_sup_node_or_random_node(duthosts)
     pdu_hosts = get_pdu_hosts(duthost)
     pdu_hostnames = []
     if pdu_hosts:
diff --git a/tests/common/utilities.py b/tests/common/utilities.py
index e2bddf855c9..eed7e03669b 100644
--- a/tests/common/utilities.py
+++ b/tests/common/utilities.py
@@ -9,6 +9,7 @@
 import logging
 import os
 import re
+import random
 import six
 import sys
 import threading
@@ -77,6 +78,31 @@ def skip_release_for_platform(duthost, release_list, platform_list):
             duthost.os_version, duthost.facts['platform'], ", ".join(release_list), ", ".join(platform_list)))
 
 
+def get_sup_node_or_random_node(duthosts):
+    """
+    @summary: Select the DUT that PDU related fixtures and tests should operate on.
+    @param duthosts: Iterable of DUT host objects of the testbed
+    @returns: The first node reporting is_supervisor_node(); otherwise the only node, or a
+              randomly chosen node when there is more than one
+    """
+    # Accommodate T2 chassis, where only the SUP has pdu info:
+    # try to find the sup node in multi-dut
+    dut_list = list(duthosts)
+    for dut in dut_list:
+        if dut.is_supervisor_node():
+            return dut
+    # If not chassis, it's dualtor or single-dut: return a random node or the node itself
+    if len(dut_list) > 1:
+        # random.sample() raises TypeError for a population that is not a Sequence, so pick
+        # from a real list. NOTE(review): presumably duthosts is a custom container - confirm.
+        # NOTE(review): every call draws again, so two callers (e.g. the pdu_controller fixture
+        # and a test) may end up with different nodes on multi-dut testbeds - confirm.
+        selected = random.choice(dut_list)
+        logger.info("Randomly select dut {} for testing".format(selected))
+        return selected
+    return dut_list[0]
+
+
 def wait(seconds, msg=""):
     """
     @summary: Pause specified number of seconds
diff --git a/tests/platform_tests/test_platform_info.py b/tests/platform_tests/test_platform_info.py
index 0288f10fc01..6b219d47d93 100644
--- a/tests/platform_tests/test_platform_info.py
+++ b/tests/platform_tests/test_platform_info.py
@@ -12,7 +12,7 @@
 from retry.api import retry_call
 from tests.common.helpers.assertions import pytest_assert, pytest_require
 from tests.common.plugins.loganalyzer.loganalyzer import LogAnalyzer
-from tests.common.utilities import wait_until
+from tests.common.utilities import wait_until, get_sup_node_or_random_node
 from tests.common.platform.device_utils import get_dut_psu_line_pattern
 from .thermal_control_test_helper import ThermalPolicyFileContext,\
     check_cli_output_with_mocker, restart_thermal_control_daemon, check_thermal_algorithm_status,\
@@ -240,12 +240,12 @@ def check_all_psu_on(dut, psu_test_results):
 
 @pytest.mark.disable_loganalyzer
 @pytest.mark.parametrize('ignore_particular_error_log', [SKIP_ERROR_LOG_PSU_ABSENCE], indirect=True)
-def test_turn_on_off_psu_and_check_psustatus(duthosts, enum_rand_one_per_hwsku_hostname,
+def test_turn_on_off_psu_and_check_psustatus(duthosts,
                                              pdu_controller, ignore_particular_error_log, tbinfo):
     """
     @summary: Turn off/on PSU and check PSU status using 'show platform psustatus'
     """
-    duthost = duthosts[enum_rand_one_per_hwsku_hostname]
+    duthost = get_sup_node_or_random_node(duthosts)
 
     psu_line_pattern = get_dut_psu_line_pattern(duthost)
 
diff --git a/tests/platform_tests/test_power_off_reboot.py b/tests/platform_tests/test_power_off_reboot.py
new file mode 100644
index 00000000000..01e696d9d9a
--- /dev/null
+++ b/tests/platform_tests/test_power_off_reboot.py
@@ -0,0 +1,136 @@
+import logging
+import pytest
+import time
+
+from tests.common.fixtures.conn_graph_facts import conn_graph_facts     # noqa F401
+from tests.common.reboot import wait_for_startup, REBOOT_TYPE_POWEROFF
+from tests.common.platform.processes_utils import wait_critical_processes, check_critical_processes
+from tests.common.helpers.assertions import pytest_assert
+from tests.platform_tests.test_reboot import check_interfaces_and_services,\
+    reboot_and_check
+from tests.common.utilities import get_plt_reboot_ctrl
+
+pytestmark = [
+    pytest.mark.disable_loganalyzer,
+    pytest.mark.topology('any')
+]
+
+INTERFACE_WAIT_TIME = 300
+
+
+@pytest.fixture
+def set_max_time_for_interfaces(duthost):
+    """
+    For chassis testbeds, we need to specify plt_reboot_ctrl in inventory file,
+    to let INTERFACE_WAIT_TIME be overwritten by the specified timeout value
+    """
+    global INTERFACE_WAIT_TIME
+    plt_reboot_ctrl = get_plt_reboot_ctrl(duthost, 'test_reboot.py', 'cold')
+    if plt_reboot_ctrl:
+        INTERFACE_WAIT_TIME = plt_reboot_ctrl.get('timeout', 300)
+
+
+@pytest.fixture(scope="module", autouse=True)
+def teardown_module(duthosts, enum_supervisor_dut_hostname, conn_graph_facts, xcvr_skip_list):  # noqa F811
+    """
+    @summary: After the tests of this module, check critical processes, interfaces and transceivers
+    @param conn_graph_facts: Fixture parse and return lab connection graph; requested as a fixture
+                             so the facts dict is subscripted, not the imported fixture function
+    """
+    duthost = duthosts[enum_supervisor_dut_hostname]
+    yield
+
+    logging.info("Tearing down: to make sure all the critical services, interfaces and transceivers are good")
+    interfaces = conn_graph_facts["device_conn"][duthost.hostname]
+    check_critical_processes(duthost, watch_secs=10)
+    # Pass the wait time by keyword so it can never be bound to another positional parameter
+    check_interfaces_and_services(duthost, interfaces, xcvr_skip_list,
+                                  interfaces_wait_time=INTERFACE_WAIT_TIME)
+
+
+def _power_off_reboot_helper(kwargs):
+    """
+    @summary: Power cycle helper: turn off all outlets, wait, then turn on the requested outlets
+    @param kwargs: dict with "pdu_ctrl" (pdu controller), "all_outlets" (outlets to turn off),
+                   "power_on_seq" (outlets to turn on) and "delay_time" (seconds between off and on)
+    """
+    pdu_ctrl = kwargs["pdu_ctrl"]
+    all_outlets = kwargs["all_outlets"]
+    power_on_seq = kwargs["power_on_seq"]
+    delay_time = kwargs["delay_time"]
+
+    for outlet in all_outlets:
+        logging.debug("turning off {}".format(outlet))
+        pdu_ctrl.turn_off_outlet(outlet)
+    time.sleep(delay_time)
+    logging.info("Power on {}".format(power_on_seq))
+    for outlet in power_on_seq:
+        logging.debug("turning on {}".format(outlet))
+        pdu_ctrl.turn_on_outlet(outlet)
+
+
+def test_power_off_reboot(duthosts, localhost, enum_supervisor_dut_hostname,
+                          conn_graph_facts, set_max_time_for_interfaces,     # noqa F811
+                          xcvr_skip_list, pdu_controller, power_off_delay):
+    """
+    @summary: This test case is to perform reboot via powercycle and check platform status
+    @param duthosts: Fixture for the DUT host objects, indexed by hostname
+    @param localhost: Fixture for interacting with localhost through ansible
+    @param enum_supervisor_dut_hostname: Hostname used to select the DUT under test from duthosts
+    @param conn_graph_facts: Fixture parse and return lab connection graph
+    @param set_max_time_for_interfaces: Fixture that may overwrite INTERFACE_WAIT_TIME
+    @param xcvr_skip_list: list of DUT's interfaces for which transceiver checks are skipped
+    @param pdu_controller: The python object of psu controller
+    @param power_off_delay: Pytest parameter. The delay between turning off and on the PSU
+    """
+    duthost = duthosts[enum_supervisor_dut_hostname]
+    UNSUPPORTED_ASIC_TYPE = ["cisco-8000"]
+    if duthost.facts["asic_type"] in UNSUPPORTED_ASIC_TYPE:
+        pytest.skip("Skipping test_power_off_reboot. Test unsupported on {} platform"
+                    .format(duthost.facts["asic_type"]))
+    pdu_ctrl = pdu_controller
+    if pdu_ctrl is None:
+        pytest.skip("No PSU controller for %s, skip rest of the testing in this case" % duthost.hostname)
+    is_chassis = duthost.get_facts().get("modular_chassis")
+    if is_chassis and duthost.is_supervisor_node():
+        # Following is to accommodate for chassis, when no '--power_off_delay' option is given on pipeline run
+        power_off_delay = 60
+    all_outlets = pdu_ctrl.get_outlet_status()
+    # If PDU supports returning output_watts, making sure that all outlets has power.
+    no_power = [item for item in all_outlets if int(item.get('output_watts', '1')) == 0]
+    pytest_assert(not no_power, "Not all outlets have power output: {}".format(no_power))
+
+    # Purpose of this list is to control sequence of turning on PSUs in power off testing.
+    # If there are 2 PSUs, then 3 scenarios would be covered:
+    # 1. Turn off all PSUs, turn on PSU1, then check.
+    # 2. Turn off all PSUs, turn on PSU2, then check.
+    # 3. Turn off all PSUs, turn on one of the PSU, then turn on the other PSU, then check.
+    power_on_seq_list = []
+    if all_outlets:
+        power_on_seq_list = [[item] for item in all_outlets]
+        power_on_seq_list.append(all_outlets)
+
+    logging.info("Got all power on sequences {}".format(power_on_seq_list))
+
+    poweroff_reboot_kwargs = {"dut": duthost}
+
+    try:
+        for power_on_seq in power_on_seq_list:
+            poweroff_reboot_kwargs["pdu_ctrl"] = pdu_ctrl
+            poweroff_reboot_kwargs["all_outlets"] = all_outlets
+            poweroff_reboot_kwargs["power_on_seq"] = power_on_seq
+            poweroff_reboot_kwargs["delay_time"] = power_off_delay
+            reboot_and_check(localhost, duthost, conn_graph_facts["device_conn"][duthost.hostname],
+                             xcvr_skip_list, REBOOT_TYPE_POWEROFF,
+                             _power_off_reboot_helper, poweroff_reboot_kwargs)
+    except Exception:
+        logging.debug("Restore power after test failure")
+        for outlet in all_outlets:
+            logging.debug("turning on {}".format(outlet))
+            pdu_ctrl.turn_on_outlet(outlet)
+        # Wait for ssh port to open up on the DUT
+        reboot_time = 600 if is_chassis else 120
+        wait_for_startup(duthost, localhost, 0, reboot_time)
+        wait_critical_processes(duthost)
+        # Bare raise re-raises the active exception with its original traceback
+        raise
diff --git a/tests/platform_tests/test_reboot.py b/tests/platform_tests/test_reboot.py
index fcda7d03eef..91b52fc8c92 100644
--- a/tests/platform_tests/test_reboot.py
+++ b/tests/platform_tests/test_reboot.py
@@ -8,14 +8,14 @@
 https://github.com/sonic-net/SONiC/blob/master/doc/pmon/sonic_platform_test_plan.md
 """
 import logging
-import time
 import pytest
 
 from tests.common.fixtures.conn_graph_facts import conn_graph_facts  # noqa F401
 from tests.common.utilities import wait_until, get_plt_reboot_ctrl
 from tests.common.reboot import sync_reboot_history_queue_with_dut, reboot, check_reboot_cause,\
-    check_reboot_cause_history, reboot_ctrl_dict, REBOOT_TYPE_HISTOYR_QUEUE, REBOOT_TYPE_COLD,\
-    REBOOT_TYPE_SOFT, REBOOT_TYPE_FAST, REBOOT_TYPE_WARM, REBOOT_TYPE_POWEROFF, REBOOT_TYPE_WATCHDOG
+    check_reboot_cause_history, reboot_ctrl_dict,\
+    REBOOT_TYPE_HISTOYR_QUEUE, REBOOT_TYPE_COLD,\
+    REBOOT_TYPE_SOFT, REBOOT_TYPE_FAST, REBOOT_TYPE_WARM, REBOOT_TYPE_WATCHDOG
 from tests.common.platform.transceiver_utils import check_transceiver_basic
 from tests.common.platform.interface_utils import check_all_interface_information, get_port_map
 from tests.common.platform.daemon_utils import check_pmon_daemon_status
@@ -84,7 +84,8 @@ def reboot_and_check(localhost, dut, interfaces, xcvr_skip_list,
     check_interfaces_and_services(dut, interfaces, xcvr_skip_list, reboot_type)
 
 
-def check_interfaces_and_services(dut, interfaces, xcvr_skip_list, reboot_type=None):
+def check_interfaces_and_services(dut, interfaces, xcvr_skip_list,
+                                  reboot_type=None, interfaces_wait_time=None):
     """
     Perform a further check after reboot-cause, including transceiver status, interface status
     @param localhost: The Localhost object.
@@ -98,11 +99,15 @@ def check_interfaces_and_services(dut, interfaces, xcvr_skip_list, reboot_type=N
         logging.info("skipping interfaces related check for supervisor")
     else:
+        if interfaces_wait_time is None:
+            # Read the module global at call time instead of freezing it as a def-time default.
+            # NOTE(review): presumably set_max_time_for_interfaces rebinds this global - confirm
+            interfaces_wait_time = MAX_WAIT_TIME_FOR_INTERFACES
         logging.info("Wait {} seconds for all the transceivers to be detected".format(
-            MAX_WAIT_TIME_FOR_INTERFACES))
-        result = wait_until(MAX_WAIT_TIME_FOR_INTERFACES, 20, 0, check_all_interface_information, dut, interfaces,
+            interfaces_wait_time))
+        result = wait_until(interfaces_wait_time, 20, 0, check_all_interface_information, dut, interfaces,
                             xcvr_skip_list)
         assert result, "Not all transceivers are detected or interfaces are up in {} seconds".format(
-            MAX_WAIT_TIME_FOR_INTERFACES)
+            interfaces_wait_time)
 
         logging.info("Check transceiver status")
         for asic_index in dut.get_frontend_asic_ids():
@@ -215,89 +220,6 @@ def test_warm_reboot(duthosts, enum_rand_one_per_hwsku_hostname,
                      [duthost.hostname], xcvr_skip_list, reboot_type=REBOOT_TYPE_WARM)
 
 
-def _power_off_reboot_helper(kwargs):
-    """
-    @summary: used to parametrized test cases on power_off_delay
-    @param kwargs: the delay time between turning off and on the PSU
-    """
-    pdu_ctrl = kwargs["pdu_ctrl"]
-    all_outlets = kwargs["all_outlets"]
-    power_on_seq = kwargs["power_on_seq"]
-    delay_time = kwargs["delay_time"]
-
-    for outlet in all_outlets:
-        logging.debug("turning off {}".format(outlet))
-        pdu_ctrl.turn_off_outlet(outlet)
-    time.sleep(delay_time)
-    logging.info("Power on {}".format(power_on_seq))
-    for outlet in power_on_seq:
-        logging.debug("turning on {}".format(outlet))
-        pdu_ctrl.turn_on_outlet(outlet)
-
-
-def test_power_off_reboot(duthosts, enum_rand_one_per_hwsku_hostname,
-                          localhost, conn_graph_facts, xcvr_skip_list,  # noqa F811
-                          pdu_controller, power_off_delay):
-    """
-    @summary: This test case is to perform reboot via powercycle and check platform status
-    @param duthost: Fixture for DUT AnsibleHost object
-    @param localhost: Fixture for interacting with localhost through ansible
-    @param conn_graph_facts: Fixture parse and return lab connection graph
-    @param xcvr_skip_list: list of DUT's interfaces for which transeiver checks are skipped
-    @param pdu_controller: The python object of psu controller
-    @param power_off_delay: Pytest parameter. The delay between turning off and on the PSU
-    """
-    duthost = duthosts[enum_rand_one_per_hwsku_hostname]
-    UNSUPPORTED_ASIC_TYPE = ["cisco-8000"]
-    if duthost.facts["asic_type"] in UNSUPPORTED_ASIC_TYPE:
-        pytest.skip("Skipping test_power_off_reboot. Test unsupported on {} platform".format(
-            duthost.facts["asic_type"]))
-    pdu_ctrl = pdu_controller
-    if pdu_ctrl is None:
-        pytest.skip(
-            "No PSU controller for %s, skip rest of the testing in this case" % duthost.hostname)
-
-    all_outlets = pdu_ctrl.get_outlet_status()
-    # If PDU supports returning output_watts, making sure that all outlets has power.
-    no_power = [item for item in all_outlets if int(
-        item.get('output_watts', '1')) == 0]
-    pytest_assert(
-        not no_power, "Not all outlets have power output: {}".format(no_power))
-
-    # Purpose of this list is to control sequence of turning on PSUs in power off testing.
-    # If there are 2 PSUs, then 3 scenarios would be covered:
-    # 1. Turn off all PSUs, turn on PSU1, then check.
-    # 2. Turn off all PSUs, turn on PSU2, then check.
-    # 3. Turn off all PSUs, turn on one of the PSU, then turn on the other PSU, then check.
-    power_on_seq_list = []
-    if all_outlets:
-        power_on_seq_list = [[item] for item in all_outlets]
-        power_on_seq_list.append(all_outlets)
-
-    logging.info("Got all power on sequences {}".format(power_on_seq_list))
-
-    poweroff_reboot_kwargs = {"dut": duthost}
-
-    try:
-        for power_on_seq in power_on_seq_list:
-            poweroff_reboot_kwargs["pdu_ctrl"] = pdu_ctrl
-            poweroff_reboot_kwargs["all_outlets"] = all_outlets
-            poweroff_reboot_kwargs["power_on_seq"] = power_on_seq
-            poweroff_reboot_kwargs["delay_time"] = power_off_delay
-            reboot_and_check(localhost, duthost, conn_graph_facts["device_conn"][duthost.hostname],
-                             xcvr_skip_list, REBOOT_TYPE_POWEROFF,
-                             _power_off_reboot_helper, poweroff_reboot_kwargs)
-    except Exception as e:
-        logging.debug("Restore power after test failure")
-        for outlet in all_outlets:
-            logging.debug("turning on {}".format(outlet))
-            pdu_ctrl.turn_on_outlet(outlet)
-        # Sleep 120 for dut to boot up
-        time.sleep(120)
-        wait_critical_processes(duthost)
-        raise e
-
-
 def test_watchdog_reboot(duthosts, enum_rand_one_per_hwsku_hostname,
                          localhost, conn_graph_facts, set_max_time_for_interfaces, xcvr_skip_list, tbinfo):  # noqa F811
     """