Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions tests/common/plugins/pdu_controller/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest
from .pdu_manager import pdu_manager_factory
from tests.common.utilities import get_host_visible_vars
from tests.common.utilities import get_host_visible_vars, get_sup_node_or_random_node


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -31,14 +31,14 @@ def get_pdu_visible_vars(inventories, pdu_hostnames):


@pytest.fixture(scope="module")
def pdu_controller(duthosts, enum_rand_one_per_hwsku_hostname, conn_graph_facts):
def pdu_controller(duthosts, conn_graph_facts):
"""
@summary: Fixture for controlling power supply to PSUs of DUT
@param duthost: Fixture duthost defined in sonic-mgmt/tests/conftest.py
@returns: Returns a pdu controller object implementing the BasePduController interface defined in
controller_base.py.
"""
duthost = duthosts[enum_rand_one_per_hwsku_hostname]
duthost = get_sup_node_or_random_node(duthosts)
pdu_hosts = get_pdu_hosts(duthost)
pdu_hostnames = []
if pdu_hosts:
Expand Down
14 changes: 14 additions & 0 deletions tests/common/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import logging
import os
import re
import random
import six
import sys
import threading
Expand Down Expand Up @@ -77,6 +78,19 @@ def skip_release_for_platform(duthost, release_list, platform_list):
duthost.os_version, duthost.facts['platform'], ", ".join(release_list), ", ".join(platform_list)))


def get_sup_node_or_random_node(duthosts):
    """
    @summary: Pick the DUT that a test should run against.
    @param duthosts: Iterable collection of DUT host objects. Every element must provide
                     is_supervisor_node().
    @returns: The first DUT that reports itself as a supervisor node. If there is none, a randomly
              chosen DUT when several are present, or the only DUT when there is exactly one.
    @raises IndexError: if duthosts is empty.
    """
    # Accommodate T2 chassis, where only the SUP has pdu info:
    # try to find the sup node in multi-dut first.
    candidates = list(duthosts)
    for dut in candidates:
        if dut.is_supervisor_node():
            return dut

    # Not a chassis: it's dualtor or single-dut, so return a random node or the node itself.
    # The choice is made on a materialized list because random.sample() only accepts real
    # sequences, while the container passed in may merely be iterable/indexable.
    selected = random.choice(candidates) if len(candidates) > 1 else candidates[0]
    logger.info("Randomly select dut {} for testing".format(selected))
    return selected


def wait(seconds, msg=""):
"""
@summary: Pause specified number of seconds
Expand Down
6 changes: 3 additions & 3 deletions tests/platform_tests/test_platform_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from retry.api import retry_call
from tests.common.helpers.assertions import pytest_assert, pytest_require
from tests.common.plugins.loganalyzer.loganalyzer import LogAnalyzer
from tests.common.utilities import wait_until
from tests.common.utilities import wait_until, get_sup_node_or_random_node
from tests.common.platform.device_utils import get_dut_psu_line_pattern
from .thermal_control_test_helper import ThermalPolicyFileContext,\
check_cli_output_with_mocker, restart_thermal_control_daemon, check_thermal_algorithm_status,\
Expand Down Expand Up @@ -240,12 +240,12 @@ def check_all_psu_on(dut, psu_test_results):

@pytest.mark.disable_loganalyzer
@pytest.mark.parametrize('ignore_particular_error_log', [SKIP_ERROR_LOG_PSU_ABSENCE], indirect=True)
def test_turn_on_off_psu_and_check_psustatus(duthosts, enum_rand_one_per_hwsku_hostname,
def test_turn_on_off_psu_and_check_psustatus(duthosts,
pdu_controller, ignore_particular_error_log, tbinfo):
"""
@summary: Turn off/on PSU and check PSU status using 'show platform psustatus'
"""
duthost = duthosts[enum_rand_one_per_hwsku_hostname]
duthost = get_sup_node_or_random_node(duthosts)

psu_line_pattern = get_dut_psu_line_pattern(duthost)

Expand Down
124 changes: 124 additions & 0 deletions tests/platform_tests/test_power_off_reboot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import logging
import pytest
import time

from tests.common.fixtures.conn_graph_facts import conn_graph_facts # noqa F401
from tests.common.reboot import wait_for_startup, REBOOT_TYPE_POWEROFF
from tests.common.platform.processes_utils import wait_critical_processes, check_critical_processes
from tests.common.helpers.assertions import pytest_assert
from tests.platform_tests.test_reboot import check_interfaces_and_services,\
reboot_and_check
from tests.common.utilities import get_plt_reboot_ctrl

pytestmark = [
pytest.mark.disable_loganalyzer,
pytest.mark.topology('any')
]

INTERFACE_WAIT_TIME = 300


@pytest.fixture
def set_max_time_for_interfaces(duthost):
    """
    @summary: Override the module-level INTERFACE_WAIT_TIME from the platform reboot control data.
    @param duthost: DUT host object handed to get_plt_reboot_ctrl

    The reboot control entry for 'test_reboot.py' / 'cold' is looked up; when one is returned, its
    'timeout' value (300 if the key is absent) replaces INTERFACE_WAIT_TIME. Per the original note
    this is meant for chassis testbeds, which specify plt_reboot_ctrl in the inventory file.
    """
    global INTERFACE_WAIT_TIME
    reboot_ctrl = get_plt_reboot_ctrl(duthost, 'test_reboot.py', 'cold')
    if not reboot_ctrl:
        # Nothing configured for this DUT: keep the current wait time.
        return
    INTERFACE_WAIT_TIME = reboot_ctrl.get('timeout', 300)


@pytest.fixture(scope="module", autouse=True)
def teardown_module(duthosts, enum_supervisor_dut_hostname, conn_graph_facts, xcvr_skip_list):  # noqa F811
    """
    @summary: Module teardown that re-checks the DUT once every test in the module has run.
    @param duthosts: Collection of DUT host objects, indexed by hostname
    @param enum_supervisor_dut_hostname: Hostname used to select the DUT from duthosts
    @param conn_graph_facts: Connection graph facts; its "device_conn" entry is indexed by hostname
    @param xcvr_skip_list: Passed through to check_interfaces_and_services

    Nothing is done before the yield. After it, critical processes are checked and then interfaces
    and services are checked, using the current module-level INTERFACE_WAIT_TIME.
    """
    duthost = duthosts[enum_supervisor_dut_hostname]
    yield

    logging.info("Tearing down: to make sure all the critical services, interfaces and transceivers are good")
    # conn_graph_facts has to be requested as a fixture argument: the module-level name of the same
    # spelling is the imported fixture function, which cannot be subscripted.
    # NOTE(review): assumes the conn_graph_facts fixture is module scoped or wider, as required for
    # a module-scoped fixture to depend on it - confirm.
    interfaces = conn_graph_facts["device_conn"][duthost.hostname]
    check_critical_processes(duthost, watch_secs=10)
    check_interfaces_and_services(duthost, interfaces, xcvr_skip_list, INTERFACE_WAIT_TIME)


def _power_off_reboot_helper(kwargs):
    """
    @summary: Power-cycle through a PDU controller: turn every outlet off, wait, then turn the
              requested outlets back on.
    @param kwargs: dict with the keys
                   "pdu_ctrl"     - controller providing turn_off_outlet() / turn_on_outlet()
                   "all_outlets"  - outlets to turn off
                   "power_on_seq" - outlets to turn back on, in this order
                   "delay_time"   - seconds to sleep between turning off and turning on
    """
    controller = kwargs["pdu_ctrl"]
    outlets_to_cut = kwargs["all_outlets"]
    outlets_to_restore = kwargs["power_on_seq"]
    off_duration = kwargs["delay_time"]

    for target in outlets_to_cut:
        logging.debug("turning off {}".format(target))
        controller.turn_off_outlet(target)

    # Stay powered off for the requested time before restoring anything.
    time.sleep(off_duration)

    logging.info("Power on {}".format(outlets_to_restore))
    for target in outlets_to_restore:
        logging.debug("turning on {}".format(target))
        controller.turn_on_outlet(target)


def test_power_off_reboot(duthosts, localhost, enum_supervisor_dut_hostname, conn_graph_facts,  # noqa F811
                          set_max_time_for_interfaces, xcvr_skip_list, pdu_controller, power_off_delay):
    """
    @summary: This test case is to perform reboot via powercycle and check platform status
    @param duthosts: Collection of DUT host objects, indexed by hostname
    @param localhost: Fixture for interacting with localhost through ansible
    @param enum_supervisor_dut_hostname: Hostname used to select the DUT from duthosts
    @param conn_graph_facts: Fixture parse and return lab connection graph
    @param set_max_time_for_interfaces: Requested only for its side effect on INTERFACE_WAIT_TIME
    @param xcvr_skip_list: list of DUT's interfaces for which transceiver checks are skipped
    @param pdu_controller: The python object of psu controller
    @param power_off_delay: Pytest parameter. The delay between turning off and on the PSU
    """
    duthost = duthosts[enum_supervisor_dut_hostname]
    UNSUPPORTED_ASIC_TYPE = ["cisco-8000"]
    if duthost.facts["asic_type"] in UNSUPPORTED_ASIC_TYPE:
        pytest.skip("Skipping test_power_off_reboot. Test unsupported on {} platform"
                    .format(duthost.facts["asic_type"]))
    pdu_ctrl = pdu_controller
    if pdu_ctrl is None:
        pytest.skip("No PSU controller for %s, skip rest of the testing in this case" % duthost.hostname)
    is_chassis = duthost.get_facts().get("modular_chassis")
    if is_chassis and duthost.is_supervisor_node():
        # Accommodate chassis pipeline runs that give no '--power_off_delay' option.
        # Note that this replaces whatever delay was passed in for a chassis supervisor.
        power_off_delay = 60
    all_outlets = pdu_ctrl.get_outlet_status()
    # If PDU supports returning output_watts, making sure that all outlets has power.
    no_power = [item for item in all_outlets if int(item.get('output_watts', '1')) == 0]
    pytest_assert(not no_power, "Not all outlets have power output: {}".format(no_power))

    # Purpose of this list is to control sequence of turning on PSUs in power off testing.
    # If there are 2 PSUs, then 3 scenarios would be covered:
    # 1. Turn off all PSUs, turn on PSU1, then check.
    # 2. Turn off all PSUs, turn on PSU2, then check.
    # 3. Turn off all PSUs, turn on one of the PSU, then turn on the other PSU, then check.
    power_on_seq_list = []
    if all_outlets:
        power_on_seq_list = [[item] for item in all_outlets]
        power_on_seq_list.append(all_outlets)

    logging.info("Got all power on sequences {}".format(power_on_seq_list))

    poweroff_reboot_kwargs = {"dut": duthost}

    try:
        for power_on_seq in power_on_seq_list:
            poweroff_reboot_kwargs["pdu_ctrl"] = pdu_ctrl
            poweroff_reboot_kwargs["all_outlets"] = all_outlets
            poweroff_reboot_kwargs["power_on_seq"] = power_on_seq
            poweroff_reboot_kwargs["delay_time"] = power_off_delay
            # conn_graph_facts must be the fixture value requested in the signature; the
            # module-level name of the same spelling is the imported fixture function.
            reboot_and_check(localhost, duthost, conn_graph_facts["device_conn"][duthost.hostname],
                             xcvr_skip_list, REBOOT_TYPE_POWEROFF,
                             _power_off_reboot_helper, poweroff_reboot_kwargs)
    except Exception:
        logging.debug("Restore power after test failure")
        for outlet in all_outlets:
            logging.debug("turning on {}".format(outlet))
            pdu_ctrl.turn_on_outlet(outlet)
        # Wait for ssh port to open up on the DUT
        reboot_time = 600 if is_chassis else 120
        wait_for_startup(duthost, localhost, 0, reboot_time)
        wait_critical_processes(duthost)
        # Bare raise re-raises the original failure with its traceback untouched.
        raise
98 changes: 8 additions & 90 deletions tests/platform_tests/test_reboot.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
https://github.com/sonic-net/SONiC/blob/master/doc/pmon/sonic_platform_test_plan.md
"""
import logging
import time
import pytest

from tests.common.fixtures.conn_graph_facts import conn_graph_facts # noqa F401
from tests.common.utilities import wait_until, get_plt_reboot_ctrl
from tests.common.reboot import sync_reboot_history_queue_with_dut, reboot, check_reboot_cause,\
check_reboot_cause_history, reboot_ctrl_dict, REBOOT_TYPE_HISTOYR_QUEUE, REBOOT_TYPE_COLD,\
REBOOT_TYPE_SOFT, REBOOT_TYPE_FAST, REBOOT_TYPE_WARM, REBOOT_TYPE_POWEROFF, REBOOT_TYPE_WATCHDOG
check_reboot_cause_history, reboot_ctrl_dict,\
REBOOT_TYPE_HISTOYR_QUEUE, REBOOT_TYPE_COLD,\
REBOOT_TYPE_SOFT, REBOOT_TYPE_FAST, REBOOT_TYPE_WARM, REBOOT_TYPE_WATCHDOG
from tests.common.platform.transceiver_utils import check_transceiver_basic
from tests.common.platform.interface_utils import check_all_interface_information, get_port_map
from tests.common.platform.daemon_utils import check_pmon_daemon_status
Expand Down Expand Up @@ -84,7 +84,8 @@ def reboot_and_check(localhost, dut, interfaces, xcvr_skip_list,
check_interfaces_and_services(dut, interfaces, xcvr_skip_list, reboot_type)


def check_interfaces_and_services(dut, interfaces, xcvr_skip_list, reboot_type=None):
def check_interfaces_and_services(dut, interfaces, xcvr_skip_list,
interfaces_wait_time=MAX_WAIT_TIME_FOR_INTERFACES, reboot_type=None):
"""
Perform a further check after reboot-cause, including transceiver status, interface status
@param localhost: The Localhost object.
Expand All @@ -98,11 +99,11 @@ def check_interfaces_and_services(dut, interfaces, xcvr_skip_list, reboot_type=N
logging.info("skipping interfaces related check for supervisor")
else:
logging.info("Wait {} seconds for all the transceivers to be detected".format(
MAX_WAIT_TIME_FOR_INTERFACES))
result = wait_until(MAX_WAIT_TIME_FOR_INTERFACES, 20, 0, check_all_interface_information, dut, interfaces,
interfaces_wait_time))
result = wait_until(interfaces_wait_time, 20, 0, check_all_interface_information, dut, interfaces,
xcvr_skip_list)
assert result, "Not all transceivers are detected or interfaces are up in {} seconds".format(
MAX_WAIT_TIME_FOR_INTERFACES)
interfaces_wait_time)

logging.info("Check transceiver status")
for asic_index in dut.get_frontend_asic_ids():
Expand Down Expand Up @@ -215,89 +216,6 @@ def test_warm_reboot(duthosts, enum_rand_one_per_hwsku_hostname,
[duthost.hostname], xcvr_skip_list, reboot_type=REBOOT_TYPE_WARM)


def _power_off_reboot_helper(kwargs):
    """
    @summary: Turn off every outlet through the PDU controller, sleep, then turn the requested
              outlets back on in the given order.
    @param kwargs: dict carrying "pdu_ctrl" (object with turn_off_outlet() / turn_on_outlet()),
                   "all_outlets" (outlets to turn off), "power_on_seq" (outlets to turn on) and
                   "delay_time" (seconds to sleep between the two phases)
    """
    pdu = kwargs["pdu_ctrl"]
    off_list = kwargs["all_outlets"]
    on_list = kwargs["power_on_seq"]
    pause = kwargs["delay_time"]

    # Phase 1: remove power from every outlet.
    for entry in off_list:
        logging.debug("turning off {}".format(entry))
        pdu.turn_off_outlet(entry)

    time.sleep(pause)

    # Phase 2: restore power in the requested order.
    logging.info("Power on {}".format(on_list))
    for entry in on_list:
        logging.debug("turning on {}".format(entry))
        pdu.turn_on_outlet(entry)


def test_power_off_reboot(duthosts, enum_rand_one_per_hwsku_hostname,
                          localhost, conn_graph_facts, xcvr_skip_list,  # noqa F811
                          pdu_controller, power_off_delay):
    """
    @summary: Reboot the DUT by power-cycling it through the PDU and check platform status
    @param duthosts: Collection of DUT host objects, indexed by hostname
    @param enum_rand_one_per_hwsku_hostname: Hostname used to select the DUT from duthosts
    @param localhost: Fixture for interacting with localhost through ansible
    @param conn_graph_facts: Fixture parse and return lab connection graph
    @param xcvr_skip_list: list of DUT's interfaces for which transceiver checks are skipped
    @param pdu_controller: The python object of psu controller
    @param power_off_delay: Pytest parameter. The delay between turning off and on the PSU
    """
    duthost = duthosts[enum_rand_one_per_hwsku_hostname]
    unsupported_asics = ["cisco-8000"]
    if duthost.facts["asic_type"] in unsupported_asics:
        pytest.skip("Skipping test_power_off_reboot. Test unsupported on {} platform".format(
            duthost.facts["asic_type"]))
    if pdu_controller is None:
        pytest.skip(
            "No PSU controller for %s, skip rest of the testing in this case" % duthost.hostname)

    outlets = pdu_controller.get_outlet_status()
    # When the PDU reports output_watts, every outlet must be delivering power.
    dead_outlets = []
    for outlet in outlets:
        if int(outlet.get('output_watts', '1')) == 0:
            dead_outlets.append(outlet)
    pytest_assert(
        not dead_outlets, "Not all outlets have power output: {}".format(dead_outlets))

    # Each entry is one power-on order to exercise. With 2 PSUs that gives 3 scenarios:
    # 1. Turn off all PSUs, turn on PSU1, then check.
    # 2. Turn off all PSUs, turn on PSU2, then check.
    # 3. Turn off all PSUs, turn on one PSU, then the other, then check.
    sequences = []
    if outlets:
        sequences = [[outlet] for outlet in outlets] + [outlets]

    logging.info("Got all power on sequences {}".format(sequences))

    helper_kwargs = {"dut": duthost}

    try:
        for sequence in sequences:
            helper_kwargs.update(
                pdu_ctrl=pdu_controller,
                all_outlets=outlets,
                power_on_seq=sequence,
                delay_time=power_off_delay,
            )
            reboot_and_check(localhost, duthost, conn_graph_facts["device_conn"][duthost.hostname],
                             xcvr_skip_list, REBOOT_TYPE_POWEROFF,
                             _power_off_reboot_helper, helper_kwargs)
    except Exception as e:
        logging.debug("Restore power after test failure")
        for outlet in outlets:
            logging.debug("turning on {}".format(outlet))
            pdu_controller.turn_on_outlet(outlet)
        # Give the DUT 120 seconds to boot up before checking processes.
        time.sleep(120)
        wait_critical_processes(duthost)
        raise e


def test_watchdog_reboot(duthosts, enum_rand_one_per_hwsku_hostname,
localhost, conn_graph_facts, set_max_time_for_interfaces, xcvr_skip_list, tbinfo): # noqa F811
"""
Expand Down