diff --git a/tests/common/reboot.py b/tests/common/reboot.py index 708d35e0fc5..86331935270 100644 --- a/tests/common/reboot.py +++ b/tests/common/reboot.py @@ -9,16 +9,59 @@ SONIC_SSH_PORT = 22 SONIC_SSH_REGEX = 'OpenSSH_[\\w\\.]+ Debian' -# map reboot type -> reboot command -reboot_commands =\ -{ - 'cold': 'reboot', - 'fast': 'fast-reboot', - 'warm': 'warm-reboot', +REBOOT_TYPE_WARM = "warm" +REBOOT_TYPE_COLD = "cold" +REBOOT_TYPE_FAST = "fast" +REBOOT_TYPE_POWEROFF = "power off" +REBOOT_TYPE_WATCHDOG = "watchdog" +REBOOT_TYPE_UNKNOWN = "Unknown" + +''' + command : command to reboot the DUT + timeout : timeout waiting for DUT to come back after reboot + wait : time to wait for the switch to stabilize + cause : search string to determine reboot cause + test_reboot_cause_only : indicate if the purpose of test is for reboot cause only +''' +reboot_ctrl_dict = { + REBOOT_TYPE_POWEROFF: { + "timeout": 300, + "wait": 120, + "cause": "Power Loss", + "test_reboot_cause_only": True + }, + REBOOT_TYPE_COLD: { + "command": "reboot", + "timeout": 300, + "wait": 120, + "cause": "'reboot'", + "test_reboot_cause_only": False + }, + REBOOT_TYPE_FAST: { + "command": "fast-reboot", + "timeout": 180, + "wait": 120, + "cause": "fast-reboot", + "test_reboot_cause_only": False + }, + REBOOT_TYPE_WARM: { + "command": "warm-reboot", + "timeout": 210, + "wait": 90, + "cause": "warm-reboot", + "test_reboot_cause_only": False + }, + REBOOT_TYPE_WATCHDOG: { + "command": "python -c \"import sonic_platform.platform as P; P.Platform().get_chassis().get_watchdog().arm(5); exit()\"", + "timeout": 300, + "wait": 120, + "cause": "Watchdog", + "test_reboot_cause_only": True + } } -def reboot(duthost, localhost, reboot_type='cold', delay=10, timeout=180, wait=120): +def reboot(duthost, localhost, reboot_type='cold', delay=10, timeout=0, wait=0, reboot_helper=None, reboot_kwargs=None): """ reboots DUT :param duthost: DUT host object @@ -27,6 +70,8 @@ def reboot(duthost, localhost, reboot_type='cold', 
delay=10, timeout=180, wait=1 :param delay: delay between ssh availability checks :param timeout: timeout for waiting ssh port state change :param wait: time to wait for DUT to initialize + :param reboot_helper: helper function to execute the power toggling + :param reboot_kwargs: arguments to pass to the reboot_helper :return: """ @@ -35,15 +80,30 @@ def reboot(duthost, localhost, reboot_type='cold', delay=10, timeout=180, wait=1 dut_ip = duthost.setup()['ansible_facts']['ansible_eth0']['ipv4']['address'] try: - reboot_command = reboot_commands[reboot_type] + reboot_ctrl = reboot_ctrl_dict[reboot_type] + reboot_command = reboot_ctrl['command'] if reboot_type != REBOOT_TYPE_POWEROFF else None + if timeout == 0: + timeout = reboot_ctrl['timeout'] + if wait == 0: + wait = reboot_ctrl['wait'] except KeyError: raise ValueError('invalid reboot type: "{}"'.format(reboot_type)) - def execute_reboot(): + def execute_reboot_command(): logger.info('rebooting with command "{}"'.format(reboot_command)) return duthost.command(reboot_command) - reboot_res = pool.apply_async(execute_reboot) + def execute_reboot_helper(): + logger.info('rebooting with helper "{}"'.format(reboot_helper)) + return reboot_helper(reboot_kwargs) + + dut_datetime = duthost.get_now_time() + + if reboot_type != REBOOT_TYPE_POWEROFF: + reboot_res = pool.apply_async(execute_reboot_command) + else: + assert reboot_helper is not None, "A reboot function must be provided for power off reboot" + reboot_res = pool.apply_async(execute_reboot_helper) logger.info('waiting for ssh to drop') res = localhost.wait_for(host=dut_ip, @@ -82,15 +142,17 @@ def execute_reboot(): logger.info('waiting for warmboot-finalizer service to finish') res = duthost.command('systemctl is-active warmboot-finalizer.service',module_ignore_errors=True) finalizer_state = res['stdout'].strip() + logger.info('warmboot finalizer service state {}'.format(finalizer_state)) assert finalizer_state == 'activating' count = 0 while finalizer_state == 
'activating': try: - res = duthost.command('systemctl is-active warmboot-finalizer.service') + res = duthost.command('systemctl is-active warmboot-finalizer.service',module_ignore_errors=True) except AnsibleModuleException as err: res = err.module_result finalizer_state = res['stdout'].strip() + logger.info('warmboot finalizer service state {}'.format(finalizer_state)) time.sleep(delay) if count * delay > timeout: raise Exception('warmboot-finalizer.service did not finish') @@ -100,3 +162,34 @@ def execute_reboot(): logger.info('{} reboot finished'.format(reboot_type)) pool.terminate() + + dut_uptime = duthost.get_up_time() + logger.info('DUT up since {}'.format(dut_uptime)) + assert float(dut_uptime.strftime("%s")) - float(dut_datetime.strftime("%s")) > 10, "Device did not reboot" + + +def get_reboot_cause(dut): + """ + @summary: get the reboot cause on DUT. + @param dut: The AnsibleHost object of DUT. + """ + logging.info('Getting reboot cause from dut {}'.format(dut.hostname)) + output = dut.shell('show reboot-cause') + cause = output['stdout'] + + for type, ctrl in reboot_ctrl_dict.items(): + if re.search(ctrl['cause'], cause): + return type + + return REBOOT_TYPE_UNKNOWN + + +def check_reboot_cause(dut, reboot_cause_expected): + """ + @summary: Check the reboot cause on DUT. Can be used with wait_until + @param dut: The AnsibleHost object of DUT. + @param reboot_cause_expected: The expected reboot cause. 
+ """ + reboot_cause_got = get_reboot_cause(dut) + logging.debug("dut {} last reboot-cause {}".format(dut.hostname, reboot_cause_got)) + return reboot_cause_got == reboot_cause_expected diff --git a/tests/platform/platform_fixtures.py b/tests/platform/platform_fixtures.py deleted file mode 100644 index 8be133d60db..00000000000 --- a/tests/platform/platform_fixtures.py +++ /dev/null @@ -1,12 +0,0 @@ -import pytest -import os - -@pytest.fixture(scope="module") -def conn_graph_facts(testbed_devices): - dut = testbed_devices["dut"] - localhost = testbed_devices["localhost"] - - base_path = os.path.dirname(os.path.realpath(__file__)) - lab_conn_graph_file = os.path.join(base_path, "../../ansible/files/lab_connection_graph.xml") - conn_graph_facts = localhost.conn_graph_facts(host=dut.hostname, filename=lab_conn_graph_file)['ansible_facts'] - return conn_graph_facts diff --git a/tests/platform/test_reboot.py b/tests/platform/test_reboot.py index 9428d975b6c..a684f0f053c 100644 --- a/tests/platform/test_reboot.py +++ b/tests/platform/test_reboot.py @@ -17,56 +17,20 @@ import pytest -from platform_fixtures import conn_graph_facts +from common.fixtures.conn_graph_facts import conn_graph_facts from common.utilities import wait_until +from common.reboot import * +from common.platform.interface_utils import check_interface_information +from common.platform.transceiver_utils import check_transceiver_basic + from check_critical_services import check_critical_services -from check_transceiver_status import check_transceiver_basic from check_daemon_status import check_pmon_daemon_status -from check_all_interface_info import check_interface_information pytestmark = [pytest.mark.disable_loganalyzer] MAX_WAIT_TIME_FOR_INTERFACES = 300 MAX_WAIT_TIME_FOR_REBOOT_CAUSE = 120 -REBOOT_TYPE_WARM = "warm" -REBOOT_TYPE_COLD = "cold" -REBOOT_TYPE_FAST = "fast" -REBOOT_TYPE_POWEROFF = "power off" -REBOOT_TYPE_WATCHDOG = "watchdog" - -reboot_ctrl_dict = { - REBOOT_TYPE_POWEROFF: { - "timeout": 
300, - "cause": "Power Loss", - "test_reboot_cause_only": True - }, - REBOOT_TYPE_COLD: { - "command": "reboot", - "timeout": 300, - "cause": "reboot", - "test_reboot_cause_only": False - }, - REBOOT_TYPE_FAST: { - "command": "fast-reboot", - "timeout": 180, - "cause": "fast-reboot", - "test_reboot_cause_only": False - }, - REBOOT_TYPE_WARM: { - "command": "warm-reboot", - "timeout": 180, - "cause": "warm-reboot", - "test_reboot_cause_only": False - }, - REBOOT_TYPE_WATCHDOG: { - "command": "python -c \"import sonic_platform.platform as P; P.Platform().get_chassis().get_watchdog().arm(5); exit()\"", - "timeout": 300, - "cause": "Watchdog", - "test_reboot_cause_only": True - } -} - @pytest.fixture(scope="module", autouse=True) def teardown_module(duthost, conn_graph_facts): @@ -78,21 +42,6 @@ def teardown_module(duthost, conn_graph_facts): check_interfaces_and_services(duthost, interfaces) - -def check_reboot_cause(dut, reboot_cause_expected): - """ - @summary: Check the reboot cause on DUT. - @param dut: The AnsibleHost object of DUT. - @param reboot_cause_expected: The expected reboot cause. - """ - logging.info("Check the reboot cause") - output = dut.shell("show reboot-cause") - reboot_cause_got = output["stdout"] - logging.debug("show reboot-cause returns {}".format(reboot_cause_got)) - m = re.search(reboot_cause_expected, reboot_cause_got) - return m is not None - - def reboot_and_check(localhost, dut, interfaces, reboot_type=REBOOT_TYPE_COLD, reboot_helper=None, reboot_kwargs=None): """ Perform the specified type of reboot and check platform status. 
@@ -105,45 +54,11 @@ def reboot_and_check(localhost, dut, interfaces, reboot_type=REBOOT_TYPE_COLD, r """ logging.info("Run %s reboot on DUT" % reboot_type) - assert reboot_type in reboot_ctrl_dict.keys(), "Unknown reboot type %s" % reboot_type - - reboot_timeout = reboot_ctrl_dict[reboot_type]["timeout"] - ansible_host = dut.host.options["inventory_manager"].get_host(dut.hostname).vars["ansible_host"] - - dut_datetime = datetime.strptime(dut.command('date -u +"%Y-%m-%d %H:%M:%S"')["stdout"], "%Y-%m-%d %H:%M:%S") - - if reboot_type == REBOOT_TYPE_POWEROFF: - assert reboot_helper is not None, "A reboot function must be provided for power off reboot" - - reboot_helper(reboot_kwargs) - - localhost.wait_for(host=ansible_host, port=22, state="stopped", search_regex="OpenSSH_[\\w\\.]+ Debian", delay=10, timeout=120) - else: - reboot_cmd = reboot_ctrl_dict[reboot_type]["command"] - reboot_task, reboot_res = dut.command(reboot_cmd, module_ignore_errors=True, module_async=True) - - logging.info("Wait for DUT to go down") - res = localhost.wait_for(host=ansible_host, port=22, state="stopped", search_regex="OpenSSH_[\\w\\.]+ Debian", timeout=180, module_ignore_errors=True) - if "failed" in res: - try: - logging.error("Wait for switch down failed, try to kill any possible stuck reboot task") - pid = dut.command("pgrep -f '%s'" % reboot_cmd)["stdout"] - dut.command("kill -9 %s" % pid) - reboot_task.terminate() - logging.error("Result of command '%s': " + str(reboot_res.get(timeout=0))) - except Exception as e: - logging.error("Exception raised while cleanup reboot task and get result: " + repr(e)) - - logging.info("Wait for DUT to come back") - localhost.wait_for(host=ansible_host, port=22, state="started", search_regex="OpenSSH_[\\w\\.]+ Debian", delay=10, timeout=reboot_timeout) - - - logging.info("Check the uptime to verify whether reboot was performed") - dut_uptime = datetime.strptime(dut.command("uptime -s")["stdout"], "%Y-%m-%d %H:%M:%S") - assert 
float(dut_uptime.strftime("%s")) - float(dut_datetime.strftime("%s")) > 10, "Device did not reboot" + reboot(dut, localhost, reboot_type=reboot_type, reboot_helper=reboot_helper, reboot_kwargs=reboot_kwargs) check_interfaces_and_services(dut, interfaces, reboot_type) + def check_interfaces_and_services(dut, interfaces, reboot_type = None): """ Perform a further check after reboot-cause, including transceiver status, interface status @@ -156,9 +71,8 @@ def check_interfaces_and_services(dut, interfaces, reboot_type = None): if reboot_type is not None: logging.info("Check reboot cause") - reboot_cause = reboot_ctrl_dict[reboot_type]["cause"] - assert wait_until(MAX_WAIT_TIME_FOR_REBOOT_CAUSE, 20, check_reboot_cause, dut, reboot_cause), \ - "got reboot-cause failed after rebooted by %s" % reboot_cause + assert wait_until(MAX_WAIT_TIME_FOR_REBOOT_CAUSE, 20, check_reboot_cause, dut, reboot_type), \ + "got reboot-cause failed after rebooted by %s" % reboot_type if reboot_ctrl_dict[reboot_type]["test_reboot_cause_only"]: logging.info("Further checking skipped for %s test which intends to verify reboot-cause only" % reboot_type) @@ -305,8 +219,19 @@ def test_watchdog_reboot(testbed_devices, conn_graph_facts): test_watchdog_supported = "python -c \"import sonic_platform.platform as P; P.Platform().get_chassis().get_watchdog(); exit()\"" - watchdog_supported = ans_host.command(test_watchdog_supported)["stderr"] + watchdog_supported = ans_host.command(test_watchdog_supported,module_ignore_errors=True)["stderr"] if "" != watchdog_supported: pytest.skip("Watchdog is not supported on this DUT, skip this test case") reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], REBOOT_TYPE_WATCHDOG) + + +def test_continuous_reboot(testbed_devices, conn_graph_facts): + """ + @summary: This test case is to perform 3 cold reboots in a row + """ + ans_host = testbed_devices["dut"] + localhost = testbed_devices["localhost"] + + for i in range(3): + reboot_and_check(localhost, 
ans_host, conn_graph_facts["device_conn"], reboot_type=REBOOT_TYPE_COLD)