diff --git a/tests/common/platform/processes_utils.py b/tests/common/platform/processes_utils.py
new file mode 100644
index 00000000000..dc666b5c43f
--- /dev/null
+++ b/tests/common/platform/processes_utils.py
@@ -0,0 +1,68 @@
+"""
+Helper script for checking status of critical processes
+
+This script contains re-usable functions for checking status of critical processes.
+"""
+import logging
+import time
+
+from tests.common.helpers.assertions import pytest_assert
+from tests.common.utilities import wait_until
+
+
+def _get_critical_processes_status(dut):
+    """
+    @summary: collect the status of all critical processes from the DUT.
+    @param dut: The AnsibleHost object of DUT. For interacting with DUT.
+    @return: tuple (healthy, processes_status). healthy is False as soon as one entry has a
+             falsy 'status' or a non-empty 'exited_critical_process'.
+    """
+    processes_status = dut.all_critical_process_status()
+    for v in processes_status.values():
+        if not v['status'] or len(v['exited_critical_process']) > 0:
+            return False, processes_status
+
+    return True, processes_status
+
+
+def _all_critical_processes_healthy(dut):
+    """
+    @summary: condition function handed to wait_until; True when all critical processes are healthy.
+    @param dut: The AnsibleHost object of DUT. For interacting with DUT.
+    """
+    logging.info("Check critical processes status")
+    status, _ = _get_critical_processes_status(dut)
+    return status
+
+
+def check_critical_processes(dut, watch_secs=0):
+    """
+    @summary: check all critical processes. They should be all running.
+              keep on checking every 5 seconds (or sooner, if less time is left) until
+              watch_secs seconds have elapsed, including a final check at the end.
+    @param dut: The AnsibleHost object of DUT. For interacting with DUT.
+    @param watch_secs: all processes should remain healthy for watch_secs seconds.
+                       0 means check once; a negative value skips the check.
+    """
+    logging.info("Check all critical processes are healthy for {} seconds".format(watch_secs))
+    remaining = watch_secs
+    while remaining >= 0:
+        status, details = _get_critical_processes_status(dut)
+        pytest_assert(status, "Not all critical processes are healthy: {}".format(details))
+        if remaining <= 0:
+            break
+        # Subtract the time actually slept (not a fixed 5) so that a watch_secs that is not
+        # a multiple of 5 still gets a final check when the watch window ends.
+        interval = min(5, remaining)
+        time.sleep(interval)
+        remaining -= interval
+
+
+def wait_critical_processes(dut):
+    """
+    @summary: wait until all critical processes are healthy.
+    @param dut: The AnsibleHost object of DUT. For interacting with DUT.
+    """
+    logging.info("Wait until all critical processes are healthy")
+    pytest_assert(wait_until(300, 20, _all_critical_processes_healthy, dut),
+                  "Not all critical processes are healthy")
diff --git a/tests/platform_tests/check_critical_services.py b/tests/platform_tests/check_critical_services.py
index dc2e311aba6..25dec382c16 100644
--- a/tests/platform_tests/check_critical_services.py
+++ b/tests/platform_tests/check_critical_services.py
@@ -6,6 +6,7 @@
 import time
 import logging
 
+from tests.common.helpers.assertions import pytest_assert
 from tests.common.utilities import wait_until
 
 
@@ -33,5 +34,6 @@ def check_critical_services(dut):
     @param dut: The AnsibleHost object of DUT. For interacting with DUT.
     """
     logging.info("Wait until all critical services are fully started")
-    assert wait_until(300, 20, _all_critical_services_fully_started, dut), "Not all critical services are fully started"
+    pytest_assert(wait_until(300, 20, _all_critical_services_fully_started, dut),
+                  "Not all critical services are fully started")
 
diff --git a/tests/platform_tests/test_sequential_restart.py b/tests/platform_tests/test_sequential_restart.py
index 1801fe3c07a..c79b0419df3 100644
--- a/tests/platform_tests/test_sequential_restart.py
+++ b/tests/platform_tests/test_sequential_restart.py
@@ -9,6 +9,8 @@
 import pytest
 
 from tests.common.fixtures.conn_graph_facts import conn_graph_facts
+from tests.common.helpers.assertions import pytest_assert
+from tests.common.platform.processes_utils import check_critical_processes
 from tests.common.utilities import wait_until
 from check_critical_services import check_critical_services
 from check_transceiver_status import check_transceiver_basic
@@ -31,8 +33,8 @@ def restart_service_and_check(localhost, dut, service, interfaces):
     check_critical_services(dut)
 
     logging.info("Wait some time for all the transceivers to be detected")
-    assert wait_until(300, 20, check_interface_information, dut, interfaces), \
-        "Not all interface information are detected within 300 seconds"
+    pytest_assert(wait_until(300, 20, check_interface_information, dut, interfaces),
+                  "Not all interface information are detected within 300 seconds")
 
     logging.info("Check transceiver status")
     check_transceiver_basic(dut, interfaces)
@@ -48,6 +50,9 @@ def restart_service_and_check(localhost, dut, service, interfaces):
         logging.info("Check sysfs")
         check_sysfs(dut)
 
+    logging.info("Check that critical processes are healthy for 60 seconds")
+    check_critical_processes(dut, 60)
+
 
 def test_restart_swss(duthost, localhost, conn_graph_facts):
     """