Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions tests/common/platform/processes_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
Helper script for checking status of critical processes

This script contains re-usable functions for checking status of critical services.
"""
import logging
import time

from tests.common.helpers.assertions import pytest_assert
from tests.common.utilities import wait_until


def _get_critical_processes_status(dut):
processes_status = dut.all_critical_process_status()
for k, v in processes_status.items():
if v['status'] == False or len(v['exited_critical_process']) > 0:
return False, processes_status

return True, processes_status

def _all_critical_processes_healthy(dut):
    """
    @summary: Boolean-only view of the critical processes status, suitable
              for use as a polling condition.
    @param dut: The AnsibleHost object of DUT. For interacting with DUT.
    @return: The health flag reported by _get_critical_processes_status();
             the detailed status mapping is discarded.
    """
    logging.info("Check critical processes status")
    healthy = _get_critical_processes_status(dut)[0]
    return healthy

def check_critical_processes(dut, watch_secs=0):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we not using the existing 'all_critical_process_status' in devices.py?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are using it. See line 14 :-)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah I see. My bad

"""
@summary: check all critical processes. They should be all running.
keep on checking every 5 seconds until watch_secs drops below 0.
@param dut: The AnsibleHost object of DUT. For interacting with DUT.
@param watch_secs: all processes should remain healthy for watch_secs seconds.
"""
logging.info("Check all critical processes are healthy for {} seconds".format(watch_secs))
while watch_secs >= 0:
status, details = _get_critical_processes_status(dut)
pytest_assert(status, "Not all critical processes are healthy: {}".format(details))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this just log an error, since we want this loop to continue?

Copy link
Copy Markdown
Collaborator Author

@yxieca yxieca Jul 29, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The purpose of this loop is to make sure that there is no critical process failure for 60 seconds (or a spot check if watch_secs == 0). If there is a failure, then the test should fail.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What you have in mind is the other method: wait_critical_process()

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I got confused with the wait_critical_process approach. It is clear now

if watch_secs > 0:
time.sleep(min(5, watch_secs))
watch_secs = watch_secs - 5

def wait_critical_processes(dut):
    """
    @summary: Block until every critical process on the DUT is healthy, and
              fail the test if that does not happen.
    @param dut: The AnsibleHost object of DUT. For interacting with DUT.
    """
    logging.info("Wait until all critical processes are healthy")
    # NOTE(review): presumably wait_until(300, 20, ...) polls the condition
    # every 20 seconds for up to 300 seconds - confirm against its definition.
    became_healthy = wait_until(300, 20, _all_critical_processes_healthy, dut)
    pytest_assert(became_healthy, "Not all critical processes are healthy")

4 changes: 3 additions & 1 deletion tests/platform_tests/check_critical_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import time
import logging

from tests.common.helpers.assertions import pytest_assert
from tests.common.utilities import wait_until


Expand Down Expand Up @@ -33,5 +34,6 @@ def check_critical_services(dut):
@param dut: The AnsibleHost object of DUT. For interacting with DUT.
"""
logging.info("Wait until all critical services are fully started")
assert wait_until(300, 20, _all_critical_services_fully_started, dut), "Not all critical services are fully started"
pytest_assert(wait_until(300, 20, _all_critical_services_fully_started, dut),
"Not all critical services are fully started")

9 changes: 7 additions & 2 deletions tests/platform_tests/test_sequential_restart.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import pytest

from tests.common.fixtures.conn_graph_facts import conn_graph_facts
from tests.common.helpers.assertions import pytest_assert
from tests.common.platform.processes_utils import check_critical_processes
from tests.common.utilities import wait_until
from check_critical_services import check_critical_services
from check_transceiver_status import check_transceiver_basic
Expand All @@ -31,8 +33,8 @@ def restart_service_and_check(localhost, dut, service, interfaces):
check_critical_services(dut)

logging.info("Wait some time for all the transceivers to be detected")
assert wait_until(300, 20, check_interface_information, dut, interfaces), \
"Not all interface information are detected within 300 seconds"
pytest_assert(wait_until(300, 20, check_interface_information, dut, interfaces),
"Not all interface information are detected within 300 seconds")

logging.info("Check transceiver status")
check_transceiver_basic(dut, interfaces)
Expand All @@ -48,6 +50,9 @@ def restart_service_and_check(localhost, dut, service, interfaces):
logging.info("Check sysfs")
check_sysfs(dut)

logging.info("Check that critical processes are healthy for 60 seconds")
check_critical_processes(dut, 60)


def test_restart_swss(duthost, localhost, conn_graph_facts):
"""
Expand Down