-
Notifications
You must be signed in to change notification settings - Fork 1k
[test_system_health.py] Adjust test cases of system health #4562
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,8 +2,10 @@ | |
| import logging | ||
| import os | ||
| import pytest | ||
| import random | ||
| import time | ||
| from pkg_resources import parse_version | ||
| from tests.common import config_reload | ||
| from tests.common.utilities import wait_until | ||
| from tests.common.helpers.assertions import pytest_require | ||
| from tests.platform_tests.thermal_control_test_helper import disable_thermal_policy | ||
|
|
@@ -63,27 +65,22 @@ def check_image_version(duthost): | |
| yield | ||
|
|
||
|
|
||
| @pytest.fixture(autouse=True, scope='module') | ||
| def config_reload_after_tests(duthost): | ||
| yield | ||
| config_reload(duthost) | ||
|
|
||
|
|
||
| def test_service_checker(duthosts, enum_rand_one_per_hwsku_hostname): | ||
| duthost = duthosts[enum_rand_one_per_hwsku_hostname] | ||
| wait_system_health_boot_up(duthost) | ||
| with ConfigFileContext(duthost, os.path.join(FILES_DIR, IGNORE_DEVICE_CHECK_CONFIG_FILE)): | ||
| cmd = "monit summary -B" | ||
| logger.info('Getting output for command {}'.format(cmd)) | ||
| output = duthost.shell(cmd) | ||
| content = output['stdout'].strip() | ||
| lines = content.splitlines() | ||
| status_begin = lines[1].find('Status') | ||
| type_begin = lines[1].find('Type') | ||
| processes_status = duthost.all_critical_process_status() | ||
| expect_error_dict = {} | ||
| logger.info('Getting service status') | ||
| for line in lines[2:]: | ||
| service_name = line[0:status_begin].strip() | ||
| status = line[status_begin:type_begin].strip() | ||
| service_type = line[type_begin:].strip() | ||
| assert service_type in SERVICE_EXPECT_STATUS_DICT, 'Unknown service type {}'.format(service_type) | ||
| expect_status = SERVICE_EXPECT_STATUS_DICT[service_type] | ||
| if expect_status != status: | ||
| expect_error_dict[service_name] = '{} is not {}'.format(service_name, expect_status) | ||
| for container_name, processes in processes_status.items(): | ||
| if processes["status"] is False or len(processes["exited_critical_process"]) > 0: | ||
| for process_name in processes["exited_critical_process"]: | ||
| expect_error_dict[process_name] = '{}:{} is not running'.format(container_name, process_name) | ||
|
|
||
| logger.info('Waiting {} seconds for healthd to work'.format(DEFAULT_INTERVAL)) | ||
| time.sleep(DEFAULT_INTERVAL) | ||
|
|
@@ -98,6 +95,30 @@ def test_service_checker(duthosts, enum_rand_one_per_hwsku_hostname): | |
| assert summary == expect_summary, 'Expect summary {}, got {}'.format(expect_summary, summary) | ||
|
|
||
|
|
||
| @pytest.mark.disable_loganalyzer | ||
| def test_service_checker_with_process_exit(duthosts, enum_rand_one_per_hwsku_hostname): | ||
| duthost = duthosts[enum_rand_one_per_hwsku_hostname] | ||
| wait_system_health_boot_up(duthost) | ||
| with ConfigFileContext(duthost, os.path.join(FILES_DIR, IGNORE_DEVICE_CHECK_CONFIG_FILE)): | ||
| processes_status = duthost.all_critical_process_status() | ||
| containers = [x for x in list(processes_status.keys()) if x != "syncd" and x !="database"] | ||
| logging.info('Test containers: {}'.format(containers)) | ||
| random.shuffle(containers) | ||
| for container in containers: | ||
| running_critical_process = processes_status[container]['running_critical_process'] | ||
| if not running_critical_process: | ||
| continue | ||
|
|
||
| critical_process = random.sample(running_critical_process, 1)[0] | ||
| with ProcessExitContext(duthost, container, critical_process): | ||
| time.sleep(DEFAULT_INTERVAL) | ||
| value = redis_get_field_value(duthost, STATE_DB, HEALTH_TABLE_NAME, '{}:{}'.format(container, critical_process)) | ||
| assert value == "'{}' is not running".format(critical_process), 'Got value {}'.format(value) | ||
| summary = redis_get_field_value(duthost, STATE_DB, HEALTH_TABLE_NAME, 'summary') | ||
| assert summary == SUMMARY_NOT_OK | ||
| break | ||
|
|
||
|
|
||
| @pytest.mark.disable_loganalyzer | ||
| def test_device_checker(duthosts, enum_rand_one_per_hwsku_hostname, device_mocker_factory, disable_thermal_policy): | ||
| duthost = duthosts[enum_rand_one_per_hwsku_hostname] | ||
|
|
@@ -139,7 +160,7 @@ def test_device_checker(duthosts, enum_rand_one_per_hwsku_hostname, device_mocke | |
| time.sleep(THERMAL_CHECK_INTERVAL) | ||
| value = redis_get_field_value(duthost, STATE_DB, HEALTH_TABLE_NAME, fan_name) | ||
| assert not value or fan_expect_value not in value, 'Mock fan valid speed, expect {}, but it still report invalid speed' | ||
|
|
||
| value = redis_get_field_value(duthost, STATE_DB, HEALTH_TABLE_NAME, 'ASIC') | ||
| assert not value or asic_expect_value not in value, 'Mock ASIC normal temperature, but it is still overheated' | ||
|
|
||
|
|
@@ -158,7 +179,7 @@ def test_device_checker(duthosts, enum_rand_one_per_hwsku_hostname, device_mocke | |
| value = redis_get_field_value(duthost, STATE_DB, HEALTH_TABLE_NAME, fan_name) | ||
| assert value and value == fan_expect_value, 'Mock fan absence, expect {}, but got {}'.format(fan_expect_value, | ||
| value) | ||
|
|
||
| value = redis_get_field_value(duthost, STATE_DB, HEALTH_TABLE_NAME, psu_name) | ||
| assert value and psu_expect_value == value, 'Mock PSU no power, expect {}, but got {}'.format(psu_expect_value, | ||
| value) | ||
|
|
@@ -173,7 +194,7 @@ def test_device_checker(duthosts, enum_rand_one_per_hwsku_hostname, device_mocke | |
| value = redis_get_field_value(duthost, STATE_DB, HEALTH_TABLE_NAME, fan_name) | ||
| assert not value or value != fan_expect_value, 'Mock fan presence, but it still report absence' | ||
|
|
||
|
|
||
| time.sleep(PSU_CHECK_INTERVAL) | ||
| value = redis_get_field_value(duthost, STATE_DB, HEALTH_TABLE_NAME, psu_name) | ||
| assert not value or psu_expect_value != value, 'Mock PSU power good, but it is still out of power' | ||
|
|
@@ -348,3 +369,18 @@ def __exit__(self, exc_type, exc_val, exc_tb): | |
| :return: | ||
| """ | ||
| self.dut.command('mv -f {} {}'.format(self.backup_config, self.origin_config)) | ||
|
|
||
|
|
||
| class ProcessExitContext: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| def __init__(self, dut, container_name, process_name): | ||
| self.dut = dut | ||
| self.container_name = container_name | ||
| self.process_name = process_name | ||
|
|
||
| def __enter__(self): | ||
| logging.info('Stopping {}:{}'.format(self.container_name, self.process_name)) | ||
| self.dut.command('docker exec -it {} bash -c "supervisorctl stop {}"'.format(self.container_name, self.process_name)) | ||
|
|
||
| def __exit__(self, exc_type, exc_val, exc_tb): | ||
| logging.info('Starting {}:{}'.format(self.container_name, self.process_name)) | ||
| self.dut.command('docker exec -it {} bash -c "supervisorctl start {}"'.format(self.container_name, self.process_name)) | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the expected max time for redis table to take effect? Is 60s too long?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
System-health poll critical process status every 60 seconds, so it has to wait 60 second here.