diff --git a/tests/common/devices.py b/tests/common/devices.py index 9aac450cbb0..7ee8bf0e7ad 100644 --- a/tests/common/devices.py +++ b/tests/common/devices.py @@ -9,11 +9,13 @@ """ import json import logging -from multiprocessing import Process, Queue +import os +from multiprocessing.pool import ThreadPool from errors import RunAnsibleModuleFail from errors import UnsupportedAnsibleModule + class AnsibleHostBase(object): """ @summary: The base class for various objects. @@ -44,13 +46,11 @@ def _run(self, *module_args, **complex_args): module_async = complex_args.pop('module_async', False) if module_async: - q = Queue() - def run_module(queue, module_args, complex_args): - res = self.module(*module_args, **complex_args) - q.put(res[self.hostname]) - p = Process(target=run_module, args=(q, module_args, complex_args)) - p.start() - return p, q + def run_module(module_args, complex_args): + return self.module(*module_args, **complex_args)[self.hostname] + pool = ThreadPool() + result = pool.apply_async(run_module, (module_args, complex_args)) + return pool, result res = self.module(*module_args, **complex_args)[self.hostname] if res.is_failed and not module_ignore_errors: @@ -149,7 +149,8 @@ def is_service_fully_started(self, service): return True else: return False - except: + except Exception as e: + logging.error("Failed to get service status, exception: %s" % repr(e)) return False def critical_services_fully_started(self): @@ -163,7 +164,6 @@ def critical_services_fully_started(self): logging.debug("Status of critical services: %s" % str(result)) return all(result.values()) - def get_crm_resources(self): """ @summary: Run the "crm show resources all" command and parse its output @@ -185,11 +185,32 @@ def get_crm_resources(self): fields = line.split() if len(fields) == 5: result["acl_resources"].append({"stage": fields[0], "bind_point": fields[1], - "resource_name": fields[2], "used_count": int(fields[3]), "available_count": int(fields[4])}) + "resource_name": 
fields[2], "used_count": int(fields[3]), + "available_count": int(fields[4])}) if current_table == 3: # content of the third table, table resources fields = line.split() if len(fields) == 4: result["table_resources"].append({"table_id": fields[0], "resource_name": fields[1], - "used_count": int(fields[2]), "available_count": int(fields[3])}) + "used_count": int(fields[2]), "available_count": int(fields[3])}) return result + + def get_pmon_daemon_list(self): + """ + @summary: in 201811 use different way to get the pmon daemon list since + config file (/usr/share/sonic/device/{platform}/{hwsku}/pmon_daemon_control.json) is not available. + Check the availability of two plugins led_control.py and sfputil.py, they are for ledd and xcvrd. + If one of them does not exist, then the related daemon is not expected to be running on this platform. + """ + daemon_list = [] + + led_plugin_path = os.path.join('/usr/share/sonic/device', self.facts["platform"], 'plugins/led_control.py') + sfp_plugin_path = os.path.join('/usr/share/sonic/device', self.facts["platform"], 'plugins/sfputil.py') + + if os.path.isfile(led_plugin_path): + daemon_list.append('ledd') + if os.path.isfile(sfp_plugin_path): + daemon_list.append('xcvrd') + + logging.info("Pmon daemon list for this platform is %s" % str(daemon_list)) + return daemon_list diff --git a/tests/common/mellanox_data.py b/tests/common/mellanox_data.py index 7ef9aa424bd..65b25cabe9a 100644 --- a/tests/common/mellanox_data.py +++ b/tests/common/mellanox_data.py @@ -17,6 +17,15 @@ "psus": { "number": 2, "hot_swappable": True + }, + "cpu_pack": { + "number": 1 + }, + "cpu_cores": { + "number": 2 + }, + "ports": { + "number": 32 } }, "ACS-MSN2740": { @@ -32,6 +41,15 @@ "psus": { "number": 2, "hot_swappable": True + }, + "cpu_pack": { + "number": 0 + }, + "cpu_cores": { + "number": 4 + }, + "ports": { + "number": 32 } }, "ACS-MSN2410": { @@ -47,6 +65,15 @@ "psus": { "number": 2, "hot_swappable": True + }, + "cpu_pack": { + "number": 1 + }, 
+ "cpu_cores": { + "number": 2 + }, + "ports": { + "number": 56 } }, "ACS-MSN2010": { @@ -62,6 +89,15 @@ "psus": { "number": 2, "hot_swappable": False + }, + "cpu_pack": { + "number": 0 + }, + "cpu_cores": { + "number": 4 + }, + "ports": { + "number": 22 } }, "ACS-MSN2100": { @@ -77,6 +113,15 @@ "psus": { "number": 2, "hot_swappable": False + }, + "cpu_pack": { + "number": 0 + }, + "cpu_cores": { + "number": 4 + }, + "ports": { + "number": 16 } }, "ACS-MSN3800": { @@ -92,6 +137,15 @@ "psus": { "number": 2, "hot_swappable": True + }, + "cpu_pack": { + "number": 1 + }, + "cpu_cores": { + "number": 4 + }, + "ports": { + "number": 64 } }, "ACS-MSN3700": { @@ -107,6 +161,15 @@ "psus": { "number": 2, "hot_swappable": True + }, + "cpu_pack": { + "number": 1 + }, + "cpu_cores": { + "number": 4 + }, + "ports": { + "number": 32 } }, "ACS-MSN3700C": { @@ -122,21 +185,15 @@ "psus": { "number": 2, "hot_swappable": True - } - }, - "ACS-MSN3510": { - "reboot": { - "cold_reboot": True, - "fast_reboot": True, - "warm_reboot": False }, - "fans": { - "number": 6, - "hot_swappable": True + "cpu_pack": { + "number": 1 }, - "psus": { - "number": 2, - "hot_swappable": True + "cpu_cores": { + "number": 2 + }, + "ports": { + "number": 32 } } } diff --git a/tests/conftest.py b/tests/conftest.py index 2701fdf06fd..7288a74b823 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,14 +7,10 @@ import ipaddr as ipaddress from ansible_host import AnsibleHost +from loganalyzer import LogAnalyzer pytest_plugins = ('ptf_fixtures', 'ansible_fixtures') -# Add the tests folder to sys.path, for importing the lib package -_current_file_dir = os.path.dirname(os.path.realpath(__file__)) -if _current_file_dir not in sys.path: - sys.path.append(current_file_dir) - class TestbedInfo(object): """ @@ -35,7 +31,6 @@ def __init__(self, testbed_file): name = '' for key in line: if ('uniq-name' in key or 'conf-name' in key) and '#' in line[key]: - ### skip comment line continue elif 'uniq-name' in key or 
'conf-name' in key: name = line[key] @@ -52,6 +47,8 @@ def __init__(self, testbed_file): def pytest_addoption(parser): parser.addoption("--testbed", action="store", default=None, help="testbed name") parser.addoption("--testbed_file", action="store", default=None, help="testbed file name") + parser.addoption("--disable_loganalyzer", action="store_true", default=False, + help="disable loganalyzer analysis for 'loganalyzer' fixture") @pytest.fixture(scope="session") @@ -78,11 +75,12 @@ def testbed_devices(ansible_adhoc, testbed): @param testbed: Fixture for parsing testbed configuration file. @return: Return the created device objects in a dictionary """ - from common.devices import SonicHost, Localhost + from common.devices import SonicHost, Localhost, PTFHost + + devices = { + "localhost": Localhost(ansible_adhoc), + "dut": SonicHost(ansible_adhoc, testbed["dut"], gather_facts=True)} - devices = {} - devices["localhost"] = Localhost(ansible_adhoc) - devices["dut"] = SonicHost(ansible_adhoc, testbed["dut"], gather_facts=True) if "ptf" in testbed: devices["ptf"] = PTFHost(ansible_adhoc, testbed["ptf"]) @@ -121,3 +119,21 @@ def eos(): with open('eos/eos.yml') as stream: eos = yaml.safe_load(stream) return eos + + +@pytest.fixture(autouse=True) +def loganalyzer(duthost, request): + loganalyzer = LogAnalyzer(ansible_host=duthost, marker_prefix=request.node.name) + # Add start marker into DUT syslog + marker = loganalyzer.init() + yield loganalyzer + if not request.config.getoption("--disable_loganalyzer") and "disable_loganalyzer" not in request.keywords: + # Read existed common regular expressions located with legacy loganalyzer module + loganalyzer.load_common_config() + # Parse syslog and process result. 
Raise "LogAnalyzerError" exception if: total match or expected missing + # match is not equal to zero + loganalyzer.analyze(marker) + else: + # Add end marker into DUT syslog + loganalyzer._add_end_marker(marker) + diff --git a/tests/loganalyzer/README.md b/tests/loganalyzer/README.md new file mode 100644 index 00000000000..4b6bfe7d9f4 --- /dev/null +++ b/tests/loganalyzer/README.md @@ -0,0 +1,135 @@ +#### Loganalyzer API usage example + +Below is described possibility of loganalyzer fixture/module usage. + +##### Loganalyzer fixture +In the root conftest there is implemented "loganalyzer" pytest fixture, which starts automatically for all test cases. +Fixture main flow: +- loganalyzer will add start marker before test case start +- loganalyzer will add stop marker after test case finish +- if loganalyzer analysis is not disabled for current test case it will analyze DUT syslog and display results. +If loganalyzer find specified messages which corresponds to defined regular expressions, it will display found messages and pytest will generate 'error'. + +#### To skip loganalyzer analysis for: +- all test cases - use pytest command line option ```--disable_loganalyzer``` +- specific test case: mark test case with ```@pytest.mark.disable_loganalyzer``` decorator. Example is shown below. + + +#### Notes: +loganalyzer.init() - can be called several times without calling "loganalyzer.analyze(marker)" between calls. Each call return its unique marker, which is used for "analyze" phase - loganalyzer.analyze(marker). + + +### Loganalyzer usage example + +#### Example calling loganalyzer init/analyze methods automatically by using with statement +```python + # Read existed common regular expressions located with legacy loganalyzer module + loganalyzer.load_common_config() + # Analyze syslog for code executed inside with statement + with loganalyzer as analyzer: + logging.debug("============== Test steps ===================") + # Add test code here ... 
+ time.sleep(1) + + # Separately analyze syslog for code executed inside each with statement + with loganalyzer as analyzer: + # Clear current regexp match list if there is a need to have clear configuration + loganalyzer.match_regex = [] + # Load regular expressions from the specified file + reg_exp = loganalyzer.parse_regexp_file(src=COMMON_MATCH) + # Extend currently configured match criteria (regular expressions) with data read from "COMMON_MATCH" file + loganalyzer.match_regex.extend(reg_exp) + # Add test code here ... + # Here will be executed syslog analysis on context manager __exit__ + time.sleep(1) + with loganalyzer as analyzer: + # Clear current regexp match list if there is a need to have clear configuration + loganalyzer.match_regex = [] + # Set match criteria (regular expression) to custom regexp - "test:.*Error" + loganalyzer.match_regex.extend(["test:.*Error"]) + # Add test code here ... + # Here will be executed syslog analysis on context manager __exit__ + time.sleep(1) + with loganalyzer as analyzer: + # Add test code here ... + # Here will be executed syslog analysis on context manager __exit__ + time.sleep(1) +``` + +#### Example calling loganalyzer init/analyze methods directly in test case +```python + # Example 1 + # Read existed common regular expressions located with legacy loganalyzer module + loganalyzer.load_common_config() + # Add start marker to the DUT syslog + marker = loganalyzer.init() + # PERFORM TEST CASE STEPS ... + # Verify that error messages were not found in DUT syslog. Exception will be raised if in DUT syslog will be found messages which fits regexp defined in COMMON_MATCH + loganalyzer.analyze(marker) + + # Example 2 + # Read existed common regular expressions located with legacy loganalyzer module + loganalyzer.load_common_config() + # Add start marker to the DUT syslog + marker = loganalyzer.init() + # PERFORM TEST CASE STEPS ... 
+ # Get summary of analyzed DUT syslog + result = loganalyzer.analyze(marker, fail=False) + # Verify that specific amount of error messages found in syslog # Negative test case + assert result["total"]["match"] == 2, "Not found expected errors: {}".format(result) + + # Example 3 + # Download extracted syslog file from DUT to the local host + loganalyzer.save_extracted_log(dest="/tmp/log/syslog") + + # Example 4 + # Update previously configured marker + # Now start marker will have new prefix - test_bgp + loganalyzer.update_marker_prefix("test_bgp") + + def get_platform_info(dut): + """ + Example callback which gets DUT platform information and returns obtained string + """ + return dut.command("show platform summary") + + # Example 5 + # Execute specific function and analyze logs during function execution + run_cmd_result = loganalyzer.run_cmd(get_platform_info, ans_host) + # Process result of "get_platform_info" callback + assert all(item in run_cmd_result["stdout"] for item in ["Platform", "HwSKU", "ASIC"]) is True, "Unexpected output returned after command execution: {}".format(run_cmd_result) + + # Example 6 + # Clear current regexp match list + loganalyzer.match_regex = [] + # Load regular expressions from the specified file defined in COMMON_MATCH variable + reg_exp = loganalyzer.parse_regexp_file(src=COMMON_MATCH) + # Extend currently configured match criteria (regular expressions) with data read from "COMMON_MATCH" file + loganalyzer.match_regex.extend(reg_exp) + marker = loganalyzer.init() + # PERFORM TEST CASE STEPS ... + # Verify that error messages were not found in DUT syslog. 
Exception will be raised if in DUT syslog will be found messages which fits regexp defined in COMMON_MATCH + loganalyzer.analyze(marker) + + # Example 7 + loganalyzer.expect_regex = [] + # Add specific EXPECTED regular expression + # Means that in the DUT syslog loganalyzer will search for message which matches with "kernel:.*Oops" regular expression + # If such message will not be present in DUT syslog, it will raise exception + loganalyzer.expect_regex.append("kernel:.*Oops") + # Add start marker to the DUT syslog + marker = loganalyzer.init() + # PERFORM TEST CASE STEPS ... + # Verify that expected error messages WERE FOUND in DUT syslog. Exception will be raised if in DUT syslog will NOT be found messages which fits to "kernel:.*Oops" regular expression + loganalyzer.analyze(marker) + + # Example 8 + loganalyzer.expect_regex = [] + # Add specific EXPECTED regular expression + # Means that in the DUT syslog loganalyzer will search for message which matches with "kernel:.*Oops" regular expression + # If such message will not be present in DUT syslog, it will raise exception + loganalyzer.expect_regex.append("kernel:.*Oops") + # PERFORM TEST CASE STEPS ... + # Verify that expected error messages WERE FOUND in DUT syslog. 
Exception will be raised if in DUT syslog will NOT be found messages which fits to "kernel:.*Oops" regular expression + loganalyzer.run_cmd(ans_host.command, "echo '---------- kernel: says Oops --------------' >> /var/log/syslog") +``` diff --git a/tests/loganalyzer/__init__.py b/tests/loganalyzer/__init__.py new file mode 100644 index 00000000000..e32a3567489 --- /dev/null +++ b/tests/loganalyzer/__init__.py @@ -0,0 +1 @@ +from .loganalyzer import LogAnalyzer, COMMON_MATCH, COMMON_IGNORE, COMMON_EXPECT, LogAnalyzerError diff --git a/tests/loganalyzer/loganalyzer.py b/tests/loganalyzer/loganalyzer.py new file mode 100644 index 00000000000..85d31fbde75 --- /dev/null +++ b/tests/loganalyzer/loganalyzer.py @@ -0,0 +1,232 @@ +import sys +import logging +import os +import re +import time +import pprint +import system_msg_handler + +from system_msg_handler import AnsibleLogAnalyzer as ansible_loganalyzer +from os.path import join, split +from os.path import normpath + +ANSIBLE_LOGANALYZER_MODULE = system_msg_handler.__file__.replace(r".pyc", ".py") +COMMON_MATCH = join(split(__file__)[0], "loganalyzer_common_match.txt") +COMMON_IGNORE = join(split(__file__)[0], "loganalyzer_common_ignore.txt") +COMMON_EXPECT = join(split(__file__)[0], "loganalyzer_common_expect.txt") +SYSLOG_TMP_FOLDER = "/tmp/pytest-run/syslog" + + +class LogAnalyzerError(Exception): + """Raised when loganalyzer found matches during analysis phase.""" + def __repr__(self): + return pprint.pformat(self.message) + + +class LogAnalyzer: + def __init__(self, ansible_host, marker_prefix, dut_run_dir="/tmp"): + self.ansible_host = ansible_host + self.dut_run_dir = dut_run_dir + self.extracted_syslog = os.path.join(self.dut_run_dir, "syslog") + self.marker_prefix = marker_prefix + self.ansible_loganalyzer = ansible_loganalyzer(self.marker_prefix, False) + + self.match_regex = [] + self.expect_regex = [] + self.ignore_regex = [] + self._markers = [] + + def _add_end_marker(self, marker): + """ + @summary: Add 
stop marker into syslog on the DUT. + + @return: True for successfull execution False otherwise + """ + self.ansible_host.copy(src=ANSIBLE_LOGANALYZER_MODULE, dest=os.path.join(self.dut_run_dir, "loganalyzer.py")) + + cmd = "python {run_dir}/loganalyzer.py --action add_end_marker --run_id {marker}".format(run_dir=self.dut_run_dir, marker=marker) + + logging.debug("Adding end marker '{}'".format(marker)) + self.ansible_host.command(cmd) + + def __enter__(self): + """ + Store start markers which are used in analyze phase. + """ + self._markers.append(self.init()) + + def __exit__(self, *args): + """ + Analyze syslog messages. + """ + self.analyze(self._markers.pop()) + + def _verify_log(self, result): + """ + Verify that total match and expected missing match equals to zero or raise exception otherwise. + Verify that expected_match is not equal to zero when there is configured expected regexp in self.expect_regex list + """ + if not result: + raise LogAnalyzerError("Log analyzer failed - no result.") + if result["total"]["match"] != 0 or result["total"]["expected_missing_match"] != 0: + raise LogAnalyzerError(result) + + # Check for negative case + if self.expect_regex and result["total"]["expected_match"] == 0: + raise LogAnalyzerError(result) + + def update_marker_prefix(self, marker_prefix): + """ + @summary: Update configured marker prefix + """ + self.marker_prefix = marker_prefix + + def load_common_config(self): + """ + @summary: Load regular expressions from common files, which are localted in folder with legacy loganalyzer. + Loaded regular expressions are used by "analyze" method to match expected text in the downloaded log file. 
+ """ + self.match_regex = self.ansible_loganalyzer.create_msg_regex([COMMON_MATCH])[1] + self.ignore_regex = self.ansible_loganalyzer.create_msg_regex([COMMON_IGNORE])[1] + self.expect_regex = self.ansible_loganalyzer.create_msg_regex([COMMON_EXPECT])[1] + + def parse_regexp_file(self, src): + """ + @summary: Get regular expressions defined in src file. + """ + return self.ansible_loganalyzer.create_msg_regex([src])[1] + + def run_cmd(self, callback, *args, **kwargs): + """ + @summary: Initialize loganalyzer, execute function and analyze syslog. + + @param callback: Python callable or function to be executed. + @param args: Input arguments for callback function. + @param kwargs: Input key value arguments for callback function. + + @return: Callback execution result + """ + marker = self.init() + try: + call_result = callback(*args, **kwargs) + except Exception as err: + logging.error("Error during callback execution:\n{}".format(err)) + logging.debug("Log analysis result\n".format(self.analyze(marker))) + raise err + self.analyze(marker) + + return call_result + + def init(self): + """ + @summary: Add start marker into syslog on the DUT. + + @return: True for successfull execution False otherwise + """ + logging.debug("Loganalyzer init") + + self.ansible_host.copy(src=ANSIBLE_LOGANALYZER_MODULE, dest=os.path.join(self.dut_run_dir, "loganalyzer.py")) + + start_marker = ".".join((self.marker_prefix, time.strftime("%Y-%m-%d-%H:%M:%S", time.gmtime()))) + cmd = "python {run_dir}/loganalyzer.py --action init --run_id {start_marker}".format(run_dir=self.dut_run_dir, start_marker=start_marker) + + logging.debug("Adding start marker '{}'".format(start_marker)) + self.ansible_host.command(cmd) + return start_marker + + def analyze(self, marker, fail=True): + """ + @summary: Extract syslog logs based on the start/stop markers and compose one file. Download composed file, analyze file based on defined regular expressions. + + @param marker: Marker obtained from "init" method. 
+ @param fail: Flag to enable/disable raising exception when loganalyzer find error messages. + + @return: If "fail" is False - return dictionary of parsed syslog summary, if dictionary can't be parsed - return empty dictionary. If "fail" is True and if found match messages - raise exception. + """ + logging.debug("Loganalyzer analyze") + analyzer_summary = {"total": {"match": 0, "expected_match": 0, "expected_missing_match": 0}, + "match_files": {}, + "match_messages": {}, + "expect_messages": {}, + "unused_expected_regexp": [] + } + tmp_folder = ".".join((SYSLOG_TMP_FOLDER, time.strftime("%Y-%m-%d-%H:%M:%S", time.gmtime()))) + self.ansible_loganalyzer.run_id = marker + + # Add end marker into DUT syslog + self._add_end_marker(marker) + + try: + # Disable logrotate cron task + self.ansible_host.command("sed -i 's/^/#/g' /etc/cron.d/logrotate") + + logging.debug("Waiting for logrotate from previous cron task run to finish") + # Wait for logrotate from previous cron task run to finish + end = time.time() + 60 + while time.time() < end: + # Verify for exception because self.ansible_host automatically handle command return codes and raise exception for none zero code + try: + self.ansible_host.command("pgrep -f logrotate") + except Exception: + break + else: + time.sleep(5) + continue + else: + logging.error("Logrotate from previous task was not finished during 60 seconds") + + # On DUT extract syslog files from /var/log/ and create one file by location - /tmp/syslog + self.ansible_host.extract_log(directory='/var/log', file_prefix='syslog', start_string='start-LogAnalyzer-{}'.format(marker), target_filename=self.extracted_syslog) + finally: + # Enable logrotate cron task back + self.ansible_host.command("sed -i 's/^#//g' /etc/cron.d/logrotate") + + # Download extracted logs from the DUT to the temporal folder defined in SYSLOG_TMP_FOLDER + self.save_extracted_log(dest=tmp_folder) + + match_messages_regex = re.compile('|'.join(self.match_regex)) if 
len(self.match_regex) else None + ignore_messages_regex = re.compile('|'.join(self.ignore_regex)) if len(self.ignore_regex) else None + expect_messages_regex = re.compile('|'.join(self.expect_regex)) if len(self.expect_regex) else None + + analyzer_parse_result = self.ansible_loganalyzer.analyze_file_list([tmp_folder], match_messages_regex, ignore_messages_regex, expect_messages_regex) + # Print syslog file content and remove the file + with open(tmp_folder) as fo: + logging.debug("Syslog content:\n\n{}".format(fo.read())) + os.remove(tmp_folder) + + total_match_cnt = 0 + total_expect_cnt = 0 + expected_lines_total = [] + unused_regex_messages = [] + + for key, value in analyzer_parse_result.iteritems(): + matching_lines, expecting_lines = value + analyzer_summary["total"]["match"] += len(matching_lines) + analyzer_summary["total"]["expected_match"] += len(expecting_lines) + analyzer_summary["match_files"][key] = {"match": len(matching_lines), "expected_match": len(expecting_lines)} + analyzer_summary["match_messages"][key] = matching_lines + analyzer_summary["expect_messages"][key] = expecting_lines + expected_lines_total.extend(expecting_lines) + + # Find unused regex matches + for regex in self.expect_regex: + for line in expected_lines_total: + if re.search(regex, line): + break + else: + unused_regex_messages.append(regex) + analyzer_summary["total"]["expected_missing_match"] = len(unused_regex_messages) + analyzer_summary["unused_expected_regexp"] = unused_regex_messages + + if fail: + self._verify_log(analyzer_summary) + else: + return analyzer_summary + + def save_extracted_log(self, dest): + """ + @summary: Download extracted syslog log file to the ansible host. + + @param dest: File path to store downloaded log file. 
+ """ + self.ansible_host.fetch(dest=dest, src=self.extracted_syslog, flat="yes") diff --git a/tests/loganalyzer/loganalyzer_common_expect.txt b/tests/loganalyzer/loganalyzer_common_expect.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/loganalyzer/loganalyzer_common_ignore.txt b/tests/loganalyzer/loganalyzer_common_ignore.txt new file mode 100644 index 00000000000..9f732b760a7 --- /dev/null +++ b/tests/loganalyzer/loganalyzer_common_ignore.txt @@ -0,0 +1,2 @@ +r, ".* ERR ntpd.*routing socket reports: No buffer space available.*" +r, ".* ERR snmp.*ERROR: MIBUpdater.*" diff --git a/tests/loganalyzer/loganalyzer_common_match.txt b/tests/loganalyzer/loganalyzer_common_match.txt new file mode 100644 index 00000000000..23c2870e4a9 --- /dev/null +++ b/tests/loganalyzer/loganalyzer_common_match.txt @@ -0,0 +1,6 @@ +r, "\.ERR", "\.WARN", "crash" +r, "kernel:.*Oops", "kernel:.*hung", "kernel.*oom\s" +r, "kernel:.*scheduling", "kernel:.*atomic", "kernel:.*panic" +r, "kernel:.*\serr", "kernel:.*allocation", "kernel:.*kill" +r, "kernel:.*kmemleak.*", "kernel:.* Err:" +s, "ERR" diff --git a/tests/loganalyzer/system_msg_handler.py b/tests/loganalyzer/system_msg_handler.py new file mode 100644 index 00000000000..0d963610681 --- /dev/null +++ b/tests/loganalyzer/system_msg_handler.py @@ -0,0 +1,665 @@ +''' +Owner: Hrachya Mughnetsyan + +Created on: 11/11/2016 + +Description: This file contains the log analyzer functionality in order + to verify no failures are detected in the system logs while + it can be that traffic/functionality works. 
+ + Design is available in https://github.com/Azure/SONiC/wiki/LogAnalyzer + +Usage: Examples of how to use log analyzer + sudo python loganalyzer.py --out_dir /home/hrachya/projects/loganalyzer/log.analyzer.results --action analyze --run_id myTest114 --logs file3.log -m /home/hrachya/projects/loganalyzer/match.file.1.log,/home/hrachya/projects/loganalyzer/match.file.2.log -i ignore.file.1.log,ignore.file.2.log -v +''' + +#--------------------------------------------------------------------- +# Global imports +#--------------------------------------------------------------------- +import sys +import getopt +import re +import csv +import pprint +import logging +import logging.handlers +from __builtin__ import True + +#--------------------------------------------------------------------- +# Global variables +#--------------------------------------------------------------------- +tokenizer = ',' +comment_key = '#' +system_log_file = '/var/log/syslog' + +#-- List of ERROR codes to be returned by AnsibleLogAnalyzer +err_duplicate_start_marker = -1 +err_duplicate_end_marker = -2 +err_no_end_marker = -3 +err_no_start_marker = -4 +err_invalid_string_format = -5 +err_invalid_input = -6 + +class AnsibleLogAnalyzer: + ''' + @summary: Overview of functionality + + This class performs analysis of the log files, searching for concerning messages. + The definition of concerning messages is passed to analyze_file_list() method, + as a list of regular expressions. + Additionally there will be a list of regular expressions which we wish to ignore. + Any line in log file which will match to the set of matching regex expressions + AND will not match set of 'ignore' regex expressions, will be considered a + 'match' and will be reported. + + AnsibleLogAnalyzer will be called initially before any test has ran, and will be + instructed to place 'start' marker into all log files to be analyzed. 
+ When tests have ran, AnsibleLogAnalyzer will be instructed to place end-marker + into the log files. After this, AnsibleLogAnalyzer will be invoked to perform the + analysis of logs. The analysis will be performed on specified log files. + For each log file only the content between start/end markers will be analyzed. + + For details see comments on analyze_file_list method. + ''' + + ''' + Prefixes used to build start and end markers. + The prefixes will be combined with a unique string, called run_id, passed by + the caller, to produce start/end markers for given analysis run. + ''' + + start_marker_prefix = "start-LogAnalyzer" + end_marker_prefix = "end-LogAnalyzer" + + def init_sys_logger(self): + logger = logging.getLogger('LogAnalyzer') + logger.setLevel(logging.DEBUG) + handler = logging.handlers.SysLogHandler(address = '/dev/log') + logger.addHandler(handler) + return logger + #--------------------------------------------------------------------- + + def __init__(self, run_id, verbose): + self.run_id = run_id + self.verbose = verbose + #--------------------------------------------------------------------- + + def print_diagnostic_message(self, message): + if (not self.verbose): + return + + print '[LogAnalyzer][diagnostic]:%s' % message + #--------------------------------------------------------------------- + + def create_start_marker(self): + return self.start_marker_prefix + "-" + self.run_id + + #--------------------------------------------------------------------- + + def is_filename_stdin(self, file_name): + return file_name == "-" + + #--------------------------------------------------------------------- + + def create_end_marker(self): + return self.end_marker_prefix + "-" + self.run_id + #--------------------------------------------------------------------- + + def place_marker_to_file(self, log_file, marker): + ''' + @summary: Place marker into each log file specified. + @param log_file : File path, to be applied with marker. 
+ @param marker: Marker to be placed into log files. + ''' + if not len(log_file) or self.is_filename_stdin(log_file): + self.print_diagnostic_message('Log file {} not found. Skip adding marker.'.format(log_file)) + self.print_diagnostic_message('log file:{}, place marker {}'.format(log_file, marker)) + with open(log_file, 'a') as file: + file.write(marker) + file.write('\n') + file.flush() + + def place_marker_to_syslog(self, marker): + ''' + @summary: Place marker into '/dev/log'. + @param marker: Marker to be placed into syslog. + ''' + + syslogger = self.init_sys_logger() + syslogger.info(marker) + syslogger.info('\n') + + def place_marker(self, log_file_list, marker): + ''' + @summary: Place marker into '/dev/log' and each log file specified. + @param log_file_list : List of file paths, to be applied with marker. + @param marker: Marker to be placed into log files. + ''' + + for log_file in log_file_list: + self.place_marker_to_file(log_file, marker) + + self.place_marker_to_syslog(marker) + + return + #--------------------------------------------------------------------- + + def error_to_regx(self, error_string): + ''' + This method converts a (list of) strings to one regular expression. 
+ + @summary: Meta characters are escaped by inserting a '\' beforehand + Digits are replaced with the arbitrary '\d+' code + A list is converted into an alteration statement (|) + + @param error_string: the string(s) to be converted into a regular expression + + @return: A SINGLE regular expression string + ''' + + #-- Check if error_string is a string or a list --# + if (isinstance(error_string, basestring)): + original_string = error_string + #-- Escapes out of all the meta characters --# + error_string = re.escape(error_string) + #-- Replaces a white space with the white space regular expression + error_string = re.sub(r"(\\\s+)+", "\\\\s+", error_string) + #-- Replaces a digit number with the digit regular expression + error_string = re.sub(r"\b\d+\b", "\\\\d+", error_string) + #-- Replaces a hex number with the hex regular expression + error_string = re.sub(r"0x[0-9a-fA-F]+", "0x[0-9a-fA-F]+", error_string) + self.print_diagnostic_message('Built error string: %s' % error_string) + + #-- If given a list, concatenate into one regx --# + else: + error_string = '|'.join(map(self.error_to_regx, error_string)) + + return error_string + #--------------------------------------------------------------------- + + def create_msg_regex(self, file_lsit): + ''' + @summary: This method reads input file containing list of regular expressions + to be matched against. + + @param file_list : List of file paths, contains search expressions. + + @return: A regex class instance, corresponding to loaded regex expressions. + Will be used for matching operations by callers. 
+ ''' + messages_regex = [] + + if file_lsit is None or (0 == len(file_lsit)): + return None + + for filename in file_lsit: + self.print_diagnostic_message('processing match file:%s' % filename) + with open(filename, 'rb') as csvfile: + csvreader = csv.reader(csvfile, quotechar='"', delimiter=',', + skipinitialspace=True) + + for index, row in enumerate(csvreader): + row = [item for item in row if item != ""] + self.print_diagnostic_message('[diagnostic]:processing row:%d' % index) + self.print_diagnostic_message('row:%s'% row) + try: + #-- Ignore Empty Lines + if not row: + continue + #-- Ignore commented Lines + if row[0].startswith(comment_key): + self.print_diagnostic_message('[diagnostic]:skipping row[0]:%s' % row[0]) + continue + + #-- ('s' | 'r') = (Raw String | Regular Expression) + is_regex = row[0] + if ('s' == row[0]): + is_regex = False + elif ('r' == row[0]): + is_regex = True + else: + raise Exception('file:%s, malformed line:%d. ' + 'must be \'s\'(string) or \'r\'(regex)' + %(filename,index)) + + if (is_regex): + messages_regex.extend(row[1:]) + else: + messages_regex.append(self.error_to_regx(row[1:])) + + except Exception as e: + print 'ERROR: line %d is formatted incorrectly in file %s. Skipping line' % (index, filename) + print repr(e) + sys.exit(err_invalid_string_format) + + if (len(messages_regex)): + regex = re.compile('|'.join(messages_regex)) + else: + regex = None + return regex, messages_regex + #--------------------------------------------------------------------- + + def line_matches(self, str, match_messages_regex, ignore_messages_regex): + ''' + @summary: This method checks whether given string matches against the + set of regular expressions. + + @param str: string to match against 'match' and 'ignore' regex expressions. + A string which matched to the 'match' set will be reported. 
+ A string which matches to 'match' set, but also matches to + 'ignore' set - will not be reported (will be ignored) + + @param match_messages_regex: + regex class instance containing messages to match against. + + @param ignore_messages_regex: + regex class instance containing messages to ignore match against. + + @return: True is str matches regex criteria, otherwise False. + ''' + + ret_code = False + + if ((match_messages_regex is not None) and (match_messages_regex.findall(str))): + if (ignore_messages_regex is None): + ret_code = True + + elif (not ignore_messages_regex.findall(str)): + self.print_diagnostic_message('matching line: %s' % str) + ret_code = True + + return ret_code + #--------------------------------------------------------------------- + + def line_is_expected(self, str, expect_messages_regex): + ''' + @summary: This method checks whether given string matches against the + set of "expected" regular expressions. + ''' + + ret_code = False + if (expect_messages_regex is not None) and (expect_messages_regex.findall(str)): + ret_code = True + + return ret_code + + def analyze_file(self, log_file_path, match_messages_regex, ignore_messages_regex, expect_messages_regex): + ''' + @summary: Analyze input file content for messages matching input regex + expressions. See line_matches() for details on matching criteria. + + @param log_file_path: Patch to the log file. + + @param match_messages_regex: + regex class instance containing messages to match against. + + @param ignore_messages_regex: + regex class instance containing messages to ignore match against. + + @param expect_messages_regex: + regex class instance containing messages that are expected to appear in logfile. + + @param end_marker_regex - end marker + + @return: List of strings match search criteria. + ''' + + + self.print_diagnostic_message('analyzing file: %s'% log_file_path) + + #-- indicates whether log analyzer currently is in the log range between start + #-- and end marker. 
see analyze_file method. + in_analysis_range = False + stdin_as_input = self.is_filename_stdin(log_file_path) + matching_lines = [] + expected_lines = [] + found_start_marker = False + found_end_marker = False + if stdin_as_input: + log_file = sys.stdin + else: + log_file = open(log_file_path, 'r') + + start_marker = self.create_start_marker() + end_marker = self.create_end_marker() + + for rev_line in reversed(log_file.readlines()): + if stdin_as_input: + in_analysis_range = True + else: + if rev_line.find(end_marker) != -1: + self.print_diagnostic_message('found end marker: %s' % end_marker) + if (found_end_marker): + print 'ERROR: duplicate end marker found' + sys.exit(err_duplicate_end_marker) + found_end_marker = True + in_analysis_range = True + continue + + if not stdin_as_input: + if rev_line.find(start_marker) != -1 and 'nsible' not in rev_line: + self.print_diagnostic_message('found start marker: %s' % start_marker) + if (found_start_marker): + print 'ERROR: duplicate start marker found' + sys.exit(err_duplicate_start_marker) + found_start_marker = True + + if(not in_analysis_range): + print 'ERROR: found start marker:%s without corresponding end marker' % rev_line + sys.exit(err_no_end_marker) + in_analysis_range = False + break + + if in_analysis_range : + if self.line_is_expected(rev_line, expect_messages_regex): + expected_lines.append(rev_line) + + elif self.line_matches(rev_line, match_messages_regex, ignore_messages_regex): + matching_lines.append(rev_line) + + # care about the markers only if input is not stdin + if not stdin_as_input: + if (not found_start_marker): + print 'ERROR: start marker was not found' + sys.exit(err_no_start_marker) + + if (not found_end_marker): + print 'ERROR: end marker was not found' + sys.exit(err_no_end_marker) + + return matching_lines, expected_lines + #--------------------------------------------------------------------- + + def analyze_file_list(self, log_file_list, match_messages_regex, ignore_messages_regex, 
expect_messages_regex): + ''' + @summary: Analyze input files messages matching input regex expressions. + See line_matches() for details on matching criteria. + + @param log_file_list: List of paths to the log files. + + @param match_messages_regex: + regex class instance containing messages to match against. + + @param ignore_messages_regex: + regex class instance containing messages to ignore match against. + + @param expect_messages_regex: + regex class instance containing messages that are expected to appear in logfile. + + @return: Returns map + ''' + res = {} + + for log_file in log_file_list: + if not len(log_file): + continue + match_strings, expect_strings = self.analyze_file(log_file, match_messages_regex, ignore_messages_regex, expect_messages_regex) + + match_strings.reverse() + expect_strings.reverse() + res[log_file] = [ match_strings, expect_strings ] + + return res + #--------------------------------------------------------------------- + +def usage(): + print 'loganalyzer input parameters:' + print '--help Print usage' + print '--verbose Print verbose output during the run' + print '--action init|analyze - action to perform.' + print ' init - initialize analysis by placing start-marker' + print ' to all log files specified in --logs parameter.' + print ' analyze - perform log analysis of files specified in --logs parameter.' + print ' add_end_marker - add end marker to all log files specified in --logs parameter.' + print '--out_dir path Directory path where to place output files, ' + print ' must be present when --action == analyze' + print '--logs path{,path} List of full paths to log files to be analyzed.' + print ' Implicitly system log file will be also processed' + print '--run_id string String passed to loganalyzer, uniquely identifying ' + print ' analysis session. Used to construct start/end markers. ' + print '--match_files_in path{,path} List of paths to files containing strings. 
A string from log file'
+    print '                                  matching any string from match_files_in will be collected and '
+    print '                                  reported. Must be present when action == analyze'
+    print '                                  By default syslog will be always analyzed and should be passed by match_files_in.'
+    print '--ignore_files_in path{,path}     List of paths to files containing string. '
+    print '                                  A string from log file matching any string from these'
+    print '                                  files will be ignored during analysis. Must be present'
+    print '                                  when action == analyze.'
+    print '--expect_files_in path{,path}     List of paths to files containing string. '
+    print '                                  All the strings from these files will be expected to present'
+    print '                                  in one of specified log files during the analysis. Must be present'
+    print '                                  when action == analyze.'
+
+#---------------------------------------------------------------------
+
+def check_action(action, log_files_in, out_dir, match_files_in, ignore_files_in, expect_files_in):
+    '''
+    @summary: This function validates command line parameter 'action' and
+              other related parameters.
+ + @return: True if input is correct + ''' + + ret_code = True + + if (action == 'init'): + ret_code = True + elif (action == 'add_end_marker'): + ret_code = True + elif (action == 'analyze'): + if out_dir is None or len(out_dir) == 0: + print 'ERROR: missing required out_dir for analyze action' + ret_code = False + + elif match_files_in is None or len(match_files_in) == 0: + print 'ERROR: missing required match_files_in for analyze action' + ret_code = False + + + else: + ret_code = False + print 'ERROR: invalid action:%s specified' % action + + return ret_code +#--------------------------------------------------------------------- + +def check_run_id(run_id): + ''' + @summary: Validate command line parameter 'run_id' + + @param run_id: Unique string identifying current run + + @return: True if input is correct + ''' + + ret_code = True + + if ((run_id is None) or (len(run_id) == 0)): + print 'ERROR: no run_id specified' + ret_code = False + + return ret_code +#--------------------------------------------------------------------- + +def write_result_file(run_id, out_dir, analysis_result_per_file, messages_regex_e, unused_regex_messages): + ''' + @summary: Write results of analysis into a file. + + @param run_id: Uinique string identifying current run + + @param out_dir: Full path to output directory where to place the result file. + + @param analysis_result_per_file: map file_name: [list of found matching strings] + + @return: void + ''' + + match_cnt = 0 + expected_cnt = 0 + expected_lines_total = [] + + with open(out_dir + "/result.loganalysis." 
+ run_id + ".log", 'w') as out_file: + for key, val in analysis_result_per_file.iteritems(): + matching_lines, expected_lines = val + + out_file.write("\n-----------Matches found in file:'%s'-----------\n" % key) + for s in matching_lines: + out_file.write(s) + out_file.write('\nMatches:%d\n' % len(matching_lines)) + match_cnt += len(matching_lines) + + out_file.write("\n-------------------------------------------------\n\n") + + for i in expected_lines: + out_file.write(i) + expected_lines_total.append(i) + out_file.write('\nExpected and found matches:%d\n' % len(expected_lines)) + expected_cnt += len(expected_lines) + + out_file.write("\n-------------------------------------------------\n\n") + out_file.write('Total matches:%d\n' % match_cnt) + # Find unused regex matches + for regex in messages_regex_e: + for line in expected_lines_total: + if re.search(regex, line): + break + else: + unused_regex_messages.append(regex) + + out_file.write('Total expected and found matches:%d\n' % expected_cnt) + out_file.write('Total expected but not found matches: %d\n\n' % len(unused_regex_messages)) + for regex in unused_regex_messages: + out_file.write(regex + "\n") + + out_file.write("\n-------------------------------------------------\n\n") + out_file.flush() +#--------------------------------------------------------------------- + +def write_summary_file(run_id, out_dir, analysis_result_per_file, unused_regex_messages): + ''' + @summary: This function writes results summary into a file + + @param run_id: Unique string identifying current run + + @param out_dir: Output directory full path. + + @param analysis_result_per_file: map file_name:[list of matching strings] + + @return: void + ''' + + out_file = open(out_dir + "/summary.loganalysis." 
+ run_id + ".log", 'w') + out_file.write("\nLOG ANALYSIS SUMMARY\n") + total_match_cnt = 0 + total_expect_cnt = 0 + for key, val in analysis_result_per_file.iteritems(): + matching_lines, expecting_lines = val + + file_match_cnt = len(matching_lines) + file_expect_cnt = len(expecting_lines) + out_file.write("FILE: %s MATCHES %d\n" % (key, file_match_cnt)) + out_file.write("FILE: %s EXPECTED MATCHES %d\n" % (key, file_expect_cnt)) + out_file.flush() + total_match_cnt += file_match_cnt + total_expect_cnt += file_expect_cnt + + out_file.write("-----------------------------------\n") + out_file.write("TOTAL MATCHES: %d\n" % total_match_cnt) + out_file.write("TOTAL EXPECTED MATCHES: %d\n" % total_expect_cnt) + out_file.write("TOTAL EXPECTED MISSING MATCHES: %d\n" % len(unused_regex_messages)) + out_file.write("-----------------------------------\n") + out_file.flush() + out_file.close() +#--------------------------------------------------------------------- + +def main(argv): + + action = None + run_id = None + log_files_in = "" + out_dir = None + match_files_in = None + ignore_files_in = None + expect_files_in = None + verbose = False + + try: + opts, args = getopt.getopt(argv, "a:r:l:o:m:i:e:vh", ["action=", "run_id=", "logs=", "out_dir=", "match_files_in=", "ignore_files_in=", "expect_files_in=", "verbose", "help"]) + + except getopt.GetoptError: + print "Invalid option specified" + usage() + sys.exit(err_invalid_input) + + for opt, arg in opts: + if (opt in ("-h", "--help")): + usage() + sys.exit(err_invalid_input) + + if (opt in ("-a", "--action")): + action = arg + + elif (opt in ("-r", "--run_id")): + run_id = arg + + elif (opt in ("-l", "--logs")): + log_files_in = arg + + elif (opt in ("-o", "--out_dir")): + out_dir = arg + + elif (opt in ("-m", "--match_files_in")): + match_files_in = arg + + elif (opt in ("-i", "--ignore_files_in")): + ignore_files_in = arg + + elif (opt in ("-e", "--expect_files_in")): + expect_files_in = arg + + elif (opt in ("-v", 
"--verbose")): + verbose = True + + if not (check_action(action, log_files_in, out_dir, match_files_in, ignore_files_in, expect_files_in) and check_run_id(run_id)): + usage() + sys.exit(err_invalid_input) + + analyzer = AnsibleLogAnalyzer(run_id, verbose) + + log_file_list = filter(None, log_files_in.split(tokenizer)) + + result = {} + if (action == "init"): + analyzer.place_marker(log_file_list, analyzer.create_start_marker()) + return 0 + elif (action == "analyze"): + match_file_list = match_files_in.split(tokenizer) + ignore_file_list = ignore_files_in.split(tokenizer) + expect_file_list = expect_files_in.split(tokenizer) + + analyzer.place_marker(log_file_list, analyzer.create_end_marker()) + + match_messages_regex, messages_regex_m = analyzer.create_msg_regex(match_file_list) + ignore_messages_regex, messages_regex_i = analyzer.create_msg_regex(ignore_file_list) + expect_messages_regex, messages_regex_e = analyzer.create_msg_regex(expect_file_list) + + # if no log file specified - add system log + if not log_file_list: + log_file_list.append(system_log_file) + + result = analyzer.analyze_file_list(log_file_list, match_messages_regex, + ignore_messages_regex, expect_messages_regex) + unused_regex_messages = [] + write_result_file(run_id, out_dir, result, messages_regex_e, unused_regex_messages) + write_summary_file(run_id, out_dir, result, unused_regex_messages) + elif (action == "add_end_marker"): + analyzer.place_marker(log_file_list, analyzer.create_end_marker()) + return 0 + + else: + print 'Unknown action:%s specified' % action + return len(result) +#--------------------------------------------------------------------- + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/tests/platform/check_all_interface_info.py b/tests/platform/check_all_interface_info.py new file mode 100644 index 00000000000..ba7b5b78e8c --- /dev/null +++ b/tests/platform/check_all_interface_info.py @@ -0,0 +1,19 @@ +""" +Helper script for checking all related information of 
interfaces + +This script contains re-usable functions for checking status of interfaces on SONiC. +""" +import logging +from check_transceiver_status import all_transceivers_detected +from check_interface_status import check_interface_status + + +def check_interface_information(dut, interfaces): + if not all_transceivers_detected(dut, interfaces): + logging.info("Not all transceivers are detected") + return False + if not check_interface_status(dut, interfaces): + logging.info("Not all interfaces are up") + return False + + return True diff --git a/tests/platform/check_daemon_status.py b/tests/platform/check_daemon_status.py new file mode 100644 index 00000000000..ea9fd6d6f64 --- /dev/null +++ b/tests/platform/check_daemon_status.py @@ -0,0 +1,32 @@ +""" +Helper script for checking status of platform daemon status + +This script contains re-usable functions for checking status of platform daemon status. +""" +import logging + + +def check_pmon_daemon_status(dut): + """ + @summary: check daemon running status inside pmon docker. + + This function use command "supervisorctl status" inside the container and check the status from the command output. + If the daemon status is "RUNNING" then return True, if daemon not exist or status is not "RUNNING", return false. 
+ """ + daemon_list = dut.get_pmon_daemon_list() + daemon_status = {} + try: + for daemon in daemon_list: + output = dut.shell('docker exec pmon supervisorctl status | grep %s' % daemon, module_ignore_errors=True) + if bool(output["stdout_lines"]): + expected_line = output["stdout_lines"][0] + expected_line_list = expected_line.split() + daemon_status[daemon] = (daemon in expected_line_list and 'RUNNING' in expected_line_list) + logging.debug("Daemon %s status is %s" % (daemon, str(daemon_status[daemon]))) + else: + logging.debug("Daemon %s does not exist" % daemon) + return False + return all(daemon_status.values()) + except Exception as e: + logging.error("Failed to get platform daemon status, exception: %s" % repr(e)) + return False diff --git a/tests/platform/check_interface_status.py b/tests/platform/check_interface_status.py index a2aa4a4c578..0de7b1691e4 100644 --- a/tests/platform/check_interface_status.py +++ b/tests/platform/check_interface_status.py @@ -35,23 +35,39 @@ def check_interface_status(dut, interfaces): """ @summary: Check the admin and oper status of the specified interfaces on DUT. @param dut: The AnsibleHost object of DUT. For interacting with DUT. - @param hostname: @param interfaces: List of interfaces that need to be checked. 
""" logging.info("Check interface status using cmd 'intfutil'") - mg_ports = dut.minigraph_facts(host=dut.hostname)["ansible_facts"]["minigraph_ports"] + mg_ports = dut.minigraph_facts(host=dut.hostname)["ansible_facts"]["minigraph_ports"] output = dut.command("intfutil description") intf_status = parse_intf_status(output["stdout_lines"][2:]) + check_intf_presence_command = 'show interface transceiver presence {}' for intf in interfaces: expected_oper = "up" if intf in mg_ports else "down" expected_admin = "up" if intf in mg_ports else "down" - assert intf in intf_status, "Missing status for interface %s" % intf - assert intf_status[intf]["oper"] == expected_oper, \ - "Oper status of interface %s is %s, expected '%s'" % (intf, intf_status[intf]["oper"], expected_oper) - assert intf_status[intf]["admin"] == expected_oper, \ - "Admin status of interface %s is %s, expected '%s'" % (intf, intf_status[intf]["admin"], expected_admin) + if intf not in intf_status: + logging.info("Missing status for interface %s" % intf) + return False + if intf_status[intf]["oper"] != expected_oper: + logging.info("Oper status of interface %s is %s, expected '%s'" % (intf, intf_status[intf]["oper"], + expected_oper)) + return False + if intf_status[intf]["admin"] != expected_admin: + logging.info("Admin status of interface %s is %s, expected '%s'" % (intf, intf_status[intf]["admin"], + expected_admin)) + return False + + # Cross check the interface SFP presence status + check_presence_output = dut.command(check_intf_presence_command.format(intf)) + presence_list = check_presence_output["stdout_lines"][2].split() + assert intf in presence_list, "Wrong interface name in the output: %s" % str(presence_list) + assert 'Present' in presence_list, "Status is not expected, presence status: %s" % str(presence_list) logging.info("Check interface status using the interface_facts module") intf_facts = dut.interface_facts(up_ports=mg_ports)["ansible_facts"] down_ports = 
intf_facts["ansible_interface_link_down_ports"] - assert len(down_ports) == 0, "Some interfaces are down: %s" % str(down_ports) + if len(down_ports) != 0: + logging.info("Some interfaces are down: %s" % str(down_ports)) + return False + + return True diff --git a/tests/platform/conftest.py b/tests/platform/conftest.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/platform/mellanox/check_sysfs.py b/tests/platform/mellanox/check_sysfs.py index 364e977dfa5..72e9f365d43 100644 --- a/tests/platform/mellanox/check_sysfs.py +++ b/tests/platform/mellanox/check_sysfs.py @@ -22,14 +22,15 @@ def check_sysfs_broken_symbolinks(dut): assert len(broken_symbolinks) == 0, \ "Found some broken symbolinks: %s" % str(broken_symbolinks) + def check_sysfs_thermal(dut): logging.info("Check thermal") file_asic = dut.command("cat /bsp/thermal/asic") try: asic_temp = float(file_asic["stdout"]) / 1000 - assert asic_temp > 0 and asic_temp < 85, "Abnormal ASIC temperature: %s" % file_asic["stdout"] - except: - assert False, "Bad content in /bsp/thermal/asic: %s" % file_asic["stdout"] + assert 0 < asic_temp < 85, "Abnormal ASIC temperature: %s" % file_asic["stdout"] + except Exception as e: + assert False, "Bad content in /bsp/thermal/asic: %s, exception: %s" % (file_asic["stdout"], repr(e)) def check_sysfs_fan(dut): @@ -38,14 +39,20 @@ def check_sysfs_fan(dut): from common.mellanox_data import SWITCH_MODELS fan_count = SWITCH_MODELS[dut.facts["hwsku"]]["fans"]["number"] - if SWITCH_MODELS[dut.facts["hwsku"]]["fans"]["hot_swappable"]: - fan_status_list = ["/bsp/module/fan%d_status" % fan_id for fan_id in range(1, fan_count + 1)] - for fan_status in fan_status_list: - fan_status_content = dut.command("cat %s" % fan_status) - assert fan_status_content["stdout"] == "1", "Content of %s is not 1" % fan_status - - fan_min_list = ["/bsp/fan/fan%d_min" % fan_id for fan_id in range(1, fan_count + 1)] - for fan_min in fan_min_list: + fan_speed = 0 + fan_min_speed = 0 + 
fan_max_speed = 0 + fan_set_speed = 0 + for fan_id in range(1, fan_count + 1): + if SWITCH_MODELS[dut.facts["hwsku"]]["fans"]["hot_swappable"]: + fan_status = "/bsp/module/fan{}_status".format(fan_id) + try: + fan_status_content = dut.command("cat %s" % fan_status) + assert fan_status_content["stdout"] == "1", "Content of %s is not 1" % fan_status + except Exception as e: + assert False, "Get content from %s failed, exception: %s" % (fan_status, repr(e)) + + fan_min = "/bsp/fan/fan{}_min".format(fan_id) try: fan_min_content = dut.command("cat %s" % fan_min) fan_min_speed = int(fan_min_content["stdout"]) @@ -53,8 +60,7 @@ def check_sysfs_fan(dut): except Exception as e: assert False, "Get content from %s failed, exception: %s" % (fan_min, repr(e)) - fan_max_list = ["/bsp/fan/fan%d_max" % fan_id for fan_id in range(1, fan_count + 1)] - for fan_max in fan_max_list: + fan_max = "/bsp/fan/fan{}_max".format(fan_id) try: fan_max_content = dut.command("cat %s" % fan_max) fan_max_speed = int(fan_max_content["stdout"]) @@ -62,8 +68,7 @@ def check_sysfs_fan(dut): except Exception as e: assert False, "Get content from %s failed, exception: %s" % (fan_max, repr(e)) - fan_speed_get_list = ["/bsp/fan/fan%d_speed_get" % fan_id for fan_id in range(1, fan_count + 1)] - for fan_speed_get in fan_speed_get_list: + fan_speed_get = "/bsp/fan/fan{}_speed_get".format(fan_id) try: fan_speed_get_content = dut.command("cat %s" % fan_speed_get) fan_speed = int(fan_speed_get_content["stdout"]) @@ -71,10 +76,90 @@ def check_sysfs_fan(dut): except Exception as e: assert False, "Get content from %s failed, exception: %s" % (fan_speed_get, repr(e)) - fan_speed_set_list = ["/bsp/fan/fan%d_speed_set" % fan_id for fan_id in range(1, fan_count + 1)] - for fan_speed_set in fan_speed_set_list: - fan_speed_set_content = dut.command("cat %s" % fan_speed_set) - assert fan_speed_set_content["stdout"] == "153", "Fan speed should be set to 60%, 153/255" + assert fan_min_speed < fan_speed < fan_max_speed, \ + 
"Fan speed out of range: min speed: %d, speed: %d, max speed: %d" \ + % (fan_min_speed, fan_speed, fan_max_speed) + + fan_speed_set = "/bsp/fan/fan{}_speed_set".format(fan_id) + try: + fan_speed_set_content = dut.command("cat %s" % fan_speed_set) + assert fan_speed_set_content["stdout"] == "153", "Fan speed should be set to 60%, 153/255" + fan_set_speed = int(fan_speed_set_content["stdout"]) + except Exception as e: + assert False, "Get content from %s failed, exception: %s" % (fan_speed_set, repr(e)) + + max_tolerance_speed = ((float(fan_set_speed) / 256) * fan_max_speed) * (1 + 0.3) + min_tolerance_speed = ((float(fan_set_speed) / 256) * fan_max_speed) * (1 - 0.3) + assert min_tolerance_speed < fan_speed < max_tolerance_speed, "Speed out of tolerance speed range (%d, %d)" \ + % (min_tolerance_speed, max_tolerance_speed) + + +def check_sysfs_cpu(dut): + logging.info("Check cpu") + from common.mellanox_data import SWITCH_MODELS + cpu_pack_count = SWITCH_MODELS[dut.facts["hwsku"]]["cpu_pack"]["number"] + if cpu_pack_count != 0: + cpu_pack_temp_file = "/bsp/thermal/cpu_pack" + cpu_pack_temp_file_output = dut.command("cat %s" % cpu_pack_temp_file) + cpu_pack_temp = float(cpu_pack_temp_file_output["stdout"])/1000 + + cpu_pack_max_temp_file = "/bsp/thermal/cpu_pack_max" + cpu_pack_max_temp_file_output = dut.command("cat %s" % cpu_pack_max_temp_file) + cpu_pack_max_temp = float(cpu_pack_max_temp_file_output["stdout"])/1000 + + cpu_pack_crit_temp_file = "/bsp/thermal/cpu_pack_crit" + cpu_pack_crit_temp_file_output = dut.command("cat %s" % cpu_pack_crit_temp_file) + cpu_pack_crit_temp = float(cpu_pack_crit_temp_file_output["stdout"])/1000 + + assert cpu_pack_max_temp <= cpu_pack_crit_temp, "Bad CPU pack max temp or critical temp, %s, %s " \ + % (str(cpu_pack_max_temp), str(cpu_pack_crit_temp)) + assert cpu_pack_temp < cpu_pack_max_temp, "CPU pack overheated, temp: %s" % (str(cpu_pack_temp)) + + cpu_core_count = SWITCH_MODELS[dut.facts["hwsku"]]["cpu_cores"]["number"] + for 
core_id in range(0, cpu_core_count):
+        cpu_core_temp_file = "/bsp/thermal/cpu_core{}".format(core_id)
+        cpu_core_temp_file_output = dut.command("cat %s" % cpu_core_temp_file)
+        cpu_core_temp = float(cpu_core_temp_file_output["stdout"])/1000
+
+        cpu_core_max_temp_file = "/bsp/thermal/cpu_core{}_max".format(core_id)
+        cpu_core_max_temp_file_output = dut.command("cat %s" % cpu_core_max_temp_file)
+        cpu_core_max_temp = float(cpu_core_max_temp_file_output["stdout"])/1000
+
+        cpu_core_crit_temp_file = "/bsp/thermal/cpu_core{}_crit".format(core_id)
+        cpu_core_crit_temp_file_output = dut.command("cat %s" % cpu_core_crit_temp_file)
+        cpu_core_crit_temp = float(cpu_core_crit_temp_file_output["stdout"])/1000
+
+        assert cpu_core_max_temp <= cpu_core_crit_temp, "Bad CPU core%d max temp or critical temp, %s, %s " \
+                                                        % (core_id, str(cpu_core_max_temp), str(cpu_core_crit_temp))
+        assert cpu_core_temp < cpu_core_max_temp, "CPU core%d overheated, temp: %s" % (core_id, str(cpu_core_temp))
+
+
+def check_psu_status_sysfs_consistency(dut, psu_id, psu_state):
+    """
+    @summary: Check psu related sysfs under /bsp/module against psu_state
+    """
+    psu_exist = "/bsp/module/psu%s_status" % psu_id
+    if psu_state == "NOT PRESENT":
+        psu_exist_content = dut.command("cat %s" % psu_exist)
+        logging.info("PSU state %s file %s read %s" % (psu_state, psu_exist, psu_exist_content["stdout"]))
+        assert psu_exist_content["stdout"] == "0", "CLI returns NOT PRESENT while %s contains %s" % \
+                                                   (psu_exist, psu_exist_content["stdout"])
+    else:
+        from common.mellanox_data import SWITCH_MODELS
+        dut_hwsku = dut.facts["hwsku"]
+        hot_swappable = SWITCH_MODELS[dut_hwsku]["psus"]["hot_swappable"]
+        if hot_swappable:
+            psu_exist_content = dut.command("cat %s" % psu_exist)
+            logging.info("PSU state %s file %s read %s" % (psu_state, psu_exist, psu_exist_content["stdout"]))
+            assert psu_exist_content["stdout"] == "1", "CLI returns %s while %s contains %s" % \
+                                                       (psu_state, psu_exist, psu_exist_content["stdout"])
+
+        
psu_pwr_state = "/bsp/module/psu%s_pwr_status" % psu_id + psu_pwr_state_content = dut.command("cat %s" % psu_pwr_state) + logging.info("PSU state %s file %s read %s" % (psu_state, psu_pwr_state, psu_pwr_state_content["stdout"])) + assert (psu_pwr_state_content["stdout"] == "1" and psu_state == "OK") \ + or (psu_pwr_state_content["stdout"] == "0" and psu_state == "NOT OK"),\ + "sysfs content %s mismatches with psu_state %s" % (psu_pwr_state_content["stdout"], psu_state) def check_sysfs_psu(dut): @@ -83,11 +168,8 @@ def check_sysfs_psu(dut): from common.mellanox_data import SWITCH_MODELS psu_count = SWITCH_MODELS[dut.facts["hwsku"]]["psus"]["number"] - if SWITCH_MODELS[dut.facts["hwsku"]]["psus"]["hot_swappable"]: - psu_status_list = ["/bsp/module/psu%d_status" % psu_id for psu_id in range(1, psu_count + 1)] - for psu_status in psu_status_list: - psu_status_content = dut.command("cat %s" % psu_status) - assert psu_status_content["stdout"] == "1", "Content of %s is not 1" % psu_status + for psu_id in range(1, psu_count + 1): + check_psu_status_sysfs_consistency(dut, psu_id, 'OK') def check_sysfs_qsfp(dut, interfaces): @@ -112,6 +194,8 @@ def check_sysfs(dut, interfaces): check_sysfs_fan(dut) + check_sysfs_cpu(dut) + check_sysfs_psu(dut) check_sysfs_qsfp(dut, interfaces) diff --git a/tests/platform/mellanox/conftest.py b/tests/platform/mellanox/conftest.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/platform/mellanox/test_check_sfp_presence.py b/tests/platform/mellanox/test_check_sfp_presence.py new file mode 100644 index 00000000000..5ed2583a1f4 --- /dev/null +++ b/tests/platform/mellanox/test_check_sfp_presence.py @@ -0,0 +1,35 @@ +""" +Cross check show sfp presence with qsfp_status +""" +import logging +import os +import json + +from platform_fixtures import conn_graph_facts + + +def test_check_sfp_presence(testbed_devices, conn_graph_facts): + """This test case is to check SFP presence status with CLI and sysfs. 
+ """ + ans_host = testbed_devices["dut"] + ports_config = json.loads(ans_host.command("sudo sonic-cfggen -d --var-json PORT")["stdout"]) + check_qsfp_sysfs_command = 'cat /bsp/qsfp/qsfp{}_status' + check_intf_presence_command = 'show interface transceiver presence {}' + + logging.info("Use show interface status information") + for intf in conn_graph_facts["device_conn"]: + intf_lanes = ports_config[intf]["lanes"] + sfp_id = int(intf_lanes.split(",")[0])/4 + 1 + + check_presence_output = ans_host.command(check_intf_presence_command.format(intf)) + assert check_presence_output["rc"] == 0, "Failed to read interface %s transceiver presence" % intf + logging.info(str(check_presence_output["stdout_lines"][2])) + presence_list = check_presence_output["stdout_lines"][2].split() + logging.info(str(presence_list)) + assert intf in presence_list, "Wrong interface name in the output %s" % str(presence_list) + assert 'Present' in presence_list, "Status is not expected, output %s" % str(presence_list) + + check_sysfs_output = ans_host.command(check_qsfp_sysfs_command.format(str(sfp_id))) + logging.info('output of check sysfs %s' % (str(check_sysfs_output))) + assert check_sysfs_output["rc"] == 0, "Failed to read qsfp_status of sfp%s." 
% str(sfp_id) + assert check_sysfs_output["stdout"] == '1', "Content of qsfp_status of sfp%s is not correct" % str(sfp_id) diff --git a/tests/platform/mellanox/test_check_sysfs.py b/tests/platform/mellanox/test_check_sysfs.py index 973d653f6c6..fb6dd6b6bbe 100644 --- a/tests/platform/mellanox/test_check_sysfs.py +++ b/tests/platform/mellanox/test_check_sysfs.py @@ -5,6 +5,7 @@ https://github.com/Azure/SONiC/blob/master/doc/pmon/sonic_platform_test_plan.md """ import logging +import os try: from platform_fixtures import conn_graph_facts diff --git a/tests/platform/platform_fixtures.py b/tests/platform/platform_fixtures.py index 0b73940db3f..8be133d60db 100644 --- a/tests/platform/platform_fixtures.py +++ b/tests/platform/platform_fixtures.py @@ -1,4 +1,5 @@ import pytest +import os @pytest.fixture(scope="module") def conn_graph_facts(testbed_devices): diff --git a/tests/platform/test_platform_info.py b/tests/platform/test_platform_info.py index 67eb65676a5..93c30ffd674 100644 --- a/tests/platform/test_platform_info.py +++ b/tests/platform/test_platform_info.py @@ -7,6 +7,8 @@ import logging import re import time +import os +import sys import pytest @@ -37,6 +39,24 @@ def test_show_platform_summary(testbed_devices): "Unexpected output fields, actual=%s, expected=%s" % (str(actual_fields), str(expected_fields)) +def check_vendor_specific_psustatus(dut, psu_status_line): + """ + @summary: Vendor specific psu status check + """ + if dut.facts["asic_type"] in ["mellanox"]: + current_file_dir = os.path.dirname(os.path.realpath(__file__)) + sub_folder_dir = os.path.join(current_file_dir, "mellanox") + if sub_folder_dir not in sys.path: + sys.path.append(sub_folder_dir) + from check_sysfs import check_psu_status_sysfs_consistency + + psu_line_pattern = re.compile(r"PSU\s+(\d)+\s+(OK|NOT OK|NOT PRESENT)") + psu_match = psu_line_pattern.match(psu_status_line) + psu_id = psu_match.group(1) + psu_status = psu_match.group(2) + + 
check_psu_status_sysfs_consistency(dut, psu_id, psu_status) + def test_show_platform_psustatus(testbed_devices): """ @summary: Check output of 'show platform psustatus' @@ -45,9 +65,10 @@ def test_show_platform_psustatus(testbed_devices): logging.info("Check PSU status using '%s', hostname: %s" % (CMD_PLATFORM_PSUSTATUS, ans_host.hostname)) psu_status = ans_host.command(CMD_PLATFORM_PSUSTATUS) - psu_line_pattern = re.compile(r"PSU\s+\d+\s+(OK|NOT OK)") + psu_line_pattern = re.compile(r"PSU\s+\d+\s+(OK|NOT OK|NOT PRESENT)") for line in psu_status["stdout_lines"][2:]: assert psu_line_pattern.match(line), "Unexpected PSU status output" + check_vendor_specific_psustatus(ans_host, line) def test_turn_on_off_psu_and_check_psustatus(testbed_devices, psu_controller): @@ -108,6 +129,7 @@ def test_turn_on_off_psu_and_check_psustatus(testbed_devices, psu_controller): fields = line.split() if fields[2] != "OK": psu_under_test = fields[1] + check_vendor_specific_psustatus(ans_host, line) assert psu_under_test is not None, "No PSU is turned off" logging.info("Turn on PSU %s" % str(psu["psu_id"])) @@ -120,6 +142,7 @@ def test_turn_on_off_psu_and_check_psustatus(testbed_devices, psu_controller): fields = line.split() if fields[1] == psu_under_test: assert fields[2] == "OK", "Unexpected PSU status after turned it on" + check_vendor_specific_psustatus(ans_host, line) psu_test_results[psu_under_test] = True diff --git a/tests/platform/test_reboot.py b/tests/platform/test_reboot.py index 8278b96c42a..01ee03534ae 100644 --- a/tests/platform/test_reboot.py +++ b/tests/platform/test_reboot.py @@ -13,60 +13,126 @@ import time import sys +from datetime import datetime + import pytest from platform_fixtures import conn_graph_facts +from psu_controller import psu_controller from common.utilities import wait_until from check_critical_services import check_critical_services -from check_interface_status import check_interface_status from check_transceiver_status import check_transceiver_basic 
-from check_transceiver_status import all_transceivers_detected +from check_daemon_status import check_pmon_daemon_status +from check_all_interface_info import check_interface_information + +pytestmark = [pytest.mark.disable_loganalyzer] + +REBOOT_TYPE_WARM = "warm" +REBOOT_TYPE_COLD = "cold" +REBOOT_TYPE_FAST = "fast" +REBOOT_TYPE_POWEROFF = "power off" + +reboot_ctrl_dict = { + REBOOT_TYPE_POWEROFF: { + "timeout": 300, + "cause": "Power Loss" + }, + REBOOT_TYPE_COLD: { + "command": "reboot", + "timeout": 300, + "cause": "reboot" + }, + REBOOT_TYPE_FAST: { + "command": "fast-reboot", + "timeout": 180, + "cause": "fast-reboot" + }, + REBOOT_TYPE_WARM: { + "command": "warm-reboot", + "timeout": 180, + "cause": "warm-reboot" + } +} + + +def check_reboot_cause(dut, reboot_cause_expected): + """ + @summary: Check the reboot cause on DUT. + @param dut: The AnsibleHost object of DUT. + @param reboot_cause_expected: The expected reboot cause. + """ + logging.info("Check the reboot cause") + output = dut.shell("show reboot-cause") + reboot_cause_got = output["stdout"] + logging.debug("show reboot-cause returns {}".format(reboot_cause_got)) + m = re.search(reboot_cause_expected, reboot_cause_got) + assert m is not None, "got reboot-cause %s after rebooted by %s" % (reboot_cause_got, reboot_cause_expected) -def reboot_and_check(localhost, dut, interfaces, reboot_type="cold"): +def reboot_and_check(localhost, dut, interfaces, reboot_type=REBOOT_TYPE_COLD, reboot_helper=None, reboot_kwargs=None): """ Perform the specified type of reboot and check platform status. + @param localhost: The Localhost object. + @param dut: The AnsibleHost object of DUT. + @param interfaces: DUT's interfaces defined by minigraph + @param reboot_type: The reboot type, pre-defined const that has name convention of REBOOT_TYPE_XXX. 
+ @param reboot_helper: The helper function used only by power off reboot + @param reboot_kwargs: The argument used by reboot_helper """ logging.info("Run %s reboot on DUT" % reboot_type) - if reboot_type == "cold": - reboot_cmd = "reboot" - reboot_timeout = 300 - elif reboot_type == "fast": - reboot_cmd = "fast-reboot" - reboot_timeout = 180 - elif reboot_type == "warm": - reboot_cmd = "warm-reboot" - reboot_timeout = 180 + + assert reboot_type in reboot_ctrl_dict.keys(), "Unknown reboot type %s" % reboot_type + + reboot_timeout = reboot_ctrl_dict[reboot_type]["timeout"] + reboot_cause = reboot_ctrl_dict[reboot_type]["cause"] + + dut_datetime = datetime.strptime(dut.command('date -u +"%Y-%m-%d %H:%M:%S"')["stdout"], "%Y-%m-%d %H:%M:%S") + + if reboot_type == REBOOT_TYPE_POWEROFF: + assert reboot_helper is not None, "A reboot function must be provided for power off reboot" + + reboot_helper(reboot_kwargs) + + localhost.wait_for(host=dut.hostname, port=22, state="stopped", delay=10, timeout=120) else: - assert False, "Reboot type %s is not supported" % reboot_type - process, queue = dut.command(reboot_cmd, module_async=True) - - logging.info("Wait for DUT to go down") - res = localhost.wait_for(host=dut.hostname, port=22, state="stopped", delay=10, timeout=120, - module_ignore_errors=True) - if "failed" in res: - if process.is_alive(): - logging.error("Command '%s' is not completed" % reboot_cmd) - process.terminate() - logging.error("reboot result %s" % str(queue.get())) - assert False, "DUT did not go down" + reboot_cmd = reboot_ctrl_dict[reboot_type]["command"] + reboot_task, reboot_res = dut.command(reboot_cmd, module_ignore_errors=True, module_async=True) + + logging.info("Wait for DUT to go down") + res = localhost.wait_for(host=dut.hostname, port=22, state="stopped", timeout=180, module_ignore_errors=True) + if "failed" in res: + try: + logging.error("Wait for switch down failed, try to kill any possible stuck reboot task") + pid = dut.command("pgrep -f '%s'" 
% reboot_cmd)["stdout"] + dut.command("kill -9 %s" % pid) + reboot_task.terminate() + logging.error("Result of command '%s': %s" % (reboot_cmd, str(reboot_res.get(timeout=0)))) + except Exception as e: + logging.error("Exception raised while cleanup reboot task and get result: " + repr(e)) logging.info("Wait for DUT to come back") localhost.wait_for(host=dut.hostname, port=22, state="started", delay=10, timeout=reboot_timeout) + logging.info("Check the uptime to verify whether reboot was performed") + dut_uptime = datetime.strptime(dut.command("uptime -s")["stdout"], "%Y-%m-%d %H:%M:%S") + assert float(dut_uptime.strftime("%s")) - float(dut_datetime.strftime("%s")) > 10, "Device did not reboot" + logging.info("Wait until all critical services are fully started") check_critical_services(dut) - logging.info("Wait some time for all the transceivers to be detected") - assert wait_until(300, 20, all_transceivers_detected, dut, interfaces), \ - "Not all transceivers are detected in 300 seconds" + logging.info("Check reboot cause") + check_reboot_cause(dut, reboot_cause) - logging.info("Check interface status") - check_interface_status(dut, interfaces) + logging.info("Wait some time for all the transceivers to be detected") + assert wait_until(300, 20, check_interface_information, dut, interfaces), \ + "Not all transceivers are detected or interfaces are up in 300 seconds" logging.info("Check transceiver status") check_transceiver_basic(dut, interfaces) + logging.info("Check pmon daemon status") + assert check_pmon_daemon_status(dut), "Not all pmon daemons running." 
+ if dut.facts["asic_type"] in ["mellanox"]: current_file_dir = os.path.dirname(os.path.realpath(__file__)) @@ -86,7 +152,7 @@ def test_cold_reboot(testbed_devices, conn_graph_facts): ans_host = testbed_devices["dut"] localhost = testbed_devices["localhost"] - reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], reboot_type="cold") + reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], reboot_type=REBOOT_TYPE_COLD) def test_fast_reboot(testbed_devices, conn_graph_facts): @@ -96,7 +162,7 @@ def test_fast_reboot(testbed_devices, conn_graph_facts): ans_host = testbed_devices["dut"] localhost = testbed_devices["localhost"] - reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], reboot_type="fast") + reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], reboot_type=REBOOT_TYPE_FAST) def test_warm_reboot(testbed_devices, conn_graph_facts): @@ -112,4 +178,74 @@ def test_warm_reboot(testbed_devices, conn_graph_facts): if "disabled" in issu_capability: pytest.skip("ISSU is not supported on this DUT, skip this test case") - reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], reboot_type="warm") + reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], reboot_type=REBOOT_TYPE_WARM) + + +@pytest.fixture(params=[15, 5]) +def power_off_delay(request): + """ + @summary: used to parametrize test cases on power_off_delay + @param request: pytest request object + @return: power_off_delay + """ + return request.param + + +def _power_off_reboot_helper(kwargs): + """ + @summary: Turn off all PSUs, wait delay_time seconds, then turn on the PSUs in power_on_seq + @param kwargs: dictionary with keys psu_ctrl, all_psu, power_on_seq and delay_time + """ + psu_ctrl = kwargs["psu_ctrl"] + all_psu = kwargs["all_psu"] + power_on_seq = kwargs["power_on_seq"] + delay_time = kwargs["delay_time"] + + for psu in all_psu: + logging.debug("turning off {}".format(psu)) + psu_ctrl.turn_off_psu(psu["psu_id"]) + time.sleep(delay_time) + 
logging.info("Power on {}".format(power_on_seq)) + for psu in power_on_seq: + logging.debug("turning on {}".format(psu)) + psu_ctrl.turn_on_psu(psu["psu_id"]) + + +def test_power_off_reboot(testbed_devices, conn_graph_facts, psu_controller, power_off_delay): + """ + @summary: This test case is to perform reboot via powercycle and check platform status + @param testbed_devices: Fixture initialize devices in testbed + @param conn_graph_facts: Fixture parse and return lab connection graph + @param psu_controller: The python object of psu controller + @param power_off_delay: Pytest fixture. The delay between turning off and on the PSU + """ + ans_host = testbed_devices["dut"] + localhost = testbed_devices["localhost"] + + psu_ctrl = psu_controller(ans_host.hostname, ans_host.facts["asic_type"]) + if psu_ctrl is None: + pytest.skip("No PSU controller for %s, skip rest of the testing in this case" % ans_host.hostname) + + all_psu = psu_ctrl.get_psu_status() + + # Purpose of this list is to control sequence of turning on PSUs in power off testing. + # If there are 2 PSUs, then 3 scenarios would be covered: + # 1. Turn off all PSUs, turn on PSU1, then check. + # 2. Turn off all PSUs, turn on PSU2, then check. + # 3. Turn off all PSUs, turn on one of the PSU, then turn on the other PSU, then check. 
+ power_on_seq_list = [] + if all_psu: + power_on_seq_list = [[item] for item in all_psu] + power_on_seq_list.append(all_psu) + + logging.info("Got all power on sequences {}".format(power_on_seq_list)) + + poweroff_reboot_kwargs = {"dut": ans_host} + + for power_on_seq in power_on_seq_list: + poweroff_reboot_kwargs["psu_ctrl"] = psu_ctrl + poweroff_reboot_kwargs["all_psu"] = all_psu + poweroff_reboot_kwargs["power_on_seq"] = power_on_seq + poweroff_reboot_kwargs["delay_time"] = power_off_delay + reboot_and_check(localhost, ans_host, conn_graph_facts["device_conn"], REBOOT_TYPE_POWEROFF, + _power_off_reboot_helper, poweroff_reboot_kwargs) diff --git a/tests/platform/test_reload_config.py b/tests/platform/test_reload_config.py index 047945dfab4..db64ee2b558 100644 --- a/tests/platform/test_reload_config.py +++ b/tests/platform/test_reload_config.py @@ -9,13 +9,14 @@ import os import time import sys +import pytest from platform_fixtures import conn_graph_facts from common.utilities import wait_until from check_critical_services import check_critical_services -from check_interface_status import check_interface_status from check_transceiver_status import check_transceiver_basic -from check_transceiver_status import all_transceivers_detected +from check_all_interface_info import check_interface_information +pytestmark = [pytest.mark.disable_loganalyzer] def test_reload_configuration(testbed_devices, conn_graph_facts): @@ -33,13 +34,9 @@ def test_reload_configuration(testbed_devices, conn_graph_facts): check_critical_services(ans_host) logging.info("Wait some time for all the transceivers to be detected") - assert wait_until(300, 20, all_transceivers_detected, ans_host, interfaces), \ + assert wait_until(300, 20, check_interface_information, ans_host, interfaces), \ "Not all transceivers are detected in 300 seconds" - logging.info("Check interface status") - time.sleep(60) - check_interface_status(ans_host, interfaces) - logging.info("Check transceiver status") 
check_transceiver_basic(ans_host, interfaces) diff --git a/tests/platform/test_sequential_restart.py b/tests/platform/test_sequential_restart.py index 78f087689fb..84cecf17873 100644 --- a/tests/platform/test_sequential_restart.py +++ b/tests/platform/test_sequential_restart.py @@ -15,9 +15,9 @@ from platform_fixtures import conn_graph_facts from common.utilities import wait_until from check_critical_services import check_critical_services -from check_interface_status import check_interface_status from check_transceiver_status import check_transceiver_basic -from check_transceiver_status import all_transceivers_detected +from check_all_interface_info import check_interface_information +pytestmark = [pytest.mark.disable_loganalyzer] def restart_service_and_check(localhost, dut, service, interfaces): @@ -32,12 +32,8 @@ def restart_service_and_check(localhost, dut, service, interfaces): check_critical_services(dut) logging.info("Wait some time for all the transceivers to be detected") - assert wait_until(300, 20, all_transceivers_detected, dut, interfaces), \ - "Not all transceivers are detected in 300 seconds" - - logging.info("Check interface status") - time.sleep(60) - check_interface_status(dut, interfaces) + assert wait_until(300, 20, check_interface_information, dut, interfaces), \ + "Not all interface information are detected within 300 seconds" logging.info("Check transceiver status") check_transceiver_basic(dut, interfaces) diff --git a/tests/platform/test_sfp.py b/tests/platform/test_sfp.py index bba52ad5473..7de20af0042 100644 --- a/tests/platform/test_sfp.py +++ b/tests/platform/test_sfp.py @@ -97,7 +97,9 @@ def test_check_sfp_status_and_configure_sfp(testbed_devices, conn_graph_facts): for intf in conn_graph_facts["device_conn"]: reset_result = ans_host.command("%s %s" % (cmd_sfp_reset, intf)) assert reset_result["rc"] == 0, "'%s %s' failed" % (cmd_sfp_reset, intf) - time.sleep(120) # Wait some time for SFP to fully recover after reset + time.sleep(5) + 
logging.info("Wait some time for SFP to fully recover after reset") + time.sleep(60) logging.info("Check sfp presence again after reset") sfp_presence = ans_host.command(cmd_sfp_presence) @@ -106,6 +108,12 @@ def test_check_sfp_status_and_configure_sfp(testbed_devices, conn_graph_facts): assert intf in parsed_presence, "Interface is not in output of '%s'" % cmd_sfp_presence assert parsed_presence[intf] == "Present", "Interface presence is not 'Present'" + logging.info("Check interface status") + mg_facts = ans_host.minigraph_facts(host=ans_host.hostname)["ansible_facts"] + intf_facts = ans_host.interface_facts(up_ports=mg_facts["minigraph_ports"])["ansible_facts"] + assert len(intf_facts["ansible_interface_link_down_ports"]) == 0, \ + "Some interfaces are down: %s" % str(intf_facts["ansible_interface_link_down_ports"]) + def test_check_sfp_low_power_mode(testbed_devices, conn_graph_facts): """ @@ -164,3 +172,9 @@ def test_check_sfp_low_power_mode(testbed_devices, conn_graph_facts): for intf in conn_graph_facts["device_conn"]: assert intf in parsed_presence, "Interface is not in output of '%s'" % cmd_sfp_presence assert parsed_presence[intf] == "Present", "Interface presence is not 'Present'" + + logging.info("Check interface status") + mg_facts = ans_host.minigraph_facts(host=ans_host.hostname)["ansible_facts"] + intf_facts = ans_host.interface_facts(up_ports=mg_facts["minigraph_ports"])["ansible_facts"] + assert len(intf_facts["ansible_interface_link_down_ports"]) == 0, \ + "Some interfaces are down: %s" % str(intf_facts["ansible_interface_link_down_ports"])