diff --git a/tests/common/fixtures/advanced_reboot.py b/tests/common/fixtures/advanced_reboot.py index e3d34ec1b94..f0bb7255dd0 100644 --- a/tests/common/fixtures/advanced_reboot.py +++ b/tests/common/fixtures/advanced_reboot.py @@ -355,10 +355,9 @@ def __fetchTestLogs(self, rebootOper=None): for log in logs: host.fetch(**log) - def runRebootTestcase(self, prebootList=None, inbootList=None, prebootFiles=None): + def imageInstall(self, prebootList=None, inbootList=None, prebootFiles=None): ''' - This method validates and prepare test bed for rebot test case. It runs the reboot test case using provided - test arguments + This method validates and prepares test bed for reboot test case. @param prebootList: list of operation to run before reboot process @param inbootList: list of operation to run during reboot prcoess @param prebootFiles: preboot files @@ -382,6 +381,17 @@ def runRebootTestcase(self, prebootList=None, inbootList=None, prebootFiles=None # Handle mellanox platform self.__handleMellanoxDut() + def runRebootTestcase(self, prebootList=None, inbootList=None, prebootFiles=None): + ''' + This method validates and prepares test bed for reboot test case. It runs the reboot test case using provided + test arguments + @param prebootList: list of operation to run before reboot process + @param inbootList: list of operation to run during reboot prcoess + @param prebootFiles: preboot files + ''' + + self.imageInstall(prebootList, inbootList, prebootFiles) + # Run advanced-reboot.ReloadTest for item in preboot/inboot list for rebootOper in self.rebootData['sadList']: try: diff --git a/tests/platform_tests/args/cont_warm_reboot_args.py b/tests/platform_tests/args/cont_warm_reboot_args.py index af836cc0761..da0525b14ea 100644 --- a/tests/platform_tests/args/cont_warm_reboot_args.py +++ b/tests/platform_tests/args/cont_warm_reboot_args.py @@ -25,3 +25,17 @@ def add_cont_warm_reboot_args(parser): default=False, help="Enable continuous IO", ) + + parser.addoption( + "--image_location", + action="store", + default=None, + help="Path to image(s) to be installed", + ) + + parser.addoption( + "--image_list", + action="store", + default="current", + help="Comma separated list of images to be installed during continuous reboot test", + ) diff --git a/tests/platform_tests/test_cont_warm_reboot.py b/tests/platform_tests/test_cont_warm_reboot.py index 102ad5cdfea..3f9c0c7ea33 100644 --- a/tests/platform_tests/test_cont_warm_reboot.py +++ b/tests/platform_tests/test_cont_warm_reboot.py @@ -9,10 +9,11 @@ """ import os import sys +import json import pytest import threading - from check_critical_services import check_critical_services +from tests.common.errors import RunAnsibleModuleFail from tests.common.helpers.assertions import pytest_assert from tests.common.utilities import wait from tests.common.utilities import wait_until @@ -33,132 +34,168 @@ MAX_WAIT_TIME_FOR_INTERFACES = 300 MAX_WAIT_TIME_FOR_REBOOT_CAUSE = 120 - -@pytest.fixture(autouse=True, scope="module") -def continuous_reboot_count(request): - return request.config.getoption("--continuous_reboot_count") - -@pytest.fixture(autouse=True, scope="module") -def continuous_reboot_delay(request): - return request.config.getoption("--continuous_reboot_delay") - -@pytest.fixture(autouse=True, scope="module") -def enable_continuous_io(request): - return request.config.getoption("--enable_continuous_io") - - -def reboot_and_check(localhost, dut, interfaces, reboot_type=REBOOT_TYPE_WARM, reboot_kwargs=None): - """ - Perform the specified type of reboot and check platform status. - @param localhost: The Localhost object. - @param dut: The AnsibleHost object of DUT. - @param interfaces: DUT's interfaces defined by minigraph - @param reboot_type: The reboot type, pre-defined const that has name convention of REBOOT_TYPE_XXX. - @param reboot_kwargs: The argument used by reboot_helper - """ - logging.info("Run %s reboot on DUT" % reboot_type) - - reboot(dut, localhost, reboot_type=reboot_type, reboot_helper=None, reboot_kwargs=reboot_kwargs) - - # Perform health-check - check_services(dut) - check_reboot_type(dut, reboot_type) - check_interfaces_and_transceivers(dut, interfaces) - check_neighbors(dut) - - -def check_services(dut): - """ - Perform a health check of services - @param dut: The AnsibleHost object of DUT. - """ - logging.info("Wait until all critical services are fully started") - check_critical_services(dut) - - -def check_reboot_type(dut, reboot_type=None): - """ - Perform a match of reboot-cause and reboot-trigger - @param dut: The AnsibleHost object of DUT. - """ - if reboot_type is not None: - logging.info("Check reboot cause") - pytest_assert(wait_until(MAX_WAIT_TIME_FOR_REBOOT_CAUSE, 20, check_reboot_cause, dut, reboot_type), \ - "got reboot-cause failed after rebooted by %s" % reboot_type) - - if reboot_ctrl_dict[reboot_type]["test_reboot_cause_only"]: - logging.info("Further checking skipped for %s test which intends to verify reboot-cause only" % reboot_type) - return - - -def check_interfaces_and_transceivers(dut, interfaces): - """ - Perform a check of transceivers, LAGs and interfaces status - @param dut: The AnsibleHost object of DUT. - @param interfaces: DUT's interfaces defined by minigraph - """ - logging.info("Wait %d seconds for all the transceivers to be detected" % MAX_WAIT_TIME_FOR_INTERFACES) - pytest_assert(wait_until(MAX_WAIT_TIME_FOR_INTERFACES, 20, check_interface_information, dut, interfaces), \ - "Not all transceivers are detected or interfaces are up in %d seconds" % MAX_WAIT_TIME_FOR_INTERFACES) - - logging.info("Check transceiver status") - check_transceiver_basic(dut, interfaces) - - logging.info("Check LAGs and interfaces status") - checks.check_interfaces(dut) - - -def check_neighbors(dut): - """ - Perform a BGP neighborship check. - @param dut: The AnsibleHost object of DUT. - """ - logging.info("Check BGP neighbors status. Expected state - established") - bgp_facts = dut.bgp_facts()['ansible_facts'] - mg_facts = dut.minigraph_facts(host=dut.hostname)['ansible_facts'] - - for value in bgp_facts['bgp_neighbors'].values(): - # Verify bgp sessions are established - pytest_assert(value['state'] == 'established', "BGP session not established") - # Verify locat ASNs in bgp sessions - pytest_assert(value['local AS'] == mg_facts['minigraph_bgp_asn'], \ - "Local ASNs not found in BGP session") - - for v in mg_facts['minigraph_bgp']: - # Compare the bgp neighbors name with minigraph bgp neigbhors name - pytest_assert(v['name'] == bgp_facts['bgp_neighbors'][v['addr'].lower()]['description'], \ - "BGP neighbor's name does not match minigraph") - # Compare the bgp neighbors ASN with minigraph - pytest_assert(v['asn'] == bgp_facts['bgp_neighbors'][v['addr'].lower()]['remote AS'], \ - "BGP neighbor's ASN does not match minigraph") - - -def test_cont_warm_reboot(duthost, ptfhost, localhost, conn_graph_facts, continuous_reboot_count, \ - continuous_reboot_delay, enable_continuous_io, get_advanced_reboot): - """ - @summary: This test case is to perform continuous warm reboot in a row - """ - asic_type = duthost.facts["asic_type"] - if asic_type in ["mellanox"]: - issu_capability = duthost.command("show platform mlnx issu")["stdout"] - if "disabled" in issu_capability: - pytest.skip("ISSU is not supported on this DUT, skip this test case") - - # Start advancedReboot script on the ptf host to enable continuous I/O - advancedReboot = get_advanced_reboot(rebootType='warm-reboot', enableContinuousIO=enable_continuous_io) - thr = threading.Thread(target=advancedReboot.runRebootTestcase) - thr.setDaemon(True) - thr.start() - - # Start continuous warm reboot on the DUT - for count in range(continuous_reboot_count): - logging.info("==================== Continuous warm reboot iteration: {}/{} ====================".format \ - (count + 1, continuous_reboot_count)) - reboot_and_check(localhost, duthost, conn_graph_facts["device_conn"], reboot_type=REBOOT_TYPE_WARM) - wait(continuous_reboot_delay, msg="Wait {}s before next warm-reboot".format(continuous_reboot_delay)) - - # Find the pid of continuous I/O script inside ptf container and send a stop signal - pid_res = ptfhost.command("cat /tmp/advanced-reboot-pid.log") - ptfhost.command("kill -SIGUSR1 {}".format(pid_res['stdout'])) - thr.join() - logging.info("Continuous warm-reboot test completed") \ No newline at end of file +class ContinuousReboot: + def __init__(self, request, duthost, ptfhost, localhost, conn_graph_facts, get_advanced_reboot): + self.request = request + self.duthost = duthost + self.ptfhost = ptfhost + self.localhost = localhost + self.conn_graph_facts = conn_graph_facts + self.continuous_reboot_count = request.config.getoption("--continuous_reboot_count") + self.continuous_reboot_delay = request.config.getoption("--continuous_reboot_delay") + self.enable_continuous_io = request.config.getoption("--enable_continuous_io") + self.image_location = request.config.getoption("--image_location") + self.image_list = request.config.getoption("--image_list") + self.get_advanced_reboot = get_advanced_reboot + self.currentImage = self.duthost.shell('sonic_installer list | grep Current | cut -f2 -d " "')['stdout'] + + if self.image_location is None: + logging.error("Invalid image location specified: {}".format(str(self.image_location))) + + def reboot_and_check(self, interfaces, reboot_type=REBOOT_TYPE_WARM, reboot_kwargs=None): + """ + Perform the specified type of reboot and check platform status. + @param interfaces: DUT's interfaces defined by minigraph + @param reboot_type: The reboot type, pre-defined const that has name convention of REBOOT_TYPE_XXX. + @param reboot_kwargs: The argument used by reboot_helper + """ + logging.info("Run %s reboot on DUT" % reboot_type) + + reboot(self.duthost, self.localhost, reboot_type=reboot_type, reboot_helper=None, reboot_kwargs=reboot_kwargs) + + # Perform health-check + self.check_services() + self.check_reboot_type(reboot_type) + self.check_interfaces_and_transceivers(interfaces) + self.check_neighbors() + + + def check_services(self): + """ + Perform a health check of services + """ + logging.info("Wait until all critical services are fully started") + check_critical_services(self.duthost) + + + def check_reboot_type(self, reboot_type=None): + """ + Perform a match of reboot-cause and reboot-trigger + """ + if reboot_type is not None: + logging.info("Check reboot cause") + pytest_assert(wait_until(MAX_WAIT_TIME_FOR_REBOOT_CAUSE, 20, check_reboot_cause, self.duthost, reboot_type), \ + "got reboot-cause failed after rebooted by %s" % reboot_type) + + if reboot_ctrl_dict[reboot_type]["test_reboot_cause_only"]: + logging.info("Further checking skipped for %s test which intends to verify reboot-cause only" % reboot_type) + return + + + def check_interfaces_and_transceivers(self, interfaces): + """ + Perform a check of transceivers, LAGs and interfaces status + @param dut: The AnsibleHost object of DUT. + @param interfaces: DUT's interfaces defined by minigraph + """ + logging.info("Wait %d seconds for all the transceivers to be detected" % MAX_WAIT_TIME_FOR_INTERFACES) + pytest_assert(wait_until(MAX_WAIT_TIME_FOR_INTERFACES, 20, check_interface_information, self.duthost, interfaces), \ + "Not all transceivers are detected or interfaces are up in %d seconds" % MAX_WAIT_TIME_FOR_INTERFACES) + + logging.info("Check transceiver status") + check_transceiver_basic(self.duthost, interfaces) + + logging.info("Check LAGs and interfaces status") + checks.check_interfaces(self.duthost) + + + def check_neighbors(self): + """ + Perform a BGP neighborship check. + """ + logging.info("Check BGP neighbors status. Expected state - established") + bgp_facts = self.duthost.bgp_facts()['ansible_facts'] + mg_facts = self.duthost.minigraph_facts(host=self.duthost.hostname)['ansible_facts'] + + for value in bgp_facts['bgp_neighbors'].values(): + # Verify bgp sessions are established + pytest_assert(value['state'] == 'established', "BGP session not established") + # Verify locat ASNs in bgp sessions + pytest_assert(value['local AS'] == mg_facts['minigraph_bgp_asn'], \ + "Local ASNs not found in BGP session") + + for v in mg_facts['minigraph_bgp']: + # Compare the bgp neighbors name with minigraph bgp neigbhors name + pytest_assert(v['name'] == bgp_facts['bgp_neighbors'][v['addr'].lower()]['description'], \ + "BGP neighbor's name does not match minigraph") + # Compare the bgp neighbors ASN with minigraph + pytest_assert(v['asn'] == bgp_facts['bgp_neighbors'][v['addr'].lower()]['remote AS'], \ + "BGP neighbor's ASN does not match minigraph") + + + def start_cont_warm_reboot(self): + """ + @summary: This test case is to perform continuous warm reboot in a row + """ + asic_type = self.duthost.facts["asic_type"] + if asic_type in ["mellanox"]: + issu_capability = self.duthost.command("show platform mlnx issu")["stdout"] + if "disabled" in issu_capability: + pytest.skip("ISSU is not supported on this DUT, skip this test case") + + # Start advancedReboot script on the ptf host to enable continuous I/O + advancedReboot = self.get_advanced_reboot(rebootType='warm-reboot', enableContinuousIO=self.enable_continuous_io) + thr = threading.Thread(target=advancedReboot.runRebootTestcase) + thr.setDaemon(True) + thr.start() + + file_template = { + 'install_list': self.image_list, # this list can be modified at runtime to enable testing different images + 'location': self.image_location, + 'CONTINUOUS_IO': True, + } + with open("image_install_list.json", "w") as image_file: + json.dump(file_template, image_file) + + # Start continuous warm reboot on the DUT + for count in range(self.continuous_reboot_count): + logging.info("==================== Continuous warm reboot iteration: {}/{} ====================".format \ + (count + 1, self.continuous_reboot_count)) + with open("image_install_list.json", "r") as f: + install_info = json.load(f) + image_install_list = install_info.get('install_list').split(",") + # Use modulus operator to cycle through the image_install_list per reboot iteration + image = image_install_list[count % len(image_install_list)].strip() + image_path = install_info.get('location').strip() + image + + if image == "current": + logging.info("Next image is set to current - skip image installation") + else: + advancedReboot.newSonicImage = image_path + advancedReboot.cleanupOldSonicImages = True + logging.info("Installing image {} on DUT".format(image_path)) + advancedReboot.imageInstall() + self.reboot_and_check(self.conn_graph_facts["device_conn"], reboot_type=REBOOT_TYPE_WARM) + wait(self.continuous_reboot_delay, msg="Wait {}s before next warm-reboot".format(self.continuous_reboot_delay)) + try: + # Find the pid of continuous I/O script inside ptf container and send a stop, clean signal + pid_res = self.ptfhost.command("cat /tmp/advanced-reboot-pid.log") + logging.info("Find PID result: {}".format(pid_res)) + self.ptfhost.command("kill -SIGUSR1 {}".format(pid_res['stdout'])) + res = self.ptfhost.command("rm /tmp/advanced-reboot-pid.log") + logging.info("File deletion on ptfhost: {}".format(res)) + except RunAnsibleModuleFail as err: + if 'stderr_lines' in err.results: + logging.info("Executing cmd: {} failed. Error: {}".format( \ + str(err.results.get('cmd')), str(err.results.get('stderr_lines')))) + # Make sure that the cont-IO thread is completed + thr.join(60) + if thr.is_alive(): + logging.error("Failed to join continuous I/O thread in 60s") + logging.info("Continuous warm-reboot test completed") + +def test_cont_warm_reboot(request, duthost, ptfhost, localhost, conn_graph_facts, get_advanced_reboot): + continuous_reboot = ContinuousReboot(request, duthost, ptfhost, localhost, conn_graph_facts, \ + get_advanced_reboot) + + continuous_reboot.start_cont_warm_reboot()