Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion tests/common/fixtures/advanced_reboot.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ def __fetchTestLogs(self, rebootOper=None):
for log in logs:
host.fetch(**log)

def runRebootTestcase(self, prebootList=None, inbootList=None, prebootFiles=None):
def imageInstall(self, prebootList=None, inbootList=None, prebootFiles=None):
'''
This method validates and prepares test bed for reboot test case. It runs the reboot test case using provided
test arguments
Expand All @@ -382,6 +382,17 @@ def runRebootTestcase(self, prebootList=None, inbootList=None, prebootFiles=None
# Handle mellanox platform
self.__handleMellanoxDut()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is the imageInstall a wrapper around __HandleMellanoxDut? is there a code missing here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nevermind, I saw it being used below.


def runRebootTestcase(self, prebootList=None, inbootList=None, prebootFiles=None):
'''
This method validates and prepares test bed for reboot test case. It runs the reboot test case using provided
Copy link

@SavchukRomanLv SavchukRomanLv Jul 28, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please fix typo

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed, thanks.

test arguments
@param prebootList: list of operation to run before reboot process
@param inbootList: list of operation to run during reboot process
@param prebootFiles: preboot files
'''

self.imageInstall(prebootList, inbootList, prebootFiles)

# Run advanced-reboot.ReloadTest for item in preboot/inboot list
for rebootOper in self.rebootData['sadList']:
try:
Expand Down
14 changes: 14 additions & 0 deletions tests/platform_tests/args/cont_warm_reboot_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,17 @@ def add_cont_warm_reboot_args(parser):
default=False,
help="Enable continuous IO",
)

parser.addoption(
"--image_location",
action="store",
default=None,
help="Enable continuous IO",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

help phrase does not match option?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed now, thanks.

)

parser.addoption(
"--image_list",
action="store",
default="current",
help="Comma separated list of images to be installed during continuous reboot test",
)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add newline

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed, thanks.

297 changes: 167 additions & 130 deletions tests/platform_tests/test_cont_warm_reboot.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
"""
import os
import sys
import json
import pytest
import threading

from check_critical_services import check_critical_services
from tests.common.errors import RunAnsibleModuleFail
from tests.common.helpers.assertions import pytest_assert
from tests.common.utilities import wait
from tests.common.utilities import wait_until
Expand All @@ -33,132 +34,168 @@
MAX_WAIT_TIME_FOR_INTERFACES = 300
MAX_WAIT_TIME_FOR_REBOOT_CAUSE = 120


@pytest.fixture(autouse=True, scope="module")
def continuous_reboot_count(request):
    """
    Module-scoped fixture exposing the value of the --continuous_reboot_count command line option.
    @param request: The pytest request object used to reach the test configuration.
    """
    reboot_count = request.config.getoption("--continuous_reboot_count")
    return reboot_count

@pytest.fixture(autouse=True, scope="module")
def continuous_reboot_delay(request):
    """
    Module-scoped fixture exposing the value of the --continuous_reboot_delay command line option.
    @param request: The pytest request object used to reach the test configuration.
    """
    reboot_delay = request.config.getoption("--continuous_reboot_delay")
    return reboot_delay

@pytest.fixture(autouse=True, scope="module")
def enable_continuous_io(request):
    """
    Module-scoped fixture exposing the value of the --enable_continuous_io command line option.
    @param request: The pytest request object used to reach the test configuration.
    """
    continuous_io = request.config.getoption("--enable_continuous_io")
    return continuous_io


def reboot_and_check(localhost, dut, interfaces, reboot_type=REBOOT_TYPE_WARM, reboot_kwargs=None):
    """
    Reboot the DUT with the requested reboot type, then run the post-reboot health checks.
    @param localhost: The Localhost object.
    @param dut: The AnsibleHost object of DUT.
    @param interfaces: DUT's interfaces defined by minigraph
    @param reboot_type: The reboot type, pre-defined const that has name convention of REBOOT_TYPE_XXX.
    @param reboot_kwargs: The argument used by reboot_helper
    """
    start_message = "Run %s reboot on DUT" % reboot_type
    logging.info(start_message)

    reboot(
        dut,
        localhost,
        reboot_type=reboot_type,
        reboot_helper=None,
        reboot_kwargs=reboot_kwargs,
    )

    # Health checks, in order: services, reboot cause, interfaces/transceivers, BGP neighbors
    check_services(dut)
    check_reboot_type(dut, reboot_type)
    check_interfaces_and_transceivers(dut, interfaces)
    check_neighbors(dut)


def check_services(dut):
    """
    Health check of the DUT services; delegates to check_critical_services.
    @param dut: The AnsibleHost object of DUT.
    """
    progress_message = "Wait until all critical services are fully started"
    logging.info(progress_message)
    check_critical_services(dut)


def check_reboot_type(dut, reboot_type=None):
    """
    Verify that the reboot cause reported for the DUT matches the reboot that was triggered.
    Nothing is checked when reboot_type is None.
    @param dut: The AnsibleHost object of DUT.
    @param reboot_type: The reboot type that was triggered, or None to skip the check.
    """
    if reboot_type is None:
        return

    logging.info("Check reboot cause")
    # Poll check_reboot_cause every 20s until it succeeds or the maximum wait time elapses
    cause_matched = wait_until(MAX_WAIT_TIME_FOR_REBOOT_CAUSE, 20, check_reboot_cause, dut, reboot_type)
    pytest_assert(cause_matched, "got reboot-cause failed after rebooted by %s" % reboot_type)

    # NOTE(review): this only logs; returning from here does not stop any checks the caller runs afterwards
    if reboot_ctrl_dict[reboot_type]["test_reboot_cause_only"]:
        logging.info("Further checking skipped for %s test which intends to verify reboot-cause only" % reboot_type)
    return


def check_interfaces_and_transceivers(dut, interfaces):
    """
    Wait for the interface information check to pass, then verify transceivers, LAGs and interfaces.
    @param dut: The AnsibleHost object of DUT.
    @param interfaces: DUT's interfaces defined by minigraph
    """
    logging.info("Wait %d seconds for all the transceivers to be detected" % MAX_WAIT_TIME_FOR_INTERFACES)
    # Poll check_interface_information every 20s until it succeeds or the maximum wait time elapses
    interfaces_ready = wait_until(MAX_WAIT_TIME_FOR_INTERFACES, 20, check_interface_information, dut, interfaces)
    pytest_assert(
        interfaces_ready,
        "Not all transceivers are detected or interfaces are up in %d seconds" % MAX_WAIT_TIME_FOR_INTERFACES
    )

    logging.info("Check transceiver status")
    check_transceiver_basic(dut, interfaces)

    logging.info("Check LAGs and interfaces status")
    checks.check_interfaces(dut)


def check_neighbors(dut):
    """
    Compare the BGP sessions reported by the DUT against the minigraph facts.
    @param dut: The AnsibleHost object of DUT.
    """
    logging.info("Check BGP neighbors status. Expected state - established")
    bgp_facts = dut.bgp_facts()['ansible_facts']
    mg_facts = dut.minigraph_facts(host=dut.hostname)['ansible_facts']
    sessions = bgp_facts['bgp_neighbors']

    for session in sessions.values():
        # Every session must be established
        pytest_assert(session['state'] == 'established', "BGP session not established")
        # Every session must use the local ASN from the minigraph
        pytest_assert(session['local AS'] == mg_facts['minigraph_bgp_asn'],
                      "Local ASNs not found in BGP session")

    for peer in mg_facts['minigraph_bgp']:
        # The neighbor name in the minigraph must match the session description
        expected_name = peer['name']
        session = sessions[peer['addr'].lower()]
        pytest_assert(expected_name == session['description'],
                      "BGP neighbor's name does not match minigraph")
        # The neighbor ASN in the minigraph must match the session remote ASN
        pytest_assert(peer['asn'] == session['remote AS'],
                      "BGP neighbor's ASN does not match minigraph")


def test_cont_warm_reboot(duthost, ptfhost, localhost, conn_graph_facts, continuous_reboot_count,
                          continuous_reboot_delay, enable_continuous_io, get_advanced_reboot):
    """
    @summary: This test case is to perform continuous warm reboot in a row
    """
    if duthost.facts["asic_type"] in ["mellanox"]:
        # On mellanox DUTs the test only makes sense when ISSU is not reported as disabled
        issu_capability = duthost.command("show platform mlnx issu")["stdout"]
        if "disabled" in issu_capability:
            pytest.skip("ISSU is not supported on this DUT, skip this test case")

    # Run the advanced-reboot test case in a background thread while the DUT is being rebooted
    advanced_reboot = get_advanced_reboot(rebootType='warm-reboot', enableContinuousIO=enable_continuous_io)
    io_thread = threading.Thread(target=advanced_reboot.runRebootTestcase)
    io_thread.setDaemon(True)
    io_thread.start()

    # Warm-reboot the DUT the requested number of times, pausing between iterations
    dut_interfaces = conn_graph_facts["device_conn"]
    for count in range(continuous_reboot_count):
        iteration = count + 1
        logging.info("==================== Continuous warm reboot iteration: {}/{} ====================".format(
            iteration, continuous_reboot_count))
        reboot_and_check(localhost, duthost, dut_interfaces, reboot_type=REBOOT_TYPE_WARM)
        wait(continuous_reboot_delay, msg="Wait {}s before next warm-reboot".format(continuous_reboot_delay))

    # Read the pid recorded on the ptf host and send SIGUSR1 to that process to stop it
    pid_res = ptfhost.command("cat /tmp/advanced-reboot-pid.log")
    kill_cmd = "kill -SIGUSR1 {}".format(pid_res['stdout'])
    ptfhost.command(kill_cmd)
    io_thread.join()
    logging.info("Continuous warm-reboot test completed")
class ContinuousReboot:
    """
    Runs the continuous warm-reboot scenario against a DUT.

    Test options are read from the pytest command line in the constructor. start_cont_warm_reboot()
    starts the advanced-reboot test case in a background thread and then warm-reboots the DUT
    repeatedly, optionally installing an image before each reboot and running health checks after it.
    """

    def __init__(self, request, duthost, ptfhost, localhost, conn_graph_facts, get_advanced_reboot):
        """
        @param request: The pytest request object; used to read the command line options.
        @param duthost: The AnsibleHost object of DUT.
        @param ptfhost: The AnsibleHost object of the ptf host.
        @param localhost: The Localhost object.
        @param conn_graph_facts: Connection graph facts; the "device_conn" entry is passed to the health checks.
        @param get_advanced_reboot: Callable returning the advanced-reboot object.
        """
        self.request = request
        self.duthost = duthost
        self.ptfhost = ptfhost
        self.localhost = localhost
        self.conn_graph_facts = conn_graph_facts
        self.continuous_reboot_count = request.config.getoption("--continuous_reboot_count")
        self.continuous_reboot_delay = request.config.getoption("--continuous_reboot_delay")
        self.enable_continuous_io = request.config.getoption("--enable_continuous_io")
        self.image_location = request.config.getoption("--image_location")
        self.image_list = request.config.getoption("--image_list")
        self.get_advanced_reboot = get_advanced_reboot
        # stdout of the sonic_installer query below; presumably the name of the image currently running
        self.currentImage = self.duthost.shell('sonic_installer list | grep Current | cut -f2 -d " "')['stdout']

        if self.image_location is None:
            logging.error("Invalid image location specified: {}".format(str(self.image_location)))

    def reboot_and_check(self, interfaces, reboot_type=REBOOT_TYPE_WARM, reboot_kwargs=None):
        """
        Perform the specified type of reboot and check platform status.
        @param interfaces: DUT's interfaces defined by minigraph
        @param reboot_type: The reboot type, pre-defined const that has name convention of REBOOT_TYPE_XXX.
        @param reboot_kwargs: The argument used by reboot_helper
        """
        logging.info("Run %s reboot on DUT" % reboot_type)

        reboot(self.duthost, self.localhost, reboot_type=reboot_type, reboot_helper=None,
               reboot_kwargs=reboot_kwargs)

        # Perform health-check
        self.check_services()
        self.check_reboot_type(reboot_type)
        self.check_interfaces_and_transceivers(interfaces)
        self.check_neighbors()

    def check_services(self):
        """
        Perform a health check of services
        """
        logging.info("Wait until all critical services are fully started")
        check_critical_services(self.duthost)

    def check_reboot_type(self, reboot_type=None):
        """
        Perform a match of reboot-cause and reboot-trigger
        @param reboot_type: The reboot type that was triggered; nothing is checked when it is None.
        """
        if reboot_type is not None:
            logging.info("Check reboot cause")
            pytest_assert(
                wait_until(MAX_WAIT_TIME_FOR_REBOOT_CAUSE, 20, check_reboot_cause, self.duthost, reboot_type),
                "got reboot-cause failed after rebooted by %s" % reboot_type
            )

            # NOTE(review): reboot_and_check ignores the result of this method, so this early return
            # does not skip the interface/neighbor checks that follow it there - confirm the intent.
            if reboot_ctrl_dict[reboot_type]["test_reboot_cause_only"]:
                logging.info(
                    "Further checking skipped for %s test which intends to verify reboot-cause only" % reboot_type)
                return

    def check_interfaces_and_transceivers(self, interfaces):
        """
        Perform a check of transceivers, LAGs and interfaces status
        @param interfaces: DUT's interfaces defined by minigraph
        """
        logging.info("Wait %d seconds for all the transceivers to be detected" % MAX_WAIT_TIME_FOR_INTERFACES)
        pytest_assert(
            wait_until(MAX_WAIT_TIME_FOR_INTERFACES, 20, check_interface_information, self.duthost, interfaces),
            "Not all transceivers are detected or interfaces are up in %d seconds" % MAX_WAIT_TIME_FOR_INTERFACES
        )

        logging.info("Check transceiver status")
        check_transceiver_basic(self.duthost, interfaces)

        logging.info("Check LAGs and interfaces status")
        checks.check_interfaces(self.duthost)

    def check_neighbors(self):
        """
        Perform a BGP neighborship check.
        """
        logging.info("Check BGP neighbors status. Expected state - established")
        bgp_facts = self.duthost.bgp_facts()['ansible_facts']
        mg_facts = self.duthost.minigraph_facts(host=self.duthost.hostname)['ansible_facts']

        for value in bgp_facts['bgp_neighbors'].values():
            # Verify bgp sessions are established
            pytest_assert(value['state'] == 'established', "BGP session not established")
            # Verify local ASNs in bgp sessions
            pytest_assert(value['local AS'] == mg_facts['minigraph_bgp_asn'],
                          "Local ASNs not found in BGP session")

        for v in mg_facts['minigraph_bgp']:
            # Compare the bgp neighbors name with minigraph bgp neighbors name
            pytest_assert(v['name'] == bgp_facts['bgp_neighbors'][v['addr'].lower()]['description'],
                          "BGP neighbor's name does not match minigraph")
            # Compare the bgp neighbors ASN with minigraph
            pytest_assert(v['asn'] == bgp_facts['bgp_neighbors'][v['addr'].lower()]['remote AS'],
                          "BGP neighbor's ASN does not match minigraph")

    def start_cont_warm_reboot(self):
        """
        @summary: This test case is to perform continuous warm reboot in a row

        Before each reboot the next entry of the image list is looked up in image_install_list.json;
        any entry other than "current" is installed from the location given by --image_location.
        """
        asic_type = self.duthost.facts["asic_type"]
        if asic_type in ["mellanox"]:
            issu_capability = self.duthost.command("show platform mlnx issu")["stdout"]
            if "disabled" in issu_capability:
                pytest.skip("ISSU is not supported on this DUT, skip this test case")

        # Start advancedReboot script on the ptf host to enable continuous I/O
        advancedReboot = self.get_advanced_reboot(rebootType='warm-reboot',
                                                  enableContinuousIO=self.enable_continuous_io)
        thr = threading.Thread(target=advancedReboot.runRebootTestcase)
        thr.setDaemon(True)
        thr.start()

        file_template = {
            'install_list': self.image_list,  # this list can be modified at runtime to enable testing different images
            # Taken from the --image_location option instead of a hard-coded URL
            'location': self.image_location,
            'CONTINUOUS_IO': True,
        }
        with open("image_install_list.json", "w") as image_file:
            json.dump(file_template, image_file)

        # Start continuous warm reboot on the DUT
        for count in range(self.continuous_reboot_count):
            logging.info("==================== Continuous warm reboot iteration: {}/{} ====================".format(
                count + 1, self.continuous_reboot_count))
            # The file is re-read on every iteration so that edits made while the test runs are picked up
            with open("image_install_list.json", "r") as f:
                install_info = json.load(f)
            image_install_list = [name.strip() for name in install_info.get('install_list').split(",")]
            # Use modulus operator to cycle through the image_install_list per reboot iteration
            image = image_install_list[count % len(image_install_list)]

            if image == "current":
                logging.info("Next image is set to current - skip image installation")
            else:
                # The path is only built when an install is needed: the location may legitimately
                # be unset when every entry of the list is "current"
                location = install_info.get('location')
                pytest_assert(location is not None,
                              "Image location (--image_location) must be specified to install image {}".format(image))
                image_path = location + image
                advancedReboot.newSonicImage = image_path
                advancedReboot.cleanupOldSonicImages = True
                logging.info("Installing image {} on DUT".format(image_path))
                advancedReboot.imageInstall()
            self.reboot_and_check(self.conn_graph_facts["device_conn"], reboot_type=REBOOT_TYPE_WARM)
            wait(self.continuous_reboot_delay,
                 msg="Wait {}s before next warm-reboot".format(self.continuous_reboot_delay))
        try:
            # Find the pid of continuous I/O script inside ptf container and send a stop, clean signal
            pid_res = self.ptfhost.command("cat /tmp/advanced-reboot-pid.log")
            logging.info("Find PID result: {}".format(pid_res))
            self.ptfhost.command("kill -SIGUSR1 {}".format(pid_res['stdout']))
            res = self.ptfhost.command("rm /tmp/advanced-reboot-pid.log")
            logging.info("File deletion on ptfhost: {}".format(res))
        except RunAnsibleModuleFail as err:
            # Best effort: a failed stop/cleanup command is logged, not raised
            if 'stderr_lines' in err.results:
                logging.info("Executing cmd: {} failed. Error: {}".format(
                    str(err.results.get('cmd')), str(err.results.get('stderr_lines'))))
        # Make sure that the cont-IO thread is completed
        thr.join(60)
        if thr.is_alive():
            logging.error("Failed to join continuous I/O thread in 60s")
        logging.info("Continuous warm-reboot test completed")

def test_cont_warm_reboot(request, duthost, ptfhost, localhost, conn_graph_facts, get_advanced_reboot):
    """
    @summary: Build a ContinuousReboot object from the test fixtures and run the continuous warm reboot.
    """
    ContinuousReboot(
        request,
        duthost,
        ptfhost,
        localhost,
        conn_graph_facts,
        get_advanced_reboot,
    ).start_cont_warm_reboot()

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please, add newline

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed, thanks.