Skip to content

Commit 8423050

Browse files
Javier-Tan authored and gshemesh2 committed
Test upgrade path t2 (sonic-net#17902)
Description of PR

Summary: Fixes sonic-net#17078

Approach

What is the motivation for this PR?
Create a test case for cold upgrade on T2.

How did you do it?
Added the test_upgrade_path_t2 test case to test_upgrade_path.py, based on the previous upgrade path tests.

How did you verify/test it?
Ran it on multiple T2 devices; also ran the T1 warm upgrade to make sure it is not broken.

Any platform specific information?
N/A

Supported testbed topology if it's a new test case?
N/A

Documentation
Please run using the variables --upgrade_type=cold --base_image_list= --target_image_list=, e.g.:

sudo ./run_tests.sh -i <INV FILE> -n <TB NAME> -m individual -l INFO -e '--showlocals --assert plain -rav --enable_cpa --upgrade_type=cold --skip_sanity --base_image_list=<FROM IMAGE URL> --target_image_list=<TO IMAGE URL>' -u -c upgrade_path/test_upgrade_path.py::test_upgrade_path_t2

Signed-off-by: Javier Tan <[email protected]>
Signed-off-by: Guy Shemesh <[email protected]>
1 parent 31089e8 commit 8423050

5 files changed

Lines changed: 129 additions & 28 deletions

File tree

tests/common/helpers/upgrade_helpers.py

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import pytest
22
import logging
3-
import time
43
import ipaddress
54
import json
65
import re
@@ -99,18 +98,17 @@ def install_sonic(duthost, image_url, tbinfo):
9998
return res['ansible_facts']['downloaded_image_version']
10099

101100

102-
def check_services(duthost):
101+
def check_services(duthost, tbinfo):
103102
"""
104103
Perform a health check of services
105104
"""
106-
logging.info("Wait until DUT uptime reaches {}s".format(300))
107-
while duthost.get_uptime().total_seconds() < 300:
108-
time.sleep(1)
105+
dut_min_uptime = 900 if 't2' in tbinfo['topo']['name'] else 300
109106
logging.info("Wait until all critical services are fully started")
110-
logging.info("Check critical service status")
111-
pytest_assert(duthost.critical_services_fully_started(), "dut.critical_services_fully_started is False")
107+
pytest_assert(wait_until(dut_min_uptime, 30, 30, duthost.critical_services_fully_started),
108+
"Not all critical services are fully started")
112109

113-
for service in duthost.critical_services:
110+
critical_services = [re.sub(r'(\d+)$', r'@\1', service) for service in duthost.critical_services]
111+
for service in critical_services:
114112
status = duthost.get_service_props(service)
115113
pytest_assert(status["ActiveState"] == "active", "ActiveState of {} is {}, expected: active"
116114
.format(service, status["ActiveState"]))
@@ -126,14 +124,21 @@ def check_reboot_cause(duthost, expected_cause):
126124

127125
def check_copp_config(duthost):
    """
    Compare the CoPP configuration from copp_cfg.json with COPP_TABLE in APPL_DB.

    The expected configuration is built once from /etc/sonic/copp_cfg.json and
    the output of "show feature status"; it is then compared against the
    COPP_TABLE dump of every ASIC in duthost.asics.

    A supervisor node whose switch_type is "fabric" is skipped, because it
    doesn't program CoPP tables into APPL_DB.

    Args:
        duthost: DUT host object to check.

    The mismatch is reported through pytest_assert, with both formatted
    dictionaries included in the message.
    """
    logging.info("Comparing CoPP configuration from copp_cfg.json to COPP_TABLE")

    if duthost.is_supervisor_node() and duthost.facts['switch_type'] == "fabric":
        logging.info("Skipping CoPP config check for fabric (VoQ) supervisor card as it "
                     "doesn't program CoPP tables into APPL_DB")
        return

    # The config file and feature status are read from the host, not from an
    # ASIC, so fetch and format them once instead of once per ASIC.
    copp_cfg = json.loads(duthost.shell("cat /etc/sonic/copp_cfg.json")["stdout"])
    feature_status = duthost.shell("show feature status")["stdout"]
    copp_cfg_formatted = get_copp_cfg_formatted_dict(copp_cfg, feature_status)

    for asichost in duthost.asics:
        copp_tables = json.loads(asichost.command("sonic-db-dump -n APPL_DB -k COPP_TABLE* -y")["stdout"])
        copp_tables_formatted = get_copp_table_formatted_dict(copp_tables)
        # Argument order follows the labels in the message: config first, tables second.
        pytest_assert(copp_tables_formatted == copp_cfg_formatted,
                      "There is a difference between CoPP config and CoPP tables. CoPP config: {}\nCoPP tables: {}"
                      .format(copp_cfg_formatted, copp_tables_formatted))
137142

138143

139144
def get_copp_table_formatted_dict(copp_tables):
@@ -200,7 +205,7 @@ def upgrade_test_helper(duthost, localhost, ptfhost, from_image, to_image,
200205

201206
for i in range(reboot_count):
202207
if upgrade_type == REBOOT_TYPE_COLD:
203-
reboot(duthost, localhost)
208+
reboot(duthost, localhost, safe_reboot=True)
204209
if postboot_setup:
205210
postboot_setup()
206211
else:
@@ -215,7 +220,7 @@ def upgrade_test_helper(duthost, localhost, ptfhost, from_image, to_image,
215220
pytest_assert(wait_until(timeout, 5, 0, check_reboot_cause, duthost, upgrade_type),
216221
"Reboot cause {} did not match the trigger - {}".format(get_reboot_cause(duthost),
217222
upgrade_type))
218-
check_services(duthost)
223+
check_services(duthost, tbinfo)
219224
check_neighbors(duthost, tbinfo)
220225
check_copp_config(duthost)
221226

tests/common/platform/device_utils.py

Lines changed: 58 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -264,8 +264,9 @@ def check_services(duthost):
264264
if not wait_until(330, 30, 0, duthost.critical_services_fully_started):
265265
raise RebootHealthError("dut.critical_services_fully_started is False")
266266

267+
critical_services = [re.sub(r'(\d+)$', r'@\1', service) for service in duthost.critical_services]
267268
logging.info("Check critical service status")
268-
for service in duthost.critical_services:
269+
for service in critical_services:
269270
status = duthost.get_service_props(service)
270271
if status["ActiveState"] != "active":
271272
raise RebootHealthError("ActiveState of {} is {}, expected: active".format(
@@ -299,8 +300,13 @@ def check_interfaces_and_transceivers(duthost, request):
299300

300301
logging.info(
301302
"Check whether transceiver information of all ports are in redis")
302-
xcvr_info = duthost.command("redis-cli -n 6 keys TRANSCEIVER_INFO*")
303-
parsed_xcvr_info = parse_transceiver_info(xcvr_info["stdout_lines"])
303+
parsed_xcvr_info = []
304+
305+
for asichost in duthost.asics:
306+
docker_cmd = asichost.get_docker_cmd("redis-cli -n 6 keys TRANSCEIVER_INFO*", "database")
307+
xcvr_info = duthost.command(docker_cmd)
308+
parsed_xcvr_info.extend(parse_transceiver_info(xcvr_info["stdout_lines"]))
309+
304310
interfaces = conn_graph_facts["device_conn"][duthost.hostname]
305311
if duthost.facts['hwsku'] in MGFX_HWSKU:
306312
interfaces = MGFX_XCVR_INTF
@@ -316,13 +322,21 @@ def check_neighbors(duthost, tbinfo):
316322
Perform a BGP neighborship check.
317323
"""
318324
logging.info("Check BGP neighbors status. Expected state - established")
319-
bgp_facts = duthost.bgp_facts()['ansible_facts']
325+
326+
# Verify bgp sessions are established
327+
bgp_neighbors = duthost.get_bgp_neighbors_per_asic(state="all")
328+
if not wait_until(600, 10, 0, duthost.check_bgp_session_state_all_asics, bgp_neighbors):
329+
raise RebootHealthError("BGP session not established")
330+
331+
# Only produces bgp_neighbors attribute of bgp_facts (only one used at the moment)
332+
bgp_facts = {'bgp_neighbors': {}}
333+
for asichost in duthost.asics:
334+
asic_ansible_facts = asichost.bgp_facts()['ansible_facts']
335+
bgp_facts['bgp_neighbors'].update(asic_ansible_facts['bgp_neighbors'])
336+
320337
mg_facts = duthost.get_extended_minigraph_facts(tbinfo)
321338

322339
for value in list(bgp_facts['bgp_neighbors'].values()):
323-
# Verify bgp sessions are established
324-
if value['state'] != 'established':
325-
raise RebootHealthError("BGP session not established")
326340
# Verify local ASNs in bgp sessions
327341
if (value['local AS'] != mg_facts['minigraph_bgp_asn']):
328342
raise RebootHealthError("Local ASNs not found in BGP session.\
@@ -391,6 +405,9 @@ def get_current_sonic_version(duthost):
391405

392406
@pytest.fixture
393407
def verify_dut_health(request, duthosts, rand_one_dut_hostname, tbinfo):
408+
"""
409+
Performs health check on single DUT defined by rand_one_dut_hostname before and after a test
410+
"""
394411
global test_report
395412
test_report = {}
396413
duthost = duthosts[rand_one_dut_hostname]
@@ -418,6 +435,40 @@ def verify_dut_health(request, duthosts, rand_one_dut_hostname, tbinfo):
418435
.format(test_report))
419436

420437

438+
@pytest.fixture
def verify_testbed_health(request, duthosts, tbinfo):
    """
    Performs health check on all DUTs in a testbed before and after a test

    Before the test, every DUT goes through check_services,
    check_interfaces_and_transceivers and check_neighbors, and the number of
    files in /var/core/ is recorded for that DUT. After the test the same
    checks are repeated and verify_no_coredumps is called with the count that
    was recorded for that same DUT.

    Args:
        request: pytest request object, passed to check_interfaces_and_transceivers.
        duthosts: iterable of DUT host objects in the testbed.
        tbinfo: testbed info, passed to check_neighbors.
    """
    global test_report

    # One core-dump baseline per DUT, keyed by hostname. A single shared value
    # would be overwritten on every iteration and leave only the last DUT's count.
    pre_existing_cores = {}

    for duthost in duthosts:
        # NOTE(review): test_report is presumably filled in by the check_* helpers - confirm.
        test_report = {}
        check_services(duthost)
        check_interfaces_and_transceivers(duthost, request)
        check_neighbors(duthost, tbinfo)
        check_all = all(check is True for check in test_report.values())
        pytest_assert(check_all, "DUT {} not ready for test. Health check failed before reboot: {}"
                      .format(duthost.hostname, test_report))

        # On 20191130 images, entries matching "python" are excluded from the count.
        if "20191130" in duthost.os_version:
            core_count_cmd = 'ls /var/core/ | grep -v python | wc -l'
        else:
            core_count_cmd = 'ls /var/core/ | wc -l'
        pre_existing_cores[duthost.hostname] = duthost.shell(core_count_cmd)['stdout']

    yield

    for duthost in duthosts:
        test_report = {}
        check_services(duthost)
        check_interfaces_and_transceivers(duthost, request)
        check_neighbors(duthost, tbinfo)
        verify_no_coredumps(duthost, pre_existing_cores[duthost.hostname])
        check_all = all(check is True for check in test_report.values())
        pytest_assert(check_all, "Health check failed for {} after reboot: {}"
                      .format(duthost.hostname, test_report))
470+
471+
421472
def get_current_sonic_version(duthost):
    """
    Return the stdout of "sonic_installer list" narrowed to the "Current" entry.

    The command output is filtered with grep for "Current" and the second
    space-separated field is returned as reported by duthost.shell.
    """
    current_image_cmd = 'sonic_installer list | grep Current | cut -f2 -d " "'
    shell_result = duthost.shell(current_image_cmd)
    return shell_result['stdout']
423474

tests/common/plugins/conditional_mark/tests_mark_conditions.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2573,6 +2573,12 @@ upgrade_path:
25732573
conditions:
25742574
- "asic_type in ['vs']"
25752575

2576+
upgrade_path/test_upgrade_path.py::test_upgrade_path_t2:
2577+
skip:
2578+
reason: "Only supported on T2 topology"
2579+
conditions:
2580+
- "'t2' not in topo_type"
2581+
25762582
#######################################
25772583
##### vlan #####
25782584
#######################################

tests/upgrade_path/test_multi_hop_upgrade_path.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def post_hop_teardown(hop_index):
7878
timeout = max((SYSTEM_STABILIZE_MAX_TIME - networking_uptime), 1)
7979
pytest_assert(wait_until(timeout, 5, 0, check_reboot_cause, duthost, upgrade_type),
8080
"Reboot cause {} did not match the trigger - {}".format(get_reboot_cause(duthost), upgrade_type))
81-
check_services(duthost)
81+
check_services(duthost, tbinfo)
8282
check_neighbors(duthost, tbinfo)
8383
check_copp_config(duthost)
8484
logger.info("Finished post hop teardown for hop {} image {}".format(hop_index, to_image))
@@ -137,7 +137,7 @@ def post_hop_teardown(hop_index):
137137
timeout = max((SYSTEM_STABILIZE_MAX_TIME - networking_uptime), 1)
138138
pytest_assert(wait_until(timeout, 5, 0, check_reboot_cause, duthost, upgrade_type),
139139
"Reboot cause {} did not match the trigger - {}".format(get_reboot_cause(duthost), upgrade_type))
140-
check_services(duthost)
140+
check_services(duthost, tbinfo)
141141
check_neighbors(duthost, tbinfo)
142142
check_copp_config(duthost)
143143
logger.info("Finished post hop teardown for hop {} image {}".format(hop_index, to_image))

tests/upgrade_path/test_upgrade_path.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,19 @@
22
import logging
33
from tests.common.helpers.upgrade_helpers import install_sonic, upgrade_test_helper
44
from tests.common.helpers.upgrade_helpers import restore_image # noqa F401
5+
from tests.common.helpers.multi_thread_utils import SafeThreadPoolExecutor
56
from tests.upgrade_path.utilities import cleanup_prev_images, boot_into_base_image
67
from tests.common.fixtures.advanced_reboot import get_advanced_reboot # noqa F401
78
from tests.common.fixtures.consistency_checker.consistency_checker import consistency_checker_provider # noqa F401
8-
from tests.common.platform.device_utils import verify_dut_health # noqa F401
9+
from tests.common.platform.device_utils import verify_dut_health, verify_testbed_health # noqa F401
910
from tests.common.fixtures.duthost_utils import backup_and_restore_config_db # noqa F401
1011
from tests.common.platform.device_utils import advanceboot_loganalyzer, advanceboot_neighbor_restore # noqa F401
1112
from tests.common.fixtures.ptfhost_utils import copy_ptftests_directory # noqa F401
1213
from tests.common.fixtures.ptfhost_utils import change_mac_addresses # noqa F401
1314
from tests.common.fixtures.ptfhost_utils import remove_ip_addresses # noqa F401
1415
from tests.common.fixtures.ptfhost_utils import copy_arp_responder_py # noqa F401
1516
from tests.common.platform.warmboot_sad_cases import get_sad_case_list, SAD_CASE_LIST
17+
from tests.common.reboot import REBOOT_TYPE_COLD
1618

1719

1820
pytestmark = [
@@ -103,6 +105,43 @@ def upgrade_path_preboot_setup():
103105
enable_cpa=enable_cpa)
104106

105107

108+
@pytest.mark.device_type('t2')
def test_upgrade_path_t2(localhost, duthosts, ptfhost, upgrade_path_lists,
                         tbinfo, request, verify_testbed_health):       # noqa: F811
    """
    Cold-upgrade a T2 chassis from the base image to the target image.

    The first supervisor node is upgraded with a direct call to
    upgrade_test_helper; afterwards one upgrade_test_helper call per frontend
    node is submitted to a SafeThreadPoolExecutor with max_workers=8.
    """
    _, from_image, to_image, _, _ = upgrade_path_lists
    upgrade_type = REBOOT_TYPE_COLD
    logger.info("Test upgrade path from {} to {}".format(from_image, to_image))

    def prepare_before_reboot(node):
        setup_upgrade_test(node, localhost, from_image, to_image, tbinfo, upgrade_type)

    def bring_up_bgp_after_reboot(node):
        node.shell("config bgp startup all")

    def cold_upgrade_options(node):
        # get_advanced_reboot, advanceboot_loganalyzer and consistency_checker_provider
        # are None as only cold reboot is supported on T2. "node" is a parameter here,
        # so each pair of lambdas is bound to its own DUT.
        return dict(
            get_advanced_reboot=None,
            advanceboot_loganalyzer=None,
            preboot_setup=lambda: prepare_before_reboot(node),
            postboot_setup=lambda: bring_up_bgp_after_reboot(node),
            consistency_checker_provider=None,
            enable_cpa=False,
        )

    # Supervisor goes first, synchronously.
    supervisor = duthosts.supervisor_nodes[0]
    upgrade_test_helper(supervisor, localhost, ptfhost, from_image,
                        to_image, tbinfo, upgrade_type,
                        **cold_upgrade_options(supervisor))

    # Frontend nodes are handed to the thread pool afterwards.
    with SafeThreadPoolExecutor(max_workers=8) as executor:
        for frontend in duthosts.frontend_nodes:
            executor.submit(upgrade_test_helper, frontend, localhost, ptfhost, from_image,
                            to_image, tbinfo, upgrade_type,
                            **cold_upgrade_options(frontend))
143+
144+
106145
@pytest.mark.device_type('vs')
107146
def test_warm_upgrade_sad_path(localhost, duthosts, ptfhost, rand_one_dut_hostname,
108147
nbrhosts, fanouthosts, vmhost, tbinfo, request, restore_image, # noqa F811

0 commit comments

Comments
 (0)