Skip to content

Commit f2cadae

Browse files
authored
Added consistency checker to all relevant advanced-reboot cases (#16877)
Description of PR Summary: Added consistency checker to all relevant advanced-reboot cases and added minor improvements including: Ignoring NULL values in ASIC querying Logging ASIC/DB inconsistencies Fixes # (issue)
1 parent 1628450 commit f2cadae

7 files changed

Lines changed: 240 additions & 95 deletions

File tree

tests/common/fixtures/advanced_reboot.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ def __init__(self, request, duthosts, duthost, ptfhost, localhost, tbinfo, creds
9191
self.allowMacJump = kwargs["allow_mac_jumping"] if "allow_mac_jumping" in kwargs else False
9292
self.advanceboot_loganalyzer = kwargs["advanceboot_loganalyzer"] if "advanceboot_loganalyzer"\
9393
in kwargs else None
94+
self.consistency_checker_provider = kwargs["consistency_checker_provider"] if "consistency_checker_provider"\
95+
in kwargs else None
9496
self.other_vendor_nos = kwargs['other_vendor_nos'] if 'other_vendor_nos' in kwargs else False
9597
self.__dict__.update(kwargs)
9698
self.__extractTestParam()
@@ -557,6 +559,50 @@ def acl_manager_checker(self, error_list):
557559
if int(acl_proc_count) != 1:
558560
error_list.append("Expected one ACL manager process running. Actual: {}".format(acl_proc_count))
559561

562+
def check_asic_and_db_consistency(self):
563+
"""
564+
Check ASIC_DB and ASIC consistency, logging out any inconsistencies that are found.
565+
"""
566+
if not self.consistency_checker_provider.is_consistency_check_supported(self.duthost):
567+
os_version = self.duthost.image_facts()["ansible_facts"]["ansible_image_facts"]["current"]
568+
platform = self.duthost.facts['platform']
569+
logger.info((f"Consistency check is not supported on this platform ({platform}) and "
570+
f"version ({os_version})"))
571+
return
572+
573+
with self.consistency_checker_provider.get_consistency_checker(self.duthost) as consistency_checker:
574+
inconsistencies = consistency_checker.check_consistency()
575+
not_implemented_attributes = set()
576+
mismatched_attributes = {}
577+
failed_to_query_asic_attributes = {}
578+
579+
for sai_object, summary in inconsistencies.items():
580+
# Not implemented attributes
581+
object_name = sai_object.split(":")[1]
582+
for attr in summary["attributeNotImplemented"]:
583+
not_implemented_attributes.add(f"{object_name}.{attr}")
584+
585+
# Mismatched attributes
586+
mismatched_attributes = {
587+
attr: summary["attributes"][attr] for attr
588+
in summary["mismatchedAttributes"]
589+
}
590+
if mismatched_attributes:
591+
mismatched_attributes[sai_object] = mismatched_attributes
592+
593+
# Failed to query ASIC attributes
594+
if summary["failedToQueryAsic"]:
595+
failed_to_query_asic_attributes[sai_object] = summary["failedToQueryAsic"]
596+
597+
if not_implemented_attributes:
598+
logger.warning(f"Not implemented attributes: {not_implemented_attributes}")
599+
600+
if mismatched_attributes:
601+
logger.error(f"Mismatched attributes found: {mismatched_attributes}")
602+
603+
if failed_to_query_asic_attributes:
604+
logger.error(f"Failed to query ASIC attributes: {failed_to_query_asic_attributes}")
605+
560606
def runRebootTest(self):
561607
# Run advanced-reboot.ReloadTest for item in preboot/inboot list
562608
count = 0
@@ -597,6 +643,8 @@ def runRebootTest(self):
597643
finally:
598644
if self.postboot_setup:
599645
self.postboot_setup()
646+
if self.consistency_checker_provider:
647+
self.check_asic_and_db_consistency()
600648
# capture the test logs, and print all of them in case of failure, or a summary in case of success
601649
log_dir = self.__fetchTestLogs(rebootOper, log_dst_suffix=rebootOper)
602650
self.print_test_logs_summary(log_dir)
@@ -677,6 +725,9 @@ def runMultiHopRebootTest(self, upgrade_path_urls, rebootOper=None, base_image_s
677725
self.duthost.hostname, upgrade_path_str))
678726
if post_hop_teardown:
679727
post_hop_teardown(hop_index)
728+
729+
if self.consistency_checker_provider:
730+
self.check_asic_and_db_consistency()
680731
except Exception:
681732
traceback_msg = traceback.format_exc()
682733
err_msg = "Exception caught while running advanced-reboot test on ptf during upgrade {}: \n{}".format(

tests/common/fixtures/consistency_checker/consistency_checker.py

Lines changed: 113 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
import json
44
import os
55
import datetime
6+
from typing import List, Optional
67
from collections import defaultdict
7-
from tests.common.fixtures.consistency_checker.constants import SUPPORTED_PLATFORMS_AND_VERSIONS
8+
from tests.common.fixtures.consistency_checker.constants import SUPPORTED_PLATFORMS_AND_VERSIONS, \
9+
ConsistencyCheckQueryKey, ALL_ATTRIBUTES
810

911
logger = logging.getLogger(__name__)
1012

@@ -154,7 +156,7 @@ def get_db_and_asic_peers(self, keys=["*"]) -> dict:
154156

155157
return dict(results)
156158

157-
def check_consistency(self, keys=["*"]) -> dict:
159+
def check_consistency(self, keys=None) -> dict:
158160
"""
159161
Get the out-of-sync ASIC_DB and ASIC attributes. Differences are indicative of an error state.
160162
Same arg style as the get_objects function but returns a list of objects that don't match or couldn't
@@ -163,7 +165,7 @@ def check_consistency(self, keys=["*"]) -> dict:
163165
164166
:param keys: Optional list of glob search strings that correspond to the --key arg of sonic-db-dump.
165167
sonic-db-dump doesn't take multiple keys, so a list is passed in to support multiple
166-
keys at the API level.
168+
keys at the API level. If not provided, then the default keys are used.
167169
:return: Dictionary containing the out-of-sync ASIC_DB and ASIC attributes.
168170
169171
Example return val (matching):
@@ -186,19 +188,25 @@ def check_consistency(self, keys=["*"]) -> dict:
186188
"failedToQueryAsic": [
187189
{"SAI_BUFFER_PROFILE_ATTR_SHARED_DYNAMIC_TH": "Failed to query attribute value"}
188190
],
189-
"mismatchedAttributes": ["SAI_BUFFER_PROFILE_ATTR_THRESHOLD_MODE"]
191+
"mismatchedAttributes": ["SAI_BUFFER_PROFILE_ATTR_THRESHOLD_MODE"],
192+
"attributeNotImplemented": ["SAI_BUFFER_PROFILE_ATTR_POOL_ID"]
190193
},
191194
...
192195
}
193196
"""
197+
if keys is None:
198+
platform = self._duthost.facts['platform']
199+
os_version = self._duthost.image_facts()["ansible_facts"]["ansible_image_facts"]["current"]
200+
keys = self._get_consistency_checker_keys(platform, os_version)
194201

195202
db_attributes = self._get_db_attributes(keys)
196203
asic_attributes = self._get_asic_attributes_from_db_results(db_attributes)
197204

198205
inconsistencies = defaultdict(lambda: {
199206
"attributes": {},
200207
"failedToQueryAsic": [],
201-
"mismatchedAttributes": []
208+
"mismatchedAttributes": [],
209+
"attributeNotImplemented": [],
202210
})
203211

204212
for object in db_attributes:
@@ -231,23 +239,71 @@ def check_consistency(self, keys=["*"]) -> dict:
231239
if asic_query_success:
232240
inconsistencies[object]["mismatchedAttributes"].append(attr)
233241
else:
234-
inconsistencies[object]["failedToQueryAsic"].append({attr: asic_object[attr]["error"]})
242+
error = asic_object[attr]["error"]
243+
if "ATTR_NOT_IMPLEMENTED" in error:
244+
inconsistencies[object]["attributeNotImplemented"].append(attr)
245+
else:
246+
inconsistencies[object]["failedToQueryAsic"].append({attr: error})
235247

236248
return dict(inconsistencies)
237249

238-
def _get_db_attributes(self, keys: list) -> dict:
250+
def _get_consistency_checker_keys(self, platform, os_version) -> List[str]:
251+
"""
252+
Get the keys for the given platform and OS version.
253+
254+
:param platform: Platform name
255+
:param os_version: OS version
256+
:return: List of keys
257+
"""
258+
259+
if platform not in SUPPORTED_PLATFORMS_AND_VERSIONS:
260+
raise Exception(f"Unsupported platform: {platform}")
261+
262+
supported_versions = SUPPORTED_PLATFORMS_AND_VERSIONS[platform]
263+
for version in supported_versions:
264+
if version in os_version:
265+
return supported_versions[version]
266+
267+
raise Exception(f"Unsupported OS version: {os_version}")
268+
269+
def _get_db_attributes(self, keys: List[ConsistencyCheckQueryKey]) -> dict:
239270
"""
240271
Fetchs and merges the attributes of the objects returned by the search key from the DB.
241272
"""
242273
db_attributes = {}
243274
for key in keys:
244-
result = self._duthost.command(f"sonic-db-dump -k '{key}' -n ASIC_DB")
275+
result = self._duthost.command(f"sonic-db-dump -k '{key.key}' -n ASIC_DB")
245276
if result['rc'] != 0:
246277
raise Exception((f"Failed to fetch attributes for key '{key}' from ASIC_DB. "
247278
f"Return code: {result['rc']}, stdout: {result['stdout']}, "
248279
f"stderr: {result['stderr']}"))
249280

250281
query_result = json.loads(result['stdout'])
282+
283+
# Filter for attributes that we want ...
284+
objects_with_no_attrs = []
285+
for object in query_result:
286+
287+
if "NULL" in query_result[object]["value"]:
288+
logger.debug(f"Ignoring attribute 'NULL' for object '{object}'")
289+
del query_result[object]["value"]["NULL"]
290+
291+
if ALL_ATTRIBUTES in key.attributes:
292+
logger.debug(f"Retaining all attributes for object '{object}'")
293+
else:
294+
attributes_to_remove = set(query_result[object]["value"].keys()) - set(key.attributes)
295+
for attr in attributes_to_remove:
296+
logger.debug(f"Ignoring attribute '{attr}' for object '{object}'")
297+
del query_result[object]["value"][attr]
298+
299+
if len(query_result[object]["value"]) == 0:
300+
objects_with_no_attrs.append(object)
301+
302+
# ... then remove the objects that have no attributes left
303+
for object in objects_with_no_attrs:
304+
logger.debug(f"Ignoring empty object '{object}'")
305+
del query_result[object]
306+
251307
db_attributes.update(query_result)
252308

253309
return db_attributes
@@ -304,6 +360,19 @@ def _get_asic_attributes_from_db_results(self, db_attributes: dict) -> dict:
304360

305361

306362
class ConsistencyCheckerProvider:
363+
364+
def __init__(self, libsairedis_url_template: Optional[str],
365+
python3_pysairedis_url_template: Optional[str]) -> None:
366+
"""
367+
The libsairedis_url_template and python3_pysairedis_url_template are optional URL templates that the
368+
consistency checker can use to download the libsairedis and python3-pysairedis debs respectively.
369+
370+
:param libsairedis_url_template: Optional URL template for the libsairedis deb
371+
:param python3_pysairedis_url_template: Optional URL template for the python3-pysairedis deb
372+
"""
373+
self._libsairedis_url_template = libsairedis_url_template
374+
self._python3_pysairedis_url_template = python3_pysairedis_url_template
375+
307376
def is_consistency_check_supported(self, dut) -> bool:
308377
"""
309378
Checks if the provided DUT is supported for consistency checking.
@@ -318,29 +387,56 @@ def is_consistency_check_supported(self, dut) -> bool:
318387

319388
current_version = dut.image_facts()['ansible_facts']['ansible_image_facts']['current']
320389
supported_versions = SUPPORTED_PLATFORMS_AND_VERSIONS[platform]
321-
if any(v in current_version for v in supported_versions):
390+
if any(v in current_version for v in supported_versions.keys()):
322391
return True
323392

324393
return False
325394

326-
def get_consistency_checker(self, dut, libsairedis_download_url=None,
327-
python3_pysairedis_download_url=None) -> ConsistencyChecker:
395+
def get_consistency_checker(self, dut) -> ConsistencyChecker:
328396
"""
329397
Get a new instance of the ConsistencyChecker class.
330398
331399
:param dut: SonicHost object
332-
:param libsairedis_download_url: Optional URL that the consistency checker should use to download the
333-
libsairedis deb
334-
:param python3_pysairedis_download_url: Optional URL that the consistency checker should use to
335-
download the python3-pysairedis deb
336400
:return ConsistencyChecker: New instance of the ConsistencyChecker class
337401
"""
402+
403+
os_version = dut.image_facts()["ansible_facts"]["ansible_image_facts"]["current"]
404+
405+
if self._libsairedis_url_template or self._python3_pysairedis_url_template:
406+
if "202305" in os_version:
407+
sonic_version_template_param = "202305"
408+
elif "202311" in os_version:
409+
sonic_version_template_param = "202311"
410+
else:
411+
raise Exception(f"Unsupported OS version: {os_version}")
412+
413+
libsairedis_download_url = self._libsairedis_url_template\
414+
.format(sonic_version=sonic_version_template_param)\
415+
if self._libsairedis_url_template else None
416+
417+
python3_pysairedis_download_url = self._python3_pysairedis_url_template\
418+
.format(sonic_version=sonic_version_template_param)\
419+
if self._python3_pysairedis_url_template else None
420+
338421
return ConsistencyChecker(dut, libsairedis_download_url, python3_pysairedis_download_url)
339422

340423

341424
@pytest.fixture
342-
def consistency_checker_provider():
425+
def consistency_checker_provider(request):
343426
"""
344427
Fixture that provides the ConsistencyCheckerProvider class.
428+
429+
:param request: pytest request object
345430
"""
346-
return ConsistencyCheckerProvider()
431+
432+
if not request.config.getoption("enable_consistency_checker"):
433+
logger.info("Consistency checker is not enabled. Skipping check.")
434+
return None
435+
436+
consistency_checker_libsairedis_url_template = request.config.getoption(
437+
"consistency_checker_libsairedis_url_template")
438+
consistency_checker_python3_pysairedis_url_template = request.config.getoption(
439+
"consistency_checker_python3_pysairedis_url_template")
440+
441+
return ConsistencyCheckerProvider(consistency_checker_libsairedis_url_template,
442+
consistency_checker_python3_pysairedis_url_template)
Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,51 @@
1+
from dataclasses import dataclass
2+
from typing import List
3+
4+
ALL_ATTRIBUTES = "all"
5+
6+
7+
@dataclass
8+
class ConsistencyCheckQueryKey:
9+
key: str
10+
attributes: List[str]
11+
12+
13+
BROADCOM_KEYS: List[ConsistencyCheckQueryKey] = [
14+
ConsistencyCheckQueryKey("ASIC_STATE:SAI_OBJECT_TYPE_BUFFER_POOL:*", attributes=[ALL_ATTRIBUTES]),
15+
ConsistencyCheckQueryKey("ASIC_STATE:SAI_OBJECT_TYPE_BUFFER_PROFILE:*", attributes=[ALL_ATTRIBUTES]),
16+
ConsistencyCheckQueryKey("ASIC_STATE:SAI_OBJECT_TYPE_SWITCH:*", attributes=[ALL_ATTRIBUTES]),
17+
ConsistencyCheckQueryKey("ASIC_STATE:SAI_OBJECT_TYPE_WRED:*", attributes=[ALL_ATTRIBUTES]),
18+
ConsistencyCheckQueryKey(
19+
"ASIC_STATE:SAI_OBJECT_TYPE_PORT:*",
20+
attributes=[
21+
"SAI_PORT_ATTR_QOS_TC_TO_QUEUE_MAP",
22+
"SAI_PORT_ATTR_QOS_TC_TO_PRIORITY_GROUP_MAP",
23+
"SAI_PORT_ATTR_QOS_PFC_PRIORITY_TO_QUEUE_MAP",
24+
"SAI_PORT_ATTR_QOS_DSCP_TO_TC_MAP",
25+
"SAI_PORT_ATTR_MTU",
26+
"SAI_PORT_ATTR_INGRESS_ACL",
27+
"SAI_PORT_ATTR_AUTO_NEG_MODE",
28+
"SAI_PORT_ATTR_PRIORITY_FLOW_CONTROL",
29+
"SAI_PORT_ATTR_ADMIN_STATE",
30+
"SAI_PORT_ATTR_FEC_MODE",
31+
# The "get" implementation of the SAI_PORT_ATTR_SPEED attribute sometimes has a side effect of changing
32+
# the port speed. Consistency-checker should not change the state of the DUT, so we ignore this attribute
33+
# "SAI_PORT_ATTR_SPEED",
34+
# This attribute doesn't match between ASIC_DB and ASIC SAI and the test fails the assertion
35+
# "SAI_PORT_ATTR_PORT_VLAN_ID",
36+
]
37+
),
38+
]
39+
140

241
# The list of platforms and versions that have been tested to work with the consistency checker
342
SUPPORTED_PLATFORMS_AND_VERSIONS = {
4-
"x86_64-arista_7060_cx32s": ["202305", "202311"],
5-
"x86_64-arista_7260cx3_64": ["202305", "202311"],
43+
"x86_64-arista_7060_cx32s": {
44+
"202305": BROADCOM_KEYS,
45+
"202311": BROADCOM_KEYS,
46+
},
47+
"x86_64-arista_7260cx3_64": {
48+
"202305": BROADCOM_KEYS,
49+
"202311": BROADCOM_KEYS,
50+
},
651
}

0 commit comments

Comments
 (0)