diff --git a/ansible/library/bgp_facts.py b/ansible/library/bgp_facts.py index 471d8cfa2c4..5d757ac63e4 100644 --- a/ansible/library/bgp_facts.py +++ b/ansible/library/bgp_facts.py @@ -142,6 +142,7 @@ def parse_neighbors(self): lines = n.splitlines() neighbor['admin'] = 'up' neighbor['accepted prefixes'] = 0 + neighbor_ip = None for line in lines: if regex_ipv4.match(line): @@ -186,7 +187,8 @@ def parse_neighbors(self): if message_stats: neighbor['message statistics'] = message_stats - neighbors[neighbor_ip] = neighbor + if neighbor_ip: + neighbors[neighbor_ip] = neighbor except Exception as e: self.module.fail_json(msg=str(e)) diff --git a/tests/common/devices.py b/tests/common/devices.py index 1324c56cceb..d80e64f8c92 100644 --- a/tests/common/devices.py +++ b/tests/common/devices.py @@ -630,6 +630,18 @@ def get_bgp_neighbor_info(self, neighbor_ip): return nbinfo[str(neighbor_ip)] + def get_bgp_neighbors(self): + """ + Get a diction of BGP neighbor states + + Args: None + + Returns: dictionary { (neighbor_ip : info_dict)* } + + """ + bgp_facts = self.bgp_facts()['ansible_facts'] + return bgp_facts['bgp_neighbors'] + def check_bgp_session_state(self, neigh_ips, state="established"): """ @summary: check if current bgp session equals to the target state diff --git a/tests/common/plugins/sanity_check/README.md b/tests/common/plugins/sanity_check/README.md index 22473d5c231..06e9c85688e 100644 --- a/tests/common/plugins/sanity_check/README.md +++ b/tests/common/plugins/sanity_check/README.md @@ -58,6 +58,8 @@ If a supported check item is prefixed with `-`, then this item will not be check With this design, we can extend the sanity check items in the future. By default, only a very basic set of sanity check is performed. For some test scripts that do not need some default sanity check items or need some extra sanity check items, we can use this syntax to tailor the check items that fit best for the current test script. +User can change check item list by passing parameter from command line --check_items="add remove string". Exmaple: --check_items="-services,+bgp" means do not check services, but add bgp to the check list. This parameter is not an absolute list, it is addition or subtraction from the existing list. + ## Log collecting If sanity check is to be performed, the script will also run some commands on the DUT to collect some basic information for debugging. Please refer to sonic-mgmt/tests/common/plugins/sanity_check/constants::PRINT_LOGS for the list of logs that will be collected. diff --git a/tests/common/plugins/sanity_check/__init__.py b/tests/common/plugins/sanity_check/__init__.py index 715fc68f70d..f94b8381f79 100644 --- a/tests/common/plugins/sanity_check/__init__.py +++ b/tests/common/plugins/sanity_check/__init__.py @@ -47,7 +47,7 @@ def sanity_check(localhost, duthost, request, fanouthosts): skip_sanity = False allow_recover = False recover_method = "adaptive" - check_items = set(copy.deepcopy(constants.SUPPORTED_CHECK_ITEMS)) # Default check items + check_items = set(copy.deepcopy(constants.DEFAULT_CHECK_ITEMS)) # Default check items post_check = False customized_sanity_check = None @@ -76,6 +76,10 @@ def sanity_check(localhost, duthost, request, fanouthosts): skip_sanity = True if request.config.option.allow_recover: allow_recover = True + items = request.config.getoption("--check_items") + if items: + items_array=str(items).split(',') + check_items = _update_check_items(check_items, items_array, constants.SUPPORTED_CHECK_ITEMS) logger.info("Sanity check settings: skip_sanity=%s, check_items=%s, allow_recover=%s, recover_method=%s, post_check=%s" % \ (skip_sanity, check_items, allow_recover, recover_method, post_check)) diff --git a/tests/common/plugins/sanity_check/checks.py b/tests/common/plugins/sanity_check/checks.py index d21655e613b..af80e21581e 100644 --- a/tests/common/plugins/sanity_check/checks.py +++ b/tests/common/plugins/sanity_check/checks.py @@ -96,6 +96,27 @@ def check_interfaces(dut): logger.info("Done checking interfaces status.") return check_result +def check_bgp_status(dut): + logger.info("Checking bgp status...") + check_result = {"failed": False, "check_item": "bgp"} + + neis = dut.get_bgp_neighbors() + if not neis: + logger.info("BGP neighbors: None") + check_result["failed"] = True + else: + down_neis = [] + for nei, v in neis.items(): + if v["state"] != "established": + down_neis.append(nei) + if down_neis: + logger.info("BGP neighbors down: {}".format(down_neis)) + check_result["failed"] = True + check_result["down_neighbors"] = down_neis + + logger.info("Done checking bgp status.") + return check_result + def check_dbmemory(dut): logger.info("Checking database memory...") @@ -168,6 +189,8 @@ def do_checks(dut, check_items): results.append(check_dbmemory(dut)) elif item == "processes": results.append(check_processes(dut)) + elif item == "bgp": + results.append(check_bgp_status(dut)) return results diff --git a/tests/common/plugins/sanity_check/constants.py b/tests/common/plugins/sanity_check/constants.py index bf622bdb600..73fe89081b1 100644 --- a/tests/common/plugins/sanity_check/constants.py +++ b/tests/common/plugins/sanity_check/constants.py @@ -13,7 +13,7 @@ # Recover related definitions RECOVER_METHODS = { - "config_reload": {"cmd": "bash -c 'config reload -y &>/dev/null'", "reboot": False, "adaptive": False, 'recover_wait': 60}, + "config_reload": {"cmd": "bash -c 'config reload -y &>/dev/null'", "reboot": False, "adaptive": False, 'recover_wait': 120}, "load_minigraph": {"cmd": "bash -c 'config load_minigraph -y &>/dev/null'", "reboot": False, "adaptive": False, 'recover_wait': 60}, "reboot": {"cmd": "reboot", "reboot": True, "adaptive": False, 'recover_wait': 120}, "warm_reboot": {"cmd": "warm-reboot", "reboot": True, "adaptive": False, 'recover_wait': 120}, @@ -21,4 +21,5 @@ "adaptive": {"cmd": None, "reboot": False, "adaptive": True, 'recover_wait': 30}, } # All supported recover methods -SUPPORTED_CHECK_ITEMS = ["services", "interfaces", "dbmemory", "processes"] # Supported checks +SUPPORTED_CHECK_ITEMS = ["services", "interfaces", "dbmemory", "processes", "bgp"] # Supported checks +DEFAULT_CHECK_ITEMS = ["services", "interfaces", "dbmemory", "processes", "bgp"] # Default checks diff --git a/tests/common/plugins/sanity_check/recover.py b/tests/common/plugins/sanity_check/recover.py index 55c6eca0c52..61b4c3e475d 100644 --- a/tests/common/plugins/sanity_check/recover.py +++ b/tests/common/plugins/sanity_check/recover.py @@ -32,7 +32,7 @@ def reboot_dut(dut, localhost, cmd, wait_time): def __recover_interfaces(dut, fanouthosts, result, wait_time): action = None for port in result['down_ports']: - logging.info("Restoring port {}".format(port)) + logging.warning("Restoring port: {}".format(port)) pn = str(port).lower() if 'portchannel' in pn or 'vlan' in pn: @@ -50,7 +50,7 @@ def __recover_interfaces(dut, fanouthosts, result, wait_time): def __recover_services(dut, result): status = result['services_status'] services = [ x for x in status if not status[x] ] - logging.info("Service(s) down: {}".format(services)) + logging.warning("Service(s) down: {}".format(services)) return 'reboot' if 'database' in services else 'config_reload' @@ -63,12 +63,11 @@ def adaptive_recover(dut, localhost, fanouthosts, check_results, wait_time): outstanding_action = None for result in check_results: if result['failed']: - logging.info("Restoring {}".format(result)) if result['check_item'] == 'interfaces': action = __recover_interfaces(dut, fanouthosts, result, wait_time) elif result['check_item'] == 'services': action = __recover_services(dut, result) - elif result['check_item'] == 'processes': + elif result['check_item'] in [ 'processes', 'bgp' ]: action = 'config_reload' else: action = 'reboot' @@ -78,6 +77,8 @@ def adaptive_recover(dut, localhost, fanouthosts, check_results, wait_time): if action and (not outstanding_action or outstanding_action == 'config_reload'): outstanding_action = action + logging.warning("Restoring {} with proposed action: {}, final action: {}".format(result, action, outstanding_action)) + if outstanding_action: method = constants.RECOVER_METHODS[outstanding_action] wait_time = method['recover_wait'] @@ -88,7 +89,7 @@ def adaptive_recover(dut, localhost, fanouthosts, check_results, wait_time): def recover(dut, localhost, fanouthosts, check_results, recover_method): - logger.info("Try to recover %s using method %s" % (dut.hostname, recover_method)) + logger.warning("Try to recover %s using method %s" % (dut.hostname, recover_method)) method = constants.RECOVER_METHODS[recover_method] wait_time = method['recover_wait'] if method["adaptive"]: diff --git a/tests/conftest.py b/tests/conftest.py index 06331ea13a6..2124305e96f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -121,6 +121,8 @@ def pytest_addoption(parser): help="Skip sanity check") parser.addoption("--allow_recover", action="store_true", default=False, help="Allow recovery attempt in sanity check in case of failure") + parser.addoption("--check_items", action="store", default=False, + help="Change (add|remove) check items in the check list") ######################## # pre-test options #