Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion ansible/library/bgp_facts.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ def parse_neighbors(self):
lines = n.splitlines()
neighbor['admin'] = 'up'
neighbor['accepted prefixes'] = 0
neighbor_ip = None

for line in lines:
if regex_ipv4.match(line):
Expand Down Expand Up @@ -186,7 +187,8 @@ def parse_neighbors(self):
if message_stats:
neighbor['message statistics'] = message_stats

neighbors[neighbor_ip] = neighbor
if neighbor_ip:
neighbors[neighbor_ip] = neighbor

except Exception as e:
self.module.fail_json(msg=str(e))
Expand Down
12 changes: 12 additions & 0 deletions tests/common/devices.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,18 @@ def get_bgp_neighbor_info(self, neighbor_ip):

return nbinfo[str(neighbor_ip)]

def get_bgp_neighbors(self):
    """
    Get a dictionary of BGP neighbor states.

    Runs ``self.bgp_facts()`` and extracts the ``bgp_neighbors`` entry from
    the returned ``ansible_facts``.

    Args: None

    Returns: dictionary { (neighbor_ip : info_dict)* }

    """
    return self.bgp_facts()['ansible_facts']['bgp_neighbors']

def check_bgp_session_state(self, neigh_ips, state="established"):
"""
@summary: check if current bgp session equals to the target state
Expand Down
2 changes: 2 additions & 0 deletions tests/common/plugins/sanity_check/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ If a supported check item is prefixed with `-`, then this item will not be check

With this design, we can extend the sanity check items in the future. By default, only a very basic set of sanity check is performed. For some test scripts that do not need some default sanity check items or need some extra sanity check items, we can use this syntax to tailor the check items that fit best for the current test script.

Users can change the check item list from the command line by passing `--check_items` with a comma-separated string of items to add or remove. Example: `--check_items="-services,+bgp"` means do not check services, but add bgp to the check list. This parameter is not an absolute list; it adds to or subtracts from the existing list.

## Log collecting
If sanity check is to be performed, the script will also run some commands on the DUT to collect some basic information for debugging. Please refer to sonic-mgmt/tests/common/plugins/sanity_check/constants::PRINT_LOGS for the list of logs that will be collected.

Expand Down
6 changes: 5 additions & 1 deletion tests/common/plugins/sanity_check/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def sanity_check(localhost, duthost, request, fanouthosts):
skip_sanity = False
allow_recover = False
recover_method = "adaptive"
check_items = set(copy.deepcopy(constants.SUPPORTED_CHECK_ITEMS)) # Default check items
check_items = set(copy.deepcopy(constants.DEFAULT_CHECK_ITEMS)) # Default check items
post_check = False

customized_sanity_check = None
Expand Down Expand Up @@ -76,6 +76,10 @@ def sanity_check(localhost, duthost, request, fanouthosts):
skip_sanity = True
if request.config.option.allow_recover:
allow_recover = True
items = request.config.getoption("--check_items")
if items:
items_array=str(items).split(',')
check_items = _update_check_items(check_items, items_array, constants.SUPPORTED_CHECK_ITEMS)

logger.info("Sanity check settings: skip_sanity=%s, check_items=%s, allow_recover=%s, recover_method=%s, post_check=%s" % \
(skip_sanity, check_items, allow_recover, recover_method, post_check))
Expand Down
23 changes: 23 additions & 0 deletions tests/common/plugins/sanity_check/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,27 @@ def check_interfaces(dut):
logger.info("Done checking interfaces status.")
return check_result

def check_bgp_status(dut):
    """Check that the DUT reports BGP neighbors and that all are established.

    Args:
        dut: DUT host object. Only its ``get_bgp_neighbors()`` method is used;
            it is expected to return a mapping of neighbor key to info dict.

    Returns:
        dict: ``{"failed": bool, "check_item": "bgp"}``. The check fails when
        no neighbors are reported, or when any neighbor's ``"state"`` is not
        ``"established"``; in the latter case ``"down_neighbors"`` lists the
        affected neighbor keys.
    """
    logger.info("Checking bgp status...")
    check_result = {"failed": False, "check_item": "bgp"}

    neis = dut.get_bgp_neighbors()
    if not neis:
        logger.info("BGP neighbors: None")
        check_result["failed"] = True
    else:
        # A neighbor entry without a "state" key is reported as down instead
        # of raising KeyError and aborting the whole sanity check.
        down_neis = [
            nei for nei, info in neis.items()
            if (info or {}).get("state") != "established"
        ]
        if down_neis:
            logger.info("BGP neighbors down: {}".format(down_neis))
            check_result["failed"] = True
            check_result["down_neighbors"] = down_neis

    logger.info("Done checking bgp status.")
    return check_result

def check_dbmemory(dut):
logger.info("Checking database memory...")

Expand Down Expand Up @@ -168,6 +189,8 @@ def do_checks(dut, check_items):
results.append(check_dbmemory(dut))
elif item == "processes":
results.append(check_processes(dut))
elif item == "bgp":
results.append(check_bgp_status(dut))

return results

Expand Down
5 changes: 3 additions & 2 deletions tests/common/plugins/sanity_check/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@

# Recover related definitions
# Each recover method maps to: the command to run ("cmd"), whether it is a
# reboot ("reboot"), whether the action is chosen adaptively from the check
# results ("adaptive"), and the wait value used after recovery ("recover_wait").
RECOVER_METHODS = {
    "config_reload": {"cmd": "bash -c 'config reload -y &>/dev/null'", "reboot": False, "adaptive": False, 'recover_wait': 120},
    "load_minigraph": {"cmd": "bash -c 'config load_minigraph -y &>/dev/null'", "reboot": False, "adaptive": False, 'recover_wait': 60},
    "reboot": {"cmd": "reboot", "reboot": True, "adaptive": False, 'recover_wait': 120},
    "warm_reboot": {"cmd": "warm-reboot", "reboot": True, "adaptive": False, 'recover_wait': 120},
    "fast_reboot": {"cmd": "fast_reboot", "reboot": True, "adaptive": False, 'recover_wait': 120},
    "adaptive": {"cmd": None, "reboot": False, "adaptive": True, 'recover_wait': 30},
}  # All supported recover methods

SUPPORTED_CHECK_ITEMS = ["services", "interfaces", "dbmemory", "processes", "bgp"]  # Supported checks
DEFAULT_CHECK_ITEMS = ["services", "interfaces", "dbmemory", "processes", "bgp"]  # Default checks
11 changes: 6 additions & 5 deletions tests/common/plugins/sanity_check/recover.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def reboot_dut(dut, localhost, cmd, wait_time):
def __recover_interfaces(dut, fanouthosts, result, wait_time):
action = None
for port in result['down_ports']:
logging.info("Restoring port {}".format(port))
logging.warning("Restoring port: {}".format(port))

pn = str(port).lower()
if 'portchannel' in pn or 'vlan' in pn:
Expand All @@ -50,7 +50,7 @@ def __recover_interfaces(dut, fanouthosts, result, wait_time):
def __recover_services(dut, result):
    """Choose a recovery action for a failed services check.

    Args:
        dut: DUT host object (not used by this function).
        result: check result dict; ``result['services_status']`` maps each
            service name to a truthy value when that service is up.

    Returns:
        str: ``'reboot'`` when the ``database`` service is down, otherwise
        ``'config_reload'``.
    """
    status = result['services_status']
    down_services = [name for name in status if not status[name]]
    # Log once, at warning level; the message was previously also emitted at info.
    logging.warning("Service(s) down: {}".format(down_services))
    return 'reboot' if 'database' in down_services else 'config_reload'


Expand All @@ -63,12 +63,11 @@ def adaptive_recover(dut, localhost, fanouthosts, check_results, wait_time):
outstanding_action = None
for result in check_results:
if result['failed']:
logging.info("Restoring {}".format(result))
if result['check_item'] == 'interfaces':
action = __recover_interfaces(dut, fanouthosts, result, wait_time)
elif result['check_item'] == 'services':
action = __recover_services(dut, result)
elif result['check_item'] == 'processes':
elif result['check_item'] in [ 'processes', 'bgp' ]:
action = 'config_reload'
else:
action = 'reboot'
Expand All @@ -78,6 +77,8 @@ def adaptive_recover(dut, localhost, fanouthosts, check_results, wait_time):
if action and (not outstanding_action or outstanding_action == 'config_reload'):
outstanding_action = action

logging.warning("Restoring {} with proposed action: {}, final action: {}".format(result, action, outstanding_action))

if outstanding_action:
method = constants.RECOVER_METHODS[outstanding_action]
wait_time = method['recover_wait']
Expand All @@ -88,7 +89,7 @@ def adaptive_recover(dut, localhost, fanouthosts, check_results, wait_time):


def recover(dut, localhost, fanouthosts, check_results, recover_method):
logger.info("Try to recover %s using method %s" % (dut.hostname, recover_method))
logger.warning("Try to recover %s using method %s" % (dut.hostname, recover_method))
method = constants.RECOVER_METHODS[recover_method]
wait_time = method['recover_wait']
if method["adaptive"]:
Expand Down
2 changes: 2 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ def pytest_addoption(parser):
help="Skip sanity check")
parser.addoption("--allow_recover", action="store_true", default=False,
help="Allow recovery attempt in sanity check in case of failure")
parser.addoption("--check_items", action="store", default=False,
help="Change (add|remove) check items in the check list")

########################
# pre-test options #
Expand Down