From bdd1a86ae687a2e2cb9f3057ef8eba97e6ba6086 Mon Sep 17 00:00:00 2001 From: sanmalho Date: Fri, 7 Oct 2022 16:38:07 -0400 Subject: [PATCH 1/6] Enhancing check_dut_health_status to be multi-asic aware In our pipeline runs, autorestart tests against a multi-asic DUT were failing with error ' Feature 'x' auto-restart is not consistent across namespaces The reason was that pretest goes and disables autorestart on all the containers. Then ACL tests run with change the config_db's and to retore in it's cleanup does a config load_minigraph. The above results in setting the autorestart state of the containers back to default of 'enabled' Now, when check_dut_health_status kicks in, it takes a snapshot of the config_db.json before the ACL suite which has the autorestart state as 'disabled'. But, after the ACL suite, it detects that autorestart state has changed to 'enabled'. Thus, it tries to restore it. However, when it restores, it only restores config_db.json, and not the other asics config_db's. This results in the state of autorestart to be not consistent across namespaces - global has autorestart 'disabled', while namespace has autorestart 'enabled'. Fix for the above it to have enhance check_dut_health_status to be multi-asic aware. - It compares not just config_db.json, but also the config_db's of all the asics. - If it finds that config has changed, restore not just config_db.json, but also the config_db's of all the asics before rebooting the DUT. --- tests/conftest.py | 162 ++++++++++++++++++++++++++++------------------ 1 file changed, 100 insertions(+), 62 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 710afae84fd..6f63282497a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1743,8 +1743,17 @@ def __dut_reload(duts_data, node=None, results=None): logger.error('Missing kwarg "node" or "results"') return logger.info("dut reload called on {}".format(node.hostname)) - node.copy(content=json.dumps(duts_data[node.hostname]["pre_running_config"], indent=4), + node.copy(content=json.dumps(duts_data[node.hostname]["pre_running_config"]["host"], indent=4), dest='/etc/sonic/config_db.json', verbose=False) + + if node.is_multi_asic: + for asic_index in range(0, node.facts.get('num_asic')): + asic_ns = 'asic{}'.format(asic_index) + asic_cfg_file = "/tmp/{}_config_db{}.json".format(node.hostname, asic_index) + with open(asic_cfg_file, "w") as outfile: + outfile.write(json.dumps(duts_data[node.hostname]['pre_running_config'][asic_ns], indent=4)) + node.copy(src=asic_cfg_file, dest='/etc/sonic/config_db{}.json'.format(asic_index), verbose=False) + config_reload(node) @@ -1784,12 +1793,25 @@ def core_dump_and_config_check(duthosts, request): pre_existing_core_dumps = duthost.shell('ls /var/core/')['stdout'].split() duts_data[duthost.hostname]["pre_core_dumps"] = pre_existing_core_dumps + logger.info("Collecting running config before test on {}".format(duthost.hostname)) + duts_data[duthost.hostname]["pre_running_config"] = {} if not duthost.stat(path="/etc/sonic/running_golden_config.json")['stat']['exists']: logger.info("Collecting running golden config before test on {}".format(duthost.hostname)) duthost.shell("sonic-cfggen -d --print-data > /etc/sonic/running_golden_config.json") - duts_data[duthost.hostname]["pre_running_config"] = \ + duts_data[duthost.hostname]["pre_running_config"]["host"] = \ json.loads(duthost.shell("cat /etc/sonic/running_golden_config.json", verbose=False)['stdout']) + if duthost.is_multi_asic: + for asic_index in range(0, duthost.facts.get('num_asic')): + asic_ns = 'asic{}'.format(asic_index) + if not duthost.stat( + path="/etc/sonic/running_golden_config{}.json".format(asic_index))['stat']['exists']: + duthost.shell("sonic-cfggen -n {} -d --print-data > /etc/sonic/running_golden_config{}.json". + format(asic_ns, asic_index)) + duts_data[duthost.hostname]['pre_running_config'][asic_ns] = \ + json.loads(duthost.shell("cat /etc/sonic/running_golden_config{}.json".format(asic_index), + verbose=False)['stdout']) + yield if check_flag: @@ -1813,8 +1835,16 @@ def core_dump_and_config_check(duthosts, request): core_dump_check_pass = False logger.info("Collecting running config after test on {}".format(duthost.hostname)) - duts_data[duthost.hostname]["cur_running_config"] = \ + # get running config after running + duts_data[duthost.hostname]["cur_running_config"] = {} + duts_data[duthost.hostname]["cur_running_config"]['host'] = \ json.loads(duthost.shell("sonic-cfggen -d --print-data", verbose=False)['stdout']) + if duthost.is_multi_asic: + for asic_index in range(0, duthost.facts.get('num_asic')): + asic_ns = 'asic{}'.format(asic_index) + duts_data[duthost.hostname]["cur_running_config"][asic_ns] = \ + json.loads(duthost.shell("sonic-cfggen -n {} -d --print-data".format(asic_ns), + verbose=False)['stdout']) # The tables that we don't care EXCLUDE_CONFIG_TABLE_NAMES = set([]) @@ -1833,72 +1863,80 @@ def _remove_entry(table_name, key_name, config): if len(config[table_name]) == 0: config.pop(table_name) - # Remove ignored keys from base config - for exclude_key in EXCLUDE_CONFIG_KEY_NAMES: - fields = exclude_key.split('|') - if len(fields) != 2: - continue - _remove_entry(fields[0], fields[1], duts_data[duthost.hostname]["pre_running_config"]) - _remove_entry(fields[0], fields[1], duts_data[duthost.hostname]["cur_running_config"]) - - # Get common keys in pre running config and cur running config - - pre_running_config_keys = set(duts_data[duthost.hostname]["pre_running_config"].keys()) - cur_running_config_keys = set(duts_data[duthost.hostname]["cur_running_config"].keys()) - - # Check if there are extra keys in pre running config - pre_config_extra_keys = list(pre_running_config_keys - cur_running_config_keys - EXCLUDE_CONFIG_TABLE_NAMES) - for key in pre_config_extra_keys: - pre_only_config[duthost.hostname].update({key: duts_data[duthost.hostname]["pre_running_config"][key]}) - - # Check if there are extra keys in cur running config - cur_config_extra_keys = list(cur_running_config_keys - pre_running_config_keys - EXCLUDE_CONFIG_TABLE_NAMES) - for key in cur_config_extra_keys: - cur_only_config[duthost.hostname].update({key: duts_data[duthost.hostname]["cur_running_config"][key]}) - - common_config_keys = list(pre_running_config_keys & cur_running_config_keys - EXCLUDE_CONFIG_TABLE_NAMES) - # Check if the running config is modified after module running - for key in common_config_keys: - # TODO: remove these code when solve the problem of "FLEX_COUNTER_DELAY_STATUS" - if key == "FLEX_COUNTER_TABLE": - for sub_key, sub_value in duts_data[duthost.hostname]["pre_running_config"][key].items(): - try: - pre_value = duts_data[duthost.hostname]["pre_running_config"][key][sub_key] - cur_value = duts_data[duthost.hostname]["cur_running_config"][key][sub_key] - if pre_value["FLEX_COUNTER_STATUS"] != cur_value["FLEX_COUNTER_STATUS"]: - inconsistent_config[duthost.hostname].update( + for cfg_context in duts_data[duthost.hostname]['pre_running_config']: + pre_only_config[duthost.hostname][cfg_context] = {} + cur_only_config[duthost.hostname][cfg_context] = {} + inconsistent_config[duthost.hostname][cfg_context] = {} + + pre_running_config = duts_data[duthost.hostname]["pre_running_config"][cfg_context] + cur_running_config = duts_data[duthost.hostname]["cur_running_config"][cfg_context] + + pre_running_config_keys = set(pre_running_config.keys()) + cur_running_config_keys = set(cur_running_config.keys()) + + # Remove ignored keys from base config + for exclude_key in EXCLUDE_CONFIG_KEY_NAMES: + fields = exclude_key.split('|') + if len(fields) != 2: + continue + _remove_entry(fields[0], fields[1], pre_running_config) + _remove_entry(fields[0], fields[1], cur_running_config) + + # Check if there are extra keys in pre running config + pre_config_extra_keys = list( + pre_running_config_keys - cur_running_config_keys - EXCLUDE_CONFIG_TABLE_NAMES) + for key in pre_config_extra_keys: + pre_only_config[duthost.hostname].update({key: pre_running_config[key]}) + + # Check if there are extra keys in cur running config + cur_config_extra_keys = list( + cur_running_config_keys - pre_running_config_keys - EXCLUDE_CONFIG_TABLE_NAMES) + for key in cur_config_extra_keys: + cur_only_config[duthost.hostname].update({key: cur_running_config[key]}) + + # Get common keys in pre running config and cur running config + common_config_keys = list(pre_running_config_keys & cur_running_config_keys - EXCLUDE_CONFIG_TABLE_NAMES) + + # Check if the running config is modified after module running + for key in common_config_keys: + # TODO: remove these code when solve the problem of "FLEX_COUNTER_DELAY_STATUS" + if key == "FLEX_COUNTER_TABLE": + for sub_key, sub_value in pre_running_config[key].items(): + try: + pre_value = pre_running_config[key][sub_key] + cur_value = cur_running_config[key][sub_key] + if pre_value["FLEX_COUNTER_STATUS"] != cur_value["FLEX_COUNTER_STATUS"]: + inconsistent_config[duthost.hostname][cfg_context].update( + { + key: { + "pre_value": pre_running_config[key], + "cur_value": cur_running_config[key] + } + } + ) + except KeyError: + inconsistent_config[duthost.hostname][cfg_context].update( { key: { - "pre_value": duts_data[duthost.hostname]["pre_running_config"][key], - "cur_value": duts_data[duthost.hostname]["cur_running_config"][key] + "pre_value": pre_running_config[key], + "cur_value": cur_running_config[key] } } ) - except KeyError: - inconsistent_config[duthost.hostname].update( - { - key: { - "pre_value": duts_data[duthost.hostname]["pre_running_config"][key], - "cur_value": duts_data[duthost.hostname]["cur_running_config"][key] - } - } - ) - elif duts_data[duthost.hostname]["pre_running_config"][key] != \ - duts_data[duthost.hostname]["cur_running_config"][key]: - inconsistent_config[duthost.hostname].update( - { - key: { - "pre_value": duts_data[duthost.hostname]["pre_running_config"][key], - "cur_value": duts_data[duthost.hostname]["cur_running_config"][key] + elif pre_running_config[key] != cur_running_config[key]: + inconsistent_config[duthost.hostname][cfg_context].update( + { + key: { + "pre_value": pre_running_config[key], + "cur_value": cur_running_config[key] } - } - ) - - if pre_only_config[duthost.hostname] or \ - cur_only_config[duthost.hostname] or \ - inconsistent_config[duthost.hostname]: - config_db_check_pass = False + } + ) + if pre_only_config[duthost.hostname][cfg_context] or \ + cur_only_config[duthost.hostname][cfg_context] or \ + inconsistent_config[duthost.hostname][cfg_context]: + config_db_check_pass = False if not (core_dump_check_pass and config_db_check_pass): check_result = { "core_dump_check": { From 9d2e1f1299b3440bd390cbf3b993a2ddac51a7aa Mon Sep 17 00:00:00 2001 From: sanmalho Date: Thu, 27 Oct 2022 17:03:25 -0400 Subject: [PATCH 2/6] Include safe_reload and check_ifs_are_up as part of config_reload This will make sure that critical services and ports are up when proceeding to the next suite. The default wait time of 120 for pizza box and 240 for modular chassis is sometimes not sufficient - especially with all 400G ports having SFPs --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 6f63282497a..46ee6f34c6a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1754,7 +1754,7 @@ def __dut_reload(duts_data, node=None, results=None): outfile.write(json.dumps(duts_data[node.hostname]['pre_running_config'][asic_ns], indent=4)) node.copy(src=asic_cfg_file, dest='/etc/sonic/config_db{}.json'.format(asic_index), verbose=False) - config_reload(node) + config_reload(node, safe_reload=True, check_intf_up_ports=True) @pytest.fixture(scope="module", autouse=True) From 9c679e20ec794b97489747f791dfbfdf70eaa13b Mon Sep 17 00:00:00 2001 From: sanmalho Date: Tue, 8 Nov 2022 16:19:56 -0500 Subject: [PATCH 3/6] Changes to support golden_running_config as part of core_dump_and_config_check --- ansible/config_sonic_basedon_testbed.yml | 7 +++++++ tests/conftest.py | 2 +- tests/test_pretest.py | 5 +++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/ansible/config_sonic_basedon_testbed.yml b/ansible/config_sonic_basedon_testbed.yml index 789cad4508e..b62466bf636 100644 --- a/ansible/config_sonic_basedon_testbed.yml +++ b/ansible/config_sonic_basedon_testbed.yml @@ -597,6 +597,13 @@ file: path=/etc/sonic/running_golden_config.json state=absent + - name: remove running golden config files for all the asics if exists + become: True + file: path=/etc/sonic/running_golden_config{{ item }}.json + state=absent + with_sequence: start=0 end={{ num_asics - 1 }} + when: num_asics > 1 + - name: cleanup all cached facts shell: python ../tests/common/cache/facts_cache.py delegate_to: localhost diff --git a/tests/conftest.py b/tests/conftest.py index 46ee6f34c6a..6f63282497a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1754,7 +1754,7 @@ def __dut_reload(duts_data, node=None, results=None): outfile.write(json.dumps(duts_data[node.hostname]['pre_running_config'][asic_ns], indent=4)) node.copy(src=asic_cfg_file, dest='/etc/sonic/config_db{}.json'.format(asic_index), verbose=False) - config_reload(node, safe_reload=True, check_intf_up_ports=True) + config_reload(node) @pytest.fixture(scope="module", autouse=True) diff --git a/tests/test_pretest.py b/tests/test_pretest.py index 79dd827ec12..8bfcb3678fe 100644 --- a/tests/test_pretest.py +++ b/tests/test_pretest.py @@ -294,3 +294,8 @@ def test_generate_running_golden_config(duthosts): """ for duthost in duthosts: duthost.shell("sonic-cfggen -d --print-data > /etc/sonic/running_golden_config.json") + if duthost.is_multi_asic: + for asic_index in range(0, duthost.facts.get('num_asic')): + asic_ns = 'asic{}'.format(asic_index) + duthost.shell("sonic-cfggen -n {} -d --print-data > /etc/sonic/running_golden_config{}.json". + format(asic_ns, asic_index)) \ No newline at end of file From 4704b61ed2feda5cf5ef2ca687c7bebe9253fad4 Mon Sep 17 00:00:00 2001 From: sanmalho Date: Thu, 17 Nov 2022 15:48:51 -0500 Subject: [PATCH 4/6] Review comments on PR#6257 - core_dump_and_config_check fixture --- tests/conftest.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 6f63282497a..b2c5479d44a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1743,16 +1743,17 @@ def __dut_reload(duts_data, node=None, results=None): logger.error('Missing kwarg "node" or "results"') return logger.info("dut reload called on {}".format(node.hostname)) - node.copy(content=json.dumps(duts_data[node.hostname]["pre_running_config"]["host"], indent=4), + node.copy(content=json.dumps(duts_data[node.hostname]["pre_running_config"][None], indent=4), dest='/etc/sonic/config_db.json', verbose=False) if node.is_multi_asic: for asic_index in range(0, node.facts.get('num_asic')): - asic_ns = 'asic{}'.format(asic_index) + asic_ns = "asic{}".format(asic_index) asic_cfg_file = "/tmp/{}_config_db{}.json".format(node.hostname, asic_index) with open(asic_cfg_file, "w") as outfile: outfile.write(json.dumps(duts_data[node.hostname]['pre_running_config'][asic_ns], indent=4)) node.copy(src=asic_cfg_file, dest='/etc/sonic/config_db{}.json'.format(asic_index), verbose=False) + os.remove(asic_cfg_file) config_reload(node) @@ -1798,12 +1799,12 @@ def core_dump_and_config_check(duthosts, request): if not duthost.stat(path="/etc/sonic/running_golden_config.json")['stat']['exists']: logger.info("Collecting running golden config before test on {}".format(duthost.hostname)) duthost.shell("sonic-cfggen -d --print-data > /etc/sonic/running_golden_config.json") - duts_data[duthost.hostname]["pre_running_config"]["host"] = \ + duts_data[duthost.hostname]["pre_running_config"][None] = \ json.loads(duthost.shell("cat /etc/sonic/running_golden_config.json", verbose=False)['stdout']) if duthost.is_multi_asic: for asic_index in range(0, duthost.facts.get('num_asic')): - asic_ns = 'asic{}'.format(asic_index) + asic_ns = "asic{}".format(asic_index) if not duthost.stat( path="/etc/sonic/running_golden_config{}.json".format(asic_index))['stat']['exists']: duthost.shell("sonic-cfggen -n {} -d --print-data > /etc/sonic/running_golden_config{}.json". @@ -1841,7 +1842,7 @@ def core_dump_and_config_check(duthosts, request): json.loads(duthost.shell("sonic-cfggen -d --print-data", verbose=False)['stdout']) if duthost.is_multi_asic: for asic_index in range(0, duthost.facts.get('num_asic')): - asic_ns = 'asic{}'.format(asic_index) + asic_ns = "asic{}".format(asic_index) duts_data[duthost.hostname]["cur_running_config"][asic_ns] = \ json.loads(duthost.shell("sonic-cfggen -n {} -d --print-data".format(asic_ns), verbose=False)['stdout']) From cdcb0aa6457f800e04ec11fc8e6071ca5c191257 Mon Sep 17 00:00:00 2001 From: sanmalho Date: Fri, 18 Nov 2022 16:18:13 -0500 Subject: [PATCH 5/6] Fix for failing test cases - Forgot to replace 'host' with None in one of the spots --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index b2c5479d44a..7e0049f7e93 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1838,7 +1838,7 @@ def core_dump_and_config_check(duthosts, request): logger.info("Collecting running config after test on {}".format(duthost.hostname)) # get running config after running duts_data[duthost.hostname]["cur_running_config"] = {} - duts_data[duthost.hostname]["cur_running_config"]['host'] = \ + duts_data[duthost.hostname]["cur_running_config"][None] = \ json.loads(duthost.shell("sonic-cfggen -d --print-data", verbose=False)['stdout']) if duthost.is_multi_asic: for asic_index in range(0, duthost.facts.get('num_asic')): From 9eb2766bdd77749a6b8f4e840fc446db5e88602c Mon Sep 17 00:00:00 2001 From: sanmalho Date: Mon, 21 Nov 2022 10:56:52 -0500 Subject: [PATCH 6/6] Fixing flake8 issues --- tests/conftest.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 7e0049f7e93..3db0c36f3c9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1893,10 +1893,11 @@ def _remove_entry(table_name, key_name, config): cur_config_extra_keys = list( cur_running_config_keys - pre_running_config_keys - EXCLUDE_CONFIG_TABLE_NAMES) for key in cur_config_extra_keys: - cur_only_config[duthost.hostname].update({key: cur_running_config[key]}) + cur_only_config[duthost.hostname].update({key: cur_running_config[key]}) # Get common keys in pre running config and cur running config - common_config_keys = list(pre_running_config_keys & cur_running_config_keys - EXCLUDE_CONFIG_TABLE_NAMES) + common_config_keys = list(pre_running_config_keys & cur_running_config_keys - + EXCLUDE_CONFIG_TABLE_NAMES) # Check if the running config is modified after module running for key in common_config_keys: