From 4f16eff0bed39706e00ba7fd0d389bcd580a02ff Mon Sep 17 00:00:00 2001 From: Storm Liang Date: Thu, 12 Mar 2026 14:51:07 +1100 Subject: [PATCH 1/2] Fix test_add_rack flakiness by waiting for BGP convergence before DB comparison The test_add_rack test was failing ~1% of runs with 'DB compare failed after adding T0 via generic patch updater'. Root cause: DB comparison ran before BGP sessions established, causing app-db route entry mismatches. Changes: - Add is_bgp_session_established() helper that returns bool for use with wait_until retry mechanism - Move BGP session convergence check BEFORE DB comparison in generic_patch_add_t0(), so app-db routes are populated before comparing against baseline - BGP check now retries with wait_until instead of bare assert Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: Storm Liang --- tests/common/configlet/utils.py | 13 +++++++++++++ tests/configlet/util/generic_patch.py | 16 +++++++++++----- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/tests/common/configlet/utils.py b/tests/common/configlet/utils.py index b057c335256..99ec6d53c3b 100644 --- a/tests/common/configlet/utils.py +++ b/tests/common/configlet/utils.py @@ -393,6 +393,19 @@ def db_comp(duthost, test_db_dir, ref_db_dir, ctx): return True +def is_bgp_session_established(duthost, ip): + """Check if BGP session is established. Returns True/False for use with wait_until.""" + if sys.version_info[0] > 2: + info = duthost.get_bgp_neighbor_info(ip) + else: + info = duthost.get_bgp_neighbor_info(ip.decode('utf-8')) + bgp_state = info.get("bgpState", "") + if bgp_state != "Established": + log_info("BGP session for {} is '{}', waiting for 'Established'".format(ip, bgp_state)) + return False + return True + + def chk_bgp_session(duthost, ip, msg): if sys.version_info[0] > 2: info = duthost.get_bgp_neighbor_info(ip) diff --git a/tests/configlet/util/generic_patch.py b/tests/configlet/util/generic_patch.py index 1ea8279aeb1..a92ac9cef9b 100644 --- a/tests/configlet/util/generic_patch.py +++ b/tests/configlet/util/generic_patch.py @@ -8,7 +8,7 @@ from tests.common.configlet.utils import orig_db_dir, no_t0_db_dir, patch_add_t0_dir, patch_rm_t0_dir, tor_data,\ RELOAD_WAIT_TIME, PAUSE_INTF_DOWN, PAUSE_INTF_UP, PAUSE_CLET_APPLY, DB_COMP_WAIT_TIME,\ - do_pause, db_comp, chk_bgp_session + do_pause, db_comp, chk_bgp_session, is_bgp_session_established if os.path.exists("/etc/sonic/sonic-environment"): from mock_for_switch import config_reload, wait_until @@ -169,14 +169,20 @@ def generic_patch_add_t0(duthost, skip_load=False, hack_apply=False): duthost.shell("config interface startup {}".format(tor_ifname)) do_pause(PAUSE_INTF_UP, "pause upon i/f {} startup after add patch".format(tor_ifname)) + # Wait for BGP sessions to establish BEFORE DB comparison. + # After adding T0 config, BGP needs to converge and populate app-db route entries. + # Comparing DBs before BGP convergence causes spurious mismatches in app-db. + assert wait_until(DB_COMP_WAIT_TIME, 20, 0, is_bgp_session_established, + duthost, tor_data["ip"]["remote"]), \ + "BGP IPv4 session for {} not established before DB comparison".format(tor_data["ip"]["remote"]) + assert wait_until(DB_COMP_WAIT_TIME, 20, 0, is_bgp_session_established, + duthost, tor_data["ipv6"]["remote"].lower()), \ + "BGP IPv6 session for {} not established before DB comparison".format(tor_data["ipv6"]["remote"].lower()) + assert wait_until(DB_COMP_WAIT_TIME, 20, 0, db_comp, duthost, patch_add_t0_dir, orig_db_dir, "generic_patch_add_t0"), \ "DB compare failed after adding T0 via generic patch updater" - # Ensure BGP session is up - chk_bgp_session(duthost, tor_data["ip"]["remote"], "post-patch-add test") - chk_bgp_session(duthost, tor_data["ipv6"]["remote"].lower(), "post-patch-add test") - def generic_patch_rm_t0(duthost, skip_load=False, hack_apply=False): # Load config with T0 From 6d7717c08ff1a17e7a049e567bd2ee401781af86 Mon Sep 17 00:00:00 2001 From: Storm Liang Date: Fri, 13 Mar 2026 00:58:02 +1100 Subject: [PATCH 2/2] Remove unused chk_bgp_session import to fix flake8 F401 Signed-off-by: Storm Liang --- tests/configlet/util/generic_patch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/configlet/util/generic_patch.py b/tests/configlet/util/generic_patch.py index a92ac9cef9b..00842b188e7 100644 --- a/tests/configlet/util/generic_patch.py +++ b/tests/configlet/util/generic_patch.py @@ -8,7 +8,7 @@ from tests.common.configlet.utils import orig_db_dir, no_t0_db_dir, patch_add_t0_dir, patch_rm_t0_dir, tor_data,\ RELOAD_WAIT_TIME, PAUSE_INTF_DOWN, PAUSE_INTF_UP, PAUSE_CLET_APPLY, DB_COMP_WAIT_TIME,\ - do_pause, db_comp, chk_bgp_session, is_bgp_session_established + do_pause, db_comp, is_bgp_session_established if os.path.exists("/etc/sonic/sonic-environment"): from mock_for_switch import config_reload, wait_until