diff --git a/bgpd/bgp_evpn.c b/bgpd/bgp_evpn.c index 65a0c97b2bec..ae4e41d77e49 100644 --- a/bgpd/bgp_evpn.c +++ b/bgpd/bgp_evpn.c @@ -17,6 +17,7 @@ #include "zclient.h" #include "lib/printfrr.h" +#include "libfrr.h" #include "bgpd/bgp_attr_evpn.h" #include "bgpd/bgpd.h" @@ -42,6 +43,7 @@ #include "bgpd/bgp_trace.h" #include "bgpd/bgp_mpath.h" #include "bgpd/bgp_packet.h" +#include "bitfield.h" /* * Definitions and external declarations. @@ -55,6 +57,7 @@ DEFINE_MTYPE_STATIC(BGPD, VRF_ROUTE_TARGET, "L3 Route Target"); /* * Static function declarations */ +static void bgp_evpn_write_vni_rd_to_statefile(uint32_t vni, uint16_t rd_id); static void bgp_evpn_remote_ip_hash_init(struct bgpevpn *evpn); static void bgp_evpn_remote_ip_hash_destroy(struct bgpevpn *evpn); static void bgp_evpn_remote_ip_hash_add(struct bgpevpn *vpn, @@ -103,6 +106,87 @@ static const char *vxlan_flood_control_str(enum vxlan_flood_control flood_ctrl) } } + +/* Ensure vpn->rd_id is restored from cache or allocated and persisted */ +static void bgp_evpn_assign_rd_id_for_vni(struct bgp *bgp, struct bgpevpn *vpn) +{ + struct vni_rd_state_entry key = { .vni = vpn->vni }; + struct vni_rd_state_entry *found; + struct vni_rd_state_entry *entry; + + found = vni_rd_state_hash_find(&bm->vni_rd_state, &key); + if (found) { + vpn->rd_id = found->rd_id; + found->used = true; + if (!bf_test_index(bm->rd_idspace, vpn->rd_id)) + bf_set_bit(bm->rd_idspace, vpn->rd_id); + return; + } + + bf_assign_index(bm->rd_idspace, vpn->rd_id); + bgp_evpn_write_vni_rd_to_statefile(vpn->vni, vpn->rd_id); + + entry = XCALLOC(MTYPE_BGP_RD_STATE, sizeof(*entry)); + entry->vni = vpn->vni; + entry->rd_id = vpn->rd_id; + entry->used = true; + vni_rd_state_hash_add(&bm->vni_rd_state, entry); +} + +/* + * Append a single line with L2 VNI and RD-ID info to a file in frr_libstatedir. + * Line format: " \n". + */ +static void bgp_evpn_write_vni_rd_to_statefile(uint32_t vni, uint16_t rd_id) +{ + char path[512]; + FILE *fp; + + snprintf(path, sizeof(path), "%s/%s", frr_libstatedir, BGP_EVPN_VNI_RD_STATEFILE); + + fp = fopen(path, "a"); + if (!fp) { + zlog_err("EVPN: failed to open %s for append: %s", path, safe_strerror(errno)); + return; + } + + fprintf(fp, "%u %hu\n", vni, rd_id); + fclose(fp); +} + +/* + * Rewrite VNI RD state file from in-memory hash. + */ +void bgp_evpn_rewrite_vni_rd_statefile(void) +{ + char path[512]; + char tmp[512]; + FILE *fp; + struct vni_rd_state_entry *entry; + + snprintf(path, sizeof(path), "%s/%s", frr_libstatedir, BGP_EVPN_VNI_RD_STATEFILE); + snprintf(tmp, sizeof(tmp), "%s/%s.tmp", frr_libstatedir, BGP_EVPN_VNI_RD_STATEFILE); + + fp = fopen(tmp, "w"); + if (!fp) { + zlog_err("EVPN: failed to open %s for writing: %s", tmp, safe_strerror(errno)); + return; + } + + frr_each (vni_rd_state_hash, &bm->vni_rd_state, entry) + fprintf(fp, "%u %hu\n", entry->vni, entry->rd_id); + + if (fclose(fp) != 0) { + remove(tmp); + return; + } + + if (rename(tmp, path) != 0) { + zlog_err("EVPN: failed to replace %s: %s", path, safe_strerror(errno)); + remove(tmp); + } +} + /* * Private functions. */ @@ -6721,7 +6805,8 @@ struct bgpevpn *bgp_evpn_new(struct bgp *bgp, vni_t vni, vpn->export_rtl->cmp = (int (*)(void *, void *))bgp_evpn_route_target_cmp; vpn->export_rtl->del = bgp_evpn_xxport_delete_ecomm; - bf_assign_index(bm->rd_idspace, vpn->rd_id); + /* Restore or allocate RD-ID for this VNI */ + bgp_evpn_assign_rd_id_for_vni(bgp, vpn); derive_rd_rt_for_vni(bgp, vpn); /* Initialize EVPN route tables. */ @@ -6751,6 +6836,9 @@ struct bgpevpn *bgp_evpn_new(struct bgp *bgp, vni_t vni, */ void bgp_evpn_free(struct bgp *bgp, struct bgpevpn *vpn) { + struct vni_rd_state_entry vni_key = { 0 }; + struct vni_rd_state_entry *found; + bgp_evpn_remote_ip_hash_destroy(vpn); bgp_evpn_vni_es_cleanup(vpn); bgpevpn_unlink_from_l3vni(vpn); @@ -6759,6 +6847,20 @@ void bgp_evpn_free(struct bgp *bgp, struct bgpevpn *vpn) bgp_evpn_unmap_vni_from_its_rts(bgp, vpn); list_delete(&vpn->import_rtl); list_delete(&vpn->export_rtl); + /* + * Remove this VNI's entry from cache; rewrite file only on normal + * deletion. TODO: batch the rewrite when multiple VNIs are deleted + * in one pass (e.g. no advertise-all-vni) to avoid O(n) full + * file rewrites. + */ + vni_key.vni = vpn->vni; + found = vni_rd_state_hash_find(&bm->vni_rd_state, &vni_key); + if (found) { + vni_rd_state_hash_del(&bm->vni_rd_state, found); + XFREE(MTYPE_BGP_RD_STATE, found); + } + if (!bm->terminating) + bgp_evpn_rewrite_vni_rd_statefile(); bf_release_index(bm->rd_idspace, vpn->rd_id); hash_release(bgp->vni_svi_hash, vpn); hash_release(bgp->vnihash, vpn); diff --git a/bgpd/bgp_evpn.h b/bgpd/bgp_evpn.h index 38ec840b7ffd..097551c0d3f6 100644 --- a/bgpd/bgp_evpn.h +++ b/bgpd/bgp_evpn.h @@ -115,6 +115,7 @@ static inline bool evpn_resolve_overlay_index(void) return bgp ? bgp->resolve_overlay_index : false; } +extern void bgp_evpn_rewrite_vni_rd_statefile(void); extern void bgp_evpn_advertise_type5_route(struct bgp *bgp_vrf, struct bgp_path_info *originator, const struct prefix *p, struct attr *src_attr, afi_t afi, safi_t safi, uint32_t addpath_id); diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c index ff8493e776a6..242ae9f72a11 100644 --- a/bgpd/bgp_main.c +++ b/bgpd/bgp_main.c @@ -242,6 +242,22 @@ static FRR_NORETURN void bgp_exit(int status) bgp_zebra_destroy(); bf_free(bm->rd_idspace); + { + struct vrf_rd_state_entry *ve; + + while ((ve = vrf_rd_state_hash_pop(&bm->vrf_rd_state))) { + XFREE(MTYPE_BGP_NAME, ve->name); + XFREE(MTYPE_BGP_RD_STATE, ve); + } + vrf_rd_state_hash_fini(&bm->vrf_rd_state); + } + { + struct vni_rd_state_entry *ne; + + while ((ne = vni_rd_state_hash_pop(&bm->vni_rd_state))) + XFREE(MTYPE_BGP_RD_STATE, ne); + vni_rd_state_hash_fini(&bm->vni_rd_state); + } list_delete(&bm->bgp); list_delete(&bm->addresses); diff --git a/bgpd/bgp_memory.c b/bgpd/bgp_memory.c index 30e0a72dc30a..6a8df0858030 100644 --- a/bgpd/bgp_memory.c +++ b/bgpd/bgp_memory.c @@ -15,6 +15,7 @@ DEFINE_MGROUP(BGPD, "bgpd"); DEFINE_MTYPE(BGPD, BGP, "BGP instance"); +DEFINE_MTYPE(BGPD, BGP_RD_STATE, "BGP RD state entry"); DEFINE_MTYPE(BGPD, BGP_NAME, "BGP Name data"); DEFINE_MTYPE(BGPD, BGP_LISTENER, "BGP listen socket details"); DEFINE_MTYPE(BGPD, BGP_PEER, "BGP peer"); diff --git a/bgpd/bgp_memory.h b/bgpd/bgp_memory.h index 1097817c7f3e..f493f5b22144 100644 --- a/bgpd/bgp_memory.h +++ b/bgpd/bgp_memory.h @@ -11,6 +11,7 @@ DECLARE_MGROUP(BGPD); DECLARE_MTYPE(BGP); +DECLARE_MTYPE(BGP_RD_STATE); DECLARE_MTYPE(BGP_NAME); DECLARE_MTYPE(BGP_LISTENER); DECLARE_MTYPE(BGP_PEER); diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index aeee1a13bcd4..cc64269a10b8 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -18,6 +18,7 @@ #include "filter.h" #include "routemap.h" #include "log.h" +#include "libfrr.h" #include "plist.h" #include "linklist.h" #include "workqueue.h" @@ -31,6 +32,7 @@ #include "lib/sockopt.h" #include "frr_pthread.h" #include "bitfield.h" +#include #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" @@ -83,6 +85,177 @@ #include "bgpd/bgp_ls.h" #include "bgpd/bgp_ls_ted.h" +/* Forward declaration (used before full definition) */ +static void bgp_write_vrf_rd_to_statefile(const char *vrf_name, uint16_t vrf_rd_id); + +/* Assign or restore a unique rd id for auto derivation of vrf's RD */ +static void bgp_assign_or_restore_vrf_rd_id(struct bgp *bgp, const char *vrf_name) +{ + struct vrf_rd_state_entry key = { .name = (char *)vrf_name }; + struct vrf_rd_state_entry *found; + struct vrf_rd_state_entry *entry; + + found = vrf_rd_state_hash_find(&bm->vrf_rd_state, &key); + if (found) { + bgp->vrf_rd_id = found->rd_id; + found->used = true; + if (!bf_test_index(bm->rd_idspace, bgp->vrf_rd_id)) + bf_set_bit(bm->rd_idspace, bgp->vrf_rd_id); + return; + } + + bf_assign_index(bm->rd_idspace, bgp->vrf_rd_id); + bgp_write_vrf_rd_to_statefile(vrf_name, bgp->vrf_rd_id); + + entry = XCALLOC(MTYPE_BGP_RD_STATE, sizeof(struct vrf_rd_state_entry)); + entry->name = XSTRDUP(MTYPE_BGP_NAME, vrf_name); + entry->rd_id = bgp->vrf_rd_id; + entry->used = true; + vrf_rd_state_hash_add(&bm->vrf_rd_state, entry); +} + +static void bgp_load_vrf_rd_statefile(void) +{ + char path[512]; + char line[512]; + FILE *fp; + + if (!bm) + return; + + vrf_rd_state_hash_init(&bm->vrf_rd_state); + + snprintf(path, sizeof(path), "%s/%s", frr_libstatedir, BGP_VRF_RD_STATEFILE); + fp = fopen(path, "r"); + if (!fp) + return; + + while (fgets(line, sizeof(line), fp)) { + char namebuf[256] = { 0 }; + unsigned int rd = 0; + struct vrf_rd_state_entry *entry, *old; + + if (sscanf(line, "%255s %u", namebuf, &rd) != 2) { + zlog_warn("BGP: malformed line in %s: %s", + BGP_VRF_RD_STATEFILE, line); + continue; + } + if (rd > UINT16_MAX) { + zlog_warn("BGP: ignoring invalid rd_id %u for VRF %s in %s", + rd, namebuf, BGP_VRF_RD_STATEFILE); + continue; + } + + entry = XCALLOC(MTYPE_BGP_RD_STATE, sizeof(*entry)); + entry->name = XSTRDUP(MTYPE_BGP_NAME, namebuf); + entry->rd_id = (uint16_t)rd; + old = vrf_rd_state_hash_find(&bm->vrf_rd_state, entry); + if (old) { + XFREE(MTYPE_BGP_NAME, entry->name); + XFREE(MTYPE_BGP_RD_STATE, entry); + } else { + vrf_rd_state_hash_add(&bm->vrf_rd_state, entry); + bf_set_bit(bm->rd_idspace, entry->rd_id); + } + } + fclose(fp); +} + +static void bgp_load_vni_rd_statefile(void) +{ + char path[512]; + char line[256]; + FILE *fp; + + if (!bm) + return; + + vni_rd_state_hash_init(&bm->vni_rd_state); + + snprintf(path, sizeof(path), "%s/%s", frr_libstatedir, BGP_EVPN_VNI_RD_STATEFILE); + fp = fopen(path, "r"); + if (!fp) + return; + + while (fgets(line, sizeof(line), fp)) { + unsigned int vni = 0; + unsigned int rd = 0; + struct vni_rd_state_entry *entry, *old; + + if (sscanf(line, "%u %u", &vni, &rd) != 2) { + zlog_warn("BGP: malformed line in %s: %s", + BGP_EVPN_VNI_RD_STATEFILE, line); + continue; + } + if (rd > UINT16_MAX) { + zlog_warn("BGP: ignoring invalid rd_id %u for VNI %u in %s", + rd, vni, BGP_EVPN_VNI_RD_STATEFILE); + continue; + } + + entry = XCALLOC(MTYPE_BGP_RD_STATE, sizeof(*entry)); + entry->vni = vni; + entry->rd_id = (uint16_t)rd; + old = vni_rd_state_hash_find(&bm->vni_rd_state, entry); + if (old) { + XFREE(MTYPE_BGP_RD_STATE, entry); + } else { + vni_rd_state_hash_add(&bm->vni_rd_state, entry); + bf_set_bit(bm->rd_idspace, entry->rd_id); + } + } + fclose(fp); +} + +static void bgp_write_vrf_rd_to_statefile(const char *vrf_name, uint16_t vrf_rd_id) +{ + char path[512]; + FILE *fp; + + /* Ensure state dir exists (best-effort) */ + (void)mkdir(frr_libstatedir, 0755); + + snprintf(path, sizeof(path), "%s/%s", frr_libstatedir, BGP_VRF_RD_STATEFILE); + + fp = fopen(path, "a"); + if (!fp) { + /* Non-fatal: skip persisting if path is unavailable */ + return; + } + + fprintf(fp, "%s %hu\n", vrf_name, vrf_rd_id); + fclose(fp); +} + +static void bgp_rewrite_vrf_rd_statefile(void) +{ + char path[512]; + char tmp[512]; + FILE *fp; + struct vrf_rd_state_entry *entry; + + snprintf(path, sizeof(path), "%s/%s", frr_libstatedir, BGP_VRF_RD_STATEFILE); + snprintf(tmp, sizeof(tmp), "%s/%s.tmp", frr_libstatedir, BGP_VRF_RD_STATEFILE); + + fp = fopen(tmp, "w"); + if (!fp) + return; + + frr_each (vrf_rd_state_hash, &bm->vrf_rd_state, entry) + fprintf(fp, "%s %hu\n", entry->name, entry->rd_id); + + if (fclose(fp) != 0) { + remove(tmp); + return; + } + + if (rename(tmp, path) != 0) { + zlog_err("BGP: failed to replace %s: %s", path, safe_strerror(errno)); + remove(tmp); + } +} + + DEFINE_MTYPE_STATIC(BGPD, PEER_TX_SHUTDOWN_MSG, "Peer shutdown message (TX)"); DEFINE_QOBJ_TYPE(bgp_master); DEFINE_QOBJ_TYPE(bgp); @@ -3882,8 +4055,11 @@ static struct bgp *bgp_create(as_t *as, const char *name, update_bgp_group_init(bgp); if (!hidden) { - /* assign a unique rd id for auto derivation of vrf's RD */ - bf_assign_index(bm->rd_idspace, bgp->vrf_rd_id); + /* assign or restore a unique rd id for auto derivation of vrf's RD */ + const char *vrf_name = (inst_type == BGP_INSTANCE_TYPE_DEFAULT) + ? "default" + : (name ? name : "?"); + bgp_assign_or_restore_vrf_rd_id(bgp, vrf_name); bgp_evpn_init(bgp); bgp_evpn_vrf_es_init(bgp); @@ -4639,6 +4815,24 @@ void bgp_free(struct bgp *bgp) struct bgp_table *table; struct bgp_dest *dest; struct bgp_rmap *rmap; + const char *vrf_name; + struct vrf_rd_state_entry *found; + struct vrf_rd_state_entry key; + + vrf_name = (bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT) + ? "default" + : (bgp->name ? bgp->name : "?"); + + memset(&key, 0, sizeof(key)); + key.name = (char *)vrf_name; + found = vrf_rd_state_hash_find(&bm->vrf_rd_state, &key); + if (found) { + vrf_rd_state_hash_del(&bm->vrf_rd_state, found); + XFREE(MTYPE_BGP_NAME, found->name); + XFREE(MTYPE_BGP_RD_STATE, found); + } + if (!bm->terminating) + bgp_rewrite_vrf_rd_statefile(); QOBJ_UNREG(bgp); @@ -4679,7 +4873,6 @@ void bgp_free(struct bgp *bgp) bgp_scan_finish(bgp); bgp_address_destroy(bgp); bgp_tip_hash_destroy(bgp); - /* release the auto RD id */ bf_release_index(bm->rd_idspace, bgp->vrf_rd_id); @@ -9088,6 +9281,11 @@ void bgp_master_init(struct event_loop *master, const int buffer_size, bgp_nhg_init(); bgp_evpn_mh_init(); QOBJ_REG(bm, bgp_master); + + /* Load existing VRF->RD-ID state from file before reading configs */ + bgp_load_vrf_rd_statefile(); + /* Load existing L2 VNI->RD-ID state from file before reading configs */ + bgp_load_vni_rd_statefile(); } /* @@ -9287,9 +9485,53 @@ static int peer_unshut_after_cfg(struct bgp *bgp) return 0; } +/* + * After config is loaded, clean up orphan entries from VRF/VNI RD state hashes. + * Entries not marked 'used' were in the state file but no longer in config. + */ +static int bgp_rd_state_cleanup_after_config(struct bgp *bgp) +{ + static bool cleanup_done; + bool vrf_changed = false, vni_changed = false; + struct vrf_rd_state_entry *ve; + struct vni_rd_state_entry *ne; + + if (cleanup_done) + return 0; + cleanup_done = true; + + /* Clean orphan VRF entries */ + frr_each_safe (vrf_rd_state_hash, &bm->vrf_rd_state, ve) { + if (!ve->used) { + bf_release_index(bm->rd_idspace, ve->rd_id); + vrf_rd_state_hash_del(&bm->vrf_rd_state, ve); + XFREE(MTYPE_BGP_NAME, ve->name); + XFREE(MTYPE_BGP_RD_STATE, ve); + vrf_changed = true; + } + } + if (vrf_changed) + bgp_rewrite_vrf_rd_statefile(); + + /* Clean orphan VNI entries */ + frr_each_safe (vni_rd_state_hash, &bm->vni_rd_state, ne) { + if (!ne->used) { + bf_release_index(bm->rd_idspace, ne->rd_id); + vni_rd_state_hash_del(&bm->vni_rd_state, ne); + XFREE(MTYPE_BGP_RD_STATE, ne); + vni_changed = true; + } + } + if (vni_changed) + bgp_evpn_rewrite_vni_rd_statefile(); + + return 0; +} + void bgp_init(unsigned short instance) { hook_register(bgp_config_end, peer_unshut_after_cfg); + hook_register(bgp_config_end, bgp_rd_state_cleanup_after_config); /* allocates some vital data structures used by peer commands in * vty_init */ diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 38641003e64a..a9bb649f2714 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -17,9 +17,13 @@ #include "srv6.h" #include "iana_afi.h" #include "asn.h" +#include "typesafe.h" +#include "jhash.h" PREDECL_LIST(zebra_announce); PREDECL_LIST(zebra_l2_vni); +PREDECL_HASH(vrf_rd_state_hash); +PREDECL_HASH(vni_rd_state_hash); enum bgp_bp_install_type { BGP_BP_INSTALL_ROUTE, @@ -145,6 +149,12 @@ struct bgp_master { /* The Mac table */ struct hash *self_mac_hash; + /* Cached VRF_NAME->RD-ID state loaded at startup from libstatedir */ + struct vrf_rd_state_hash_head vrf_rd_state; + + /* Cached VNI->RD-ID state loaded at startup from libstatedir */ + struct vni_rd_state_hash_head vni_rd_state; + /* BGP start time. */ time_t start_time; @@ -248,6 +258,52 @@ struct bgp_master { }; DECLARE_QOBJ_TYPE(bgp_master); +/* State file names (under frr_libstatedir) */ +#define BGP_VRF_RD_STATEFILE ".bgp_vrf_rd.txt" +#define BGP_EVPN_VNI_RD_STATEFILE ".bgp_vni_rd.txt" + +struct vrf_rd_state_entry { + char *name; + uint16_t rd_id; + bool used; + struct vrf_rd_state_hash_item item; +}; + +struct vni_rd_state_entry { + uint32_t vni; + uint16_t rd_id; + bool used; + struct vni_rd_state_hash_item item; +}; + +static inline uint32_t vrf_rd_state_hash_key(const struct vrf_rd_state_entry *e) +{ + return jhash(e->name, strlen(e->name), 0x5abc1234); +} + +static inline int vrf_rd_state_hash_cmp(const struct vrf_rd_state_entry *e1, + const struct vrf_rd_state_entry *e2) +{ + return strcmp(e1->name, e2->name); +} + +DECLARE_HASH(vrf_rd_state_hash, struct vrf_rd_state_entry, item, + vrf_rd_state_hash_cmp, vrf_rd_state_hash_key); + +static inline uint32_t vni_rd_state_hash_key(const struct vni_rd_state_entry *e) +{ + return jhash_1word(e->vni, 0); +} + +static inline int vni_rd_state_hash_cmp(const struct vni_rd_state_entry *e1, + const struct vni_rd_state_entry *e2) +{ + return (int)(e1->vni - e2->vni); +} + +DECLARE_HASH(vni_rd_state_hash, struct vni_rd_state_entry, item, + vni_rd_state_hash_cmp, vni_rd_state_hash_key); + /* BGP route-map structure. */ struct bgp_rmap { char *name; diff --git a/tests/topotests/bgp_evpn_gr/test_bgp_evpn_gr.py b/tests/topotests/bgp_evpn_gr/test_bgp_evpn_gr.py index b311a5bdb29a..6e7117bbe6ec 100644 --- a/tests/topotests/bgp_evpn_gr/test_bgp_evpn_gr.py +++ b/tests/topotests/bgp_evpn_gr/test_bgp_evpn_gr.py @@ -2,13 +2,13 @@ # SPDX-License-Identifier: ISC # # Reference topology from bgp_evpn_overlay_index_gateway/test_bgp_evpn_overlay_index_gateway.py: -# +# # +--------+ BGP +--------+ BGP +--------+ +--------+ # SN1 | | IPv4/v6 | | EVPN | | | | # =======+ Host1 +---------+ PE1 +------+ PE2 +------+ Host2 + # | | | | | | | | # +--------+ +--------+ +--------+ +--------+ -# +# # Host1 is connected to PE1 and Host2 is connected to PE2. # Host1 and PE1 have IPv4/v6 BGP sessions. # PE1 and PE2 have an EVPN session. @@ -38,6 +38,7 @@ start_router_daemons, ) + def build_topo(tgen): # Create PE and host routers for name in ["PE1", "PE2", "host1", "host2"]: @@ -47,6 +48,7 @@ def build_topo(tgen): tgen.add_link(tgen.gears["PE1"], tgen.gears["host1"], "PE1-eth1", "host1-eth0") tgen.add_link(tgen.gears["PE2"], tgen.gears["host2"], "PE2-eth1", "host2-eth0") + def setup_module(mod): tgen = Topogen(build_topo, mod.__name__) tgen.start_topology() @@ -68,7 +70,9 @@ def setup_module(mod): bridge_ipv6 = f"fd00:50:1::{suf}/48" pe.cmd_raises("ip link add vrf-blue type vrf table 10") pe.cmd_raises("ip link set dev vrf-blue up") - pe.cmd_raises(f"ip link add vxlan100 type vxlan id 100 dstport 4789 local {vtep_ip}") + pe.cmd_raises( + f"ip link add vxlan100 type vxlan id 100 dstport 4789 local {vtep_ip}" + ) pe.cmd_raises("ip link add name br100 type bridge stp_state 0") pe.cmd_raises("ip link set dev vxlan100 master br100") pe.cmd_raises(f"ip link set dev {name}-eth1 master br100") @@ -78,7 +82,9 @@ def setup_module(mod): pe.cmd_raises(f"ip link set up dev {name}-eth1") pe.cmd_raises("ip link set dev br100 master vrf-blue") pe.cmd_raises(f"ip -6 addr add {bridge_ipv6} dev br100") - pe.cmd_raises(f"ip link add vxlan1000 type vxlan id 1000 dstport 4789 local {vtep_ip}") + pe.cmd_raises( + f"ip link add vxlan1000 type vxlan id 1000 dstport 4789 local {vtep_ip}" + ) pe.cmd_raises("ip link add name br1000 type bridge stp_state 0") pe.cmd_raises("ip link set dev vxlan1000 master br1000") pe.cmd_raises("ip link set up dev br1000") @@ -111,7 +117,9 @@ def _evpn_peer_established(router: TopoRouter, neighbor_ip: str) -> bool: if neighbor_ip not in peers: logger.info(f"Neighbor {neighbor_ip} not found in BGP summary") return False - logger.info(f"Neighbor {neighbor_ip} found in BGP summary with state: {peers[neighbor_ip].get('state')}") + logger.info( + f"Neighbor {neighbor_ip} found in BGP summary with state: {peers[neighbor_ip].get('state')}" + ) return peers[neighbor_ip].get("state") == "Established" @@ -215,7 +223,9 @@ def _evpn_remote_type_paths_stale(router: TopoRouter, route_type: int) -> bool: return found_remote and found_stale -def _evpn_routes_with_stale_only_for_rd(router: TopoRouter, rd: str, route_type: int) -> bool: +def _evpn_routes_with_stale_only_for_rd( + router: TopoRouter, rd: str, route_type: int +) -> bool: """ Verify that all paths whose RD matches the input RD and route_type are marked as stale. Succeeds only if at least one matching path is found and all such paths are stale. @@ -247,13 +257,16 @@ def _evpn_routes_with_stale_only_for_rd(router: TopoRouter, rd: str, route_type: found_matching_path = True logger.info(f"Checking prefix: {prefix} path: {p}") if not bool(p.get("stale")): - logger.info(f"Checking prefix: {prefix} path: {p} is not stale, returning False") + logger.info( + f"Checking prefix: {prefix} path: {p} is not stale, returning False" + ) return False if not found_matching_path: logger.info(f"No matching path found for RD: {rd} and route type: {route_type}") return False return True + def _vrf_has_kernel_routes(router: TopoRouter, vrf_name: str, prefixes): if isinstance(prefixes, str): prefixes = [prefixes] @@ -314,7 +327,7 @@ def fetch_vni_rd_from_pe2(pe2: TopoRouter, vni: int): output = json.loads(output) if "rd" in output: logger.info(f"RD for VNI {vni} found: {output.get('rd')}") - return output.get("rd") + return output.get("rd") except Exception as e: logger.info(f"Failed to fetch RD from PE2 VNI {vni}: {e}") return None @@ -415,6 +428,202 @@ def _gr_r_bit_set(router: TopoRouter, neighbor_ip: str) -> bool: return False +def _parse_state_file(router, path): + """Read an RD state file and return a dict of name/vni -> rd_id.""" + output = router.cmd(f"cat {path} 2>/dev/null").strip() + result = {} + for line in output.splitlines(): + parts = line.split() + if len(parts) == 2: + result[parts[0]] = int(parts[1]) + return result + + +def _get_vni_rds(router, vnis): + """Fetch RD for each VNI via 'show bgp l2vpn evpn vni json'.""" + rds = {} + for vni in vnis: + output = router.vtysh_cmd(f"show bgp l2vpn evpn vni {vni} json") + try: + data = json.loads(output) + rd = data.get("rd") + if rd: + rds[vni] = rd + except Exception: + pass + return rds + + +def test_bgp_evpn_rd_state_files_created(): + """After startup, verify RD state files exist with correct entries.""" + tgen = get_topogen() + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + pe1 = tgen.gears["PE1"] + + logger.info("STEP 1: Wait for EVPN session to be established") + test_func = functools.partial(_evpn_peer_established, pe1, "10.0.1.2") + result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) + assert result, "PE1 EVPN session with PE2 not established" + + logger.info("STEP 2: Wait for EVPN VNI routes to be present") + test_func = functools.partial(_evpn_any_prefixes, pe1) + result, _ = topotest.run_and_expect(test_func, True, count=30, wait=1) + assert result, "No EVPN routes present on PE1" + + logger.info("STEP 3: Verify VRF RD state file exists and has entries") + vrf_state = _parse_state_file(pe1, "/var/lib/frr/.bgp_vrf_rd.txt") + assert len(vrf_state) > 0, "VRF RD state file is empty or missing" + assert "default" in vrf_state, "default VRF not found in VRF RD state file" + logger.info(f"VRF RD state file contents: {vrf_state}") + + logger.info("STEP 4: Verify VNI RD state file exists and has entries") + vni_state = _parse_state_file(pe1, "/var/lib/frr/.bgp_vni_rd.txt") + assert len(vni_state) > 0, "VNI RD state file is empty or missing" + logger.info(f"VNI RD state file contents: {vni_state}") + + assert "100" in vni_state, "L2VNI 100 not found in VNI RD state file" + + +def test_bgp_evpn_rd_persistence_across_restart(): + """Kill and restart bgpd on PE1; verify RD values remain the same.""" + tgen = get_topogen() + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + pe1 = tgen.gears["PE1"] + + logger.info("STEP 1: Wait for EVPN session and routes to be present") + test_func = functools.partial(_evpn_peer_established, pe1, "10.0.1.2") + result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) + assert result, "PE1 EVPN session with PE2 not established" + + test_func = functools.partial(_evpn_any_prefixes, pe1) + result, _ = topotest.run_and_expect(test_func, True, count=30, wait=1) + assert result, "No EVPN routes present on PE1" + + logger.info("STEP 2: Record RD values before restart") + rds_before = _get_vni_rds(pe1, [100]) + vrf_state_before = _parse_state_file(pe1, "/var/lib/frr/.bgp_vrf_rd.txt") + vni_state_before = _parse_state_file(pe1, "/var/lib/frr/.bgp_vni_rd.txt") + logger.info(f"RDs before restart: {rds_before}") + logger.info(f"VRF state before: {vrf_state_before}") + logger.info(f"VNI state before: {vni_state_before}") + assert len(rds_before) >= 1, "Expected RD for L2VNI 100 before restart" + + logger.info("STEP 3: Kill bgpd on PE1") + kill_router_daemons(tgen, "PE1", ["bgpd"]) + + logger.info("STEP 4: Restart bgpd on PE1") + source_config = os.path.join(CWD, "PE1/frr.conf") + start_router_daemons(tgen, "PE1", ["bgpd"]) + pe1.cmd(f"vtysh -f {source_config}") + + logger.info("STEP 5: Wait for EVPN session to re-establish") + test_func = functools.partial(_evpn_peer_established, pe1, "10.0.1.2") + result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) + assert result, "PE1 EVPN session with PE2 not re-established after restart" + + test_func = functools.partial(_evpn_any_prefixes, pe1) + result, _ = topotest.run_and_expect(test_func, True, count=30, wait=1) + assert result, "No EVPN routes on PE1 after restart" + + logger.info("STEP 6: Verify RD values are identical after restart") + rds_after = _get_vni_rds(pe1, [100]) + logger.info(f"RDs after restart: {rds_after}") + assert ( + rds_before == rds_after + ), f"RDs changed after restart! Before: {rds_before}, After: {rds_after}" + + logger.info("STEP 7: Verify state files are consistent after restart") + vrf_state_after = _parse_state_file(pe1, "/var/lib/frr/.bgp_vrf_rd.txt") + vni_state_after = _parse_state_file(pe1, "/var/lib/frr/.bgp_vni_rd.txt") + logger.info(f"VRF state after: {vrf_state_after}") + logger.info(f"VNI state after: {vni_state_after}") + assert ( + vrf_state_before == vrf_state_after + ), f"VRF state file changed! Before: {vrf_state_before}, After: {vrf_state_after}" + assert ( + vni_state_before == vni_state_after + ), f"VNI state file changed! Before: {vni_state_before}, After: {vni_state_after}" + + +def test_bgp_evpn_rd_orphan_cleanup(): + """Remove VRF from config, restart bgpd, verify orphan entry is cleaned.""" + tgen = get_topogen() + if tgen.routers_have_failure(): + pytest.skip(tgen.errors) + + pe1 = tgen.gears["PE1"] + + logger.info("STEP 1: Wait for EVPN session to be established") + test_func = functools.partial(_evpn_peer_established, pe1, "10.0.1.2") + result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) + assert result, "PE1 EVPN session with PE2 not established" + + logger.info("STEP 2: Record state files before VRF removal") + vrf_state_before = _parse_state_file(pe1, "/var/lib/frr/.bgp_vrf_rd.txt") + logger.info(f"VRF state before removal: {vrf_state_before}") + assert "default" in vrf_state_before, "default VRF missing before test" + + vrf_blue_present = "vrf-blue" in vrf_state_before + logger.info(f"vrf-blue in state file: {vrf_blue_present}") + + logger.info("STEP 3: Kill bgpd on PE1 (without saving config)") + kill_router_daemons(tgen, "PE1", ["bgpd"], save_config=False) + + logger.info("STEP 4: Remove kernel VRF so bgpd won't auto-discover it") + pe1.cmd("ip link del vrf-blue") + + logger.info("STEP 5: Write minimal config without VRF and restart bgpd") + minimal_conf = ( + "log file bgpd.log\n" + "!\n" + "router bgp 101\n" + " bgp router-id 10.100.0.1\n" + " no bgp ebgp-requires-policy\n" + " no bgp network import-check\n" + " neighbor 10.0.1.2 remote-as 102\n" + " !\n" + " address-family l2vpn evpn\n" + " neighbor 10.0.1.2 activate\n" + " advertise-all-vni\n" + " exit-address-family\n" + "!\n" + ) + minimal_conf_path = os.path.join(CWD, "PE1_minimal.conf") + with open(minimal_conf_path, "w") as f: + f.write(minimal_conf) + + start_router_daemons(tgen, "PE1", ["bgpd"]) + pe1.cmd(f"vtysh -f {minimal_conf_path}") + + logger.info("STEP 6: Wait for config to settle") + test_func = functools.partial(_evpn_peer_established, pe1, "10.0.1.2") + result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) + assert result, "PE1 EVPN session not established after minimal config" + + logger.info("STEP 7: Verify orphan VRF entry is cleaned from state file") + vrf_state_after = _parse_state_file(pe1, "/var/lib/frr/.bgp_vrf_rd.txt") + logger.info(f"VRF state after orphan cleanup: {vrf_state_after}") + + if vrf_blue_present: + assert ( + "vrf-blue" not in vrf_state_after + ), "vrf-blue still present in state file after being removed from config" + + logger.info("STEP 8: Restore kernel VRF and full config for subsequent tests") + kill_router_daemons(tgen, "PE1", ["bgpd"], save_config=False) + pe1.cmd("ip link add vrf-blue type vrf table 10") + pe1.cmd("ip link set dev vrf-blue up") + pe1.cmd("ip link set dev br100 master vrf-blue") + pe1.cmd("ip link set dev br1000 master vrf-blue") + start_router_daemons(tgen, "PE1", ["bgpd"]) + source_config = os.path.join(CWD, "PE1/frr.conf") + pe1.cmd(f"vtysh -f {source_config}") + + def test_bgp_evpn_gr_stale_and_recovery(): tgen = get_topogen() pe1 = tgen.gears["PE1"] @@ -449,13 +658,23 @@ def test_bgp_evpn_gr_stale_and_recovery(): assert result, "No remote EVPN type-5 routes seen on PE2" # Kernel VRF routes imported (type-5): verify PE2 has host1, PE1 has host2 - logger.info("STEP 5: Verify type-5 routes are installed into kernel VRF on both PEs") - test_func = functools.partial(_vrf_has_kernel_routes, pe2, "vrf-blue", ["172.31.0.21"]) + logger.info( + "STEP 5: Verify type-5 routes are installed into kernel VRF on both PEs" + ) + test_func = functools.partial( + _vrf_has_kernel_routes, pe2, "vrf-blue", ["172.31.0.21"] + ) result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) - assert result, "Type-5 prefix 172.31.0.21/32 not installed in PE2 kernel VRF vrf-blue" - test_func = functools.partial(_vrf_has_kernel_routes, pe1, "vrf-blue", ["172.31.0.22"]) + assert ( + result + ), "Type-5 prefix 172.31.0.21/32 not installed in PE2 kernel VRF vrf-blue" + test_func = functools.partial( + _vrf_has_kernel_routes, pe1, "vrf-blue", ["172.31.0.22"] + ) result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) - assert result, "Type-5 prefix 172.31.0.22/32 not installed in PE1 kernel VRF vrf-blue" + assert ( + result + ), "Type-5 prefix 172.31.0.22/32 not installed in PE1 kernel VRF vrf-blue" # Ensure type-2 routes exist on both PEs logger.info("STEP 6: Verify remote EVPN type-2 routes exist on both PEs") @@ -468,15 +687,21 @@ def test_bgp_evpn_gr_stale_and_recovery(): # Ensure type-2 are installed as extern_learn in FDB (remote MACs) logger.info("STEP 7: Verify remote MACs are extern_learn in FDB (type-2)") - test_func = functools.partial(_bridge_has_extern_learn, pe1, "vxlan100", "1a:2b:3c:4d:5e:62") + test_func = functools.partial( + _bridge_has_extern_learn, pe1, "vxlan100", "1a:2b:3c:4d:5e:62" + ) result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) assert result, "Remote MAC (host2) not extern_learn in PE1 FDB for vxlan100" - test_func = functools.partial(_bridge_has_extern_learn, pe2, "vxlan100", "1a:2b:3c:4d:5e:61") + test_func = functools.partial( + _bridge_has_extern_learn, pe2, "vxlan100", "1a:2b:3c:4d:5e:61" + ) result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) assert result, "Remote MAC (host1) not extern_learn in PE2 FDB for vxlan100" # Verify that remote MACs on PE1 are installed as extern_learn in kernel's ip_neigh table - logger.info("STEP 7b: Verify remote MACs are extern_learn in PE1 kernel's ip_neigh table") + logger.info( + "STEP 7b: Verify remote MACs are extern_learn in PE1 kernel's ip_neigh table" + ) # For PE1, check host2's MAC (remote side) test_func = functools.partial(_ip_neigh_has_extern_learn, pe1, "1a:2b:3c:4d:5e:62") @@ -500,43 +725,55 @@ def test_bgp_evpn_gr_stale_and_recovery(): # PE1 should retain only PE2-originated EVPN routes as stale during GR (type-2 and type-5), not local # Verify only type-5 routes from PE2's RD are stale logger.info("STEP 9: Check PE1 retains ONLY PE2-originated type-5 routes as stale") - test_func = functools.partial(_evpn_routes_with_stale_only_for_rd, pe1, rd=pe2_rd_vni_1000, route_type=5) - result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) - assert result, ( - "PE1 did not retain ONLY PE2-originated EVPN type-5 routes as stale during PE2 restart" + test_func = functools.partial( + _evpn_routes_with_stale_only_for_rd, pe1, rd=pe2_rd_vni_1000, route_type=5 ) - + result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) + assert ( + result + ), "PE1 did not retain ONLY PE2-originated EVPN type-5 routes as stale during PE2 restart" + logger.info(f"PE2 RD for VNI 100: {pe2_rd_vni_100}") # Verify only type-2 routes from PE2's RD are stale logger.info("STEP 10: Check PE1 retains ONLY PE2-originated type-2 routes as stale") - test_func = functools.partial(_evpn_routes_with_stale_only_for_rd, pe1, rd=pe2_rd_vni_100, route_type=2) - result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) - assert result, ( - "PE1 did not retain ONLY PE2-originated EVPN type-2 routes as stale during PE2 restart" + test_func = functools.partial( + _evpn_routes_with_stale_only_for_rd, pe1, rd=pe2_rd_vni_100, route_type=2 ) + result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) + assert ( + result + ), "PE1 did not retain ONLY PE2-originated EVPN type-2 routes as stale during PE2 restart" # Also generic check for any stale presence - logger.info("STEP 11: Confirm PE1 shows some EVPN routes as stale during PE2 restart") + logger.info( + "STEP 11: Confirm PE1 shows some EVPN routes as stale during PE2 restart" + ) test_func = functools.partial(_evpn_has_any_stale, pe1) result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) assert result, "PE1 did not retain EVPN routes as stale during PE2 restart" # Verify PE1 kernel still has routes learned from PE2 in vrf-blue (type-5 retained) logger.info("STEP 12: Verify PE1 kernel retains type-5 routes from PE2 during GR") - test_func = functools.partial(_vrf_has_kernel_routes, pe1, "vrf-blue", ["172.31.0.22"]) + test_func = functools.partial( + _vrf_has_kernel_routes, pe1, "vrf-blue", ["172.31.0.22"] + ) result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) assert result, "PE1 kernel VRF routes learned from PE2 disappeared during GR" # Verify PE1 FDB retains extern learned MAC from PE2 (type-2 retained) logger.info("STEP 13: Verify PE1 FDB retains extern_learn MAC from PE2 during GR") - test_func = functools.partial(_bridge_has_extern_learn, pe1, "vxlan100", "1a:2b:3c:4d:5e:62") + test_func = functools.partial( + _bridge_has_extern_learn, pe1, "vxlan100", "1a:2b:3c:4d:5e:62" + ) result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) assert result, "PE1 FDB extern_learn entry from PE2 disappeared during GR" # Verify PE1 kernel still has routes learned from PE2 in vrf-blue (type-5 retained) test_func = functools.partial(_ip_neigh_has_extern_learn, pe1, "1a:2b:3c:4d:5e:62") result, _ = topotest.run_and_expect(test_func, True, count=30, wait=1) - assert result, "PE1 kernel ip_neigh table extern_learn entry from PE2 disappeared during GR" + assert ( + result + ), "PE1 kernel ip_neigh table extern_learn entry from PE2 disappeared during GR" # Bring bgpd back on PE2 logger.info("STEP 14: Restart bgpd on PE2 to recover session") @@ -573,13 +810,15 @@ def test_bgp_evpn_gr_stale_and_recovery(): test_func = functools.partial(_gr_r_bit_set, pe1, "10.0.1.2") result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) assert result, "EVPN GR R-bit not set on PE1 neighbor view after PE2 restart" - + test_func = functools.partial(_evpn_f_bit_set, pe1, "10.0.1.2") result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) assert result, "EVPN AF F-bit not set on PE1 neighbor view during PE2 restart" # After session recovery, stale flags should be cleared for type-2 and type-5 on PE1 - logger.info("STEP 17: Ensure remote EVPN type-5 and type-2 remain active after recovery") + logger.info( + "STEP 17: Ensure remote EVPN type-5 and type-2 remain active after recovery" + ) test_func = functools.partial(_evpn_has_remote_route_type, pe1, 5) result, _ = topotest.run_and_expect(test_func, True, count=120, wait=2) assert result, "Remote EVPN type-5 routes disappeared on PE1 after PE2 recovered" @@ -589,13 +828,17 @@ def test_bgp_evpn_gr_stale_and_recovery(): # After bgpd recovery on PE2, verify PE1 kernel still has routes learned from PE2 logger.info("STEP 18: Verify PE1 kernel still has routes from PE2 after recovery") - test_func = functools.partial(_vrf_has_kernel_routes, pe1, "vrf-blue", ["172.31.0.22"]) + test_func = functools.partial( + _vrf_has_kernel_routes, pe1, "vrf-blue", ["172.31.0.22"] + ) result, _ = topotest.run_and_expect(test_func, True, count=120, wait=2) assert result, "PE1 kernel VRF routes learned from PE2 disappeared after recovery" # And verify PE1 FDB still has extern_learn entry from PE2 logger.info("STEP 19: Verify PE1 FDB retains extern_learn MAC after recovery") - test_func = functools.partial(_bridge_has_extern_learn, pe1, "vxlan100", "1a:2b:3c:4d:5e:62") + test_func = functools.partial( + _bridge_has_extern_learn, pe1, "vxlan100", "1a:2b:3c:4d:5e:62" + ) result, _ = topotest.run_and_expect(test_func, True, count=120, wait=2) assert result, "PE1 FDB extern_learn entry from PE2 disappeared after recovery" @@ -652,12 +895,16 @@ def test_bgp_evpn_gr_stale_cleanup_on_timeout(): assert result, "No remote EVPN type-5 routes on PE1" logger.info("STEP 4: Verify kernel VRF has type-5 route on PE1 prior to GR") - test_func = functools.partial(_vrf_has_kernel_routes, pe1, "vrf-blue", ["172.31.0.22"]) + test_func = functools.partial( + _vrf_has_kernel_routes, pe1, "vrf-blue", ["172.31.0.22"] + ) result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) assert result, "Missing kernel VRF routes on PE1 prior to GR" logger.info("STEP 5: Verify extern_learn MAC is present on PE1 prior to GR") - test_func = functools.partial(_bridge_has_extern_learn, pe1, "vxlan100", "1a:2b:3c:4d:5e:62") + test_func = functools.partial( + _bridge_has_extern_learn, pe1, "vxlan100", "1a:2b:3c:4d:5e:62" + ) result, _ = topotest.run_and_expect(test_func, True, count=60, wait=2) assert result, "Missing extern_learn MAC on PE1 prior to GR" @@ -675,10 +922,16 @@ def test_bgp_evpn_gr_stale_cleanup_on_timeout(): result, _ = topotest.run_and_expect(test_func, True, count=160, wait=1) assert result, "VRF kernel routes on PE1 not cleaned after GR stalepath-time expiry" - logger.info("STEP 9: Verify FDB extern_learn MAC learned from PE2 is cleaned on PE1") - test_func = functools.partial(_bridge_extern_absent, pe1, "vxlan100", "1a:2b:3c:4d:5e:62") + logger.info( + "STEP 9: Verify FDB extern_learn MAC learned from PE2 is cleaned on PE1" + ) + test_func = functools.partial( + _bridge_extern_absent, pe1, "vxlan100", "1a:2b:3c:4d:5e:62" + ) result, _ = topotest.run_and_expect(test_func, True, count=160, wait=1) - assert result, "FDB extern_learn MAC on PE1 not cleaned after GR stalepath-time expiry" + assert ( + result + ), "FDB extern_learn MAC on PE1 not cleaned after GR stalepath-time expiry" # Restore bgpd on PE2 for subsequent tests logger.info("STEP 10: Restart bgpd on PE2 for subsequent tests")