|
| 1 | +From e623b92190e926c4ab797c6707c22ba15fa93eef Mon Sep 17 00:00:00 2001 |
| 2 | +From: Stepan Blyschak <stepanb@nvidia.com> |
| 3 | +Date: Wed, 3 Dec 2025 15:43:18 +0000 |
| 4 | +Subject: [PATCH] bgpd: send EOR during GR only when FIB install complete for |
| 5 | + suppress-fib enabled |
| 6 | + |
| 7 | +Currently during GR, EOR is sent to the neighbor prematurely when suppress-fib |
| 8 | +is enabled. The following fix has been implemented: |
| 9 | + keep a counter to track the routes awaiting install in FIB. Increment the |
| 10 | + counter when bgp sends a route install to zebra, decrement the counter when |
| 11 | + the fib install ack is received from zebra in bgp. When this count reaches |
| 12 | + zero, the route deferred count is 0 and gr route sync pending is set, then |
| 13 | + do further processing of sending EOR and zebra gr update complete. |
| 14 | + This will send EOR as soon as the last route fib install ack is received. |
| 15 | + |
| 16 | +Testing: |
| 17 | +before: |
| 18 | +2020:2025/08/19 21:23:53.786402 BGP: [ZP3RE-J4Q8C] send End-of-RIB for IPv4 Unicast to swp1s1.3 |
| 19 | +2021:2025/08/19 21:23:53.786412 BGP: [ZP3RE-J4Q8C] send End-of-RIB for IPv6 Unicast to swp1s0.3 |
| 20 | +2022:2025/08/19 21:23:53.786415 BGP: [ZP3RE-J4Q8C] send End-of-RIB for IPv4 Unicast to swp1s0.3 |
| 21 | +2511:2025/08/19 21:23:54.162310 BGP: [TN0HX-6G1RR] u1:s5 send UPDATE w/ attr: , origin ?, mp_nexthop ::(::), path 64900 56000 |
| 22 | +2512:2025/08/19 21:23:54.162314 BGP: [H06SA-0JAPR] u1:s5 send MP_REACH for afi/safi IPv4/unicast |
| 23 | +2513:2025/08/19 21:23:54.162316 BGP: [HVRWP-5R9NQ] u1:s5 send UPDATE 91.0.0.49/32 IPv4 unicast |
| 24 | + |
| 25 | +after: |
| 26 | +4270:2025/08/22 17:41:41.631993 BGP: [HVRWP-5R9NQ] u2:s2 send UPDATE 2003:1::/125 IPv6 unicast |
| 27 | +4271:2025/08/22 17:41:41.631998 BGP: [HVRWP-5R9NQ] u2:s2 send UPDATE 2003:7:2::/125 IPv6 unicast |
| 28 | +4272:2025/08/22 17:41:41.632003 BGP: [WEV7K-2GAQ5] u2:s2 send UPDATE len 116 (max message len: 65535) numpfx 2 |
| 29 | +4273:2025/08/22 17:41:41.632008 BGP: [JJ5V1-EZ0XX] u2:s2 swp1s1 send UPDATE w/ mp_nexthops 2003:0:1::1, fe80::1e34:daff:febe:4169 |
| 30 | +4274:2025/08/22 17:41:41.632041 BGP: [ZP3RE-J4Q8C] send End-of-RIB for IPv4 Unicast to swp1s1 |
| 31 | +4275:2025/08/22 17:41:41.632054 BGP: [ZP3RE-J4Q8C] send End-of-RIB for IPv6 Unicast to swp1s1 |
| 32 | + |
| 33 | +Signed-off-by: Vijayalaxmi Basavaraj <vbasavaraj@nvidia.com> |
| 34 | +Signed-off-by: Stepan Blyschak <stepanb@nvidia.com> |
| 35 | +--- |
| 36 | + bgpd/bgp_fsm.c | 13 +++++ |
| 37 | + bgpd/bgp_packet.c | 12 +++-- |
| 38 | + bgpd/bgp_route.c | 123 +++++++++++++++++++++++++++++++++++++++------- |
| 39 | + bgpd/bgp_route.h | 3 ++ |
| 40 | + bgpd/bgp_zebra.c | 8 ++- |
| 41 | + bgpd/bgpd.h | 11 +++-- |
| 42 | + 6 files changed, 141 insertions(+), 29 deletions(-) |
| 43 | + |
| 44 | +diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c |
| 45 | +index 540086c5e..eb8361e41 100644 |
| 46 | +--- a/bgpd/bgp_fsm.c |
| 47 | ++++ b/bgpd/bgp_fsm.c |
| 48 | +@@ -847,6 +847,19 @@ static void bgp_graceful_deferral_timer_expire(struct event *thread) |
| 49 | + safi = info->safi; |
| 50 | + bgp = info->bgp; |
| 51 | + |
| 52 | ++ /* Check if graceful restart deferral completion is needed */ |
| 53 | ++ if (BGP_SUPPRESS_FIB_ENABLED(bgp) && (bgp->gr_info[afi][safi].eor_required == bgp->gr_info[afi][safi].eor_received) && |
| 54 | ++ !bgp->gr_info[afi][safi].gr_deferred && bgp->gr_route_sync_pending) { |
| 55 | ++ if (BGP_DEBUG(graceful_restart, GRACEFUL_RESTART)) |
| 56 | ++ zlog_debug("%s: Triggering GR deferral completion from timer expiry for %s", |
| 57 | ++ bgp->name_pretty, get_afi_safi_str(afi, safi, false)); |
| 58 | ++ bgp->gr_info[afi][safi].eor_required = 0; |
| 59 | ++ bgp->gr_info[afi][safi].eor_received = 0; |
| 60 | ++ XFREE(MTYPE_TMP, info); |
| 61 | ++ bgp_process_gr_deferral_complete(bgp, afi, safi); |
| 62 | ++ return; |
| 63 | ++ } |
| 64 | ++ |
| 65 | + if (BGP_DEBUG(update, UPDATE_OUT)) |
| 66 | + zlog_debug( |
| 67 | + "afi %d, safi %d : graceful restart deferral timer expired", |
| 68 | +diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c |
| 69 | +index 13d610e1b..886adeffb 100644 |
| 70 | +--- a/bgpd/bgp_packet.c |
| 71 | ++++ b/bgpd/bgp_packet.c |
| 72 | +@@ -2545,12 +2545,14 @@ static int bgp_update_receive(struct peer_connection *connection, |
| 73 | + gr_info->eor_required, |
| 74 | + "EOR RCV", |
| 75 | + gr_info->eor_received); |
| 76 | +- if (gr_info->t_select_deferral) { |
| 77 | +- void *info = EVENT_ARG( |
| 78 | +- gr_info->t_select_deferral); |
| 79 | +- XFREE(MTYPE_TMP, info); |
| 80 | ++ if (!BGP_SUPPRESS_FIB_ENABLED(peer->bgp)) { |
| 81 | ++ if (gr_info->t_select_deferral) { |
| 82 | ++ void *info = EVENT_ARG( |
| 83 | ++ gr_info->t_select_deferral); |
| 84 | ++ XFREE(MTYPE_TMP, info); |
| 85 | ++ } |
| 86 | ++ event_cancel(&gr_info->t_select_deferral); |
| 87 | + } |
| 88 | +- event_cancel(&gr_info->t_select_deferral); |
| 89 | + gr_info->eor_required = 0; |
| 90 | + gr_info->eor_received = 0; |
| 91 | + /* Best path selection */ |
| 92 | +diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c |
| 93 | +index 6de269ea9..c743f56f1 100644 |
| 94 | +--- a/bgpd/bgp_route.c |
| 95 | ++++ b/bgpd/bgp_route.c |
| 96 | +@@ -3930,6 +3930,7 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_dest *dest, |
| 97 | + * Ensure that on uninstall that the INSTALL_PENDING |
| 98 | + * is no longer set |
| 99 | + */ |
| 100 | ++ bgp_dest_decrement_gr_fib_install_pending_count(dest); |
| 101 | + UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING); |
| 102 | + } |
| 103 | + |
| 104 | +@@ -4043,25 +4044,11 @@ void bgp_best_path_select_defer(struct bgp *bgp, afi_t afi, safi_t safi) |
| 105 | + |
| 106 | + /* Send EOR message when all routes are processed */ |
| 107 | + if (!bgp->gr_info[afi][safi].gr_deferred) { |
| 108 | +- bgp_send_delayed_eor(bgp); |
| 109 | +- /* Send route processing complete message to RIB */ |
| 110 | +- bgp_zebra_update(bgp, afi, safi, |
| 111 | +- ZEBRA_CLIENT_ROUTE_UPDATE_COMPLETE); |
| 112 | +- bgp->gr_info[afi][safi].route_sync = true; |
| 113 | +- |
| 114 | +- /* If this instance is all done, check for GR completion overall */ |
| 115 | +- FOREACH_AFI_SAFI_NSF (afi, safi) { |
| 116 | +- if (bgp->gr_info[afi][safi].af_enabled && |
| 117 | +- !bgp->gr_info[afi][safi].route_sync) { |
| 118 | +- route_sync_pending = true; |
| 119 | +- break; |
| 120 | +- } |
| 121 | +- } |
| 122 | +- |
| 123 | +- if (!route_sync_pending) { |
| 124 | +- bgp->gr_route_sync_pending = false; |
| 125 | +- bgp_update_gr_completion(); |
| 126 | +- } |
| 127 | ++ /* t_select_deferral will be NULL when either gr_route_fib_install_pending_cnt is 0 |
| 128 | ++ * or deferral timer for fib install expires |
| 129 | ++ */ |
| 130 | ++ if (!BGP_SUPPRESS_FIB_ENABLED(bgp) || !bgp->gr_info[afi][safi].t_select_deferral) |
| 131 | ++ bgp_process_gr_deferral_complete(bgp, afi, safi); |
| 132 | + return; |
| 133 | + } |
| 134 | + |
| 135 | +@@ -4079,6 +4066,104 @@ void bgp_best_path_select_defer(struct bgp *bgp, afi_t afi, safi_t safi) |
| 136 | + &bgp->gr_info[afi][safi].t_route_select); |
| 137 | + } |
| 138 | + |
| 139 | ++void bgp_process_gr_deferral_complete(struct bgp *bgp, afi_t afi, safi_t safi) |
| 140 | ++{ |
| 141 | ++ bool route_sync_pending = false; |
| 142 | ++ /* GR deferral wrap-up for one afi/safi: call bgp_send_delayed_eor(), notify zebra, mark route_sync, then re-check the whole instance */ |
| 143 | ++ bgp_send_delayed_eor(bgp); |
| 144 | ++ /* Send route processing complete message to RIB for the afi/safi passed in by the caller */ |
| 145 | ++ bgp_zebra_update(bgp, afi, safi, ZEBRA_CLIENT_ROUTE_UPDATE_COMPLETE); |
| 146 | ++ bgp->gr_info[afi][safi].route_sync = true; |
| 147 | ++ |
| 148 | ++ /* Overall GR completion check: the loop reuses the afi/safi parameters as iteration variables, so their incoming values are not used past this point; any af_enabled entry without route_sync keeps GR pending */ |
| 149 | ++ FOREACH_AFI_SAFI_NSF (afi, safi) { |
| 150 | ++ if (bgp->gr_info[afi][safi].af_enabled && !bgp->gr_info[afi][safi].route_sync) { |
| 151 | ++ route_sync_pending = true; |
| 152 | ++ break; |
| 153 | ++ } |
| 154 | ++ } |
| 155 | ++ |
| 156 | ++ if (!route_sync_pending) { |
| 157 | ++ bgp->gr_route_sync_pending = false; |
| 158 | ++ bgp_update_gr_completion(); |
| 159 | ++ } |
| 160 | ++} |
| 161 | ++ |
| 162 | ++/* Increment gr_route_fib_install_pending_cnt for dest's afi/safi when suppress-fib is enabled, gr_route_sync_pending is set and BGP_NODE_FIB_INSTALL_PENDING is not yet set on dest. This function does not set the flag itself; no-op when dest has no table. */ |
| 163 | ++void bgp_dest_increment_gr_fib_install_pending_count(struct bgp_dest *dest) |
| 164 | ++{ |
| 165 | ++ struct bgp_table *table = NULL; |
| 166 | ++ struct bgp *bgp = NULL; |
| 167 | ++ afi_t afi = AFI_UNSPEC; |
| 168 | ++ safi_t safi = SAFI_UNSPEC; |
| 169 | ++ |
| 170 | ++ table = bgp_dest_table(dest); |
| 171 | ++ if (!table) |
| 172 | ++ return; |
| 173 | ++ |
| 174 | ++ bgp = table->bgp; |
| 175 | ++ afi = table->afi; |
| 176 | ++ safi = table->safi; |
| 177 | ++ /* A dest already flagged pending is skipped, so a repeated install request for the same dest is not counted twice; the caller in bgp_zebra_route_install sets the flag right after this call */ |
| 178 | ++ if (BGP_SUPPRESS_FIB_ENABLED(bgp) && bgp->gr_route_sync_pending && |
| 179 | ++ !CHECK_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING)) { |
| 180 | ++ bgp->gr_info[afi][safi].gr_route_fib_install_pending_cnt++; |
| 181 | ++ if (BGP_DEBUG(graceful_restart, GRACEFUL_RESTART)) |
| 182 | ++ zlog_debug("%s: GR route FIB install count incremented to %u for %s (prefix: %pBD)", |
| 183 | ++ bgp->name_pretty, |
| 184 | ++ bgp->gr_info[afi][safi].gr_route_fib_install_pending_cnt, |
| 185 | ++ get_afi_safi_str(afi, safi, false), dest); |
| 186 | ++ } |
| 187 | ++} |
| 188 | ++ |
| 189 | ++/* Decrement gr_route_fib_install_pending_cnt for dest's afi/safi (only with suppress-fib enabled, gr_route_sync_pending set, dest flagged BGP_NODE_FIB_INSTALL_PENDING and a non-zero counter), then complete GR deferral once nothing is deferred or awaiting FIB install. Must be called before the caller clears the flag. No-op when dest has no table. */ |
| 190 | ++void bgp_dest_decrement_gr_fib_install_pending_count(struct bgp_dest *dest) |
| 191 | ++{ |
| 192 | ++ struct bgp_table *table = NULL; |
| 193 | ++ struct bgp *bgp = NULL; |
| 194 | ++ afi_t afi = 0; |
| 195 | ++ safi_t safi = 0; |
| 196 | ++ |
| 197 | ++ table = bgp_dest_table(dest); |
| 198 | ++ if (!table) |
| 199 | ++ return; |
| 200 | ++ |
| 201 | ++ bgp = table->bgp; |
| 202 | ++ afi = table->afi; |
| 203 | ++ safi = table->safi; |
| 204 | ++ |
| 205 | ++ if (BGP_SUPPRESS_FIB_ENABLED(bgp) && bgp->gr_route_sync_pending && |
| 206 | ++ CHECK_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING) && |
| 207 | ++ bgp->gr_info[afi][safi].gr_route_fib_install_pending_cnt > 0) { |
| 208 | ++ bgp->gr_info[afi][safi].gr_route_fib_install_pending_cnt--; |
| 209 | ++ if (BGP_DEBUG(graceful_restart, GRACEFUL_RESTART)) |
| 210 | ++ zlog_debug("%s: GR route FIB install count decremented to %u for %s (prefix: %pBD)", |
| 211 | ++ bgp->name_pretty, |
| 212 | ++ bgp->gr_info[afi][safi].gr_route_fib_install_pending_cnt, |
| 213 | ++ get_afi_safi_str(afi, safi, false), dest); |
| 214 | ++ } |
| 215 | ++ |
| 216 | ++ /* Check if graceful restart deferral completion is needed. Gated on suppress-fib: without it the counter is never incremented and always reads 0, so an uninstall during GR would otherwise cancel the deferral timer and complete GR early. */ |
| 217 | ++ if (BGP_SUPPRESS_FIB_ENABLED(bgp) && !bgp->gr_info[afi][safi].gr_deferred && |
| 218 | ++ !bgp->gr_info[afi][safi].gr_route_fib_install_pending_cnt && |
| 219 | ++ bgp->gr_route_sync_pending) { |
| 220 | ++ struct graceful_restart_info *gr_info = &(bgp->gr_info[afi][safi]); |
| 221 | ++ |
| 222 | ++ if (gr_info->t_select_deferral) { |
| 223 | ++ void *info = EVENT_ARG(gr_info->t_select_deferral); |
| 224 | ++ |
| 225 | ++ XFREE(MTYPE_TMP, info); |
| 226 | ++ } |
| 227 | ++ event_cancel(&gr_info->t_select_deferral); |
| 228 | ++ if (BGP_DEBUG(graceful_restart, GRACEFUL_RESTART)) |
| 229 | ++ zlog_debug("%s: Triggering GR deferral completion from FIB notification for %s", |
| 230 | ++ bgp->name_pretty, get_afi_safi_str(afi, safi, false)); |
| 231 | ++ bgp_process_gr_deferral_complete(bgp, afi, safi); |
| 232 | ++ } |
| 233 | ++} |
| 234 | ++ |
| 235 | ++ |
| 236 | ++ |
| 237 | + static const char *subqueue2str(enum meta_queue_indexes index) |
| 238 | + { |
| 239 | + switch (index) { |
| 240 | +diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h |
| 241 | +index f3323b86a..fe486f12f 100644 |
| 242 | +--- a/bgpd/bgp_route.h |
| 243 | ++++ b/bgpd/bgp_route.h |
| 244 | +@@ -975,6 +975,9 @@ extern int bgp_show_table_rd(struct vty *vty, struct bgp *bgp, afi_t afi, safi_t |
| 245 | + enum bgp_show_type type, void *output_arg, |
| 246 | + uint16_t show_flags); |
| 247 | + extern void bgp_best_path_select_defer(struct bgp *bgp, afi_t afi, safi_t safi); |
| 248 | ++extern void bgp_dest_increment_gr_fib_install_pending_count(struct bgp_dest *dest); |
| 249 | ++extern void bgp_dest_decrement_gr_fib_install_pending_count(struct bgp_dest *dest); |
| 250 | ++extern void bgp_process_gr_deferral_complete(struct bgp *bgp, afi_t afi, safi_t safi); |
| 251 | + extern bool bgp_update_martian_nexthop(struct bgp *bgp, afi_t afi, safi_t safi, |
| 252 | + uint8_t type, uint8_t stype, |
| 253 | + struct attr *attr, struct bgp_dest *dest); |
| 254 | +diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c |
| 255 | +index c7773a676..045d0acf6 100644 |
| 256 | +--- a/bgpd/bgp_zebra.c |
| 257 | ++++ b/bgpd/bgp_zebra.c |
| 258 | +@@ -1906,12 +1906,15 @@ void bgp_zebra_route_install(struct bgp_dest *dest, struct bgp_path_info *info, |
| 259 | + * let's set the fact that we expect this route to be installed |
| 260 | + */ |
| 261 | + if (install) { |
| 262 | +- if (BGP_SUPPRESS_FIB_ENABLED(bgp)) |
| 263 | ++ if (BGP_SUPPRESS_FIB_ENABLED(bgp)) { |
| 264 | ++ bgp_dest_increment_gr_fib_install_pending_count(dest); |
| 265 | + SET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING); |
| 266 | ++ } |
| 267 | + |
| 268 | + if (bgp->main_zebra_update_hold && !is_evpn) |
| 269 | + return; |
| 270 | + } else { |
| 271 | ++ bgp_dest_decrement_gr_fib_install_pending_count(dest); |
| 272 | + UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING); |
| 273 | + } |
| 274 | + |
| 275 | +@@ -2888,6 +2891,7 @@ static int bgp_zebra_route_notify_owner(int command, struct zclient *zclient, |
| 276 | + case ZAPI_ROUTE_INSTALLED: |
| 277 | + new_select = NULL; |
| 278 | + /* Clear the flags so that route can be processed */ |
| 279 | ++ bgp_dest_decrement_gr_fib_install_pending_count(dest); |
| 280 | + UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING); |
| 281 | + SET_FLAG(dest->flags, BGP_NODE_FIB_INSTALLED); |
| 282 | + if (BGP_DEBUG(zebra, ZEBRA)) |
| 283 | +@@ -2924,6 +2928,7 @@ static int bgp_zebra_route_notify_owner(int command, struct zclient *zclient, |
| 284 | + if (BGP_DEBUG(zebra, ZEBRA)) |
| 285 | + zlog_debug("route: %pBD Failed to Install into Fib", |
| 286 | + dest); |
| 287 | ++ bgp_dest_decrement_gr_fib_install_pending_count(dest); |
| 288 | + UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING); |
| 289 | + UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALLED); |
| 290 | + for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) { |
| 291 | +@@ -2939,6 +2944,7 @@ static int bgp_zebra_route_notify_owner(int command, struct zclient *zclient, |
| 292 | + zlog_debug("route: %pBD removed due to better admin won", |
| 293 | + dest); |
| 294 | + new_select = NULL; |
| 295 | ++ bgp_dest_decrement_gr_fib_install_pending_count(dest); |
| 296 | + UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING); |
| 297 | + UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALLED); |
| 298 | + for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) { |
| 299 | +diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h |
| 300 | +index 5c674b1ee..f1669a340 100644 |
| 301 | +--- a/bgpd/bgpd.h |
| 302 | ++++ b/bgpd/bgpd.h |
| 303 | +@@ -326,10 +326,11 @@ enum bgp_instance_type { |
| 304 | + }; |
| 305 | + |
| 306 | + #define BGP_SEND_EOR(bgp, afi, safi) \ |
| 307 | +- (!CHECK_FLAG(bgp->flags, BGP_FLAG_GR_DISABLE_EOR) \ |
| 308 | +- && ((bgp->gr_info[afi][safi].t_select_deferral == NULL) \ |
| 309 | +- || (bgp->gr_info[afi][safi].eor_required \ |
| 310 | +- == bgp->gr_info[afi][safi].eor_received))) |
| 311 | ++ (!CHECK_FLAG(bgp->flags, BGP_FLAG_GR_DISABLE_EOR) \ |
| 312 | ++ && ((bgp->gr_info[afi][safi].t_select_deferral == NULL) \ |
| 313 | ++ || (bgp->gr_info[afi][safi].eor_required \ |
| 314 | ++ == bgp->gr_info[afi][safi].eor_received)) \ |
| 315 | ++ && (!BGP_SUPPRESS_FIB_ENABLED(bgp) || !bgp->gr_info[afi][safi].t_select_deferral)) |
| 316 | + |
| 317 | + /* BGP GR Global ds */ |
| 318 | + |
| 319 | +@@ -346,6 +347,8 @@ struct graceful_restart_info { |
| 320 | + struct event *t_select_deferral; |
| 321 | + /* Routes Deferred */ |
| 322 | + uint32_t gr_deferred; |
| 323 | ++ /* Routes waiting for FIB install */ |
| 324 | ++ uint32_t gr_route_fib_install_pending_cnt; |
| 325 | + /* Best route select */ |
| 326 | + struct event *t_route_select; |
| 327 | + /* AFI, SAFI enabled */ |
| 328 | +-- |
| 329 | +2.39.5 |
| 330 | + |
0 commit comments