Skip to content

Commit 049fcda

Browse files
vivekrnvstepanblyschakdgsudharsan
authored andcommitted
[FRR] send EOR during GR only when fib install complete (#25269)
Signed-off-by: Stepan Blyschak <stepanb@nvidia.com> Co-authored-by: Stepan Blyschak <stepanb@nvidia.com> Co-authored-by: Sudharsan Dhamal Gopalarathnam <sudharsand@nvidia.com> Signed-off-by: Feng Pan <fenpan@microsoft.com>
1 parent a264481 commit 049fcda

File tree

2 files changed

+331
-0
lines changed

2 files changed

+331
-0
lines changed
Lines changed: 330 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,330 @@
1+
From e623b92190e926c4ab797c6707c22ba15fa93eef Mon Sep 17 00:00:00 2001
2+
From: Stepan Blyschak <stepanb@nvidia.com>
3+
Date: Wed, 3 Dec 2025 15:43:18 +0000
4+
Subject: [PATCH] bgpd: send EOR during GR only when FIB install complete for
5+
suppress-fib enabled
6+
7+
Currently during GR, EOR is sent to neighbor prematurely for suppress fib enabled
8+
case. The fix below has been implemented.
9+
Keep a counter to track the routes awaiting FIB install. Increment the
10+
counter when bgp sends a route install to zebra, and decrement it when the
11+
fib install ack is received from zebra in bgp. When this count reaches
12+
zero, the route deferred count is 0, and gr route sync pending is set, then
13+
do further processing of sending EOR and zebra gr update complete.
14+
This will send EOR as soon as last route fib install ack is received.
15+
16+
Testing:
17+
before:
18+
2020:2025/08/19 21:23:53.786402 BGP: [ZP3RE-J4Q8C] send End-of-RIB for IPv4 Unicast to swp1s1.3
19+
2021:2025/08/19 21:23:53.786412 BGP: [ZP3RE-J4Q8C] send End-of-RIB for IPv6 Unicast to swp1s0.3
20+
2022:2025/08/19 21:23:53.786415 BGP: [ZP3RE-J4Q8C] send End-of-RIB for IPv4 Unicast to swp1s0.3
21+
2511:2025/08/19 21:23:54.162310 BGP: [TN0HX-6G1RR] u1:s5 send UPDATE w/ attr: , origin ?, mp_nexthop ::(::), path 64900 56000
22+
2512:2025/08/19 21:23:54.162314 BGP: [H06SA-0JAPR] u1:s5 send MP_REACH for afi/safi IPv4/unicast
23+
2513:2025/08/19 21:23:54.162316 BGP: [HVRWP-5R9NQ] u1:s5 send UPDATE 91.0.0.49/32 IPv4 unicast
24+
25+
after:
26+
4270:2025/08/22 17:41:41.631993 BGP: [HVRWP-5R9NQ] u2:s2 send UPDATE 2003:1::/125 IPv6 unicast
27+
4271:2025/08/22 17:41:41.631998 BGP: [HVRWP-5R9NQ] u2:s2 send UPDATE 2003:7:2::/125 IPv6 unicast
28+
4272:2025/08/22 17:41:41.632003 BGP: [WEV7K-2GAQ5] u2:s2 send UPDATE len 116 (max message len: 65535) numpfx 2
29+
4273:2025/08/22 17:41:41.632008 BGP: [JJ5V1-EZ0XX] u2:s2 swp1s1 send UPDATE w/ mp_nexthops 2003:0:1::1, fe80::1e34:daff:febe:4169
30+
4274:2025/08/22 17:41:41.632041 BGP: [ZP3RE-J4Q8C] send End-of-RIB for IPv4 Unicast to swp1s1
31+
4275:2025/08/22 17:41:41.632054 BGP: [ZP3RE-J4Q8C] send End-of-RIB for IPv6 Unicast to swp1s1
32+
33+
Signed-off-by: Vijayalaxmi Basavaraj <vbasavaraj@nvidia.com>
34+
Signed-off-by: Stepan Blyschak <stepanb@nvidia.com>
35+
---
36+
bgpd/bgp_fsm.c | 13 +++++
37+
bgpd/bgp_packet.c | 12 +++--
38+
bgpd/bgp_route.c | 123 +++++++++++++++++++++++++++++++++++++++-------
39+
bgpd/bgp_route.h | 3 ++
40+
bgpd/bgp_zebra.c | 8 ++-
41+
bgpd/bgpd.h | 11 +++--
42+
6 files changed, 141 insertions(+), 29 deletions(-)
43+
44+
diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c
45+
index 540086c5e..eb8361e41 100644
46+
--- a/bgpd/bgp_fsm.c
47+
+++ b/bgpd/bgp_fsm.c
48+
@@ -847,6 +847,19 @@ static void bgp_graceful_deferral_timer_expire(struct event *thread)
49+
safi = info->safi;
50+
bgp = info->bgp;
51+
52+
+ /* Check if graceful restart deferral completion is needed */
53+
+ if (BGP_SUPPRESS_FIB_ENABLED(bgp) && (bgp->gr_info[afi][safi].eor_required == bgp->gr_info[afi][safi].eor_received) &&
54+
+ !bgp->gr_info[afi][safi].gr_deferred && bgp->gr_route_sync_pending) {
55+
+ if (BGP_DEBUG(graceful_restart, GRACEFUL_RESTART))
56+
+ zlog_debug("%s: Triggering GR deferral completion from timer expiry for %s",
57+
+ bgp->name_pretty, get_afi_safi_str(afi, safi, false));
58+
+ bgp->gr_info[afi][safi].eor_required = 0;
59+
+ bgp->gr_info[afi][safi].eor_received = 0;
60+
+ XFREE(MTYPE_TMP, info);
61+
+ bgp_process_gr_deferral_complete(bgp, afi, safi);
62+
+ return;
63+
+ }
64+
+
65+
if (BGP_DEBUG(update, UPDATE_OUT))
66+
zlog_debug(
67+
"afi %d, safi %d : graceful restart deferral timer expired",
68+
diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c
69+
index 13d610e1b..886adeffb 100644
70+
--- a/bgpd/bgp_packet.c
71+
+++ b/bgpd/bgp_packet.c
72+
@@ -2545,12 +2545,14 @@ static int bgp_update_receive(struct peer_connection *connection,
73+
gr_info->eor_required,
74+
"EOR RCV",
75+
gr_info->eor_received);
76+
- if (gr_info->t_select_deferral) {
77+
- void *info = EVENT_ARG(
78+
- gr_info->t_select_deferral);
79+
- XFREE(MTYPE_TMP, info);
80+
+ if (!BGP_SUPPRESS_FIB_ENABLED(peer->bgp)) {
81+
+ if (gr_info->t_select_deferral) {
82+
+ void *info = EVENT_ARG(
83+
+ gr_info->t_select_deferral);
84+
+ XFREE(MTYPE_TMP, info);
85+
+ }
86+
+ event_cancel(&gr_info->t_select_deferral);
87+
}
88+
- event_cancel(&gr_info->t_select_deferral);
89+
gr_info->eor_required = 0;
90+
gr_info->eor_received = 0;
91+
/* Best path selection */
92+
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c
93+
index 6de269ea9..c743f56f1 100644
94+
--- a/bgpd/bgp_route.c
95+
+++ b/bgpd/bgp_route.c
96+
@@ -3930,6 +3930,7 @@ static void bgp_process_main_one(struct bgp *bgp, struct bgp_dest *dest,
97+
* Ensure that on uninstall that the INSTALL_PENDING
98+
* is no longer set
99+
*/
100+
+ bgp_dest_decrement_gr_fib_install_pending_count(dest);
101+
UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING);
102+
}
103+
104+
@@ -4043,25 +4044,11 @@ void bgp_best_path_select_defer(struct bgp *bgp, afi_t afi, safi_t safi)
105+
106+
/* Send EOR message when all routes are processed */
107+
if (!bgp->gr_info[afi][safi].gr_deferred) {
108+
- bgp_send_delayed_eor(bgp);
109+
- /* Send route processing complete message to RIB */
110+
- bgp_zebra_update(bgp, afi, safi,
111+
- ZEBRA_CLIENT_ROUTE_UPDATE_COMPLETE);
112+
- bgp->gr_info[afi][safi].route_sync = true;
113+
-
114+
- /* If this instance is all done, check for GR completion overall */
115+
- FOREACH_AFI_SAFI_NSF (afi, safi) {
116+
- if (bgp->gr_info[afi][safi].af_enabled &&
117+
- !bgp->gr_info[afi][safi].route_sync) {
118+
- route_sync_pending = true;
119+
- break;
120+
- }
121+
- }
122+
-
123+
- if (!route_sync_pending) {
124+
- bgp->gr_route_sync_pending = false;
125+
- bgp_update_gr_completion();
126+
- }
127+
+ /* t_select_deferral will be NULL when either gr_route_fib_install_pending_cnt is 0
128+
+ * or deferral timer for fib install expires
129+
+ */
130+
+ if (!BGP_SUPPRESS_FIB_ENABLED(bgp) || !bgp->gr_info[afi][safi].t_select_deferral)
131+
+ bgp_process_gr_deferral_complete(bgp, afi, safi);
132+
return;
133+
}
134+
135+
@@ -4079,6 +4066,104 @@ void bgp_best_path_select_defer(struct bgp *bgp, afi_t afi, safi_t safi)
136+
&bgp->gr_info[afi][safi].t_route_select);
137+
}
138+
139+
+void bgp_process_gr_deferral_complete(struct bgp *bgp, afi_t afi, safi_t safi)
140+
+{
141+
+ bool route_sync_pending = false;
142+
+
143+
+ bgp_send_delayed_eor(bgp);
144+
+ /* Send route processing complete message to RIB */
145+
+ bgp_zebra_update(bgp, afi, safi, ZEBRA_CLIENT_ROUTE_UPDATE_COMPLETE);
146+
+ bgp->gr_info[afi][safi].route_sync = true;
147+
+
148+
+ /* If this instance is all done, check for GR completion overall */
149+
+ FOREACH_AFI_SAFI_NSF (afi, safi) {
150+
+ if (bgp->gr_info[afi][safi].af_enabled && !bgp->gr_info[afi][safi].route_sync) {
151+
+ route_sync_pending = true;
152+
+ break;
153+
+ }
154+
+ }
155+
+
156+
+ if (!route_sync_pending) {
157+
+ bgp->gr_route_sync_pending = false;
158+
+ bgp_update_gr_completion();
159+
+ }
160+
+}
161+
+
162+
+/* This function increments gr_route_fib_install_pending_cnt if needed based on BGP_NODE_FIB_INSTALL_PENDING flag */
163+
+void bgp_dest_increment_gr_fib_install_pending_count(struct bgp_dest *dest)
164+
+{
165+
+ struct bgp_table *table = NULL;
166+
+ struct bgp *bgp = NULL;
167+
+ afi_t afi = AFI_UNSPEC;
168+
+ safi_t safi = SAFI_UNSPEC;
169+
+
170+
+ table = bgp_dest_table(dest);
171+
+ if (!table)
172+
+ return;
173+
+
174+
+ bgp = table->bgp;
175+
+ afi = table->afi;
176+
+ safi = table->safi;
177+
+
178+
+ if (BGP_SUPPRESS_FIB_ENABLED(bgp) && bgp->gr_route_sync_pending &&
179+
+ !CHECK_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING)) {
180+
+ bgp->gr_info[afi][safi].gr_route_fib_install_pending_cnt++;
181+
+ if (BGP_DEBUG(graceful_restart, GRACEFUL_RESTART))
182+
+ zlog_debug("%s: GR route FIB install count incremented to %u for %s (prefix: %pBD)",
183+
+ bgp->name_pretty,
184+
+ bgp->gr_info[afi][safi].gr_route_fib_install_pending_cnt,
185+
+ get_afi_safi_str(afi, safi, false), dest);
186+
+ }
187+
+}
188+
+
189+
+/* This function decrements gr_route_fib_install_pending_cnt if needed based on BGP_NODE_FIB_INSTALL_PENDING flag */
190+
+void bgp_dest_decrement_gr_fib_install_pending_count(struct bgp_dest *dest)
191+
+{
192+
+ struct bgp_table *table = NULL;
193+
+ struct bgp *bgp = NULL;
194+
+ afi_t afi = 0;
195+
+ safi_t safi = 0;
196+
+
197+
+ table = bgp_dest_table(dest);
198+
+ if (!table)
199+
+ return;
200+
+
201+
+ bgp = table->bgp;
202+
+ afi = table->afi;
203+
+ safi = table->safi;
204+
+
205+
+ if (BGP_SUPPRESS_FIB_ENABLED(bgp) && bgp->gr_route_sync_pending &&
206+
+ CHECK_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING) &&
207+
+ bgp->gr_info[afi][safi].gr_route_fib_install_pending_cnt > 0) {
208+
+ bgp->gr_info[afi][safi].gr_route_fib_install_pending_cnt--;
209+
+ if (BGP_DEBUG(graceful_restart, GRACEFUL_RESTART))
210+
+ zlog_debug("%s: GR route FIB install count decremented to %u for %s (prefix: %pBD)",
211+
+ bgp->name_pretty,
212+
+ bgp->gr_info[afi][safi].gr_route_fib_install_pending_cnt,
213+
+ get_afi_safi_str(afi, safi, false), dest);
214+
+ }
215+
+
216+
+ /* Check if graceful restart deferral completion is needed */
217+
+ if (!bgp->gr_info[afi][safi].gr_deferred &&
218+
+ !bgp->gr_info[afi][safi].gr_route_fib_install_pending_cnt &&
219+
+ bgp->gr_route_sync_pending) {
220+
+ struct graceful_restart_info *gr_info = &(bgp->gr_info[afi][safi]);
221+
+
222+
+ if (gr_info->t_select_deferral) {
223+
+ void *info = EVENT_ARG(gr_info->t_select_deferral);
224+
+
225+
+ XFREE(MTYPE_TMP, info);
226+
+ }
227+
+ event_cancel(&gr_info->t_select_deferral);
228+
+ if (BGP_DEBUG(graceful_restart, GRACEFUL_RESTART))
229+
+ zlog_debug("%s: Triggering GR deferral completion from FIB notification for %s",
230+
+ bgp->name_pretty, get_afi_safi_str(afi, safi, false));
231+
+ bgp_process_gr_deferral_complete(bgp, afi, safi);
232+
+ }
233+
+}
234+
+
235+
+
236+
+
237+
static const char *subqueue2str(enum meta_queue_indexes index)
238+
{
239+
switch (index) {
240+
diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h
241+
index f3323b86a..fe486f12f 100644
242+
--- a/bgpd/bgp_route.h
243+
+++ b/bgpd/bgp_route.h
244+
@@ -975,6 +975,9 @@ extern int bgp_show_table_rd(struct vty *vty, struct bgp *bgp, afi_t afi, safi_t
245+
enum bgp_show_type type, void *output_arg,
246+
uint16_t show_flags);
247+
extern void bgp_best_path_select_defer(struct bgp *bgp, afi_t afi, safi_t safi);
248+
+extern void bgp_dest_increment_gr_fib_install_pending_count(struct bgp_dest *dest);
249+
+extern void bgp_dest_decrement_gr_fib_install_pending_count(struct bgp_dest *dest);
250+
+extern void bgp_process_gr_deferral_complete(struct bgp *bgp, afi_t afi, safi_t safi);
251+
extern bool bgp_update_martian_nexthop(struct bgp *bgp, afi_t afi, safi_t safi,
252+
uint8_t type, uint8_t stype,
253+
struct attr *attr, struct bgp_dest *dest);
254+
diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c
255+
index c7773a676..045d0acf6 100644
256+
--- a/bgpd/bgp_zebra.c
257+
+++ b/bgpd/bgp_zebra.c
258+
@@ -1906,12 +1906,15 @@ void bgp_zebra_route_install(struct bgp_dest *dest, struct bgp_path_info *info,
259+
* let's set the fact that we expect this route to be installed
260+
*/
261+
if (install) {
262+
- if (BGP_SUPPRESS_FIB_ENABLED(bgp))
263+
+ if (BGP_SUPPRESS_FIB_ENABLED(bgp)) {
264+
+ bgp_dest_increment_gr_fib_install_pending_count(dest);
265+
SET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING);
266+
+ }
267+
268+
if (bgp->main_zebra_update_hold && !is_evpn)
269+
return;
270+
} else {
271+
+ bgp_dest_decrement_gr_fib_install_pending_count(dest);
272+
UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING);
273+
}
274+
275+
@@ -2888,6 +2891,7 @@ static int bgp_zebra_route_notify_owner(int command, struct zclient *zclient,
276+
case ZAPI_ROUTE_INSTALLED:
277+
new_select = NULL;
278+
/* Clear the flags so that route can be processed */
279+
+ bgp_dest_decrement_gr_fib_install_pending_count(dest);
280+
UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING);
281+
SET_FLAG(dest->flags, BGP_NODE_FIB_INSTALLED);
282+
if (BGP_DEBUG(zebra, ZEBRA))
283+
@@ -2924,6 +2928,7 @@ static int bgp_zebra_route_notify_owner(int command, struct zclient *zclient,
284+
if (BGP_DEBUG(zebra, ZEBRA))
285+
zlog_debug("route: %pBD Failed to Install into Fib",
286+
dest);
287+
+ bgp_dest_decrement_gr_fib_install_pending_count(dest);
288+
UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING);
289+
UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALLED);
290+
for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) {
291+
@@ -2939,6 +2944,7 @@ static int bgp_zebra_route_notify_owner(int command, struct zclient *zclient,
292+
zlog_debug("route: %pBD removed due to better admin won",
293+
dest);
294+
new_select = NULL;
295+
+ bgp_dest_decrement_gr_fib_install_pending_count(dest);
296+
UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALL_PENDING);
297+
UNSET_FLAG(dest->flags, BGP_NODE_FIB_INSTALLED);
298+
for (pi = bgp_dest_get_bgp_path_info(dest); pi; pi = pi->next) {
299+
diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h
300+
index 5c674b1ee..f1669a340 100644
301+
--- a/bgpd/bgpd.h
302+
+++ b/bgpd/bgpd.h
303+
@@ -326,10 +326,11 @@ enum bgp_instance_type {
304+
};
305+
306+
#define BGP_SEND_EOR(bgp, afi, safi) \
307+
- (!CHECK_FLAG(bgp->flags, BGP_FLAG_GR_DISABLE_EOR) \
308+
- && ((bgp->gr_info[afi][safi].t_select_deferral == NULL) \
309+
- || (bgp->gr_info[afi][safi].eor_required \
310+
- == bgp->gr_info[afi][safi].eor_received)))
311+
+ (!CHECK_FLAG(bgp->flags, BGP_FLAG_GR_DISABLE_EOR) \
312+
+ && ((bgp->gr_info[afi][safi].t_select_deferral == NULL) \
313+
+ || (bgp->gr_info[afi][safi].eor_required \
314+
+ == bgp->gr_info[afi][safi].eor_received)) \
315+
+ && (!BGP_SUPPRESS_FIB_ENABLED(bgp) || !bgp->gr_info[afi][safi].t_select_deferral))
316+
317+
/* BGP GR Global ds */
318+
319+
@@ -346,6 +347,8 @@ struct graceful_restart_info {
320+
struct event *t_select_deferral;
321+
/* Routes Deferred */
322+
uint32_t gr_deferred;
323+
+ /* Routes waiting for FIB install */
324+
+ uint32_t gr_route_fib_install_pending_cnt;
325+
/* Best route select */
326+
struct event *t_route_select;
327+
/* AFI, SAFI enabled */
328+
--
329+
2.39.5
330+

src/sonic-frr/patch/series

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,4 @@
6060
0098-SRv6-Add-support-for-multiple-SRv6-locators.patch
6161
0099-zebra-Fix-SRv6-explicit-SID-allocation-to-use-the-provided-locator.patch
6262
0100-bgpd-Allow-proper-shutdown-of-bgp-dynamic-peers.patch
63+
0101-bgpd-send-EOR-during-GR-only-when-FIB-install-comple.patch

0 commit comments

Comments
 (0)