Skip to content

Commit 85dc667

Browse files
vivek-cumulusPdoijode
authored andcommitted
*: Add and use option for graceful (re)start
Add a new start option "-K" to libfrr to denote a graceful start, and use it in zebra and bgpd. zebra will use this option to denote a planned FRR graceful restart (supporting only bgpd currently) to wait for a route sync completion from bgpd before cleaning up old stale routes from the FIB. An optional timer provides an upper-bounds for this cleanup. bgpd will use this option to denote either a planned FRR graceful restart or a bgpd-only graceful restart, and this will drive the BGP GR restarting router procedures. Signed-off-by: Vivek Venkatraman <[email protected]>
1 parent 5533e14 commit 85dc667

18 files changed

Lines changed: 217 additions & 67 deletions

bgpd/bgp_main.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,9 @@ int main(int argc, char **argv)
516516
bgp_option_set(BGP_OPT_NO_FIB);
517517
if (no_zebra_flag)
518518
bgp_option_set(BGP_OPT_NO_ZEBRA);
519-
SET_FLAG(bm->flags, BM_FLAG_GRACEFUL_RESTART);
519+
if (bgpd_di.graceful_restart) {
520+
SET_FLAG(bm->flags, BM_FLAG_GRACEFUL_RESTART);
521+
}
520522
bgp_error_init();
521523
/* Initializations. */
522524
libagentx_init();

lib/libfrr.c

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -102,37 +102,41 @@ static void opt_extend(const struct optspec *os)
102102
#define OPTION_SCRIPTDIR 1009
103103

104104
static const struct option lo_always[] = {
105-
{"help", no_argument, NULL, 'h'},
106-
{"version", no_argument, NULL, 'v'},
107-
{"daemon", no_argument, NULL, 'd'},
108-
{"module", no_argument, NULL, 'M'},
109-
{"profile", required_argument, NULL, 'F'},
110-
{"pathspace", required_argument, NULL, 'N'},
111-
{"vrfdefaultname", required_argument, NULL, 'o'},
112-
{"vty_socket", required_argument, NULL, OPTION_VTYSOCK},
113-
{"moduledir", required_argument, NULL, OPTION_MODULEDIR},
114-
{"scriptdir", required_argument, NULL, OPTION_SCRIPTDIR},
115-
{"log", required_argument, NULL, OPTION_LOG},
116-
{"log-level", required_argument, NULL, OPTION_LOGLEVEL},
117-
{"command-log-always", no_argument, NULL, OPTION_LOGGING},
118-
{"limit-fds", required_argument, NULL, OPTION_LIMIT_FDS},
119-
{NULL}};
105+
{ "help", no_argument, NULL, 'h' },
106+
{ "version", no_argument, NULL, 'v' },
107+
{ "daemon", no_argument, NULL, 'd' },
108+
{ "module", no_argument, NULL, 'M' },
109+
{ "profile", required_argument, NULL, 'F' },
110+
{ "pathspace", required_argument, NULL, 'N' },
111+
{ "vrfdefaultname", required_argument, NULL, 'o' },
112+
{ "graceful_restart", optional_argument, NULL, 'K' },
113+
{ "vty_socket", required_argument, NULL, OPTION_VTYSOCK },
114+
{ "moduledir", required_argument, NULL, OPTION_MODULEDIR },
115+
{ "scriptdir", required_argument, NULL, OPTION_SCRIPTDIR },
116+
{ "log", required_argument, NULL, OPTION_LOG },
117+
{ "log-level", required_argument, NULL, OPTION_LOGLEVEL },
118+
{ "command-log-always", no_argument, NULL, OPTION_LOGGING },
119+
{ "limit-fds", required_argument, NULL, OPTION_LIMIT_FDS },
120+
{ NULL }
121+
};
120122
static const struct optspec os_always = {
121-
"hvdM:F:N:o:",
123+
"hvdM:F:N:o:K::",
122124
" -h, --help Display this help and exit\n"
123125
" -v, --version Print program version\n"
124126
" -d, --daemon Runs in daemon mode\n"
125127
" -M, --module Load specified module\n"
126128
" -F, --profile Use specified configuration profile\n"
127129
" -N, --pathspace Insert prefix into config & socket paths\n"
128130
" -o, --vrfdefaultname Set default VRF name.\n"
131+
" -K, --graceful_restart FRR starting in Graceful Restart mode, with optional route-cleanup timer\n"
129132
" --vty_socket Override vty socket path\n"
130133
" --moduledir Override modules directory\n"
131134
" --scriptdir Override scripts directory\n"
132135
" --log Set Logging to stdout, syslog, or file:<name>\n"
133136
" --log-level Set Logging Level to use, debug, info, warn, etc\n"
134137
" --limit-fds Limit number of fds supported\n",
135-
lo_always};
138+
lo_always
139+
};
136140

137141
static bool logging_to_stdout = false; /* set when --log stdout specified */
138142

@@ -358,6 +362,8 @@ void frr_preinit(struct frr_daemon_info *daemon, int argc, char **argv)
358362
strlcpy(frr_protonameinst, di->logname, sizeof(frr_protonameinst));
359363

360364
di->cli_mode = FRR_CLI_CLASSIC;
365+
di->graceful_restart = false;
366+
di->gr_cleanup_time = 0;
361367

362368
/* we may be starting with extra FDs open for whatever purpose,
363369
* e.g. logging, some module, etc. Recording them here allows later
@@ -520,6 +526,11 @@ static int frr_opt(int opt)
520526
di->db_file = optarg;
521527
break;
522528
#endif
529+
case 'K':
530+
di->graceful_restart = true;
531+
if (optarg)
532+
di->gr_cleanup_time = atoi(optarg);
533+
break;
523534
case 'C':
524535
if (di->flags & FRR_NO_SPLIT_CONFIG)
525536
return 1;

lib/libfrr.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ struct frr_daemon_info {
118118
bool dryrun;
119119
bool daemon_mode;
120120
bool terminal;
121+
bool graceful_restart;
122+
int gr_cleanup_time;
121123
enum frr_cli_mode cli_mode;
122124

123125
struct event *read_in;

tests/topotests/bgp_gr_functionality_topo1/test_bgp_gr_functionality_topo1-1.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1047,8 +1047,9 @@ def test_BGP_GR_TC_4_p0(request):
10471047
tc_name, result
10481048
)
10491049

1050+
#R-bit must not be set if the helper node R2 restarts
10501051
result = verify_r_bit(tgen, topo, addr_type, input_dict, dut="r1", peer="r2")
1051-
assert result is True, "Testcase {} : Failed \n Error {}".format(
1052+
assert result is not True, "Testcase {} : Failed \n Error {}".format(
10521053
tc_name, result
10531054
)
10541055

@@ -1209,7 +1210,11 @@ def test_BGP_GR_TC_5_1_2_p1(request):
12091210
logger.info("[Phase 2] : Restart BGPd on router R2. ")
12101211
kill_router_daemons(tgen, "r2", ["bgpd"])
12111212

1213+
#Start BGP with -K option to start BGP gracefully
1214+
tgen.net["r2"].daemons_options["bgpd"] = "-K "
12121215
start_router_daemons(tgen, "r2", ["bgpd"])
1216+
#Unset -K after starting BGP
1217+
tgen.net["r2"].daemons_options["bgpd"] = ""
12131218

12141219
logger.info("[Phase 4] : R2 is UP now, so time to collect GR stats ")
12151220

tests/topotests/bgp_gr_functionality_topo1/test_bgp_gr_functionality_topo1-2.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,12 @@ def test_BGP_GR_TC_8_p1(request):
361361
kill_router_daemons(tgen, "r1", ["bgpd"])
362362

363363
logger.info("[Phase 3] : R1 is about to come up now ")
364+
#Set -K option to start BGP gracefully
365+
tgen.net["r1"].daemons_options["bgpd"] = "-K "
366+
364367
start_router_daemons(tgen, "r1", ["bgpd"])
368+
#Unset -K after starting BGP
369+
tgen.net["r1"].daemons_options["bgpd"] = ""
365370

366371
logger.info("[Phase 4] : R2 is UP now, so time to collect GR stats ")
367372

tests/topotests/bgp_gr_functionality_topo1/test_bgp_gr_functionality_topo1-3.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1577,7 +1577,12 @@ def test_BGP_GR_TC_9_p1(request):
15771577
)
15781578

15791579
logger.info("[Phase 5] : R2 is about to come up now ")
1580+
#Set -K option to start BGP gracefully
1581+
tgen.net["r2"].daemons_options["bgpd"] = "-K "
1582+
15801583
start_router_daemons(tgen, "r2", ["bgpd"])
1584+
#Unset -K after starting BGP
1585+
tgen.net["r2"].daemons_options["bgpd"] = ""
15811586

15821587
logger.info("[Phase 4] : R2 is UP now, so time to collect GR stats ")
15831588

tests/topotests/bgp_gr_functionality_topo2/test_bgp_gr_functionality_topo2-2.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,12 @@ def test_BGP_GR_chaos_28_p1(request):
548548
logger.info("[Step 3] : Start BGPd daemon on R1..")
549549

550550
# Start BGPd daemon on R1
551+
#Set -K option to start BGP gracefully
552+
tgen.net["r1"].daemons_options["bgpd"] = "-K "
551553
start_router_daemons(tgen, "r1", ["bgpd"])
554+
#Unset -K after starting BGP
555+
tgen.net["r1"].daemons_options["bgpd"] = ""
556+
552557

553558
logger.info("[Step 4] : Start BGPd daemon on R3..")
554559

tests/topotests/bgp_gr_functionality_topo2/test_bgp_gr_functionality_topo2-3.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -725,7 +725,11 @@ def test_BGP_GR_chaos_37_p1(request):
725725
logger.info("[Step 4] : Start BGPd daemon on R1..")
726726

727727
# Start BGPd daemon on R1
728+
#Set -K option to start BGP gracefully
729+
tgen.net["r1"].daemons_options["bgpd"] = "-K "
728730
start_router_daemons(tgen, "r1", ["bgpd"])
731+
#Unset -K after starting BGP
732+
tgen.net["r1"].daemons_options["bgpd"] = ""
729733

730734
logger.info("[Step 5] : Kill BGPd daemon on R3..")
731735

@@ -742,14 +746,19 @@ def test_BGP_GR_chaos_37_p1(request):
742746

743747
# Start BGPd daemon on R3
744748
start_router_daemons(tgen, "r3", ["bgpd"])
745-
749+
746750
for addr_type in ADDR_TYPES:
747751
# Verify r_bit
748-
result = verify_r_bit(tgen, topo, addr_type, input_dict, dut="r1", peer="r3")
752+
result = verify_r_bit(tgen, topo, addr_type, input_dict, dut="r3", peer="r1")
749753
assert result is True, "Testcase {} : Failed \n Error {}".format(
750754
tc_name, result
751755
)
752756

757+
result = verify_r_bit(tgen, topo, addr_type, input_dict, dut="r1", peer="r3")
758+
assert result is not True, "Testcase {} : Failed \n Error {}".format(
759+
tc_name, result
760+
)
761+
753762
# Verifying RIB routes
754763
input_dict_1 = {key: topo["routers"][key] for key in ["r3"]}
755764
result = verify_rib(tgen, addr_type, dut, input_dict_1)

zebra/main.c

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,6 @@ struct mgmt_be_client *mgmt_be_client;
6565
/* Route retain mode flag. */
6666
int retain_mode = 0;
6767

68-
int graceful_restart;
69-
7068
/* Receive buffer size for kernel control sockets */
7169
#define RCVBUFSIZE_MIN 4194304
7270
#ifdef HAVE_NETLINK
@@ -88,15 +86,14 @@ const struct option longopts[] = {
8886
{ "socket", required_argument, NULL, 'z' },
8987
{ "ecmp", required_argument, NULL, 'e' },
9088
{ "retain", no_argument, NULL, 'r' },
91-
{ "graceful_restart", required_argument, NULL, 'K' },
9289
{ "asic-offload", optional_argument, NULL, OPTION_ASIC_OFFLOAD },
9390
{ "v6-with-v4-nexthops", no_argument, NULL, OPTION_V6_WITH_V4_NEXTHOP },
9491
#ifdef HAVE_NETLINK
9592
{ "vrfwnetns", no_argument, NULL, 'n' },
9693
{ "nl-bufsize", required_argument, NULL, 's' },
9794
{ "v6-rr-semantics", no_argument, NULL, OPTION_V6_RR_SEMANTICS },
9895
#endif /* HAVE_NETLINK */
99-
{"routing-table", optional_argument, NULL, 'R'},
96+
{ "routing-table", optional_argument, NULL, 'R' },
10097
{ 0 }
10198
};
10299

@@ -326,7 +323,6 @@ int main(int argc, char **argv)
326323
bool v6_with_v4_nexthop = false;
327324
bool notify_on_ack = true;
328325

329-
graceful_restart = 0;
330326
vrf_configure_backend(VRF_BACKEND_VRF_LITE);
331327

332328
frr_preinit(&zebra_di, argc, argv);
@@ -342,7 +338,6 @@ int main(int argc, char **argv)
342338
" -z, --socket Set path of zebra socket\n"
343339
" -e, --ecmp Specify ECMP to use.\n"
344340
" -r, --retain When program terminates, retain added route by zebra.\n"
345-
" -K, --graceful_restart Graceful restart at the kernel level, timer in seconds for expiration\n"
346341
" -A, --asic-offload FRR is interacting with an asic underneath the linux kernel\n"
347342
" --v6-with-v4-nexthops Underlying dataplane supports v6 routes with v4 nexthops"
348343
#ifdef HAVE_NETLINK
@@ -352,8 +347,7 @@ int main(int argc, char **argv)
352347
#else
353348
" -s, Set kernel socket receive buffer size\n"
354349
#endif /* HAVE_NETLINK */
355-
" -R, --routing-table Set kernel routing table\n"
356-
);
350+
" -R, --routing-table Set kernel routing table\n");
357351

358352
while (1) {
359353
int opt = frr_getopt(argc, argv, NULL);
@@ -397,9 +391,6 @@ int main(int argc, char **argv)
397391
case 'r':
398392
retain_mode = 1;
399393
break;
400-
case 'K':
401-
graceful_restart = atoi(optarg);
402-
break;
403394
case 's':
404395
rcvbufsize = atoi(optarg);
405396
if (rcvbufsize < RCVBUFSIZE_MIN)
@@ -488,11 +479,25 @@ int main(int argc, char **argv)
488479
* Clean up zebra-originated routes. The requests will be sent to OS
489480
* immediately, so originating PID in notifications from kernel
490481
* will be equal to the current getpid(). To know about such routes,
491-
* we have to have route_read() called before.
482+
* we have to have route_read() called before.
483+
* If FRR is gracefully restarting, we either wait for clients
484+
* (e.g., BGP) to signal GR is complete else we wait for specified
485+
* duration.
492486
*/
493487
zrouter.startup_time = monotime(NULL);
494-
event_add_timer(zrouter.master, rib_sweep_route, NULL, graceful_restart,
495-
&zrouter.sweeper);
488+
zrouter.rib_sweep_time = 0;
489+
zrouter.graceful_restart = zebra_di.graceful_restart;
490+
if (!zrouter.graceful_restart)
491+
event_add_timer(zrouter.master, rib_sweep_route, NULL, 0, NULL);
492+
else {
493+
int gr_cleanup_time;
494+
495+
gr_cleanup_time = zebra_di.gr_cleanup_time
496+
? zebra_di.gr_cleanup_time
497+
: ZEBRA_GR_DEFAULT_RIB_SWEEP_TIME;
498+
event_add_timer(zrouter.master, rib_sweep_route, NULL,
499+
gr_cleanup_time, &zrouter.t_rib_sweep);
500+
}
496501

497502
/* Needed for BSD routing socket. */
498503
pid = getpid();

zebra/rib.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -622,10 +622,10 @@ static inline struct nexthop_group *rib_get_fib_backup_nhg(
622622
}
623623

624624
extern void zebra_gr_process_client(afi_t afi, vrf_id_t vrf_id, uint8_t proto,
625-
uint8_t instance);
625+
uint8_t instance, time_t restart_time);
626626

627627
extern int rib_add_gr_run(afi_t afi, vrf_id_t vrf_id, uint8_t proto,
628-
uint8_t instance);
628+
uint8_t instance, time_t restart_time);
629629

630630
extern void zebra_vty_init(void);
631631

0 commit comments

Comments
 (0)