Skip to content

Commit 596d88c

Browse files
authored
Added Change for given Route ECMP to fallback on Default Route ECMP (#3389)
* Added Change for given Route ECMP to fallback on Default Route ECMP (#3389) What I did: Added a change that lets a given route's ECMP fall back on the default route's ECMP. When all the members of a route are link-down and the route is eligible for fallback to the default route, the ECMP members in the SAI nexthop group are updated to the default route's nexthop (or its nexthop group members). This change does not take care of the following scenarios: (1) When a route has fallen back on the default route's nexthops and one of its original nexthops becomes active again [link comes up], the route does not move back to its original path. The reason is that we expect this to be a transient case, since a route that has fallen back should get deleted once all of its links are down. (2) If the default route gets updated [BGP updates], or if the default route's nexthops go link-down, we do not update the ECMP members of routes that have already fallen back to the default route. Again, the reason is that a route that has fallen back should get deleted once all of its links are down, and receiving a default route update during this short window is very much a corner case. We can optimize if needed.
1 parent d35c1c7 commit 596d88c

4 files changed

Lines changed: 461 additions & 11 deletions

File tree

orchagent/routeorch.cpp

Lines changed: 176 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,71 @@ void RouteOrch::detach(Observer *observer, const IpAddress& dstAddr, sai_object_
383383
}
384384
}
385385

386+
void RouteOrch::updateDefaultRouteSwapSet(const NextHopGroupKey default_nhg_key, std::set<NextHopKey>& active_default_route_nhops)
387+
{
388+
std::set<NextHopKey> current_default_route_nhops;
389+
current_default_route_nhops.clear();
390+
391+
if (default_nhg_key.getSize() == 1)
392+
{
393+
current_default_route_nhops.insert(*default_nhg_key.getNextHops().begin());
394+
}
395+
else
396+
{
397+
auto nhgm = m_syncdNextHopGroups[default_nhg_key].nhopgroup_members;
398+
for (auto nhop = nhgm.begin(); nhop != nhgm.end(); ++nhop)
399+
{
400+
current_default_route_nhops.insert(nhop->first);
401+
}
402+
}
403+
404+
active_default_route_nhops.clear();
405+
std::copy(current_default_route_nhops.begin(), current_default_route_nhops.end(), std::inserter(active_default_route_nhops, active_default_route_nhops.begin()));
406+
}
407+
408+
/*
 * Add the default route's next hops as members of an existing next hop group.
 *
 * original_next_hop_group    - group entry whose SAI group
 *                              (next_hop_group_id) receives the new members.
 * default_route_next_hop_set - next hops to install as members.
 *
 * For every next hop in the set a SAI next hop group member is created.
 * After each create the member id is stored in
 * original_next_hop_group.default_route_nhopgroup_members (seq_id 0), the
 * next hop reference count and the CRM next-hop-group-member counter are
 * incremented, and is_default_route_nh_swap is set on the group entry.
 *
 * Returns true once the whole set has been processed. If a create fails and
 * handleSaiCreateStatus() does not report task_success, the result of
 * parseHandleSaiStatusFailure() is returned immediately; bookkeeping done for
 * earlier next hops in the set is left as is.
 */
bool RouteOrch::addDefaultRouteNexthopsInNextHopGroup(NextHopGroupEntry& original_next_hop_group, std::set<NextHopKey>& default_route_next_hop_set)
{
    SWSS_LOG_ENTER();

    // Iterate by const reference: copying a NextHopKey per member is unnecessary.
    for (const auto& default_nhop : default_route_next_hop_set)
    {
        // Value-initialized so that an indeterminate id is never recorded
        // below if a failed create is tolerated by handleSaiCreateStatus().
        sai_object_id_t nexthop_group_member_id{};

        vector<sai_attribute_t> nhgm_attrs;
        sai_attribute_t nhgm_attr;

        nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID;
        nhgm_attr.value.oid = original_next_hop_group.next_hop_group_id;
        nhgm_attrs.push_back(nhgm_attr);

        nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID;
        nhgm_attr.value.oid = m_neighOrch->getNextHopId(default_nhop);
        nhgm_attrs.push_back(nhgm_attr);

        const sai_status_t status = sai_next_hop_group_api->create_next_hop_group_member(&nexthop_group_member_id, gSwitchId,
                                                                                         (uint32_t)nhgm_attrs.size(),
                                                                                         nhgm_attrs.data());

        if (status != SAI_STATUS_SUCCESS)
        {
            SWSS_LOG_ERROR("Default Route Swap Failed to add next hop member to group %" PRIx64 ": %d\n",
                           original_next_hop_group.next_hop_group_id, status);
            task_process_status handle_status = handleSaiCreateStatus(SAI_API_NEXT_HOP_GROUP, status);
            if (handle_status != task_success)
            {
                return parseHandleSaiStatusFailure(handle_status);
            }
        }

        // Increment the Default Route Active NH Reference Count
        m_neighOrch->increaseNextHopRefCount(default_nhop);
        gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER);

        // Single map lookup for both fields of the member record.
        auto& swapped_member = original_next_hop_group.default_route_nhopgroup_members[default_nhop];
        swapped_member.next_hop_id = nexthop_group_member_id;
        swapped_member.seq_id = 0;

        original_next_hop_group.is_default_route_nh_swap = true;
    }

    return true;
}
450+
386451
bool RouteOrch::validnexthopinNextHopGroup(const NextHopKey &nexthop, uint32_t& count)
387452
{
388453
SWSS_LOG_ENTER();
@@ -400,6 +465,13 @@ bool RouteOrch::validnexthopinNextHopGroup(const NextHopKey &nexthop, uint32_t&
400465
continue;
401466
}
402467

468+
// This route's nexthop group has been swapped to default route nexthop members; do not add the nexthop again.
469+
// Wait for Nexthop Group Cleanup
470+
if (nhopgroup->second.is_default_route_nh_swap)
471+
{
472+
continue;
473+
}
474+
403475
vector<sai_attribute_t> nhgm_attrs;
404476
sai_attribute_t nhgm_attr;
405477

@@ -446,6 +518,9 @@ bool RouteOrch::validnexthopinNextHopGroup(const NextHopKey &nexthop, uint32_t&
446518
++count;
447519
gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER);
448520
nhopgroup->second.nhopgroup_members[nexthop].next_hop_id = nexthop_id;
521+
/* Keep the count of number of nexthop members are present in Nexthop Group
522+
* when the links became active again*/
523+
nhopgroup->second.nh_member_install_count++;
449524
}
450525

451526
if (!m_fgNhgOrch->validNextHopInNextHopGroup(nexthop))
@@ -473,6 +548,14 @@ bool RouteOrch::invalidnexthopinNextHopGroup(const NextHopKey &nexthop, uint32_t
473548
continue;
474549
}
475550

551+
// This route's nexthop group is already swapped to default route nexthop members; do not delete the original nexthop again.
552+
553+
if (nhopgroup->second.is_default_route_nh_swap)
554+
{
555+
continue;
556+
}
557+
558+
476559
nexthop_id = nhopgroup->second.nhopgroup_members[nexthop].next_hop_id;
477560
status = sai_next_hop_group_api->remove_next_hop_group_member(nexthop_id);
478561

@@ -486,7 +569,24 @@ bool RouteOrch::invalidnexthopinNextHopGroup(const NextHopKey &nexthop, uint32_t
486569
return parseHandleSaiStatusFailure(handle_status);
487570
}
488571
}
489-
572+
// Reduce the member install count when links down
573+
if (nhopgroup->second.nh_member_install_count)
574+
{
575+
nhopgroup->second.nh_member_install_count--;
576+
}
577+
// Nexthop group member count has become zero, so swap its members with the default route
578+
// nexthop's if this route is eligible for such a swap
579+
if (nhopgroup->second.nh_member_install_count == 0 && nhopgroup->second.eligible_for_default_route_nh_swap && !nhopgroup->second.is_default_route_nh_swap)
580+
{
581+
if(nexthop.ip_address.isV4())
582+
{
583+
addDefaultRouteNexthopsInNextHopGroup(nhopgroup->second, v4_active_default_route_nhops);
584+
}
585+
else
586+
{
587+
addDefaultRouteNexthopsInNextHopGroup(nhopgroup->second, v6_active_default_route_nhops);
588+
}
589+
}
490590
++count;
491591
gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER);
492592
}
@@ -638,6 +738,7 @@ void RouteOrch::doTask(Consumer& consumer)
638738
bool srv6_seg = false;
639739
bool srv6_vpn = false;
640740
bool srv6_nh = false;
741+
bool fallback_to_default_route = false;
641742

642743
for (auto i : kfvFieldsValues(t))
643744
{
@@ -683,6 +784,9 @@ void RouteOrch::doTask(Consumer& consumer)
683784
ctx.protocol = fvValue(i);
684785
}
685786

787+
if (fvField(i) == "fallback_to_default_route")
788+
fallback_to_default_route = fvValue(i) == "true";
789+
686790
if (fvField(i) == "vpn_sid") {
687791
srv6_vpn_sids = fvValue(i);
688792
srv6_nh = true;
@@ -707,6 +811,7 @@ void RouteOrch::doTask(Consumer& consumer)
707811
continue;
708812
}
709813

814+
ctx.fallback_to_default_route = fallback_to_default_route;
710815
ctx.nhg_index = nhg_index;
711816
ctx.context_index = context_index;
712817

@@ -1003,6 +1108,8 @@ void RouteOrch::doTask(Consumer& consumer)
10031108
// Go through the bulker results
10041109
auto it_prev = consumer.m_toSync.begin();
10051110
m_bulkNhgReducedRefCnt.clear();
1111+
NextHopGroupKey v4_default_nhg_key;
1112+
NextHopGroupKey v6_default_nhg_key;
10061113
m_bulkSrv6NhgReducedVec.clear();
10071114

10081115
while (it_prev != it)
@@ -1068,6 +1175,20 @@ void RouteOrch::doTask(Consumer& consumer)
10681175
it_prev = consumer.m_toSync.erase(it_prev);
10691176
else
10701177
it_prev++;
1178+
1179+
// Save the Default Route of Default VRF to be used for
1180+
// enabling fallback to it as needed
1181+
if (ip_prefix.isDefaultRoute() && vrf_id == gVirtualRouterId)
1182+
{
1183+
if (ip_prefix.isV4())
1184+
{
1185+
v4_default_nhg_key = getSyncdRouteNhgKey(gVirtualRouterId, ip_prefix);
1186+
}
1187+
else
1188+
{
1189+
v6_default_nhg_key = getSyncdRouteNhgKey(gVirtualRouterId, ip_prefix);
1190+
}
1191+
}
10711192
}
10721193
}
10731194
else if (op == DEL_COMMAND)
@@ -1089,16 +1210,31 @@ void RouteOrch::doTask(Consumer& consumer)
10891210
}
10901211
else if (m_syncdNextHopGroups[it_nhg.first].ref_count == 0)
10911212
{
1092-
removeNextHopGroup(it_nhg.first);
1213+
// Pass the flag indicating whether this nexthop group's members have been swapped with default route nexthop members.
1214+
removeNextHopGroup(it_nhg.first, m_syncdNextHopGroups[it_nhg.first].is_default_route_nh_swap);
10931215
}
10941216
}
1095-
10961217
/* Reduce reference for srv6 next hop group */
10971218
/* Later delete for increase refcnt early */
10981219
if (!m_bulkSrv6NhgReducedVec.empty())
10991220
{
11001221
m_srv6Orch->removeSrv6Nexthops(m_bulkSrv6NhgReducedVec);
11011222
}
1223+
/* No Update to Default Route so we can return */
1224+
if (!(v4_default_nhg_key.getSize()) && !(v6_default_nhg_key.getSize()))
1225+
{
1226+
return;
1227+
}
1228+
/* Update to v4 Default Route so update the data structure */
1229+
if (v4_default_nhg_key.getSize())
1230+
{
1231+
updateDefaultRouteSwapSet(v4_default_nhg_key, v4_active_default_route_nhops);
1232+
}
1233+
/* Update to v6 Default Route so update the data structure */
1234+
if (v6_default_nhg_key.getSize())
1235+
{
1236+
updateDefaultRouteSwapSet(v6_default_nhg_key, v6_active_default_route_nhops);
1237+
}
11021238
}
11031239
}
11041240

@@ -1397,6 +1533,7 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops)
13971533

13981534
NextHopGroupEntry next_hop_group_entry;
13991535
next_hop_group_entry.next_hop_group_id = next_hop_group_id;
1536+
next_hop_group_entry.nh_member_install_count = 0;
14001537

14011538
size_t npid_count = next_hop_ids.size();
14021539
vector<sai_object_id_t> nhgm_ids(npid_count);
@@ -1467,6 +1604,8 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops)
14671604
{
14681605
next_hop_group_entry.nhopgroup_members[nhopgroup_members_set.find(nhid)->second].next_hop_id = nhgm_id;
14691606
next_hop_group_entry.nhopgroup_members[nhopgroup_members_set.find(nhid)->second].seq_id = ((uint32_t)i) + 1;
1607+
/* Keep the count of number of nexthop members are present in Nexthop Group*/
1608+
next_hop_group_entry.nh_member_install_count++;
14701609
}
14711610
}
14721611

@@ -1484,7 +1623,7 @@ bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops)
14841623
return true;
14851624
}
14861625

1487-
bool RouteOrch::removeNextHopGroup(const NextHopGroupKey &nexthops)
1626+
bool RouteOrch::removeNextHopGroup(const NextHopGroupKey &nexthops, const bool is_default_route_nh_swap)
14881627
{
14891628
SWSS_LOG_ENTER();
14901629

@@ -1505,10 +1644,15 @@ bool RouteOrch::removeNextHopGroup(const NextHopGroupKey &nexthops)
15051644
SWSS_LOG_NOTICE("Delete next hop group %s", nexthops.to_string().c_str());
15061645

15071646
vector<sai_object_id_t> next_hop_ids;
1508-
auto& nhgm = next_hop_group_entry->second.nhopgroup_members;
1647+
/* If the NexthopGroup is the one that has been swapped with default route members
1648+
* then when deleting such a nexthop group we have to remove the default route nexthop group members */
1649+
auto& nhgm = is_default_route_nh_swap ? next_hop_group_entry->second.default_route_nhopgroup_members : next_hop_group_entry->second.nhopgroup_members;
15091650
for (auto nhop = nhgm.begin(); nhop != nhgm.end();)
15101651
{
1511-
if (m_neighOrch->isNextHopFlagSet(nhop->first, NHFLAGS_IFDOWN))
1652+
/* This check we skip for Nexthop Group that has been swapped
1653+
* as Nexthop Group Members are not original member which are already removed
1654+
* as part of API invalidnexthopinNextHopGroup */
1655+
if (m_neighOrch->isNextHopFlagSet(nhop->first, NHFLAGS_IFDOWN) && (!is_default_route_nh_swap))
15121656
{
15131657
SWSS_LOG_WARN("NHFLAGS_IFDOWN set for next hop group member %s with next_hop_id %" PRIx64,
15141658
nhop->first.to_string().c_str(), nhop->second.next_hop_id);
@@ -1587,6 +1731,16 @@ bool RouteOrch::removeNextHopGroup(const NextHopGroupKey &nexthops)
15871731
}
15881732
}
15891733

1734+
// Decrement Nexthop Reference Count for Default Route NH Member used as swapped
1735+
if (is_default_route_nh_swap)
1736+
{
1737+
auto& nhgm = next_hop_group_entry->second.default_route_nhopgroup_members;
1738+
for (auto nhop = nhgm.begin(); nhop != nhgm.end(); ++nhop)
1739+
{
1740+
m_neighOrch->decreaseNextHopRefCount(nhop->first);
1741+
}
1742+
}
1743+
15901744
m_syncdNextHopGroups.erase(nexthops);
15911745

15921746
return true;
@@ -2008,6 +2162,13 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops)
20082162
/* Return false since the original route is not successfully added */
20092163
return false;
20102164
}
2165+
else
2166+
{
2167+
/* Nexthop Creation Successful. So the save the state if eligible to fallback to default route
2168+
* based on the APP_DB value for the route. Also initialize the swap state to false, as no swap has happened yet */
2169+
m_syncdNextHopGroups[nextHops].eligible_for_default_route_nh_swap = ctx.fallback_to_default_route;
2170+
m_syncdNextHopGroups[nextHops].is_default_route_nh_swap = false;
2171+
}
20112172
}
20122173

20132174
next_hop_id = m_syncdNextHopGroups[nextHops].next_hop_group_id;
@@ -2610,6 +2771,15 @@ bool RouteOrch::removeRoutePost(const RouteBulkContext& ctx)
26102771
updateDefRouteState(ipPrefix.to_string());
26112772

26122773
SWSS_LOG_INFO("Set route %s next hop ID to NULL", ipPrefix.to_string().c_str());
2774+
2775+
if (ipPrefix.isV4())
2776+
{
2777+
v4_active_default_route_nhops.clear();
2778+
}
2779+
else
2780+
{
2781+
v6_active_default_route_nhops.clear();
2782+
}
26132783
}
26142784
else
26152785
{

orchagent/routeorch.h

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ struct NextHopGroupEntry
4040
sai_object_id_t next_hop_group_id; // next hop group id
4141
int ref_count; // reference count
4242
NextHopGroupMembers nhopgroup_members; // ids of members indexed by <ip_address, if_alias>
43+
NextHopGroupMembers default_route_nhopgroup_members; // ids of members indexed by <ip_address, if_alias>
44+
uint32_t nh_member_install_count;
45+
bool eligible_for_default_route_nh_swap;
46+
bool is_default_route_nh_swap;
4347
};
4448

4549
struct NextHopUpdate
@@ -125,6 +129,7 @@ struct RouteBulkContext
125129
bool excp_intfs_flag;
126130
// using_temp_nhg will track if the NhgOrch's owned NHG is temporary or not
127131
bool using_temp_nhg;
132+
bool fallback_to_default_route;
128133
std::vector<string> ipv;
129134
std::vector<string> alsv;
130135
std::vector<string> vni_labelv;
@@ -136,7 +141,8 @@ struct RouteBulkContext
136141
bool is_set; // True if set operation
137142

138143
RouteBulkContext(const std::string& key, bool is_set)
139-
: key(key), excp_intfs_flag(false), using_temp_nhg(false), is_set(is_set)
144+
: key(key), excp_intfs_flag(false), using_temp_nhg(false), is_set(is_set),
145+
fallback_to_default_route(false)
140146
{
141147
}
142148

@@ -155,6 +161,7 @@ struct RouteBulkContext
155161
using_temp_nhg = false;
156162
key.clear();
157163
protocol.clear();
164+
fallback_to_default_route = false;
158165
}
159166
};
160167

@@ -206,12 +213,13 @@ class RouteOrch : public Orch, public Subject
206213
bool isRefCounterZero(const NextHopGroupKey&) const;
207214

208215
bool addNextHopGroup(const NextHopGroupKey&);
209-
bool removeNextHopGroup(const NextHopGroupKey&);
216+
bool removeNextHopGroup(const NextHopGroupKey&, const bool is_default_route_nh_swap=false);
210217

211218
void addNextHopRoute(const NextHopKey&, const RouteKey&);
212219
void removeNextHopRoute(const NextHopKey&, const RouteKey&);
213220
bool updateNextHopRoutes(const NextHopKey&, uint32_t&);
214221
bool getRoutesForNexthop(std::set<RouteKey>&, const NextHopKey&);
222+
bool swapnexthopinNextHopGroup(sai_object_id_t next_hop_group_id, sai_object_id_t default_next_hop_id);
215223

216224
bool validnexthopinNextHopGroup(const NextHopKey&, uint32_t&);
217225
bool invalidnexthopinNextHopGroup(const NextHopKey&, uint32_t&);
@@ -251,6 +259,8 @@ class RouteOrch : public Orch, public Subject
251259
unsigned int m_maxNextHopGroupCount;
252260
bool m_resync;
253261

262+
std::set<NextHopKey> v4_active_default_route_nhops;
263+
std::set<NextHopKey> v6_active_default_route_nhops;
254264
shared_ptr<DBConnector> m_stateDb;
255265
unique_ptr<swss::Table> m_stateDefaultRouteTb;
256266

@@ -296,6 +306,8 @@ class RouteOrch : public Orch, public Subject
296306
bool isVipRoute(const IpPrefix &ipPrefix, const NextHopGroupKey &nextHops);
297307
void createVipRouteSubnetDecapTerm(const IpPrefix &ipPrefix);
298308
void removeVipRouteSubnetDecapTerm(const IpPrefix &ipPrefix);
309+
bool addDefaultRouteNexthopsInNextHopGroup(NextHopGroupEntry& original_next_hop_group, std::set<NextHopKey>& default_route_next_hop_set);
310+
void updateDefaultRouteSwapSet(const NextHopGroupKey default_nhg_key, std::set<NextHopKey>& active_default_route_nhops);
299311
void incNhgRefCount(const std::string& nhg_index, const std::string &context_index = "");
300312
void decNhgRefCount(const std::string& nhg_index, const std::string &context_index = "");
301313
};

0 commit comments

Comments
 (0)