Skip to content

Commit 312f41a

Browse files
committed
[muxorch] handling multiple mux nexthops for route
What I did: added logic to handle when a route points to a nexthop group with mux neighbors. In this case, only one active neighbor, or the tunnel nexthop will be programmed to the ASIC. Why I did it: having a route with multiple mux neighbors caused a data loop which lead to packet loss when different neighbors were in different states. How I did it: added logic to update routes when a route is changed, a mux neighbor is changed, or there is a mux state change. HLD: sonic-net/SONiC#1256 Signed-off-by: Nikola Dancejic <[email protected]>
1 parent 7f03db2 commit 312f41a

File tree

5 files changed

+223
-33
lines changed

5 files changed

+223
-33
lines changed

orchagent/muxorch.cpp

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -973,6 +973,127 @@ sai_object_id_t MuxOrch::getNextHopTunnelId(std::string tunnelKey, IpAddress& ip
973973
return it->second.nh_id;
974974
}
975975

976+
/**
977+
* @brief updates the given route to point to a single active NH or tunnel
978+
* @param pfx IpPrefix of route to update
979+
*/
980+
void MuxOrch::updateRoute(const IpPrefix &pfx)
981+
{
982+
// Create Route entry
983+
sai_route_entry_t route_entry;
984+
route_entry.switch_id = gSwitchId;
985+
route_entry.vr_id = gVirtualRouterId;
986+
copy(route_entry.destination, pfx);
987+
subnet(route_entry.destination, route_entry.destination);
988+
989+
// initialize attr
990+
sai_attribute_t attr;
991+
vector<sai_attribute_t> attrs;
992+
attr.id = SAI_ROUTE_ENTRY_ATTR_PACKET_ACTION;
993+
attr.value.s32 = SAI_PACKET_ACTION_FORWARD;
994+
attrs.push_back(attr);
995+
attr.id = SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID;
996+
997+
// initialize route_key to get nexthops
998+
RouteKey route_key;
999+
route_key.vrf_id = gVirtualRouterId;
1000+
route_key.prefix = pfx;
1001+
1002+
// get list of nexthops
1003+
std::set<NextHopKey> nextHops = gRouteOrch->getNextHopsForRoute(route_key);
1004+
if (nextHops.size() > 1)
1005+
{
1006+
/* Multi-nexthop case should only program 1 active or 1 standby nexthop */
1007+
1008+
// If the route already points to an Active neighbor, do nothing.
1009+
RouteTables m_syncdRoutes = gRouteOrch->getSyncdRoutes();
1010+
RouteNhg rtnhg;
1011+
if (m_syncdRoutes.find(gVirtualRouterId) != m_syncdRoutes.end())
1012+
{
1013+
auto it = m_syncdRoutes[gVirtualRouterId].find(pfx);
1014+
if (it != m_syncdRoutes[gVirtualRouterId].end())
1015+
{
1016+
rtnhg = it->second;
1017+
NextHopGroupKey nhg_key = rtnhg.nhg_key;
1018+
std::set<NextHopKey> current_nexthops = nhg_key.getNextHops();
1019+
if (nhg_key.getSize() == 1 &&
1020+
findMuxCableInSubnet(current_nexthops.begin()->ip_address)->isActive())
1021+
{
1022+
SWSS_LOG_INFO("Route %s points to Active neighbor %s",
1023+
pfx.getIp().to_string().c_str(), current_nexthops.begin()->to_string().c_str());
1024+
return;
1025+
}
1026+
else if (gRouteOrch->hasNextHopGroup(nhg_key))
1027+
{
1028+
SWSS_LOG_DEBUG("Removing route %s nexthop group: %s",
1029+
pfx.getIp().to_string().c_str(), nhg_key.to_string().c_str());
1030+
gRouteOrch->removeNextHopGroup(nhg_key);
1031+
}
1032+
}
1033+
}
1034+
1035+
SWSS_LOG_INFO("Updating route %s pointing to Mux neighbors",
1036+
pfx.getIp().to_string().c_str());
1037+
1038+
// Loop through nexthops to find active neighbor
1039+
for (auto nh = nextHops.begin(); nh != nextHops.end(); nh++)
1040+
{
1041+
if (findMuxCableInSubnet(nh->ip_address)->isActive())
1042+
{
1043+
// If we find an active nexthop neighbor, program it to hardware.
1044+
attr.value.oid = gNeighOrch->getNextHopId(*nh);
1045+
attrs.push_back(attr);
1046+
SWSS_LOG_NOTICE("Updating route %s with nexthop: %" PRIx64 "",
1047+
pfx.getIp().to_string().c_str(), attr.value.oid);
1048+
sai_status_t status = sai_route_api->create_route_entry(&route_entry, (uint32_t)attrs.size(), attrs.data());
1049+
if (status != SAI_STATUS_SUCCESS)
1050+
{
1051+
SWSS_LOG_ERROR("Failed to create nexthop route %s,nh %" PRIx64 " rv:%d",
1052+
pfx.getIp().to_string().c_str(), attr.value.oid, status);
1053+
}
1054+
return;
1055+
}
1056+
}
1057+
1058+
// No active neighbor was found. program tunnel route instead
1059+
auto tun = getNextHopTunnelId(MUX_TUNNEL, mux_peer_switch_);
1060+
attr.value.oid = tun;
1061+
attrs.push_back(attr);
1062+
1063+
SWSS_LOG_DEBUG("No Active neighbors found, setting route %s to point to tunnel: %" PRIx64 "",
1064+
pfx.getIp().to_string().c_str(), tun);
1065+
1066+
sai_status_t status = sai_route_api->create_route_entry(&route_entry, (uint32_t)attrs.size(), attrs.data());
1067+
if (status != SAI_STATUS_SUCCESS)
1068+
{
1069+
SWSS_LOG_ERROR("Failed to create tunnel route %s,nh %" PRIx64 " rv:%d",
1070+
pfx.getIp().to_string().c_str(), tun, status);
1071+
return;
1072+
}
1073+
}
1074+
}
1075+
1076+
/**
1077+
* @brief updates all routes pointing to the given cables neighbor list
1078+
* @param cable MuxCable to update
1079+
*/
1080+
void MuxOrch::updateRoutesForCable(MuxCable* cable)
1081+
{
1082+
const MuxNeighbor neighbors = cable->getNeighbors();
1083+
for (auto nh = neighbors.begin(); nh != neighbors.end(); nh ++)
1084+
{
1085+
std::set<RouteKey> routes;
1086+
NextHopKey nhkey(nh->first, cable->getAlias());
1087+
if (gRouteOrch->getRoutesForNexthop(routes, nhkey))
1088+
{
1089+
for (auto rt = routes.begin(); rt != routes.end(); rt++)
1090+
{
1091+
updateRoute(rt->prefix);
1092+
}
1093+
}
1094+
}
1095+
}
1096+
9761097
MuxCable* MuxOrch::findMuxCableInSubnet(IpAddress ip)
9771098
{
9781099
for (auto it = mux_cable_tb_.begin(); it != mux_cable_tb_.end(); it++)
@@ -1086,12 +1207,14 @@ void MuxOrch::updateFdb(const FdbUpdate& update)
10861207
}
10871208
nh->second = update.entry.port_name;
10881209
ptr->updateNeighbor(nh->first, false);
1210+
updateRoutesForCable(ptr);
10891211
}
10901212

10911213
if (isMuxExists(update.entry.port_name))
10921214
{
10931215
ptr = getMuxCable(update.entry.port_name);
10941216
ptr->updateNeighbor(nh->first, true);
1217+
updateRoutesForCable(ptr);
10951218
}
10961219
}
10971220
}
@@ -1141,6 +1264,7 @@ void MuxOrch::updateNeighbor(const NeighborUpdate& update)
11411264
if (ptr->isIpInSubnet(update.entry.ip_address))
11421265
{
11431266
ptr->updateNeighbor(update.entry, update.add);
1267+
updateRoutesForCable(ptr);
11441268
return;
11451269
}
11461270
}
@@ -1181,13 +1305,15 @@ void MuxOrch::updateNeighbor(const NeighborUpdate& update)
11811305
{
11821306
ptr = getMuxCable(old_port);
11831307
ptr->updateNeighbor(update.entry, false);
1308+
updateRoutesForCable(ptr);
11841309
addNexthop(update.entry);
11851310
}
11861311

11871312
if (!port.empty() && isMuxExists(port))
11881313
{
11891314
ptr = getMuxCable(port);
11901315
ptr->updateNeighbor(update.entry, update.add);
1316+
updateRoutesForCable(ptr);
11911317
}
11921318
}
11931319

@@ -1201,6 +1327,30 @@ void MuxOrch::removeNexthop(NextHopKey nh)
12011327
mux_nexthop_tb_.erase(nh);
12021328
}
12031329

1330+
bool MuxOrch::containsNextHop(const NextHopKey& nh)
1331+
{
1332+
return mux_nexthop_tb_.find(nh) != mux_nexthop_tb_.end();
1333+
}
1334+
1335+
/**
1336+
* @brief checks if a given nexthop group belongs to a mux
1337+
* @param nextHops NextHopGroupKey
1338+
* @return true if a mux contains any of the nexthops in the group
1339+
* false if none of the nexthops belong to a mux
1340+
*/
1341+
bool MuxOrch::isMuxNexthops(const NextHopGroupKey& nextHops)
1342+
{
1343+
const std::set<NextHopKey> s_nexthops = nextHops.getNextHops();
1344+
for (auto it = s_nexthops.begin(); it != s_nexthops.end(); it ++)
1345+
{
1346+
if (this->containsNextHop(*it))
1347+
{
1348+
return true;
1349+
}
1350+
}
1351+
return false;
1352+
}
1353+
12041354
string MuxOrch::getNexthopMuxName(NextHopKey nh)
12051355
{
12061356
if (mux_nexthop_tb_.find(nh) == mux_nexthop_tb_.end())
@@ -1574,6 +1724,8 @@ bool MuxCableOrch::addOperation(const Request& request)
15741724
return true;
15751725
}
15761726

1727+
mux_orch->updateRoutesForCable(mux_obj);
1728+
15771729
SWSS_LOG_NOTICE("Mux State set to %s for port %s", state.c_str(), port_name.c_str());
15781730

15791731
return true;

orchagent/muxorch.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ class MuxNbrHandler
7272
void update(NextHopKey nh, sai_object_id_t, bool = true, MuxState = MuxState::MUX_STATE_INIT);
7373

7474
sai_object_id_t getNextHopId(const NextHopKey);
75+
const MuxNeighbor getNeighbors() const {return neighbors_;}
76+
const string getAlias() const {return alias_;}
7577

7678
private:
7779
inline void updateTunnelRoute(NextHopKey, bool = true);
@@ -107,6 +109,9 @@ class MuxCable
107109
return nbr_handler_->getNextHopId(nh);
108110
}
109111

112+
const MuxNeighbor getNeighbors() const {return nbr_handler_->getNeighbors();}
113+
const string getAlias() const {return nbr_handler_->getAlias();}
114+
110115
private:
111116
bool stateActive();
112117
bool stateInitActive();
@@ -195,13 +200,18 @@ class MuxOrch : public Orch2, public Observer, public Subject
195200

196201
void addNexthop(NextHopKey, string = "");
197202
void removeNexthop(NextHopKey);
203+
bool containsNextHop(const NextHopKey&);
204+
bool isMuxNexthops(const NextHopGroupKey&);
198205
string getNexthopMuxName(NextHopKey);
199206
sai_object_id_t getNextHopId(const NextHopKey&);
200207

201208
sai_object_id_t createNextHopTunnel(std::string tunnelKey, IpAddress& ipAddr);
202209
bool removeNextHopTunnel(std::string tunnelKey, IpAddress& ipAddr);
203210
sai_object_id_t getNextHopTunnelId(std::string tunnelKey, IpAddress& ipAddr);
204211

212+
void updateRoute(const IpPrefix &pfx);
213+
void updateRoutesForCable(MuxCable* cable);
214+
205215
private:
206216
virtual bool addOperation(const Request& request);
207217
virtual bool delOperation(const Request& request);

orchagent/nexthopgroupkey.h

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -23,27 +23,13 @@ class NextHopGroupKey
2323
/* ip_string|if_alias|vni|router_mac separated by ',' */
2424
NextHopGroupKey(const std::string &nexthops, bool overlay_nh, bool srv6_nh = false)
2525
{
26-
if (overlay_nh)
27-
{
28-
m_overlay_nexthops = true;
29-
m_srv6_nexthops = false;
30-
auto nhv = tokenize(nexthops, NHG_DELIMITER);
31-
for (const auto &nh_str : nhv)
32-
{
33-
auto nh = NextHopKey(nh_str, overlay_nh, srv6_nh);
34-
m_nexthops.insert(nh);
35-
}
36-
}
37-
else if (srv6_nh)
26+
m_overlay_nexthops = overlay_nh;
27+
m_srv6_nexthops = srv6_nh && !overlay_nh;
28+
auto nhv = tokenize(nexthops, NHG_DELIMITER);
29+
for (const auto &nh_str : nhv)
3830
{
39-
m_overlay_nexthops = false;
40-
m_srv6_nexthops = true;
41-
auto nhv = tokenize(nexthops, NHG_DELIMITER);
42-
for (const auto &nh_str : nhv)
43-
{
44-
auto nh = NextHopKey(nh_str, overlay_nh, srv6_nh);
45-
m_nexthops.insert(nh);
46-
}
31+
auto nh = NextHopKey(nh_str, overlay_nh, srv6_nh);
32+
m_nexthops.insert(nh);
4733
}
4834
}
4935

orchagent/routeorch.cpp

Lines changed: 53 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "cbf/cbfnhgorch.h"
77
#include "logger.h"
88
#include "flowcounterrouteorch.h"
9+
#include "muxorch.h"
910
#include "swssnet.h"
1011
#include "crmorch.h"
1112
#include "directory.h"
@@ -1580,6 +1581,43 @@ bool RouteOrch::updateNextHopRoutes(const NextHopKey& nextHop, uint32_t& numRout
15801581
return true;
15811582
}
15821583

1584+
/**
1585+
* @brief returns a list of nexthops associated with route
1586+
* @param routeKey routekey to lookup
1587+
* @return std::set<NextHopKey> of nexthop keys
1588+
*/
1589+
std::set<NextHopKey> RouteOrch::getNextHopsForRoute(const RouteKey& routeKey)
1590+
{
1591+
std::set<NextHopKey> nh_list;
1592+
for (auto nh = m_nextHops.begin(); nh != m_nextHops.end(); nh++)
1593+
{
1594+
auto route_key = nh->second.find(routeKey);
1595+
if (route_key != nh->second.end())
1596+
{
1597+
nh_list.emplace(nh->first);
1598+
}
1599+
}
1600+
return nh_list;
1601+
}
1602+
1603+
/**
1604+
* @brief returns a route prefix associated with nexthopkey
1605+
* @param routekey_set empty set of routekeys to populate
1606+
* @param nexthop_key nexthop key to lookup
1607+
* @return true if found, false if not found.
1608+
*/
1609+
bool RouteOrch::getRoutesForNexthop(std::set<RouteKey>& routekey_set, const NextHopKey& nexthop_key)
1610+
{
1611+
auto it = m_nextHops.find(nexthop_key);
1612+
1613+
if (it != m_nextHops.end())
1614+
{
1615+
routekey_set = it->second;
1616+
}
1617+
1618+
return it != m_nextHops.end();
1619+
}
1620+
15831621
void RouteOrch::addTempRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops)
15841622
{
15851623
SWSS_LOG_ENTER();
@@ -1643,15 +1681,8 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops)
16431681
m_vrfOrch->increaseVrfRefCount(vrf_id);
16441682
}
16451683

1646-
if (nextHops.is_overlay_nexthop())
1647-
{
1648-
overlay_nh = true;
1649-
}
1650-
1651-
if (nextHops.is_srv6_nexthop())
1652-
{
1653-
srv6_nh = true;
1654-
}
1684+
overlay_nh = nextHops.is_overlay_nexthop();
1685+
srv6_nh = nextHops.is_srv6_nexthop();
16551686

16561687
auto it_route = m_syncdRoutes.at(vrf_id).find(ipPrefix);
16571688

@@ -2202,14 +2233,23 @@ bool RouteOrch::addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey
22022233
ipPrefix.to_string().c_str(), nextHops.to_string().c_str());
22032234
}
22042235

2205-
if (ctx.nhg_index.empty() && nextHops.getSize() == 1 && !nextHops.is_overlay_nexthop() && !nextHops.is_srv6_nexthop())
2236+
MuxOrch* mux_orch = gDirectory.get<MuxOrch*>();
2237+
if ((ctx.nhg_index.empty() && nextHops.getSize() == 1 &&
2238+
!nextHops.is_overlay_nexthop() && !nextHops.is_srv6_nexthop()) ||
2239+
mux_orch->isMuxNexthops(nextHops))
22062240
{
22072241
RouteKey r_key = { vrf_id, ipPrefix };
2208-
auto nexthop = NextHopKey(nextHops.to_string());
2209-
if (!nexthop.ip_address.isZero())
2242+
auto nexthop_list = nextHops.getNextHops();
2243+
2244+
for (auto nh = nexthop_list.begin(); nh != nexthop_list.end(); nh++)
22102245
{
2211-
addNextHopRoute(nexthop, r_key);
2246+
if (!nh->ip_address.isZero())
2247+
{
2248+
addNextHopRoute(*nh, r_key);
2249+
}
22122250
}
2251+
// update routes to reflect mux state
2252+
mux_orch->updateRoute(ipPrefix);
22132253
}
22142254

22152255
if (ipPrefix.isDefaultRoute())

orchagent/routeorch.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,8 @@ class RouteOrch : public Orch, public Subject
195195
void addNextHopRoute(const NextHopKey&, const RouteKey&);
196196
void removeNextHopRoute(const NextHopKey&, const RouteKey&);
197197
bool updateNextHopRoutes(const NextHopKey&, uint32_t&);
198+
std::set<NextHopKey> getNextHopsForRoute(const RouteKey&);
199+
bool getRoutesForNexthop(std::set<RouteKey>&, const NextHopKey&);
198200

199201
bool validnexthopinNextHopGroup(const NextHopKey&, uint32_t&);
200202
bool invalidnexthopinNextHopGroup(const NextHopKey&, uint32_t&);

0 commit comments

Comments
 (0)