Skip to content

Commit c9c1aa2

Browse files
shi-su (Shi Su)
authored and committed
Add failure handling for SAI get operations (#1768)
What I did: Add failure handling for SAI get operations. The new function allows failures in SAI get operations to be handled according to the orch type, SAI type, and SAI status. Why I did it: To enable custom failure handling for SAI get operations.
1 parent 47b4276 commit c9c1aa2

9 files changed

Lines changed: 168 additions & 28 deletions

File tree

orchagent/aclorch.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2311,7 +2311,11 @@ void AclOrch::init(vector<TableConnector>& connectors, PortsOrch *portOrch, Mirr
23112311
else
23122312
{
23132313
SWSS_LOG_ERROR("Failed to get ACL entry priority min/max values, rv:%d", status);
2314-
throw "AclOrch initialization failure";
2314+
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
2315+
if (handle_status != task_process_status::task_success)
2316+
{
2317+
throw "AclOrch initialization failure";
2318+
}
23152319
}
23162320

23172321
queryAclActionCapability();

orchagent/copporch.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,11 @@ void CoppOrch::initDefaultTrapGroup()
179179
if (status != SAI_STATUS_SUCCESS)
180180
{
181181
SWSS_LOG_ERROR("Failed to get default trap group, rv:%d", status);
182-
throw "CoppOrch initialization failure";
182+
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
183+
if (handle_status != task_process_status::task_success)
184+
{
185+
throw "CoppOrch initialization failure";
186+
}
183187
}
184188

185189
SWSS_LOG_INFO("Get default trap group");

orchagent/crmorch.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,11 @@ void CrmOrch::getResAvailableCounters()
474474
break;
475475
}
476476
SWSS_LOG_ERROR("Failed to get switch attribute %u , rv:%d", attr.id, status);
477-
break;
477+
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
478+
if (handle_status != task_process_status::task_success)
479+
{
480+
break;
481+
}
478482
}
479483

480484
res.second.countersMap[CRM_COUNTERS_TABLE_KEY].availableCounter = attr.value.u32;
@@ -500,7 +504,11 @@ void CrmOrch::getResAvailableCounters()
500504
if (status != SAI_STATUS_SUCCESS)
501505
{
502506
SWSS_LOG_ERROR("Failed to get switch attribute %u , rv:%d", attr.id, status);
503-
break;
507+
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
508+
if (handle_status != task_process_status::task_success)
509+
{
510+
break;
511+
}
504512
}
505513

506514
for (uint32_t i = 0; i < attr.value.aclresource.count; i++)

orchagent/fdborch.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,11 @@ bool FdbOrch::getPort(const MacAddress& mac, uint16_t vlan, Port& port)
503503
{
504504
SWSS_LOG_ERROR("Failed to get bridge port ID for FDB entry %s, rv:%d",
505505
mac.to_string().c_str(), status);
506-
return false;
506+
task_process_status handle_status = handleSaiGetStatus(SAI_API_FDB, status);
507+
if (handle_status != task_process_status::task_success)
508+
{
509+
return false;
510+
}
507511
}
508512

509513
if (!m_portsOrch->getPortByBridgePortId(attr.value.oid, port))

orchagent/fgnhgorch.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -294,11 +294,15 @@ bool FgNhgOrch::createFineGrainedNextHopGroup(FGNextHopGroupEntry &syncd_fg_rout
294294
{
295295
SWSS_LOG_ERROR("Failed to query next hop group %s SAI_NEXT_HOP_GROUP_ATTR_REAL_SIZE, rv:%d",
296296
nextHops.to_string().c_str(), status);
297-
if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry))
297+
task_process_status handle_status = handleSaiGetStatus(SAI_API_NEXT_HOP_GROUP, status);
298+
if (handle_status != task_process_status::task_success)
298299
{
299-
SWSS_LOG_ERROR("Failed to clean-up after next hop group real_size query failure");
300+
if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry))
301+
{
302+
SWSS_LOG_ERROR("Failed to clean-up after next hop group real_size query failure");
303+
}
304+
return false;
300305
}
301-
return false;
302306
}
303307
fgNhgEntry->real_bucket_size = nhg_attr.value.u32;
304308
}

orchagent/orch.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -765,6 +765,35 @@ task_process_status Orch::handleSaiRemoveStatus(sai_api_t api, sai_status_t stat
765765
return task_need_retry;
766766
}
767767

768+
task_process_status Orch::handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context)
769+
{
770+
/*
771+
* This function aims to provide coarse handling of failures in sairedis get
772+
* operation (i.e., notify users by throwing exceptions when failures happen).
773+
* Return value: task_success - Handled the status successfully. No need to retry this SAI operation.
774+
* task_need_retry - Cannot handle the status. Need to retry the SAI operation.
775+
* task_failed - Failed to handle the status but another attempt is unlikely to resolve the failure.
776+
* TODO: 1. Add general handling logic for specific statuses
777+
* 2. Develop fine-grain failure handling mechanisms and replace this coarse handling
778+
* in each orch.
779+
* 3. Take the type of sai api into consideration.
780+
*/
781+
switch (status)
782+
{
783+
case SAI_STATUS_SUCCESS:
784+
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiGetStatus");
785+
return task_success;
786+
case SAI_STATUS_NOT_IMPLEMENTED:
787+
SWSS_LOG_ERROR("Encountered failure in get operation due to the function is not implemented, exiting orchagent, SAI API: %s",
788+
sai_serialize_api(api).c_str());
789+
throw std::logic_error("SAI get function not implemented");
790+
default:
791+
SWSS_LOG_ERROR("Encountered failure in get operation, SAI API: %s, status: %s",
792+
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
793+
}
794+
return task_failed;
795+
}
796+
768797
bool Orch::parseHandleSaiStatusFailure(task_process_status status)
769798
{
770799
/*

orchagent/orch.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ class Orch
240240
virtual task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
241241
virtual task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
242242
virtual task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
243+
virtual task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context = nullptr);
243244
bool parseHandleSaiStatusFailure(task_process_status status);
244245
private:
245246
void removeMeFromObjsReferencedByMe(type_map &type_maps, const std::string &table, const std::string &obj_name, const std::string &field, const std::string &old_referenced_obj_name);

orchagent/portsorch.cpp

Lines changed: 86 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,11 @@ PortsOrch::PortsOrch(DBConnector *db, vector<table_name_with_pri_t> &tableNames)
328328
if (status != SAI_STATUS_SUCCESS)
329329
{
330330
SWSS_LOG_ERROR("Failed to get CPU port, rv:%d", status);
331-
throw runtime_error("PortsOrch initialization failure");
331+
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
332+
if (handle_status != task_process_status::task_success)
333+
{
334+
throw runtime_error("PortsOrch initialization failure");
335+
}
332336
}
333337

334338
m_cpuPort = Port("CPU", Port::CPU);
@@ -343,7 +347,11 @@ PortsOrch::PortsOrch(DBConnector *db, vector<table_name_with_pri_t> &tableNames)
343347
if (status != SAI_STATUS_SUCCESS)
344348
{
345349
SWSS_LOG_ERROR("Failed to get port number, rv:%d", status);
346-
throw runtime_error("PortsOrch initialization failure");
350+
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
351+
if (handle_status != task_process_status::task_success)
352+
{
353+
throw runtime_error("PortsOrch initialization failure");
354+
}
347355
}
348356

349357
m_portCount = attr.value.u32;
@@ -361,7 +369,11 @@ PortsOrch::PortsOrch(DBConnector *db, vector<table_name_with_pri_t> &tableNames)
361369
if (status != SAI_STATUS_SUCCESS)
362370
{
363371
SWSS_LOG_ERROR("Failed to get port list, rv:%d", status);
364-
throw runtime_error("PortsOrch initialization failure");
372+
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
373+
if (handle_status != task_process_status::task_success)
374+
{
375+
throw runtime_error("PortsOrch initialization failure");
376+
}
365377
}
366378

367379
/* Get port hardware lane info */
@@ -376,7 +388,11 @@ PortsOrch::PortsOrch(DBConnector *db, vector<table_name_with_pri_t> &tableNames)
376388
if (status != SAI_STATUS_SUCCESS)
377389
{
378390
SWSS_LOG_ERROR("Failed to get hardware lane list pid:%" PRIx64, port_list[i]);
379-
throw runtime_error("PortsOrch initialization failure");
391+
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
392+
if (handle_status != task_process_status::task_success)
393+
{
394+
throw runtime_error("PortsOrch initialization failure");
395+
}
380396
}
381397

382398
set<int> tmp_lane_set;
@@ -407,7 +423,11 @@ PortsOrch::PortsOrch(DBConnector *db, vector<table_name_with_pri_t> &tableNames)
407423
if (status != SAI_STATUS_SUCCESS)
408424
{
409425
SWSS_LOG_ERROR("Failed to get default 1Q bridge and/or default VLAN, rv:%d", status);
410-
throw runtime_error("PortsOrch initialization failure");
426+
task_process_status handle_status = handleSaiGetStatus(SAI_API_SWITCH, status);
427+
if (handle_status != task_process_status::task_success)
428+
{
429+
throw runtime_error("PortsOrch initialization failure");
430+
}
411431
}
412432

413433
m_default1QBridge = attrs[0].value.oid;
@@ -437,7 +457,11 @@ void PortsOrch::removeDefaultVlanMembers()
437457
if (status != SAI_STATUS_SUCCESS)
438458
{
439459
SWSS_LOG_ERROR("Failed to get VLAN member list in default VLAN, rv:%d", status);
440-
throw runtime_error("PortsOrch initialization failure");
460+
task_process_status handle_status = handleSaiGetStatus(SAI_API_VLAN, status);
461+
if (handle_status != task_process_status::task_success)
462+
{
463+
throw runtime_error("PortsOrch initialization failure");
464+
}
441465
}
442466

443467
/* Remove VLAN members in default VLAN */
@@ -471,7 +495,11 @@ void PortsOrch::removeDefaultBridgePorts()
471495
if (status != SAI_STATUS_SUCCESS)
472496
{
473497
SWSS_LOG_ERROR("Failed to get bridge port list in default 1Q bridge, rv:%d", status);
474-
throw runtime_error("PortsOrch initialization failure");
498+
task_process_status handle_status = handleSaiGetStatus(SAI_API_BRIDGE, status);
499+
if (handle_status != task_process_status::task_success)
500+
{
501+
throw runtime_error("PortsOrch initialization failure");
502+
}
475503
}
476504

477505
auto bridge_port_count = attr.value.objlist.count;
@@ -486,7 +514,11 @@ void PortsOrch::removeDefaultBridgePorts()
486514
if (status != SAI_STATUS_SUCCESS)
487515
{
488516
SWSS_LOG_ERROR("Failed to get bridge port type, rv:%d", status);
489-
throw runtime_error("PortsOrch initialization failure");
517+
task_process_status handle_status = handleSaiGetStatus(SAI_API_BRIDGE, status);
518+
if (handle_status != task_process_status::task_success)
519+
{
520+
throw runtime_error("PortsOrch initialization failure");
521+
}
490522
}
491523
if (attr.value.s32 == SAI_BRIDGE_PORT_TYPE_PORT)
492524
{
@@ -880,7 +912,11 @@ bool PortsOrch::getPortAdminStatus(sai_object_id_t id, bool &up)
880912
if (status != SAI_STATUS_SUCCESS)
881913
{
882914
SWSS_LOG_ERROR("Failed to get admin status for port pid:%" PRIx64, id);
883-
return false;
915+
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
916+
if (handle_status != task_process_status::task_success)
917+
{
918+
return false;
919+
}
884920
}
885921

886922
up = attr.value.booldata;
@@ -1805,9 +1841,19 @@ bool PortsOrch::getPortSpeed(sai_object_id_t id, sai_uint32_t &speed)
18051841
status = sai_port_api->get_port_attribute(id, 1, &attr);
18061842

18071843
if (status == SAI_STATUS_SUCCESS)
1844+
{
18081845
speed = attr.value.u32;
1846+
}
1847+
else
1848+
{
1849+
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
1850+
if (handle_status != task_process_status::task_success)
1851+
{
1852+
return false;
1853+
}
1854+
}
18091855

1810-
return status == SAI_STATUS_SUCCESS;
1856+
return true;
18111857
}
18121858

18131859
bool PortsOrch::setPortAdvSpeed(sai_object_id_t port_id, sai_uint32_t speed)
@@ -1847,7 +1893,11 @@ bool PortsOrch::getQueueTypeAndIndex(sai_object_id_t queue_id, string &type, uin
18471893
if (status != SAI_STATUS_SUCCESS)
18481894
{
18491895
SWSS_LOG_ERROR("Failed to get queue type and index for queue %" PRIu64 " rv:%d", queue_id, status);
1850-
return false;
1896+
task_process_status handle_status = handleSaiGetStatus(SAI_API_QUEUE, status);
1897+
if (handle_status != task_process_status::task_success)
1898+
{
1899+
return false;
1900+
}
18511901
}
18521902

18531903
switch (attr[0].value.s32)
@@ -3473,7 +3523,11 @@ void PortsOrch::initializeQueues(Port &port)
34733523
if (status != SAI_STATUS_SUCCESS)
34743524
{
34753525
SWSS_LOG_ERROR("Failed to get number of queues for port %s rv:%d", port.m_alias.c_str(), status);
3476-
throw runtime_error("PortsOrch initialization failure.");
3526+
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
3527+
if (handle_status != task_process_status::task_success)
3528+
{
3529+
throw runtime_error("PortsOrch initialization failure.");
3530+
}
34773531
}
34783532
SWSS_LOG_INFO("Get %d queues for port %s", attr.value.u32, port.m_alias.c_str());
34793533

@@ -3493,7 +3547,11 @@ void PortsOrch::initializeQueues(Port &port)
34933547
if (status != SAI_STATUS_SUCCESS)
34943548
{
34953549
SWSS_LOG_ERROR("Failed to get queue list for port %s rv:%d", port.m_alias.c_str(), status);
3496-
throw runtime_error("PortsOrch initialization failure.");
3550+
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
3551+
if (handle_status != task_process_status::task_success)
3552+
{
3553+
throw runtime_error("PortsOrch initialization failure.");
3554+
}
34973555
}
34983556

34993557
SWSS_LOG_INFO("Get queues for port %s", port.m_alias.c_str());
@@ -3509,7 +3567,11 @@ void PortsOrch::initializePriorityGroups(Port &port)
35093567
if (status != SAI_STATUS_SUCCESS)
35103568
{
35113569
SWSS_LOG_ERROR("Failed to get number of priority groups for port %s rv:%d", port.m_alias.c_str(), status);
3512-
throw runtime_error("PortsOrch initialization failure.");
3570+
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
3571+
if (handle_status != task_process_status::task_success)
3572+
{
3573+
throw runtime_error("PortsOrch initialization failure.");
3574+
}
35133575
}
35143576
SWSS_LOG_INFO("Get %d priority groups for port %s", attr.value.u32, port.m_alias.c_str());
35153577

@@ -3530,7 +3592,11 @@ void PortsOrch::initializePriorityGroups(Port &port)
35303592
if (status != SAI_STATUS_SUCCESS)
35313593
{
35323594
SWSS_LOG_ERROR("Fail to get priority group list for port %s rv:%d", port.m_alias.c_str(), status);
3533-
throw runtime_error("PortsOrch initialization failure.");
3595+
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
3596+
if (handle_status != task_process_status::task_success)
3597+
{
3598+
throw runtime_error("PortsOrch initialization failure.");
3599+
}
35343600
}
35353601
SWSS_LOG_INFO("Get priority groups for port %s", port.m_alias.c_str());
35363602
}
@@ -4844,7 +4910,11 @@ bool PortsOrch::setPortSerdesAttribute(sai_object_id_t port_id,
48444910
{
48454911
SWSS_LOG_ERROR("Failed to get port attr serdes id %d to port pid:0x%" PRIx64,
48464912
port_attr.id, port_id);
4847-
return false;
4913+
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, status);
4914+
if (handle_status != task_process_status::task_success)
4915+
{
4916+
return false;
4917+
}
48484918
}
48494919

48504920
if (port_attr.value.oid != SAI_NULL_OBJECT_ID)

orchagent/qosorch.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -933,7 +933,11 @@ sai_object_id_t QosOrch::getSchedulerGroup(const Port &port, const sai_object_id
933933
if (SAI_STATUS_SUCCESS != sai_status)
934934
{
935935
SWSS_LOG_ERROR("Failed to get number of scheduler groups for port:%s", port.m_alias.c_str());
936-
return SAI_NULL_OBJECT_ID;
936+
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, sai_status);
937+
if (handle_status != task_process_status::task_success)
938+
{
939+
return SAI_NULL_OBJECT_ID;
940+
}
937941
}
938942

939943
/* Get total groups list on the port */
@@ -947,7 +951,11 @@ sai_object_id_t QosOrch::getSchedulerGroup(const Port &port, const sai_object_id
947951
if (SAI_STATUS_SUCCESS != sai_status)
948952
{
949953
SWSS_LOG_ERROR("Failed to get scheduler group list for port:%s", port.m_alias.c_str());
950-
return SAI_NULL_OBJECT_ID;
954+
task_process_status handle_status = handleSaiGetStatus(SAI_API_PORT, sai_status);
955+
if (handle_status != task_process_status::task_success)
956+
{
957+
return SAI_NULL_OBJECT_ID;
958+
}
951959
}
952960

953961
m_scheduler_group_port_info[port.m_port_id] = {
@@ -969,7 +977,11 @@ sai_object_id_t QosOrch::getSchedulerGroup(const Port &port, const sai_object_id
969977
if (SAI_STATUS_SUCCESS != sai_status)
970978
{
971979
SWSS_LOG_ERROR("Failed to get child count for scheduler group:0x%" PRIx64 " of port:%s", group_id, port.m_alias.c_str());
972-
return SAI_NULL_OBJECT_ID;
980+
task_process_status handle_status = handleSaiGetStatus(SAI_API_SCHEDULER_GROUP, sai_status);
981+
if (handle_status != task_process_status::task_success)
982+
{
983+
return SAI_NULL_OBJECT_ID;
984+
}
973985
}
974986

975987
uint32_t child_count = attr.value.u32;
@@ -988,7 +1000,11 @@ sai_object_id_t QosOrch::getSchedulerGroup(const Port &port, const sai_object_id
9881000
if (SAI_STATUS_SUCCESS != sai_status)
9891001
{
9901002
SWSS_LOG_ERROR("Failed to get child list for scheduler group:0x%" PRIx64 " of port:%s", group_id, port.m_alias.c_str());
991-
return SAI_NULL_OBJECT_ID;
1003+
task_process_status handle_status = handleSaiGetStatus(SAI_API_SCHEDULER_GROUP, sai_status);
1004+
if (handle_status != task_process_status::task_success)
1005+
{
1006+
return SAI_NULL_OBJECT_ID;
1007+
}
9921008
}
9931009

9941010
m_scheduler_group_port_info[port.m_port_id].child_groups[ii] = std::move(child_groups);

0 commit comments

Comments
 (0)