Skip to content

Commit ab327ec

Browse files
avoid OA crashes for all cases
Signed-off-by: Prabhat Aravind <paravind@microsoft.com>
1 parent 596d88c commit ab327ec

File tree

2 files changed

+67
-209
lines changed

2 files changed

+67
-209
lines changed

orchagent/orch.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,15 @@ typedef enum
6666
task_duplicated
6767
} task_process_status;
6868

69+
70+
/*
 * Kind of SAI operation being reported (create / remove / set / get).
 * Enumerator values are implicit: they start at 0 and follow declaration
 * order, so append new operations at the end to keep existing values stable.
 */
typedef enum
{
    oper_create,    // create operation
    oper_remove,    // remove operation
    oper_set,       // set operation
    oper_get        // get operation
} sai_oper_type_t;  // terminating ';' is required; without it the next typedef fails to parse
77+
6978
typedef struct
7079
{
7180
// m_objsDependingOnMe stores names (without table name) of all objects depending on the current obj

orchagent/saihelper.cpp

Lines changed: 58 additions & 209 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ sai_stp_api_t* sai_stp_api;
9090

9191
extern sai_object_id_t gSwitchId;
9292
extern bool gTraditionalFlexCounter;
93+
extern event_handle_t g_events_handle;
9394

9495
vector<sai_object_id_t> gGearboxOids;
9596

@@ -532,100 +533,23 @@ task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, vo
532533
* in each orch.
533534
* 3. Take the type of sai api into consideration.
534535
*/
535-
switch (api)
536-
{
537-
case SAI_API_FDB:
538-
switch (status)
539-
{
540-
case SAI_STATUS_SUCCESS:
541-
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus");
542-
return task_success;
543-
case SAI_STATUS_ITEM_ALREADY_EXISTS:
544-
/*
545-
* In FDB creation, there are scenarios where the hardware learns an FDB entry before orchagent.
546-
* In such cases, the FDB SAI creation would report the status of SAI_STATUS_ITEM_ALREADY_EXISTS,
547-
* and orchagent should ignore the error and treat it as entry was explicitly created.
548-
*/
549-
return task_success;
550-
default:
551-
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
552-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
553-
handleSaiFailure(true);
554-
break;
555-
}
556-
break;
557-
case SAI_API_HOSTIF:
558-
switch (status)
559-
{
560-
case SAI_STATUS_SUCCESS:
561-
return task_success;
562-
case SAI_STATUS_FAILURE:
563-
/*
564-
* Host interface maybe failed due to lane not available.
565-
* In some scenarios, like SONiC virtual machine, the invalid lane may be not enabled by VM configuration,
566-
* So just ignore the failure and report an error log.
567-
*/
568-
return task_ignore;
569-
default:
570-
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
571-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
572-
handleSaiFailure(true);
573-
break;
574-
}
575-
break;
576-
case SAI_API_ROUTE:
577-
switch (status)
578-
{
579-
case SAI_STATUS_SUCCESS:
580-
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus");
581-
return task_success;
582-
case SAI_STATUS_ITEM_ALREADY_EXISTS:
583-
case SAI_STATUS_NOT_EXECUTED:
584-
/* With VNET routes, the same route can be learned via multiple
585-
sources, like via BGP. Handle this gracefully */
586-
return task_success;
587-
case SAI_STATUS_TABLE_FULL:
588-
return task_need_retry;
589-
default:
590-
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
591-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
592-
handleSaiFailure(true);
593-
break;
594-
}
595-
break;
596-
case SAI_API_NEIGHBOR:
597-
case SAI_API_NEXT_HOP:
598-
case SAI_API_NEXT_HOP_GROUP:
599-
switch(status)
600-
{
601-
case SAI_STATUS_SUCCESS:
602-
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus");
603-
return task_success;
604-
case SAI_STATUS_ITEM_ALREADY_EXISTS:
605-
return task_success;
606-
case SAI_STATUS_TABLE_FULL:
607-
return task_need_retry;
608-
default:
609-
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
610-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
611-
handleSaiFailure(true);
612-
break;
613-
}
614-
break;
536+
switch (status)
537+
{
538+
case SAI_STATUS_SUCCESS:
539+
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus");
540+
return task_success;
541+
case SAI_STATUS_ITEM_ALREADY_EXISTS:
542+
return task_success;
543+
case SAI_STATUS_INSUFFICIENT_RESOURCES:
544+
case SAI_STATUS_TABLE_FULL:
545+
case SAI_STATUS_NO_MEMORY:
546+
case SAI_STATUS_NV_STORAGE_FULL:
547+
return task_need_retry;
615548
default:
616-
switch (status)
617-
{
618-
case SAI_STATUS_SUCCESS:
619-
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus");
620-
return task_success;
621-
default:
622-
SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s",
623-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
624-
handleSaiFailure(true);
625-
break;
626-
}
627-
}
628-
return task_need_retry;
549+
handleSaiFailure(api, "create", status);
550+
break;
551+
}
552+
return task_need_retry;
629553
}
630554

631555
task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void *context)
@@ -641,67 +565,24 @@ task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void
641565
* in each orch.
642566
* 3. Take the type of sai api into consideration.
643567
*/
644-
if (status == SAI_STATUS_SUCCESS)
645-
{
646-
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiSetStatus");
647-
return task_success;
648-
}
649-
650-
switch (api)
568+
switch (status)
651569
{
652-
case SAI_API_PORT:
653-
switch (status)
654-
{
655-
case SAI_STATUS_INVALID_ATTR_VALUE_0:
656-
/*
657-
* If user gives an invalid attribute value, no need to retry or exit orchagent, just fail the current task
658-
* and let user correct the configuration.
659-
*/
660-
SWSS_LOG_ERROR("Encountered SAI_STATUS_INVALID_ATTR_VALUE_0 in set operation, task failed, SAI API: %s, status: %s",
661-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
662-
return task_failed;
663-
default:
664-
SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s",
665-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
666-
handleSaiFailure(true);
667-
break;
668-
}
669-
break;
670-
case SAI_API_TUNNEL:
671-
switch (status)
672-
{
673-
case SAI_STATUS_ATTR_NOT_SUPPORTED_0:
674-
SWSS_LOG_ERROR("Encountered SAI_STATUS_ATTR_NOT_SUPPORTED_0 in set operation, task failed, SAI API: %s, status: %s",
675-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
676-
return task_failed;
677-
default:
678-
SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s",
679-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
680-
handleSaiFailure(true);
681-
break;
682-
}
683-
break;
684-
case SAI_API_BUFFER:
685-
switch (status)
686-
{
687-
case SAI_STATUS_INSUFFICIENT_RESOURCES:
688-
SWSS_LOG_ERROR("Encountered SAI_STATUS_INSUFFICIENT_RESOURCES in set operation, task failed, SAI API: %s, status: %s",
689-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
690-
return task_failed;
691-
default:
692-
SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s",
693-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
694-
handleSaiFailure(true);
695-
break;
696-
}
697-
break;
570+
case SAI_STATUS_SUCCESS:
571+
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiSetStatus");
572+
return task_success;
573+
case SAI_STATUS_ITEM_ALREADY_EXISTS:
574+
/* Bulked calls could return this value */
575+
case SAI_STATUS_NOT_EXECUTED:
576+
return task_success;
577+
case SAI_STATUS_INSUFFICIENT_RESOURCES:
578+
case SAI_STATUS_TABLE_FULL:
579+
case SAI_STATUS_NO_MEMORY:
580+
case SAI_STATUS_NV_STORAGE_FULL:
581+
return task_need_retry;
698582
default:
699-
SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s",
700-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
701-
handleSaiFailure(true);
583+
handleSaiFailure(api, oper_set, status);
702584
break;
703585
}
704-
705586
return task_need_retry;
706587
}
707588

@@ -719,55 +600,18 @@ task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, vo
719600
* in each orch.
720601
* 3. Take the type of sai api into consideration.
721602
*/
722-
switch (api)
603+
switch (status)
723604
{
724-
case SAI_API_ROUTE:
725-
switch (status)
726-
{
727-
case SAI_STATUS_SUCCESS:
728-
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiRemoveStatus");
729-
return task_success;
730-
case SAI_STATUS_ITEM_NOT_FOUND:
731-
case SAI_STATUS_NOT_EXECUTED:
732-
/* When the same route is learned via multiple sources,
733-
there can be a duplicate remove operation. Handle this gracefully */
734-
return task_success;
735-
default:
736-
SWSS_LOG_ERROR("Encountered failure in remove operation, exiting orchagent, SAI API: %s, status: %s",
737-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
738-
handleSaiFailure(true);
739-
break;
740-
}
741-
break;
742-
case SAI_API_NEIGHBOR:
743-
case SAI_API_NEXT_HOP:
744-
case SAI_API_NEXT_HOP_GROUP:
745-
switch (status)
746-
{
747-
case SAI_STATUS_SUCCESS:
748-
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiRemoveStatus");
749-
return task_success;
750-
case SAI_STATUS_ITEM_NOT_FOUND:
751-
return task_success;
752-
default:
753-
SWSS_LOG_ERROR("Encountered failure in remove operation, exiting orchagent, SAI API: %s, status: %s",
754-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
755-
handleSaiFailure(true);
756-
break;
757-
}
758-
break;
605+
case SAI_STATUS_SUCCESS:
606+
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiRemoveStatus");
607+
return task_success;
608+
case SAI_STATUS_NOT_EXECUTED:
609+
/* When the same route is learned via multiple sources,
610+
there can be a duplicate remove operation. Handle this gracefully */
611+
return task_success;
759612
default:
760-
switch (status)
761-
{
762-
case SAI_STATUS_SUCCESS:
763-
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiRemoveStatus");
764-
return task_success;
765-
default:
766-
SWSS_LOG_ERROR("Encountered failure in remove operation, exiting orchagent, SAI API: %s, status: %s",
767-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
768-
handleSaiFailure(true);
769-
break;
770-
}
613+
handleSaiFailure(api, oper_remove, status);
614+
break;
771615
}
772616
return task_need_retry;
773617
}
@@ -791,12 +635,10 @@ task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void
791635
SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiGetStatus");
792636
return task_success;
793637
case SAI_STATUS_NOT_IMPLEMENTED:
794-
SWSS_LOG_ERROR("Encountered failure in get operation due to the function is not implemented, exiting orchagent, SAI API: %s",
795-
sai_serialize_api(api).c_str());
796638
throw std::logic_error("SAI get function not implemented");
797639
default:
798-
SWSS_LOG_ERROR("Encountered failure in get operation, SAI API: %s, status: %s",
799-
sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str());
640+
handleSaiFailure(api, oper_get, status);
641+
break;
800642
}
801643
return task_failed;
802644
}
@@ -820,11 +662,23 @@ bool parseHandleSaiStatusFailure(task_process_status status)
820662
return true;
821663
}
822664

823-
/* Handling SAI failure. Request redis to invoke SAI failure dump and abort if set*/
824-
void handleSaiFailure(bool abort_on_failure)
665+
/* Handling SAI failure. Request redis to invoke SAI failure dump */
666+
void handleSaiFailure(sai_api_t api, string oper, sai_status_t status)
825667
{
826668
SWSS_LOG_ENTER();
827669

670+
string s_api = sai_serialize_api(api);
671+
string s_status = sai_serialize_status(status);
672+
SWSS_LOG_ERROR("Encountered failure in %s operation, SAI API: %s, status: %s",
673+
oper.c_str(), s_api.c_str(), s_status.c_str());
674+
675+
// Publish a structured syslog event
676+
event_params_t params = {
677+
{ "operation", oper },
678+
{ "api", s_api },
679+
{ "status", s_status }};
680+
event_publish(g_events_handle, "sai-operation-failure", &params);
681+
828682
sai_attribute_t attr;
829683

830684
attr.id = SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD;
@@ -834,13 +688,8 @@ void handleSaiFailure(bool abort_on_failure)
834688
{
835689
SWSS_LOG_ERROR("Failed to take sai failure dump %d", status);
836690
}
837-
if (abort_on_failure)
838-
{
839-
abort();
840-
}
841691
}
842692

843-
844693
static inline void initSaiRedisCounterEmptyParameter(sai_s8_list_t &sai_s8_list)
845694
{
846695
sai_s8_list.list = nullptr;
@@ -1140,7 +989,7 @@ std::vector<sai_stat_id_t> queryAvailableCounterStats(const sai_object_type_t ob
1140989

1141990
if (!info)
1142991
{
1143-
SWSS_LOG_ERROR("Metadata info query failed, invalid object: %d", object_type);
992+
SWSS_LOG_WARN("Metadata info query failed, invalid object: %d", object_type);
1144993
return stat_list;
1145994
}
1146995

0 commit comments

Comments
 (0)