From a181b5d94ed75c213626d2fc7e6995c049404769 Mon Sep 17 00:00:00 2001 From: Prabhat Aravind Date: Fri, 25 Oct 2024 18:30:38 +0000 Subject: [PATCH 1/2] Orchagent SAI error handling improvements What I did This change aims to reduce self-induced orchagent exit when any SAI API call fails (i.e returns anything other than SAI_STATUS_SUCCESS). This change is the first set of changes that does the following: handleSaiCreateStatus() / handleSaiSetStatus() changes Return 'task_success' for SAI_STATUS_ITEM_ALREADY_EXISTS, SAI_STATUS_ITEM_NOT_FOUND, SAI_STATUS_ADDR_NOT_FOUND and SAI_STATUS_OBJECT_IN_USE irrespective of the object type. Return 'task_need_retry' for SAI_STATUS_INSUFFICIENT_RESOURCES, SAI_STATUS_TABLE_FULL, SAI_STATUS_NO_MEMORY and SAI_STATUS_NV_STORAGE_FULL. Call handleSaiFailure() and return 'task_failed' for other SAI errors. This will log a structured syslog via eventd and also take a SAI dump. handleSaiRemoveStatus() changes Return 'task_success' for SAI_STATUS_ITEM_ALREADY_EXISTS, SAI_STATUS_ITEM_NOT_FOUND, SAI_STATUS_ADDR_NOT_FOUND, SAI_STATUS_INSUFFICIENT_RESOURCES, SAI_STATUS_TABLE_FULL, SAI_STATUS_NO_MEMORY, SAI_STATUS_NV_STORAGE_FULL Return 'task_need_retry' for SAI_STATUS_OBJECT_IN_USE Call handleSaiFailure() and return 'task_failed' for other SAI errors. This will log a structured syslog via eventd and also take a SAI dump. handleSaiGetStatus() changes Log a NOTICE message and return task_failed. This is similar to what is being done today for GET calls. handleSaiFailure() changes Update handleSaiFailure() to take 3 arguments - namely the SAI API, operation type string and SAI API return status. This will be used in crafting a structured syslog error message when the failure happens. All callers of this function are updated accordingly. Mock test changes Added new tests for coverage and updated existing tests that do "ASSERT_DEATH" assertions when SAI API calls fail. Fixed errors in existing portsorch_ut test cases What is not done There are changes needed to all orchs to handle scenarios where orchagent doesn't crash anymore when SAI API calls fail. There are also places in different orchs where an explicit exception is thrown in case of SAI errors. These and the remaining items in sonic-net/SONiC#1698 will be handled in phase-2. Why I did it Crashing orchagent on every SAI error is an overkill. Instead, we follow the approach that is called out in the HLD above to handle these errors in a more graceful manner. How I verified it By adding mock tests to verify that orchagent no longer exits when there are SAI API call failures. Details if related Sample error handling snippet showing eventd log and SAI dump invocation. 2025 Jun 6 17:53:24.448168 sonic ERR swss#orchagent: :- meta_sai_validate_route_entry: object key SAI_OBJECT_TYPE_ROUTE_ENTRY:{"dest":"10.1.0.32/32","switch_id":"oid:0x21000000000000","vr":"oid:0x3000000000023"} already exists 2025 Jun 6 17:53:24.448547 sonic ERR swss#orchagent: :- flush_creating_entries: EntityBulker.flush create entries failed, number of entries to create: 1, status: SAI_STATUS_ITEM_ALREADY_EXISTS 2025 Jun 6 17:53:24.448750 sonic ERR swss#orchagent: :- addRoutePost: Failed to create route 10.1.0.32/32 with next hop(s) 30.1.0.2@PortChannel101 2025 Jun 6 17:53:24.448933 sonic ERR swss#orchagent: :- handleSaiFailure: Encountered failure in create operation, SAI API: SAI_API_ROUTE, status: SAI_STATUS_NOT_EXECUTED 2025 Jun 6 17:53:24.449276 sonic NOTICE syncd#syncd: :- processNotifySyncd: Invoking SAI failure dump 2025 Jun 6 17:53:24.449276 sonic NOTICE swss#orchagent: :- publish: EVENT_PUBLISHED: {"sonic-events-swss:sai-operation-failure":{"api":"SAI_API_ROUTE","operation":"create","status":"SAI_STATUS_NOT_EXECUTED","timestamp":"2025-06-06T17:53:24.447963Z"}} 2025 Jun 6 17:53:24.449309 sonic NOTICE swss#orchagent: :- notifySyncd: sending syncd: SYNCD_INVOKE_DUMP 2025 Jun 6 17:53:24.465408 sonic NOTICE swss#orchagent: :- sai_redis_notify_syncd: invoked DUMP succeeded Signed-off-by: Prabhat Aravind --- orchagent/main.cpp | 15 +- orchagent/orchdaemon.cpp | 2 +- orchagent/saihelper.cpp | 347 ++++++------------ orchagent/saihelper.h | 3 +- tests/mock_tests/mock_saihelper.cpp | 393 ++++++++++++++++++++- tests/mock_tests/orchdaemon_ut.cpp | 12 + tests/mock_tests/portsorch_ut.cpp | 27 +- tests/mock_tests/test_failure_handling.cpp | 14 +- 8 files changed, 543 insertions(+), 270 deletions(-) diff --git a/orchagent/main.cpp b/orchagent/main.cpp index 395c7a7e484..6074169a6c4 100644 --- a/orchagent/main.cpp +++ b/orchagent/main.cpp @@ -127,7 +127,8 @@ void syncd_apply_view() if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to notify syncd APPLY_VIEW %d", status); - handleSaiFailure(true); + handleSaiFailure(SAI_API_SWITCH, "set", status); + return; } } @@ -701,7 +702,8 @@ int main(int argc, char **argv) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to create a switch, rv:%d", status); - handleSaiFailure(true); + handleSaiFailure(SAI_API_SWITCH, "create", status); + return EXIT_FAILURE; } SWSS_LOG_NOTICE("Create a switch, id:%" PRIu64, gSwitchId); @@ -732,7 +734,8 @@ int main(int argc, char **argv) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to get MAC address from switch, rv:%d", status); - handleSaiFailure(true); + handleSaiFailure(SAI_API_SWITCH, "get", status); + return EXIT_FAILURE; } else { @@ -747,7 +750,8 @@ int main(int argc, char **argv) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Fail to get switch virtual router ID %d", status); - handleSaiFailure(true); + handleSaiFailure(SAI_API_SWITCH, "get", status); + return EXIT_FAILURE; } gVirtualRouterId = attr.value.oid; @@ -789,7 +793,8 @@ int main(int argc, char **argv) if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to create underlay router interface %d", status); - handleSaiFailure(true); + handleSaiFailure(SAI_API_ROUTER_INTERFACE, "create", status); + return EXIT_FAILURE; } SWSS_LOG_NOTICE("Created underlay router interface ID %" PRIx64, gUnderlayIfId); diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index 9d366bcd320..50f6cde21fe 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -836,7 +836,7 @@ void OrchDaemon::flush() if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to flush redis pipeline %d", status); - handleSaiFailure(true); + handleSaiFailure(SAI_API_SWITCH, "set", status); } for (auto* orch: m_orchList) diff --git a/orchagent/saihelper.cpp b/orchagent/saihelper.cpp index 0b471c3c1e3..888fa17efdb 100644 --- a/orchagent/saihelper.cpp +++ b/orchagent/saihelper.cpp @@ -96,6 +96,7 @@ extern sai_object_id_t gSwitchId; extern bool gTraditionalFlexCounter; extern bool gSyncMode; extern sai_redis_communication_mode_t gRedisCommunicationMode; +extern event_handle_t g_events_handle; vector gGearboxOids; @@ -333,7 +334,7 @@ void initSaiRedis() if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to set communication mode, rv:%d", status); - exit(EXIT_FAILURE); + return handleSaiFailure(SAI_API_SWITCH, "set", status); } auto record_filename = Recorder::Instance().sairedis.getFile(); @@ -351,7 +352,7 @@ void initSaiRedis() { SWSS_LOG_ERROR("Failed to set SAI Redis recording output folder to %s, rv:%d", record_location.c_str(), status); - exit(EXIT_FAILURE); + return handleSaiFailure(SAI_API_SWITCH, "set", status); } attr.id = SAI_REDIS_SWITCH_ATTR_RECORDING_FILENAME; @@ -363,7 +364,7 @@ void initSaiRedis() { SWSS_LOG_ERROR("Failed to set SAI Redis recording logfile to %s, rv:%d", record_filename.c_str(), status); - exit(EXIT_FAILURE); + return handleSaiFailure(SAI_API_SWITCH, "set", status); } } @@ -377,7 +378,7 @@ void initSaiRedis() { SWSS_LOG_ERROR("Failed to %s SAI Redis recording, rv:%d", Recorder::Instance().sairedis.isRecord() ? "enable" : "disable", status); - exit(EXIT_FAILURE); + return handleSaiFailure(SAI_API_SWITCH, "set", status); } if (gRedisCommunicationMode == SAI_REDIS_COMMUNICATION_MODE_REDIS_ASYNC) @@ -390,7 +391,7 @@ void initSaiRedis() if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to enable redis pipeline, rv:%d", status); - exit(EXIT_FAILURE); + return handleSaiFailure(SAI_API_SWITCH, "set", status); } } @@ -408,7 +409,7 @@ void initSaiRedis() if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to set SAI REDIS response timeout"); - exit(EXIT_FAILURE); + return handleSaiFailure(SAI_API_SWITCH, "set", status); } SWSS_LOG_NOTICE("SAI REDIS response timeout set successfully to %" PRIu64 " ", attr.value.u64); @@ -421,7 +422,7 @@ void initSaiRedis() if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to notify syncd INIT_VIEW, rv:%d gSwitchId %" PRIx64, status, gSwitchId); - exit(EXIT_FAILURE); + return handleSaiFailure(SAI_API_SWITCH, "set", status); } SWSS_LOG_NOTICE("Notify syncd INIT_VIEW"); @@ -435,7 +436,7 @@ void initSaiRedis() if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to set SAI REDIS response timeout"); - exit(EXIT_FAILURE); + return handleSaiFailure(SAI_API_SWITCH, "set", status); } SWSS_LOG_NOTICE("SAI REDIS response timeout set successfully to %" PRIu64 " ", attr.value.u64); @@ -566,119 +567,33 @@ task_process_status handleSaiCreateStatus(sai_api_t api, sai_status_t status, vo * in each orch. * 3. Take the type of sai api into consideration. */ - switch (api) + string s_api = sai_serialize_api(api); + string s_status = sai_serialize_status(status); + + switch (status) { - case SAI_API_FDB: - switch (status) - { - case SAI_STATUS_SUCCESS: - SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus"); - return task_success; - case SAI_STATUS_ITEM_ALREADY_EXISTS: - /* - * In FDB creation, there are scenarios where the hardware learns an FDB entry before orchagent. - * In such cases, the FDB SAI creation would report the status of SAI_STATUS_ITEM_ALREADY_EXISTS, - * and orchagent should ignore the error and treat it as entry was explicitly created. - */ - return task_success; - default: - SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - handleSaiFailure(true); - break; - } - break; - case SAI_API_HOSTIF: - switch (status) - { - case SAI_STATUS_SUCCESS: - return task_success; - case SAI_STATUS_FAILURE: - /* - * Host interface maybe failed due to lane not available. - * In some scenarios, like SONiC virtual machine, the invalid lane may be not enabled by VM configuration, - * So just ignore the failure and report an error log. - */ - return task_ignore; - default: - SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - handleSaiFailure(true); - break; - } - break; - case SAI_API_ROUTE: - switch (status) - { - case SAI_STATUS_SUCCESS: - SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus"); - return task_success; - case SAI_STATUS_ITEM_ALREADY_EXISTS: - case SAI_STATUS_NOT_EXECUTED: - /* With VNET routes, the same route can be learned via multiple - sources, like via BGP. Handle this gracefully */ - return task_success; - case SAI_STATUS_TABLE_FULL: - return task_need_retry; - default: - SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - handleSaiFailure(true); - break; - } - break; - case SAI_API_NEIGHBOR: - case SAI_API_NEXT_HOP: - case SAI_API_NEXT_HOP_GROUP: - switch(status) - { - case SAI_STATUS_SUCCESS: - SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus"); - return task_success; - case SAI_STATUS_ITEM_ALREADY_EXISTS: - return task_success; - case SAI_STATUS_TABLE_FULL: - return task_need_retry; - default: - SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - handleSaiFailure(true); - break; - } - break; - case SAI_API_ICMP_ECHO: - switch(status) - { - case SAI_STATUS_SUCCESS: - SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus"); - return task_success; - /* - * Offload table resource maybe a shared resource, - * avoid abort when icmp offload table is full. - */ - case SAI_STATUS_TABLE_FULL: - return task_need_retry; - default: - SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - handleSaiFailure(true); - break; - } - break; + case SAI_STATUS_SUCCESS: + return task_success; + case SAI_STATUS_ITEM_NOT_FOUND: + case SAI_STATUS_ADDR_NOT_FOUND: + case SAI_STATUS_OBJECT_IN_USE: + SWSS_LOG_WARN("Status %s is not expected for create operation, SAI API: %s", + s_status.c_str(), s_api.c_str()); + return task_success; + case SAI_STATUS_ITEM_ALREADY_EXISTS: + SWSS_LOG_NOTICE("Returning success for create operation, SAI API: %s, status: %s", + s_api.c_str(), s_status.c_str()); + return task_success; + case SAI_STATUS_INSUFFICIENT_RESOURCES: + case SAI_STATUS_TABLE_FULL: + case SAI_STATUS_NO_MEMORY: + case SAI_STATUS_NV_STORAGE_FULL: + return task_need_retry; default: - switch (status) - { - case SAI_STATUS_SUCCESS: - SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiCreateStatus"); - return task_success; - default: - SWSS_LOG_ERROR("Encountered failure in create operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - handleSaiFailure(true); - break; - } + handleSaiFailure(api, "create", status); + break; } - return task_need_retry; + return task_failed; } task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void *context) @@ -694,68 +609,38 @@ task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void * in each orch. * 3. Take the type of sai api into consideration. */ - if (status == SAI_STATUS_SUCCESS) - { - SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiSetStatus"); - return task_success; - } + string s_api = sai_serialize_api(api); + string s_status = sai_serialize_status(status); - switch (api) + switch (status) { - case SAI_API_PORT: - switch (status) - { - case SAI_STATUS_INVALID_ATTR_VALUE_0: - /* - * If user gives an invalid attribute value, no need to retry or exit orchagent, just fail the current task - * and let user correct the configuration. - */ - SWSS_LOG_ERROR("Encountered SAI_STATUS_INVALID_ATTR_VALUE_0 in set operation, task failed, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - return task_failed; - default: - SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - handleSaiFailure(true); - break; - } - break; - case SAI_API_TUNNEL: - switch (status) - { - case SAI_STATUS_ATTR_NOT_SUPPORTED_0: - SWSS_LOG_ERROR("Encountered SAI_STATUS_ATTR_NOT_SUPPORTED_0 in set operation, task failed, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - return task_failed; - default: - SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - handleSaiFailure(true); - break; - } - break; - case SAI_API_BUFFER: - switch (status) - { - case SAI_STATUS_INSUFFICIENT_RESOURCES: - SWSS_LOG_ERROR("Encountered SAI_STATUS_INSUFFICIENT_RESOURCES in set operation, task failed, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - return task_failed; - default: - SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - handleSaiFailure(true); - break; - } - break; + case SAI_STATUS_SUCCESS: + return task_success; + case SAI_STATUS_OBJECT_IN_USE: + SWSS_LOG_WARN("Status %s is not expected for set operation, SAI API: %s", + s_status.c_str(), s_api.c_str()); + return task_success; + case SAI_STATUS_ITEM_ALREADY_EXISTS: + case SAI_STATUS_ITEM_NOT_FOUND: + case SAI_STATUS_ADDR_NOT_FOUND: + /* There are specific cases especially with dual-TORs where tunnel + * routes and non-tunnel routes could be create for the same prefix + * which can potentially lead to conditions where ITEM_NOT_FOUND can + * be returned. This needs special handling in muxorch/routeorch. + */ + SWSS_LOG_NOTICE("Returning success for set operation, SAI API: %s, status: %s", + s_api.c_str(), s_status.c_str()); + return task_success; + case SAI_STATUS_INSUFFICIENT_RESOURCES: + case SAI_STATUS_TABLE_FULL: + case SAI_STATUS_NO_MEMORY: + case SAI_STATUS_NV_STORAGE_FULL: + return task_need_retry; default: - SWSS_LOG_ERROR("Encountered failure in set operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - handleSaiFailure(true); + handleSaiFailure(api, "set", status); break; } - - return task_need_retry; + return task_failed; } task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context) @@ -772,57 +657,33 @@ task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, vo * in each orch. * 3. Take the type of sai api into consideration. */ - switch (api) + string s_api = sai_serialize_api(api); + string s_status = sai_serialize_status(status); + + switch (status) { - case SAI_API_ROUTE: - switch (status) - { - case SAI_STATUS_SUCCESS: - SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiRemoveStatus"); - return task_success; - case SAI_STATUS_ITEM_NOT_FOUND: - case SAI_STATUS_NOT_EXECUTED: - /* When the same route is learned via multiple sources, - there can be a duplicate remove operation. Handle this gracefully */ - return task_success; - default: - SWSS_LOG_ERROR("Encountered failure in remove operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - handleSaiFailure(true); - break; - } - break; - case SAI_API_NEIGHBOR: - case SAI_API_NEXT_HOP: - case SAI_API_NEXT_HOP_GROUP: - switch (status) - { - case SAI_STATUS_SUCCESS: - SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiRemoveStatus"); - return task_success; - case SAI_STATUS_ITEM_NOT_FOUND: - return task_success; - default: - SWSS_LOG_ERROR("Encountered failure in remove operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - handleSaiFailure(true); - break; - } - break; + case SAI_STATUS_SUCCESS: + return task_success; + case SAI_STATUS_ITEM_ALREADY_EXISTS: + case SAI_STATUS_INSUFFICIENT_RESOURCES: + case SAI_STATUS_TABLE_FULL: + case SAI_STATUS_NO_MEMORY: + case SAI_STATUS_NV_STORAGE_FULL: + SWSS_LOG_WARN("Status %s is not expected for remove operation, SAI API: %s", + s_status.c_str(), s_api.c_str()); + return task_success; + case SAI_STATUS_ITEM_NOT_FOUND: + case SAI_STATUS_ADDR_NOT_FOUND: + SWSS_LOG_NOTICE("Returning success for remove operation, SAI API: %s, status: %s", + s_api.c_str(), s_status.c_str()); + return task_success; + case SAI_STATUS_OBJECT_IN_USE: + return task_need_retry; default: - switch (status) - { - case SAI_STATUS_SUCCESS: - SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiRemoveStatus"); - return task_success; - default: - SWSS_LOG_ERROR("Encountered failure in remove operation, exiting orchagent, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); - handleSaiFailure(true); - break; - } + handleSaiFailure(api, "remove", status); + break; } - return task_need_retry; + return task_failed; } task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context) @@ -838,18 +699,21 @@ task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void * in each orch. * 3. Take the type of sai api into consideration. */ + string s_api = sai_serialize_api(api); + string s_status = sai_serialize_status(status); + switch (status) { case SAI_STATUS_SUCCESS: - SWSS_LOG_WARN("SAI_STATUS_SUCCESS is not expected in handleSaiGetStatus"); return task_success; - case SAI_STATUS_NOT_IMPLEMENTED: - SWSS_LOG_ERROR("Encountered failure in get operation due to the function is not implemented, exiting orchagent, SAI API: %s", - sai_serialize_api(api).c_str()); - throw std::logic_error("SAI get function not implemented"); default: - SWSS_LOG_ERROR("Encountered failure in get operation, SAI API: %s, status: %s", - sai_serialize_api(api).c_str(), sai_serialize_status(status).c_str()); + /* + * handleSaiFailure() is not called for GET failures as it might + * overwhelm the system if there are too many such calls + */ + SWSS_LOG_NOTICE("Encountered failure in GET operation, SAI API: %s, status: %s", + s_api.c_str(), s_status.c_str()); + break; } return task_failed; } @@ -873,27 +737,34 @@ bool parseHandleSaiStatusFailure(task_process_status status) return true; } -/* Handling SAI failure. Request redis to invoke SAI failure dump and abort if set*/ -void handleSaiFailure(bool abort_on_failure) +/* Handling SAI failure. Request redis to invoke SAI failure dump */ +void handleSaiFailure(sai_api_t api, string oper, sai_status_t status) { SWSS_LOG_ENTER(); + string s_api = sai_serialize_api(api); + string s_status = sai_serialize_status(status); + SWSS_LOG_ERROR("Encountered failure in %s operation, SAI API: %s, status: %s", + oper.c_str(), s_api.c_str(), s_status.c_str()); + + // Publish a structured syslog event + event_params_t params = { + { "operation", oper }, + { "api", s_api }, + { "status", s_status }}; + event_publish(g_events_handle, "sai-operation-failure", ¶ms); + sai_attribute_t attr; attr.id = SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD; attr.value.s32 = SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP; - sai_status_t status = sai_switch_api->set_switch_attribute(gSwitchId, &attr); + status = sai_switch_api->set_switch_attribute(gSwitchId, &attr); if (status != SAI_STATUS_SUCCESS) { SWSS_LOG_ERROR("Failed to take sai failure dump %d", status); } - if (abort_on_failure) - { - abort(); - } } - static inline void initSaiRedisCounterEmptyParameter(sai_s8_list_t &sai_s8_list) { sai_s8_list.list = nullptr; diff --git a/orchagent/saihelper.h b/orchagent/saihelper.h index fab2d9ae912..54a52971fdf 100644 --- a/orchagent/saihelper.h +++ b/orchagent/saihelper.h @@ -5,6 +5,7 @@ #include #include "orch.h" #include "producertable.h" +#include "events.h" #define IS_ATTR_ID_IN_RANGE(attrId, objectType, attrPrefix) \ ((attrId) >= SAI_ ## objectType ## _ATTR_ ## attrPrefix ## _START && (attrId) <= SAI_ ## objectType ## _ATTR_ ## attrPrefix ## _END) @@ -20,7 +21,7 @@ task_process_status handleSaiSetStatus(sai_api_t api, sai_status_t status, void task_process_status handleSaiRemoveStatus(sai_api_t api, sai_status_t status, void *context = nullptr); task_process_status handleSaiGetStatus(sai_api_t api, sai_status_t status, void *context = nullptr); bool parseHandleSaiStatusFailure(task_process_status status); -void handleSaiFailure(bool abort_on_failure); +void handleSaiFailure(sai_api_t api, std::string oper, sai_status_t status); void setFlexCounterGroupParameter(const std::string &group, const std::string &poll_interval, diff --git a/tests/mock_tests/mock_saihelper.cpp b/tests/mock_tests/mock_saihelper.cpp index 2278b28c7e0..7398da920ef 100644 --- a/tests/mock_tests/mock_saihelper.cpp +++ b/tests/mock_tests/mock_saihelper.cpp @@ -9,6 +9,7 @@ #include "mock_table.h" #include "mock_response_publisher.h" #include "saihelper.h" +#include namespace saihelper_test { @@ -23,6 +24,12 @@ namespace saihelper_test bool set_comm_mode_not_supported; bool use_pipeline_not_supported; + bool record_output_dir_failure; + bool record_filename_failure; + bool record_failure; + bool response_timeout_failure; + uint32_t *_sai_syncd_notifications_count; + int32_t *_sai_syncd_notification_event; sai_status_t _ut_stub_sai_set_switch_attribute( _In_ sai_object_id_t switch_id, @@ -35,21 +42,41 @@ namespace saihelper_test { return SAI_STATUS_NOT_SUPPORTED; } - else - { - return SAI_STATUS_SUCCESS; - } break; case SAI_REDIS_SWITCH_ATTR_USE_PIPELINE: if (use_pipeline_not_supported) { return SAI_STATUS_NOT_SUPPORTED; } - else + break; + case SAI_REDIS_SWITCH_ATTR_RECORDING_OUTPUT_DIR: + if (record_output_dir_failure) + { + return SAI_STATUS_FAILURE; + } + break; + case SAI_REDIS_SWITCH_ATTR_RECORDING_FILENAME: + if (record_filename_failure) + { + return SAI_STATUS_FAILURE; + } + break; + case SAI_REDIS_SWITCH_ATTR_RECORD: + if (record_failure) + { + return SAI_STATUS_FAILURE; + } + break; + case SAI_REDIS_SWITCH_ATTR_SYNC_OPERATION_RESPONSE_TIMEOUT: + if (response_timeout_failure) { - return SAI_STATUS_SUCCESS; + return SAI_STATUS_FAILURE; } break; + case SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD: + *_sai_syncd_notifications_count = *_sai_syncd_notifications_count + 1; + *_sai_syncd_notification_event = attr[0].value.s32; + break; default: break; } @@ -90,6 +117,10 @@ namespace saihelper_test set_comm_mode_not_supported = false; use_pipeline_not_supported = false; + record_output_dir_failure = false; + record_filename_failure = false; + record_failure = false; + response_timeout_failure = false; map profile = { { "SAI_VS_SWITCH_TYPE", "SAI_VS_SWITCH_TYPE_BCM56850" }, @@ -142,8 +173,17 @@ namespace saihelper_test _hook_sai_apis(); initSwitchOrch(); - // Assert that the program terminates after initSaiRedis() call - ASSERT_DEATH({initSaiRedis();}, ""); + _sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + _sai_syncd_notification_event = (int32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + *_sai_syncd_notifications_count = 0; + uint32_t notif_count = *_sai_syncd_notifications_count; + + initSaiRedis(); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + set_comm_mode_not_supported = false; _unhook_sai_apis(); } @@ -153,10 +193,343 @@ namespace saihelper_test _hook_sai_apis(); initSwitchOrch(); - // Assert that the program terminates after initSaiRedis() call - ASSERT_DEATH({initSaiRedis();}, ""); + _sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + _sai_syncd_notification_event = (int32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + *_sai_syncd_notifications_count = 0; + uint32_t notif_count = *_sai_syncd_notifications_count; + + initSaiRedis(); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + use_pipeline_not_supported = false; _unhook_sai_apis(); } + + TEST_F(SaihelperTest, TestSetRecordingOutputDirFailure) { + record_output_dir_failure = true; + _hook_sai_apis(); + initSwitchOrch(); + + _sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + _sai_syncd_notification_event = (int32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + *_sai_syncd_notifications_count = 0; + uint32_t notif_count = *_sai_syncd_notifications_count; + + initSaiRedis(); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + + record_output_dir_failure = false; + _unhook_sai_apis(); + } + + TEST_F(SaihelperTest, TestSetRecordingFilenameFailure) { + record_filename_failure = true; + _hook_sai_apis(); + initSwitchOrch(); + + _sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + _sai_syncd_notification_event = (int32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + *_sai_syncd_notifications_count = 0; + uint32_t notif_count = *_sai_syncd_notifications_count; + + initSaiRedis(); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + + record_filename_failure = false; + _unhook_sai_apis(); + } + + TEST_F(SaihelperTest, TestSetRecordFailure) { + record_failure = true; + _hook_sai_apis(); + initSwitchOrch(); + + _sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + _sai_syncd_notification_event = (int32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + *_sai_syncd_notifications_count = 0; + uint32_t notif_count = *_sai_syncd_notifications_count; + + initSaiRedis(); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + + record_failure = false; + _unhook_sai_apis(); + } + + TEST_F(SaihelperTest, TestSetResponseTimeoutFailure) { + response_timeout_failure = true; + _hook_sai_apis(); + initSwitchOrch(); + + _sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + _sai_syncd_notification_event = (int32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + *_sai_syncd_notifications_count = 0; + uint32_t notif_count = *_sai_syncd_notifications_count; + (void) setenv("platform", "mellanox", 1); + + initSaiRedis(); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + + response_timeout_failure = false; + (void) unsetenv("platform"); + _unhook_sai_apis(); + } + + TEST_F(SaihelperTest, TestCreateFailure) { + _hook_sai_apis(); + initSwitchOrch(); + + _sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + _sai_syncd_notification_event = (int32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + *_sai_syncd_notifications_count = 0; + uint32_t notif_count = *_sai_syncd_notifications_count; + task_process_status status; + + status = handleSaiCreateStatus(SAI_API_ROUTE, SAI_STATUS_FAILURE); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiCreateStatus(SAI_API_PORT, SAI_STATUS_FAILURE); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiCreateStatus(SAI_API_TUNNEL, SAI_STATUS_NOT_SUPPORTED); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiCreateStatus(SAI_API_TUNNEL, SAI_STATUS_NOT_IMPLEMENTED); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiCreateStatus(SAI_API_NEXT_HOP_GROUP, SAI_STATUS_INVALID_PARAMETER); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiCreateStatus(SAI_API_SWITCH, SAI_STATUS_UNINITIALIZED); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiCreateStatus((sai_api_t) SAI_API_DASH_OUTBOUND_ROUTING, SAI_STATUS_INVALID_OBJECT_ID); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiCreateStatus(SAI_API_ACL, SAI_STATUS_MANDATORY_ATTRIBUTE_MISSING); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiCreateStatus(SAI_API_QOS_MAP, SAI_STATUS_INVALID_ATTR_VALUE_MAX); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiCreateStatus(SAI_API_TUNNEL, SAI_STATUS_ATTR_NOT_IMPLEMENTED_6); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiCreateStatus(SAI_API_ROUTER_INTERFACE, SAI_STATUS_UNKNOWN_ATTRIBUTE_0); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiCreateStatus(SAI_API_ROUTER_INTERFACE, SAI_STATUS_ATTR_NOT_SUPPORTED_0); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiCreateStatus(SAI_API_LAG, SAI_STATUS_INVALID_PORT_NUMBER); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + _unhook_sai_apis(); + } + + TEST_F(SaihelperTest, TestSetFailure) { + _hook_sai_apis(); + initSwitchOrch(); + + _sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + _sai_syncd_notification_event = (int32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + *_sai_syncd_notifications_count = 0; + uint32_t notif_count = *_sai_syncd_notifications_count; + task_process_status status; + + status = handleSaiSetStatus(SAI_API_ROUTE, SAI_STATUS_FAILURE); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiSetStatus(SAI_API_ROUTE, SAI_STATUS_NOT_EXECUTED); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiSetStatus(SAI_API_PORT, SAI_STATUS_FAILURE); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiSetStatus(SAI_API_TUNNEL, SAI_STATUS_NOT_IMPLEMENTED); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiSetStatus(SAI_API_HOSTIF, SAI_STATUS_INVALID_PARAMETER); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiSetStatus(SAI_API_PORT, SAI_STATUS_ATTR_NOT_SUPPORTED_0); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + status = handleSaiSetStatus(SAI_API_LAG, SAI_STATUS_INVALID_PORT_NUMBER); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); + ASSERT_EQ(status, task_failed); + + _unhook_sai_apis(); + } + + TEST_F(SaihelperTest, TestGetFailure) { + _hook_sai_apis(); + initSwitchOrch(); + + _sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + *_sai_syncd_notifications_count = 0; + task_process_status status; + + status = handleSaiGetStatus(SAI_API_FDB, SAI_STATUS_INVALID_PARAMETER); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_failed); + _unhook_sai_apis(); + } + + TEST_F(SaihelperTest, TestAllSuccess) { + _hook_sai_apis(); + initSwitchOrch(); + + _sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + *_sai_syncd_notifications_count = 0; + task_process_status status; + + status = handleSaiCreateStatus(SAI_API_NEXT_HOP_GROUP, SAI_STATUS_SUCCESS); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_success); + + status = handleSaiSetStatus(SAI_API_ROUTE, SAI_STATUS_SUCCESS); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_success); + + status = handleSaiRemoveStatus(SAI_API_NEXT_HOP, SAI_STATUS_SUCCESS); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_success); + + status = handleSaiCreateStatus(SAI_API_VLAN, SAI_STATUS_ITEM_ALREADY_EXISTS); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_success); + + status = handleSaiSetStatus(SAI_API_ROUTE, SAI_STATUS_ITEM_ALREADY_EXISTS); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_success); + + status = handleSaiCreateStatus(SAI_API_MIRROR, SAI_STATUS_ITEM_NOT_FOUND); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_success); + + status = handleSaiSetStatus(SAI_API_NEIGHBOR, SAI_STATUS_ITEM_NOT_FOUND); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_success); + + status = handleSaiRemoveStatus(SAI_API_LAG, SAI_STATUS_ITEM_NOT_FOUND); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_success); + + _unhook_sai_apis(); + } + + TEST_F(SaihelperTest, TestCreateSetResourceFailure) { + _hook_sai_apis(); + initSwitchOrch(); + + _sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + *_sai_syncd_notifications_count = 0; + task_process_status status; + + status = handleSaiCreateStatus(SAI_API_ACL, SAI_STATUS_INSUFFICIENT_RESOURCES); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_need_retry); + + status = handleSaiSetStatus(SAI_API_PORT, SAI_STATUS_INSUFFICIENT_RESOURCES); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_need_retry); + + status = handleSaiCreateStatus(SAI_API_TUNNEL, SAI_STATUS_TABLE_FULL); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_need_retry); + + status = handleSaiSetStatus(SAI_API_ROUTER_INTERFACE, SAI_STATUS_TABLE_FULL); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_need_retry); + + status = handleSaiCreateStatus(SAI_API_NEXT_HOP_GROUP, SAI_STATUS_NO_MEMORY); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_need_retry); + + status = handleSaiSetStatus(SAI_API_NEXT_HOP_GROUP, SAI_STATUS_NO_MEMORY); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_need_retry); + + _unhook_sai_apis(); + } + + TEST_F(SaihelperTest, TestRemoveObjectInUse) { + _hook_sai_apis(); + initSwitchOrch(); + + _sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + *_sai_syncd_notifications_count = 0; + task_process_status status; + + status = handleSaiRemoveStatus(SAI_API_NEXT_HOP_GROUP, SAI_STATUS_OBJECT_IN_USE); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_need_retry); + + _unhook_sai_apis(); + } + } diff --git a/tests/mock_tests/orchdaemon_ut.cpp b/tests/mock_tests/orchdaemon_ut.cpp index 76dec528205..cb3973e76f5 100644 --- a/tests/mock_tests/orchdaemon_ut.cpp +++ b/tests/mock_tests/orchdaemon_ut.cpp @@ -6,6 +6,7 @@ #include #include #include "mock_sai_switch.h" +#include "saihelper.h" extern sai_switch_api_t* sai_switch_api; sai_switch_api_t test_sai_switch; @@ -16,6 +17,7 @@ namespace orchdaemon_test using ::testing::_; using ::testing::Return; using ::testing::StrictMock; + using ::testing::InSequence; DBConnector appl_db("APPL_DB", 0); DBConnector state_db("STATE_DB", 0); @@ -165,4 +167,14 @@ namespace orchdaemon_test orchd->disableRingBuffer(); } + TEST_F(OrchDaemonTest, TestRedisFlushFailure) + { + InSequence s; + + EXPECT_CALL(mock_sai_switch_, set_switch_attribute( _, _)).WillOnce(Return(SAI_STATUS_FAILURE)); + EXPECT_CALL(mock_sai_switch_, set_switch_attribute(_, _)); + + orchd->flush(); + } + } diff --git a/tests/mock_tests/portsorch_ut.cpp b/tests/mock_tests/portsorch_ut.cpp index a8c898ab651..6630425dffb 100644 --- a/tests/mock_tests/portsorch_ut.cpp +++ b/tests/mock_tests/portsorch_ut.cpp @@ -93,9 +93,9 @@ namespace portsorch_test uint32_t set_pt_interface_id_count = false; uint32_t set_pt_timestamp_template_count = false; uint32_t set_port_tam_count = false; - uint32_t set_pt_interface_id_failures; - uint32_t set_pt_timestamp_template_failures; - uint32_t set_port_tam_failures; + uint32_t set_pt_interface_id_failures = 0; + uint32_t set_pt_timestamp_template_failures = 0; + uint32_t set_port_tam_failures = 0; bool set_link_event_damping_success = true; uint32_t _sai_set_link_event_damping_algorithm_count; uint32_t _sai_set_link_event_damping_config_count; @@ -238,7 +238,7 @@ namespace portsorch_test { if (attr[0].id == SAI_REDIS_SWITCH_ATTR_NOTIFY_SYNCD) { - *_sai_syncd_notifications_count =+ 1; + *_sai_syncd_notifications_count = *_sai_syncd_notifications_count + 1; *_sai_syncd_notification_event = attr[0].value.s32; } else if (attr[0].id == SAI_SWITCH_ATTR_PFC_DLR_PACKET_ACTION) @@ -1686,6 +1686,12 @@ namespace portsorch_test _hook_sai_port_api(); _hook_sai_switch_api(); + _sai_syncd_notifications_count = (uint32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + _sai_syncd_notification_event = (int32_t*)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + *_sai_syncd_notifications_count = 0; + uint32_t notif_count = *_sai_syncd_notifications_count; auto portTable = Table(m_app_db.get(), APP_PORT_TABLE_NAME); Port p; std::deque kfvList; @@ -1738,8 +1744,9 @@ namespace portsorch_test consumer->addToSync(kfvList); static_cast(gPortsOrch)->doTask(); - - ASSERT_EQ(set_pt_interface_id_fail, 1); + ASSERT_EQ(set_pt_interface_id_failures, 1); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); set_pt_interface_id_fail = false; @@ -1762,6 +1769,8 @@ namespace portsorch_test static_cast(gPortsOrch)->doTask(); ASSERT_EQ(set_pt_timestamp_template_failures, 1); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); set_pt_timestamp_template_fail = false; @@ -1783,7 +1792,9 @@ namespace portsorch_test static_cast(gPortsOrch)->doTask(); - ASSERT_EQ(set_port_tam_fail, 1); + ASSERT_EQ(set_port_tam_failures, 1); + ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); + ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); set_port_tam_fail = false; @@ -2554,7 +2565,7 @@ namespace portsorch_test }}); auto consumer = dynamic_cast(gPortsOrch->getExecutor(APP_PORT_TABLE_NAME)); consumer->addToSync(entries); - ASSERT_DEATH({static_cast(gPortsOrch)->doTask();}, ""); + gPortsOrch->doTask(); ASSERT_EQ(*_sai_syncd_notifications_count, 1); ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); diff --git a/tests/mock_tests/test_failure_handling.cpp b/tests/mock_tests/test_failure_handling.cpp index 7381f4015ee..48fb3cb825c 100644 --- a/tests/mock_tests/test_failure_handling.cpp +++ b/tests/mock_tests/test_failure_handling.cpp @@ -58,31 +58,31 @@ namespace saifailure_test *_sai_syncd_notifications_count = 0; uint32_t notif_count = *_sai_syncd_notifications_count; - ASSERT_DEATH({handleSaiCreateStatus(SAI_API_FDB, SAI_STATUS_FAILURE);}, ""); + handleSaiCreateStatus(SAI_API_FDB, SAI_STATUS_FAILURE); ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); - ASSERT_DEATH({handleSaiCreateStatus(SAI_API_HOSTIF, SAI_STATUS_INVALID_PARAMETER);}, ""); + handleSaiCreateStatus(SAI_API_HOSTIF, SAI_STATUS_INVALID_PARAMETER); ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); - ASSERT_DEATH({handleSaiCreateStatus(SAI_API_PORT, SAI_STATUS_FAILURE);}, ""); + handleSaiCreateStatus(SAI_API_PORT, SAI_STATUS_FAILURE); ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); - ASSERT_DEATH({handleSaiSetStatus(SAI_API_HOSTIF, SAI_STATUS_FAILURE);}, ""); + handleSaiSetStatus(SAI_API_HOSTIF, SAI_STATUS_FAILURE); ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); - ASSERT_DEATH({handleSaiSetStatus(SAI_API_PORT, SAI_STATUS_FAILURE);}, ""); + handleSaiSetStatus(SAI_API_PORT, SAI_STATUS_FAILURE); ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); - ASSERT_DEATH({handleSaiSetStatus(SAI_API_TUNNEL, SAI_STATUS_FAILURE);}, ""); + handleSaiSetStatus(SAI_API_TUNNEL, SAI_STATUS_FAILURE); ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); - ASSERT_DEATH({handleSaiRemoveStatus(SAI_API_LAG, SAI_STATUS_FAILURE);}, ""); + handleSaiRemoveStatus(SAI_API_LAG, SAI_STATUS_FAILURE); ASSERT_EQ(*_sai_syncd_notifications_count, ++notif_count); ASSERT_EQ(*_sai_syncd_notification_event, SAI_REDIS_NOTIFY_SYNCD_INVOKE_DUMP); From d89b480133bee59fa14b886372c71c003f80a1be Mon Sep 17 00:00:00 2001 From: Prabhat Aravind Date: Sat, 14 Jun 2025 19:17:14 +0000 Subject: [PATCH 2/2] Add more mock tests for coverage Signed-off-by: Prabhat Aravind --- tests/mock_tests/mock_saihelper.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/mock_tests/mock_saihelper.cpp b/tests/mock_tests/mock_saihelper.cpp index 7398da920ef..0c9abf5f6a7 100644 --- a/tests/mock_tests/mock_saihelper.cpp +++ b/tests/mock_tests/mock_saihelper.cpp @@ -472,10 +472,18 @@ namespace saihelper_test ASSERT_EQ(*_sai_syncd_notifications_count, 0); ASSERT_EQ(status, task_success); + status = handleSaiSetStatus(SAI_API_NEXT_HOP, SAI_STATUS_OBJECT_IN_USE); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_success); + status = handleSaiRemoveStatus(SAI_API_LAG, SAI_STATUS_ITEM_NOT_FOUND); ASSERT_EQ(*_sai_syncd_notifications_count, 0); ASSERT_EQ(status, task_success); + status = handleSaiRemoveStatus(SAI_API_PORT, SAI_STATUS_ITEM_ALREADY_EXISTS); + ASSERT_EQ(*_sai_syncd_notifications_count, 0); + ASSERT_EQ(status, task_success); + _unhook_sai_apis(); }