From f2ad69b0a0b585fe7ac99c17ec050cc515245bba Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Fri, 9 Aug 2019 14:19:59 +0300 Subject: [PATCH 1/5] [sonic_platform.sfp_event]enhance the initialization flow of sfp_event --- .../sonic_platform/sfp_event.py | 116 +++++++++++++----- 1 file changed, 85 insertions(+), 31 deletions(-) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py index 1e57603d38a..95fe5dfe362 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py @@ -24,6 +24,14 @@ SDK_SFP_STATE_OUT: STATUS_PLUGOUT, } +# system level event/error +EVENT_ON_ALL_SFP = '-1' +SYSTEM_NOT_READY = 'system_not_ready' +SYSTEM_READY = 'system_become_ready' +SYSTEM_FAIL = 'system_fail' + +SDK_DAEMON_READY_FILE = '/tmp/sdk_ready' + PMPE_PACKET_SIZE = 2000 logger = Logger(SYSLOG_IDENTIFIER) @@ -31,47 +39,93 @@ class sfp_event: ''' Listen to plugin/plugout cable events ''' - SX_OPEN_RETRIES = 20 + SX_OPEN_RETRIES = 30 + SX_OPEN_TIMEOUT = 5 + SELECT_TIMEOUT = 1 def __init__(self): self.swid = 0 self.handle = None - - def initialize(self): - # open SDK API handle. - # retry at most SX_OPEN_RETRIES times to wait until SDK is started during system startup - retry = 1 - while True: - rc, self.handle = sx_api_open(None) - if rc == SX_STATUS_SUCCESS: - break - - logger.log_info("failed to open SDK API handle... retrying {}".format(retry)) - - time.sleep(2 ** retry) - retry += 1 - - if retry > self.SX_OPEN_RETRIES: - raise RuntimeError("failed to open SDK API handle after {} retries".format(retry)) + self.running = True + self.has_report_not_ready = False # Allocate SDK fd and user channel structures self.rx_fd_p = new_sx_fd_t_p() self.user_channel_p = new_sx_user_channel_t_p() - rc = sx_api_host_ifc_open(self.handle, self.rx_fd_p) - if rc != SX_STATUS_SUCCESS: - raise RuntimeError("sx_api_host_ifc_open exited with error, rc {}".format(rc)) - - self.user_channel_p.type = SX_USER_CHANNEL_TYPE_FD - self.user_channel_p.channel.fd = self.rx_fd_p + def initialize(self): + swid_cnt_p = None - rc = sx_api_host_ifc_trap_id_register_set(self.handle, - SX_ACCESS_CMD_REGISTER, - self.swid, - SX_TRAP_ID_PMPE, - self.user_channel_p) - if rc != SX_STATUS_SUCCESS: - raise RuntimeError("sx_api_host_ifc_trap_id_register_set exited with error, rc {}".format(rc)) + try: + # Wait for SDK daemon to be started with detect the sdk_ready file + retry = 0 + while not os.path.exists(SDK_DAEMON_READY_FILE): + if retry >= self.SX_OPEN_RETRIES: + raise RuntimeError("SDK daemon failed to start after {} retries and {} seconds waiting, exiting..." + .format(retry, self.SX_OPEN_TIMEOUT * self.SX_OPEN_RETRIES)) + else: + logger.log_info("SDK daemon not started yet, retry {} times".format(retry)) + retry = retry + 1 + time.sleep(self.SX_OPEN_TIMEOUT) + + # to make sure SDK daemon has started + time.sleep(self.SX_OPEN_TIMEOUT) + + # After SDK daemon started, sx_api_open and sx_api_host_ifc_open is ready for call + rc, self.handle = sx_api_open(None) + if rc != SX_STATUS_SUCCESS: + raise RuntimeError("failed to call sx_api_open with rc {}, exiting...".format(rc)) + + rc = sx_api_host_ifc_open(self.handle, self.rx_fd_p) + if rc != SX_STATUS_SUCCESS: + raise RuntimeError("failed to call sx_api_host_ifc_open with rc {}, exiting...".format(rc)) + + self.user_channel_p.type = SX_USER_CHANNEL_TYPE_FD + self.user_channel_p.channel.fd = self.rx_fd_p + + # Wait for switch to be created and initialized inside SDK + retry = 0 + swid_cnt_p = new_uint32_t_p() + uint32_t_p_assign(swid_cnt_p, 0) + swid_cnt = 0 + while True: + if retry >= self.SX_OPEN_RETRIES: + raise RuntimeError("switch not created after {} retries and {} seconds waiting, exiting..." + .format(retry, self.SX_OPEN_RETRIES * self.SX_OPEN_TIMEOUT)) + else: + rc = sx_api_port_swid_list_get(self.handle, None, swid_cnt_p) + if rc == SX_STATUS_SUCCESS: + swid_cnt = uint32_t_p_value(swid_cnt_p) + if swid_cnt > 0: + delete_uint32_t_p(swid_cnt_p) + swid_cnt_p = None + break + else: + logger.log_info("switch not created yet, swid_cnt {}, retry {} times and wait for {} seconds" + .format(swid_cnt, retry, self.SX_OPEN_TIMEOUT * retry)) + else: + raise RuntimeError("sx_api_port_swid_list_get fail with rc {}, retry {} times and wait for {} seconds". + format(rc, retry, self.SX_OPEN_TIMEOUT * retry)) + + retry = retry + 1 + time.sleep(self.SX_OPEN_TIMEOUT) + + # After switch was created inside SDK, sx_api_host_ifc_trap_id_register_set is ready to call + rc = sx_api_host_ifc_trap_id_register_set(self.handle, + SX_ACCESS_CMD_REGISTER, + self.swid, + SX_TRAP_ID_PMPE, + self.user_channel_p) + + if rc != SX_STATUS_SUCCESS: + raise RuntimeError("sx_api_host_ifc_trap_id_register_set failed with rc {}, exiting...".format(rc)) + + self.running = True + except Exception as e: + logger.log_error("mlnx-sfpd initialization failed due to {}, exiting...".format(repr(e))) + if swid_cnt_p is not None: + delete_uint32_t_p(swid_cnt_p) + self.deinitialize() def deinitialize(self): if self.handle is None: From 46f3c253102aba2fa43a8d65affb1b351f351dbf Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Tue, 20 Aug 2019 01:41:33 +0300 Subject: [PATCH 2/5] [sonic_platform.sfp_event] replace "retry = retry + 1" with "retry += 1" --- .../mellanox/mlnx-platform-api/sonic_platform/sfp_event.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py index 95fe5dfe362..15c2e2b8eeb 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py @@ -65,7 +65,7 @@ def initialize(self): .format(retry, self.SX_OPEN_TIMEOUT * self.SX_OPEN_RETRIES)) else: logger.log_info("SDK daemon not started yet, retry {} times".format(retry)) - retry = retry + 1 + retry += 1 time.sleep(self.SX_OPEN_TIMEOUT) # to make sure SDK daemon has started @@ -107,7 +107,7 @@ def initialize(self): raise RuntimeError("sx_api_port_swid_list_get fail with rc {}, retry {} times and wait for {} seconds". format(rc, retry, self.SX_OPEN_TIMEOUT * retry)) - retry = retry + 1 + retry += 1 time.sleep(self.SX_OPEN_TIMEOUT) # After switch was created inside SDK, sx_api_host_ifc_trap_id_register_set is ready to call From e931725db29e1779e39f5c48a6b7b3a128165113 Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Thu, 22 Aug 2019 09:08:25 +0300 Subject: [PATCH 3/5] [sonic_platform] fix typo in sfp_event --- platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py index 15c2e2b8eeb..4a24fc9941d 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py @@ -122,7 +122,7 @@ def initialize(self): self.running = True except Exception as e: - logger.log_error("mlnx-sfpd initialization failed due to {}, exiting...".format(repr(e))) + logger.log_error("sfp_event initialization failed due to {}, exiting...".format(repr(e))) if swid_cnt_p is not None: delete_uint32_t_p(swid_cnt_p) self.deinitialize() From b765f53055c489b11441e708018ecf401a35833b Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Mon, 26 Aug 2019 06:31:38 +0300 Subject: [PATCH 4/5] [sfp_event] remove unused variables --- .../mellanox/mlnx-platform-api/sonic_platform/sfp_event.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py index 4a24fc9941d..65f092dfcbe 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py @@ -46,8 +46,6 @@ class sfp_event: def __init__(self): self.swid = 0 self.handle = None - self.running = True - self.has_report_not_ready = False # Allocate SDK fd and user channel structures self.rx_fd_p = new_sx_fd_t_p() @@ -119,8 +117,6 @@ def initialize(self): if rc != SX_STATUS_SUCCESS: raise RuntimeError("sx_api_host_ifc_trap_id_register_set failed with rc {}, exiting...".format(rc)) - - self.running = True except Exception as e: logger.log_error("sfp_event initialization failed due to {}, exiting...".format(repr(e))) if swid_cnt_p is not None: From 1a4b27ebb0cb7a8cad61cba234f60f8013476705 Mon Sep 17 00:00:00 2001 From: Stephen Sun Date: Wed, 25 Sep 2019 09:03:32 +0300 Subject: [PATCH 5/5] [sonic_platform/sfp_event.py]remove unnecessary statements --- .../mellanox/mlnx-platform-api/sonic_platform/sfp_event.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py index 65f092dfcbe..439df785b24 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py @@ -66,9 +66,6 @@ def initialize(self): retry += 1 time.sleep(self.SX_OPEN_TIMEOUT) - # to make sure SDK daemon has started - time.sleep(self.SX_OPEN_TIMEOUT) - # After SDK daemon started, sx_api_open and sx_api_host_ifc_open is ready for call rc, self.handle = sx_api_open(None) if rc != SX_STATUS_SUCCESS: