Skip to content

Commit 05d2068

Browse files
authored
[xcvrd] Enable periodic polling of VDM relevant data (#582)
* [xcvrd] Enable periodic polling of VDM relevant data
  Signed-off-by: Mihir Patel <[email protected]>
* Added VDM freeze and unfreeze support
* Update VDM flag change counters and set/clear time in redis-db during periodic polling
* Updated comments and initializing flag count to 0 if flag is clear upon xcvrd boot-up
* Updated comments and initializing flag count to 0
* Fixed unit-test failure in test_update_flag_metadata_tables
* Moved dom_mgr.py to xcvrd/dom/ and changed a warning to debug in port_event_helper.py
* Restructured VDM related functions to separate classes
* Created vdm_utilities and db_utilities folder
* Addressed PR comments
---------
Signed-off-by: Mihir Patel <[email protected]>
1 parent 803aae7 commit 05d2068

File tree

11 files changed

+1015
-90
lines changed

11 files changed

+1015
-90
lines changed

sonic-xcvrd/tests/test_xcvrd.py

Lines changed: 460 additions & 33 deletions
Large diffs are not rendered by default.

sonic-xcvrd/xcvrd/dom/__init__.py

Whitespace-only changes.
Lines changed: 73 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
as a child thread of xcvrd main thread.
55
"""
66

7+
from contextlib import contextmanager
8+
9+
710
try:
811
import threading
912
import copy
@@ -13,18 +16,21 @@
1316
from natsort import natsorted
1417
from swsscommon import swsscommon
1518

16-
from . import xcvrd
17-
from .xcvrd_utilities import sfp_status_helper
18-
from .xcvrd_utilities.xcvr_table_helper import *
19-
from .xcvrd_utilities import port_event_helper
19+
from xcvrd import xcvrd
20+
from xcvrd.xcvrd_utilities import sfp_status_helper
21+
from xcvrd.xcvrd_utilities.xcvr_table_helper import *
22+
from xcvrd.xcvrd_utilities import port_event_helper
23+
from xcvrd.dom.utilities.db.utils import DBUtils
24+
from xcvrd.dom.utilities.vdm.utils import VDMUtils
25+
from xcvrd.dom.utilities.vdm.db_utils import VDMDBUtils
2026
except ImportError as e:
2127
raise ImportError(str(e) + " - required module not found in dom_mgr.py")
2228

2329
class DomInfoUpdateTask(threading.Thread):
2430
DOM_LOGGER_PREFIX = "DOM-INFO-UPDATE: "
2531
DOM_INFO_UPDATE_PERIOD_SECS = 60
2632

27-
def __init__(self, namespaces, port_mapping, main_thread_stop_event, skip_cmis_mgr, helper_logger):
33+
def __init__(self, namespaces, port_mapping, sfp_obj_dict, main_thread_stop_event, skip_cmis_mgr, helper_logger):
2834
threading.Thread.__init__(self)
2935
self.name = "DomInfoUpdateTask"
3036
self.exc = None
@@ -34,6 +40,11 @@ def __init__(self, namespaces, port_mapping, main_thread_stop_event, skip_cmis_m
3440
self.port_mapping = copy.deepcopy(port_mapping)
3541
self.namespaces = namespaces
3642
self.skip_cmis_mgr = skip_cmis_mgr
43+
self.sfp_obj_dict = sfp_obj_dict
44+
self.xcvr_table_helper = XcvrTableHelper(self.namespaces)
45+
self.db_utils = DBUtils(self.helper_logger)
46+
self.vdm_utils = VDMUtils(self.sfp_obj_dict, self.helper_logger)
47+
self.vdm_db_utils = VDMDBUtils(self.sfp_obj_dict, self.port_mapping, self.xcvr_table_helper, self.task_stopping_event, self.helper_logger)
3748

3849
def log_debug(self, message):
3950
self.helper_logger.log_debug("{}{}".format(self.DOM_LOGGER_PREFIX, message))
@@ -131,11 +142,6 @@ def beautify_dom_info_dict(self, dom_info_dict, physical_port):
131142
# For all the other keys:
132143
dom_info_dict[k] = str(v)
133144

134-
def beautify_info_dict(self, info_dict):
135-
for k, v in info_dict.items():
136-
if not isinstance(v, str):
137-
info_dict[k] = str(v)
138-
139145
# Update port sfp firmware info in db
140146
def post_port_sfp_firmware_info_to_db(self, logical_port_name, port_mapping, table,
141147
stop_event=threading.Event(), firmware_info_cache=None):
@@ -219,7 +225,7 @@ def update_port_transceiver_status_table_hw(self, logical_port_name, port_mappin
219225
# Skip if empty (i.e. get_transceiver_status API is not applicable for this xcvr)
220226
if not transceiver_status_dict:
221227
continue
222-
self.beautify_info_dict(transceiver_status_dict)
228+
self.db_utils.beautify_info_dict(transceiver_status_dict)
223229
fvs = swsscommon.FieldValuePairs([(k, v) for k, v in transceiver_status_dict.items()])
224230
table.set(physical_port_name, fvs)
225231
else:
@@ -249,18 +255,19 @@ def post_port_pm_info_to_db(self, logical_port_name, port_mapping, table, stop_e
249255
# Skip if empty (i.e. get_transceiver_pm API is not applicable for this xcvr)
250256
if not pm_info_dict:
251257
continue
252-
self.beautify_info_dict(pm_info_dict)
258+
self.db_utils.beautify_info_dict(pm_info_dict)
253259
fvs = swsscommon.FieldValuePairs([(k, v) for k, v in pm_info_dict.items()])
254260
table.set(physical_port_name, fvs)
255261
else:
256262
return xcvrd.SFP_EEPROM_NOT_READY
257263

258264
def task_worker(self):
259-
self.xcvr_table_helper = XcvrTableHelper(self.namespaces)
260265
self.log_notice("Start DOM monitoring loop")
261266
firmware_info_cache = {}
262267
dom_info_cache = {}
263268
transceiver_status_cache = {}
269+
vdm_real_value_cache = {}
270+
vdm_flag_cache = {}
264271
pm_info_cache = {}
265272
sel, asic_context = port_event_helper.subscribe_port_config_change(self.namespaces)
266273

@@ -270,6 +277,8 @@ def task_worker(self):
270277
firmware_info_cache.clear()
271278
dom_info_cache.clear()
272279
transceiver_status_cache.clear()
280+
vdm_real_value_cache.clear()
281+
vdm_flag_cache.clear()
273282
pm_info_cache.clear()
274283

275284
# Handle port change event from main thread
@@ -279,13 +288,26 @@ def task_worker(self):
279288
if self.is_port_dom_monitoring_disabled(logical_port_name):
280289
continue
281290

291+
if self.task_stopping_event.is_set():
292+
self.log_notice("DomInfoUpdateTask stop event generated during DOM monitoring loop")
293+
break
294+
282295
# Get the asic to which this port belongs
283296
asic_index = self.port_mapping.get_asic_id_for_logical_port(logical_port_name)
284297
if asic_index is None:
285298
self.log_warning("Got invalid asic index for {}, ignored".format(logical_port_name))
286299
continue
287300

301+
physical_port_list = self.port_mapping.get_logical_to_physical(logical_port_name)
302+
if not physical_port_list:
303+
self.log_warning("Got unknown physical port list {} for lport {}".format(physical_port_list, logical_port_name))
304+
continue
305+
physical_port = physical_port_list[0]
306+
288307
if not sfp_status_helper.detect_port_in_error_status(logical_port_name, self.xcvr_table_helper.get_status_tbl(asic_index)):
308+
if not xcvrd._wrapper_get_presence(physical_port):
309+
continue
310+
289311
try:
290312
self.post_port_sfp_firmware_info_to_db(logical_port_name, self.port_mapping, self.xcvr_table_helper.get_firmware_info_tbl(asic_index), self.task_stopping_event, firmware_info_cache=firmware_info_cache)
291313
except (KeyError, TypeError) as e:
@@ -308,14 +330,35 @@ def task_worker(self):
308330
# Continue to process the next port since an exception could be raised due to port reset or transceiver removal
309331
self.log_warning("Got exception {} while processing transceiver status hw for port {}, ignored".format(repr(e), logical_port_name))
310332
continue
311-
try:
312-
self.post_port_pm_info_to_db(logical_port_name, self.port_mapping, self.xcvr_table_helper.get_pm_tbl(asic_index), self.task_stopping_event, pm_info_cache=pm_info_cache)
313-
except (KeyError, TypeError) as e:
314-
#continue to process next port since execption could be raised due to port reset, transceiver removal
315-
self.log_warning("Got exception {} while processing pm info for port {}, ignored".format(repr(e), logical_port_name))
316-
continue
317-
318-
self.log_info("Stop DOM monitoring loop")
333+
if self.vdm_utils.is_transceiver_vdm_supported(physical_port):
334+
# Freeze VDM stats before reading VDM values
335+
with self.vdm_utils.vdm_freeze_context(physical_port) as vdm_frozen:
336+
if not vdm_frozen:
337+
self.log_error("Failed to freeze VDM stats for port {}".format(physical_port))
338+
continue
339+
try:
340+
# Read and post VDM real values to DB
341+
self.vdm_db_utils.post_port_vdm_real_values_to_db(logical_port_name, self.xcvr_table_helper.get_vdm_real_value_tbl(asic_index),
342+
self.vdm_utils.get_vdm_real_values, db_cache=vdm_real_value_cache)
343+
except (KeyError, TypeError) as e:
344+
# Continue to process the next port since an exception could be raised due to port reset or transceiver removal
345+
self.log_warning("Got exception {} while processing vdm values for port {}, ignored".format(repr(e), logical_port_name))
346+
continue
347+
try:
348+
# Read and post VDM flags and metadata to DB
349+
self.vdm_db_utils.post_port_vdm_flags_to_db(logical_port_name, db_cache=vdm_flag_cache)
350+
except (KeyError, TypeError) as e:
351+
# Continue to process the next port since an exception could be raised due to port reset or transceiver removal
352+
self.log_warning("Got exception {} while processing vdm flags for port {}, ignored".format(repr(e), logical_port_name))
353+
continue
354+
try:
355+
self.post_port_pm_info_to_db(logical_port_name, self.port_mapping, self.xcvr_table_helper.get_pm_tbl(asic_index), self.task_stopping_event, pm_info_cache=pm_info_cache)
356+
except (KeyError, TypeError) as e:
357+
# Continue to process the next port since an exception could be raised due to port reset or transceiver removal
358+
self.log_warning("Got exception {} while processing pm info for port {}, ignored".format(repr(e), logical_port_name))
359+
continue
360+
361+
self.log_notice("Stop DOM monitoring loop")
319362

320363
def run(self):
321364
if self.task_stopping_event.is_set():
@@ -347,14 +390,18 @@ def on_remove_logical_port(self, port_change_event):
347390
"""
348391
# To avoid race condition, remove the entry TRANSCEIVER_FIRMWARE_INFO, TRANSCEIVER_DOM_SENSOR, TRANSCEIVER_PM and HW section of TRANSCEIVER_STATUS table.
349392
# This thread only updates TRANSCEIVER_FIRMWARE_INFO, TRANSCEIVER_DOM_SENSOR, TRANSCEIVER_PM and HW section of TRANSCEIVER_STATUS table,
350-
# so we don't have to remove entries from TRANSCEIVER_INFO and TRANSCEIVER_DOM_THRESHOLD
393+
# so we don't have to remove entries from TRANSCEIVER_INFO, TRANSCEIVER_DOM_THRESHOLD and VDM threshold value tables.
351394
xcvrd.del_port_sfp_dom_info_from_db(port_change_event.port_name,
352395
self.port_mapping,
353-
None,
354-
self.xcvr_table_helper.get_dom_tbl(port_change_event.asic_id),
355-
None,
396+
[self.xcvr_table_helper.get_dom_tbl(port_change_event.asic_id),
397+
self.xcvr_table_helper.get_vdm_real_value_tbl(port_change_event.asic_id),
398+
*[self.xcvr_table_helper.get_vdm_flag_tbl(port_change_event.asic_id, key) for key in VDM_THRESHOLD_TYPES],
399+
*[self.xcvr_table_helper.get_vdm_flag_change_count_tbl(port_change_event.asic_id, key) for key in VDM_THRESHOLD_TYPES],
400+
*[self.xcvr_table_helper.get_vdm_flag_set_time_tbl(port_change_event.asic_id, key) for key in VDM_THRESHOLD_TYPES],
401+
*[self.xcvr_table_helper.get_vdm_flag_clear_time_tbl(port_change_event.asic_id, key) for key in VDM_THRESHOLD_TYPES],
356402
self.xcvr_table_helper.get_pm_tbl(port_change_event.asic_id),
357-
self.xcvr_table_helper.get_firmware_info_tbl(port_change_event.asic_id))
403+
self.xcvr_table_helper.get_firmware_info_tbl(port_change_event.asic_id)
404+
])
358405
xcvrd.delete_port_from_status_table_hw(port_change_event.port_name,
359406
self.port_mapping,
360407
self.xcvr_table_helper.get_status_tbl(port_change_event.asic_id))

sonic-xcvrd/xcvrd/dom/utilities/__init__.py

Whitespace-only changes.
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
from swsscommon import swsscommon
2+
3+
class DBUtils:
    """
    This class contains utility functions to interact with the redis database.
    """

    def __init__(self, logger):
        """
        Store the logger used by the utility methods.

        Args:
            logger: Logging object; this class calls its log_error() method.
        """
        self.logger = logger

    def update_flag_metadata_tables(self, logical_port_name, field_name, current_value,
                                    flag_values_dict_update_time,
                                    flag_value_table,
                                    flag_change_count_table, flag_last_set_time_table, flag_last_clear_time_table,
                                    table_name_for_logging):
        """
        Updates the metadata tables for flag table

        As part of the metadata update, the following tables are updated:
        - Change Count Table
        - Last Set Time Table
        - Last Clear Time Table

        Args:
            logical_port_name: Key used for every table lookup and update.
            field_name: Name of the flag field whose metadata is maintained.
            current_value: Newly read value of the flag. It is compared, as
                str(current_value), against the value stored in flag_value_table.
            flag_values_dict_update_time: Value written to the last set / last
                clear time table when the flag has changed.
            flag_value_table: Table holding the previously posted flag values.
                If None, an error is logged and nothing is updated.
            flag_change_count_table: Table holding the per-field change counter.
            flag_last_set_time_table: Table holding the per-field last set time.
            flag_last_clear_time_table: Table holding the per-field last clear time.
            table_name_for_logging: Table name used only in error messages.

        Returns:
            None

        Raises:
            KeyError: If the change count entry of the port has no field_name.
            ValueError: If the stored change count is not an integer string.
        """
        if flag_value_table is None:
            self.logger.log_error(f"flag_value_table {table_name_for_logging} is None for port {logical_port_name}")
            return

        found, db_flags_value_dict = flag_value_table.get(logical_port_name)
        # Table is empty, this is the first update to the metadata tables (this also means that the transceiver was detected for the first time)
        # Initialize the change count to 0 and last set and clear times to 'never'
        if not found:
            flag_change_count_table.set(logical_port_name, swsscommon.FieldValuePairs([(field_name, '0')]))
            flag_last_set_time_table.set(logical_port_name, swsscommon.FieldValuePairs([(field_name, 'never')]))
            flag_last_clear_time_table.set(logical_port_name, swsscommon.FieldValuePairs([(field_name, 'never')]))
            return

        db_flags_value_dict = dict(db_flags_value_dict)

        # No metadata update required if the value is 'N/A'
        if str(current_value).strip() == 'N/A':
            return

        # Update metadata only if the flag is already present in the DB and its
        # value has changed from the previous value
        if field_name not in db_flags_value_dict or db_flags_value_dict[field_name] == str(current_value):
            return

        found, db_change_count_dict = flag_change_count_table.get(logical_port_name)
        if not found:
            self.logger.log_error(f"Failed to get the change count for table {table_name_for_logging} port {logical_port_name}")
            return

        db_change_count_dict = dict(db_change_count_dict)
        db_change_count = int(db_change_count_dict[field_name]) + 1
        flag_change_count_table.set(logical_port_name, swsscommon.FieldValuePairs([(field_name, str(db_change_count))]))

        # NOTE(review): this relies on the truthiness of current_value, so a
        # non-empty string such as 'False' would be recorded as "set" - confirm
        # that callers pass booleans here.
        if current_value:
            flag_last_set_time_table.set(logical_port_name, swsscommon.FieldValuePairs([(field_name, flag_values_dict_update_time)]))
        else:
            flag_last_clear_time_table.set(logical_port_name, swsscommon.FieldValuePairs([(field_name, flag_values_dict_update_time)]))

    def beautify_info_dict(self, info_dict):
        """
        Convert every non-string value of info_dict to its str() form, in place.

        Args:
            info_dict: Dictionary to normalize; it is modified in place.

        Returns:
            None
        """
        # Only values of existing keys are reassigned, so the dictionary size
        # does not change while it is being iterated.
        for k, v in info_dict.items():
            if not isinstance(v, str):
                info_dict[k] = str(v)

0 commit comments

Comments
 (0)