Skip to content

Commit 3624cb7

Browse files
authored
[stormond] Added new dynamic field 'last_sync_time' to STATE_DB (#535)
* Added new dynamic field 'last_sync_time' that shows when STORAGE_INFO for disk was last synced to STATE_DB * Moved 'start' message to actual starting point of the daemon * Added functions for formatted and epoch time for user friendly time display * Made changes per prgeor review comments * Pivot to SysLogger for all logging * Increased log level so that they are seen in syslogs * Code coverage improvement
1 parent 0431fa3 commit 3624cb7

2 files changed

Lines changed: 265 additions & 62 deletions

File tree

sonic-stormond/scripts/stormond

Lines changed: 45 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import shutil
1414
import json
1515
import time
1616

17+
from datetime import datetime
1718
from sonic_py_common import daemon_base, device_info, syslogger
1819
from swsscommon import swsscommon
1920
from sonic_platform_base.sonic_storage.storage_devices import StorageDevices, BLKDEV_BASE_PATH
@@ -49,6 +50,8 @@ class DaemonStorage(daemon_base.DaemonBase):
4950
self.log = syslogger.SysLogger(SYSLOG_IDENTIFIER)
5051
super(DaemonStorage, self).__init__(log_identifier)
5152

53+
self.log.log_notice("Starting Storage Monitoring Daemon")
54+
5255
self.timeout = STORMOND_PERIODIC_STATEDB_SYNC_SECS
5356
self.fsstats_sync_interval = STORMOND_SYNC_TO_DISK_SECS
5457
self.stop_event = threading.Event()
@@ -68,6 +71,9 @@ class DaemonStorage(daemon_base.DaemonBase):
6871
self.fsio_rw_json = {disk:{} for disk in self.storage.devices}
6972
self.fsio_rw_statedb = {disk:{} for disk in self.storage.devices}
7073

74+
# This is the time format string
75+
self.time_format_string = "%Y-%m-%d %H:%M:%S"
76+
7177
# This time is set at init and then subsequently after each FSIO JSON file sync
7278
self.fsio_sync_time = time.time()
7379

@@ -82,7 +88,8 @@ class DaemonStorage(daemon_base.DaemonBase):
8288
"total_fsio_writes", \
8389
"disk_io_reads", \
8490
"disk_io_writes", \
85-
"reserved_blocks"]
91+
"reserved_blocks", \
92+
"last_sync_time"]
8693

8794
# These are the fields that we are interested in saving to disk to protect against
8895
# reboots or crashes
@@ -97,21 +104,26 @@ class DaemonStorage(daemon_base.DaemonBase):
97104
self._load_fsio_rw_json()
98105
self._determine_sot()
99106

107+
# This function is used to convert the epoch time to a user friendly formatted string
108+
def get_formatted_time(self, time_since_epoch):
109+
return datetime.fromtimestamp(time_since_epoch).strftime(self.time_format_string)
110+
111+
# This function is used to configure the polling and sync intervals for the daemon
100112
def get_configdb_intervals(self):
101113
self.config_db = daemon_base.db_connect("CONFIG_DB")
102114
config_info = dict(self.config_db.hgetall('STORMOND_CONFIG|INTERVALS'))
103115
self.timeout = int(config_info.get('daemon_polling_interval', STORMOND_PERIODIC_STATEDB_SYNC_SECS))
104116
self.fsstats_sync_interval = int(config_info.get('fsstats_sync_interval', STORMOND_SYNC_TO_DISK_SECS))
105117

106-
self.log_info("Polling Interval set to {} seconds".format(self.timeout))
107-
self.log_info("FSIO JSON file Interval set to {} seconds".format(self.fsstats_sync_interval))
118+
self.log.log_notice("Polling Interval set to {} seconds".format(self.timeout))
119+
self.log.log_notice("FSIO JSON file Interval set to {} seconds".format(self.fsstats_sync_interval))
108120

109121

110122
# Get the total and latest FSIO reads and writes from JSON file
111123
def _load_fsio_rw_json(self):
112124
try:
113125
if not os.path.exists(FSIO_RW_JSON_FILE):
114-
self.log_info("{} not present.".format(FSIO_RW_JSON_FILE))
126+
self.log.log_notice("{} not present.".format(FSIO_RW_JSON_FILE))
115127
return
116128

117129
# Load JSON file
@@ -123,21 +135,21 @@ class DaemonStorage(daemon_base.DaemonBase):
123135
for field in self.statedb_json_sync_fields:
124136

125137
if self.fsio_rw_json[storage_device][field] == None:
126-
self.log_warning("{}:{} value = None in JSON file".format(storage_device, field))
138+
self.log.log_warning("{}:{} value = None in JSON file".format(storage_device, field))
127139
return
128140

129141
self.fsio_json_file_loaded = True
130142

131143
except Exception as e:
132-
self.log_error("JSON file could not be loaded: {}".format(str(e)))
144+
self.log.log_error("JSON file could not be loaded: {}".format(str(e)))
133145

134146
return
135147

136148

137149
# Sync the total and latest procfs reads and writes from STATE_DB to JSON file on disk
138150
def sync_fsio_rw_json(self):
139151

140-
self.log_info("Syncing total and latest procfs reads and writes from STATE_DB to JSON file")
152+
self.log.log_notice("Syncing total and latest procfs reads and writes from STATE_DB to JSON file")
141153

142154
json_file_dict = {disk:{} for disk in self.storage.devices}
143155
try:
@@ -146,21 +158,21 @@ class DaemonStorage(daemon_base.DaemonBase):
146158
json_file_dict[device][field] = self.state_db.hget('STORAGE_INFO|{}'.format(device), field)
147159

148160
self.fsio_sync_time = time.time()
149-
json_file_dict["successful_sync_time"] = str(self.fsio_sync_time)
161+
json_file_dict["successful_sync_time"] = str(self.get_formatted_time(self.fsio_sync_time))
150162

151163
with open(FSIO_RW_JSON_FILE, 'w+') as f:
152164
json.dump(json_file_dict, f)
153165

154166
return True
155167

156168
except Exception as ex:
157-
self.log_error("Unable to sync state_db to disk: {}".format(str(ex)))
169+
self.log.log_error("Unable to sync state_db to disk: {}".format(str(ex)))
158170
return False
159171

160172

161173
# Update the successful sync time to STATE_DB
162174
def write_sync_time_statedb(self):
163-
self.state_db.hset("{}|{}".format(STORAGE_DEVICE_TABLE,FSSTATS_SYNC_TIME_KEY), "successful_sync_time", str(self.fsio_sync_time))
175+
self.state_db.hset("{}|{}".format(STORAGE_DEVICE_TABLE,FSSTATS_SYNC_TIME_KEY), "successful_sync_time", str(self.get_formatted_time(self.fsio_sync_time)))
164176

165177
# Run a sanity check on the state_db. If successful, get total, latest
166178
# FSIO reads and writes for each storage device from STATE_DB
@@ -185,12 +197,12 @@ class DaemonStorage(daemon_base.DaemonBase):
185197
self.fsio_rw_statedb[storage_device][field] = "0" if value is None else value
186198

187199
if value is None:
188-
self.log_warning("{}:{} value = None in StateDB".format(storage_device, field))
200+
self.log.log_warning("{}:{} value = None in StateDB".format(storage_device, field))
189201
return
190202

191203
self.statedb_storage_info_loaded = True
192204
except Exception as e:
193-
self.log_error("Reading STATE_DB failed with: {}".format(str(e)))
205+
self.log.log_error("Reading STATE_DB failed with: {}".format(str(e)))
194206

195207

196208
def _determine_sot(self):
@@ -269,21 +281,21 @@ class DaemonStorage(daemon_base.DaemonBase):
269281
try:
270282
# Unlikely scenario
271283
if storage_object is None:
272-
self.log_info("{} does not have an instantiated object. Static Information cannot be gathered.".format(storage_device))
284+
self.log.log_notice("{} does not have an instantiated object. Static Information cannot be gathered.".format(storage_device))
273285
continue
274286

275287
static_kvp_dict = {}
276288

277289
static_kvp_dict["device_model"] = storage_object.get_model()
278290
static_kvp_dict["serial"] = storage_object.get_serial()
279291

280-
self.log_info("Storage Device: {}, Device Model: {}, Serial: {}".format(storage_device, static_kvp_dict["device_model"], static_kvp_dict["serial"]))
292+
self.log.log_notice("Storage Device: {}, Device Model: {}, Serial: {}".format(storage_device, static_kvp_dict["device_model"], static_kvp_dict["serial"]))
281293

282294
# update Storage Device Status to DB
283295
self.update_storage_info_status_db(storage_device, static_kvp_dict)
284296

285297
except Exception as ex:
286-
self.log_error("get_static_fields_update_state_db() failed with: {}".format(str(ex)))
298+
self.log.log_error("get_static_fields_update_state_db() failed with: {}".format(str(ex)))
287299

288300
# Get Dynamic attributes and update the State DB
289301
def get_dynamic_fields_update_state_db(self):
@@ -292,7 +304,7 @@ class DaemonStorage(daemon_base.DaemonBase):
292304
for storage_device, storage_object in self.storage.devices.items():
293305
try:
294306
if storage_object is None:
295-
self.log_info("Storage device '{}' does not have an instantiated object. Dynamic Information cannot be gathered.".format(storage_device))
307+
self.log.log_notice("Storage device '{}' does not have an instantiated object. Dynamic Information cannot be gathered.".format(storage_device))
296308
continue
297309

298310
# Fetch the latest dynamic info
@@ -309,20 +321,23 @@ class DaemonStorage(daemon_base.DaemonBase):
309321
dynamic_kvp_dict["disk_io_reads"] = storage_object.get_disk_io_reads()
310322
dynamic_kvp_dict["disk_io_writes"] = storage_object.get_disk_io_writes()
311323
dynamic_kvp_dict["reserved_blocks"] = storage_object.get_reserved_blocks()
324+
dynamic_kvp_dict["last_sync_time"] = self.get_formatted_time(time.time())
312325

313326
dynamic_kvp_dict["total_fsio_reads"], dynamic_kvp_dict["total_fsio_writes"] = self._reconcile_fsio_rw_values(dynamic_kvp_dict, storage_device)
314327

315-
self.log_info("Storage Device: {}, Firmware: {}, health: {}%, Temp: {}C, FS IO Reads: {}, FS IO Writes: {}".format(\
316-
storage_device, dynamic_kvp_dict["firmware"], dynamic_kvp_dict["health"], dynamic_kvp_dict["temperature"], dynamic_kvp_dict["total_fsio_reads"],dynamic_kvp_dict["total_fsio_writes"]))
317-
self.log_info("Latest FSIO Reads: {}, Latest FSIO Writes: {}".format(dynamic_kvp_dict["latest_fsio_reads"], dynamic_kvp_dict["latest_fsio_writes"]))
318-
self.log_info("Disk IO Reads: {}, Disk IO Writes: {}, Reserved Blocks: {}".format(dynamic_kvp_dict["disk_io_reads"], dynamic_kvp_dict["disk_io_writes"], \
319-
dynamic_kvp_dict["reserved_blocks"]))
320-
321328
# Update storage device statistics to STATE_DB
322329
self.update_storage_info_status_db(storage_device, dynamic_kvp_dict)
323330

331+
# Log to syslog
332+
self.log.log_notice("Storage Device: {}, Firmware: {}, health: {}%, Temp: {}C, FS IO Reads: {}, FS IO Writes: {}".format(\
333+
storage_device, dynamic_kvp_dict["firmware"], dynamic_kvp_dict["health"], dynamic_kvp_dict["temperature"], dynamic_kvp_dict["total_fsio_reads"],dynamic_kvp_dict["total_fsio_writes"]))
334+
self.log.log_notice("Latest FSIO Reads: {}, Latest FSIO Writes: {}".format(dynamic_kvp_dict["latest_fsio_reads"], dynamic_kvp_dict["latest_fsio_writes"]))
335+
self.log.log_notice("Disk IO Reads: {}, Disk IO Writes: {}, Reserved Blocks: {}".format(dynamic_kvp_dict["disk_io_reads"], dynamic_kvp_dict["disk_io_writes"], \
336+
dynamic_kvp_dict["reserved_blocks"]))
337+
self.log.log_notice("Last successful sync time to STATE_DB: {}".format(dynamic_kvp_dict["last_sync_time"]))
338+
324339
except Exception as ex:
325-
self.log_info("get_dynamic_fields_update_state_db() failed with: {}".format(str(ex)))
340+
self.log.log_notice("get_dynamic_fields_update_state_db() failed with: {}".format(str(ex)))
326341

327342

328343
# Override signal handler from DaemonBase
@@ -333,22 +348,22 @@ class DaemonStorage(daemon_base.DaemonBase):
333348
global exit_code
334349

335350
if sig in FATAL_SIGNALS:
336-
self.log_info("Caught signal '{}'".format(signal.Signals(sig).name))
351+
self.log.log_notice("Caught signal '{}'".format(signal.Signals(sig).name))
337352

338353
if self.sync_fsio_rw_json():
339354
self.write_sync_time_statedb()
340355
else:
341-
self.log_warning("Unable to sync latest and total procfs RW to disk")
356+
self.log.log_warning("Unable to sync latest and total procfs RW to disk")
342357

343-
self.log_info("Exiting with {}".format(signal.Signals(sig).name))
358+
self.log.log_notice("Exiting with {}".format(signal.Signals(sig).name))
344359

345360
# Make sure we exit with a non-zero code so that supervisor will try to restart us
346361
exit_code = 128 + sig
347362
self.stop_event.set()
348363
elif sig in NONFATAL_SIGNALS:
349-
self.log_info("Caught signal '{}' - ignoring...".format(signal.Signals(sig).name))
364+
self.log.log_notice("Caught signal '{}' - ignoring...".format(signal.Signals(sig).name))
350365
else:
351-
self.log_warning("Caught unhandled signal '{}' - ignoring...".format(signal.Signals(sig).name))
366+
self.log.log_warning("Caught unhandled signal '{}' - ignoring...".format(signal.Signals(sig).name))
352367

353368
# Main daemon logic
354369
def run(self):
@@ -374,7 +389,7 @@ class DaemonStorage(daemon_base.DaemonBase):
374389
if self.sync_fsio_rw_json():
375390
self.write_sync_time_statedb()
376391
else:
377-
self.log_warning("Unable to sync latest and total procfs RW to disk")
392+
self.log.log_warning("Unable to sync latest and total procfs RW to disk")
378393

379394
return True
380395
#
@@ -385,15 +400,13 @@ class DaemonStorage(daemon_base.DaemonBase):
385400
def main():
386401
stormon = DaemonStorage(SYSLOG_IDENTIFIER)
387402

388-
stormon.log_info("Starting Storage Monitoring Daemon")
389-
390403
# Read and update Static Fields to the StateDB once
391404
stormon.get_static_fields_update_state_db()
392405

393406
while stormon.run():
394407
pass
395408

396-
stormon.log_info("Shutting down Storage Monitoring Daemon")
409+
stormon.log.log_notice("Shutting down Storage Monitoring Daemon")
397410

398411
return exit_code
399412

0 commit comments

Comments
 (0)