Skip to content

Commit b674dff

Browse files
[chassisd] Monitor midplane status (sonic-net#127)
Enhance chassisd to monitor the midplane status of the cards in a modular chassis. HLD: sonic-net/SONiC#646. - What I did: Added monitoring of the midplane (the internal Ethernet network) between the supervisor and line-card modules. - How I did it: Along with module status monitoring, chassisd now also monitors midplane reachability between the supervisor and the modules and updates STATE_DB with the status information. 'show chassis-modules midplane-status' reads this information from STATE_DB.
1 parent b0be7ca commit b674dff

4 files changed

Lines changed: 209 additions & 5 deletions

File tree

sonic-chassisd/scripts/chassisd

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ CHASSIS_MODULE_INFO_DESC_FIELD = 'desc'
4545
CHASSIS_MODULE_INFO_SLOT_FIELD = 'slot'
4646
CHASSIS_MODULE_INFO_OPERSTATUS_FIELD = 'oper_status'
4747

48+
CHASSIS_MIDPLANE_INFO_TABLE = 'CHASSIS_MIDPLANE_TABLE'
49+
CHASSIS_MIDPLANE_INFO_KEY_TEMPLATE = 'CHASSIS_MIDPLANE {}'
50+
CHASSIS_MIDPLANE_INFO_NAME_FIELD = 'name'
51+
CHASSIS_MIDPLANE_INFO_IP_FIELD = 'ip_address'
52+
CHASSIS_MIDPLANE_INFO_ACCESS_FIELD = 'access'
53+
4854
CHASSIS_INFO_UPDATE_PERIOD_SECS = 10
4955

5056
CHASSIS_LOAD_ERROR = 1
@@ -57,6 +63,7 @@ SELECT_TIMEOUT = 1000
5763
NOT_AVAILABLE = 'N/A'
5864
INVALID_SLOT = ModuleBase.MODULE_INVALID_SLOT
5965
INVALID_MODULE_INDEX = -1
66+
INVALID_IP = '0.0.0.0'
6067

6168
MODULE_ADMIN_DOWN = 0
6269
MODULE_ADMIN_UP = 1
@@ -72,7 +79,8 @@ def try_get(callback, *args, **kwargs):
7279
"""
7380
Handy function to invoke the callback and catch NotImplementedError
7481
:param callback: Callback to be invoked
75-
:param default: Default return value if exception occur
82+
:param args: Arguments to be passed to callback
83+
:param kwargs: Default return value if exception occur
7684
:return: Default return value if exception occur else return value of the callback
7785
"""
7886
default = kwargs.get('default', NOT_AVAILABLE)
@@ -149,11 +157,16 @@ class ModuleUpdater(logger.Logger):
149157
state_db = daemon_base.db_connect("STATE_DB")
150158
self.chassis_table = swsscommon.Table(state_db, CHASSIS_INFO_TABLE)
151159
self.module_table = swsscommon.Table(state_db, CHASSIS_MODULE_INFO_TABLE)
160+
self.midplane_table = swsscommon.Table(state_db, CHASSIS_MIDPLANE_INFO_TABLE)
152161
self.info_dict_keys = [CHASSIS_MODULE_INFO_NAME_FIELD,
153162
CHASSIS_MODULE_INFO_DESC_FIELD,
154163
CHASSIS_MODULE_INFO_SLOT_FIELD,
155164
CHASSIS_MODULE_INFO_OPERSTATUS_FIELD]
156165

166+
self.midplane_initialized = try_get(chassis.init_midplane_switch, default=False)
167+
if not self.midplane_initialized:
168+
self.log_error("Chassisd midplane intialization failed")
169+
157170
def deinit(self):
158171
"""
159172
Destructor of ModuleUpdater
@@ -163,6 +176,8 @@ class ModuleUpdater(logger.Logger):
163176
for module_index in range(0, self.num_modules):
164177
name = try_get(self.chassis.get_module(module_index).get_name)
165178
self.module_table._del(name)
179+
if self.midplane_table.get(name) is not None:
180+
self.midplane_table._del(name)
166181

167182
if self.chassis_table is not None:
168183
self.chassis_table._del(CHASSIS_INFO_KEY_TEMPLATE.format(1))
@@ -218,6 +233,41 @@ class ModuleUpdater(logger.Logger):
218233

219234
return module_info_dict
220235

236+
def _is_supervisor(self):
    """
    Check whether this instance is running on the supervisor card

    :return: True if my_slot equals supervisor_slot, False otherwise
    """
    return self.my_slot == self.supervisor_slot
241+
242+
def check_midplane_reachability(self):
    """
    Write midplane IP address and reachability of peer modules to the midplane table

    On the supervisor every non-fabric module except the supervisor itself is
    checked; on a line-card only the supervisor module is checked. Returns
    immediately if the midplane was not initialized.
    """
    if not self.midplane_initialized:
        return

    # index is the position in get_all_modules(); it is only used to build a
    # fallback key when the module cannot report its name.
    for index, module in enumerate(self.chassis.get_all_modules()):
        # Skip fabric cards
        if module.get_type() == ModuleBase.MODULE_TYPE_FABRIC:
            continue

        # On supervisor skip the supervisor module; on line-card skip everything
        # but the supervisor module. Both cases reduce to: skip when "running on
        # supervisor" and "module is the supervisor" agree.
        if self._is_supervisor() == (module.get_slot() == self.supervisor_slot):
            continue

        module_key = try_get(module.get_name, default='MODULE {}'.format(index))
        midplane_ip = try_get(module.get_midplane_ip, default=INVALID_IP)
        midplane_access = try_get(module.is_midplane_reachable, default=False)

        # Update db with midplane information
        fvs = swsscommon.FieldValuePairs([(CHASSIS_MIDPLANE_INFO_IP_FIELD, midplane_ip),
                                          (CHASSIS_MIDPLANE_INFO_ACCESS_FIELD, str(midplane_access))])
        self.midplane_table.set(module_key, fvs)
270+
221271
#
222272
# Config Manager task ========================================================
223273
#
@@ -306,14 +356,17 @@ class ChassisdDaemon(daemon_base.DaemonBase):
306356
self.module_updater.modules_num_update()
307357

308358
# Check for valid slot numbers
309-
my_slot = try_get(platform_chassis.get_my_slot, default=INVALID_SLOT)
310-
supervisor_slot = try_get(platform_chassis.get_supervisor_slot, default=INVALID_SLOT)
311-
if (my_slot == INVALID_SLOT) or (supervisor_slot == INVALID_SLOT):
359+
self.module_updater.my_slot = try_get(platform_chassis.get_my_slot,
360+
default=INVALID_SLOT)
361+
self.module_updater.supervisor_slot = try_get(platform_chassis.get_supervisor_slot,
362+
default=INVALID_SLOT)
363+
if ((self.module_updater.my_slot == INVALID_SLOT) or
364+
(self.module_updater.supervisor_slot == INVALID_SLOT)):
312365
self.log_error("Chassisd not supported for this platform")
313366
sys.exit(CHASSIS_NOT_SUPPORTED)
314367

315368
# Start configuration manager task on supervisor module
316-
if supervisor_slot == my_slot:
369+
if self.module_updater.supervisor_slot == self.module_updater.my_slot:
317370
config_manager = ConfigManagerTask()
318371
config_manager.task_run()
319372

@@ -322,6 +375,7 @@ class ChassisdDaemon(daemon_base.DaemonBase):
322375

323376
while not self.stop.wait(CHASSIS_INFO_UPDATE_PERIOD_SECS):
324377
self.module_updater.module_db_update()
378+
self.module_updater.check_midplane_reachability()
325379

326380
self.log_info("Stop daemon main loop")
327381

sonic-chassisd/tests/mock_platform.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ def __init__(self, module_index, module_name, module_desc, module_type, module_s
2727
self.hw_slot = module_slot
2828
self.module_status = ''
2929
self.admin_state = 1
30+
self.supervisor_slot = 16
31+
self.midplane_access = False
3032

3133
def get_name(self):
3234
return self.module_name
@@ -52,10 +54,25 @@ def set_admin_state(self, up):
5254
def get_admin_state(self):
5355
return self.admin_state
5456

57+
def get_midplane_ip(self):
    """
    Return the midplane IP address stored by set_midplane_ip

    :return: The stored address, or '0.0.0.0' if set_midplane_ip was never called
    """
    # midplane_ip is only created by set_midplane_ip, so fall back to the
    # all-zero address instead of raising AttributeError on an unset module.
    return getattr(self, 'midplane_ip', '0.0.0.0')
59+
60+
def set_midplane_ip(self):
61+
if self.supervisor_slot == self.get_slot():
62+
self.midplane_ip = '192.168.1.100'
63+
else:
64+
self.midplane_ip = '192.168.1.{}'.format(self.get_slot())
65+
66+
def is_midplane_reachable(self):
67+
return self.midplane_access
68+
69+
def set_midplane_reachable(self, up):
70+
self.midplane_access = up
5571

5672
class MockChassis:
5773
def __init__(self):
5874
self.module_list = []
75+
self.midplane_supervisor_access = False
5976

6077
def get_num_modules(self):
6178
return len(self.module_list)
@@ -72,3 +89,6 @@ def get_module_index(self, module_name):
7289
if module.module_name == module_name:
7390
return module.module_index
7491
return -1
92+
93+
def init_midplane_switch(self):
94+
return True

sonic-chassisd/tests/mock_swsscommon.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ def get(self, key):
1919
return self.mock_dict[key]
2020
return None
2121

22+
def size(self):
23+
return len(self.mock_dict)
2224

2325
class FieldValuePairs:
2426
def __init__(self, fvs):

sonic-chassisd/tests/test_chassisd.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,3 +238,131 @@ def test_configupdater_check_num_modules():
238238
module_updater.deinit()
239239
fvs = module_updater.chassis_table.get(CHASSIS_INFO_KEY_TEMPLATE.format(1))
240240
assert fvs == None
241+
242+
def test_midplane_presence_modules():
    """Running as the supervisor, only the line-card gets a midplane table entry"""
    chassis = MockChassis()

    # Supervisor
    index = 0
    name = "SUPERVISOR0"
    desc = "Supervisor card"
    slot = 16
    module_type = ModuleBase.MODULE_TYPE_SUPERVISOR
    supervisor = MockModule(index, name, desc, module_type, slot)
    supervisor.set_midplane_ip()
    chassis.module_list.append(supervisor)

    # Linecard
    index = 1
    name = "LINE-CARD0"
    desc = "36 port 400G card"
    slot = 1
    module_type = ModuleBase.MODULE_TYPE_LINE
    module = MockModule(index, name, desc, module_type, slot)
    module.set_midplane_ip()
    chassis.module_list.append(module)

    # Fabric-card (third module, so it gets its own index)
    index = 2
    name = "FABRIC-CARD0"
    desc = "Switch fabric card"
    slot = 17
    module_type = ModuleBase.MODULE_TYPE_FABRIC
    fabric = MockModule(index, name, desc, module_type, slot)
    chassis.module_list.append(fabric)

    # Run on supervisor
    module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis)
    module_updater.supervisor_slot = supervisor.get_slot()
    module_updater.my_slot = supervisor.get_slot()
    module_updater.modules_num_update()
    module_updater.module_db_update()

    # Start with the line-card reachable so the "down" step below is a real
    # transition rather than a repeat of the initial False state
    module.set_midplane_reachable(True)
    module_updater.check_midplane_reachability()

    midplane_table = module_updater.midplane_table
    # Check only one entry in database
    assert 1 == midplane_table.size()

    # Check fields in database
    name = "LINE-CARD0"
    fvs = midplane_table.get(name)
    assert fvs is not None
    assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD]
    assert str(module.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD]

    # Set access of line-card to down
    module.set_midplane_reachable(False)
    module_updater.check_midplane_reachability()
    fvs = midplane_table.get(name)
    assert fvs is not None
    assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD]
    assert str(module.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD]

    # Deinit
    module_updater.deinit()
    fvs = midplane_table.get(name)
    assert fvs is None
305+
306+
def test_midplane_presence_supervisor():
    """Running as a line-card, only the supervisor gets a midplane table entry"""
    chassis = MockChassis()

    # Supervisor
    index = 0
    name = "SUPERVISOR0"
    desc = "Supervisor card"
    slot = 16
    module_type = ModuleBase.MODULE_TYPE_SUPERVISOR
    supervisor = MockModule(index, name, desc, module_type, slot)
    supervisor.set_midplane_ip()
    chassis.module_list.append(supervisor)

    # Linecard
    index = 1
    name = "LINE-CARD0"
    desc = "36 port 400G card"
    slot = 1
    module_type = ModuleBase.MODULE_TYPE_LINE
    module = MockModule(index, name, desc, module_type, slot)
    module.set_midplane_ip()
    chassis.module_list.append(module)

    # Fabric-card (third module, so it gets its own index)
    index = 2
    name = "FABRIC-CARD0"
    desc = "Switch fabric card"
    slot = 17
    module_type = ModuleBase.MODULE_TYPE_FABRIC
    fabric = MockModule(index, name, desc, module_type, slot)
    chassis.module_list.append(fabric)

    # Run on line-card: my_slot is the line-card slot, not the supervisor slot
    module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis)
    module_updater.supervisor_slot = supervisor.get_slot()
    module_updater.my_slot = module.get_slot()
    module_updater.modules_num_update()
    module_updater.module_db_update()

    # Start with the supervisor reachable so the "down" step below is a real
    # transition rather than a repeat of the initial False state
    supervisor.set_midplane_reachable(True)
    module_updater.check_midplane_reachability()

    midplane_table = module_updater.midplane_table
    # Check only one entry in database
    assert 1 == midplane_table.size()

    # Check fields in database
    name = "SUPERVISOR0"
    fvs = midplane_table.get(name)
    assert fvs is not None
    assert supervisor.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD]
    assert str(supervisor.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD]

    # Set access of supervisor to down
    supervisor.set_midplane_reachable(False)
    module_updater.check_midplane_reachability()
    fvs = midplane_table.get(name)
    assert fvs is not None
    assert supervisor.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD]
    assert str(supervisor.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD]

    # Deinit
    module_updater.deinit()
    fvs = midplane_table.get(name)
    assert fvs is None

0 commit comments

Comments
 (0)