Skip to content

Commit f4a7e22

Browse files
authored
[k8s]: Bypass the systemd service restart limit and restart immediately when changing to local mode (#15432) (#15868)
1 parent 38e721b commit f4a7e22

File tree

2 files changed

+40
-0
lines changed

2 files changed

+40
-0
lines changed

src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ def is_systemd_active(feat):
151151
def restart_systemd_service(server, feat, owner):
152152
log_debug("Restart service {} to owner:{}".format(feat, owner))
153153
if not UNIT_TESTING:
154+
subprocess.call(["systemctl", "reset-failed", str(feat)])
154155
status = subprocess.call(["systemctl", "restart", str(feat)])
155156
else:
156157
server.mod_db_entry(STATE_DB_NAME,
@@ -551,6 +552,7 @@ def on_state_update(self, key, op, data):
551552

552553
self.st_data[key] = _update_entry(dflt_st_feat, data)
553554
remote_state = self.st_data[key][ST_FEAT_REMOTE_STATE]
555+
current_owner = self.st_data[key][ST_FEAT_OWNER]
554556

555557
if (remote_state == REMOTE_RUNNING) and (old_remote_state != remote_state):
556558
# Tag latest
@@ -563,6 +565,13 @@ def on_state_update(self, key, op, data):
563565

564566
log_debug("try to tag latest label after {} seconds @{}".format(
565567
remote_ctr_config[TAG_IMAGE_LATEST], start_time))
568+
569+
# Fall back to local mode immediately instead of waiting for the systemd restart delay.
570+
# When k8s is down it cannot deploy containers to the worker, so the feature must go back to local.
571+
# If the current owner is already local, no restart is needed.
572+
if (current_owner != OWNER_LOCAL) and (remote_state == REMOTE_NONE) and (old_remote_state == REMOTE_STOPPED):
573+
restart_systemd_service(self.server, key, OWNER_LOCAL)
574+
return
566575

567576
if (not init):
568577
if (old_remote_state == remote_state):

src/sonic-ctrmgrd/tests/ctrmgrd_test.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,37 @@
324324
}
325325
}
326326
}
327+
},
328+
4: {
329+
common_test.DESCR: "Restart immediately to go back to local when remote_state changes to none from stopped",
330+
common_test.ARGS: "ctrmgrd",
331+
common_test.PRE: {
332+
common_test.STATE_DB_NO: {
333+
common_test.FEATURE_TABLE: {
334+
"snmp": {
335+
"remote_state": "stopped",
336+
}
337+
}
338+
}
339+
},
340+
common_test.UPD: {
341+
common_test.STATE_DB_NO: {
342+
common_test.FEATURE_TABLE: {
343+
"snmp": {
344+
"remote_state": "none",
345+
}
346+
}
347+
}
348+
},
349+
common_test.POST: {
350+
common_test.STATE_DB_NO: {
351+
common_test.FEATURE_TABLE: {
352+
"snmp": {
353+
"restart": "true"
354+
}
355+
}
356+
}
357+
}
327358
}
328359
}
329360

0 commit comments

Comments
 (0)