Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
7863b59
ChangeApplier and unit test code.
renukamanavalan Oct 1, 2021
84f493b
minor update
renukamanavalan Oct 4, 2021
dd337eb
fix unused import
renukamanavalan Oct 4, 2021
6e0bba0
Added service validation
renukamanavalan Oct 11, 2021
cabcf23
Take off added code for logging
renukamanavalan Oct 19, 2021
a075606
Merge remote-tracking branch 'upstream/master' into updater
renukamanavalan Oct 19, 2021
bd7781c
fix merge
renukamanavalan Oct 19, 2021
f839e53
change applier & test updated for service validation
renukamanavalan Oct 20, 2021
beb1ea7
removed unused import
renukamanavalan Oct 20, 2021
fe25d2e
Added two service validators
renukamanavalan Oct 20, 2021
fec8f8f
move global to class; No logical code changes
renukamanavalan Oct 24, 2021
7d90266
A fix in file path
renukamanavalan Oct 27, 2021
91f8f7e
Merge remote-tracking branch 'upstream/master' into updater
renukamanavalan Oct 27, 2021
6cd9dd8
1) Copy generic_updater_config.conf.json as part of install
renukamanavalan Nov 2, 2021
5a9434b
Merge remote-tracking branch 'upstream/master' into updater
renukamanavalan Nov 2, 2021
30feec2
Prune only empty tables
renukamanavalan Nov 3, 2021
8cd17fd
file rename
renukamanavalan Nov 10, 2021
7533ed1
Merge remote-tracking branch 'upstream/master' into updater
renukamanavalan Nov 10, 2021
9abb300
1) Drop print to stdout from change_applier
renukamanavalan Nov 16, 2021
a41052b
No logical code changes; Name changes only, per review comments
renukamanavalan Nov 22, 2021
d37b350
Merge remote-tracking branch 'upstream/master' into updater
renukamanavalan Jan 13, 2022
3bd91b5
Handle failed restart
renukamanavalan Jan 18, 2022
f520752
Add pause, only if restart fails even after reset
renukamanavalan Jan 19, 2022
e66ce4a
Added test code
renukamanavalan Jan 19, 2022
d280814
Dropped redundant test code
renukamanavalan Jan 19, 2022
f30ff36
minor
renukamanavalan Jan 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 32 additions & 3 deletions generic_config_updater/services_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,42 @@ def set_verbose(verbose=False):

def _service_restart(svc_name):
rc = os.system(f"systemctl restart {svc_name}")
logger.log(logger.LOG_PRIORITY_NOTICE,
f"Restarted {svc_name}", print_to_console)
if rc != 0:
Copy link
Contributor

@qiluo-msft qiluo-msft Jan 19, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rc

Is the rc a stable code when too many restarts happen? If yes, just check if rc == <code>: #Closed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rc == or != 0 is stable comparison for success/failure

Copy link
Contributor

@qiluo-msft qiluo-msft Jan 19, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I mean is there a specific exit code for "too many restarts" failure? if yes, we can check that code specifically.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are no specific error code. 1 is a generic error code, which is most commonly we see. But this could change in future. Moreover if there is any inherent service related error, the monitor catches it and leads to ICM.

In our case, for any failure, we try our best, before giving up.

ref: link

"In case of an error while processing any init-script action except for status, the init script shall print an error message and exit with a non-zero status code:"

"1 generic or unspecified error (current practice)"

# This failure is likely due to too many restarts
#
rc = os.system(f"systemctl reset-failed {svc_name}")
logger.log(logger.LOG_PRIORITY_ERROR,
f"Service has been reset. rc={rc}; Try restart again...",
print_to_console)

rc = os.system(f"systemctl restart {svc_name}")
if rc != 0:
Copy link
Contributor

@qiluo-msft qiluo-msft Jan 19, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rc

Is there a specific exit code for 2nd failure? #Closed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't get your comments. But rc is the absolute way to check success / failure.

# Even with reset-failed, restart fails.
# Give a pause before retry.
#
logger.log(logger.LOG_PRIORITY_ERROR,
f"Restart failed for {svc_name} rc={rc} after reset; Pause for 10s & retry",
print_to_console)
os.system("sleep 10s")
rc = os.system(f"systemctl restart {svc_name}")

if rc == 0:
logger.log(logger.LOG_PRIORITY_NOTICE,
f"Restart succeeded for {svc_name}",
print_to_console)
else:
logger.log(logger.LOG_PRIORITY_ERROR,
f"Restart failed for {svc_name} rc={rc}",
print_to_console)
return rc == 0


def rsyslog_validator(old_config, upd_config, keys):
return _service_restart("rsyslog-config")
Copy link
Contributor

@qiluo-msft qiluo-msft Jan 19, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_service_restart

With _service_restart fixed, do we still need this fix? #Closed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What we need is to run /usr/bin/rsyslog-config.sh.
What it does is, update /etc/rsyslog.conf from CONFG-DB & restart rsyslog.
We do the same and additionally handle the possibility of rsyslog restart failure.

rsyslog-config is not a service, but a one shot wrapper to run /usr/bin/rsyslog-config.sh, after updategraph.service, at the startup.

rc = os.system("/usr/bin/rsyslog-config.sh")
if rc != 0:
return _service_restart("rsyslog")
else:
return True


def dhcp_validator(old_config, upd_config, keys):
Expand Down
43 changes: 36 additions & 7 deletions tests/generic_config_updater/service_validator_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,25 @@
from collections import defaultdict
from unittest.mock import patch

from generic_config_updater.services_validator import vlan_validator
from generic_config_updater.services_validator import vlan_validator, rsyslog_validator
import generic_config_updater.gu_common


# Mimics os.system call
#
os_system_expected_cmd = ""
os_system_calls = []
os_system_call_index = 0
msg = ""

def os_system_cfggen(cmd):
assert cmd == os_system_expected_cmd, msg
return 0
def mock_os_system_call(cmd):
global os_system_calls, os_system_call_index

assert os_system_call_index < len(os_system_calls)
entry = os_system_calls[os_system_call_index]
os_system_call_index += 1

assert cmd == entry["cmd"], msg
return entry["rc"]


test_data = [
Expand Down Expand Up @@ -53,19 +60,41 @@ def os_system_cfggen(cmd):
}
]

test_rsyslog_fail = [
# Fail the calls, to get the entire fail path calls invoked
#
{ "cmd": "/usr/bin/rsyslog-config.sh", "rc": 1 }, # config update; fails
{ "cmd": "systemctl restart rsyslog", "rc": 1 }, # rsyslog restart; fails
{ "cmd": "systemctl reset-failed rsyslog", "rc": 1 }, # reset; failure here just logs
{ "cmd": "systemctl restart rsyslog", "rc": 1 }, # restart again; fails
{ "cmd": "sleep 10s", "rc": 0 }, # sleep; rc ignored
{ "cmd": "systemctl restart rsyslog", "rc": 1 }, # restart again; fails
]


class TestServiceValidator(unittest.TestCase):

@patch("generic_config_updater.change_applier.os.system")
def test_change_apply(self, mock_os_sys):
global os_system_expected_cmd
global os_system_calls, os_system_call_index

mock_os_sys.side_effect = os_system_cfggen
mock_os_sys.side_effect = mock_os_system_call

i = 0
for entry in test_data:
os_system_expected_cmd = entry["cmd"]
if entry["cmd"]:
os_system_calls.append({"cmd": entry["cmd"], "rc": 0 })
msg = "case failed: {}".format(str(entry))

vlan_validator(entry["old"], entry["upd"], None)


# Test failure case
#
os_system_calls = test_rsyslog_fail
os_system_call_index = 0

rc = rsyslog_validator("", "", "")
assert not rc, "rsyslog_validator expected to fail"