Skip to content

Commit b3d997f

Browse files
authored
Add test plan for reboot blocking mode (#19600)
What is the motivation for this PR? https://github.com/sonic-net/SONiC/blob/master/doc/reboot/Reboot_BlockingMode_HLD.md How did you do it? https://github.com/sonic-net/SONiC/blob/master/doc/reboot/Reboot_BlockingMode_HLD.md Signed-off-by: Litao Yu <lityu@microsoft.com>
1 parent 53e586a commit b3d997f

File tree

3 files changed

+223
-6
lines changed

3 files changed

+223
-6
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Reboot Blocking Mode Test Plan
2+
3+
## 1 Overview
4+
5+
The purpose is to test the functionality of **Reboot Blocking Mode** feature on SONiC switch.
6+
7+
For details of the feature design, please refer to HLD: [Reboot support BlockingMode in SONiC](https://github.com/sonic-net/SONiC/blob/master/doc/reboot/Reboot_BlockingMode_HLD.md).
8+
9+
### 1.1 Scope
10+
11+
The test targets a running SONiC system with a fully functioning configuration. The purpose of this test is to verify the reboot BlockingMode function through both the CLI and the config file.
12+
13+
### 1.2 Testbed
14+
15+
The test can run on both physical and virtual testbeds with any topology.
16+
17+
### 1.3 Limitation
18+
19+
Blocking mode only affects device SKUs that do not have platform reboot enabled, so the test will always pass on hardware SKUs where platform reboot is enabled.
20+
21+
## 2 Setup Configuration
22+
23+
Because the CLI output is unpredictable in non-blocking mode, we need to mock the original reboot file `/sbin/reboot`. We replace this file with an empty script so that, in non-BlockingMode, the `reboot` command always completes quickly.
24+
25+
## 3 Test
26+
27+
### Test for BlockingMode CLI
28+
#### Test case #1 - Verify original logic will not block
29+
1. Run command `reboot; echo "ExpectedFinished"`. The command must be run with a 10-minute timeout so that the test script cannot be blocked unexpectedly.
30+
1. Check if the command output contains `ExpectedFinished` as expected.
31+
32+
#### Test case #2 - Verify running output when blocking mode enabled
33+
1. Run command `reboot -b -v; echo "UnexpectedFinished"`. The command must be run with a 10-minute timeout.
34+
1. Check that the command output does not contain `UnexpectedFinished`.
35+
1. Check if there are extra dots after `Issuing OS-level reboot ...` output.
36+
37+
### Test for BlockingMode config file
38+
#### Test case #1 - Verify timeout config for blocking mode with config file
39+
1. Back up the config file `/etc/sonic/reboot.conf` if it exists, then write the following settings to the config file:
40+
```
41+
blocking_mode=true
42+
blocking_mode_timeout=0
43+
show_timer=true
44+
```
45+
1. Run command `reboot; echo "UnexpectedFinished"`. The command must be run with a 10-minute timeout.
46+
1. Check that the command output does not contain `UnexpectedFinished`.
47+
1. Restore the config file `/etc/sonic/reboot.conf`
48+
49+
## 4 Cleanup
50+
Since the reboot script has already killed the SONiC modules, we need to do another reboot after restoring `/sbin/reboot`.

tests/common/platform/processes_utils.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,16 +63,18 @@ def check_critical_processes(dut, watch_secs=0):
6363
watch_secs = watch_secs - 5
6464

6565

66-
def wait_critical_processes(dut):
66+
def wait_critical_processes(dut, timeout=None):
6767
"""
6868
@summary: wait until all critical processes are healthy.
6969
@param dut: The AnsibleHost object of DUT. For interacting with DUT.
70+
@param timeout: customized timeout value in seconds. If specified, it overwrites the value from inventory file.
7071
"""
71-
timeout = reset_timeout(dut)
72-
# No matter what we set in inventory file, we always set sup timeout to 900
73-
# because most SUPs have 10+ dockers that need to come up
74-
if dut.is_supervisor_node():
75-
timeout = 900
72+
if timeout is None:
73+
timeout = reset_timeout(dut)
74+
# No matter what we set in inventory file, we always set sup timeout to 900
75+
# because most SUPs have 10+ dockers that need to come up
76+
if dut.is_supervisor_node():
77+
timeout = 900
7678
logging.info("Wait until all critical processes are healthy in {} sec"
7779
.format(timeout))
7880
pytest_assert(wait_until(timeout, 20, 0, _all_critical_processes_healthy, dut),
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
import pytest
2+
import re
3+
from tests.common.helpers.assertions import pytest_assert
4+
from tests.common.platform.processes_utils import wait_critical_processes
5+
6+
# Markers applied to every test collected from this module.
pytestmark = [pytest.mark.disable_loganalyzer, pytest.mark.topology('any')]

# Limit, in seconds, passed to `timeout` when a reboot command is run on the DUT.
COMMAND_TIMEOUT = 90
12+
13+
14+
def check_if_platform_reboot_enabled(duthost) -> bool:
    """Tell whether the DUT's platform directory contains a ``platform_reboot`` file.

    The platform name is read from DEVICE_METADATA via ``sonic-cfggen`` and
    substituted into the path that is probed on the DUT.

    @param duthost: DUT host object used to run shell commands.
    @return: True when the ``platform_reboot`` file exists, False otherwise.
    """
    platform_name = get_command_result(duthost, "sonic-cfggen -H -v DEVICE_METADATA.localhost.platform")
    reboot_hook = "/usr/share/sonic/device/{}/platform_reboot".format(platform_name)
    return check_if_dut_file_exist(duthost, reboot_hook)
17+
18+
19+
def mock_systemctl_reboot(duthost):
    """Replace ``/sbin/reboot`` on the DUT with an empty executable stub.

    The original binary is preserved as ``/sbin/reboot.bak``. The watchdog is
    disarmed (best effort) and the watchdog utility path inside
    ``/usr/local/bin/reboot`` is renamed so the reboot script cannot use it.

    @param duthost: DUT host object used to run shell commands.
    """
    if not check_if_dut_file_exist(duthost, "/sbin/reboot.bak"):
        # Back up only once: moving again would overwrite the real binary
        # with an already-mocked stub.
        execute_command(duthost, "sudo mv /sbin/reboot /sbin/reboot.bak")
    # The redirection has to happen inside the privileged shell. With
    # `sudo echo ... > file` the file is opened by the calling shell, which
    # is not covered by sudo.
    execute_command(duthost, "sudo sh -c 'echo \"\" > /sbin/reboot'")
    execute_command(duthost, "sudo chmod +x /sbin/reboot")
    # Errors are ignored here: disarming the watchdog is best effort.
    execute_command_ignore_error(duthost, "sudo /usr/local/bin/watchdogutil disarm")

    # Disable watch dog to avoid reboot too early.
    execute_command(
        duthost,
        "sudo sed -i 's#/usr/local/bin/watchdogutil#/usr/local/bin/disabled_watchdogutil#g' /usr/local/bin/reboot")
31+
32+
33+
def restore_systemctl_reboot_and_reboot(duthost):
    """Put the original ``/sbin/reboot`` back, reboot the DUT and wait for it to recover.

    Nothing is done when no ``/sbin/reboot.bak`` backup is found on the DUT.
    After the reboot command, the function waits for critical processes using
    900 seconds on supervisor nodes, 420 seconds on multi-ASIC devices, and the
    default of ``wait_critical_processes`` otherwise.

    @param duthost: DUT host object used to run shell commands.
    """
    backup_present = check_if_dut_file_exist(duthost, "/sbin/reboot.bak")
    if backup_present:
        restore_steps = (
            "sudo rm /sbin/reboot",
            "sudo mv /sbin/reboot.bak /sbin/reboot",
            # Re-enable the watchdog utility path inside the reboot script.
            "sudo sed -i 's#/usr/local/bin/disabled_watchdogutil#/usr/local/bin/watchdogutil#g' /usr/local/bin/reboot",
            "sudo reboot",
        )
        for step in restore_steps:
            execute_command(duthost, step)

        if duthost.is_supervisor_node():
            wait_secs = 900
        elif duthost.is_multi_asic:
            wait_secs = 420
        else:
            wait_secs = None
        wait_critical_processes(duthost, timeout=wait_secs)
49+
50+
51+
def _write_reboot_config(duthost, settings):
    """Back up an existing ``/etc/sonic/reboot.conf`` and write ``settings`` to it.

    @param duthost: DUT host object used to run shell commands.
    @param settings: iterable of ``key=value`` strings, written one per line.
    """
    if (
        check_if_dut_file_exist(duthost, "/etc/sonic/reboot.conf")
        and not check_if_dut_file_exist(duthost, "/etc/sonic/reboot.conf.bak")
    ):
        # Skip the move when a backup already exists so it is not overwritten.
        execute_command(duthost, "sudo mv /etc/sonic/reboot.conf /etc/sonic/reboot.conf.bak")
    # `printf '%s\n'` prints one argument per line in every POSIX shell, while
    # `echo -e` is not portable (some shells print the "-e" literally).
    # `sudo tee` performs the write with the same privilege as the `sudo mv` above.
    quoted_lines = " ".join("'{}'".format(line) for line in settings)
    execute_command(
        duthost,
        "printf '%s\\n' {} | sudo tee /etc/sonic/reboot.conf > /dev/null".format(quoted_lines))


def mock_reboot_config_file(duthost):
    """Install a reboot config that enables blocking mode and the timer display.

    @param duthost: DUT host object used to run shell commands.
    """
    _write_reboot_config(duthost, ("blocking_mode=true", "show_timer=true"))


def mock_reboot_config_file_with_0_timeout(duthost):
    """Install a reboot config that enables blocking mode with ``blocking_mode_timeout=0``.

    @param duthost: DUT host object used to run shell commands.
    """
    _write_reboot_config(
        duthost,
        ("blocking_mode=true", "blocking_mode_timeout=0", "show_timer=true"))
71+
72+
73+
def restore_reboot_config_file(duthost):
    """Remove the mocked ``/etc/sonic/reboot.conf`` and restore the backup if one exists.

    @param duthost: DUT host object used to run shell commands.
    """
    # `-f` keeps this cleanup from failing when the mocked file was never
    # written (for example when the test failed before creating it).
    execute_command(duthost, "sudo rm -f /etc/sonic/reboot.conf")
    if check_if_dut_file_exist(duthost, "/etc/sonic/reboot.conf.bak"):
        execute_command(duthost, "sudo mv /etc/sonic/reboot.conf.bak /etc/sonic/reboot.conf")
77+
78+
79+
def execute_command(duthost, cmd):
    """Run ``cmd`` on the DUT and assert that its return code is 0.

    @param duthost: DUT host object used to run shell commands.
    @param cmd: shell command line to execute.
    """
    return_code = duthost.shell(cmd)["rc"]
    pytest_assert(return_code == 0, "Unexpected rc: {}".format(return_code))
82+
83+
84+
def execute_command_ignore_error(duthost, cmd):
    """Run ``cmd`` on the DUT with ``module_ignore_errors=True``; the result is discarded.

    @param duthost: DUT host object used to run shell commands.
    @param cmd: shell command line to execute.
    """
    _ = duthost.shell(cmd, module_ignore_errors=True)
86+
87+
88+
def get_command_result(duthost, cmd):
    """Run ``cmd`` on the DUT with errors ignored and return its standard output.

    @param duthost: DUT host object used to run shell commands.
    @param cmd: shell command line to execute.
    @return: the ``stdout`` field of the shell result.
    """
    return duthost.shell(cmd, module_ignore_errors=True)["stdout"]
91+
92+
93+
def check_if_dut_file_exist(duthost, filepath) -> bool:
    """Report whether ``filepath`` passes ``test -f`` on the DUT.

    @param duthost: DUT host object used to run shell commands.
    @param filepath: path on the DUT to probe.
    @return: True when the probe command printed ``true``.
    """
    probe = f"test -f {filepath} && echo true || echo false"
    probe_output = duthost.shell(probe, module_ignore_errors=True)["stdout"]
    return "true" in probe_output
96+
97+
98+
class TestRebootBlockingModeCLI:
    """Tests for reboot blocking mode selected through command-line flags."""

    @pytest.fixture(autouse=True, scope="function")
    def setup_teardown(self, duthosts, enum_rand_one_per_hwsku_hostname):
        """Mock ``/sbin/reboot`` before each test, then restore it and reboot afterwards.

        The test is skipped when the DUT has platform reboot enabled.
        """
        dut = duthosts[enum_rand_one_per_hwsku_hostname]
        if check_if_platform_reboot_enabled(dut):
            pytest.skip("Skip test because platform reboot is enabled.")

        mock_systemctl_reboot(dut)
        yield
        restore_systemctl_reboot_and_reboot(dut)

    def test_non_blocking_mode(self, duthosts, enum_rand_one_per_hwsku_hostname):
        """Plain ``reboot`` must return, so the trailing ``echo`` shows up in the output."""
        dut = duthosts[enum_rand_one_per_hwsku_hostname]
        command = f"sudo timeout {COMMAND_TIMEOUT}s bash -c 'sudo reboot; echo \"ExpectedFinished\"'"
        output = get_command_result(dut, command)
        pytest_assert("ExpectedFinished" in output, "Reboot didn't exited as expected.")

    def test_blocking_mode(self, duthosts, enum_rand_one_per_hwsku_hostname):
        """``reboot -b -v`` must block until the timeout and end its output with a line of dots."""
        dut = duthosts[enum_rand_one_per_hwsku_hostname]
        command = f"sudo timeout {COMMAND_TIMEOUT}s bash -c 'sudo reboot -b -v; echo \"UnexpectedFinished\"'"
        output = get_command_result(dut, command)
        pytest_assert("UnexpectedFinished" not in output, "Reboot script didn't blocked as expected.")
        # The final output line has to consist of dots only.
        dots_found = re.search(r".*\n[.]+$", output)
        pytest_assert(dots_found, "Cannot find dots as expected in output: {}".format(output))
136+
137+
138+
class TestRebootBlockingModeConfigFile:
    """Tests for reboot blocking mode configured through ``/etc/sonic/reboot.conf``."""

    @pytest.fixture(autouse=True, scope="function")
    def setup_teardown(
        self,
        duthosts,
        enum_rand_one_per_hwsku_hostname
    ):
        """Mock ``/sbin/reboot`` before each test and undo all mocks afterwards.

        The test is skipped when the DUT has platform reboot enabled.
        """
        duthost = duthosts[enum_rand_one_per_hwsku_hostname]
        if check_if_platform_reboot_enabled(duthost):
            pytest.skip("Skip test because platform reboot is enabled.")

        mock_systemctl_reboot(duthost)
        yield

        try:
            restore_reboot_config_file(duthost)
        finally:
            # Always restore the real /sbin/reboot, even if restoring the
            # config file failed; otherwise the DUT stays mocked.
            restore_systemctl_reboot_and_reboot(duthost)

    def test_timeout_for_blocking_mode(
        self,
        duthosts,
        enum_rand_one_per_hwsku_hostname
    ):
        """With ``blocking_mode_timeout=0`` the ``reboot`` command is expected to return."""
        duthost = duthosts[enum_rand_one_per_hwsku_hostname]
        mock_reboot_config_file_with_0_timeout(duthost)
        result = get_command_result(
            duthost,
            f"sudo timeout {COMMAND_TIMEOUT}s bash -c 'sudo reboot; echo \"ExpectedFinished\"'")
        pytest_assert("ExpectedFinished" in result, "Reboot didn't exited as expected.")

0 commit comments

Comments
 (0)