Skip to content

Commit 0591b51

Browse files
committed
Add library and sample scripts for running ansible in pure python (#7792)
What is the motivation for this PR? This change experiments with the idea of running ansible in pure python. With the classes and functions defined in ansible_hosts.py and devices.py, we can use pure python to run any ansible module on any host. The required inputs are an ansible inventory file and optional variable files. We can view it as a python version of a playbook. Compared with an ansible playbook, we can take advantage of a real programming language. The drawback is that python programming experience is required. Compared with pytest-ansible, we do not need pytest. This design supports some ansible features not supported by pytest-ansible: * fork: Pytest-ansible does not support running ansible modules in parallel. This design uses ansible's builtin forking capability to run modules in parallel. * module attributes: Ansible supports additional module attributes that can be specified for each task in a playbook. These module attributes can affect execution of the modules, for example "become", "async", etc. Pytest-ansible does not support these attributes. With this design, we can use the keyword argument module_attrs to specify module attributes while calling an ansible module. Not all of ansible's builtin features are supported by this design. For example: * Notify and event handlers. (We can use python's libs to support that.) This idea is still new. I haven't figured out all of its potential and limitations. Feedback, suggestions and contributions are more than welcome! How did you do it? * Added basic classes AnsibleHosts and AnsibleHost * Added class SonicHosts * Added helper functions like init_xxx_hosts * Added script ansible/upgrade_sonic.py as a replacement for ansible/upgrade_sonic.yml * Added script .azure-pipelines/upgrade_image.py which can be called in nightly tests. How did you verify/test it? Test ran the upgrade_sonic.py and upgrade_image.py scripts. Signed-off-by: Xin Wang <xiwang5@microsoft.com>
1 parent 2ecbc95 commit 0591b51

5 files changed

Lines changed: 1840 additions & 0 deletions

File tree

.azure-pipelines/upgrade_image.py

Lines changed: 312 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,312 @@
1+
"""Script for upgrading SONiC image for nightly tests.
2+
3+
Main purpose of this script is to upgrade SONiC image for nightly tests. Based on the arguments passed in, the script
4+
may power cycle the devices before upgrade, or power cycle them only when they are unreachable.
5+
6+
Before upgrading to the target image, this script may first upgrade to a previous image. This avoids the case where the
7+
devices are already running the target image, in which case the image upgrade could be skipped. The problem is that the current
8+
image may have been modified by people for debugging purposes. Upgrading to a previous image first ensures that the
9+
target image is clean.
10+
"""
11+
import argparse
12+
import logging
13+
import os
14+
import requests
15+
import sys
16+
17+
# Make the repository root and its "ansible" directory importable so that the
# project-local imports further down resolve when this file is run as a script.
_self_dir = os.path.dirname(os.path.abspath(__file__))
base_path = os.path.realpath(os.path.join(_self_dir, ".."))
ansible_path = os.path.realpath(os.path.join(_self_dir, "../ansible"))
for _path_entry in (base_path, ansible_path):
    # Append only when missing to avoid duplicate sys.path entries.
    if _path_entry not in sys.path:
        sys.path.append(_path_entry)
24+
25+
26+
from devutil.devices import init_localhost, init_testbed_sonichosts # noqa E402
27+
from devutil.sonic_helpers import upgrade_image # noqa E402
28+
29+
from tests.common.plugins.pdu_controller.pdu_manager import pdu_manager_factory # noqa E402
30+
31+
# Module-level logger; its output is configured by validate_args() through logging.basicConfig().
logger = logging.getLogger(__name__)
31+
32+
33+
# Process exit codes passed to sys.exit() by main() to identify the stage that failed.
# Returned when localhost or the SONiC hosts could not be initialized.
RC_INIT_FAILED = 1
34+
# Returned when upgrading to the previous image failed.
RC_UPGRADE_PREV_FAILED = 2
35+
# Returned when upgrading to the target image failed.
RC_UPGRADE_FAILED = 3
36+
# Returned when enabling FIPS (or the following reboot command) raised an exception.
RC_ENABLE_FIPS_FAILED = 4
38+
39+
40+
# validate_args: configure logging and decide whether the "previous image" upgrade step can run.
#
# Args:
#   args: parsed command line namespace. Reads log_level, image_url and prev_image_url.
#         Sets skip_prev_image, and fills in prev_image_url when it was not supplied.
#
# Returns:
#   None. Results are stored as attributes on args.
#
# NOTE(review): indentation is not visible in this view, so it cannot be confirmed from here
# whether the HEAD probe below runs only for the derived default URL or for any prev_image_url.
def validate_args(args):
41+
_log_level_map = {
42+
"debug": logging.DEBUG,
43+
"info": logging.INFO,
44+
"warning": logging.WARNING,
45+
"error": logging.ERROR,
46+
"critical": logging.CRITICAL
47+
}
48+
# Log to stdout at the level selected by args.log_level.
logging.basicConfig(
49+
stream=sys.stdout,
50+
level=_log_level_map[args.log_level],
51+
format="%(asctime)s %(filename)s#%(lineno)d %(levelname)s - %(message)s"
52+
)
53+
54+
# By default the previous-image upgrade is not skipped; the checks below may flip this.
args.skip_prev_image = False
55+
# No explicit previous image URL: derive one from the target image URL.
if not args.prev_image_url:
56+
args.prev_image_url = "{}.PREV.1".format(args.image_url)
57+
logger.info("PREV_IMAGE_URL={}".format(args.prev_image_url))
58+
59+
# Probe the previous image URL with an HTTP HEAD request (20 second timeout). A non-200 status
# or any exception marks the previous-image upgrade as skipped instead of failing the script.
try:
60+
res_prev_image = requests.head(args.prev_image_url, timeout=20)
61+
if res_prev_image.status_code != 200:
62+
logger.info("Not able to get prev_image at {}, skip upgrading to prev_image.".format(args.prev_image_url))
63+
args.skip_prev_image = True
64+
except Exception as e:
65+
logger.info(
66+
"Downloading prev image {} failed with {}, skip upgrading to prev image".format(
67+
args.prev_image_url, repr(e)
68+
)
69+
)
70+
args.skip_prev_image = True
71+
72+
73+
def get_pdu_managers(sonichosts, conn_graph_facts):
    """Build one PDU manager per device that is going to be upgraded.

    Args:
        sonichosts (SonicHosts): Instance of class SonicHosts.
        conn_graph_facts (dict): Connection graph dict. Its "device_pdu_links" entry is read per hostname.

    Returns:
        dict: Maps each device hostname to the PDU manager object created for that device.
    """
    managers = {}
    for dut_name in sonichosts.hostnames:
        links = conn_graph_facts["device_pdu_links"][dut_name]
        # Collect the peer PDU names first, then look up the visible vars of each PDU.
        peer_pdus = [link["peerdevice"] for link in links.values()]
        vars_by_pdu = {pdu: sonichosts.get_host_visible_vars(pdu) for pdu in peer_pdus}
        managers[dut_name] = pdu_manager_factory(dut_name, None, conn_graph_facts, vars_by_pdu)
    return managers
93+
94+
95+
def main(args):
    """Upgrade the SONiC image on all DUTs of a testbed.

    The steps are:
      1. Validate arguments and configure logging.
      2. Initialize localhost and the SONiC hosts of the testbed.
      3. Optionally power cycle the DUTs (always, or only the ones that do not answer ping).
      4. Unless skipped, upgrade to the previous image first.
      5. Upgrade to the target image.
      6. Optionally enable FIPS and reboot.

    Args:
        args (argparse.Namespace): Parsed command line arguments.

    Raises:
        SystemExit: With RC_INIT_FAILED, RC_UPGRADE_PREV_FAILED, RC_UPGRADE_FAILED or
            RC_ENABLE_FIPS_FAILED when the corresponding stage fails.
    """
    # NOTE: logging is configured inside validate_args(), so this first message is emitted
    # before logging.basicConfig() has run.
    logger.info("Validating arguments")
    validate_args(args)

    logger.info("Initializing hosts")
    localhost = init_localhost(args.inventory, options={"verbosity": args.verbosity})
    sonichosts = init_testbed_sonichosts(
        args.inventory, args.testbed_name, testbed_file=args.tbfile, options={"verbosity": args.verbosity}
    )

    if not localhost or not sonichosts:
        sys.exit(RC_INIT_FAILED)

    conn_graph_facts = localhost.conn_graph_facts(
        hosts=sonichosts.hostnames,
        filepath=os.path.join(ansible_path, "files")
    )["ansible_facts"]

    # PDU managers are only needed when some form of power cycling was requested.
    if args.always_power_cycle or args.power_cycle_unreachable:
        pdu_managers = get_pdu_managers(sonichosts, conn_graph_facts)

    if args.always_power_cycle:
        # Power cycle before upgrade
        logger.info("Power cycle before upgrade")
        for hostname, pdu_manager in pdu_managers.items():
            logger.info("Turn off power outlets to {}".format(hostname))
            pdu_manager.turn_off_outlet()
        localhost.pause(seconds=30, prompt="Pause between power off/on")
        for hostname, pdu_manager in pdu_managers.items():
            logger.info("Turn on power outlets to {}".format(hostname))
            pdu_manager.turn_on_outlet()
        localhost.pause(seconds=180, prompt="Add some sleep to allow power cycled DUTs to come back")

    elif args.power_cycle_unreachable:
        # Power cycle when unreachable
        logger.info("Power cycle unreachable")
        ping_results = {}
        needs_sleep = False
        for hostname, ip in zip(sonichosts.hostnames, sonichosts.ips):
            logger.info("Ping {} @{} from localhost".format(hostname, ip))
            ping_failed = localhost.command(
                "timeout 2 ping {} -c 1".format(ip), module_ignore_errors=True
            ).get("localhost", {}).get("failed")
            if ping_failed:
                logger.info("Ping {} @{} from localhost failed. Going to power off it".format(hostname, ip))
                ping_results[hostname] = ping_failed
                pdu_managers[hostname].turn_off_outlet()
                needs_sleep = True

        # Only wait when at least one device was actually powered off.
        if needs_sleep:
            localhost.pause(seconds=30, prompt="Pause between power off/on")

        for hostname, ping_failed in ping_results.items():
            if ping_failed:
                logger.info("Power on {}".format(hostname))
                pdu_managers[hostname].turn_on_outlet()

        if needs_sleep:
            localhost.pause(seconds=180, prompt="Add some sleep to allow power cycled DUTs to come back")

    # Upgrade to prev image
    if not args.skip_prev_image:
        logger.info("upgrade to prev image at {}".format(args.prev_image_url))
        upgrade_success = upgrade_image(
            sonichosts,
            localhost,
            args.prev_image_url,
            upgrade_type=args.upgrade_type,
            onie_pause_time=args.onie_pause_time
        )

        if not upgrade_success:
            logger.error("Upgrade prev_image {} failed".format(args.prev_image_url))
            sys.exit(RC_UPGRADE_PREV_FAILED)

        logger.info("Upgraded to prev_image {}.".format(args.prev_image_url))
        for hostname, version in sonichosts.sonic_version.items():
            logger.info("SONiC host {} current version {}".format(hostname, version.get("build_version")))

    # Upgrade to target image
    logger.info("upgrade to target image at {}".format(args.image_url))
    upgrade_success = upgrade_image(
        sonichosts,
        localhost,
        args.image_url,
        upgrade_type=args.upgrade_type,
        onie_pause_time=args.onie_pause_time
    )
    if not upgrade_success:
        logger.error("Upgrade image {} failed".format(args.image_url))
        sys.exit(RC_UPGRADE_FAILED)

    # Report the target image URL here, not the previous image URL.
    logger.info("Upgraded to image {}".format(args.image_url))
    for hostname, version in sonichosts.sonic_version.items():
        logger.info("SONiC host {} current version {}".format(hostname, version.get("build_version")))

    # Enable FIPS
    if args.enable_fips:
        logger.info("Need to enable FIPS")
        try:
            sonichosts.command("sonic-installer set-fips", module_attrs={"become": True})
            # Fire-and-forget reboot: "async" with "poll": 0 does not wait for the command to finish.
            sonichosts.command("shutdown -r now", module_attrs={"become": True, "async": 300, "poll": 0})
        except Exception as e:
            # str has no ".repr" method; format the exception explicitly so the handler itself
            # cannot raise AttributeError and mask the intended exit code.
            logger.error("Failed to enable FIPS mode: {}".format(repr(e)))
            sys.exit(RC_ENABLE_FIPS_FAILED)

        # The reboot above is the only one issued directly by this function, so wait for it here.
        localhost.pause(seconds=180, prompt="Pause after reboot")
    logger.info("===== UPGRADE IMAGE DONE =====")
204+
205+
206+
def str2bool(value):
    """Convert a command line value into a real boolean.

    argparse's ``type=bool`` simply calls ``bool()`` on the raw string, so every non-empty
    string (including "False" and "0") becomes True. This converter parses the text instead.

    Args:
        value (str or bool): Raw command line value, or an already parsed boolean.

    Returns:
        bool: The parsed boolean. An empty string is treated as False, which matches what
            ``bool("")`` returned before.

    Raises:
        argparse.ArgumentTypeError: If the value is not a recognized boolean spelling.
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in ("true", "t", "yes", "y", "on", "1"):
        return True
    if normalized in ("false", "f", "no", "n", "off", "0", ""):
        return False
    raise argparse.ArgumentTypeError("Expected a boolean value, got {!r}".format(value))


if __name__ == "__main__":

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Tool for SONiC image upgrade during nightly tests.")

    parser.add_argument(
        "-i", "--inventory",
        type=str,
        dest="inventory",
        required=True,
        help="Ansible inventory file")

    parser.add_argument(
        "-t", "--testbed-name",
        type=str,
        required=True,
        dest="testbed_name",
        help="Testbed name. DUTs of the specified testbed will be upgraded."
    )

    parser.add_argument(
        "-u", "--url",
        type=str,
        dest="image_url",
        required=True,
        help="SONiC image url."
    )

    parser.add_argument(
        "--prev-url",
        type=str,
        dest="prev_image_url",
        default=None,
        help="Previous SONiC image url. If not specified, '<image url>.PREV.1' is used."
    )

    parser.add_argument(
        "--tbfile",
        type=str,
        dest="tbfile",
        default="testbed.yaml",
        help="Testbed definition file."
    )

    # Boolean options take an explicit value (e.g. "--always-power-cycle True"). They are parsed
    # with str2bool so that "False"/"0"/"no" really disable the option.
    parser.add_argument(
        "--always-power-cycle",
        type=str2bool,
        dest="always_power_cycle",
        default=False,
        help="Always power cycle DUTs before upgrade."
    )

    parser.add_argument(
        "--power-cycle-unreachable",
        type=str2bool,
        dest="power_cycle_unreachable",
        default=True,
        help="Only power cycle unreachable DUTs."
    )

    parser.add_argument(
        "--onie-pause-time",
        type=int,
        dest="onie_pause_time",
        default=30,
        help="Seconds to pause after booted into onie."
    )

    parser.add_argument(
        "-y", "--type",
        type=str,
        choices=["sonic", "onie"],
        dest="upgrade_type",
        required=False,
        default="sonic",
        help="Upgrade type."
    )

    parser.add_argument(
        "--enable-fips",
        type=str2bool,
        dest="enable_fips",
        required=False,
        default=False,
        help="Enable FIPS."
    )

    parser.add_argument(
        "-v", "--verbosity",
        type=int,
        dest="verbosity",
        default=2,
        help="Log verbosity (0-3)."
    )

    parser.add_argument(
        "--log-level",
        type=str,
        dest="log_level",
        choices=["debug", "info", "warning", "error", "critical"],
        default="debug",
        help="Loglevel"
    )

    args = parser.parse_args()
    main(args)

0 commit comments

Comments
 (0)