Skip to content

Commit 128567c

Browse files
authored
checking disk and memory space before installing image (#11151)
What is the motivation for this PR? Image updates sometimes failed on low-memory devices. How did you do it? Check memory and disk space before installing the image; if there is not enough memory or disk space, abort. How did you verify/test it? Upgraded the images on low-memory and small-disk devices several times with no issues. Signed-off-by: xuliping <[email protected]>
1 parent a42cf1d commit 128567c

File tree

1 file changed

+89
-5
lines changed

1 file changed

+89
-5
lines changed

ansible/library/reduce_and_add_sonic_images.py

Lines changed: 89 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,9 @@ def download_new_sonic_image(module, new_image_url, save_as):
143143
log("Completed downloading image")
144144

145145
free_disk_size = get_disk_free_size(module, "/")
146-
log("After downloaded sonic image, latest free disk size: {}".format(free_disk_size))
146+
total, avail = get_memory_sizes(module)
147+
log("After downloaded sonic image, latest free disk size: {}, "
148+
"memory total {} available {}".format(free_disk_size, total, avail))
147149

148150
if path.exists(save_as):
149151
log("Checking downloaded image version")
@@ -152,13 +154,35 @@ def download_new_sonic_image(module, new_image_url, save_as):
152154
log("Downloaded image version: {}".format(results["downloaded_image_version"]))
153155

154156

155-
def install_new_sonic_image(module, new_image_url, save_as=None):
157+
def install_new_sonic_image(module, new_image_url, save_as=None, required_space=1600):
156158
log("install new sonic image")
157159

160+
log("Clean-up previous downloads first")
161+
exec_command(
162+
module,
163+
cmd="rm -f {}".format("/host/downloaded-sonic-image"),
164+
msg="clean up previously downloaded image",
165+
ignore_error=True
166+
)
167+
158168
if not save_as:
159169
avail = get_disk_free_size(module, "/host")
160170
save_as = "/host/downloaded-sonic-image" if avail >= 2000 else "/tmp/tmpfs/downloaded-sonic-image"
161171

172+
free_disk_size = get_disk_free_size(module, "/")
173+
total, avail = get_memory_sizes(module)
174+
log("Before install sonic image, free disk {}, memory total {} available {}".format(free_disk_size, total, avail))
175+
if avail < 1024 or free_disk_size < required_space:
176+
log("free memory or disk space size is not enough to install a new image")
177+
module.fail_json(
178+
msg="Image installation failed: rc=%d, out=%s, err=%s" % (
179+
-1,
180+
"free memory or disk space size is not enough to install a new image",
181+
""
182+
)
183+
)
184+
return
185+
162186
if save_as.startswith("/tmp/tmpfs"):
163187
log("Create a tmpfs partition to download image to install")
164188
exec_command(module, cmd="mkdir -p /tmp/tmpfs", ignore_error=True)
@@ -228,9 +252,14 @@ def get_disk_used_percent(module):
228252
output = exec_command(module, cmd="df -BM --output=pcent /host")[1]
229253
return int(output.splitlines()[-1][:-1])
230254

255+
def get_disk_total_size(module):
    """Return the total size of the /host filesystem as an int.

    Runs ``df -BM --output=size /host`` through ``exec_command`` and parses
    the last line of the second element of its result.
    """
    df_stdout = exec_command(module, cmd="df -BM --output=size /host")[1]
    size_field = df_stdout.splitlines()[-1]
    # Drop the final character (presumably the "M" unit suffix printed by -BM)
    # before converting to int.
    return int(size_field[:-1])
258+
231259
current_used_percent = get_disk_used_percent(module)
232-
log("current used percent: {}".format(current_used_percent))
233-
if current_used_percent > disk_used_pcent:
260+
total_size = get_disk_total_size(module)
261+
log("current used percent: {}, total_size {}".format(current_used_percent, total_size))
262+
if current_used_percent > disk_used_pcent or total_size < 4096:
234263
log("Trying to free up spaces at best effort")
235264
exec_command(module, "rm -f /var/log/*.gz", ignore_error=True)
236265
exec_command(module, "rm -f /var/core/*", ignore_error=True)
@@ -245,6 +274,58 @@ def get_disk_used_percent(module):
245274
log("After free up disk space, latest free disk size: {}".format(free_disk_size))
246275

247276

277+
def free_up_memory_drop_caches(module):
    """
    Drop kernel caches at best effort to free up memory.

    Writes 1, 2 and then 3 to /proc/sys/vm/drop_caches:
        To free pagecache:
            echo 1 > /proc/sys/vm/drop_caches
        To free reclaimable slab objects (includes dentries and inodes):
            echo 2 > /proc/sys/vm/drop_caches
        To free slab objects and pagecache:
            echo 3 > /proc/sys/vm/drop_caches

    Dropping caches does not free dirty objects, so `sync` is run *before*
    each write to flush them and make as much as possible reclaimable.
    Every command is executed with ignore_error=True; failures do not abort.
    """
    for index in range(1, 4):
        # Flush dirty pages first; otherwise they cannot be dropped.
        exec_command(module, 'sync', ignore_error=True)
        cmd = 'echo {} > /proc/sys/vm/drop_caches'.format(index)
        exec_command(module, cmd, ignore_error=True)
        log("drop {} cache to free up memory space".format(index))
293+
294+
295+
def free_up_memory_stop_process(module):
    """
    Stop services that are not needed during the upgrade to free up memory.

    The services are not started again here since they will be started
    after reboot. Running services can be listed with:
        sudo systemctl list-units --type=service --state=running

    Each stop is executed with ignore_error=True, so a service that is
    missing or already stopped does not abort the upgrade.
    """
    # Unit names only: the "systemctl stop" verb is added when building the
    # command, so an entry must not carry its own "stop" prefix.
    services = ('monit.service', 'telemetry.service', 'pmon.service')
    for service in services:
        cmd = 'systemctl stop {}'.format(service)
        exec_command(module, cmd, ignore_error=True)
        log("stop process {}".format(service))
309+
310+
311+
def free_up_memory_space(module, free_space_needed=1024):
    """Free memory on low-memory devices at best effort before an upgrade.

    If the available memory reported by ``get_memory_sizes`` is already
    greater than ``free_space_needed`` nothing is done. Otherwise caches are
    dropped and services not needed during the upgrade are stopped, and the
    resulting available memory is logged.

    NOTE(review): ``free_space_needed`` is compared directly against the
    second value returned by ``get_memory_sizes``; presumably both are in
    MB - confirm against that helper.
    """
    log("free up memory space at best effort")

    _, available = get_memory_sizes(module)
    if available > free_space_needed:
        log("Available memory {}, no need to free up memory space".format(available))
    else:
        free_up_memory_drop_caches(module)
        free_up_memory_stop_process(module)

        # Re-read memory figures to report what the clean-up achieved.
        _, available = get_memory_sizes(module)
        log("After free up, current available memory {}".format(available))
327+
328+
248329
def work_around_for_reboot(module):
249330
# work around reboot for s6100
250331
# Replace /usr/share/sonic/device/x86_64-dell_s6100_c2538-r0/platform_reboot_pre_check
@@ -275,12 +356,14 @@ def main():
275356
disk_used_pcent=dict(required=False, type='int', default=8),
276357
new_image_url=dict(required=False, type='str', default=None),
277358
save_as=dict(required=False, type='str', default=None),
359+
required_space=dict(required=False, type='int', default=1600),
278360
),
279361
supports_check_mode=False)
280362

281363
disk_used_pcent = module.params['disk_used_pcent']
282364
new_image_url = module.params['new_image_url']
283365
save_as = module.params['save_as']
366+
required_space = module.params['required_space']
284367

285368
try:
286369
if not new_image_url:
@@ -295,10 +378,11 @@ def main():
295378
results["current_stage"] = "prepare"
296379

297380
free_up_disk_space(module, disk_used_pcent)
381+
free_up_memory_space(module)
298382
setup_swap_if_necessary(module)
299383
results["current_stage"] = "install"
300384

301-
install_new_sonic_image(module, new_image_url, save_as)
385+
install_new_sonic_image(module, new_image_url, save_as, required_space)
302386
results["current_stage"] = "complete"
303387
except Exception:
304388
err = str(sys.exc_info())

0 commit comments

Comments
 (0)