Skip to content
Merged
39 changes: 11 additions & 28 deletions easybuild/framework/easyblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,12 @@
from easybuild.tools.config import install_path, log_path, package_path, source_paths
from easybuild.tools.environment import restore_env, sanitize_env
from easybuild.tools.filetools import CHECKSUM_TYPE_MD5, CHECKSUM_TYPE_SHA256
from easybuild.tools.filetools import adjust_permissions, apply_patch, back_up_file
from easybuild.tools.filetools import change_dir, convert_name, compute_checksum, copy_file, derive_alt_pypi_url
from easybuild.tools.filetools import diff_files, download_file, encode_class_name, extract_file
from easybuild.tools.filetools import adjust_permissions, apply_patch, back_up_file, change_dir, convert_name
from easybuild.tools.filetools import compute_checksum, copy_file, check_lock, create_lock, derive_alt_pypi_url
from easybuild.tools.filetools import diff_files, dir_contains_files, download_file, encode_class_name, extract_file
from easybuild.tools.filetools import find_backup_name_candidate, get_source_tarball_from_git, is_alt_pypi_url
from easybuild.tools.filetools import is_binary, is_sha256_checksum, mkdir, move_file, move_logs, read_file, remove_dir
from easybuild.tools.filetools import remove_file, verify_checksum, weld_paths, write_file, dir_contains_files
from easybuild.tools.filetools import remove_file, remove_lock, verify_checksum, weld_paths, write_file
from easybuild.tools.hooks import BUILD_STEP, CLEANUP_STEP, CONFIGURE_STEP, EXTENSIONS_STEP, FETCH_STEP, INSTALL_STEP
from easybuild.tools.hooks import MODULE_STEP, PACKAGE_STEP, PATCH_STEP, PERMISSIONS_STEP, POSTITER_STEP, POSTPROC_STEP
from easybuild.tools.hooks import PREPARE_STEP, READY_STEP, SANITYCHECK_STEP, SOURCE_STEP, TEST_STEP, TESTCASES_STEP
Expand Down Expand Up @@ -3049,30 +3049,14 @@ def run_all_steps(self, run_test_cases):
if ignore_locks:
self.log.info("Ignoring locks...")
else:
locks_dir = build_option('locks_dir') or os.path.join(install_path('software'), '.locks')
lock_path = os.path.join(locks_dir, '%s.lock' % self.installdir.replace('/', '_'))

# if lock already exists, either abort or wait until it disappears
if os.path.exists(lock_path):
wait_on_lock = build_option('wait_on_lock')
if wait_on_lock:
while os.path.exists(lock_path):
print_msg("lock %s exists, waiting %d seconds..." % (lock_path, wait_on_lock),
silent=self.silent)
time.sleep(wait_on_lock)
else:
raise EasyBuildError("Lock %s already exists, aborting!", lock_path)
lock_name = self.installdir.replace('/', '_')

# create lock to avoid that another installation running in parallel messes things up;
# we use a directory as a lock, since that's atomically created
try:
mkdir(lock_path, parents=True)
except EasyBuildError as err:
# clean up the error message a bit, get rid of the "Failed to create directory" part + quotes
stripped_err = str(err).split(':', 1)[1].strip().replace("'", '').replace('"', '')
raise EasyBuildError("Failed to create lock %s: %s", lock_path, stripped_err)
# check if lock already exists;
# either aborts with an error or waits until it disappears (depends on --wait-on-lock)
check_lock(lock_name)

self.log.info("Lock created: %s", lock_path)
# create lock to avoid that another installation running in parallel messes things up
create_lock(lock_name)

try:
for (step_name, descr, step_methods, skippable) in steps:
Expand All @@ -3090,8 +3074,7 @@ def run_all_steps(self, run_test_cases):
pass
finally:
if not ignore_locks:
remove_dir(lock_path)
self.log.info("Lock removed: %s", lock_path)
remove_lock(lock_name)

# return True for successfull build (or stopped build)
return True
Expand Down
9 changes: 6 additions & 3 deletions easybuild/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
from easybuild.tools.containers.common import containerize
from easybuild.tools.docs import list_software
from easybuild.tools.filetools import adjust_permissions, cleanup, copy_file, copy_files, dump_index, load_index
from easybuild.tools.filetools import read_file, write_file
from easybuild.tools.filetools import read_file, register_lock_cleanup_signal_handlers, write_file
from easybuild.tools.github import check_github, close_pr, new_branch_github, find_easybuild_easyconfig
from easybuild.tools.github import install_github_token, list_prs, new_pr, new_pr_from_branch, merge_pr
from easybuild.tools.github import sync_branch_with_develop, sync_pr_with_develop, update_branch, update_pr
Expand Down Expand Up @@ -189,6 +189,9 @@ def main(args=None, logfile=None, do_build=None, testing=False, modtool=None):
:param do_build: whether or not to actually perform the build
:param testing: enable testing mode
"""

register_lock_cleanup_signal_handlers()

# if $CDPATH is set, unset it, it'll only cause trouble...
# see https://github.com/easybuilders/easybuild-framework/issues/2944
if 'CDPATH' in os.environ:
Expand Down Expand Up @@ -518,5 +521,5 @@ def main(args=None, logfile=None, do_build=None, testing=False, modtool=None):
main()
except EasyBuildError as err:
print_error(err.msg)
except KeyboardInterrupt:
print_error("Cancelled by user (keyboard interrupt)")
except KeyboardInterrupt as err:
print_error("Cancelled by user: %s" % err)
103 changes: 102 additions & 1 deletion easybuild/tools/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import os
import re
import shutil
import signal
import stat
import sys
import tempfile
Expand All @@ -60,7 +61,7 @@
from easybuild.tools import run
# import build_log must stay, to use of EasyBuildLog
from easybuild.tools.build_log import EasyBuildError, dry_run_msg, print_msg, print_warning
from easybuild.tools.config import GENERIC_EASYBLOCK_PKG, build_option
from easybuild.tools.config import GENERIC_EASYBLOCK_PKG, build_option, install_path
from easybuild.tools.py2vs3 import std_urllib, string_type
from easybuild.tools.utilities import nub, remove_unwanted_chars

Expand Down Expand Up @@ -156,6 +157,9 @@
'.tar.z': "tar xzf %(filepath)s",
}

# global set of names of locks that were created in this session
global_lock_names = set()


class ZlibChecksum(object):
"""
Expand Down Expand Up @@ -1476,6 +1480,103 @@ def mkdir(path, parents=False, set_gid=None, sticky=None):
_log.debug("Not creating existing path %s" % path)


def det_lock_path(lock_name):
"""
Determine full path for lock with specifed name.
"""
locks_dir = build_option('locks_dir') or os.path.join(install_path('software'), '.locks')
return os.path.join(locks_dir, lock_name + '.lock')


def create_lock(lock_name):
"""Create lock with specified name."""

lock_path = det_lock_path(lock_name)
_log.info("Creating lock at %s...", lock_path)
try:
# we use a directory as a lock, since that's atomically created
mkdir(lock_path, parents=True)
global_lock_names.add(lock_name)
except EasyBuildError as err:
# clean up the error message a bit, get rid of the "Failed to create directory" part + quotes
stripped_err = str(err).split(':', 1)[1].strip().replace("'", '').replace('"', '')
raise EasyBuildError("Failed to create lock %s: %s", lock_path, stripped_err)
_log.info("Lock created: %s", lock_path)


def check_lock(lock_name):
"""
Check whether a lock with specified name already exists.

If it exists, either wait until it's released, or raise an error
(depending on --wait-on-lock configuration option).
"""
lock_path = det_lock_path(lock_name)
if os.path.exists(lock_path):
_log.info("Lock %s exists!", lock_path)
wait_on_lock = build_option('wait_on_lock')
if wait_on_lock:
while os.path.exists(lock_path):
print_msg("lock %s exists, waiting %d seconds..." % (lock_path, wait_on_lock),
silent=build_option('silent'))
time.sleep(wait_on_lock)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would expect wait_on_lock to be the max time to wait, not how long between checks. That's what you would normally do in this situation, i.e. specify a max time to wait.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, we need a way to limit the time you're waiting for a lock, and the semantics of --wait-on-lock is a bit weird right now.

So, I changed things in d879cda:

  • --wait-on-lock still works as before (0 (the default) implies no waiting (raise an error if lock is already there), a value implies enabling waiting with interval of specified number of seconds), but is deprecated;
  • --wait-on-lock-limit can be used to i) enable waiting for lock to be released, ii) specify a maximum waiting time (if set to 0 is implies no waiting (raise error if lock is there), if set to -1 it implies waiting indefinitely);
  • --wait-on-lock-interval can be used to specify the interval for re-checking whether the lock has been released (default 60 sec.)

else:
raise EasyBuildError("Lock %s already exists, aborting!", lock_path)
else:
_log.info("Lock %s does not exist", lock_path)


def remove_lock(lock_name):
"""
Remove lock with specified name.
"""
lock_path = det_lock_path(lock_name)
_log.info("Removing lock %s...", lock_path)
remove_dir(lock_path)
if lock_name in global_lock_names:
global_lock_names.remove(lock_name)
_log.info("Lock removed: %s", lock_path)


def clean_up_locks():
"""
Clean up all still existing locks that were created in this session.
"""
for lock_name in list(global_lock_names):
remove_lock(lock_name)


def clean_up_locks_signal_handler(signum, frame):
"""
Signal handler, cleans up locks & exists with received signal number.
"""

if not build_option('silent'):
print_warning("signal received (%s), cleaning up locks (%s)..." % (signum, ', '.join(global_lock_names)))
clean_up_locks()

# by default, a KeyboardInterrupt is raised with SIGINT, so keep doing so
if signum == signal.SIGINT:
raise KeyboardInterrupt("keyboard interrupt")
else:
sys.exit(signum)


def register_lock_cleanup_signal_handlers():
"""
Register signal handler for signals that cancel the current EasyBuild session,
so we can clean up the locks that were created first.
"""
signums = [
signal.SIGABRT,
signal.SIGINT, # Ctrl-C
signal.SIGTERM, # signal 15, soft kill (like when Slurm job is cancelled or received timeout)
signal.SIGQUIT, # kinda like Ctrl-C
]
for signum in signums:
signal.signal(signum, clean_up_locks_signal_handler)


def expand_glob_paths(glob_paths):
"""Expand specified glob paths to a list of unique non-glob paths to only files."""
paths = []
Expand Down
91 changes: 91 additions & 0 deletions test/framework/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2399,6 +2399,97 @@ def test_copy_framework_files(self):

self.assertEqual(res['new'], expected_new)

def test_locks(self):
"""Tests for lock-related functions."""

init_config(build_options={'silent': True})

# make sure that global list of locks is empty when we start off
self.assertFalse(ft.global_lock_names)

# use a realistic lock name (cfr. EasyBlock.run_all_steps)
installdir = os.path.join(self.test_installpath, 'software', 'test', '1.2.3-foss-2019b-Python-3.7.4')
lock_name = installdir.replace('/', '_')

# det_lock_path returns full path to lock with specified name
# (used internally by create_lock, check_lock, remove_lock)
lock_path = ft.det_lock_path(lock_name)
self.assertFalse(os.path.exists(lock_path))

locks_dir = os.path.dirname(lock_path)
self.assertFalse(os.path.exists(locks_dir))

# if lock doesn't exist yet, check_lock just returns
ft.check_lock(lock_name)

# create lock, and check whether it actually was created
ft.create_lock(lock_name)
self.assertTrue(os.path.exists(lock_path))

# can't use os.path.samefile until locks_dir actually exists
self.assertTrue(os.path.samefile(locks_dir, os.path.join(self.test_installpath, 'software', '.locks')))

self.assertEqual(os.listdir(locks_dir), [lock_name + '.lock'])

# if lock exists, then check_lock raises an error
self.assertErrorRegex(EasyBuildError, "Lock .* already exists", ft.check_lock, lock_name)

# remove_lock should... remove the lock
ft.remove_lock(lock_name)
self.assertFalse(os.path.exists(lock_path))
self.assertEqual(os.listdir(locks_dir), [])

# no harm done if remove_lock is called if lock is already gone
ft.remove_lock(lock_name)

# check_lock just returns again after lock is removed
ft.check_lock(lock_name)

# global list of locks should be empty at this point
self.assertFalse(ft.global_lock_names)

# calling clean_up_locks when there are no locks should not cause trouble
ft.clean_up_locks()

ft.create_lock(lock_name)
self.assertEqual(ft.global_lock_names, set([lock_name]))
self.assertEqual(os.listdir(locks_dir), [lock_name + '.lock'])

ft.clean_up_locks()
self.assertFalse(ft.global_lock_names)
self.assertFalse(os.path.exists(lock_path))
self.assertEqual(os.listdir(locks_dir), [])

# no problem with multiple locks
lock_names = [lock_name, 'test123', 'foo@bar%baz']
lock_paths = [os.path.join(locks_dir, x + '.lock') for x in lock_names]
for ln in lock_names:
ft.create_lock(ln)
for lp in lock_paths:
self.assertTrue(os.path.exists(lp), "Path %s should exist" % lp)

self.assertEqual(ft.global_lock_names, set(lock_names))
expected_locks = sorted(ln + '.lock' for ln in lock_names)
self.assertEqual(sorted(os.listdir(locks_dir)), expected_locks)

ft.clean_up_locks()
for lp in lock_paths:
self.assertFalse(os.path.exists(lp), "Path %s should not exist" % lp)
self.assertFalse(ft.global_lock_names)
self.assertEqual(os.listdir(locks_dir), [])

# also test signal handler that is supposed to clean up locks
ft.create_lock(lock_name)
self.assertTrue(ft.global_lock_names)
self.assertTrue(os.path.exists(lock_path))
self.assertEqual(os.listdir(locks_dir), [lock_name + '.lock'])

# clean_up_locks_signal_handler causes sys.exit with specified exit code
self.assertErrorRegex(SystemExit, '15', ft.clean_up_locks_signal_handler, 15, None)
self.assertFalse(ft.global_lock_names)
self.assertFalse(os.path.exists(lock_path))
self.assertEqual(os.listdir(locks_dir), [])


def suite():
""" returns all the testcases in this module """
Expand Down
56 changes: 54 additions & 2 deletions test/framework/toy_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ def check_toy(self, installpath, outtxt, version='0.0', versionprefix='', versio
self.assertTrue(os.path.exists(devel_module_path))

def test_toy_build(self, extra_args=None, ec_file=None, tmpdir=None, verify=True, fails=False, verbose=True,
raise_error=False, test_report=None, versionsuffix='', testing=True):
raise_error=False, test_report=None, versionsuffix='', testing=True,
raise_systemexit=False):
"""Perform a toy build."""
if extra_args is None:
extra_args = []
Expand All @@ -145,7 +146,7 @@ def test_toy_build(self, extra_args=None, ec_file=None, tmpdir=None, verify=True
myerr = None
try:
outtxt = self.eb_main(args, logfile=self.dummylogfn, do_build=True, verbose=verbose,
raise_error=raise_error, testing=testing)
raise_error=raise_error, testing=testing, raise_systemexit=raise_systemexit)
except Exception as err:
myerr = err
if raise_error:
Expand Down Expand Up @@ -2607,6 +2608,57 @@ def __exit__(self, type, value, traceback):
self.assertErrorRegex(EasyBuildError, error_pattern, self.test_toy_build,
extra_args=extra_args, raise_error=True, verbose=False)

def test_toy_lock_cleanup_signals(self):
"""Test cleanup of locks after EasyBuild session gets a cancellation signal."""

locks_dir = os.path.join(self.test_installpath, 'software', '.locks')
self.assertFalse(os.path.exists(locks_dir))

# context manager which stops the function being called with the specified signal
class wait_and_signal:
def __init__(self, seconds, signum):
self.seconds = seconds
self.signum = signum

def send_signal(self, *args):
os.kill(os.getpid(), self.signum)

def __enter__(self):
signal.signal(signal.SIGALRM, self.send_signal)
signal.alarm(self.seconds)

def __exit__(self, type, value, traceback):
pass

# add extra sleep command to ensure session takes long enough
test_ecs_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'easyconfigs', 'test_ecs')
toy_ec_txt = read_file(os.path.join(test_ecs_dir, 't', 'toy', 'toy-0.0.eb'))

test_ec = os.path.join(self.test_prefix, 'test.eb')
write_file(test_ec, toy_ec_txt + '\npostinstallcmds = ["sleep 5"]')

signums = [
(signal.SIGABRT, SystemExit),
(signal.SIGINT, KeyboardInterrupt),
(signal.SIGTERM, SystemExit),
(signal.SIGQUIT, SystemExit),
]
for (signum, exc) in signums:
with wait_and_signal(1, signum):
self.mock_stderr(True)
self.mock_stdout(True)
self.assertErrorRegex(exc, '.*', self.test_toy_build, ec_file=test_ec, verify=False,
raise_error=True, testing=False, raise_systemexit=True)

stderr = self.get_stderr().strip()
self.mock_stderr(False)
self.mock_stdout(False)

pattern = r"^WARNING: signal received \(%s\), " % int(signum)
pattern += r"cleaning up locks \(.*software_toy_0.0\)\.\.\."
regex = re.compile(pattern)
self.assertTrue(regex.search(stderr), "Pattern '%s' found in: %s" % (regex.pattern, stderr))

def test_toy_build_unicode_description(self):
"""Test installation of easyconfig file that has non-ASCII characters in description."""
# cfr. https://github.com/easybuilders/easybuild-framework/issues/3284
Expand Down