diff --git a/easybuild/tools/config.py b/easybuild/tools/config.py index eb4af47650..440749a388 100644 --- a/easybuild/tools/config.py +++ b/easybuild/tools/config.py @@ -92,6 +92,8 @@ DEFAULT_CONT_TYPE = CONT_TYPE_SINGULARITY DEFAULT_BRANCH = 'develop' +DEFAULT_DOWNLOAD_INITIAL_WAIT_TIME = 10 +DEFAULT_DOWNLOAD_MAX_ATTEMPTS = 6 DEFAULT_DOWNLOAD_TIMEOUT = 10 DEFAULT_ENV_FOR_SHEBANG = '/usr/bin/env' DEFAULT_ENVVAR_USERS_MODULES = 'HOME' diff --git a/easybuild/tools/filetools.py b/easybuild/tools/filetools.py index 2ce00ce7aa..5dee65dcd4 100644 --- a/easybuild/tools/filetools.py +++ b/easybuild/tools/filetools.py @@ -69,6 +69,7 @@ # import build_log must stay, to use of EasyBuildLog from easybuild.tools.build_log import EasyBuildError, EasyBuildExit, CWD_NOTFOUND_ERROR from easybuild.tools.build_log import dry_run_msg, print_msg, print_warning +from easybuild.tools.config import DEFAULT_DOWNLOAD_INITIAL_WAIT_TIME, DEFAULT_DOWNLOAD_MAX_ATTEMPTS from easybuild.tools.config import ERROR, GENERIC_EASYBLOCK_PKG, IGNORE, WARN, build_option, install_path from easybuild.tools.output import PROGRESS_BAR_DOWNLOAD_ONE, start_progress_bar, stop_progress_bar, update_progress_bar from easybuild.tools.hooks import load_source @@ -777,8 +778,23 @@ def det_file_size(http_header): return res -def download_file(filename, url, path, forced=False, trace=True): - """Download a file from the given URL, to the specified path.""" +def download_file(filename, url, path, forced=False, trace=True, max_attempts=None, initial_wait_time=None): + """ + Download a file from the given URL, to the specified path. + + :param filename: name of file to download + :param url: URL of file to download + :param path: path to download file to + :param forced: boolean to indicate whether force should be used to write the file + :param trace: boolean to indicate whether trace output should be printed + :param max_attempts: max. number of attempts to download file from specified URL + :param initial_wait_time: wait time (in seconds) after first attempt (doubled at each attempt) + """ + + if max_attempts is None: + max_attempts = DEFAULT_DOWNLOAD_MAX_ATTEMPTS + if initial_wait_time is None: + initial_wait_time = DEFAULT_DOWNLOAD_INITIAL_WAIT_TIME insecure = build_option('insecure_download') @@ -802,7 +818,6 @@ def download_file(filename, url, path, forced=False, trace=True): # try downloading, three times max. downloaded = False - max_attempts = 3 attempt_cnt = 0 # use custom HTTP header @@ -823,6 +838,8 @@ def download_file(filename, url, path, forced=False, trace=True): used_urllib = std_urllib switch_to_requests = False + wait_time = initial_wait_time + while not downloaded and attempt_cnt < max_attempts: attempt_cnt += 1 try: @@ -861,6 +878,8 @@ def download_file(filename, url, path, forced=False, trace=True): status_code = err.code if status_code == 403 and attempt_cnt == 1: switch_to_requests = True + elif status_code == 429: # too many requests + _log.warning(f"Downloading of {url} failed with HTTP status code 429 (Too many requests)") elif 400 <= status_code <= 499: _log.warning("URL %s was not found (HTTP response code %s), not trying again" % (url, status_code)) break @@ -887,6 +906,11 @@ def download_file(filename, url, path, forced=False, trace=True): _log.info("Downloading using requests package instead of urllib2") used_urllib = requests + # exponential backoff + wait_time *= 2 + _log.info(f"Waiting for {wait_time} seconds before trying download of {url} again...") + time.sleep(wait_time) + if downloaded: _log.info("Successful download of file %s from url %s to path %s" % (filename, url, path)) if trace: diff --git a/easybuild/tools/github.py b/easybuild/tools/github.py index 70ae40b612..d5f6920bfd 100644 --- a/easybuild/tools/github.py +++ b/easybuild/tools/github.py @@ -55,7 +55,7 @@ from easybuild.framework.easyconfig.parser import EasyConfigParser from easybuild.tools import LooseVersion from easybuild.tools.build_log import EasyBuildError, EasyBuildExit, print_msg, print_warning -from easybuild.tools.config import build_option +from easybuild.tools.config import DEFAULT_DOWNLOAD_MAX_ATTEMPTS, build_option from easybuild.tools.filetools import apply_patch, copy_dir, copy_easyblocks, copy_file, copy_framework_files from easybuild.tools.filetools import det_patched_files, download_file, extract_file from easybuild.tools.filetools import get_easyblock_class_name, mkdir, read_file, symlink, which, write_file @@ -546,9 +546,16 @@ def fetch_files_from_pr(pr, path=None, github_user=None, github_account=None, gi github_user=github_user) # determine list of changed files via diff - diff_fn = os.path.basename(pr_data['diff_url']) + diff_url = pr_data['diff_url'] + diff_fn = os.path.basename(diff_url) diff_filepath = os.path.join(path, diff_fn) - download_file(diff_fn, pr_data['diff_url'], diff_filepath, forced=True, trace=False) + # max. 6 attempts + initial wait time of 10sec -> max. 10 * (2^6) = 640sec (~10min) before giving up on download + # see also https://github.com/easybuilders/easybuild-framework/issues/4869 + max_attempts = DEFAULT_DOWNLOAD_MAX_ATTEMPTS + download_file(diff_fn, diff_url, diff_filepath, forced=True, trace=False, + max_attempts=max_attempts) + if not os.path.exists(diff_filepath): + raise EasyBuildError(f"Failed to download {diff_url}, even after {max_attempts} attempts and being patient...") diff_txt = read_file(diff_filepath) _log.debug("Diff for PR #%s:\n%s", pr, diff_txt) @@ -700,17 +707,21 @@ def fetch_files_from_commit(commit, files=None, path=None, github_account=None, diff_url = os.path.join(GITHUB_URL, github_account, github_repo, 'commit', commit + '.diff') diff_fn = os.path.basename(diff_url) diff_filepath = os.path.join(path, diff_fn) - if download_file(diff_fn, diff_url, diff_filepath, forced=True, trace=False): + # max. 6 attempts + initial wait time of 10sec -> max. 10 * (2^6) = 640sec (~10min) before giving up on download + # see also https://github.com/easybuilders/easybuild-framework/issues/4869 + max_attempts = DEFAULT_DOWNLOAD_MAX_ATTEMPTS + download_file(diff_fn, diff_url, diff_filepath, forced=True, trace=False, + max_attempts=max_attempts) + if os.path.exists(diff_filepath): diff_txt = read_file(diff_filepath) _log.debug("Diff for commit %s:\n%s", commit, diff_txt) files = det_patched_files(txt=diff_txt, omit_ab_prefix=True, github=True, filter_deleted=True) _log.debug("List of patched files for commit %s: %s", commit, files) else: - raise EasyBuildError( - "Failed to download diff for commit %s of %s/%s", commit, github_account, github_repo, - exit_code=EasyBuildExit.FAIL_GITHUB - ) + msg = f"Failed to download diff for commit {commit} of {github_account}/{github_repo} " + msg += " (after {max_attempts} attempts)" + raise EasyBuildError(msg, exit_code=EasyBuildExit.FAIL_GITHUB) # download tarball for specific commit repo_commit = download_repo(repo=github_repo, commit=commit, account=github_account) diff --git a/test/framework/filetools.py b/test/framework/filetools.py index 373bb4c77f..525e97f86f 100644 --- a/test/framework/filetools.py +++ b/test/framework/filetools.py @@ -557,6 +557,21 @@ def test_download_file(self): downloads = glob.glob(target_location + '*') self.assertEqual(len(downloads), 1) + ft.remove_file(target_location) + + # with max attempts set to 0, nothing gets downloaded + with self.mocked_stdout_stderr(): + res = ft.download_file(fn, source_url, target_location, max_attempts=0) + self.assertEqual(res, None) + downloads = glob.glob(target_location + '*') + self.assertEqual(len(downloads), 0) + + with self.mocked_stdout_stderr(): + res = ft.download_file(fn, source_url, target_location, max_attempts=3, initial_wait_time=5) + self.assertEqual(res, target_location, "'download' of local file works") + downloads = glob.glob(target_location + '*') + self.assertEqual(len(downloads), 1) + # non-existing files result in None return value with self.mocked_stdout_stderr(): self.assertEqual(ft.download_file(fn, 'file://%s/nosuchfile' % test_dir, target_location), None)