Skip to content
2 changes: 2 additions & 0 deletions easybuild/tools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@
DEFAULT_CONT_TYPE = CONT_TYPE_SINGULARITY

DEFAULT_BRANCH = 'develop'
DEFAULT_DOWNLOAD_INITIAL_WAIT_TIME = 10
DEFAULT_DOWNLOAD_MAX_ATTEMPTS = 6
DEFAULT_DOWNLOAD_TIMEOUT = 10
DEFAULT_ENV_FOR_SHEBANG = '/usr/bin/env'
DEFAULT_ENVVAR_USERS_MODULES = 'HOME'
Expand Down
30 changes: 27 additions & 3 deletions easybuild/tools/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
# import build_log must stay, to use of EasyBuildLog
from easybuild.tools.build_log import EasyBuildError, EasyBuildExit, CWD_NOTFOUND_ERROR
from easybuild.tools.build_log import dry_run_msg, print_msg, print_warning
from easybuild.tools.config import DEFAULT_DOWNLOAD_INITIAL_WAIT_TIME, DEFAULT_DOWNLOAD_MAX_ATTEMPTS
from easybuild.tools.config import ERROR, GENERIC_EASYBLOCK_PKG, IGNORE, WARN, build_option, install_path
from easybuild.tools.output import PROGRESS_BAR_DOWNLOAD_ONE, start_progress_bar, stop_progress_bar, update_progress_bar
from easybuild.tools.hooks import load_source
Expand Down Expand Up @@ -777,8 +778,23 @@ def det_file_size(http_header):
return res


def download_file(filename, url, path, forced=False, trace=True):
"""Download a file from the given URL, to the specified path."""
def download_file(filename, url, path, forced=False, trace=True, max_attempts=None, initial_wait_time=None):
"""
Download a file from the given URL, to the specified path.

:param filename: name of file to download
:param url: URL of file to download
:param path: path to download file to
:param forced: boolean to indicate whether force should be used to write the file
:param trace: boolean to indicate whether trace output should be printed
:param max_attempts: max. number of attempts to download file from specified URL
:param initial_wait_time: wait time (in seconds) after first attempt (doubled at each attempt)
"""

if max_attempts is None:
max_attempts = DEFAULT_DOWNLOAD_MAX_ATTEMPTS
if initial_wait_time is None:
initial_wait_time = DEFAULT_DOWNLOAD_INITIAL_WAIT_TIME

insecure = build_option('insecure_download')

Expand All @@ -802,7 +818,6 @@ def download_file(filename, url, path, forced=False, trace=True):

# try downloading, three times max.
downloaded = False
max_attempts = 3
attempt_cnt = 0

# use custom HTTP header
Expand All @@ -823,6 +838,8 @@ def download_file(filename, url, path, forced=False, trace=True):
used_urllib = std_urllib
switch_to_requests = False

wait_time = initial_wait_time

while not downloaded and attempt_cnt < max_attempts:
attempt_cnt += 1
try:
Expand Down Expand Up @@ -861,6 +878,8 @@ def download_file(filename, url, path, forced=False, trace=True):
status_code = err.code
if status_code == 403 and attempt_cnt == 1:
switch_to_requests = True
elif status_code == 429: # too many requests
_log.warning(f"Downloading of {url} failed with HTTP status code 429 (Too many requests)")
elif 400 <= status_code <= 499:
_log.warning("URL %s was not found (HTTP response code %s), not trying again" % (url, status_code))
break
Expand All @@ -887,6 +906,11 @@ def download_file(filename, url, path, forced=False, trace=True):
_log.info("Downloading using requests package instead of urllib2")
used_urllib = requests

# exponential backoff
wait_time *= 2
_log.info(f"Waiting for {wait_time} seconds before trying download of {url} again...")
time.sleep(wait_time)

if downloaded:
_log.info("Successful download of file %s from url %s to path %s" % (filename, url, path))
if trace:
Expand Down
27 changes: 19 additions & 8 deletions easybuild/tools/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
from easybuild.framework.easyconfig.parser import EasyConfigParser
from easybuild.tools import LooseVersion
from easybuild.tools.build_log import EasyBuildError, EasyBuildExit, print_msg, print_warning
from easybuild.tools.config import build_option
from easybuild.tools.config import DEFAULT_DOWNLOAD_MAX_ATTEMPTS, build_option
from easybuild.tools.filetools import apply_patch, copy_dir, copy_easyblocks, copy_file, copy_framework_files
from easybuild.tools.filetools import det_patched_files, download_file, extract_file
from easybuild.tools.filetools import get_easyblock_class_name, mkdir, read_file, symlink, which, write_file
Expand Down Expand Up @@ -546,9 +546,16 @@ def fetch_files_from_pr(pr, path=None, github_user=None, github_account=None, gi
github_user=github_user)

# determine list of changed files via diff
diff_fn = os.path.basename(pr_data['diff_url'])
diff_url = pr_data['diff_url']
diff_fn = os.path.basename(diff_url)
diff_filepath = os.path.join(path, diff_fn)
download_file(diff_fn, pr_data['diff_url'], diff_filepath, forced=True, trace=False)
# max. 6 attempts + initial wait time of 10sec -> max. 10 * (2^6) = 640sec (~10min) before giving up on download
# see also https://github.com/easybuilders/easybuild-framework/issues/4869
max_attempts = DEFAULT_DOWNLOAD_MAX_ATTEMPTS
download_file(diff_fn, diff_url, diff_filepath, forced=True, trace=False,
max_attempts=DEFAULT_DOWNLOAD_MAX_ATTEMPTS)
if not os.path.exists(diff_filepath):
raise EasyBuildError(f"Failed to download {diff_url}, even after {max_attempts} attempts and being patient...")
diff_txt = read_file(diff_filepath)
_log.debug("Diff for PR #%s:\n%s", pr, diff_txt)

Expand Down Expand Up @@ -700,17 +707,21 @@ def fetch_files_from_commit(commit, files=None, path=None, github_account=None,
diff_url = os.path.join(GITHUB_URL, github_account, github_repo, 'commit', commit + '.diff')
diff_fn = os.path.basename(diff_url)
diff_filepath = os.path.join(path, diff_fn)
if download_file(diff_fn, diff_url, diff_filepath, forced=True, trace=False):
# max. 6 attempts + initial wait time of 10sec -> max. 10 * (2^6) = 640sec (~10min) before giving up on download
# see also https://github.com/easybuilders/easybuild-framework/issues/4869
max_attempts = DEFAULT_DOWNLOAD_MAX_ATTEMPTS
download_file(diff_fn, diff_url, diff_filepath, forced=True, trace=False,
max_attempts=DEFAULT_DOWNLOAD_MAX_ATTEMPTS)
if os.path.exists(diff_filepath):
diff_txt = read_file(diff_filepath)
_log.debug("Diff for commit %s:\n%s", commit, diff_txt)

files = det_patched_files(txt=diff_txt, omit_ab_prefix=True, github=True, filter_deleted=True)
_log.debug("List of patched files for commit %s: %s", commit, files)
else:
raise EasyBuildError(
"Failed to download diff for commit %s of %s/%s", commit, github_account, github_repo,
exit_code=EasyBuildExit.FAIL_GITHUB
)
msg = f"Failed to download diff for commit {commit} of {github_account}/{github_repo} "
msg += " (after {max_attempts} attempts)"
raise EasyBuildError(msg, exit_code=EasyBuildExit.FAIL_GITHUB)

# download tarball for specific commit
repo_commit = download_repo(repo=github_repo, commit=commit, account=github_account)
Expand Down
15 changes: 15 additions & 0 deletions test/framework/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,21 @@ def test_download_file(self):
downloads = glob.glob(target_location + '*')
self.assertEqual(len(downloads), 1)

ft.remove_file(target_location)

# with max attempts set to 0, nothing gets downloaded
with self.mocked_stdout_stderr():
res = ft.download_file(fn, source_url, target_location, max_attempts=0)
self.assertEqual(res, None)
downloads = glob.glob(target_location + '*')
self.assertEqual(len(downloads), 0)

with self.mocked_stdout_stderr():
res = ft.download_file(fn, source_url, target_location, max_attempts=3, initial_wait_time=5)
self.assertEqual(res, target_location, "'download' of local file works")
downloads = glob.glob(target_location + '*')
self.assertEqual(len(downloads), 1)

# non-existing files result in None return value
with self.mocked_stdout_stderr():
self.assertEqual(ft.download_file(fn, 'file://%s/nosuchfile' % test_dir, target_location), None)
Expand Down
Loading