diff --git a/easybuild/framework/easyblock.py b/easybuild/framework/easyblock.py index 4d9b4fbdd7..eb65894260 100644 --- a/easybuild/framework/easyblock.py +++ b/easybuild/framework/easyblock.py @@ -82,7 +82,8 @@ from easybuild.tools.config import MOD_SEARCH_PATH_HEADERS, PYTHONPATH, SEARCH_PATH_BIN_DIRS, SEARCH_PATH_LIB_DIRS from easybuild.tools.config import build_option, build_path, get_failed_install_build_dirs_path from easybuild.tools.config import get_failed_install_logs_path, get_log_filename, get_repository, get_repositorypath -from easybuild.tools.config import install_path, log_path, package_path, source_paths +from easybuild.tools.config import install_path, log_path, package_path, source_paths, source_paths_data +from easybuild.tools.config import DATA, SOFTWARE from easybuild.tools.environment import restore_env, sanitize_env from easybuild.tools.filetools import CHECKSUM_TYPE_SHA256 from easybuild.tools.filetools import adjust_permissions, apply_patch, back_up_file, change_dir, check_lock @@ -167,12 +168,13 @@ def __init__(self, ec, logfile=None): # list of patch/source files, along with checksums self.patches = [] self.src = [] + self.data_src = [] self.checksums = [] self.json_checksums = None # build/install directories self.builddir = None - self.installdir = None # software + self.installdir = None # software or data self.installdir_mod = None # module file # extensions @@ -519,11 +521,11 @@ def fetch_sources(self, sources=None, checksums=None): Add a list of source files (can be tarballs, isos, urls). All source files will be checked if a file exists (or can be located) - :param sources: list of sources to fetch (if None, use 'sources' easyconfig parameter) + :param sources: list of sources to fetch (if None, use 'sources' or 'data_sources' easyconfig parameter) :param checksums: list of checksums for sources """ if sources is None: - sources = self.cfg['sources'] + sources = self.cfg['sources'] or self.cfg['data_sources'] if checksums is None: checksums = self.cfg['checksums'] @@ -801,7 +803,10 @@ def obtain_file(self, filename, extension=False, urls=None, download_filename=No :param download_instructions: instructions to manually add source (used for complex cases) :param alt_location: alternative location to use instead of self.name """ - srcpaths = source_paths() + if self.cfg['data_sources']: + srcpaths = source_paths_data() + else: + srcpaths = source_paths() # We don't account for the checksums file in the progress bar if filename != 'checksum.json': @@ -1166,7 +1171,10 @@ def gen_installdir(self): """ Generate the name of the installation directory. """ - basepath = install_path() + if self.cfg['data_sources']: + basepath = install_path(DATA) + else: + basepath = install_path(SOFTWARE) if basepath: self.install_subdir = ActiveMNS().det_install_subdir(self.cfg) self.installdir = os.path.join(os.path.abspath(basepath), self.install_subdir) @@ -2591,8 +2599,10 @@ def fetch_step(self, skip_checksums=False): # fetch sources if self.cfg['sources']: self.fetch_sources(self.cfg['sources'], checksums=self.cfg['checksums']) + elif self.cfg['data_sources']: + self.fetch_sources(self.cfg['data_sources'], checksums=self.cfg['checksums']) else: - self.log.info('no sources provided') + self.log.info('no sources or data_sources provided') if self.dry_run: # actual list of patches is printed via _obtain_file_dry_run method @@ -5082,8 +5092,8 @@ def make_checksum_lines(checksums, indent_level): if app.src: placeholder = '# PLACEHOLDER FOR SOURCES/PATCHES WITH CHECKSUMS' - # grab raw lines for source_urls, sources, patches - keys = ['patches', 'source_urls', 'sources'] + # grab raw lines for source_urls, sources, data_sources, patches + keys = ['data_sources', 'patches', 'source_urls', 'sources'] raw = {} for key in keys: regex = re.compile(r'^(%s(?:.|\n)*?\])\s*$' % key, re.M) @@ -5097,10 +5107,12 @@ def make_checksum_lines(checksums, indent_level): # inject combination of source_urls/sources/patches/checksums into easyconfig # by replacing first occurence of placeholder that was put in place sources_raw = raw.get('sources', '') + data_sources_raw = raw.get('data_sources', '') source_urls_raw = raw.get('source_urls', '') patches_raw = raw.get('patches', '') regex = re.compile(placeholder + '\n', re.M) - ectxt = regex.sub(source_urls_raw + sources_raw + patches_raw + checksums_txt + '\n', ectxt, count=1) + ectxt = regex.sub(source_urls_raw + sources_raw + data_sources_raw + patches_raw + checksums_txt + '\n', + ectxt, count=1) # get rid of potential remaining placeholders ectxt = regex.sub('', ectxt) diff --git a/easybuild/framework/easyconfig/default.py b/easybuild/framework/easyconfig/default.py index 66ed417b61..a2048ed15e 100644 --- a/easybuild/framework/easyconfig/default.py +++ b/easybuild/framework/easyconfig/default.py @@ -92,6 +92,7 @@ 'checksums': [[], "Checksums for sources and patches", BUILD], 'configopts': ['', 'Extra options passed to configure (default already has --prefix)', BUILD], 'cuda_compute_capabilities': [[], "List of CUDA compute capabilities to build with (if supported)", BUILD], + 'data_sources': [[], "List of source files for data", BUILD], 'download_instructions': ['', "Specify steps to acquire necessary file, if obtaining it is difficult", BUILD], 'easyblock': [None, "EasyBlock to use for building; if set to None, an easyblock is selected " "based on the software name", BUILD], @@ -132,7 +133,7 @@ 'skip_mod_files_sanity_check': [False, "Skip the check for .mod files in a GCCcore level install", BUILD], 'skipsteps': [[], "Skip these steps", BUILD], 'source_urls': [[], "List of URLs for source files", BUILD], - 'sources': [[], "List of source files", BUILD], + 'sources': [[], "List of source files for software", BUILD], 'stop': [None, 'Keyword to halt the build process after a certain step.', BUILD], 'testopts': ['', 'Extra options for test.', BUILD], 'tests': [[], ("List of test-scripts to run after install. A test script should return a " diff --git a/easybuild/framework/easyconfig/format/format.py b/easybuild/framework/easyconfig/format/format.py index eec432967e..19eb5c2b22 100644 --- a/easybuild/framework/easyconfig/format/format.py +++ b/easybuild/framework/easyconfig/format/format.py @@ -62,7 +62,7 @@ ['name', 'version', 'versionprefix', 'versionsuffix'], ['homepage', 'description'], ['toolchain', 'toolchainopts'], - ['source_urls', 'sources', 'patches', 'checksums'], + ['source_urls', 'sources', 'data_sources', 'patches', 'checksums'], DEPENDENCY_PARAMETERS + ['multi_deps'], ['osdependencies'], ['preconfigopts', 'configopts'], diff --git a/easybuild/tools/config.py b/easybuild/tools/config.py index 664080d8f2..fa13c7878a 100644 --- a/easybuild/tools/config.py +++ b/easybuild/tools/config.py @@ -71,6 +71,10 @@ EMPTY_LIST = 'empty_list' +DATA = 'data' +MODULES = 'modules' +SOFTWARE = 'software' + PKG_TOOL_FPM = 'fpm' PKG_TYPE_RPM = 'rpm' @@ -112,8 +116,10 @@ 'packagepath': 'packages', 'repositorypath': 'ebfiles_repo', 'sourcepath': 'sources', - 'subdir_modules': 'modules', - 'subdir_software': 'software', + 'sourcepath_data': 'sources', + 'subdir_data': DATA, + 'subdir_modules': MODULES, + 'subdir_software': SOFTWARE, } DEFAULT_PKG_RELEASE = '1' DEFAULT_PKG_TOOL = PKG_TOOL_FPM @@ -478,6 +484,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX): ('chem', "Chemistry, Computational Chemistry and Quantum Chemistry"), ('compiler', "Compilers"), ('data', "Data management & processing tools"), + ('dataset', "Datasets"), ('debugger', "Debuggers"), ('devel', "Development tools"), ('geo', "Earth Sciences"), @@ -512,6 +519,7 @@ class ConfigurationVariables(BaseConfigurationVariables): 'failed_install_build_dirs_path', 'failed_install_logs_path', 'installpath', + 'installpath_data', 'installpath_modules', 'installpath_software', 'job_backend', @@ -526,6 +534,8 @@ class ConfigurationVariables(BaseConfigurationVariables): 'repository', 'repositorypath', 'sourcepath', + 'sourcepath_data', + 'subdir_data', 'subdir_modules', 'subdir_software', 'tmp_logdir', @@ -569,16 +579,20 @@ def init(options, config_options_dict): """ tmpdict = copy.deepcopy(config_options_dict) - # make sure source path is a list - sourcepath = tmpdict['sourcepath'] - if isinstance(sourcepath, str): - tmpdict['sourcepath'] = sourcepath.split(':') - _log.debug("Converted source path ('%s') to a list of paths: %s" % (sourcepath, tmpdict['sourcepath'])) - elif not isinstance(sourcepath, (tuple, list)): - raise EasyBuildError( - "Value for sourcepath has invalid type (%s): %s", type(sourcepath), sourcepath, - exit_code=EasyBuildExit.OPTION_ERROR - ) + if tmpdict['sourcepath_data'] is None: + tmpdict['sourcepath_data'] = tmpdict['sourcepath'][:] + + for srcpath in ['sourcepath', 'sourcepath_data']: + # make sure source path is a list + sourcepath = tmpdict[srcpath] + if isinstance(sourcepath, str): + tmpdict[srcpath] = sourcepath.split(':') + _log.debug("Converted source path ('%s') to a list of paths: %s" % (sourcepath, tmpdict[srcpath])) + elif not isinstance(sourcepath, (tuple, list)): + raise EasyBuildError( + "Value for %s has invalid type (%s): %s", srcpath, type(sourcepath), sourcepath, + exit_code=EasyBuildExit.OPTION_ERROR + ) # initialize configuration variables (any future calls to ConfigurationVariables() will yield the same instance variables = ConfigurationVariables(tmpdict, ignore_unknown_keys=True) @@ -704,11 +718,18 @@ def build_path(): def source_paths(): """ - Return the list of source paths + Return the list of source paths for software """ return ConfigurationVariables()['sourcepath'] +def source_paths_data(): + """ + Return the list of source paths for data + """ + return ConfigurationVariables()['sourcepath_data'] + + def source_path(): """NO LONGER SUPPORTED: use source_paths instead""" _log.nosupport("source_path() is replaced by source_paths()", '2.0') @@ -717,15 +738,16 @@ def source_path(): def install_path(typ=None): """ Returns the install path - - subdir 'software' for actual installation (default) + - subdir 'software' for actual software installation (default) - subdir 'modules' for environment modules (typ='mod') + - subdir 'data' for data installation (typ='data') """ if typ is None: - typ = 'software' + typ = SOFTWARE elif typ == 'mod': - typ = 'modules' + typ = MODULES - known_types = ['modules', 'software'] + known_types = [MODULES, SOFTWARE, DATA] if typ not in known_types: raise EasyBuildError( "Unknown type specified in install_path(): %s (known: %s)", typ, ', '.join(known_types), diff --git a/easybuild/tools/options.py b/easybuild/tools/options.py index 4068aa1ad6..6041278574 100644 --- a/easybuild/tools/options.py +++ b/easybuild/tools/options.py @@ -611,6 +611,8 @@ def config_options(self): 'strlist', 'store', []), 'installpath': ("Install path for software and modules", None, 'store', mk_full_default_path('installpath')), + 'installpath-data': ("Install path for data (if None, combine --installpath and --subdir-data)", + None, 'store', None), 'installpath-modules': ("Install path for modules (if None, combine --installpath and --subdir-modules)", None, 'store', None), 'installpath-software': ("Install path for software (if None, combine --installpath and --subdir-software)", @@ -644,7 +646,7 @@ def config_options(self): None, 'store', mk_full_default_path('packagepath')), 'package-naming-scheme': ("Packaging naming scheme choice", 'choice', 'store', DEFAULT_PNS, sorted(avail_package_naming_schemes().keys())), - 'prefix': (("Change prefix for buildpath, installpath, sourcepath and repositorypath " + 'prefix': (("Change prefix for buildpath, installpath, sourcepath, sourcepath-data, and repositorypath " "(used prefix for defaults %s)" % DEFAULT_PREFIX), None, 'store', None), 'recursive-module-unload': ("Enable generating of modules that unload recursively.", @@ -659,8 +661,12 @@ def config_options(self): 'store', DEFAULT_SEARCH_PATH_CPP_HEADERS, [*SEARCH_PATH["cpp_headers"]]), 'search-path-linker': ("Search path used at build time by the linker for libraries", 'choice', 'store', DEFAULT_SEARCH_PATH_LINKER, [*SEARCH_PATH["linker"]]), - 'sourcepath': ("Path(s) to where sources should be downloaded (string, colon-separated)", + 'sourcepath': ("Path(s) to where software sources should be downloaded (string, colon-separated)", None, 'store', mk_full_default_path('sourcepath')), + 'sourcepath-data': ("Path(s) to where data sources should be downloaded (string, colon-separated) " + "(same as sourcepath if not specified)", None, 'store', None), + 'subdir-data': ("Installpath subdir for data", + None, 'store', DEFAULT_PATH_SUBDIRS['subdir_data']), 'subdir-modules': ("Installpath subdir for modules", None, 'store', DEFAULT_PATH_SUBDIRS['subdir_modules']), 'subdir-software': ("Installpath subdir for software", None, 'store', DEFAULT_PATH_SUBDIRS['subdir_software']), @@ -1234,7 +1240,7 @@ def _postprocess_config(self): # (see also https://github.com/easybuilders/easybuild-framework/issues/3892); path_opt_names = ['buildpath', 'containerpath', 'failed_install_build_dirs_path', 'failed_install_logs_path', 'git_working_dirs_path', 'installpath', 'installpath_modules', 'installpath_software', - 'prefix', 'packagepath', 'robot_paths', 'sourcepath'] + 'installpath_data', 'prefix', 'packagepath', 'robot_paths', 'sourcepath', 'sourcepath_data'] for opt_name in path_opt_names: self._ensure_abs_path(opt_name) @@ -1244,7 +1250,7 @@ def _postprocess_config(self): # repository has to be reinitialised to take new repositorypath in account; # in the legacy-style configuration, repository is initialised in configuration file itself; path_opts = ['buildpath', 'containerpath', 'installpath', 'packagepath', 'repository', 'repositorypath', - 'sourcepath'] + 'sourcepath', 'sourcepath_data'] for dest in path_opts: if not self.options._action_taken.get(dest, False): if dest == 'repository': diff --git a/test/framework/options.py b/test/framework/options.py index fce2d49086..10f84a0df9 100644 --- a/test/framework/options.py +++ b/test/framework/options.py @@ -5335,6 +5335,7 @@ def test_show_config(self): 'EASYBUILD_INSTALLPATH', 'EASYBUILD_ROBOT_PATHS', 'EASYBUILD_SOURCEPATH', + 'EASYBUILD_SOURCEPATH_DATA', ] for key in os.environ.keys(): if key.startswith('EASYBUILD_') and key not in retained_eb_env_vars: @@ -5368,6 +5369,7 @@ def test_show_config(self): r"robot-paths\s* \(E\) = " + os.path.join(test_dir, 'easyconfigs', 'test_ecs'), r"rpath\s* \(D\) = " + ('False' if get_os_type() == DARWIN else 'True'), r"sourcepath\s* \(E\) = " + os.path.join(test_dir, 'sandbox', 'sources'), + r"sourcepath-data\s* \(E\) = " + os.path.join(test_dir, 'sandbox', 'data_sources'), r"subdir-modules\s* \(F\) = mods", ] diff --git a/test/framework/utilities.py b/test/framework/utilities.py index 27ba444d96..70e19220d3 100644 --- a/test/framework/utilities.py +++ b/test/framework/utilities.py @@ -115,7 +115,9 @@ def setUp(self): testdir = os.path.dirname(os.path.abspath(__file__)) self.test_sourcepath = os.path.join(testdir, 'sandbox', 'sources') + self.test_sourcepath_data = os.path.join(testdir, 'sandbox', 'data_sources') os.environ['EASYBUILD_SOURCEPATH'] = self.test_sourcepath + os.environ['EASYBUILD_SOURCEPATH_DATA'] = self.test_sourcepath_data os.environ['EASYBUILD_PREFIX'] = self.test_prefix self.test_buildpath = tempfile.mkdtemp() os.environ['EASYBUILD_BUILDPATH'] = self.test_buildpath