def _setup_intel_mpd(params):
    """
    Prepare use of the MPD process manager, which is needed for old versions of Intel MPI.

    Defines $I_MPI_MPD_TMPDIR, $PBS_ENVIRONMENT and $I_MPI_PROCESS_MANAGER, creates the 'mpdboot' and 'nodes'
    files in a new temporary directory, and adds the 'mpdbf' and 'nodesfile' template values to the parameters.

    :param params: dict with template values for the MPI command, is updated in place; must include 'nr_ranks'
    :return: MPI command template that uses the 'mpdbf' and 'nodesfile' template values
    """
    # set temporary dir for MPD
    # note: this needs to be kept *short*,
    # to avoid mpirun failing with "socket.error: AF_UNIX path too long"
    # exact limit is unknown, but ~20 characters seems to be OK
    env.setvar('I_MPI_MPD_TMPDIR', tempfile.gettempdir())
    mpd_tmpdir = os.environ['I_MPI_MPD_TMPDIR']
    if len(mpd_tmpdir) > 20:
        _log.warning("$I_MPI_MPD_TMPDIR should be (very) short to avoid problems: %s", mpd_tmpdir)

    # temporary location for mpdboot and nodes files
    tmpdir = tempfile.mkdtemp(prefix='mpi_cmd_for-')

    # set PBS_ENVIRONMENT, so that --file option for mpdboot isn't stripped away
    env.setvar('PBS_ENVIRONMENT', "PBS_BATCH_MPI")

    # make sure we're always using mpd as process manager
    # only required for/picked up by Intel MPI v4.1 or higher, no harm done for others
    env.setvar('I_MPI_PROCESS_MANAGER', 'mpd')

    # create mpdboot file
    mpdboot = os.path.join(tmpdir, 'mpdboot')
    write_file(mpdboot, "localhost ifhn=localhost")
    params.update({'mpdbf': "--file=%s" % mpdboot})

    # create nodes file, with one 'localhost' line per MPI rank
    nodes = os.path.join(tmpdir, 'nodes')
    write_file(nodes, "localhost\n" * int(params['nr_ranks']))
    params.update({'nodesfile': "-machinefile %s" % nodes})

    return "mpirun %(mpdbf)s %(nodesfile)s -np %(nr_ranks)s %(cmd)s"


def get_mpi_cmd_template(mpi_family, params, mpi_version=None):
    """
    Return template for MPI command, for specified MPI family.

    A template specified via the 'mpi_cmd_template' build option is used as is;
    a known template for the specified MPI family is only picked when that build option is None.

    :param mpi_family: MPI family to use to determine MPI command template
    :param params: dict with template values for the MPI command (for example 'nr_ranks' and 'cmd');
                   a deep copy is made, the dict that was passed in is never modified
    :param mpi_version: version of the MPI library (string); only consulted (and then required) for Intel MPI,
                        when no template was specified via the build option
    :return: tuple with MPI command template and the (possibly extended) copy of the parameters
    :raises EasyBuildError: if the Intel MPI version is needed but unknown, if there is no known template
                            for the MPI family, or if the template lacks a '%(<key>)s' entry for one of the parameters
    """
    # work on a copy, since extra template values may be added for Intel MPI
    params = copy.deepcopy(params)

    mpi_cmd_template = build_option('mpi_cmd_template')
    if mpi_cmd_template:
        _log.info("Using specified template for MPI commands: %s", mpi_cmd_template)

    elif mpi_cmd_template is None:
        # different known mpirun commands
        # note: this mapping is composed on every call rather than at module level,
        # since constants like toolchain.OPENMPI are only in place once search_toolchain has been called
        mpirun_n_cmd = "mpirun -n %(nr_ranks)s %(cmd)s"
        mpi_cmds = {
            toolchain.OPENMPI: mpirun_n_cmd,
            toolchain.QLOGICMPI: "mpirun -H localhost -np %(nr_ranks)s %(cmd)s",
            toolchain.INTELMPI: mpirun_n_cmd,
            toolchain.MVAPICH2: mpirun_n_cmd,
            toolchain.MPICH: mpirun_n_cmd,
            toolchain.MPICH2: mpirun_n_cmd,
        }

        # Intel MPI mpirun needs more work
        if mpi_family == toolchain.INTELMPI:

            if mpi_version is None:
                raise EasyBuildError("Intel MPI version unknown, can't determine MPI command template!")

            # for old versions of Intel MPI, we need to use MPD
            if LooseVersion(mpi_version) <= LooseVersion('4.1'):
                mpi_cmds[toolchain.INTELMPI] = _setup_intel_mpd(params)

        if mpi_family in mpi_cmds:
            mpi_cmd_template = mpi_cmds[mpi_family]
            _log.info("Using template MPI command '%s' for MPI family '%s'", mpi_cmd_template, mpi_family)
        else:
            raise EasyBuildError("Don't know which template MPI command to use for MPI family '%s'", mpi_family)

    # every parameter must be used by the template, which catches incomplete custom templates early
    tmpls = ['%(' + key + ')s' for key in sorted(params)]
    missing = [tmpl for tmpl in tmpls if tmpl not in mpi_cmd_template]
    if missing:
        raise EasyBuildError("Missing templates in mpi-cmd-template value '%s': %s",
                             mpi_cmd_template, ', '.join(missing))

    return mpi_cmd_template, params
MPD - # note: this needs to be kept *short*, - # to avoid mpirun failing with "socket.error: AF_UNIX path too long" - # exact limit is unknown, but ~20 characters seems to be OK - env.setvar('I_MPI_MPD_TMPDIR', tempfile.gettempdir()) - mpd_tmpdir = os.environ['I_MPI_MPD_TMPDIR'] - if len(mpd_tmpdir) > 20: - self.log.warning("$I_MPI_MPD_TMPDIR should be (very) short to avoid problems: %s", mpd_tmpdir) - - # temporary location for mpdboot and nodes files - tmpdir = tempfile.mkdtemp(prefix='mpi_cmd_for-') - - # set PBS_ENVIRONMENT, so that --file option for mpdboot isn't stripped away - env.setvar('PBS_ENVIRONMENT', "PBS_BATCH_MPI") - - # make sure we're always using mpd as process manager - # only required for/picked up by Intel MPI v4.1 or higher, no harm done for others - env.setvar('I_MPI_PROCESS_MANAGER', 'mpd') - - # create mpdboot file - mpdboot = os.path.join(tmpdir, 'mpdboot') - write_file(mpdboot, "localhost ifhn=localhost") - - params.update({'mpdbf': "--file=%s" % mpdboot}) - - # create nodes file - nodes = os.path.join(tmpdir, 'nodes') - write_file(nodes, "localhost\n" * int(nr_ranks)) - - params.update({'nodesfile': "-machinefile %s" % nodes}) - - if mpi_family in mpi_cmds.keys(): - mpi_cmd_template = mpi_cmds[mpi_family] - self.log.info("Using template MPI command '%s' for MPI family '%s'", mpi_cmd_template, mpi_family) - else: - raise EasyBuildError("Don't know which template MPI command to use for MPI family '%s'", mpi_family) + if mpi_family == toolchain.INTELMPI: + mpi_version = self.get_software_version(self.MPI_MODULE_NAME)[0] + else: + mpi_version = None - missing = [] - for key in sorted(params.keys()): - tmpl = '%(' + key + ')s' - if tmpl not in mpi_cmd_template: - missing.append(tmpl) - if missing: - raise EasyBuildError("Missing templates in mpi-cmd-template value '%s': %s", - mpi_cmd_template, ', '.join(missing)) + mpi_cmd_template, params = get_mpi_cmd_template(mpi_family, params, mpi_version=mpi_version) + self.log.info("Using MPI command 
def test_get_mpi_cmd_template(self):
    """Test get_mpi_cmd_template function."""

    # search_toolchain needs to be called once to make sure constants like toolchain.OPENMPI are in place
    search_toolchain('')

    input_params = {'nr_ranks': 123, 'cmd': 'this_is_just_a_test'}
    # pristine copy, used to verify that get_mpi_cmd_template never modifies the dict it is given
    orig_params = dict(input_params)

    for mpi_fam in [toolchain.OPENMPI, toolchain.MPICH, toolchain.MPICH2, toolchain.MVAPICH2]:
        mpi_cmd_tmpl, params = get_mpi_cmd_template(mpi_fam, input_params)
        self.assertEqual(mpi_cmd_tmpl, "mpirun -n %(nr_ranks)s %(cmd)s")
        self.assertEqual(params, input_params)
        self.assertEqual(input_params, orig_params)

    # Intel MPI is a special case, also requires MPI version to be known
    impi = toolchain.INTELMPI
    error_pattern = "Intel MPI version unknown, can't determine MPI command template!"
    self.assertErrorRegex(EasyBuildError, error_pattern, get_mpi_cmd_template, impi, {})

    # old Intel MPI version: MPD is used, which requires extra template values
    mpi_cmd_tmpl, params = get_mpi_cmd_template(impi, input_params, mpi_version='1.0')
    self.assertEqual(mpi_cmd_tmpl, "mpirun %(mpdbf)s %(nodesfile)s -np %(nr_ranks)s %(cmd)s")
    self.assertEqual(sorted(params.keys()), ['cmd', 'mpdbf', 'nodesfile', 'nr_ranks'])
    self.assertEqual(params['cmd'], 'this_is_just_a_test')
    self.assertEqual(params['nr_ranks'], 123)

    # extra template values must only show up in the returned copy, not in the dict that was passed in
    self.assertEqual(input_params, orig_params)

    mpdbf = params['mpdbf']
    regex = re.compile('^--file=.*/mpdboot$')
    self.assertTrue(regex.match(mpdbf), "'%s' should match pattern '%s'" % (mpdbf, regex.pattern))
    mpdboot_path = mpdbf.split('=')[1]
    self.assertTrue(os.path.exists(mpdboot_path))
    self.assertEqual(read_file(mpdboot_path), "localhost ifhn=localhost")

    nodesfile = params['nodesfile']
    regex = re.compile('^-machinefile /.*/nodes$')
    self.assertTrue(regex.match(nodesfile), "'%s' should match pattern '%s'" % (nodesfile, regex.pattern))
    nodes_path = nodesfile.split(' ')[1]
    self.assertTrue(os.path.exists(nodes_path))
    # nodes file should list 'localhost' once per MPI rank
    self.assertEqual(read_file(nodes_path), "localhost\n" * 123)