From 8f746b4175809f2d617ac029d224d1bd11c386e6 Mon Sep 17 00:00:00 2001
From: Alexander Grund
Date: Wed, 26 May 2021 11:27:41 +0200
Subject: [PATCH 1/3] Support systems with more than 1024 cores

Dynamically determine the size of the cpu_set_t struct doubling it on each try
---
 easybuild/tools/systemtools.py | 38 ++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/easybuild/tools/systemtools.py b/easybuild/tools/systemtools.py
index dbd7415f8a..866450f2dd 100644
--- a/easybuild/tools/systemtools.py
+++ b/easybuild/tools/systemtools.py
@@ -29,6 +29,7 @@
 @auther: Ward Poelmans (Ghent University)
 """
 import ctypes
+import errno
 import fcntl
 import grp  # @UnresolvedImport
 import os
@@ -160,24 +161,35 @@ class SystemToolsException(Exception):
 def sched_getaffinity():
     """Determine list of available cores for current process."""
     cpu_mask_t = ctypes.c_ulong
-    cpu_setsize = 1024
     n_cpu_bits = 8 * ctypes.sizeof(cpu_mask_t)
-    n_mask_bits = cpu_setsize // n_cpu_bits
-
-    class cpu_set_t(ctypes.Structure):
-        """Class that implements the cpu_set_t struct."""
-        _fields_ = [('bits', cpu_mask_t * n_mask_bits)]
 
     _libc_lib = find_library('c')
-    _libc = ctypes.cdll.LoadLibrary(_libc_lib)
+    _libc = ctypes.CDLL(_libc_lib, use_errno=True)
 
     pid = os.getpid()
-    cs = cpu_set_t()
-    ec = _libc.sched_getaffinity(os.getpid(), ctypes.sizeof(cpu_set_t), ctypes.pointer(cs))
-    if ec == 0:
-        _log.debug("sched_getaffinity for pid %s successful", pid)
-    else:
-        raise EasyBuildError("sched_getaffinity failed for pid %s ec %s", pid, ec)
+
+    cpu_setsize = 1024  # Max number of CPUs currently detectable
+    max_cpu_setsize = ctypes.c_ulong(-1).value // 4  # (INT_MAX / 2)
+    # Limit it to something reasonable but still big enough
+    max_cpu_setsize = min(max_cpu_setsize, 1e9)
+    while cpu_setsize < max_cpu_setsize:
+        n_mask_bits = cpu_setsize // n_cpu_bits
+
+        class cpu_set_t(ctypes.Structure):
+            """Class that implements the cpu_set_t struct."""
+            _fields_ = [('bits', cpu_mask_t * n_mask_bits)]
+
+        cs = cpu_set_t()
+        ec = _libc.sched_getaffinity(pid, ctypes.sizeof(cpu_set_t), ctypes.pointer(cs))
+        if ec == 0:
+            _log.debug("sched_getaffinity for pid %s successful", pid)
+            break
+        elif ctypes.get_errno() != errno.EINVAL:
+            raise EasyBuildError("sched_getaffinity failed for pid %s errno %s", pid, ctypes.get_errno())
+        cpu_setsize *= 2
+
+    if ec != 0:
+        raise EasyBuildError("sched_getaffinity failed finding a large enough cpuset for pid %s", pid)
 
     cpus = []
     for bitmask in cs.bits:

From 7153985228893f663bdf0fffda250aaebd6d6798 Mon Sep 17 00:00:00 2001
From: Kenneth Hoste
Date: Wed, 26 May 2021 20:38:51 +0200
Subject: [PATCH 2/3] avoid TypeError when comparing None with 0.0 in test_cpu_speed_native

---
 test/framework/systemtools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/framework/systemtools.py b/test/framework/systemtools.py
index 68fae7ea5a..78855b3ef7 100644
--- a/test/framework/systemtools.py
+++ b/test/framework/systemtools.py
@@ -454,7 +454,7 @@ def test_cpu_speed_native(self):
         """Test getting CPU speed."""
         cpu_speed = get_cpu_speed()
         self.assertTrue(isinstance(cpu_speed, float) or cpu_speed is None)
-        self.assertTrue(cpu_speed > 0.0 or cpu_speed is None)
+        self.assertTrue(cpu_speed is None or cpu_speed > 0.0)
 
     def test_cpu_speed_linux(self):
         """Test getting CPU speed (mocked for Linux)."""

From 3c1279e971ccf81d67e21e93e3996c2a09558455 Mon Sep 17 00:00:00 2001
From: Kenneth Hoste
Date: Wed, 26 May 2021 20:52:03 +0200
Subject: [PATCH 3/3] consistently use cpu_mask_t in sched_getaffinity

---
 easybuild/tools/systemtools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/easybuild/tools/systemtools.py b/easybuild/tools/systemtools.py
index 866450f2dd..26f66623cd 100644
--- a/easybuild/tools/systemtools.py
+++ b/easybuild/tools/systemtools.py
@@ -169,7 +169,7 @@ def sched_getaffinity():
     pid = os.getpid()
 
     cpu_setsize = 1024  # Max number of CPUs currently detectable
-    max_cpu_setsize = ctypes.c_ulong(-1).value // 4  # (INT_MAX / 2)
+    max_cpu_setsize = cpu_mask_t(-1).value // 4  # (INT_MAX / 2)
     # Limit it to something reasonable but still big enough
     max_cpu_setsize = min(max_cpu_setsize, 1e9)
     while cpu_setsize < max_cpu_setsize: