Skip to content

Commit 95b57e9

Browse files
authored
Merge pull request #2388 from ComputeCanada/gcc_tree_vectorize
include -ftree-vectorize and -fno-math-errno in default compiler optimisation flags for GCC
2 parents 0dcfdc8 + ed7494d commit 95b57e9

8 files changed

Lines changed: 54 additions & 29 deletions

File tree

easybuild/toolchains/compiler/clang.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,7 @@ class Clang(Compiler):
8585
'defaultprec': [],
8686
'loose': ['ffast-math', 'fno-unsafe-math-optimizations'],
8787
'veryloose': ['ffast-math'],
88+
'vectorize': {False: 'fno-vectorize', True: 'fvectorize'},
8889
}
8990

9091
# used when 'optarch' toolchain option is enabled (and --optarch is not specified)

easybuild/toolchains/compiler/gcc.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@
3535
import easybuild.tools.systemtools as systemtools
3636
from easybuild.tools.build_log import EasyBuildError
3737
from easybuild.tools.modules import get_software_root, get_software_version
38-
from easybuild.tools.toolchain.compiler import Compiler
38+
from easybuild.tools.toolchain.compiler import Compiler, DEFAULT_OPT_LEVEL
3939

4040

4141
TC_CONSTANT_GCC = "GCC"
@@ -61,10 +61,12 @@ class Gcc(Compiler):
6161
'lto': 'flto',
6262
'ieee': ['mieee-fp', 'fno-trapping-math'],
6363
'strict': ['mieee-fp', 'mno-recip'],
64-
'precise':['mno-recip'],
65-
'defaultprec':[],
66-
'loose': ['mrecip', 'mno-ieee-fp'],
67-
'veryloose': ['mrecip=all', 'mno-ieee-fp'],
64+
'precise': ['mno-recip'],
65+
'defaultprec': ['fno-math-errno'],
66+
'loose': ['fno-math-errno', 'mrecip', 'mno-ieee-fp'],
67+
'veryloose': ['fno-math-errno', 'mrecip=all', 'mno-ieee-fp'],
68+
'vectorize': {False: 'fno-tree-vectorize', True: 'ftree-vectorize'},
69+
DEFAULT_OPT_LEVEL: ['O2', 'ftree-vectorize'],
6870
}
6971

7072
# used when 'optarch' toolchain option is enabled (and --optarch is not specified)

easybuild/toolchains/compiler/ibmxl.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
from distutils.version import LooseVersion
1212

1313
import easybuild.tools.systemtools as systemtools
14-
from easybuild.tools.toolchain.compiler import Compiler
14+
from easybuild.tools.toolchain.compiler import Compiler, DEFAULT_OPT_LEVEL
1515

1616

1717
TC_CONSTANT_IBMCOMP = "IBMXL"
@@ -35,6 +35,8 @@ class IBMXL(Compiler):
3535
'defaultprec': ['', '', ''],
3636
'loose': [''],
3737
'veryloose': [''],
38+
'vectorize': {False: 'qsimd=noauto', True: 'qsimd=auto'},
39+
DEFAULT_OPT_LEVEL: ['O2', 'qsimd=auto'],
3840
'ibm-static': 'qstaticlink=xllibs',
3941
'pic': 'qpic',
4042
'shared': 'qmkshrobj',

easybuild/toolchains/compiler/inteliccifort.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@ class IntelIccIfort(Compiler):
6464
'defaultprec': ['ftz', 'fp-speculation=safe', 'fp-model source'],
6565
'loose': ['fp-model fast=1'],
6666
'veryloose': ['fp-model fast=2'],
67+
'vectorize': {False: 'no-vec', True: 'vec'},
6768
'intel-static': 'static-intel',
6869
'no-icc': 'no-icc',
6970
'error-unknown-option': 'we10006', # error at warning #10006: ignoring unknown option

easybuild/toolchains/compiler/pgi.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,7 @@ class Pgi(Compiler):
6565
'defaultprec': ['Mflushz'],
6666
'loose': ['Mfprelaxed'],
6767
'veryloose': ['Mfprelaxed=div,order,intrinsic,recip,sqrt,rsqrt', 'Mfpapprox'],
68+
'vectorize': {False: 'Mnovect', True: 'Mvect'},
6869
}
6970

7071
# used when 'optarch' toolchain option is enabled (and --optarch is not specified)

easybuild/tools/toolchain/compiler.py

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@ class Compiler(Toolchain):
8888
'static': (False, "Build static library"),
8989
'32bit': (False, "Compile 32bit target"), # LA, FFTW
9090
'openmp': (False, "Enable OpenMP"),
91+
'vectorize': (None, "Enable compiler auto-vectorization, default except for noopt and lowopt"),
9192
'packed-linker-options': (False, "Pack the linker options as comma separated list"), # ScaLAPACK mainly
9293
'rpath': (True, "Use RPATH wrappers when --rpath is enabled in EasyBuild configuration"),
9394
}
@@ -245,8 +246,19 @@ def _set_compiler_flags(self):
245246
(default_opt_level, self.COMPILER_OPT_FLAGS))
246247

247248
# the first enabled optimisation level is the one to use; the default level is appended so the list is never empty
248-
optflags = [self.options.option(x) for x in self.COMPILER_OPT_FLAGS if self.options.get(x, False)] + \
249-
[self.options.option(default_opt_level)]
249+
optflags = ([self.options.option(x) for x in self.COMPILER_OPT_FLAGS if self.options.get(x, False)] + \
250+
[self.options.option(default_opt_level)])[:1]
251+
252+
# only apply if the vectorize toolchainopt is explicitly set
253+
# otherwise the individual compiler toolchain file should make sure that
254+
# vectorization is disabled for noopt and lowopt, and enabled otherwise.
255+
if self.options.get('vectorize') is not None:
256+
vectoptions = self.options.option('vectorize')
257+
vectflags = vectoptions[self.options['vectorize']]
258+
# avoid double use of such flags, or e.g. -fno-tree-vectorize followed by -ftree-vectorize
259+
if isinstance(optflags[0], list):
260+
optflags[0] = [flag for flag in optflags[0] if flag not in vectoptions.values()]
261+
optflags.append(vectflags)
250262

251263
optarchflags = []
252264
if build_option('optarch') == OPTARCH_GENERIC:
@@ -259,7 +271,7 @@ def _set_compiler_flags(self):
259271
precflags = [self.options.option(x) for x in self.COMPILER_PREC_FLAGS if self.options.get(x, False)] + \
260272
[self.options.option('defaultprec')]
261273

262-
self.variables.nextend('OPTFLAGS', optflags[:1] + optarchflags)
274+
self.variables.nextend('OPTFLAGS', optflags + optarchflags)
263275
self.variables.nextend('PRECFLAGS', precflags[:1])
264276

265277
# precflags last

test/framework/options.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2948,7 +2948,7 @@ def test_dump_env_config(self):
29482948
"module load hwloc/1.6.2-GCC-4.7.2", # loading of dependency module
29492949
# defining build env
29502950
"export FC='gfortran'",
2951-
"export CFLAGS='-O2 -march=native'",
2951+
"export CFLAGS='-O2 -ftree-vectorize -march=native -fno-math-errno'",
29522952
]
29532953
for pattern in patterns:
29542954
regex = re.compile("^%s$" % pattern, re.M)

test/framework/toolchain.py

Lines changed: 25 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -277,7 +277,7 @@ def test_misc_flags_shared(self):
277277
# we need to make sure we check for complete flags, not bare letters (e.g. '-v' rather than 'v')
278278
flag = '-%s' % tc.COMPILER_SHARED_OPTION_MAP[opt]
279279
for var in flag_vars:
280-
flags = tc.get_variable(var)
280+
flags = tc.get_variable(var).split()
281281
if enable:
282282
self.assertTrue(flag in flags, "%s: True means %s in %s" % (opt, flag, flags))
283283
else:
@@ -290,21 +290,25 @@ def test_misc_flags_unique(self):
290290
flag_vars = ['CFLAGS', 'CXXFLAGS', 'FCFLAGS', 'FFLAGS', 'F90FLAGS']
291291

292292
# setting an option should result in the corresponding flag being set (unique options)
293-
for opt in ['unroll', 'optarch', 'openmp']:
293+
for opt in ['unroll', 'optarch', 'openmp', 'vectorize']:
294294
for enable in [True, False]:
295295
tc = self.get_toolchain("goalf", version="1.1.0-no-OFED")
296296
tc.set_options({opt: enable})
297297
tc.prepare()
298298
if opt == 'optarch':
299-
flag = '-%s' % tc.COMPILER_OPTIMAL_ARCHITECTURE_OPTION[(tc.arch, tc.cpu_family)]
299+
option = tc.COMPILER_OPTIMAL_ARCHITECTURE_OPTION[(tc.arch, tc.cpu_family)]
300300
else:
301-
flag = '-%s' % tc.options.options_map[opt]
301+
option = tc.options.options_map[opt]
302+
if not isinstance(option, dict):
303+
option = {True: option}
302304
for var in flag_vars:
303305
flags = tc.get_variable(var)
304-
if enable:
305-
self.assertTrue(flag in flags, "%s: True means %s in %s" % (opt, flag, flags))
306-
else:
307-
self.assertTrue(flag not in flags, "%s: False means no %s in %s" % (opt, flag, flags))
306+
for key, value in option.items():
307+
flag = "-%s" % value
308+
if enable == key:
309+
self.assertTrue(flag in flags, "%s: %s means %s in %s" % (opt, enable, flag, flags))
310+
else:
311+
self.assertTrue(flag not in flags, "%s: %s means no %s in %s" % (opt, enable, flag, flags))
308312
self.modtool.purge()
309313

310314
def test_override_optarch(self):
@@ -387,7 +391,7 @@ def test_compiler_dependent_optarch(self):
387391
"""Test whether specifying optarch on a per compiler basis works."""
388392
flag_vars = ['CFLAGS', 'CXXFLAGS', 'FCFLAGS', 'FFLAGS', 'F90FLAGS']
389393
intel_options = [('intelflag', 'intelflag'), ('GENERIC', 'xSSE2'), ('', '')]
390-
gcc_options = [('gccflag', 'gccflag'), ('-ftree-vectorize', '-ftree-vectorize'), ('', '')]
394+
gcc_options = [('gccflag', 'gccflag'), ('march=nocona', 'march=nocona'), ('', '')]
391395
gcccore_options = [('gcccoreflag', 'gcccoreflag'), ('GENERIC', 'march=x86-64 -mtune=generic'), ('', '')]
392396
toolchains = [('iccifort', '2011.13.367'), ('GCC', '4.7.2'), ('GCCcore', '6.2.0'), ('PGI', '16.7-GCC-5.4.0-2.26')]
393397
enabled = [True, False]
@@ -476,20 +480,20 @@ def test_precision_flags(self):
476480

477481
flag_vars = ['CFLAGS', 'CXXFLAGS', 'FCFLAGS', 'FFLAGS', 'F90FLAGS']
478482

479-
# check default precision: no specific flag for GCC
483+
# check default precision: -fno-math-errno flag for GCC
480484
tc = self.get_toolchain("goalf", version="1.1.0-no-OFED")
481485
tc.set_options({})
482486
tc.prepare()
483487
for var in flag_vars:
484-
self.assertEqual(os.getenv(var), "-O2 -march=native")
488+
self.assertEqual(os.getenv(var), "-O2 -ftree-vectorize -march=native -fno-math-errno")
485489

486490
# check other precision flags
487491
prec_flags = {
488-
'ieee': "-mieee-fp -fno-trapping-math",
492+
'ieee': "-fno-math-errno -mieee-fp -fno-trapping-math",
489493
'strict': "-mieee-fp -mno-recip",
490494
'precise': "-mno-recip",
491-
'loose': "-mrecip -mno-ieee-fp",
492-
'veryloose': "-mrecip=all -mno-ieee-fp",
495+
'loose': "-fno-math-errno -mrecip -mno-ieee-fp",
496+
'veryloose': "-fno-math-errno -mrecip=all -mno-ieee-fp",
493497
}
494498
for prec in prec_flags:
495499
for enable in [True, False]:
@@ -498,9 +502,9 @@ def test_precision_flags(self):
498502
tc.prepare()
499503
for var in flag_vars:
500504
if enable:
501-
self.assertEqual(os.getenv(var), "-O2 -march=native %s" % prec_flags[prec])
505+
self.assertEqual(os.getenv(var), "-O2 -ftree-vectorize -march=native %s" % prec_flags[prec])
502506
else:
503-
self.assertEqual(os.getenv(var), "-O2 -march=native")
507+
self.assertEqual(os.getenv(var), "-O2 -ftree-vectorize -march=native -fno-math-errno")
504508
self.modtool.purge()
505509

506510
def test_cgoolf_toolchain(self):
@@ -579,8 +583,10 @@ def test_goolfc(self):
579583
tc.set_options(opts)
580584
tc.prepare()
581585

586+
archflags = tc.COMPILER_OPTIMAL_ARCHITECTURE_OPTION[(tc.arch, tc.cpu_family)]
587+
optflags = "-O2 -ftree-vectorize -%s -fno-math-errno -fopenmp" % archflags
582588
nvcc_flags = r' '.join([
583-
r'-Xcompiler="-O2 -%s -fopenmp"' % tc.COMPILER_OPTIMAL_ARCHITECTURE_OPTION[(tc.arch, tc.cpu_family)],
589+
r'-Xcompiler="%s"' % optflags,
584590
# the use of -lcudart in -Xlinker is a bit silly but hard to avoid
585591
r'-Xlinker=".* -lm -lrt -lcudart -lpthread"',
586592
r' '.join(["-gencode %s" % x for x in opts['cuda_gencode']]),
@@ -903,9 +909,9 @@ def test_independence(self):
903909

904910
tc_cflags = {
905911
'CrayCCE': "-O2 -homp -craype-verbose",
906-
'CrayGNU': "-O2 -fopenmp -craype-verbose",
912+
'CrayGNU': "-O2 -fno-math-errno -fopenmp -craype-verbose",
907913
'CrayIntel': "-O2 -ftz -fp-speculation=safe -fp-model source -fopenmp -craype-verbose",
908-
'GCC': "-O2 -test -fopenmp",
914+
'GCC': "-O2 -ftree-vectorize -test -fno-math-errno -fopenmp",
909915
'iccifort': "-O2 -test -ftz -fp-speculation=safe -fp-model source -fopenmp",
910916
}
911917

0 commit comments

Comments
 (0)