@@ -170,7 +170,7 @@ def is_version_ok(version_range):
170170 'com_github_googleapis_googleapis' : '2.0.0:2.5.0' ,
171171 'com_github_googlecloudplatform_google_cloud_cpp' : '2.0.0:' , # Not used due to $TF_NEED_GCP=0
172172 'com_github_grpc_grpc' : '2.2.0:' ,
173- 'com_googlesource_code_re2' : '2.0.0:' , # Requires the RE2 version with Abseil ( or 2023-06-01+)
173+ 'com_googlesource_code_re2' : '2.0.0:' , # Requires RE2 2023-06-01+ and building TF with system Abseil
174174 'grpc' : '2.0.0:2.2.0' ,
175175 }
176176 # Python packages installed as extensions or in the Python module
@@ -424,8 +424,8 @@ def get_system_libs(self):
424424 libpaths .append (os .path .join (openssl_root , libpath ))
425425
426426 if ignored_system_deps :
427- print_warning ('%d TensorFlow dependencies have not been resolved by EasyBuild. Check the log for details.' ,
428- len (ignored_system_deps ))
427+ print_warning ('%d TensorFlow dependencies have not been resolved by EasyBuild. '
428+ "Search the log for 'TF_SYSTEM_LIBS' for details." , len (ignored_system_deps ))
429429 self .log .warning ('For the following $TF_SYSTEM_LIBS dependencies TensorFlow will download a copy ' +
430430 'because an EB dependency was not found: \n %s\n ' +
431431 'EC Dependencies: %s\n ' +
@@ -564,6 +564,8 @@ def configure_step(self):
564564
565565 self ._with_cuda = bool (cuda_root )
566566
567+ repo_env = {} # Variables that need to be passed as --repo_env to Bazel
568+
567569 config_env_vars = {
568570 'CC_OPT_FLAGS' : os .getenv ('CXXFLAGS' ),
569571 'MPI_HOME' : mpi_home ,
@@ -575,7 +577,6 @@ def configure_step(self):
575577 'TF_NEED_CUDA' : ('0' , '1' )[self ._with_cuda ],
576578 'TF_NEED_OPENCL' : ('0' , '1' )[bool (opencl_root )],
577579 'TF_NEED_ROCM' : '0' ,
578- 'TF_NEED_TENSORRT' : '0' ,
579580 'TF_SET_ANDROID_WORKSPACE' : '0' ,
580581 'TF_SYSTEM_LIBS' : ',' .join (self .system_libs_info [0 ]),
581582 }
@@ -612,10 +613,10 @@ def configure_step(self):
612613 # Clang toggle since 2.14.0
613614 if LooseVersion (self .version ) > LooseVersion ('2.13' ):
614615 config_env_vars ['TF_NEED_CLANG' ] = '0'
615- # Hermietic python version since 2.14.0
616+ # Hermetic python version since 2.14.0
616617 if LooseVersion (self .version ) > LooseVersion ('2.13' ):
617618 pyver = det_python_version (self .python_cmd )
618- config_env_vars ['TF_PYTHON_VERSION' ] = '.' .join (pyver .split ('.' )[:2 ])
619+ repo_env ['TF_PYTHON_VERSION' ] = '.' .join (pyver .split ('.' )[:2 ])
619620
620621 if self ._with_cuda :
621622 cuda_version = get_software_version ('CUDA' )
@@ -627,18 +628,9 @@ def configure_step(self):
627628 else :
628629 compiler_path = which (os .getenv ('CC' ), on_error = ERROR )
629630
630- # list of CUDA compute capabilities to use can be specifed in two ways (where (2) overrules (1)):
631- # (1) in the easyconfig file, via the custom cuda_compute_capabilities;
632- # (2) in the EasyBuild configuration, via --cuda-compute-capabilities configuration option;
633- ec_cuda_cc = self .cfg ['cuda_compute_capabilities' ]
634- cfg_cuda_cc = build_option ('cuda_compute_capabilities' )
635- cuda_cc = cfg_cuda_cc or ec_cuda_cc or []
631+ cuda_cc = self .cfg .get_cuda_cc_template_value ("cuda_cc_space_sep" , required = False ).split ()
636632
637- if cfg_cuda_cc and ec_cuda_cc :
638- warning_msg = "cuda_compute_capabilities specified in easyconfig (%s) are overruled by " % ec_cuda_cc
639- warning_msg += "--cuda-compute-capabilities configuration option (%s)" % cfg_cuda_cc
640- print_warning (warning_msg )
641- elif not cuda_cc :
633+ if not cuda_cc :
642634 warning_msg = "No CUDA compute capabilities specified, so using TensorFlow default "
643635 warning_msg += "(which may not be optimal for your system).\n You should use "
644636 warning_msg += "the --cuda-compute-capabilities configuration option or the cuda_compute_capabilities "
@@ -662,19 +654,25 @@ def configure_step(self):
662654 'GCC_HOST_COMPILER_PATH' : compiler_path ,
663655 # This is the binutils bin folder: https://github.com/tensorflow/tensorflow/issues/39263
664656 'GCC_HOST_COMPILER_PREFIX' : self .binutils_bin_path ,
665- 'TF_CUDA_COMPUTE_CAPABILITIES' : ',' .join (cuda_cc ),
666- 'TF_CUDA_VERSION' : cuda_maj_min_ver ,
667657 })
668658
669659 # from v2.18 on, TF with CUDA needs these env vars to be set
670660 if LooseVersion (self .version ) >= LooseVersion ('2.18' ):
671661 config_env_vars .update ({
672662 'CUDA_NVCC' : '1' ,
673- 'HERMETIC_CUDA_COMPUTE_CAPABILITIES' : ',' .join (cuda_cc ),
663+ 'HERMETIC_CUDA_VERSION' : cuda_version ,
664+ 'HERMETIC_CUDA_COMPUTE_CAPABILITIES' : ',' .join (f"sm_{ cc .replace ('.' , '' )} " for cc in cuda_cc ),
665+ 'LOCAL_CUDA_PATH' : cuda_root ,
666+ })
667+ else :
668+ config_env_vars .update ({
669+ 'TF_CUDA_COMPUTE_CAPABILITIES' : ',' .join (cuda_cc ),
670+ 'TF_CUDA_VERSION' : cuda_maj_min_ver ,
674671 })
675672
676- # for recent TensorFlow versions, $TF_CUDA_PATHS and $TF_CUBLAS_VERSION must also be set
677- if LooseVersion (self .version ) >= LooseVersion ('1.14' ):
673+ # for these TensorFlow versions, $TF_CUDA_PATHS and $TF_CUBLAS_VERSION must also be set
674+ # TF 2.18 introduced "Hermetic CUDA" which doesn't use those env vars anymore
675+ if '1.14' <= LooseVersion (self .version ) < '2.18' :
678676
679677 # figure out correct major/minor version for CUBLAS from cublas_api.h
680678 cublas_api_header_glob_pattern = os .path .join (cuda_root , 'targets' , '*' , 'include' , 'cublas_api.h' )
@@ -698,41 +696,71 @@ def configure_step(self):
698696 'TF_CUDA_PATHS' : cuda_root ,
699697 'TF_CUBLAS_VERSION' : '.' .join (cublas_ver_parts ),
700698 })
699+ elif LooseVersion (self .version ) >= '2.18' :
700+ # TF_CUDA_PATHS replaced CUDNN_INSTALL_PATH, TENSORRT_INSTALL_PATH, NCCL_INSTALL_PATH, NCCL_HDR_PATH
701+ # in 2.0. Version guard set to 2.18 to avoid potentially breaking older easyconfigs
702+ repo_env ['TF_CUDA_PATHS' ] = cuda_root
701703
702704 if cudnn_root :
703705 cudnn_version = get_software_version ('cuDNN' )
704706 cudnn_maj_min_patch_ver = '.' .join (cudnn_version .split ('.' )[:3 ])
705707
706- config_env_vars .update ({
707- 'CUDNN_INSTALL_PATH' : cudnn_root ,
708- 'TF_CUDNN_VERSION' : cudnn_maj_min_patch_ver ,
709- })
708+ if LooseVersion (self .version ) >= '2.18' :
709+ repo_env ['TF_CUDA_PATHS' ] += ',' + cudnn_root
710+ repo_env ['TF_CUDNN_VERSION' ] = cudnn_version
711+ config_env_vars .update ({
712+ 'LOCAL_CUDNN_PATH' : cudnn_root ,
713+ 'HERMETIC_CUDNN_VERSION' : cudnn_version ,
714+ })
715+ else :
716+ config_env_vars .update ({
717+ 'CUDNN_INSTALL_PATH' : cudnn_root ,
718+ 'TF_CUDNN_VERSION' : cudnn_maj_min_patch_ver ,
719+ })
710720 else :
711721 raise EasyBuildError ("TensorFlow has a strict dependency on cuDNN if CUDA is enabled" )
722+
712723 if nccl_root :
713724 nccl_version = get_software_version ('NCCL' )
714725 # Ignore the PKG_REVISION identifier if it exists (i.e., report 2.4.6 for 2.4.6-1 or 2.4.6-2)
715726 nccl_version = nccl_version .split ('-' )[0 ]
716- config_env_vars .update ({
717- 'NCCL_INSTALL_PATH' : nccl_root ,
718- })
727+ if LooseVersion (self .version ) >= '2.18' :
728+ repo_env ['TF_CUDA_PATHS' ] += ',' + nccl_root
729+ config_env_vars ['LOCAL_NCCL_PATH' ] = nccl_root
730+ else :
731+ config_env_vars ['NCCL_INSTALL_PATH' ] = nccl_root
719732 else :
720733 nccl_version = '1.3' # Use simple downloadable version
721- config_env_vars .update ({
722- 'TF_NCCL_VERSION' : nccl_version ,
723- })
734+ if LooseVersion (self .version ) >= '2.18' :
735+ repo_env ['TF_NCCL_VERSION' ] = nccl_version
736+ else :
737+ config_env_vars ['TF_NCCL_VERSION' ] = nccl_version
738+
724739 if tensorrt_root :
725740 tensorrt_version = get_software_version ('TensorRT' )
726- config_env_vars . update ( {
741+ tensor_rt_vars = {
727742 'TF_NEED_TENSORRT' : '1' ,
728- 'TENSORRT_INSTALL_PATH' : tensorrt_root ,
729743 'TF_TENSORRT_VERSION' : tensorrt_version ,
730- })
744+ 'TENSORRT_INSTALL_PATH' : tensorrt_root ,
745+ }
746+ if LooseVersion (self .version ) >= '2.18' :
747+ repo_env ['TF_CUDA_PATHS' ] += ',' + tensorrt_root
748+ else :
749+ tensor_rt_vars = {'TF_NEED_TENSORRT' : '0' }
750+ if LooseVersion (self .version ) >= '2.18' :
751+ repo_env .update (tensor_rt_vars )
752+ else :
753+ config_env_vars .update (tensor_rt_vars )
754+
755+ nvshmem_root = get_software_root ('NVSHMEM' )
756+ if nvshmem_root and LooseVersion (self .version ) >= '2.18' :
757+ repo_env ['LOCAL_NVSHMEM_PATH' ] = nvshmem_root
731758
732759 configure_py_contents = read_file ('configure.py' )
733760 for key , val in sorted (config_env_vars .items ()):
734761 if key .startswith ('TF_' ) and key not in configure_py_contents :
735- self .log .warning ('Did not find %s option in configure.py. Setting might not have any effect' , key )
762+ print_warning ('Did not find %s option in configure.py. Setting might not have any effect' ,
763+ key , log = self .log )
736764 env .setvar (key , val )
737765
738766 # configure.py (called by configure script) already calls bazel to determine the bazel version
@@ -746,11 +774,19 @@ def configure_step(self):
746774 cmd = self .cfg ['preconfigopts' ] + './configure ' + self .cfg ['configopts' ]
747775 run_shell_cmd (cmd )
748776
777+ tf_conf_bazelrc = os .path .join (self .start_dir , '.tf_configure.bazelrc' )
778+
779+ if LooseVersion (self .version ) >= '2.17' :
780+ repo_env ['WHEEL_NAME' ] = 'tensorflow'
781+
782+ write_file (tf_conf_bazelrc ,
783+ '\n ' .join (f'build --repo_env { key } ="{ value } "' for key , value in repo_env .items ()),
784+ append = True )
785+
749786 # when building on Arm 64-bit we can't just use --copt=-mcpu=native (or likewise for any -mcpu=...),
750787 # because it breaks the build of XNNPACK;
751788 # see also https://github.com/easybuilders/easybuild-easyconfigs/issues/18899
752789 if get_cpu_architecture () == AARCH64 :
753- tf_conf_bazelrc = os .path .join (self .start_dir , '.tf_configure.bazelrc' )
754790 regex_subs = [
755791 # use --per_file_copt instead of --copt to selectively use -mcpu=native (not for XNNPACK),
756792 # the leading '-' ensures that -mcpu=native is *not* used when building XNNPACK;
@@ -976,12 +1012,12 @@ def build_step(self):
9761012 + self .target_opts
9771013 + [self .cfg ['buildopts' ]]
9781014 )
979- if LooseVersion (self .version ) < LooseVersion ( '2.16' ) :
1015+ if LooseVersion (self .version ) < '2.16' :
9801016 cmd += ['//tensorflow/tools/pip_package:build_pip_package' ]
981- elif LooseVersion (self .version ) < LooseVersion ( '2.17' ) : # for v2.16.x
982- cmd += ['//tensorflow/tools/pip_package:v2/wheel --repo_env=WHEEL_NAME=tensorflow ' ]
1017+ elif LooseVersion (self .version ) < '2.17' : # for v2.16.x
1018+ cmd += ['//tensorflow/tools/pip_package:v2/wheel' ]
9831019 else :
984- cmd += ['//tensorflow/tools/pip_package:wheel --repo_env=WHEEL_NAME=tensorflow ' ]
1020+ cmd += ['//tensorflow/tools/pip_package:wheel' ]
9851021
9861022 with self .set_tmp_dir ():
9871023 run_shell_cmd (' ' .join (cmd ))
0 commit comments