From c9cc347256197fc35fb28e0cac763121308b3710 Mon Sep 17 00:00:00 2001
From: vitsoft
Date: Thu, 30 Oct 2025 18:15:23 +0100
Subject: [PATCH] adding easyconfigs: Arrow-22.0.0-gfbf-2025a.eb

---
 .../a/Arrow/Arrow-22.0.0-gfbf-2025a.eb | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 easybuild/easyconfigs/a/Arrow/Arrow-22.0.0-gfbf-2025a.eb

diff --git a/easybuild/easyconfigs/a/Arrow/Arrow-22.0.0-gfbf-2025a.eb b/easybuild/easyconfigs/a/Arrow/Arrow-22.0.0-gfbf-2025a.eb
new file mode 100644
index 00000000000..e1bd3905fcd
--- /dev/null
+++ b/easybuild/easyconfigs/a/Arrow/Arrow-22.0.0-gfbf-2025a.eb
@@ -0,0 +1,81 @@
+easyblock = 'CMakeMake'
+
+name = 'Arrow'
+version = '22.0.0'
+
+homepage = 'https://arrow.apache.org'
+description = """Apache Arrow (incl. PyArrow Python bindings), a cross-language development platform
+ for in-memory data."""
+
+toolchain = {'name': 'gfbf', 'version': '2025a'}
+
+source_urls = ['https://archive.apache.org/dist/%(namelower)s/%(namelower)s-%(version)s']
+sources = ['apache-arrow-%(version)s.tar.gz']
+checksums = ['131250cd24dec0cddde04e2ad8c9e2bc43edc5e84203a81cf71cf1a33a6e7e0f']
+
+builddependencies = [
+    ('CMake', '3.31.3'),
+    ('Cython', '3.1.1'),
+    ('Autotools', '20240712'),
+    ('flex', '2.6.4'),
+    ('Bison', '3.8.2'),
+    ('pkgconf', '2.3.0'),
+]
+
+# jemalloc is not a dependency: configopts set -DARROW_JEMALLOC=OFF and enable mimalloc instead
+dependencies = [
+    ('Python', '3.13.1'),
+    ('Python-bundle-PyPI', '2025.04'),
+    ('SciPy-bundle', '2025.06'),  # for numpy
+    ('Boost', '1.88.0'),
+    ('lz4', '1.10.0'),
+    ('zlib', '1.3.1'),
+    ('bzip2', '1.0.8'),
+    ('zstd', '1.5.6'),
+    ('snappy', '1.2.2'),
+    ('RapidJSON', '1.1.0-20250205'),
+    ('RE2', '2024-07-02'),
+    ('utf8proc', '2.10.0'),
+]
+
+start_dir = 'cpp'
+
+# Avoid searching dependencies in an active conda environment
+preconfigopts = "unset CONDA_PREFIX; "
+
+# see https://arrow.apache.org/docs/developers/python.html
+configopts = "-DARROW_DATASET=on -DARROW_PARQUET=ON -DARROW_ORC=ON "
+configopts += "-DARROW_PYTHON=on "
+configopts += "-DARROW_WITH_ZLIB=ON -DARROW_WITH_BZ2=ON -DARROW_WITH_LZ4=ON -DARROW_WITH_SNAPPY=ON "
+configopts += "-DARROW_WITH_ZSTD=ON -DZSTD_ROOT=$EBROOTZSTD "
+configopts += "-DARROW_MIMALLOC=ON -DARROW_JEMALLOC=OFF "
+
+exts_defaultclass = 'PythonPackage'
+exts_list = [
+    ('pyarrow', version, {
+        'preinstallopts': (
+            "export PKG_CONFIG_PATH=%(installdir)s/lib/pkgconfig:$PKG_CONFIG_PATH"
+            " && export Arrow_DIR=%(installdir)s && export ArrowDataset_DIR=%(installdir)s"
+            " && export ArrowAcero_DIR=%(installdir)s && export Parquet_DIR=%(installdir)s"
+            " && sed -i 's/numpy==[0-9.]*/numpy/g' pyproject.toml"
+            ' && PYARROW_CMAKE_OPTIONS="-DZSTD_ROOT=$EBROOTZSTD"'
+            " PYARROW_WITH_DATASET=1 PYARROW_WITH_PARQUET=1 PYARROW_WITH_ORC=1 "
+        ),
+        'nosource': True,
+        'start_dir': '../python',
+    }),
+]
+
+sanity_check_paths = {
+    'files': ['lib/libarrow.a', 'lib/libarrow.%s' % SHLIB_EXT,
+              'lib/python%%(pyshortver)s/site-packages/pyarrow/libarrow_python.%s' % SHLIB_EXT],
+    'dirs': ['include/arrow', 'lib/cmake/Arrow', 'lib/pkgconfig', 'lib/python%(pyshortver)s/site-packages'],
+}
+
+sanity_check_commands = [
+    "python -s -c 'import pyarrow.dataset'",
+    "python -s -c 'import pyarrow.parquet'",
+    "python -s -c 'import pyarrow.orc'",
+]
+
+moduleclass = 'data'