Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions easybuild/easyconfigs/a/Arrow/Arrow-22.0.0-gfbf-2025a.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
easyblock = 'CMakeMake'

# Software identity
name = 'Arrow'
version = '22.0.0'

homepage = 'https://arrow.apache.org'
# Two-line description; the newline between the two parts is part of the value.
description = (
    "Apache Arrow (incl. PyArrow Python bindings), a cross-language development platform\n"
    "for in-memory data."
)

toolchain = {'name': 'gfbf', 'version': '2025a'}

# The %(namelower)s / %(version)s placeholders are kept as literal templates in these strings.
source_urls = [
    'https://archive.apache.org/dist/%(namelower)s/%(namelower)s-%(version)s',
]
sources = [
    'apache-arrow-%(version)s.tar.gz',
]
# One checksum entry, matching the single source tarball listed in 'sources'.
checksums = [
    '131250cd24dec0cddde04e2ad8c9e2bc43edc5e84203a81cf71cf1a33a6e7e0f',
]

# Build-time requirements, one (name, version) pair per entry.
builddependencies = [
    ('CMake', '3.31.3'),
    ('Cython', '3.1.1'),
    ('Autotools', '20240712'),
    ('flex', '2.6.4'),
    ('Bison', '3.8.2'),
    ('pkgconf', '2.3.0'),
]

# jemalloc is switched off in configopts (-DARROW_JEMALLOC=OFF) and mimalloc is enabled instead
# (-DARROW_MIMALLOC=ON); neither allocator is listed as a dependency here
dependencies = [
    # Python stack
    ('Python', '3.13.1'),
    ('Python-bundle-PyPI', '2025.04'),
    ('SciPy-bundle', '2025.06'),  # for numpy
    # C/C++ libraries
    ('Boost', '1.88.0'),
    ('lz4', '1.10.0'),
    ('zlib', '1.3.1'),
    ('bzip2', '1.0.8'),
    ('zstd', '1.5.6'),
    ('snappy', '1.2.2'),
    ('RapidJSON', '1.1.0-20250205'),
    ('RE2', '2024-07-02'),
    ('utf8proc', '2.10.0'),
]

# Start from the 'cpp' subdirectory.
start_dir = 'cpp'

# Avoid searching dependencies in an active conda environment
preconfigopts = "unset CONDA_PREFIX; "

# see https://arrow.apache.org/docs/developers/python.html
# Flags are joined with single spaces; the trailing space is kept on the final value.
configopts = ' '.join([
    # Arrow components
    "-DARROW_DATASET=on",
    "-DARROW_PARQUET=ON",
    "-DARROW_ORC=ON",
    "-DARROW_PYTHON=on",
    # compression support
    "-DARROW_WITH_ZLIB=ON",
    "-DARROW_WITH_BZ2=ON",
    "-DARROW_WITH_LZ4=ON",
    "-DARROW_WITH_SNAPPY=ON",
    "-DARROW_WITH_ZSTD=ON",
    "-DZSTD_ROOT=$EBROOTZSTD",
    # allocator selection: mimalloc on, jemalloc off
    "-DARROW_MIMALLOC=ON",
    "-DARROW_JEMALLOC=OFF",
]) + ' '

exts_defaultclass = 'PythonPackage'

# pyarrow is the only extension; it uses the same version as the main package.
exts_list = [
    ('pyarrow', version, {
        # Shell steps chained with '&&' ahead of the install command.
        'preinstallopts': ' && '.join([
            # point pkg-config and the *_DIR variables at this installation
            "export PKG_CONFIG_PATH=%(installdir)s/lib/pkgconfig:$PKG_CONFIG_PATH",
            "export Arrow_DIR=%(installdir)s",
            "export ArrowDataset_DIR=%(installdir)s",
            "export ArrowAcero_DIR=%(installdir)s",
            "export Parquet_DIR=%(installdir)s",
            # replace any 'numpy==<version>' pin in pyproject.toml with plain 'numpy'
            "sed -i 's/numpy==[0-9.]*/numpy/g' pyproject.toml",
            # VAR=value assignments; the trailing space separates them from what follows
            'PYARROW_CMAKE_OPTIONS="-DZSTD_ROOT=$EBROOTZSTD"'
            " PYARROW_WITH_DATASET=1 PYARROW_WITH_PARQUET=1 PYARROW_WITH_ORC=1 ",
        ]),
        'nosource': True,
        'start_dir': '../python',
    }),
]

# Files and directories listed for the sanity check; SHLIB_EXT supplies the
# shared-library suffix, %(pyshortver)s is kept as a literal template.
sanity_check_paths = {
    'files': [
        'lib/libarrow.a',
        'lib/libarrow.{}'.format(SHLIB_EXT),
        'lib/python%(pyshortver)s/site-packages/pyarrow/libarrow_python.{}'.format(SHLIB_EXT),
    ],
    'dirs': [
        'include/arrow',
        'lib/cmake/Arrow',
        'lib/pkgconfig',
        'lib/python%(pyshortver)s/site-packages',
    ],
}

# One import check per pyarrow submodule: dataset, parquet and orc.
sanity_check_commands = [
    "python -s -c 'import pyarrow.%s'" % local_pyarrow_submodule
    for local_pyarrow_submodule in ('dataset', 'parquet', 'orc')
]

moduleclass = 'data'