Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 12 additions & 24 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -361,24 +361,12 @@ if(DEFINED ENV{BODO_VENDOR_MPI4PY} AND NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/b
file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/bodo/mpi4py/_vendored_mpi4py" DESTINATION "${SKBUILD_PLATLIB_DIR}/bodo/mpi4py/")
endif()

# Find PyArrow Include and Lib Directory
# TODO: Use Arrow Directories from Conda Instead if Available
execute_process(
COMMAND "${PYTHON_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_include())"
OUTPUT_VARIABLE PYARROW_INCLUDE_DIR
OUTPUT_STRIP_TRAILING_WHITESPACE
)
# PyArrow on Pip bundles the Arrow shared libraries, but named as lib*.EXT.VERSION
# I.E. libarrow.so.1700 instead of the expected libarrow.so
# Thus, we need PyArrow to create symlinks to the correct names
# They have a helper function to do this, and it should be a NOOP with PyArrow on Conda
execute_process(
COMMAND "${PYTHON_EXECUTABLE}" -c "import pyarrow; pyarrow.create_library_symlinks(); print(pyarrow.get_library_dirs()[0])"
OUTPUT_VARIABLE PYARROW_LIB_DIR
OUTPUT_STRIP_TRAILING_WHITESPACE
)
cmake_print_variables(PYARROW_INCLUDE_DIR)
cmake_print_variables(PYARROW_LIB_DIR)
# Make the project-local helper modules under cmake/ visible to include().
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
# Provides setup_pyarrow_dirs() (defined in cmake/PyArrowHelpers.cmake).
include(PyArrowHelpers)
# Sets PYARROW_INCLUDE_DIR and PYARROW_LIBRARY_DIRS in this scope; the
# targets below pass PYARROW_LIBRARY_DIRS to target_link_directories().
setup_pyarrow_dirs()

# Log the resolved locations so they show up in the configure output.
message(STATUS "PyArrow Includes: ${PYARROW_INCLUDE_DIR}")
message(STATUS "PyArrow Lib Dirs: ${PYARROW_LIBRARY_DIRS}")

# Numpy Include Directory is provided by scikit-build-core
# through `Python_NumPy_INCLUDE_DIR`
Expand All @@ -405,7 +393,7 @@ python_add_library(csv_json_reader
"bodo/io/csv_json_reader.cpp"
)
target_include_directories(csv_json_reader PRIVATE ${BASE_INCLUDE_DIRS} "${CMAKE_CURRENT_SOURCE_DIR}/bodo/io/" "${CMAKE_CURRENT_BINARY_DIR}/bodo/io/")
target_link_directories(csv_json_reader PRIVATE ${PYARROW_LIB_DIR} ${CONDA_LIB_DIR} ${MPI_LIB_DIR})
target_link_directories(csv_json_reader PRIVATE ${PYARROW_LIBRARY_DIRS} ${CONDA_LIB_DIR} ${MPI_LIB_DIR})
target_link_libraries(csv_json_reader PRIVATE arrow arrow_python ${MPI_LIBRARIES})
install(TARGETS csv_json_reader DESTINATION "bodo/io/")

Expand All @@ -422,7 +410,7 @@ add_custom_command(
set(pyarrow_wrappers_sources "bodo/io/pyarrow_wrappers.cpp" "bodo/io/arrow_compat.cpp")
python_add_library(pyarrow_wrappers MODULE WITH_SOABI "${pyarrow_wrappers_sources}")
target_include_directories(pyarrow_wrappers PRIVATE ${BASE_INCLUDE_DIRS})
target_link_directories(pyarrow_wrappers PRIVATE ${PYARROW_LIB_DIR} ${CONDA_LIB_DIR} ${MPI_LIB_DIR})
target_link_directories(pyarrow_wrappers PRIVATE ${PYARROW_LIBRARY_DIRS} ${CONDA_LIB_DIR} ${MPI_LIB_DIR})
# On Windows this is necessary to define due to importing/exporting DLL semantics.
# See https://github.com/apache/arrow/blob/9105a4109a80a1c01eabb24ee4b9f7c94ee942cb/python/CMakeLists.txt#L472
target_compile_definitions(pyarrow_wrappers PRIVATE ARROW_PYTHON_EXPORTING)
Expand Down Expand Up @@ -465,7 +453,7 @@ add_custom_command(

python_add_library(memory MODULE WITH_SOABI "bodo/memory.cpp")
target_include_directories(memory PRIVATE ${BASE_INCLUDE_DIRS} "${CMAKE_CURRENT_SOURCE_DIR}/bodo/")
target_link_directories(memory PRIVATE ${PYARROW_LIB_DIR} ${CONDA_LIB_DIR})
target_link_directories(memory PRIVATE ${PYARROW_LIBRARY_DIRS} ${CONDA_LIB_DIR})
target_link_libraries(memory PRIVATE arrow arrow_python)
install(TARGETS memory DESTINATION "bodo/")

Expand All @@ -492,7 +480,7 @@ python_add_library(
)

target_include_directories(memory_tester PRIVATE ${BASE_INCLUDE_DIRS} "${CMAKE_CURRENT_SOURCE_DIR}/bodo/tests/")
target_link_directories(memory_tester PRIVATE ${PYARROW_LIB_DIR} ${CONDA_LIB_DIR} ${MPI_LIB_DIR})
target_link_directories(memory_tester PRIVATE ${PYARROW_LIBRARY_DIRS} ${CONDA_LIB_DIR} ${MPI_LIB_DIR})
target_link_libraries(memory_tester PRIVATE "${MPI_LIBRARIES}" arrow arrow_python fmt::fmt)
if(NOT(CMAKE_BUILD_TYPE STREQUAL "Release"))
target_compile_definitions(
Expand Down Expand Up @@ -570,7 +558,7 @@ python_add_library(memory_cpp
"bodo/libs/_storage_manager.cpp"
)
target_include_directories(memory_cpp PRIVATE ${BASE_INCLUDE_DIRS} "${CMAKE_CURRENT_SOURCE_DIR}/bodo/libs/" "${CMAKE_CURRENT_BINARY_DIR}/bodo/libs/")
target_link_directories(memory_cpp PRIVATE ${CONDA_LIB_DIR} ${PYARROW_LIB_DIR} ${MPI_LIB_DIR})
target_link_directories(memory_cpp PRIVATE ${CONDA_LIB_DIR} ${PYARROW_LIBRARY_DIRS} ${MPI_LIB_DIR})
target_link_libraries(memory_cpp PRIVATE fmt::fmt arrow ${MPI_LIBRARIES})
install(TARGETS memory_cpp DESTINATION "bodo/")

Expand Down Expand Up @@ -803,7 +791,7 @@ if (NOT WIN32)
)
endif()

target_link_directories(ext PRIVATE ${PYARROW_LIB_DIR} "${Python_NumPy_INCLUDE_DIR}/../lib")
target_link_directories(ext PRIVATE ${PYARROW_LIBRARY_DIRS} "${Python_NumPy_INCLUDE_DIR}/../lib")
target_include_directories(ext SYSTEM PRIVATE ${datasketches_INSTALL_DIR}/include)
target_compile_definitions(ext PRIVATE ARROW_PYTHON_EXPORTING)
target_link_libraries(ext PRIVATE
Expand Down
90 changes: 90 additions & 0 deletions cmake/PyArrowHelpers.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# setup_pyarrow_dirs()
#
# Locates the installed PyArrow package and publishes its header and library
# locations to the caller.
#
# This replicates the logic of pyarrow.get_include(),
# pyarrow.create_library_symlinks() and pyarrow.get_library_dirs() in CMake
# in order to support cross compilation. Calling those functions directly
# would cause the Arrow native extensions to load at build time, which is
# not possible in cross-compilation scenarios.
#
# Arguments: none.
#
# Results:
#   PYARROW_HOME          cache entry (PATH, forced): directory of the
#                         pyarrow Python package.
#   PYARROW_INCLUDE_DIR   set in the caller's scope: "${PYARROW_HOME}/include".
#   PYARROW_LIBRARY_DIRS  set in the caller's scope: the first collected
#                         library directory, which is always PYARROW_HOME.
#
# Side effects:
#   On Linux and macOS, creates unversioned symlinks next to the versioned
#   Arrow shared libraries inside PYARROW_HOME (e.g. libarrow.so ->
#   libarrow.so.1700) when they do not exist yet. A failure to create a
#   symlink is reported as a warning and does not abort configuration.
#
# Errors:
#   Stops with FATAL_ERROR when the Python interpreter cannot report the
#   location of the pyarrow package.
function(setup_pyarrow_dirs)
    find_package(Python3 REQUIRED COMPONENTS Interpreter)
    find_package(PkgConfig QUIET)

    # --- Locate the pyarrow package directory -------------------------------
    # Only the package's search location is queried via importlib's
    # find_spec; pyarrow's own helper functions are deliberately not called.
    execute_process(
        COMMAND "${Python3_EXECUTABLE}" -c
                "import importlib.util; print(importlib.util.find_spec('pyarrow').submodule_search_locations[0])"
        OUTPUT_VARIABLE pyarrow_pkg_dir
        RESULT_VARIABLE pyarrow_probe_status
        OUTPUT_STRIP_TRAILING_WHITESPACE
    )

    # A non-zero exit status (e.g. pyarrow is not installed) or empty output
    # both mean the location is unknown.
    if(NOT "${pyarrow_probe_status}" STREQUAL "0" OR NOT pyarrow_pkg_dir)
        message(FATAL_ERROR
            "Could not find pyarrow module location via Python. "
            "(interpreter: ${Python3_EXECUTABLE}, exit status: ${pyarrow_probe_status})")
    endif()

    set(PYARROW_HOME "${pyarrow_pkg_dir}" CACHE PATH "Path to PyArrow installation" FORCE)
    message(STATUS "PyArrow Location: ${PYARROW_HOME}")

    # --- Create symlinks (replica of create_library_symlinks) ---------------
    # Each platform is described by three values:
    #   versioned_glob   - file(GLOB) pattern matching versioned libraries
    #   version_regex    - the part of the file name to strip
    #   unversioned_tail - what that part is replaced with
    # Other platforms leave versioned_glob empty and create no symlinks.
    set(versioned_glob "")
    if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
        # Linux: libarrow.so.1700 -> libarrow.so
        # Python logic: hard_path.rsplit('.', 1)[0]
        set(versioned_glob "*.so.*")
        set(version_regex "\\.[^.]+$")
        set(unversioned_tail "")
    elseif(APPLE)
        # macOS: libarrow.1700.dylib -> libarrow.dylib
        # Python logic: '.'.join((hard_path.rsplit('.', 2)[0], 'dylib'))
        set(versioned_glob "*.*.dylib")
        set(version_regex "\\.[^.]+\\.dylib$")
        set(unversioned_tail ".dylib")
    endif()

    if(NOT "${versioned_glob}" STREQUAL "")
        file(GLOB bundled_libs "${PYARROW_HOME}/${versioned_glob}")

        foreach(lib_path IN LISTS bundled_libs)
            get_filename_component(lib_name "${lib_path}" NAME)
            string(REGEX REPLACE "${version_regex}" "${unversioned_tail}" link_name "${lib_name}")

            if(NOT EXISTS "${PYARROW_HOME}/${link_name}")
                message(STATUS "Creating PyArrow Symlink: ${link_name} -> ${lib_name}")
                # The link target is relative, so the command runs inside
                # PYARROW_HOME.
                execute_process(
                    COMMAND "${CMAKE_COMMAND}" -E create_symlink "${lib_name}" "${link_name}"
                    WORKING_DIRECTORY "${PYARROW_HOME}"
                    RESULT_VARIABLE link_status
                )
                # Best effort: report the failure (e.g. a read-only
                # site-packages directory) instead of silently continuing
                # towards a confusing link error later on.
                if(NOT "${link_status}" STREQUAL "0")
                    message(WARNING
                        "Could not create PyArrow symlink ${link_name} -> ${lib_name} "
                        "in ${PYARROW_HOME} (status: ${link_status}). "
                        "Linking against the bundled Arrow libraries may fail.")
                endif()
            endif()
        endforeach()
    endif()

    # --- Collect library dirs (replica of get_library_dirs) -----------------
    # The package directory always comes first.
    set(library_dirs "${PYARROW_HOME}")

    if(DEFINED ENV{ARROW_HOME})
        list(APPEND library_dirs "$ENV{ARROW_HOME}/lib")
    endif()

    if(PKG_CONFIG_FOUND)
        foreach(pkg_name IN ITEMS "arrow" "arrow_python")
            pkg_check_modules(PC_${pkg_name} QUIET ${pkg_name})
            if(PC_${pkg_name}_FOUND)
                list(APPEND library_dirs ${PC_${pkg_name}_LIBRARY_DIRS})
            endif()
        endforeach()
    endif()

    list(REMOVE_DUPLICATES library_dirs)

    # --- Expose variables to the parent scope -------------------------------
    # Only the first entry is exported. Because the list is seeded with
    # PYARROW_HOME, this is always the package directory, matching the
    # earlier use of pyarrow.get_library_dirs()[0].
    list(GET library_dirs 0 primary_lib_dir)
    set(PYARROW_LIBRARY_DIRS "${primary_lib_dir}" PARENT_SCOPE)
    set(PYARROW_INCLUDE_DIR "${PYARROW_HOME}/include" PARENT_SCOPE)
endfunction()
Loading