@@ -355,97 +355,90 @@ if(NOT BUILD_CPU_ONLY)
355355 )
356356 endif ()
357357
358- set (JIT_LTO_TARGET_ARCHITECTURE "" )
359- set (JIT_LTO_COMPILATION OFF )
360- set (jit_lto_files)
358+ set (JIT_LTO_TARGET_ARCHITECTURE "70-real" )
361359 if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0)
362360 set (JIT_LTO_TARGET_ARCHITECTURE "75-real" )
363- set (JIT_LTO_COMPILATION ON )
364361 endif ()
365362
366- if (JIT_LTO_COMPILATION)
367- # Generate interleaved scan kernel files at build time
368- include (cmake/modules/generate_jit_lto_kernels.cmake )
363+ # Generate interleaved scan kernel files at build time
364+ include (cmake/modules/generate_jit_lto_kernels.cmake )
369365
370- add_library (jit_lto_kernel_usage_requirements INTERFACE )
371- target_include_directories (
372- jit_lto_kernel_usage_requirements
373- INTERFACE "${CMAKE_CURRENT_SOURCE_DIR} /include" "${CMAKE_CURRENT_SOURCE_DIR} /src"
374- "${CMAKE_CURRENT_SOURCE_DIR} /../c/include"
375- )
376- target_compile_options (
377- jit_lto_kernel_usage_requirements INTERFACE "$<$<COMPILE_LANGUAGE :CXX >:${CUVS_CXX_FLAGS} >"
378- "$<$<COMPILE_LANGUAGE :CUDA >:${CUVS_CUDA_FLAGS} >"
379- )
380- target_compile_features (jit_lto_kernel_usage_requirements INTERFACE cuda_std_20 )
381- target_link_libraries (
382- jit_lto_kernel_usage_requirements INTERFACE rmm::rmm raft::raft CCCL::CCCL
383- )
384-
385- block (PROPAGATE interleaved_scan_files metric_files filter_files post_lambda_files )
386- set (CMAKE_CUDA_ARCHITECTURES ${JIT_LTO_TARGET_ARCHITECTURE} )
387- generate_jit_lto_kernels (
388- interleaved_scan_files
389- NAME_FORMAT
390- "interleaved_scan_capacity_@capacity@_veclen_@veclen@_@ascending_descending@_@compute_norm_name@_data_@type_abbrev@_acc_@acc_abbrev@_idx_@idx_abbrev@"
391- MATRIX_JSON_FILE
392- "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_matrix.json"
393- KERNEL_INPUT_FILE
394- "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in"
395- EMBEDDED_INPUT_FILE
396- "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_embedded.cpp.in"
397- OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/interleaved_scan"
398- KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
399- )
400- generate_jit_lto_kernels (
401- metric_files
402- NAME_FORMAT "metric_@metric_name@_veclen_@veclen@_data_@type_abbrev@_acc_@acc_abbrev@"
403- MATRIX_JSON_FILE
404- "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/metric_matrix.json"
405- KERNEL_INPUT_FILE
406- "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/metric_kernel.cu.in"
407- EMBEDDED_INPUT_FILE
408- "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/metric_embedded.cpp.in"
409- OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/metric"
410- KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
411- )
412- generate_jit_lto_kernels (
413- filter_files
414- NAME_FORMAT "@filter_name@"
415- MATRIX_JSON_FILE
416- "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/filter_matrix.json"
417- KERNEL_INPUT_FILE
418- "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/filter_kernel.cu.in"
419- EMBEDDED_INPUT_FILE
420- "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/filter_embedded.cpp.in"
421- OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/filter"
422- KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
423- )
424- generate_jit_lto_kernels (
425- post_lambda_files
426- NAME_FORMAT "@post_lambda_name@"
427- MATRIX_JSON_FILE
428- "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_matrix.json"
429- KERNEL_INPUT_FILE
430- "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_kernel.cu.in"
431- EMBEDDED_INPUT_FILE
432- "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_embedded.cpp.in"
433- OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/post_lambda"
434- KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
435- )
436- endblock ()
437-
438- set (jit_lto_files
439- ${interleaved_scan_files}
440- ${metric_files}
441- ${filter_files}
442- ${post_lambda_files}
443- src/detail/jit_lto/AlgorithmLauncher.cpp
444- src/detail/jit_lto/AlgorithmPlanner.cpp
445- src/detail/jit_lto/FragmentEntry.cpp
446- src/detail/jit_lto/nvjitlink_checker.cpp
447- )
448- endif ()
366+ add_library (jit_lto_kernel_usage_requirements INTERFACE )
367+ target_include_directories (
368+ jit_lto_kernel_usage_requirements
369+ INTERFACE "${CMAKE_CURRENT_SOURCE_DIR} /include" "${CMAKE_CURRENT_SOURCE_DIR} /src"
370+ "${CMAKE_CURRENT_SOURCE_DIR} /../c/include"
371+ )
372+ target_compile_options (
373+ jit_lto_kernel_usage_requirements INTERFACE "$<$<COMPILE_LANGUAGE :CXX >:${CUVS_CXX_FLAGS} >"
374+ "$<$<COMPILE_LANGUAGE :CUDA >:${CUVS_CUDA_FLAGS} >"
375+ )
376+ target_compile_features (jit_lto_kernel_usage_requirements INTERFACE cuda_std_20 )
377+ target_link_libraries (jit_lto_kernel_usage_requirements INTERFACE rmm::rmm raft::raft CCCL::CCCL )
378+
379+ block (PROPAGATE interleaved_scan_files metric_files filter_files post_lambda_files )
380+ set (CMAKE_CUDA_ARCHITECTURES ${JIT_LTO_TARGET_ARCHITECTURE} )
381+ generate_jit_lto_kernels (
382+ interleaved_scan_files
383+ NAME_FORMAT
384+ "interleaved_scan_capacity_@capacity@_veclen_@veclen@_@ascending_descending@_@compute_norm_name@_data_@type_abbrev@_acc_@acc_abbrev@_idx_@idx_abbrev@"
385+ MATRIX_JSON_FILE
386+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_matrix.json"
387+ KERNEL_INPUT_FILE
388+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_kernel.cu.in"
389+ EMBEDDED_INPUT_FILE
390+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/interleaved_scan_embedded.cpp.in"
391+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/interleaved_scan"
392+ KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
393+ )
394+ generate_jit_lto_kernels (
395+ metric_files
396+ NAME_FORMAT "metric_@metric_name@_veclen_@veclen@_data_@type_abbrev@_acc_@acc_abbrev@"
397+ MATRIX_JSON_FILE
398+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/metric_matrix.json"
399+ KERNEL_INPUT_FILE
400+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/metric_kernel.cu.in"
401+ EMBEDDED_INPUT_FILE
402+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/metric_embedded.cpp.in"
403+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/metric"
404+ KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
405+ )
406+ generate_jit_lto_kernels (
407+ filter_files
408+ NAME_FORMAT "@filter_name@"
409+ MATRIX_JSON_FILE
410+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/filter_matrix.json"
411+ KERNEL_INPUT_FILE
412+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/filter_kernel.cu.in"
413+ EMBEDDED_INPUT_FILE
414+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/filter_embedded.cpp.in"
415+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/filter"
416+ KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
417+ )
418+ generate_jit_lto_kernels (
419+ post_lambda_files
420+ NAME_FORMAT "@post_lambda_name@"
421+ MATRIX_JSON_FILE
422+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_matrix.json"
423+ KERNEL_INPUT_FILE
424+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_kernel.cu.in"
425+ EMBEDDED_INPUT_FILE
426+ "${CMAKE_CURRENT_SOURCE_DIR} /src/neighbors/ivf_flat/jit_lto_kernels/post_lambda_embedded.cpp.in"
427+ OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR} /generated_kernels/post_lambda"
428+ KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
429+ )
430+ endblock ()
431+
432+ set (jit_lto_files
433+ ${interleaved_scan_files}
434+ ${metric_files}
435+ ${filter_files}
436+ ${post_lambda_files}
437+ src/detail/jit_lto/AlgorithmLauncher.cpp
438+ src/detail/jit_lto/AlgorithmPlanner.cpp
439+ src/detail/jit_lto/FragmentEntry.cpp
440+ src/detail/jit_lto/nvjitlink_checker.cpp
441+ )
449442
450443 add_library (
451444 cuvs_objs OBJECT
@@ -674,10 +667,8 @@ if(NOT BUILD_CPU_ONLY)
674667 )
675668
676669 target_compile_definitions (
677- cuvs_objs
678- PRIVATE $<$<BOOL :${BUILD_CAGRA_HNSWLIB} >:CUVS_BUILD_CAGRA_HNSWLIB >
679- $<$<BOOL :${CUVS_NVTX} >:NVTX_ENABLED >
680- $<$<BOOL :${JIT_LTO_COMPILATION} >:CUVS_ENABLE_JIT_LTO >
670+ cuvs_objs PRIVATE $<$<BOOL :${BUILD_CAGRA_HNSWLIB} >:CUVS_BUILD_CAGRA_HNSWLIB >
671+ $<$<BOOL :${CUVS_NVTX} >:NVTX_ENABLED >
681672 )
682673
683674 target_link_libraries (
@@ -752,10 +743,8 @@ if(NOT BUILD_CPU_ONLY)
752743 "$<$<AND :$<COMPILE_LANGUAGE :CUDA >,$<CONFIG :Debug >>:${CUVS_DEBUG_CUDA_FLAGS} >"
753744 )
754745 target_compile_definitions (
755- cuvs
756- PUBLIC $<$<BOOL :${BUILD_CAGRA_HNSWLIB} >:CUVS_BUILD_CAGRA_HNSWLIB >
757- $<$<BOOL :${CUVS_NVTX} >:NVTX_ENABLED >
758- $<$<BOOL :${JIT_LTO_COMPILATION} >:CUVS_ENABLE_JIT_LTO >
746+ cuvs PUBLIC $<$<BOOL :${BUILD_CAGRA_HNSWLIB} >:CUVS_BUILD_CAGRA_HNSWLIB >
747+ $<$<BOOL :${CUVS_NVTX} >:NVTX_ENABLED >
759748 )
760749
761750 target_link_libraries (
@@ -767,11 +756,8 @@ if(NOT BUILD_CPU_ONLY)
767756 $<BUILD_LOCAL_INTERFACE :$<TARGET_NAME_IF_EXISTS :NCCL ::NCCL >>
768757 $<BUILD_LOCAL_INTERFACE :$<TARGET_NAME_IF_EXISTS :hnswlib ::hnswlib >>
769758 $<$<BOOL :${CUVS_NVTX} >:CUDA ::nvtx3 >
770- PRIVATE rmm::rmm
771- $<TARGET_NAME_IF_EXISTS :OpenMP ::OpenMP_CXX >
772- $<COMPILE_ONLY :nvidia ::cutlass ::cutlass >
773- $<COMPILE_ONLY :cuco ::cuco >
774- $<$<BOOL :${JIT_LTO_COMPILATION} >:CUDA ::nvJitLink >
759+ PRIVATE rmm::rmm $<TARGET_NAME_IF_EXISTS :OpenMP ::OpenMP_CXX >
760+ $<COMPILE_ONLY :nvidia ::cutlass ::cutlass > $<COMPILE_ONLY :cuco ::cuco > CUDA::nvJitLink
775761 )
776762 set_property (TARGET cuvs PROPERTY NO_CUDART_DEP ON )
777763
@@ -810,10 +796,8 @@ SECTIONS
810796
811797 target_compile_options (cuvs_static PRIVATE "$<$<COMPILE_LANGUAGE :CXX >:${CUVS_CXX_FLAGS} >" )
812798 target_compile_definitions (
813- cuvs_static
814- PUBLIC $<$<BOOL :${BUILD_CAGRA_HNSWLIB} >:CUVS_BUILD_CAGRA_HNSWLIB >
815- $<$<BOOL :${CUVS_NVTX} >:NVTX_ENABLED >
816- $<$<BOOL :${JIT_LTO_COMPILATION} >:CUVS_ENABLE_JIT_LTO >
799+ cuvs_static PUBLIC $<$<BOOL :${BUILD_CAGRA_HNSWLIB} >:CUVS_BUILD_CAGRA_HNSWLIB >
800+ $<$<BOOL :${CUVS_NVTX} >:NVTX_ENABLED >
817801 )
818802
819803 target_include_directories (cuvs_static INTERFACE "$<INSTALL_INTERFACE :include >" )
@@ -831,7 +815,7 @@ SECTIONS
831815 $<BUILD_LOCAL_INTERFACE :$<TARGET_NAME_IF_EXISTS :hnswlib ::hnswlib >> # header only
832816 PRIVATE rmm::rmm
833817 $<TARGET_NAME_IF_EXISTS :OpenMP ::OpenMP_CXX >
834- $<$< BOOL : ${JIT_LTO_COMPILATION} >: CUDA ::nvJitLink >
818+ CUDA::nvJitLink
835819 $<$<BOOL :${CUVS_NVTX} >:CUDA ::nvtx3 >
836820 $<COMPILE_ONLY :nvidia ::cutlass ::cutlass >
837821 $<COMPILE_ONLY :cuco ::cuco >
0 commit comments