Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cmake/phi.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ endfunction()

# Callers of kernel_declare must make sure that every input target exists.
function(kernel_declare TARGET_LIST)
# message("TARGET LIST ${TARGET_LIST}")
foreach(kernel_path ${TARGET_LIST})
# message("kernel path ${kernel_path}" )
file(READ ${kernel_path} kernel_impl)
string(
REGEX
Expand Down Expand Up @@ -111,6 +113,7 @@ function(kernel_declare TARGET_LIST)
endfunction()

function(kernel_library TARGET)
return()
set(common_srcs)
set(cpu_srcs)
set(gpu_srcs)
Expand Down
5 changes: 3 additions & 2 deletions paddle/fluid/inference/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ endif()
# fluid_modules excludes the API interfaces of inference/api and inference/capi_exp
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
get_property(phi_modules GLOBAL PROPERTY PHI_MODULES)
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(utils_modules stringpiece pretty_log string_helper benchmark)

add_subdirectory(api)
Expand All @@ -50,7 +51,7 @@ set(STATIC_INFERENCE_API
reset_tensor_array
analysis_config
paddle_pass_builder
activation_functions
phi
${mkldnn_quantizer_cfg})

# On Windows the GPU static library exceeds the limit, so create_static_lib is
# not used and cc_library is a dummy.
Expand All @@ -59,7 +60,7 @@ if(WIN32 AND WITH_GPU)
${utils_modules})
else()
create_static_lib(paddle_inference ${fluid_modules} ${phi_modules}
${STATIC_INFERENCE_API} ${utils_modules})
${phi_kernels} ${STATIC_INFERENCE_API} ${utils_modules})
endif()

if(NOT APPLE)
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op load_combin

op_library(run_program_op SRCS run_program_op.cc run_program_op.cu.cc DEPS executor_cache ${OP_HEADER_DEPS})
target_link_libraries(run_program_op cuda_graph_with_memory_pool)
op_library(quantize_linear_op DEPS cast_kernel)
op_library(quantize_linear_op DEPS phi)
op_library(save_combine_op DEPS string_array)
op_library(load_combine_op DEPS string_array)

Expand Down
1 change: 1 addition & 0 deletions paddle/phi/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ set(PHI_DEPS
string_tensor
api_scalar
api_int_array)

get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(PHI_DEPS ${PHI_DEPS} ${phi_kernels})

Expand Down
7 changes: 4 additions & 3 deletions paddle/phi/api/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -335,11 +335,12 @@ cc_library(
cc_library(
api_gen_utils
SRCS api_gen_utils.cc
DEPS phi_tensor_raw selected_rows sparse_csr_tensor sparse_coo_tensor)
DEPS phi_tensor_raw selected_rows sparse_csr_tensor sparse_coo_tensor
infermeta_utils)
cc_library(
phi_data_transform
SRCS data_transform.cc
DEPS phi_tensor_raw transfer_layout_kernel cast_kernel copy_kernel tensor)
DEPS phi_tensor_raw phi tensor)
cc_library(
api_custom_impl
SRCS api_custom_impl.cc
Expand Down Expand Up @@ -404,7 +405,7 @@ cc_library(
cc_library(
tensor_copy
SRCS tensor_copy.cc
DEPS phi_tensor_raw copy_kernel kernel_dispatch api_gen_utils)
DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils)
cc_library(
api_scalar
SRCS scalar.cc
Expand Down
210 changes: 95 additions & 115 deletions paddle/phi/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,124 +41,104 @@ set(COMMON_KERNEL_DEPS
selected_rows_functor)
# remove this dep after removing fluid deps on tensor creation
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} phi_api_utils)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta infermeta_utils)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} switch_autotune)

# [ 2. Kernels that most kernels depend on ]
# There are a few kernels that are very basic operations, and most of the
# kernels depend on these kernels.
set(COMMON_BAISC_KERNELS empty_kernel full_kernel)
kernel_library(empty_kernel DEPS ${COMMON_KERNEL_DEPS})
kernel_library(full_kernel DEPS ${COMMON_KERNEL_DEPS} empty_kernel)

# [ 3. Kernels with special dependencies ]
# Some kernels depend on some targets that are not commonly used.
# These targets are not suitable for common dependencies.
# In this case, you need to manually generate them here.
set(AUTOTUNE_KERNELS conv_kernel conv_grad_kernel conv_grad_grad_kernel
conv_transpose_kernel conv_transpose_grad_kernel)
set(MANUAL_BUILD_KERNELS
${AUTOTUNE_KERNELS}
cross_entropy_kernel
adam_kernel
adamw_kernel
deformable_conv_kernel
deformable_conv_grad_kernel
eigh_kernel
gumbel_softmax_kernel
gumbel_softmax_grad_kernel
hierarchical_sigmoid_kernel
hierarchical_sigmoid_grad_kernel
matrix_power_kernel
matrix_power_grad_kernel
maxout_kernel
maxout_grad_kernel
pool_kernel
put_along_axis_kernel
put_along_axis_grad_kernel
segment_pool_kernel
segment_pool_grad_kernel
softmax_kernel
softmax_grad_kernel
take_along_axis_kernel
take_along_axis_grad_kernel
triangular_solve_grad_kernel
determinant_grad_kernel
reduce_sum_kernel
reduce_mean_kernel
rnn_kernel
rnn_grad_kernel
warpctc_kernel
warpctc_grad_kernel)
foreach(src ${AUTOTUNE_KERNELS})
kernel_library(${src} DEPS ${COMMON_KERNEL_DEPS} switch_autotune)
endforeach()
kernel_library(
adam_kernel
DEPS
gflags
glog
flags
${COMMON_KERNEL_DEPS}
selected_rows_functor
threadpool
jit_kernel_helper)
kernel_library(adamw_kernel DEPS ${COMMON_KERNEL_DEPS} adam_kernel)
kernel_library(cross_entropy_kernel DEPS ${COMMON_KERNEL_DEPS} softmax
cross_entropy)
kernel_library(deformable_conv_kernel DEPS ${COMMON_KERNEL_DEPS}
deformable_conv_functor)
kernel_library(deformable_conv_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
deformable_conv_functor)
kernel_library(determinant_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
matrix_inverse)
kernel_library(eigh_kernel DEPS ${COMMON_KERNEL_DEPS} lapack_function)
kernel_library(hierarchical_sigmoid_kernel DEPS ${COMMON_KERNEL_DEPS}
matrix_bit_code)
kernel_library(hierarchical_sigmoid_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
matrix_bit_code)
kernel_library(gumbel_softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(gumbel_softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(reduce_sum_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel)
kernel_library(reduce_mean_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel)
kernel_library(matrix_power_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse)
kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
matrix_inverse)
kernel_library(maxout_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting)
kernel_library(maxout_grad_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting)
kernel_library(pool_kernel DEPS ${COMMON_KERNEL_DEPS} pooling)
kernel_library(put_along_axis_kernel DEPS ${COMMON_KERNEL_DEPS}
gather_scatter_kernel)
kernel_library(put_along_axis_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
gather_scatter_kernel)
kernel_library(segment_pool_kernel DEPS ${COMMON_KERNEL_DEPS} segment_pooling)
kernel_library(segment_pool_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
segment_pooling)
kernel_library(softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(take_along_axis_kernel DEPS ${COMMON_KERNEL_DEPS}
gather_scatter_kernel)
kernel_library(take_along_axis_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
gather_scatter_kernel)
kernel_library(triangular_solve_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
matrix_reduce)
kernel_library(rnn_kernel DEPS ${COMMON_KERNEL_DEPS} concat_and_split_functor
lstm_compute gru_compute)
kernel_library(rnn_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
concat_and_split_functor lstm_compute gru_compute)
kernel_library(warpctc_kernel DEPS ${COMMON_KERNEL_DEPS} phi_dynload_warpctc
sequence_padding sequence_scale)
kernel_library(warpctc_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
phi_dynload_warpctc sequence_padding sequence_scale)

# 4. auto parse and build kernel targets by cmake
register_kernels(EXCLUDES ${COMMON_BAISC_KERNELS} ${MANUAL_BUILD_KERNELS} DEPS
${COMMON_KERNEL_DEPS} ${COMMON_BAISC_KERNELS})
# Extend the shared kernel dependency list with the helper targets that
# individual kernels rely on.
list(
  APPEND
  COMMON_KERNEL_DEPS
  threadpool
  jit_kernel_helper
  softmax
  cross_entropy
  matrix_bit_code
  lapack_function
  lstm_compute
  gru_compute
  deformable_conv_functor
  matrix_reduce
  segment_pooling
  gather_scatter_kernel
  pooling
  maxouting
  matrix_inverse
  phi_dynload_warpctc
  sequence_padding
  sequence_scale)

# phi sparse kernels
add_subdirectory(sparse)
# phi selected_rows kernels
add_subdirectory(selected_rows)
# Extend the shared kernel dependency list with the tensor, kernel-registry
# and utility targets.
list(
  APPEND
  COMMON_KERNEL_DEPS
  dense_tensor
  string_tensor
  sparse_coo_tensor
  sparse_csr_tensor
  kernel_context
  kernel_factory
  arg_map_context
  convert_utils
  lod_utils
  custom_kernel
  string_infermeta
  utf8proc)

copy_if_different(${kernel_declare_file} ${kernel_declare_file_final})
# For strings kernels
add_subdirectory(strings)

# Kernel headers in this directory and in the selected_rows, sparse and
# strings sub-directories.
file(
  GLOB
  kernel_h
  "*.h"
  "selected_rows/*.h"
  "sparse/*.h"
  "strings/*.h")

# Headers under impl/ and selected_rows/impl/.
file(
  GLOB
  kernel_impl_h
  "impl/*.h"
  "selected_rows/impl/*.h")

# Headers under primitive/.
file(
  GLOB
  kernel_primitive_h
  "primitive/*.h")

# .cc kernel sources: each kernel directory together with its cpu/
# sub-directory.
file(GLOB kernel_cc
     "*.cc" "cpu/*.cc"
     "selected_rows/*.cc" "selected_rows/cpu/*.cc"
     "sparse/*.cc" "sparse/cpu/*.cc"
     "strings/*.cc" "strings/cpu/*.cc")

# .cu / .cu.cc kernel sources. The gpudnn/ and kps/ sources are collected into
# this same list rather than into separate variables.
file(GLOB kernel_cu
     "gpu/*.cu" "gpu/*.cu.cc"
     "gpudnn/*.cu"
     "kps/*.cu"
     "selected_rows/gpu/*.cu"
     "sparse/gpu/*.cu"
     "strings/*.cu" "strings/gpu/*.cu")

# .cc kernel sources under xpu/.
file(
  GLOB
  kernel_xpu
  "xpu/*.cc")

# Build the .cc kernel sources collected in kernel_cc into the phi_cpu library.
add_library(phi_cpu ${kernel_cc})
# The list is quoted so that it reaches kernel_declare as one argument, bound
# to its single TARGET_LIST parameter.
kernel_declare("${kernel_cc}")
target_link_libraries(phi_cpu ${COMMON_KERNEL_DEPS})
# Record the kernel library in the PHI_KERNELS global property.
set_property(GLOBAL PROPERTY PHI_KERNELS phi_cpu)

# Build the sources collected in kernel_cu into the phi_gpu library:
# WITH_GPU builds use add_library, WITH_ROCM builds use hip_add_library.
if(WITH_GPU OR WITH_ROCM)
if(WITH_GPU)
add_library(phi_gpu ${kernel_cu})
elseif(WITH_ROCM)
hip_add_library(phi_gpu STATIC ${kernel_cu})
endif()
# The list is quoted so that it reaches kernel_declare as one argument, bound
# to its single TARGET_LIST parameter.
kernel_declare("${kernel_cu}")
target_link_libraries(phi_gpu ${COMMON_KERNEL_DEPS})
# Overwrites the PHI_KERNELS global property so that it names both libraries.
set_property(GLOBAL PROPERTY PHI_KERNELS phi_cpu phi_gpu)
endif()

# Build the sources collected in kernel_xpu (plus kps/*.cu when WITH_XPU_KP is
# set) into the phi_xpu library.
if(WITH_XPU)
  if(WITH_XPU_KP)
    file(GLOB kernel_xpu_kps "kps/*.cu")
    xpu_add_library(phi_xpu STATIC ${kernel_xpu} ${kernel_xpu_kps})
  else()
    add_library(phi_xpu ${kernel_xpu})
  endif()
  # Quote the list: kernel_declare has a single TARGET_LIST parameter, so an
  # unquoted expansion would bind only the first source file to it and the
  # remaining files would never be scanned.
  # NOTE(review): kernel_xpu_kps is not passed to kernel_declare -- confirm
  # whether the kps sources need declarations in the WITH_XPU_KP build.
  kernel_declare("${kernel_xpu}")
  target_link_libraries(phi_xpu ${COMMON_KERNEL_DEPS})
  # Overwrites the PHI_KERNELS global property so that it names both libraries.
  set_property(GLOBAL PROPERTY PHI_KERNELS phi_cpu phi_xpu)
endif()
2 changes: 1 addition & 1 deletion paddle/phi/tests/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ cc_test(test_type_info SRCS test_type_info.cc)
cc_test(
test_kernel_factory
SRCS test_kernel_factory.cc
DEPS kernel_factory scale_kernel)
DEPS kernel_factory phi)
cc_test(
test_sparse_coo_tensor
SRCS test_sparse_coo_tensor.cc
Expand Down