@@ -11,6 +11,7 @@ function(op_library TARGET)
1111 set (cu_cc_srcs)
1212 set (hip_cc_srcs)
1313 set (xpu_cc_srcs)
14+ set (npu_cc_srcs)
1415 set (cudnn_cu_cc_srcs)
1516 set (miopen_cu_cc_srcs)
1617 set (cudnn_cu_srcs)
@@ -20,6 +21,9 @@ function(op_library TARGET)
2021 set (mkldnn_cc_srcs)
2122 set (MKLDNN_FILE)
2223 set (op_common_deps operator op_registry math_function layer common_infer_shape_functions)
24+ if (WITH_ASCEND_CL)
25+ set (op_common_deps ${op_common_deps} npu_op_runner)
26+ endif ()
2327 # Option `UNITY` is used to specify that operator `TARGET` will be compiled with Unity Build.
2428 set (options UNITY)
2529 set (oneValueArgs "" )
@@ -85,6 +89,12 @@ function(op_library TARGET)
8589 list (APPEND xpu_cc_srcs ${XPU_FILE} .cc)
8690 endif ()
8791 endif ()
92+ if (WITH_ASCEND_CL)
93+ string (REPLACE "_op" "_op_npu" NPU_FILE "${TARGET} " )
94+ if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR} /${NPU_FILE} .cc)
95+ list (APPEND npu_cc_srcs ${NPU_FILE} .cc)
96+ endif ()
97+ endif ()
8898 else ()
8999 foreach (src ${op_library_SRCS} )
90100 if (WITH_ROCM AND ${src} MATCHES ".*_cudnn_op.cu$" )
@@ -107,6 +117,8 @@ function(op_library TARGET)
107117 list (APPEND cu_cc_srcs ${src} )
108118 elseif (WITH_XPU AND ${src} MATCHES ".*_op_xpu.cc$" )
109119 list (APPEND xpu_cc_srcs ${src} )
120+ elseif (WITH_ASCEND_CL AND ${src} MATCHES ".*_op_npu.cc$" )
121+ list (APPEND npu_cc_srcs ${src} )
110122 elseif (${src} MATCHES ".*\\ .cc$" )
111123 list (APPEND cc_srcs ${src} )
112124 else ()
@@ -176,7 +188,7 @@ function(op_library TARGET)
176188 # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
177189 if (WITH_UNITY_BUILD AND op_library_UNITY)
178190 # Combine the cc source files.
179- compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} )
191+ compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${npu_cc_srcs} )
180192 if (TARGET ${UNITY_TARGET} )
181193 # If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`.
182194 target_sources (${UNITY_TARGET} PRIVATE ${unity_target_cc_sources} )
@@ -187,7 +199,7 @@ function(op_library TARGET)
187199 # Add alias library to handle dependencies.
188200 add_library (${TARGET} ALIAS ${UNITY_TARGET} )
189201 else ()
190- cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} DEPS ${op_library_DEPS}
202+ cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${npu_cc_srcs} DEPS ${op_library_DEPS}
191203 ${op_common_deps} )
192204 endif ()
193205 endif ()
@@ -207,6 +219,7 @@ function(op_library TARGET)
207219 # The registration of USE_OP, please refer to paddle/fluid/framework/op_registry.h.
208220 # Note that it's enough to add just one operator to pybind in a *_op.cc file.
209221 # And for detail pybind information, please see generated paddle/pybind/pybind.h.
222+ set (ORIGINAL_TARGET ${TARGET} )
210223 file (READ ${TARGET} .cc TARGET_CONTENT)
211224 string (REGEX MATCH "REGISTER_OPERATOR\\ (.*REGISTER_OPERATOR\\ (" multi_register "${TARGET_CONTENT} " )
212225 # [ \t\r\n]* is used for blank characters
@@ -239,8 +252,9 @@ function(op_library TARGET)
239252 list (LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
240253 list (LENGTH xpu_cc_srcs xpu_cc_srcs_len)
241254 list (LENGTH miopen_cu_cc_srcs miopen_cu_cc_srcs_len)
255+ list (LENGTH npu_cc_srcs npu_cc_srcs_len)
242256 if (${pybind_flag} EQUAL 0 AND ${mkldnn_cc_srcs_len} EQUAL 0 AND ${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0 AND
243- ${hip_srcs_len} EQUAL 0 AND ${hip_cc_srcs_len} EQUAL 0 AND ${miopen_cu_cc_srcs_len} EQUAL 0 AND ${xpu_cc_srcs_len} EQUAL 0)
257+ ${hip_srcs_len} EQUAL 0 AND ${hip_cc_srcs_len} EQUAL 0 AND ${miopen_cu_cc_srcs_len} EQUAL 0 AND ${xpu_cc_srcs_len} EQUAL 0 AND ${npu_cc_srcs_len} EQUAL 0 )
244258 file (APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET} );\n " )
245259 set (pybind_flag 1)
246260 endif ()
@@ -280,6 +294,26 @@ function(op_library TARGET)
280294 if (WITH_XPU AND ${xpu_cc_srcs_len} GREATER 0)
281295 file (APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET} , XPU);\n " )
282296 endif ()
297+
298+ if (WITH_ASCEND_CL AND ${npu_cc_srcs_len} GREATER 0)
299+ file (READ ${ORIGINAL_TARGET} _npu.cc TARGET_NPU_CONTENT)
300+ # Be careful: unlike the REGISTER_OPERATOR match above, this matches from REGISTER_OP_NPU_KERNEL( onward.
301+ string (REGEX MATCH "REGISTER_OP_NPU_KERNEL\\ (.*" multi_npu_register "${TARGET_NPU_CONTENT} " )
302+ # [ \t\r\n]* is used for blank characters
303+ string (REGEX MATCH "REGISTER_OP_NPU_KERNEL\\ ([ \t\r\n ]*[a-z0-9_]*," one_npu_register "${multi_npu_register} " )
304+
305+ if (one_npu_register STREQUAL "" )
306+ string (REPLACE "_op" "" NPU_TARGET "${TARGET} " )
307+ else ()
308+ string (REPLACE "REGISTER_OP_NPU_KERNEL(" "" NPU_TARGET "${one_npu_register} " )
309+ string (REPLACE "," "" NPU_TARGET "${NPU_TARGET} " )
310+ # [ \t\r\n]+ is used for blank characters.
311+ # Here we use '+' instead of '*' since it is a REPLACE operation.
312+ string (REGEX REPLACE "[ \t\r\n ]+" "" NPU_TARGET "${NPU_TARGET} " )
313+ endif ()
314+ file (APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${NPU_TARGET} , NPU);\n " )
315+ endif ()
316+
283317 # pybind USE_OP_DEVICE_KERNEL for MKLDNN
284318 if (WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
285319 # Append first implemented MKLDNN activation operator
@@ -330,6 +364,7 @@ function(register_operators)
330364 file (GLOB OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR} " "*_op.cc" )
331365 string (REPLACE "_mkldnn" "" OPS "${OPS} " )
332366 string (REPLACE "_xpu" "" OPS "${OPS} " )
367+ string (REPLACE "_npu" "" OPS "${OPS} " )
333368 string (REPLACE ".cc" "" OPS "${OPS} " )
334369 list (REMOVE_DUPLICATES OPS)
335370 list (LENGTH register_operators_DEPS register_operators_DEPS_len)
0 commit comments