@@ -16,6 +16,7 @@
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/inference/api/paddle_tensor.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/enforce.h"
 
@@ -150,10 +151,26 @@ void Tensor::CopyFromCpu(const T *data) {
     PADDLE_THROW(paddle::platform::errors::Unavailable(
         "Can not create tensor with XPU place because paddle is not compiled "
         "with XPU."));
+#endif
+  } else if (place_ == PlaceType::kNPU) {
+#ifdef PADDLE_WITH_ASCEND_CL
+    paddle::platform::DeviceContextPool &pool =
+        paddle::platform::DeviceContextPool::Instance();
+    paddle::platform::NPUPlace npu_place(device_);
+    auto *t_data = tensor->mutable_data<T>(npu_place);
+    auto *dev_ctx = static_cast<const paddle::platform::NPUDeviceContext *>(
+        pool.Get(npu_place));
+    paddle::memory::Copy(npu_place, static_cast<void *>(t_data),
+                         paddle::platform::CPUPlace(), data, ele_size,
+                         dev_ctx->stream());
+#else
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "Can not create tensor with NPU place because paddle is not compiled "
+        "with NPU."));
 #endif
   } else {
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
-        "The analysis predictor supports CPU, GPU and XPU now."));
+        "The analysis predictor supports CPU, GPU, NPU and XPU now."));
   }
 }
 
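Not part of the diff, but for orientation: the new kNPU branch in CopyFromCpu enqueues the host-to-NPU copy on the NPU device context's stream via paddle::memory::Copy, mirroring the existing GPU path. Callers reach it through the public paddle_infer API; a minimal sketch follows (it assumes the predictor has been configured for an Ascend NPU device, e.g. via an EnableNpu-style config switch, and the input shape is purely illustrative):

#include <vector>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

// Sketch only: push host data into the first input tensor. On an
// NPU-configured predictor this dispatches to the new kNPU branch above,
// issuing an asynchronous copy on the NPU stream.
void FeedInput(paddle_infer::Predictor *predictor,
               const std::vector<float> &host_input) {
  auto input = predictor->GetInputHandle(predictor->GetInputNames()[0]);
  input->Reshape({1, 3, 224, 224});       // illustrative shape
  input->CopyFromCpu(host_input.data());  // host -> NPU copy
}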
@@ -212,10 +229,26 @@ void Tensor::CopyToCpu(T *data) {
     PADDLE_THROW(paddle::platform::errors::Unavailable(
         "Can not create tensor with XPU place because paddle is not compiled "
         "with XPU."));
+#endif
+  } else if (place_ == PlaceType::kNPU) {
+#ifdef PADDLE_WITH_ASCEND_CL
+    paddle::platform::DeviceContextPool &pool =
+        paddle::platform::DeviceContextPool::Instance();
+    auto npu_place = BOOST_GET_CONST(paddle::platform::NPUPlace, t_place);
+    auto *dev_ctx = static_cast<const paddle::platform::NPUDeviceContext *>(
+        pool.Get(npu_place));
+    paddle::memory::Copy(paddle::platform::CPUPlace(),
+                         static_cast<void *>(data), npu_place, t_data,
+                         ele_num * sizeof(T), dev_ctx->stream());
+    aclrtSynchronizeStream(dev_ctx->stream());
+#else
+    PADDLE_THROW(paddle::platform::errors::Unavailable(
+        "Can not create tensor with NPU place because paddle is not compiled "
+        "with NPU."));
 #endif
   } else {
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
-        "The analysis predictor supports CPU, GPU and XPU now."));
+        "The analysis predictor supports CPU, GPU, NPU and XPU now."));
   }
 }
 template PD_INFER_DECL void Tensor::CopyFromCpu<float>(const float *data);
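On the output side, the new CopyToCpu branch issues the device-to-host copy on the same stream and then calls aclrtSynchronizeStream, so the host buffer is safe to read once the call returns. A matching usage sketch, again an illustration rather than code from this PR:

#include <vector>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

// Sketch only: pull the first output tensor back to host memory after
// predictor->Run(); CopyToCpu blocks until the NPU stream has drained.
std::vector<float> FetchOutput(paddle_infer::Predictor *predictor) {
  auto output = predictor->GetOutputHandle(predictor->GetOutputNames()[0]);
  int numel = 1;
  for (int d : output->shape()) numel *= d;  // total element count
  std::vector<float> host_output(numel);
  output->CopyToCpu(host_output.data());     // NPU -> host copy + stream sync
  return host_output;
}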