Skip to content

Commit 9fc8d3a

Browse files
committed
update
1 parent 2274ef6 commit 9fc8d3a

File tree

3 files changed

+54
-5
lines changed

3 files changed

+54
-5
lines changed

paddle/fluid/inference/api/analysis_config.cc

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,14 @@ void AnalysisConfig::EnableXpu(int l3_workspace_size, bool locked,
113113
}
114114

115115
void AnalysisConfig::EnableNpu(int device_id) {
116+
#ifdef PADDLE_WITH_ASCEND_CL
117+
use_npu_ = true;
116118
npu_device_id_ = device_id;
119+
#else
120+
LOG(ERROR) << "Please compile with npu to EnableNpu()";
121+
use_npu_ = false;
122+
#endif
123+
117124
Update();
118125
}
119126

@@ -391,7 +398,9 @@ void AnalysisConfig::Update() {
391398
if (info == serialized_info_cache_) return;
392399

393400
// Transfer pass_builder and copy the existing compatible passes.
394-
if (!pass_builder_ || ((use_gpu() ^ pass_builder_->use_gpu()))) {
401+
if (!pass_builder_ || ((use_gpu() ^ pass_builder_->use_gpu())) ||
402+
((use_xpu() ^ pass_builder_->use_xpu())) ||
403+
((use_npu() ^ pass_builder_->use_npu()))) {
395404
if (use_gpu()) {
396405
pass_builder_.reset(new GpuPassStrategy);
397406

paddle/fluid/inference/api/details/zero_copy_tensor.cc

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "paddle/fluid/framework/lod_tensor.h"
1717
#include "paddle/fluid/framework/scope.h"
1818
#include "paddle/fluid/inference/api/paddle_inference_api.h"
19+
#include "paddle/fluid/inference/api/paddle_tensor.h"
1920
#include "paddle/fluid/memory/memcpy.h"
2021
#include "paddle/fluid/platform/enforce.h"
2122

@@ -150,10 +151,26 @@ void Tensor::CopyFromCpu(const T *data) {
150151
PADDLE_THROW(paddle::platform::errors::Unavailable(
151152
"Can not create tensor with XPU place because paddle is not compiled "
152153
"with XPU."));
154+
#endif
155+
} else if (place_ == PlaceType::kNPU) {
156+
#ifdef PADDLE_WITH_ASCEND_CL
157+
paddle::platform::DeviceContextPool &pool =
158+
paddle::platform::DeviceContextPool::Instance();
159+
paddle::platform::NPUPlace npu_place(device_);
160+
auto *t_data = tensor->mutable_data<T>(npu_place);
161+
auto *dev_ctx = static_cast<const paddle::platform::NPUDeviceContext *>(
162+
pool.Get(npu_place));
163+
paddle::memory::Copy(npu_place, static_cast<void *>(t_data),
164+
paddle::platform::CPUPlace(), data, ele_size,
165+
dev_ctx->stream());
166+
#else
167+
PADDLE_THROW(paddle::platform::errors::Unavailable(
168+
"Can not create tensor with NPU place because paddle is not compiled "
169+
"with NPU."));
153170
#endif
154171
} else {
155172
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
156-
"The analysis predictor supports CPU, GPU and XPU now."));
173+
"The analysis predictor supports CPU, GPU, NPU and XPU now."));
157174
}
158175
}
159176

@@ -212,10 +229,26 @@ void Tensor::CopyToCpu(T *data) {
212229
PADDLE_THROW(paddle::platform::errors::Unavailable(
213230
"Can not create tensor with XPU place because paddle is not compiled "
214231
"with XPU."));
232+
#endif
233+
} else if (place_ == PlaceType::kNPU) {
234+
#ifdef PADDLE_WITH_ASCEND_CL
235+
paddle::platform::DeviceContextPool &pool =
236+
paddle::platform::DeviceContextPool::Instance();
237+
auto npu_place = BOOST_GET_CONST(paddle::platform::NPUPlace, t_place);
238+
auto *dev_ctx = static_cast<const paddle::platform::NPUDeviceContext *>(
239+
pool.Get(npu_place));
240+
paddle::memory::Copy(paddle::platform::CPUPlace(),
241+
static_cast<void *>(data), npu_place, t_data,
242+
ele_num * sizeof(T), dev_ctx->stream());
243+
aclrtSynchronizeStream(dev_ctx->stream());
244+
#else
245+
PADDLE_THROW(paddle::platform::errors::Unavailable(
246+
"Can not create tensor with NPU place because paddle is not compiled "
247+
"with NPU."));
215248
#endif
216249
} else {
217250
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
218-
"The analysis predictor supports CPU, GPU and XPU now."));
251+
"The analysis predictor supports CPU, GPU, NPU and XPU now."));
219252
}
220253
}
221254
template PD_INFER_DECL void Tensor::CopyFromCpu<float>(const float *data);

paddle/fluid/inference/api/paddle_pass_builder.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,15 +241,22 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy {
241241
/// mode.
242242
class PD_INFER_DECL XpuPassStrategy final : public PassStrategy {
243243
public:
244-
XpuPassStrategy() : PassStrategy({}) {}
244+
XpuPassStrategy() : PassStrategy({}) { use_xpu_ = true; }
245245
};
246246

247247
/// \class NpuPassStrategy
248248
/// \brief The NPU passes controller, it is used in AnalysisPredictor with NPU
249249
/// mode.
250250
class PD_INFER_DECL NpuPassStrategy final : public PassStrategy {
251251
public:
252-
NpuPassStrategy() : PassStrategy({}) {}
252+
NpuPassStrategy() : PassStrategy({}) { use_npu_ = true; }
253+
254+
/// \brief Construct by copying another NpuPassStrategy object.
255+
/// \param[in] other The NpuPassStrategy object we want to copy.
256+
explicit NpuPassStrategy(const NpuPassStrategy &other)
257+
: PassStrategy(other.AllPasses()) {
258+
use_npu_ = true;
259+
}
253260
};
254261

255262
/// \brief List of tensorRT subgraph passes.

0 commit comments

Comments (0)