Skip to content

Commit 9fc8d3a

Browse files
committed
update
1 parent 2274ef6 commit 9fc8d3a

File tree

3 files changed

+54
-5
lines changed

3 files changed

+54
-5
lines changed

paddle/fluid/inference/api/analysis_config.cc

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,14 @@ void AnalysisConfig::EnableXpu(int l3_workspace_size, bool locked,
113113
}
114114

115115
void AnalysisConfig::EnableNpu(int device_id) {
116+
#ifdef PADDLE_WITH_ASCEND_CL
117+
use_npu_ = true;
116118
npu_device_id_ = device_id;
119+
#else
120+
LOG(ERROR) << "Please compile with npu to EnableNpu()";
121+
use_npu_ = false;
122+
#endif
123+
117124
Update();
118125
}
119126

@@ -391,7 +398,9 @@ void AnalysisConfig::Update() {
391398
if (info == serialized_info_cache_) return;
392399

393400
// Transfer pass_builder and copy the existing compatible passes.
394-
if (!pass_builder_ || ((use_gpu() ^ pass_builder_->use_gpu()))) {
401+
if (!pass_builder_ || ((use_gpu() ^ pass_builder_->use_gpu())) ||
402+
((use_xpu() ^ pass_builder_->use_xpu())) ||
403+
((use_npu() ^ pass_builder_->use_npu()))) {
395404
if (use_gpu()) {
396405
pass_builder_.reset(new GpuPassStrategy);
397406

paddle/fluid/inference/api/details/zero_copy_tensor.cc

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "paddle/fluid/framework/lod_tensor.h"
1717
#include "paddle/fluid/framework/scope.h"
1818
#include "paddle/fluid/inference/api/paddle_inference_api.h"
19+
#include "paddle/fluid/inference/api/paddle_tensor.h"
1920
#include "paddle/fluid/memory/memcpy.h"
2021
#include "paddle/fluid/platform/enforce.h"
2122

@@ -150,10 +151,26 @@ void Tensor::CopyFromCpu(const T *data) {
150151
PADDLE_THROW(paddle::platform::errors::Unavailable(
151152
"Can not create tensor with XPU place because paddle is not compiled "
152153
"with XPU."));
154+
#endif
155+
} else if (place_ == PlaceType::kNPU) {
156+
#ifdef PADDLE_WITH_ASCEND_CL
157+
paddle::platform::DeviceContextPool &pool =
158+
paddle::platform::DeviceContextPool::Instance();
159+
paddle::platform::NPUPlace npu_place(device_);
160+
auto *t_data = tensor->mutable_data<T>(npu_place);
161+
auto *dev_ctx = static_cast<const paddle::platform::NPUDeviceContext *>(
162+
pool.Get(npu_place));
163+
paddle::memory::Copy(npu_place, static_cast<void *>(t_data),
164+
paddle::platform::CPUPlace(), data, ele_size,
165+
dev_ctx->stream());
166+
#else
167+
PADDLE_THROW(paddle::platform::errors::Unavailable(
168+
"Can not create tensor with NPU place because paddle is not compiled "
169+
"with NPU."));
153170
#endif
154171
} else {
155172
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
156-
"The analysis predictor supports CPU, GPU and XPU now."));
173+
"The analysis predictor supports CPU, GPU, NPU and XPU now."));
157174
}
158175
}
159176

@@ -212,10 +229,26 @@ void Tensor::CopyToCpu(T *data) {
212229
PADDLE_THROW(paddle::platform::errors::Unavailable(
213230
"Can not create tensor with XPU place because paddle is not compiled "
214231
"with XPU."));
232+
#endif
233+
} else if (place_ == PlaceType::kNPU) {
234+
#ifdef PADDLE_WITH_ASCEND_CL
235+
paddle::platform::DeviceContextPool &pool =
236+
paddle::platform::DeviceContextPool::Instance();
237+
auto npu_place = BOOST_GET_CONST(paddle::platform::NPUPlace, t_place);
238+
auto *dev_ctx = static_cast<const paddle::platform::NPUDeviceContext *>(
239+
pool.Get(npu_place));
240+
paddle::memory::Copy(paddle::platform::CPUPlace(),
241+
static_cast<void *>(data), npu_place, t_data,
242+
ele_num * sizeof(T), dev_ctx->stream());
243+
aclrtSynchronizeStream(dev_ctx->stream());
244+
#else
245+
PADDLE_THROW(paddle::platform::errors::Unavailable(
246+
"Can not create tensor with NPU place because paddle is not compiled "
247+
"with NPU."));
215248
#endif
216249
} else {
217250
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
218-
"The analysis predictor supports CPU, GPU and XPU now."));
251+
"The analysis predictor supports CPU, GPU, NPU and XPU now."));
219252
}
220253
}
221254
template PD_INFER_DECL void Tensor::CopyFromCpu<float>(const float *data);

paddle/fluid/inference/api/paddle_pass_builder.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,15 +241,22 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy {
241241
/// mode.
242242
class PD_INFER_DECL XpuPassStrategy final : public PassStrategy {
243243
public:
244-
XpuPassStrategy() : PassStrategy({}) {}
244+
XpuPassStrategy() : PassStrategy({}) { use_xpu_ = true; }
245245
};
246246

247247
/// \class NpuPassStrategy
248248
/// \brief The NPU passes controller, it is used in AnalysisPredictor with NPU
249249
/// mode.
250250
class PD_INFER_DECL NpuPassStrategy final : public PassStrategy {
251251
public:
252-
NpuPassStrategy() : PassStrategy({}) {}
252+
NpuPassStrategy() : PassStrategy({}) { use_npu_ = true; }
253+
254+
/// \brief Construct by copying another NpuPassStrategy object.
255+
/// \param[in] other The NpuPassStrategy object we want to copy.
256+
explicit NpuPassStrategy(const NpuPassStrategy &other)
257+
: PassStrategy(other.AllPasses()) {
258+
use_npu_ = true;
259+
}
253260
};
254261

255262
/// \brief List of tensorRT subgraph passes.

0 commit comments

Comments (0)