PaddlePaddle · NHZlX · Mar 20, 2019 · Mar 20, 2019 · luotao1 · Mar 20, 2019
diff --git a/paddle/fluid/inference/anakin/engine.cc b/paddle/fluid/inference/anakin/engine.cc
@@ -71,6 +71,7 @@ void AnakinEngine<TargetT, PrecisionType, RunType>::Execute(
     const std::map<std::string, framework::LoDTensor *> &inputs,
     const std::map<std::string, framework::LoDTensor *> &outputs,
     cudaStream_t stream) {
+  cudaDeviceSynchronize();
   for (const auto &input : inputs) {
     auto *tensor = input.second;
     auto *data = tensor->data<float>();

diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc
@@ -74,6 +74,19 @@ T *ZeroCopyTensor::data(PaddlePlace *place, int *size) const {
   return res;
 }
 
+PaddleDType ZeroCopyTensor::type() {  // framework dtype -> PaddleDType
+  EAGER_GET_TENSOR;
+  const auto dtype = tensor->type();
+  if (dtype == framework::proto::VarType::INT64) {
+    return PaddleDType::INT64;
+  }
+  if (dtype == framework::proto::VarType::FP32) {
+    return PaddleDType::FLOAT32;
+  }
+  LOG(ERROR) << "unknown type, only support float32 and int64 now.";
+  return PaddleDType::FLOAT32;  // fallback for any other dtype
+}
+
 template <typename T>
 void ZeroCopyTensor::copy_from_cpu(const T *data) {
   EAGER_GET_TENSOR;
@@ -119,6 +132,7 @@ void ZeroCopyTensor::copy_to_cpu(T *data) {
         static_cast<const platform::CUDADeviceContext *>(pool.Get(gpu_place));
     memory::Copy(platform::CPUPlace(), static_cast<void *>(data), gpu_place,
                  t_data, ele_num * sizeof(T), dev_ctx->stream());
+    cudaDeviceSynchronize();
 #else
     PADDLE_THROW("Not compile with CUDA, should not reach here.");
 #endif

diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h
@@ -176,6 +176,8 @@ class ZeroCopyTensor {
     device_ = device;
   }
 
+  PaddleDType type();
+
  protected:
   explicit ZeroCopyTensor(void* scope) : scope_{scope} {}
   void SetName(const std::string& name) { name_ = name; }
@@ -190,6 +192,7 @@ class ZeroCopyTensor {
   // performance.
   mutable void* tensor_{nullptr};
   PaddlePlace place_;
+  PaddleDType dtype_;
   int device_;
 };