4 changes: 3 additions & 1 deletion paddle/fluid/inference/api/analysis_config.cc
@@ -114,6 +114,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// MKLDNN related.
CP_MEMBER(use_mkldnn_);
CP_MEMBER(mkldnn_enabled_op_types_);
CP_MEMBER(mkldnn_disable_cache_);
// Quantization related.
CP_MEMBER(use_mkldnn_quantizer_);
CP_MEMBER(mkldnn_quantizer_config_);
@@ -150,9 +151,10 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
Update();
}

void AnalysisConfig::EnableMKLDNN() {
void AnalysisConfig::EnableMKLDNN(int mkldnn_disable_cache) {
#ifdef PADDLE_WITH_MKLDNN
use_mkldnn_ = true;
mkldnn_disable_cache_ = mkldnn_disable_cache;
#else
LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN";
use_mkldnn_ = false;
28 changes: 28 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -197,6 +197,15 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size) {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
VLOG(3) << "AnalysisPredictor::Run get_cur_thread_id="
<< paddle::platform::get_cur_thread_id()
<< ", mkldnn_disable_cache_=" << config_.mkldnn_disable_cache_
<< "\n";
if (paddle::platform::get_cur_thread_id() == 0)
paddle::platform::set_cur_thread_id(config_.mkldnn_disable_cache_);
#endif
VLOG(3) << "Predictor::predict";
inference::Timer timer;
timer.tic();
@@ -238,6 +247,11 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
paddle::platform::SetNumThreads(1);
#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
// Reset the thread id after this run so a thread reused from a pool does not keep the cache-disable flag
if (config_.mkldnn_disable_cache_ > 0) paddle::platform::set_cur_thread_id(0);
#endif

return true;
}
@@ -595,6 +609,15 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(

bool AnalysisPredictor::ZeroCopyRun() {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
VLOG(3) << "AnalysisPredictor::ZeroCopyRun get_cur_thread_id="
<< paddle::platform::get_cur_thread_id()
<< ", mkldnn_disable_cache_=" << config_.mkldnn_disable_cache_
<< "\n";
if (paddle::platform::get_cur_thread_id() == 0)
paddle::platform::set_cur_thread_id(config_.mkldnn_disable_cache_);
#endif
executor_->Run();
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
@@ -603,6 +626,11 @@ bool AnalysisPredictor::ZeroCopyRun() {
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
paddle::platform::SetNumThreads(1);
#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
// Reset the thread id after this run so a thread reused from a pool does not keep the cache-disable flag
if (config_.mkldnn_disable_cache_ > 0) paddle::platform::set_cur_thread_id(0);
#endif

return true;
}
4 changes: 3 additions & 1 deletion paddle/fluid/inference/api/paddle_analysis_config.h
@@ -177,8 +177,9 @@ struct AnalysisConfig {
bool ngraph_enabled() const { return use_ngraph_; }

/** Turn on MKLDNN.
* @param mkldnn_disable_cache a value greater than 0 disables the MKLDNN cache; the default 0 keeps caching enabled
*/
void EnableMKLDNN();
void EnableMKLDNN(int mkldnn_disable_cache = 0);
/** A boolean state telling whether to use the MKLDNN.
*/
bool mkldnn_enabled() const { return use_mkldnn_; }
@@ -287,6 +288,7 @@ struct AnalysisConfig {
bool use_ngraph_{false};
bool use_mkldnn_{false};
std::unordered_set<std::string> mkldnn_enabled_op_types_;
int mkldnn_disable_cache_{0};

bool model_from_memory_{false};

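For context, a minimal usage sketch of the new parameter (not part of this patch): the model path and the CreatePaddlePredictor flow are assumed from the usual inference API, only EnableMKLDNN(int) itself is introduced here.

// Hedged sketch: pass a value > 0 to EnableMKLDNN to run without the MKLDNN
// primitive cache; the default 0 keeps the existing caching behaviour.
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void RunWithoutMkldnnCache() {
  paddle::AnalysisConfig config;
  config.SetModel("./model_dir");  // hypothetical model directory
  config.EnableMKLDNN(1);          // > 0 disables the per-thread MKLDNN cache
  auto predictor = paddle::CreatePaddlePredictor(config);
  // ... feed inputs and call predictor->Run(...) as usual
}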
7 changes: 7 additions & 0 deletions paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
@@ -81,6 +81,13 @@ std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt));
platform::MKLDNNHandler::AppendKey(&key,
std::to_string(multi_input[0]->format()));
if (platform::get_cur_thread_id() == 0) {
auto tid = std::this_thread::get_id();
std::stringstream ss;
ss << tid;
platform::MKLDNNHandler::AppendKey(&key, "-t:");
platform::MKLDNNHandler::AppendKey(&key, ss.str());
}
return key;
}

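The block added above makes cached keys thread-unique when caching is enabled; a standalone sketch of that suffixing follows (AppendThreadSuffix is a made-up helper mirroring the AppendKey calls, not part of the patch).

#include <sstream>
#include <string>
#include <thread>

// Append "-t:<thread id>" so two threads sharing the cache never collide on
// the same key; skipped when the cache is disabled (cur_thread_id > 0).
std::string AppendThreadSuffix(std::string key) {
  std::ostringstream ss;
  ss << std::this_thread::get_id();
  return key + "-t:" + ss.str();
}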
9 changes: 6 additions & 3 deletions paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
@@ -221,6 +221,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
user_weights_memory_p, pipeline, is_test);

std::shared_ptr<mkldnn::memory> dst_memory_p;
std::shared_ptr<mkldnn::memory> user_residual_memory_p;

if (fuse_residual_conn) {
auto residual_param = ctx.Input<Tensor>("ResidualData");
@@ -243,7 +244,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {

auto user_residual_md = platform::MKLDNNMemDesc(
residual_data_tz, residual_data_type, residual_param->format());
auto user_residual_memory_p = handler.AcquireResidualDataMemory(
user_residual_memory_p = handler.AcquireResidualDataMemory(
user_residual_md, to_void_cast<T>(residual_param_data));

dst_memory_p = handler.AcquireDstMemoryFromResidualDataMemory(
@@ -263,14 +264,16 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {

// create convolution op primitive
std::shared_ptr<mkldnn::convolution_forward> conv_p;
std::shared_ptr<mkldnn::memory> user_bias_memory_p;
std::shared_ptr<mkldnn::memory> bias_memory_p;
if (bias) {
const T* bias_data = bias->data<T>();
auto user_bias_md = platform::MKLDNNMemDesc(
{bias_tz}, platform::MKLDNNGetDataType<T>(), memory::format::x);
auto user_bias_memory_p =
user_bias_memory_p =
handler.AcquireBiasMemory(user_bias_md, to_void_cast<T>(bias_data));

auto bias_memory_p =
bias_memory_p =
handler.AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline);
conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p,
bias_memory_p, dst_memory_p);
15 changes: 12 additions & 3 deletions paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
@@ -48,6 +48,14 @@ std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt));
platform::MKLDNNHandler::AppendKey(&key, std::to_string(fmt));
platform::MKLDNNHandler::AppendKey(&key, suffix);

if (platform::get_cur_thread_id() == 0) {
auto tid = std::this_thread::get_id();
std::stringstream ss;
ss << tid;
platform::MKLDNNHandler::AppendKey(&key, "-t:");
platform::MKLDNNHandler::AppendKey(&key, ss.str());
}
return key;
}

@@ -130,6 +138,7 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {

auto pool_p =
std::static_pointer_cast<pooling_forward>(dev_ctx.GetBlob(key_pool_p));
std::shared_ptr<mkldnn::memory> src_memory, dst_memory;
if (pool_p == nullptr) {
const std::vector<int>& padding_left_top(paddings);
std::vector<int> padding_right_bottom(paddings);
@@ -158,9 +167,9 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
// save pool_pd into global device context to be referred in backward path
if (!is_test) dev_ctx.SetBlob(key_pool_pd, pool_pd);

auto src_memory = std::make_shared<memory>(pool_pd->src_primitive_desc(),
to_void_cast<T>(input_data));
auto dst_memory =
src_memory = std::make_shared<memory>(pool_pd->src_primitive_desc(),
to_void_cast<T>(input_data));
dst_memory =
std::make_shared<memory>(pool_pd->dst_primitive_desc(), output_data);

dev_ctx.SetBlob(key_pool_src_mem_p, src_memory);
5 changes: 4 additions & 1 deletion paddle/fluid/platform/device_context.cc
@@ -413,6 +413,9 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,

int tid = platform::get_cur_thread_id();

// The thread id doubles as a cache switch: tid > 0 means the MKLDNN cache is disabled, so skip storing the blob
if (tid > 0) return;

std::lock_guard<std::mutex> lock(*p_mutex_);

// Find KeyBlob for current thread
@@ -434,7 +437,7 @@
} else {
key_it->second = data; // set data to existing blob
}

VLOG(3) << "MKLDNNDeviceContext::SetBlob " << name << "\n";
// lock will be automatically released when out of scope
return;
}
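An illustrative summary of the convention the hunks above rely on (sketch only; IsMkldnnCacheEnabled is a made-up helper, not part of the patch).

// The per-thread id is reused as a cache switch by this patch:
//   0  -> caching on: blobs are stored per thread under thread-suffixed keys
//   >0 -> caching off: SetBlob() returns early and nothing is stored
bool IsMkldnnCacheEnabled() {
  return paddle::platform::get_cur_thread_id() == 0;
}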
3 changes: 3 additions & 0 deletions paddle/fluid/platform/mkldnn_reuse.h
@@ -38,6 +38,9 @@ class MKLDNNHandler {
std::stringstream ss;
ss << tid;
key_ = key_common_ + "-t:" + ss.str();
if (platform::get_cur_thread_id() > 0) {
key_ = key_common_;
}
}

std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
@@ -71,7 +71,7 @@ def on_compression_begin(self, context):
infer_config.switch_ir_optim(True)
infer_config.disable_gpu()
infer_config.set_model(self.fp32_model_path)
infer_config.enable_mkldnn()
infer_config.enable_mkldnn(0)
infer_config.set_cpu_math_library_num_threads(
self.cpu_math_library_num_threads)
