1 change: 1 addition & 0 deletions paddle/fluid/inference/analysis/argument.h
@@ -148,6 +148,7 @@ struct Argument {
// Pass a set of op types to enable their mkldnn kernels
DECL_ARGUMENT_FIELD(mkldnn_enabled_op_types, MKLDNNEnabledOpTypes,
std::unordered_set<std::string>);
DECL_ARGUMENT_FIELD(mkldnn_cache_capacity, MkldnnCacheCapacity, int);

#ifdef PADDLE_WITH_MKLDNN
// A set of op types to enable their quantized kernels
11 changes: 11 additions & 0 deletions paddle/fluid/inference/api/analysis_config.cc
@@ -115,6 +115,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// MKLDNN related.
CP_MEMBER(use_mkldnn_);
CP_MEMBER(mkldnn_enabled_op_types_);
CP_MEMBER(mkldnn_cache_capacity_);
// Quantization related.
CP_MEMBER(use_mkldnn_quantizer_);
CP_MEMBER(mkldnn_quantizer_config_);
@@ -162,6 +163,15 @@ void AnalysisConfig::EnableMKLDNN() {
Update();
}

void AnalysisConfig::SetMkldnnCacheCapacity(int capacity) {
#ifdef PADDLE_WITH_MKLDNN
mkldnn_cache_capacity_ = capacity;
#else
LOG(ERROR) << "Please compile with MKLDNN first to set the MKLDNN cache capacity";
mkldnn_cache_capacity_ = 0;
#endif
}
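
For reference, a minimal usage sketch of the new option (the model path, the capacity value, and the surrounding setup are illustrative, not part of this patch):

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./model_dir");  // hypothetical model directory
  config.EnableMKLDNN();
  // Cache MKLDNN primitives for at most 10 distinct input shapes; the
  // default 0 leaves the cache-clearing mode disabled.
  config.SetMkldnnCacheCapacity(10);
  auto predictor = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
  return 0;
}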

void AnalysisConfig::EnableMkldnnQuantizer() {
#ifdef PADDLE_WITH_MKLDNN
if (!mkldnn_quantizer_config_)
@@ -343,6 +353,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << use_ngraph_;

ss << use_mkldnn_;
ss << mkldnn_cache_capacity_;
for (auto &item : mkldnn_enabled_op_types_) ss << item;
ss << ";";

42 changes: 40 additions & 2 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -185,10 +185,47 @@ bool AnalysisPredictor::PrepareExecutor() {
return true;
}

void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
#ifdef PADDLE_WITH_MKLDNN
VLOG(2) << "AnalysisPredictor::Run get_cur_mkldnn_session_id="
<< platform::get_cur_mkldnn_session_id();
// In cache clearing mode.
if (config_.mkldnn_cache_capacity_ > 0) {
VLOG(2) << "In mkldnn cache clear mode.";
platform::set_cur_mkldnn_session_id(
platform::kMKLDNNSessionID_CacheClearing);
platform::set_cur_input_shape_cache_capacity(
config_.mkldnn_cache_capacity_);
// Set current_input_shape for caching dynamic shape.
std::stringstream ss;
for (size_t i = 0; i < inputs.size(); ++i) {
for (size_t j = 0; j < inputs[i].shape.size(); ++j) {
ss << inputs[i].shape[j] << "-";
}
}
VLOG(2) << "Set input shape=" << ss.str();
platform::set_cur_input_shape_str(ss.str());
}
#endif
}
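
The key built above is every dimension of every input joined with '-', so cached primitives are reused only between runs whose input shapes all match. A standalone sketch of the same construction (BuildShapeKey is a hypothetical helper, not part of the patch):

#include <sstream>
#include <string>
#include <vector>

// Mirrors the key format used in MkldnnPreSet: a single input of shape
// {1, 3, 224, 224} yields "1-3-224-224-".
std::string BuildShapeKey(const std::vector<std::vector<int>> &shapes) {
  std::stringstream ss;
  for (const auto &shape : shapes) {
    for (int dim : shape) ss << dim << "-";
  }
  return ss.str();
}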

void AnalysisPredictor::MkldnnPostReset() {
#ifdef PADDLE_WITH_MKLDNN
// In cache clearing mode.
if (config_.mkldnn_cache_capacity_ > 0) {
paddle::platform::set_cur_mkldnn_session_id(
platform::kMKLDNNSessionID_Default);
}
#endif
}

bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size) {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) MkldnnPreSet(inputs);
#endif
VLOG(3) << "Predictor::predict";
inference::Timer timer;
timer.tic();
@@ -230,7 +267,9 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
// Restore cpu_math_library_num_threads to 1 in order to avoid thread
// conflicts when integrating it into a deployment service.
paddle::platform::SetNumThreads(1);

#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) MkldnnPostReset();
#endif
return true;
}
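
A design note: MkldnnPreSet() and MkldnnPostReset() must bracket the body of Run(), so any early return between them would leave the session id unrestored. A hypothetical RAII guard (not in this patch, and assuming it is befriended by AnalysisPredictor so it can reach the private members) would make the reset automatic on every exit path:

// Sketch only: restores the MKLDNN session state in the destructor.
class MkldnnSessionGuard {
 public:
  MkldnnSessionGuard(AnalysisPredictor *predictor,
                     const std::vector<PaddleTensor> &inputs)
      : predictor_(predictor) {
    predictor_->MkldnnPreSet(inputs);
  }
  ~MkldnnSessionGuard() { predictor_->MkldnnPostReset(); }

 private:
  AnalysisPredictor *predictor_;
};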

@@ -595,7 +634,6 @@ bool AnalysisPredictor::ZeroCopyRun() {
// Restore cpu_math_library_num_threads to 1 in order to avoid thread
// conflicts when integrating it into a deployment service.
paddle::platform::SetNumThreads(1);

return true;
}

5 changes: 5 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.h
@@ -109,6 +109,11 @@ class AnalysisPredictor : public PaddlePredictor {
template <typename T>
void GetFetchOne(const framework::LoDTensor &fetchs,
PaddleTensor *output_data);
// PreSet and PostReset for MKLDNN multi-thread and dynamic-shape input.
// Used in AnalysisPredictor::Run(); AnalysisPredictor::ZeroCopyRun() is
// not supported yet.
void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);
void MkldnnPostReset();

#if PADDLE_WITH_TENSORRT
// When we use Paddle-TRT INT8 engine, we need to generate calibration table
7 changes: 7 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -184,6 +184,10 @@ struct AnalysisConfig {
/** Turn on MKLDNN.
*/
void EnableMKLDNN();
/** Set the cache capacity of different input shapes for MKLDNN.
 * Default 0 means the cache-clearing strategy is disabled.
 */
void SetMkldnnCacheCapacity(int capacity);
/** A boolean state telling whether to use the MKLDNN.
*/
bool mkldnn_enabled() const { return use_mkldnn_; }
@@ -316,8 +320,11 @@ struct AnalysisConfig {
std::vector<std::string> anakin_passes_filter_;
std::vector<std::string> anakin_ops_filter_;

// MKLDNN related.
int mkldnn_cache_capacity_{0};
bool use_mkldnn_quantizer_{false};
std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;

// If the config is already used on a predictor, it becomes invalid.
// Any config can only be used with one predictor.
// Variables held by config can take up a lot of memory in some cases.
58 changes: 47 additions & 11 deletions paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
@@ -173,20 +173,55 @@ TEST(Analyzer_MM_DNN, compare_determine) {
}

#ifdef PADDLE_WITH_MKLDNN
void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity,
std::vector<std::vector<PaddleTensor>> *outputs) {
AnalysisConfig config;
SetConfig(&config);
config.EnableMKLDNN();
// TODO(luotao): the following explicit settings will be deprecated after
// the config.EnableMKLDNN() interface is enhanced.
config.SetMkldnnCacheCapacity(mkldnn_input_shape_cache_capacity);

std::vector<PaddleTensor> input;
auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);

int sample_num = 10;
DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
outputs->resize(sample_num);

for (int i = 0; i < sample_num; i++) {
PrepareInputs(&input, &data, FLAGS_batch_size);
predictor->Run(input, &(*outputs)[i], 1);
}
}

TEST(Analyzer_MM_DNN, mkldnn_cache_clear) {
std::vector<std::vector<PaddleTensor>> outputs, cache_outputs;
// 0 means do not use cache clear strategy.
TestMkldnnCacheClear(0, &outputs);
// 4 means use cache clear strategy, and the
// mkldnn_input_shape_cache_capacity is 4.
TestMkldnnCacheClear(4, &cache_outputs);
// compare the result.
for (size_t i = 0; i < outputs.size(); i++) {
CompareResult(outputs[i], cache_outputs[i]);
}
}

void TestMkldnnShapeBlobSize(int mkldnn_input_shape_cache_capacity) {
AnalysisConfig config;
SetConfig(&config);
config.EnableMKLDNN();
config.SwitchUseFeedFetchOps(false);
// Since AnalysisPredictor::Run() resets cur_mkldnn_session_id to the default
// before it finishes, we use AnalysisPredictor::ZeroCopyRun() here to check
// the mkldnn_shape_blob_size.
if (mkldnn_input_shape_cache_capacity > 0) {
platform::set_cur_mkldnn_session_id(
platform::kMKLDNNSessionID_CacheClearing);
platform::set_cur_input_shape_cache_capacity(
mkldnn_input_shape_cache_capacity);
}

std::vector<PaddleTensor> input, output;
std::vector<PaddleTensor> input;
auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);

int sample_num = 10;
@@ -195,36 +230,37 @@ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
auto &pool = platform::DeviceContextPool::Instance();
auto *dev_ctx = dynamic_cast<platform::MKLDNNDeviceContext *>(
pool.Get(platform::CPUPlace()));
// clear before test
dev_ctx->ResetBlobMap();

for (int i = 0; i < sample_num; i++) {
PrepareInputs(&input, &data, FLAGS_batch_size);
ConvertPaddleTensorToZeroCopyTensor(predictor.get(), input);
if (mkldnn_input_shape_cache_capacity > 0) {
std::stringstream ss;
for (size_t i = 0; i < input.size(); i++) {
for (size_t j = 0; j < input[i].shape.size(); ++j) {
ss << input[i].shape[j] << "-";
}
}
// TODO(luotao): the following explicit settings will be deprecated after
// the config.EnableMKLDNN() interface is enhanced.
platform::set_cur_input_shape_str(ss.str());
}
predictor->Run(input, &output, 1);
predictor->ZeroCopyRun();
}
if (mkldnn_input_shape_cache_capacity > 0) {
PADDLE_ENFORCE_EQ(dev_ctx->GetShapeBlobSize(),
mkldnn_input_shape_cache_capacity);
} else {
PADDLE_ENFORCE_EQ(dev_ctx->GetShapeBlobSize(), 1UL);
}
dev_ctx->ResetBlobMap();
}

TEST(Analyzer_MM_DNN, mkldnn_cache_clear) {
TEST(Analyzer_MM_DNN, mkldnn_shape_blob_size) {
// 0 means do not use cache clear strategy.
TestMkldnnCacheClear(0);
TestMkldnnShapeBlobSize(0);
// 4 means use cache clear strategy, and the
// mkldnn_input_shape_cache_capacity is 4.
TestMkldnnCacheClear(4);
TestMkldnnShapeBlobSize(4);
}
#endif

3 changes: 2 additions & 1 deletion paddle/fluid/platform/device_context.cc
@@ -462,7 +462,8 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
if (key_it == sBlob->end()) {
// In cache clearing mode, cur_input_shape_cache_capacity defines
// max pblob capacity
if ((sid == kMKLDNNSessionID_CacheClearing) &&
if ((static_cast<size_t>(sid) == kMKLDNNSessionID_CacheClearing) &&
sBlob->size() &&
(sBlob->size() >=
static_cast<size_t>(cur_input_shape_cache_capacity))) {
VLOG(2) << "sid=" << sid
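
The hunk above (truncated here) adds a capacity check before a new shape key is inserted. A simplified standalone model of that policy (the container type and the choice of which entry to evict are illustrative; the real code keys blobs per session id and per shape string):

#include <map>
#include <memory>
#include <string>

using ShapeBlobMap = std::map<std::string, std::shared_ptr<void>>;

// Sketch of the guard added in MKLDNNDeviceContext::SetBlob: in
// cache-clearing mode, once `capacity` distinct shape keys are cached,
// an existing entry is evicted before a new shape key is inserted.
void InsertShapeBlob(ShapeBlobMap *blobs, const std::string &shape_key,
                     std::shared_ptr<void> blob, size_t capacity) {
  if (blobs->find(shape_key) == blobs->end() && !blobs->empty() &&
      blobs->size() >= capacity) {
    blobs->erase(blobs->begin());  // drop one entry to respect the capacity
  }
  (*blobs)[shape_key] = std::move(blob);
}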