From 64acbebfc923ecbefa9b57489ccdbf8490c504b0 Mon Sep 17 00:00:00 2001 From: Tao Luo Date: Fri, 5 Jul 2019 12:06:11 +0800 Subject: [PATCH 1/4] add mkldnn shapeblob cache clear strategy test=develop --- .../tests/api/analyzer_mm_dnn_tester.cc | 57 +++++++++++++++++++ paddle/fluid/platform/device_context.cc | 27 ++++++++- paddle/fluid/platform/device_context.h | 9 ++- 3 files changed, 87 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc index 17c670a68cc9cb..92592b7028d059 100644 --- a/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc @@ -172,5 +172,62 @@ TEST(Analyzer_MM_DNN, compare_determine) { input_slots_all); } +#ifdef PADDLE_WITH_MKLDNN +void TestMkldnnCacheClear(int mkldnn_input_shape_cache_size) { + AnalysisConfig config; + SetConfig(&config); + config.EnableMKLDNN(); + // TODO(luotao): explicit following settings will be deprecated after enhance + // config.EnableMKLDNN() interface. 
+ if (mkldnn_input_shape_cache_size > 0) { + platform::set_cur_mkldnn_session_id( + platform::kMKLDNNSessionID_CacheClearing); + platform::set_cur_input_shape_cache_size(mkldnn_input_shape_cache_size); + } + + std::vector input, output; + auto predictor = CreatePaddlePredictor(config); + + int sample_num = 10; + DataRecord data(FLAGS_infer_data, FLAGS_batch_size); + + auto &pool = platform::DeviceContextPool::Instance(); + auto *dev_ctx = dynamic_cast( + pool.Get(platform::CPUPlace())); + for (int i = 0; i < sample_num; i++) { + PrepareInputs(&input, &data, FLAGS_batch_size); + if (mkldnn_input_shape_cache_size > 0) { + std::stringstream ss; + for (size_t i = 0; i < input.size(); i++) { + for (size_t j = 0; j < input[i].shape.size(); ++j) { + ss << input[i].shape[j] << "-"; + } + } + // TODO(luotao): explicit following settings will be deprecated after + // enhance config.EnableMKLDNN() interface. + platform::set_cur_input_shape_str(ss.str()); + } + predictor->Run(input, &output, 1); + } + if (mkldnn_input_shape_cache_size > 0) { + PADDLE_ENFORCE_EQ( + dev_ctx->GetShapeBlobSize(platform::kMKLDNNSessionID_CacheClearing), + mkldnn_input_shape_cache_size); + } else { + PADDLE_ENFORCE_EQ( + dev_ctx->GetShapeBlobSize(platform::kMKLDNNSessionID_Default), 1UL); + } + dev_ctx->ResetBlobMap(); +} + +TEST(Analyzer_MM_DNN, mkldnn_cache_clear) { + // 0 means do not use cache clear strategy. + TestMkldnnCacheClear(0); + // 4 means use cache clear strategy, and the + // mkldnn_input_shape_cache_size is 4. + TestMkldnnCacheClear(4); +} +#endif + } // namespace inference } // namespace paddle diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 0dabe2ed3d92c1..fe18c6a65c2f96 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -407,6 +407,9 @@ thread_local size_t cur_mkldnn_session_id = kMKLDNNSessionID_Default; // - For fixed-shape, it's a null string in default. 
// - For dynamic-shape, it's user specific. thread_local std::string cur_input_shape_str = ""; +// the cache size of different input shapes for MKLDNN. +// Default 1 means fixed input shape, not dynamic shape. +thread_local int cur_input_shape_cache_size = 1; } // namespace void set_cur_mkldnn_session_id(size_t sid) { cur_mkldnn_session_id = sid; } @@ -414,10 +417,22 @@ size_t get_cur_mkldnn_session_id(void) { return cur_mkldnn_session_id; } void set_cur_input_shape_str(std::string input_shape_str) { cur_input_shape_str = input_shape_str; } -std::string get_cur_input_shape_str(void) { return cur_input_shape_str; } +void set_cur_input_shape_cache_size(int input_shape_cache_size) { + cur_input_shape_cache_size = input_shape_cache_size; +} void MKLDNNDeviceContext::ResetBlobMap() const { p_blobmap_->clear(); } +size_t MKLDNNDeviceContext::GetShapeBlobSize(int mkldnn_session_id) const { + BlobMap* pMap = p_blobmap_.get(); + auto map_it = pMap->find(mkldnn_session_id); + if (map_it == pMap->end()) { + LOG(FATAL) << "MKLDNNDeviceContext don't find mkldnn_session_id : " + << mkldnn_session_id; + } + return map_it->second->size(); +} + void MKLDNNDeviceContext::SetBlob(const std::string& name, std::shared_ptr data) const { BlobMap* pMap = p_blobmap_.get(); @@ -441,10 +456,17 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, } // Find KeyBlob for current input shape - std::string cur_input_shape_str = platform::get_cur_input_shape_str(); auto key_it = sBlob->find(cur_input_shape_str); if (key_it == sBlob->end()) { + // In cache clearing mode, cur_input_shape_cache_size defines max pblob + // capacity + if ((tid == kMKLDNNSessionID_CacheClearing) && + (sBlob->size() == static_cast(cur_input_shape_cache_size))) { + VLOG(2) << "tid=" << tid + << ", remove all head blob of shape: " << sBlob->begin()->first; + sBlob->erase(sBlob->begin()->first); + } pBlob = std::shared_ptr(new KeyBlob()); (*sBlob)[cur_input_shape_str] = pBlob; } else { @@ -479,7 +501,6 @@ 
std::shared_ptr MKLDNNDeviceContext::GetBlob( VLOG(2) << "GetBlob: tid=" << tid << ", miss tid\n"; return nullptr; } - std::string cur_input_shape_str = platform::get_cur_input_shape_str(); sBlob = map_it->second; // Find KeyBlob for current input shape secondly diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 1aef2bb45dd796..eec80a3fabdf44 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -389,14 +389,14 @@ using ShapeBlob = std::unordered_map>; using BlobMap = std::unordered_map>; // default mkldnn session id -constexpr size_t kMKLDNNSessionID_Default = 0; +constexpr int kMKLDNNSessionID_Default = 0; // mkldnn session id for cache clearing mode -constexpr size_t kMKLDNNSessionID_CacheClearing = -1; +constexpr int kMKLDNNSessionID_CacheClearing = -1; void set_cur_mkldnn_session_id(size_t); size_t get_cur_mkldnn_session_id(void); void set_cur_input_shape_str(std::string input_shape_str); -std::string get_cur_input_shape_str(void); +void set_cur_input_shape_cache_size(int input_shape_cache_size); class MKLDNNDeviceContext : public CPUDeviceContext { public: @@ -408,6 +408,9 @@ class MKLDNNDeviceContext : public CPUDeviceContext { // Remove all entries from the blob map void ResetBlobMap() const; + // Get the ShapeBlob size by mkldnn_session_id. + size_t GetShapeBlobSize(int mkldnn_session_id) const; + // Set data to blob (i.e. name/data pair). 
Create blob if not existing void SetBlob(const std::string& name, std::shared_ptr data) const; From 4c127caeb970844ca22011b8d7cb7efcc26ff80b Mon Sep 17 00:00:00 2001 From: Tao Luo Date: Fri, 5 Jul 2019 18:03:55 +0800 Subject: [PATCH 2/4] refine with comments test=develop --- .../tests/api/analyzer_mm_dnn_tester.cc | 21 ++++--- paddle/fluid/platform/device_context.cc | 55 ++++++++++--------- paddle/fluid/platform/device_context.h | 10 ++-- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc index 92592b7028d059..ce9ad6ff125011 100644 --- a/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc @@ -173,16 +173,17 @@ TEST(Analyzer_MM_DNN, compare_determine) { } #ifdef PADDLE_WITH_MKLDNN -void TestMkldnnCacheClear(int mkldnn_input_shape_cache_size) { +void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) { AnalysisConfig config; SetConfig(&config); config.EnableMKLDNN(); // TODO(luotao): explicit following settings will be deprecated after enhance // config.EnableMKLDNN() interface. 
- if (mkldnn_input_shape_cache_size > 0) { + if (mkldnn_input_shape_cache_capacity > 0) { platform::set_cur_mkldnn_session_id( platform::kMKLDNNSessionID_CacheClearing); - platform::set_cur_input_shape_cache_size(mkldnn_input_shape_cache_size); + platform::set_cur_input_shape_cache_capacity( + mkldnn_input_shape_cache_capacity); } std::vector input, output; @@ -196,7 +197,7 @@ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_size) { pool.Get(platform::CPUPlace())); for (int i = 0; i < sample_num; i++) { PrepareInputs(&input, &data, FLAGS_batch_size); - if (mkldnn_input_shape_cache_size > 0) { + if (mkldnn_input_shape_cache_capacity > 0) { std::stringstream ss; for (size_t i = 0; i < input.size(); i++) { for (size_t j = 0; j < input[i].shape.size(); ++j) { @@ -209,13 +210,11 @@ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_size) { } predictor->Run(input, &output, 1); } - if (mkldnn_input_shape_cache_size > 0) { - PADDLE_ENFORCE_EQ( - dev_ctx->GetShapeBlobSize(platform::kMKLDNNSessionID_CacheClearing), - mkldnn_input_shape_cache_size); + if (mkldnn_input_shape_cache_capacity > 0) { + PADDLE_ENFORCE_EQ(dev_ctx->GetShapeBlobSize(), + mkldnn_input_shape_cache_capacity); } else { - PADDLE_ENFORCE_EQ( - dev_ctx->GetShapeBlobSize(platform::kMKLDNNSessionID_Default), 1UL); + PADDLE_ENFORCE_EQ(dev_ctx->GetShapeBlobSize(), 1UL); } dev_ctx->ResetBlobMap(); } @@ -224,7 +223,7 @@ TEST(Analyzer_MM_DNN, mkldnn_cache_clear) { // 0 means do not use cache clear strategy. TestMkldnnCacheClear(0); // 4 means use cache clear strategy, and the - // mkldnn_input_shape_cache_size is 4. + // mkldnn_input_shape_cache_capacity is 4. 
TestMkldnnCacheClear(4); } #endif diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index fe18c6a65c2f96..e1eccdf8be605d 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -407,9 +407,9 @@ thread_local size_t cur_mkldnn_session_id = kMKLDNNSessionID_Default; // - For fixed-shape, it's a null string in default. // - For dynamic-shape, it's user specific. thread_local std::string cur_input_shape_str = ""; -// the cache size of different input shapes for MKLDNN. +// the cache capacity of different input shapes for MKLDNN. // Default 1 means fixed input shape, not dynamic shape. -thread_local int cur_input_shape_cache_size = 1; +thread_local int cur_input_shape_cache_capacity = 1; } // namespace void set_cur_mkldnn_session_id(size_t sid) { cur_mkldnn_session_id = sid; } @@ -417,18 +417,18 @@ size_t get_cur_mkldnn_session_id(void) { return cur_mkldnn_session_id; } void set_cur_input_shape_str(std::string input_shape_str) { cur_input_shape_str = input_shape_str; } -void set_cur_input_shape_cache_size(int input_shape_cache_size) { - cur_input_shape_cache_size = input_shape_cache_size; +void set_cur_input_shape_cache_capacity(int input_shape_cache_capacity) { + cur_input_shape_cache_capacity = input_shape_cache_capacity; } void MKLDNNDeviceContext::ResetBlobMap() const { p_blobmap_->clear(); } -size_t MKLDNNDeviceContext::GetShapeBlobSize(int mkldnn_session_id) const { +size_t MKLDNNDeviceContext::GetShapeBlobSize() const { BlobMap* pMap = p_blobmap_.get(); - auto map_it = pMap->find(mkldnn_session_id); + auto map_it = pMap->find(cur_mkldnn_session_id); if (map_it == pMap->end()) { - LOG(FATAL) << "MKLDNNDeviceContext don't find mkldnn_session_id : " - << mkldnn_session_id; + LOG(FATAL) << "MKLDNNDeviceContext don't find cur_mkldnn_session_id : " + << cur_mkldnn_session_id; } return map_it->second->size(); } @@ -439,18 +439,18 @@ void MKLDNNDeviceContext::SetBlob(const 
std::string& name, std::shared_ptr sBlob = nullptr; std::shared_ptr pBlob = nullptr; - int tid = platform::get_cur_mkldnn_session_id(); + int sid = platform::get_cur_mkldnn_session_id(); std::lock_guard lock(*p_mutex_); - // Find ShapeBlob for current thread - auto map_it = pMap->find(tid); + // Find ShapeBlob for current mkldnn session id. + auto map_it = pMap->find(sid); if (map_it == pMap->end()) { // 1st time to set blob in current thread sBlob = std::shared_ptr(new ShapeBlob()); - (*pMap)[tid] = sBlob; - VLOG(2) << "SetBlob: tid=" << tid << ", add new tid\n"; + (*pMap)[sid] = sBlob; + VLOG(2) << "SetBlob: sid=" << sid << ", add new sid\n"; } else { sBlob = map_it->second; } @@ -459,12 +459,13 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, auto key_it = sBlob->find(cur_input_shape_str); if (key_it == sBlob->end()) { - // In cache clearing mode, cur_input_shape_cache_size defines max pblob - // capacity - if ((tid == kMKLDNNSessionID_CacheClearing) && - (sBlob->size() == static_cast(cur_input_shape_cache_size))) { - VLOG(2) << "tid=" << tid - << ", remove all head blob of shape: " << sBlob->begin()->first; + // In cache clearing mode, cur_input_shape_cache_capacity defines + // max pblob capacity + if ((sid == kMKLDNNSessionID_CacheClearing) && + (sBlob->size() == + static_cast(cur_input_shape_cache_capacity))) { + VLOG(2) << "sid=" << sid + << ", remove all blobs of shape: " << sBlob->begin()->first; sBlob->erase(sBlob->begin()->first); } pBlob = std::shared_ptr(new KeyBlob()); @@ -480,7 +481,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, } else { blob_it->second = data; // set data to existing blob } - VLOG(2) << "SetBlob: tid=" << tid << ", add blob=" << name << "\n"; + VLOG(2) << "SetBlob: sid=" << sid << ", add blob=" << name << "\n"; // lock will be automatically released when out of scope return; } @@ -491,14 +492,14 @@ std::shared_ptr MKLDNNDeviceContext::GetBlob( std::shared_ptr sBlob = nullptr; std::shared_ptr pBlob = 
nullptr; - int tid = platform::get_cur_mkldnn_session_id(); + int sid = platform::get_cur_mkldnn_session_id(); std::lock_guard lock(*p_mutex_); - // Find ShapeBlob for current thread firstly - auto map_it = pMap->find(tid); + // Find ShapeBlob for current mkldnn session id firstly + auto map_it = pMap->find(sid); if (map_it == pMap->end()) { - VLOG(2) << "GetBlob: tid=" << tid << ", miss tid\n"; + VLOG(2) << "GetBlob: sid=" << sid << ", miss sid\n"; return nullptr; } sBlob = map_it->second; @@ -506,7 +507,7 @@ std::shared_ptr MKLDNNDeviceContext::GetBlob( // Find KeyBlob for current input shape secondly auto sBlob_it = sBlob->find(cur_input_shape_str); if (sBlob_it == sBlob->end()) { - VLOG(2) << "GetBlob: tid=" << cur_input_shape_str + VLOG(2) << "GetBlob: sid=" << cur_input_shape_str << ", miss input_shape_str\n"; return nullptr; } @@ -516,11 +517,11 @@ std::shared_ptr MKLDNNDeviceContext::GetBlob( auto key_it = pBlob->find(name); if (key_it == pBlob->end()) { - VLOG(2) << "GetBlob tid=" << tid << ", miss blob=" << name << "\n"; + VLOG(2) << "GetBlob sid=" << sid << ", miss blob=" << name << "\n"; return nullptr; } - VLOG(2) << "GetBlob tid=" << tid << ", get blob=" << name << "\n"; + VLOG(2) << "GetBlob sid=" << sid << ", get blob=" << name << "\n"; // lock will be automatically released when out of scope return key_it->second; } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index eec80a3fabdf44..a17a0bdfb9aea3 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -389,14 +389,14 @@ using ShapeBlob = std::unordered_map>; using BlobMap = std::unordered_map>; // default mkldnn session id -constexpr int kMKLDNNSessionID_Default = 0; +constexpr size_t kMKLDNNSessionID_Default = 0; // mkldnn session id for cache clearing mode -constexpr int kMKLDNNSessionID_CacheClearing = -1; +constexpr size_t kMKLDNNSessionID_CacheClearing = -1; void set_cur_mkldnn_session_id(size_t); 
size_t get_cur_mkldnn_session_id(void); void set_cur_input_shape_str(std::string input_shape_str); -void set_cur_input_shape_cache_size(int input_shape_cache_size); +void set_cur_input_shape_cache_capacity(int input_shape_cache_capacity); class MKLDNNDeviceContext : public CPUDeviceContext { public: @@ -408,8 +408,8 @@ class MKLDNNDeviceContext : public CPUDeviceContext { // Remove all entries from the blob map void ResetBlobMap() const; - // Get the ShapeBlob size by mkldnn_session_id. - size_t GetShapeBlobSize(int mkldnn_session_id) const; + // Get the ShapeBlob size in cur_mkldnn_session_id. + size_t GetShapeBlobSize() const; // Set data to blob (i.e. name/data pair). Create blob if not existing void SetBlob(const std::string& name, std::shared_ptr data) const; From 0ae7c5b5d265be897ef476d267b63da9c36f149e Mon Sep 17 00:00:00 2001 From: Tao Luo Date: Fri, 5 Jul 2019 18:38:52 +0800 Subject: [PATCH 3/4] make cache clear strategy safer test=develop --- paddle/fluid/platform/device_context.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index e1eccdf8be605d..fd0576414d2142 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -462,7 +462,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, // In cache clearing mode, cur_input_shape_cache_capacity defines // max pblob capacity if ((sid == kMKLDNNSessionID_CacheClearing) && - (sBlob->size() == + (sBlob->size() >= static_cast(cur_input_shape_cache_capacity))) { VLOG(2) << "sid=" << sid << ", remove all blobs of shape: " << sBlob->begin()->first; From 60aad7dcfaf29d126fbb58275ff122806e445482 Mon Sep 17 00:00:00 2001 From: Tao Luo Date: Sat, 6 Jul 2019 11:16:50 +0800 Subject: [PATCH 4/4] add lock for GetShapeBlobSize test=develop --- paddle/fluid/platform/device_context.cc | 1 + 1 file changed, 1 insertion(+) diff --git 
a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index fd0576414d2142..87b82ec5e390aa 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -424,6 +424,7 @@ void set_cur_input_shape_cache_capacity(int input_shape_cache_capacity) { void MKLDNNDeviceContext::ResetBlobMap() const { p_blobmap_->clear(); } size_t MKLDNNDeviceContext::GetShapeBlobSize() const { + std::lock_guard lock(*p_mutex_); BlobMap* pMap = p_blobmap_.get(); auto map_it = pMap->find(cur_mkldnn_session_id); if (map_it == pMap->end()) {