From 64acbebfc923ecbefa9b57489ccdbf8490c504b0 Mon Sep 17 00:00:00 2001 From: Tao Luo Date: Fri, 5 Jul 2019 12:06:11 +0800 Subject: [PATCH 1/4] add mkldnn shapeblob cache clear strategy test=develop --- .../tests/api/analyzer_mm_dnn_tester.cc | 57 +++++++++++++++++++ paddle/fluid/platform/device_context.cc | 27 ++++++++- paddle/fluid/platform/device_context.h | 9 ++- 3 files changed, 87 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc index 17c670a68cc9cb..92592b7028d059 100644 --- a/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc @@ -172,5 +172,62 @@ TEST(Analyzer_MM_DNN, compare_determine) { input_slots_all); } +#ifdef PADDLE_WITH_MKLDNN +void TestMkldnnCacheClear(int mkldnn_input_shape_cache_size) { + AnalysisConfig config; + SetConfig(&config); + config.EnableMKLDNN(); + // TODO(luotao): explicit following settings will be deprecated after enhance + // config.EnableMKLDNN() interface. 
+ if (mkldnn_input_shape_cache_size > 0) { + platform::set_cur_mkldnn_session_id( + platform::kMKLDNNSessionID_CacheClearing); + platform::set_cur_input_shape_cache_size(mkldnn_input_shape_cache_size); + } + + std::vector input, output; + auto predictor = CreatePaddlePredictor(config); + + int sample_num = 10; + DataRecord data(FLAGS_infer_data, FLAGS_batch_size); + + auto &pool = platform::DeviceContextPool::Instance(); + auto *dev_ctx = dynamic_cast( + pool.Get(platform::CPUPlace())); + for (int i = 0; i < sample_num; i++) { + PrepareInputs(&input, &data, FLAGS_batch_size); + if (mkldnn_input_shape_cache_size > 0) { + std::stringstream ss; + for (size_t i = 0; i < input.size(); i++) { + for (size_t j = 0; j < input[i].shape.size(); ++j) { + ss << input[i].shape[j] << "-"; + } + } + // TODO(luotao): explicit following settings will be deprecated after + // enhance config.EnableMKLDNN() interface. + platform::set_cur_input_shape_str(ss.str()); + } + predictor->Run(input, &output, 1); + } + if (mkldnn_input_shape_cache_size > 0) { + PADDLE_ENFORCE_EQ( + dev_ctx->GetShapeBlobSize(platform::kMKLDNNSessionID_CacheClearing), + mkldnn_input_shape_cache_size); + } else { + PADDLE_ENFORCE_EQ( + dev_ctx->GetShapeBlobSize(platform::kMKLDNNSessionID_Default), 1UL); + } + dev_ctx->ResetBlobMap(); +} + +TEST(Analyzer_MM_DNN, mkldnn_cache_clear) { + // 0 means do not use cache clear strategy. + TestMkldnnCacheClear(0); + // 4 means use cache clear strategy, and the + // mkldnn_input_shape_cache_size is 4. + TestMkldnnCacheClear(4); +} +#endif + } // namespace inference } // namespace paddle diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 0dabe2ed3d92c1..fe18c6a65c2f96 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -407,6 +407,9 @@ thread_local size_t cur_mkldnn_session_id = kMKLDNNSessionID_Default; // - For fixed-shape, it's a null string in default. 
// - For dynamic-shape, it's user specific. thread_local std::string cur_input_shape_str = ""; +// the cache size of different input shapes for MKLDNN. +// Default 1 means fixed input shape, not dynamic shape. +thread_local int cur_input_shape_cache_size = 1; } // namespace void set_cur_mkldnn_session_id(size_t sid) { cur_mkldnn_session_id = sid; } @@ -414,10 +417,22 @@ size_t get_cur_mkldnn_session_id(void) { return cur_mkldnn_session_id; } void set_cur_input_shape_str(std::string input_shape_str) { cur_input_shape_str = input_shape_str; } -std::string get_cur_input_shape_str(void) { return cur_input_shape_str; } +void set_cur_input_shape_cache_size(int input_shape_cache_size) { + cur_input_shape_cache_size = input_shape_cache_size; +} void MKLDNNDeviceContext::ResetBlobMap() const { p_blobmap_->clear(); } +size_t MKLDNNDeviceContext::GetShapeBlobSize(int mkldnn_session_id) const { + BlobMap* pMap = p_blobmap_.get(); + auto map_it = pMap->find(mkldnn_session_id); + if (map_it == pMap->end()) { + LOG(FATAL) << "MKLDNNDeviceContext don't find mkldnn_session_id : " + << mkldnn_session_id; + } + return map_it->second->size(); +} + void MKLDNNDeviceContext::SetBlob(const std::string& name, std::shared_ptr data) const { BlobMap* pMap = p_blobmap_.get(); @@ -441,10 +456,17 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, } // Find KeyBlob for current input shape - std::string cur_input_shape_str = platform::get_cur_input_shape_str(); auto key_it = sBlob->find(cur_input_shape_str); if (key_it == sBlob->end()) { + // In cache clearing mode, cur_input_shape_cache_size defines max pblob + // capacity + if ((tid == kMKLDNNSessionID_CacheClearing) && + (sBlob->size() == static_cast(cur_input_shape_cache_size))) { + VLOG(2) << "tid=" << tid + << ", remove all head blob of shape: " << sBlob->begin()->first; + sBlob->erase(sBlob->begin()->first); + } pBlob = std::shared_ptr(new KeyBlob()); (*sBlob)[cur_input_shape_str] = pBlob; } else { @@ -479,7 +501,6 @@ 
std::shared_ptr MKLDNNDeviceContext::GetBlob( VLOG(2) << "GetBlob: tid=" << tid << ", miss tid\n"; return nullptr; } - std::string cur_input_shape_str = platform::get_cur_input_shape_str(); sBlob = map_it->second; // Find KeyBlob for current input shape secondly diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 1aef2bb45dd796..eec80a3fabdf44 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -389,14 +389,14 @@ using ShapeBlob = std::unordered_map>; using BlobMap = std::unordered_map>; // default mkldnn session id -constexpr size_t kMKLDNNSessionID_Default = 0; +constexpr int kMKLDNNSessionID_Default = 0; // mkldnn session id for cache clearing mode -constexpr size_t kMKLDNNSessionID_CacheClearing = -1; +constexpr int kMKLDNNSessionID_CacheClearing = -1; void set_cur_mkldnn_session_id(size_t); size_t get_cur_mkldnn_session_id(void); void set_cur_input_shape_str(std::string input_shape_str); -std::string get_cur_input_shape_str(void); +void set_cur_input_shape_cache_size(int input_shape_cache_size); class MKLDNNDeviceContext : public CPUDeviceContext { public: @@ -408,6 +408,9 @@ class MKLDNNDeviceContext : public CPUDeviceContext { // Remove all entries from the blob map void ResetBlobMap() const; + // Get the ShapeBlob size by mkldnn_session_id. + size_t GetShapeBlobSize(int mkldnn_session_id) const; + // Set data to blob (i.e. name/data pair). 
Create blob if not existing void SetBlob(const std::string& name, std::shared_ptr data) const; From 4c127caeb970844ca22011b8d7cb7efcc26ff80b Mon Sep 17 00:00:00 2001 From: Tao Luo Date: Fri, 5 Jul 2019 18:03:55 +0800 Subject: [PATCH 2/4] refine with comments test=develop --- .../tests/api/analyzer_mm_dnn_tester.cc | 21 ++++--- paddle/fluid/platform/device_context.cc | 55 ++++++++++--------- paddle/fluid/platform/device_context.h | 10 ++-- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc index 92592b7028d059..ce9ad6ff125011 100644 --- a/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc @@ -173,16 +173,17 @@ TEST(Analyzer_MM_DNN, compare_determine) { } #ifdef PADDLE_WITH_MKLDNN -void TestMkldnnCacheClear(int mkldnn_input_shape_cache_size) { +void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) { AnalysisConfig config; SetConfig(&config); config.EnableMKLDNN(); // TODO(luotao): explicit following settings will be deprecated after enhance // config.EnableMKLDNN() interface. 
- if (mkldnn_input_shape_cache_size > 0) { + if (mkldnn_input_shape_cache_capacity > 0) { platform::set_cur_mkldnn_session_id( platform::kMKLDNNSessionID_CacheClearing); - platform::set_cur_input_shape_cache_size(mkldnn_input_shape_cache_size); + platform::set_cur_input_shape_cache_capacity( + mkldnn_input_shape_cache_capacity); } std::vector input, output; @@ -196,7 +197,7 @@ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_size) { pool.Get(platform::CPUPlace())); for (int i = 0; i < sample_num; i++) { PrepareInputs(&input, &data, FLAGS_batch_size); - if (mkldnn_input_shape_cache_size > 0) { + if (mkldnn_input_shape_cache_capacity > 0) { std::stringstream ss; for (size_t i = 0; i < input.size(); i++) { for (size_t j = 0; j < input[i].shape.size(); ++j) { @@ -209,13 +210,11 @@ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_size) { } predictor->Run(input, &output, 1); } - if (mkldnn_input_shape_cache_size > 0) { - PADDLE_ENFORCE_EQ( - dev_ctx->GetShapeBlobSize(platform::kMKLDNNSessionID_CacheClearing), - mkldnn_input_shape_cache_size); + if (mkldnn_input_shape_cache_capacity > 0) { + PADDLE_ENFORCE_EQ(dev_ctx->GetShapeBlobSize(), + mkldnn_input_shape_cache_capacity); } else { - PADDLE_ENFORCE_EQ( - dev_ctx->GetShapeBlobSize(platform::kMKLDNNSessionID_Default), 1UL); + PADDLE_ENFORCE_EQ(dev_ctx->GetShapeBlobSize(), 1UL); } dev_ctx->ResetBlobMap(); } @@ -224,7 +223,7 @@ TEST(Analyzer_MM_DNN, mkldnn_cache_clear) { // 0 means do not use cache clear strategy. TestMkldnnCacheClear(0); // 4 means use cache clear strategy, and the - // mkldnn_input_shape_cache_size is 4. + // mkldnn_input_shape_cache_capacity is 4. 
TestMkldnnCacheClear(4); } #endif diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index fe18c6a65c2f96..e1eccdf8be605d 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -407,9 +407,9 @@ thread_local size_t cur_mkldnn_session_id = kMKLDNNSessionID_Default; // - For fixed-shape, it's a null string in default. // - For dynamic-shape, it's user specific. thread_local std::string cur_input_shape_str = ""; -// the cache size of different input shapes for MKLDNN. +// the cache capacity of different input shapes for MKLDNN. // Default 1 means fixed input shape, not dynamic shape. -thread_local int cur_input_shape_cache_size = 1; +thread_local int cur_input_shape_cache_capacity = 1; } // namespace void set_cur_mkldnn_session_id(size_t sid) { cur_mkldnn_session_id = sid; } @@ -417,18 +417,18 @@ size_t get_cur_mkldnn_session_id(void) { return cur_mkldnn_session_id; } void set_cur_input_shape_str(std::string input_shape_str) { cur_input_shape_str = input_shape_str; } -void set_cur_input_shape_cache_size(int input_shape_cache_size) { - cur_input_shape_cache_size = input_shape_cache_size; +void set_cur_input_shape_cache_capacity(int input_shape_cache_capacity) { + cur_input_shape_cache_capacity = input_shape_cache_capacity; } void MKLDNNDeviceContext::ResetBlobMap() const { p_blobmap_->clear(); } -size_t MKLDNNDeviceContext::GetShapeBlobSize(int mkldnn_session_id) const { +size_t MKLDNNDeviceContext::GetShapeBlobSize() const { BlobMap* pMap = p_blobmap_.get(); - auto map_it = pMap->find(mkldnn_session_id); + auto map_it = pMap->find(cur_mkldnn_session_id); if (map_it == pMap->end()) { - LOG(FATAL) << "MKLDNNDeviceContext don't find mkldnn_session_id : " - << mkldnn_session_id; + LOG(FATAL) << "MKLDNNDeviceContext don't find cur_mkldnn_session_id : " + << cur_mkldnn_session_id; } return map_it->second->size(); } @@ -439,18 +439,18 @@ void MKLDNNDeviceContext::SetBlob(const 
std::string& name, std::shared_ptr sBlob = nullptr; std::shared_ptr pBlob = nullptr; - int tid = platform::get_cur_mkldnn_session_id(); + int sid = platform::get_cur_mkldnn_session_id(); std::lock_guard lock(*p_mutex_); - // Find ShapeBlob for current thread - auto map_it = pMap->find(tid); + // Find ShapeBlob for current mkldnn session id. + auto map_it = pMap->find(sid); if (map_it == pMap->end()) { // 1st time to set blob in current thread sBlob = std::shared_ptr(new ShapeBlob()); - (*pMap)[tid] = sBlob; - VLOG(2) << "SetBlob: tid=" << tid << ", add new tid\n"; + (*pMap)[sid] = sBlob; + VLOG(2) << "SetBlob: sid=" << sid << ", add new sid\n"; } else { sBlob = map_it->second; } @@ -459,12 +459,13 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, auto key_it = sBlob->find(cur_input_shape_str); if (key_it == sBlob->end()) { - // In cache clearing mode, cur_input_shape_cache_size defines max pblob - // capacity - if ((tid == kMKLDNNSessionID_CacheClearing) && - (sBlob->size() == static_cast(cur_input_shape_cache_size))) { - VLOG(2) << "tid=" << tid - << ", remove all head blob of shape: " << sBlob->begin()->first; + // In cache clearing mode, cur_input_shape_cache_capacity defines + // max pblob capacity + if ((sid == kMKLDNNSessionID_CacheClearing) && + (sBlob->size() == + static_cast(cur_input_shape_cache_capacity))) { + VLOG(2) << "sid=" << sid + << ", remove all blobs of shape: " << sBlob->begin()->first; sBlob->erase(sBlob->begin()->first); } pBlob = std::shared_ptr(new KeyBlob()); @@ -480,7 +481,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, } else { blob_it->second = data; // set data to existing blob } - VLOG(2) << "SetBlob: tid=" << tid << ", add blob=" << name << "\n"; + VLOG(2) << "SetBlob: sid=" << sid << ", add blob=" << name << "\n"; // lock will be automatically released when out of scope return; } @@ -491,14 +492,14 @@ std::shared_ptr MKLDNNDeviceContext::GetBlob( std::shared_ptr sBlob = nullptr; std::shared_ptr pBlob = 
nullptr; - int tid = platform::get_cur_mkldnn_session_id(); + int sid = platform::get_cur_mkldnn_session_id(); std::lock_guard lock(*p_mutex_); - // Find ShapeBlob for current thread firstly - auto map_it = pMap->find(tid); + // Find ShapeBlob for current mkldnn session id firstly + auto map_it = pMap->find(sid); if (map_it == pMap->end()) { - VLOG(2) << "GetBlob: tid=" << tid << ", miss tid\n"; + VLOG(2) << "GetBlob: sid=" << sid << ", miss sid\n"; return nullptr; } sBlob = map_it->second; @@ -506,7 +507,7 @@ std::shared_ptr MKLDNNDeviceContext::GetBlob( // Find KeyBlob for current input shape secondly auto sBlob_it = sBlob->find(cur_input_shape_str); if (sBlob_it == sBlob->end()) { - VLOG(2) << "GetBlob: tid=" << cur_input_shape_str + VLOG(2) << "GetBlob: sid=" << cur_input_shape_str << ", miss input_shape_str\n"; return nullptr; } @@ -516,11 +517,11 @@ std::shared_ptr MKLDNNDeviceContext::GetBlob( auto key_it = pBlob->find(name); if (key_it == pBlob->end()) { - VLOG(2) << "GetBlob tid=" << tid << ", miss blob=" << name << "\n"; + VLOG(2) << "GetBlob sid=" << sid << ", miss blob=" << name << "\n"; return nullptr; } - VLOG(2) << "GetBlob tid=" << tid << ", get blob=" << name << "\n"; + VLOG(2) << "GetBlob sid=" << sid << ", get blob=" << name << "\n"; // lock will be automatically released when out of scope return key_it->second; } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index eec80a3fabdf44..a17a0bdfb9aea3 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -389,14 +389,14 @@ using ShapeBlob = std::unordered_map>; using BlobMap = std::unordered_map>; // default mkldnn session id -constexpr int kMKLDNNSessionID_Default = 0; +constexpr size_t kMKLDNNSessionID_Default = 0; // mkldnn session id for cache clearing mode -constexpr int kMKLDNNSessionID_CacheClearing = -1; +constexpr size_t kMKLDNNSessionID_CacheClearing = -1; void set_cur_mkldnn_session_id(size_t); 
size_t get_cur_mkldnn_session_id(void); void set_cur_input_shape_str(std::string input_shape_str); -void set_cur_input_shape_cache_size(int input_shape_cache_size); +void set_cur_input_shape_cache_capacity(int input_shape_cache_capacity); class MKLDNNDeviceContext : public CPUDeviceContext { public: @@ -408,8 +408,8 @@ class MKLDNNDeviceContext : public CPUDeviceContext { // Remove all entries from the blob map void ResetBlobMap() const; - // Get the ShapeBlob size by mkldnn_session_id. - size_t GetShapeBlobSize(int mkldnn_session_id) const; + // Get the ShapeBlob size in cur_mkldnn_session_id. + size_t GetShapeBlobSize() const; // Set data to blob (i.e. name/data pair). Create blob if not existing void SetBlob(const std::string& name, std::shared_ptr data) const; From 0ae7c5b5d265be897ef476d267b63da9c36f149e Mon Sep 17 00:00:00 2001 From: Tao Luo Date: Fri, 5 Jul 2019 18:38:52 +0800 Subject: [PATCH 3/4] make cache clear strategy safer test=develop --- paddle/fluid/platform/device_context.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index e1eccdf8be605d..fd0576414d2142 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -462,7 +462,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, // In cache clearing mode, cur_input_shape_cache_capacity defines // max pblob capacity if ((sid == kMKLDNNSessionID_CacheClearing) && - (sBlob->size() == + (sBlob->size() >= static_cast(cur_input_shape_cache_capacity))) { VLOG(2) << "sid=" << sid << ", remove all blobs of shape: " << sBlob->begin()->first; From 60aad7dcfaf29d126fbb58275ff122806e445482 Mon Sep 17 00:00:00 2001 From: Tao Luo Date: Sat, 6 Jul 2019 11:16:50 +0800 Subject: [PATCH 4/4] add lock for GetShapeBlobSize test=develop --- paddle/fluid/platform/device_context.cc | 1 + 1 file changed, 1 insertion(+) diff --git 
a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index fd0576414d2142..87b82ec5e390aa 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -424,6 +424,7 @@ void set_cur_input_shape_cache_capacity(int input_shape_cache_capacity) { void MKLDNNDeviceContext::ResetBlobMap() const { p_blobmap_->clear(); } size_t MKLDNNDeviceContext::GetShapeBlobSize() const { + std::lock_guard lock(*p_mutex_); BlobMap* pMap = p_blobmap_.get(); auto map_it = pMap->find(cur_mkldnn_session_id); if (map_it == pMap->end()) {