From fcc05d1f9d03319c1597acc5df8e3cbdfd791a92 Mon Sep 17 00:00:00 2001 From: Leo Zhao Date: Thu, 13 Jun 2019 19:52:33 +0800 Subject: [PATCH 1/9] create new api to indicate detect thread usage --- paddle/fluid/inference/api/analysis_predictor.cc | 12 ++++++++++++ paddle/fluid/inference/api/analysis_predictor.h | 1 + paddle/fluid/platform/device_context.cc | 3 ++- paddle/fluid/platform/device_context.h | 4 ++++ paddle/fluid/platform/mkldnn_reuse.h | 1 + 5 files changed, 20 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 5d9d5a3178aaa3..cf68a307e9ba1d 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -193,6 +193,18 @@ void AnalysisPredictor::SetMkldnnThreadID(int tid) { #endif } +void AnalysisPredictor::SetMkldnnMode(int mode) { +#ifdef PADDLE_WITH_MKLDNN + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto *dev_ctx = + dynamic_cast(pool.Get(place_)); + + dev_ctx->SetMode(mode); +#else + LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN"; +#endif +} + bool AnalysisPredictor::Run(const std::vector &inputs, std::vector *output_data, int batch_size) { diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index b5e134ced70f8b..c06b45b8832cac 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -81,6 +81,7 @@ class AnalysisPredictor : public PaddlePredictor { framework::ProgramDesc &program() { return *inference_program_; } void SetMkldnnThreadID(int tid); + void SetMkldnnMode(int mode); std::string GetSerializedProgram() const override; diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index caaf0e2c50c3ed..8372b78d8308bb 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -396,6 +396,7 @@ MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place) : CPUDeviceContext(place), engine_(mkldnn::engine::cpu, 0), p_blobmap_() { p_blobmap_.reset(new BlobMap()); p_mutex_.reset(new std::mutex()); + mode_ = 0; } namespace { @@ -434,7 +435,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, } else { key_it->second = data; // set data to existing blob } - + // std::cout << "MKLDNNDeviceContext::SetBlob " << name << "\n"; // lock will be automatically released when out of scope return; } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 812181563e6e55..5ae4d10284e841 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -397,7 +397,11 @@ class MKLDNNDeviceContext : public CPUDeviceContext { // Find a saved blob. Return nullptr if not found std::shared_ptr GetBlob(const std::string& name) const; + void SetMode(int mode) { mode_ = mode; } + int GetMode() const { return mode_; } + private: + int mode_; mkldnn::engine engine_; std::shared_ptr p_blobmap_; std::shared_ptr p_mutex_; diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index f1fb6b156aedcb..c9a142070dca3f 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -38,6 +38,7 @@ class MKLDNNHandler { std::stringstream ss; ss << tid; key_ = key_common_ + "-t:" + ss.str(); + if (dev_ctx_.GetMode() == 1) key_ = key_common_; } std::shared_ptr AcquireSrcMemory( From ffc6fc07749c3be83f6fa8af4fd50bb537c44dc6 Mon Sep 17 00:00:00 2001 From: Leo Zhao Date: Thu, 13 Jun 2019 20:58:31 +0800 Subject: [PATCH 2/9] add api into paddle_api --- paddle/fluid/inference/api/paddle_api.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h index 87f40f09eb9bb5..07c808fb2ca5d9 100644 --- a/paddle/fluid/inference/api/paddle_api.h +++ b/paddle/fluid/inference/api/paddle_api.h @@ -277,6 +277,8 @@ class PaddlePredictor { return "NotImplemented"; } + virtual void SetMkldnnMode(int mode) {} + /** The common configs for all the predictors. */ struct Config { From 4953979e12089bc9fcd7ff93c61622b0c645dcf4 Mon Sep 17 00:00:00 2001 From: Leo Zhao Date: Thu, 13 Jun 2019 23:10:55 +0800 Subject: [PATCH 3/9] use another setting to avoid in one devicecontext --- paddle/fluid/inference/api/analysis_predictor.cc | 3 +++ paddle/fluid/platform/mkldnn_reuse.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index cf68a307e9ba1d..8ad0b34064a11f 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -195,11 +195,14 @@ void AnalysisPredictor::SetMkldnnThreadID(int tid) { void AnalysisPredictor::SetMkldnnMode(int mode) { #ifdef PADDLE_WITH_MKLDNN +#if 0 platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto *dev_ctx = dynamic_cast(pool.Get(place_)); dev_ctx->SetMode(mode); +#endif + platform::set_cur_thread_id(mode); #else LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN"; #endif diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index c9a142070dca3f..85495226b61d80 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -38,7 +38,7 @@ class MKLDNNHandler { std::stringstream ss; ss << tid; key_ = key_common_ + "-t:" + ss.str(); - if (dev_ctx_.GetMode() == 1) key_ = key_common_; + if (platform::get_cur_thread_id() == 1) key_ = key_common_; } std::shared_ptr AcquireSrcMemory( From 923f038149e8f43f2f6f07287916e9d0fa91a393 Mon Sep 17 00:00:00 2001 From: Leo Zhao Date: Fri, 14 Jun 2019 09:32:26 +0800 Subject: [PATCH 4/9] use Analysis Config to set reuse id and fix few potential mkldnn reuse kernel issues --- paddle/fluid/inference/api/analysis_config.cc | 1 + .../fluid/inference/api/analysis_predictor.cc | 23 +++++++------------ .../fluid/inference/api/analysis_predictor.h | 1 - .../inference/api/paddle_analysis_config.h | 6 +++++ paddle/fluid/inference/api/paddle_api.h | 2 -- .../tests/api/analyzer_resnet50_tester.cc | 1 + .../operators/mkldnn/concat_mkldnn_op.cc | 7 ++++++ .../fluid/operators/mkldnn/pool_mkldnn_op.cc | 8 +++++++ paddle/fluid/platform/device_context.cc | 3 +-- paddle/fluid/platform/device_context.h | 4 ---- paddle/fluid/platform/mkldnn_reuse.h | 2 +- 11 files changed, 33 insertions(+), 25 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 890c90697bcd52..bd9be16a411fdb 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -114,6 +114,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { // MKLDNN related. CP_MEMBER(use_mkldnn_); CP_MEMBER(mkldnn_enabled_op_types_); + CP_MEMBER(mkldnn_reuse_id_); // Quantization related. CP_MEMBER(use_mkldnn_quantizer_); CP_MEMBER(mkldnn_quantizer_config_); diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 8ad0b34064a11f..4dab2d7c70db38 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -193,25 +193,14 @@ void AnalysisPredictor::SetMkldnnThreadID(int tid) { #endif } -void AnalysisPredictor::SetMkldnnMode(int mode) { -#ifdef PADDLE_WITH_MKLDNN -#if 0 - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - auto *dev_ctx = - dynamic_cast(pool.Get(place_)); - - dev_ctx->SetMode(mode); -#endif - platform::set_cur_thread_id(mode); -#else - LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN"; -#endif -} - bool AnalysisPredictor::Run(const std::vector &inputs, std::vector *output_data, int batch_size) { paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); +#ifdef PADDLE_WITH_MKLDNN + if (paddle::platform::get_cur_thread_id() == 0) + paddle::platform::set_cur_thread_id(config_.mkldnn_reuse_id_); +#endif VLOG(3) << "Predictor::predict"; inference::Timer timer; timer.tic(); @@ -610,6 +599,10 @@ std::unique_ptr AnalysisPredictor::GetOutputTensor( bool AnalysisPredictor::ZeroCopyRun() { paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); +#ifdef PADDLE_WITH_MKLDNN + if (paddle::platform::get_cur_thread_id() == 0) + paddle::platform::set_cur_thread_id(config_.mkldnn_reuse_id_); +#endif executor_->Run(); // Fix TensorArray reuse not cleaned bug. tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_); diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index c06b45b8832cac..b5e134ced70f8b 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -81,7 +81,6 @@ class AnalysisPredictor : public PaddlePredictor { framework::ProgramDesc &program() { return *inference_program_; } void SetMkldnnThreadID(int tid); - void SetMkldnnMode(int mode); std::string GetSerializedProgram() const override; diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index e3682d27054a12..c2ab68d0f0ac6d 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -202,6 +202,11 @@ struct AnalysisConfig { mkldnn_enabled_op_types_ = op_list; } + /** Specify id for MKLDNN primitive reuse. + * @param id reuse id. + */ + void SetMKLDNNReuseID(int id) { mkldnn_reuse_id_ = id; } + /** Turn on quantization. */ void EnableMkldnnQuantizer(); @@ -287,6 +292,7 @@ struct AnalysisConfig { bool use_ngraph_{false}; bool use_mkldnn_{false}; std::unordered_set mkldnn_enabled_op_types_; + int mkldnn_reuse_id_{0}; bool model_from_memory_{false}; diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h index 07c808fb2ca5d9..87f40f09eb9bb5 100644 --- a/paddle/fluid/inference/api/paddle_api.h +++ b/paddle/fluid/inference/api/paddle_api.h @@ -277,8 +277,6 @@ class PaddlePredictor { return "NotImplemented"; } - virtual void SetMkldnnMode(int mode) {} - /** The common configs for all the predictors. */ struct Config { diff --git a/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc b/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc index 07934f96dc55ef..85b6a266d3b345 100644 --- a/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc @@ -50,6 +50,7 @@ void profile(bool use_mkldnn = false) { if (use_mkldnn) { cfg.EnableMKLDNN(); + // cfg.SetMKLDNNReuseID(1); if (!FLAGS_disable_mkldnn_fc) cfg.pass_builder()->AppendPass("fc_mkldnn_pass"); } diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index a855ba8475a1b0..2ae493fd39c061 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -81,6 +81,13 @@ std::string CreateKey(const paddle::framework::ExecutionContext& ctx, platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt)); platform::MKLDNNHandler::AppendKey(&key, std::to_string(multi_input[0]->format())); + if (platform::get_cur_thread_id() == 0) { + auto tid = std::this_thread::get_id(); + std::stringstream ss; + ss << tid; + platform::MKLDNNHandler::AppendKey(&key, "-t:"); + platform::MKLDNNHandler::AppendKey(&key, ss.str()); + } return key; } diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc index c635fd11c37aec..65ed539d8f93cc 100644 --- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc @@ -48,6 +48,14 @@ std::string CreateKey(const paddle::framework::ExecutionContext& ctx, platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt)); platform::MKLDNNHandler::AppendKey(&key, std::to_string(fmt)); platform::MKLDNNHandler::AppendKey(&key, suffix); + + if (platform::get_cur_thread_id() == 0) { + auto tid = std::this_thread::get_id(); + std::stringstream ss; + ss << tid; + platform::MKLDNNHandler::AppendKey(&key, "-t:"); + platform::MKLDNNHandler::AppendKey(&key, ss.str()); + } return key; } diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 8372b78d8308bb..080d52a9a5d7e2 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -396,7 +396,6 @@ MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place) : CPUDeviceContext(place), engine_(mkldnn::engine::cpu, 0), p_blobmap_() { p_blobmap_.reset(new BlobMap()); p_mutex_.reset(new std::mutex()); - mode_ = 0; } namespace { @@ -435,7 +434,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, } else { key_it->second = data; // set data to existing blob } - // std::cout << "MKLDNNDeviceContext::SetBlob " << name << "\n"; + std::cout << "MKLDNNDeviceContext::SetBlob " << name << "\n"; // lock will be automatically released when out of scope return; } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 5ae4d10284e841..812181563e6e55 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -397,11 +397,7 @@ class MKLDNNDeviceContext : public CPUDeviceContext { // Find a saved blob. Return nullptr if not found std::shared_ptr GetBlob(const std::string& name) const; - void SetMode(int mode) { mode_ = mode; } - int GetMode() const { return mode_; } - private: - int mode_; mkldnn::engine engine_; std::shared_ptr p_blobmap_; std::shared_ptr p_mutex_; diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 85495226b61d80..efb40270431bf6 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -38,7 +38,7 @@ class MKLDNNHandler { std::stringstream ss; ss << tid; key_ = key_common_ + "-t:" + ss.str(); - if (platform::get_cur_thread_id() == 1) key_ = key_common_; + if (platform::get_cur_thread_id() > 0) key_ = key_common_; } std::shared_ptr AcquireSrcMemory( From 81b7f9f6394ed9805bfb9276777b4720f207795b Mon Sep 17 00:00:00 2001 From: Leo Zhao Date: Fri, 14 Jun 2019 14:19:24 +0800 Subject: [PATCH 5/9] reset cur_thread_id to 0 after run if reuse_id is set --- paddle/fluid/inference/api/analysis_predictor.cc | 12 ++++++++++++ .../inference/tests/api/analyzer_resnet50_tester.cc | 1 - paddle/fluid/platform/mkldnn_reuse.h | 6 +++++- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 4dab2d7c70db38..b319d983c6a90b 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -198,6 +198,9 @@ bool AnalysisPredictor::Run(const std::vector &inputs, int batch_size) { paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); #ifdef PADDLE_WITH_MKLDNN + std::cout << "AnalysisPredictor::Run get_cur_thread_id=" + << paddle::platform::get_cur_thread_id() + << ", mkldnn_reuse_id_=" << config_.mkldnn_reuse_id_ << "\n"; if (paddle::platform::get_cur_thread_id() == 0) paddle::platform::set_cur_thread_id(config_.mkldnn_reuse_id_); #endif @@ -242,6 +245,9 @@ bool AnalysisPredictor::Run(const std::vector &inputs, // recover the cpu_math_library_num_threads to 1, in order to avoid thread // conflict when integrating it into deployment service. paddle::platform::SetNumThreads(1); +#ifdef PADDLE_WITH_MKLDNN + if (config_.mkldnn_reuse_id_ > 0) paddle::platform::set_cur_thread_id(0); +#endif return true; } @@ -600,6 +606,9 @@ std::unique_ptr AnalysisPredictor::GetOutputTensor( bool AnalysisPredictor::ZeroCopyRun() { paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); #ifdef PADDLE_WITH_MKLDNN + std::cout << "AnalysisPredictor::ZeroCopyRun get_cur_thread_id=" + << paddle::platform::get_cur_thread_id() + << ", mkldnn_reuse_id_=" << config_.mkldnn_reuse_id_ << "\n"; if (paddle::platform::get_cur_thread_id() == 0) paddle::platform::set_cur_thread_id(config_.mkldnn_reuse_id_); #endif @@ -611,6 +620,9 @@ bool AnalysisPredictor::ZeroCopyRun() { // recover the cpu_math_library_num_threads to 1, in order to avoid thread // conflict when integrating it into deployment service. paddle::platform::SetNumThreads(1); +#ifdef PADDLE_WITH_MKLDNN + if (config_.mkldnn_reuse_id_ > 0) paddle::platform::set_cur_thread_id(0); +#endif return true; } diff --git a/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc b/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc index 85b6a266d3b345..07934f96dc55ef 100644 --- a/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc @@ -50,7 +50,6 @@ void profile(bool use_mkldnn = false) { if (use_mkldnn) { cfg.EnableMKLDNN(); - // cfg.SetMKLDNNReuseID(1); if (!FLAGS_disable_mkldnn_fc) cfg.pass_builder()->AppendPass("fc_mkldnn_pass"); } diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index efb40270431bf6..fffa7f3ccea3d4 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -38,7 +38,11 @@ class MKLDNNHandler { std::stringstream ss; ss << tid; key_ = key_common_ + "-t:" + ss.str(); - if (platform::get_cur_thread_id() > 0) key_ = key_common_; + if (platform::get_cur_thread_id() > 0) { + std::cout << "MKLDNNHandler thread id=" << platform::get_cur_thread_id() + << "\n"; + key_ = key_common_; + } } std::shared_ptr AcquireSrcMemory( From 43539db503d715dc753fe876383c4b123355de74 Mon Sep 17 00:00:00 2001 From: Leo Zhao Date: Fri, 14 Jun 2019 19:28:56 +0800 Subject: [PATCH 6/9] fix conv and pool mkldnn op issue when mkldnn cache is disabled --- paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc | 9 ++++++--- paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc | 7 ++++--- paddle/fluid/platform/device_context.cc | 3 +++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc index 647e09a92911e3..45a38ae16d3848 100644 --- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc @@ -221,6 +221,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { user_weights_memory_p, pipeline, is_test); std::shared_ptr dst_memory_p; + std::shared_ptr user_residual_memory_p; if (fuse_residual_conn) { auto residual_param = ctx.Input("ResidualData"); @@ -243,7 +244,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { auto user_residual_md = platform::MKLDNNMemDesc( residual_data_tz, residual_data_type, residual_param->format()); - auto user_residual_memory_p = handler.AcquireResidualDataMemory( + user_residual_memory_p = handler.AcquireResidualDataMemory( user_residual_md, to_void_cast(residual_param_data)); dst_memory_p = handler.AcquireDstMemoryFromResidualDataMemory( @@ -263,14 +264,16 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { // create convolution op primitive std::shared_ptr conv_p; + std::shared_ptr user_bias_memory_p; + std::shared_ptr bias_memory_p; if (bias) { const T* bias_data = bias->data(); auto user_bias_md = platform::MKLDNNMemDesc( {bias_tz}, platform::MKLDNNGetDataType(), memory::format::x); - auto user_bias_memory_p = + user_bias_memory_p = handler.AcquireBiasMemory(user_bias_md, to_void_cast(bias_data)); - auto bias_memory_p = + bias_memory_p = handler.AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline); conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p, bias_memory_p, dst_memory_p); diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc index 65ed539d8f93cc..338ba408b28718 100644 --- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc @@ -138,6 +138,7 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { auto pool_p = std::static_pointer_cast(dev_ctx.GetBlob(key_pool_p)); + std::shared_ptr src_memory, dst_memory; if (pool_p == nullptr) { const std::vector& padding_left_top(paddings); std::vector padding_right_bottom(paddings); @@ -166,9 +167,9 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { // save pool_pd into global device context to be referred in backward path if (!is_test) dev_ctx.SetBlob(key_pool_pd, pool_pd); - auto src_memory = std::make_shared(pool_pd->src_primitive_desc(), - to_void_cast(input_data)); - auto dst_memory = + src_memory = std::make_shared(pool_pd->src_primitive_desc(), + to_void_cast(input_data)); + dst_memory = std::make_shared(pool_pd->dst_primitive_desc(), output_data); dev_ctx.SetBlob(key_pool_src_mem_p, src_memory); diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 080d52a9a5d7e2..79ddd355b75d71 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -413,6 +413,9 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, int tid = platform::get_cur_thread_id(); + // WA for variable length in detect model + if (tid > 0) return; + std::lock_guard lock(*p_mutex_); // Find KeyBlob for current thread From c074e8776bc03e43739404dc20a80654617bde67 Mon Sep 17 00:00:00 2001 From: Leo Zhao Date: Fri, 14 Jun 2019 20:29:57 +0800 Subject: [PATCH 7/9] remove new defined interface and reuse EnableMKLDNN test=develop --- paddle/fluid/inference/api/analysis_config.cc | 3 ++- paddle/fluid/inference/api/analysis_predictor.cc | 12 ++++++------ paddle/fluid/inference/api/paddle_analysis_config.h | 7 +------ paddle/fluid/platform/device_context.cc | 2 +- paddle/fluid/platform/mkldnn_reuse.h | 2 -- 5 files changed, 10 insertions(+), 16 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index bd9be16a411fdb..7195508783ac7f 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -151,9 +151,10 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { Update(); } -void AnalysisConfig::EnableMKLDNN() { +void AnalysisConfig::EnableMKLDNN(int reuse_id) { #ifdef PADDLE_WITH_MKLDNN use_mkldnn_ = true; + mkldnn_reuse_id_ = reuse_id; #else LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN"; use_mkldnn_ = false; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index b319d983c6a90b..1ef5813f6c01c5 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -198,9 +198,9 @@ bool AnalysisPredictor::Run(const std::vector &inputs, int batch_size) { paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); #ifdef PADDLE_WITH_MKLDNN - std::cout << "AnalysisPredictor::Run get_cur_thread_id=" - << paddle::platform::get_cur_thread_id() - << ", mkldnn_reuse_id_=" << config_.mkldnn_reuse_id_ << "\n"; + VLOG(3) << "AnalysisPredictor::Run get_cur_thread_id=" + << paddle::platform::get_cur_thread_id() + << ", mkldnn_reuse_id_=" << config_.mkldnn_reuse_id_ << "\n"; if (paddle::platform::get_cur_thread_id() == 0) paddle::platform::set_cur_thread_id(config_.mkldnn_reuse_id_); #endif @@ -606,9 +606,9 @@ std::unique_ptr AnalysisPredictor::GetOutputTensor( bool AnalysisPredictor::ZeroCopyRun() { paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); #ifdef PADDLE_WITH_MKLDNN - std::cout << "AnalysisPredictor::ZeroCopyRun get_cur_thread_id=" - << paddle::platform::get_cur_thread_id() - << ", mkldnn_reuse_id_=" << config_.mkldnn_reuse_id_ << "\n"; + VLOG(3) << "AnalysisPredictor::ZeroCopyRun get_cur_thread_id=" + << paddle::platform::get_cur_thread_id() + << ", mkldnn_reuse_id_=" << config_.mkldnn_reuse_id_ << "\n"; if (paddle::platform::get_cur_thread_id() == 0) paddle::platform::set_cur_thread_id(config_.mkldnn_reuse_id_); #endif diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index c2ab68d0f0ac6d..c133875fb21b5b 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -178,7 +178,7 @@ struct AnalysisConfig { /** Turn on MKLDNN. */ - void EnableMKLDNN(); + void EnableMKLDNN(int reuse_id = 0); /** A boolean state telling whether to use the MKLDNN. */ bool mkldnn_enabled() const { return use_mkldnn_; } @@ -202,11 +202,6 @@ struct AnalysisConfig { mkldnn_enabled_op_types_ = op_list; } - /** Specify id for MKLDNN primitive reuse. - * @param id reuse id. - */ - void SetMKLDNNReuseID(int id) { mkldnn_reuse_id_ = id; } - /** Turn on quantization. */ void EnableMkldnnQuantizer(); diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 79ddd355b75d71..2fc82add448abf 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -437,7 +437,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, } else { key_it->second = data; // set data to existing blob } - std::cout << "MKLDNNDeviceContext::SetBlob " << name << "\n"; + VLOG(3) << "MKLDNNDeviceContext::SetBlob " << name << "\n"; // lock will be automatically released when out of scope return; } diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index fffa7f3ccea3d4..53e4c2ea2865e4 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -39,8 +39,6 @@ class MKLDNNHandler { ss << tid; key_ = key_common_ + "-t:" + ss.str(); if (platform::get_cur_thread_id() > 0) { - std::cout << "MKLDNNHandler thread id=" << platform::get_cur_thread_id() - << "\n"; key_ = key_common_; } } From 8db300deceb62f900ab2680dd05c1740b033d594 Mon Sep 17 00:00:00 2001 From: Leo Zhao Date: Fri, 14 Jun 2019 21:26:58 +0800 Subject: [PATCH 8/9] refine variable name, and add more comments test=develop --- paddle/fluid/inference/api/analysis_config.cc | 6 +++--- .../fluid/inference/api/analysis_predictor.cc | 20 +++++++++++++------ .../inference/api/paddle_analysis_config.h | 5 +++-- paddle/fluid/platform/device_context.cc | 2 +- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 7195508783ac7f..ae9614c25a90c2 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -114,7 +114,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { // MKLDNN related. CP_MEMBER(use_mkldnn_); CP_MEMBER(mkldnn_enabled_op_types_); - CP_MEMBER(mkldnn_reuse_id_); + CP_MEMBER(mkldnn_disable_cache_); // Quantization related. CP_MEMBER(use_mkldnn_quantizer_); CP_MEMBER(mkldnn_quantizer_config_); @@ -151,10 +151,10 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { Update(); } -void AnalysisConfig::EnableMKLDNN(int reuse_id) { +void AnalysisConfig::EnableMKLDNN(int mkldnn_disable_cache) { #ifdef PADDLE_WITH_MKLDNN use_mkldnn_ = true; - mkldnn_reuse_id_ = reuse_id; + mkldnn_disable_cache_ = mkldnn_disable_cache; #else LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN"; use_mkldnn_ = false; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 1ef5813f6c01c5..f5e6048d653398 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -198,11 +198,13 @@ bool AnalysisPredictor::Run(const std::vector &inputs, int batch_size) { paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); #ifdef PADDLE_WITH_MKLDNN + // TODO(intel): will refactor this code later VLOG(3) << "AnalysisPredictor::Run get_cur_thread_id=" << paddle::platform::get_cur_thread_id() - << ", mkldnn_reuse_id_=" << config_.mkldnn_reuse_id_ << "\n"; + << ", mkldnn_disable_cache_=" << config_.mkldnn_disable_cache_ + << "\n"; if (paddle::platform::get_cur_thread_id() == 0) - paddle::platform::set_cur_thread_id(config_.mkldnn_reuse_id_); + paddle::platform::set_cur_thread_id(config_.mkldnn_disable_cache_); #endif VLOG(3) << "Predictor::predict"; inference::Timer timer; @@ -246,7 +248,9 @@ bool AnalysisPredictor::Run(const std::vector &inputs, // conflict when integrating it into deployment service. paddle::platform::SetNumThreads(1); #ifdef PADDLE_WITH_MKLDNN - if (config_.mkldnn_reuse_id_ > 0) paddle::platform::set_cur_thread_id(0); + // TODO(intel): will refactor this code later + // To avoid confusion when thread is reused from pool + if (config_.mkldnn_disable_cache_ > 0) paddle::platform::set_cur_thread_id(0); #endif return true; @@ -606,11 +610,13 @@ std::unique_ptr AnalysisPredictor::GetOutputTensor( bool AnalysisPredictor::ZeroCopyRun() { paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); #ifdef PADDLE_WITH_MKLDNN + // TODO(intel): will refactor this code later VLOG(3) << "AnalysisPredictor::ZeroCopyRun get_cur_thread_id=" << paddle::platform::get_cur_thread_id() - << ", mkldnn_reuse_id_=" << config_.mkldnn_reuse_id_ << "\n"; + << ", mkldnn_disable_cache_=" << config_.mkldnn_disable_cache_ + << "\n"; if (paddle::platform::get_cur_thread_id() == 0) - paddle::platform::set_cur_thread_id(config_.mkldnn_reuse_id_); + paddle::platform::set_cur_thread_id(config_.mkldnn_disable_cache_); #endif executor_->Run(); // Fix TensorArray reuse not cleaned bug. @@ -621,7 +627,9 @@ bool AnalysisPredictor::ZeroCopyRun() { // conflict when integrating it into deployment service. paddle::platform::SetNumThreads(1); #ifdef PADDLE_WITH_MKLDNN - if (config_.mkldnn_reuse_id_ > 0) paddle::platform::set_cur_thread_id(0); + // TODO(intel): will refactor this code later + // To avoid confusion when thread is reused from pool + if (config_.mkldnn_disable_cache_ > 0) paddle::platform::set_cur_thread_id(0); #endif return true; diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index c133875fb21b5b..b62405a8a54b52 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -177,8 +177,9 @@ struct AnalysisConfig { bool ngraph_enabled() const { return use_ngraph_; } /** Turn on MKLDNN. + *@param mkldnn_disable_cache if disable mkldnn cache */ - void EnableMKLDNN(int reuse_id = 0); + void EnableMKLDNN(int mkldnn_disable_cache = 0); /** A boolean state telling whether to use the MKLDNN. */ bool mkldnn_enabled() const { return use_mkldnn_; } @@ -287,7 +288,7 @@ struct AnalysisConfig { bool use_ngraph_{false}; bool use_mkldnn_{false}; std::unordered_set mkldnn_enabled_op_types_; - int mkldnn_reuse_id_{0}; + int mkldnn_disable_cache_{0}; bool model_from_memory_{false}; diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 2fc82add448abf..c92e10a7559d52 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -413,7 +413,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, int tid = platform::get_cur_thread_id(); - // WA for variable length in detect model + // use tid to indicate if cache is enabled, tid > 0 means disable cache if (tid > 0) return; std::lock_guard lock(*p_mutex_); From 6eb0e302e6200a31c66de2420b8d7966a11fd948 Mon Sep 17 00:00:00 2001 From: Leo Zhao Date: Mon, 17 Jun 2019 09:49:00 +0800 Subject: [PATCH 9/9] update python part for EnableMKLDNN() parameter change test=develop --- .../contrib/slim/quantization/mkldnn_post_training_strategy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/mkldnn_post_training_strategy.py b/python/paddle/fluid/contrib/slim/quantization/mkldnn_post_training_strategy.py index dcaabfadedf32b..70d5fef66eed95 100644 --- a/python/paddle/fluid/contrib/slim/quantization/mkldnn_post_training_strategy.py +++ b/python/paddle/fluid/contrib/slim/quantization/mkldnn_post_training_strategy.py @@ -71,7 +71,7 @@ def on_compression_begin(self, context): infer_config.switch_ir_optim(True) infer_config.disable_gpu() infer_config.set_model(self.fp32_model_path) - infer_config.enable_mkldnn() + infer_config.enable_mkldnn(0) infer_config.set_cpu_math_library_num_threads( self.cpu_math_library_num_threads)