4 changes: 3 additions & 1 deletion paddle/fluid/inference/api/analysis_config.cc
@@ -114,6 +114,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// MKLDNN related.
CP_MEMBER(use_mkldnn_);
CP_MEMBER(mkldnn_enabled_op_types_);
CP_MEMBER(mkldnn_disable_cache_);
// Quantization related.
CP_MEMBER(use_mkldnn_quantizer_);
CP_MEMBER(mkldnn_quantizer_config_);
@@ -150,9 +151,10 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
Update();
}

void AnalysisConfig::EnableMKLDNN() {
void AnalysisConfig::EnableMKLDNN(int mkldnn_disable_cache) {
#ifdef PADDLE_WITH_MKLDNN
use_mkldnn_ = true;
mkldnn_disable_cache_ = mkldnn_disable_cache;
#else
LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN";
use_mkldnn_ = false;
28 changes: 28 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -197,6 +197,15 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size) {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
VLOG(3) << "AnalysisPredictor::Run get_cur_thread_id="
<< paddle::platform::get_cur_thread_id()
<< ", mkldnn_disable_cache_=" << config_.mkldnn_disable_cache_
<< "\n";
if (paddle::platform::get_cur_thread_id() == 0)
paddle::platform::set_cur_thread_id(config_.mkldnn_disable_cache_);
#endif
VLOG(3) << "Predictor::predict";
inference::Timer timer;
timer.tic();
@@ -238,6 +247,11 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
paddle::platform::SetNumThreads(1);
#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
// Reset the thread id after this run so a thread reused from a pool does not keep the cache-disable flag
if (config_.mkldnn_disable_cache_ > 0) paddle::platform::set_cur_thread_id(0);
#endif

return true;
}
@@ -595,6 +609,15 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(

bool AnalysisPredictor::ZeroCopyRun() {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
VLOG(3) << "AnalysisPredictor::ZeroCopyRun get_cur_thread_id="
<< paddle::platform::get_cur_thread_id()
<< ", mkldnn_disable_cache_=" << config_.mkldnn_disable_cache_
<< "\n";
if (paddle::platform::get_cur_thread_id() == 0)
paddle::platform::set_cur_thread_id(config_.mkldnn_disable_cache_);
#endif
executor_->Run();
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
@@ -603,6 +626,11 @@ bool AnalysisPredictor::ZeroCopyRun() {
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
paddle::platform::SetNumThreads(1);
#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
// Reset the thread id after this run so a thread reused from a pool does not keep the cache-disable flag
if (config_.mkldnn_disable_cache_ > 0) paddle::platform::set_cur_thread_id(0);
#endif

return true;
}
4 changes: 3 additions & 1 deletion paddle/fluid/inference/api/paddle_analysis_config.h
@@ -177,8 +177,9 @@ struct AnalysisConfig {
bool ngraph_enabled() const { return use_ngraph_; }

/** Turn on MKLDNN.
* @param mkldnn_disable_cache a value greater than 0 disables the MKLDNN cache; the default 0 keeps caching enabled
*/
void EnableMKLDNN();
void EnableMKLDNN(int mkldnn_disable_cache = 0);
/** A boolean state telling whether to use the MKLDNN.
*/
bool mkldnn_enabled() const { return use_mkldnn_; }
@@ -287,6 +288,7 @@ struct AnalysisConfig {
bool use_ngraph_{false};
bool use_mkldnn_{false};
std::unordered_set<std::string> mkldnn_enabled_op_types_;
int mkldnn_disable_cache_{0};

bool model_from_memory_{false};

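For context, a minimal usage sketch of the new parameter (not part of this patch): the model path and the CreatePaddlePredictor flow are assumed from the usual inference API, only EnableMKLDNN(int) itself is introduced here.

// Hedged sketch: pass a value > 0 to EnableMKLDNN to run without the MKLDNN
// primitive cache; the default 0 keeps the existing caching behaviour.
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void RunWithoutMkldnnCache() {
  paddle::AnalysisConfig config;
  config.SetModel("./model_dir");  // hypothetical model directory
  config.EnableMKLDNN(1);          // > 0 disables the per-thread MKLDNN cache
  auto predictor = paddle::CreatePaddlePredictor(config);
  // ... feed inputs and call predictor->Run(...) as usual
}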
7 changes: 7 additions & 0 deletions paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
@@ -81,6 +81,13 @@ std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt));
platform::MKLDNNHandler::AppendKey(&key,
std::to_string(multi_input[0]->format()));
if (platform::get_cur_thread_id() == 0) {
auto tid = std::this_thread::get_id();
std::stringstream ss;
ss << tid;
platform::MKLDNNHandler::AppendKey(&key, "-t:");
platform::MKLDNNHandler::AppendKey(&key, ss.str());
}
return key;
}

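The block added above makes cached keys thread-unique when caching is enabled; a standalone sketch of that suffixing follows (AppendThreadSuffix is a made-up helper mirroring the AppendKey calls, not part of the patch).

#include <sstream>
#include <string>
#include <thread>

// Append "-t:<thread id>" so two threads sharing the cache never collide on
// the same key; skipped when the cache is disabled (cur_thread_id > 0).
std::string AppendThreadSuffix(std::string key) {
  std::ostringstream ss;
  ss << std::this_thread::get_id();
  return key + "-t:" + ss.str();
}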
9 changes: 6 additions & 3 deletions paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
@@ -221,6 +221,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
user_weights_memory_p, pipeline, is_test);

std::shared_ptr<mkldnn::memory> dst_memory_p;
std::shared_ptr<mkldnn::memory> user_residual_memory_p;

if (fuse_residual_conn) {
auto residual_param = ctx.Input<Tensor>("ResidualData");
@@ -243,7 +244,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {

auto user_residual_md = platform::MKLDNNMemDesc(
residual_data_tz, residual_data_type, residual_param->format());
auto user_residual_memory_p = handler.AcquireResidualDataMemory(
user_residual_memory_p = handler.AcquireResidualDataMemory(
user_residual_md, to_void_cast<T>(residual_param_data));

dst_memory_p = handler.AcquireDstMemoryFromResidualDataMemory(
@@ -263,14 +264,16 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {

// create convolution op primitive
std::shared_ptr<mkldnn::convolution_forward> conv_p;
std::shared_ptr<mkldnn::memory> user_bias_memory_p;
std::shared_ptr<mkldnn::memory> bias_memory_p;
if (bias) {
const T* bias_data = bias->data<T>();
auto user_bias_md = platform::MKLDNNMemDesc(
{bias_tz}, platform::MKLDNNGetDataType<T>(), memory::format::x);
auto user_bias_memory_p =
user_bias_memory_p =
handler.AcquireBiasMemory(user_bias_md, to_void_cast<T>(bias_data));

auto bias_memory_p =
bias_memory_p =
handler.AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline);
conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p,
bias_memory_p, dst_memory_p);
15 changes: 12 additions & 3 deletions paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
@@ -48,6 +48,14 @@ std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt));
platform::MKLDNNHandler::AppendKey(&key, std::to_string(fmt));
platform::MKLDNNHandler::AppendKey(&key, suffix);

if (platform::get_cur_thread_id() == 0) {
auto tid = std::this_thread::get_id();
std::stringstream ss;
ss << tid;
platform::MKLDNNHandler::AppendKey(&key, "-t:");
platform::MKLDNNHandler::AppendKey(&key, ss.str());
}
return key;
}

@@ -130,6 +138,7 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {

auto pool_p =
std::static_pointer_cast<pooling_forward>(dev_ctx.GetBlob(key_pool_p));
std::shared_ptr<mkldnn::memory> src_memory, dst_memory;
if (pool_p == nullptr) {
const std::vector<int>& padding_left_top(paddings);
std::vector<int> padding_right_bottom(paddings);
@@ -158,9 +167,9 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
// save pool_pd into global device context to be referred in backward path
if (!is_test) dev_ctx.SetBlob(key_pool_pd, pool_pd);

auto src_memory = std::make_shared<memory>(pool_pd->src_primitive_desc(),
to_void_cast<T>(input_data));
auto dst_memory =
src_memory = std::make_shared<memory>(pool_pd->src_primitive_desc(),
to_void_cast<T>(input_data));
dst_memory =
std::make_shared<memory>(pool_pd->dst_primitive_desc(), output_data);

dev_ctx.SetBlob(key_pool_src_mem_p, src_memory);
5 changes: 4 additions & 1 deletion paddle/fluid/platform/device_context.cc
@@ -413,6 +413,9 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,

int tid = platform::get_cur_thread_id();

// The thread id doubles as a cache switch: tid > 0 means the MKLDNN cache is disabled, so skip storing the blob
if (tid > 0) return;

std::lock_guard<std::mutex> lock(*p_mutex_);

// Find KeyBlob for current thread
@@ -434,7 +437,7 @@
} else {
key_it->second = data; // set data to existing blob
}

VLOG(3) << "MKLDNNDeviceContext::SetBlob " << name << "\n";
// lock will be automatically released when out of scope
return;
}
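An illustrative summary of the convention the hunks above rely on (sketch only; IsMkldnnCacheEnabled is a made-up helper, not part of the patch).

// The per-thread id is reused as a cache switch by this patch:
//   0  -> caching on: blobs are stored per thread under thread-suffixed keys
//   >0 -> caching off: SetBlob() returns early and nothing is stored
bool IsMkldnnCacheEnabled() {
  return paddle::platform::get_cur_thread_id() == 0;
}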
3 changes: 3 additions & 0 deletions paddle/fluid/platform/mkldnn_reuse.h
@@ -38,6 +38,9 @@ class MKLDNNHandler {
std::stringstream ss;
ss << tid;
key_ = key_common_ + "-t:" + ss.str();
if (platform::get_cur_thread_id() > 0) {
key_ = key_common_;
}
}

std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
@@ -71,7 +71,7 @@ def on_compression_begin(self, context):
infer_config.switch_ir_optim(True)
infer_config.disable_gpu()
infer_config.set_model(self.fp32_model_path)
infer_config.enable_mkldnn()
infer_config.enable_mkldnn(0)
infer_config.set_cpu_math_library_num_threads(
self.cpu_math_library_num_threads)
