1 change: 1 addition & 0 deletions paddle/fluid/inference/analysis/argument.h
@@ -148,6 +148,7 @@ struct Argument {
// Pass a set of op types to enable their mkldnn kernels
DECL_ARGUMENT_FIELD(mkldnn_enabled_op_types, MKLDNNEnabledOpTypes,
std::unordered_set<std::string>);
DECL_ARGUMENT_FIELD(mkldnn_cache_capacity, MkldnnCacheCapacity, int);

#ifdef PADDLE_WITH_MKLDNN
// A set of op types to enable their quantized kernels
11 changes: 11 additions & 0 deletions paddle/fluid/inference/api/analysis_config.cc
@@ -115,6 +115,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// MKLDNN related.
CP_MEMBER(use_mkldnn_);
CP_MEMBER(mkldnn_enabled_op_types_);
CP_MEMBER(mkldnn_cache_capacity_);
// Quantization related.
CP_MEMBER(use_mkldnn_quantizer_);
CP_MEMBER(mkldnn_quantizer_config_);
@@ -162,6 +163,15 @@ void AnalysisConfig::EnableMKLDNN() {
Update();
}

void AnalysisConfig::SetMkldnnCacheCapacity(int capacity) {
#ifdef PADDLE_WITH_MKLDNN
mkldnn_cache_capacity_ = capacity;
#else
LOG(ERROR) << "Please compile with MKLDNN first to set the MKLDNN cache capacity";
mkldnn_cache_capacity_ = 0;
#endif
}
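
For reference, a minimal usage sketch of the new option (the model path, the capacity value, and the surrounding setup are illustrative, not part of this patch):

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./model_dir");  // hypothetical model directory
  config.EnableMKLDNN();
  // Cache MKLDNN primitives for at most 10 distinct input shapes; the
  // default 0 leaves the cache-clearing mode disabled.
  config.SetMkldnnCacheCapacity(10);
  auto predictor = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
  return 0;
}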

void AnalysisConfig::EnableMkldnnQuantizer() {
#ifdef PADDLE_WITH_MKLDNN
if (!mkldnn_quantizer_config_)
@@ -343,6 +353,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << use_ngraph_;

ss << use_mkldnn_;
ss << mkldnn_cache_capacity_;
for (auto &item : mkldnn_enabled_op_types_) ss << item;
ss << ";";

42 changes: 40 additions & 2 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -185,10 +185,47 @@ bool AnalysisPredictor::PrepareExecutor() {
return true;
}

void AnalysisPredictor::MkldnnPreSet(const std::vector<PaddleTensor> &inputs) {
#ifdef PADDLE_WITH_MKLDNN
VLOG(2) << "AnalysisPredictor::Run get_cur_mkldnn_session_id="
<< platform::get_cur_mkldnn_session_id();
// In cache clearing mode.
if (config_.mkldnn_cache_capacity_ > 0) {
VLOG(2) << "In mkldnn cache clear mode.";
platform::set_cur_mkldnn_session_id(
platform::kMKLDNNSessionID_CacheClearing);
platform::set_cur_input_shape_cache_capacity(
config_.mkldnn_cache_capacity_);
// Set current_input_shape for caching dynamic shape.
std::stringstream ss;
for (size_t i = 0; i < inputs.size(); ++i) {
for (size_t j = 0; j < inputs[i].shape.size(); ++j) {
ss << inputs[i].shape[j] << "-";
}
}
VLOG(2) << "Set input shape=" << ss.str();
platform::set_cur_input_shape_str(ss.str());
}
#endif
}
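
The key built above is every dimension of every input joined with '-', so cached primitives are reused only between runs whose input shapes all match. A standalone sketch of the same construction (BuildShapeKey is a hypothetical helper, not part of the patch):

#include <sstream>
#include <string>
#include <vector>

// Mirrors the key format used in MkldnnPreSet: a single input of shape
// {1, 3, 224, 224} yields "1-3-224-224-".
std::string BuildShapeKey(const std::vector<std::vector<int>> &shapes) {
  std::stringstream ss;
  for (const auto &shape : shapes) {
    for (int dim : shape) ss << dim << "-";
  }
  return ss.str();
}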

void AnalysisPredictor::MkldnnPostReset() {
#ifdef PADDLE_WITH_MKLDNN
// In cache clearing mode.
if (config_.mkldnn_cache_capacity_ > 0) {
paddle::platform::set_cur_mkldnn_session_id(
platform::kMKLDNNSessionID_Default);
}
#endif
}

bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size) {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) MkldnnPreSet(inputs);
#endif
VLOG(3) << "Predictor::predict";
inference::Timer timer;
timer.tic();
@@ -230,7 +267,9 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
// Restore cpu_math_library_num_threads to 1 in order to avoid thread
// conflicts when integrating it into a deployment service.
paddle::platform::SetNumThreads(1);

#ifdef PADDLE_WITH_MKLDNN
if (config_.use_mkldnn_) MkldnnPostReset();
#endif
return true;
}
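
A design note: MkldnnPreSet() and MkldnnPostReset() must bracket the body of Run(), so any early return between them would leave the session id unrestored. A hypothetical RAII guard (not in this patch, and assuming it is befriended by AnalysisPredictor so it can reach the private members) would make the reset automatic on every exit path:

// Sketch only: restores the MKLDNN session state in the destructor.
class MkldnnSessionGuard {
 public:
  MkldnnSessionGuard(AnalysisPredictor *predictor,
                     const std::vector<PaddleTensor> &inputs)
      : predictor_(predictor) {
    predictor_->MkldnnPreSet(inputs);
  }
  ~MkldnnSessionGuard() { predictor_->MkldnnPostReset(); }

 private:
  AnalysisPredictor *predictor_;
};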

@@ -595,7 +634,6 @@ bool AnalysisPredictor::ZeroCopyRun() {
// Restore cpu_math_library_num_threads to 1 in order to avoid thread
// conflicts when integrating it into a deployment service.
paddle::platform::SetNumThreads(1);

return true;
}

5 changes: 5 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.h
@@ -109,6 +109,11 @@ class AnalysisPredictor : public PaddlePredictor {
template <typename T>
void GetFetchOne(const framework::LoDTensor &fetchs,
PaddleTensor *output_data);
// PreSet and PostReset for MKLDNN multi-thread and dynamic-shape input.
// Used in AnalysisPredictor::Run(); AnalysisPredictor::ZeroCopyRun() is
// not supported yet.
void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);
void MkldnnPostReset();

#if PADDLE_WITH_TENSORRT
// When we use Paddle-TRT INT8 engine, we need to generate calibration table
7 changes: 7 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
@@ -184,6 +184,10 @@ struct AnalysisConfig {
/** Turn on MKLDNN.
*/
void EnableMKLDNN();
/** Set the cache capacity of different input shapes for MKLDNN.
 * Default 0 means the cache-clearing strategy is disabled.
 */
void SetMkldnnCacheCapacity(int capacity);
/** A boolean state telling whether to use the MKLDNN.
*/
bool mkldnn_enabled() const { return use_mkldnn_; }
@@ -316,8 +320,11 @@ struct AnalysisConfig {
std::vector<std::string> anakin_passes_filter_;
std::vector<std::string> anakin_ops_filter_;

// MKLDNN related.
int mkldnn_cache_capacity_{0};
bool use_mkldnn_quantizer_{false};
std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;

// If the config is already used on a predictor, it becomes invalid.
// Any config can only be used with one predictor.
// Variables held by config can take up a lot of memory in some cases.
58 changes: 47 additions & 11 deletions paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
@@ -173,20 +173,55 @@ TEST(Analyzer_MM_DNN, compare_determine) {
}

#ifdef PADDLE_WITH_MKLDNN
void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity,
std::vector<std::vector<PaddleTensor>> *outputs) {
AnalysisConfig config;
SetConfig(&config);
config.EnableMKLDNN();
// TODO(luotao): the following explicit settings will be deprecated after
// the config.EnableMKLDNN() interface is enhanced.
config.SetMkldnnCacheCapacity(mkldnn_input_shape_cache_capacity);

std::vector<PaddleTensor> input;
auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);

int sample_num = 10;
DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
outputs->resize(sample_num);

for (int i = 0; i < sample_num; i++) {
PrepareInputs(&input, &data, FLAGS_batch_size);
predictor->Run(input, &(*outputs)[i], 1);
}
}

TEST(Analyzer_MM_DNN, mkldnn_cache_clear) {
std::vector<std::vector<PaddleTensor>> outputs, cache_outputs;
// 0 means do not use cache clear strategy.
TestMkldnnCacheClear(0, &outputs);
// 4 means use cache clear strategy, and the
// mkldnn_input_shape_cache_capacity is 4.
TestMkldnnCacheClear(4, &cache_outputs);
// compare the result.
for (size_t i = 0; i < outputs.size(); i++) {
CompareResult(outputs[i], cache_outputs[i]);
}
}

void TestMkldnnShapeBlobSize(int mkldnn_input_shape_cache_capacity) {
AnalysisConfig config;
SetConfig(&config);
config.EnableMKLDNN();
config.SwitchUseFeedFetchOps(false);
// Since AnalysisPredictor::Run() resets cur_mkldnn_session_id to the default
// before it finishes, we use AnalysisPredictor::ZeroCopyRun() here to check
// the mkldnn_shape_blob_size.
if (mkldnn_input_shape_cache_capacity > 0) {
platform::set_cur_mkldnn_session_id(
platform::kMKLDNNSessionID_CacheClearing);
platform::set_cur_input_shape_cache_capacity(
mkldnn_input_shape_cache_capacity);
}

std::vector<PaddleTensor> input, output;
std::vector<PaddleTensor> input;
auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);

int sample_num = 10;
@@ -195,36 +230,37 @@ void TestMkldnnCacheClear(int mkldnn_input_shape_cache_capacity) {
auto &pool = platform::DeviceContextPool::Instance();
auto *dev_ctx = dynamic_cast<platform::MKLDNNDeviceContext *>(
pool.Get(platform::CPUPlace()));
// clear before test
dev_ctx->ResetBlobMap();

for (int i = 0; i < sample_num; i++) {
PrepareInputs(&input, &data, FLAGS_batch_size);
ConvertPaddleTensorToZeroCopyTensor(predictor.get(), input);
if (mkldnn_input_shape_cache_capacity > 0) {
std::stringstream ss;
for (size_t i = 0; i < input.size(); i++) {
for (size_t j = 0; j < input[i].shape.size(); ++j) {
ss << input[i].shape[j] << "-";
}
}
// TODO(luotao): the following explicit settings will be deprecated after
// the config.EnableMKLDNN() interface is enhanced.
platform::set_cur_input_shape_str(ss.str());
}
predictor->Run(input, &output, 1);
predictor->ZeroCopyRun();
}
if (mkldnn_input_shape_cache_capacity > 0) {
PADDLE_ENFORCE_EQ(dev_ctx->GetShapeBlobSize(),
mkldnn_input_shape_cache_capacity);
} else {
PADDLE_ENFORCE_EQ(dev_ctx->GetShapeBlobSize(), 1UL);
}
dev_ctx->ResetBlobMap();
}

TEST(Analyzer_MM_DNN, mkldnn_cache_clear) {
TEST(Analyzer_MM_DNN, mkldnn_shape_blob_size) {
// 0 means do not use cache clear strategy.
TestMkldnnCacheClear(0);
TestMkldnnShapeBlobSize(0);
// 4 means use cache clear strategy, and the
// mkldnn_input_shape_cache_capacity is 4.
TestMkldnnCacheClear(4);
TestMkldnnShapeBlobSize(4);
}
#endif

3 changes: 2 additions & 1 deletion paddle/fluid/platform/device_context.cc
@@ -462,7 +462,8 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
if (key_it == sBlob->end()) {
// In cache clearing mode, cur_input_shape_cache_capacity defines
// max pblob capacity
if ((sid == kMKLDNNSessionID_CacheClearing) &&
if ((static_cast<size_t>(sid) == kMKLDNNSessionID_CacheClearing) &&
sBlob->size() &&
(sBlob->size() >=
static_cast<size_t>(cur_input_shape_cache_capacity))) {
VLOG(2) << "sid=" << sid
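
The hunk above (truncated here) adds a capacity check before a new shape key is inserted. A simplified standalone model of that policy (the container type and the choice of which entry to evict are illustrative; the real code keys blobs per session id and per shape string):

#include <map>
#include <memory>
#include <string>

using ShapeBlobMap = std::map<std::string, std::shared_ptr<void>>;

// Sketch of the guard added in MKLDNNDeviceContext::SetBlob: in
// cache-clearing mode, once `capacity` distinct shape keys are cached,
// an existing entry is evicted before a new shape key is inserted.
void InsertShapeBlob(ShapeBlobMap *blobs, const std::string &shape_key,
                     std::shared_ptr<void> blob, size_t capacity) {
  if (blobs->find(shape_key) == blobs->end() && !blobs->empty() &&
      blobs->size() >= capacity) {
    blobs->erase(blobs->begin());  // drop one entry to respect the capacity
  }
  (*blobs)[shape_key] = std::move(blob);
}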