Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions paddle/fluid/inference/api/analysis_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// MKLDNN related.
CP_MEMBER(use_mkldnn_);
CP_MEMBER(mkldnn_enabled_op_types_);
CP_MEMBER(mkldnn_thread_id_);
// Quantization related.
CP_MEMBER(use_mkldnn_quantizer_);
CP_MEMBER(mkldnn_quantizer_config_);
Expand Down Expand Up @@ -161,6 +162,15 @@ void AnalysisConfig::EnableMKLDNN() {
Update();
}

// Sets the MKLDNN thread id used to key per-thread primitive caches.
// When the library is built without MKLDNN support, the call is a no-op
// apart from an error log, and the id is reset to the default (0).
void AnalysisConfig::SetMKLDNNThreadId(int id) {
#ifdef PADDLE_WITH_MKLDNN
  mkldnn_thread_id_ = id;
#else
  // Silence the unused-parameter warning on non-MKLDNN builds.
  (void)id;
  LOG(ERROR) << "Please compile with MKLDNN first to set MKLDNN Thread Id";
  mkldnn_thread_id_ = 0;
#endif
}

void AnalysisConfig::EnableMkldnnQuantizer() {
#ifdef PADDLE_WITH_MKLDNN
if (!mkldnn_quantizer_config_)
Expand Down
39 changes: 37 additions & 2 deletions paddle/fluid/inference/api/analysis_predictor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,16 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size) {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
// Make sure this does not conflict with the AnalysisConfig::SetMKLDNNThreadId() case
VLOG(2) << "AnalysisPredictor::Run get_cur_thread_id="
<< paddle::platform::get_cur_thread_id()
<< ", mkldnn_thread_id_=" << config_.mkldnn_thread_id_ << "\n";
if (paddle::platform::get_cur_thread_id() == 0)
paddle::platform::set_cur_thread_id(config_.mkldnn_thread_id_);
#endif

VLOG(3) << "Predictor::predict";
inference::Timer timer;
timer.tic();
Expand Down Expand Up @@ -238,7 +248,15 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
paddle::platform::SetNumThreads(1);

#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
// reset thread id to avoid confusion when thread is reused from pool again
// mkldnn_thread_id_ = -1 is reserved for cache clearing mode only
if (paddle::platform::get_cur_thread_id() == -1) {
VLOG(2) << "Clear previous mkldnn thread id -1\n";
paddle::platform::set_cur_thread_id(0);
}
#endif
return true;
}

Expand Down Expand Up @@ -595,6 +613,15 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(

bool AnalysisPredictor::ZeroCopyRun() {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
// Make sure this does not conflict with the AnalysisConfig::SetMKLDNNThreadId() case
VLOG(2) << "AnalysisPredictor::Run get_cur_thread_id="
<< paddle::platform::get_cur_thread_id()
<< ", mkldnn_thread_id_=" << config_.mkldnn_thread_id_ << "\n";
if (paddle::platform::get_cur_thread_id() == 0)
paddle::platform::set_cur_thread_id(config_.mkldnn_thread_id_);
#endif
executor_->Run();
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
Expand All @@ -603,7 +630,15 @@ bool AnalysisPredictor::ZeroCopyRun() {
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
paddle::platform::SetNumThreads(1);

#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
// reset thread id to avoid confusion when thread is reused from pool again
// mkldnn_thread_id_ = -1 is reserved for cache clearing mode only
if (paddle::platform::get_cur_thread_id() == -1) {
VLOG(2) << "Clear previous mkldnn thread id setting\n";
paddle::platform::set_cur_thread_id(0);
}
#endif
return true;
}

Expand Down
4 changes: 4 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,9 @@ struct AnalysisConfig {
/** A boolean state telling whether to use the MKLDNN.
*/
bool mkldnn_enabled() const { return use_mkldnn_; }
/** Set MKLDNN thread id.
*/
void SetMKLDNNThreadId(int id);

/** Set and get the number of cpu math library threads.
*/
Expand Down Expand Up @@ -287,6 +290,7 @@ struct AnalysisConfig {
bool use_ngraph_{false};
bool use_mkldnn_{false};
std::unordered_set<std::string> mkldnn_enabled_op_types_;
int mkldnn_thread_id_{0};

bool model_from_memory_{false};

Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/inference/tests/api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ if (NOT EXISTS ${MOBILENET_INSTALL_DIR})
endif()
inference_analysis_api_test_with_refer_result(test_analyzer_mobilenet_transpose ${MOBILENET_INSTALL_DIR} analyzer_vis_tester.cc)

# detect
inference_analysis_api_test_with_refer_result(test_analyzer_detect ${OCR_INSTALL_DIR} analyzer_detect_tester.cc)

### Image classification tests with fake data
set(IMG_CLASS_TEST_APP "test_analyzer_image_classification")
set(IMG_CLASS_TEST_APP_SRC "analyzer_image_classification_tester.cc")
Expand Down
150 changes: 150 additions & 0 deletions paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <gtest/gtest.h>
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/tests/api/tester_helper.h"
DEFINE_string(infer_shape, "", "data shape file");

namespace paddle {
namespace inference {
namespace analysis {

// One input sample: a flattened float tensor plus its shape.
struct Record {
  std::vector<float> data;     // flattened input values, row-major
  std::vector<int32_t> shape;  // tensor dims; presumably 4-D NCHW — TODO confirm
};

// Parses one space-separated line of float data and one space-separated
// line of int32 shape values into a Record.
Record ProcessALine(const std::string &line, const std::string &shape_line) {
  VLOG(3) << "process a line";

  Record record;
  std::vector<std::string> data_strs;
  split(line, ' ', &data_strs);
  record.data.reserve(data_strs.size());
  for (auto &d : data_strs) {
    record.data.push_back(std::stof(d));
  }

  std::vector<std::string> shape_strs;
  split(shape_line, ' ', &shape_strs);
  record.shape.reserve(shape_strs.size());
  for (auto &s : shape_strs) {
    record.shape.push_back(std::stoi(s));
  }
  // Debug print of the last dim of an expected 4-D shape; guarded so a
  // malformed shape line cannot trigger an out-of-bounds read.
  if (record.shape.size() > 3) {
    VLOG(2) << "data shape size " << record.shape[3];
  }
  return record;
}

// Builds an AnalysisConfig for the detection model: CPU-only execution with
// the requested number of math-library threads, IR debugging on, and input
// names taken from feed order rather than specified explicitly.
void SetConfig(AnalysisConfig *cfg) {
  cfg->DisableGpu();
  cfg->SetModel(FLAGS_infer_model + "/model", FLAGS_infer_model + "/params");
  cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
  cfg->SwitchIrDebug();
  cfg->SwitchSpecifyInputNames(false);
}

// Reads (data line, shape line) pairs from --infer_data / --infer_shape and
// converts each pair into a single-tensor input batch appended to *inputs.
// In --test_all_data mode up to 1000 samples are read; otherwise one.
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
  std::ifstream file(FLAGS_infer_data);
  std::ifstream infer_file(FLAGS_infer_shape);
  std::string line;
  std::string shape_line;

  const int iteration = FLAGS_test_all_data ? 1000 : 1;
  for (int k = 0; k < iteration; k++) {
    // Stop early when either file runs out of records instead of feeding
    // empty strings to std::stof/std::stoi (which would throw).
    if (!std::getline(file, line) || !std::getline(infer_file, shape_line)) {
      break;
    }
    auto record = ProcessALine(line, shape_line);

    PaddleTensor input;
    input.shape = record.shape;
    input.dtype = PaddleDType::FLOAT32;
    const size_t input_size = record.data.size() * sizeof(float);
    input.data.Resize(input_size);
    memcpy(input.data.data(), record.data.data(), input_size);

    inputs->emplace_back(std::vector<PaddleTensor>{input});
  }
}

// Easy for profiling independently.
// ocr, mobilenet and se_resnext50
void profile(bool use_mkldnn = false) {
AnalysisConfig cfg;
SetConfig(&cfg);
if (use_mkldnn) {
cfg.EnableMKLDNN();
cfg.pass_builder()->AppendPass("fc_mkldnn_pass");
}
// cfg.pass_builder()->TurnOnDebug();
std::vector<std::vector<PaddleTensor>> outputs;

std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
input_slots_all, &outputs, FLAGS_num_threads);
}

// Plain CPU profiling run.
// NOTE(review): suite name "Analyzer_vis" looks copied from
// analyzer_vis_tester.cc; consider renaming to "Analyzer_detect".
TEST(Analyzer_vis, profile) { profile(); }

#ifdef PADDLE_WITH_MKLDNN
// Same profiling run with MKLDNN kernels enabled.
TEST(Analyzer_vis, profile_mkldnn) { profile(true /* use_mkldnn */); }
#endif

// Check the fuse status: builds a predictor and queries its fuse statistics.
// The statistics are only collected/logged here, not asserted on.
TEST(Analyzer_vis, fuse_statis) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  int num_ops;  // filled by GetFuseStatis with the total operator count
  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
  GetFuseStatis(predictor.get(), &num_ops);
}

// Compare result of NativeConfig and AnalysisConfig
void compare(bool use_mkldnn = false) {
AnalysisConfig cfg;
SetConfig(&cfg);
if (use_mkldnn) {
cfg.EnableMKLDNN();
cfg.pass_builder()->AppendPass("fc_mkldnn_pass");
}

std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
CompareNativeAndAnalysis(
reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
}

// Native-vs-analysis comparison on the default CPU path.
TEST(Analyzer_vis, compare) { compare(); }
#ifdef PADDLE_WITH_MKLDNN
// Same comparison with MKLDNN kernels enabled.
TEST(Analyzer_vis, compare_mkldnn) { compare(true /* use_mkldnn */); }
#endif

// Compare Deterministic result: runs the analysis predictor repeatedly on
// the same inputs and verifies the outputs do not vary between runs.
TEST(Analyzer_vis, compare_determine) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
                       input_slots_all);
}

} // namespace analysis
} // namespace inference
} // namespace paddle
7 changes: 7 additions & 0 deletions paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt));
platform::MKLDNNHandler::AppendKey(&key,
std::to_string(multi_input[0]->format()));
if (platform::get_cur_thread_id() != -1) {
auto tid = std::this_thread::get_id();
std::stringstream ss;
ss << tid;
platform::MKLDNNHandler::AppendKey(&key, "-t:");
platform::MKLDNNHandler::AppendKey(&key, ss.str());
}
return key;
}

Expand Down
8 changes: 5 additions & 3 deletions paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
user_weights_memory_p, pipeline, is_test);

std::shared_ptr<mkldnn::memory> dst_memory_p;
std::shared_ptr<mkldnn::memory> user_residual_memory_p;

if (fuse_residual_conn) {
auto residual_param = ctx.Input<Tensor>("ResidualData");
Expand All @@ -243,7 +244,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {

auto user_residual_md = platform::MKLDNNMemDesc(
residual_data_tz, residual_data_type, residual_param->format());
auto user_residual_memory_p = handler.AcquireResidualDataMemory(
user_residual_memory_p = handler.AcquireResidualDataMemory(
user_residual_md, to_void_cast<T>(residual_param_data));

dst_memory_p = handler.AcquireDstMemoryFromResidualDataMemory(
Expand All @@ -263,14 +264,15 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {

// create convolution op primitive
std::shared_ptr<mkldnn::convolution_forward> conv_p;
std::shared_ptr<mkldnn::memory> user_bias_memory_p, bias_memory_p;
if (bias) {
const T* bias_data = bias->data<T>();
auto user_bias_md = platform::MKLDNNMemDesc(
{bias_tz}, platform::MKLDNNGetDataType<T>(), memory::format::x);
auto user_bias_memory_p =
user_bias_memory_p =
handler.AcquireBiasMemory(user_bias_md, to_void_cast<T>(bias_data));

auto bias_memory_p =
bias_memory_p =
handler.AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline);
conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p,
bias_memory_p, dst_memory_p);
Expand Down
20 changes: 15 additions & 5 deletions paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt));
platform::MKLDNNHandler::AppendKey(&key, std::to_string(fmt));
platform::MKLDNNHandler::AppendKey(&key, suffix);
if (platform::get_cur_thread_id() != -1) {
auto tid = std::this_thread::get_id();
std::stringstream ss;
ss << tid;
platform::MKLDNNHandler::AppendKey(&key, "-t:");
platform::MKLDNNHandler::AppendKey(&key, ss.str());
}
return key;
}

Expand Down Expand Up @@ -128,6 +135,9 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
const std::string key_pool_workspace_memory =
key + "@pool_workspace_memory";

std::shared_ptr<mkldnn::memory> src_memory, dst_memory;
std::shared_ptr<mkldnn::pooling_forward::primitive_desc> pool_pd;
std::shared_ptr<mkldnn::memory> pool_src_memory_p, pool_dst_memory_p;
auto pool_p =
std::static_pointer_cast<pooling_forward>(dev_ctx.GetBlob(key_pool_p));
if (pool_p == nullptr) {
Expand All @@ -150,17 +160,17 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto propagation = src_md.data.data_type == mkldnn_f32
? mkldnn::prop_kind::forward_training
: mkldnn::prop_kind::forward_scoring;
std::shared_ptr<mkldnn::pooling_forward::primitive_desc> pool_pd =
pool_pd =
CreatePrimitiveDesc(src_md, dst_md, propagation, strides,
padding_left_top, padding_right_bottom, ksize,
pooling_type, mkldnn_engine, ceil_mode, is_test);

// save pool_pd into global device context to be referred in backward path
if (!is_test) dev_ctx.SetBlob(key_pool_pd, pool_pd);

auto src_memory = std::make_shared<memory>(pool_pd->src_primitive_desc(),
to_void_cast<T>(input_data));
auto dst_memory =
src_memory = std::make_shared<memory>(pool_pd->src_primitive_desc(),
to_void_cast<T>(input_data));
dst_memory =
std::make_shared<memory>(pool_pd->dst_primitive_desc(), output_data);

dev_ctx.SetBlob(key_pool_src_mem_p, src_memory);
Expand All @@ -186,7 +196,7 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
(memory::format)dst_memory->get_primitive_desc().desc().data.format;
} else {
// Primitives already exist
auto pool_src_memory_p =
pool_src_memory_p =
std::static_pointer_cast<memory>(dev_ctx.GetBlob(key_pool_src_mem_p));
PADDLE_ENFORCE(pool_src_memory_p != nullptr,
"Fail to find pooling src mem_p in device context");
Expand Down
Loading