12 changes: 12 additions & 0 deletions paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -504,6 +504,16 @@ PDNode *PDNode::assert_op_has_n_outputs(const std::string &op_type, size_t n) {
return this;
}

PDNode *PDNode::assert_has_n_inputs(size_t n) {
asserts_.emplace_back([=](Node *x) { return x->inputs.size() == n; });
return this;
}

PDNode *PDNode::assert_has_n_outputs(size_t n) {
asserts_.emplace_back([=](Node *x) { return x->outputs.size() == n; });
return this;
}

PDNode *PDNode::assert_more(PDNode::teller_t &&teller) {
asserts_.emplace_back(std::move(teller));
return this;
@@ -1469,11 +1479,13 @@ PDNode *patterns::ConvAffineChannel::operator()(
auto *ac_scale_var = pattern->NewNode(ac_scale_repr())
->AsInput()
->assert_is_persistable_var()
->assert_has_n_outputs(1)
->assert_is_op_input("affine_channel", "Scale");
// AC Bias
auto *ac_bias_var = pattern->NewNode(ac_bias_repr())
->AsInput()
->assert_is_persistable_var()
->assert_has_n_outputs(1)
->assert_is_op_input("affine_channel", "Bias");

// AC output
3 changes: 3 additions & 0 deletions paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -131,6 +131,9 @@ struct PDNode {
const std::unordered_set<std::string>& op_types,
const std::string& argument, int nth);

PDNode* assert_has_n_inputs(size_t n);
PDNode* assert_has_n_outputs(size_t n);

template <typename T>
PDNode* assert_op_attr(const std::string& attr_name, const T& attr) {
asserts_.emplace_back([=](Node* x) {
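For context, the two new assertions compose with PDNode's existing chained builder API. A minimal sketch (hypothetical helper, not part of this diff) mirroring the ConvAffineChannel usage above:

#include "paddle/fluid/framework/ir/graph_pattern_detector.h"

namespace paddle {
namespace framework {
namespace ir {

// Hypothetical helper: match a persistable variable that feeds exactly
// one consumer op, as the affine_channel Scale input now must.
PDNode *SingleConsumerScaleVar(PDPattern *pattern) {
  return pattern->NewNode("scale")
      ->AsInput()
      ->assert_is_persistable_var()
      ->assert_has_n_outputs(1)  // reject vars shared by several ops
      ->assert_is_op_input("affine_channel", "Scale");
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle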
4 changes: 2 additions & 2 deletions paddle/fluid/inference/analysis/argument.h
@@ -59,7 +59,6 @@ struct Argument {

using unique_ptr_t = std::unique_ptr<void, std::function<void(void*)>>;
using fusion_statis_t = std::unordered_map<std::string, int>;
-using engine_opt_info_t = std::map<std::string, std::string>;
using anakin_max_shape_t = std::map<std::string, std::vector<int>>;

bool Has(const std::string& key) const { return valid_fields_.count(key); }
@@ -130,7 +129,7 @@ struct Argument {
DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string);
DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string);
DECL_ARGUMENT_FIELD(model_from_memory, ModelFromMemory, bool);
-DECL_ARGUMENT_FIELD(engine_opt_info, EngineOptInfo, engine_opt_info_t);
+DECL_ARGUMENT_FIELD(optim_cache_dir, OptimCacheDir, std::string);

// The overall graph to work on.
DECL_ARGUMENT_UNIQUE_FIELD(main_graph, MainGraph, framework::ir::Graph);
@@ -149,6 +148,7 @@
// Pass a set of op types to enable its mkldnn kernel
DECL_ARGUMENT_FIELD(mkldnn_enabled_op_types, MKLDNNEnabledOpTypes,
std::unordered_set<std::string>);
DECL_ARGUMENT_FIELD(mkldnn_cache_capacity, MkldnnCacheCapacity, int);

#ifdef PADDLE_WITH_MKLDNN
// A set of op types to enable their quantized kernels
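DECL_ARGUMENT_FIELD generates a getter/setter pair per field, so the two new entries are driven as in the sketch below (accessor names assumed to follow the macro's Set<Field>()/<field>() convention; values are placeholders):

#include <string>
#include "paddle/fluid/inference/analysis/argument.h"

namespace paddle {
namespace inference {
namespace analysis {

// Sketch only: how the analysis driver would populate and read back the
// two new fields before running the passes.
void WireNewArgumentFields(Argument *argument) {
  argument->SetOptimCacheDir("/tmp/paddle_opt_cache");  // placeholder path
  argument->SetMkldnnCacheCapacity(10);                 // placeholder capacity
  // Passes later read them back, as ir_pass_manager.cc does below:
  std::string dir = argument->optim_cache_dir();
  int capacity = argument->mkldnn_cache_capacity();
  (void)dir;
  (void)capacity;
}

}  // namespace analysis
}  // namespace inference
}  // namespace paddle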
21 changes: 13 additions & 8 deletions paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -94,11 +94,20 @@ void IRPassManager::CreatePasses(Argument *argument,

bool use_static_engine = argument->tensorrt_use_static_engine();
bool model_from_memory = argument->model_from_memory();
-bool int8_valid = !(model_from_memory && enable_int8);
+std::string optim_cache_dir = argument->optim_cache_dir();
+bool int8_valid =
+    !(model_from_memory && optim_cache_dir.empty() && enable_int8);
PADDLE_ENFORCE(int8_valid,
-               "TRT INT8 Now don't support model load from memory.");
+               "When you are in TRT INT8 mode and load the model from "
+               "memory, you should set optim_cache_dir using "
+               "config.SetOptimCacheDir().");
+PADDLE_ENFORCE(!(model_from_memory && use_static_engine),
+               "When you are using Paddle-TRT and also loading the model "
+               "from memory, you should set use_static to false.");

-if ((!model_from_memory && use_static_engine) || enable_int8) {
+if (!optim_cache_dir.empty()) {
+  pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
+} else if (use_static_engine || enable_int8) {
std::string model_opt_cache_dir =
argument->Has("model_dir")
? argument->model_dir()
@@ -110,8 +119,6 @@
pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
pass->Set("use_static_engine", new bool(use_static_engine));
pass->Set("model_from_memory", new bool(argument->model_from_memory()));
pass->Set("engine_opt_info", new std::map<std::string, std::string>(
argument->engine_opt_info()));
}
if (pass_name == "ngraph_subgraph_pass") {
pass->Set("program",
@@ -123,8 +130,6 @@
pass->Set("use_gpu", new bool(argument->use_gpu()));
pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
pass->Set("model_from_memory", new bool(argument->model_from_memory()));
pass->Set("engine_opt_info", new std::map<std::string, std::string>(
argument->engine_opt_info()));
pass->Set("predictor_id", new int(argument->predictor_id()));
pass->Set("max_input_shape", new std::map<std::string, std::vector<int>>(
argument->anakin_max_input_shape()));
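The two new checks encode a user-facing contract for memory-loaded models; a minimal sketch of a configuration that satisfies them (buffers and the cache path are placeholders, and the EnableTensorRtEngine arguments shown are the defaults apart from precision):

#include <cstddef>
#include "paddle/fluid/inference/api/paddle_analysis_config.h"

// Sketch: TRT INT8 with a model loaded from memory now requires an
// optimization cache dir, and use_static must remain false.
void ConfigureTrtInt8FromMemory(const char *prog_buf, size_t prog_size,
                                const char *params_buf, size_t params_size) {
  paddle::AnalysisConfig config;
  config.SetModelBuffer(prog_buf, prog_size, params_buf, params_size);
  config.SetOptimCacheDir("/tmp/trt_opt_cache");  // required by the INT8 check
  config.EnableTensorRtEngine(
      1 << 20 /* workspace_size */, 1 /* max_batch_size */,
      3 /* min_subgraph_size */, paddle::AnalysisConfig::Precision::kInt8,
      false /* use_static: must be false for memory-loaded models */);
}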
1 change: 1 addition & 0 deletions paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -226,6 +226,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
std::unique_ptr<tensorrt::TRTInt8Calibrator> calibrator;
if (enable_int8 && calibration_data.size() != 0) {
calibrator.reset(new tensorrt::TRTInt8Calibrator(calibration_data));
LOG(INFO) << "RUN Paddle TRT int8 calibration mode...";
}
// When in int8 mode and calibration_mode, the program just produce the
// calibration table data.
2 changes: 2 additions & 0 deletions paddle/fluid/inference/analysis/passes/CMakeLists.txt
@@ -4,13 +4,15 @@ cc_library(memory_optim_pass SRCS memory_optimize_pass.cc DEPS analysis_pass zer
cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_pass.cc DEPS analysis_pass argument ir_pass_manager)
cc_library(ir_graph_to_program_pass SRCS ir_graph_to_program_pass.cc DEPS analysis_pass graph_to_program_pass)
cc_library(adjust_cudnn_workspace_size_pass SRCS adjust_cudnn_workspace_size_pass.cc DEPS analysis_pass graph_to_program_pass)
cc_library(inference_op_replace_pass SRCS inference_op_replace_pass.cc DEPS analysis_pass graph_to_program_pass)

cc_library(analysis_passes SRCS passes.cc DEPS
ir_graph_build_pass
ir_analysis_pass
ir_params_sync_among_devices_pass
adjust_cudnn_workspace_size_pass
memory_optim_pass
inference_op_replace_pass
ir_graph_to_program_pass
)

47 changes: 47 additions & 0 deletions paddle/fluid/inference/analysis/passes/inference_op_replace_pass.cc
@@ -0,0 +1,47 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h"
#include <unordered_map>

namespace paddle {
namespace inference {
namespace analysis {

void InferenceOpReplacePass::RunImpl(Argument* argument) {
if (!argument->use_gpu()) return;
std::unordered_map<std::string, std::string> replaced_map{
{"conditional_block", "conditional_block_infer"},
};

auto& graph = argument->main_graph();
auto nodes = graph.Nodes();

for (auto& node : nodes) {
if (!node->IsOp()) continue;
auto* op_desc = node->Op();
std::string op_type = op_desc->Type();
if (!replaced_map.count(op_type)) continue;
op_desc->SetType(replaced_map[op_type]);
op_desc->Flush();
}
}

std::string InferenceOpReplacePass::repr() const {
return "inference-op-replace-pass";
}

} // namespace analysis
} // namespace inference
} // namespace paddle
43 changes: 43 additions & 0 deletions paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h
@@ -0,0 +1,43 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <vector>

#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/platform/place.h"

namespace paddle {
namespace inference {
namespace analysis {

/*
* There are some ops (while, conditional_block, etc.) that have different
* optimization points under prediction and training conditions.
* We therefore add corresponding inference implementations for these ops
* separately; this pass replaces such ops with their inference counterparts.
*/
class InferenceOpReplacePass : public AnalysisPass {
public:
void RunImpl(Argument *argument) override;
std::string repr() const override;
};

} // namespace analysis
} // namespace inference
} // namespace paddle
31 changes: 26 additions & 5 deletions paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
@@ -18,6 +18,7 @@
#include <functional>
#include <limits>
#include <map>
#include <set>
#include <string>
#include <type_traits>
#include <utility>
@@ -108,11 +109,34 @@ int DataTypeToSpace(framework::proto::VarType_Type type) {
void MemoryOptimizePass::CollectVarMemorySize(
space_table_t* space_table) const {
const int fake_batch_size = 1;
auto valid_var = [&](framework::ir::Node* node) -> bool {
std::set<std::string> invalid_op = {"while", "conditional_block",
"tensorrt_engine",
"conditional_block_infer"};
for (auto* tmp : node->inputs) {
CHECK(tmp->IsOp());
std::string op_type = tmp->Op()->Type();
if (std::find(invalid_op.begin(), invalid_op.end(), op_type) !=
invalid_op.end()) {
return false;
}
}
for (auto* tmp : node->outputs) {
CHECK(tmp->IsOp());
std::string op_type = tmp->Op()->Type();
if (std::find(invalid_op.begin(), invalid_op.end(), op_type) !=
invalid_op.end()) {
return false;
}
}
return true;
};
// Collect tensors from graph.
for (auto* node : graph_->Nodes()) {
if (node->IsVar() &&
node->Var()->GetType() ==
-        framework::proto::VarType::Type::VarType_Type_LOD_TENSOR) {
+        framework::proto::VarType::Type::VarType_Type_LOD_TENSOR &&
+        valid_var(node)) {
// Parameters will not be reused.
if (node->Var()->Persistable()) continue;
auto shape = node->Var()->GetShape();
@@ -135,12 +159,9 @@ void MakeSimpleReusePlan(
std::unordered_map<std::string, int>* cluster_size) {
std::vector<MemNode> mem_nodes;
for (auto& data : lifecycles) {
+if (!space_table.count(data.first)) continue;
MemNode temp_node;
temp_node.name = data.first;
-PADDLE_ENFORCE(
-    space_table.count(data.first),
-    "%s variable should be in the spacetable during memory optimize",
-    data.first);
temp_node.size = space_table.at(data.first);
temp_node.cluster = -1;
temp_node.lifetime = data.second;
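One note on the valid_var helper above: invalid_op is a std::set, so the std::find scan can be written with the container's own lookup. A behavior-preserving sketch of the same membership test:

#include <set>
#include <string>

// Equivalent to the std::find loops above; std::set already provides
// logarithmic lookup via count().
bool TouchesSubgraphBoundaryOp(const std::string &op_type) {
  static const std::set<std::string> kInvalidOps = {
      "while", "conditional_block", "tensorrt_engine",
      "conditional_block_infer"};
  return kInvalidOps.count(op_type) > 0;
}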
3 changes: 3 additions & 0 deletions paddle/fluid/inference/analysis/passes/passes.cc
@@ -14,6 +14,7 @@

#include "paddle/fluid/inference/analysis/passes/passes.h"
#include "paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h"
#include "paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
#include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h"
@@ -38,6 +39,8 @@ PassRegistry::PassRegistry() {
std::unique_ptr<AnalysisPass>(new IrParamsSyncAmongDevicesPass));
passes_.emplace("adjust_cudnn_workspace_size_pass",
std::unique_ptr<AnalysisPass>(new AdjustCudnnWorkSpacePass));
passes_.emplace("inference_op_replace_pass",
std::unique_ptr<AnalysisPass>(new InferenceOpReplacePass));
passes_.emplace(
"ir_graph_to_program_pass",
std::unique_ptr<IrGraphToProgramPass>(new IrGraphToProgramPass));
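Once registered, the new pass is retrieved and run like any other analysis pass; a sketch assuming the PassRegistry::Global().Retrieve() and AnalysisPass::Run() entry points used elsewhere in the analysis framework:

#include "paddle/fluid/inference/analysis/passes/passes.h"

namespace paddle {
namespace inference {
namespace analysis {

// Sketch: look up the pass by the name registered above and run it on an
// Argument (assumed entry points; error handling omitted).
void RunInferenceOpReplace(Argument *argument) {
  AnalysisPass *pass =
      PassRegistry::Global().Retrieve("inference_op_replace_pass");
  pass->Run(argument);
}

}  // namespace analysis
}  // namespace inference
}  // namespace paddle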
17 changes: 12 additions & 5 deletions paddle/fluid/inference/api/analysis_config.cc
@@ -90,6 +90,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(model_from_memory_); // the memory model reuses prog_file_ and
// params_file_ fields.

CP_MEMBER(opt_cache_dir_);
prog_file_ = std::move(other.prog_file_);
params_file_ = std::move(other.params_file_);

@@ -114,6 +115,7 @@
// MKLDNN related.
CP_MEMBER(use_mkldnn_);
CP_MEMBER(mkldnn_enabled_op_types_);
CP_MEMBER(mkldnn_cache_capacity_);
// Quantization related.
CP_MEMBER(use_mkldnn_quantizer_);
CP_MEMBER(mkldnn_quantizer_config_);
@@ -161,6 +163,15 @@ void AnalysisConfig::EnableMKLDNN() {
Update();
}

void AnalysisConfig::SetMkldnnCacheCapacity(int capacity) {
#ifdef PADDLE_WITH_MKLDNN
mkldnn_cache_capacity_ = capacity;
#else
LOG(ERROR) << "Please compile with MKLDNN first to set MKLDNN cache capacity";
mkldnn_cache_capacity_ = 0;
#endif
}

void AnalysisConfig::EnableMkldnnQuantizer() {
#ifdef PADDLE_WITH_MKLDNN
if (!mkldnn_quantizer_config_)
@@ -342,6 +353,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << use_ngraph_;

ss << use_mkldnn_;
ss << mkldnn_cache_capacity_;
for (auto &item : mkldnn_enabled_op_types_) ss << item;
ss << ";";

@@ -406,11 +418,6 @@ void AnalysisConfig::SetModelBuffer(const char *prog_buffer,
Update();
}

-void AnalysisConfig::SetEngineOptInfo(
-    std::map<std::string, std::string> engine_opt_info) {
-  engine_opt_info_ = engine_opt_info;
-}

NativeConfig AnalysisConfig::ToNativeConfig() const {
NativeConfig config;
config.model_dir = model_dir_;
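Lastly, a user-side sketch of the new MKL-DNN knob (model path and capacity value are illustrative; per the copy-constructor and SerializeInfoCache changes above, the capacity is also folded into the serialized cache key):

#include "paddle/fluid/inference/api/paddle_analysis_config.h"

// Sketch: enable MKL-DNN and bound its runtime cache; the path and the
// capacity value are placeholders.
void ConfigureMkldnnCache() {
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/model");  // placeholder model directory
  config.EnableMKLDNN();
  config.SetMkldnnCacheCapacity(10);  // placeholder capacity
}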