Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions paddle/fluid/framework/ir/graph_pattern_detector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1184,6 +1184,17 @@ PDNode *patterns::ElementwiseAdd::operator()(PDNode *x_var, PDNode *y_var) {
return out_var;
}

// Matches a standalone concat operator together with its "Out" variable.
// The concat inputs are deliberately not part of the pattern.
// Returns the output variable node.
PDNode *patterns::Concat::operator()() {
  auto *op_node = pattern->NewNode(concat_op_repr())->assert_is_op("concat");

  auto *out_node = pattern->NewNode(concat_out_repr());
  out_node->AsOutput()->assert_is_op_output("concat", "Out");

  op_node->LinksTo({out_node});
  return out_node;
}

PDNode *patterns::ConcatReLU::operator()() {
auto concat_op = pattern->NewNode(concat_op_repr())->assert_is_op("concat");
auto relu_op = pattern->NewNode(relu_op_repr())->assert_is_op("relu");
Expand Down
13 changes: 13 additions & 0 deletions paddle/fluid/framework/ir/graph_pattern_detector.h
Original file line number Diff line number Diff line change
Expand Up @@ -728,6 +728,19 @@ struct ElementwiseAdd : public PatternBase {
PATTERN_DECL_NODE(elementwise_add_out);
};

// Concat op
// Pattern matching a standalone concat operator and its output variable
// only; the (variadic) inputs are not part of the pattern.
// named nodes:
// concat_op, concat_out
struct Concat : public PatternBase {
Concat(PDPattern* pattern, const std::string& name_scope)
: PatternBase(pattern, name_scope, "concat") {}

// Builds the pattern; returns the concat output variable node.
PDNode* operator()();

PATTERN_DECL_NODE(concat_op);
PATTERN_DECL_NODE(concat_out);
};

// Concat + ReLU
// named nodes:
// concat_op, concat_out, relu_op, relu_out
Expand Down
91 changes: 91 additions & 0 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h"
#include <limits>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
Expand Down Expand Up @@ -72,6 +73,53 @@ void CPUQuantizePass::QuantizeInput(Graph* g, Node* op, Node* input,
if (!scale_attr_name.empty()) op->Op()->SetAttr(scale_attr_name, scale);
}

// Inserts a quantize op in front of every input of `op`, using one common
// scale for all of them (the smallest scale-to-one found among the inputs,
// so that no input saturates), and rewires the op's `input_name` inputs to
// the quantize outputs. Optionally stores the scale in `scale_attr_name`.
// NOTE(review): iterates over all of op->inputs — assumes every input node
// of `op` belongs to `input_name` (true for concat's "X"); confirm before
// reusing for multi-input-name ops.
void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
VarQuantScale* scales, bool are_unsigned,
std::string scale_attr_name) const {
auto inputs = op->inputs;
PADDLE_ENFORCE_GE(inputs.size(), 1);

// Find the smallest scale-to-one among all inputs; quantizing each input
// with it keeps every value within range.
// NOTE(review): operator[] default-inserts a missing entry — assumes all
// input scales were computed during the warmup step.
double scale_min = std::numeric_limits<double>::max();
for (const auto& input : inputs) {
double scale = (*scales)[input->Name()].second.data<double>()[0];
if (scale < scale_min) scale_min = scale;
}
unsigned max = are_unsigned ? U8_MAX : S8_MAX;
float scale = scale_min * max;  // op attributes store scales as float

// Create a quantize op desc prototype; attributes identical for every
// input are set once here, per-input fields inside the loop below.
OpDesc q_desc;
q_desc.SetType("quantize");
q_desc.SetAttr("Scale", scale);
q_desc.SetAttr("is_negative_input", !are_unsigned);

std::vector<Node*> quantize_out_nodes(inputs.size());
std::vector<std::string> quantize_out_node_names(inputs.size());

for (size_t i = 0; i < inputs.size(); i++) {
// Create quantize output variable
VarDesc quantize_out_desc(patterns::PDNodeName("quantize", "out"));
quantize_out_nodes[i] = g->CreateVarNode(&quantize_out_desc);
quantize_out_node_names[i] = quantize_out_nodes[i]->Name();

q_desc.SetInput("Input", std::vector<std::string>({inputs[i]->Name()}));
q_desc.SetOutput("Output",
std::vector<std::string>({quantize_out_node_names[i]}));
auto quantize_op = g->CreateOpNode(&q_desc);  // OpDesc will be copied.

// Rewire the graph: input -> quantize -> quantize_out -> op.
UnlinkNodes(inputs[i], op);
IR_NODE_LINK_TO(inputs[i], quantize_op);
IR_NODE_LINK_TO(quantize_op, quantize_out_nodes[i]);
IR_NODE_LINK_TO(quantize_out_nodes[i], op);
}

// update op's input
op->Op()->SetInput(input_name, quantize_out_node_names);

if (!scale_attr_name.empty()) op->Op()->SetAttr(scale_attr_name, scale);
}

void CPUQuantizePass::DequantizeOutput(Graph* g, Node* op, Node* output,
std::string output_name,
double scale_to_one, bool is_unsigned,
Expand Down Expand Up @@ -216,6 +264,48 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const {
PrettyLogDetail("--- quantized %d pool2d ops", quantize_pool_count);
}

// Quantizes every concat op marked with use_quantizer=true: all inputs get
// a common quantize op (minimum scale) and the output gets a dequantize op.
void CPUQuantizePass::QuantizeConcat(Graph* graph) const {
GraphPatternDetector gpd;
auto pattern = gpd.mutable_pattern();
patterns::Concat concat_pattern{pattern, name_scope_};
concat_pattern();

int quantize_concat_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(4) << "Quantize concat op";
GET_IR_NODE_FROM_SUBGRAPH(concat_op, concat_op, concat_pattern);
auto* concat_op_desc = concat_op->Op();

// skip if should not be quantized
if (!concat_op_desc->HasAttr("use_quantizer") ||
!boost::get<bool>(concat_op_desc->GetAttr("use_quantizer")))
return;

GET_IR_NODE_FROM_SUBGRAPH(concat_out, concat_out, concat_pattern);

// get scales calculated after warmup, they scale variables to MAX=1.0;
// bind by reference to avoid copying the whole scale map on every match
auto& scales = Get<VarQuantScale>("quant_var_scales");

// if all inputs were unsigned, then the output was set to unsigned
// during the scale calculation step
bool are_all_inputs_unsigned = scales[concat_out->Name()].first;
QuantizeInputs(g, concat_op, "X", &scales, are_all_inputs_unsigned);

auto output_scale = scales[concat_out->Name()].second.data<double>()[0];

DequantizeOutput(g, concat_op, concat_out, "Out", output_scale,
are_all_inputs_unsigned);

++quantize_concat_count;
};

gpd(graph, handler);
AddStatis(quantize_concat_count);

PrettyLogDetail("--- quantized %d concat ops", quantize_concat_count);
}

void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
VLOG(3) << "Quantizing the graph.";
PADDLE_ENFORCE(graph);
Expand All @@ -226,6 +316,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
QuantizeConv(graph, false /* with_residual_data */);
QuantizeConv(graph, true /* with_residual_data */);
QuantizePool(graph);
QuantizeConcat(graph);
}

} // namespace ir
Expand Down
7 changes: 7 additions & 0 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,17 @@ class CPUQuantizePass : public FusePassBase {

void QuantizePool(Graph* graph) const;

// Quantize all concat ops marked with use_quantizer=true.
void QuantizeConcat(Graph* graph) const;

// Insert a quantize op before a single input of `op` and rewire the graph;
// optionally store the used scale in `scale_attr_name` on the op.
void QuantizeInput(Graph* g, Node* op, Node* input, std::string input_name,
double scale_to_one, bool is_unsigned,
std::string scale_attr_name = "") const;

// quantize all inputs of given name with the same (minimum) scale
void QuantizeInputs(Graph* g, Node* op, std::string input_name,
VarQuantScale* scales, bool are_unsigned,
std::string scale_attr_name = "") const;

void DequantizeOutput(Graph* g, Node* op, Node* output,
std::string output_name, double scale_to_one,
bool is_unsigned,
Expand Down
100 changes: 100 additions & 0 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,14 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
if (inputs.size() > 1) op->SetInput("W", {inputs[1]});
if (inputs.size() > 2) op->SetInput("Bias", {inputs[2]});
op->SetOutput("Out", {outputs[0]});
} else if (type == "concat") {
op->SetInput("X", inputs);
op->SetOutput("Out", outputs);
op->SetAttr("use_quantizer", use_quantizer);
}
}

namespace {
static const std::initializer_list<std::string> variable_names{
"a", "w1", "c", "d", "w2", "e", "f", "g",
"h", "w3", "b1", "i", "j", "w4", "b2"};
Expand Down Expand Up @@ -204,6 +209,101 @@ TEST(CpuQuantizePass, do_not_quantize) {
1.0f);
}

} // namespace

namespace {
// Variable names used by the concat test program built below.
static const std::initializer_list<std::string> variable_names_concat = {
"a1", "b1", "a2", "b2", "c", "d"};

// Builds the test program:
//   a1->Pool1->b1
//   a2->Pool2->b2
//   (b1,b2)->Concat->c
//   c->Pool3->d
ProgramDesc BuildProgramDescConcat() {
ProgramDesc prog;

// All pool2d ops share the same flags; only concat is marked use_quantizer.
auto add_pool = [&prog](const std::string& name, const std::string& in,
const std::string& out) {
SetOp(&prog, "pool2d", name, {in}, {out}, true, false);
};
add_pool("Pool1", "a1", "b1");
add_pool("Pool2", "a2", "b2");
SetOp(&prog, "concat", "Concat", {"b1", "b2"}, {"c"}, true, true);
add_pool("Pool3", "c", "d");

return prog;
}

// Runs cpu_quantize_pass on `prog` and verifies the per-type op counts and
// the total number of nodes the pass added.
void MainTestConcat(const ProgramDesc& prog, int pool_count, int concat_count,
int quant_count, int dequant_count, int added_nodes_count) {
std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));

// The pass reads tensors through a scope, so prepare one.
auto place = paddle::platform::CPUPlace();
NaiveExecutor exe{place};
Scope scope;
exe.CreateVariables(prog, 0, true, &scope);

// Provide a dummy scale (2.0, signed) for every variable; the pass takes
// ownership of the map via Set() below.
auto* scales = new VarQuantScale();
for (auto& name : variable_names_concat) {
InitTensorHolder(&scope, place, name.c_str());
LoDTensor tensor;
tensor.Resize({1});
tensor.mutable_data<double>(place)[0] = 2.0;
(*scales)[name] = std::make_pair(false, std::move(tensor));
}

graph->SetNotOwned(kParamScopeAttr, &scope);

auto pass = PassRegistry::Instance().Get("cpu_quantize_pass");
pass->Set("quant_var_scales", scales);

int nodes_before = graph->Nodes().size();
graph.reset(pass->Apply(graph.release()));
int nodes_after = graph->Nodes().size();

int found_concat = 0;
int found_pool2d = 0;
int found_quantize = 0;
int found_dequantize = 0;
for (auto* node : graph->Nodes()) {
if (!node->IsOp()) continue;
auto type = node->Op()->Type();
if (type == "concat") {
found_concat++;
} else if (type == "pool2d") {
found_pool2d++;
} else if (type == "quantize") {
found_quantize++;
} else if (type == "dequantize") {
found_dequantize++;
}
}
EXPECT_EQ(found_concat, concat_count);
EXPECT_EQ(found_pool2d, pool_count);
EXPECT_EQ(found_quantize, quant_count);
EXPECT_EQ(found_dequantize, dequant_count);
EXPECT_EQ(nodes_before + added_nodes_count, nodes_after);
}

// Expected quantized graph:
//   a1->Pool1->b1
//   a2->Pool2->b2
//   (b1->QUANT1->IN1, b2->QUANT2->IN2)->Concat->c
//   c->OUT1->DEQUANT1->Pool3->d
TEST(CpuQuantizePass, concat) {
// 2 quantize ops (one per concat input) + 1 dequantize op after concat,
// plus their 3 output variables: 6 new nodes in total.
MainTestConcat(BuildProgramDescConcat(), /*pool_count=*/3,
/*concat_count=*/1, /*quant_count=*/2,
/*dequant_count=*/1, /*added_nodes_count=*/6);
}

} // namespace

} // namespace ir
} // namespace framework
} // namespace paddle
Expand Down
14 changes: 5 additions & 9 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
// limitations under the License.

#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h"
#include <algorithm>
#include <string>
#include <vector>
#include "paddle/fluid/platform/enforce.h"
Expand Down Expand Up @@ -81,15 +82,10 @@ void CPUQuantizeSquashPass::Squash(
auto quant_out_var_name = quant_out->Name();
auto next_op_inputs = next_op_desc->InputNames();
for (const auto& name : next_op_inputs) {
if (next_op_desc->Inputs().count(name) == 0 ||
next_op_desc->Input(name).size() == 0)
continue;
auto var_name = next_op_desc->Input(name)[0];
if (var_name.compare(quant_out_var_name) == 0) {
next_op_desc->SetInput(
name, std::vector<std::string>({dequant_in->Name()}));
break;
}
auto input_names = next_op_desc->Input(name);
std::replace(input_names.begin(), input_names.end(), quant_out_var_name,
dequant_in->Name());
next_op_desc->SetInput(name, input_names);
}

if (keep_dequant)
Expand Down
62 changes: 34 additions & 28 deletions paddle/fluid/inference/api/mkldnn_quantizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,40 +50,46 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {

auto glambda = [&](const VariableNameMap& connections, bool is_output) {
for (auto const& conn : connections) {
if (conn.second.size() == 0) continue;
auto& var_name = conn.second[0];

// skip if scale already computed
if (scales_.find(var_name) != scales_.end()) return;

auto* var = predictor_.sub_scope_->FindVar(var_name);
PADDLE_ENFORCE(var, "%s is not in the scope", var_name);
PADDLE_ENFORCE(var->IsType<LoDTensor>(),
"Only support lod tensor now.");
LoDTensor* var_tensor = var->GetMutable<LoDTensor>();

// force unsigned type if already know it
bool is_unsigned = false;
if (is_output && op->Type() == "conv2d") {
// output of conv2d with relu must be unsigned
is_unsigned = op->HasAttr("fuse_relu") &&
boost::get<bool>(op->GetAttr("fuse_relu"));
} else if (is_output && op->Type() == "pool2d") {
// output of pool2d with unsigned input must be unsigned
auto input_var_name = op->Input("X")[0];
if (scales_.find(input_var_name) != scales_.end()) {
is_unsigned = scales_[input_var_name].first;
for (const auto& var_name : conn.second) {
// skip if scale already computed
if (scales_.find(var_name) != scales_.end()) return;

auto* var = predictor_.sub_scope_->FindVar(var_name);
PADDLE_ENFORCE(var, "%s is not in the scope", var_name);
PADDLE_ENFORCE(var->IsType<LoDTensor>(),
"Only support lod tensor now.");
LoDTensor* var_tensor = var->GetMutable<LoDTensor>();

// force unsigned type if already know it
bool is_unsigned = false;
if (is_output && op->Type() == "conv2d") {
// output of conv2d with relu must be unsigned
is_unsigned = op->HasAttr("fuse_relu") &&
boost::get<bool>(op->GetAttr("fuse_relu"));
} else if (is_output && op->Type() == "relu") {
is_unsigned = true;
} else if (is_output &&
(op->Type() == "pool2d" || op->Type() == "transpose2" ||
op->Type() == "reshape2" || op->Type() == "concat")) {
// output of ops with unsigned input must be unsigned
is_unsigned = true;
for (auto input_var_name : op->Input("X")) {
PADDLE_ENFORCE(scales_.find(input_var_name) != scales_.end(),
"Input scales must be calculated before the "
"output scales to infer if output is unsigned.");
is_unsigned = is_unsigned && scales_[input_var_name].first;
}
}
}

CalculateSingleScale(op->Type(), conn.first, var_name, *var_tensor,
is_unsigned);
CalculateSingleScale(op->Type(), conn.first, var_name, *var_tensor,
is_unsigned);
}
}
};

// handle outputs first so unsigned outputs could be inferred
glambda(connections_out, true /* is_output */);
// handle inputs first to let is_unsigned be inferred for the outputs
glambda(connections_in, false /* is_output */);
glambda(connections_out, true /* is_output */);
}
}

Expand Down
Loading