Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
VarQuantScale* scales, bool are_unsigned,
std::string scale_attr_name) const {
auto inputs = op->inputs;
auto output = op->outputs[0];
PADDLE_ENFORCE_GE(inputs.size(), 1);
PADDLE_ENFORCE_EQ(op->outputs.size(), 1);

// create a quantize op desc prototype
OpDesc q_desc;
Expand All @@ -86,13 +88,9 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
std::vector<Node*> quantize_out_nodes(inputs.size());
std::vector<std::string> quantize_out_node_names(inputs.size());

double scale_min = std::numeric_limits<double>::max();
for (const auto& input : inputs) {
double scale = (*scales)[input->Name()].second.data<double>()[0];
if (scale < scale_min) scale_min = scale;
}
double scale_out = (*scales)[output->Name()].second.data<double>()[0];
unsigned max = are_unsigned ? U8_MAX : S8_MAX;
float scale = scale_min * max;
float scale = scale_out * max;

for (size_t i = 0; i < inputs.size(); i++) {
// Create quantize output variable
Expand Down
83 changes: 53 additions & 30 deletions paddle/fluid/inference/api/mkldnn_quantizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "paddle/fluid/inference/api/mkldnn_quantizer.h"
#include <algorithm>
#include <limits>
#include <map>
#include <numeric>
#include <unordered_map>
Expand All @@ -37,6 +38,7 @@ using framework::ir::Graph;
using ConstEigenVectorArrayMap =
Eigen::Map<const Eigen::Array<float, Eigen::Dynamic, 1>>;
using string::PrettyLogH1;
static LoDTensor CreateScaleTensor(int64_t channels_num = 1);

bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
PrettyLogH1("--- Calculating scales for quantization");
Expand All @@ -52,7 +54,7 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
for (auto const& conn : connections) {
for (const auto& var_name : conn.second) {
// skip if scale already computed
if (scales_.find(var_name) != scales_.end()) return;
if (scales_.find(var_name) != scales_.end()) continue;

auto* var = predictor_.sub_scope_->FindVar(var_name);
PADDLE_ENFORCE(var, "%s is not in the scope", var_name);
Expand All @@ -62,29 +64,49 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {

// force unsigned type if already know it
bool is_unsigned = false;
if (is_output && op->Type() == "conv2d") {
// output of conv2d with relu must be unsigned
is_unsigned = (op->HasAttr("fuse_relu") &&
boost::get<bool>(op->GetAttr("fuse_relu"))) ||
(op->HasAttr("fuse_brelu") &&
boost::get<bool>(op->GetAttr("fuse_brelu")));
} else if (is_output && op->Type() == "relu") {
is_unsigned = true;
} else if (is_output &&
(op->Type() == "pool2d" || op->Type() == "transpose2" ||
op->Type() == "reshape2" || op->Type() == "concat")) {
// output of ops with unsigned input must be unsigned
is_unsigned = true;
for (auto input_var_name : op->Input("X")) {
bool compute_scale = true;
if (is_output) {
if (op->Type() == "conv2d") {
// output of conv2d with relu must be unsigned
is_unsigned = (op->HasAttr("fuse_relu") &&
boost::get<bool>(op->GetAttr("fuse_relu"))) ||
(op->HasAttr("fuse_brelu") &&
boost::get<bool>(op->GetAttr("fuse_brelu")));
} else if (op->Type() == "relu") {
is_unsigned = true;
} else if (op->Type() == "transpose2" ||
op->Type() == "reshape2" || op->Type() == "pool2d") {
auto input_var_name = op->Input("X")[0];
PADDLE_ENFORCE(scales_.find(input_var_name) != scales_.end(),
"Input scales must be calculated before the "
"output scales to infer if output is unsigned.");
is_unsigned = is_unsigned && scales_[input_var_name].first;
if (scales_.find(input_var_name) != scales_.end()) {
scales_[var_name] = scales_[input_var_name];
}
compute_scale = false;
} else if (op->Type() == "concat") {
// output of ops with unsigned input must be unsigned
is_unsigned = true;
double min_scale = std::numeric_limits<double>::max();
for (auto input_var_name : op->Input("X")) {
PADDLE_ENFORCE(
scales_.find(input_var_name) != scales_.end(),
"Input scales must be calculated before the "
"output scales to infer if output is unsigned.");
is_unsigned = is_unsigned && scales_[input_var_name].first;
min_scale = std::min(
min_scale,
scales_[input_var_name].second.data<double>()[0]);
}
auto scale_tensor = CreateScaleTensor();
scale_tensor.data<double>()[0] = min_scale;
scales_[var_name] = {is_unsigned, scale_tensor};
compute_scale = false;
}
}

CalculateSingleScale(op->Type(), conn.first, var_name, *var_tensor,
is_unsigned);
if (compute_scale)
CalculateSingleScale(op->Type(), conn.first, var_name,
*var_tensor, is_unsigned);
}
}
};
Expand Down Expand Up @@ -127,6 +149,13 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale(
}
}

// Builds a 1-D LoDTensor on the CPU sized to hold one double scale value
// per channel; the caller fills in the actual scale(s) afterwards.
static LoDTensor CreateScaleTensor(int64_t channels_num) {
  LoDTensor tensor;
  // One slot per channel (a single slot for per-tensor quantization).
  tensor.Resize({channels_num});
  // Allocate the double buffer on CPU so data<double>() is valid immediately.
  tensor.mutable_data<double>(CPUPlace());
  return tensor;
}

std::vector<int> AnalysisPredictor::MkldnnQuantizer::ExpandQuantizedBins(
std::vector<int> quantized_bins, std::vector<int> reference_bins) const {
std::vector<int> expanded_quantized_bins(reference_bins.size(), 0);
Expand Down Expand Up @@ -263,11 +292,8 @@ AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor(
min_kl_index = starting_iter;
}

LoDTensor scale_tensor;
scale_tensor.Resize({1});
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());

scale_ptr[0] = 1.0 / ((min_kl_index + 0.5) * bin_width);
LoDTensor scale_tensor = CreateScaleTensor();
scale_tensor.data<double>()[0] = 1.0 / ((min_kl_index + 0.5) * bin_width);

return std::make_pair(is_unsigned, scale_tensor);
}
Expand All @@ -285,10 +311,8 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
min_val);

LoDTensor scale_tensor;
scale_tensor.Resize({1});
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
scale_ptr[0] = 1.0 / max_abs;
LoDTensor scale_tensor = CreateScaleTensor();
scale_tensor.data<double>()[0] = 1.0 / max_abs;

return std::make_pair(is_unsigned, scale_tensor);
}
Expand All @@ -308,8 +332,7 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor(
min_val);

int channels = var_tensor.dims()[0];
LoDTensor scale_tensor;
scale_tensor.Resize({channels});
LoDTensor scale_tensor = CreateScaleTensor(channels);
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());

for (int i = 0; i < channels; ++i) {
Expand Down