Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 156 additions & 1 deletion python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

__all__ = [
'QuantizationTransformPass', 'QuantizationFreezePass', 'ConvertToInt8Pass',
'TransformForMobilePass'
'TransformForMobilePass', 'ScaleForTrainingPass', 'ScaleForInferencePass'
]


Expand Down Expand Up @@ -962,3 +962,158 @@ def apply(self, graph):
graph.safe_remove_nodes(op_node)
graph.resolve_hazard()
return graph


class ScaleForTrainingPass(object):
def __init__(self, scope=None, place=None, moving_rate=0.9):
"""
This pass is used for calculating output scales of some operators.
These output scales may be used by tensorRT or some other inference engines.

Args:
scope(fluid.Scope): The scope is used to initialize these new parameters.
place(fluid.CPUPlace|fluid.CUDAPlace): The place is used to initialize new parameters.
moving_rate(float): The decay coefficient of moving average. The default value is 0.9.
"""
self._scope = scope
self._place = place
self._moving_rate = moving_rate
self._is_test = None
self._teller_set = [
"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid",
"depthwise_conv2d", "batch_norm", "concat", "tanh", "pad",
"elementwise_add", "elementwise_mul", "dropout", "split", "prelu",
"conv2d_transpose", "leaky_relu"
]

def apply(self, graph):
"""
Insert the `moving_average_abs_max_scale` op in order to calculate output scales
of operators in the teller_set.

Args:
graph(IrGraph): the target graph.
"""
self._is_test = graph.is_test()
ops = graph.all_op_nodes()
for op_node in ops:
name = op_node.name()
if name in self._teller_set:
if len(op_node.output_arg_names()) != 1:
continue
in_node = graph._find_node_by_name(
op_node.outputs, op_node.output_arg_names()[0])
out_node = graph.create_var_node_from_desc(in_node.var())
scale_node = graph.create_persistable_node(
name=self._scale_name(in_node.name()),
var_type=core.VarDesc.VarType.LOD_TENSOR,
shape=[1],
var_dtype=in_node.dtype())
ins = {'X': in_node}
outs = {'Out': out_node, 'OutScale': scale_node}
if not self._is_test:
state_in_node = graph.create_persistable_node(
name=unique_name.generate('scale_state@'),
var_type=core.VarDesc.VarType.LOD_TENSOR,
var_dtype=in_node.dtype(),
shape=[1])
data_type = 'float64' if in_node.dtype(
) == core.VarDesc.VarType.FP64 else 'float32'
_init_var_node(
state_in_node,
np.ones(
[1], dtype=data_type),
self._scope,
self._place)
accum_in_node = graph.create_persistable_node(
name=unique_name.generate('scale_accum@'),
var_type=core.VarDesc.VarType.LOD_TENSOR,
var_dtype=in_node.dtype(),
shape=[1])
_init_var_node(
accum_in_node,
np.ones(
[1], dtype=data_type),
self._scope,
self._place)
state_out_node = graph.create_var_node_from_desc(
state_in_node.var())
accum_out_node = graph.create_var_node_from_desc(
accum_in_node.var())

ins['InState'] = state_in_node
ins['InAccum'] = accum_in_node
outs['OutState'] = state_out_node
outs['OutAccum'] = accum_out_node

attrs = {
'moving_rate': self._moving_rate,
'is_test': self._is_test,
'op_role': core.op_proto_and_checker_maker.OpRole.Forward
}
scale_op_node = graph.create_op_node(
op_type='moving_average_abs_max_scale',
attrs=attrs,
inputs=ins,
outputs=outs)
graph.link_to(in_node, scale_op_node)
graph.link_to(scale_op_node, out_node)
graph.link_to(scale_op_node, scale_node)
if not self._is_test:
graph.link_to(state_in_node, scale_op_node)
graph.link_to(accum_in_node, scale_op_node)
graph.link_to(scale_op_node, state_out_node)
graph.link_to(scale_op_node, accum_out_node)
graph.resolve_hazard()
return graph

def _scale_name(self, var_name):
"""
Return the scale name for the var named `var_name`.
"""
return "%s@scale" % (var_name)


class ScaleForInferencePass(object):
def __init__(self, scope=None):
"""
This pass is used for setting output scales of some operators.
These output scales may be used by tensorRT or some other inference engines.

Args:
scope(fluid.Scope): The scope is used to initialize these new parameters.
"""
self._scope = scope
self._teller_set = [
"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid",
"depthwise_conv2d", "batch_norm", "concat", "tanh", "pad",
"elementwise_add", "elementwise_mul", "dropout", "split", "prelu",
"conv2d_transpose", "leaky_relu"
]

def apply(self, graph):
"""
Get output scales from the scope and set these scales in op_descs
of operators in the teller_set.

Args:
graph(IrGraph): the target graph.
"""
ops = graph.all_op_nodes()
for op_node in ops:
name = op_node.name()
if name in self._teller_set:
if len(op_node.output_arg_names()) != 1:
continue
scale_name = self._scale_name(op_node.output_arg_names()[0])
scale_v = np.array(
self._scope.find_var(scale_name).get_tensor())[0]
op_node.op()._set_attr("out_scale", float(scale_v))
graph.resolve_hazard()
return graph

def _scale_name(self, var_name):
"""
Return the scale name for the var named `var_name`.
"""
return "%s@scale" % (var_name)
190 changes: 190 additions & 0 deletions python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.

import os
import unittest
import random
import numpy as np
import six
import paddle.fluid as fluid
import paddle
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
from paddle.fluid.contrib.slim.quantization import ScaleForTrainingPass
from paddle.fluid.contrib.slim.quantization import ScaleForInferencePass
from paddle.fluid import core

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CPU_NUM"] = "1"


def residual_block(img, label, num=1):
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
act='relu',
bias_attr=False):
tmp = fluid.layers.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
stride=stride,
padding=padding,
act=None,
bias_attr=bias_attr)
return fluid.layers.batch_norm(input=tmp, act=act)

hidden = img
for _ in six.moves.xrange(num):
conv = conv_bn_layer(hidden, 20, 3, 1, 1, act=None, bias_attr=True)
short = conv_bn_layer(hidden, 20, 1, 1, 0, act=None)
hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu')
fc = fluid.layers.fc(input=hidden, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=fc, label=label)
loss = fluid.layers.mean(loss)
return loss


class TestQuantizationScalePass(unittest.TestCase):
def quantization_scale(self,
use_cuda,
seed,
activation_quant_type,
weight_quant_type='abs_max',
for_ci=False):
def build_program(main, startup, is_test):
main.random_seed = seed
startup.random_seed = seed
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
loss = residual_block(img, label, 1)
if not is_test:
opt = fluid.optimizer.Adam(learning_rate=0.0001)
opt.minimize(loss)
return [img, label], loss

random.seed(0)
np.random.seed(0)

main = fluid.Program()
startup = fluid.Program()
test_program = fluid.Program()
feeds, loss = build_program(main, startup, False)
build_program(test_program, startup, True)
test_program = test_program.clone(for_test=True)
main_graph = IrGraph(core.Graph(main.desc), for_test=False)
test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
scope = fluid.Scope()
with fluid.scope_guard(scope):
exe.run(startup)
transform_pass = QuantizationTransformPass(
scope=scope,
place=place,
activation_quantize_type=activation_quant_type,
weight_quantize_type=weight_quant_type)
transform_pass.apply(main_graph)
transform_pass.apply(test_graph)
scale_training_pass = ScaleForTrainingPass(scope=scope, place=place)
scale_training_pass.apply(main_graph)
dev_name = '_gpu' if use_cuda else '_cpu'
if not for_ci:
marked_nodes = set()
for op in main_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
main_graph.draw('.', 'main_scale' + dev_name, marked_nodes)
marked_nodes = set()
for op in test_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
test_graph.draw('.', 'test_scale' + dev_name, marked_nodes)

build_strategy = fluid.BuildStrategy()
build_strategy.memory_optimize = False
build_strategy.enable_inplace = False
binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy)
iters = 5
batch_size = 8

train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=500),
batch_size=batch_size)
feeder = fluid.DataFeeder(feed_list=feeds, place=place)
with fluid.scope_guard(scope):
for _ in range(iters):
data = next(train_reader())
loss_v = exe.run(binary,
feed=feeder.feed(data),
fetch_list=[loss])
if not for_ci:
print('{}: {}'.format('loss' + dev_name, loss_v))

scale_inference_pass = ScaleForInferencePass(scope=scope)
scale_inference_pass.apply(test_graph)

# Freeze graph for inference, but the weight of fc/conv is still float type.
freeze_pass = QuantizationFreezePass(
scope=scope, place=place, weight_quantize_type=weight_quant_type)
freeze_pass.apply(test_graph)
server_program = test_graph.to_program()

if not for_ci:
marked_nodes = set()
for op in test_graph.all_op_nodes():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
test_graph.draw('.', 'quant_scale' + dev_name, marked_nodes)

with open('quant_scale_model' + dev_name + '.txt', 'w') as f:
f.write(str(server_program))

with fluid.scope_guard(scope):
fluid.io.save_inference_model('quant_scale_model' + dev_name,
['image', 'label'], [loss], exe,
server_program)

def test_quant_scale_cuda(self):
if fluid.core.is_compiled_with_cuda():
with fluid.unique_name.guard():
self.quantization_scale(
True,
seed=1,
activation_quant_type='moving_average_abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True)

def test_quant_scale_cpu(self):
with fluid.unique_name.guard():
self.quantization_scale(
False,
seed=2,
activation_quant_type='moving_average_abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True)


if __name__ == '__main__':
unittest.main()