Skip to content

Commit 86cb3fb

Browse files
authored
Distributed Automatic SParsity with Fleet (#33558)
1 parent 1e5437d commit 86cb3fb

File tree

8 files changed

+352
-15
lines changed

8 files changed

+352
-15
lines changed

paddle/fluid/framework/distributed_strategy.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
// Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
23
//
34
// Licensed under the Apache License, Version 2.0 (the "License");
45
// you may not use this file except in compliance with the License.
@@ -189,6 +190,7 @@ message DistributedStrategy {
189190
optional bool without_graph_optimization = 30 [ default = false ];
190191
optional int32 fuse_grad_size_in_num = 31 [ default = 1 ];
191192
optional bool calc_comm_same_stream = 32 [ default = false ];
193+
optional bool asp = 33 [ default = false ];
192194

193195
optional RecomputeConfig recompute_configs = 101;
194196
optional AMPConfig amp_configs = 102;

python/paddle/distributed/fleet/base/distributed_strategy.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
# Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
23
#
34
# Licensed under the Apache License, Version 2.0 (the "License");
45
# you may not use this file except in compliance with the License.
@@ -446,6 +447,31 @@ def amp_configs(self, configs):
446447
check_configs_key(self.strategy.amp_configs, configs, "amp_configs")
447448
assign_configs_value(self.strategy.amp_configs, configs)
448449

450+
@property
451+
def asp(self):
452+
"""
453+
Indicating whether we are using automatic sparsity training
454+
Default Value: False
455+
456+
Examples:
457+
458+
.. code-block:: python
459+
460+
import paddle.distributed.fleet as fleet
461+
strategy = fleet.DistributedStrategy()
462+
strategy.asp = True # by default this is false
463+
464+
"""
465+
return self.strategy.asp
466+
467+
@asp.setter
468+
@is_strict_auto
469+
def asp(self, flag):
470+
if isinstance(flag, bool):
471+
self.strategy.asp = flag
472+
else:
473+
print("WARNING: asp should have value of bool type")
474+
449475
@property
450476
def recompute(self):
451477
"""

python/paddle/distributed/fleet/meta_optimizers/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
1+
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2+
# Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
23
#
34
# Licensed under the Apache License, Version 2.0 (the "License");
45
# you may not use this file except in compliance with the License.
@@ -12,6 +13,7 @@
1213
# See the License for the specific language governing permissions and
1314

1415
from .amp_optimizer import AMPOptimizer
16+
from .asp_optimizer import ASPOptimizer
1517
from .recompute_optimizer import RecomputeOptimizer
1618
from .gradient_merge_optimizer import GradientMergeOptimizer
1719
from .graph_execution_optimizer import GraphExecutionOptimizer
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
# Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
15+
from paddle.fluid.contrib.sparsity.asp import ASPHelper
16+
from .meta_optimizer_base import MetaOptimizerBase
17+
18+
__all__ = []
19+
20+
21+
class ASPOptimizer(MetaOptimizerBase):
22+
def __init__(self, optimizer):
23+
super(ASPOptimizer, self).__init__(optimizer)
24+
self.inner_opt = optimizer
25+
# we do not allow meta optimizer to be inner optimizer currently
26+
self.meta_optimizers_white_list = [
27+
"AMPOptimizer", "LarsOptimizer", "LambOptimizer",
28+
"GraphExecutionOptimizer", "RecomputeOptimizer",
29+
"GradientMergeOptimizer"
30+
]
31+
self.meta_optimizers_black_list = []
32+
33+
def _set_basic_info(self, loss, role_maker, user_defined_optimizer,
34+
user_defined_strategy):
35+
super(ASPOptimizer, self)._set_basic_info(
36+
loss, role_maker, user_defined_optimizer, user_defined_strategy)
37+
38+
def _can_apply(self):
39+
if not self.role_maker._is_collective:
40+
return False
41+
42+
if self.user_defined_strategy.asp:
43+
return True
44+
45+
return False
46+
47+
def _disable_strategy(self, dist_strategy):
48+
dist_strategy.asp = False
49+
50+
def _enable_strategy(self, dist_strategy, context):
51+
dist_strategy.asp = True
52+
53+
def minimize_impl(self,
54+
loss,
55+
startup_program=None,
56+
parameter_list=None,
57+
no_grad_set=None):
58+
59+
optimize_ops, params_grads = ASPHelper._minimize(
60+
self.inner_opt,
61+
loss,
62+
startup_program=startup_program,
63+
parameter_list=parameter_list,
64+
no_grad_set=no_grad_set)
65+
66+
return optimize_ops, params_grads

python/paddle/fluid/contrib/sparsity/asp.py

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,15 @@ def decorate(optimizer):
6464
Examples:
6565
.. code-block:: python
6666
67+
import paddle
6768
import paddle.fluid as fluid
6869
from paddle.fluid.contrib import sparsity
6970
7071
main_program = fluid.Program()
7172
startup_program = fluid.Program()
7273
74+
paddle.enable_static()
75+
7376
with fluid.program_guard(main_program, startup_program):
7477
input_data = fluid.layers.data(name='data', shape=[None, 128])
7578
label = fluid.layers.data(name='label', shape=[None, 10])
@@ -78,17 +81,13 @@ def decorate(optimizer):
7881
loss = fluid.layers.mean(fluid.layers.square_error_cost(prob, label))
7982
8083
optimizer = fluid.optimizer.SGD(learning_rate=0.1)
81-
8284
optimizer = sparsity.decorate(optimizer)
83-
optimizer.minimize(loss, startup_program)
85+
# if do sparse training with Fleet, please replace above decorate with:
86+
# strategy = paddle.distributed.fleet.DistributedStrategy()
87+
# strategy.asp = True
88+
# optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
8489
85-
# When apply distributed training with Fleet
86-
import paddle.distributed.fleet as fleet
87-
88-
optimizer = fluid.optimizer.SGD(learning_rate=0.1)
89-
optimizer = sparsity.decorate(optimizer) # Need to be called before `fleet.distributed_optimizer`
90-
optimizer = fleet.distributed_optimizer(optimizer)
91-
optimizer.minimize(loss, startup_program)
90+
optimizer.minimize(loss, startup_program)
9291
"""
9392
return ASPHelper.decorate(optimizer)
9493

@@ -126,23 +125,38 @@ def prune_model(place,
126125
Examples:
127126
.. code-block:: python
128127
128+
import paddle
129129
import paddle.fluid as fluid
130+
import paddle.fluid.core as core
130131
from paddle.fluid.contrib import sparsity
131132
133+
paddle.enable_static()
134+
132135
main_program = fluid.Program()
133136
startup_program = fluid.Program()
134137
135-
place = fluid.CUDAPlace(0)
138+
place = paddle.CPUPlace()
139+
if core.is_compiled_with_cuda():
140+
place = paddle.CUDAPlace(0)
136141
137142
with fluid.program_guard(main_program, startup_program):
138143
input_data = fluid.layers.data(name='data', shape=[None, 128])
139144
label = fluid.layers.data(name='label', shape=[None, 10])
140-
hidden = fluid.layers.fc(input=input_data, num_flatten_dims=-1, size=32, act=None)
145+
hidden = fluid.layers.fc(input=input_data, num_flatten_dims=-1, size=32, act=None, name="need_sparse")
146+
hidden = fluid.layers.fc(input=hidden, num_flatten_dims=-1, size=32, act=None, name="need_dense")
141147
prob = fluid.layers.fc(input=hidden, num_flatten_dims=-1, size=10, act=None)
142148
loss = fluid.layers.mean(fluid.layers.square_error_cost(prob, label))
143149
144-
optimizer = decorate(fluid.optimizer.SGD(learning_rate=0.1))
145-
optimizer.minimize(optimizer, loss, main_program, startup_program)
150+
# Set up excluded layers out from the ASP workflow.
151+
# Please note, excluded_layers must be set before calling `optimizer.minimize()`.
152+
sparsity.set_excluded_layers(main_program, ["need_dense"])
153+
154+
optimizer = fluid.optimizer.SGD(learning_rate=0.1)
155+
optimizer = fluid.contrib.mixed_precision.decorator.decorate(optimizer)
156+
# Calling sparsity.decorate() to wrap minimize() in optimizer, which
157+
# will insert necessary masking operations for ASP workflow.
158+
optimizer = sparsity.decorate(optimizer)
159+
optimizer.minimize(loss, startup_program)
146160
147161
exe = fluid.Executor(place)
148162
exe.run(startup_program)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
22
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
33

4+
list(REMOVE_ITEM TEST_OPS "test_fleet_with_asp")
5+
list(REMOVE_ITEM TEST_OPS "test_fleet_with_asp_amp")
6+
47
foreach(TEST_OP ${TEST_OPS})
58
py_test_modules(${TEST_OP} MODULES ${TEST_OP})
69
endforeach(TEST_OP)
10+
11+
if(WITH_DISTRIBUTE)
12+
py_test_modules(test_fleet_with_asp MODULES test_fleet_with_asp ENVS ${dist_ENVS})
13+
py_test_modules(test_fleet_with_asp_amp MODULES test_fleet_with_asp_amp ENVS ${dist_ENVS})
14+
endif()
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
# Copyright (c) 2021 NVIDIA Corporation. All rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import paddle.distributed.fleet as fleet
17+
import paddle.distributed.fleet.base.role_maker as role_maker
18+
import unittest
19+
import paddle
20+
import paddle.fluid as fluid
21+
import paddle.fluid.core as core
22+
import os
23+
from paddle.fluid.contrib import sparsity
24+
from paddle.fluid.contrib.sparsity.asp import ASPHelper
25+
import numpy as np
26+
cuda_visible_devices = os.getenv('CUDA_VISIBLE_DEVICES')
27+
if cuda_visible_devices is None or cuda_visible_devices == "":
28+
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
29+
else:
30+
os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devices.split(',')[0]
31+
32+
paddle.enable_static()
33+
34+
35+
class TestFleetWithASP(unittest.TestCase):
36+
def setUp(self):
37+
os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36213"
38+
os.environ["PADDLE_CURRENT_ENDPOINTS"] = "127.0.0.1:36213"
39+
os.environ["PADDLE_TRAINERS_NUM"] = "1"
40+
os.environ["PADDLE_TRAINER_ID"] = "0"
41+
42+
def net(self, main_prog, startup_prog):
43+
with fluid.program_guard(main_prog, startup_prog):
44+
input_x = paddle.static.data(
45+
name="x", shape=[-1, 32], dtype='float32')
46+
input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64')
47+
48+
fc_1 = fluid.layers.fc(input=input_x, size=64, act='tanh')
49+
prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
50+
cost = fluid.layers.cross_entropy(input=prediction, label=input_y)
51+
avg_cost = paddle.mean(x=cost)
52+
53+
strategy = paddle.distributed.fleet.DistributedStrategy()
54+
strategy.asp = True
55+
return avg_cost, strategy, input_x, input_y
56+
57+
def test_with_asp(self):
58+
fleet.init(is_collective=True)
59+
train_prog, startup_prog = fluid.Program(), fluid.Program()
60+
avg_cost, strategy, input_x, input_y = self.net(train_prog,
61+
startup_prog)
62+
63+
with fluid.program_guard(train_prog, startup_prog):
64+
optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01)
65+
optimizer = fleet.distributed_optimizer(
66+
optimizer, strategy=strategy)
67+
optimizer.minimize(avg_cost)
68+
69+
place = fluid.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda(
70+
) else fluid.CPUPlace()
71+
72+
exe = fluid.Executor(place)
73+
feeder = fluid.DataFeeder(feed_list=[input_x, input_y], place=place)
74+
exe.run(startup_prog)
75+
76+
sparsity.prune_model(place, train_prog)
77+
78+
data = (np.random.randn(64, 32), np.random.randint(2, size=(64, 1)))
79+
exe.run(train_prog, feed=feeder.feed([data]))
80+
81+
for param in train_prog.global_block().all_parameters():
82+
if ASPHelper._is_supported_layer(train_prog, param.name):
83+
mat = np.array(fluid.global_scope().find_var(param.name)
84+
.get_tensor())
85+
self.assertTrue(sparsity.check_sparsity(mat.T, n=2, m=4))
86+
87+
88+
if __name__ == "__main__":
89+
unittest.main()

0 commit comments

Comments
 (0)