Commit 832a014

Add bf16 support for save and load ops (#33173)
* Add bf16 support for save and load ops
* Add bf16 test condition
* Add matmul and change fluid.io to paddle.static
* Reduce the test duration
1 parent 3af1629 commit 832a014

8 files changed: 174 additions, 23 deletions
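Taken together, the changes below register CPU bfloat16 kernels for `save`, `load`, `save_combine`, and `load_combine`, and migrate the affected tests to `paddle.static`. A condensed, illustrative sketch of the workflow this enables — it compresses the new unit test shown further down; the toy fc network, variable names, and checkpoint path are placeholders, not part of the commit:

```python
import numpy as np
import paddle
import paddle.fluid as fluid

paddle.enable_static()

# Toy network; a real model may need ops covered by the bf16 op list.
x = fluid.data(name='x', shape=[None, 8], dtype='float32')
loss = paddle.mean(fluid.layers.fc(x, size=4))

sgd = fluid.optimizer.SGD(learning_rate=1e-3)
# use_pure_bf16=True keeps parameters in bfloat16, which is why
# save/load now need bf16 kernels (same flags as the new test).
sgd = paddle.static.amp.bf16.decorate_bf16(
    sgd, use_bf16_guard=False, use_pure_bf16=True)
sgd.minimize(loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
exe.run(feed={'x': np.random.rand(2, 8).astype('float32')},
        fetch_list=[loss])

main = fluid.default_main_program()
fluid.save(main, './bf16_ckpt')                 # exercises the new bf16 save kernels
fluid.load(main, './bf16_ckpt.pdparams', exe)   # exercises the new bf16 load kernels
```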

paddle/fluid/operators/load_combine_op.cc

Lines changed: 2 additions & 0 deletions
```diff
@@ -87,6 +87,8 @@ REGISTER_OP_CPU_KERNEL(
     load_combine,
     ops::LoadCombineOpKernel<paddle::platform::CPUDeviceContext, float>,
     ops::LoadCombineOpKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::LoadCombineOpKernel<paddle::platform::CPUDeviceContext,
+                             paddle::platform::bfloat16>,
     ops::LoadCombineOpKernel<paddle::platform::CPUDeviceContext, int>,
     ops::LoadCombineOpKernel<paddle::platform::CPUDeviceContext, int8_t>,
     ops::LoadCombineOpKernel<paddle::platform::CPUDeviceContext, int64_t>);
```

paddle/fluid/operators/load_op.cc

Lines changed: 2 additions & 0 deletions
```diff
@@ -69,6 +69,8 @@ REGISTER_OPERATOR(load, ops::LoadOp, ops::LoadOpProtoMaker);
 REGISTER_OP_CPU_KERNEL(
     load, ops::LoadOpKernel<paddle::platform::CPUDeviceContext, float>,
     ops::LoadOpKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::LoadOpKernel<paddle::platform::CPUDeviceContext,
+                      paddle::platform::bfloat16>,
     ops::LoadOpKernel<paddle::platform::CPUDeviceContext, int>,
     ops::LoadOpKernel<paddle::platform::CPUDeviceContext, int8_t>,
     ops::LoadOpKernel<paddle::platform::CPUDeviceContext, int64_t>);
```

paddle/fluid/operators/save_combine_op.cc

Lines changed: 2 additions & 0 deletions
```diff
@@ -102,5 +102,7 @@ REGISTER_OP_CPU_KERNEL(
     save_combine,
     ops::SaveCombineOpKernel<paddle::platform::CPUDeviceContext, float>,
     ops::SaveCombineOpKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::SaveCombineOpKernel<paddle::platform::CPUDeviceContext,
+                             paddle::platform::bfloat16>,
     ops::SaveCombineOpKernel<paddle::platform::CPUDeviceContext, int>,
     ops::SaveCombineOpKernel<paddle::platform::CPUDeviceContext, int64_t>);
```

paddle/fluid/operators/save_load_combine_op_test.cc

Lines changed: 26 additions & 18 deletions
```diff
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <vector>
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/float16.h"
 
 USE_CPU_ONLY_OP(save_combine);
@@ -76,33 +77,34 @@ void CheckValues(T* expect, U* actual, const paddle::framework::LoD& expect_lod,
 
 // Here, we create 4 LoDTensors and use save_combine_op to first save these
 // in a single file. Then, we use load_combine_op to load these sequentially
-TEST(SaveLoadCombineOp, CPU) {
+template <typename T, typename U>
+void SaveLoadCombineOp() {
   paddle::framework::Scope scope;
   paddle::platform::CPUPlace place;
 
   std::vector<int> lod1 = {0, 1, 2, 3, 10};
   int numel1 = 100;
   paddle::framework::LoD expect_lod1;
-  int* expect1 = CreateForSaveCombineOp<int, int>(10, 10, lod1, "test_var1",
-                                                  place, &scope, &expect_lod1);
+  T* expect1 = CreateForSaveCombineOp<T, U>(10, 10, lod1, "test_var1", place,
+                                            &scope, &expect_lod1);
 
   std::vector<int> lod2 = {0, 2, 5, 10};
   int numel2 = 200;
   paddle::framework::LoD expect_lod2;
-  int* expect2 = CreateForSaveCombineOp<int, int>(10, 20, lod2, "test_var2",
-                                                  place, &scope, &expect_lod2);
+  T* expect2 = CreateForSaveCombineOp<T, U>(10, 20, lod2, "test_var2", place,
+                                            &scope, &expect_lod2);
 
   std::vector<int> lod3 = {0, 2, 3, 20};
   int numel3 = 4000;
   paddle::framework::LoD expect_lod3;
-  int* expect3 = CreateForSaveCombineOp<int, int>(20, 200, lod3, "test_var3",
-                                                  place, &scope, &expect_lod3);
+  T* expect3 = CreateForSaveCombineOp<T, U>(20, 200, lod3, "test_var3", place,
+                                            &scope, &expect_lod3);
 
   std::vector<int> lod4 = {0, 1, 20};
   int numel4 = 1000;
   paddle::framework::LoD expect_lod4;
-  int* expect4 = CreateForSaveCombineOp<int, int>(20, 50, lod4, "test_var4",
-                                                  place, &scope, &expect_lod4);
+  T* expect4 = CreateForSaveCombineOp<T, U>(20, 50, lod4, "test_var4", place,
+                                            &scope, &expect_lod4);
 
   // Set attributes
   std::string filename = "check_tensor.ls";
@@ -128,15 +130,21 @@ TEST(SaveLoadCombineOp, CPU) {
   load_combine_op->Run(scope, place);
 
   paddle::framework::LoD actual_lod1, actual_lod2, actual_lod3, actual_lod4;
-  int* actual1 = GetValuesAfterLoadCombineOp<int>(target1, scope, &actual_lod1);
-  int* actual2 = GetValuesAfterLoadCombineOp<int>(target2, scope, &actual_lod2);
-  int* actual3 = GetValuesAfterLoadCombineOp<int>(target3, scope, &actual_lod3);
-  int* actual4 = GetValuesAfterLoadCombineOp<int>(target4, scope, &actual_lod4);
-
-  CheckValues<int, int>(expect1, actual1, expect_lod1, actual_lod1, numel1);
-  CheckValues<int, int>(expect2, actual2, expect_lod2, actual_lod2, numel2);
-  CheckValues<int, int>(expect3, actual3, expect_lod3, actual_lod3, numel3);
-  CheckValues<int, int>(expect4, actual4, expect_lod4, actual_lod4, numel4);
+  U* actual1 = GetValuesAfterLoadCombineOp<U>(target1, scope, &actual_lod1);
+  U* actual2 = GetValuesAfterLoadCombineOp<U>(target2, scope, &actual_lod2);
+  U* actual3 = GetValuesAfterLoadCombineOp<U>(target3, scope, &actual_lod3);
+  U* actual4 = GetValuesAfterLoadCombineOp<U>(target4, scope, &actual_lod4);
+
+  CheckValues<T, U>(expect1, actual1, expect_lod1, actual_lod1, numel1);
+  CheckValues<T, U>(expect2, actual2, expect_lod2, actual_lod2, numel2);
+  CheckValues<T, U>(expect3, actual3, expect_lod3, actual_lod3, numel3);
+  CheckValues<T, U>(expect4, actual4, expect_lod4, actual_lod4, numel4);
+}
+
+TEST(SaveLoadCombineOp, CPU) { SaveLoadCombineOp<int, int>(); }
+
+TEST(SaveLoadCombineBF16Op, CPU) {
+  SaveLoadCombineOp<paddle::platform::bfloat16, paddle::platform::bfloat16>();
 }
 
 // FP16 version of SaveLoadCombineOp Test, only altering the saving aspect
```
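The templated test can compare saved and loaded bf16 buffers element-by-element because the save/load kernels move raw 16-bit patterns, and every bfloat16 value is just a truncated float32. A standalone numpy sketch of that truncate/expand round trip (the helper names are ours, not Paddle's):

```python
import numpy as np

def float_to_bf16_bits(x):
    # bfloat16 keeps the upper 16 bits of the float32 representation
    # (this sketch truncates; real conversions usually round to nearest).
    return (x.astype(np.float32).view(np.uint32) >> 16).astype(np.uint16)

def bf16_bits_to_float(b):
    # Zero-fill the dropped mantissa bits to recover a float32.
    return (b.astype(np.uint32) << 16).view(np.float32)

vals = np.array([0.5, 1.25, -3.0], dtype=np.float32)  # exactly representable in bf16
bits = float_to_bf16_bits(vals)
assert np.array_equal(bf16_bits_to_float(bits), vals)  # bit-exact round trip
```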

paddle/fluid/operators/save_op.cc

Lines changed: 2 additions & 0 deletions
```diff
@@ -90,6 +90,8 @@ REGISTER_OP_CPU_KERNEL(
     ops::SaveOpKernel<paddle::platform::CPUDeviceContext, double>,
     ops::SaveOpKernel<paddle::platform::CPUDeviceContext,
                       paddle::platform::float16>,
+    ops::SaveOpKernel<paddle::platform::CPUDeviceContext,
+                      paddle::platform::bfloat16>,
     ops::SaveOpKernel<paddle::platform::CPUDeviceContext, int>,
     ops::SaveOpKernel<paddle::platform::CPUDeviceContext, uint8_t>,
     ops::SaveOpKernel<paddle::platform::CPUDeviceContext, int8_t>,
```

python/paddle/fluid/tests/book/test_fit_a_line.py

Lines changed: 5 additions & 5 deletions
```diff
@@ -84,9 +84,9 @@ def train_loop(main_program):
                                           feed=feeder.feed(data),
                                           fetch_list=[avg_cost])
             if avg_loss_value[0] < 10.0 or pure_bf16:
-                if save_dirname is not None and not pure_bf16:
-                    fluid.io.save_inference_model(save_dirname, ['x'],
-                                                  [y_predict], exe)
+                if save_dirname is not None:
+                    paddle.static.save_inference_model(save_dirname, [x],
+                                                       [y_predict], exe)
                 return
             if math.isnan(float(avg_loss_value)):
                 sys.exit("got NaN loss, training failed.")
@@ -127,12 +127,12 @@ def infer(use_cuda, save_dirname=None, use_bf16=False):
 
     inference_scope = fluid.core.Scope()
     with fluid.scope_guard(inference_scope):
-        # Use fluid.io.load_inference_model to obtain the inference program desc,
+        # Use paddle.static.load_inference_model to obtain the inference program desc,
         # the feed_target_names (the names of variables that will be fed
         # data using feed operators), and the fetch_targets (variables that
         # we want to obtain data from using fetch operators).
         [inference_program, feed_target_names,
-         fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
+         fetch_targets] = paddle.static.load_inference_model(save_dirname, exe)
 
         # The input's dimension should be 2-D and the second dim is 13
         # The input data should be >= 0
```
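The migrated calls follow the `paddle.static` signatures: `save_inference_model` takes feed/fetch Variables (not names) plus the executor, and `load_inference_model` returns the deserialized program together with the feed names and fetch targets. A minimal end-to-end sketch under those signatures (the one-layer network and path prefix are ours, not from the commit):

```python
import numpy as np
import paddle

paddle.enable_static()

# Toy network standing in for the fit-a-line model.
x = paddle.static.data(name='x', shape=[None, 13], dtype='float32')
y_predict = paddle.static.nn.fc(x, size=1)

exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(paddle.static.default_startup_program())

path_prefix = './fit_a_line_infer'  # hypothetical prefix
paddle.static.save_inference_model(path_prefix, [x], [y_predict], exe)

[program, feed_names, fetch_targets] = paddle.static.load_inference_model(
    path_prefix, exe)
out, = exe.run(program,
               feed={feed_names[0]: np.random.rand(1, 13).astype('float32')},
               fetch_list=fetch_targets)
print(out.shape)  # (1, 1)
```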
python/paddle/fluid/tests/unittests/test_static_save_load_bf16.py

Lines changed: 134 additions & 0 deletions

```diff
@@ -0,0 +1,134 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import paddle
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+import paddle.fluid.framework as framework
+from paddle.fluid.optimizer import SGDOptimizer
+from paddle.fluid.tests.unittests.test_imperative_base import new_program_scope
+from paddle.fluid.tests.unittests.test_static_save_load import PtbModel
+import numpy as np
+
+
+@unittest.skipIf(not core.supports_bfloat16(),
+                 "place does not support BF16 evaluation")
+class TestSaveLoadBF16(unittest.TestCase):
+    def set_place(self):
+        return fluid.CPUPlace()
+
+    def test_ptb_rnn_cpu_bfloat16(self):
+        seed = 90
+        hidden_size = 10
+        vocab_size = 500
+        num_layers = 1
+        num_steps = 3
+        init_scale = 0.1
+        batch_size = 4
+        batch_num = 100
+
+        with new_program_scope():
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
+            ptb_model = PtbModel(
+                "ptb_model",
+                hidden_size=hidden_size,
+                vocab_size=vocab_size,
+                num_layers=num_layers,
+                num_steps=num_steps,
+                init_scale=init_scale)
+
+            place = self.set_place()
+            exe = fluid.Executor(place)
+            sgd = SGDOptimizer(learning_rate=1e-3)
+            x = fluid.layers.data(
+                name="x", shape=[-1, num_steps], dtype='int64')
+            y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
+            init_hidden = fluid.layers.data(
+                name="init_hidden", shape=[1], dtype='float32')
+            init_cell = fluid.layers.data(
+                name="init_cell", shape=[1], dtype='float32')
+
+            static_loss, static_last_hidden, static_last_cell = ptb_model(
+                x, y, init_hidden, init_cell)
+
+            sgd = paddle.static.amp.bf16.decorate_bf16(
+                sgd,
+                amp_lists=paddle.static.amp.bf16.AutoMixedPrecisionListsBF16(
+                    custom_fp32_list={'transpose2', 'concat'}),
+                use_bf16_guard=False,
+                use_pure_bf16=True)
+
+            sgd.minimize(static_loss, framework.default_startup_program())
+            out = exe.run(framework.default_startup_program())
+
+            for i in range(batch_num):
+                x_data = np.arange(12).reshape(4, 3).astype('int64')
+                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
+                x_data = x_data.reshape((-1, num_steps, 1))
+                y_data = y_data.reshape((-1, 1))
+                init_hidden_data = np.zeros(
+                    (num_layers, batch_size, hidden_size), dtype='float32')
+                init_cell_data = np.zeros(
+                    (num_layers, batch_size, hidden_size), dtype='float32')
+                fetch_list = [static_loss, static_last_hidden, static_last_cell]
+                out = exe.run(fluid.default_main_program(),
+                              feed={
+                                  "x": x_data,
+                                  "y": y_data,
+                                  "init_hidden": init_hidden_data,
+                                  "init_cell": init_cell_data
+                              },
+                              fetch_list=fetch_list)
+
+            # get value before save
+            main_program = framework.default_main_program()
+            base_map = {}
+            for var in main_program.list_vars():
+                if isinstance(var, framework.Parameter) or var.persistable:
+                    t = np.array(fluid.global_scope().find_var(var.name)
+                                 .get_tensor())
+                    # make sure every parameter and optimizer var has been updated
+                    self.assertTrue(np.sum(np.abs(t)) != 0)
+                    base_map[var.name] = t
+
+            fluid.save(main_program, "./test_1")
+
+            # set var to zero
+            for var in main_program.list_vars():
+                if isinstance(var, framework.Parameter) or var.persistable:
+                    ten = fluid.global_scope().find_var(var.name).get_tensor()
+                    ten.set(np.zeros_like(np.array(ten)), place)
+
+                    new_t = np.array(fluid.global_scope().find_var(var.name)
+                                     .get_tensor())
+                    # make sure every parameter and optimizer var has been set to zero
+                    self.assertTrue(np.sum(np.abs(new_t)) == 0)
+
+            fluid.load(main_program, "./test_1.pdparams", exe)
+
+            for var in main_program.list_vars():
+                if isinstance(var, framework.Parameter) or var.persistable:
+                    new_t = np.array(fluid.global_scope().find_var(var.name)
+                                     .get_tensor())
+                    base_t = base_map[var.name]
+                    self.assertTrue(np.array_equal(new_t, base_t))
+
+
+if __name__ == '__main__':
+    paddle.enable_static()
+    unittest.main()
```
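One detail of the test worth noting: it saves with the bare prefix (`"./test_1"`) but loads with an explicit `.pdparams` suffix. `fluid.save` appends the suffixes itself, while `fluid.load` accepts either form. A minimal sketch of that convention (assuming a Paddle build where the fluid API is available; the toy network and path are ours):

```python
import paddle
import paddle.fluid as fluid

paddle.enable_static()

x = fluid.data(name='x', shape=[None, 4], dtype='float32')
y = fluid.layers.fc(x, size=2)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

main = fluid.default_main_program()
fluid.save(main, './ckpt')       # writes ./ckpt.pdparams (+ .pdopt when optimizer state exists)
fluid.load(main, './ckpt', exe)  # the .pdparams suffix may also be passed explicitly, as in the test
```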

tools/static_mode_white_list.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -480,6 +480,7 @@
     'test_squared_l2_norm_op',
     'test_stack_op',
     'test_static_save_load',
+    'test_static_save_load_bf16',
     'test_sum_op',
     'test_switch',
     'test_switch_case',
```
