-
Notifications
You must be signed in to change notification settings - Fork 6k
conv2d support bfloat16 #32221
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
conv2d support bfloat16 #32221
Changes from 15 commits
687e28b
252dbcf
747d096
8bab3d7
74bb02d
cd612c5
4069f78
7d3a4d5
c41fe74
f3ca4b8
5a3e730
f23f1d2
15f3315
394d8d4
12cc70b
62fcd51
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,7 +32,8 @@ def set_confs(self): | |
| def test_check_output(self): | ||
| for use_seq in {True, False}: | ||
| self.attrs['use_seq'] = use_seq | ||
| self.check_output(check_dygraph=False, no_check_set=["Cell"]) | ||
| self.check_output( | ||
| check_dygraph=False, no_check_set=["Cell"], atol=2e-2) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 这里指明了atol，因为你把op_test.py中的atol值改了。这样还是会影响到其他op的单测吧，我觉得最好不改op_test.py，重写OpTest函数就不会影响到其他op单测了。
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 这里只影响bfloat16前向精度测试，之前单测框架中写死用0.03，PR的修改只是取消这种固定值，在有需要的各个单测中指定即可。
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 修改了op单测中的精度检查方式，影响了mkldnn的单测，请@luotao1 review一下。
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In file |
||
|
|
||
| def setUp(self): | ||
| self.op_type = 'fusion_lstm' | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,7 +20,8 @@ | |
| import paddle | ||
| import paddle.fluid.core as core | ||
| import paddle.fluid as fluid | ||
| from op_test import OpTest | ||
| from op_test import OpTest, convert_float_to_uint16, get_numeric_gradient | ||
| from paddle.fluid.tests.unittests.testsuite import create_op | ||
| from paddle.fluid import Program, program_guard | ||
|
|
||
|
|
||
|
|
@@ -167,6 +168,52 @@ def test_check_grad_no_input(self): | |
| globals()[cls_name] = TestConv2DCUDNNFp16 | ||
|
|
||
|
|
||
| def create_test_cudnn_bf16_class(parent): | ||
| @unittest.skipIf( | ||
| not core.is_compiled_with_cuda() or core.cudnn_version() < 8100, | ||
| "core is not compiled with CUDA and cudnn version need larger than 8.1.0" | ||
| ) | ||
| class TestConv2DCUDNNBF16(parent): | ||
| def get_numeric_grad(self, place, check_name): | ||
| scope = core.Scope() | ||
| self._check_grad_helper() | ||
| op = create_op(scope, self.op_type, self.inputs, self.outputs, | ||
| self.attrs) | ||
| return get_numeric_gradient(place, scope, op, self.inputs_fp32, | ||
| check_name, ['Output']) | ||
|
|
||
| def init_kernel_type(self): | ||
| self.use_cudnn = True | ||
| self.no_need_check_grad = True | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 这个主要是防止父类里的单测被执行到。 |
||
| self.dtype = np.uint16 | ||
|
|
||
| def test_check_output(self): | ||
| place = core.CUDAPlace(0) | ||
| self.check_output_with_place(place, atol=1e-2) | ||
|
|
||
| def test_check_grad_no_filter(self): | ||
| place = core.CUDAPlace(0) | ||
| numeric_grads = self.get_numeric_grad(place, 'Input') | ||
| self.check_grad_with_place( | ||
| place, ['Input'], | ||
| 'Output', | ||
| no_grad_set=set(['Filter']), | ||
| user_defined_grads=[numeric_grads]) | ||
|
|
||
| def test_check_grad_no_input(self): | ||
| place = core.CUDAPlace(0) | ||
| numeric_grads = self.get_numeric_grad(place, 'Filter') | ||
| self.check_grad_with_place( | ||
| place, ['Filter'], | ||
| 'Output', | ||
| no_grad_set=set(['Input']), | ||
| user_defined_grads=[numeric_grads]) | ||
|
|
||
| cls_name = "{0}_{1}".format(parent.__name__, "CUDNNBF16") | ||
| TestConv2DCUDNNBF16.__name__ = cls_name | ||
| globals()[cls_name] = TestConv2DCUDNNBF16 | ||
|
|
||
|
|
||
| def create_test_channel_last_class(parent): | ||
| class TestChannelLastCase(parent): | ||
| def init_data_format(self): | ||
|
|
@@ -319,7 +366,15 @@ def setUp(self): | |
| 'dilation': self.dilations | ||
| } | ||
|
|
||
| input = np.random.random(self.input_size).astype(self.dtype) | ||
| if self.is_bfloat16_op(): | ||
| input = np.random.random(self.input_size).astype(np.float32) | ||
| filter = np.random.uniform(-1, 1, | ||
| self.filter_size).astype(np.float32) | ||
| else: | ||
| input = np.random.random(self.input_size).astype(self.dtype) | ||
| filter = np.random.uniform(-1, 1, | ||
| self.filter_size).astype(self.dtype) | ||
|
|
||
| if not self.has_cuda(): | ||
| self.fuse_relu_before_depthwise_conv = False | ||
| if self.fuse_relu_before_depthwise_conv: | ||
|
|
@@ -329,16 +384,27 @@ def setUp(self): | |
| input2 = np.maximum(input, 0.0) | ||
| else: | ||
| input2 = input | ||
| filter = np.random.uniform(-1, 1, self.filter_size).astype(self.dtype) | ||
|
|
||
| output, _, _, _, _ = conv2d_forward_naive(input2, filter, self.groups, | ||
| conv2d_param) | ||
| output = output.astype(self.dtype) | ||
|
|
||
| self.inputs = { | ||
| 'Input': OpTest.np_dtype_to_fluid_dtype(input), | ||
| 'Filter': OpTest.np_dtype_to_fluid_dtype(filter) | ||
| } | ||
| if self.is_bfloat16_op(): | ||
| output = output.astype(np.float32) | ||
| self.inputs = { | ||
| 'Input': convert_float_to_uint16(input), | ||
| 'Filter': convert_float_to_uint16(filter) | ||
| } | ||
| self.inputs_fp32 = { | ||
| 'Input': OpTest.np_dtype_to_fluid_dtype(input), | ||
| 'Filter': OpTest.np_dtype_to_fluid_dtype(filter) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 这是还构造了fp32的conv2d?在PR描述里面说明一下单测检查的逻辑吧。
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done |
||
| } | ||
| else: | ||
| output = output.astype(self.dtype) | ||
| self.inputs = { | ||
| 'Input': OpTest.np_dtype_to_fluid_dtype(input), | ||
| 'Filter': OpTest.np_dtype_to_fluid_dtype(filter) | ||
| } | ||
|
|
||
| self.attrs = { | ||
| 'strides': self.stride, | ||
| 'paddings': self.pad, | ||
|
|
@@ -554,6 +620,15 @@ def init_group(self): | |
| create_test_cudnn_fp16_class(TestWith1x1, grad_check=False) | ||
| create_test_cudnn_fp16_class(TestWithInput1x1Filter1x1, grad_check=False) | ||
|
|
||
| #----------------Conv2DCUDNN bf16---------------- | ||
|
|
||
| create_test_cudnn_bf16_class(TestConv2DOp) | ||
| create_test_cudnn_bf16_class(TestWithPad) | ||
| create_test_cudnn_bf16_class(TestWithStride) | ||
| create_test_cudnn_bf16_class(TestWithGroup) | ||
| create_test_cudnn_bf16_class(TestWith1x1) | ||
| create_test_cudnn_bf16_class(TestWithInput1x1Filter1x1) | ||
|
|
||
| #----------------TestDepthwiseConv ----- | ||
|
|
||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
注册的代码超过100行了，可以简化下。这些注册无非3种类型：
可以定义一些注册的宏,比如:REGISTER_CONV_CUDNN_KERNEL_WITH_FP64_BF16、REGISTER_CONV_CUDNN_KERNEL_WITH_FP64、REGISTER_CONV_CUDNN_KERNEL_WITH_BF16?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
好的,后续跟进。