Add autocast for nms, roi_align on CPU #8049
@@ -122,6 +122,9 @@ def test_forward(self, device, contiguous, x_dtype, rois_dtype=None, determinist
                 tol = 5e-3
             else:
                 tol = 4e-3
 
+        if x_dtype == torch.bfloat16:
+            tol = 5e-3
+
         pool_size = 5
         # n_channels % (pool_size ** 2) == 0 required for PS operations.
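For context on the looser tolerance (not part of the diff, just a quick check with stock PyTorch): bfloat16 keeps only about 8 bits of mantissa precision, so its machine epsilon is roughly 8e-3, and a 5e-3 forward tolerance is in line with that.

```python
import torch

# bfloat16 stores far fewer mantissa bits than float32, so element-wise
# differences of a few 1e-3 are expected after a bfloat16 round trip.
print(torch.finfo(torch.bfloat16).eps)  # 0.0078125
print(torch.finfo(torch.float32).eps)   # 1.1920928955078125e-07
```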
@@ -493,6 +496,21 @@ def test_autocast(self, aligned, deterministic, x_dtype, rois_dtype):
                 rois_dtype=rois_dtype,
             )
 
+    @pytest.mark.parametrize("aligned", (True, False))
+    @pytest.mark.parametrize("deterministic", (True, False))
+    @pytest.mark.parametrize("x_dtype", (torch.float, torch.bfloat16))
+    @pytest.mark.parametrize("rois_dtype", (torch.float, torch.bfloat16))
+    def test_autocast_cpu(self, aligned, deterministic, x_dtype, rois_dtype):
+        with torch.cpu.amp.autocast():
+            self.test_forward(
+                torch.device("cpu"),
+                contiguous=False,
+                deterministic=deterministic,
+                aligned=aligned,
+                x_dtype=x_dtype,
+                rois_dtype=rois_dtype,
+            )
+
     @pytest.mark.parametrize("seed", range(10))
     @pytest.mark.parametrize("device", cpu_and_cuda_and_mps())
     @pytest.mark.parametrize("contiguous", (True, False))
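Outside the test harness, the behavior this new test exercises looks roughly like the sketch below. The tensor shapes, the 5x5 output size, and `aligned=True` are illustrative choices, not values taken from the PR; the test itself covers both float32 and bfloat16 inputs.

```python
import torch
from torchvision.ops import roi_align

feat = torch.rand(1, 25, 32, 32)                    # (N, C, H, W) feature map
rois = torch.tensor([[0.0, 0.0, 0.0, 16.0, 16.0]])  # (batch_idx, x1, y1, x2, y2)

# With this PR, roi_align participates in CPU autocast, so it can be called
# inside a torch.cpu.amp.autocast() region with float32 or bfloat16 inputs.
with torch.cpu.amp.autocast():
    pooled = roi_align(feat, rois, output_size=(5, 5), aligned=True)

print(pooled.shape, pooled.dtype)
```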
@@ -751,6 +769,17 @@ def test_qnms(self, iou, scale, zero_point):
 
         torch.testing.assert_close(qkeep, keep, msg=err_msg.format(iou))
 
+    @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8))
+    def test_nms_cpu(self, iou, dtype=torch.float):
+        err_msg = "NMS incompatible between float and {dtype} for IoU={}"
+
+        boxes, scores = self._create_tensors_with_iou(1000, iou)
+        r_ref = ops.nms(boxes.to(dtype).float(), scores.to(dtype).float(), iou)
+        r_dtype = ops.nms(boxes.to(dtype), scores.to(dtype), iou)
+
+        is_eq = torch.allclose(r_ref, r_dtype)
+        assert is_eq, err_msg.format(iou)
+
     @pytest.mark.parametrize(
         "device",
         (
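Stripped down to a hand-built example, the comparison this test makes is roughly the following (the three boxes are illustrative; the test instead generates 1000 boxes around the target IoU via `_create_tensors_with_iou`):

```python
import torch
from torchvision import ops

boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0],
                      [1.0, 1.0, 11.0, 11.0],
                      [20.0, 20.0, 30.0, 30.0]])
scores = torch.tensor([0.9, 0.8, 0.7])

# Float32 reference result.
keep_ref = ops.nms(boxes, scores, iou_threshold=0.5)

# Same call with bfloat16 inputs inside a CPU autocast region, which is how
# test_autocast_cpu drives this check; the kept indices should match.
with torch.cpu.amp.autocast():
    keep_bf16 = ops.nms(boxes.to(torch.bfloat16), scores.to(torch.bfloat16), iou_threshold=0.5)

assert torch.equal(keep_ref, keep_bf16)
```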
@@ -782,6 +811,12 @@ def test_autocast(self, iou, dtype):
         with torch.cuda.amp.autocast():
             self.test_nms_gpu(iou=iou, dtype=dtype, device="cuda")
 
+    @pytest.mark.parametrize("iou", (0.2, 0.5, 0.8))
+    @pytest.mark.parametrize("dtype", (torch.float, torch.bfloat16))
+    def test_autocast_cpu(self, iou, dtype):
+        with torch.cpu.amp.autocast():
+            self.test_nms_cpu(iou=iou, dtype=dtype)
+
     @pytest.mark.parametrize(
         "device",
         (
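The kernel registrations that make these tests pass live on the C++ side and are not part of this test diff. As a rough mental model only (an assumption about the mechanism, not code from the PR), an autocast wrapper for an op that only ships a float32 CPU kernel promotes low-precision inputs before dispatching:

```python
import torch
from torchvision import ops

# Hypothetical Python approximation of what an AutocastCPU wrapper does for
# nms: promote bfloat16 inputs to float32, then call the regular kernel.
# The real wrapper is registered in C++; this is only an illustration.
def nms_autocast_cpu_sketch(boxes, scores, iou_threshold):
    return ops.nms(boxes.float(), scores.float(), iou_threshold)
```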