diff --git a/docs/source/user/ufunc.rst b/docs/source/user/ufunc.rst
index 6beb5baab..ddd7eb1ad 100644
--- a/docs/source/user/ufunc.rst
+++ b/docs/source/user/ufunc.rst
@@ -46,7 +46,7 @@ All CUDA ufunc kernels have the ability to call other CUDA device functions::
 
     from numba import vectorize, cuda
 
     # define a device function
-    @cuda.jit('float32(float32, float32, float32)', device=True, inline=True)
+    @cuda.jit('float32(float32, float32, float32)', device=True, inline="always")
     def cu_device_fn(x, y, z):
         return x ** y / z
diff --git a/numba_cuda/numba/cuda/decorators.py b/numba_cuda/numba/cuda/decorators.py
index edc904f0d..d5a0a29b3 100644
--- a/numba_cuda/numba/cuda/decorators.py
+++ b/numba_cuda/numba/cuda/decorators.py
@@ -16,7 +16,7 @@
 def jit(
     func_or_sig=None,
     device=False,
-    inline=False,
+    inline="never",
     link=[],
     debug=None,
     opt=None,
@@ -81,6 +81,20 @@ def jit(
         msg = _msg_deprecated_signature_arg.format("bind")
         raise DeprecationError(msg)
 
+    if isinstance(inline, bool):
+        # Bool values are still accepted for backward compatibility, but are
+        # deprecated in favour of the "always"/"never" strings.
+        import warnings
+
+        warnings.warn(
+            "Passing bool to inline argument is deprecated, please refer to "
+            "Numba's documentation on inlining: "
+            "https://numba.readthedocs.io/en/stable/developer/inlining.html",
+            DeprecationWarning,
+        )
+
+        inline = "always" if inline else "never"
+
     debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
     opt = (config.OPT != 0) if opt is None else opt
     fastmath = kws.get("fastmath", False)
@@ -130,6 +144,7 @@ def _jit(func):
         targetoptions["opt"] = opt
         targetoptions["fastmath"] = fastmath
         targetoptions["device"] = device
+        targetoptions["inline"] = inline
         targetoptions["extensions"] = extensions
 
         disp = CUDADispatcher(func, targetoptions=targetoptions)
@@ -171,6 +186,7 @@ def autojitwrapper(func):
             return jit(
                 func,
                 device=device,
+                inline=inline,
                 debug=debug,
                 opt=opt,
                 lineinfo=lineinfo,
@@ -194,6 +210,7 @@ def autojitwrapper(func):
         targetoptions["link"] = link
         targetoptions["fastmath"] = fastmath
         targetoptions["device"] = device
+        targetoptions["inline"] = inline
         targetoptions["extensions"] = extensions
 
         disp = CUDADispatcher(func_or_sig, targetoptions=targetoptions)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py
index 1e3b1d920..31491b3cc 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_array_args.py
@@ -7,7 +7,7 @@
 
 class TestCudaArrayArg(CUDATestCase):
     def test_array_ary(self):
-        @cuda.jit("double(double[:],int64)", device=True, inline=True)
+        @cuda.jit("double(double[:],int64)", device=True, inline="always")
         def device_function(a, c):
             return a[c]
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
index 7cf4d288f..027534356 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py
@@ -81,7 +81,7 @@ def test_blackscholes(self):
                 VOLATILITY,
             )
 
-        @cuda.jit(double(double), device=True, inline=True)
+        @cuda.jit(double(double), device=True, inline="always")
         def cnd_cuda(d):
             K = 1.0 / (1.0 + 0.2316419 * math.fabs(d))
             ret_val = (
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_inline.py b/numba_cuda/numba/cuda/tests/cudapy/test_inline.py
new file mode 100644
index 000000000..d969d05bc
--- /dev/null
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_inline.py
@@ -0,0 +1,59 @@
+import re
+import numpy as np
+from numba import cuda, types
+from numba.cuda.testing import (
+    unittest,
+    CUDATestCase,
+    skip_on_cudasim,
+)
+
+
+class TestCudaInline(CUDATestCase):
+    @skip_on_cudasim("Cudasim does not support inline")
+    def _test_call_inline(self, inline):
+        """Test @cuda.jit(inline=...)"""
+        a = np.ones(2, dtype=np.int32)
+
+        sig = (types.int32[::1],)
+
+        @cuda.jit(inline=inline)
+        def set_zero(a):
+            a[0] = 0
+
+        @cuda.jit(sig)
+        def call_set_zero(a):
+            set_zero(a)
+
+        call_set_zero[1, 2](a)
+
+        expected = np.arange(2, dtype=np.int32)
+        self.assertTrue(np.all(a == expected))
+
+        llvm_ir = call_set_zero.inspect_llvm(sig)
+        pat = r"call [a-zA-Z0-9]* @"
+        match = re.compile(pat).search(llvm_ir)
+
+        if inline == "always" or inline is True:
+            # check that call was inlined
+            self.assertIsNone(match, msg=llvm_ir)
+        else:
+            assert inline == "never" or inline is False
+
+            # check that call was not inlined
+            self.assertIsNotNone(match, msg=llvm_ir)
+
+    def test_call_inline_always(self):
+        self._test_call_inline("always")
+
+    def test_call_inline_never(self):
+        self._test_call_inline("never")
+
+    def test_call_inline_true(self):
+        self._test_call_inline(True)
+
+    def test_call_inline_false(self):
+        self._test_call_inline(False)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
index 3a1dee8b0..fcd3eca13 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_laplace.py
@@ -14,7 +14,7 @@
 
 class TestCudaLaplace(CUDATestCase):
     def test_laplace_small(self):
-        @cuda.jit(float64(float64, float64), device=True, inline=True)
+        @cuda.jit(float64(float64, float64), device=True, inline="always")
        def get_max(a, b):
             if a > b:
                 return a
diff --git a/numba_cuda/numba/cuda/vectorizers.py b/numba_cuda/numba/cuda/vectorizers.py
index 4cd80edbf..bffe4ca75 100644
--- a/numba_cuda/numba/cuda/vectorizers.py
+++ b/numba_cuda/numba/cuda/vectorizers.py
@@ -206,7 +206,7 @@ def __vectorized_{name}({args}, __out__):
 
 class CUDAVectorize(deviceufunc.DeviceVectorize):
     def _compile_core(self, sig):
-        cudevfn = cuda.jit(sig, device=True, inline=True)(self.pyfunc)
+        cudevfn = cuda.jit(sig, device=True, inline="always")(self.pyfunc)
         return cudevfn, cudevfn.overloads[sig.args].signature.return_type
 
     def _get_globals(self, corefn):