Skip to content

Commit da60f0a

Browse files
authored
Use dbg.declare for scalar kernel parameters (#828)
## Summary

- Describe scalar kernel parameters with `dbg.declare` on their stack slots, to avoid unstable parameter-space locations in downstream DWARF. Boolean parameters remain on `dbg.value` to avoid an NVVM crash.
- Avoid emitting additional `dbg.value` entries for function arguments once they have been declared.
- Add a regression test for scalar formal parameters.
1 parent d424989 commit da60f0a

File tree

4 files changed

+274
-197
lines changed

4 files changed

+274
-197
lines changed

numba_cuda/numba/cuda/debuginfo.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -866,8 +866,27 @@ def mark_variable(
866866
int_type = (ir.IntType,)
867867
real_type = ir.FloatType, ir.DoubleType
868868
if isinstance(lltype, int_type + real_type):
869-
# Start with scalar variable, swtiching llvm.dbg.declare
870-
# to llvm.dbg.value
869+
# For scalar locals we use llvm.dbg.value instead of
870+
# llvm.dbg.declare, but for scalar *arguments* we still want a
871+
# stable stack location so they don't get encoded as absolute
872+
# parameter-space addresses in downstream DWARF.
873+
if argidx is not None:
874+
# NVVM has been observed to crash on some boolean-parameter
875+
# dbg.declare patterns - use dbg.value for these instead.
876+
if datamodel is not None and isinstance(
877+
datamodel.fe_type, types.Boolean
878+
):
879+
return
880+
return super().mark_variable(
881+
builder,
882+
allocavalue,
883+
name,
884+
lltype,
885+
size,
886+
line,
887+
datamodel,
888+
argidx,
889+
)
871890
return
872891
else:
873892
return super().mark_variable(

numba_cuda/numba/cuda/lowering.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1722,6 +1722,12 @@ def loadvar(self, name):
17221722
src_name not in self.dbg_val_names
17231723
and src_name not in self.poly_var_typ_map
17241724
):
1725+
# Function arguments are declared once in the prologue. Emitting
1726+
# additional dbg.value entries for them can cause ptxas to
1727+
# describe them via parameter-space addresses (DW_OP_addr),
1728+
# which is both unstable and confusing for debuggers.
1729+
if src_name in self.fndesc.args:
1730+
return val
17251731
fetype = self.typeof(name)
17261732
lltype = self.context.get_value_type(fetype)
17271733
int_type = (llvm_ir.IntType,)
@@ -1730,10 +1736,6 @@ def loadvar(self, name):
17301736
sizeof = self.context.get_abi_sizeof(lltype)
17311737
datamodel = self.context.data_model_manager[fetype]
17321738
line = self._adjust_line_if_prologue(self.loc.line)
1733-
if src_name in self.fndesc.args:
1734-
argidx = self.fndesc.args.index(src_name) + 1
1735-
else:
1736-
argidx = None
17371739
self.debuginfo.update_variable(
17381740
self.builder,
17391741
val,
@@ -1742,7 +1744,7 @@ def loadvar(self, name):
17421744
sizeof,
17431745
line,
17441746
datamodel,
1745-
argidx,
1747+
argidx=None,
17461748
)
17471749
self.dbg_val_names.add(src_name)
17481750
return val
@@ -1817,6 +1819,23 @@ def storevar(self, value, name, argidx=None):
18171819
# Emit debug value for user variable
18181820
src_name = name.split(".")[0]
18191821
if src_name not in self.poly_var_typ_map:
1822+
# Function arguments are described via dbg.declare on
1823+
# their stack slots in the prologue.
1824+
if argidx is not None:
1825+
# Boolean parameters are kept on dbg.value to avoid
1826+
# NVVM crashes with dbg.declare.
1827+
if isinstance(fetype, types.Boolean):
1828+
self.debuginfo.update_variable(
1829+
self.builder,
1830+
value,
1831+
src_name,
1832+
lltype,
1833+
sizeof,
1834+
line,
1835+
datamodel,
1836+
argidx,
1837+
)
1838+
return
18201839
# Insert the llvm.dbg.value intrinsic call
18211840
self.debuginfo.update_variable(
18221841
self.builder,
@@ -1839,9 +1858,8 @@ def storevar(self, value, name, argidx=None):
18391858
# Not yet covered by the dbg.value range
18401859
and src_name not in self.dbg_val_names
18411860
):
1842-
# Use fndesc.args to get correct argidx for func args
18431861
if src_name in self.fndesc.args:
1844-
argidx = self.fndesc.args.index(src_name) + 1
1862+
return
18451863
# Insert the llvm.dbg.value intrinsic call
18461864
self.debuginfo.update_variable(
18471865
self.builder,
@@ -1851,7 +1869,7 @@ def storevar(self, value, name, argidx=None):
18511869
sizeof,
18521870
line,
18531871
datamodel,
1854-
argidx,
1872+
argidx=None,
18551873
)
18561874

18571875
def pre_block(self, block):

numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,37 @@ def f(x, y):
114114
match = re.compile(pat).search(llvm_ir)
115115
self.assertIsNotNone(match, msg=llvm_ir)
116116

117+
def test_scalar_kernel_parameters_use_dbg_declare(self):
118+
sig = (types.int32, types.int32, types.int32, types.int32[::1])
119+
120+
@cuda.jit(sig, debug=True, opt=False)
121+
def f(depth1, depth2, depth3, out):
122+
acc = 0
123+
for i in range(depth1):
124+
for j in range(depth2):
125+
for k in range(depth3):
126+
acc += i + j + k
127+
out[0] = acc
128+
129+
llvm_ir = f.inspect_llvm(sig)
130+
131+
# Scalar arguments should be described via dbg.declare on their stack
132+
# slots (stable), rather than only via dbg.value (which ptxas may encode
133+
# as parameter-space DW_OP_addr locations).
134+
for argno, name in enumerate(("depth1", "depth2", "depth3"), start=1):
135+
md_pat = rf"^!(\d+)\s+=\s+!DILocalVariable\([^)]*arg:\s*{argno}[^)]*name:\s*\"{name}\""
136+
md_match = re.compile(md_pat, re.MULTILINE).search(llvm_ir)
137+
self.assertIsNotNone(md_match, msg=llvm_ir)
138+
md_id = md_match.group(1)
139+
140+
declare_pat = (
141+
r"call void @\"llvm\.dbg\.declare\"\("
142+
r"[\s\S]*?"
143+
rf"metadata\s+!{md_id}\b"
144+
)
145+
declare_match = re.compile(declare_pat).search(llvm_ir)
146+
self.assertIsNotNone(declare_match, msg=llvm_ir)
147+
117148
def test_grid_group_type(self):
118149
sig = (types.int32,)
119150

@@ -343,13 +374,23 @@ def f(x, y):
343374
z4 = True # noqa: F841
344375

345376
llvm_ir = f.inspect_llvm(sig)
346-
# Verify the call to llvm.dbg.declare is replaced by llvm.dbg.value
347-
pat1 = r'call void @"llvm.dbg.declare"'
348-
match = re.compile(pat1).search(llvm_ir)
349-
self.assertIsNone(match, msg=llvm_ir)
350-
pat2 = r'call void @"llvm.dbg.value"'
351-
match = re.compile(pat2).search(llvm_ir)
352-
self.assertIsNotNone(match, msg=llvm_ir)
377+
# Scalar locals should be described via llvm.dbg.value, not
378+
# llvm.dbg.declare (formal parameters may still use dbg.declare).
379+
for name in ("z1", "z2", "z3", "z4"):
380+
md_pat = rf'^!(\d+)\s+=\s+!DILocalVariable\(.*arg:\s*0,.*name:\s+"{name}"'
381+
md_match = re.compile(md_pat, re.MULTILINE).search(llvm_ir)
382+
self.assertIsNotNone(md_match, msg=llvm_ir)
383+
md_id = md_match.group(1)
384+
385+
value_pat = rf'call void @"llvm\.dbg\.value"\(metadata [^,]+, metadata !{md_id}\b'
386+
value_match = re.compile(value_pat).search(llvm_ir)
387+
self.assertIsNotNone(value_match, msg=llvm_ir)
388+
389+
declare_pat = (
390+
rf'call void @"llvm\.dbg\.declare"\([^)]*metadata !{md_id}\b'
391+
)
392+
declare_match = re.compile(declare_pat).search(llvm_ir)
393+
self.assertIsNone(declare_match, msg=llvm_ir)
353394

354395
def test_llvm_dbg_value_range(self):
355396
sig = (types.int64,)
@@ -383,10 +424,9 @@ def test_llvm_dbg_value_loadvar_coverage(self):
383424
@cuda.jit("void(int32[:], int32)", debug=True, opt=False)
384425
def foo(arr, scalar):
385426
"""
386-
CHECK: call void @"llvm.dbg.value"(metadata i32 %"scalar"
427+
CHECK: call void @"llvm.dbg.declare"(metadata i32* %"scalar.1", metadata ![[SC:[0-9]+]]
387428
388429
CHECK: load i32, i32* %"scalar.1"
389-
CHECK: call void @"llvm.dbg.value"(metadata i32 %"{{[^"]+}}", metadata ![[SC:[0-9]+]]
390430
391431
CHECK: ![[SC]] = !DILocalVariable{{.+}}name: "scalar"
392432
"""

0 commit comments

Comments
 (0)