diff --git a/emsymbolizer.py b/emsymbolizer.py index 398d36b6bbd8c..1a9cf27f25d56 100755 --- a/emsymbolizer.py +++ b/emsymbolizer.py @@ -8,8 +8,10 @@ # line/column number, potentially including inlining. # If the wasm has separate DWARF info, do the above with the side file # If there is a source map, we can parse it to get file and line number. -# If there is an emscripten symbol map, we can parse that to get the symbol name -# If there is a name section or symbol table, llvm-nm can show the symbol name. +# If there is an emscripten symbol map, we can use that to get the symbol name +# If there is a name section or symbol table, llvm-symbolizer can show the +# symbol name. +# Separate DWARF and emscripten symbol maps are not supported yet. import argparse import json @@ -50,21 +52,30 @@ def get_codesec_offset(module): def has_debug_line_section(module): - for sec in module.sections(): - if sec.name == ".debug_line": - return True - return False + return module.get_custom_section('.debug_line') is not None + + +def has_name_section(module): + return module.get_custom_section('name') is not None + +def has_linking_section(module): + return module.get_custom_section('linking') is not None -def symbolize_address_dwarf(module, address): - vma_adjust = get_codesec_offset(module) + +def symbolize_address_symbolizer(module, address, is_dwarf): + if is_dwarf: + vma_adjust = get_codesec_offset(module) + else: + vma_adjust = 0 cmd = [LLVM_SYMBOLIZER, '-e', module.filename, f'--adjust-vma={vma_adjust}', str(address)] out = shared.run_process(cmd, stdout=subprocess.PIPE).stdout.strip() out_lines = out.splitlines() + # Source location regex, e.g., /abc/def.c:3:5 SOURCE_LOC_RE = re.compile(r'(.+):(\d+):(\d+)$') - # llvm-dwarfdump prints two lines per location. The first line contains a + # llvm-symbolizer prints two lines per location. The first line contains a # function name, and the second contains a source location like # '/abc/def.c:3:5'. If the function or source info is not available, it will # be printed as '??', in which case we store None. If the line and column info @@ -210,22 +221,23 @@ def main(args): with webassembly.Module(args.wasm_file) as module: base = 16 if args.address.lower().startswith('0x') else 10 address = int(args.address, base) - symbolized = 0 if args.addrtype == 'code': address += get_codesec_offset(module) if ((has_debug_line_section(module) and not args.source) or 'dwarf' in args.source): - symbolize_address_dwarf(module, address) - symbolized += 1 - - if ((get_sourceMappingURL_section(module) and not args.source) or - 'sourcemap' in args.source): + symbolize_address_symbolizer(module, address, is_dwarf=True) + elif ((get_sourceMappingURL_section(module) and not args.source) or + 'sourcemap' in args.source): symbolize_address_sourcemap(module, address, args.file) - symbolized += 1 - - if not symbolized: + elif ((has_name_section(module) and not args.source) or + 'names' in args.source): + symbolize_address_symbolizer(module, address, is_dwarf=False) + elif ((has_linking_section(module) and not args.source) or + 'symtab' in args.source): + symbolize_address_symbolizer(module, address, is_dwarf=False) + else: raise Error('No .debug_line or sourceMappingURL section found in ' f'{module.filename}.' " I don't know how to symbolize this file yet") @@ -233,7 +245,8 @@ def main(args): def get_args(): parser = argparse.ArgumentParser() - parser.add_argument('-s', '--source', choices=['dwarf', 'sourcemap'], + parser.add_argument('-s', '--source', choices=['dwarf', 'sourcemap', + 'names', 'symtab'], help='Force debug info source type', default=()) parser.add_argument('-f', '--file', action='store', help='Force debug info source file') diff --git a/test/test_other.py b/test/test_other.py index c6b052f43b776..c909ee06a84da 100644 --- a/test/test_other.py +++ b/test/test_other.py @@ -9771,7 +9771,33 @@ def test(dump_file): test('foo.wasm.dump') test('bar.wasm.dump') - def test_emsymbolizer(self): + def get_instr_addr(self, text, filename): + ''' + Runs llvm-objdump to get the address of the first occurrence of the + specified line within the given function. llvm-objdump's output format + example is as follows: + ... + 00000004 : + ... + 6: 41 00 i32.const 0 + ... + The addresses here are the offsets to the start of the file. Returns + the address string in hexadecimal. + ''' + out = self.run_process([common.LLVM_OBJDUMP, '-d', filename], + stdout=PIPE).stdout.strip() + out_lines = out.splitlines() + found = False + for line in out_lines: + if text in line: + offset = line.strip().split(':')[0] + found = True + break + assert found + return '0x' + offset + + def test_emsymbolizer_srcloc(self): + 'Test emsymbolizer use cases that provide src location granularity info' def check_dwarf_loc_info(address, funcs, locs): out = self.run_process( [emsymbolizer, '-s', 'dwarf', 'test_dwarf.wasm', address], @@ -9783,45 +9809,19 @@ def check_dwarf_loc_info(address, funcs, locs): def check_source_map_loc_info(address, loc): out = self.run_process( - [emsymbolizer, '-s', 'sourcemap', 'test_dwarf.wasm', - address], + [emsymbolizer, '-s', 'sourcemap', 'test_dwarf.wasm', address], stdout=PIPE).stdout self.assertIn(loc, out) - # Runs llvm-objdump to get the address of the first occurrence of the - # specified line within the given function. llvm-objdump's output format - # example is as follows: - # ... - # 00000004 : - # ... - # 6: 41 00 i32.const 0 - # ... - # The addresses here are the offsets to the start of the file. Returns - # the address string in hexadecimal. - def get_addr(text): - out = self.run_process([common.LLVM_OBJDUMP, '-d', 'test_dwarf.wasm'], - stdout=PIPE).stdout.strip() - out_lines = out.splitlines() - found = False - for line in out_lines: - if text in line: - offset = line.strip().split(':')[0] - found = True - break - assert found - return '0x' + offset - # We test two locations within test_dwarf.c: # out_to_js(0); // line 6 # __builtin_trap(); // line 13 - - # 1. Test DWARF + source map together self.run_process([EMCC, test_file('core/test_dwarf.c'), '-g', '-gsource-map', '-O1', '-o', 'test_dwarf.js']) # Address of out_to_js(0) within foo(), uninlined - out_to_js_call_addr = get_addr('call\t0') + out_to_js_call_addr = self.get_instr_addr('call\t0', 'test_dwarf.wasm') # Address of __builtin_trap() within bar(), inlined into main() - unreachable_addr = get_addr('unreachable') + unreachable_addr = self.get_instr_addr('unreachable', 'test_dwarf.wasm') # Function name of out_to_js(0) within foo(), uninlined out_to_js_call_func = ['foo'] @@ -9835,6 +9835,7 @@ def get_addr(text): # The first one corresponds to the innermost inlined location. unreachable_loc = ['test_dwarf.c:13:3', 'test_dwarf.c:18:3'] + # 1. Test DWARF + source map together # For DWARF, we check for the full inlined info for both function names and # source locations. Source maps provide neither function names nor inlined # info. So we only check for the source location of the outermost function. @@ -9860,6 +9861,27 @@ def get_addr(text): out_to_js_call_loc) check_dwarf_loc_info(unreachable_addr, unreachable_func, unreachable_loc) + def test_emsymbolizer_functions(self): + 'Test emsymbolizer use cases that only provide function-granularity info' + def check_func_info(filename, address, func): + out = self.run_process( + [emsymbolizer, filename, address], stdout=PIPE).stdout + self.assertIn(func, out) + + # 1. Test name section only + self.run_process([EMCC, test_file('core/test_dwarf.c'), + '--profiling-funcs', '-O1', '-o', 'test_dwarf.js']) + with webassembly.Module('test_dwarf.wasm') as wasm: + self.assertTrue(wasm.has_name_section()) + self.assertIsNone(wasm.get_custom_section('.debug_info')) + # Address of out_to_js(0) within foo(), uninlined + out_to_js_call_addr = self.get_instr_addr('call\t0', 'test_dwarf.wasm') + # Address of __builtin_trap() within bar(), inlined into main() + unreachable_addr = self.get_instr_addr('unreachable', 'test_dwarf.wasm') + check_func_info('test_dwarf.wasm', out_to_js_call_addr, 'foo') + # The name section will not show bar, as it's inlined into main + check_func_info('test_dwarf.wasm', unreachable_addr, '__original_main') + def test_separate_dwarf(self): self.run_process([EMCC, test_file('hello_world.c'), '-g']) self.assertExists('a.out.wasm')