From 6253c5f64b1b6754e8aa9ed27c2b6c78c9f0745f Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Sat, 6 Sep 2025 08:55:41 +0200 Subject: [PATCH 01/38] fixup! Modernize python code for `construct` and add types (#609) --- elftools/construct/lib/container.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/elftools/construct/lib/container.py b/elftools/construct/lib/container.py index 9b54d316..16971edc 100644 --- a/elftools/construct/lib/container.py +++ b/elftools/construct/lib/container.py @@ -65,7 +65,7 @@ def __getitem__(self, name: Literal[ "length", "n_descsz", "n_offset", "n_namesz", "sh_addralign", "sh_flags", "sh_size", - "bloom_size", "nbuckets", + "bloom_size", "nbuckets", "nchains", ]) -> int: ... @overload def __getitem__(self, name: Literal[ @@ -75,6 +75,10 @@ def __getitem__(self, name: Literal[ "tag", "vendor_name", ]) -> str: ... @overload + def __getitem__(self, name: Literal[ + "buckets", "chains", + ]) -> list[int]: ... + @overload def __getitem__(self, name: str) -> Any: ... def __getitem__(self, name: str) -> Any: return self.__dict__[name] From 0c2dc50dfee1a066a302833465ab30153d3d0d95 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Wed, 12 Mar 2025 16:40:38 +0100 Subject: [PATCH 02/38] construct: Build _printable with a dict-comprehension Signed-off-by: Philipp Hahn --- elftools/construct/lib/hex.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/elftools/construct/lib/hex.py b/elftools/construct/lib/hex.py index 85da15d7..c9e78916 100644 --- a/elftools/construct/lib/hex.py +++ b/elftools/construct/lib/hex.py @@ -7,8 +7,7 @@ # Map an integer in the inclusive range 0-255 to its string byte representation -_printable = dict((i, ".") for i in range(256)) -_printable.update((i, chr(i)) for i in range(32, 128)) +_printable = {i: chr(i) if 32 <= i < 128 else "." 
for i in range(256)} def hexdump(data: bytes, linesize: int) -> list[str]: From 028653bb81425f7595236b14244cb08fa9a90e8e Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Tue, 18 Feb 2025 10:19:13 +0100 Subject: [PATCH 03/38] typing: Convert to NamedTuple Convert from legacy `collections.namedtuple` to `typing.NamedTuple` to allow adding type hints. Signed-off-by: Philipp Hahn --- elftools/dwarf/aranges.py | 14 +++++++++++--- elftools/dwarf/callframe.py | 8 +++++--- elftools/dwarf/die.py | 11 ++++++++--- elftools/dwarf/dwarf_expr.py | 8 ++++++-- elftools/dwarf/dwarfinfo.py | 16 +++++++++++----- elftools/dwarf/lineprogram.py | 11 ++++++++--- elftools/dwarf/locationlists.py | 29 ++++++++++++++++++++++++----- elftools/dwarf/namelut.py | 8 +++++++- elftools/dwarf/ranges.py | 16 +++++++++++++--- elftools/ehabi/decoder.py | 9 +++++++-- elftools/elf/relocation.py | 10 +++++++--- 11 files changed, 107 insertions(+), 33 deletions(-) diff --git a/elftools/dwarf/aranges.py b/elftools/dwarf/aranges.py index 1493506b..4def5562 100644 --- a/elftools/dwarf/aranges.py +++ b/elftools/dwarf/aranges.py @@ -6,7 +6,8 @@ # Dorothy Chen (dorothchen@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from collections import namedtuple +from typing import NamedTuple + from ..common.utils import struct_parse from bisect import bisect_right import math @@ -16,8 +17,15 @@ # length: The length of the address range in this entry # info_offset: The CU's offset into .debug_info # see 6.1.2 in DWARF4 docs for explanation of the remaining fields -ARangeEntry = namedtuple('ARangeEntry', - 'begin_addr length info_offset unit_length version address_size segment_size') +class ARangeEntry(NamedTuple): + begin_addr: int + length: int + info_offset: int + unit_length: int + version: int + address_size: int + segment_size: int + class ARanges: """ ARanges table in DWARF diff --git a/elftools/dwarf/callframe.py 
b/elftools/dwarf/callframe.py index 0ce2770a..746e328b 100644 --- a/elftools/dwarf/callframe.py +++ b/elftools/dwarf/callframe.py @@ -8,7 +8,8 @@ #------------------------------------------------------------------------------- import copy import os -from collections import namedtuple +from typing import Any, NamedTuple + from ..common.utils import ( struct_parse, dwarf_assert, preserve_stream_pos, iterbytes) from ..construct import Struct, Switch @@ -714,8 +715,9 @@ def __repr__(self): # A list of register numbers that are described in the table by the order of # their appearance. # -DecodedCallFrameTable = namedtuple( - 'DecodedCallFrameTable', 'table reg_order') +class DecodedCallFrameTable(NamedTuple): + table: list[dict[str, Any]] + reg_order: list[int] #---------------- PRIVATE ----------------# diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py index 3c3ee968..243a9c23 100644 --- a/elftools/dwarf/die.py +++ b/elftools/dwarf/die.py @@ -6,8 +6,8 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from collections import namedtuple import os +from typing import Any, NamedTuple from ..common.exceptions import DWARFError, ELFParseError from ..common.utils import bytes2str, struct_parse @@ -40,8 +40,13 @@ # If the form of the attribute is DW_FORM_indirect, the form will contain # the resolved form, and this will contain the length of the indirection chain. # 0 means no indirection. 
-AttributeValue = namedtuple( - 'AttributeValue', 'name form value raw_value offset indirection_length') +class AttributeValue(NamedTuple): + name: str + form: str + value: Any + raw_value: int + offset: int + indirection_length: int class DIE: diff --git a/elftools/dwarf/dwarf_expr.py b/elftools/dwarf/dwarf_expr.py index 05f57667..89fd6157 100644 --- a/elftools/dwarf/dwarf_expr.py +++ b/elftools/dwarf/dwarf_expr.py @@ -6,8 +6,8 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from collections import namedtuple from io import BytesIO +from typing import Any, NamedTuple from ..common.utils import struct_parse, read_blob from ..common.exceptions import DWARFError @@ -120,7 +120,11 @@ def _generate_dynamic_values(map, prefix, index_start, index_end, value_start): # Each parsed DWARF expression is returned as this type with its numeric opcode, # op name (as a string) and a list of arguments. -DWARFExprOp = namedtuple('DWARFExprOp', 'op op_name args offset') +class DWARFExprOp(NamedTuple): + op: int + op_name: str + args: list[Any] + offset: int class DWARFExprParser: diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index f7daa682..374803cf 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -6,8 +6,8 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from collections import namedtuple from bisect import bisect_right +from typing import IO, NamedTuple from ..construct.lib.container import Container from ..common.exceptions import DWARFError @@ -38,8 +38,12 @@ # aren't strictly required for the DWARF parsing to work. 'address' is required # to properly decode the special '.eh_frame' format. 
# -DebugSectionDescriptor = namedtuple('DebugSectionDescriptor', - 'stream name global_offset size address') +class DebugSectionDescriptor(NamedTuple): + stream: IO[bytes] + name: str + global_offset: int | None + size: int + address: int # Some configuration parameters for the DWARF reader. This exists to allow @@ -54,8 +58,10 @@ # default_address_size: # The default address size for the container file (sizeof pointer, in bytes) # -DwarfConfig = namedtuple('DwarfConfig', - 'little_endian machine_arch default_address_size') +class DwarfConfig(NamedTuple): + little_endian: bool + machine_arch: str + default_address_size: int class DWARFInfo: diff --git a/elftools/dwarf/lineprogram.py b/elftools/dwarf/lineprogram.py index 807df78d..a5d9f2bf 100644 --- a/elftools/dwarf/lineprogram.py +++ b/elftools/dwarf/lineprogram.py @@ -6,9 +6,11 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + import os import copy -from collections import namedtuple +from typing import NamedTuple from ..common.utils import struct_parse, dwarf_assert from .constants import * @@ -36,8 +38,11 @@ # For commands that add a new state, it's the relevant LineState object. # For commands that don't add a new state, it's None. 
# -LineProgramEntry = namedtuple( - 'LineProgramEntry', 'command is_extended args state') +class LineProgramEntry(NamedTuple): + command: int + is_extended: bool + args: list[int] + state: LineState | None class LineState: diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py index c719efbc..ba49dbda 100644 --- a/elftools/dwarf/locationlists.py +++ b/elftools/dwarf/locationlists.py @@ -7,15 +7,34 @@ # This code is in the public domain #------------------------------------------------------------------------------- import os -from collections import namedtuple +from typing import NamedTuple + from ..common.exceptions import DWARFError from ..common.utils import struct_parse from .dwarf_util import _iter_CUs_in_section -LocationExpr = namedtuple('LocationExpr', 'loc_expr') -LocationEntry = namedtuple('LocationEntry', 'entry_offset entry_length begin_offset end_offset loc_expr is_absolute') -BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset entry_length base_address') -LocationViewPair = namedtuple('LocationViewPair', 'entry_offset begin end') + +class LocationExpr(NamedTuple): + loc_expr: list[int] + +class LocationEntry(NamedTuple): + entry_offset: int + entry_length: int + begin_offset: int + end_offset: int + loc_expr: list[int] + is_absolute: bool + +class BaseAddressEntry(NamedTuple): + entry_offset: int + entry_length: int + base_address: int + +class LocationViewPair(NamedTuple): + entry_offset: int + begin: int + end: int + def _translate_startx_length(e, cu): start_offset = cu.dwarfinfo.get_addr(cu, e.start_index) diff --git a/elftools/dwarf/namelut.py b/elftools/dwarf/namelut.py index f29c271c..7a3133ec 100644 --- a/elftools/dwarf/namelut.py +++ b/elftools/dwarf/namelut.py @@ -8,10 +8,16 @@ #------------------------------------------------------------------------------- import collections from collections.abc import Mapping +from typing import NamedTuple + from ..common.utils import struct_parse from ..construct import 
CString, Struct, If -NameLUTEntry = collections.namedtuple('NameLUTEntry', 'cu_ofs die_ofs') + +class NameLUTEntry(NamedTuple): + cu_ofs: int + die_ofs: int + class NameLUT(Mapping): """ diff --git a/elftools/dwarf/ranges.py b/elftools/dwarf/ranges.py index 986b53d3..d4063ac7 100644 --- a/elftools/dwarf/ranges.py +++ b/elftools/dwarf/ranges.py @@ -7,15 +7,25 @@ # This code is in the public domain #------------------------------------------------------------------------------- import os -from collections import namedtuple +from typing import NamedTuple from ..common.utils import struct_parse from ..common.exceptions import DWARFError from .dwarf_util import _iter_CUs_in_section -RangeEntry = namedtuple('RangeEntry', 'entry_offset entry_length begin_offset end_offset is_absolute') -BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset base_address') +class RangeEntry(NamedTuple): + entry_offset: int + entry_length: int + begin_offset: int + end_offset: int + is_absolute: bool + + +class BaseAddressEntry(NamedTuple): + entry_offset: int + base_address: int + # If we ever see a list with a base entry at the end, there will be an error that entry_length is not a field. 
def _translate_startx_length(e, cu): diff --git a/elftools/ehabi/decoder.py b/elftools/ehabi/decoder.py index 1e6bcbbe..0ea224cf 100644 --- a/elftools/ehabi/decoder.py +++ b/elftools/ehabi/decoder.py @@ -6,7 +6,9 @@ # LeadroyaL (leadroyal@qq.com) # This code is in the public domain # ------------------------------------------------------------------------------- -from collections import namedtuple +from __future__ import annotations + +from typing import Callable, NamedTuple class EHABIBytecodeDecoder: @@ -244,7 +246,10 @@ def _spare(self): self._index += 1 return 'spare' - _DECODE_RECIPE_TYPE = namedtuple('_DECODE_RECIPE_TYPE', 'mask value handler') + class _DECODE_RECIPE_TYPE(NamedTuple): + mask: int + value: int + handler: Callable[[EHABIBytecodeDecoder], str] ring = ( _DECODE_RECIPE_TYPE(mask=0xc0, value=0x00, handler=_decode_00xxxxxx), diff --git a/elftools/elf/relocation.py b/elftools/elf/relocation.py index 7011cf9e..feb0d965 100644 --- a/elftools/elf/relocation.py +++ b/elftools/elf/relocation.py @@ -6,7 +6,9 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from collections import namedtuple +from __future__ import annotations + +from typing import Callable, NamedTuple from ..common.exceptions import ELFRelocationError from ..common.utils import elf_assert, struct_parse @@ -368,8 +370,10 @@ def _do_apply_relocation(self, stream, reloc, symtab): # calc_func: A function that performs the relocation on an extracted # value, and returns the updated value. 
# - _RELOCATION_RECIPE_TYPE = namedtuple('_RELOCATION_RECIPE_TYPE', - 'bytesize has_addend calc_func') + class _RELOCATION_RECIPE_TYPE(NamedTuple): + bytesize: int + has_addend: bool + calc_func: Callable[..., int] _RELOCATION_RECIPES_ARM = { ENUM_RELOC_TYPE_ARM['R_ARM_ABS32']: _RELOCATION_RECIPE_TYPE( From 0c49ffb33dae0e74f14dee7998721d6358381679 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Tue, 18 Feb 2025 10:19:13 +0100 Subject: [PATCH 04/38] typing: Add some Protocols `pyelftools` does not use class hierarchies in all cases, but relies on _duck typing_. Introduce Protocols for those cases to allow type hinting. Signed-off-by: Philipp Hahn --- elftools/elf/dynamic.py | 6 ++++++ elftools/elf/hash.py | 14 ++++++++++++++ elftools/elf/relocation.py | 8 ++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/elftools/elf/dynamic.py b/elftools/elf/dynamic.py index 231d7346..966bb562 100644 --- a/elftools/elf/dynamic.py +++ b/elftools/elf/dynamic.py @@ -7,6 +7,7 @@ # This code is in the public domain #------------------------------------------------------------------------------- import itertools +from typing import Protocol, runtime_checkable from collections import defaultdict from .hash import ELFHashTable, GNUHashTable @@ -18,6 +19,11 @@ from ..common.utils import elf_assert, struct_parse, parse_cstring_from_stream +@runtime_checkable +class _StringTable(Protocol): + def get_string(self, offset: int, /) -> str: ... + + class _DynamicStringTable: """ Bare string table based on values found via ELF dynamic tags and loadable segments only. Good enough for get_string() only. 
diff --git a/elftools/elf/hash.py b/elftools/elf/hash.py index 72882481..dea056cb 100644 --- a/elftools/elf/hash.py +++ b/elftools/elf/hash.py @@ -6,12 +6,26 @@ # Andreas Ziegler (andreas.ziegler@fau.de) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations import struct +from typing import TYPE_CHECKING, Protocol from ..common.utils import struct_parse from .sections import Section +if TYPE_CHECKING: + from .sections import Symbol + + +class _SymbolTable(Protocol): + def get_symbol(self, index: int, /) -> Symbol | None: ... + + +class _HashTable(Protocol): # noqa: PYI046 + def get_number_of_symbols(self) -> int: ... + def get_symbol(self, name: str, /) -> Symbol | None: ... + class ELFHashTable: """ Representation of an ELF hash table to find symbols in the diff --git a/elftools/elf/relocation.py b/elftools/elf/relocation.py index feb0d965..95feb89a 100644 --- a/elftools/elf/relocation.py +++ b/elftools/elf/relocation.py @@ -8,7 +8,7 @@ #------------------------------------------------------------------------------- from __future__ import annotations -from typing import Callable, NamedTuple +from typing import NamedTuple, Protocol from ..common.exceptions import ELFRelocationError from ..common.utils import elf_assert, struct_parse @@ -205,6 +205,10 @@ def __init__(self, header, name, elffile): self['sh_offset'], self['sh_size'], self['sh_entsize']) +class _RelocationFunction(Protocol): + def __call__(self, value: int, sym_value: int, offset: int, addend: int = 0) -> int: ... 
+ + def _reloc_calc_identity(value, sym_value, offset, addend=0): return value @@ -373,7 +377,7 @@ def _do_apply_relocation(self, stream, reloc, symtab): class _RELOCATION_RECIPE_TYPE(NamedTuple): bytesize: int has_addend: bool - calc_func: Callable[..., int] + calc_func: _RelocationFunction _RELOCATION_RECIPES_ARM = { ENUM_RELOC_TYPE_ARM['R_ARM_ABS32']: _RELOCATION_RECIPE_TYPE( From 1d85348c08e97502e3a7c719550f447d86142d5a Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 11:27:20 +0100 Subject: [PATCH 05/38] typing: Add / fix PEP-484 type hints - [x] `pyright` - [x] `mypy` with several issues: - [ ] `pyrefly` - [ ] `ty` - [ ] `typeguard` To run `pyelftools` under `typeguard` create the file `.venv/lib/python3.12/site-packages/sitecustomize.py` with line `__import__("typeguard").install_import_hook("elftools")` > Cannot determine type of "dwarfinfo" > Cannot determine type of "structs" Signed-off-by: Philipp Hahn --- elftools/__init__.py | 2 +- elftools/common/construct_utils.py | 43 +++++---- elftools/common/utils.py | 47 +++++++--- elftools/dwarf/abbrevtable.py | 28 ++++-- elftools/dwarf/aranges.py | 25 +++-- elftools/dwarf/callframe.py | 119 ++++++++++++++---------- elftools/dwarf/compileunit.py | 42 ++++++--- elftools/dwarf/datatype_cpp.py | 56 +++++++----- elftools/dwarf/descriptions.py | 84 +++++++++-------- elftools/dwarf/die.py | 56 +++++++----- elftools/dwarf/dwarf_expr.py | 52 ++++++----- elftools/dwarf/dwarf_util.py | 20 +++- elftools/dwarf/dwarfinfo.py | 136 ++++++++++++++------------- elftools/dwarf/lineprogram.py | 44 +++++---- elftools/dwarf/locationlists.py | 90 ++++++++++-------- elftools/dwarf/namelut.py | 47 ++++++---- elftools/dwarf/ranges.py | 48 ++++++---- elftools/dwarf/structs.py | 62 +++++++------ elftools/dwarf/typeunit.py | 46 ++++++---- elftools/ehabi/constants.py | 2 +- elftools/ehabi/decoder.py | 69 +++++++------- elftools/ehabi/ehabiinfo.py | 49 +++++----- elftools/ehabi/structs.py | 8 +-
elftools/elf/descriptions.py | 81 +++++++++------- elftools/elf/dynamic.py | 81 ++++++++-------- elftools/elf/elffile.py | 120 +++++++++++++----------- elftools/elf/enums.py | 105 +++++++++++---------- elftools/elf/gnuversions.py | 62 ++++++++----- elftools/elf/hash.py | 36 ++++---- elftools/elf/notes.py | 23 +++-- elftools/elf/relocation.py | 91 +++++++++--------- elftools/elf/sections.py | 142 ++++++++++++++++------------- elftools/elf/segments.py | 49 ++++++---- elftools/elf/structs.py | 78 +++++++++------- scripts/dwarfdump.py | 86 ++++++++++------- scripts/readelf.py | 112 +++++++++++++---------- test/test_debuglink.py | 7 +- 37 files changed, 1307 insertions(+), 941 deletions(-) diff --git a/elftools/__init__.py b/elftools/__init__.py index f56437c2..3ea9f8c1 100644 --- a/elftools/__init__.py +++ b/elftools/__init__.py @@ -4,4 +4,4 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -__version__ = '0.32' +__version__: str = '0.32' diff --git a/elftools/common/construct_utils.py b/elftools/common/construct_utils.py index 66b7f092..edd337f1 100644 --- a/elftools/common/construct_utils.py +++ b/elftools/common/construct_utils.py @@ -6,11 +6,20 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + from struct import Struct +from typing import IO, TYPE_CHECKING, Any, NoReturn + from ..construct import ( Subconstruct, ConstructError, ArrayError, SizeofError, Construct, StaticField, FieldError ) +if TYPE_CHECKING: + from collections.abc import Callable, Iterable + + from ..construct import Container + class RepeatUntilExcluding(Subconstruct): """ A version of construct's RepeatUntil that doesn't include the last @@ -21,12 +30,12 @@ class RepeatUntilExcluding(Subconstruct): P.S. 
removed some code duplication """ __slots__ = ["predicate"] - def __init__(self, predicate, subcon): + def __init__(self, predicate: Callable[[Any, Container], bool], subcon: Construct) -> None: Subconstruct.__init__(self, subcon) self.predicate = predicate self._clear_flag(self.FLAG_COPY_CONTEXT) self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): + def _parse(self, stream: IO[bytes], context: Container) -> list[Any]: obj = [] try: context_for_subcon = context @@ -41,15 +50,15 @@ def _parse(self, stream, context): except ConstructError as ex: raise ArrayError("missing terminator", ex) return obj - def _build(self, obj, stream, context): + def _build(self, obj: Iterable[Any], stream: IO[bytes], context: Container) -> NoReturn: raise NotImplementedError('no building') - def _sizeof(self, context): + def _sizeof(self, context: Container) -> int: raise SizeofError("can't calculate size") class ULEB128(Construct): """A construct based parser for ULEB128 encoding. """ - def _parse(self, stream, context): + def _parse(self, stream: IO[bytes], context: Container) -> int: value = 0 shift = 0 while True: @@ -65,7 +74,7 @@ def _parse(self, stream, context): class SLEB128(Construct): """A construct based parser for SLEB128 encoding. 
""" - def _parse(self, stream, context): + def _parse(self, stream: IO[bytes], context: Container) -> int: value = 0 shift = 0 while True: @@ -88,15 +97,15 @@ class StreamOffset(Construct): Example: StreamOffset("item_offset") """ - __slots__ = [] - def __init__(self, name): + __slots__: list[str] = [] + def __init__(self, name: str) -> None: Construct.__init__(self, name) self._set_flag(self.FLAG_DYNAMIC) - def _parse(self, stream, context): + def _parse(self, stream: IO[bytes], context: Container) -> int: return stream.tell() - def _build(self, obj, stream, context): + def _build(self, obj: None, stream: IO[bytes], context: Container) -> None: context[self.name] = stream.tell() - def _sizeof(self, context): + def _sizeof(self, context: Container) -> int: return 0 _UBInt24_packer = Struct(">BH") @@ -104,24 +113,24 @@ def _sizeof(self, context): class UBInt24(StaticField): """unsigned, big endian 24-bit integer""" - def __init__(self, name): + def __init__(self, name: str) -> None: StaticField.__init__(self, name, 3) - def _parse(self, stream, context): + def _parse(self, stream: IO[bytes], context: Container) -> int: (h, l) = _UBInt24_packer.unpack(StaticField._parse(self, stream, context)) return l | (h << 16) - def _build(self, obj, stream, context): + def _build(self, obj: int, stream: IO[bytes], context: Container) -> None: StaticField._build(self, _UBInt24_packer.pack(obj >> 16, obj & 0xFFFF), stream, context) class ULInt24(StaticField): """unsigned, little endian 24-bit integer""" - def __init__(self, name): + def __init__(self, name: str) -> None: StaticField.__init__(self, name, 3) - def _parse(self, stream, context): + def _parse(self, stream: IO[bytes], context: Container) -> int: (l, h) = _ULInt24_packer.unpack(StaticField._parse(self, stream, context)) return l | (h << 16) - def _build(self, obj, stream, context): + def _build(self, obj: int, stream: IO[bytes], context: Container) -> None: StaticField._build(self, _ULInt24_packer.pack(obj & 0xFFFF, obj 
>> 16), stream, context) diff --git a/elftools/common/utils.py b/elftools/common/utils.py index 20211eac..74955626 100644 --- a/elftools/common/utils.py +++ b/elftools/common/utils.py @@ -6,24 +6,43 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + from contextlib import contextmanager +from typing import IO, TYPE_CHECKING, Any, TypeVar, overload + from .exceptions import ELFParseError, ELFError, DWARFError from ..construct import ConstructError, ULInt8 import os +if TYPE_CHECKING: + from collections.abc import Iterator, Mapping + + from ..construct import Construct, FormatField + from ..dwarf.dwarfinfo import DebugSectionDescriptor + from .construct_utils import SLEB128, ULEB128, UBInt24, ULInt24 + + _T = TypeVar("_T") + _K = TypeVar("_K") + _V = TypeVar("_V") + -def merge_dicts(*dicts): +def merge_dicts(*dicts: Mapping[_K, _V]) -> dict[_K, _V]: "Given any number of dicts, merges them into a new one.""" - result = {} + result: dict[_K, _V] = {} for d in dicts: result.update(d) return result -def bytes2str(b): +def bytes2str(b: bytes) -> str: """Decode a bytes object into a string.""" return b.decode('latin-1') -def struct_parse(struct, stream, stream_pos=None): +@overload +def struct_parse(struct: FormatField[_T] | ULEB128 | SLEB128 | UBInt24 | ULInt24, stream: IO[bytes], stream_pos: int | None = ...) -> _T: ... +@overload +def struct_parse(struct: Construct, stream: IO[bytes], stream_pos: int | None = ...) -> Any: ... +def struct_parse(struct: Construct, stream: IO[bytes], stream_pos: int | None = None) -> Any: """ Convenience function for using the given struct to parse a stream. If stream_pos is provided, the stream is seeked to this position before the parsing is done. 
Otherwise, the current position of the stream is @@ -38,7 +57,7 @@ def struct_parse(struct, stream, stream_pos=None): raise ELFParseError(str(e)) -def parse_cstring_from_stream(stream, stream_pos=None): +def parse_cstring_from_stream(stream: IO[bytes], stream_pos: int | None = None) -> bytes | None: """ Parse a C-string from the given stream. The string is returned without the terminating \x00 byte. If the terminating byte wasn't found, None is returned (the stream is exhausted). @@ -67,20 +86,20 @@ def parse_cstring_from_stream(stream, stream_pos=None): return b''.join(chunks) if found else None -def elf_assert(cond, msg=''): +def elf_assert(cond: object, msg: str = '') -> None: """ Assert that cond is True, otherwise raise ELFError(msg) """ _assert_with_exception(cond, msg, ELFError) -def dwarf_assert(cond, msg=''): +def dwarf_assert(cond: object, msg: str = '') -> None: """ Assert that cond is True, otherwise raise DWARFError(msg) """ _assert_with_exception(cond, msg, DWARFError) @contextmanager -def preserve_stream_pos(stream): +def preserve_stream_pos(stream: IO[bytes]) -> Iterator[None]: """ Usage: # stream has some position FOO (return value of stream.tell()) with preserve_stream_pos(stream): @@ -92,18 +111,18 @@ def preserve_stream_pos(stream): stream.seek(saved_pos) -def roundup(num, bits): +def roundup(num: int, bits: int) -> int: """ Round up a number to nearest multiple of 2^bits. The result is a number where the least significant bits passed in bits are 0. 
""" return (num - 1 | (1 << bits) - 1) + 1 -def read_blob(stream, length): +def read_blob(stream: IO[bytes], length: int) -> list[int]: """Read length bytes from stream, return a list of ints """ return [struct_parse(ULInt8(''), stream) for i in range(length)] -def save_dwarf_section(section, filename): +def save_dwarf_section(section: DebugSectionDescriptor, filename: str) -> None: """Debug helper: dump section contents into a file Section is expected to be one of the debug_xxx_sec elements of DWARFInfo """ @@ -116,7 +135,7 @@ def save_dwarf_section(section, filename): file.write(data) stream.seek(pos, os.SEEK_SET) -def iterbytes(b): +def iterbytes(b: bytes) -> Iterator[bytes]: """Return an iterator over the elements of a bytes object. For example, for b'abc' yields b'a', b'b' and then b'c'. @@ -124,13 +143,13 @@ def iterbytes(b): for i in range(len(b)): yield b[i:i+1] -def bytes2hex(b, sep=''): +def bytes2hex(b: bytes, sep: str = '') -> str: if not sep: return b.hex() return sep.join(map('{:02x}'.format, b)) #------------------------- PRIVATE ------------------------- -def _assert_with_exception(cond, msg, exception_type): +def _assert_with_exception(cond: object, msg: str, exception_type: type[BaseException]) -> None: if not cond: raise exception_type(msg) diff --git a/elftools/dwarf/abbrevtable.py b/elftools/dwarf/abbrevtable.py index 83f2df2c..6a72b7ef 100644 --- a/elftools/dwarf/abbrevtable.py +++ b/elftools/dwarf/abbrevtable.py @@ -6,14 +6,24 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + +from typing import IO, TYPE_CHECKING, Any + from ..common.utils import struct_parse +if TYPE_CHECKING: + from collections.abc import Iterator + + from ..construct.lib.container import Container + from .structs import DWARFStructs + class AbbrevTable: """ Represents a DWARF abbreviation table. 
""" __slots__ = ('structs', 'stream', 'offset', '_abbrev_map') - def __init__(self, structs, stream, offset): + def __init__(self, structs: DWARFStructs, stream: IO[bytes], offset: int) -> None: """ Create new abbreviation table. Parses the actual table from the stream and stores it internally. @@ -30,19 +40,19 @@ def __init__(self, structs, stream, offset): self._abbrev_map = self._parse_abbrev_table() - def get_abbrev(self, code): + def get_abbrev(self, code: int) -> AbbrevDecl: """ Get the AbbrevDecl for a given code. Raise KeyError if no declaration for this code exists. """ return self._abbrev_map[code] - def _parse_abbrev_table(self): + def _parse_abbrev_table(self) -> dict[int, AbbrevDecl]: """ Parse the abbrev table from the stream """ - map = {} + map: dict[int, AbbrevDecl] = {} self.stream.seek(self.offset) while True: - decl_code = struct_parse( + decl_code: int = struct_parse( struct=self.structs.the_Dwarf_uleb128, stream=self.stream) if decl_code == 0: @@ -61,20 +71,20 @@ class AbbrevDecl: The abbreviation declaration represents an "entry" that points to it. """ __slots__ = ('code', 'decl', '_has_children') - def __init__(self, code, decl): + def __init__(self, code: int, decl: Container) -> None: self.code = code self.decl = decl self._has_children = decl['children_flag'] == 'DW_CHILDREN_yes' - def has_children(self): + def has_children(self) -> bool: return self._has_children - def iter_attr_specs(self): + def iter_attr_specs(self) -> Iterator[tuple[str, str]]: """ Iterate over the attribute specifications for the entry. Yield (name, form) pairs. 
""" for attr_spec in self['attr_spec']: yield attr_spec.name, attr_spec.form - def __getitem__(self, entry): + def __getitem__(self, entry: str) -> Any: return self.decl[entry] diff --git a/elftools/dwarf/aranges.py b/elftools/dwarf/aranges.py index 4def5562..3197a147 100644 --- a/elftools/dwarf/aranges.py +++ b/elftools/dwarf/aranges.py @@ -6,12 +6,19 @@ # Dorothy Chen (dorothchen@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from typing import NamedTuple +from __future__ import annotations + +from typing import IO, TYPE_CHECKING, Callable, NamedTuple from ..common.utils import struct_parse from bisect import bisect_right import math +if TYPE_CHECKING: + from ..construct.core import Construct + from .structs import DWARFStructs + + # An entry in the aranges table; # begin_addr: The beginning address in the CU # length: The length of the address range in this entry @@ -36,7 +43,7 @@ class ARanges: structs: A DWARFStructs instance for parsing the data """ - def __init__(self, stream, size, structs): + def __init__(self, stream: IO[bytes], size: int, structs: DWARFStructs) -> None: self.stream = stream self.size = size self.structs = structs @@ -51,7 +58,7 @@ def __init__(self, stream, size, structs): self.keys = [entry.begin_addr for entry in self.entries] - def cu_offset_at_addr(self, addr): + def cu_offset_at_addr(self, addr: int) -> int | None: """ Given an address, get the offset of the CU it belongs to, where 'offset' refers to the offset in the .debug_info section. """ @@ -63,7 +70,7 @@ def cu_offset_at_addr(self, addr): #------ PRIVATE ------# - def _get_entries(self, need_empty=False): + def _get_entries(self, need_empty: bool = False) -> list[ARangeEntry]: """ Populate self.entries with ARangeEntry tuples for each range of addresses Terminating null entries of CU blocks are not returned, unless @@ -72,7 +79,7 @@ def _get_entries(self, need_empty=False): set to 0. 
""" self.stream.seek(0) - entries = [] + entries: list[ARangeEntry] = [] offset = 0 # one loop == one "set" == one CU @@ -84,7 +91,7 @@ def _get_entries(self, need_empty=False): # No segmentation if aranges_header["segment_size"] == 0: # pad to nearest multiple of tuple size - tuple_size = aranges_header["address_size"] * 2 + tuple_size: int = aranges_header["address_size"] * 2 fp = self.stream.tell() seek_to = int(math.ceil(fp/float(tuple_size)) * tuple_size) self.stream.seek(seek_to) @@ -95,8 +102,8 @@ def _get_entries(self, need_empty=False): got_entries = False # entries in this set/CU - addr = struct_parse(addr_size('addr'), self.stream) - length = struct_parse(addr_size('length'), self.stream) + addr: int = struct_parse(addr_size('addr'), self.stream) + length: int = struct_parse(addr_size('length'), self.stream) while addr != 0 or length != 0 or (not got_entries and need_empty): # 'begin_addr length info_offset version address_size segment_size' entries.append( @@ -122,7 +129,7 @@ def _get_entries(self, need_empty=False): return entries - def _get_addr_size_struct(self, addr_header_value): + def _get_addr_size_struct(self, addr_header_value: int) -> Callable[[str], Construct]: """ Given this set's header value (int) for the address size, get the Construct representation of that size """ diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py index 746e328b..91c72705 100644 --- a/elftools/dwarf/callframe.py +++ b/elftools/dwarf/callframe.py @@ -6,9 +6,11 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + import copy import os -from typing import Any, NamedTuple +from typing import IO, TYPE_CHECKING, Any, Callable, Literal, NamedTuple from ..common.utils import ( struct_parse, dwarf_assert, preserve_stream_pos, iterbytes) @@ -17,6 +19,10 @@ from .structs import DWARFStructs from .constants import * +if 
TYPE_CHECKING: + from ..construct.core import Construct + from ..construct.lib.container import Container, ListContainer + class CallFrameInfo: """ DWARF CFI (Call Frame Info) @@ -46,25 +52,25 @@ class CallFrameInfo: such as guessing which CU contains which FDEs (based on their address ranges) and taking the address_size from those CUs. """ - def __init__(self, stream, size, address, base_structs, - for_eh_frame=False): + def __init__(self, stream: IO[bytes], size: int, address: int, base_structs: DWARFStructs, + for_eh_frame: bool = False) -> None: self.stream = stream self.size = size self.address = address self.base_structs = base_structs - self.entries = None + self.entries: list[CFIEntry | ZERO] | None = None # Map between an offset in the stream and the entry object found at this # offset. Useful for assigning CIE to FDEs according to the CIE_pointer # header field which contains a stream offset. - self._entry_cache = {} + self._entry_cache: dict[int, CFIEntry] = {} # The .eh_frame and .debug_frame section use almost the same CFI # encoding, but there are tiny variations we need to handle during # parsing. self.for_eh_frame = for_eh_frame - def get_entries(self): + def get_entries(self) -> list[CFIEntry | ZERO]: """ Get a list of entries that constitute this CFI. The list consists of CIE or FDE objects, in the order of their appearance in the section. @@ -75,7 +81,7 @@ def get_entries(self): #------------------------- - def _parse_entries(self): + def _parse_entries(self) -> list[CFIEntry | ZERO]: entries = [] offset = 0 while offset < self.size: @@ -83,7 +89,7 @@ def _parse_entries(self): offset = self.stream.tell() return entries - def _parse_entry_at(self, offset): + def _parse_entry_at(self, offset: int) -> CFIEntry | ZERO: """ Parse an entry from self.stream starting with the given offset. Return the entry object. self.stream will point right after the entry (even if pulled from the cache). 
@@ -94,7 +100,7 @@ def _parse_entry_at(self, offset): entry.structs.initial_length_field_size(), os.SEEK_CUR) return entry - entry_length = struct_parse( + entry_length: int = struct_parse( self.base_structs.the_Dwarf_uint32, self.stream, offset) if self.for_eh_frame and entry_length == 0: @@ -111,7 +117,7 @@ def _parse_entry_at(self, offset): address_size=self.base_structs.address_size) # Read the next field to see whether this is a CIE or FDE - CIE_id = struct_parse( + CIE_id: int = struct_parse( entry_structs.the_Dwarf_offset, self.stream) if self.for_eh_frame: @@ -140,7 +146,7 @@ def _parse_entry_at(self, offset): else: cie = self._parse_cie_for_fde(offset, header, entry_structs) aug_bytes = self._read_augmentation_data(entry_structs) - lsda_encoding = cie.augmentation_dict.get('LSDA_encoding', DW_EH_encoding_flags['DW_EH_PE_omit']) + lsda_encoding: int = cie.augmentation_dict.get('LSDA_encoding', DW_EH_encoding_flags['DW_EH_PE_omit']) if lsda_encoding != DW_EH_encoding_flags['DW_EH_PE_omit']: # parse LSDA pointer lsda_pointer = self._parse_lsda_pointer(entry_structs, @@ -150,7 +156,7 @@ def _parse_entry_at(self, offset): lsda_pointer = None # For convenience, compute the end offset for this entry - end_offset = ( + end_offset: int = ( offset + header.length + entry_structs.initial_length_field_size()) @@ -177,14 +183,14 @@ def _parse_entry_at(self, offset): self._entry_cache[offset] = entry return entry - def _parse_instructions(self, structs, offset, end_offset): + def _parse_instructions(self, structs: DWARFStructs, offset: int, end_offset: int) -> list[CallFrameInstruction]: """ Parse a list of CFI instructions from self.stream, starting with the offset and until (not including) end_offset. Return a list of CallFrameInstruction objects. 
""" instructions = [] while offset < end_offset: - opcode = struct_parse(structs.the_Dwarf_uint8, self.stream, offset) + opcode: int = struct_parse(structs.the_Dwarf_uint8, self.stream, offset) args = [] primary = opcode & _PRIMARY_MASK @@ -244,7 +250,7 @@ def _parse_instructions(self, structs, offset, end_offset): offset = self.stream.tell() return instructions - def _parse_cie_for_fde(self, fde_offset, fde_header, entry_structs): + def _parse_cie_for_fde(self, fde_offset: int, fde_header: Container, entry_structs: DWARFStructs) -> CFIEntry | ZERO: """ Parse the CIE that corresponds to an FDE. """ # Determine the offset of the CIE that corresponds to this FDE @@ -252,8 +258,8 @@ def _parse_cie_for_fde(self, fde_offset, fde_header, entry_structs): # CIE_pointer contains the offset for a reverse displacement from # the section offset of the CIE_pointer field itself (not from the # FDE header offset). - cie_displacement = fde_header['CIE_pointer'] - cie_offset = (fde_offset + entry_structs.dwarf_format // 8 + cie_displacement: int = fde_header['CIE_pointer'] + cie_offset: int = (fde_offset + entry_structs.dwarf_format // 8 - cie_displacement) else: cie_offset = fde_header['CIE_pointer'] @@ -262,13 +268,13 @@ def _parse_cie_for_fde(self, fde_offset, fde_header, entry_structs): with preserve_stream_pos(self.stream): return self._parse_entry_at(cie_offset) - def _parse_cie_augmentation(self, header, entry_structs): + def _parse_cie_augmentation(self, header: Container, entry_structs: DWARFStructs) -> tuple[bytes, dict[Any, Any]]: """ Parse CIE augmentation data from the annotation string in `header`. Return a tuple that contains 1) the augmentation data as a string (without the length field) and 2) the augmentation data as a dict. 
""" - augmentation = header.get('augmentation') + augmentation: bytes | None = header.get('augmentation') if not augmentation: return (b'', {}) @@ -281,7 +287,7 @@ def _parse_cie_augmentation(self, header, entry_structs): assert augmentation.startswith(b'z'), ( 'Unhandled augmentation string: {}'.format(repr(augmentation))) - available_fields = { + available_fields: dict[bytes, Construct | Literal[True]] = { b'z': entry_structs.Dwarf_uleb128('length'), b'L': entry_structs.Dwarf_uint8('LSDA_encoding'), b'R': entry_structs.Dwarf_uint8('FDE_encoding'), @@ -297,8 +303,8 @@ def _parse_cie_augmentation(self, header, entry_structs): # Build the Struct we will be using to parse the augmentation data. # Stop as soon as we are not able to match the augmentation string. - fields = [] - aug_dict = {} + fields: list[Construct] = [] + aug_dict: dict[Any, Any] = {} for b in iterbytes(augmentation): try: @@ -323,7 +329,7 @@ def _parse_cie_augmentation(self, header, entry_structs): aug_bytes = self._read_augmentation_data(entry_structs) return (aug_bytes, aug_dict) - def _read_augmentation_data(self, entry_structs): + def _read_augmentation_data(self, entry_structs: DWARFStructs) -> bytes: """ Read augmentation data. This assumes that the augmentation string starts with 'z', i.e. that @@ -332,13 +338,13 @@ def _read_augmentation_data(self, entry_structs): if not self.for_eh_frame: return b'' - augmentation_data_length = struct_parse( + augmentation_data_length: int = struct_parse( Struct('Dummy_Augmentation_Data', entry_structs.Dwarf_uleb128('length')), self.stream)['length'] return self.stream.read(augmentation_data_length) - def _parse_lsda_pointer(self, structs, stream_offset, encoding): + def _parse_lsda_pointer(self, structs: DWARFStructs, stream_offset: int, encoding: int) -> int: """ Parse bytes to get an LSDA pointer. The basic encoding (lower four bits of the encoding) describes how the values are encoded in a CIE or an FDE. 
@@ -353,7 +359,7 @@ def _parse_lsda_pointer(self, structs, stream_offset, encoding): formats = self._eh_encoding_to_field(structs) - ptr = struct_parse( + ptr: int = struct_parse( Struct('Augmentation_Data', formats[basic_encoding]('LSDA_pointer')), self.stream, stream_pos=stream_offset)['LSDA_pointer'] @@ -369,7 +375,7 @@ def _parse_lsda_pointer(self, structs, stream_offset, encoding): return ptr - def _parse_fde_header(self, entry_structs, offset): + def _parse_fde_header(self, entry_structs: DWARFStructs, offset: int) -> Container: """ Compute a struct to parse the header of the current FDE. """ if not self.for_eh_frame: @@ -389,7 +395,7 @@ def _parse_fde_header(self, entry_structs, offset): # Try to parse the initial location. We need the initial location in # order to create a meaningful FDE, so assume it's there. Omission does # not seem to happen in practice. - encoding = cie.augmentation_dict['FDE_encoding'] + encoding: int = cie.augmentation_dict['FDE_encoding'] assert encoding != DW_EH_encoding_flags['DW_EH_PE_omit'] basic_encoding = encoding & 0x0f encoding_modifier = encoding & 0xf0 @@ -416,7 +422,7 @@ def _parse_fde_header(self, entry_structs, offset): return result @staticmethod - def _eh_encoding_to_field(entry_structs): + def _eh_encoding_to_field(entry_structs: DWARFStructs) -> dict[int, Callable[[str], Construct]]: """ Return a mapping from basic encodings (DW_EH_encoding_flags) the corresponding field constructors (for instance @@ -445,7 +451,7 @@ def _eh_encoding_to_field(entry_structs): } -def instruction_name(opcode): +def instruction_name(opcode: int) -> str: """ Given an opcode, return the instruction name. """ primary = opcode & _PRIMARY_MASK @@ -461,11 +467,11 @@ class CallFrameInstruction: arguments (including arguments embedded in the low bits of some instructions, when applicable), decoded from the stream. 
""" - def __init__(self, opcode, args): + def __init__(self, opcode: int, args: list[Any]) -> None: self.opcode = opcode self.args = args - def __repr__(self): + def __repr__(self) -> str: return '%s (0x%x): %s' % ( instruction_name(self.opcode), self.opcode, self.args) @@ -482,18 +488,26 @@ class CFIEntry: CallFrameInfo._parse_cie_augmentation and http://www.airs.com/blog/archives/460. """ - def __init__(self, header, structs, instructions, offset, - augmentation_dict=None, augmentation_bytes=b'', cie=None): + def __init__( + self, + header: Container, + structs: DWARFStructs, + instructions: list[CallFrameInstruction], + offset: int, + augmentation_dict: dict[Any, Any] | None = None, + augmentation_bytes: bytes | None = b'', + cie: CIE | None = None, + ) -> None: self.header = header self.structs = structs self.instructions = instructions self.offset = offset self.cie = cie - self._decoded_table = None + self._decoded_table: DecodedCallFrameTable | None = None self.augmentation_dict = augmentation_dict or {} self.augmentation_bytes = augmentation_bytes - def get_decoded(self): + def get_decoded(self) -> DecodedCallFrameTable: """ Decode the CFI contained in this entry and return a DecodedCallFrameTable object representing it. See the documentation of that class to understand how to interpret the decoded table. @@ -502,19 +516,19 @@ def get_decoded(self): self._decoded_table = self._decode_CFI_table() return self._decoded_table - def __getitem__(self, name): + def __getitem__(self, name: str) -> Any: """ Implement dict-like access to header entries """ return self.header[name] - def _decode_CFI_table(self): + def _decode_CFI_table(self) -> DecodedCallFrameTable: """ Decode the instructions contained in the given CFI entry and return a DecodedCallFrameTable. 
""" if isinstance(self, CIE): # For a CIE, initialize cur_line to an "empty" line cie = self - cur_line = dict(pc=0, cfa=CFARule(reg=None, offset=0)) + cur_line: dict[str, Any] = dict(pc=0, cfa=CFARule(reg=None, offset=0)) reg_order = [] else: # FDE # For a FDE, we need to decode the attached CIE first, because its @@ -536,7 +550,7 @@ def _decode_CFI_table(self): # instructions. line_stack = [] - def _add_to_order(regnum): + def _add_to_order(regnum: int) -> None: # DW_CFA_restore and others remove registers from cur_line, # but they stay in reg_order. Avoid duplicates. if regnum not in reg_order: @@ -622,7 +636,7 @@ def _add_to_order(regnum): elif name == 'DW_CFA_remember_state': line_stack.append(copy.deepcopy(cur_line)) elif name == 'DW_CFA_restore_state': - pc = cur_line['pc'] + pc: int = cur_line['pc'] cur_line = line_stack.pop() cur_line['pc'] = pc @@ -644,7 +658,16 @@ class CIE(CFIEntry): class FDE(CFIEntry): - def __init__(self, header, structs, instructions, offset, augmentation_bytes=None, cie=None, lsda_pointer=None): + def __init__( + self, + header: Container, + structs: DWARFStructs, + instructions: list[CallFrameInstruction], + offset: int, + augmentation_bytes: bytes | None = None, + cie: CIE | None = None, + lsda_pointer: int | None = None, + ) -> None: super().__init__(header, structs, instructions, offset, augmentation_bytes=augmentation_bytes, cie=cie) self.lsda_pointer = lsda_pointer @@ -656,7 +679,7 @@ class ZERO: in pure DWARF. `readelf` displays these as "ZERO terminator", hence the class name. 
""" - def __init__(self, offset): + def __init__(self, offset: int) -> None: self.offset = offset @@ -674,11 +697,11 @@ class RegisterRule: VAL_EXPRESSION = 'VAL_EXPRESSION' ARCHITECTURAL = 'ARCHITECTURAL' - def __init__(self, type, arg=None): + def __init__(self, type: str, arg: ListContainer | None = None) -> None: self.type = type self.arg = arg - def __repr__(self): + def __repr__(self) -> str: return 'RegisterRule(%s, %s)' % (self.type, self.arg) @@ -686,12 +709,12 @@ class CFARule: """ A CFA rule is used to compute the CFA for each location. It either consists of a register+offset, or a DWARF expression. """ - def __init__(self, reg=None, offset=None, expr=None): + def __init__(self, reg: int | None = None, offset: int | None = None, expr: ListContainer | None = None) -> None: self.reg = reg self.offset = offset self.expr = expr - def __repr__(self): + def __repr__(self) -> str: return 'CFARule(reg=%s, offset=%s, expr=%s)' % ( self.reg, self.offset, self.expr) @@ -728,7 +751,7 @@ class DecodedCallFrameTable(NamedTuple): # This dictionary is filled by automatically scanning the constants module # for DW_CFA_* instructions, and mapping their values to names. 
Since all # names were imported from constants with `import *`, we look in globals() -_OPCODE_NAME_MAP = { +_OPCODE_NAME_MAP: dict[int, str] = { value: name for name, value in globals().items() if name.startswith('DW_CFA') diff --git a/elftools/dwarf/compileunit.py b/elftools/dwarf/compileunit.py index 392400a4..e3ea9df7 100644 --- a/elftools/dwarf/compileunit.py +++ b/elftools/dwarf/compileunit.py @@ -6,10 +6,22 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + from bisect import bisect_right +from typing import TYPE_CHECKING, Any + from .die import DIE from ..common.utils import dwarf_assert +if TYPE_CHECKING: + from collections.abc import Iterator + + from ..construct.lib.container import Container + from .abbrevtable import AbbrevTable + from .dwarfinfo import DWARFInfo + from .structs import DWARFStructs + class CompileUnit: """ A DWARF compilation unit (CU). @@ -29,7 +41,7 @@ class CompileUnit: To get the top-level DIE describing the compilation unit, call the get_top_DIE method. """ - def __init__(self, header, dwarfinfo, structs, cu_offset, cu_die_offset): + def __init__(self, header: Container, dwarfinfo: DWARFInfo, structs: DWARFStructs, cu_offset: int, cu_die_offset: int) -> None: """ header: CU header for this compile unit @@ -53,25 +65,25 @@ def __init__(self, header, dwarfinfo, structs, cu_offset, cu_die_offset): # The abbreviation table for this CU. Filled lazily when DIEs are # requested. - self._abbrev_table = None + self._abbrev_table: AbbrevTable | None = None # A list of DIEs belonging to this CU. # This list is lazily constructed as DIEs are iterated over. - self._dielist = [] + self._dielist: list[DIE] = [] # A list of file offsets, corresponding (by index) to the DIEs # in `self._dielist`. 
This list exists separately from # `self._dielist` to make it binary searchable, enabling the # DIE population strategy used in `iter_DIE_children`. # Like `self._dielist`, this list is lazily constructed # as DIEs are iterated over. - self._diemap = [] + self._diemap: list[int] = [] - def dwarf_format(self): + def dwarf_format(self) -> int: """ Get the DWARF format (32 or 64) for this CU """ return self.structs.dwarf_format - def get_abbrev_table(self): + def get_abbrev_table(self) -> AbbrevTable: """ Get the abbreviation table (AbbrevTable object) for this CU """ if self._abbrev_table is None: @@ -79,7 +91,7 @@ def get_abbrev_table(self): self['debug_abbrev_offset']) return self._abbrev_table - def get_top_DIE(self): + def get_top_DIE(self) -> DIE: """ Get the top DIE (which is either a DW_TAG_compile_unit or DW_TAG_partial_unit) of this CU """ @@ -101,17 +113,17 @@ def get_top_DIE(self): return top - def has_top_DIE(self): + def has_top_DIE(self) -> bool: """ Returns whether the top DIE in this CU has already been parsed and cached. No parsing on demand! """ return bool(self._diemap) @property - def size(self): + def size(self) -> int: return self['unit_length'] + self.structs.initial_length_field_size() - def get_DIE_from_refaddr(self, refaddr): + def get_DIE_from_refaddr(self, refaddr: int) -> DIE: """ Obtain a DIE contained in this CU from a reference. refaddr: @@ -129,7 +141,7 @@ def get_DIE_from_refaddr(self, refaddr): return self._get_cached_DIE(refaddr) - def iter_DIEs(self): + def iter_DIEs(self) -> Iterator[DIE]: """ Iterate over all the DIEs in the CU, in order of their appearance. Note that null DIEs will also be returned. """ @@ -170,7 +182,7 @@ def iter_DIEs(self): pos += die.size - def iter_DIE_children(self, die): + def iter_DIE_children(self, die: DIE) -> Iterator[DIE]: """ Given a DIE, yields either its children, without null DIE list terminator, or nothing, if that DIE has no children. 
@@ -224,12 +236,12 @@ def iter_DIE_children(self, die): #------ PRIVATE ------# - def __getitem__(self, name): + def __getitem__(self, name: str) -> Any: """ Implement dict-like access to header entries """ return self.header[name] - def _iter_DIE_subtree(self, die): + def _iter_DIE_subtree(self, die: DIE) -> Iterator[DIE]: """ Given a DIE, this yields it with its subtree including null DIEs (child list terminators). """ @@ -243,7 +255,7 @@ def _iter_DIE_subtree(self, die): yield from die.cu._iter_DIE_subtree(c) yield die._terminator - def _get_cached_DIE(self, offset): + def _get_cached_DIE(self, offset: int) -> DIE: """ Given a DIE offset, look it up in the cache. If not present, parse the DIE and insert it into the cache. diff --git a/elftools/dwarf/datatype_cpp.py b/elftools/dwarf/datatype_cpp.py index 0f7b8871..6bf4699d 100644 --- a/elftools/dwarf/datatype_cpp.py +++ b/elftools/dwarf/datatype_cpp.py @@ -7,18 +7,26 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + +from typing import TYPE_CHECKING + from ..common.utils import bytes2str +if TYPE_CHECKING: + from .die import DIE + + cpp_symbols = dict( pointer = "*", reference = "&", const = "const", volatile = "volatile") -def describe_cpp_datatype(var_die): +def describe_cpp_datatype(var_die: DIE) -> str: return str(parse_cpp_datatype(var_die)) -def parse_cpp_datatype(var_die): +def parse_cpp_datatype(var_die: DIE) -> TypeDesc: """Given a DIE that describes a variable, a parameter, or a member with DW_AT_type in it, tries to return the C++ datatype as a string @@ -35,7 +43,7 @@ def parse_cpp_datatype(var_die): type_die = var_die.get_DIE_from_attribute('DW_AT_type') - mods = [] + mods: list[str] = [] # Unlike readelf, dwarfdump doesn't chase typedefs while type_die.tag in ('DW_TAG_const_type', 'DW_TAG_volatile_type', 'DW_TAG_pointer_type', 'DW_TAG_reference_type'): 
modifier = _strip_type_tag(type_die) # const/volatile/reference/pointer @@ -98,7 +106,7 @@ def parse_cpp_datatype(var_die): # Check the nesting - important for parameters parent = type_die.get_parent() - scopes = list() + scopes: list[str] = [] while parent.tag in ('DW_TAG_class_type', 'DW_TAG_structure_type', 'DW_TAG_union_type', 'DW_TAG_namespace'): scopes.insert(0, safe_DIE_name(parent, _strip_type_tag(parent) + " ")) # If unnamed scope, fall back to scope type - like "structure " @@ -130,14 +138,14 @@ class TypeDesc: array. -1 means an array of unknown dimension. """ - def __init__(self): - self.name = None - self.modifiers = () # Reads left to right - self.scopes = () # Reads left to right - self.tag = None - self.dimensions = None - - def __str__(self): + def __init__(self) -> None: + self.name: str + self.modifiers: tuple[str, ...] = () # Reads left to right + self.scopes: tuple[str, ...] = () # Reads left to right + self.tag: str | None = None + self.dimensions: tuple[int, ...] | None = None + + def __str__(self) -> str: # Some reference points from dwarfdump: # const->pointer->const->char = const char *const # const->reference->const->int = const const int & @@ -171,21 +179,21 @@ def __str__(self): return " ".join(parts)+dims -def DIE_name(die): +def DIE_name(die: DIE) -> str: return bytes2str(die.attributes['DW_AT_name'].value) -def safe_DIE_name(die, default = ''): +def safe_DIE_name(die: DIE, default: str = '') -> str: return bytes2str(die.attributes['DW_AT_name'].value) if 'DW_AT_name' in die.attributes else default -def DIE_type(die): +def DIE_type(die: DIE) -> DIE: return die.get_DIE_from_attribute("DW_AT_type") class ClassDesc: - def __init__(self): - self.scopes = () - self.const_member = False + def __init__(self) -> None: + self.scopes: tuple[str, ...] 
= () + self.const_member: bool = False -def get_class_spec_if_member(func_spec, the_func): +def get_class_spec_if_member(func_spec: DIE, the_func: DIE) -> ClassDesc | None: if 'DW_AT_object_pointer' in the_func.attributes: this_param = the_func.get_DIE_from_attribute('DW_AT_object_pointer') this_type = parse_cpp_datatype(this_param) @@ -198,7 +206,7 @@ def get_class_spec_if_member(func_spec, the_func): # Check the parent element chain - could be a class parent = func_spec.get_parent() - scopes = [] + scopes: list[str] = [] while parent.tag in ("DW_TAG_class_type", "DW_TAG_structure_type", "DW_TAG_namespace"): scopes.insert(0, DIE_name(parent)) parent = parent.get_parent() @@ -209,26 +217,26 @@ def get_class_spec_if_member(func_spec, the_func): return None -def format_function_param(param_spec, param): +def format_function_param(param_spec: DIE, param: DIE) -> str: if param_spec.tag == 'DW_TAG_formal_parameter': type = parse_cpp_datatype(param_spec) return str(type) else: # unspecified_parameters AKA variadic return "..." 
-def DIE_is_ptr_to_member_struct(type_die): +def DIE_is_ptr_to_member_struct(type_die: DIE) -> bool: if type_die.tag == 'DW_TAG_structure_type': members = tuple(die for die in type_die.iter_children() if die.tag == "DW_TAG_member") return len(members) == 2 and safe_DIE_name(members[0]) == "__pfn" and safe_DIE_name(members[1]) == "__delta" return False -def _strip_type_tag(die): +def _strip_type_tag(die: DIE) -> str: """Given a DIE with DW_TAG_foo_type, returns foo""" if isinstance(die.tag, int): # User-defined tag return "" return die.tag[7:-5] -def _array_subtype_size(sub): +def _array_subtype_size(sub: DIE) -> int: if 'DW_AT_upper_bound' in sub.attributes: return sub.attributes['DW_AT_upper_bound'].value + 1 if 'DW_AT_count' in sub.attributes: diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py index 41262111..9d0cdc6f 100644 --- a/elftools/dwarf/descriptions.py +++ b/elftools/dwarf/descriptions.py @@ -6,7 +6,10 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + from collections import defaultdict +from typing import TYPE_CHECKING, Any from .constants import * from .dwarf_expr import DWARFExprParser @@ -14,13 +17,19 @@ from ..common.utils import preserve_stream_pos, dwarf_assert, bytes2str from .callframe import instruction_name, CIE, FDE +if TYPE_CHECKING: + from collections.abc import Callable, Mapping + + from .callframe import CallFrameInstruction, CFARule, CFIEntry, RegisterRule + from .die import AttributeValue + from .structs import DWARFStructs -def set_global_machine_arch(machine_arch): +def set_global_machine_arch(machine_arch: str) -> None: global _MACHINE_ARCH _MACHINE_ARCH = machine_arch -def describe_attr_value(attr, die, section_offset): +def describe_attr_value(attr: AttributeValue, die: DIE, section_offset: int) -> str: """ Given an attribute attr, return the textual representation 
of its value, suitable for tools like readelf. @@ -38,16 +47,16 @@ def describe_attr_value(attr, die, section_offset): return str(val_description) + '\t' + extra_info -def describe_CFI_instructions(entry): +def describe_CFI_instructions(entry: CFIEntry) -> str: """ Given a CFI entry (CIE or FDE), return the textual description of its instructions. """ - def _assert_FDE_instruction(instr): + def _assert_FDE_instruction(instr: CallFrameInstruction) -> None: dwarf_assert( isinstance(entry, FDE), 'Unexpected instruction "%s" for a CIE' % instr) - def _full_reg_name(regnum): + def _full_reg_name(regnum: int) -> str: regname = describe_reg_name(regnum, _MACHINE_ARCH, False) if regname: return 'r%s (%s)' % (regnum, regname) @@ -56,6 +65,7 @@ def _full_reg_name(regnum): if isinstance(entry, CIE): cie = entry + pc: int | None = None else: # FDE cie = entry.cie pc = entry['initial_location'] @@ -84,7 +94,7 @@ def _full_reg_name(regnum): elif name in ( 'DW_CFA_advance_loc1', 'DW_CFA_advance_loc2', 'DW_CFA_advance_loc4', 'DW_CFA_advance_loc'): _assert_FDE_instruction(instr) - factored_offset = instr.args[0] * cie['code_alignment_factor'] + factored_offset: int = instr.args[0] * cie['code_alignment_factor'] s += ' %s: %s to %08x\n' % ( name, factored_offset, factored_offset + pc) pc += factored_offset @@ -117,7 +127,7 @@ def _full_reg_name(regnum): return s -def describe_CFI_register_rule(rule): +def describe_CFI_register_rule(rule: RegisterRule) -> str: s = _DESCR_CFI_REGISTER_RULE_TYPE[rule.type] if rule.type in ('OFFSET', 'VAL_OFFSET'): s += '%+d' % rule.arg @@ -126,14 +136,14 @@ def describe_CFI_register_rule(rule): return s -def describe_CFI_CFA_rule(rule): +def describe_CFI_CFA_rule(rule: CFARule) -> str: if rule.expr: return 'exp' else: return '%s%+d' % (describe_reg_name(rule.reg), rule.offset) -def describe_DWARF_expr(expr, structs, cu_offset=None): +def describe_DWARF_expr(expr: Any, structs: DWARFStructs, cu_offset: int | None = None) -> str: """ Textual description 
of a DWARF expression encoded in 'expr'. structs should come from the entity encompassing the expression - it's needed to be able to parse it correctly. @@ -149,7 +159,7 @@ def describe_DWARF_expr(expr, structs, cu_offset=None): return '(' + dwarf_expr_dumper.dump_expr(expr, cu_offset) + ')' -def describe_reg_name(regnum, machine_arch=None, default=True): +def describe_reg_name(regnum: int, machine_arch: str | None = None, default: bool = True) -> str | None: """ Provide a textual description for a register name, given its serial number. The number is expected to be valid. """ @@ -167,7 +177,7 @@ def describe_reg_name(regnum, machine_arch=None, default=True): else: return None -def describe_form_class(form): +def describe_form_class(form: str) -> str | None: """For a given form name, determine its value class. For example, given 'DW_FORM_data1' returns 'constant'. @@ -183,56 +193,56 @@ def describe_form_class(form): # The machine architecture. Set globally via set_global_machine_arch # -_MACHINE_ARCH = None +_MACHINE_ARCH: str | None = None # Implements the alternative format of readelf: lowercase hex, prefixed with 0x unless 0 -def _format_hex(n): +def _format_hex(n: int) -> str: return '0x%x' % n if n != 0 else '0' -def _describe_attr_ref(attr, die, section_offset): +def _describe_attr_ref(attr: AttributeValue, die: DIE, section_offset: int) -> str: return '<%s>' % _format_hex(attr.value + die.cu.cu_offset) -def _describe_attr_ref_sig8(attr, die, section_offset): +def _describe_attr_ref_sig8(attr: AttributeValue, die: DIE, section_offset: int) -> str: return 'signature: %s' % _format_hex(attr.value) -def _describe_attr_value_passthrough(attr, die, section_offset): +def _describe_attr_value_passthrough(attr: AttributeValue, die: DIE, section_offset: int) -> str | int: return attr.value -def _describe_attr_hex(attr, die, section_offset): +def _describe_attr_hex(attr: AttributeValue, die: DIE, section_offset: int) -> str: return '%s' % _format_hex(attr.value) -def 
_describe_attr_hex_addr(attr, die, section_offset): +def _describe_attr_hex_addr(attr: AttributeValue, die: DIE, section_offset: int) -> str: return '<%s>' % _format_hex(attr.value) -def _describe_attr_split_64bit(attr, die, section_offset): +def _describe_attr_split_64bit(attr: AttributeValue, die: DIE, section_offset: int) -> str: low_word = attr.value & 0xFFFFFFFF high_word = (attr.value >> 32) & 0xFFFFFFFF return '%s %s' % (_format_hex(low_word), _format_hex(high_word)) -def _describe_attr_strp(attr, die, section_offset): +def _describe_attr_strp(attr: AttributeValue, die: DIE, section_offset: int) -> str: return '(indirect string, offset: %s): %s' % ( _format_hex(attr.raw_value), bytes2str(attr.value)) -def _describe_attr_line_strp(attr, die, section_offset): +def _describe_attr_line_strp(attr: AttributeValue, die: DIE, section_offset: int) -> str: return '(indirect line string, offset: %s): %s' % ( _format_hex(attr.raw_value), bytes2str(attr.value)) -def _describe_attr_string(attr, die, section_offset): +def _describe_attr_string(attr: AttributeValue, die: DIE, section_offset: int) -> str: return bytes2str(attr.value) -def _describe_attr_debool(attr, die, section_offset): +def _describe_attr_debool(attr: AttributeValue, die: DIE, section_offset: int) -> str: """ To be consistent with readelf, generate 1 for True flags, 0 for False flags. """ return '1' if attr.value else '0' -def _describe_attr_present(attr, die, section_offset): +def _describe_attr_present(attr: AttributeValue, die: DIE, section_offset: int) -> str: """ Some forms may simply mean that an attribute is present, without providing any value. 
""" return '1' -def _describe_attr_block(attr, die, section_offset): +def _describe_attr_block(attr: AttributeValue, die: DIE, section_offset: int) -> str: s = '%s byte block: ' % len(attr.value) s += ' '.join('%x' % item for item in attr.value) + ' ' return s @@ -425,11 +435,11 @@ def _describe_attr_block(attr, die, section_offset): ARCHITECTURAL='a', ) -def _make_extra_mapper(mapping, default, default_interpolate_value=False): +def _make_extra_mapper(mapping: Mapping[int, str], default: str, default_interpolate_value: bool = False) -> Callable[[AttributeValue, DIE, int], str]: """ Create a mapping function from attribute parameters to an extra value that should be displayed. """ - def mapper(attr, die, section_offset): + def mapper(attr: AttributeValue, die: DIE, section_offset: int) -> str: if default_interpolate_value: d = default % attr.value else: @@ -438,17 +448,17 @@ def mapper(attr, die, section_offset): return mapper -def _make_extra_string(s=''): +def _make_extra_string(s: str = '') -> Callable[[AttributeValue, DIE, int], str]: """ Create an extra function that just returns a constant string. """ - def extra(attr, die, section_offset): + def extra(attr: AttributeValue, die: DIE, section_offset: int) -> str: return s return extra -_DWARF_EXPR_DUMPER_CACHE = {} +_DWARF_EXPR_DUMPER_CACHE: dict[int, ExprDumper] = {} -def _location_list_extra(attr, die, section_offset): +def _location_list_extra(attr: AttributeValue, die: DIE, section_offset: int) -> str: # According to section 2.6 of the DWARF spec v3, class loclistptr means # a location list, and class block means a location expression. # DW_FORM_sec_offset is new in DWARFv4 as a section offset. 
@@ -458,7 +468,7 @@ def _location_list_extra(attr, die, section_offset): return describe_DWARF_expr(attr.value, die.cu.structs, die.cu.cu_offset) -def _data_member_location_extra(attr, die, section_offset): +def _data_member_location_extra(attr: AttributeValue, die: DIE, section_offset: int) -> str: # According to section 5.5.6 of the DWARF spec v4, a data member location # can be an integer offset, or a location description. # @@ -470,7 +480,7 @@ def _data_member_location_extra(attr, die, section_offset): return describe_DWARF_expr(attr.value, die.cu.structs, die.cu.cu_offset) -def _import_extra(attr, die, section_offset): +def _import_extra(attr: AttributeValue, die: DIE, section_offset: int) -> str: # For DW_AT_import the value points to a DIE (that can be either in the # current DIE's CU or in another CU, depending on the FORM). The extra # information for it is the abbreviation number in this DIE and its tag. @@ -587,12 +597,12 @@ class ExprDumper: Usage: after creation, call dump_expr repeatedly - it's stateless. """ - def __init__(self, structs): + def __init__(self, structs: DWARFStructs) -> None: self.structs = structs self.expr_parser = DWARFExprParser(self.structs) self._init_lookups() - def dump_expr(self, expr, cu_offset=None): + def dump_expr(self, expr: list[int], cu_offset: int | None = None) -> str: """ Parse and dump a DWARF expression. expr should be a list of (integer) byte values. cu_offset is the cu_offset value from the CU object where the expression resides. 
@@ -608,7 +618,7 @@ def dump_expr(self, expr, cu_offset=None): for deo in parsed ) - def _init_lookups(self): + def _init_lookups(self) -> None: self._ops_with_decimal_arg = set([ 'DW_OP_const1u', 'DW_OP_const1s', 'DW_OP_const2u', 'DW_OP_const2s', 'DW_OP_const4u', 'DW_OP_const4s', 'DW_OP_const8u', 'DW_OP_const8s', @@ -624,7 +634,7 @@ def _init_lookups(self): self._ops_with_hex_arg = set( ['DW_OP_addr', 'DW_OP_call2', 'DW_OP_call4', 'DW_OP_call_ref']) - def _dump_to_string(self, opcode, opcode_name, args, cu_offset=None): + def _dump_to_string(self, opcode: int, opcode_name: str, args: list[Any], cu_offset: int | None = None) -> str: # Some GNU ops contain an offset from the current CU as an argument, # but readelf emits those ops with offset from the info section # so we need the base offset of the parent CU. diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py index 243a9c23..0fe1e00e 100644 --- a/elftools/dwarf/die.py +++ b/elftools/dwarf/die.py @@ -6,8 +6,10 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + import os -from typing import Any, NamedTuple +from typing import IO, TYPE_CHECKING, Any, NamedTuple from ..common.exceptions import DWARFError, ELFParseError from ..common.utils import bytes2str, struct_parse @@ -15,6 +17,12 @@ from .dwarf_util import _resolve_via_offset_table, _get_base_offset from ..construct import ConstructError +if TYPE_CHECKING: + from collections.abc import Iterator + + from .compileunit import CompileUnit + from .typeunit import TypeUnit + # AttributeValue - describes an attribute value in the DIE: # @@ -78,7 +86,7 @@ class DIE: See also the public methods. """ - def __init__(self, cu, stream, offset): + def __init__(self, cu: CompileUnit | TypeUnit, stream: IO[bytes], offset: int) -> None: """ cu: CompileUnit object this DIE belongs to. 
Used to obtain context information (structs, abbrev table, etc.) @@ -91,24 +99,24 @@ def __init__(self, cu, stream, offset): self.stream = stream self.offset = offset - self.attributes = {} - self.tag = None - self.has_children = None - self.abbrev_code = None + self.attributes: dict[str, Any] = {} + self.tag: str | int | None = None + self.has_children: bool | None = None + self.abbrev_code: int | None = None self.size = 0 # Null DIE terminator. It can be used to obtain offset range occupied # by this DIE including its whole subtree. - self._terminator = None - self._parent = None + self._terminator: DIE | None = None + self._parent: DIE | None = None self._parse_DIE() - def is_null(self): + def is_null(self) -> bool: """ Is this a null entry? """ return self.tag is None - def get_DIE_from_attribute(self, name): + def get_DIE_from_attribute(self, name: str) -> DIE: """ Return the DIE referenced by the named attribute of this DIE. The attribute must be in the reference attribute class. @@ -132,7 +140,7 @@ def get_DIE_from_attribute(self, name): else: raise DWARFError('%s is not a reference class form attribute' % attr) - def get_parent(self): + def get_parent(self) -> DIE | None: """ Return the parent DIE of this DIE, or None if the DIE has no parent (i.e. is a top-level DIE). """ @@ -140,7 +148,7 @@ def get_parent(self): self._search_ancestor_offspring() return self._parent - def get_full_path(self): + def get_full_path(self) -> str: """ Return the full path filename for the DIE. 
The filename is the join of 'DW_AT_comp_dir' and 'DW_AT_name', @@ -154,12 +162,12 @@ def get_full_path(self): fname = bytes2str(fname_attr.value) if fname_attr else '' return os.path.join(comp_dir, fname) - def iter_children(self): + def iter_children(self) -> Iterator[DIE]: """ Iterates all children of this DIE """ return self.cu.iter_DIE_children(self) - def iter_siblings(self): + def iter_siblings(self) -> Iterator[DIE]: """ Yield all siblings of this DIE """ parent = self.get_parent() @@ -174,12 +182,12 @@ def iter_siblings(self): # interesting to consumers # - def set_parent(self, die): + def set_parent(self, die: DIE) -> None: self._parent = die #------ PRIVATE ------# - def _search_ancestor_offspring(self): + def _search_ancestor_offspring(self) -> None: """ Search our ancestors identifying their offspring to find our parent. DIEs are stored as a flattened tree. The top DIE is the ancestor @@ -194,7 +202,7 @@ def _search_ancestor_offspring(self): # called for siblings, it is more efficient if siblings references are # provided and no worse than a single walk if they are missing, while # stopping iteration early could result in O(n^2) walks. 
- search = self.cu.get_top_DIE() + search: DIE = self.cu.get_top_DIE() while search.offset < self.offset: prev = search for child in search.iter_children(): @@ -215,17 +223,17 @@ def _search_ancestor_offspring(self): search = prev - def __repr__(self): + def __repr__(self) -> str: s = 'DIE %s, size=%s, has_children=%s\n' % ( self.tag, self.size, self.has_children) for attrname, attrval in self.attributes.items(): s += ' |%-18s: %s\n' % (attrname, attrval) return s - def __str__(self): + def __str__(self) -> str: return self.__repr__() - def _parse_DIE(self): + def _parse_DIE(self) -> None: """ Parses the DIE info from the section, based on the abbreviation table of the CU """ @@ -280,13 +288,13 @@ def _parse_DIE(self): except ConstructError as e: raise ELFParseError(str(e)) - def _resolve_indirect(self): + def _resolve_indirect(self) -> tuple[str, int, int]: # Supports arbitrary indirection nesting (the standard doesn't prohibit that). # Expects the stream to be at the real form. # Returns (form, raw_value, length). 
structs = self.cu.structs length = 1 - real_form_code = struct_parse(structs.the_Dwarf_uleb128, self.stream) # Numeric form code + real_form_code: int = struct_parse(structs.the_Dwarf_uleb128, self.stream) # Numeric form code while True: try: real_form = DW_FORM_raw2name[real_form_code] # Form name or exception if bogus code @@ -303,7 +311,7 @@ def _resolve_indirect(self): # And continue parsing # No explicit infinite loop guard because the stream will end eventually - def _translate_attr_value(self, form, raw_value): + def _translate_attr_value(self, form: str, raw_value: Any) -> Any: """ Translate a raw attr value according to the form """ # Indirect forms can only be parsed if the top DIE of this CU has already been parsed @@ -336,7 +344,7 @@ def _translate_attr_value(self, form, raw_value): return _resolve_via_offset_table(self.dwarfinfo.debug_rnglists_sec.stream, self.cu, raw_value, 'DW_AT_rnglists_base') return raw_value - def _translate_indirect_attributes(self): + def _translate_indirect_attributes(self) -> None: """ This is a hook to translate the DW_FORM_...x values in the top DIE once the top DIE is parsed to the end. They can't be translated while the top DIE is being parsed, because they implicitly make a diff --git a/elftools/dwarf/dwarf_expr.py b/elftools/dwarf/dwarf_expr.py index 89fd6157..6b5471b8 100644 --- a/elftools/dwarf/dwarf_expr.py +++ b/elftools/dwarf/dwarf_expr.py @@ -6,15 +6,23 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + from io import BytesIO -from typing import Any, NamedTuple +from typing import IO, TYPE_CHECKING, Any, NamedTuple from ..common.utils import struct_parse, read_blob from ..common.exceptions import DWARFError +if TYPE_CHECKING: + from collections.abc import Callable + + from ..construct.core import Construct + from .structs import DWARFStructs + # DWARF expression opcodes. 
name -> opcode mapping -DW_OP_name2opcode = dict( +DW_OP_name2opcode: dict[str, int] = dict( DW_OP_addr=0x03, DW_OP_deref=0x06, DW_OP_const1u=0x08, @@ -100,7 +108,7 @@ DW_OP_hi_user=0xff, ) -def _generate_dynamic_values(map, prefix, index_start, index_end, value_start): +def _generate_dynamic_values(map: dict[str, int], prefix: str, index_start: int, index_end: int, value_start: int) -> None: """ Generate values in a map (dict) dynamically. Each key starts with a (string) prefix, followed by an index in the inclusive range [index_start, index_end]. The values start at value_start. @@ -115,7 +123,7 @@ def _generate_dynamic_values(map, prefix, index_start, index_end, value_start): _generate_dynamic_values(DW_OP_name2opcode, 'DW_OP_breg', 0, 31, 0x70) # opcode -> name mapping -DW_OP_opcode2name = {v: k for k, v in DW_OP_name2opcode.items()} +DW_OP_opcode2name: dict[int, str] = {v: k for k, v in DW_OP_name2opcode.items()} # Each parsed DWARF expression is returned as this type with its numeric opcode, @@ -134,16 +142,16 @@ class DWARFExprParser: parse_expr can be called repeatedly - it's stateless. """ - def __init__(self, structs): + def __init__(self, structs: DWARFStructs) -> None: self._dispatch_table = _init_dispatch_table(structs) - def parse_expr(self, expr): + def parse_expr(self, expr: list[int]) -> list[DWARFExprOp]: """ Parses expr (a list of integers) into a list of DWARFExprOp. The list can potentially be nested. """ stream = BytesIO(bytes(expr)) - parsed = [] + parsed: list[DWARFExprOp] = [] while True: # Get the next opcode from the stream. If nothing is left in the @@ -166,52 +174,52 @@ def parse_expr(self, expr): return parsed -def _init_dispatch_table(structs): +def _init_dispatch_table(structs: DWARFStructs) -> dict[int, Callable[[IO[bytes]], list[Any]]]: """Creates a dispatch table for parsing args of an op. Returns a dict mapping opcode to a function. 
The function accepts a stream and return a list of parsed arguments for the opcode from the stream; the stream is advanced by the function as needed. """ - table = {} - def add(opcode_name, func): + table: dict[int, Callable[[IO[bytes]], list[Any]]] = {} + def add(opcode_name: str, func: Callable[[IO[bytes]], list[Any]]) -> None: table[DW_OP_name2opcode[opcode_name]] = func - def parse_noargs(): + def parse_noargs() -> Callable[[IO[bytes]], list[None]]: return lambda stream: [] - def parse_op_addr(): + def parse_op_addr() -> Callable[[IO[bytes]], list[int]]: return lambda stream: [struct_parse(structs.the_Dwarf_target_addr, stream)] - def parse_arg_struct(arg_struct): + def parse_arg_struct(arg_struct: Construct) -> Callable[[IO[bytes]], list[Any]]: return lambda stream: [struct_parse(arg_struct, stream)] - def parse_arg_struct2(arg1_struct, arg2_struct): + def parse_arg_struct2(arg1_struct: Construct, arg2_struct: Construct) -> Callable[[IO[bytes]], list[Any]]: return lambda stream: [struct_parse(arg1_struct, stream), struct_parse(arg2_struct, stream)] # ULEB128, then an expression of that length - def parse_nestedexpr(): - def parse(stream): - size = struct_parse(structs.the_Dwarf_uleb128, stream) + def parse_nestedexpr() -> Callable[[IO[bytes]], list[list[DWARFExprOp]]]: + def parse(stream: IO[bytes]) -> list[list[DWARFExprOp]]: + size: int = struct_parse(structs.the_Dwarf_uleb128, stream) nested_expr_blob = read_blob(stream, size) return [DWARFExprParser(structs).parse_expr(nested_expr_blob)] return parse # ULEB128, then a blob of that size - def parse_blob(): + def parse_blob() -> Callable[[IO[bytes]], list[list[int]]]: return lambda stream: [read_blob(stream, struct_parse(structs.the_Dwarf_uleb128, stream))] # ULEB128 with datatype DIE offset, then byte, then a blob of that size - def parse_typedblob(): + def parse_typedblob() -> Callable[[IO[bytes]], list[int | list[int]]]: return lambda stream: [struct_parse(structs.the_Dwarf_uleb128, stream), 
read_blob(stream, struct_parse(structs.the_Dwarf_uint8, stream))] # https://yurydelendik.github.io/webassembly-dwarf/ # Byte, then variant: 0, 1, 2 => uleb128, 3 => uint32 - def parse_wasmloc(): - def parse(stream): - op = struct_parse(structs.the_Dwarf_uint8, stream) + def parse_wasmloc() -> Callable[[IO[bytes]], list[int]]: + def parse(stream: IO[bytes]) -> list[int]: + op: int = struct_parse(structs.the_Dwarf_uint8, stream) if 0 <= op <= 2: return [op, struct_parse(structs.the_Dwarf_uleb128, stream)] elif op == 3: diff --git a/elftools/dwarf/dwarf_util.py b/elftools/dwarf/dwarf_util.py index a6c74e60..d588e715 100644 --- a/elftools/dwarf/dwarf_util.py +++ b/elftools/dwarf/dwarf_util.py @@ -6,14 +6,26 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations import os import binascii +from typing import IO, TYPE_CHECKING, Any + from ..construct.macros import Array from ..common.exceptions import DWARFError from ..common.utils import preserve_stream_pos, struct_parse -def _get_base_offset(cu, base_attribute_name): +if TYPE_CHECKING: + from collections.abc import Iterator + + from ..construct import Struct + from .compileunit import CompileUnit + from .structs import DWARFStructs + from .typeunit import TypeUnit + + +def _get_base_offset(cu: CompileUnit | TypeUnit, base_attribute_name: str) -> int: """Retrieves a required, base offset-type atribute from the top DIE in the CU. Applies to several indirectly encoded objects - range lists, location lists, strings, addresses. 
@@ -23,7 +35,7 @@ def _get_base_offset(cu, base_attribute_name): raise DWARFError("The CU at offset 0x%x needs %s" % (cu.cu_offset, base_attribute_name)) return cu_top_die.attributes[base_attribute_name].value -def _resolve_via_offset_table(stream, cu, index, base_attribute_name): +def _resolve_via_offset_table(stream: IO[bytes], cu: CompileUnit | TypeUnit, index: int, base_attribute_name: str) -> int: """Given an index in the offset table and directions where to find it, retrieves an offset. Works for loclists, rnglists. @@ -41,7 +53,7 @@ def _resolve_via_offset_table(stream, cu, index, base_attribute_name): with preserve_stream_pos(stream): return base_offset + struct_parse(cu.structs.the_Dwarf_offset, stream, base_offset + index*offset_size) -def _iter_CUs_in_section(stream, structs, parser): +def _iter_CUs_in_section(stream: IO[bytes], structs: DWARFStructs, parser: Struct) -> Iterator[Any]: """Iterates through the list of CU sections in loclists or rangelists. Almost identical structures there. get_parser is a lambda that takes structs, returns the parser @@ -61,7 +73,7 @@ def _iter_CUs_in_section(stream, structs, parser): yield header offset = header.offset_after_length + header.unit_length -def _file_crc32(file): +def _file_crc32(file: IO[bytes]) -> int: """ Provided a readable binary stream, reads the stream to the end and computes the CRC32 checksum of its contents, with the initial value of 0. 
diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 374803cf..540d55f1 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -6,8 +6,10 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + from bisect import bisect_right -from typing import IO, NamedTuple +from typing import IO, TYPE_CHECKING, Callable, NamedTuple from ..construct.lib.container import Container from ..common.exceptions import DWARFError @@ -25,6 +27,14 @@ from .namelut import NameLUT from .dwarf_util import _get_base_offset +if TYPE_CHECKING: + from collections.abc import Iterator + + from ..construct.lib.container import ListContainer + from .callframe import ZERO, CFIEntry + from .die import DIE + from .namelut import NameLUTEntry + # Describes a debug section # @@ -69,27 +79,27 @@ class DWARFInfo: various parts of the debug information. 
""" def __init__(self, - config, - debug_info_sec, - debug_aranges_sec, - debug_abbrev_sec, - debug_frame_sec, - eh_frame_sec, - debug_str_sec, - debug_loc_sec, - debug_ranges_sec, - debug_line_sec, - debug_pubtypes_sec, - debug_pubnames_sec, - debug_addr_sec, - debug_str_offsets_sec, - debug_line_str_sec, - debug_loclists_sec, - debug_rnglists_sec, - debug_sup_sec, - gnu_debugaltlink_sec, - debug_types_sec - ): + config: DwarfConfig, + debug_info_sec: DebugSectionDescriptor | None, + debug_aranges_sec: DebugSectionDescriptor | None, + debug_abbrev_sec: DebugSectionDescriptor | None, + debug_frame_sec: DebugSectionDescriptor | None, + eh_frame_sec: DebugSectionDescriptor | None, + debug_str_sec: DebugSectionDescriptor | None, + debug_loc_sec: DebugSectionDescriptor | None, + debug_ranges_sec: DebugSectionDescriptor | None, + debug_line_sec: DebugSectionDescriptor | None, + debug_pubtypes_sec: DebugSectionDescriptor | None, + debug_pubnames_sec: DebugSectionDescriptor | None, + debug_addr_sec: DebugSectionDescriptor | None, + debug_str_offsets_sec: DebugSectionDescriptor | None, + debug_line_str_sec: DebugSectionDescriptor | None, + debug_loclists_sec: DebugSectionDescriptor | None, + debug_rnglists_sec: DebugSectionDescriptor | None, + debug_sup_sec: DebugSectionDescriptor | None, + gnu_debugaltlink_sec: DebugSectionDescriptor | None, + debug_types_sec: DebugSectionDescriptor | None, + ) -> None: """ config: A DwarfConfig object @@ -122,7 +132,7 @@ def __init__(self, # Sets the supplementary_dwarfinfo to None. Client code can set this # to something else, typically a DWARFInfo file read from an ELFFile # which path is stored in the debug_sup_sec or gnu_debugaltlink_sec. 
- self.supplementary_dwarfinfo = None + self.supplementary_dwarfinfo: DWARFInfo | None = None # This is the DWARFStructs the context uses, so it doesn't depend on # DWARF format and address_size (these are determined per CU) - set them @@ -133,20 +143,20 @@ def __init__(self, address_size=self.config.default_address_size) # Cache for abbrev tables: a dict keyed by offset - self._abbrevtable_cache = {} + self._abbrevtable_cache: dict[int, AbbrevTable] = {} # Cache for program lines tables: a dict keyed by offset - self._linetable_cache = {} + self._linetable_cache: dict[int, LineProgram] = {} # Cache of compile units and map of their offsets for bisect lookup. # Access with .iter_CUs(), .get_CU_containing(), and/or .get_CU_at(). - self._cu_cache = [] - self._cu_offsets_map = [] + self._cu_cache: list[CompileUnit] = [] + self._cu_offsets_map: list[int] = [] # DWARF v4 type units by sig8 - ordered dict created when needed - self._type_units_by_sig = None + self._type_units_by_sig: dict[int, TypeUnit] | None = None @property - def has_debug_info(self): + def has_debug_info(self) -> bool: """ Return whether this contains debug information. It can be not the case when the ELF only contains .eh_frame, which is @@ -154,12 +164,12 @@ def has_debug_info(self): """ return bool(self.debug_info_sec) - def has_debug_types(self): + def has_debug_types(self) -> bool: """ Return whether this contains debug types information. """ return bool(self.debug_types_sec) - def get_DIE_from_lut_entry(self, lut_entry): + def get_DIE_from_lut_entry(self, lut_entry: NameLUTEntry) -> DIE: """ Get the DIE from the pubnames or putbtypes lookup table entry. 
lut_entry: @@ -169,7 +179,7 @@ def get_DIE_from_lut_entry(self, lut_entry): cu = self.get_CU_at(lut_entry.cu_ofs) return self.get_DIE_from_refaddr(lut_entry.die_ofs, cu) - def get_DIE_from_refaddr(self, refaddr, cu=None): + def get_DIE_from_refaddr(self, refaddr: int, cu: CompileUnit | None = None) -> DIE: """ Given a .debug_info section offset of a DIE, return the DIE. refaddr: @@ -183,7 +193,7 @@ def get_DIE_from_refaddr(self, refaddr, cu=None): cu = self.get_CU_containing(refaddr) return cu.get_DIE_from_refaddr(refaddr) - def get_DIE_by_sig8(self, sig8): + def get_DIE_by_sig8(self, sig8: int) -> DIE: """ Find and return a DIE referenced by its type signature. sig8: The 8 byte signature (as a 64-bit unsigned integer) @@ -209,7 +219,7 @@ def get_DIE_by_sig8(self, sig8): raise KeyError("Signature %016x not found in .debug_types" % sig8) return tu._get_cached_DIE(tu.tu_offset + tu['type_offset']) - def get_CU_containing(self, refaddr): + def get_CU_containing(self, refaddr: int) -> CompileUnit: """ Find the CU that includes the given reference address in the .debug_info section. @@ -243,7 +253,7 @@ def get_CU_containing(self, refaddr): raise ValueError("CU for reference address %s not found" % refaddr) - def get_CU_at(self, offset): + def get_CU_at(self, offset: int) -> CompileUnit: """ Given a CU header offset, return the parsed CU. 
offset: @@ -263,7 +273,7 @@ def get_CU_at(self, offset): return self._cached_CU_at_offset(offset) - def get_TU_by_sig8(self, sig8): + def get_TU_by_sig8(self, sig8: int) -> TypeUnit: """ Find and return a Type Unit referenced by its signature sig8: @@ -279,17 +289,17 @@ def get_TU_by_sig8(self, sig8): raise KeyError("Signature %016x not found in .debug_types" % sig8) return tu - def iter_CUs(self): + def iter_CUs(self) -> Iterator[CompileUnit]: """ Yield all the compile units (CompileUnit objects) in the debug info """ return self._parse_CUs_iter() - def iter_TUs(self): + def iter_TUs(self) -> Iterator[TypeUnit]: """Yield all the type units (TypeUnit objects) in the debug_types """ return self._parse_TUs_iter() - def get_abbrev_table(self, offset): + def get_abbrev_table(self, offset: int) -> AbbrevTable: """ Get an AbbrevTable from the given offset in the debug_abbrev section. @@ -311,19 +321,19 @@ def get_abbrev_table(self, offset): offset=offset) return self._abbrevtable_cache[offset] - def get_string_from_table(self, offset): + def get_string_from_table(self, offset: int) -> bytes | None: """ Obtain a string from the string table section, given an offset relative to the section. """ return parse_cstring_from_stream(self.debug_str_sec.stream, offset) - def get_string_from_linetable(self, offset): + def get_string_from_linetable(self, offset: int) -> bytes | None: """ Obtain a string from the string table section, given an offset relative to the section. """ return parse_cstring_from_stream(self.debug_line_str_sec.stream, offset) - def line_program_for_CU(self, CU): + def line_program_for_CU(self, CU: CompileUnit) -> LineProgram | None: """ Given a CU object, fetch the line program it points to from the .debug_line section. If the CU doesn't point to a line program, return None. @@ -345,12 +355,12 @@ def line_program_for_CU(self, CU): else: return None - def has_CFI(self): + def has_CFI(self) -> bool: """ Does this dwarf info have a dwarf_frame CFI section? 
""" return self.debug_frame_sec is not None - def CFI_entries(self): + def CFI_entries(self) -> list[CFIEntry | ZERO]: """ Get a list of dwarf_frame CFI entries from the .debug_frame section. """ cfi = CallFrameInfo( @@ -360,12 +370,12 @@ def CFI_entries(self): base_structs=self.structs) return cfi.get_entries() - def has_EH_CFI(self): + def has_EH_CFI(self) -> bool: """ Does this dwarf info have a eh_frame CFI section? """ return self.eh_frame_sec is not None - def EH_CFI_entries(self): + def EH_CFI_entries(self) -> list[CFIEntry | ZERO]: """ Get a list of eh_frame CFI entries from the .eh_frame section. """ cfi = CallFrameInfo( @@ -376,7 +386,7 @@ def EH_CFI_entries(self): for_eh_frame=True) return cfi.get_entries() - def get_pubtypes(self): + def get_pubtypes(self) -> NameLUT | None: """ Returns a NameLUT object that contains information read from the .debug_pubtypes section in the ELF file. @@ -392,7 +402,7 @@ def get_pubtypes(self): else: return None - def get_pubnames(self): + def get_pubnames(self) -> NameLUT | None: """ Returns a NameLUT object that contains information read from the .debug_pubnames section in the ELF file. @@ -408,7 +418,7 @@ def get_pubnames(self): else: return None - def get_aranges(self): + def get_aranges(self) -> ARanges | None: """ Get an ARanges object representing the .debug_aranges section of the DWARF data, or None if the section doesn't exist """ @@ -419,7 +429,7 @@ def get_aranges(self): else: return None - def location_lists(self): + def location_lists(self) -> LocationLists | LocationListsPair | None: """ Get a LocationLists object representing the .debug_loc/debug_loclists section of the DWARF data, or None if this section doesn't exist. @@ -434,7 +444,7 @@ def location_lists(self): else: return None - def range_lists(self): + def range_lists(self) -> RangeLists | RangeListsPair | None: """ Get a RangeLists object representing the .debug_ranges/.debug_rnglists section of the DWARF data, or None if this section doesn't exist. 
@@ -449,7 +459,7 @@ def range_lists(self): else: return None - def get_addr(self, cu, addr_index): + def get_addr(self, cu: CompileUnit | TypeUnit, addr_index: int) -> int: """Provided a CU and an index, retrieves an address from the debug_addr section """ if not self.debug_addr_sec: @@ -460,7 +470,7 @@ def get_addr(self, cu, addr_index): #------ PRIVATE ------# - def _parse_CUs_iter(self, offset=0): + def _parse_CUs_iter(self, offset: int = 0) -> Iterator[CompileUnit]: """ Iterate CU objects in order of appearance in the debug_info section. offset: @@ -482,7 +492,7 @@ def _parse_CUs_iter(self, offset=0): cu.structs.initial_length_field_size()) yield cu - def _parse_TUs_iter(self, offset=0): + def _parse_TUs_iter(self, offset: int = 0) -> Iterator[TypeUnit]: """ Iterate Type Unit objects in order of appearance in the debug_types section. offset: @@ -505,7 +515,7 @@ def _parse_TUs_iter(self, offset=0): yield tu - def _parse_debug_types(self): + def _parse_debug_types(self) -> None: """ Check if the .debug_types section is previously parsed. If not, parse all TUs and store them in an ordered dict using their unique 64-bit signature as the key. @@ -531,7 +541,7 @@ def _parse_debug_types(self): tu.structs.initial_length_field_size()) self._type_units_by_sig[tu['signature']] = tu - def _cached_CU_at_offset(self, offset): + def _cached_CU_at_offset(self, offset: int) -> CompileUnit: """ Return the CU with unit header at the given offset into the debug_info section from the cache. If not present, the unit is header is parsed and the object is installed in the cache. @@ -557,7 +567,7 @@ def _cached_CU_at_offset(self, offset): self._cu_cache.insert(i, cu) return cu - def _parse_CU_at_offset(self, offset): + def _parse_CU_at_offset(self, offset: int) -> CompileUnit: """ Parse and return a CU at the given offset in the debug_info stream. 
""" # Section 7.4 (32-bit and 64-bit DWARF Formats) of the DWARF spec v3 @@ -603,7 +613,7 @@ def _parse_CU_at_offset(self, offset): cu_offset=offset, cu_die_offset=cu_die_offset) - def _parse_TU_at_offset(self, offset): + def _parse_TU_at_offset(self, offset: int) -> TypeUnit: """ Parse and return a Type Unit (TU) at the given offset in the debug_types stream. """ # Section 7.4 (32-bit and 64-bit DWARF Formats) of the DWARF spec v4 @@ -648,12 +658,12 @@ def _parse_TU_at_offset(self, offset): tu_offset=offset, tu_die_offset=tu_die_offset) - def _is_supported_version(self, version): + def _is_supported_version(self, version: int) -> bool: """ DWARF version supported by this parser """ return 2 <= version <= 5 - def _parse_line_program_at_offset(self, offset, structs): + def _parse_line_program_at_offset(self, offset: int, structs: DWARFStructs) -> LineProgram: """ Given an offset to the .debug_line section, parse the line program starting at this offset in the section and return it. structs is the DWARFStructs object used to do this parsing. @@ -668,11 +678,11 @@ def _parse_line_program_at_offset(self, offset, structs): offset) # DWARF5: resolve names - def resolve_strings(lineprog_header, format_field, data_field) -> None: + def resolve_strings(lineprog_header: Container, format_field: str, data_field: str) -> None: if lineprog_header.get(format_field, False): data = lineprog_header[data_field] for field in lineprog_header[format_field]: - def replace_value(data, content_type, replacer): + def replace_value(data: ListContainer, content_type: str, replacer: Callable[[int], bytes | None]) -> None: for entry in data: entry[content_type] = replacer(entry[content_type]) @@ -717,7 +727,7 @@ def replace_value(data, content_type, replacer): self._linetable_cache[offset] = lineprogram return lineprogram - def parse_debugsupinfo(self): + def parse_debugsupinfo(self) -> bytes | None: """ Extract a filename from .debug_sup, .gnu_debualtlink sections. 
""" diff --git a/elftools/dwarf/lineprogram.py b/elftools/dwarf/lineprogram.py index a5d9f2bf..fb43d88a 100644 --- a/elftools/dwarf/lineprogram.py +++ b/elftools/dwarf/lineprogram.py @@ -10,11 +10,15 @@ import os import copy -from typing import NamedTuple +from typing import IO, TYPE_CHECKING, Any, NamedTuple from ..common.utils import struct_parse, dwarf_assert from .constants import * +if TYPE_CHECKING: + from ..construct.lib.container import Container + from .structs import DWARFStructs + # LineProgramEntry - an entry in the line program. # A line program is a sequence of encoded entries. Some of these entries add a @@ -51,7 +55,7 @@ class LineState: The instance variables of this class are the "state machine registers" described in section 6.2.2 of DWARFv3 """ - def __init__(self, default_is_stmt): + def __init__(self, default_is_stmt: int) -> None: self.address = 0 self.file = 1 self.line = 1 @@ -65,7 +69,7 @@ def __init__(self, default_is_stmt): self.isa = 0 self.discriminator = 0 - def __repr__(self): + def __repr__(self) -> str: return '\n'.join(( ' None: """ header: The header of this line program. Note: LineProgram may modify @@ -110,9 +114,9 @@ def __init__(self, header, stream, structs, self.structs = structs self.program_start_offset = program_start_offset self.program_end_offset = program_end_offset - self._decoded_entries = None + self._decoded_entries: list[LineProgramEntry] | None = None - def get_entries(self): + def get_entries(self) -> list[LineProgramEntry]: """ Get the decoded entries for this line program. Return a list of LineProgramEntry objects. 
Note that this contains more information than absolutely required @@ -127,16 +131,16 @@ def get_entries(self): #------ PRIVATE ------# - def __getitem__(self, name): + def __getitem__(self, name: str) -> Any: """ Implement dict-like access to header entries """ return self.header[name] - def _decode_line_program(self): + def _decode_line_program(self) -> list[LineProgramEntry]: entries = [] state = LineState(self.header['default_is_stmt']) - def add_entry_new_state(cmd, args, is_extended=False): + def add_entry_new_state(cmd: int, args: list[int], is_extended: bool = False) -> None: # Add an entry that sets a new state. # After adding, clear some state registers. entries.append(LineProgramEntry( @@ -146,13 +150,13 @@ def add_entry_new_state(cmd, args, is_extended=False): state.prologue_end = False state.epilogue_begin = False - def add_entry_old_state(cmd, args, is_extended=False): + def add_entry_old_state(cmd: int, args: list[int], is_extended: bool = False) -> None: # Add an entry that doesn't visibly set a new state entries.append(LineProgramEntry(cmd, is_extended, args, None)) offset = self.program_start_offset while offset < self.program_end_offset: - opcode = struct_parse( + opcode: int = struct_parse( self.structs.the_Dwarf_uint8, self.stream, offset) @@ -164,25 +168,25 @@ def add_entry_old_state(cmd, args, is_extended=False): # opcodes anyway. 
if opcode >= self.header['opcode_base']: # Special opcode (follow the recipe in 6.2.5.1) - maximum_operations_per_instruction = self['maximum_operations_per_instruction'] - adjusted_opcode = opcode - self['opcode_base'] - operation_advance = adjusted_opcode // self['line_range'] - address_addend = ( + maximum_operations_per_instruction: int = self['maximum_operations_per_instruction'] + adjusted_opcode: int = opcode - self['opcode_base'] + operation_advance: int = adjusted_opcode // self['line_range'] + address_addend: int = ( self['minimum_instruction_length'] * ((state.op_index + operation_advance) // maximum_operations_per_instruction)) state.address += address_addend state.op_index = (state.op_index + operation_advance) % maximum_operations_per_instruction - line_addend = self['line_base'] + (adjusted_opcode % self['line_range']) + line_addend: int = self['line_base'] + (adjusted_opcode % self['line_range']) state.line += line_addend add_entry_new_state( opcode, [line_addend, address_addend, state.op_index]) elif opcode == 0: # Extended opcode: start with a zero byte, followed by # instruction size and the instruction itself. 
- inst_len = struct_parse(self.structs.the_Dwarf_uleb128, + inst_len: int = struct_parse(self.structs.the_Dwarf_uleb128, self.stream) - ex_opcode = struct_parse(self.structs.the_Dwarf_uint8, + ex_opcode: int = struct_parse(self.structs.the_Dwarf_uint8, self.stream) if ex_opcode == DW_LNE_end_sequence: @@ -192,7 +196,7 @@ def add_entry_old_state(cmd, args, is_extended=False): # reset state state = LineState(self.header['default_is_stmt']) elif ex_opcode == DW_LNE_set_address: - operand = struct_parse(self.structs.the_Dwarf_target_addr, + operand: int = struct_parse(self.structs.the_Dwarf_target_addr, self.stream) state.address = operand add_entry_old_state(ex_opcode, [operand], is_extended=True) diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py index ba49dbda..354147f1 100644 --- a/elftools/dwarf/locationlists.py +++ b/elftools/dwarf/locationlists.py @@ -6,13 +6,26 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + import os -from typing import NamedTuple +from typing import IO, TYPE_CHECKING, NamedTuple +from typing import Union as TUnion from ..common.exceptions import DWARFError from ..common.utils import struct_parse from .dwarf_util import _iter_CUs_in_section +if TYPE_CHECKING: + from collections.abc import Callable, Iterator, Mapping + + from ..construct.lib.container import Container + from .compileunit import CompileUnit + from .die import DIE, AttributeValue + from .dwarfinfo import DWARFInfo + from .structs import DWARFStructs + from .typeunit import TypeUnit + class LocationExpr(NamedTuple): loc_expr: list[int] @@ -35,13 +48,14 @@ class LocationViewPair(NamedTuple): begin: int end: int +_Location = TUnion[LocationExpr, LocationEntry, BaseAddressEntry, LocationViewPair] -def _translate_startx_length(e, cu): - start_offset = cu.dwarfinfo.get_addr(cu, e.start_index) +def 
_translate_startx_length(e: Container, cu: CompileUnit | TypeUnit) -> LocationEntry: + start_offset: int = cu.dwarfinfo.get_addr(cu, e.start_index) return LocationEntry(e.entry_offset, e.entry_length, start_offset, start_offset + e.length, e.loc_expr, True) # Maps parsed entries to the tuples above; LocationViewPair is mapped elsewhere -entry_translate = { +entry_translate: dict[str, Callable[[Container, CompileUnit | TypeUnit], _Location]] = { 'DW_LLE_base_address' : lambda e, cu: BaseAddressEntry(e.entry_offset, e.entry_length, e.address), 'DW_LLE_offset_pair' : lambda e, cu: LocationEntry(e.entry_offset, e.entry_length, e.start_offset, e.end_offset, e.loc_expr, False), 'DW_LLE_start_length' : lambda e, cu: LocationEntry(e.entry_offset, e.entry_length, e.start_address, e.start_address + e.length, e.loc_expr, True), @@ -56,11 +70,11 @@ class LocationListsPair: """For those binaries that contain both a debug_loc and a debug_loclists section, it holds a LocationLists object for both and forwards API calls to the right one. """ - def __init__(self, streamv4, streamv5, structs, dwarfinfo=None): + def __init__(self, streamv4: IO[bytes], streamv5: IO[bytes], structs: DWARFStructs, dwarfinfo: DWARFInfo | None = None) -> None: self._loc = LocationLists(streamv4, structs, 4, dwarfinfo) self._loclists = LocationLists(streamv5, structs, 5, dwarfinfo) - def get_location_list_at_offset(self, offset, die=None): + def get_location_list_at_offset(self, offset: int, die: DIE | None = None) -> list[_Location]: """See LocationLists.get_location_list_at_offset(). """ if die is None: @@ -68,13 +82,13 @@ def get_location_list_at_offset(self, offset, die=None): section = self._loclists if die.cu.header.version >= 5 else self._loc return section.get_location_list_at_offset(offset, die) - def iter_location_lists(self): + def iter_location_lists(self) -> Iterator[BaseAddressEntry | LocationEntry]: """Tricky proposition, since the structure of loc and loclists is not identical. 
A realistic readelf implementation needs to be aware of both """ raise DWARFError("Iterating through two sections is not supported") - def iter_CUs(self): + def iter_CUs(self) -> Iterator[CompileUnit]: """See LocationLists.iter_CUs() There are no CUs in DWARFv4 sections. @@ -99,14 +113,14 @@ class LocationLists: that contain references to other sections (e. g. DW_LLE_startx_endx), and only for location list enumeration. """ - def __init__(self, stream, structs, version=4, dwarfinfo=None): + def __init__(self, stream: IO[bytes], structs: DWARFStructs, version: int = 4, dwarfinfo: DWARFInfo | None = None) -> None: self.stream = stream self.structs = structs self.dwarfinfo = dwarfinfo self.version = version - self._max_addr = 2 ** (self.structs.address_size * 8) - 1 + self._max_addr: int = 2 ** (self.structs.address_size * 8) - 1 - def get_location_list_at_offset(self, offset, die=None): + def get_location_list_at_offset(self, offset: int, die: DIE | None = None) -> list[_Location]: """ Get a location list at the given offset in the section. Passing the die is only neccessary in DWARF5+, for decoding location entry encodings that contain references to other sections. @@ -116,7 +130,7 @@ def get_location_list_at_offset(self, offset, die=None): self.stream.seek(offset, os.SEEK_SET) return self._parse_location_list_from_stream_v5(die.cu) if self.version >= 5 else self._parse_location_list_from_stream() - def iter_location_lists(self): + def iter_location_lists(self) -> Iterator[list[_Location]]: """ Iterates through location lists and view pairs. Returns lists of LocationEntry, BaseAddressEntry, and LocationViewPair objects. 
""" @@ -147,7 +161,7 @@ def iter_location_lists(self): locviews = dict() # Map of locview offset to the respective loclist offset cu_map = dict() # Map of loclist offsets to CUs for cu in self.dwarfinfo.iter_CUs(): - cu_ver = cu['version'] + cu_ver: int = cu['version'] if (cu_ver >= 5) == ver5: for die in cu.iter_DIEs(): # A combination of location and locviews means there is a location list @@ -155,8 +169,8 @@ def iter_location_lists(self): if 'DW_AT_GNU_locviews' in die.attributes: assert('DW_AT_location' in die.attributes and LocationParser._attribute_has_loc_list(die.attributes['DW_AT_location'], cu_ver)) - views_offset = die.attributes['DW_AT_GNU_locviews'].value - list_offset = die.attributes['DW_AT_location'].value + views_offset: int = die.attributes['DW_AT_GNU_locviews'].value + list_offset: int = die.attributes['DW_AT_location'].value locviews[views_offset] = list_offset cu_map[list_offset] = cu all_offsets.add(views_offset) @@ -186,7 +200,7 @@ def iter_location_lists(self): # We don't have a binary for the former yet. On an off chance that we one day might, # let's parse the header anyway. 
- cu_end_offset = cu_header.offset_after_length + cu_header.unit_length + cu_end_offset: int = cu_header.offset_after_length + cu_header.unit_length # Unit_length includes the header but doesn't include the length while stream.tell() < cu_end_offset: @@ -210,7 +224,7 @@ def iter_location_lists(self): entries = self._parse_location_list_from_stream() yield locview_pairs + entries - def iter_CUs(self): + def iter_CUs(self) -> Iterator[CompileUnit]: """For DWARF5 returns an array of objects, where each one has an array of offsets """ if self.version < 5: @@ -221,13 +235,13 @@ def iter_CUs(self): #------ PRIVATE ------# - def _parse_location_list_from_stream(self): - lst = [] + def _parse_location_list_from_stream(self) -> list[_Location]: + lst: list[_Location] = [] while True: entry_offset = self.stream.tell() - begin_offset = struct_parse( + begin_offset: int = struct_parse( self.structs.the_Dwarf_target_addr, self.stream) - end_offset = struct_parse( + end_offset: int = struct_parse( self.structs.the_Dwarf_target_addr, self.stream) if begin_offset == 0 and end_offset == 0: # End of list - we're done. @@ -238,9 +252,9 @@ def _parse_location_list_from_stream(self): lst.append(BaseAddressEntry(entry_offset=entry_offset, entry_length=entry_length, base_address=end_offset)) else: # Location list entry - expr_len = struct_parse( + expr_len: int = struct_parse( self.structs.the_Dwarf_uint16, self.stream) - loc_expr = [struct_parse(self.structs.the_Dwarf_uint8, + loc_expr: list[int] = [struct_parse(self.structs.the_Dwarf_uint8, self.stream) for i in range(expr_len)] entry_length = self.stream.tell() - entry_offset @@ -253,7 +267,7 @@ def _parse_location_list_from_stream(self): is_absolute = False)) return lst - def _parse_location_list_from_stream_v5(self, cu=None): + def _parse_location_list_from_stream_v5(self, cu: CompileUnit | TypeUnit | None = None) -> list[_Location]: """ Returns an array with BaseAddressEntry and LocationEntry. No terminator entries. 
@@ -266,10 +280,10 @@ def _parse_location_list_from_stream_v5(self, cu=None): in struct_parse(self.structs.Dwarf_loclists_entries, self.stream)] # From V5 style entries to a LocationEntry/BaseAddressEntry - def _translate_entry_v5(self, entry, die): - off = entry.entry_offset - len = entry.entry_end_offset - off - type = entry.entry_type + def _translate_entry_v5(self, entry: Container, die: DIE) -> _Location: + off: int = entry.entry_offset + len: int = entry.entry_end_offset - off + type: str = entry.entry_type if type == 'DW_LLE_base_address': return BaseAddressEntry(off, len, entry.address) elif type == 'DW_LLE_offset_pair': @@ -288,10 +302,10 @@ def _translate_entry_v5(self, entry, die): raise DWARFError(False, "Unknown DW_LLE code: %s" % (type,)) # Locviews is the dict, mapping locview offsets to corresponding loclist offsets - def _parse_locview_pairs(self, locviews): + def _parse_locview_pairs(self, locviews: Mapping[int, int]) -> list[LocationViewPair]: stream = self.stream - list_offset = locviews.get(stream.tell(), None) - pairs = [] + list_offset: int | None = locviews.get(stream.tell(), None) + pairs: list[LocationViewPair] = [] if list_offset is not None: while stream.tell() < list_offset: pair = struct_parse(self.structs.Dwarf_locview_pair, stream) @@ -306,18 +320,18 @@ class LocationParser: location lists in the .debug_loc section (represented as a list). """ - def __init__(self, location_lists): + def __init__(self, location_lists: LocationLists | LocationListsPair | None) -> None: self.location_lists = location_lists @staticmethod - def attribute_has_location(attr, dwarf_version): + def attribute_has_location(attr: AttributeValue, dwarf_version: int) -> bool: """ Checks if a DIE attribute contains location information. 
""" return (LocationParser._attribute_is_loclistptr_class(attr) and (LocationParser._attribute_has_loc_expr(attr, dwarf_version) or LocationParser._attribute_has_loc_list(attr, dwarf_version))) - def parse_from_attribute(self, attr, dwarf_version, die = None): + def parse_from_attribute(self, attr: AttributeValue, dwarf_version: int, die: DIE | None = None) -> LocationExpr | list[_Location]: """ Parses a DIE attribute and returns either a LocationExpr or a list. """ @@ -337,13 +351,13 @@ def parse_from_attribute(self, attr, dwarf_version, die = None): #------ PRIVATE ------# @staticmethod - def _attribute_has_loc_expr(attr, dwarf_version): + def _attribute_has_loc_expr(attr: AttributeValue, dwarf_version: int) -> bool: return ((dwarf_version < 4 and attr.form.startswith('DW_FORM_block') and not attr.name == 'DW_AT_const_value') or attr.form == 'DW_FORM_exprloc') @staticmethod - def _attribute_has_loc_list(attr, dwarf_version): + def _attribute_has_loc_list(attr: AttributeValue, dwarf_version: int) -> bool: return (((dwarf_version < 4 and attr.form in ('DW_FORM_data1', 'DW_FORM_data2', 'DW_FORM_data4', 'DW_FORM_data8') and not attr.name == 'DW_AT_const_value') or @@ -355,13 +369,13 @@ def _attribute_has_loc_list(attr, dwarf_version): # As for DW_AT_upper_bound/DW_AT_count, we've seen it in form DW_FORM_locexpr in a V5 binary. usually it's a constant, # but the constant sholdn't be misinterpreted as a loclist pointer. 
@staticmethod - def _attribute_is_constant(attr, dwarf_version): + def _attribute_is_constant(attr: AttributeValue, dwarf_version: int) -> bool: return (((dwarf_version >= 3 and attr.name == 'DW_AT_data_member_location') or (attr.name in ('DW_AT_upper_bound', 'DW_AT_count'))) and attr.form in ('DW_FORM_data1', 'DW_FORM_data2', 'DW_FORM_data4', 'DW_FORM_data8', 'DW_FORM_sdata', 'DW_FORM_udata')) @staticmethod - def _attribute_is_loclistptr_class(attr): + def _attribute_is_loclistptr_class(attr: AttributeValue) -> bool: return (attr.name in ( 'DW_AT_location', 'DW_AT_string_length', 'DW_AT_const_value', 'DW_AT_return_addr', 'DW_AT_data_member_location', diff --git a/elftools/dwarf/namelut.py b/elftools/dwarf/namelut.py index 7a3133ec..9f47d1f6 100644 --- a/elftools/dwarf/namelut.py +++ b/elftools/dwarf/namelut.py @@ -6,20 +6,29 @@ # Vijay Ramasami (rvijayc@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -import collections +from __future__ import annotations + from collections.abc import Mapping -from typing import NamedTuple +from typing import IO, TYPE_CHECKING, NamedTuple, TypeVar, overload from ..common.utils import struct_parse from ..construct import CString, Struct, If +if TYPE_CHECKING: + from collections.abc import ItemsView, Iterator + + from ..construct.lib.container import Container + from .structs import DWARFStructs + + _T = TypeVar("_T") + class NameLUTEntry(NamedTuple): cu_ofs: int die_ofs: int -class NameLUT(Mapping): +class NameLUT(Mapping[str, NameLUTEntry]): """ A "Name LUT" holds any of the tables specified by .debug_pubtypes or .debug_pubnames sections. 
This is basically a dictionary where the key is @@ -63,17 +72,17 @@ class NameLUT(Mapping): """ - def __init__(self, stream, size, structs): + def __init__(self, stream: IO[bytes], size: int, structs: DWARFStructs) -> None: self._stream = stream self._size = size self._structs = structs # entries are lazily loaded on demand. - self._entries = None + self._entries: dict[str, NameLUTEntry] | None = None # CU headers (for readelf). - self._cu_headers = None + self._cu_headers: list[Container] | None = None - def get_entries(self): + def get_entries(self) -> dict[str, NameLUTEntry]: """ Returns the parsed NameLUT entries. The returned object is a dictionary with the symbol name as the key and NameLUTEntry(cu_ofs, die_ofs) as @@ -87,7 +96,7 @@ def get_entries(self): self._entries, self._cu_headers = self._get_entries() return self._entries - def set_entries(self, entries, cu_headers): + def set_entries(self, entries: dict[str, NameLUTEntry], cu_headers: list[Container]) -> None: """ Set the NameLUT entries from an external source. The input is a dictionary with the symbol name as the key and NameLUTEntry(cu_ofs, @@ -100,7 +109,7 @@ def set_entries(self, entries, cu_headers): self._entries = entries self._cu_headers = cu_headers - def __len__(self): + def __len__(self) -> int: """ Returns the number of entries in the NameLUT. """ @@ -108,7 +117,7 @@ def __len__(self): self._entries, self._cu_headers = self._get_entries() return len(self._entries) - def __getitem__(self, name): + def __getitem__(self, name: str) -> NameLUTEntry: """ Returns a namedtuple - NameLUTEntry(cu_ofs, die_ofs) - that corresponds to the given symbol name. @@ -117,7 +126,7 @@ def __getitem__(self, name): self._entries, self._cu_headers = self._get_entries() return self._entries[name] - def __iter__(self): + def __iter__(self) -> Iterator[str]: """ Returns an iterator to the NameLUT dictionary. 
""" @@ -125,7 +134,7 @@ def __iter__(self): self._entries, self._cu_headers = self._get_entries() return iter(self._entries) - def items(self): + def items(self) -> ItemsView[str, NameLUTEntry]: """ Returns the NameLUT dictionary items. """ @@ -133,7 +142,11 @@ def items(self): self._entries, self._cu_headers = self._get_entries() return self._entries.items() - def get(self, name, default=None): + @overload + def get(self, name: str) -> NameLUTEntry | None: ... + @overload + def get(self, name: str, default: NameLUTEntry | _T = ...) -> NameLUTEntry | _T: ... + def get(self, name: str, default: NameLUTEntry | _T | None = None) -> NameLUTEntry | _T | None: """ Returns NameLUTEntry(cu_ofs, die_ofs) for the provided symbol name or None if the symbol does not exist in the corresponding section. @@ -142,7 +155,7 @@ def get(self, name, default=None): self._entries, self._cu_headers = self._get_entries() return self._entries.get(name, default) - def get_cu_headers(self): + def get_cu_headers(self) -> list[Container]: """ Returns all CU headers. Mainly required for readelf. """ @@ -151,15 +164,15 @@ def get_cu_headers(self): return self._cu_headers - def _get_entries(self): + def _get_entries(self) -> tuple[dict[str, NameLUTEntry], list[Container]]: """ Parse the (name, cu_ofs, die_ofs) information from this section and store as a dictionary. """ self._stream.seek(0) - entries = {} - cu_headers = [] + entries: dict[str, NameLUTEntry] = {} + cu_headers: list[Container] = [] offset = 0 # According to 6.1.1. of DWARFv4, each set of names is terminated by # an offset field containing zero (and no following string). 
Because diff --git a/elftools/dwarf/ranges.py b/elftools/dwarf/ranges.py index d4063ac7..c4bbce32 100644 --- a/elftools/dwarf/ranges.py +++ b/elftools/dwarf/ranges.py @@ -6,13 +6,23 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + import os -from typing import NamedTuple +from typing import IO, TYPE_CHECKING, NamedTuple, NoReturn from ..common.utils import struct_parse from ..common.exceptions import DWARFError from .dwarf_util import _iter_CUs_in_section +if TYPE_CHECKING: + from collections.abc import Callable, Iterator + + from ..construct.lib.container import Container + from .compileunit import CompileUnit + from .dwarfinfo import DWARFInfo + from .structs import DWARFStructs + class RangeEntry(NamedTuple): entry_offset: int @@ -28,12 +38,12 @@ class BaseAddressEntry(NamedTuple): # If we ever see a list with a base entry at the end, there will be an error that entry_length is not a field. -def _translate_startx_length(e, cu): +def _translate_startx_length(e: Container, cu: CompileUnit) -> RangeEntry: start_offset = cu.dwarfinfo.get_addr(cu, e.start_index) return RangeEntry(e.entry_offset, e.entry_length, start_offset, start_offset + e.length, True) # Maps parsed entry types to RangeEntry/BaseAddressEntry objects -entry_translate = { +entry_translate: dict[str, Callable[[Container, CompileUnit], RangeEntry | BaseAddressEntry]] = { 'DW_RLE_base_address' : lambda e, cu: BaseAddressEntry(e.entry_offset, e.address), 'DW_RLE_offset_pair' : lambda e, cu: RangeEntry(e.entry_offset, e.entry_length, e.start_offset, e.end_offset, False), 'DW_RLE_start_end' : lambda e, cu: RangeEntry(e.entry_offset, e.entry_length, e.start_address, e.end_address, True), @@ -48,11 +58,11 @@ class RangeListsPair: it holds a RangeLists object for both and forwards API calls to the right one based on the CU version. 
""" - def __init__(self, streamv4, streamv5, structs, dwarfinfo=None): + def __init__(self, streamv4: IO[bytes], streamv5: IO[bytes], structs: DWARFStructs, dwarfinfo: DWARFInfo | None = None) -> None: self._ranges = RangeLists(streamv4, structs, 4, dwarfinfo) self._rnglists = RangeLists(streamv5, structs, 5, dwarfinfo) - def get_range_list_at_offset(self, offset, cu=None): + def get_range_list_at_offset(self, offset: int, cu: CompileUnit | None = None) -> list[RangeEntry | BaseAddressEntry]: """Forwards the call to either v4 section or v5 one, depending on DWARF version in the CU. """ @@ -61,18 +71,18 @@ def get_range_list_at_offset(self, offset, cu=None): section = self._rnglists if cu.header.version >= 5 else self._ranges return section.get_range_list_at_offset(offset, cu) - def get_range_list_at_offset_ex(self, offset): + def get_range_list_at_offset_ex(self, offset: int) -> Container: """Gets an untranslated v5 rangelist from the v5 section. """ return self._rnglists.get_range_list_at_offset_ex(offset) - def iter_range_lists(self): + def iter_range_lists(self) -> NoReturn: """Tricky proposition, since the structure of ranges and rnglists is not identical. A realistic readelf implementation needs to be aware of both. """ raise DWARFError("Iterating through two sections is not supported") - def iter_CUs(self): + def iter_CUs(self) -> Iterator[CompileUnit]: """See RangeLists.iter_CUs() CU structure is only present in DWARFv5 rnglists sections. A well written @@ -80,7 +90,7 @@ def iter_CUs(self): """ return self._rnglists.iter_CUs() - def iter_CU_range_lists_ex(self, cu): + def iter_CU_range_lists_ex(self, cu: Container) -> Iterator[CompileUnit]: """See RangeLists.iter_CU_range_lists_ex() CU structure is only present in DWARFv5 rnglists sections. 
A well written @@ -88,7 +98,7 @@ def iter_CU_range_lists_ex(self, cu): """ return self._rnglists.iter_CU_range_lists_ex(cu) - def translate_v5_entry(self, entry, cu): + def translate_v5_entry(self, entry: Container, cu: CompileUnit) -> RangeEntry | BaseAddressEntry: """Forwards a V5 entry translation request to the V5 section """ return self._rnglists.translate_v5_entry(entry, cu) @@ -105,14 +115,14 @@ class RangeLists: The dwarfinfo is needed for enumeration, because enumeration requires scanning the DIEs, because ranges may overlap, even on DWARF<=4 """ - def __init__(self, stream, structs, version, dwarfinfo): + def __init__(self, stream: IO[bytes], structs: DWARFStructs, version: int, dwarfinfo: DWARFInfo | None) -> None: self.stream = stream self.structs = structs self._max_addr = 2 ** (self.structs.address_size * 8) - 1 self.version = version self._dwarfinfo = dwarfinfo - def get_range_list_at_offset(self, offset, cu=None): + def get_range_list_at_offset(self, offset: int, cu: CompileUnit | None = None) -> list[RangeEntry | BaseAddressEntry]: """ Get a range list at the given offset in the section. The cu argument is necessary if the ranges section is a @@ -122,13 +132,13 @@ def get_range_list_at_offset(self, offset, cu=None): self.stream.seek(offset, os.SEEK_SET) return self._parse_range_list_from_stream(cu) - def get_range_list_at_offset_ex(self, offset): + def get_range_list_at_offset_ex(self, offset: int) -> Container: """Get a DWARF v5 range list, addresses and offsets unresolved, at the given offset in the section """ return struct_parse(self.structs.Dwarf_rnglists_entries, self.stream, offset) - def iter_range_lists(self): + def iter_range_lists(self) -> Iterator[list[RangeEntry | BaseAddressEntry]]: """ Yields all range lists found in the section according to readelf rules. Scans the DIEs for rangelist offsets, then pulls those. Returned rangelists are always translated into lists of BaseAddressEntry/RangeEntry objects. 
@@ -154,7 +164,7 @@ def iter_range_lists(self): for offset in all_offsets: yield self.get_range_list_at_offset(offset, cu_map[offset]) - def iter_CUs(self): + def iter_CUs(self) -> Iterator[CompileUnit]: """For DWARF5 returns an array of objects, where each one has an array of offsets """ if self.version < 5: @@ -163,7 +173,7 @@ def iter_CUs(self): structs = next(self._dwarfinfo.iter_CUs()).structs # Just pick one return _iter_CUs_in_section(self.stream, structs, structs.Dwarf_rnglists_CU_header) - def iter_CU_range_lists_ex(self, cu): + def iter_CU_range_lists_ex(self, cu: Container) -> Iterator[CompileUnit]: """For DWARF5, returns untranslated rangelists in the CU, where CU comes from iter_CUs above """ stream = self.stream @@ -171,7 +181,7 @@ def iter_CU_range_lists_ex(self, cu): while stream.tell() < cu.offset_after_length + cu.unit_length: yield struct_parse(self.structs.Dwarf_rnglists_entries, stream) - def translate_v5_entry(self, entry, cu): + def translate_v5_entry(self, entry: Container, cu: CompileUnit) -> RangeEntry | BaseAddressEntry: """Translates entries in a DWARFv5 rangelist from raw parsed format to a list of BaseAddressEntry/RangeEntry, using the CU """ @@ -179,13 +189,13 @@ def translate_v5_entry(self, entry, cu): #------ PRIVATE ------# - def _parse_range_list_from_stream(self, cu): + def _parse_range_list_from_stream(self, cu: CompileUnit | None) -> list[RangeEntry | BaseAddressEntry]: if self.version >= 5: return list(entry_translate[entry.entry_type](entry, cu) for entry in struct_parse(self.structs.Dwarf_rnglists_entries, self.stream)) else: - lst = [] + lst: list[RangeEntry | BaseAddressEntry] = [] while True: entry_offset = self.stream.tell() begin_offset = struct_parse( diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index 1212fc3b..953d7773 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -7,6 +7,10 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain 
#------------------------------------------------------------------------------- +from __future__ import annotations + +from typing import IO, TYPE_CHECKING, Any, Callable, ClassVar + from ..construct import ( UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64, SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64, @@ -17,6 +21,12 @@ StreamOffset, ULInt24, UBInt24) from .enums import * +if TYPE_CHECKING: + from typing import Self # 3.11+ + + from ..construct.adapters import LengthValueAdapter + from ..construct.lib.container import Container + class DWARFStructs: """ Exposes Construct structs suitable for parsing information from DWARF @@ -81,9 +91,9 @@ class DWARFStructs: # Cache for structs instances based on creation parameters. Structs # initialization is expensive and we don't won't to repeat it # unnecessarily. - _structs_cache = {} + _structs_cache: ClassVar[dict[tuple[bool, int, int, int], Self]] = {} - def __new__(cls, little_endian, dwarf_format, address_size, dwarf_version=2): + def __new__(cls, little_endian: bool, dwarf_format: int, address_size: int, dwarf_version: int = 2) -> Self: """ dwarf_version: Numeric DWARF version @@ -113,16 +123,16 @@ def __new__(cls, little_endian, dwarf_format, address_size, dwarf_version=2): cls._structs_cache[key] = self return self - def initial_length_field_size(self): + def initial_length_field_size(self) -> int: """ Size of an initial length field. 
""" return 4 if self.dwarf_format == 32 else 12 - def _create_structs(self): + def _create_structs(self) -> None: if self.little_endian: self.Dwarf_uint8 = ULInt8 self.Dwarf_uint16 = ULInt16 - self.Dwarf_uint24 = ULInt24 + self.Dwarf_uint24: type[ULInt24 | UBInt24] = ULInt24 self.Dwarf_uint32 = ULInt32 self.Dwarf_uint64 = ULInt64 self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64 @@ -176,8 +186,8 @@ def _create_structs(self): self._create_debugsup() self._create_gnu_debugaltlink() - def _create_initial_length(self): - def _InitialLength(name): + def _create_initial_length(self) -> None: + def _InitialLength(name: str) -> _InitialLengthAdapter: # Adapts a Struct that parses forward a full initial length field. # Only if the first word is the continuation value, the second # word is parsed from the stream. @@ -189,13 +199,13 @@ def _InitialLength(name): elsevalue=None))) self.Dwarf_initial_length = _InitialLength - def _create_leb128(self): + def _create_leb128(self) -> None: self.Dwarf_uleb128 = ULEB128 self.Dwarf_sleb128 = SLEB128 self.the_Dwarf_uleb128 = self.Dwarf_uleb128('') self.the_Dwarf_sleb128 = self.Dwarf_sleb128('') - def _create_cu_header(self): + def _create_cu_header(self) -> None: dwarfv4_CU_header = Struct('', self.Dwarf_offset('debug_abbrev_offset'), self.Dwarf_uint8('address_size') @@ -238,7 +248,7 @@ def _create_cu_header(self): Embed(dwarfv4_CU_header), )) - def _create_tu_header(self): + def _create_tu_header(self) -> None: self.Dwarf_TU_header = Struct('Dwarf_TU_header', self.Dwarf_initial_length('unit_length'), self.Dwarf_uint16('version'), @@ -247,7 +257,7 @@ def _create_tu_header(self): self.Dwarf_uint64('signature'), self.Dwarf_offset('type_offset')) - def _create_abbrev_declaration(self): + def _create_abbrev_declaration(self) -> None: self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry', Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG), Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN), @@ -260,19 +270,19 @@ 
def _create_abbrev_declaration(self): If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const', self.Dwarf_sleb128('value'))))) - def _create_debugsup(self): + def _create_debugsup(self) -> None: # We don't care about checksums, for now. self.Dwarf_debugsup = Struct('Elf_debugsup', self.Dwarf_int16('version'), self.Dwarf_uint8('is_supplementary'), CString('sup_filename')) - def _create_gnu_debugaltlink(self): + def _create_gnu_debugaltlink(self) -> None: self.Dwarf_debugaltlink = Struct('Elf_debugaltlink', CString("sup_filename"), String("sup_checksum", length=20)) - def _create_dw_form(self): + def _create_dw_form(self) -> None: self.Dwarf_dw_form = dict( DW_FORM_addr=self.the_Dwarf_target_addr, DW_FORM_addrx=self.the_Dwarf_uleb128, @@ -336,7 +346,7 @@ def _create_dw_form(self): DW_FORM_rnglistx=self.the_Dwarf_uleb128 ) - def _create_aranges_header(self): + def _create_aranges_header(self) -> None: self.Dwarf_aranges_header = Struct("Dwarf_aranges_header", self.Dwarf_initial_length('unit_length'), self.Dwarf_uint16('version'), @@ -345,7 +355,7 @@ def _create_aranges_header(self): self.Dwarf_uint8('segment_size') ) - def _create_nameLUT_header(self): + def _create_nameLUT_header(self) -> None: self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header", self.Dwarf_initial_length('unit_length'), self.Dwarf_uint16('version'), @@ -353,7 +363,7 @@ def _create_nameLUT_header(self): self.Dwarf_length('debug_info_length') ) - def _create_string_offsets_table_header(self): + def _create_string_offsets_table_header(self) -> None: self.Dwarf_string_offsets_table_header = Struct( "Dwarf_string_offets_table_header", self.Dwarf_initial_length('unit_length'), @@ -361,7 +371,7 @@ def _create_string_offsets_table_header(self): self.Dwarf_uint16('padding'), ) - def _create_address_table_header(self): + def _create_address_table_header(self) -> None: self.Dwarf_address_table_header = Struct("Dwarf_address_table_header", self.Dwarf_initial_length('unit_length'), 
self.Dwarf_uint16('version'), @@ -369,7 +379,7 @@ def _create_address_table_header(self): self.Dwarf_uint8('segment_selector_size'), ) - def _create_lineprog_header(self): + def _create_lineprog_header(self) -> None: # A file entry is terminated by a NULL byte, so we don't want to parse # past it. Therefore an If is used. self.Dwarf_lineprog_file_entry = Struct('file_entry', @@ -385,12 +395,12 @@ class FormattedEntry(Construct): # similar to deprecared Dynamic. # Strings are resolved later, since it potentially requires # looking at another section. - def __init__(self, name, structs, format_field): + def __init__(self, name: str, structs: DWARFStructs, format_field: str) -> None: Construct.__init__(self, name) self.structs = structs self.format_field = format_field - def _parse(self, stream, context): + def _parse(self, stream: IO[bytes], context: Container) -> Any: # Somewhat tricky technique here, explicitly writing back to the context if self.format_field + "_parser" in context: parser = context[self.format_field + "_parser"] @@ -455,7 +465,7 @@ def _parse(self, stream, context): self.Dwarf_lineprog_file_entry)) # array name is file_entry ) - def _create_callframe_entry_headers(self): + def _create_callframe_entry_headers(self) -> None: self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', self.Dwarf_initial_length('length'), self.Dwarf_offset('CIE_id'), @@ -481,14 +491,14 @@ def _create_callframe_entry_headers(self): self.Dwarf_target_addr('initial_location'), self.Dwarf_target_addr('address_range')) - def _make_block_struct(self, length_field): + def _make_block_struct(self, length_field: Callable[[str], Construct]) -> LengthValueAdapter: """ Create a struct for DW_FORM_block """ return PrefixedArray( subcon=self.Dwarf_uint8('elem'), length_field=length_field('')) - def _create_loclists_parsers(self): + def _create_loclists_parsers(self) -> None: """ Create a struct for debug_loclists CU header, DWARFv5, 7,29 """ self.Dwarf_loclists_CU_header = 
Struct('Dwarf_loclists_CU_header', @@ -527,7 +537,7 @@ def _create_loclists_parsers(self): self.Dwarf_locview_pair = Struct('locview_pair', StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end')) - def _create_rnglists_parsers(self): + def _create_rnglists_parsers(self) -> None: self.Dwarf_rnglists_CU_header = Struct('Dwarf_rnglists_CU_header', StreamOffset('cu_offset'), self.Dwarf_initial_length('unit_length'), @@ -563,7 +573,7 @@ class _InitialLengthAdapter(Adapter): """ A standard Construct adapter that expects a sub-construct as a struct with one or two values (first, second). """ - def _decode(self, obj, context): + def _decode(self, obj: Container, context: Container) -> int: if obj.first < 0xFFFFFF00: context['is64'] = False return obj.first diff --git a/elftools/dwarf/typeunit.py b/elftools/dwarf/typeunit.py index 3824c165..ffebae55 100644 --- a/elftools/dwarf/typeunit.py +++ b/elftools/dwarf/typeunit.py @@ -6,10 +6,22 @@ # Dinkar Khandalekar (contact@dinkar.dev) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + from bisect import bisect_right +from typing import TYPE_CHECKING, Any + from .die import DIE from ..common.utils import dwarf_assert +if TYPE_CHECKING: + from collections.abc import Iterator + + from ..construct.lib.container import Container + from .abbrevtable import AbbrevTable + from .dwarfinfo import DWARFInfo + from .structs import DWARFStructs + class TypeUnit: """ A DWARF type unit (TU). @@ -34,7 +46,7 @@ class TypeUnit: To get the top-level DIE describing the type unit, call the get_top_DIE method. 
""" - def __init__(self, header, dwarfinfo, structs, tu_offset, tu_die_offset): + def __init__(self, header: Container, dwarfinfo: DWARFInfo, structs: DWARFStructs, tu_offset: int, tu_die_offset: int) -> None: """ header: TU header for this type unit @@ -58,37 +70,37 @@ def __init__(self, header, dwarfinfo, structs, tu_offset, tu_die_offset): # The abbreviation table for this TU. Filled lazily when DIEs are # requested. - self._abbrev_table = None + self._abbrev_table: AbbrevTable | None = None # A list of DIEs belonging to this TU. # This list is lazily constructed as DIEs are iterated over. - self._dielist = [] + self._dielist: list[DIE] = [] # A list of file offsets, corresponding (by index) to the DIEs # in `self._dielist`. This list exists separately from # `self._dielist` to make it binary searchable, enabling the # DIE population strategy used in `iter_DIE_children`. # Like `self._dielist`, this list is lazily constructed # as DIEs are iterated over. - self._diemap = [] + self._diemap: list[int] = [] @property - def cu_offset(self): + def cu_offset(self) -> int: """Simulates the cu_offset attribute required by the DIE by returning the tu_offset instead """ return self.tu_offset @property - def cu_die_offset(self): + def cu_die_offset(self) -> int: """Simulates the cu_die_offset attribute required by the DIE by returning the tu_offset instead """ return self.tu_die_offset - def dwarf_format(self): + def dwarf_format(self) -> int: """ Get the DWARF format (32 or 64) for this TU """ return self.structs.dwarf_format - def get_abbrev_table(self): + def get_abbrev_table(self) -> AbbrevTable: """ Get the abbreviation table (AbbrevTable object) for this TU """ if self._abbrev_table is None: @@ -96,7 +108,7 @@ def get_abbrev_table(self): self['debug_abbrev_offset']) return self._abbrev_table - def get_top_DIE(self): + def get_top_DIE(self) -> DIE: """ Get the top DIE (which is DW_TAG_type_unit entry) of this TU """ @@ -117,23 +129,23 @@ def get_top_DIE(self): return 
top - def has_top_DIE(self): + def has_top_DIE(self) -> bool: """ Returns whether the top DIE in this TU has already been parsed and cached. No parsing on demand! """ return bool(self._diemap) @property - def size(self): + def size(self) -> int: return self['unit_length'] + self.structs.initial_length_field_size() - def iter_DIEs(self): + def iter_DIEs(self) -> Iterator[DIE]: """ Iterate over all the DIEs in the TU, in order of their appearance. Note that null DIEs will also be returned. """ return self._iter_DIE_subtree(self.get_top_DIE()) - def iter_DIE_children(self, die): + def iter_DIE_children(self, die: DIE) -> Iterator[DIE]: """ Given a DIE, yields either its children, without null DIE list terminator, or nothing, if that DIE has no children. @@ -185,7 +197,7 @@ def iter_DIE_children(self, die): cur_offset = child._terminator.offset + child._terminator.size - def get_DIE_from_refaddr(self, refaddr): + def get_DIE_from_refaddr(self, refaddr: int) -> DIE: """ Obtain a DIE contained in this CU from a reference. refaddr: The offset into the .debug_info section, which must be @@ -203,12 +215,12 @@ def get_DIE_from_refaddr(self, refaddr): #------ PRIVATE ------# - def __getitem__(self, name): + def __getitem__(self, name: str) -> Any: """ Implement dict-like access to header entries """ return self.header[name] - def _iter_DIE_subtree(self, die): + def _iter_DIE_subtree(self, die: DIE) -> Iterator[DIE]: """ Given a DIE, this yields it with its subtree including null DIEs (child list terminators). """ @@ -222,7 +234,7 @@ def _iter_DIE_subtree(self, die): yield from die.cu._iter_DIE_subtree(c) yield die._terminator - def _get_cached_DIE(self, offset): + def _get_cached_DIE(self, offset: int) -> DIE: """ Given a DIE offset, look it up in the cache. If not present, parse the DIE and insert it into the cache. 
diff --git a/elftools/ehabi/constants.py b/elftools/ehabi/constants.py index 2921b97e..3fe208cb 100644 --- a/elftools/ehabi/constants.py +++ b/elftools/ehabi/constants.py @@ -1 +1 @@ -EHABI_INDEX_ENTRY_SIZE = 8 +EHABI_INDEX_ENTRY_SIZE: int = 8 diff --git a/elftools/ehabi/decoder.py b/elftools/ehabi/decoder.py index 0ea224cf..925de1f2 100644 --- a/elftools/ehabi/decoder.py +++ b/elftools/ehabi/decoder.py @@ -8,7 +8,10 @@ # ------------------------------------------------------------------------------- from __future__ import annotations -from typing import Callable, NamedTuple +from typing import TYPE_CHECKING, NamedTuple + +if TYPE_CHECKING: + from collections.abc import Callable class EHABIBytecodeDecoder: @@ -30,13 +33,13 @@ class EHABIBytecodeDecoder: """ - def __init__(self, bytecode_array): + def __init__(self, bytecode_array: list[int]) -> None: self._bytecode_array = bytecode_array - self._index = None - self.mnemonic_array = None + self._index: int = 0 + self.mnemonic_array: list[MnemonicItem] | None = None self._decode() - def _decode(self): + def _decode(self) -> None: """ Decode bytecode array, put result into mnemonic_array. 
""" self._index = 0 @@ -51,14 +54,14 @@ def _decode(self): MnemonicItem(self._bytecode_array[start_idx: end_idx], mnemonic)) break - def _decode_00xxxxxx(self): + def _decode_00xxxxxx(self) -> str: # SW.startLine() << format("0x%02X ; vsp = vsp + %u\n", Opcode, # ((Opcode & 0x3f) << 2) + 4); opcode = self._bytecode_array[self._index] self._index += 1 return 'vsp = vsp + %u' % (((opcode & 0x3f) << 2) + 4) - def _decode_01xxxxxx(self): + def _decode_01xxxxxx(self) -> str: # SW.startLine() << format("0x%02X ; vsp = vsp - %u\n", Opcode, # ((Opcode & 0x3f) << 2) + 4); opcode = self._bytecode_array[self._index] @@ -68,18 +71,18 @@ def _decode_01xxxxxx(self): gpr_register_names = ("r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "fp", "ip", "sp", "lr", "pc") - def _calculate_range(self, start, count): + def _calculate_range(self, start: int, count: int) -> int: return ((1 << (count + 1)) - 1) << start - def _printGPR(self, gpr_mask): + def _printGPR(self, gpr_mask: int) -> str: hits = [self.gpr_register_names[i] for i in range(32) if gpr_mask & (1 << i) != 0] return '{%s}' % ', '.join(hits) - def _print_registers(self, vfp_mask, prefix): + def _print_registers(self, vfp_mask: int, prefix: str) -> str: hits = [prefix + str(i) for i in range(32) if vfp_mask & (1 << i) != 0] return '{%s}' % ', '.join(hits) - def _decode_1000iiii_iiiiiiii(self): + def _decode_1000iiii_iiiiiiii(self) -> str: op0 = self._bytecode_array[self._index] self._index += 1 op1 = self._bytecode_array[self._index] @@ -96,40 +99,40 @@ def _decode_1000iiii_iiiiiiii(self): else: return 'pop %s' % self._printGPR(gpr_mask) - def _decode_10011101(self): + def _decode_10011101(self) -> str: self._index += 1 return 'reserved (ARM MOVrr)' - def _decode_10011111(self): + def _decode_10011111(self) -> str: self._index += 1 return 'reserved (WiMMX MOVrr)' - def _decode_1001nnnn(self): + def _decode_1001nnnn(self) -> str: # SW.startLine() << format("0x%02X ; vsp = r%u\n", Opcode, (Opcode & 0x0f)); 
opcode = self._bytecode_array[self._index] self._index += 1 return 'vsp = r%u' % (opcode & 0x0f) - def _decode_10100nnn(self): + def _decode_10100nnn(self) -> str: # SW.startLine() << format("0x%02X ; pop ", Opcode); # PrintGPR((((1 << ((Opcode & 0x7) + 1)) - 1) << 4)); opcode = self._bytecode_array[self._index] self._index += 1 return 'pop %s' % self._printGPR(self._calculate_range(4, opcode & 0x07)) - def _decode_10101nnn(self): + def _decode_10101nnn(self) -> str: # SW.startLine() << format("0x%02X ; pop ", Opcode); # PrintGPR((((1 << ((Opcode & 0x7) + 1)) - 1) << 4) | (1 << 14)); opcode = self._bytecode_array[self._index] self._index += 1 return 'pop %s' % self._printGPR(self._calculate_range(4, opcode & 0x07) | (1 << 14)) - def _decode_10110000(self): + def _decode_10110000(self) -> str: # SW.startLine() << format("0x%02X ; finish\n", Opcode); self._index += 1 return 'finish' - def _decode_10110001_0000iiii(self): + def _decode_10110001_0000iiii(self) -> str: # SW.startLine() # << format("0x%02X 0x%02X ; %s", Opcode0, Opcode1, # ((Opcode1 & 0xf0) || Opcode1 == 0x00) ? 
"spare" : "pop "); @@ -143,7 +146,7 @@ def _decode_10110001_0000iiii(self): else: return 'pop %s' % self._printGPR(op1 & 0x0f) - def _decode_10110010_uleb128(self): + def _decode_10110010_uleb128(self) -> str: # SmallVector ULEB; # do { ULEB.push_back(Opcodes[OI ^ 3]); } while (Opcodes[OI++ ^ 3] & 0x80); # uint64_t Value = 0; @@ -161,21 +164,21 @@ def _decode_10110010_uleb128(self): value = (value << 7) + (b & 0x7F) return 'vsp = vsp + %u' % (0x204 + (value << 2)) - def _decode_10110011_sssscccc(self): + def _decode_10110011_sssscccc(self) -> str: # these two decoders are equal return self._decode_11001001_sssscccc() - def _decode_101101nn(self): + def _decode_101101nn(self) -> str: return self._spare() - def _decode_10111nnn(self): + def _decode_10111nnn(self) -> str: # SW.startLine() << format("0x%02X ; pop ", Opcode); # PrintRegisters((((1 << ((Opcode & 0x07) + 1)) - 1) << 8), "d"); opcode = self._bytecode_array[self._index] self._index += 1 return 'pop %s' % self._print_registers(self._calculate_range(8, opcode & 0x07), "d") - def _decode_11000110_sssscccc(self): + def _decode_11000110_sssscccc(self) -> str: # SW.startLine() << format("0x%02X 0x%02X ; pop ", Opcode0, Opcode1); # uint8_t Start = ((Opcode1 & 0xf0) >> 4); # uint8_t Count = ((Opcode1 & 0x0f) >> 0); @@ -187,7 +190,7 @@ def _decode_11000110_sssscccc(self): count = ((op1 & 0x0f) >> 0) return 'pop %s' % self._print_registers(self._calculate_range(start, count), "wR") - def _decode_11000111_0000iiii(self): + def _decode_11000111_0000iiii(self) -> str: # SW.startLine() # << format("0x%02X 0x%02X ; %s", Opcode0, Opcode1, # ((Opcode1 & 0xf0) || Opcode1 == 0x00) ? 
"spare" : "pop "); @@ -201,7 +204,7 @@ def _decode_11000111_0000iiii(self): else: return 'pop %s' % self._print_registers(op1 & 0x0f, "wCGR") - def _decode_11001000_sssscccc(self): + def _decode_11001000_sssscccc(self) -> str: # SW.startLine() << format("0x%02X 0x%02X ; pop ", Opcode0, Opcode1); # uint8_t Start = 16 + ((Opcode1 & 0xf0) >> 4); # uint8_t Count = ((Opcode1 & 0x0f) >> 0); @@ -213,7 +216,7 @@ def _decode_11001000_sssscccc(self): count = ((op1 & 0x0f) >> 0) return 'pop %s' % self._print_registers(self._calculate_range(start, count), "d") - def _decode_11001001_sssscccc(self): + def _decode_11001001_sssscccc(self) -> str: # SW.startLine() << format("0x%02X 0x%02X ; pop ", Opcode0, Opcode1); # uint8_t Start = ((Opcode1 & 0xf0) >> 4); # uint8_t Count = ((Opcode1 & 0x0f) >> 0); @@ -225,24 +228,24 @@ def _decode_11001001_sssscccc(self): count = ((op1 & 0x0f) >> 0) return 'pop %s' % self._print_registers(self._calculate_range(start, count), "d") - def _decode_11001yyy(self): + def _decode_11001yyy(self) -> str: return self._spare() - def _decode_11000nnn(self): + def _decode_11000nnn(self) -> str: # SW.startLine() << format("0x%02X ; pop ", Opcode); # PrintRegisters((((1 << ((Opcode & 0x07) + 1)) - 1) << 10), "wR"); opcode = self._bytecode_array[self._index] self._index += 1 return 'pop %s' % self._print_registers(self._calculate_range(10, opcode & 0x07), "wR") - def _decode_11010nnn(self): + def _decode_11010nnn(self) -> str: # these two decoders are equal return self._decode_10111nnn() - def _decode_11xxxyyy(self): + def _decode_11xxxyyy(self) -> str: return self._spare() - def _spare(self): + def _spare(self) -> str: self._index += 1 return 'spare' @@ -281,9 +284,9 @@ class MnemonicItem: """ Single mnemonic item. 
""" - def __init__(self, bytecode, mnemonic): + def __init__(self, bytecode: list[int], mnemonic: str) -> None: self.bytecode = bytecode self.mnemonic = mnemonic - def __repr__(self): + def __repr__(self) -> str: return '%s ; %s' % (' '.join(['0x%02x' % x for x in self.bytecode]), self.mnemonic) diff --git a/elftools/ehabi/ehabiinfo.py b/elftools/ehabi/ehabiinfo.py index e75a332d..4f5f73e7 100644 --- a/elftools/ehabi/ehabiinfo.py +++ b/elftools/ehabi/ehabiinfo.py @@ -6,6 +6,9 @@ # LeadroyaL (leadroyal@qq.com) # This code is in the public domain # ------------------------------------------------------------------------------- +from __future__ import annotations + +from typing import TYPE_CHECKING from ..common.utils import struct_parse @@ -13,6 +16,10 @@ from .constants import EHABI_INDEX_ENTRY_SIZE from .structs import EHABIStructs +if TYPE_CHECKING: + from ..elf.sections import Section + from .decoder import MnemonicItem + class EHABIInfo: """ ARM exception handler abi information class. @@ -26,25 +33,25 @@ class EHABIInfo: bool, endianness of elf file. """ - def __init__(self, arm_idx_section, little_endian): + def __init__(self, arm_idx_section: Section, little_endian: bool) -> None: self._arm_idx_section = arm_idx_section self._struct = EHABIStructs(little_endian) - self._num_entry = None + self._num_entry: int | None = None - def section_name(self): + def section_name(self) -> str: return self._arm_idx_section.name - def section_offset(self): + def section_offset(self) -> int: return self._arm_idx_section['sh_offset'] - def num_entry(self): + def num_entry(self) -> int: """ Number of exception handler entry in the section. """ if self._num_entry is None: self._num_entry = self._arm_idx_section['sh_size'] // EHABI_INDEX_ENTRY_SIZE return self._num_entry - def get_entry(self, n): + def get_entry(self, n: int) -> EHABIEntry: """ Get the exception handler entry at index #n. 
(EHABIEntry object or a subclass) """ if n >= self.num_entry(): @@ -134,12 +141,12 @@ class EHABIEntry: """ def __init__(self, - function_offset, - personality, - bytecode_array, - eh_table_offset=None, - unwindable=True, - corrupt=False): + function_offset: int | None, + personality: int | None, + bytecode_array: list[int] | None, + eh_table_offset: int | None = None, + unwindable: bool = True, + corrupt: bool = False) -> None: self.function_offset = function_offset self.personality = personality self.bytecode_array = bytecode_array @@ -147,13 +154,13 @@ def __init__(self, self.unwindable = unwindable self.corrupt = corrupt - def mnmemonic_array(self): + def mnmemonic_array(self) -> list[MnemonicItem] | None: if self.bytecode_array: return EHABIBytecodeDecoder(self.bytecode_array).mnemonic_array else: return None - def __repr__(self): + def __repr__(self) -> str: return "" % ( self.function_offset, self.personality, @@ -165,12 +172,12 @@ class CorruptEHABIEntry(EHABIEntry): """ This entry is corrupt. Attribute #corrupt will be True. """ - def __init__(self, reason): + def __init__(self, reason: str) -> None: super().__init__(function_offset=None, personality=None, bytecode_array=None, corrupt=True) self.reason = reason - def __repr__(self): + def __repr__(self) -> str: return "" % self.reason @@ -178,11 +185,11 @@ class CannotUnwindEHABIEntry(EHABIEntry): """ This function cannot be unwind. Attribute #unwindable will be False. """ - def __init__(self, function_offset): + def __init__(self, function_offset: int) -> None: super().__init__(function_offset, personality=None, bytecode_array=None, unwindable=False) - def __repr__(self): + def __repr__(self) -> str: return "" % self.function_offset @@ -190,14 +197,14 @@ class GenericEHABIEntry(EHABIEntry): """ This entry is generic model rather than ARM compact model.Attribute #bytecode_array will be None. 
""" - def __init__(self, function_offset, personality): + def __init__(self, function_offset: int, personality: int) -> None: super().__init__(function_offset, personality, bytecode_array=None) - def __repr__(self): + def __repr__(self) -> str: return "" % (self.function_offset, self.personality) -def arm_expand_prel31(address, place): +def arm_expand_prel31(address: int, place: int) -> int: """ address: uint32 place: uint32 diff --git a/elftools/ehabi/structs.py b/elftools/ehabi/structs.py index 0b307895..81345759 100644 --- a/elftools/ehabi/structs.py +++ b/elftools/ehabi/structs.py @@ -21,11 +21,11 @@ class EHABIStructs: Struct of item in section .ARM.extab. """ - def __init__(self, little_endian): + def __init__(self, little_endian: bool) -> None: self._little_endian = little_endian self._create_structs() - def _create_structs(self): + def _create_structs(self) -> None: if self._little_endian: self.EHABI_uint32 = ULInt32 else: @@ -33,14 +33,14 @@ def _create_structs(self): self._create_exception_handler_index() self._create_exception_handler_table() - def _create_exception_handler_index(self): + def _create_exception_handler_index(self) -> None: self.EH_index_struct = Struct( 'EH_index', self.EHABI_uint32('word0'), self.EHABI_uint32('word1') ) - def _create_exception_handler_table(self): + def _create_exception_handler_table(self) -> None: self.EH_table_struct = Struct( 'EH_table', self.EHABI_uint32('word0'), diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py index 69422c17..feb00025 100644 --- a/elftools/elf/descriptions.py +++ b/elftools/elf/descriptions.py @@ -6,6 +6,10 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Final, TypeVar + from .enums import ( ENUM_D_TAG, ENUM_E_VERSION, ENUM_P_TYPE_BASE, ENUM_SH_TYPE_BASE, ENUM_RELOC_TYPE_i386, 
ENUM_RELOC_TYPE_x64, @@ -15,29 +19,40 @@ ENUM_DT_FLAGS_1, ENUM_RELOC_TYPE_PPC) from .constants import ( P_FLAGS, RH_FLAGS, SH_FLAGS, SUNW_SYMINFO_FLAGS, VER_FLAGS) +from .dynamic import DynamicSection from ..common.utils import bytes2hex +if TYPE_CHECKING: + from collections.abc import Container as TContainer + from collections.abc import Iterable, Mapping + + from ..construct.lib.container import Container + from .elffile import ELFFile + + _K = TypeVar("_K") + _V = TypeVar("_V") + -def describe_ei_class(x): +def describe_ei_class(x: str) -> str: return _DESCR_EI_CLASS.get(x, _unknown) -def describe_ei_data(x): +def describe_ei_data(x: str) -> str: return _DESCR_EI_DATA.get(x, _unknown) -def describe_ei_version(x): +def describe_ei_version(x: str) -> str: s = str(ENUM_E_VERSION.get(x, f"{x} ")) if x == 'EV_CURRENT': s += ' (current)' return s -def describe_ei_osabi(x): +def describe_ei_osabi(x: str) -> str: return _DESCR_EI_OSABI.get(x, _unknown) -def describe_e_type(x, elffile=None): +def describe_e_type(x: str, elffile: ELFFile | None = None) -> str: if elffile is not None and x == 'ET_DYN': # Detect whether this is a normal SO or a PIE executable dynamic = elffile.get_section_by_name('.dynamic') @@ -48,15 +63,15 @@ def describe_e_type(x, elffile=None): return _DESCR_E_TYPE.get(x, _unknown) -def describe_e_machine(x): +def describe_e_machine(x: str) -> str: return _DESCR_E_MACHINE.get(x, _unknown) -def describe_e_version_numeric(x): +def describe_e_version_numeric(x: str) -> str: return f"{ENUM_E_VERSION.get(x, x):#x}" -def describe_p_type(x): +def describe_p_type(x: int | str) -> str: if x in _DESCR_P_TYPE: return _DESCR_P_TYPE.get(x) elif x >= ENUM_P_TYPE_BASE['PT_LOOS'] and x <= ENUM_P_TYPE_BASE['PT_HIOS']: @@ -65,14 +80,14 @@ def describe_p_type(x): return _unknown -def describe_p_flags(x): +def describe_p_flags(x: int) -> str: s = '' for flag in (P_FLAGS.PF_R, P_FLAGS.PF_W, P_FLAGS.PF_X): s += _DESCR_P_FLAGS[flag] if (x & flag) else ' ' return s -def 
describe_rh_flags(x): +def describe_rh_flags(x: int) -> str: return ' '.join( _DESCR_RH_FLAGS[flag] for flag in (RH_FLAGS.RHF_NONE, RH_FLAGS.RHF_QUICKSTART, @@ -88,7 +103,7 @@ def describe_rh_flags(x): if x & flag) -def describe_sh_type(x): +def describe_sh_type(x: int | str) -> str: if x in _DESCR_SH_TYPE: return _DESCR_SH_TYPE.get(x) elif (x >= ENUM_SH_TYPE_BASE['SHT_LOOS'] and @@ -98,7 +113,7 @@ def describe_sh_type(x): return _unknown -def describe_sh_flags(x): +def describe_sh_flags(x: int) -> str: s = '' for flag in ( SH_FLAGS.SHF_WRITE, SH_FLAGS.SHF_ALLOC, SH_FLAGS.SHF_EXECINSTR, @@ -113,34 +128,34 @@ def describe_sh_flags(x): return s -def describe_symbol_type(x): +def describe_symbol_type(x: str) -> str: return _DESCR_ST_INFO_TYPE.get(x, _unknown) -def describe_symbol_bind(x): +def describe_symbol_bind(x: str) -> str: return _DESCR_ST_INFO_BIND.get(x, _unknown) -def describe_symbol_visibility(x): +def describe_symbol_visibility(x: str) -> str: return _DESCR_ST_VISIBILITY.get(x, _unknown) -def describe_symbol_local(x): +def describe_symbol_local(x: int) -> str: return '[: ' + str(1 << x) + ']' -def describe_symbol_other(x): +def describe_symbol_other(x: Container) -> str: vis = describe_symbol_visibility(x['visibility']) if 1 < x['local'] < 7: return vis + ' ' + describe_symbol_local(x['local']) return vis -def describe_symbol_shndx(x): +def describe_symbol_shndx(x: int | str) -> str: return _DESCR_ST_SHNDX.get(x, '%3s' % x) -def describe_reloc_type(x, elffile): +def describe_reloc_type(x: int, elffile: ELFFile) -> str: arch = elffile.get_machine_arch() if arch == 'x86': return _DESCR_RELOC_TYPE_i386.get(x, _unknown) @@ -164,21 +179,21 @@ def describe_reloc_type(x, elffile): return 'unrecognized: %-7x' % (x & 0xFFFFFFFF) -def describe_dyn_tag(x): +def describe_dyn_tag(x: int) -> str: return _DESCR_D_TAG.get(x, _unknown) -def describe_dt_flags(x): +def describe_dt_flags(x: int) -> str: return ' '.join(key[3:] for key, val in sorted(ENUM_DT_FLAGS.items(), 
key=lambda t: t[1]) if x & val) -def describe_dt_flags_1(x): +def describe_dt_flags_1(x: int) -> str: return ' '.join(key[5:] for key, val in sorted(ENUM_DT_FLAGS_1.items(), key=lambda t: t[1]) if x & val) -def describe_syminfo_flags(x): +def describe_syminfo_flags(x: int) -> str: return ''.join(_DESCR_SYMINFO_FLAGS[flag] for flag in ( SUNW_SYMINFO_FLAGS.SYMINFO_FLG_CAP, SUNW_SYMINFO_FLAGS.SYMINFO_FLG_DIRECT, @@ -192,18 +207,18 @@ def describe_syminfo_flags(x): SUNW_SYMINFO_FLAGS.SYMINFO_FLG_DEFERRED) if x & flag) -def describe_symbol_boundto(x): +def describe_symbol_boundto(x: str) -> str: return _DESCR_SYMINFO_BOUNDTO.get(x, '%3s' % x) -def describe_ver_flags(x): +def describe_ver_flags(x: int) -> str: return ' | '.join(_DESCR_VER_FLAGS[flag] for flag in ( VER_FLAGS.VER_FLG_WEAK, VER_FLAGS.VER_FLG_BASE, VER_FLAGS.VER_FLG_INFO) if x & flag) -def describe_note(x, machine): +def describe_note(x: Container, machine: str) -> str: n_desc = x['n_desc'] desc = '' if x['n_type'] == 'NT_GNU_ABI_TAG': @@ -237,7 +252,7 @@ def describe_note(x, machine): return '%s (%s)%s' % (note_type, note_type_desc, desc) -def describe_attr_tag_arm(tag, val, extra): +def describe_attr_tag_arm(tag: str, val: Any, extra: str | None) -> str: s = _DESCR_ATTR_TAG_ARM.get(tag, '"%s"' % tag) idx = ENUM_ATTR_TAG_ARM[tag] - 1 d_entry = _DESCR_ATTR_VAL_ARM[idx] @@ -263,7 +278,7 @@ def describe_attr_tag_arm(tag, val, extra): else: return s + d_entry[val] -def describe_attr_tag_riscv(tag, val, extra): +def describe_attr_tag_riscv(tag: str, val: Any, extra: str) -> str: idx = ENUM_ATTR_TAG_RISCV[tag] - 1 d_entry = _DESCR_ATTR_VAL_RISCV[idx] @@ -275,14 +290,14 @@ def describe_attr_tag_riscv(tag, val, extra): else: return _DESCR_ATTR_TAG_RISCV[tag] + d_entry[val] -def describe_note_gnu_property_bitmap_and(values, prefix, value): +def describe_note_gnu_property_bitmap_and(values: Iterable[tuple[int, str]], prefix: str, value: int) -> str: descs = [] for mask, desc in values: if value & mask: 
descs.append(desc) return '%s: %s' % (prefix, ', '.join(descs)) -def describe_note_gnu_properties(properties, machine): +def describe_note_gnu_properties(properties: list[Container], machine: str) -> str: descriptions = [] for prop in properties: t, d, sz = prop.pr_type, prop.pr_data, prop.pr_datasz @@ -337,7 +352,7 @@ def describe_note_gnu_properties(properties, machine): return '\n '.join(descriptions) #------------------------------------------------------------------------------- -_unknown = '' +_unknown: str = '' _DESCR_EI_CLASS = dict( @@ -687,7 +702,7 @@ def describe_note_gnu_properties(properties, machine): (2, 'ZICFISS'), ) -def _reverse_dict(d, low_priority=()): +def _reverse_dict(d: Mapping[_K, _V], low_priority: TContainer[_K] = ()) -> dict[_V, _K]: """ This is a tiny helper function to "reverse" the keys/values of a dictionary provided in the first argument, i.e. {k: v} becomes {v: k}. @@ -696,7 +711,7 @@ def _reverse_dict(d, low_priority=()): the case of conflicting values - if a value is present in this list, it will not override any other entries of the same value. 
""" - out = {} + out: dict[_V, _K] = {} for k, v in d.items(): if v in out and k in low_priority: continue diff --git a/elftools/elf/dynamic.py b/elftools/elf/dynamic.py index 966bb562..5cd49f51 100644 --- a/elftools/elf/dynamic.py +++ b/elftools/elf/dynamic.py @@ -6,17 +6,26 @@ # Mike Frysinger (vapier@gentoo.org) # This code is in the public domain #------------------------------------------------------------------------------- -import itertools -from typing import Protocol, runtime_checkable +from __future__ import annotations +import itertools from collections import defaultdict +from typing import IO, TYPE_CHECKING, Any, Protocol, cast, runtime_checkable + +from ..common.exceptions import ELFError +from ..common.utils import elf_assert, struct_parse, parse_cstring_from_stream +from .enums import ENUM_D_TAG from .hash import ELFHashTable, GNUHashTable +from .relocation import RelocationTable, RelrRelocationTable from .sections import Section, Symbol -from .enums import ENUM_D_TAG from .segments import Segment -from .relocation import RelocationTable, RelrRelocationTable -from ..common.exceptions import ELFError -from ..common.utils import elf_assert, struct_parse, parse_cstring_from_stream + +if TYPE_CHECKING: + from collections.abc import Iterator + + from ..construct.lib.container import Container + from .elffile import ELFFile + from .hash import _HashTable @runtime_checkable @@ -28,11 +37,11 @@ class _DynamicStringTable: """ Bare string table based on values found via ELF dynamic tags and loadable segments only. Good enough for get_string() only. """ - def __init__(self, stream, table_offset): + def __init__(self, stream: IO[bytes], table_offset: int) -> None: self._stream = stream self._table_offset = table_offset - def get_string(self, offset): + def get_string(self, offset: int) -> str: """ Get the string stored at the given offset in this string table. 
""" s = parse_cstring_from_stream(self._stream, self._table_offset + offset) @@ -52,7 +61,7 @@ class DynamicTag: ['DT_NEEDED', 'DT_RPATH', 'DT_RUNPATH', 'DT_SONAME', 'DT_SUNW_FILTER']) - def __init__(self, entry, stringtable): + def __init__(self, entry: Container, stringtable: _StringTable | None) -> None: if stringtable is None: raise ELFError('Creating DynamicTag without string table') self.entry = entry @@ -60,15 +69,15 @@ def __init__(self, entry, stringtable): setattr(self, entry.d_tag[3:].lower(), stringtable.get_string(self.entry.d_val)) - def __getitem__(self, name): + def __getitem__(self, name: str) -> Any: """ Implement dict-like access to entries """ return self.entry[name] - def __repr__(self): + def __repr__(self) -> str: return '' % (self.entry.d_tag, self.entry) - def __str__(self): + def __str__(self) -> str: if self.entry.d_tag in self._HANDLED_TAGS: s = '"%s"' % getattr(self, self.entry.d_tag[3:].lower()) else: @@ -79,7 +88,7 @@ def __str__(self): class Dynamic: """ Shared functionality between dynamic sections and segments. """ - def __init__(self, stream, elffile, stringtable, position, empty): + def __init__(self, stream: IO[bytes], elffile: ELFFile, stringtable: _StringTable | Section | None, position: int, empty: bool) -> None: """ stream: The file-like object from which to load data @@ -107,25 +116,25 @@ def __init__(self, stream, elffile, stringtable, position, empty): self._empty = empty # Do not access this directly yourself; use _get_stringtable() instead. - self._stringtable = stringtable + self._stringtable: _StringTable | Section | None = stringtable - def get_table_offset(self, tag_name): + def get_table_offset(self, tag_name: str) -> tuple[int | None, int | None]: """ Return the virtual address and file offset of a dynamic table. """ - ptr = None + ptr: int | None = None for tag in self._iter_tags(type=tag_name): ptr = tag['d_ptr'] break # If we found a virtual address, locate the offset in the file # by using the program headers. 
- offset = None + offset: int | None = None if ptr: offset = next(self.elffile.address_offsets(ptr), None) return ptr, offset - def _get_stringtable(self): + def _get_stringtable(self) -> _StringTable: """ Return a string table for looking up dynamic tag related strings. This won't be a "full" string table object, but will at least @@ -147,7 +156,7 @@ def _get_stringtable(self): self._stringtable = self.elffile.get_section_by_name('.dynstr') return self._stringtable - def _iter_tags(self, type=None): + def _iter_tags(self, type: str | None = None) -> Iterator[Container]: """ Yield all raw tags (limit to |type| if specified) """ if self._empty: @@ -159,13 +168,13 @@ def _iter_tags(self, type=None): if tag['d_tag'] == 'DT_NULL': break - def iter_tags(self, type=None): + def iter_tags(self, type: str | None = None) -> Iterator[DynamicTag]: """ Yield all tags (limit to |type| if specified) """ for tag in self._iter_tags(type=type): yield DynamicTag(tag, self._get_stringtable()) - def _get_tag(self, n): + def _get_tag(self, n: int) -> Container: """ Get the raw tag at index #n from the file """ if self._num_tags != -1 and n >= self._num_tags: @@ -176,12 +185,12 @@ def _get_tag(self, n): self._stream, stream_pos=offset) - def get_tag(self, n): + def get_tag(self, n: int) -> DynamicTag: """ Get the tag at index #n from the file (DynamicTag object) """ return DynamicTag(self._get_tag(n), self._get_stringtable()) - def num_tags(self): + def num_tags(self) -> int | None: """ Number of dynamic tags in the file, including the DT_NULL tag """ if self._num_tags != -1: @@ -193,14 +202,14 @@ def num_tags(self): self._num_tags = n + 1 return self._num_tags - def get_relocation_tables(self): + def get_relocation_tables(self) -> dict[str, RelocationTable | RelrRelocationTable]: """ Load all available relocation tables from DYNAMIC tags. Returns a dictionary mapping found table types (REL, RELA, RELR, JMPREL) to RelocationTable objects. 
""" - result = {} + result: dict[str, RelocationTable | RelrRelocationTable] = {} if list(self.iter_tags('DT_REL')): result['REL'] = RelocationTable(self.elffile, @@ -238,7 +247,7 @@ def get_relocation_tables(self): class DynamicSection(Section, Dynamic): """ ELF dynamic table section. Knows how to process the list of tags. """ - def __init__(self, header, name, elffile): + def __init__(self, header: Container, name: str, elffile: ELFFile) -> None: Section.__init__(self, header, name, elffile) stringtable = elffile.get_section(header['sh_link'], ('SHT_STRTAB', 'SHT_NOBITS', 'SHT_NULL')) Dynamic.__init__(self, self.stream, self.elffile, stringtable, @@ -248,14 +257,14 @@ def __init__(self, header, name, elffile): class DynamicSegment(Segment, Dynamic): """ ELF dynamic table segment. Knows how to process the list of tags. """ - def __init__(self, header, stream, elffile): + def __init__(self, header: Container, stream: IO[bytes], elffile: ELFFile) -> None: # The string table section to be used to resolve string names in # the dynamic tag array is the one pointed at by the sh_link field # of the dynamic section header. 
# So we must look for the dynamic section contained in the dynamic # segment, we do so by searching for the dynamic section whose content # is located at the same offset as the dynamic segment - stringtable = None + stringtable: _StringTable | None = None for section in elffile.iter_sections(): if (isinstance(section, DynamicSection) and section['sh_offset'] == header['p_offset']): @@ -265,10 +274,10 @@ def __init__(self, header, stream, elffile): Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset'], self['p_filesz'] == 0) self._symbol_size = self.elfstructs.Elf_Sym.sizeof() - self._num_symbols = None - self._symbol_name_map = None + self._num_symbols: int | None = None + self._symbol_name_map: dict[str, list[int]] | None = None - def num_symbols(self): + def num_symbols(self) -> int: """ Number of symbols in the table recovered from DT_SYMTAB """ if self._num_symbols is not None: @@ -278,7 +287,7 @@ def num_symbols(self): # from the corresponding hash table _, gnu_hash_offset = self.get_table_offset('DT_GNU_HASH') if gnu_hash_offset is not None: - hash_section = GNUHashTable(self.elffile, gnu_hash_offset, self) + hash_section: _HashTable = GNUHashTable(self.elffile, gnu_hash_offset, self) self._num_symbols = hash_section.get_number_of_symbols() # If DT_GNU_HASH did not exist, maybe we can use DT_HASH @@ -295,7 +304,7 @@ def num_symbols(self): tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB') if tab_ptr is None or tab_offset is None: raise ELFError('Segment does not contain DT_SYMTAB.') - nearest_ptr = None + nearest_ptr: int | None = None for tag in self.iter_tags(): tag_ptr = tag['d_ptr'] if tag['d_tag'] == 'DT_SYMENT': @@ -323,7 +332,7 @@ def num_symbols(self): return self._num_symbols - def get_symbol(self, index): + def get_symbol(self, index: int) -> Symbol: """ Get the symbol at index #index from the table (Symbol object) """ tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB') @@ -340,7 +349,7 @@ def get_symbol(self, index): 
return Symbol(symbol, symbol_name) - def get_symbol_by_name(self, name): + def get_symbol_by_name(self, name: str) -> list[Symbol] | None: """ Get a symbol(s) by name. Return None if no symbol by the given name exists. """ @@ -354,7 +363,7 @@ def get_symbol_by_name(self, name): symnums = self._symbol_name_map.get(name) return [self.get_symbol(i) for i in symnums] if symnums else None - def iter_symbols(self): + def iter_symbols(self) -> Iterator[Symbol]: """ Yield all symbols in this dynamic segment. The symbols are usually the same as returned by SymbolTableSection.iter_symbols. However, in stripped binaries, SymbolTableSection might have been removed. diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index a5953814..d652ae22 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -6,11 +6,14 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + import io from io import BytesIO import os import struct import zlib +from typing import IO, TYPE_CHECKING, Any from ..common.exceptions import ELFError, ELFParseError from ..common.utils import struct_parse, elf_assert @@ -32,6 +35,15 @@ from .constants import SHN_INDICES from ..dwarf.dwarf_util import _file_crc32 +if TYPE_CHECKING: + from collections.abc import Callable, Iterator + from collections.abc import Container as TContainer + from types import TracebackType + from typing import Self # 3.11+ + + from ..construct.lib.container import Container + + class ELFFile: """ Creation: the constructor accepts a stream (file-like object) with the contents of an ELF file. 
@@ -65,7 +77,7 @@ class ELFFile: e_ident_raw: the raw e_ident field of the header """ - def __init__(self, stream, stream_loader=None): + def __init__(self, stream: IO[bytes], stream_loader: Callable[[bytes], IO[bytes]] | None = None) -> None: self.stream = stream self.stream.seek(0, io.SEEK_END) self.stream_len = self.stream.tell() @@ -84,12 +96,12 @@ def __init__(self, stream, stream_loader=None): self.stream.seek(0) self.e_ident_raw = self.stream.read(16) - self._section_header_stringtable = None # Lazy load - self._section_name_map = None + self._section_header_stringtable: StringTableSection | None = None # Lazy load + self._section_name_map: dict[str, int] | None = None self.stream_loader = stream_loader @classmethod - def load_from_path(cls, path): + def load_from_path(cls, path: bytes | str) -> ELFFile: """Takes a path to a file on the local filesystem, and returns an ELFFile from it, setting up a correct stream_loader relative to the original file. @@ -98,7 +110,7 @@ def load_from_path(cls, path): return ELFFile(stream, ELFFile.make_relative_loader(path)) @staticmethod - def make_relative_loader(base_path): + def make_relative_loader(base_path: bytes | str) -> Callable[[bytes], IO[bytes]]: """ Return a function that takes a potentially relative path, resolves it against base_path (bytes or str), and opens a file at that. 
@@ -107,13 +119,13 @@ def make_relative_loader(base_path): if isinstance(base_path, str): base_path = base_path.encode('UTF-8') # resolver takes a bytes path base_directory = os.path.dirname(base_path) - def loader(rel_path): + def loader(rel_path: bytes) -> IO[bytes]: if not os.path.isabs(rel_path): rel_path = os.path.join(base_directory, rel_path) return open(rel_path, 'rb') return loader - def num_sections(self): + def num_sections(self) -> int: """ Number of sections in the file """ if self['e_shoff'] == 0: @@ -130,7 +142,7 @@ def num_sections(self): return self._get_section_header(0)['sh_size'] return self['e_shnum'] - def get_section(self, n, type=None): + def get_section(self, n: int, type: TContainer[str] | None = None) -> Section: """ Get the section at index #n from the file (Section object or a subclass) """ @@ -139,7 +151,7 @@ def get_section(self, n, type=None): raise ELFError("Unexpected section type %s, expected %s" % (section_header['sh_type'], type)) return self._make_section(section_header) - def _get_linked_symtab_section(self, n): + def _get_linked_symtab_section(self, n: int) -> SymbolTableSection: """ Get the section at index #n from the file, throws if it's not a SYMTAB/DYNTAB. Used for resolving section links with target type validation. @@ -149,7 +161,7 @@ def _get_linked_symtab_section(self, n): raise ELFError("Section points at section %d of type %s, expected SHT_SYMTAB/SHT_DYNSYM" % (n, section_header['sh_type'])) return self._make_section(section_header) - def _get_linked_strtab_section(self, n): + def _get_linked_strtab_section(self, n: int) -> StringTableSection: """ Get the section at index #n from the file, throws if it's not a STRTAB. Used for resolving section links with target type validation. 
@@ -159,7 +171,7 @@ def _get_linked_strtab_section(self, n): raise ELFError("SHT_SYMTAB section points at section %d of type %s, expected SHT_STRTAB" % (n, section_header['sh_type'])) return self._make_section(section_header) - def get_section_by_name(self, name): + def get_section_by_name(self, name: str) -> Section | None: """ Get a section from the file, by name. Return None if no such section exists. """ @@ -171,7 +183,7 @@ def get_section_by_name(self, name): secnum = self._section_name_map.get(name, None) return None if secnum is None else self.get_section(secnum) - def get_section_index(self, section_name): + def get_section_index(self, section_name: str) -> int | None: """ Gets the index of the section by name. Return None if no such section name exists. """ @@ -182,14 +194,14 @@ def get_section_index(self, section_name): self._make_section_name_map() return self._section_name_map.get(section_name, None) - def has_section(self, section_name): + def has_section(self, section_name: str) -> bool: """ Section existence check by name, without the overhead of parsing if found. """ if self._section_name_map is None: self._make_section_name_map() return section_name in self._section_name_map - def iter_sections(self, type=None): + def iter_sections(self, type: str | None = None) -> Iterator[Section]: """ Yield all the sections in the file. If the optional |type| parameter is passed, this method will only yield sections of the given type. 
The parameter value must be a string containing the @@ -201,7 +213,7 @@ def iter_sections(self, type=None): if type is None or section['sh_type'] == type: yield section - def num_segments(self): + def num_segments(self) -> int: """ Number of segments in the file """ # From: https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI @@ -216,13 +228,13 @@ def num_segments(self): else: return self.get_section(0)['sh_info'] - def get_segment(self, n): + def get_segment(self, n: int) -> Segment: """ Get the segment at index #n from the file (Segment object) """ segment_header = self._get_segment_header(n) return self._make_segment(segment_header) - def iter_segments(self, type=None): + def iter_segments(self, type: str | None = None) -> Iterator[Segment]: """ Yield all the segments in the file. If the optional |type| parameter is passed, this method will only yield segments of the given type. The parameter value must be a string containing the @@ -234,7 +246,7 @@ def iter_segments(self, type=None): if type is None or segment['p_type'] == type: yield segment - def address_offsets(self, start, size=1): + def address_offsets(self, start: int, size: int = 1) -> Iterator[int]: """ Yield a file offset for each ELF segment containing a memory region. A memory region is defined by the range [start...start+size). The @@ -247,7 +259,7 @@ def address_offsets(self, start, size=1): end <= seg['p_vaddr'] + seg['p_filesz']): yield start - seg['p_vaddr'] + seg['p_offset'] - def has_dwarf_info(self, strict=False): + def has_dwarf_info(self, strict: bool = False) -> bool: """ Check whether this file appears to have debugging information. We assume that if it has the .debug_info or .zdebug_info section, it has all the other required sections as well. 
@@ -260,7 +272,7 @@ def has_dwarf_info(self, strict=False): self.has_section('.zdebug_info') or (not strict and self.has_section('.eh_frame'))) - def get_dwarf_info(self, relocate_dwarf_sections=True, follow_links=True): + def get_dwarf_info(self, relocate_dwarf_sections: bool = True, follow_links: bool = True) -> DWARFInfo: """ Return a DWARFInfo object representing the debugging information in this file. @@ -314,7 +326,7 @@ def get_dwarf_info(self, relocate_dwarf_sections=True, follow_links=True): debug_sup_name, gnu_debugaltlink_name, debug_types_sec_name, eh_frame_sec_name) = section_names - debug_sections = {} + debug_sections: dict[str, DebugSectionDescriptor | None] = {} for secname in section_names: section = self.get_section_by_name(secname) if section is None: @@ -360,19 +372,19 @@ def get_dwarf_info(self, relocate_dwarf_sections=True, follow_links=True): dwarfinfo.supplementary_dwarfinfo = self.get_supplementary_dwarfinfo(dwarfinfo) return dwarfinfo - def has_dwarf_link(self): + def has_dwarf_link(self) -> bool: """ Whether the binary's debug info is in an external file. Use get_dwarf_link to retrieve the path to it. """ return self.has_section('.gnu_debuglink') - def get_dwarf_link(self): + def get_dwarf_link(self) -> Container | None: """ Read the .gnu_debuglink section, return an object with filename (as bytes) and checksum (as number) in it. """ section = self.get_section_by_name('.gnu_debuglink') return struct_parse(self.structs.Gnu_debuglink, section.stream, section.header.sh_offset) if section else None - def get_supplementary_dwarfinfo(self, dwarfinfo): + def get_supplementary_dwarfinfo(self, dwarfinfo: DWARFInfo) -> DWARFInfo | None: """ Read supplementary dwarfinfo, from either the standared .debug_sup section, the GNU proprietary .gnu_debugaltlink, or .gnu_debuglink. 
@@ -387,12 +399,12 @@ def get_supplementary_dwarfinfo(self, dwarfinfo): return None - def has_ehabi_info(self): + def has_ehabi_info(self) -> bool: """ Check whether this file appears to have arm exception handler index table. """ return any(self.iter_sections(type='SHT_ARM_EXIDX')) - def get_ehabi_infos(self): + def get_ehabi_infos(self) -> list[EHABIInfo] | None: """ Generally, shared library and executable contain 1 .ARM.exidx section. Object file contains many .ARM.exidx sections. So we must traverse every section and filter sections whose type is SHT_ARM_EXIDX. @@ -406,7 +418,7 @@ def get_ehabi_infos(self): ] return _ret if _ret else None - def get_machine_arch(self): + def get_machine_arch(self) -> str: """ Return the machine architecture, as detected from the ELF header. """ architectures = { @@ -600,7 +612,7 @@ def get_machine_arch(self): return architectures.get(self['e_machine'], '') - def get_shstrndx(self): + def get_shstrndx(self) -> int: """ Find the string table section index for the section header table """ # From https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html: @@ -615,12 +627,12 @@ def get_shstrndx(self): #-------------------------------- PRIVATE --------------------------------# - def __getitem__(self, name): + def __getitem__(self, name: str) -> Any: """ Implement dict-like access to header entries """ return self.header[name] - def _identify_file(self): + def _identify_file(self) -> None: """ Verify the ELF file and identify its class and endianness. 
""" # Note: this code reads the stream directly, without using ELFStructs, @@ -646,7 +658,7 @@ def _identify_file(self): else: raise ELFError('Invalid EI_DATA %s' % repr(ei_data)) - def _section_offset(self, n): + def _section_offset(self, n: int) -> int: """ Compute the offset of section #n in the file """ shentsize = self['e_shentsize'] @@ -654,7 +666,7 @@ def _section_offset(self, n): raise ELFError('Too small e_shentsize: %s' % shentsize) return self['e_shoff'] + n * shentsize - def _segment_offset(self, n): + def _segment_offset(self, n: int) -> int: """ Compute the offset of segment #n in the file """ phentsize = self['e_phentsize'] @@ -662,7 +674,7 @@ def _segment_offset(self, n): raise ELFError('Too small e_phentsize: %s' % phentsize) return self['e_phoff'] + n * phentsize - def _make_segment(self, segment_header): + def _make_segment(self, segment_header: Container) -> Segment: """ Create a Segment object of the appropriate type """ segtype = segment_header['p_type'] @@ -675,7 +687,7 @@ def _make_segment(self, segment_header): else: return Segment(segment_header, self.stream) - def _get_section_header(self, n): + def _get_section_header(self, n: int) -> Container | None: """ Find the header of section #n, parse it and return the struct """ @@ -688,7 +700,7 @@ def _get_section_header(self, n): self.stream, stream_pos=stream_pos) - def _get_section_name(self, section_header): + def _get_section_name(self, section_header: Container) -> str: """ Given a section header, find this section's name in the file's string table """ @@ -700,7 +712,7 @@ def _get_section_name(self, section_header): name_offset = section_header['sh_name'] return self._section_header_stringtable.get_string(name_offset) - def _make_section(self, section_header): + def _make_section(self, section_header: Container) -> Section: """ Create a section object of the appropriate type """ name = self._get_section_name(section_header) @@ -743,12 +755,12 @@ def _make_section(self, section_header): 
else: return Section(section_header, name, self) - def _make_section_name_map(self): + def _make_section_name_map(self) -> None: self._section_name_map = {} for i, sec in enumerate(self.iter_sections()): self._section_name_map[sec.name] = i - def _make_symbol_table_section(self, section_header, name): + def _make_symbol_table_section(self, section_header: Container, name: str) -> SymbolTableSection: """ Create a SymbolTableSection """ linked_strtab_index = section_header['sh_link'] @@ -758,7 +770,7 @@ def _make_symbol_table_section(self, section_header, name): elffile=self, stringtable=strtab_section) - def _make_symbol_table_index_section(self, section_header, name): + def _make_symbol_table_index_section(self, section_header: Container, name: str) -> SymbolTableIndexSection: """ Create a SymbolTableIndexSection object """ linked_symtab_index = section_header['sh_link'] @@ -766,7 +778,7 @@ def _make_symbol_table_index_section(self, section_header, name): section_header, name, elffile=self, symboltable=linked_symtab_index) - def _make_sunwsyminfo_table_section(self, section_header, name): + def _make_sunwsyminfo_table_section(self, section_header: Container, name: str) -> SUNWSyminfoTableSection: """ Create a SUNWSyminfoTableSection """ linked_strtab_index = section_header['sh_link'] @@ -776,7 +788,7 @@ def _make_sunwsyminfo_table_section(self, section_header, name): elffile=self, symboltable=strtab_section) - def _make_gnu_verneed_section(self, section_header, name): + def _make_gnu_verneed_section(self, section_header: Container, name: str) -> GNUVerNeedSection: """ Create a GNUVerNeedSection """ linked_strtab_index = section_header['sh_link'] @@ -786,7 +798,7 @@ def _make_gnu_verneed_section(self, section_header, name): elffile=self, stringtable=strtab_section) - def _make_gnu_verdef_section(self, section_header, name): + def _make_gnu_verdef_section(self, section_header: Container, name: str) -> GNUVerDefSection: """ Create a GNUVerDefSection """ 
linked_strtab_index = section_header['sh_link'] @@ -796,7 +808,7 @@ def _make_gnu_verdef_section(self, section_header, name): elffile=self, stringtable=strtab_section) - def _make_gnu_versym_section(self, section_header, name): + def _make_gnu_versym_section(self, section_header: Container, name: str) -> GNUVerSymSection: """ Create a GNUVerSymSection """ linked_symtab_index = section_header['sh_link'] @@ -806,21 +818,21 @@ def _make_gnu_versym_section(self, section_header, name): elffile=self, symboltable=symtab_section) - def _make_elf_hash_section(self, section_header, name): + def _make_elf_hash_section(self, section_header: Container, name: str) -> ELFHashSection: linked_symtab_index = section_header['sh_link'] symtab_section = self._get_linked_symtab_section(linked_symtab_index) return ELFHashSection( section_header, name, self, symtab_section ) - def _make_gnu_hash_section(self, section_header, name): + def _make_gnu_hash_section(self, section_header: Container, name: str) -> GNUHashSection: linked_symtab_index = section_header['sh_link'] symtab_section = self._get_linked_symtab_section(linked_symtab_index) return GNUHashSection( section_header, name, self, symtab_section ) - def _get_segment_header(self, n): + def _get_segment_header(self, n: int) -> Container: # Elf_Phdr: """ Find the header of segment #n, parse it and return the struct """ return struct_parse( @@ -828,7 +840,7 @@ def _get_segment_header(self, n): self.stream, stream_pos=self._segment_offset(n)) - def _get_section_header_stringtable(self): + def _get_section_header_stringtable(self) -> StringTableSection | None: """ Get the string table section corresponding to the section header table. """ @@ -843,13 +855,13 @@ def _get_section_header_stringtable(self): name='', elffile=self) - def _parse_elf_header(self): + def _parse_elf_header(self) -> Container: """ Parses the ELF file header and assigns the result to attributes of this object. 
""" return struct_parse(self.structs.Elf_Ehdr, self.stream, stream_pos=0) - def _read_dwarf_section(self, section, relocate_dwarf_sections): + def _read_dwarf_section(self, section: Section, relocate_dwarf_sections: bool) -> DebugSectionDescriptor: """ Read the contents of a DWARF section from the stream and return a DebugSectionDescriptor. Apply relocations if asked to. """ @@ -878,7 +890,7 @@ def _read_dwarf_section(self, section, relocate_dwarf_sections): address=section['sh_addr']) @staticmethod - def _decompress_dwarf_section(section): + def _decompress_dwarf_section(section: DebugSectionDescriptor) -> DebugSectionDescriptor: """ Returns the uncompressed contents of the provided DWARF section. """ # TODO: support other compression formats from readelf.c @@ -912,16 +924,16 @@ def _decompress_dwarf_section(section): return section._replace(stream=uncompressed_stream, size=size) - def close(self): + def close(self) -> None: self.stream.close() - def __enter__(self): + def __enter__(self) -> Self: return self - def __exit__(self, type, value, traceback): + def __exit__(self, type: type[BaseException] | None, value: BaseException | None, traceback: TracebackType | None) -> None: self.close() - def has_phantom_bytes(self): + def has_phantom_bytes(self) -> bool: """The XC16 compiler for the PIC microcontrollers emits DWARF where all odd bytes in all DWARF sections are to be discarded ("phantom"). 
diff --git a/elftools/elf/enums.py b/elftools/elf/enums.py index 7be69d31..6685da8e 100644 --- a/elftools/elf/enums.py +++ b/elftools/elf/enums.py @@ -6,33 +6,40 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + +from typing import TYPE_CHECKING + from ..common.utils import merge_dicts from ..construct import Pass +if TYPE_CHECKING: + from collections.abc import Mapping + # e_ident[EI_CLASS] in the ELF header -ENUM_EI_CLASS = dict( +ENUM_EI_CLASS: Mapping[str, int] = dict( ELFCLASSNONE=0, ELFCLASS32=1, ELFCLASS64=2 ) # e_ident[EI_DATA] in the ELF header -ENUM_EI_DATA = dict( +ENUM_EI_DATA: Mapping[str, int] = dict( ELFDATANONE=0, ELFDATA2LSB=1, ELFDATA2MSB=2 ) # e_version in the ELF header -ENUM_E_VERSION = dict( +ENUM_E_VERSION: Mapping[str, int] = dict( EV_NONE=0, EV_CURRENT=1, _default_=Pass, ) # e_ident[EI_OSABI] in the ELF header -ENUM_EI_OSABI = dict( +ENUM_EI_OSABI: Mapping[str, int] = dict( ELFOSABI_SYSV=0, ELFOSABI_HPUX=1, ELFOSABI_NETBSD=2, @@ -59,7 +66,7 @@ ) # e_type in the ELF header -ENUM_E_TYPE = dict( +ENUM_E_TYPE: Mapping[str, int] = dict( ET_NONE=0, ET_REL=1, ET_EXEC=2, @@ -71,7 +78,7 @@ ) # e_machine in the ELF header -ENUM_E_MACHINE = dict( +ENUM_E_MACHINE: Mapping[str, int] = dict( EM_NONE = 0, # No machine EM_M32 = 1, # AT&T WE 32100 EM_SPARC = 2, # SPARC @@ -278,7 +285,7 @@ # we later create per-processor dicts that use the LOPROC...HIPROC range to # define processor-specific values. The proper dict should be used based on the # machine the ELF header refers to. 
-ENUM_SH_TYPE_BASE = dict( +ENUM_SH_TYPE_BASE: Mapping[str, int] = dict( SHT_NULL=0, SHT_PROGBITS=1, SHT_SYMTAB=2, @@ -318,11 +325,11 @@ _default_=Pass, ) -ENUM_SH_TYPE_AMD64 = merge_dicts( +ENUM_SH_TYPE_AMD64: Mapping[str, int] = merge_dicts( ENUM_SH_TYPE_BASE, dict(SHT_AMD64_UNWIND=0x70000001)) -ENUM_SH_TYPE_ARM = merge_dicts( +ENUM_SH_TYPE_ARM: Mapping[str, int] = merge_dicts( ENUM_SH_TYPE_BASE, dict( SHT_ARM_EXIDX=0x70000001, @@ -330,15 +337,15 @@ SHT_ARM_ATTRIBUTES=0x70000003, SHT_ARM_DEBUGOVERLAY=0x70000004)) -ENUM_SH_TYPE_AARCH64 = merge_dicts( +ENUM_SH_TYPE_AARCH64: Mapping[str, int] = merge_dicts( ENUM_SH_TYPE_BASE, dict(SHT_AARCH64_ATTRIBUTES=0x70000003)) -ENUM_SH_TYPE_RISCV = merge_dicts( +ENUM_SH_TYPE_RISCV: Mapping[str, int] = merge_dicts( ENUM_SH_TYPE_BASE, dict(SHT_RISCV_ATTRIBUTES=0x70000003)) -ENUM_SH_TYPE_MIPS = merge_dicts( +ENUM_SH_TYPE_MIPS: Mapping[str, int] = merge_dicts( ENUM_SH_TYPE_BASE, dict( SHT_MIPS_LIBLIST=0x70000000, @@ -378,7 +385,7 @@ SHT_MIPS_PDR_EXCEPTION=0x70000029, SHT_MIPS_ABIFLAGS=0x7000002a)) -ENUM_ELFCOMPRESS_TYPE = dict( +ENUM_ELFCOMPRESS_TYPE: Mapping[str, int] = dict( ELFCOMPRESS_ZLIB=1, ELFCOMPRESS_LOOS=0x60000000, ELFCOMPRESS_HIOS=0x6fffffff, @@ -391,7 +398,7 @@ # some values scavenged from the ELF headers in binutils-2.21 # # Using the same base + per-processor augmentation technique as in sh_type. 
-ENUM_P_TYPE_BASE = dict( +ENUM_P_TYPE_BASE: Mapping[str, int] = dict( PT_NULL=0, PT_LOAD=1, PT_DYNAMIC=2, @@ -415,28 +422,28 @@ _default_=Pass, ) -ENUM_P_TYPE_ARM = merge_dicts( +ENUM_P_TYPE_ARM: Mapping[str, int] = merge_dicts( ENUM_P_TYPE_BASE, dict( PT_ARM_ARCHEXT=0x70000000, PT_ARM_EXIDX=0x70000001)) -ENUM_P_TYPE_AARCH64 = merge_dicts( +ENUM_P_TYPE_AARCH64: Mapping[str, int] = merge_dicts( ENUM_P_TYPE_BASE, dict( PT_AARCH64_ARCHEXT=0x70000000, PT_AARCH64_UNWIND=0x70000001)) -ENUM_P_TYPE_MIPS = merge_dicts( +ENUM_P_TYPE_MIPS: Mapping[str, int] = merge_dicts( ENUM_P_TYPE_BASE, dict(PT_MIPS_ABIFLAGS=0x70000003)) -ENUM_P_TYPE_RISCV = merge_dicts( +ENUM_P_TYPE_RISCV: Mapping[str, int] = merge_dicts( ENUM_P_TYPE_BASE, dict(PT_RISCV_ATTRIBUTES=0x70000003)) # st_info bindings in the symbol header -ENUM_ST_INFO_BIND = dict( +ENUM_ST_INFO_BIND: Mapping[str, int] = dict( STB_LOCAL=0, STB_GLOBAL=1, STB_WEAK=2, @@ -449,7 +456,7 @@ ) # st_info type in the symbol header -ENUM_ST_INFO_TYPE = dict( +ENUM_ST_INFO_TYPE: Mapping[str, int] = dict( STT_NOTYPE=0, STT_OBJECT=1, STT_FUNC=2, @@ -468,7 +475,7 @@ ) # visibility from st_other -ENUM_ST_VISIBILITY = dict( +ENUM_ST_VISIBILITY: Mapping[str, int] = dict( STV_DEFAULT=0, STV_INTERNAL=1, STV_HIDDEN=2, @@ -479,12 +486,12 @@ _default_=Pass, ) -ENUM_ST_LOCAL = dict( +ENUM_ST_LOCAL: Mapping[str, int] = dict( _default_=Pass, ) # st_shndx -ENUM_ST_SHNDX = dict( +ENUM_ST_SHNDX: Mapping[str, int] = dict( SHN_UNDEF=0, SHN_ABS=0xfff1, SHN_COMMON=0xfff2, @@ -492,7 +499,7 @@ ) # d_tag -ENUM_D_TAG_COMMON = dict( +ENUM_D_TAG_COMMON: Mapping[str, int] = dict( DT_NULL=0, DT_NEEDED=1, DT_PLTRELSZ=2, @@ -582,7 +589,7 @@ # Above are the dynamic tags which are valid always. # Below are the dynamic tags which are only valid in certain contexts. 
-ENUM_D_TAG_SOLARIS = dict( +ENUM_D_TAG_SOLARIS: Mapping[str, int] = dict( DT_SUNW_AUXILIARY=0x6000000d, DT_SUNW_RTLDINF=0x6000000e, DT_SUNW_FILTER=0x6000000f, @@ -603,7 +610,7 @@ DT_SUNW_CAPCHAINSZ=0x6000001f, ) -ENUM_D_TAG_MIPS = dict( +ENUM_D_TAG_MIPS: Mapping[str, int] = dict( DT_MIPS_RLD_VERSION=0x70000001, DT_MIPS_TIME_STAMP=0x70000002, DT_MIPS_ICHECKSUM=0x70000003, @@ -624,7 +631,7 @@ DT_MIPS_XHASH=0x70000036, ) -ENUM_D_TAG_AARCH64 = dict( +ENUM_D_TAG_AARCH64: Mapping[str, int] = dict( DT_AARCH64_BTI_PLT=0x70000001, ) @@ -634,7 +641,7 @@ # TODO: add the rest of the machine-specific dynamic tags, not just mips and # solaris -ENUMMAP_EXTRA_D_TAG_MACHINE = dict( +ENUMMAP_EXTRA_D_TAG_MACHINE: Mapping[str, Mapping[str, int]] = dict( EM_MIPS=ENUM_D_TAG_MIPS, EM_MIPS_RS3_LE=ENUM_D_TAG_MIPS, EM_AARCH64=ENUM_D_TAG_AARCH64 @@ -647,7 +654,7 @@ for k in ENUMMAP_EXTRA_D_TAG_MACHINE: ENUM_D_TAG.update(ENUMMAP_EXTRA_D_TAG_MACHINE[k]) -ENUM_DT_FLAGS = dict( +ENUM_DT_FLAGS: Mapping[str, int] = dict( DF_ORIGIN=0x1, DF_SYMBOLIC=0x2, DF_TEXTREL=0x4, @@ -655,7 +662,7 @@ DF_STATIC_TLS=0x10, ) -ENUM_DT_FLAGS_1 = dict( +ENUM_DT_FLAGS_1: Mapping[str, int] = dict( DF_1_NOW=0x1, DF_1_GLOBAL=0x2, DF_1_GROUP=0x4, @@ -686,7 +693,7 @@ DF_1_PIE=0x8000000, ) -ENUM_RELOC_TYPE_MIPS = dict( +ENUM_RELOC_TYPE_MIPS: Mapping[str, int] = dict( R_MIPS_NONE=0, R_MIPS_16=1, R_MIPS_32=2, @@ -741,7 +748,7 @@ _default_=Pass, ) -ENUM_RELOC_TYPE_i386 = dict( +ENUM_RELOC_TYPE_i386: Mapping[str, int] = dict( R_386_NONE=0, R_386_32=1, R_386_PC32=2, @@ -788,7 +795,7 @@ _default_=Pass, ) -ENUM_RELOC_TYPE_x64 = dict( +ENUM_RELOC_TYPE_x64: Mapping[str, int] = dict( R_X86_64_NONE=0, R_X86_64_64=1, R_X86_64_PC32=2, @@ -831,7 +838,7 @@ _default_=Pass, ) -ENUM_RELOC_TYPE_BPF = dict( +ENUM_RELOC_TYPE_BPF: Mapping[str, int] = dict( R_BPF_NONE=0, R_BPF_64_64=1, R_BPF_64_ABS64=2, @@ -842,7 +849,7 @@ ) # https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc -ENUM_RELOC_TYPE_LOONGARCH = dict( 
+ENUM_RELOC_TYPE_LOONGARCH: Mapping[str, int] = dict( R_LARCH_NONE=0, R_LARCH_32=1, R_LARCH_64=2, @@ -944,7 +951,7 @@ _default_=Pass, ) -ENUM_RELOC_TYPE_S390X = dict( +ENUM_RELOC_TYPE_S390X: Mapping[str, int] = dict( R_390_NONE=0, R_390_8=1, R_390_12=2, @@ -1008,7 +1015,7 @@ ) # Sunw Syminfo Bound To special values -ENUM_SUNW_SYMINFO_BOUNDTO = dict( +ENUM_SUNW_SYMINFO_BOUNDTO: Mapping[str, int] = dict( SYMINFO_BT_SELF=0xffff, SYMINFO_BT_PARENT=0xfffe, SYMINFO_BT_NONE=0xfffd, @@ -1026,7 +1033,7 @@ ) # PT_NOTE section types for all ELF types except ET_CORE -ENUM_NOTE_N_TYPE = dict( +ENUM_NOTE_N_TYPE: Mapping[str, int] = dict( NT_GNU_ABI_TAG=1, NT_GNU_HWCAP=2, NT_GNU_BUILD_ID=3, @@ -1036,7 +1043,7 @@ ) # PT_NOTE section types for ET_CORE -ENUM_CORE_NOTE_N_TYPE = dict( +ENUM_CORE_NOTE_N_TYPE: Mapping[str, int] = dict( NT_PRSTATUS=1, NT_FPREGSET=2, NT_PRPSINFO=3, @@ -1048,7 +1055,7 @@ ) # Values in GNU .note.ABI-tag notes (n_type=='NT_GNU_ABI_TAG') -ENUM_NOTE_ABI_TAG_OS = dict( +ENUM_NOTE_ABI_TAG_OS: Mapping[str, int] = dict( ELF_NOTE_OS_LINUX=0, ELF_NOTE_OS_GNU=1, ELF_NOTE_OS_SOLARIS2=2, @@ -1059,7 +1066,7 @@ ) # Values in GNU .note.gnu.property notes (n_type=='NT_GNU_PROPERTY_TYPE_0') -ENUM_NOTE_GNU_PROPERTY_TYPE = dict( +ENUM_NOTE_GNU_PROPERTY_TYPE: Mapping[str, int] = dict( GNU_PROPERTY_STACK_SIZE=1, GNU_PROPERTY_NO_COPY_ON_PROTECTED=2, GNU_PROPERTY_X86_FEATURE_1_AND=0xc0000002, @@ -1070,7 +1077,7 @@ _default_=Pass, ) -ENUM_GNU_PROPERTY_X86_FEATURE_1_FLAGS = dict( +ENUM_GNU_PROPERTY_X86_FEATURE_1_FLAGS: Mapping[str, int] = dict( GNU_PROPERTY_X86_FEATURE_1_IBT=1, GNU_PROPERTY_X86_FEATURE_1_SHSTK=2, GNU_PROPERTY_X86_FEATURE_1_LAM_U48=4, @@ -1078,7 +1085,7 @@ _default_=Pass ) -ENUM_RELOC_TYPE_ARM = dict( +ENUM_RELOC_TYPE_ARM: Mapping[str, int] = dict( R_ARM_NONE=0, R_ARM_PC24=1, R_ARM_ABS32=2, @@ -1210,7 +1217,7 @@ R_ARM_IRELATIVE=140, ) -ENUM_RELOC_TYPE_AARCH64 = dict( +ENUM_RELOC_TYPE_AARCH64: Mapping[str, int] = dict( R_AARCH64_NONE=256, R_AARCH64_ABS64=257, 
R_AARCH64_ABS32=258, @@ -1321,7 +1328,7 @@ R_AARCH64_TLS_TPREL32=1033, ) -ENUM_ATTR_TAG_ARM = dict( +ENUM_ATTR_TAG_ARM: Mapping[str, int] = dict( TAG_FILE=1, TAG_SECTION=2, TAG_SYMBOL=3, @@ -1374,7 +1381,7 @@ TAG_PACRET_USE=76, ) -ENUM_ATTR_TAG_RISCV = dict( +ENUM_ATTR_TAG_RISCV: Mapping[str, int] = dict( TAG_FILE=1, TAG_SECTION=2, TAG_SYMBOL=3, @@ -1390,7 +1397,7 @@ # https://openpowerfoundation.org/wp-content/uploads/2016/03/ABI64BitOpenPOWERv1.1_16July2015_pub4.pdf # See 3.5.3 Relocation Types Table. -ENUM_RELOC_TYPE_PPC64 = dict( +ENUM_RELOC_TYPE_PPC64: Mapping[str, int] = dict( R_PPC64_NONE=0, R_PPC64_ADDR32=1, R_PPC64_ADDR24=2, @@ -1517,7 +1524,7 @@ # https://example61560.files.wordpress.com/2016/11/powerpc_abi.pdf # See 4.13.5 Relocation Types Table. -ENUM_RELOC_TYPE_PPC = dict( +ENUM_RELOC_TYPE_PPC: Mapping[str, int] = dict( R_PPC_NONE=0, R_PPC_ADDR32=1, R_PPC_ADDR24=2, @@ -1574,7 +1581,7 @@ ) # https://gitlab.utu.fi/soft/binutils-gdb/-/blob/d18547d8b08615a58db18ad3e43f721dc92ae298/include/elf/v850.h -ENUM_RELOC_TYPE_V850 = dict( +ENUM_RELOC_TYPE_V850: Mapping[str, int] = dict( R_V800_NONE=0, R_V810_NONE=48, R_V810_BYTE=49, diff --git a/elftools/elf/gnuversions.py b/elftools/elf/gnuversions.py index d514d41f..88996695 100644 --- a/elftools/elf/gnuversions.py +++ b/elftools/elf/gnuversions.py @@ -6,9 +6,21 @@ # Yann Rouillard (yann@pleiades.fr.eu.org) # This code is in the public domain #------------------------------------------------------------------------------ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + from ..common.utils import struct_parse, elf_assert from .sections import Section, Symbol +if TYPE_CHECKING: + from collections.abc import Iterator + + from ..construct.core import Struct + from ..construct.lib.container import Container + from .elffile import ELFFile + from .sections import StringTableSection, SymbolTableSection + class Version: """ Version object - representing a version definition or dependency @@ 
-23,11 +35,11 @@ class Version: Similarly to Section objects, allows dictionary-like access to verdef/verneed entry """ - def __init__(self, entry, name=None): + def __init__(self, entry: Container, name: str | None = None) -> None: self.entry = entry self.name = name - def __getitem__(self, name): + def __getitem__(self, name: str) -> Any: """ Implement dict-like access to entry """ return self.entry[name] @@ -40,11 +52,11 @@ class VersionAuxiliary: Similarly to Section objects, allows dictionary-like access to the verdaux/vernaux entry """ - def __init__(self, entry, name): + def __init__(self, entry: Container, name: str) -> None: self.entry = entry self.name = name - def __getitem__(self, name): + def __getitem__(self, name: str) -> Any: """ Implement dict-like access to entries """ return self.entry[name] @@ -55,27 +67,35 @@ class GNUVersionSection(Section): sections class which contains shareable code """ - def __init__(self, header, name, elffile, stringtable, - field_prefix, version_struct, version_auxiliaries_struct): + def __init__( + self, + header: Container, + name: str, + elffile: ELFFile, + stringtable: StringTableSection, + field_prefix: str, + version_struct: Struct, + version_auxiliaries_struct: Struct, + ) -> None: super().__init__(header, name, elffile) self.stringtable = stringtable self.field_prefix = field_prefix self.version_struct = version_struct self.version_auxiliaries_struct = version_auxiliaries_struct - def num_versions(self): + def num_versions(self) -> int: """ Number of version entries in the section """ return self['sh_info'] - def _field_name(self, name, auxiliary=False): + def _field_name(self, name: str, auxiliary: bool = False) -> str: """ Return the real field's name of version or a version auxiliary entry """ middle = 'a_' if auxiliary else '_' return self.field_prefix + middle + name - def _iter_version_auxiliaries(self, entry_offset, count): + def _iter_version_auxiliaries(self, entry_offset: int, count: int) -> 
Iterator[VersionAuxiliary]: """ Yield all auxiliary entries of a version entry """ name_field = self._field_name('name', auxiliary=True) @@ -93,7 +113,7 @@ def _iter_version_auxiliaries(self, entry_offset, count): entry_offset += entry[next_field] - def iter_versions(self): + def iter_versions(self) -> Iterator[tuple[Version, Iterator[VersionAuxiliary]]]: """ Yield all the version entries in the section Each time it returns the main version structure and an iterator to walk through its auxiliaries entries @@ -128,13 +148,13 @@ class GNUVerNeedSection(GNUVersionSection): """ ELF SUNW or GNU Version Needed table section. Has an associated StringTableSection that's passed in the constructor. """ - def __init__(self, header, name, elffile, stringtable): + def __init__(self, header: Container, name: str, elffile: ELFFile, stringtable: StringTableSection) -> None: super().__init__( header, name, elffile, stringtable, 'vn', elffile.structs.Elf_Verneed, elffile.structs.Elf_Vernaux) - self._has_indexes = None + self._has_indexes: bool | None = None - def has_indexes(self): + def has_indexes(self) -> bool: """ Return True if at least one version definition entry has an index that is stored in the vna_other field. This information is used for symbol versioning @@ -149,12 +169,12 @@ def has_indexes(self): return self._has_indexes - def iter_versions(self): + def iter_versions(self) -> Iterator[tuple[Version, Iterator[VersionAuxiliary]]]: for verneed, vernaux in super().iter_versions(): verneed.name = self.stringtable.get_string(verneed['vn_file']) yield verneed, vernaux - def get_version(self, index): + def get_version(self, index: int) -> tuple[Version, VersionAuxiliary] | None: """ Get the version information located at index #n in the table Return boths the verneed structure and the vernaux structure that contains the name of the version @@ -171,12 +191,12 @@ class GNUVerDefSection(GNUVersionSection): """ ELF SUNW or GNU Version Definition table section. 
Has an associated StringTableSection that's passed in the constructor. """ - def __init__(self, header, name, elffile, stringtable): + def __init__(self, header: Container, name: str, elffile: ELFFile, stringtable: StringTableSection) -> None: super().__init__( header, name, elffile, stringtable, 'vd', elffile.structs.Elf_Verdef, elffile.structs.Elf_Verdaux) - def get_version(self, index): + def get_version(self, index: int) -> tuple[Version, Iterator[VersionAuxiliary]] | None: """ Get the version information located at index #n in the table Return boths the verdef structure and an iterator to retrieve both the version names and dependencies in the form of @@ -193,16 +213,16 @@ class GNUVerSymSection(Section): """ ELF SUNW or GNU Versym table section. Has an associated SymbolTableSection that's passed in the constructor. """ - def __init__(self, header, name, elffile, symboltable): + def __init__(self, header: Container, name: str, elffile: ELFFile, symboltable: SymbolTableSection) -> None: super().__init__(header, name, elffile) self.symboltable = symboltable - def num_symbols(self): + def num_symbols(self) -> int: """ Number of symbols in the table """ return self['sh_size'] // self['sh_entsize'] - def get_symbol(self, n): + def get_symbol(self, n: int) -> Symbol: """ Get the symbol at index #n from the table (Symbol object) It begins at 1 and not 0 since the first entry is used to store the current version of the syminfo table @@ -217,7 +237,7 @@ def get_symbol(self, n): name = self.symboltable.get_symbol(n).name return Symbol(entry, name) - def iter_symbols(self): + def iter_symbols(self) -> Iterator[Symbol]: """ Yield all the symbols in the table """ for i in range(self.num_symbols()): diff --git a/elftools/elf/hash.py b/elftools/elf/hash.py index dea056cb..fd49460e 100644 --- a/elftools/elf/hash.py +++ b/elftools/elf/hash.py @@ -12,9 +12,11 @@ from typing import TYPE_CHECKING, Protocol from ..common.utils import struct_parse +from ..construct.lib.container 
import Container from .sections import Section if TYPE_CHECKING: + from .elffile import ELFFile from .sections import Symbol @@ -41,7 +43,7 @@ class ELFHashTable: supports symbol lookup without access to a symbol table section. """ - def __init__(self, elffile, start_offset, size, symboltable): + def __init__(self, elffile: ELFFile, start_offset: int, size: int | None, symboltable: _SymbolTable) -> None: """ Args: elffile (ELFFile): The ELF file. @@ -52,23 +54,23 @@ def __init__(self, elffile, start_offset, size, symboltable): self.elffile = elffile self._symboltable = symboltable if size == 0: # size may also be None if its unknown - self.params = { + self.params = Container(**{ 'nbuckets': 0, 'nchains': 0, 'buckets': [], 'chains': [], - } + }) else: self.params = struct_parse(self.elffile.structs.Elf_Hash, self.elffile.stream, start_offset) - def get_number_of_symbols(self): + def get_number_of_symbols(self) -> int: """ Get the number of symbols from the hash table parameters. """ return self.params['nchains'] - def get_symbol(self, name): + def get_symbol(self, name: str) -> Symbol | None: """ Look up a symbol from this hash table with the given name. """ if self.params['nbuckets'] == 0: @@ -83,7 +85,7 @@ def get_symbol(self, name): return None @staticmethod - def elf_hash(name): + def elf_hash(name: bytes | str) -> int: """ Compute the hash value for a given symbol name. """ if not isinstance(name, bytes): @@ -104,7 +106,7 @@ class ELFHashSection(Section, ELFHashTable): allows us to use the common functions defined on Section objects when dealing with the hash table. 
""" - def __init__(self, header, name, elffile, symboltable): + def __init__(self, header: Container, name: str, elffile: ELFFile, symboltable: _SymbolTable) -> None: Section.__init__(self, header, name, elffile) ELFHashTable.__init__(self, elffile, self['sh_offset'], self['sh_size'], symboltable) @@ -122,20 +124,20 @@ class GNUHashTable: one should use the DynamicSegment object as the symboltable as it supports symbol lookup without access to a symbol table section. """ - def __init__(self, elffile, start_offset, symboltable): + def __init__(self, elffile: ELFFile, start_offset: int, symboltable: _SymbolTable) -> None: self.elffile = elffile self._symboltable = symboltable - self.params = struct_parse(self.elffile.structs.Gnu_Hash, + self.params: Container = struct_parse(self.elffile.structs.Gnu_Hash, self.elffile.stream, start_offset) # Element sizes in the hash table - self._wordsize = self.elffile.structs.Elf_word('').sizeof() - self._xwordsize = self.elffile.structs.Elf_xword('').sizeof() - self._chain_pos = start_offset + 4 * self._wordsize + \ + self._wordsize: int = self.elffile.structs.Elf_word('').sizeof() + self._xwordsize: int = self.elffile.structs.Elf_xword('').sizeof() + self._chain_pos: int = start_offset + 4 * self._wordsize + \ self.params['bloom_size'] * self._xwordsize + \ self.params['nbuckets'] * self._wordsize - def get_number_of_symbols(self): + def get_number_of_symbols(self) -> int: """ Get the number of symbols in the hash table by finding the bucket with the highest symbol index and walking to the end of its chain. """ @@ -158,7 +160,7 @@ def get_number_of_symbols(self): max_idx += 1 - def _matches_bloom(self, H1): + def _matches_bloom(self, H1: int) -> bool: """ Helper function to check if the given hash could be in the hash table by testing it against the bloom filter.
""" @@ -168,7 +170,7 @@ def _matches_bloom(self, H1): BITMASK = (1 << (H1 % arch_bits)) | (1 << (H2 % arch_bits)) return (self.params['bloom'][word_idx] & BITMASK) == BITMASK - def get_symbol(self, name): + def get_symbol(self, name: str) -> Symbol | None: """ Look up a symbol from this hash table with the given name. """ namehash = self.gnu_hash(name) @@ -194,7 +196,7 @@ def get_symbol(self, name): return None @staticmethod - def gnu_hash(key): + def gnu_hash(key: bytes | str) -> int: """ Compute the GNU-style hash value for a given symbol name. """ if not isinstance(key, bytes): @@ -210,6 +212,6 @@ class GNUHashSection(Section, GNUHashTable): allows us to use the common functions defined on Section objects when dealing with the hash table. """ - def __init__(self, header, name, elffile, symboltable): + def __init__(self, header: Container, name: str, elffile: ELFFile, symboltable: _SymbolTable) -> None: Section.__init__(self, header, name, elffile) GNUHashTable.__init__(self, elffile, self['sh_offset'], symboltable) diff --git a/elftools/elf/notes.py b/elftools/elf/notes.py index 2446cb3f..482f494f 100644 --- a/elftools/elf/notes.py +++ b/elftools/elf/notes.py @@ -6,11 +6,20 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + +from typing import TYPE_CHECKING + from ..common.utils import struct_parse, bytes2hex, roundup, bytes2str from ..construct import CString +if TYPE_CHECKING: + from collections.abc import Iterator + + from ..construct.lib.container import Container + from .elffile import ELFFile -def iter_notes(elffile, offset, size): +def iter_notes(elffile: ELFFile, offset: int, size: int) -> Iterator[Container]: """ Yield all the notes in a section or segment. 
""" end = offset + size @@ -18,7 +27,7 @@ def iter_notes(elffile, offset, size): # Note: a note's name and data are 4-byte aligned, but it's possible there's # additional padding at the end to satisfy the alignment requirement of the segment. while offset + nhdr_size < end: - note = struct_parse( + note: Container = struct_parse( elffile.structs.Elf_Nhdr, elffile.stream, stream_pos=offset) @@ -27,14 +36,14 @@ def iter_notes(elffile, offset, size): elffile.stream.seek(offset) if note['n_namesz']: # n_namesz is 4-byte aligned. - disk_namesz = roundup(note['n_namesz'], 2) + disk_namesz: int = roundup(note['n_namesz'], 2) note['n_name'] = bytes2str( CString('').parse(elffile.stream.read(disk_namesz))) offset += disk_namesz else: note['n_name'] = None - desc_data = elffile.stream.read(note['n_descsz']) + desc_data: bytes = elffile.stream.read(note['n_descsz']) note['n_descdata'] = desc_data if note['n_type'] == 'NT_GNU_ABI_TAG' and note['n_name'] == 'GNU': note['n_desc'] = struct_parse(elffile.structs.Elf_abi, @@ -54,12 +63,12 @@ def iter_notes(elffile, offset, size): offset) elif note['n_type'] == 'NT_GNU_PROPERTY_TYPE_0' and note['n_name'] == 'GNU': off = offset - props = [] + props: list[Container] = [] # n_descsz contains the size of the note "descriptor" (the data payload), # excluding padding. 
See "Note Section" in https://refspecs.linuxfoundation.org/elf/elf.pdf - current_note_end = offset + note['n_descsz'] + current_note_end: int = offset + note['n_descsz'] while off < current_note_end: - p = struct_parse(elffile.structs.Elf_Prop, elffile.stream, off) + p: Container = struct_parse(elffile.structs.Elf_Prop, elffile.stream, off) off += roundup(p.pr_datasz + 8, 2 if elffile.elfclass == 32 else 3) props.append(p) note['n_desc'] = props diff --git a/elftools/elf/relocation.py b/elftools/elf/relocation.py index 95feb89a..a7d5a938 100644 --- a/elftools/elf/relocation.py +++ b/elftools/elf/relocation.py @@ -8,17 +8,22 @@ #------------------------------------------------------------------------------- from __future__ import annotations -from typing import NamedTuple, Protocol +from typing import IO, TYPE_CHECKING, Any, NamedTuple, Protocol from ..common.exceptions import ELFRelocationError from ..common.utils import elf_assert, struct_parse -from .sections import Section +from .sections import Section, SymbolTableSection from .enums import ( ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, ENUM_RELOC_TYPE_MIPS, ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64, ENUM_RELOC_TYPE_S390X, ENUM_RELOC_TYPE_BPF, ENUM_RELOC_TYPE_LOONGARCH) from ..construct import Container +if TYPE_CHECKING: + from collections.abc import Iterator, Mapping + + from .elffile import ELFFile + class Relocation: """ Relocation object - representing a single relocation entry. Allows @@ -26,26 +31,26 @@ class Relocation: Can be either a REL or RELA relocation. """ - def __init__(self, entry, elffile): + def __init__(self, entry: Container, elffile: ELFFile) -> None: self.entry = entry self.elffile = elffile - def is_RELA(self): + def is_RELA(self) -> bool: """ Is this a RELA relocation? If not, it's REL. 
""" return 'r_addend' in self.entry - def __getitem__(self, name): + def __getitem__(self, name: str) -> Any: """ Dict-like access to entries """ return self.entry[name] - def __repr__(self): + def __repr__(self) -> str: return '' % ( 'RELA' if self.is_RELA() else 'REL', self.entry) - def __str__(self): + def __str__(self) -> str: return self.__repr__() @@ -53,7 +58,7 @@ class RelocationTable: """ Shared functionality between relocation sections and relocation tables """ - def __init__(self, elffile, offset, size, is_rela): + def __init__(self, elffile: ELFFile, offset: int, size: int, is_rela: bool) -> None: self._stream = elffile.stream self._elffile = elffile self._elfstructs = elffile.structs @@ -68,17 +73,17 @@ def __init__(self, elffile, offset, size, is_rela): self.entry_size = self.entry_struct.sizeof() - def is_RELA(self): + def is_RELA(self) -> bool: """ Is this a RELA relocation section? If not, it's REL. """ return self._is_rela - def num_relocations(self): + def num_relocations(self) -> int: """ Number of relocations in the section """ return self._size // self.entry_size - def get_relocation(self, n): + def get_relocation(self, n: int) -> Relocation: """ Get the relocation at index #n from the section (Relocation object) """ entry_offset = self._offset + n * self.entry_size @@ -88,7 +93,7 @@ def get_relocation(self, n): stream_pos=entry_offset) return Relocation(entry, self._elffile) - def iter_relocations(self): + def iter_relocations(self) -> Iterator[Relocation]: """ Yield all the relocations in the section """ for i in range(self.num_relocations()): @@ -98,7 +103,7 @@ def iter_relocations(self): class RelocationSection(Section, RelocationTable): """ ELF relocation section. Serves as a collection of Relocation entries. 
""" - def __init__(self, header, name, elffile): + def __init__(self, header: Container, name: str, elffile: ELFFile) -> None: Section.__init__(self, header, name, elffile) RelocationTable.__init__(self, self.elffile, self['sh_offset'], self['sh_size'], header['sh_type'] == 'SHT_RELA') @@ -120,19 +125,19 @@ class RelrRelocationTable: relocations). """ - def __init__(self, elffile, offset, size, entrysize): + def __init__(self, elffile: ELFFile, offset: int, size: int, entrysize: int) -> None: self._elffile = elffile self._offset = offset self._size = size self._relr_struct = self._elffile.structs.Elf_Relr self._entrysize = self._relr_struct.sizeof() - self._cached_relocations = None + self._cached_relocations: list[Relocation] | None = None elf_assert(self._entrysize == entrysize, 'Expected RELR entry size to be %s, got %s' % ( self._entrysize, entrysize)) - def iter_relocations(self): + def iter_relocations(self) -> Iterator[Relocation]: """ Yield all the relocations in the section """ @@ -144,12 +149,12 @@ def iter_relocations(self): relr = self._offset # The addresses of relocations in a bitmap are calculated from a base # value provided in an initial 'anchor' relocation. 
- base = None + base: int | None = None while relr < limit: entry = struct_parse(self._relr_struct, self._elffile.stream, stream_pos=relr) - entry_offset = entry['r_offset'] + entry_offset: int = entry['r_offset'] if (entry_offset & 1) == 0: # We found an anchor, take the current value as the base address # for the following bitmaps and move the 'where' pointer to the @@ -181,14 +186,14 @@ def iter_relocations(self): # Advance to the next entry relr += self._entrysize - def num_relocations(self): + def num_relocations(self) -> int: """ Number of relocations in the section """ if self._cached_relocations is None: self._cached_relocations = list(self.iter_relocations()) return len(self._cached_relocations) - def get_relocation(self, n): + def get_relocation(self, n: int) -> Relocation: """ Get the relocation at index #n from the section (Relocation object) """ if self._cached_relocations is None: @@ -199,7 +204,7 @@ def get_relocation(self, n): class RelrRelocationSection(Section, RelrRelocationTable): """ ELF RELR relocation section. Serves as a collection of RELR relocation entries. """ - def __init__(self, header, name, elffile): + def __init__(self, header: Container, name: str, elffile: ELFFile) -> None: Section.__init__(self, header, name, elffile) RelrRelocationTable.__init__(self, self.elffile, self['sh_offset'], self['sh_size'], self['sh_entsize']) @@ -209,45 +214,45 @@ class _RelocationFunction(Protocol): def __call__(self, value: int, sym_value: int, offset: int, addend: int = 0) -> int: ... 
-def _reloc_calc_identity(value, sym_value, offset, addend=0): +def _reloc_calc_identity(value: int, sym_value: int, offset: int, addend: int = 0) -> int: return value -def _reloc_calc_sym_plus_value(value, sym_value, offset, addend=0): +def _reloc_calc_sym_plus_value(value: int, sym_value: int, offset: int, addend: int = 0) -> int: return sym_value + value + addend -def _reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0): +def _reloc_calc_sym_plus_value_pcrel(value: int, sym_value: int, offset: int, addend: int = 0) -> int: return sym_value + value - offset -def _reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0): +def _reloc_calc_sym_plus_addend(value: int, sym_value: int, offset: int, addend: int = 0) -> int: return sym_value + addend -def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0): +def _reloc_calc_sym_plus_addend_pcrel(value: int, sym_value: int, offset: int, addend: int = 0) -> int: return sym_value + addend - offset -def _reloc_calc_value_minus_sym_addend(value, sym_value, offset, addend=0): +def _reloc_calc_value_minus_sym_addend(value: int, sym_value: int, offset: int, addend: int = 0) -> int: return value - sym_value - addend -def _arm_reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0): +def _arm_reloc_calc_sym_plus_value_pcrel(value: int, sym_value: int, offset: int, addend: int = 0) -> int: return sym_value // 4 + value - offset // 4 -def _bpf_64_32_reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0): +def _bpf_64_32_reloc_calc_sym_plus_addend(value: int, sym_value: int, offset: int, addend: int = 0) -> int: return (sym_value + addend) // 8 - 1 class RelocationHandler: """ Handles the logic of relocations in ELF files. 
""" - def __init__(self, elffile): + def __init__(self, elffile: ELFFile) -> None: self.elffile = elffile - def find_relocations_for_section(self, section): + def find_relocations_for_section(self, section: Section) -> RelocationSection | None: """ Given a section, find the relocation section for it in the ELF file. Return a RelocationSection object, or None if none was found. @@ -264,7 +269,7 @@ def find_relocations_for_section(self, section): return relsection return None - def apply_section_relocations(self, stream, reloc_section): + def apply_section_relocations(self, stream: IO[bytes], reloc_section: RelocationSection) -> None: """ Apply all relocations in reloc_section (a RelocationSection object) to the given stream, that contains the data of the section that is being relocated. The stream is modified as a result. @@ -274,7 +279,7 @@ def apply_section_relocations(self, stream, reloc_section): for reloc in reloc_section.iter_relocations(): self._do_apply_relocation(stream, reloc, symtab) - def _do_apply_relocation(self, stream, reloc, symtab): + def _do_apply_relocation(self, stream: IO[bytes], reloc: Relocation, symtab: SymbolTableSection) -> None: # Preparations for performing the relocation: obtain the value of # the symbol mentioned in the relocation, as well as the relocation # recipe which tells us how to actually perform it. 
@@ -379,7 +384,7 @@ class _RELOCATION_RECIPE_TYPE(NamedTuple): has_addend: bool calc_func: _RelocationFunction - _RELOCATION_RECIPES_ARM = { + _RELOCATION_RECIPES_ARM: Mapping[int, _RELOCATION_RECIPE_TYPE] = { ENUM_RELOC_TYPE_ARM['R_ARM_ABS32']: _RELOCATION_RECIPE_TYPE( bytesize=4, has_addend=False, calc_func=_reloc_calc_sym_plus_value), @@ -388,7 +393,7 @@ class _RELOCATION_RECIPE_TYPE(NamedTuple): calc_func=_arm_reloc_calc_sym_plus_value_pcrel), } - _RELOCATION_RECIPES_AARCH64 = { + _RELOCATION_RECIPES_AARCH64: Mapping[int, _RELOCATION_RECIPE_TYPE] = { ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS64']: _RELOCATION_RECIPE_TYPE( bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS32']: _RELOCATION_RECIPE_TYPE( @@ -399,14 +404,14 @@ class _RELOCATION_RECIPE_TYPE(NamedTuple): } # https://dmz-portal.mips.com/wiki/MIPS_relocation_types - _RELOCATION_RECIPES_MIPS_REL = { + _RELOCATION_RECIPES_MIPS_REL: Mapping[int, _RELOCATION_RECIPE_TYPE] = { ENUM_RELOC_TYPE_MIPS['R_MIPS_NONE']: _RELOCATION_RECIPE_TYPE( bytesize=4, has_addend=False, calc_func=_reloc_calc_identity), ENUM_RELOC_TYPE_MIPS['R_MIPS_32']: _RELOCATION_RECIPE_TYPE( bytesize=4, has_addend=False, calc_func=_reloc_calc_sym_plus_value), } - _RELOCATION_RECIPES_MIPS_RELA = { + _RELOCATION_RECIPES_MIPS_RELA: Mapping[int, _RELOCATION_RECIPE_TYPE] = { ENUM_RELOC_TYPE_MIPS['R_MIPS_NONE']: _RELOCATION_RECIPE_TYPE( bytesize=4, has_addend=True, calc_func=_reloc_calc_identity), ENUM_RELOC_TYPE_MIPS['R_MIPS_32']: _RELOCATION_RECIPE_TYPE( @@ -417,7 +422,7 @@ class _RELOCATION_RECIPE_TYPE(NamedTuple): calc_func=_reloc_calc_sym_plus_value), } - _RELOCATION_RECIPES_PPC64 = { + _RELOCATION_RECIPES_PPC64: Mapping[int, _RELOCATION_RECIPE_TYPE] = { ENUM_RELOC_TYPE_PPC64['R_PPC64_ADDR32']: _RELOCATION_RECIPE_TYPE( bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), ENUM_RELOC_TYPE_PPC64['R_PPC64_REL32']: _RELOCATION_RECIPE_TYPE( @@ -426,7 +431,7 @@ class 
_RELOCATION_RECIPE_TYPE(NamedTuple): bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), } - _RELOCATION_RECIPES_X86 = { + _RELOCATION_RECIPES_X86: Mapping[int, _RELOCATION_RECIPE_TYPE] = { ENUM_RELOC_TYPE_i386['R_386_NONE']: _RELOCATION_RECIPE_TYPE( bytesize=4, has_addend=False, calc_func=_reloc_calc_identity), ENUM_RELOC_TYPE_i386['R_386_32']: _RELOCATION_RECIPE_TYPE( @@ -437,7 +442,7 @@ class _RELOCATION_RECIPE_TYPE(NamedTuple): calc_func=_reloc_calc_sym_plus_value_pcrel), } - _RELOCATION_RECIPES_X64 = { + _RELOCATION_RECIPES_X64: Mapping[int, _RELOCATION_RECIPE_TYPE] = { ENUM_RELOC_TYPE_x64['R_X86_64_NONE']: _RELOCATION_RECIPE_TYPE( bytesize=8, has_addend=True, calc_func=_reloc_calc_identity), ENUM_RELOC_TYPE_x64['R_X86_64_64']: _RELOCATION_RECIPE_TYPE( @@ -452,7 +457,7 @@ class _RELOCATION_RECIPE_TYPE(NamedTuple): } # https://www.kernel.org/doc/html/latest/bpf/llvm_reloc.html#different-relocation-types - _RELOCATION_RECIPES_EBPF = { + _RELOCATION_RECIPES_EBPF: Mapping[int, _RELOCATION_RECIPE_TYPE] = { ENUM_RELOC_TYPE_BPF['R_BPF_NONE']: _RELOCATION_RECIPE_TYPE( bytesize=8, has_addend=False, calc_func=_reloc_calc_identity), ENUM_RELOC_TYPE_BPF['R_BPF_64_64']: _RELOCATION_RECIPE_TYPE( @@ -468,7 +473,7 @@ class _RELOCATION_RECIPE_TYPE(NamedTuple): } # https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc - _RELOCATION_RECIPES_LOONGARCH = { + _RELOCATION_RECIPES_LOONGARCH: Mapping[int, _RELOCATION_RECIPE_TYPE] = { ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_NONE']: _RELOCATION_RECIPE_TYPE( bytesize=4, has_addend=False, calc_func=_reloc_calc_identity), ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_32']: _RELOCATION_RECIPE_TYPE( @@ -509,7 +514,7 @@ class _RELOCATION_RECIPE_TYPE(NamedTuple): calc_func=_reloc_calc_sym_plus_addend_pcrel), } - _RELOCATION_RECIPES_S390X = { + _RELOCATION_RECIPES_S390X: Mapping[int, _RELOCATION_RECIPE_TYPE] = { ENUM_RELOC_TYPE_S390X['R_390_32']: _RELOCATION_RECIPE_TYPE( bytesize=4, has_addend=True, 
calc_func=_reloc_calc_sym_plus_addend), ENUM_RELOC_TYPE_S390X['R_390_PC32']: _RELOCATION_RECIPE_TYPE( diff --git a/elftools/elf/sections.py b/elftools/elf/sections.py index 593e84b9..e1e62442 100644 --- a/elftools/elf/sections.py +++ b/elftools/elf/sections.py @@ -6,6 +6,10 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + +from typing import IO, TYPE_CHECKING, Any, Literal, overload + from ..common.exceptions import ELFCompressionError from ..common.utils import struct_parse, elf_assert, parse_cstring_from_stream from collections import defaultdict @@ -13,6 +17,14 @@ from .notes import iter_notes import zlib +from elftools.construct.lib.container import Container + +if TYPE_CHECKING: + from collections.abc import Iterator + + from ..construct.lib.container import Container + from .elffile import ELFFile + from .structs import ELFStructs class Section: @@ -23,13 +35,13 @@ class Section: > sec = Section(...) 
> sec['sh_type'] # section type """ - def __init__(self, header, name, elffile): + def __init__(self, header: Container, name: str, elffile: ELFFile) -> None: self.header = header self.name = name self.elffile = elffile - self.stream = self.elffile.stream - self.structs = self.elffile.structs - self._compressed = header['sh_flags'] & SH_FLAGS.SHF_COMPRESSED + self.stream: IO[bytes] = self.elffile.stream + self.structs: ELFStructs = self.elffile.structs + self._compressed: int = header['sh_flags'] & SH_FLAGS.SHF_COMPRESSED if self.compressed: # Read the compression header now to know about the size/alignment @@ -37,21 +49,21 @@ def __init__(self, header, name, elffile): header = struct_parse(self.structs.Elf_Chdr, self.stream, stream_pos=self['sh_offset']) - self._compression_type = header['ch_type'] - self._decompressed_size = header['ch_size'] - self._decompressed_align = header['ch_addralign'] + self._compression_type: str = header['ch_type'] + self._decompressed_size: int = header['ch_size'] + self._decompressed_align: int = header['ch_addralign'] else: self._decompressed_size = header['sh_size'] self._decompressed_align = header['sh_addralign'] @property - def compressed(self): + def compressed(self) -> int: """ Is this section compressed? """ return self._compressed @property - def data_size(self): + def data_size(self) -> int: """ Return the logical size for this section's data. This can be different from the .sh_size header field when the section @@ -60,7 +72,7 @@ def data_size(self): return self._decompressed_size @property - def data_alignment(self): + def data_alignment(self) -> int: """ Return the logical alignment for this section's data. This can be different from the .sh_addralign header field when the @@ -68,7 +80,7 @@ def data_alignment(self): """ return self._decompressed_align - def data(self): + def data(self) -> bytes: """ The section data from the file. 
Note that data is decompressed if the stored section data is @@ -84,9 +96,9 @@ def data(self): if c_type == 'ELFCOMPRESS_ZLIB': # Read the data to decompress starting right after the # compression header until the end of the section. - hdr_size = self.structs.Elf_Chdr.sizeof() + hdr_size: int = self.structs.Elf_Chdr.sizeof() self.stream.seek(self['sh_offset'] + hdr_size) - compressed = self.stream.read(self['sh_size'] - hdr_size) + compressed: bytes = self.stream.read(self['sh_size'] - hdr_size) decomp = zlib.decompressobj() result = decomp.decompress(compressed, self.data_size) @@ -106,37 +118,43 @@ def data(self): return result - def is_null(self): + def is_null(self) -> bool: """ Is this a null section? """ return False - def __getitem__(self, name): + @overload + def __getitem__(self, name: Literal["sh_addr", "sh_entsize", "sh_flags", "sh_offset", "sh_size"]) -> int: ... + @overload + def __getitem__(self, name: Literal["st_name", "sh_type"]) -> str: ... + @overload + def __getitem__(self, name: str) -> Any: ... + def __getitem__(self, name: str) -> Any: """ Implement dict-like access to header entries """ return self.header[name] - def __eq__(self, other): + def __eq__(self, other: object) -> bool: try: return self.header == other.header except AttributeError: return False - def __hash__(self): + def __hash__(self) -> int: return hash(self.header) class NullSection(Section): """ ELF NULL section """ - def is_null(self): + def is_null(self) -> bool: return True class StringTableSection(Section): """ ELF string table section. """ - def get_string(self, offset): + def get_string(self, offset: int) -> str: """ Get the string stored at the given offset in this string table. """ table_offset = self['sh_offset'] @@ -151,11 +169,11 @@ class SymbolTableIndexSection(Section): SHN_XINDEX (0xffff). 
The format of the section is described at https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.sheader.html """ - def __init__(self, header, name, elffile, symboltable): + def __init__(self, header: Container, name: str, elffile: ELFFile, symboltable: Container) -> None: super().__init__(header, name, elffile) self.symboltable = symboltable - def get_section_index(self, n): + def get_section_index(self, n: int) -> int: """ Get the section header table index for the symbol with index #n. The section contains an array of Elf32_word values with one entry for every symbol in the associated symbol table. @@ -168,21 +186,21 @@ class SymbolTableSection(Section): """ ELF symbol table section. Has an associated StringTableSection that's passed in the constructor. """ - def __init__(self, header, name, elffile, stringtable): + def __init__(self, header: Container, name: str, elffile: ELFFile, stringtable: StringTableSection) -> None: super().__init__(header, name, elffile) self.stringtable = stringtable elf_assert(self['sh_entsize'] > 0, 'Expected entry size of section %r to be > 0' % name) elf_assert(self['sh_size'] % self['sh_entsize'] == 0, 'Expected section size to be a multiple of entry size in section %r' % name) - self._symbol_name_map = None + self._symbol_name_map: dict[str, list[int]] | None = None - def num_symbols(self): + def num_symbols(self) -> int: """ Number of symbols in the table """ return self['sh_size'] // self['sh_entsize'] - def get_symbol(self, n): + def get_symbol(self, n: int) -> Symbol: """ Get the symbol at index #n from the table (Symbol object) """ # Grab the symbol's entry from the stream @@ -195,7 +213,7 @@ def get_symbol(self, n): name = self.stringtable.get_string(entry['st_name']) return Symbol(entry, name) - def get_symbol_by_name(self, name): + def get_symbol_by_name(self, name: str) -> list[Symbol] | None: """ Get a symbol(s) by name. Return None if no symbol by the given name exists. 
""" @@ -209,7 +227,7 @@ def get_symbol_by_name(self, name): symnums = self._symbol_name_map.get(name) return [self.get_symbol(i) for i in symnums] if symnums else None - def iter_symbols(self): + def iter_symbols(self) -> Iterator[Symbol]: """ Yield all the symbols in the table """ for i in range(self.num_symbols()): @@ -223,11 +241,11 @@ class Symbol: Similarly to Section objects, allows dictionary-like access to the symbol entry. """ - def __init__(self, entry, name): + def __init__(self, entry: Container, name: str) -> None: self.entry = entry self.name = name - def __getitem__(self, name): + def __getitem__(self, name: str) -> Any: """ Implement dict-like access to entries """ return self.entry[name] @@ -237,16 +255,16 @@ class SUNWSyminfoTableSection(Section): """ ELF .SUNW Syminfo table section. Has an associated SymbolTableSection that's passed in the constructor. """ - def __init__(self, header, name, elffile, symboltable): + def __init__(self, header: Container, name: str, elffile: ELFFile, symboltable: SymbolTableSection) -> None: super().__init__(header, name, elffile) self.symboltable = symboltable - def num_symbols(self): + def num_symbols(self) -> int: """ Number of symbols in the table """ return self['sh_size'] // self['sh_entsize'] - 1 - def get_symbol(self, n): + def get_symbol(self, n: int) -> Symbol: """ Get the symbol at index #n from the table (Symbol object). It begins at 1 and not 0 since the first entry is used to store the current version of the syminfo table. @@ -261,7 +279,7 @@ def get_symbol(self, n): name = self.symboltable.get_symbol(n).name return Symbol(entry, name) - def iter_symbols(self): + def iter_symbols(self) -> Iterator[Symbol]: """ Yield all the symbols in the table """ for i in range(1, self.num_symbols() + 1): @@ -271,7 +289,7 @@ def iter_symbols(self): class NoteSection(Section): """ ELF NOTE section. Knows how to parse notes. 
""" - def iter_notes(self): + def iter_notes(self) -> Iterator[Container]: """ Yield all the notes in the section. Each result is a dictionary- like object with "n_name", "n_type", and "n_desc" fields, amongst others. @@ -282,11 +300,11 @@ def iter_notes(self): class StabSection(Section): """ ELF stab section. """ - def iter_stabs(self): + def iter_stabs(self) -> Iterator[Container]: """ Yield all stab entries. Result type is ELFStructs.Elf_Stabs. """ - offset = self['sh_offset'] - size = self['sh_size'] + offset: int = self['sh_offset'] + size: int = self['sh_size'] end = offset + size while offset < end: stabs = struct_parse( @@ -306,10 +324,10 @@ def __init__(self, tag): self.extra = None @property - def tag(self): + def tag(self) -> str: return self._tag['tag'] - def __repr__(self): + def __repr__(self) -> str: s = '<%s (%s): %r>' % \ (self.__class__.__name__, self.tag, self.value) s += ' %s' % self.extra if self.extra is not None else '' @@ -329,7 +347,7 @@ def __init__(self, stream, structs, offset, attribute): self.attr_start = self.stream.tell() - def iter_attributes(self, tag=None): + def iter_attributes(self, tag: str | None = None) -> Iterator[Attribute]: """ Yield all attributes (limit to |tag| if specified). """ for attribute in self._make_attributes(): @@ -337,18 +355,18 @@ def iter_attributes(self, tag=None): yield attribute @property - def num_attributes(self): + def num_attributes(self) -> int: """ Number of attributes in the subsubsection. """ return sum(1 for _ in self.iter_attributes()) + 1 @property - def attributes(self): + def attributes(self) -> list[Attribute]: """ List of all attributes in the subsubsection. """ return [self.header, *(self.iter_attributes())] - def _make_attributes(self): + def _make_attributes(self) -> Iterator[Attribute]: """ Create all attributes for this subsubsection except the first one which is the header. 
""" @@ -359,7 +377,7 @@ def _make_attributes(self): while self.stream.tell() != end: yield self.attribute(self.structs, self.stream) - def __repr__(self): + def __repr__(self) -> str: s = "<%s (%s): %d bytes>" return s % (self.__class__.__name__, self.header.tag[4:], self.header.value) @@ -378,7 +396,7 @@ def __init__(self, stream, structs, offset, header, subsubsection): self.subsubsec_start = self.stream.tell() - def iter_subsubsections(self, scope=None): + def iter_subsubsections(self, scope: str | None = None) -> Iterator[AttributesSubsubsection]: """ Yield all subsubsections (limit to |scope| if specified). """ for subsubsec in self._make_subsubsections(): @@ -386,18 +404,18 @@ def iter_subsubsections(self, scope=None): yield subsubsec @property - def num_subsubsections(self): + def num_subsubsections(self) -> int: """ Number of subsubsections in the subsection. """ return sum(1 for _ in self.iter_subsubsections()) @property - def subsubsections(self): + def subsubsections(self) -> list[AttributesSubsubsection]: """ List of all subsubsections in the subsection. """ return list(self.iter_subsubsections()) - def _make_subsubsections(self): + def _make_subsubsections(self) -> Iterator[AttributesSubsubsection]: """ Create all subsubsections for this subsection. """ end = self.offset + self['length'] @@ -411,12 +429,12 @@ def _make_subsubsections(self): self.stream.seek(self.subsubsec_start + subsubsec.header.value) yield subsubsec - def __getitem__(self, name): + def __getitem__(self, name: str) -> Any: """ Implement dict-like access to header entries. 
""" return self.header[name] - def __repr__(self): + def __repr__(self) -> str: s = "<%s (%s): %d bytes>" return s % (self.__class__.__name__, self.header['vendor_name'], self.header['length']) @@ -429,7 +447,7 @@ def __init__(self, header, name, elffile, subsection): super().__init__(header, name, elffile) self.subsection = subsection - fv = struct_parse(self.structs.Elf_byte('format_version'), + fv: int = struct_parse(self.structs.Elf_byte('format_version'), self.stream, self['sh_offset']) @@ -438,7 +456,7 @@ def __init__(self, header, name, elffile, subsection): self.subsec_start = self.stream.tell() - def iter_subsections(self, vendor_name=None): + def iter_subsections(self, vendor_name: str | None = None) -> Iterator[AttributesSubsection]: """ Yield all subsections (limit to |vendor_name| if specified). """ for subsec in self._make_subsections(): @@ -446,18 +464,18 @@ def iter_subsections(self, vendor_name=None): yield subsec @property - def num_subsections(self): + def num_subsections(self) -> int: """ Number of subsections in the section. """ return sum(1 for _ in self.iter_subsections()) @property - def subsections(self): + def subsections(self) -> list[AttributesSubsection]: """ List of all subsections in the section. """ return list(self.iter_subsections()) - def _make_subsections(self): + def _make_subsections(self) -> Iterator[AttributesSubsection]: """ Create all subsections for this section. """ end = self['sh_offset'] + self.data_size @@ -475,7 +493,7 @@ def _make_subsections(self): class ARMAttribute(Attribute): """ ARM attribute object - representing a build attribute of ARM ELF files. 
""" - def __init__(self, structs, stream): + def __init__(self, structs: ELFStructs, stream: IO[bytes]) -> None: super().__init__( struct_parse(structs.Elf_Arm_Attribute_Tag, stream)) @@ -483,8 +501,8 @@ def __init__(self, structs, stream): self.value = struct_parse(structs.Elf_word('value'), stream) if self.tag != 'TAG_FILE': - self.extra = [] - s_number = struct_parse(structs.Elf_uleb128('s_number'), stream) + self.extra: list[int] = [] # type: ignore[assignment] + s_number: int = struct_parse(structs.Elf_uleb128('s_number'), stream) while s_number != 0: self.extra.append(s_number) @@ -506,7 +524,7 @@ def __init__(self, structs, stream): self.value = ARMAttribute(structs, stream) if type(self.value.value) is not str: - nul = struct_parse(structs.Elf_byte('nul'), stream) + nul: int = struct_parse(structs.Elf_byte('nul'), stream) elf_assert(nul == 0, "Invalid terminating byte %r, expecting NUL." % nul) @@ -543,7 +561,7 @@ def __init__(self, header, name, elffile): class RISCVAttribute(Attribute): """ Attribute of an ELF .riscv.attributes section. 
""" - def __init__(self, structs, stream): + def __init__(self, structs: ELFStructs, stream: IO[bytes]) -> None: super().__init__( struct_parse(structs.Elf_RiscV_Attribute_Tag, stream)) @@ -551,8 +569,8 @@ def __init__(self, structs, stream): self.value = struct_parse(structs.Elf_word('value'), stream) if self.tag != 'TAG_FILE': - self.extra = [] - s_number = struct_parse(structs.Elf_uleb128('s_number'), stream) + self.extra: list[int] = [] # type: ignore[assignment] + s_number: int = struct_parse(structs.Elf_uleb128('s_number'), stream) while s_number != 0: self.extra.append(s_number) diff --git a/elftools/elf/segments.py b/elftools/elf/segments.py index 8d757169..5eb28111 100644 --- a/elftools/elf/segments.py +++ b/elftools/elf/segments.py @@ -6,29 +6,46 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + +from typing import IO, TYPE_CHECKING, Any, Literal, overload + from ..construct import CString from ..common.utils import struct_parse from .constants import SH_FLAGS from .notes import iter_notes +if TYPE_CHECKING: + from collections.abc import Iterator + + from ..construct import Container + from .elffile import ELFFile + from .sections import Section + class Segment: - def __init__(self, header, stream): + def __init__(self, header: Container, stream: IO[bytes]) -> None: self.header = header self.stream = stream - def data(self): + def data(self) -> bytes: """ The segment data from the file. """ self.stream.seek(self['p_offset']) return self.stream.read(self['p_filesz']) - def __getitem__(self, name): + @overload + def __getitem__(self, name: Literal["p_filesz", "p_memsz", "p_offset", "p_vaddr"]) -> int: ... + @overload + def __getitem__(self, name: Literal["p_type"]) -> str: ... + @overload + def __getitem__(self, name: str) -> Any: ... 
+ def __getitem__(self, name: str) -> Any: """ Implement dict-like access to header entries """ return self.header[name] - def section_in_segment(self, section): + def section_in_segment(self, section: Section) -> bool: """ Is the given section contained in this segment? Note: this tries to reproduce the intricate rules of the @@ -36,9 +53,9 @@ def section_in_segment(self, section): elf/include/internal.h in the source of binutils. """ # Only the 'strict' checks from ELF_SECTION_IN_SEGMENT_1 are included - segtype = self['p_type'] - sectype = section['sh_type'] - secflags = section['sh_flags'] + segtype: str = self['p_type'] + sectype: str = section['sh_type'] + secflags: int = section['sh_flags'] # Only PT_LOAD, PT_GNU_RELRO and PT_TLS segments can contain SHF_TLS # sections @@ -62,8 +79,8 @@ def section_in_segment(self, section): # In ELF_SECTION_IN_SEGMENT_STRICT the flag check_vma is on, so if # this is an alloc section, check whether its VMA is in bounds. if secflags & SH_FLAGS.SHF_ALLOC: - secaddr = section['sh_addr'] - vaddr = self['p_vaddr'] + secaddr: int = section['sh_addr'] + vaddr: int = self['p_vaddr'] # This checks that the section is wholly contained in the segment. # The third condition is the 'strict' one - an empty section will @@ -83,8 +100,8 @@ def section_in_segment(self, section): if sectype == 'SHT_NOBITS': return True - secoffset = section['sh_offset'] - poffset = self['p_offset'] + secoffset: int = section['sh_offset'] + poffset: int = self['p_offset'] # Same logic as with secaddr vs. vaddr checks above, just on offsets in # the file @@ -98,13 +115,13 @@ class InterpSegment(Segment): """ INTERP segment. Knows how to obtain the path to the interpreter used for this ELF file. """ - def __init__(self, header, stream): + def __init__(self, header: Container, stream: IO[bytes]) -> None: super().__init__(header, stream) - def get_interp_name(self): + def get_interp_name(self) -> str: """ Obtain the interpreter path used for this ELF file. 
""" - path_offset = self['p_offset'] + path_offset: int = self['p_offset'] return struct_parse( CString('', encoding='utf-8'), self.stream, @@ -114,11 +131,11 @@ def get_interp_name(self): class NoteSegment(Segment): """ NOTE segment. Knows how to parse notes. """ - def __init__(self, header, stream, elffile): + def __init__(self, header: Container, stream: IO[bytes], elffile: ELFFile) -> None: super().__init__(header, stream) self.elffile = elffile - def iter_notes(self): + def iter_notes(self) -> Iterator[Container]: """ Yield all the notes in the segment. Each result is a dictionary- like object with "n_name", "n_type", and "n_desc" fields, amongst diff --git a/elftools/elf/structs.py b/elftools/elf/structs.py index eb9829d8..fd19bde7 100644 --- a/elftools/elf/structs.py +++ b/elftools/elf/structs.py @@ -7,6 +7,10 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + +from typing import TYPE_CHECKING + from ..construct import ( UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64, @@ -18,6 +22,12 @@ from ..common.utils import roundup from .enums import * +if TYPE_CHECKING: + from collections.abc import Callable + + from ..construct.core import FormatField + from ..construct.lib.container import Container + class ELFStructs: """ Accessible attributes: @@ -41,23 +51,23 @@ class ELFStructs: Elf_Rel, Elf_Rela: Entries in relocation sections """ - def __init__(self, little_endian=True, elfclass=32): + def __init__(self, little_endian: bool = True, elfclass: int = 32) -> None: assert elfclass == 32 or elfclass == 64 self.little_endian = little_endian self.elfclass = elfclass - self.e_type = None - self.e_machine = None - self.e_ident_osabi = None + self.e_type: str | None = None # ENUM_E_TYPE + self.e_machine: str | None = None # ENUM_E_MACHINE + self.e_ident_osabi: str | None = None # ENUM_E_VERSION - def 
__getstate__(self): + def __getstate__(self) -> tuple[bool, int, str | None, str | None, str | None]: return self.little_endian, self.elfclass, self.e_type, self.e_machine, self.e_ident_osabi - def __setstate__(self, state): + def __setstate__(self, state: tuple[bool, int, str | None, str | None, str | None]) -> None: self.little_endian, self.elfclass, e_type, e_machine, e_osabi = state self.create_basic_structs() self.create_advanced_structs(e_type, e_machine, e_osabi) - def create_basic_structs(self): + def create_basic_structs(self) -> None: """ Create word-size related structs and ehdr struct needed for initial determining of ELF type. """ @@ -85,7 +95,7 @@ def create_basic_structs(self): self._create_leb128() self._create_ntbs() - def create_advanced_structs(self, e_type=None, e_machine=None, e_ident_osabi=None): + def create_advanced_structs(self, e_type: str | None = None, e_machine: str | None = None, e_ident_osabi: str | None = None) -> None: """ Create all ELF structs except the ehdr. They may possibly depend on provided e_type and/or e_machine parsed from ehdr. """ @@ -116,7 +126,7 @@ def create_advanced_structs(self, e_type=None, e_machine=None, e_ident_osabi=Non #-------------------------------- PRIVATE --------------------------------# - def _create_ehdr(self): + def _create_ehdr(self) -> None: self.Elf_Ehdr = Struct('Elf_Ehdr', Struct('e_ident', Array(4, self.Elf_byte('EI_MAG')), @@ -142,13 +152,13 @@ def _create_ehdr(self): self.Elf_half('e_shstrndx'), ) - def _create_leb128(self): + def _create_leb128(self) -> None: self.Elf_uleb128 = ULEB128 - def _create_ntbs(self): + def _create_ntbs(self) -> None: self.Elf_ntbs = CString - def _create_phdr(self): + def _create_phdr(self) -> None: p_type_dict = ENUM_P_TYPE_BASE if self.e_machine == 'EM_ARM': p_type_dict = ENUM_P_TYPE_ARM @@ -182,7 +192,7 @@ def _create_phdr(self): self.Elf_xword('p_align'), ) - def _create_shdr(self): + def _create_shdr(self) -> None: """Section header parsing. 
Depends on e_machine because of machine-specific values in sh_type. @@ -212,7 +222,7 @@ def _create_shdr(self): self.Elf_xword('sh_entsize'), ) - def _create_chdr(self): + def _create_chdr(self) -> None: # Structure of compressed sections header. It is documented in Oracle # "Linker and Libraries Guide", Part IV ELF Application Binary # Interface, Chapter 13 Object File Format, Section Compression: @@ -226,7 +236,7 @@ def _create_chdr(self): fields.insert(1, self.Elf_word('ch_reserved')) self.Elf_Chdr = Struct('Elf_Chdr', *fields) - def _create_rel(self): + def _create_rel(self) -> None: # r_info is also taken apart into r_info_sym and r_info_type. This is # done in Value to avoid endianity issues while parsing. if self.elfclass == 32: @@ -285,7 +295,7 @@ def _create_rel(self): # For us, this is the same as self.Elf_addr (or self.Elf_xword). self.Elf_Relr = Struct('Elf_Relr', self.Elf_addr('r_offset')) - def _create_dyn(self): + def _create_dyn(self) -> None: d_tag_dict = dict(ENUM_D_TAG_COMMON) if self.e_machine in ENUMMAP_EXTRA_D_TAG_MACHINE: d_tag_dict.update(ENUMMAP_EXTRA_D_TAG_MACHINE[self.e_machine]) @@ -298,7 +308,7 @@ def _create_dyn(self): Value('d_ptr', lambda ctx: ctx['d_val']), ) - def _create_sym(self): + def _create_sym(self) -> None: # st_info is hierarchical. 
To access the type, use # container['st_info']['type'] st_info_struct = BitStruct('st_info', @@ -331,13 +341,13 @@ def _create_sym(self): self.Elf_xword('st_size'), ) - def _create_sunw_syminfo(self): + def _create_sunw_syminfo(self) -> None: self.Elf_Sunw_Syminfo = Struct('Elf_Sunw_Syminfo', Enum(self.Elf_half('si_boundto'), **ENUM_SUNW_SYMINFO_BOUNDTO), self.Elf_half('si_flags'), ) - def _create_gnu_verneed(self): + def _create_gnu_verneed(self) -> None: # Structure of "version needed" entries is documented in # Oracle "Linker and Libraries Guide", Chapter 13 Object File Format self.Elf_Verneed = Struct('Elf_Verneed', @@ -355,7 +365,7 @@ def _create_gnu_verneed(self): self.Elf_word('vna_next'), ) - def _create_gnu_verdef(self): + def _create_gnu_verdef(self) -> None: # Structure of "version definition" entries are documented in # Oracle "Linker and Libraries Guide", Chapter 13 Object File Format self.Elf_Verdef = Struct('Elf_Verdef', @@ -372,14 +382,14 @@ def _create_gnu_verdef(self): self.Elf_word('vda_next'), ) - def _create_gnu_versym(self): + def _create_gnu_versym(self) -> None: # Structure of "version symbol" entries are documented in # Oracle "Linker and Libraries Guide", Chapter 13 Object File Format self.Elf_Versym = Struct('Elf_Versym', Enum(self.Elf_half('ndx'), **ENUM_VERSYM), ) - def _create_gnu_abi(self): + def _create_gnu_abi(self) -> None: # Structure of GNU ABI notes is documented in # https://code.woboq.org/userspace/glibc/csu/abi-note.S.html self.Elf_abi = Struct('Elf_abi', @@ -389,20 +399,20 @@ def _create_gnu_abi(self): self.Elf_word('abi_tiny'), ) - def _create_gnu_debugaltlink(self): + def _create_gnu_debugaltlink(self) -> None: self.Elf_debugaltlink = Struct('Elf_debugaltlink', CString("sup_filename"), String("sup_checksum", length=20)) - def _create_gnu_property(self): + def _create_gnu_property(self) -> None: # Structure of GNU property notes is documented in # https://github.com/hjl-tools/linux-abi/wiki/linux-abi-draft.pdf - def 
roundup_padding(ctx): + def roundup_padding(ctx: Container) -> int: if self.elfclass == 32: return roundup(ctx.pr_datasz, 2) - ctx.pr_datasz return roundup(ctx.pr_datasz, 3) - ctx.pr_datasz - def classify_pr_data(ctx): + def classify_pr_data(ctx: Container) -> tuple[str, int, int] | None: if type(ctx.pr_type) is not str: return None if ctx.pr_type.startswith('GNU_PROPERTY_X86_'): @@ -428,7 +438,7 @@ def classify_pr_data(ctx): Padding(roundup_padding) ) - def _create_note(self, e_type=None): + def _create_note(self, e_type: str | None = None) -> None: # Structure of "PT_NOTE" section self.Elf_ugid = self.Elf_half if self.elfclass == 32 and self.e_machine in { @@ -503,7 +513,7 @@ def _create_note(self, e_type=None): Array(lambda ctx: ctx.num_map_entries, CString('filename'))) - def _create_stabs(self): + def _create_stabs(self) -> None: # Structure of one stabs entry, see binutils/bfd/stabs.c # Names taken from https://sourceware.org/gdb/current/onlinedocs/stabs.html#Overview self.Elf_Stabs = Struct('Elf_Stabs', @@ -514,7 +524,7 @@ def _create_stabs(self): self.Elf_word('n_value'), ) - def _create_attributes_subsection(self): + def _create_attributes_subsection(self) -> None: # Structure of a build attributes subsection header. A subsection is # either public to all tools that process the ELF file or private to # the vendor's tools. @@ -524,21 +534,21 @@ def _create_attributes_subsection(self): encoding='utf-8') ) - def _create_arm_attributes(self): + def _create_arm_attributes(self) -> None: # Structure of an ARM build attribute tag. self.Elf_Arm_Attribute_Tag = Struct('Elf_Arm_Attribute_Tag', Enum(self.Elf_uleb128('tag'), **ENUM_ATTR_TAG_ARM) ) - def _create_riscv_attributes(self): + def _create_riscv_attributes(self) -> None: # Structure of a RISC-V build attribute tag. 
self.Elf_RiscV_Attribute_Tag = Struct('Elf_RiscV_Attribute_Tag', Enum(self.Elf_uleb128('tag'), **ENUM_ATTR_TAG_RISCV) ) - def _create_elf_hash(self): + def _create_elf_hash(self) -> None: # Structure of the old SYSV-style hash table header. It is documented # in the Oracle "Linker and Libraries Guide", Part IV ELF Application # Binary Interface, Chapter 14 Object File Format, Section Hash Table @@ -551,7 +561,7 @@ def _create_elf_hash(self): Array(lambda ctx: ctx['nbuckets'], self.Elf_word('buckets')), Array(lambda ctx: ctx['nchains'], self.Elf_word('chains'))) - def _create_gnu_hash(self): + def _create_gnu_hash(self) -> None: # Structure of the GNU-style hash table header. Documentation for this # table is mostly in the GLIBC source code, a good explanation of the # format can be found in this blog post: @@ -564,7 +574,7 @@ def _create_gnu_hash(self): Array(lambda ctx: ctx['bloom_size'], self.Elf_xword('bloom')), Array(lambda ctx: ctx['nbuckets'], self.Elf_word('buckets'))) - def _create_gnu_debuglink(self): + def _create_gnu_debuglink(self) -> None: self.Gnu_debuglink = Struct('Gnu_debuglink', CString("filename"), Padding(lambda ctx: 3 - len(ctx.filename) % 4, strict=True), diff --git a/scripts/dwarfdump.py b/scripts/dwarfdump.py index 0e5484fd..9038524c 100644 --- a/scripts/dwarfdump.py +++ b/scripts/dwarfdump.py @@ -13,10 +13,13 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + import argparse import os import sys import traceback +from typing import IO, TYPE_CHECKING, overload # For running from development directory. It should take precedence over the # installed pyelftools. 
@@ -34,10 +37,15 @@ from elftools.dwarf.datatype_cpp import describe_cpp_datatype from elftools.dwarf.descriptions import describe_reg_name +if TYPE_CHECKING: + from elftools.dwarf.compileunit import CompileUnit + from elftools.dwarf.die import DIE + + # ------------------------------ # ------------------------------ -def _get_cu_base(cu): +def _get_cu_base(cu: CompileUnit): top_die = cu.get_top_DIE() attr = top_die.attributes if 'DW_AT_low_pc' in attr: @@ -47,10 +55,10 @@ def _get_cu_base(cu): else: raise ValueError("Can't find the base IP (low_pc) for a CU") -def _addr_str_length(die): +def _addr_str_length(die: DIE) -> int: return die.cu.header.address_size*2 -def _DIE_name(die): +def _DIE_name(die: DIE) -> str: if 'DW_AT_name' in die.attributes: return bytes2str(die.attributes['DW_AT_name'].value) elif 'DW_AT_linkage_name' in die.attributes: @@ -58,7 +66,7 @@ def _DIE_name(die): else: raise DWARFError() -def _DIE_linkage_name(die): +def _DIE_linkage_name(die: DIE) -> str: if 'DW_AT_linkage_name' in die.attributes: return bytes2str(die.attributes['DW_AT_linkage_name'].value) elif 'DW_AT_name' in die.attributes: @@ -66,7 +74,11 @@ def _DIE_linkage_name(die): else: raise DWARFError() -def _safe_DIE_name(die, default=None): +@overload +def _safe_DIE_name(die: DIE, default: str) -> str: ... +@overload +def _safe_DIE_name(die: DIE) -> str | None: ... +def _safe_DIE_name(die: DIE, default: str | None = None) -> str | None: if 'DW_AT_name' in die.attributes: return bytes2str(die.attributes['DW_AT_name'].value) elif 'DW_AT_linkage_name' in die.attributes: @@ -74,7 +86,11 @@ def _safe_DIE_name(die, default=None): else: return default -def _safe_DIE_linkage_name(die, default=None): +@overload +def _safe_DIE_linkage_name(die: DIE, default: str) -> str: ... +@overload +def _safe_DIE_linkage_name(die: DIE) -> str | None: ... 
+def _safe_DIE_linkage_name(die: DIE, default: str | None = None) -> str | None: if 'DW_AT_linkage_name' in die.attributes: return bytes2str(die.attributes['DW_AT_linkage_name'].value) elif 'DW_AT_name' in die.attributes: @@ -82,7 +98,7 @@ def _safe_DIE_linkage_name(die, default=None): else: return default -def _desc_ref(attr, die, extra=''): +def _desc_ref(attr, die: DIE, extra: str = '') -> str: if extra: extra = " \"%s\"" % extra if attr.form == 'DW_FORM_ref_addr': @@ -96,13 +112,13 @@ def _desc_ref(attr, die, extra=''): die.cu.cu_offset + attr.raw_value, extra) -def _desc_data(attr, die): +def _desc_data(attr, die: DIE) -> str: """ Hex with length driven by form """ len = int(attr.form[12:]) * 2 return "0x%0*x" % (len, attr.value,) -def _desc_strx(attr, die): +def _desc_strx(attr, die: DIE) -> str: return "indexed (%08x) string = \"%s\"" % (attr.raw_value, bytes2str(attr.value).replace("\\", "\\\\")) FORM_DESCRIPTIONS = dict( @@ -131,16 +147,16 @@ def _desc_strx(attr, die): DW_FORM_exprloc=lambda attr, die: _desc_expression(attr.value, die) ) -def _desc_enum(attr, enum): +def _desc_enum(attr, enum: dict[str, int]) -> str: """For attributes like DW_AT_language, physically int, logically an enum """ return next((k for (k, v) in enum.items() if v == attr.value), str(attr.value)) -def _cu_comp_dir(cu): +def _cu_comp_dir(cu: CompileUnit) -> str: return bytes2str(cu.get_top_DIE().attributes['DW_AT_comp_dir'].value) -def _desc_decl_file(attr, die): +def _desc_decl_file(attr, die: DIE) -> str: # Filename/dirname arrays are 0 based in DWARFv5 cu = die.cu if not hasattr(cu, "_lineprogram"): @@ -163,7 +179,7 @@ def _desc_decl_file(attr, die): return "\"%s\"" % (os.path.join(dir, file_name),) -def _desc_ranges(attr, die): +def _desc_ranges(attr, die: DIE) -> str: di = die.cu.dwarfinfo if not hasattr(di, '_rnglists'): di._rangelists = di.range_lists() @@ -185,7 +201,7 @@ def _desc_ranges(attr, die): prefix = "indexed (0x%x) rangelist = " % attr.raw_value if attr.form == 
'DW_FORM_rnglistx' else '' return ("%s0x%08x\n" % (prefix, attr.value)) + "\n".join(lines) -def _desc_locations(attr, die): +def _desc_locations(attr, die: DIE) -> str: cu = die.cu di = cu.dwarfinfo if not hasattr(di, '_loclists'): @@ -215,7 +231,7 @@ def _desc_locations(attr, die): return ("%s0x%08x:\n" % (prefix, attr.value)) + "\n".join(lines) # By default, numeric arguments are spelled in hex with a leading 0x -def _desc_operationarg(s, cu): +def _desc_operationarg(s: str | int | list, cu: CompileUnit) -> str: if isinstance(s, str): return s elif isinstance(s, int): @@ -226,14 +242,14 @@ def _desc_operationarg(s, cu): else: return " ".join((hex(len(s)), *("0x%02x" % b for b in s))) -def _arch(cu): +def _arch(cu: CompileUnit) -> str: return cu.dwarfinfo.config.machine_arch -def _desc_reg(reg_no, cu): +def _desc_reg(reg_no: int, cu: CompileUnit) -> str: reg_name = describe_reg_name(reg_no, _arch(cu), False) return reg_name.upper() if reg_name else "" -def _desc_operation(op, op_name, args, cu): +def _desc_operation(op, op_name: str, args, cu: CompileUnit) -> str: # Not sure about regx(regno) and bregx(regno, offset) if 0x50 <= op <= 0x6f: # reg0...reg31 - decode reg name return op_name + " " + _desc_reg(op - 0x50, cu) @@ -276,7 +292,7 @@ def _desc_operation(op, op_name, args, cu): 'DW_OP_GNU_convert', 'DW_OP_GNU_regval_type') -def _desc_expression(expr, die): +def _desc_expression(expr, die: DIE) -> str: cu = die.cu if not hasattr(cu, '_exprparser'): cu._exprparser = DWARFExprParser(cu.structs) @@ -292,12 +308,12 @@ def _desc_expression(expr, die): lines.append(" " + " ".join("%02x" % b for b in expr[start_of_unparsed:])) return ", ".join(lines) -def _desc_datatype(attr, die): +def _desc_datatype(attr, die: DIE) -> str: """Oy vey """ return _desc_ref(attr, die, describe_cpp_datatype(die)) -def _get_origin_name(die): +def _get_origin_name(die: DIE) -> str: func_die = die.get_DIE_from_attribute('DW_AT_abstract_origin') name = _safe_DIE_linkage_name(func_die, '') if 
not name: @@ -307,14 +323,14 @@ def _get_origin_name(die): return _get_origin_name(func_die) return name -def _desc_origin(attr, die): +def _desc_origin(attr, die: DIE) -> str: return _desc_ref(attr, die, _get_origin_name(die)) -def _desc_spec(attr, die): +def _desc_spec(attr, die: DIE) -> str: return _desc_ref(attr, die, _DIE_linkage_name(die.get_DIE_from_attribute('DW_AT_specification'))) -def _desc_value(attr, die): +def _desc_value(attr, die: DIE) -> str: return str(attr.value) ATTR_DESCRIPTIONS = dict( @@ -342,7 +358,7 @@ class ReadElf: """ dump_xxx is used to dump the respective section. Mimics the output of dwarfdump with --verbose """ - def __init__(self, filename, file, output): + def __init__(self, filename: str, file: IO[bytes], output: IO[str]) -> None: """ file: stream object with the ELF file to read @@ -357,17 +373,17 @@ def __init__(self, filename, file, output): bits = self.elffile.elfclass self._emitline("%s: file format elf%d-%s" % (filename, bits, arch)) - def _emit(self, s=''): + def _emit(self, s: str = '') -> None: """ Emit an object to output """ self.output.write(str(s)) - def _emitline(self, s=''): + def _emitline(self, s: str = '') -> None: """ Emit an object to output, followed by a newline """ self.output.write(str(s).rstrip() + '\n') - def dump_info(self): + def dump_info(self) -> None: # TODO: DWARF64 will cause discrepancies in hex offset sizes self._emitline(".debug_info contents:") for cu in self._dwarfinfo.iter_CUs(): @@ -408,7 +424,7 @@ def dump_info(self): parent = die.get_parent() self._emitline() - def describe_attr_value(self, die, attr): + def describe_attr_value(self, die: DIE, attr) -> str: """This describes the attribute value in the way that's compatible with llvm_dwarfdump. 
Somewhat duplicates the work of describe_attr_value() in descriptions """ @@ -419,16 +435,16 @@ def describe_attr_value(self, die, attr): else: return str(attr.value) - def dump_loc(self): + def dump_loc(self) -> None: pass - def dump_loclists(self): + def dump_loclists(self) -> None: pass - def dump_ranges(self): + def dump_ranges(self) -> None: pass - def dump_v4_rangelist(self, rangelist, cu_map): + def dump_v4_rangelist(self, rangelist, cu_map) -> None: cu = cu_map[rangelist[0].entry_offset] addr_str_len = cu.header.address_size*2 base_ip = _get_cu_base(cu) @@ -444,7 +460,7 @@ def dump_v4_rangelist(self, rangelist, cu_map): else: raise NotImplementedError("Unknown object in a range list") - def dump_rnglists(self): + def dump_rnglists(self) -> None: self._emitline(".debug_rnglists contents:") ranges_sec = self._dwarfinfo.range_lists() if ranges_sec.version < 5: @@ -511,7 +527,7 @@ def dump_v5_rangelist(self, rangelist, cu_map, max_type_len): SCRIPT_DESCRIPTION = 'Display information about the contents of ELF format files' VERSION_STRING = '%%(prog)s: based on pyelftools %s' % __version__ -def main(stream=None): +def main(stream: IO[str] | None = None) -> None: # parse the command-line arguments and invoke ReadElf argparser = argparse.ArgumentParser( usage='usage: %(prog)s [options] ', diff --git a/scripts/readelf.py b/scripts/readelf.py index 1f15621a..1d4c33cc 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -7,12 +7,15 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from __future__ import annotations + import argparse import os import sys import re import traceback import itertools +from typing import IO, TYPE_CHECKING # For running from development directory. It should take precedence over the # installed pyelftools. 
@@ -60,7 +63,12 @@ from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry from elftools.dwarf.enums import ENUM_DW_UT -def _get_cu_base(cu): +if TYPE_CHECKING: + from elftools.elf.sections import Section + from elftools.construct.lib.container import Container + from elftools.dwarf.compileunit import CompileUnit + +def _get_cu_base(cu: CompileUnit): top_die = cu.get_top_DIE() attr = top_die.attributes if 'DW_AT_low_pc' in attr: @@ -92,13 +100,13 @@ def _get_cu_base(cu): # formatting symbol names for display. _CONTROL_CHAR_RE = re.compile(r'[\x01-\x1f]') -def _format_symbol_name(s): +def _format_symbol_name(s: str) -> str: return _CONTROL_CHAR_RE.sub(lambda match: '^' + chr(0x40 + ord(match[0])), s) class ReadElf: """ display_* methods are used to emit output into the output stream """ - def __init__(self, file, output): + def __init__(self, file: IO[bytes], output: IO[str]) -> None: """ file: stream object with the ELF file to read @@ -115,7 +123,7 @@ def __init__(self, file, output): self._shndx_sections = None - def display_file_header(self): + def display_file_header(self) -> None: """ Display the ELF file header """ self._emitline('ELF Header:') @@ -173,7 +181,7 @@ def display_file_header(self): else: self._emitline('') - def decode_flags(self, flags): + def decode_flags(self, flags: int) -> str: description = "" if self.elffile['e_machine'] == "EM_ARM": eabi = flags & E_FLAGS.EF_ARM_EABIMASK @@ -273,7 +281,7 @@ def decode_flags(self, flags): return description - def display_program_headers(self, show_heading=True): + def display_program_headers(self, show_heading: bool = True) -> None: """ Display the ELF program headers. If show_heading is True, displays the heading for this information (Elf file type is...) 
@@ -362,7 +370,7 @@ def display_program_headers(self, show_heading=True): self._emitline('') - def display_section_headers(self, show_heading=True): + def display_section_headers(self, show_heading: bool = True) -> None: """ Display the ELF section headers """ elfheader = self.elffile.header @@ -425,7 +433,7 @@ def display_section_headers(self, show_heading=True): self._emit('y (purecode), ') self._emitline('p (processor specific)') - def display_symbol_tables(self): + def display_symbol_tables(self) -> None: """ Display the symbol tables contained in the file """ self._init_versioninfo() @@ -499,7 +507,7 @@ def display_symbol_tables(self): _format_symbol_name(symbol_name), version_info)) - def display_dynamic_tags(self): + def display_dynamic_tags(self) -> None: """ Display the dynamic tags contained in the file """ has_dynamic_sections = False @@ -556,7 +564,7 @@ def display_dynamic_tags(self): if not has_dynamic_sections: self._emitline("\nThere is no dynamic section in this file.") - def display_notes(self): + def display_notes(self) -> None: """ Display the notes contained in the file """ for section in self.elffile.iter_sections(): @@ -570,7 +578,7 @@ def display_notes(self): self._format_hex(note['n_descsz'], fieldsize=8), describe_note(note, self.elffile.header.e_machine))) - def display_relocations(self): + def display_relocations(self) -> None: """ Display the relocations contained in the file """ has_relocation_sections = False @@ -656,7 +664,7 @@ def display_relocations(self): if not has_relocation_sections: self._emitline('\nThere are no relocations in this file.') - def display_arm_unwind(self): + def display_arm_unwind(self) -> None: if not self.elffile.has_ehabi_info(): self._emitline('There are no .ARM.idx sections in this file.') return @@ -691,7 +699,7 @@ def display_arm_unwind(self): self._emit(' ') self._emitline(mnemonic_item) - def display_version_info(self): + def display_version_info(self) -> None: """ Display the version info contained in 
the file """ self._init_versioninfo() @@ -794,7 +802,7 @@ def display_version_info(self): offset += verneed['vn_next'] - def display_arch_specific(self): + def display_arch_specific(self) -> None: """ Display the architecture-specific info contained in the file. """ if self.elffile['e_machine'] == 'EM_ARM': @@ -802,7 +810,7 @@ def display_arch_specific(self): elif self.elffile['e_machine'] == 'EM_RISCV': self._display_arch_specific_riscv() - def display_hex_dump(self, section_spec): + def display_hex_dump(self, section_spec) -> None: """ Display a hex dump of a section. section_spec is either a section number or a name. """ @@ -850,7 +858,7 @@ def display_hex_dump(self, section_spec): self._emitline() - def display_string_dump(self, section_spec): + def display_string_dump(self, section_spec) -> None: """ Display a strings dump of a section. section_spec is either a section number or a name. """ @@ -895,7 +903,7 @@ def display_string_dump(self, section_spec): else: self._emitline() - def display_debug_dump(self, dump_what): + def display_debug_dump(self, dump_what: str) -> None: """ Dump a DWARF section """ self._init_dwarfinfo() @@ -924,8 +932,14 @@ def display_debug_dump(self, dump_what): else: self._emitline('debug dump not yet supported for "%s"' % dump_what) - def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True, - alternate=False): + def _format_hex( + self, + addr: int, + fieldsize: int | None = None, + fullhex: bool = False, + lead0x: bool = True, + alternate: bool = False, + ) -> str: """ Format an address into a hexadecimal string. 
fieldsize: @@ -964,8 +978,8 @@ def _format_hex(self, addr, fieldsize=None, fullhex=False, lead0x=True, field = '%' + '0%sx' % fieldsize return s + field % addr - def _print_version_section_header(self, version_section, name, lead0x=True, - indent=1): + def _print_version_section_header(self, version_section, name: str, lead0x: bool = True, + indent=1) -> None: """ Print a section header of one version related section (versym, verneed or verdef) with some options to accomodate readelf little differences between each header (e.g. indentation @@ -990,7 +1004,7 @@ def _print_version_section_header(self, version_section, name, lead0x=True, ) ) - def _init_versioninfo(self): + def _init_versioninfo(self) -> None: """ Search and initialize informations about version related sections and the kind of versioning used (GNU or Solaris). """ @@ -1017,7 +1031,7 @@ def _init_versioninfo(self): self._versioninfo['verneed'] or self._versioninfo['verdef']): self._versioninfo['type'] = 'Solaris' - def _symbol_version(self, nsym): + def _symbol_version(self, nsym) -> dict | None: """ Return a dict containing information on the or None if no version information is available """ @@ -1055,7 +1069,7 @@ def _symbol_version(self, nsym): symbol_version['index'] = index return symbol_version - def _section_from_spec(self, spec): + def _section_from_spec(self, spec) -> Section | None: """ Retrieve a section given a "spec" (either number or name). Return None if no such section exists in the file. """ @@ -1085,7 +1099,7 @@ def _get_symbol_shndx(self, symbol, symbol_index, symtab_index): if isinstance(sec, SymbolTableIndexSection)} return self._shndx_sections[symtab_index].get_section_index(symbol_index) - def _note_relocs_for_section(self, section): + def _note_relocs_for_section(self, section: Section) -> None: """ If there are relocation sections pointing to the givne section, emit a note about it. 
""" @@ -1096,7 +1110,7 @@ def _note_relocs_for_section(self, section): self._emitline(' Note: This section has relocations against it, but these have NOT been applied to this dump.') return - def _init_dwarfinfo(self): + def _init_dwarfinfo(self) -> None: """ Initialize the DWARF info contained in the file and assign it to self._dwarfinfo. Leave self._dwarfinfo at None if no DWARF info was found in the file @@ -1109,7 +1123,7 @@ def _init_dwarfinfo(self): else: self._dwarfinfo = None - def _dump_debug_info(self): + def _dump_debug_info(self) -> None: """ Dump the debugging info section. """ if not self._dwarfinfo.has_debug_info: @@ -1188,7 +1202,7 @@ def _dump_debug_info(self): self._emitline() - def _dump_debug_types(self): + def _dump_debug_types(self) -> None: """Dump the debug types section """ if not self._dwarfinfo.has_debug_info: @@ -1240,7 +1254,7 @@ def _dump_debug_types(self): self._emitline() - def _dump_debug_line_programs(self): + def _dump_debug_line_programs(self) -> None: """ Dump the (decoded) line programs from .debug_line The programs are dumped in the order of the CUs they belong to. """ @@ -1308,7 +1322,7 @@ def _dump_debug_line_programs(self): # Another readelf oddity... self._emitline() - def _dump_frames_info(self, section, cfi_entries): + def _dump_frames_info(self, section, cfi_entries) -> None: """ Dump the raw call frame info in a section. `section` is the Section instance that contains the call frame info @@ -1364,7 +1378,7 @@ def _dump_frames_info(self, section, cfi_entries): self._emit(describe_CFI_instructions(entry)) self._emitline() - def _dump_debug_frames(self): + def _dump_debug_frames(self) -> None: """ Dump the raw frame info from .debug_frame and .eh_frame sections. 
""" if self._dwarfinfo.has_EH_CFI(): @@ -1378,7 +1392,7 @@ def _dump_debug_frames(self): self._dwarfinfo.debug_frame_sec, self._dwarfinfo.CFI_entries()) - def _dump_debug_namelut(self, what): + def _dump_debug_namelut(self, what: str) -> None: """ Dump the debug pubnames section. """ @@ -1412,7 +1426,7 @@ def _dump_debug_namelut(self, what): self._emitline(' %x %s' % (item[1].die_ofs - cu_ofs, item[0])) self._emitline() - def _dump_debug_aranges(self): + def _dump_debug_aranges(self) -> None: """ Dump the aranges table """ aranges_table = self._dwarfinfo.get_aranges() @@ -1454,7 +1468,7 @@ def _dump_debug_aranges(self): self._format_hex(0, fullhex=True, lead0x=False), self._format_hex(0, fullhex=True, lead0x=False))) - def _dump_frames_interp_info(self, section, cfi_entries): + def _dump_frames_interp_info(self, section, cfi_entries) -> None: """ Dump interpreted (decoded) frame information in a section. `section` is the Section instance that contains the call frame info @@ -1545,7 +1559,7 @@ def _dump_frames_interp_info(self, section, cfi_entries): self._emitline() self._emitline() - def _dump_debug_frames_interp(self): + def _dump_debug_frames_interp(self) -> None: """ Dump the interpreted (decoded) frame information from .debug_frame and .eh_frame sections. 
""" @@ -1560,7 +1574,7 @@ def _dump_debug_frames_interp(self): self._dwarfinfo.debug_frame_sec, self._dwarfinfo.CFI_entries()) - def _dump_debug_locations(self): + def _dump_debug_locations(self) -> None: """ Dump the location lists from .debug_loc/.debug_loclists section """ di = self._dwarfinfo @@ -1574,7 +1588,7 @@ def _dump_debug_locations(self): else: self._dump_debug_locsection(di, loc_lists_sec) - def _dump_debug_locsection(self, di, loc_lists_sec): + def _dump_debug_locsection(self, di, loc_lists_sec) -> None: """ Dump the location lists from .debug_loc/.debug_loclists section """ ver5 = loc_lists_sec.version >= 5 @@ -1629,7 +1643,7 @@ def _dump_debug_locsection(self, di, loc_lists_sec): self._emitline(' Offset Begin End Expression') self._dump_loclist(loc_list, line_template, cu_map) - def _dump_loclist(self, loc_list, line_template, cu_map): + def _dump_loclist(self, loc_list, line_template, cu_map) -> None: in_views = False has_views = False base_ip = None @@ -1691,7 +1705,7 @@ def _dump_loclist(self, loc_list, line_template, cu_map): last = loc_list[-1] self._emitline(" %08x " % (last.entry_offset + last.entry_length)) - def _dump_debug_loclists_CU_header(self, cu): + def _dump_debug_loclists_CU_header(self, cu: Container) -> None: # Header slightly different from that of v5 rangelist in-section CU header dump self._emitline('Table at Offset %s' % self._format_hex(cu.cu_offset, alternate=True)) self._emitline(' Length: %s' % self._format_hex(cu.unit_length, alternate=True)) @@ -1704,7 +1718,7 @@ def _dump_debug_loclists_CU_header(self, cu): for i_offset in enumerate(cu.offsets): self._emitline(' [%6d] 0x%x' % i_offset) - def _dump_debug_ranges(self): + def _dump_debug_ranges(self) -> None: # TODO: GNU readelf format doesn't need entry_length? 
di = self._dwarfinfo range_lists_sec = di.range_lists() @@ -1717,7 +1731,7 @@ def _dump_debug_ranges(self): else: self._dump_debug_rangesection(di, range_lists_sec) - def _dump_debug_rnglists_CU_header(self, cu): + def _dump_debug_rnglists_CU_header(self, cu: CompileUnit) -> None: self._emitline(' Table at Offset: %s:' % self._format_hex(cu.cu_offset, alternate=True)) self._emitline(' Length: %s' % self._format_hex(cu.unit_length, alternate=True)) self._emitline(' DWARF version: %d' % cu.version) @@ -1729,7 +1743,7 @@ def _dump_debug_rnglists_CU_header(self, cu): for i_offset in enumerate(cu.offsets): self._emitline(' [%6d] 0x%x' % i_offset) - def _dump_debug_rangesection(self, di, range_lists_sec): + def _dump_debug_rangesection(self, di, range_lists_sec) -> None: # Last amended to match readelf 2.41 ver5 = range_lists_sec.version >= 5 section_name = (di.debug_rnglists_sec if ver5 else di.debug_ranges_sec).name @@ -1810,7 +1824,7 @@ def _dump_rangelist(self, range_list, cu_map, ver5, line_template, base_template last = range_list[-1] self._emitline(' %08x ' % (last.entry_offset + last.entry_length if ver5 else first.entry_offset)) - def _display_attributes(self, attr_sec, descriptor): + def _display_attributes(self, attr_sec, descriptor) -> None: """ Display the attributes contained in the section. """ for s in attr_sec.iter_subsections(): @@ -1823,26 +1837,26 @@ def _display_attributes(self, attr_sec, descriptor): self._emit(' ') self._emitline(descriptor(attr.tag, attr.value, attr.extra)) - def _display_arch_specific_arm(self): + def _display_arch_specific_arm(self) -> None: """ Display the ARM architecture-specific info contained in the file. """ attr_sec = self.elffile.get_section_by_name('.ARM.attributes') if attr_sec: self._display_attributes(attr_sec, describe_attr_tag_arm) - def _display_arch_specific_riscv(self): + def _display_arch_specific_riscv(self) -> None: """ Display the RISC-V architecture-specific info contained in the file. 
""" attr_sec = self.elffile.get_section_by_name('.riscv.attributes') if attr_sec: self._display_attributes(attr_sec, describe_attr_tag_riscv) - def _emit(self, s=''): + def _emit(self, s: str = '') -> None: """ Emit an object to output """ self.output.write(str(s)) - def _emitline(self, s=''): + def _emitline(self, s: str = '') -> None: """ Emit an object to output, followed by a newline """ self.output.write(str(s).rstrip() + '\n') @@ -1852,7 +1866,7 @@ def _emitline(self, s=''): VERSION_STRING = '%%(prog)s: based on pyelftools %s' % __version__ -def main(stream=None): +def main(stream: IO[str] | None = None) -> None: # parse the command-line arguments and invoke ReadElf argparser = argparse.ArgumentParser( usage='usage: %(prog)s [options] ', @@ -1977,7 +1991,7 @@ def main(stream=None): sys.exit(1) -def profile_main(): +def profile_main() -> None: # Run 'main' redirecting its output to readelfout.txt # Saves profiling information in readelf.profile PROFFILE = 'readelf.profile' diff --git a/test/test_debuglink.py b/test/test_debuglink.py index 932ea253..797ffd8f 100644 --- a/test/test_debuglink.py +++ b/test/test_debuglink.py @@ -4,6 +4,9 @@ # Gabriele Digregorio - Io_no # This code is in the public domain #------------------------------------------------------------------------------ +from __future__ import annotations + +from typing import IO from elftools.elf.elffile import ELFFile import unittest @@ -16,7 +19,7 @@ class TestDebuglink(unittest.TestCase): We verify that the subprograms are correctly retrieved from the debug file. """ - def stream_loader(self, external_filename: str) -> 'IO[bytes]': + def stream_loader(self, external_filename: str) -> IO[bytes]: """ This function takes an external filename to load a supplementary object file, and returns a stream suitable for creating a new ELFFile. 
@@ -30,7 +33,7 @@ def stream_loader(self, external_filename: str) -> 'IO[bytes]': stream = open(b'test/testfiles_for_unittests/' + external_filename, 'rb') return stream - def subprograms_from_debuglink(self, elf: ELFFile) -> dict[str, (int, int)]: + def subprograms_from_debuglink(self, elf: ELFFile) -> dict[str, tuple[int, int]]: """Returns a dictionary containing the subprograms of the specified ELF file from the linked debug file. Args: From 5dedf0f59cf6a953bafabfdf5bbbd6a2ff391787 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Tue, 18 Feb 2025 10:19:13 +0100 Subject: [PATCH 06/38] typing: Hint static attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several classes inherit from `Construct`, which only has a generic `__getattr__(self, name: str) -> Any`. To improve typing, explicitly name several attributes with their type. For `name` we know that it (almost) never will be `None` – an unnamed entity does not make much sense. Overwrite the type-hint `str | None` inherited from `Construct` with just `str` which saves us from a ton of `name is not None` checks. Signed-off-by: Philipp Hahn --- elftools/common/construct_utils.py | 6 ++++++ elftools/dwarf/structs.py | 6 ++++++ elftools/elf/structs.py | 17 +++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/elftools/common/construct_utils.py b/elftools/common/construct_utils.py index edd337f1..1a5f30a4 100644 --- a/elftools/common/construct_utils.py +++ b/elftools/common/construct_utils.py @@ -58,6 +58,8 @@ def _sizeof(self, context: Container) -> int: class ULEB128(Construct): """A construct based parser for ULEB128 encoding.
""" + if TYPE_CHECKING: + name: str # instead of `str|None` from Construct to save us from `is None` checks everywhere def _parse(self, stream: IO[bytes], context: Container) -> int: value = 0 shift = 0 @@ -74,6 +76,8 @@ def _parse(self, stream: IO[bytes], context: Container) -> int: class SLEB128(Construct): """A construct based parser for SLEB128 encoding. """ + if TYPE_CHECKING: + name: str # instead of `str|None` from Construct to save us from `is None` checks everywhere def _parse(self, stream: IO[bytes], context: Container) -> int: value = 0 shift = 0 @@ -98,6 +102,8 @@ class StreamOffset(Construct): StreamOffset("item_offset") """ __slots__: list[str] = [] + if TYPE_CHECKING: + name: str # instead of `str|None` from Construct to save us from `is None` checks everywhere def __init__(self, name: str) -> None: Construct.__init__(self, name) self._set_flag(self.FLAG_DYNAMIC) diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index 953d7773..0ad24579 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -88,6 +88,12 @@ class DWARFStructs: See also the documentation of public methods. """ + if TYPE_CHECKING: + little_endian: bool + dwarf_format: int + address_size: int + dwarf_version: int + # Cache for structs instances based on creation parameters. Structs # initialization is expensive and we don't won't to repeat it # unnecessarily. 
diff --git a/elftools/elf/structs.py b/elftools/elf/structs.py index fd19bde7..7773893d 100644 --- a/elftools/elf/structs.py +++ b/elftools/elf/structs.py @@ -51,6 +51,23 @@ class ELFStructs: Elf_Rel, Elf_Rela: Entries in relocation sections """ + if TYPE_CHECKING: + Elf_byte: Callable[[str], FormatField[int]] + Elf_half: Callable[[str], FormatField[int]] + Elf_word: Callable[[str], FormatField[int]] + Elf_word64: Callable[[str], FormatField[int]] + Elf_addr: Callable[[str], FormatField[int]] + Elf_offset: Callable[[str], FormatField[int]] + Elf_sword: Callable[[str], FormatField[int]] + Elf_sxword: Callable[[str], FormatField[int]] + Elf_xsword: Callable[[str], FormatField[int]] + Elf_Ehdr: Struct + Elf_Phdr: Struct + Elf_Shdr: Struct + Elf_Sym: Struct + Elf_Rel: Struct + Elf_Rela: Struct + def __init__(self, little_endian: bool = True, elfclass: int = 32) -> None: assert elfclass == 32 or elfclass == 64 self.little_endian = little_endian From 82da3aaa33a6404d887de87efeb14b4fe1e5763e Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 08:22:05 +0100 Subject: [PATCH 07/38] typing: assert isinstance Several variables may have a `Union` type. Explicitly assert that those variables have the expected type.
Signed-off-by: Philipp Hahn --- elftools/dwarf/callframe.py | 3 +++ elftools/dwarf/descriptions.py | 8 +++++++- elftools/elf/descriptions.py | 16 ++++++++-------- elftools/elf/dynamic.py | 3 +++ elftools/elf/relocation.py | 1 + 5 files changed, 22 insertions(+), 9 deletions(-) diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py index 91c72705..d54e55ed 100644 --- a/elftools/dwarf/callframe.py +++ b/elftools/dwarf/callframe.py @@ -145,6 +145,7 @@ def _parse_entry_at(self, offset: int) -> CFIEntry | ZERO: header, entry_structs) else: cie = self._parse_cie_for_fde(offset, header, entry_structs) + assert isinstance(cie, CFIEntry) aug_bytes = self._read_augmentation_data(entry_structs) lsda_encoding: int = cie.augmentation_dict.get('LSDA_encoding', DW_EH_encoding_flags['DW_EH_PE_omit']) if lsda_encoding != DW_EH_encoding_flags['DW_EH_PE_omit']: @@ -174,6 +175,7 @@ def _parse_entry_at(self, offset: int) -> CFIEntry | ZERO: else: # FDE cie = self._parse_cie_for_fde(offset, header, entry_structs) + assert isinstance(cie, CIE) entry = FDE( header=header, instructions=instructions, offset=offset, structs=entry_structs, cie=cie, @@ -390,6 +392,7 @@ def _parse_fde_header(self, entry_structs: DWARFStructs, offset: int) -> Contain minimal_header = struct_parse(Struct('eh_frame_minimal_header', *fields), self.stream, offset) cie = self._parse_cie_for_fde(offset, minimal_header, entry_structs) + assert isinstance(cie, CFIEntry) initial_location_offset = self.stream.tell() # Try to parse the initial location. 
We need the initial location in diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py index 9d0cdc6f..be9481d6 100644 --- a/elftools/dwarf/descriptions.py +++ b/elftools/dwarf/descriptions.py @@ -130,9 +130,13 @@ def _full_reg_name(regnum: int) -> str: def describe_CFI_register_rule(rule: RegisterRule) -> str: s = _DESCR_CFI_REGISTER_RULE_TYPE[rule.type] if rule.type in ('OFFSET', 'VAL_OFFSET'): + assert isinstance(rule.arg, int) s += '%+d' % rule.arg elif rule.type == 'REGISTER': - s += describe_reg_name(rule.arg) + assert isinstance(rule.arg, int) + reg = describe_reg_name(rule.arg) + assert reg is not None + s += reg return s @@ -140,6 +144,8 @@ def describe_CFI_CFA_rule(rule: CFARule) -> str: if rule.expr: return 'exp' else: + assert isinstance(rule.reg, int) + assert isinstance(rule.offset, int) return '%s%+d' % (describe_reg_name(rule.reg), rule.offset) diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py index feb00025..fd9ca95c 100644 --- a/elftools/elf/descriptions.py +++ b/elftools/elf/descriptions.py @@ -57,6 +57,7 @@ def describe_e_type(x: str, elffile: ELFFile | None = None) -> str: # Detect whether this is a normal SO or a PIE executable dynamic = elffile.get_section_by_name('.dynamic') if dynamic: + assert isinstance(dynamic, DynamicSection) for t in dynamic.iter_tags('DT_FLAGS_1'): if t.entry.d_val & ENUM_DT_FLAGS_1['DF_1_PIE']: return 'DYN (Position-Independent Executable file)' @@ -72,9 +73,9 @@ def describe_e_version_numeric(x: str) -> str: def describe_p_type(x: int | str) -> str: - if x in _DESCR_P_TYPE: + if isinstance(x, str) and x in _DESCR_P_TYPE: return _DESCR_P_TYPE.get(x) - elif x >= ENUM_P_TYPE_BASE['PT_LOOS'] and x <= ENUM_P_TYPE_BASE['PT_HIOS']: + elif isinstance(x, int) and ENUM_P_TYPE_BASE['PT_LOOS'] <= x <= ENUM_P_TYPE_BASE['PT_HIOS']: return 'LOOS+%lx' % (x - ENUM_P_TYPE_BASE['PT_LOOS']) else: return _unknown @@ -104,10 +105,9 @@ def describe_rh_flags(x: int) -> str: def 
describe_sh_type(x: int | str) -> str: - if x in _DESCR_SH_TYPE: + if isinstance(x, str) and x in _DESCR_SH_TYPE: return _DESCR_SH_TYPE.get(x) - elif (x >= ENUM_SH_TYPE_BASE['SHT_LOOS'] and - x < ENUM_SH_TYPE_BASE['SHT_GNU_versym']): + elif isinstance(x, int) and ENUM_SH_TYPE_BASE['SHT_LOOS'] <= x < ENUM_SH_TYPE_BASE['SHT_GNU_versym']: return 'loos+0x%lx' % (x - ENUM_SH_TYPE_BASE['SHT_LOOS']) else: return _unknown @@ -302,7 +302,7 @@ def describe_note_gnu_properties(properties: list[Container], machine: str) -> s for prop in properties: t, d, sz = prop.pr_type, prop.pr_data, prop.pr_datasz if t == 'GNU_PROPERTY_STACK_SIZE': - if type(d) is int: + if isinstance(d, int): prop_desc = 'stack size: 0x%x' % d else: prop_desc = 'stack size: ' % sz @@ -342,9 +342,9 @@ def describe_note_gnu_properties(properties: list[Container], machine: str) -> s prop_desc = ' ' % sz else: prop_desc = describe_note_gnu_property_bitmap_and(_DESCR_NOTE_GNU_PROPERTY_RISCV_FEATURE_1_AND, 'RISC-V AND feature', d) - elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC: + elif isinstance(t, int) and _DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC: prop_desc = '' % (t, bytes2hex(d, sep=' ')) - elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER: + elif isinstance(t, int) and _DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER: prop_desc = '' % (t, bytes2hex(d, sep=' ')) else: prop_desc = '' % (t, bytes2hex(d, sep=' ')) diff --git a/elftools/elf/dynamic.py b/elftools/elf/dynamic.py index 5cd49f51..8cda8783 100644 --- a/elftools/elf/dynamic.py +++ b/elftools/elf/dynamic.py @@ -141,6 +141,7 @@ def _get_stringtable(self) -> _StringTable: support the get_string() function. 
""" if self._stringtable: + assert isinstance(self._stringtable, _StringTable) return self._stringtable # If the ELF has stripped its section table (which is unusual, but @@ -149,11 +150,13 @@ def _get_stringtable(self) -> _StringTable: _, table_offset = self.get_table_offset('DT_STRTAB') if table_offset is not None: self._stringtable = _DynamicStringTable(self._stream, table_offset) + assert isinstance(self._stringtable, _StringTable) return self._stringtable # That didn't work for some reason. Let's use the section header # even though this ELF is super weird. self._stringtable = self.elffile.get_section_by_name('.dynstr') + assert isinstance(self._stringtable, _StringTable) return self._stringtable def _iter_tags(self, type: str | None = None) -> Iterator[Container]: diff --git a/elftools/elf/relocation.py b/elftools/elf/relocation.py index a7d5a938..d9dc7443 100644 --- a/elftools/elf/relocation.py +++ b/elftools/elf/relocation.py @@ -276,6 +276,7 @@ def apply_section_relocations(self, stream: IO[bytes], reloc_section: Relocation """ # The symbol table associated with this relocation section symtab = self.elffile.get_section(reloc_section['sh_link']) + assert isinstance(symtab, SymbolTableSection) for reloc in reloc_section.iter_relocations(): self._do_apply_relocation(stream, reloc, symtab) From 59c93d9a8c2160e6804e01fa29a269a8b2060076 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 08:22:05 +0100 Subject: [PATCH 08/38] typing: assert not None Several variable may have an `Optional` type and can be `None`. Explicitly assert that those variables are `not None`. 
Signed-off-by: Philipp Hahn --- elftools/dwarf/callframe.py | 2 ++ elftools/dwarf/compileunit.py | 8 ++++++-- elftools/dwarf/descriptions.py | 4 ++++ elftools/dwarf/die.py | 4 ++++ elftools/dwarf/dwarfinfo.py | 12 ++++++++++++ elftools/dwarf/locationlists.py | 3 +++ elftools/dwarf/ranges.py | 2 ++ elftools/dwarf/typeunit.py | 3 +++ elftools/elf/dynamic.py | 1 + elftools/elf/elffile.py | 6 ++++++ elftools/elf/relocation.py | 1 + elftools/elf/structs.py | 1 + scripts/dwarfdump.py | 1 + 13 files changed, 46 insertions(+), 2 deletions(-) mode change 100644 => 100755 scripts/dwarfdump.py diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py index d54e55ed..6a3315f9 100644 --- a/elftools/dwarf/callframe.py +++ b/elftools/dwarf/callframe.py @@ -537,6 +537,7 @@ def _decode_CFI_table(self) -> DecodedCallFrameTable: # For a FDE, we need to decode the attached CIE first, because its # decoded table is needed. Its "initial instructions" describe a # line that serves as the base (first) line in the FDE's table. + assert self.cie is not None cie = self.cie cie_decoded_table = cie.get_decoded() if cie_decoded_table.table: @@ -632,6 +633,7 @@ def _add_to_order(regnum: int) -> None: dwarf_assert( isinstance(self, FDE), '%s instruction must be in a FDE' % name) + assert last_line_in_CIE is not None if instr.args[0] in last_line_in_CIE: cur_line[instr.args[0]] = last_line_in_CIE[instr.args[0]] else: diff --git a/elftools/dwarf/compileunit.py b/elftools/dwarf/compileunit.py index e3ea9df7..52258ddc 100644 --- a/elftools/dwarf/compileunit.py +++ b/elftools/dwarf/compileunit.py @@ -101,6 +101,7 @@ def get_top_DIE(self) -> DIE: if self._diemap: return self._dielist[0] + assert self.dwarfinfo.debug_info_sec is not None top = DIE( cu=self, stream=self.dwarfinfo.debug_info_sec.stream, @@ -145,6 +146,7 @@ def iter_DIEs(self) -> Iterator[DIE]: """ Iterate over all the DIEs in the CU, in order of their appearance. Note that null DIEs will also be returned. 
""" + assert self.dwarfinfo.debug_info_sec is not None stm = self.dwarfinfo.debug_info_sec.stream pos = self.cu_die_offset end_pos = self.cu_offset + self.size @@ -152,7 +154,7 @@ def iter_DIEs(self) -> Iterator[DIE]: die = self.get_top_DIE() yield die pos += die.size - parent = die + parent: DIE | None = die i = 1 while pos < end_pos: if i < len(self._diemap) and self._diemap[i] == pos: # DIE already cached @@ -165,7 +167,7 @@ def iter_DIEs(self) -> Iterator[DIE]: die._parent = parent - if die.tag is None: + if die.tag is None and parent is not None: parent._terminator = die parent = parent._parent @@ -231,6 +233,7 @@ def iter_DIE_children(self, die: DIE) -> Iterator[DIE]: if child._terminator is None: for _ in self.iter_DIE_children(child): pass + assert child._terminator is not None cur_offset = child._terminator.offset + child._terminator.size @@ -253,6 +256,7 @@ def _iter_DIE_subtree(self, die: DIE) -> Iterator[DIE]: if die.has_children: for c in die.iter_children(): yield from die.cu._iter_DIE_subtree(c) + assert die._terminator is not None yield die._terminator def _get_cached_DIE(self, offset: int) -> DIE: diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py index be9481d6..bcd824eb 100644 --- a/elftools/dwarf/descriptions.py +++ b/elftools/dwarf/descriptions.py @@ -67,6 +67,7 @@ def _full_reg_name(regnum: int) -> str: cie = entry pc: int | None = None else: # FDE + assert entry.cie is not None cie = entry.cie pc = entry['initial_location'] @@ -90,10 +91,12 @@ def _full_reg_name(regnum: int) -> str: _full_reg_name(instr.args[1])) elif name == 'DW_CFA_set_loc': pc = instr.args[0] + assert pc is not None s += ' %s: %08x\n' % (name, pc) elif name in ( 'DW_CFA_advance_loc1', 'DW_CFA_advance_loc2', 'DW_CFA_advance_loc4', 'DW_CFA_advance_loc'): _assert_FDE_instruction(instr) + assert pc is not None factored_offset: int = instr.args[0] * cie['code_alignment_factor'] s += ' %s: %s to %08x\n' % ( name, factored_offset, factored_offset + pc) 
@@ -111,6 +114,7 @@ def _full_reg_name(regnum: int) -> str: elif name in ('DW_CFA_def_cfa_offset', 'DW_CFA_GNU_args_size'): s += ' %s: %s\n' % (name, instr.args[0]) elif name == 'DW_CFA_def_cfa_offset_sf': + assert entry.cie is not None s += ' %s: %s\n' % (name, instr.args[0]*entry.cie['data_alignment_factor']) elif name == 'DW_CFA_def_cfa_expression': expr_dumper = ExprDumper(entry.structs) diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py index 0fe1e00e..8c4675ef 100644 --- a/elftools/dwarf/die.py +++ b/elftools/dwarf/die.py @@ -247,6 +247,7 @@ def _parse_DIE(self) -> None: # that manipulate the stream by reading data from it. stream.seek(self.offset) self.abbrev_code = structs.the_Dwarf_uleb128.parse_stream(stream) + assert self.abbrev_code is not None # This may be a null entry if self.abbrev_code == 0: @@ -333,14 +334,17 @@ def _translate_attr_value(self, form: str, raw_value: Any) -> Any: elif form in ('DW_FORM_addrx', 'DW_FORM_addrx1', 'DW_FORM_addrx2', 'DW_FORM_addrx3', 'DW_FORM_addrx4') and translate_indirect: return self.cu.dwarfinfo.get_addr(self.cu, raw_value) elif form in ('DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2', 'DW_FORM_strx3', 'DW_FORM_strx4') and translate_indirect: + assert self.dwarfinfo.debug_str_offsets_sec is not None stream = self.dwarfinfo.debug_str_offsets_sec.stream base_offset = _get_base_offset(self.cu, 'DW_AT_str_offsets_base') offset_size = 4 if self.cu.structs.dwarf_format == 32 else 8 str_offset = struct_parse(self.cu.structs.the_Dwarf_offset, stream, base_offset + raw_value*offset_size) return self.dwarfinfo.get_string_from_table(str_offset) elif form == 'DW_FORM_loclistx' and translate_indirect: + assert self.dwarfinfo.debug_loclists_sec is not None return _resolve_via_offset_table(self.dwarfinfo.debug_loclists_sec.stream, self.cu, raw_value, 'DW_AT_loclists_base') elif form == 'DW_FORM_rnglistx' and translate_indirect: + assert self.dwarfinfo.debug_rnglists_sec is not None return 
_resolve_via_offset_table(self.dwarfinfo.debug_rnglists_sec.stream, self.cu, raw_value, 'DW_AT_rnglists_base') return raw_value diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 540d55f1..56b1f34d 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -214,6 +214,7 @@ def get_DIE_by_sig8(self, sig8: int) -> DIE: .debug_types section. """ self._parse_debug_types() + assert self._type_units_by_sig is not None tu = self._type_units_by_sig.get(sig8) if tu is None: raise KeyError("Signature %016x not found in .debug_types" % sig8) @@ -235,6 +236,7 @@ def get_CU_containing(self, refaddr: int) -> CompileUnit: dwarf_assert( self.has_debug_info, 'CU lookup but no debug info section') + assert self.debug_info_sec is not None dwarf_assert( 0 <= refaddr < self.debug_info_sec.size, "refaddr %s beyond .debug_info size" % refaddr) @@ -267,6 +269,7 @@ def get_CU_at(self, offset: int) -> CompileUnit: dwarf_assert( self.has_debug_info, 'CU lookup but no debug info section') + assert self.debug_info_sec is not None dwarf_assert( 0 <= offset < self.debug_info_sec.size, "offset %s beyond .debug_info size" % offset) @@ -284,6 +287,7 @@ def get_TU_by_sig8(self, sig8: int) -> TypeUnit: """ self._parse_debug_types() + assert self._type_units_by_sig is not None tu = self._type_units_by_sig.get(sig8) if tu is None: raise KeyError("Signature %016x not found in .debug_types" % sig8) @@ -311,6 +315,7 @@ def get_abbrev_table(self, offset: int) -> AbbrevTable: AbbrevTable objects are cached internally (two calls for the same offset will return the same object). """ + assert self.debug_abbrev_sec is not None dwarf_assert( offset < self.debug_abbrev_sec.size, "Offset '0x%x' to abbrev table out of section bounds" % offset) @@ -325,12 +330,14 @@ def get_string_from_table(self, offset: int) -> bytes | None: """ Obtain a string from the string table section, given an offset relative to the section. 
""" + assert self.debug_str_sec is not None return parse_cstring_from_stream(self.debug_str_sec.stream, offset) def get_string_from_linetable(self, offset: int) -> bytes | None: """ Obtain a string from the string table section, given an offset relative to the section. """ + assert self.debug_line_str_sec is not None return parse_cstring_from_stream(self.debug_line_str_sec.stream, offset) def line_program_for_CU(self, CU: CompileUnit) -> LineProgram | None: @@ -363,6 +370,7 @@ def has_CFI(self) -> bool: def CFI_entries(self) -> list[CFIEntry | ZERO]: """ Get a list of dwarf_frame CFI entries from the .debug_frame section. """ + assert self.debug_frame_sec is not None cfi = CallFrameInfo( stream=self.debug_frame_sec.stream, size=self.debug_frame_sec.size, @@ -378,6 +386,7 @@ def has_EH_CFI(self) -> bool: def EH_CFI_entries(self) -> list[CFIEntry | ZERO]: """ Get a list of eh_frame CFI entries from the .eh_frame section. """ + assert self.eh_frame_sec is not None cfi = CallFrameInfo( stream=self.eh_frame_sec.stream, size=self.eh_frame_sec.size, @@ -578,6 +587,7 @@ def _parse_CU_at_offset(self, offset: int) -> CompileUnit: # dwarf format. Based on it, we then create a new DWARFStructs # instance suitable for this CU and use it to parse the rest. # + assert self.debug_info_sec is not None initial_length = struct_parse( self.structs.the_Dwarf_uint32, self.debug_info_sec.stream, offset) dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32 @@ -624,6 +634,7 @@ def _parse_TU_at_offset(self, offset: int) -> TypeUnit: # dwarf format. Based on it, we then create a new DWARFStructs # instance suitable for this TU and use it to parse the rest. 
# + assert self.debug_types_sec is not None initial_length = struct_parse( self.structs.the_Dwarf_uint32, self.debug_types_sec.stream, offset) dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32 @@ -672,6 +683,7 @@ def _parse_line_program_at_offset(self, offset: int, structs: DWARFStructs) -> L if offset in self._linetable_cache: return self._linetable_cache[offset] + assert self.debug_line_sec is not None lineprog_header = struct_parse( structs.Dwarf_lineprog_header, self.debug_line_sec.stream, diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py index 354147f1..d19d3629 100644 --- a/elftools/dwarf/locationlists.py +++ b/elftools/dwarf/locationlists.py @@ -160,6 +160,7 @@ def iter_location_lists(self) -> Iterator[list[_Location]]: all_offsets = set() # Set of offsets where either a locview pair set can be found, or a view-less loclist locviews = dict() # Map of locview offset to the respective loclist offset cu_map = dict() # Map of loclist offsets to CUs + assert self.dwarfinfo is not None for cu in self.dwarfinfo.iter_CUs(): cu_ver: int = cu['version'] if (cu_ver >= 5) == ver5: @@ -230,6 +231,7 @@ def iter_CUs(self) -> Iterator[CompileUnit]: if self.version < 5: raise DWARFError("CU iteration in loclists is not supported with DWARF<5") + assert self.dwarfinfo is not None structs = next(self.dwarfinfo.iter_CUs()).structs # Just pick one return _iter_CUs_in_section(self.stream, structs, structs.Dwarf_loclists_CU_header) @@ -339,6 +341,7 @@ def parse_from_attribute(self, attr: AttributeValue, dwarf_version: int, die: DI if self._attribute_has_loc_expr(attr, dwarf_version): return LocationExpr(attr.value) elif self._attribute_has_loc_list(attr, dwarf_version): + assert self.location_lists is not None return self.location_lists.get_location_list_at_offset( attr.value, die) # We don't yet know if the DIE context will be needed. 
diff --git a/elftools/dwarf/ranges.py b/elftools/dwarf/ranges.py index c4bbce32..411e7837 100644 --- a/elftools/dwarf/ranges.py +++ b/elftools/dwarf/ranges.py @@ -170,6 +170,7 @@ def iter_CUs(self) -> Iterator[CompileUnit]: if self.version < 5: raise DWARFError("CU iteration in rnglists is not supported with DWARF<5") + assert self._dwarfinfo is not None structs = next(self._dwarfinfo.iter_CUs()).structs # Just pick one return _iter_CUs_in_section(self.stream, structs, structs.Dwarf_rnglists_CU_header) @@ -191,6 +192,7 @@ def translate_v5_entry(self, entry: Container, cu: CompileUnit) -> RangeEntry | def _parse_range_list_from_stream(self, cu: CompileUnit | None) -> list[RangeEntry | BaseAddressEntry]: if self.version >= 5: + assert cu is not None return list(entry_translate[entry.entry_type](entry, cu) for entry in struct_parse(self.structs.Dwarf_rnglists_entries, self.stream)) diff --git a/elftools/dwarf/typeunit.py b/elftools/dwarf/typeunit.py index ffebae55..e7de9692 100644 --- a/elftools/dwarf/typeunit.py +++ b/elftools/dwarf/typeunit.py @@ -117,6 +117,7 @@ def get_top_DIE(self) -> DIE: if self._diemap: return self._dielist[0] + assert self.dwarfinfo.debug_types_sec is not None top = DIE( cu=self, stream=self.dwarfinfo.debug_types_sec.stream, @@ -194,6 +195,7 @@ def iter_DIE_children(self, die: DIE) -> Iterator[DIE]: if child._terminator is None: for _ in self.iter_DIE_children(child): pass + assert child._terminator is not None cur_offset = child._terminator.offset + child._terminator.size @@ -232,6 +234,7 @@ def _iter_DIE_subtree(self, die: DIE) -> Iterator[DIE]: if die.has_children: for c in die.iter_children(): yield from die.cu._iter_DIE_subtree(c) + assert die._terminator is not None yield die._terminator def _get_cached_DIE(self, offset: int) -> DIE: diff --git a/elftools/elf/dynamic.py b/elftools/elf/dynamic.py index 8cda8783..a92c7b2c 100644 --- a/elftools/elf/dynamic.py +++ b/elftools/elf/dynamic.py @@ -327,6 +327,7 @@ def num_symbols(self) -> int: 
tab_ptr <= (segment['p_vaddr'] + segment['p_filesz'])): nearest_ptr = segment['p_vaddr'] + segment['p_filesz'] + assert nearest_ptr is not None end_ptr = nearest_ptr self._num_symbols = (end_ptr - tab_ptr) // self._symbol_size diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index d652ae22..073d6fe8 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -147,6 +147,7 @@ def get_section(self, n: int, type: TContainer[str] | None = None) -> Section: subclass) """ section_header = self._get_section_header(n) + assert section_header is not None if type and section_header.sh_type not in type: raise ELFError("Unexpected section type %s, expected %s" % (section_header['sh_type'], type)) return self._make_section(section_header) @@ -157,6 +158,7 @@ def _get_linked_symtab_section(self, n: int) -> SymbolTableSection: Used for resolving section links with target type validation. """ section_header = self._get_section_header(n) + assert section_header is not None if section_header['sh_type'] not in ('SHT_SYMTAB', 'SHT_DYNSYM'): raise ELFError("Section points at section %d of type %s, expected SHT_SYMTAB/SHT_DYNSYM" % (n, section_header['sh_type'])) return self._make_section(section_header) @@ -167,6 +169,7 @@ def _get_linked_strtab_section(self, n: int) -> StringTableSection: Used for resolving section links with target type validation. 
""" section_header = self._get_section_header(n) + assert section_header is not None if section_header['sh_type'] != 'SHT_STRTAB': raise ELFError("SHT_SYMTAB section points at section %d of type %s, expected SHT_STRTAB" % (n, section_header['sh_type'])) return self._make_section(section_header) @@ -180,6 +183,7 @@ def get_section_by_name(self, name: str) -> Section | None: # if self._section_name_map is None: self._make_section_name_map() + assert self._section_name_map is not None secnum = self._section_name_map.get(name, None) return None if secnum is None else self.get_section(secnum) @@ -192,6 +196,7 @@ def get_section_index(self, section_name: str) -> int | None: # if self._section_name_map is None: self._make_section_name_map() + assert self._section_name_map is not None return self._section_name_map.get(section_name, None) def has_section(self, section_name: str) -> bool: @@ -199,6 +204,7 @@ def has_section(self, section_name: str) -> bool: """ if self._section_name_map is None: self._make_section_name_map() + assert self._section_name_map is not None return section_name in self._section_name_map def iter_sections(self, type: str | None = None) -> Iterator[Section]: diff --git a/elftools/elf/relocation.py b/elftools/elf/relocation.py index d9dc7443..4ea75986 100644 --- a/elftools/elf/relocation.py +++ b/elftools/elf/relocation.py @@ -165,6 +165,7 @@ def iter_relocations(self) -> Iterator[Relocation]: else: # We're processing a bitmap. elf_assert(base is not None, 'RELR bitmap without base address') + assert base is not None i = 0 while True: # Iterate over all bits except the least significant one. 
diff --git a/elftools/elf/structs.py b/elftools/elf/structs.py index 7773893d..9ea31f6c 100644 --- a/elftools/elf/structs.py +++ b/elftools/elf/structs.py @@ -315,6 +315,7 @@ def _create_rel(self) -> None: def _create_dyn(self) -> None: d_tag_dict = dict(ENUM_D_TAG_COMMON) if self.e_machine in ENUMMAP_EXTRA_D_TAG_MACHINE: + assert self.e_machine is not None d_tag_dict.update(ENUMMAP_EXTRA_D_TAG_MACHINE[self.e_machine]) elif self.e_ident_osabi == 'ELFOSABI_SOLARIS': d_tag_dict.update(ENUM_D_TAG_SOLARIS) diff --git a/scripts/dwarfdump.py b/scripts/dwarfdump.py old mode 100644 new mode 100755 index 9038524c..4d807351 --- a/scripts/dwarfdump.py +++ b/scripts/dwarfdump.py @@ -463,6 +463,7 @@ def dump_v4_rangelist(self, rangelist, cu_map) -> None: def dump_rnglists(self) -> None: self._emitline(".debug_rnglists contents:") ranges_sec = self._dwarfinfo.range_lists() + assert ranges_sec is not None if ranges_sec.version < 5: return From d9797b637089bb26c2eefc25101e6fdddfc12066 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 08:33:11 +0100 Subject: [PATCH 09/38] typing: declare before conditional Declare `lsda_pointer` and `aug_dict` and `last_line_in_CIE` before the conditional. Static type checking will complain otherwise that variables might not be declared on the `else` case. Signed-off-by: Philipp Hahn --- elftools/dwarf/callframe.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py index 6a3315f9..a93b272f 100644 --- a/elftools/dwarf/callframe.py +++ b/elftools/dwarf/callframe.py @@ -140,6 +140,8 @@ def _parse_entry_at(self, offset: int) -> CFIEntry | ZERO: # If the augmentation string is not empty, hope to find a length field # in order to skip the data specified augmentation. 
+ lsda_pointer: int | None = None + aug_dict: dict[Any, Any] | None = None if is_CIE: aug_bytes, aug_dict = self._parse_cie_augmentation( header, entry_structs) @@ -153,8 +155,6 @@ def _parse_entry_at(self, offset: int) -> CFIEntry | ZERO: lsda_pointer = self._parse_lsda_pointer(entry_structs, self.stream.tell() - len(aug_bytes), lsda_encoding) - else: - lsda_pointer = None # For convenience, compute the end offset for this entry end_offset: int = ( @@ -528,6 +528,7 @@ def _decode_CFI_table(self) -> DecodedCallFrameTable: """ Decode the instructions contained in the given CFI entry and return a DecodedCallFrameTable. """ + last_line_in_CIE: dict[str, Any] | None = None if isinstance(self, CIE): # For a CIE, initialize cur_line to an "empty" line cie = self From 1c05bf33d14d97a21efe553a13b6bf92ff741c3e Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 08:37:55 +0100 Subject: [PATCH 10/38] typing: Check for None values `Dwarf_dw_form` contains `None` values, but type checkers fail to do a static lookup to see, that they are constant and `not None`. 
Signed-off-by: Philipp Hahn --- elftools/dwarf/callframe.py | 10 ++++++---- elftools/dwarf/die.py | 8 ++++++-- elftools/dwarf/structs.py | 2 +- elftools/elf/elffile.py | 16 ++++++++++++---- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py index a93b272f..7f706914 100644 --- a/elftools/dwarf/callframe.py +++ b/elftools/dwarf/callframe.py @@ -230,13 +230,15 @@ def _parse_instructions(self, structs: DWARFStructs, offset: int, end_offset: in elif opcode == DW_CFA_def_cfa_offset_sf: args = [struct_parse(structs.the_Dwarf_sleb128, self.stream)] elif opcode == DW_CFA_def_cfa_expression: - args = [struct_parse( - structs.Dwarf_dw_form['DW_FORM_block'], self.stream)] + struct = structs.Dwarf_dw_form['DW_FORM_block'] + assert struct is not None + args = [struct_parse(struct, self.stream)] elif opcode in (DW_CFA_expression, DW_CFA_val_expression): + struct = structs.Dwarf_dw_form['DW_FORM_block'] + assert struct is not None args = [ struct_parse(structs.the_Dwarf_uleb128, self.stream), - struct_parse( - structs.Dwarf_dw_form['DW_FORM_block'], self.stream)] + struct_parse(struct, self.stream)] elif opcode in (DW_CFA_offset_extended_sf, DW_CFA_def_cfa_sf, DW_CFA_val_offset_sf): args = [ diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py index 8c4675ef..462d9c6e 100644 --- a/elftools/dwarf/die.py +++ b/elftools/dwarf/die.py @@ -275,7 +275,9 @@ def _parse_DIE(self) -> None: (form, raw_value, indirection_length) = self._resolve_indirect() value = self._translate_attr_value(form, raw_value) else: - raw_value = structs.Dwarf_dw_form[form].parse_stream(stream) + dw_form = structs.Dwarf_dw_form[form] + assert dw_form is not None + raw_value = dw_form.parse_stream(stream) value = self._translate_attr_value(form, raw_value) self.attributes[name] = AttributeValue( name=name, @@ -302,7 +304,9 @@ def _resolve_indirect(self) -> tuple[str, int, int]: except KeyError: raise DWARFError('Found DW_FORM_indirect with 
unknown real form 0x%x' % real_form_code) - raw_value = struct_parse(structs.Dwarf_dw_form[real_form], self.stream) + dw_form = structs.Dwarf_dw_form[real_form] + assert dw_form is not None + raw_value: int = struct_parse(dw_form, self.stream) if real_form != 'DW_FORM_indirect': # Happy path: one level of indirection return (real_form, raw_value, length) diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index 0ad24579..7b9f13c6 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -412,7 +412,7 @@ def _parse(self, stream: IO[bytes], context: Container) -> Any: parser = context[self.format_field + "_parser"] else: fields = tuple( - Rename(f.content_type, self.structs.Dwarf_dw_form[f.form]) + Rename(f.content_type, self.structs.Dwarf_dw_form[f.form]) # type: ignore[arg-type] for f in context[self.format_field]) parser = Struct('formatted_entry', *fields) context[self.format_field + "_parser"] = parser diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index 073d6fe8..77e0b214 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -139,7 +139,9 @@ def num_sections(self) -> int: # sh_size field of the section header at index 0 (otherwise, the sh_size # member of the initial entry contains 0)." 
if self['e_shnum'] == 0: - return self._get_section_header(0)['sh_size'] + section_header = self._get_section_header(0) + assert section_header is not None + return section_header['sh_size'] return self['e_shnum'] def get_section(self, n: int, type: TContainer[str] | None = None) -> Section: @@ -161,7 +163,9 @@ def _get_linked_symtab_section(self, n: int) -> SymbolTableSection: assert section_header is not None if section_header['sh_type'] not in ('SHT_SYMTAB', 'SHT_DYNSYM'): raise ELFError("Section points at section %d of type %s, expected SHT_SYMTAB/SHT_DYNSYM" % (n, section_header['sh_type'])) - return self._make_section(section_header) + section = self._make_section(section_header) + assert isinstance(section, SymbolTableSection) + return section def _get_linked_strtab_section(self, n: int) -> StringTableSection: """ Get the section at index #n from the file, throws @@ -172,7 +176,9 @@ def _get_linked_strtab_section(self, n: int) -> StringTableSection: assert section_header is not None if section_header['sh_type'] != 'SHT_STRTAB': raise ELFError("SHT_SYMTAB section points at section %d of type %s, expected SHT_STRTAB" % (n, section_header['sh_type'])) - return self._make_section(section_header) + section = self._make_section(section_header) + assert isinstance(section, StringTableSection) + return section def get_section_by_name(self, name: str) -> Section | None: """ Get a section from the file, by name. 
Return None if no such @@ -629,7 +635,9 @@ def get_shstrndx(self) -> int: if self['e_shstrndx'] != SHN_INDICES.SHN_XINDEX: return self['e_shstrndx'] else: - return self._get_section_header(0)['sh_link'] + section_header = self._get_section_header(0) + assert section_header is not None + return section_header['sh_link'] #-------------------------------- PRIVATE --------------------------------# From 7e8055a45442d70688da465b501bc8d2c0055dd3 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 08:43:56 +0100 Subject: [PATCH 11/38] typing: Invert type check for _strip_type_tag `die.tag` can be an `int` for "user-defined tags". Invert the type check to silence type checking. Signed-off-by: Philipp Hahn --- elftools/dwarf/datatype_cpp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elftools/dwarf/datatype_cpp.py b/elftools/dwarf/datatype_cpp.py index 6bf4699d..42e0d3b7 100644 --- a/elftools/dwarf/datatype_cpp.py +++ b/elftools/dwarf/datatype_cpp.py @@ -232,7 +232,7 @@ def DIE_is_ptr_to_member_struct(type_die: DIE) -> bool: def _strip_type_tag(die: DIE) -> str: """Given a DIE with DW_TAG_foo_type, returns foo""" - if isinstance(die.tag, int): # User-defined tag + if not isinstance(die.tag, str): # User-defined tag return "" return die.tag[7:-5] From 73aa16540e6bcf60a035e3c3c7288a4ade483e30 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 08:50:28 +0100 Subject: [PATCH 12/38] typing: Check for DIE._terminator None `DIE._terminator` may be `None` and `search._terminator.offset` would raise an `AttributeError`.
Signed-off-by: Philipp Hahn --- elftools/dwarf/die.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py index 462d9c6e..0bc10367 100644 --- a/elftools/dwarf/die.py +++ b/elftools/dwarf/die.py @@ -211,7 +211,7 @@ def _search_ancestor_offspring(self) -> None: prev = child # We also need to check the offset of the terminator DIE - if search.has_children and search._terminator.offset <= self.offset: + if search.has_children and search._terminator and search._terminator.offset <= self.offset: prev = search._terminator # If we didn't find a closer parent, give up, don't loop. From 7a1fbcd14d8cc6fc0e8e994af678a703e081a2f1 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 09:49:35 +0100 Subject: [PATCH 13/38] typing: Convert ATTR_DESC into Final tuple Declaring constants as `list` is bad as `list` is modifiable. Declare them as `Final[tuple]` instead to help type-checkers de-reference individual entries for checking and using their correct type `dict` or `None`. Signed-off-by: Philipp Hahn --- elftools/elf/descriptions.py | 10 +++++----- pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py index fd9ca95c..f12b7652 100644 --- a/elftools/elf/descriptions.py +++ b/elftools/elf/descriptions.py @@ -264,7 +264,7 @@ def describe_attr_tag_arm(tag: str, val: Any, extra: str | None) -> str: elif tag == 'TAG_ALSO_COMPATIBLE_WITH': if val.tag == 'TAG_CPU_ARCH': d_entry = _DESCR_ATTR_VAL_ARM[5] # TAG_CPU_ARCH - return s + d_entry.get(val.value, '??? (%d)' % val.value) + return s + (d_entry.get(val.value) or '??? (%d)' % val.value) else: return s + '??? 
(%d)' % val.tag @@ -786,7 +786,7 @@ def _reverse_dict(d: Mapping[_K, _V], low_priority: TContainer[_K] = ()) -> dict TAG_MPEXTENSION_USE_OLD='Tag_MPextension_use_old: ', ) -_DESCR_ATTR_VAL_ARM = [ +_DESCR_ATTR_VAL_ARM: Final = ( None, #1 None, #2 None, #3 @@ -1034,7 +1034,7 @@ def _reverse_dict(d: Mapping[_K, _V], low_priority: TContainer[_K] = ()) -> dict 0: 'Not Allowed', 1: 'Allowed', }, -] +) _DESCR_ATTR_TAG_RISCV = dict( TAG_FILE='File Attributes', @@ -1050,7 +1050,7 @@ def _reverse_dict(d: Mapping[_K, _V], low_priority: TContainer[_K] = ()) -> dict TAG_X3_REG_USAGE='Tag_RISCV_x3_reg_usage: ', ) -_DESCR_ATTR_VAL_RISCV = [ +_DESCR_ATTR_VAL_RISCV: Final = ( None, #1 None, #2 None, #3 @@ -1078,4 +1078,4 @@ def _reverse_dict(d: Mapping[_K, _V], low_priority: TContainer[_K] = ()) -> dict 2: 'This object uses x3 as the shadow stack pointer.', 3: 'This object uses X3 as a temporary register.', }, -] +) diff --git a/pyproject.toml b/pyproject.toml index a0187a96..c84a5c8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ Repository = "https://github.com/eliben/pyelftools.git" Issues = "https://github.com/eliben/pyelftools/issues" [dependency-groups] -typing = ["mypy[reports]", "pyright", "typeguard"] +typing = ["mypy[reports]", "pyright", "typeguard", "typing_extensions"] [tool.setuptools] packages = [ From 402fe448b747798f16e8aa8b07faa15c3747f98e Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 09:53:48 +0100 Subject: [PATCH 14/38] typing: describe_note_gnu_properties: Add hints Improve type hinting - at least for `t` and `sz` - `data` remains `Any`. 
Signed-off-by: Philipp Hahn --- elftools/elf/descriptions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py index f12b7652..7868b4e6 100644 --- a/elftools/elf/descriptions.py +++ b/elftools/elf/descriptions.py @@ -300,7 +300,9 @@ def describe_note_gnu_property_bitmap_and(values: Iterable[tuple[int, str]], pre def describe_note_gnu_properties(properties: list[Container], machine: str) -> str: descriptions = [] for prop in properties: - t, d, sz = prop.pr_type, prop.pr_data, prop.pr_datasz + t: str | int = prop.pr_type + d = prop.pr_data + sz: int = prop.pr_datasz if t == 'GNU_PROPERTY_STACK_SIZE': if isinstance(d, int): prop_desc = 'stack size: 0x%x' % d From 5dd5e5677ef5b646cd4829562722acff287604f4 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 10:06:45 +0100 Subject: [PATCH 15/38] typing: elffile.section_names list `section_names` is only used internally as a `list`, as section names might get modified and the list is appended. Signed-off-by: Philipp Hahn --- elftools/elf/elffile.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index 77e0b214..6519af95 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -315,20 +315,21 @@ def get_dwarf_info(self, relocate_dwarf_sections: bool = True, follow_links: boo # is relative to the other file's directory as opposed to this file's directory. 
return ext_elffile.get_dwarf_info(relocate_dwarf_sections=relocate_dwarf_sections, follow_links=True) - section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev', + section_names: list[str] = ['.debug_info', '.debug_aranges', '.debug_abbrev', '.debug_str', '.debug_line', '.debug_frame', '.debug_loc', '.debug_ranges', '.debug_pubtypes', '.debug_pubnames', '.debug_addr', '.debug_str_offsets', '.debug_line_str', '.debug_loclists', '.debug_rnglists', - '.debug_sup', '.gnu_debugaltlink', '.debug_types') + '.debug_sup', '.gnu_debugaltlink', '.debug_types', + ] compressed = self.has_section('.zdebug_info') if compressed: - section_names = tuple(map(lambda x: '.z' + x[1:], section_names)) + section_names = [f'.z{s[1:]}' for s in section_names] # As it is loaded in the process image, .eh_frame cannot be compressed - section_names += ('.eh_frame', ) + section_names.append('.eh_frame') (debug_info_sec_name, debug_aranges_sec_name, debug_abbrev_sec_name, debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name, From 4d89eff0fc7feb44a89a9c0cd8fab5a14fde374f Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 10:14:41 +0100 Subject: [PATCH 16/38] typing: ENUM_D_TAG Mapping `ENUM_D_TAG` is a constant, but built by code. Convert it into a dict-comprehension. 
Signed-off-by: Philipp Hahn --- elftools/elf/enums.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/elftools/elf/enums.py b/elftools/elf/enums.py index 6685da8e..8871a99e 100644 --- a/elftools/elf/enums.py +++ b/elftools/elf/enums.py @@ -649,10 +649,11 @@ # Here is the full combined mapping from tag name to value -ENUM_D_TAG = dict(ENUM_D_TAG_COMMON) -ENUM_D_TAG.update(ENUM_D_TAG_SOLARIS) -for k in ENUMMAP_EXTRA_D_TAG_MACHINE: - ENUM_D_TAG.update(ENUMMAP_EXTRA_D_TAG_MACHINE[k]) +ENUM_D_TAG: Mapping[str, int] = { + **ENUM_D_TAG_COMMON, + **ENUM_D_TAG_SOLARIS, + **{k: v for kv in ENUMMAP_EXTRA_D_TAG_MACHINE.values() for k, v in kv.items()}, +} ENUM_DT_FLAGS: Mapping[str, int] = dict( DF_ORIGIN=0x1, From 11b9e3c5a4567ba283829e52d806e888c3b9d59f Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 10:16:22 +0100 Subject: [PATCH 17/38] typing: get_symbol None `get_symbol()` may return `None` and `symbol.name` would raise an `AttributeError`. Check for `not None` explicitly. 
Signed-off-by: Philipp Hahn --- elftools/elf/hash.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elftools/elf/hash.py b/elftools/elf/hash.py index fd49460e..eec65d30 100644 --- a/elftools/elf/hash.py +++ b/elftools/elf/hash.py @@ -79,7 +79,7 @@ def get_symbol(self, name: str) -> Symbol | None: symndx = self.params['buckets'][hval] while symndx != 0: sym = self._symboltable.get_symbol(symndx) - if sym.name == name: + if sym and sym.name == name: return sym symndx = self.params['chains'][symndx] return None @@ -187,7 +187,7 @@ def get_symbol(self, name: str) -> Symbol | None: cur_hash = struct.unpack(hash_format, self.elffile.stream.read(self._wordsize))[0] if cur_hash | 1 == namehash | 1: symbol = self._symboltable.get_symbol(symidx) - if name == symbol.name: + if symbol and name == symbol.name: return symbol if cur_hash & 1: From e4e6ea00bc30bb82fb7750b7fd667ba83c228c60 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 09:03:43 +0100 Subject: [PATCH 18/38] typing ignore: LocationLists.entry_translate `entry_translate` may contain `None`. Silence type checking. Signed-off-by: Philipp Hahn --- elftools/dwarf/locationlists.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py index d19d3629..1d8f42fe 100644 --- a/elftools/dwarf/locationlists.py +++ b/elftools/dwarf/locationlists.py @@ -277,7 +277,7 @@ def _parse_location_list_from_stream_v5(self, cu: CompileUnit | TypeUnit | None DWARFv5 debug_loclists one, and the target loclist contains indirect encodings. 
""" - return [entry_translate[entry.entry_type](entry, cu) + return [entry_translate[entry.entry_type](entry, cu) # type: ignore[arg-type] for entry in struct_parse(self.structs.Dwarf_loclists_entries, self.stream)] From 384c33f913397454e4c37f9cb8cdba9929fef3ff Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 09:47:42 +0100 Subject: [PATCH 19/38] typing ignore: describe_symbol_shndx An unknown `ST_SHNDC` `str` will raise `ValueError`. Signed-off-by: Philipp Hahn --- elftools/elf/descriptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py index 7868b4e6..58cb7903 100644 --- a/elftools/elf/descriptions.py +++ b/elftools/elf/descriptions.py @@ -152,7 +152,7 @@ def describe_symbol_other(x: Container) -> str: def describe_symbol_shndx(x: int | str) -> str: - return _DESCR_ST_SHNDX.get(x, '%3s' % x) + return _DESCR_ST_SHNDX.get(x, '%3s' % x) # type: ignore[arg-type] def describe_reloc_type(x: int, elffile: ELFFile) -> str: From dce4a4873333f421777f589961b1e8db6f261744 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 09:58:33 +0100 Subject: [PATCH 20/38] typing ignore: Dynamic.get_table_offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `get_table_offset()` may return `tuple[…, None]`. 
Signed-off-by: Philipp Hahn --- elftools/elf/dynamic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/elftools/elf/dynamic.py b/elftools/elf/dynamic.py index a92c7b2c..0cf18086 100644 --- a/elftools/elf/dynamic.py +++ b/elftools/elf/dynamic.py @@ -216,7 +216,7 @@ def get_relocation_tables(self) -> dict[str, RelocationTable | RelrRelocationTab if list(self.iter_tags('DT_REL')): result['REL'] = RelocationTable(self.elffile, - self.get_table_offset('DT_REL')[1], + self.get_table_offset('DT_REL')[1], # type: ignore[arg-type] next(self.iter_tags('DT_RELSZ'))['d_val'], False) relentsz = next(self.iter_tags('DT_RELENT'))['d_val'] @@ -225,7 +225,7 @@ def get_relocation_tables(self) -> dict[str, RelocationTable | RelrRelocationTab if list(self.iter_tags('DT_RELA')): result['RELA'] = RelocationTable(self.elffile, - self.get_table_offset('DT_RELA')[1], + self.get_table_offset('DT_RELA')[1], # type: ignore[arg-type] next(self.iter_tags('DT_RELASZ'))['d_val'], True) relentsz = next(self.iter_tags('DT_RELAENT'))['d_val'] @@ -234,13 +234,13 @@ def get_relocation_tables(self) -> dict[str, RelocationTable | RelrRelocationTab if list(self.iter_tags('DT_RELR')): result['RELR'] = RelrRelocationTable(self.elffile, - self.get_table_offset('DT_RELR')[1], + self.get_table_offset('DT_RELR')[1], # type: ignore[arg-type] next(self.iter_tags('DT_RELRSZ'))['d_val'], next(self.iter_tags('DT_RELRENT'))['d_val']) if list(self.iter_tags('DT_JMPREL')): result['JMPREL'] = RelocationTable(self.elffile, - self.get_table_offset('DT_JMPREL')[1], + self.get_table_offset('DT_JMPREL')[1], # type: ignore[arg-type] next(self.iter_tags('DT_PLTRELSZ'))['d_val'], next(self.iter_tags('DT_PLTREL'))['d_val'] == ENUM_D_TAG['DT_RELA']) From c1045e2d2380f151894945692a04e6508c6abbee Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 09:10:09 +0100 Subject: [PATCH 21/38] typing ignore: RangeLists.dwarfinfo `entry_translate` may contain `None` in the DWARF-5-case. 
Silence type checking. Signed-off-by: Philipp Hahn --- elftools/dwarf/ranges.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elftools/dwarf/ranges.py b/elftools/dwarf/ranges.py index 411e7837..4e2b1b6d 100644 --- a/elftools/dwarf/ranges.py +++ b/elftools/dwarf/ranges.py @@ -155,7 +155,7 @@ def iter_range_lists(self) -> Iterator[list[RangeEntry | BaseAddressEntry]]: ver5 = self.version >= 5 # This maps list offset to CU cu_map = {die.attributes['DW_AT_ranges'].value : cu - for cu in self._dwarfinfo.iter_CUs() + for cu in self._dwarfinfo.iter_CUs() # type: ignore[union-attr] for die in cu.iter_DIEs() if 'DW_AT_ranges' in die.attributes and (cu['version'] >= 5) == ver5} all_offsets = list(cu_map.keys()) From 4464dc65aa307bb28e869b32604bc21a66662e78 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 10:21:04 +0100 Subject: [PATCH 22/38] typing ignore: Section.__eq__ Code explicitly checks for `AttributeError`, but `mypy` is picky here. Signed-off-by: Philipp Hahn --- elftools/elf/sections.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elftools/elf/sections.py b/elftools/elf/sections.py index e1e62442..1c3051fe 100644 --- a/elftools/elf/sections.py +++ b/elftools/elf/sections.py @@ -136,7 +136,7 @@ def __getitem__(self, name: str) -> Any: def __eq__(self, other: object) -> bool: try: - return self.header == other.header + return self.header == other.header # type: ignore[attr-defined] except AttributeError: return False From 91ab25c0ebce6c7f188b16fdd94f3e5dbd1ff50c Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 08:45:27 +0100 Subject: [PATCH 23/38] mypy: Rename `params` for type change `params` is first declared as a `tuple` and then changed to `str`, which static type checkers like `mypy` do not like. Rename the first variable. 
Signed-off-by: Philipp Hahn --- elftools/dwarf/datatype_cpp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elftools/dwarf/datatype_cpp.py b/elftools/dwarf/datatype_cpp.py index 42e0d3b7..cc69df34 100644 --- a/elftools/dwarf/datatype_cpp.py +++ b/elftools/dwarf/datatype_cpp.py @@ -68,8 +68,8 @@ def parse_cpp_datatype(var_die: DIE) -> TypeDesc: ptr_prefix = '' if t.tag == 'subroutine': - params = tuple(format_function_param(p, p) for p in type_die.iter_children() if p.tag in ("DW_TAG_formal_parameter", "DW_TAG_unspecified_parameters") and 'DW_AT_artificial' not in p.attributes) - params = ", ".join(params) + params_ = tuple(format_function_param(p, p) for p in type_die.iter_children() if p.tag in ("DW_TAG_formal_parameter", "DW_TAG_unspecified_parameters") and 'DW_AT_artificial' not in p.attributes) + params = ", ".join(params_) if 'DW_AT_type' in type_die.attributes: retval_type = parse_cpp_datatype(type_die) is_pointer = retval_type.modifiers and retval_type.modifiers[-1] == 'pointer' From 2c5af665ecb7ca0659b36f373ae99bfd75c98eb7 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 08:46:58 +0100 Subject: [PATCH 24/38] mypy: Rename `reveal_type` for type change `reveal_type` is first declared as `TypeDesc` and then changed to `str`, which static type checkers like `mypy` do not like. Rename the first variable. PS: Better rename `reveal_type()` to something else as there is `typing.reveal_type()`[^1]. 
[^1]: https://docs.python.org/3/library/typing.html#typing.reveal_type Signed-off-by: Philipp Hahn --- elftools/dwarf/datatype_cpp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/elftools/dwarf/datatype_cpp.py b/elftools/dwarf/datatype_cpp.py index cc69df34..1e12f668 100644 --- a/elftools/dwarf/datatype_cpp.py +++ b/elftools/dwarf/datatype_cpp.py @@ -71,9 +71,9 @@ def parse_cpp_datatype(var_die: DIE) -> TypeDesc: params_ = tuple(format_function_param(p, p) for p in type_die.iter_children() if p.tag in ("DW_TAG_formal_parameter", "DW_TAG_unspecified_parameters") and 'DW_AT_artificial' not in p.attributes) params = ", ".join(params_) if 'DW_AT_type' in type_die.attributes: - retval_type = parse_cpp_datatype(type_die) - is_pointer = retval_type.modifiers and retval_type.modifiers[-1] == 'pointer' - retval_type = str(retval_type) + retval_type_ = parse_cpp_datatype(type_die) + is_pointer = retval_type_.modifiers and retval_type_.modifiers[-1] == 'pointer' + retval_type = str(retval_type_) if not is_pointer: retval_type += " " else: From 3720019d17d0d2c7ee58658d25e20bfcc81add17 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 08:45:27 +0100 Subject: [PATCH 25/38] mypy: Rename `all_offsets` for type change `all_offsets` is first declared as a `set` and then changed to `list`, which static type checkers like `mypy` do not like. Rename the first variable. Signed-off-by: Philipp Hahn --- elftools/dwarf/locationlists.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py index 1d8f42fe..ac3a042a 100644 --- a/elftools/dwarf/locationlists.py +++ b/elftools/dwarf/locationlists.py @@ -157,7 +157,7 @@ def iter_location_lists(self) -> Iterator[list[_Location]]: # Need to provide support for DW_AT_GNU_locviews. 
They are interspersed in # the locations section, no way to tell where short of checking all DIEs - all_offsets = set() # Set of offsets where either a locview pair set can be found, or a view-less loclist + all_offsets_ = set() # Set of offsets where either a locview pair set can be found, or a view-less loclist locviews = dict() # Map of locview offset to the respective loclist offset cu_map = dict() # Map of loclist offsets to CUs assert self.dwarfinfo is not None @@ -174,7 +174,7 @@ def iter_location_lists(self) -> Iterator[list[_Location]]: list_offset: int = die.attributes['DW_AT_location'].value locviews[views_offset] = list_offset cu_map[list_offset] = cu - all_offsets.add(views_offset) + all_offsets_.add(views_offset) # Scan other attributes for location lists for key in die.attributes: @@ -183,10 +183,9 @@ def iter_location_lists(self) -> Iterator[list[_Location]]: LocationParser.attribute_has_location(attr, cu_ver) and LocationParser._attribute_has_loc_list(attr, cu_ver)): list_offset = attr.value - all_offsets.add(list_offset) + all_offsets_.add(list_offset) cu_map[list_offset] = cu - all_offsets = list(all_offsets) - all_offsets.sort() + all_offsets = sorted(all_offsets_) if ver5: # Loclists section is organized as an array of CUs, each length prefixed. From 68c9cddcc48c8e6f25a3dfc8062e98d7433039bc Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 08:40:40 +0100 Subject: [PATCH 26/38] datatype: Check for parent None `get_parent()` may return `None`, in which case `parent.tag` would raise an `AttributeError`. Make the check explicit for type checking. 
Signed-off-by: Philipp Hahn --- elftools/dwarf/datatype_cpp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elftools/dwarf/datatype_cpp.py b/elftools/dwarf/datatype_cpp.py index 1e12f668..1c6c9c4a 100644 --- a/elftools/dwarf/datatype_cpp.py +++ b/elftools/dwarf/datatype_cpp.py @@ -107,7 +107,7 @@ def parse_cpp_datatype(var_die: DIE) -> TypeDesc: # Check the nesting - important for parameters parent = type_die.get_parent() scopes: list[str] = [] - while parent.tag in ('DW_TAG_class_type', 'DW_TAG_structure_type', 'DW_TAG_union_type', 'DW_TAG_namespace'): + while parent and parent.tag in ('DW_TAG_class_type', 'DW_TAG_structure_type', 'DW_TAG_union_type', 'DW_TAG_namespace'): scopes.insert(0, safe_DIE_name(parent, _strip_type_tag(parent) + " ")) # If unnamed scope, fall back to scope type - like "structure " parent = parent.get_parent() @@ -207,7 +207,7 @@ def get_class_spec_if_member(func_spec: DIE, the_func: DIE) -> ClassDesc | None: parent = func_spec.get_parent() scopes: list[str] = [] - while parent.tag in ("DW_TAG_class_type", "DW_TAG_structure_type", "DW_TAG_namespace"): + while parent and parent.tag in ("DW_TAG_class_type", "DW_TAG_structure_type", "DW_TAG_namespace"): scopes.insert(0, DIE_name(parent)) parent = parent.get_parent() if scopes: From e59feb6528988149c0b06798c959bd082161635c Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 09:02:16 +0100 Subject: [PATCH 27/38] LocationParser.parse_from_attribute: raise ValueError Raise `ValueError` in all `else` cases. 
Signed-off-by: Philipp Hahn --- elftools/dwarf/locationlists.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py index ac3a042a..d0888b58 100644 --- a/elftools/dwarf/locationlists.py +++ b/elftools/dwarf/locationlists.py @@ -347,8 +347,7 @@ def parse_from_attribute(self, attr: AttributeValue, dwarf_version: int, die: DI # We might get it without a full tree traversal using # attr.offset as a key, but we assume a good DWARF5 # aware consumer would pass a DIE along. - else: - raise ValueError("Attribute does not have location information") + raise ValueError("Attribute does not have location information") #------ PRIVATE ------# From 3e48786486ec3b57d446f7ba2668a8614850f82f Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 09:06:33 +0100 Subject: [PATCH 28/38] Rework get_location_list_at_offset() logic Combine the `if` statements to help type checking: Otherwise the 2nd DWARF-5-case needs another case to check for `die is not None`. FYI: This is a behavioral change as the file position gets changed in the error case. Signed-off-by: Philipp Hahn --- elftools/dwarf/locationlists.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py index d0888b58..d54ac03f 100644 --- a/elftools/dwarf/locationlists.py +++ b/elftools/dwarf/locationlists.py @@ -125,10 +125,12 @@ def get_location_list_at_offset(self, offset: int, die: DIE | None = None) -> li Passing the die is only neccessary in DWARF5+, for decoding location entry encodings that contain references to other sections. 
""" - if self.version >= 5 and die is None: - raise DWARFError("For this binary, \"die\" needs to be provided") self.stream.seek(offset, os.SEEK_SET) - return self._parse_location_list_from_stream_v5(die.cu) if self.version >= 5 else self._parse_location_list_from_stream() + if self.version >= 5: + if die is None: + raise DWARFError("For this binary, \"die\" needs to be provided") + return self._parse_location_list_from_stream_v5(die.cu) + return self._parse_location_list_from_stream() def iter_location_lists(self) -> Iterator[list[_Location]]: """ Iterates through location lists and view pairs. Returns lists of From 57611b88614642d3c5e7f7f787f24417838b181f Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 09:43:47 +0100 Subject: [PATCH 29/38] elf.description: Do not return None MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `dict.get() -> … | None` Signed-off-by: Philipp Hahn --- elftools/elf/descriptions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py index 58cb7903..b9f13bcc 100644 --- a/elftools/elf/descriptions.py +++ b/elftools/elf/descriptions.py @@ -74,7 +74,7 @@ def describe_e_version_numeric(x: str) -> str: def describe_p_type(x: int | str) -> str: if isinstance(x, str) and x in _DESCR_P_TYPE: - return _DESCR_P_TYPE.get(x) + return _DESCR_P_TYPE[x] elif isinstance(x, int) and ENUM_P_TYPE_BASE['PT_LOOS'] <= x <= ENUM_P_TYPE_BASE['PT_HIOS']: return 'LOOS+%lx' % (x - ENUM_P_TYPE_BASE['PT_LOOS']) else: @@ -106,7 +106,7 @@ def describe_rh_flags(x: int) -> str: def describe_sh_type(x: int | str) -> str: if isinstance(x, str) and x in _DESCR_SH_TYPE: - return _DESCR_SH_TYPE.get(x) + return _DESCR_SH_TYPE[x] elif isinstance(x, int) and ENUM_SH_TYPE_BASE['SHT_LOOS'] <= x < ENUM_SH_TYPE_BASE['SHT_GNU_versym']: return 'loos+0x%lx' % (x - ENUM_SH_TYPE_BASE['SHT_LOOS']) else: From 94e509aa88df08a4ae32c3cc441a3e70028be136 Mon 
Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 10:00:36 +0100 Subject: [PATCH 30/38] get_relocation_tables: Store intermediate references Also store the reference to `RelocationTable` in a local variable to help type checkers using the correct type when checking `entry_size`. Signed-off-by: Philipp Hahn --- elftools/elf/dynamic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/elftools/elf/dynamic.py b/elftools/elf/dynamic.py index 0cf18086..0c9f2daf 100644 --- a/elftools/elf/dynamic.py +++ b/elftools/elf/dynamic.py @@ -215,21 +215,21 @@ def get_relocation_tables(self) -> dict[str, RelocationTable | RelrRelocationTab result: dict[str, RelocationTable | RelrRelocationTable] = {} if list(self.iter_tags('DT_REL')): - result['REL'] = RelocationTable(self.elffile, + result['REL'] = rel = RelocationTable(self.elffile, self.get_table_offset('DT_REL')[1], # type: ignore[arg-type] next(self.iter_tags('DT_RELSZ'))['d_val'], False) relentsz = next(self.iter_tags('DT_RELENT'))['d_val'] - elf_assert(result['REL'].entry_size == relentsz, + elf_assert(rel.entry_size == relentsz, 'Expected DT_RELENT to be %s' % relentsz) if list(self.iter_tags('DT_RELA')): - result['RELA'] = RelocationTable(self.elffile, + result['RELA'] = rela = RelocationTable(self.elffile, self.get_table_offset('DT_RELA')[1], # type: ignore[arg-type] next(self.iter_tags('DT_RELASZ'))['d_val'], True) relentsz = next(self.iter_tags('DT_RELAENT'))['d_val'] - elf_assert(result['RELA'].entry_size == relentsz, + elf_assert(rela.entry_size == relentsz, 'Expected DT_RELAENT to be %s' % relentsz) if list(self.iter_tags('DT_RELR')): From 4d4e5ae569eb5a5151624b76ca7572c2030d98a4 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 10:02:15 +0100 Subject: [PATCH 31/38] DynamicSegment: cast to _StringTable `Dynamic` expects an instance following the protocol `_StringTable`, but `get_section()` just returns a `Section`. 
Signed-off-by: Philipp Hahn --- elftools/elf/dynamic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elftools/elf/dynamic.py b/elftools/elf/dynamic.py index 0c9f2daf..5a41901c 100644 --- a/elftools/elf/dynamic.py +++ b/elftools/elf/dynamic.py @@ -271,7 +271,7 @@ def __init__(self, header: Container, stream: IO[bytes], elffile: ELFFile) -> No for section in elffile.iter_sections(): if (isinstance(section, DynamicSection) and section['sh_offset'] == header['p_offset']): - stringtable = elffile.get_section(section['sh_link']) + stringtable = cast(_StringTable, elffile.get_section(section['sh_link'])) break Segment.__init__(self, header, stream) Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset'], From 19d72f755105aa7c0143339b308db0ef71aae039 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 10:03:50 +0100 Subject: [PATCH 32/38] Dynamic.num_tags: Return None Explicitly return `None` to silence `mypy`. Signed-off-by: Philipp Hahn --- elftools/elf/dynamic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/elftools/elf/dynamic.py b/elftools/elf/dynamic.py index 5a41901c..893fe764 100644 --- a/elftools/elf/dynamic.py +++ b/elftools/elf/dynamic.py @@ -205,6 +205,8 @@ def num_tags(self) -> int | None: self._num_tags = n + 1 return self._num_tags + return None + def get_relocation_tables(self) -> dict[str, RelocationTable | RelrRelocationTable]: """ Load all available relocation tables from DYNAMIC tags. From 3a6fc26572766320e6eea11abd424af3e596c184 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 10:00:36 +0100 Subject: [PATCH 33/38] GNUHashTable: Store intermediate references Also store the reference to `Container` in a local variable to help type checkers using the correct type when checking `bloom_size` and `nbuckets`. 
Signed-off-by: Philipp Hahn --- elftools/elf/hash.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/elftools/elf/hash.py b/elftools/elf/hash.py index eec65d30..577c5a3d 100644 --- a/elftools/elf/hash.py +++ b/elftools/elf/hash.py @@ -130,12 +130,13 @@ def __init__(self, elffile: ELFFile, start_offset: int, symboltable: _SymbolTabl p: Container = struct_parse(self.elffile.structs.Gnu_Hash, self.elffile.stream, start_offset) + self.params = p # Element sizes in the hash table self._wordsize: int = self.elffile.structs.Elf_word('').sizeof() self._xwordsize: int = self.elffile.structs.Elf_xword('').sizeof() self._chain_pos: int = start_offset + 4 * self._wordsize + \ - self.params['bloom_size'] * self._xwordsize + \ - self.params['nbuckets'] * self._wordsize + p['bloom_size'] * self._xwordsize + \ + p['nbuckets'] * self._wordsize def get_number_of_symbols(self) -> int: """ Get the number of symbols in the hash table by finding the bucket From 73d7098fc21fd47838480196db3169b093abb481 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 17 Mar 2025 10:22:45 +0100 Subject: [PATCH 34/38] typing: Re-implement Attribute instantiation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `Attribute` and its related classes `Attribute(Sub)*Section` have two concrete sub-classes for ARM and RISCV with *different* constructors. This confuses type checkers like `mypy` and `pyright` and humans like me, as this is not type safe: the `AttributeSubsubsection` classes are factory methods for `Attributes`, but their actual signature differs from the prototype. Basically you have to tell type-checkers: Expect a class with this signature, but you will get back an instance of another class having a different signature for `__init__()`. 
After having tried `Protocols` and `TypeVars` I gave up and re-implemented all 3*4 classes to have sane constructors: - The concrete sub-classes for ARM and RISCV sections only set different class variables; no extra code needed. - For `…Attributes` there's a new class method as `structs` must be handled at runtime. Signed-off-by: Philipp Hahn --- elftools/elf/sections.py | 80 ++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/elftools/elf/sections.py b/elftools/elf/sections.py index 1c3051fe..9591f223 100644 --- a/elftools/elf/sections.py +++ b/elftools/elf/sections.py @@ -319,9 +319,16 @@ def iter_stabs(self) -> Iterator[Container]: class Attribute: """ Attribute object - representing a build attribute of ELF files. """ - def __init__(self, tag): - self._tag = tag - self.extra = None + if TYPE_CHECKING: + value: Any + + def __init__(self, structs: ELFStructs, stream: IO[bytes]) -> None: + self._tag = self._parse(structs, stream) + self.extra: Any | None = None + + @classmethod + def _parse(cls, structs: ELFStructs, stream: IO[bytes]) -> Container: + raise NotImplementedError @property def tag(self) -> str: @@ -337,13 +344,14 @@ def __repr__(self) -> str: class AttributesSubsubsection(Section): """ Subsubsection of an ELF attribute section's subsection. """ - def __init__(self, stream, structs, offset, attribute): + attribute: type[Attribute] + + def __init__(self, stream: IO[bytes], structs: ELFStructs, offset: int) -> None: self.stream = stream self.offset = offset self.structs = structs - self.attribute = attribute - self.header = self.attribute(self.structs, self.stream) + self.header: Attribute = self.attribute(self.structs, self.stream) # type: ignore[assignment] self.attr_start = self.stream.tell() @@ -386,13 +394,14 @@ def __repr__(self) -> str: class AttributesSubsection(Section): """ Subsection of an ELF attributes section. 
""" - def __init__(self, stream, structs, offset, header, subsubsection): + subsubsection = AttributesSubsubsection + + def __init__(self, stream: IO[bytes], structs: ELFStructs, offset: int) -> None: self.stream = stream self.offset = offset self.structs = structs - self.subsubsection = subsubsection - self.header = struct_parse(header, self.stream, self.offset) + self.header: Container = struct_parse(structs.Elf_Attr_Subsection_Header, self.stream, self.offset) self.subsubsec_start = self.stream.tell() @@ -443,9 +452,10 @@ def __repr__(self) -> str: class AttributesSection(Section): """ ELF attributes section. """ - def __init__(self, header, name, elffile, subsection): + subsection = AttributesSubsection + + def __init__(self, header: Container, name: str, elffile: ELFFile) -> None: super().__init__(header, name, elffile) - self.subsection = subsection fv: int = struct_parse(self.structs.Elf_byte('format_version'), self.stream, @@ -493,15 +503,19 @@ def _make_subsections(self) -> Iterator[AttributesSubsection]: class ARMAttribute(Attribute): """ ARM attribute object - representing a build attribute of ARM ELF files. 
""" + + @classmethod + def _parse(cls, structs: ELFStructs, stream: IO[bytes]) -> Container: + return struct_parse(structs.Elf_Arm_Attribute_Tag, stream) + def __init__(self, structs: ELFStructs, stream: IO[bytes]) -> None: - super().__init__( - struct_parse(structs.Elf_Arm_Attribute_Tag, stream)) + super().__init__(structs, stream) if self.tag in ('TAG_FILE', 'TAG_SECTION', 'TAG_SYMBOL'): self.value = struct_parse(structs.Elf_word('value'), stream) if self.tag != 'TAG_FILE': - self.extra: list[int] = [] # type: ignore[assignment] + self.extra: list[int] = [] s_number: int = struct_parse(structs.Elf_uleb128('s_number'), stream) while s_number != 0: @@ -535,41 +549,37 @@ def __init__(self, structs: ELFStructs, stream: IO[bytes]) -> None: class ARMAttributesSubsubsection(AttributesSubsubsection): """ Subsubsection of an ELF .ARM.attributes section's subsection. """ - def __init__(self, stream, structs, offset): - super().__init__( - stream, structs, offset, ARMAttribute) + attribute = ARMAttribute class ARMAttributesSubsection(AttributesSubsection): """ Subsection of an ELF .ARM.attributes section. """ - def __init__(self, stream, structs, offset): - super().__init__( - stream, structs, offset, - structs.Elf_Attr_Subsection_Header, - ARMAttributesSubsubsection) + subsubsection = ARMAttributesSubsubsection class ARMAttributesSection(AttributesSection): """ ELF .ARM.attributes section. """ - def __init__(self, header, name, elffile): - super().__init__( - header, name, elffile, ARMAttributesSubsection) + subsection = ARMAttributesSubsection class RISCVAttribute(Attribute): """ Attribute of an ELF .riscv.attributes section. 
""" + + @classmethod + def _parse(cls, structs: ELFStructs, stream: IO[bytes]) -> Container: + return struct_parse(structs.Elf_RiscV_Attribute_Tag, stream) + def __init__(self, structs: ELFStructs, stream: IO[bytes]) -> None: - super().__init__( - struct_parse(structs.Elf_RiscV_Attribute_Tag, stream)) + super().__init__(structs, stream) if self.tag in ('TAG_FILE', 'TAG_SECTION', 'TAG_SYMBOL'): self.value = struct_parse(structs.Elf_word('value'), stream) if self.tag != 'TAG_FILE': - self.extra: list[int] = [] # type: ignore[assignment] + self.extra: list[int] = [] s_number: int = struct_parse(structs.Elf_uleb128('s_number'), stream) while s_number != 0: @@ -589,24 +599,16 @@ def __init__(self, structs: ELFStructs, stream: IO[bytes]) -> None: class RISCVAttributesSubsubsection(AttributesSubsubsection): """ Subsubsection of an ELF .riscv.attributes subsection. """ - def __init__(self, stream, structs, offset): - super().__init__( - stream, structs, offset, RISCVAttribute) + attribute = RISCVAttribute class RISCVAttributesSubsection(AttributesSubsection): """ Subsection of an ELF .riscv.attributes section. """ - def __init__(self, stream, structs, offset): - super().__init__( - stream, structs, offset, - structs.Elf_Attr_Subsection_Header, - RISCVAttributesSubsubsection) + subsubsection = RISCVAttributesSubsubsection class RISCVAttributesSection(AttributesSection): """ ELF .riscv.attributes section. 
""" - def __init__(self, header, name, elffile): - super().__init__( - header, name, elffile, RISCVAttributesSubsection) + subsection = RISCVAttributesSubsection From 586c13225b28c3e3fa6645cf0d34a8134b41db0b Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Tue, 18 Mar 2025 21:29:21 +0100 Subject: [PATCH 35/38] typing: add mypy configuration Signed-off-by: Philipp Hahn --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index c84a5c8d..b1a6baf0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,11 @@ packages = "elftools" # junit_xml = "mypy.junit.xml" junit_format = "per_file" +[[tool.mypy.overrides]] +module = "elftools.construct.*" +follow_imports = "skip" +ignore_errors = true + [tool.pyright] include = [ "elftools", From 7051c4aa1eef6ff2ea9a4706a07fb09569b4096e Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 9 Mar 2026 06:59:57 +0100 Subject: [PATCH 36/38] typing: Fix print format Printed value may not be an integer, which '%x' expects. Use '%r' to print the `repr()`esentation. Signed-off-by: Philipp Hahn --- elftools/ehabi/ehabiinfo.py | 10 +++++++++- elftools/elf/descriptions.py | 13 ++++++++----- elftools/elf/sections.py | 6 +++++- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/elftools/ehabi/ehabiinfo.py b/elftools/ehabi/ehabiinfo.py index 4f5f73e7..165f6aeb 100644 --- a/elftools/ehabi/ehabiinfo.py +++ b/elftools/ehabi/ehabiinfo.py @@ -161,7 +161,8 @@ def mnmemonic_array(self) -> list[MnemonicItem] | None: return None def __repr__(self) -> str: - return "" % ( + assert isinstance(self.function_offset, int) + return "" % ( self.function_offset, self.personality, "eh_table_offset=0x%x, " % self.eh_table_offset if self.eh_table_offset else "", @@ -185,6 +186,9 @@ class CannotUnwindEHABIEntry(EHABIEntry): """ This function cannot be unwind. Attribute #unwindable will be False. 
""" + if TYPE_CHECKING: + function_offset: int + def __init__(self, function_offset: int) -> None: super().__init__(function_offset, personality=None, bytecode_array=None, unwindable=False) @@ -197,6 +201,10 @@ class GenericEHABIEntry(EHABIEntry): """ This entry is generic model rather than ARM compact model.Attribute #bytecode_array will be None. """ + if TYPE_CHECKING: + function_offset: int + personality: int + def __init__(self, function_offset: int, personality: int) -> None: super().__init__(function_offset, personality, bytecode_array=None) diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py index b9f13bcc..e220937a 100644 --- a/elftools/elf/descriptions.py +++ b/elftools/elf/descriptions.py @@ -344,12 +344,15 @@ def describe_note_gnu_properties(properties: list[Container], machine: str) -> s prop_desc = ' ' % sz else: prop_desc = describe_note_gnu_property_bitmap_and(_DESCR_NOTE_GNU_PROPERTY_RISCV_FEATURE_1_AND, 'RISC-V AND feature', d) - elif isinstance(t, int) and _DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC: - prop_desc = '' % (t, bytes2hex(d, sep=' ')) - elif isinstance(t, int) and _DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER: - prop_desc = '' % (t, bytes2hex(d, sep=' ')) + elif isinstance(t, int): + if _DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC: + prop_desc = '' % (t, bytes2hex(d, sep=' ')) + elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER: + prop_desc = '' % (t, bytes2hex(d, sep=' ')) + else: + prop_desc = '' % (t, bytes2hex(d, sep=' ')) else: - prop_desc = '' % (t, bytes2hex(d, sep=' ')) + prop_desc = '' % (t, bytes2hex(d, sep=' ')) descriptions.append(prop_desc) return '\n '.join(descriptions) diff --git a/elftools/elf/sections.py b/elftools/elf/sections.py index 9591f223..0eac4e81 100644 --- a/elftools/elf/sections.py +++ b/elftools/elf/sections.py @@ -102,10 +102,14 @@ def 
data(self) -> bytes: decomp = zlib.decompressobj() result = decomp.decompress(compressed, self.data_size) - else: + elif isinstance(c_type, int): raise ELFCompressionError( 'Unknown compression type: {:#0x}'.format(c_type) ) + else: + raise ELFCompressionError( + 'Unknown compression type: {!r}'.format(c_type) + ) if len(result) != self._decompressed_size: raise ELFCompressionError( From 24376bb1e440214e977b9af8d60cffa062f48bed Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 9 Mar 2026 08:15:21 +0100 Subject: [PATCH 37/38] elftools/ehabi/ehabiinfo.py: Fix format string `_num_entry` is typed as `int|None`, but calling `num_entry()` will make it `int`. Type checkers don't see this and will complain about `_num_entry` being `None`, which is incompatible with type format `%d`. Signed-off-by: Philipp Hahn --- elftools/ehabi/ehabiinfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elftools/ehabi/ehabiinfo.py b/elftools/ehabi/ehabiinfo.py index 165f6aeb..bef434ab 100644 --- a/elftools/ehabi/ehabiinfo.py +++ b/elftools/ehabi/ehabiinfo.py @@ -55,7 +55,7 @@ def get_entry(self, n: int) -> EHABIEntry: """ Get the exception handler entry at index #n. (EHABIEntry object or a subclass) """ if n >= self.num_entry(): - raise IndexError('Invalid entry %d/%d' % (n, self._num_entry)) + raise IndexError('Invalid entry %d/%d' % (n, self.num_entry())) eh_index_entry_offset = self.section_offset() + n * EHABI_INDEX_ENTRY_SIZE eh_index_data = struct_parse(self._struct.EH_index_struct, self._arm_idx_section.stream, eh_index_entry_offset) word0, word1 = eh_index_data['word0'], eh_index_data['word1'] From 1631db5a06e4f9f67a05fdb4d06860126a495f99 Mon Sep 17 00:00:00 2001 From: Philipp Hahn Date: Mon, 9 Mar 2026 07:44:24 +0100 Subject: [PATCH 38/38] test_hash.py: Mock elffile and symboltable To run the unittest with typeguard, mock ELFFile and _SymbolTable instead of passing in `None`. 
Signed-off-by: Philipp Hahn --- test/test_hash.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/test_hash.py b/test/test_hash.py index 83512ec6..0efc8203 100644 --- a/test/test_hash.py +++ b/test/test_hash.py @@ -5,10 +5,11 @@ # This code is in the public domain #------------------------------------------------------------------------------- import unittest +import unittest.mock import os from elftools.elf.elffile import ELFFile -from elftools.elf.hash import ELFHashTable, GNUHashTable +from elftools.elf.hash import ELFHashTable, GNUHashTable, _SymbolTable class TestELFHash(unittest.TestCase): """ Tests for the ELF hash table. @@ -61,8 +62,8 @@ def test_get_symbol(self): def test_empty_table_without_header(self): """ Verify we can handle an empty (0 byte) ELF hash section. """ - elffile = None - symboltable = None + elffile = unittest.mock.MagicMock(ELFFile) + symboltable = unittest.mock.MagicMock(_SymbolTable) empty_hash_section = ELFHashTable(elffile, 0, 0, symboltable) self.assertEqual(empty_hash_section.get_number_of_symbols(), 0) self.assertEqual(empty_hash_section.params['nbuckets'], 0)