From 97e5fb0a1605f4fe7af27c3e3b35ab7ad7b698f3 Mon Sep 17 00:00:00 2001 From: Schamper <1254028+Schamper@users.noreply.github.com> Date: Mon, 14 Jul 2025 20:40:44 +0200 Subject: [PATCH] Minor ELF improvements --- dissect/executable/elf/elf.py | 85 +++++++++--------- pyproject.toml | 11 +-- tests/{data => _data/elf}/hello_world.out | Bin .../elf}/hello_world.stripped.out | Bin tests/{docs => _docs}/Makefile | 0 tests/{docs => _docs}/conf.py | 2 + tests/{docs => _docs}/index.rst | 0 tests/_utils.py | 5 ++ tests/elf/__init__.py | 0 tests/{ => elf}/test_dump.py | 14 +-- tests/{ => elf}/test_elf.py | 2 +- tests/{ => elf}/test_section.py | 0 tests/{ => elf}/test_segment.py | 2 +- tests/{ => elf}/test_segment_table.py | 0 tests/util.py | 5 -- tox.ini | 12 +-- 16 files changed, 72 insertions(+), 66 deletions(-) rename tests/{data => _data/elf}/hello_world.out (100%) rename tests/{data => _data/elf}/hello_world.stripped.out (100%) rename tests/{docs => _docs}/Makefile (100%) rename tests/{docs => _docs}/conf.py (96%) rename tests/{docs => _docs}/index.rst (100%) create mode 100644 tests/_utils.py create mode 100644 tests/elf/__init__.py rename tests/{ => elf}/test_dump.py (66%) rename tests/{ => elf}/test_elf.py (89%) rename tests/{ => elf}/test_section.py (100%) rename tests/{ => elf}/test_segment.py (92%) rename tests/{ => elf}/test_segment_table.py (100%) delete mode 100644 tests/util.py diff --git a/dissect/executable/elf/elf.py b/dissect/executable/elf/elf.py index f2b827f..8bae0c3 100644 --- a/dissect/executable/elf/elf.py +++ b/dissect/executable/elf/elf.py @@ -27,12 +27,14 @@ class ELF: def __init__(self, fh: BinaryIO): self.fh = fh - offset = fh.tell() + + fh.seek(0) self.e_ident = fh.read(0x10) - fh.seek(offset) if self.e_ident[:4] != c_common_elf.ELFMAG: - raise InvalidSignatureError("Invalid header magic") + raise InvalidSignatureError( + f"Invalid ELF header magic, expected {c_common_elf.ELFMAG!r}, got {self.e_ident[:4]!r}" + ) c_elf_version = c_elf_32 if self.e_ident[c_common_elf.EI_CLASS] == c_common_elf.ELFCLASS64: @@ -43,10 +45,12 @@ def __init__(self, fh: BinaryIO): is_little = self.e_ident[c_common_elf.EI_DATA] == c_common_elf.ELFDATA2LSB self.c_elf.endian = "<" if is_little else ">" + fh.seek(0) self.header = self.c_elf.Ehdr(fh) + self.segments = SegmentTable.from_elf(self) - self.section_table = SectionTable.from_elf(self) - self.symbol_tables: list[SymbolTable] = self.section_table.by_type([SHT.SYMTAB, SHT.DYNSYM]) + self.sections = SectionTable.from_elf(self) + self.symbol_tables: list[SymbolTable] = self.sections.by_type([SHT.SYMTAB, SHT.DYNSYM]) def __repr__(self) -> str: return str(self.header) @@ -54,9 +58,9 @@ def __repr__(self) -> str: def dump(self) -> bytes: output_data = [ self.segments.dump_table(), - self.section_table.dump_table(), + self.sections.dump_table(), *self.segments.dump_data(), - *self.section_table.dump_data(), + *self.sections.dump_data(), ] output_data.sort(key=itemgetter(0)) @@ -87,12 +91,12 @@ def dynamic(self) -> bool: class Table(Generic[T]): - def __init__(self, entries: int) -> None: - self.entries = entries - self.items: list[T] = [None] * entries + def __init__(self, num: int) -> None: + self.num = num + self.items: list[T] = [None] * num def __iter__(self) -> Iterator[T]: - for idx in range(self.entries): + for idx in range(self.num): yield self[idx] def __getitem__(self, idx: int) -> T: @@ -125,7 +129,7 @@ def __init__(self, fh: BinaryIO, idx: int | None = None, c_elf: cstruct = c_elf_ def __repr__(self) -> str: return ( - f"<{self.__class__.__name__} idx={self.idx} name={self.name} type={self.type}" + f"<{self.__class__.__name__} idx={self.idx} name={self.name!r} type={self.type}" f" offset=0x{self.offset:x} size=0x{self.size:x}>" ) @@ -138,16 +142,12 @@ def _set_link(self, table: SectionTable) -> None: self._link = table[self.header.sh_link] @classmethod - def from_section_table(cls, section_table: SectionTable, idx: int) -> Section: - result = cls(section_table.fh, idx=idx, c_elf=section_table.c_elf) - result._set_link(section_table) - - string_table = section_table.string_table - if isinstance(result, StringTable): - string_table = result + def from_section_table(cls, table: SectionTable, idx: int) -> Section: + result = cls(table.fh, idx=idx, c_elf=table.c_elf) + result._set_link(table) - if string_table: - result._set_name(string_table) + if sh_strtab := (result if idx == table._sh_strtab_idx else table._sh_strtab): + result._set_name(sh_strtab) return result @@ -163,7 +163,7 @@ def link(self) -> Section | None: return self._link @cached_property - def contents(self) -> bytes: + def data(self) -> bytes: self.fh.seek(self.offset) return self.fh.read(self.size) @@ -173,20 +173,22 @@ def __init__( self, fh: BinaryIO, offset: int, - entries: int, + num: int, size: int, - string_index: int | None = None, + sh_strtab_idx: int | None = None, c_elf: cstruct = c_elf_64, ): - super().__init__(entries) + super().__init__(num) self.fh = fh self.offset = offset self.size = size - self.string_table = None self.c_elf = c_elf - if string_index: - self.string_table: StringTable = self[string_index] + self._sh_strtab_idx = sh_strtab_idx + self._sh_strtab = None + + if sh_strtab_idx: + self._sh_strtab: StringTable = self[sh_strtab_idx] def __repr__(self) -> str: return f"" @@ -206,11 +208,14 @@ def _create_item(self, idx: int) -> Section: @classmethod def from_elf(cls, elf: ELF) -> SectionTable: - offset = elf.header.e_shoff - entries = elf.header.e_shnum - size = elf.header.e_shentsize - other_index = elf.header.e_shstrndx - return cls(elf.fh, offset, entries, size, other_index, elf.c_elf) + return cls( + elf.fh, + elf.header.e_shoff, + elf.header.e_shnum, + elf.header.e_shentsize, + elf.header.e_shstrndx, + elf.c_elf, + ) def by_type(self, section_types: list[int] | int) -> list[Section]: types = section_types @@ -223,14 +228,14 @@ def related_sections(self, segment: Segment) -> list[Section]: return self.find(lambda x: x.is_related(segment)) def by_name(self, name: str) -> list[Section]: - return self.find(lambda x: x.name in name) + return self.find(lambda x: name == x.name) def dump_table(self) -> tuple[int, bytes]: buf = bytearray() return self.offset, buf.join([x.header.dumps() for x in self]) def dump_data(self) -> list[tuple[int, bytes]]: - return [(x.offset, x.contents) for x in self] + return [(x.offset, x.data) for x in self] class Segment: @@ -268,7 +273,7 @@ def is_related(self, section: Section) -> bool: return self.offset <= section.offset < self.end @property - def contents(self) -> bytes: + def data(self) -> bytes: if not self._data: self.fh.seek(self.offset) self._data = self.fh.read(self.size) @@ -320,7 +325,7 @@ def by_type(self, segment_types: list[int] | int) -> list[Segment]: return self.find(lambda x: x.type in types) def dump_data(self) -> list[tuple[int, bytearray]]: - return [(x.offset, x.contents) for x in self] + return [(x.offset, x.data) for x in self] def dump_table(self) -> tuple[int, bytearray]: buf = bytearray() @@ -337,9 +342,9 @@ def __getitem__(self, offset: int) -> str: return self._get_string(offset) def _get_string(self, index: int) -> str: - if index > len(self.contents) or index == SHN.UNDEF: + if index > self.size or index == SHN.UNDEF: return None - return self.c_elf.char[None](self.contents[index:]).decode("utf8") + return self.c_elf.char[None](self.data[index:]).decode("utf8") class Symbol: @@ -372,7 +377,7 @@ def _set_name(self, table: StringTable) -> None: @classmethod def from_symbol_table(cls, table: SymbolTable, idx: int) -> Symbol: offset = idx * table.entry_size - data = table.contents[offset : offset + table.entry_size] + data = table.data[offset : offset + table.entry_size] output = cls(io.BytesIO(data), idx, table.c_elf) output._set_name(table.link) return output diff --git a/pyproject.toml b/pyproject.toml index 805ba96..9da98eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,8 @@ name = "dissect.executable" description = "A Dissect module implementing a parsers for various executable formats such as PE, ELF and Macho-O" readme = "README.md" requires-python = "~=3.9" -license.text = "Affero General Public License v3" +license = "AGPL-3.0-or-later" +license-files = ["LICENSE", "COPYRIGHT"] authors = [ {name = "Dissect Team", email = "dissect@fox-it.com"} ] @@ -16,7 +17,6 @@ classifiers = [ "Environment :: Console", "Intended Audience :: Developers", "Intended Audience :: Information Technology", - "License :: OSI Approved", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Topic :: Internet :: Log Analysis", @@ -43,7 +43,7 @@ dev = [ [tool.ruff] line-length = 120 -required-version = ">=0.9.0" +required-version = ">=0.12.0" [tool.ruff.format] docstring-code-format = true @@ -86,15 +86,12 @@ select = [ ignore = ["E203", "B904", "UP024", "ANN002", "ANN003", "ANN204", "ANN401", "SIM105", "TRY003"] [tool.ruff.lint.per-file-ignores] -"tests/docs/**" = ["INP001"] +"tests/_docs/**" = ["INP001"] [tool.ruff.lint.isort] known-first-party = ["dissect.executable"] known-third-party = ["dissect"] -[tool.setuptools] -license-files = ["LICENSE", "COPYRIGHT"] - [tool.setuptools.packages.find] include = ["dissect.*"] diff --git a/tests/data/hello_world.out b/tests/_data/elf/hello_world.out similarity index 100% rename from tests/data/hello_world.out rename to tests/_data/elf/hello_world.out diff --git a/tests/data/hello_world.stripped.out b/tests/_data/elf/hello_world.stripped.out similarity index 100% rename from tests/data/hello_world.stripped.out rename to tests/_data/elf/hello_world.stripped.out diff --git a/tests/docs/Makefile b/tests/_docs/Makefile similarity index 100% rename from tests/docs/Makefile rename to tests/_docs/Makefile diff --git a/tests/docs/conf.py b/tests/_docs/conf.py similarity index 96% rename from tests/docs/conf.py rename to tests/_docs/conf.py index 7ef62d3..4bd4f7b 100644 --- a/tests/docs/conf.py +++ b/tests/_docs/conf.py @@ -1,3 +1,5 @@ +project = "dissect.executable" + extensions = [ "autoapi.extension", "sphinx.ext.autodoc", diff --git a/tests/docs/index.rst b/tests/_docs/index.rst similarity index 100% rename from tests/docs/index.rst rename to tests/_docs/index.rst diff --git a/tests/_utils.py b/tests/_utils.py new file mode 100644 index 0000000..8d43746 --- /dev/null +++ b/tests/_utils.py @@ -0,0 +1,5 @@ +from pathlib import Path + + +def absolute_path(path: str) -> Path: + return (Path(__file__).parent / path).resolve() diff --git a/tests/elf/__init__.py b/tests/elf/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_dump.py b/tests/elf/test_dump.py similarity index 66% rename from tests/test_dump.py rename to tests/elf/test_dump.py index 8f0f043..de47d67 100644 --- a/tests/test_dump.py +++ b/tests/elf/test_dump.py @@ -6,20 +6,22 @@ import pytest from dissect.executable import ELF - -from .util import data_file +from tests._utils import absolute_path if TYPE_CHECKING: from pathlib import Path @pytest.mark.parametrize( - "file_name", - ["hello_world.out", "hello_world.stripped.out"], + "path", + [ + "_data/elf/hello_world.out", + "_data/elf/hello_world.stripped.out", + ], ) -def test_dump(tmp_path: Path, file_name: str) -> None: +def test_dump(path: str, tmp_path: Path) -> None: output_path = tmp_path / "output" - input_path = data_file(file_name) + input_path = absolute_path(path) with input_path.open("rb") as input_file: elf_file = ELF(input_file) diff --git a/tests/test_elf.py b/tests/elf/test_elf.py similarity index 89% rename from tests/test_elf.py rename to tests/elf/test_elf.py index b412f41..9620086 100644 --- a/tests/test_elf.py +++ b/tests/elf/test_elf.py @@ -4,7 +4,7 @@ import pytest -from dissect.executable import ELF +from dissect.executable.elf.elf import ELF from dissect.executable.exception import InvalidSignatureError diff --git a/tests/test_section.py b/tests/elf/test_section.py similarity index 100% rename from tests/test_section.py rename to tests/elf/test_section.py diff --git a/tests/test_segment.py b/tests/elf/test_segment.py similarity index 92% rename from tests/test_segment.py rename to tests/elf/test_segment.py index 1d2d945..41bbf9c 100644 --- a/tests/test_segment.py +++ b/tests/elf/test_segment.py @@ -16,4 +16,4 @@ def test_segment() -> None: segment = create_segment(orig_data) assert segment.offset == len(c_elf_64.Phdr) assert segment.size == len(orig_data) - assert segment.contents == orig_data + assert segment.data == orig_data diff --git a/tests/test_segment_table.py b/tests/elf/test_segment_table.py similarity index 100% rename from tests/test_segment_table.py rename to tests/elf/test_segment_table.py diff --git a/tests/util.py b/tests/util.py deleted file mode 100644 index 0a6b5f9..0000000 --- a/tests/util.py +++ /dev/null @@ -1,5 +0,0 @@ -from pathlib import Path - - -def data_file(path: str) -> Path: - return Path(__file__).parent / "data" / path diff --git a/tox.ini b/tox.ini index e82fbf9..965587c 100644 --- a/tox.ini +++ b/tox.ini @@ -32,14 +32,14 @@ commands = [testenv:fix] package = skip deps = - ruff==0.9.2 + ruff==0.12.3 commands = ruff format dissect tests [testenv:lint] package = skip deps = - ruff==0.9.2 + ruff==0.12.3 vermin commands = ruff check dissect tests @@ -62,12 +62,12 @@ deps = sphinx-design furo commands = - make -C tests/docs clean - make -C tests/docs html + make -C tests/_docs clean + make -C tests/_docs html [testenv:docs-linkcheck] allowlist_externals = make deps = {[testenv:docs-build]deps} commands = - make -C tests/docs clean - make -C tests/docs linkcheck + make -C tests/_docs clean + make -C tests/_docs linkcheck