Skip to content

Commit 84b92aa

Browse files
authored
Add DYLD command parsing (#60)
* progress * dyld * dyld * types * fix * fix * fix * tests
1 parent f88eca8 commit 84b92aa

File tree

5 files changed

+216
-116
lines changed

5 files changed

+216
-116
lines changed

src/launchpad/models/range_mapping.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,13 @@ class BinaryTag(Enum):
3333
CODE_SIGNATURE = "code_signature"
3434

3535
# DYLD info categories
36+
DYLD = "dyld" # Parent category for all DYLD-related ranges
3637
DYLD_REBASE = "dyld_rebase"
3738
DYLD_BIND = "dyld_bind"
3839
DYLD_LAZY_BIND = "dyld_lazy_bind"
3940
DYLD_EXPORTS = "dyld_exports"
4041
DYLD_FIXUPS = "dyld_fixups"
42+
DYLD_STRING_TABLE = "dyld_string_table"
4143

4244
# Binary modules/classes
4345
OBJC_CLASSES = "objc_classes"

src/launchpad/parsers/apple/range_mapping_builder.py

Lines changed: 152 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,6 @@ class RangeMappingBuilder:
1717
"""Builds range mappings for Mach-O binaries."""
1818

1919
def __init__(self, parser: MachOParser, file_size: int) -> None:
20-
"""Initialize the builder.
21-
22-
Args:
23-
parser: MachO parser instance
24-
file_size: Total file size
25-
"""
2620
self.parser = parser
2721
self.file_size = file_size
2822

@@ -83,123 +77,168 @@ def _map_load_commands(self, range_map: RangeMap) -> None:
8377

8478
try:
8579
if cmd_type == lief.MachO.LoadCommand.TYPE.SYMTAB:
86-
self._map_symtab_command(range_map, command)
80+
if cast_command := self._cast_command(command, lief.MachO.SymbolCommand):
81+
self._map_symtab_command(range_map, cast_command)
8782
elif cmd_type in [
8883
lief.MachO.LoadCommand.TYPE.DYLD_INFO,
8984
lief.MachO.LoadCommand.TYPE.DYLD_INFO_ONLY,
9085
]:
91-
self._map_dyld_info_command(range_map, command)
86+
if cast_command := self._cast_command(command, lief.MachO.DyldInfo):
87+
self._map_dyld_info_command(range_map, cast_command)
9288
elif cmd_type == lief.MachO.LoadCommand.TYPE.FUNCTION_STARTS:
93-
self._map_function_starts_command(range_map, command)
89+
if cast_command := self._cast_command(command, lief.MachO.FunctionStarts):
90+
self._map_function_starts_command(range_map, cast_command)
9491
elif cmd_type == lief.MachO.LoadCommand.TYPE.CODE_SIGNATURE:
95-
self._map_code_signature_command(range_map, command)
92+
if cast_command := self._cast_command(command, lief.MachO.CodeSignature):
93+
self._map_code_signature_command(range_map, cast_command)
9694
elif cmd_type == lief.MachO.LoadCommand.TYPE.DATA_IN_CODE:
97-
self._map_data_in_code_command(range_map, command)
95+
if cast_command := self._cast_command(command, lief.MachO.DataInCode):
96+
self._map_data_in_code_command(range_map, cast_command)
9897
elif cmd_type == lief.MachO.LoadCommand.TYPE.DYLIB_CODE_SIGN_DRS:
9998
self._map_dylib_code_sign_drs_command(range_map, command)
10099
elif cmd_type == lief.MachO.LoadCommand.TYPE.LINKER_OPTIMIZATION_HINT:
101100
self._map_linker_optimization_hint_command(range_map, command)
102101
elif cmd_type == lief.MachO.LoadCommand.TYPE.DYLD_EXPORTS_TRIE:
103102
self._map_dyld_exports_trie_command(range_map, command)
104103
elif cmd_type == lief.MachO.LoadCommand.TYPE.DYLD_CHAINED_FIXUPS:
105-
self._map_dyld_chained_fixups_command(range_map, command)
104+
if cast_command := self._cast_command(command, lief.MachO.DyldChainedFixups):
105+
self._map_dyld_chained_fixups_command(range_map, cast_command)
106+
elif cmd_type == lief.MachO.LoadCommand.TYPE.RPATH:
107+
if cast_command := self._cast_command(command, lief.MachO.RPathCommand):
108+
self._map_rpath_command(range_map, cast_command)
109+
elif cmd_type in [
110+
lief.MachO.LoadCommand.TYPE.LOAD_DYLIB,
111+
lief.MachO.LoadCommand.TYPE.LOAD_WEAK_DYLIB,
112+
lief.MachO.LoadCommand.TYPE.REEXPORT_DYLIB,
113+
]:
114+
if cast_command := self._cast_command(command, lief.MachO.DylibCommand):
115+
self._map_dylib_command(range_map, cast_command)
106116
except Exception as e:
107117
logger.debug(f"Failed to process command {i} {command.command.name}: {e}")
108118

109119
current_offset += cmd_size
110120

111-
def _map_symtab_command(self, range_map: RangeMap, command: Any) -> None:
121+
def _cast_command(self, command: lief.MachO.LoadCommand, expected_type: type) -> Any | None:
122+
if isinstance(command, expected_type):
123+
return command
124+
else:
125+
logger.warning(f"Expected {expected_type.__name__}, got {type(command).__name__}")
126+
return None
127+
128+
def _map_symtab_command(self, range_map: RangeMap, command: lief.MachO.SymbolCommand) -> None:
112129
"""Map symbol table and string table from LC_SYMTAB command."""
113-
try:
114-
if command.symbol_offset > 0 and command.nb_symbols > 0:
115-
# Each symbol entry is typically 16 bytes (64-bit)
116-
symbol_size = command.nb_symbols * 16
117-
range_map.add_range(
118-
command.symbol_offset, command.symbol_offset + symbol_size, BinaryTag.DEBUG_INFO, "symbol_table"
119-
)
120130

121-
if command.string_offset > 0 and command.string_size > 0:
122-
range_map.add_range(
123-
command.string_offset,
124-
command.string_offset + command.string_size,
125-
BinaryTag.C_STRINGS,
126-
"string_table",
127-
)
128-
except Exception as e:
129-
logger.error(f"Failed to map symtab command: {e}")
131+
# Map symbol table
132+
if command.symbol_offset > 0 and command.numberof_symbols > 0:
133+
# Assumes 64-bit nlist_64 symbol entries (16 bytes each); 32-bit nlist entries are 12 bytes
134+
symbol_size = command.numberof_symbols * 16
135+
range_map.add_range(
136+
command.symbol_offset, command.symbol_offset + symbol_size, BinaryTag.DEBUG_INFO, "symbol_table"
137+
)
138+
139+
# Map string table
140+
if command.strings_offset > 0 and command.strings_size > 0:
141+
range_map.add_range(
142+
command.strings_offset,
143+
command.strings_offset + command.strings_size,
144+
BinaryTag.DYLD_STRING_TABLE,
145+
"string_table",
146+
)
130147

131-
def _map_dyld_info_command(self, range_map: RangeMap, command: Any) -> None:
148+
def _map_dyld_info_command(self, range_map: RangeMap, command: lief.MachO.DyldInfo) -> None:
132149
"""Map DYLD info sections from LC_DYLD_INFO command."""
133-
try:
134-
if hasattr(command, "rebase_off") and command.rebase_off > 0 and command.rebase_size > 0:
135-
range_map.add_range(
136-
command.rebase_off,
137-
command.rebase_off + command.rebase_size,
138-
BinaryTag.DYLD_REBASE,
139-
"dyld_rebase_info",
140-
)
150+
# Rebase information
151+
rebase_offset, rebase_size = command.rebase
152+
if rebase_offset > 0 and rebase_size > 0:
153+
range_map.add_range(
154+
rebase_offset,
155+
rebase_offset + rebase_size,
156+
BinaryTag.DYLD_REBASE,
157+
"dyld_rebase_info",
158+
)
141159

142-
if hasattr(command, "bind_off") and command.bind_off > 0 and command.bind_size > 0:
143-
range_map.add_range(
144-
command.bind_off, command.bind_off + command.bind_size, BinaryTag.DYLD_BIND, "dyld_bind_info"
145-
)
160+
# Bind information
161+
bind_offset, bind_size = command.bind
162+
if bind_offset > 0 and bind_size > 0:
163+
range_map.add_range(bind_offset, bind_offset + bind_size, BinaryTag.DYLD_BIND, "dyld_bind_info")
146164

147-
if hasattr(command, "lazy_bind_off") and command.lazy_bind_off > 0 and command.lazy_bind_size > 0:
148-
range_map.add_range(
149-
command.lazy_bind_off,
150-
command.lazy_bind_off + command.lazy_bind_size,
151-
BinaryTag.DYLD_LAZY_BIND,
152-
"dyld_lazy_bind_info",
153-
)
165+
# Weak bind information
166+
weak_bind_offset, weak_bind_size = command.weak_bind
167+
if weak_bind_offset > 0 and weak_bind_size > 0:
168+
range_map.add_range(
169+
weak_bind_offset,
170+
weak_bind_offset + weak_bind_size,
171+
BinaryTag.DYLD_BIND,
172+
"dyld_weak_bind_info",
173+
)
154174

155-
if hasattr(command, "export_off") and command.export_off > 0 and command.export_size > 0:
156-
range_map.add_range(
157-
command.export_off,
158-
command.export_off + command.export_size,
159-
BinaryTag.DYLD_EXPORTS,
160-
"dyld_export_info",
161-
)
162-
except Exception as e:
163-
logger.debug(f"Failed to map DYLD info command: {e}")
175+
# Lazy bind information
176+
lazy_bind_offset, lazy_bind_size = command.lazy_bind
177+
if lazy_bind_offset > 0 and lazy_bind_size > 0:
178+
range_map.add_range(
179+
lazy_bind_offset,
180+
lazy_bind_offset + lazy_bind_size,
181+
BinaryTag.DYLD_LAZY_BIND,
182+
"dyld_lazy_bind_info",
183+
)
164184

165-
def _map_function_starts_command(self, range_map: RangeMap, command: Any) -> None:
185+
# Export information
186+
export_offset, export_size = command.export_info
187+
if export_offset > 0 and export_size > 0:
188+
range_map.add_range(
189+
export_offset,
190+
export_offset + export_size,
191+
BinaryTag.DYLD_EXPORTS,
192+
"dyld_export_info",
193+
)
194+
195+
def _map_function_starts_command(self, range_map: RangeMap, command: lief.MachO.FunctionStarts) -> None:
166196
"""Map function starts information from LC_FUNCTION_STARTS command."""
167-
try:
168-
if hasattr(command, "data_offset") and command.data_offset > 0 and command.data_size > 0:
169-
range_map.add_range(
170-
command.data_offset,
171-
command.data_offset + command.data_size,
172-
BinaryTag.FUNCTION_STARTS,
173-
"function_starts",
174-
)
175-
except Exception as e:
176-
logger.debug(f"Failed to map function starts command: {e}")
197+
range_map.add_range(
198+
command.data_offset,
199+
command.data_offset + command.data_size,
200+
BinaryTag.FUNCTION_STARTS,
201+
"function_starts",
202+
)
177203

178-
def _map_code_signature_command(self, range_map: RangeMap, command: Any) -> None:
204+
def _map_code_signature_command(self, range_map: RangeMap, command: lief.MachO.CodeSignature) -> None:
179205
"""Map code signature from LC_CODE_SIGNATURE command."""
180-
try:
181-
if hasattr(command, "data_offset") and command.data_offset > 0 and command.data_size > 0:
182-
range_map.add_range(
183-
command.data_offset,
184-
command.data_offset + command.data_size,
185-
BinaryTag.CODE_SIGNATURE,
186-
"code_signature",
187-
)
188-
except Exception as e:
189-
logger.debug(f"Failed to map code signature command: {e}")
206+
range_map.add_range(
207+
command.data_offset,
208+
command.data_offset + command.data_size,
209+
BinaryTag.CODE_SIGNATURE,
210+
"code_signature",
211+
)
190212

191-
def _map_data_in_code_command(self, range_map: RangeMap, command: Any) -> None:
213+
def _map_data_in_code_command(self, range_map: RangeMap, command: lief.MachO.DataInCode) -> None:
192214
"""Map data-in-code information from LC_DATA_IN_CODE command."""
193-
try:
194-
if hasattr(command, "data_offset") and command.data_offset > 0 and command.data_size > 0:
195-
range_map.add_range(
196-
command.data_offset,
197-
command.data_offset + command.data_size,
198-
BinaryTag.DEBUG_INFO,
199-
"data_in_code",
200-
)
201-
except Exception as e:
202-
logger.debug(f"Failed to map data-in-code command: {e}")
215+
range_map.add_range(
216+
command.data_offset,
217+
command.data_offset + command.data_size,
218+
BinaryTag.DEBUG_INFO,
219+
"data_in_code",
220+
)
221+
222+
# Map the data-in-code entries table (individual entries are not parsed yet)
223+
self._map_data_in_code_entries(range_map, command)
224+
225+
def _map_data_in_code_entries(self, range_map: RangeMap, command: lief.MachO.DataInCode) -> None:
226+
"""Map individual data-in-code entries."""
227+
# Each data_in_code_entry is 8 bytes:
228+
# - offset: UInt32 (offset from the Mach-O header to the start of the data range)
229+
# - length: UInt16 (length of data)
230+
# - kind: UInt16 (type of data)
231+
entry_size = 8
232+
num_entries = command.data_size // entry_size
233+
234+
if num_entries > 0:
235+
# Map the entries table
236+
range_map.add_range(
237+
command.data_offset,
238+
command.data_offset + (num_entries * entry_size),
239+
BinaryTag.DEBUG_INFO,
240+
"data_in_code_entries",
241+
)
203242

204243
def _map_dylib_code_sign_drs_command(self, range_map: RangeMap, command: Any) -> None:
205244
"""Map code signature DRs from LC_DYLIB_CODE_SIGN_DRS command."""
@@ -240,12 +279,12 @@ def _map_dyld_exports_trie_command(self, range_map: RangeMap, command: Any) -> N
240279
except Exception as e:
241280
logger.debug(f"Failed to map exports trie command: {e}")
242281

243-
def _map_dyld_chained_fixups_command(self, range_map: RangeMap, command: Any) -> None:
282+
def _map_dyld_chained_fixups_command(self, range_map: RangeMap, command: lief.MachO.DyldChainedFixups) -> None:
244283
"""Map chained fixups from LC_DYLD_CHAINED_FIXUPS command."""
245284
range_map.add_range(
246285
command.data_offset,
247286
command.data_offset + command.data_size,
248-
BinaryTag.DYLD_BIND,
287+
BinaryTag.DYLD_FIXUPS,
249288
"dyld_chained_fixups",
250289
)
251290

@@ -269,6 +308,26 @@ def _map_segments_and_sections(self, range_map: RangeMap) -> None:
269308
except Exception as e:
270309
logger.debug(f"Failed to map section {section_name}: {e}")
271310

311+
def _map_rpath_command(self, range_map: RangeMap, command: lief.MachO.RPathCommand) -> None:
312+
"""Map RPATH command data."""
313+
if command.path:
314+
range_map.add_range(
315+
command.command_offset,
316+
command.command_offset + command.size,
317+
BinaryTag.C_STRINGS,
318+
"rpath_string",
319+
)
320+
321+
def _map_dylib_command(self, range_map: RangeMap, command: lief.MachO.DylibCommand) -> None:
322+
"""Map dylib loading command data (LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB)."""
323+
if command.name:
324+
range_map.add_range(
325+
command.command_offset,
326+
command.command_offset + command.size,
327+
BinaryTag.C_STRINGS,
328+
"dylib_name",
329+
)
330+
272331
def _categorize_section(self, section_name: str) -> BinaryTag:
273332
"""Categorize a section based on its name."""
274333
name_lower = section_name.lower()

src/launchpad/utils/treemap/macho_element_builder.py

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -47,20 +47,59 @@ def _build_binary_treemap(self, range_map: RangeMap, name: str, binary_path: str
4747

4848
# Create child elements for each tag
4949
children: list[TreemapElement] = []
50+
dyld_children: list[TreemapElement] = []
51+
52+
logger.debug(f"Processing tags: {list(ranges_by_tag.keys())}")
53+
5054
for tag, ranges in ranges_by_tag.items():
5155
total_size = sum(r.size for r in ranges)
52-
children.append(
53-
TreemapElement(
54-
name=tag,
55-
install_size=total_size,
56-
download_size=total_size, # Binary sections don't compress
57-
element_type=TreemapType.EXECUTABLES,
58-
path=None,
59-
is_directory=False,
60-
children=[],
61-
details={"tag": tag},
62-
)
56+
57+
# Determine element type based on tag
58+
element_type = TreemapType.EXECUTABLES # Default
59+
if tag.startswith("dyld_"):
60+
element_type = TreemapType.DYLD
61+
elif tag == "unmapped":
62+
element_type = TreemapType.UNMAPPED
63+
elif tag == "code_signature":
64+
element_type = TreemapType.CODE_SIGNATURE
65+
elif tag == "function_starts":
66+
element_type = TreemapType.FUNCTION_STARTS
67+
elif tag == "external_methods":
68+
element_type = TreemapType.EXTERNAL_METHODS
69+
70+
element = TreemapElement(
71+
name=tag,
72+
install_size=total_size,
73+
download_size=total_size, # TODO: compute the real download size; install size is used as a placeholder
74+
element_type=element_type,
75+
path=None,
76+
is_directory=False,
77+
children=[],
78+
details={"tag": tag},
79+
)
80+
81+
# Group DYLD-related tags under a parent DYLD element
82+
if tag.startswith("dyld_"):
83+
logger.debug(f"Adding {tag} to DYLD group")
84+
dyld_children.append(element)
85+
else:
86+
logger.debug(f"Adding {tag} to regular children")
87+
children.append(element)
88+
89+
# Create parent DYLD element if we have DYLD children
90+
if dyld_children:
91+
dyld_total_size = sum(child.install_size for child in dyld_children)
92+
dyld_element = TreemapElement(
93+
name="DYLD",
94+
install_size=dyld_total_size,
95+
download_size=dyld_total_size,
96+
element_type=TreemapType.DYLD,
97+
path=None,
98+
is_directory=True,
99+
children=dyld_children,
100+
details={"tag": "dyld"},
63101
)
102+
children.append(dyld_element)
64103

65104
# Add unmapped regions if any
66105
if range_map.unmapped_size > 0:

0 commit comments

Comments
 (0)