From 98dc409cf0908ee1064f365a3e8f1e03d6b987af Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Mon, 8 Sep 2025 23:39:40 -0700 Subject: [PATCH 1/8] APP: allow retrieval of Register by name from dictionary --- chb/app/BDictionary.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/chb/app/BDictionary.py b/chb/app/BDictionary.py index e7842b9c..dd53d714 100644 --- a/chb/app/BDictionary.py +++ b/chb/app/BDictionary.py @@ -46,7 +46,7 @@ import chb.util.IndexedTable as IT import chb.util.StringIndexedTable as SI -from typing import Callable, List, Tuple, TYPE_CHECKING +from typing import Callable, List, Optional, Tuple, TYPE_CHECKING if TYPE_CHECKING: from chb.app.AppAccess import AppAccess @@ -125,6 +125,21 @@ def register(self, ix: int) -> Register: return bdregistry.mk_instance( self, self.register_table.retrieve(ix), Register) + def register_index_by_name(self, name: str) -> Optional[int]: + ixvalues = self.register_table.values() + for ixvalue in ixvalues: + if len(ixvalue.tags) >= 2: + if ixvalue.tags[1] == name: + return ixvalue.index + return None + + def register_by_name(self, name: str) -> Optional[Register]: + ix = self.register_index_by_name(name) + if ix is not None: + return self.register(ix) + else: + return None + # ----------------------- xml accessors ------------------------------------ def read_xml_string(self, n: ET.Element) -> str: From 63cca4368d3e4e0ff164ca72251caf3319e1ec40 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Mon, 8 Sep 2025 23:40:59 -0700 Subject: [PATCH 2/8] AST: visual representation of reaching defs --- chb/app/CHVersion.py | 2 +- chb/app/Function.py | 29 +++- chb/app/Instruction.py | 7 +- chb/arm/ARMInstruction.py | 27 +-- chb/cmdline/astcmds.py | 146 +++++++++++----- chb/cmdline/chkx | 7 +- chb/graphics/DotRdefPath.py | 328 ++++++++++++++++++++++++++++++------ chb/util/dotutil.py | 2 +- chb/util/graphutil.py | 8 +- 9 files changed, 439 insertions(+), 117 deletions(-) diff --git a/chb/app/CHVersion.py b/chb/app/CHVersion.py index a3250784..4c3badfc 100644 --- a/chb/app/CHVersion.py +++ b/chb/app/CHVersion.py @@ -1 +1 @@ -chbversion: str = "0.3.0-20250902" +chbversion: str = "0.3.0-20250908" diff --git a/chb/app/Function.py b/chb/app/Function.py index e1318bbe..d64991bc 100644 --- a/chb/app/Function.py +++ b/chb/app/Function.py @@ -525,14 +525,36 @@ def instruction(self, iaddr: str) -> Instruction: else: raise UF.CHBError("No instruction found at address " + iaddr) - def rdef_locations(self) -> Dict[str, List[List[str]]]: + def rdef_location_partition(self) -> Dict[str, List[List[str]]]: + """Return a map of registers to partitions of their reaching definitions.""" + result: Dict[str, List[List[str]]] = {} for (iaddr, instr) in self.instructions.items(): irdefs = instr.rdef_locations() - for (reg, rdeflists) in irdefs.items(): + for (reg, rdeflist) in irdefs.items(): result.setdefault(reg, []) - result[reg].extend(rdeflists) + for rrlist in result[reg]: + if set(rrlist) == set(rdeflist): + break + else: + result[reg].append(rdeflist) + return result + + def use_location_partition(self) -> Dict[str, List[List[str]]]: + """Return a map of registers to partitions of their use locations.""" + + result: Dict[str, List[List[str]]] = {} + + for (iaddr, instr) in self.instructions.items(): + iuses = instr.use_locations() + for (reg, uselist) in iuses.items(): + result.setdefault(reg, []) + for rrlist in result[reg]: + if set(rrlist) == set(uselist): + break + else: + result[reg].append(uselist) return result def lhs_types(self) -> Dict[str, Dict[str, "BCTyp"]]: @@ -545,7 +567,6 @@ def lhs_types(self) -> Dict[str, Dict[str, "BCTyp"]]: result[iaddr] = {} for (vname, vtype) in ilhs_types.items(): result[iaddr][vname] = vtype - return result def globalrefs(self) -> Dict[str, List["GlobalReference"]]: diff --git a/chb/app/Instruction.py b/chb/app/Instruction.py index b5a6cdb2..213a9e10 100644 --- a/chb/app/Instruction.py +++ b/chb/app/Instruction.py @@ -340,11 +340,16 @@ def ast_switch_condition_prov( Optional[AST.ASTExpr], Optional[AST.ASTExpr]]: raise UF.CHBError("ast-switch-condition-prov not defined") - def rdef_locations(self) -> Dict[str, List[List[str]]]: + def rdef_locations(self) -> Dict[str, List[str]]: """Returns for each register, which locations must be combined.""" return {} + def use_locations(self) -> Dict[str, List[str]]: + """Returns for each register defined, which locations use the definition.""" + + return {} + def lhs_types(self) -> Dict[str, "BCTyp"]: """Returns a mapping from lhs assigned to its type.""" diff --git a/chb/arm/ARMInstruction.py b/chb/arm/ARMInstruction.py index 636de6a2..a308a731 100644 --- a/chb/arm/ARMInstruction.py +++ b/chb/arm/ARMInstruction.py @@ -411,20 +411,25 @@ def lhs_variables( def rhs_expressions(self, filter: Callable[[XXpr], bool]) -> List[XXpr]: return [x for x in self.opcode.rhs(self.xdata) if filter(x)] - def rdef_locations(self) -> Dict[str, List[List[str]]]: - result: Dict[str, Dict[str, List[str]]] = {} + def rdef_locations(self) -> Dict[str, List[str]]: + result: Dict[str, List[str]] = {} for rdef in self.xdata.reachingdefs: if rdef is not None: - rdefvar = str(rdef.vardefuse.variable) - rdeflocs = [str(s) for s in rdef.valid_deflocations] - result.setdefault(rdefvar, {}) - for sx in rdeflocs: - result[rdefvar].setdefault(sx, []) - for sy in rdeflocs: - if sy not in result[rdefvar][sx]: - result[rdefvar][sx].append(sy) - return {x:list(r.values()) for (x, r) in result.items()} + rdefvar = str(rdef.variable) + rdeflocs = sorted([str(s) for s in rdef.valid_deflocations]) + result[rdefvar] = rdeflocs + return result + + def use_locations(self) -> Dict[str, List[str]]: + result: Dict[str, List[str]] = {} + + for use in self.xdata.defuses: + if use is not None: + usevar = str(use.variable) + uselocs = sorted([str(s) for s in use.uselocations]) + result[usevar] = uselocs + return result def lhs_types(self) -> Dict[str, "BCTyp"]: result: Dict[str, "BCTyp"] = {} diff --git a/chb/cmdline/astcmds.py b/chb/cmdline/astcmds.py index f284ae88..a889b95f 100644 --- a/chb/cmdline/astcmds.py +++ b/chb/cmdline/astcmds.py @@ -36,6 +36,7 @@ Any, cast, Dict, List, NoReturn, Optional, Set, Tuple, TYPE_CHECKING) from chb.app.AppAccess import AppAccess +from chb.app.Function import Function from chb.ast.AbstractSyntaxTree import AbstractSyntaxTree from chb.ast.ASTApplicationInterface import ASTApplicationInterface @@ -62,6 +63,7 @@ from chb.userdata.UserHints import UserHints import chb.util.dotutil as UD +from chb.util.DotGraph import DotGraph import chb.util.fileutil as UF import chb.util.graphutil as UG from chb.util.loggingutil import chklogger, LogLevel @@ -150,6 +152,7 @@ def buildast(args: argparse.Namespace) -> NoReturn: hide_annotations: bool = args.hide_annotations show_reachingdefs: str = args.show_reachingdefs output_reachingdefs: str = args.output_reachingdefs + fileformat: str = args.format verbose: bool = args.verbose loglevel: str = args.loglevel logfilename: Optional[str] = args.logfilename @@ -347,7 +350,7 @@ def buildast(args: argparse.Namespace) -> NoReturn: # xdata records for all instructions in the function. Locations that # have a common user are merged. Types are provided by lhs_types. astinterface.introduce_ssa_variables( - f.rdef_locations(), f.register_lhs_types, f.lhs_names) + f.rdef_location_partition(), f.register_lhs_types, f.lhs_names) # Introduce stack variables for all stack buffers with types astinterface.introduce_stack_variables( @@ -393,51 +396,14 @@ def buildast(args: argparse.Namespace) -> NoReturn: UC.print_error("\nSpecify a file to save the reaching defs") continue - rdefspec = show_reachingdefs.split(":") - if len(rdefspec) != 2: - UC.print_error( - "\nArgument to show_reachingdefs not recognized") - continue - - useloc = rdefspec[0] - register = rdefspec[1] - - if not f.has_instruction(useloc): - UC.print_status_update("Useloc: " + useloc + " not found") - continue - - tgtinstr = f.instruction(useloc) - - if not register in f.rdef_locations(): + register = show_reachingdefs + if not register in f.rdef_location_partition(): UC.print_status_update( "Register " + register + " not found in rdeflocations") continue - cblock = f.containing_block(useloc) - graph = UG.DirectedGraph(list(f.cfg.blocks.keys()), f.cfg.edges) - rdefs = tgtinstr.reaching_definitions(register) - dotpaths: List[DotRdefPath] = [] - graph.find_paths(f.faddr, cblock) - for (i, p) in enumerate( - sorted(graph.get_paths(), key=lambda p: len(p))): - cfgpath = DotRdefPath( - "path" + str(i), - f, - astinterface, - p, - subgraph=True, - nodeprefix = str(i) +":", - rdefinstrs = rdefs) - dotpaths.append(cfgpath) - - pdffilename = UD.print_dot_subgraphs( - app.path, - "paths", - output_reachingdefs, - "pdf", - [dotcfg.build() for dotcfg in dotpaths]) - - UC.print_status_update("Printed " + pdffilename) + print_reachingdefs( + app, astinterface, output_reachingdefs, fileformat, f, register) else: UC.print_error("Unable to find function " + faddr) @@ -469,6 +435,102 @@ def buildast(args: argparse.Namespace) -> NoReturn: exit(0) +def print_reachingdefs( + app: AppAccess, + astinterface: ASTInterface, + filename: str, + fileformat: str, + f: Function, + register: str) -> None: + dotpaths: List[Tuple[DotRdefPath, str, str]] = [] + regspill = register + "_spill" + for (iaddr, instr) in f.instructions.items(): + if register in instr.rdef_locations(): + register_o = app.bdictionary.register_by_name(register) + cblock = f.containing_block(iaddr) + rdefs = instr.reaching_definitions(register) + for rdef in rdefs: + if rdef == "init": + if regspill in instr.annotation: + continue + rdblock = f.faddr + graph = UG.DirectedGraph(list(f.cfg.blocks.keys()), f.cfg.edges) + graph.find_paths(rdblock, cblock) + for (i, p) in enumerate( + sorted(graph.get_paths(), key=lambda p: len(p))): + p = ["init"] + p + cfgpath = DotRdefPath( + "path_" + str(rdef) + "_" + str(iaddr) + "_" + str(i), + f, + astinterface, + p, + register_o, + subgraph=True, + nodeprefix = str(iaddr) + str(rdef) + str(i) + ":", + rdefinstrs = rdefs, + useinstrs=[iaddr]) + dotpaths.append((cfgpath, rdef, iaddr)) + else: + rdblock = f.containing_block(rdef) + graph = UG.DirectedGraph(list(f.cfg.blocks.keys()), f.cfg.edges) + graph.find_paths(rdblock, cblock) + for (i, p) in enumerate( + sorted(graph.get_paths(), key=lambda p: len(p))): + cfgpath = DotRdefPath( + "path_" + str(rdef) + "_" + str(iaddr) + "_" + str(i), + f, + astinterface, + p, + subgraph=True, + nodeprefix = str(iaddr) + str(rdef) + str(i) + ":", + rdefinstrs = rdefs, + useinstrs=[iaddr]) + dotpaths.append((cfgpath, rdef, iaddr)) + + possibly_spurious_rdefs: List[Tuple[str, str]] = [] + legitimate_rdefs: List[Tuple[str, str]] = [] + dotgraphs: List[Tuple[DotRdefPath, DotGraph, str, str]] = [] + + for (dotcfg, rdef, iaddr) in dotpaths: + dotgr = dotcfg.build() + if dotgr is not None: + if dotcfg.is_potentially_spurious(): + possibly_spurious_rdefs.append((rdef, iaddr)) + else: + legitimate_rdefs.append((rdef, iaddr)) + dotgraphs.append((dotcfg, dotgr, rdef, iaddr)) + + printgraphs: List[DotGraph] = [] + for (dotcfg, dg, rdef, iaddr) in dotgraphs: + if dotcfg.is_potentially_spurious(): + if (rdef, iaddr) in legitimate_rdefs: + pass + else: + printgraphs.append(dg) + else: + printgraphs.append(dg) + + pdffilename = UD.print_dot_subgraphs( + app.path, + "paths", + filename, + fileformat, + printgraphs) + + UC.print_status_update("Printed " + pdffilename) + if len(possibly_spurious_rdefs) > 0: + print("Possibly spurious reachingdefs to be removed: ") + print("~" * 80) + for (rdef, iaddr) in set(possibly_spurious_rdefs): + if not (rdef, iaddr) in legitimate_rdefs: + print(" rdefloc: " + rdef + "; useloc: " + iaddr) + print("~" * 80) + + # print("\nLegitimate reaching defs:") + # for (rdef, iaddr) in set(legitimate_rdefs): + # print(" rdefloc: " + rdef + "; useloc: " + iaddr) + + def showast(args: argparse.Namespace) -> NoReturn: print("still under construction ..") exit(1) diff --git a/chb/cmdline/chkx b/chb/cmdline/chkx index f9095d9f..3c3f41ae 100755 --- a/chb/cmdline/chkx +++ b/chb/cmdline/chkx @@ -744,10 +744,15 @@ def parse() -> argparse.Namespace: action="store_true") buildast.add_argument( "--show_reachingdefs", - help="create a dot file for the reaching defs of :") + help="create a dot file for the reaching defs of ") buildast.add_argument( "--output_reachingdefs", help="name of output file (without extension) to store dot/pdf file of reachingdefs") + buildast.add_argument( + "--format", + choices=["pdf", "png"], + default="pdf", + help="format for the graph file generated from dot") buildast.add_argument( "--loglevel", "-log", choices=UL.LogLevel.options(), diff --git a/chb/graphics/DotRdefPath.py b/chb/graphics/DotRdefPath.py index a93c302c..b298d437 100644 --- a/chb/graphics/DotRdefPath.py +++ b/chb/graphics/DotRdefPath.py @@ -25,7 +25,9 @@ # SOFTWARE. # ------------------------------------------------------------------------------ -from typing import Dict, List, Optional, TYPE_CHECKING +from typing import Dict, List, Optional, Tuple, TYPE_CHECKING + +import chb.invariants.XXprUtil as XU import chb.util.fileutil as UF @@ -34,7 +36,179 @@ if TYPE_CHECKING: from chb.app.Function import Function from chb.app.Instruction import Instruction + from chb.app.Register import Register from chb.astinterface.ASTInterface import ASTInterface + from chb.invariants import XXpr + + +class DotRdefPathNode: + + def __init__( + self, + astree: "ASTInterface", + nodename: str, + nodeprefix: str, + exposed: bool, + register: Optional["Register"], + rdefinstrs: List["Instruction"], + useinstrs: List["Instruction"], + branchcondition: Optional[str], + revbranchcondition: Optional[str], + ) -> None: + self._astree = astree + self._nodename = nodename + self._nodeprefix = nodeprefix + self._exposed = exposed + self._register = register + self._rdefinstrs = rdefinstrs + self._useinstrs = useinstrs + self._branchcondition = branchcondition + self._revbranchcondition = revbranchcondition + + @property + def astree(self) -> "ASTInterface": + return self._astree + + @property + def nodename(self) -> str: + return self._nodename + + @property + def nodeprefix(self) -> str: + return self._nodeprefix + + @property + def exposed(self) -> bool: + return self._exposed + + @property + def register(self) -> Optional["Register"]: + return self._register + + @property + def rdefinstrs(self) -> Dict[str, "Instruction"]: + return {rdef.iaddr:rdef for rdef in self._rdefinstrs} + + @property + def useinstrs(self) -> Dict[str, "Instruction"]: + return {use.iaddr:use for use in self._useinstrs} + + @property + def branchcondition(self) -> Optional[str]: + return self._branchcondition + + @property + def revbranchcondition(self) -> Optional[str]: + return self._revbranchcondition + + @property + def fillcolor(self) -> Optional[str]: + if len(self.useinstrs) > 0: + return "lightblue" + elif len(self.rdefinstrs) > 0 and self.exposed: + if any(instr.has_control_flow() for instr in self.rdefinstrs.values()): + return "yellow" + else: + return "orange" + else: + return None + + def has_active_cc_condition(self) -> bool: + for instr in self.rdefinstrs.values(): + if instr.has_control_flow(): + if self.branchcondition: + (cc, _) = instr.ast_cc_condition_prov(self.astree) + if str(cc) == str(self.branchcondition): + return True + return False + + def has_inactive_cc_condition(self) -> bool: + for instr in self.rdefinstrs.values(): + if instr.has_control_flow(): + if self.revbranchcondition: + (cc, _) = instr.ast_cc_condition_prov(self.astree) + if str(cc) == str(self.revbranchcondition): + return True + return False + + @property + def blocktxt(self) -> str: + if self.nodename == "init": + default_init = "{ init | par: " + str(self.register) + ": ? }" + fsig = self.astree.appsignature + if self.register is not None and fsig is not None: + optindex = fsig.index_of_register_parameter_location(self.register) + if optindex is not None: + (fvar, _) = self.astree.get_formal_locindices(optindex - 1) + bctype = fvar.bctyp + return ( + "{ init | par: " + + str(self.register) + + ": " + + str(bctype) + + " " + + fvar.vname + + "}") + else: + return default_init + else: + return default_init + + rpinstrs: List[str] = [] + for (iaddr, instr) in self.rdefinstrs.items(): + (hlinstrs, _) = instr.ast_prov(self.astree) + rpinstrs.extend( + ("def: " + iaddr + ": " + str(hlinstr)) for hlinstr in hlinstrs) + upinstrs: List[str] = [] + for (iaddr, instr) in self.useinstrs.items(): + if instr.is_return_instruction: + rv = instr.return_value() + if rv is not None: + astexpr = XU.xxpr_to_ast_def_expr( + rv, instr.xdata, iaddr, self.astree) + upinstrs.append( + "use: " + iaddr + ": return " + str(astexpr)) + else: + (hlinstrs, llinstrs) = instr.ast_prov(self.astree) + if len(hlinstrs) > 0: + upinstrs.extend( + ("use: " + iaddr + ":" + str(hlinstr)) for hlinstr in hlinstrs) + else: + if len(llinstrs) == 1 and str(llinstrs[0]) == "NOP:BX": + upinstrs.append("use: " + iaddr + ": return R0") + elif len(llinstrs) > 0: + upinstrs.extend( + ("use:" + iaddr + ":" + str(llinstrs)) for llinstr in llinstrs) + else: + upinstrs.append("use: " + iaddr + ": " + str(instr.mnemonic)) + + conditions: List[str] = [] + for (iaddr, instr) in self.rdefinstrs.items(): + if instr.has_control_flow(): + (cc, _) = instr.ast_cc_condition_prov(self.astree) + if self.branchcondition: + if str(cc) == str(self.branchcondition): + status = " (active)" + elif str(cc) == str(self.revbranchcondition): + status = " (inactive)" + else: + status = "" + else: + status = "" + conditions.append("cc-cond: " + iaddr + ": " + str(cc) + status) + + if len(conditions) > 0: + return ( + "{" + self.nodename + "|" + ("if " + "\\n".join(conditions)) + + "|" + "\\n".join(rpinstrs) + + ("|" + "\\n".join(upinstrs) if len(upinstrs) > 0 else "") + + "}") + else: + return ( + "{" + self.nodename + + ("|" + "\\n".join(rpinstrs) if len(rpinstrs) > 0 else "") + + ("|" + "\\n".join(upinstrs) if len(upinstrs) > 0 else "") + + "}") class DotRdefPath: @@ -45,20 +219,25 @@ def __init__( fn: "Function", astree: "ASTInterface", path: List[str], + register: Optional["Register"] = None, nodeprefix: str = "", replacements: Dict[str, str] = {}, rdefinstrs: List[str] = [], + useinstrs: List[str] = [], subgraph: bool = False) -> None: self._fn = fn self._graphname = graphname self._astree = astree self._path = path + self._register = register self._nodeprefix = nodeprefix self._subgraph = subgraph self._replacements = replacements self._rdefinstrs = rdefinstrs + self._useinstrs = useinstrs self._dotgraph = DotGraph(graphname, subgraph=self.subgraph) + self._nodes: Dict[str, DotRdefPathNode] = {} @property def function(self) -> "Function": @@ -76,6 +255,10 @@ def astree(self) -> "ASTInterface": def path(self) -> List[str]: return self._path + @property + def register(self) -> Optional["Register"]: + return self._register + @property def nodeprefix(self) -> str: return self._nodeprefix @@ -84,40 +267,41 @@ def nodeprefix(self) -> str: def subgraph(self) -> bool: return self._subgraph + @property + def nodes(self) -> Dict[str, DotRdefPathNode]: + return self._nodes + def pathindex(self, baddr: str) -> int: for (i, n) in enumerate(self.path): if n == baddr: return i raise UF.CHBError("Address " + baddr + " not found in path") - def build(self) -> DotGraph: - for n in self.path: - self.add_node(n) + def build(self) -> Optional[DotGraph]: + # hide paths with a single block that includes both def and use + if len(self.path) <= 1: + return None + + # hide paths in which a downstream def hides the def shown for + # reachability + if not self.is_exposed(self.path[0]): + return None + + for i, n in enumerate(self.path): + if i == len(self.path) - 1: + self.add_node(n, None) + else: + self.add_node(n, self.path[i+1]) for i in range(len(self.path) - 1): self.add_edge(self.path[i], self.path[i+1]) - if self.init_is_exposed(): - (fvar, _) = self.astree.get_formal_locindices(0) - btype = fvar.bctyp - self._dotgraph.add_node( - self.nodeprefix + "init", - labeltxt="{ init | " + str(btype) + " " + fvar.vname + "}", - shaded=True, - color="orange", - recordformat=True) - self._dotgraph.add_edge( - self.nodeprefix + "init", self.nodeprefix + self.path[0]) - return self._dotgraph - def init_is_exposed(self) -> bool: - result = True - for p in self.path: - instrs = self.rdef_instructions(p) - if any(not instr.has_control_flow() for instr in instrs): - result = False - return result + def is_potentially_spurious(self) -> bool: + return ( + self.nodes[self.path[0]].has_inactive_cc_condition() + or any(self.nodes[n].has_active_cc_condition() for n in self.path[1:])) def is_exposed(self, n: str) -> bool: index = self.pathindex(n) @@ -140,6 +324,8 @@ def get_branch_instruction(self, n: str) -> Optional["Instruction"]: return self.function.instruction(instraddr) def rdef_instructions(self, n: str) -> List["Instruction"]: + if n == "init": + return [] block = self.function.blocks[n] lastaddr = block.lastaddr baddr = int(n, 16) @@ -154,43 +340,81 @@ def rdef_instructions(self, n: str) -> List["Instruction"]: result.append(instr) return result - def add_node(self, n: str) -> None: - nodename = self.nodeprefix + n - rdefinstrs = self.rdef_instructions(n) - blocktxt = n - color: Optional[str] = None - fillcolor: Optional[str] = None - if len(rdefinstrs) > 0: - conditions: List[str] = [] - pinstrs: List[str] = [] - for instr in rdefinstrs: - (hlinstrs, _) = instr.ast_prov(self.astree) - pinstrs.extend(str(hlinstr) for hlinstr in hlinstrs) - if instr.has_control_flow(): - (cc, _) = instr.ast_cc_condition_prov(self.astree) - conditions.append(str(cc)) - if self.is_exposed(n): - if any(instr.has_control_flow() for instr in rdefinstrs): - fillcolor = "yellow" - else: - fillcolor = "orange" - if len(conditions) > 0: - blocktxt = ( - "{" + n + "|" + ("if " + "\\n".join(conditions)) - + "|" + "\\n".join(pinstrs) + "}") - else: - blocktxt = ("{" + n + "|" + "\\n".join(pinstrs) + "}") + def use_instructions(self, n: str) -> List["Instruction"]: + if n == "init": + return [] + block = self.function.blocks[n] + lastaddr = block.lastaddr + baddr = int(n, 16) + xaddr = int(lastaddr, 16) + result: List["Instruction"] = [] + for i in self._useinstrs: + if i == "init": + continue + ix = int(i, 16) + if ix >= baddr and ix <= xaddr: + instr = block.instructions[i] + result.append(instr) + return result + + def add_node(self, n: str, successor: Optional[str]) -> None: + branchconds = self.node_branch_conditions(n, successor) + rdefnode = DotRdefPathNode( + self.astree, + n, + self.nodeprefix, + self.is_exposed(n), + self.register, + self.rdef_instructions(n), + self.use_instructions(n), + branchconds[0] if branchconds else None, + branchconds[1] if branchconds else None) + self._nodes[n] = rdefnode + + if n!= self.path[0] and rdefnode.has_active_cc_condition(): + fillcolor: Optional[str] = "red" + elif n == self.path[0] and rdefnode.has_inactive_cc_condition(): + fillcolor = "red" + else: + fillcolor = rdefnode.fillcolor + self._dotgraph.add_node( - str(nodename), - labeltxt=blocktxt, + self.nodeprefix + n, + labeltxt=rdefnode.blocktxt, shaded=True, - color=color, + color=None, fillcolor=fillcolor, recordformat=True) + def node_branch_conditions( + self, n: str, successor: Optional[str]) -> Optional[Tuple[str, str]]: + """Return T, F condition for the exit instr of node n, dependent on successor """ + + if successor is None: + return None + + if n in self.function.cfg.edges and len(self.function.cfg.edges[n]) == 2: + tgtedges = self.function.cfg.edges[n] + branchinstr = self.get_branch_instruction(n) + if branchinstr and branchinstr.is_branch_instruction: + ftconds = branchinstr.ft_conditions + if len(ftconds) == 2: + (tcond, _) = branchinstr.ast_condition_prov( + self.astree, reverse=True) + (fcond, _) = branchinstr.ast_condition_prov( + self.astree, reverse=False) + if successor == tgtedges[0]: + return (str(tcond), str(fcond)) + else: + return (str(fcond), str(tcond)) + return None + def add_edge(self, n1: str, n2: str) -> None: nodename1 = self.nodeprefix + n1 nodename2 = self.nodeprefix + n2 + if n1 == "init": + self._dotgraph.add_edge(nodename1, nodename2, labeltxt=None) + return srcblock = self.function.block(n1) labeltxt: Optional[str] = None if len(self.function.cfg.edges[n1]) == 2: diff --git a/chb/util/dotutil.py b/chb/util/dotutil.py index e7cc34d8..eaa1e309 100644 --- a/chb/util/dotutil.py +++ b/chb/util/dotutil.py @@ -74,7 +74,7 @@ def print_dot_subgraphs( if len(subgraphs) == 0: print("No subgraphs supplied") return "error" - if len(subgraphs) > 20: + if len(subgraphs) > 30: print("Too many subgraphs: " + str(len(subgraphs))) return "error" diff --git a/chb/util/graphutil.py b/chb/util/graphutil.py index 6504d822..00dc5202 100644 --- a/chb/util/graphutil.py +++ b/chb/util/graphutil.py @@ -57,10 +57,10 @@ def find_paths_aux( self, src: str, dst: Optional[str], - visited: Dict[str, bool], + visited: Dict[str, int], path: List[str], depth: int = 0) -> None: - visited[src] = True + visited[src] = visited[src] + 1 path.append(src) if not dst and (src not in self.edges): self.paths.append(path[:]) @@ -68,7 +68,7 @@ def find_paths_aux( self.paths.append(path[:]) elif src in self.edges: for d in self.edges[src]: - if not visited[d]: + if visited[d] < 2: self.find_paths_aux(d, dst, visited, path, depth + 1) path.pop() visited[src] = False @@ -86,7 +86,7 @@ def find_paths( self.maxtime = maxtime visited = {} for n in self.nodes: - visited[n] = False + visited[n] = 0 try: self.find_paths_aux(src, dst, visited, []) except SearchTimeoutException as e: From f378330d18fc2d29a30073bca92e70254765a666 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Tue, 9 Sep 2025 10:22:09 -0700 Subject: [PATCH 3/8] AST: use untyped registers as argument for show_reachingdefs --- chb/astinterface/ASTInterface.py | 9 ++++- chb/cmdline/astcmds.py | 68 +++++++++++++++++++++----------- chb/cmdline/chkx | 6 +++ 3 files changed, 57 insertions(+), 26 deletions(-) diff --git a/chb/astinterface/ASTInterface.py b/chb/astinterface/ASTInterface.py index 857de76e..5e8cee88 100644 --- a/chb/astinterface/ASTInterface.py +++ b/chb/astinterface/ASTInterface.py @@ -970,7 +970,7 @@ def introduce_ssa_variables( self, rdeflocs: Dict[str, List[List[str]]], ftypes: Dict[str, Dict[str, "BCTyp"]], - ssanames: Dict[str, str] = {}) -> None: + ssanames: Dict[str, str] = {}) -> Dict[str, Dict[str, List[str]]]: """Creates ssa variables based on reaching definition locations. Lists with multiple locations will give rise to a single variable @@ -985,10 +985,10 @@ def introduce_ssa_variables( where is the name of the register being assigned. """ + untyped: Dict[str, Dict[str, List[str]]] = {} for (reg, locs) in rdeflocs.items(): for lst in locs: if len(lst) > 0: - # print("DEBUG: " + str(reg) + ": [" + ", ".join(str(loc) for loc in lst) + "]") loc1 = lst[0] vtype = None if loc1 in ftypes: @@ -997,11 +997,16 @@ def introduce_ssa_variables( vtype = vbctype.convert(self.typconverter) vinfo = self.mk_ssa_register_varinfo( reg, loc1, vtype=vtype, prefix=ssanames.get(loc1)) + if vtype is None: + untyped.setdefault(reg, {}) + untyped[reg].setdefault(vinfo.vname, []) + untyped[reg][vinfo.vname].append(loc1) self._ssa_addresses.setdefault(vinfo.vname, set([])) for loc in lst: self._ssa_intros.setdefault(loc, {}) self._ssa_intros[loc][reg] = vinfo self._ssa_addresses[vinfo.vname].add(loc) + return untyped def introduce_stack_variables( self, diff --git a/chb/cmdline/astcmds.py b/chb/cmdline/astcmds.py index a889b95f..75152dd8 100644 --- a/chb/cmdline/astcmds.py +++ b/chb/cmdline/astcmds.py @@ -150,7 +150,8 @@ def buildast(args: argparse.Namespace) -> NoReturn: xpatchresultsfile = args.patch_results_file hide_globals: bool = args.hide_globals hide_annotations: bool = args.hide_annotations - show_reachingdefs: str = args.show_reachingdefs + show_reachingdefs: bool = args.show_reachingdefs + reachingdefs_registers: List[str] = args.reachingdefs_registers output_reachingdefs: str = args.output_reachingdefs fileformat: str = args.format verbose: bool = args.verbose @@ -349,13 +350,26 @@ def buildast(args: argparse.Namespace) -> NoReturn: # Introduce ssa variables for all reaching definitions referenced in # xdata records for all instructions in the function. Locations that # have a common user are merged. Types are provided by lhs_types. - astinterface.introduce_ssa_variables( + untyped = astinterface.introduce_ssa_variables( f.rdef_location_partition(), f.register_lhs_types, f.lhs_names) # Introduce stack variables for all stack buffers with types astinterface.introduce_stack_variables( f.stackframe, f.stack_variable_types) + regsuntyped: List[str] = [] + for (reg, varlocs) in untyped.items(): + for (var, locs) in varlocs.items(): + if len(locs) == 1 and locs[0] == "init": + continue + if "_spill" in var: + continue + if reg == "SP": + continue + # print(" Untyped: " + reg + ": " + var + " [" + ",".join(locs) + "]") + if not reg in regsuntyped: + regsuntyped.append(reg) + astfunction = ASTInterfaceFunction( faddr, fname, f, astinterface, patchevents=patchevents) @@ -396,14 +410,18 @@ def buildast(args: argparse.Namespace) -> NoReturn: UC.print_error("\nSpecify a file to save the reaching defs") continue - register = show_reachingdefs - if not register in f.rdef_location_partition(): - UC.print_status_update( - "Register " + register + " not found in rdeflocations") - continue + if len(reachingdefs_registers) == 0: + reachingdefs_registers = regsuntyped - print_reachingdefs( - app, astinterface, output_reachingdefs, fileformat, f, register) + for register in reachingdefs_registers: + if not register in f.rdef_location_partition(): + UC.print_status_update( + "Register " + register + " not found in rdeflocations") + continue + + for reg in reachingdefs_registers: + print_reachingdefs( + app, astinterface, output_reachingdefs + "__" + reg, fileformat, f, reg) else: UC.print_error("Unable to find function " + faddr) @@ -510,21 +528,23 @@ def print_reachingdefs( else: printgraphs.append(dg) - pdffilename = UD.print_dot_subgraphs( - app.path, - "paths", - filename, - fileformat, - printgraphs) - - UC.print_status_update("Printed " + pdffilename) - if len(possibly_spurious_rdefs) > 0: - print("Possibly spurious reachingdefs to be removed: ") - print("~" * 80) - for (rdef, iaddr) in set(possibly_spurious_rdefs): - if not (rdef, iaddr) in legitimate_rdefs: - print(" rdefloc: " + rdef + "; useloc: " + iaddr) - print("~" * 80) + if len(printgraphs) > 0: + pdffilename = UD.print_dot_subgraphs( + app.path, + "paths", + filename, + fileformat, + printgraphs) + + UC.print_status_update("Printed " + pdffilename) + if len(possibly_spurious_rdefs) > 0: + print("\nPossibly spurious reachingdefs to be removed for register " + + register + ": ") + print("~" * 80) + for (rdef, iaddr) in set(possibly_spurious_rdefs): + if not (rdef, iaddr) in legitimate_rdefs: + print(" rdefloc: " + rdef + "; useloc: " + iaddr) + print("~" * 80) # print("\nLegitimate reaching defs:") # for (rdef, iaddr) in set(legitimate_rdefs): diff --git a/chb/cmdline/chkx b/chb/cmdline/chkx index 3c3f41ae..b97ac120 100755 --- a/chb/cmdline/chkx +++ b/chb/cmdline/chkx @@ -744,10 +744,16 @@ def parse() -> argparse.Namespace: action="store_true") buildast.add_argument( "--show_reachingdefs", + action="store_true", help="create a dot file for the reaching defs of ") buildast.add_argument( "--output_reachingdefs", help="name of output file (without extension) to store dot/pdf file of reachingdefs") + buildast.add_argument( + "--reachingdefs_registers", + help="show reachingdefs for these registers only (default: generate automatically)", + nargs="*", + default=[]) buildast.add_argument( "--format", choices=["pdf", "png"], From 9b325bea99920d640c2f90c6fdc28fe459b5c6c0 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Sat, 11 Oct 2025 22:59:32 -0700 Subject: [PATCH 4/8] ASTI: prune empty if statements --- chb/astinterface/ASTICodeTransformer.py | 13 ++++++++++++- chb/astinterface/ASTIProvenance.py | 22 ++++++++++++++++++++++ chb/astinterface/ASTInterface.py | 8 ++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/chb/astinterface/ASTICodeTransformer.py b/chb/astinterface/ASTICodeTransformer.py index ac0f5270..76b94c2c 100644 --- a/chb/astinterface/ASTICodeTransformer.py +++ b/chb/astinterface/ASTICodeTransformer.py @@ -68,7 +68,15 @@ def transform_loop_stmt(self, stmt: AST.ASTLoop) -> AST.ASTStmt: optlocationid=stmt.locationid) def transform_block_stmt(self, stmt: AST.ASTBlock) -> AST.ASTStmt: - newstmts = [s.transform(self) for s in stmt.stmts] + newstmts: List[AST.ASTStmt] = [] + for s in stmt.stmts: + newstmt = s.transform(self) + # prune empty blocks that may have been created by the pruning + # of redundant if statements + if newstmt.is_ast_block and len((cast(AST.ASTBlock, newstmt)).stmts) == 0: + continue + newstmts.append(newstmt) + return self.astinterface.mk_block( newstmts, labels=stmt.labels, @@ -117,6 +125,9 @@ def transform_instruction_sequence_stmt( def transform_branch_stmt(self, stmt: AST.ASTBranch) -> AST.ASTStmt: newif = stmt.ifstmt.transform(self) newelse = stmt.elsestmt.transform(self) + if newif.is_empty() and newelse.is_empty(): + return self.astinterface.mk_block([]) + return self.astinterface.mk_branch( stmt.condition, newif, diff --git a/chb/astinterface/ASTIProvenance.py b/chb/astinterface/ASTIProvenance.py index d3c1c607..838c7851 100644 --- a/chb/astinterface/ASTIProvenance.py +++ b/chb/astinterface/ASTIProvenance.py @@ -428,6 +428,8 @@ def resolve_reaching_defs(self) -> None: v = str(rd.variable) addrs = [str(d) for d in rd.deflocations] for addr in addrs: + if addr == "init": + continue if addr in self.address_instructions: instrids = self.address_instructions[addr] for instrid in instrids: @@ -442,6 +444,26 @@ def resolve_reaching_defs(self) -> None: # Allow for change of name of return value if str(instr.lhs) == v or v == "R0" or v == "S0": self.add_reaching_definition(xid, instrid) + else: + chklogger.logger.warning( + "Variable names don't match: %s vs %s", + str(instr.lhs), v) + else: + chklogger.logger.warning( + "Expression is defined by unknown instruction: " + + "var: %s defined by %s", + str(v), str(instr)) + else: + chklogger.logger.warning( + "Instruction id in reaching definitions for %s " + + "not found", + str(v)) + else: + chklogger.logger.warning( + "Reaching definition address %s for variable %s " + + " not found", + str(addr), str(v)) + def resolve_flag_reaching_defs(self) -> None: for (xid, frds) in self.flag_expr_rdefs.items(): diff --git a/chb/astinterface/ASTInterface.py b/chb/astinterface/ASTInterface.py index 5e8cee88..6ddeef4b 100644 --- a/chb/astinterface/ASTInterface.py +++ b/chb/astinterface/ASTInterface.py @@ -1470,6 +1470,14 @@ def mk_byte_sum(self, bytes: List[AST.ASTExpr]) -> AST.ASTExpr: shift += 8 return result + def mk_doubleword_sum( + self, hiword: AST.ASTExpr, loword: AST.ASTExpr) -> AST.ASTExpr: + """Return concatenation 64 bit hiword:loword.""" + + shift = self.mk_integer_constant(32) + shifthi = self.mk_binary_op("lsl", hiword, shift) + return self.mk_binary_op("plus", shifthi, loword) + def mk_binary_expression( self, op: str, From 9dc5083197d7f3e41ad30193f39cdcc5f89286ec Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Sat, 11 Oct 2025 23:02:26 -0700 Subject: [PATCH 5/8] ASTCMD: add call clobber to rdefs graph --- chb/cmdline/astcmds.py | 27 +++++++++++++++++++++++++-- chb/cmdline/chkx | 2 +- chb/cmdline/runcmds.py | 2 +- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/chb/cmdline/astcmds.py b/chb/cmdline/astcmds.py index 75152dd8..d61e8337 100644 --- a/chb/cmdline/astcmds.py +++ b/chb/cmdline/astcmds.py @@ -405,7 +405,7 @@ def buildast(args: argparse.Namespace) -> NoReturn: functions_failed += 1 continue - if show_reachingdefs is not None: + if show_reachingdefs: if output_reachingdefs is None: UC.print_error("\nSpecify a file to save the reaching defs") continue @@ -421,7 +421,12 @@ def buildast(args: argparse.Namespace) -> NoReturn: for reg in reachingdefs_registers: print_reachingdefs( - app, astinterface, output_reachingdefs + "__" + reg, fileformat, f, reg) + app, + astinterface, + output_reachingdefs + "__" + reg, + fileformat, + f, + reg) else: UC.print_error("Unable to find function " + faddr) @@ -488,6 +493,24 @@ def print_reachingdefs( rdefinstrs = rdefs, useinstrs=[iaddr]) dotpaths.append((cfgpath, rdef, iaddr)) + elif rdef.endswith("_clobber"): + rdefaddr = rdef[:-8] + rdblock = f.containing_block(rdefaddr) + graph = UG.DirectedGraph(list(f.cfg.blocks.keys()), f.cfg.edges) + graph.find_paths(rdblock, cblock) + for (i, p) in enumerate( + sorted(graph.get_paths(), key=lambda p: len(p))): + cfgpath = DotRdefPath( + "path_" + str(rdef) + "_" + str(iaddr) + "_" + str(i), + f, + astinterface, + p, + subgraph=True, + nodeprefix = str(iaddr) + str(rdef) + str(i) + ":", + rdefinstrs = rdefs, + useinstrs=[iaddr]) + dotpaths.append((cfgpath, rdef, iaddr)) + else: rdblock = f.containing_block(rdef) graph = UG.DirectedGraph(list(f.cfg.blocks.keys()), f.cfg.edges) diff --git a/chb/cmdline/chkx b/chb/cmdline/chkx index b97ac120..5eb94c97 100755 --- a/chb/cmdline/chkx +++ b/chb/cmdline/chkx @@ -745,7 +745,7 @@ def parse() -> argparse.Namespace: buildast.add_argument( "--show_reachingdefs", action="store_true", - help="create a dot file for the reaching defs of ") + help="create dot files for the reaching defs of untyped registers") buildast.add_argument( "--output_reachingdefs", help="name of output file (without extension) to store dot/pdf file of reachingdefs") diff --git a/chb/cmdline/runcmds.py b/chb/cmdline/runcmds.py index d7d3dc71..e835b24a 100644 --- a/chb/cmdline/runcmds.py +++ b/chb/cmdline/runcmds.py @@ -177,7 +177,7 @@ def run_commands(args: argparse.Namespace) -> NoReturn: opcode_output["opcode-distribution"] = opcode_distribution opcode_output["unknowns"] = unknowns with open(outputfilename, "w") as fp: - json.dump(opcode_output, fp, indent=2) + json.dump(opcode_output, fp, indent=2, sort_keys=True) else: print("\nOpcode distribution") for (opc, c) in sorted(opcode_distribution.items()): From 2c7e5211c233de6c813952b2f4784cfcd174889f Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Sat, 11 Oct 2025 23:05:17 -0700 Subject: [PATCH 6/8] ARM: add ast provenance to UMLAL --- .../ARMUnsignedMultiplyAccumulateLong.py | 90 ++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/chb/arm/opcodes/ARMUnsignedMultiplyAccumulateLong.py b/chb/arm/opcodes/ARMUnsignedMultiplyAccumulateLong.py index 9c8b8b41..7d474f77 100644 --- a/chb/arm/opcodes/ARMUnsignedMultiplyAccumulateLong.py +++ b/chb/arm/opcodes/ARMUnsignedMultiplyAccumulateLong.py @@ -25,7 +25,7 @@ # SOFTWARE. # ------------------------------------------------------------------------------ -from typing import List, TYPE_CHECKING +from typing import List, Tuple, TYPE_CHECKING from chb.app.InstrXData import InstrXData @@ -33,6 +33,11 @@ from chb.arm.ARMOpcode import ARMOpcode, ARMOpcodeXData, simplify_result from chb.arm.ARMOperand import ARMOperand +import chb.ast.ASTNode as AST +from chb.astinterface.ASTInterface import ASTInterface + +import chb.invariants.XXprUtil as XU + import chb.util.fileutil as UF from chb.util.IndexedTable import IndexedTableValue from chb.util.loggingutil import chklogger @@ -110,6 +115,10 @@ def __init__(self, d: "ARMDictionary", ixval: IndexedTableValue) -> None: def operands(self) -> List[ARMOperand]: return [self.armd.arm_operand(self.args[i]) for i in [1, 2, 3, 4]] + @property + def opargs(self) -> List[ARMOperand]: + return [self.armd.arm_operand(self.args[i]) for i in [1, 2, 3, 4]] + def mnemonic_extension(self) -> str: cc = ARMOpcode.mnemonic_extension(self) wb = "S" if self.is_writeback else "" @@ -125,3 +134,82 @@ def annotation(self, xdata: InstrXData) -> str: return xd.annotation else: return "Error value" + + def ast_prov( + self, + astree: ASTInterface, + iaddr: str, + bytestring: str, + xdata: InstrXData) -> Tuple[ + List[AST.ASTInstruction], List[AST.ASTInstruction]]: + + annotations: List[str] = [iaddr, "UMLAL"] + + # low-level assignment + + (ll_lhslo, _, _) = self.opargs[0].ast_lvalue(astree) + (ll_lhshi, _, _) = self.opargs[1].ast_lvalue(astree) + (ll_lo, _, _) = self.opargs[0].ast_rvalue(astree) + (ll_hi, _, _) = self.opargs[1].ast_rvalue(astree) + (ll_rn, _, _) = self.opargs[2].ast_rvalue(astree) + (ll_rm, _, _) = self.opargs[3].ast_rvalue(astree) + + i32 = astree.mk_integer_constant(32) + ll_rhs1 = astree.mk_doubleword_sum(ll_hi, ll_lo) + ll_rhs2 = astree.mk_binary_op("mult", ll_rn, ll_rm) + ll_rhs = astree.mk_binary_op("plus", ll_rhs2, ll_rhs1) + ll_rhslo = astree.mk_binary_op("mod", ll_rhs, i32) + ll_rhshi = astree.mk_binary_op("div", ll_rhs, i32) + + ll_assign_lo = astree.mk_assign( + ll_lhslo, + ll_rhslo, + iaddr=iaddr, + bytestring=bytestring, + annotations=annotations) + + ll_assign_hi = astree.mk_assign( + ll_lhshi, + ll_rhshi, + iaddr=iaddr, + bytestring=bytestring, + annotations=annotations) + + rdefs = xdata.reachingdefs + + astree.add_expr_reachingdefs(ll_rn, [rdefs[0]]) + astree.add_expr_reachingdefs(ll_rm, [rdefs[1]]) + astree.add_expr_reachingdefs(ll_lo, [rdefs[2]]) + astree.add_expr_reachingdefs(ll_hi, [rdefs[3]]) + + # high-level assignment + + xd = ARMUnsignedMultiplyAccumulateLongXData(xdata) + if not xd.is_ok: + chklogger.logger.error( + "Error value encountered for UMLAL at %s", iaddr) + return ([], []) + + hl_lhs = XU.xvariable_to_ast_lval(xd.vlo, xdata, iaddr, astree) + hl_rhs = XU.xxpr_to_ast_def_expr(xd.rresult, xdata, iaddr, astree) + + defuses = xdata.defuses + defuseshigh = xdata.defuseshigh + + hl_assign = astree.mk_assign( + hl_lhs, + hl_rhs, + iaddr=iaddr, + bytestring=bytestring, + annotations=annotations) + + astree.add_instr_mapping(hl_assign, ll_assign_lo) + astree.add_instr_mapping(hl_assign, ll_assign_hi) + astree.add_instr_address(hl_assign, [iaddr]) + astree.add_expr_mapping(hl_rhs, ll_rhslo) + astree.add_lval_mapping(hl_lhs, ll_lhslo) + astree.add_expr_reachingdefs(hl_rhs, rdefs[4:]) + astree.add_lval_defuses(hl_lhs, defuses[0]) + astree.add_lval_defuses_high(hl_lhs, defuseshigh[0]) + + return ([hl_assign], [ll_assign_lo, ll_assign_hi]) From 926b4db142691858f929ab890d044c753d0ec1f3 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Sat, 11 Oct 2025 23:07:05 -0700 Subject: [PATCH 7/8] XPR: add function name as global variable to symbol table --- chb/invariants/XXprUtil.py | 39 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/chb/invariants/XXprUtil.py b/chb/invariants/XXprUtil.py index c9c8b926..12cd92ae 100644 --- a/chb/invariants/XXprUtil.py +++ b/chb/invariants/XXprUtil.py @@ -211,6 +211,25 @@ def xconstant_to_ast_expr( return astree.mk_start_of(lval) else: return astree.mk_address_of(lval) + + elif ( + xdata.app.systeminfo.has_function_name(hex(xc.intvalue))): + fname = xdata.app.systeminfo.function_name(hex(xc.intvalue)) + if xdata.app.bcfiles.has_vardecl(fname): + vardecl = xdata.app.bcfiles.vardecl(fname) + astree.globalsymboltable.add_symbol( + vardecl.vname, + vtype=vardecl.vtype.convert(astree.typconverter), + globaladdress=xc.intvalue) + gvaddr = astree.globalsymboltable.global_variable_name( + hex(xc.intvalue)) + if gvaddr is not None: + lval = astree.mk_vinfo_lval(gvaddr, anonymous=anonymous) + return astree.mk_address_of(lval) + else: + return astree.mk_integer_constant(xc.intvalue) + else: + return astree.mk_integer_constant(xc.intvalue) else: return astree.mk_integer_constant(xc.intvalue) @@ -564,6 +583,13 @@ def stack_variable_to_lval_expression( return astree.mk_vinfo_lval_expression( vinfo, astoffset, anonymous=anonymous) + if offset.offset.is_field_offset: + fieldoffset = cast("VMemoryOffsetFieldOffset", offset.offset) + astoffset = field_offset_to_ast_offset( + fieldoffset, xdata, iaddr, astree, anonymous=anonymous) + return astree.mk_vinfo_lval_expression( + vinfo, astoffset, anonymous=anonymous) + chklogger.logger.warning( "Stack variable offset %s of %s not yet handled at address %s", str(offset.offset), str(vinfo), iaddr) @@ -636,6 +662,19 @@ def global_variable_to_lval_expression( return astree.mk_vinfo_lval_expression( vinfo, astoffset, anonymous=anonymous) + if not anonymous: + if vinfo is None: + chklogger.logger.error( + "Conversion of global variable with address %s and offset " + + "%s at address %s not yet supported", + str(hexgaddr), str(offset.offset), iaddr) + else: + chklogger.logger.error( + "Conversion of global variable %s access with offset " + + "%s at address %s not yet supported", + str(vinfo), str(offset.offset), iaddr) + return astree.mk_temp_lval_expression() + if not anonymous: chklogger.logger.error( "Conversion of global variable %s at address %s not yet supported", From f3c4f1e1b5d55aa6b2d66b6bbf3f6a2e7d4de806 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Sat, 11 Oct 2025 23:09:08 -0700 Subject: [PATCH 8/8] ARM: add base-updates to memory instructions --- chb/app/CHVersion.py | 2 +- chb/app/InstrXData.py | 6 ++++++ chb/arm/ARMAssembly.py | 2 +- chb/arm/ARMOpcode.py | 8 +++++++- chb/arm/opcodes/ARMBitwiseBitClear.py | 9 +++++++++ chb/arm/opcodes/ARMLoadCoprocessor.py | 2 ++ chb/arm/opcodes/ARMLoadRegisterByte.py | 9 +++++++++ chb/arm/opcodes/ARMLoadRegisterHalfword.py | 4 ++++ chb/arm/opcodes/ARMLoadRegisterSignedByte.py | 4 ---- chb/arm/opcodes/ARMMove.py | 1 + chb/arm/opcodes/ARMStoreCoprocessor.py | 2 ++ chb/arm/opcodes/ARMStoreRegisterByte.py | 6 ++++++ chb/graphics/DotRdefPath.py | 2 ++ chb/util/graphutil.py | 2 +- 14 files changed, 51 insertions(+), 8 deletions(-) diff --git a/chb/app/CHVersion.py b/chb/app/CHVersion.py index 4c3badfc..94759afc 100644 --- a/chb/app/CHVersion.py +++ b/chb/app/CHVersion.py @@ -1 +1 @@ -chbversion: str = "0.3.0-20250908" +chbversion: str = "0.3.0-20251011" diff --git a/chb/app/InstrXData.py b/chb/app/InstrXData.py index 9060a735..3017943d 100644 --- a/chb/app/InstrXData.py +++ b/chb/app/InstrXData.py @@ -578,6 +578,12 @@ def get_base_update_xpr(self) -> XXpr: "Unexpected error value in base-update expression") return self.xprdictionary.xpr(xbuval) + def is_base_update_xpr_ok(self) -> bool: + xbutag = next(t for t in self.tags if t.startswith("xbu:")) + xix = int(xbutag[4:]) + xbuval = self.args[xix] + return (xbuval != -2) + def get_base_update_cxpr(self) -> XXpr: cbutag = next(t for t in self.tags if t.startswith("cbu:")) cix = int(cbutag[4:]) diff --git a/chb/arm/ARMAssembly.py b/chb/arm/ARMAssembly.py index e7838722..32908e41 100644 --- a/chb/arm/ARMAssembly.py +++ b/chb/arm/ARMAssembly.py @@ -62,7 +62,7 @@ def opcode(self) -> ARMOpcode: @property def mnemonic(self) -> str: - return self.opcode.mnemonic + return self.opcode.mnemonic_stem def mnemonic_extension(self) -> str: return self.opcode.mnemonic_extension() diff --git a/chb/arm/ARMOpcode.py b/chb/arm/ARMOpcode.py index aeb56c52..c87785bb 100644 --- a/chb/arm/ARMOpcode.py +++ b/chb/arm/ARMOpcode.py @@ -229,6 +229,9 @@ def get_base_update_xpr(self) -> "XXpr": raise UF.CHBError( self.__class__.__name__ + " does not have writeback") + def is_base_update_xpr_ok(self) -> bool: + return self.xdata.is_base_update_xpr_ok() + def get_base_update_cxpr(self) -> "XXpr": if self.is_writeback: return self.xdata.get_base_update_cxpr() @@ -239,7 +242,10 @@ def get_base_update_cxpr(self) -> "XXpr": def writeback_update(self) -> str: if self.xdata.has_base_update(): vbu = self.get_base_update_var() - xbu = self.get_base_update_cxpr() + if self.is_base_update_xpr_ok(): + xbu = str(self.get_base_update_cxpr()) + else: + xbu = "?" return "; wbu: " + str(vbu) + " := " + str(xbu) else: return "" diff --git a/chb/arm/opcodes/ARMBitwiseBitClear.py b/chb/arm/opcodes/ARMBitwiseBitClear.py index c542b2b5..cbc44dce 100644 --- a/chb/arm/opcodes/ARMBitwiseBitClear.py +++ b/chb/arm/opcodes/ARMBitwiseBitClear.py @@ -156,6 +156,15 @@ def operands(self) -> List[ARMOperand]: def opargs(self) -> List[ARMOperand]: return [self.armd.arm_operand(i) for i in self.args[1: -1]] + def mnemonic_extension(self) -> str: + cc = ARMOpcode.mnemonic_extension(self) + wb = "S" if self.is_writeback else "" + return wb + cc + + @property + def is_writeback(self) -> bool: + return self.args[0] == 1 + def annotation(self, xdata: InstrXData) -> str: xd = ARMBitwiseBitClearXData(xdata) return xd.annotation diff --git a/chb/arm/opcodes/ARMLoadCoprocessor.py b/chb/arm/opcodes/ARMLoadCoprocessor.py index 0c5c3193..a5729d4a 100644 --- a/chb/arm/opcodes/ARMLoadCoprocessor.py +++ b/chb/arm/opcodes/ARMLoadCoprocessor.py @@ -41,6 +41,8 @@ from chb.arm.ARMDictionary import ARMDictionary +@armregistry.register_tag("LDC2", ARMOpcode) +@armregistry.register_tag("LDC2L", ARMOpcode) @armregistry.register_tag("LDCL", ARMOpcode) @armregistry.register_tag("LDC", ARMOpcode) class ARMLoadCoprocessor(ARMOpcode): diff --git a/chb/arm/opcodes/ARMLoadRegisterByte.py b/chb/arm/opcodes/ARMLoadRegisterByte.py index ebe843aa..ee10b223 100644 --- a/chb/arm/opcodes/ARMLoadRegisterByte.py +++ b/chb/arm/opcodes/ARMLoadRegisterByte.py @@ -198,6 +198,10 @@ def operands(self) -> List[ARMOperand]: def opargs(self) -> List[ARMOperand]: return [self.armd.arm_operand(self.args[i]) for i in [0, 1, 2, 3]] + @property + def is_write_back(self) -> bool: + return self.opargs[3].is_write_back + def lhs(self, xdata: InstrXData) -> List[XVariable]: xd = ARMLoadRegisterByteXData(xdata) return [xd.vrt] @@ -324,6 +328,11 @@ def has_cast() -> bool: annotations=annotations) ll_assigns: List[AST.ASTInstruction] = [ll_assign, ll_addr_assign] + if not xd.is_base_update_xpr_ok(): + chklogger.logger.error( + "LDRB: Error encountered for writeback address at address %s", iaddr) + return ([], ll_assigns) + basereg = xd.get_base_update_var() newaddr = xd.get_base_update_xpr() hl_addr_lhs = XU.xvariable_to_ast_lval(basereg, xdata, iaddr, astree) diff --git a/chb/arm/opcodes/ARMLoadRegisterHalfword.py b/chb/arm/opcodes/ARMLoadRegisterHalfword.py index f0ef1251..a09991fc 100644 --- a/chb/arm/opcodes/ARMLoadRegisterHalfword.py +++ b/chb/arm/opcodes/ARMLoadRegisterHalfword.py @@ -193,6 +193,10 @@ def operands(self) -> List[ARMOperand]: def opargs(self) -> List[ARMOperand]: return [self.armd.arm_operand(self.args[i]) for i in [0, 1, 2, 3]] + @property + def is_write_back(self) -> bool: + return self.opargs[3].is_write_back + def lhs(self, xdata: InstrXData) -> List[XVariable]: xd = ARMLoadRegisterHalfwordXData(xdata) return [xd.vrt] diff --git a/chb/arm/opcodes/ARMLoadRegisterSignedByte.py b/chb/arm/opcodes/ARMLoadRegisterSignedByte.py index 581f4e8a..696b99d5 100644 --- a/chb/arm/opcodes/ARMLoadRegisterSignedByte.py +++ b/chb/arm/opcodes/ARMLoadRegisterSignedByte.py @@ -195,10 +195,6 @@ def ast_prov( defuses = xdata.defuses defuseshigh = xdata.defuseshigh - hl_lhs = XU.xvariable_to_ast_lval(lhs, xdata, iaddr, astree) - hl_rhs = XU.xxpr_to_ast_def_expr( - rhs, xdata, iaddr, astree, size=1, memaddr=xaddr) - hl_assign = astree.mk_assign( hl_lhs, hl_rhs, diff --git a/chb/arm/opcodes/ARMMove.py b/chb/arm/opcodes/ARMMove.py index 2804feff..06d1071c 100644 --- a/chb/arm/opcodes/ARMMove.py +++ b/chb/arm/opcodes/ARMMove.py @@ -340,6 +340,7 @@ def ast_prov_predicate_assign( chklogger.logger.warning( "Predicate assignment without associated predicate at " + "address %s", iaddr) + astree.add_instr_address(ll_assign, [iaddr]) return ([], [ll_assign]) rhs = xd.cpredicate if xd.is_cpredicate_ok else xd.xpredicate diff --git a/chb/arm/opcodes/ARMStoreCoprocessor.py b/chb/arm/opcodes/ARMStoreCoprocessor.py index be4daf01..dc725d99 100644 --- a/chb/arm/opcodes/ARMStoreCoprocessor.py +++ b/chb/arm/opcodes/ARMStoreCoprocessor.py @@ -43,6 +43,8 @@ @armregistry.register_tag("STCL", ARMOpcode) @armregistry.register_tag("STC", ARMOpcode) +@armregistry.register_tag("STC2", ARMOpcode) +@armregistry.register_tag("STC2L", ARMOpcode) class ARMStoreCoprocessor(ARMOpcode): """Stores memory data from a coprocessor to a sequence of addresses. diff --git a/chb/arm/opcodes/ARMStoreRegisterByte.py b/chb/arm/opcodes/ARMStoreRegisterByte.py index 7dccc74d..4d564676 100644 --- a/chb/arm/opcodes/ARMStoreRegisterByte.py +++ b/chb/arm/opcodes/ARMStoreRegisterByte.py @@ -328,6 +328,12 @@ def ast_prov( annotations=annotations) ll_assigns: List[AST.ASTInstruction] = [ll_assign, ll_addr_assign] + if not xd.is_base_update_xpr_ok(): + chklogger.logger.error( + "STRB: Error encountered fro writeback address at address %s", + iaddr) + return ([], ll_assigns) + basereg = xd.get_base_update_var() newaddr = xd.get_base_update_xpr() hl_addr_lhs = XU.xvariable_to_ast_lval(basereg, xdata, iaddr, astree) diff --git a/chb/graphics/DotRdefPath.py b/chb/graphics/DotRdefPath.py index b298d437..148ee3c8 100644 --- a/chb/graphics/DotRdefPath.py +++ b/chb/graphics/DotRdefPath.py @@ -334,6 +334,8 @@ def rdef_instructions(self, n: str) -> List["Instruction"]: for i in self._rdefinstrs: if i == "init": continue + if i.endswith("_clobber"): + i = i[:-8] ix = int(i, 16) if ix >= baddr and ix <= xaddr: instr = block.instructions[i] diff --git a/chb/util/graphutil.py b/chb/util/graphutil.py index 00dc5202..ede9db75 100644 --- a/chb/util/graphutil.py +++ b/chb/util/graphutil.py @@ -68,7 +68,7 @@ def find_paths_aux( self.paths.append(path[:]) elif src in self.edges: for d in self.edges[src]: - if visited[d] < 2: + if visited[d] < 2 and depth < 8: self.find_paths_aux(d, dst, visited, path, depth + 1) path.pop() visited[src] = False