From c97ff3e3ee582a9ffb5aaf5d2a6a5165950ed22f Mon Sep 17 00:00:00 2001 From: Eli Davis Date: Tue, 15 Jun 2021 12:46:09 -0400 Subject: [PATCH 1/3] file types could be generic I'm not sure this is actually better than current setup, but it does allow you to avoid passing some strings around I'm not sure how to test this code, so you'd want to take this as a starting point/make sure I got it right, rather than just merging this --- chb/app/AppAccess.py | 48 +++++++---------------- chb/arm/ARMAccess.py | 7 ++-- chb/cmdline/fileformatcmds.py | 4 +- chb/elfformat/ELFHeader.py | 14 ++++++- chb/mips/MIPSAccess.py | 7 ++-- chb/mips/simulation/MIPSimulationState.py | 8 ++-- chb/peformat/PEHeader.py | 8 ++++ chb/x86/X86Access.py | 7 ++-- 8 files changed, 52 insertions(+), 51 deletions(-) diff --git a/chb/app/AppAccess.py b/chb/app/AppAccess.py index 81c324ed..2b5a9d00 100644 --- a/chb/app/AppAccess.py +++ b/chb/app/AppAccess.py @@ -29,7 +29,7 @@ """Access point for most analysis results.""" from abc import ABC, abstractmethod -from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence +from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Generic, Type, TypeVar from chb.api.InterfaceDictionary import InterfaceDictionary @@ -55,27 +55,26 @@ import chb.util.fileutil as UF -class AppAccess(ABC): +HeaderTy = TypeVar('HeaderTy', PEHeader, ELFHeader) +class AppAccess(ABC, Generic[HeaderTy]): def __init__( self, path: str, filename: str, deps: List[str] = [], - fileformat: str = "elf", + fileformat: Type[HeaderTy] = ELFHeader, arch: str = "x86") -> None: """Initializes access to analysis results.""" self._path = path self._filename = filename self._deps = deps # list of summary jars registered as dependencies - self._fileformat = fileformat # currently supported: elf, pe + self._header_ty = fileformat # currently supported: elf, pe self._arch = arch # currently supported: arm, mips, x86 self._userdata: Optional[UserData] = None - # file-format 
specific - self._peheader: Optional[PEHeader] = None - self._elfheader: Optional[ELFHeader] = None + self._header: Optional[HeaderTy] = None # functions self._appresultdata: Optional[AppResultData] = None @@ -113,7 +112,7 @@ def architecture(self) -> str: @property def fileformat(self) -> str: - return self._fileformat + return self._header_ty.fmt_name() @property def arm(self) -> bool: @@ -129,11 +128,11 @@ def x86(self) -> bool: @property def elf(self) -> bool: - return self.fileformat == "elf" + return self._header_ty == ELFHeader @property def pe(self) -> bool: - return self.fileformat in ["pe", "pe32"] + return self._header_ty == PEHeader # Dictionaries ------------------------------------------------------------ @@ -152,31 +151,12 @@ def interfacedictionary(self) -> InterfaceDictionary: return self._interfacedictionary # File format -------------------------------------------------------------- - @property - def peheader(self) -> PEHeader: - if self.pe: - if self._peheader is None: - x = UF.get_pe_header_xnode(self.path, self.filename) - self._peheader = PEHeader( - self.path, self.filename, x, self.dependencies) - return self._peheader - else: - raise UF.CHBError("File with file format " - + self.fileformat - + " does not have a PE header") - - @property - def elfheader(self) -> ELFHeader: - if self.elf: - if self._elfheader is None: - x = UF.get_elf_header_xnode(self.path, self.filename) - self._elfheader = ELFHeader(self.path, self.filename, x) - return self._elfheader - else: - raise UF.CHBError("File with file format " - + self.fileformat - + " does not have an ELF header") + def header(self) -> HeaderTy: + if self._header is None: + x = self._fileformat.get_xnode(self.path, self.filename) + self._header = self._header_ty(self.path, self.filename, x, self.dependencies) + return self._header # Systeminfo --------------------------------------------------------------- diff --git a/chb/arm/ARMAccess.py b/chb/arm/ARMAccess.py index 3e8b7932..566ba5b6 100644 
--- a/chb/arm/ARMAccess.py +++ b/chb/arm/ARMAccess.py @@ -25,9 +25,10 @@ # SOFTWARE. # ------------------------------------------------------------------------------ +from chb.elfformat.ELFHeader import ELFHeader from typing import Dict, List, Mapping, Optional -from chb.app.AppAccess import AppAccess +from chb.app.AppAccess import AppAccess, HeaderTy from chb.arm.ARMDictionary import ARMDictionary from chb.arm.ARMFunction import ARMFunction @@ -35,14 +36,14 @@ import chb.util.fileutil as UF -class ARMAccess(AppAccess): +class ARMAccess(AppAccess[HeaderTy]): def __init__( self, path: str, filename: str, deps: List[str] = [], - fileformat: str = "elf", + fileformat: HeaderTy = ELFHeader, arch: str = "arm") -> None: AppAccess.__init__(self, path, filename, deps, fileformat, arch) self._armd: Optional[ARMDictionary] = None diff --git a/chb/cmdline/fileformatcmds.py b/chb/cmdline/fileformatcmds.py index 39434971..98c84ae3 100644 --- a/chb/cmdline/fileformatcmds.py +++ b/chb/cmdline/fileformatcmds.py @@ -62,7 +62,7 @@ def pedatacmd(args: argparse.Namespace) -> NoReturn: exit(1) app = UC.get_app(path, xfile, xinfo) - peheader = app.peheader + peheader = app.header if headeronly: print(peheader) exit(0) @@ -118,7 +118,7 @@ def elfdatacmd(args: argparse.Namespace) -> NoReturn: app = UC.get_app(path, xfile, xinfo) # app = AP.AppAccess( # path, xfile, fileformat=xinfo.format, arch=xinfo.architecture) - elfheader = app.elfheader + elfheader = app.header try: print(str(elfheader)) diff --git a/chb/elfformat/ELFHeader.py b/chb/elfformat/ELFHeader.py index 8142f540..ec51959e 100644 --- a/chb/elfformat/ELFHeader.py +++ b/chb/elfformat/ELFHeader.py @@ -28,7 +28,7 @@ # ------------------------------------------------------------------------------ import xml.etree.ElementTree as ET -from typing import Any, Callable, Dict, List, Optional, Tuple +from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple from chb.elfformat.ELFProgramHeader import ELFProgramHeader from 
chb.elfformat.ELFSectionHeader import ELFSectionHeader @@ -105,11 +105,21 @@ def get_value(d: Dict[str, str], v: str) -> str: class ELFHeader: + @staticmethod + def fmt_name() -> str: + return "elf" + + @staticmethod + def get_xnode(path: str, filename: str) -> ET.Element: + return UF.get_elf_header_xnode(path, filename) + def __init__( self, pathname: str, filename: str, - xnode: ET.Element) -> None: + xnode: ET.Element, + # ignored, used for compatibility with PEHeader + deps: Sequence[str] = []) -> None: self._pathname = pathname self._filename = filename self.xnode = xnode diff --git a/chb/mips/MIPSAccess.py b/chb/mips/MIPSAccess.py index b5b07f20..66147c47 100644 --- a/chb/mips/MIPSAccess.py +++ b/chb/mips/MIPSAccess.py @@ -27,9 +27,10 @@ # SOFTWARE. # ------------------------------------------------------------------------------ +from chb.elfformat.ELFHeader import ELFHeader from typing import Callable, cast, Dict, List, Mapping, Optional, Sequence, Tuple -from chb.app.AppAccess import AppAccess +from chb.app.AppAccess import AppAccess, HeaderTy from chb.mips.MIPSDictionary import MIPSDictionary from chb.mips.MIPSFunction import MIPSFunction @@ -37,14 +38,14 @@ import chb.util.fileutil as UF -class MIPSAccess(AppAccess): +class MIPSAccess(AppAccess[HeaderTy]): def __init__( self, path: str, filename: str, deps: List[str] = [], - fileformat: str = "elf", + fileformat: HeaderTy = ELFHeader, arch: str = "mips") -> None: AppAccess.__init__(self, path, filename, deps, fileformat, arch) self._mipsd: Optional[MIPSDictionary] = None diff --git a/chb/mips/simulation/MIPSimulationState.py b/chb/mips/simulation/MIPSimulationState.py index 3d6d922d..ef19d27c 100644 --- a/chb/mips/simulation/MIPSimulationState.py +++ b/chb/mips/simulation/MIPSimulationState.py @@ -195,17 +195,17 @@ def __init__(self, self.registers: Dict[str, SV.SimValue] = {} # register name -> SimValue self.registers['zero'] = SV.SimDoubleWordValue(0) self.stackmem = MIPSimStackMemory(self) - 
self.globalmem = MIPSimGlobalMemory(self, self.app.elfheader) + self.globalmem = MIPSimGlobalMemory(self, self.app.header) self.basemem: Dict[str, MIPSimBaseMemory] = {} # static library (optional) self.libapp = libapp if self.libapp is not None: self.libstubs: Dict[int, Tuple[str, Optional[MIPSimStub]]] = {} - self.libglobalmem = MIPSimGlobalMemory(self, self.libapp.elfheader) + self.libglobalmem = MIPSimGlobalMemory(self, self.libapp.header) # function-name -> function address in static lib self.static_lib: Dict[str, str] = {} - libimgbase = self.libapp.elfheader.image_base + libimgbase = self.libapp.header.image_base self.libimagebase = SSV.SimGlobalAddress(SV.SimDoubleWordValue( int(libimgbase, 16))) @@ -214,7 +214,7 @@ def __init__(self, # target executable for dynamic loading (optional) self.xapp = xapp if self.xapp is not None: - self.xglobalmem = MIPSimGlobalMemory(self, self.xapp.elfheader) + self.xglobalmem = MIPSimGlobalMemory(self, self.xapp.header) # log self.fnlog: Dict[str, List[str]] = {} # iaddr -> msg list2 diff --git a/chb/peformat/PEHeader.py b/chb/peformat/PEHeader.py index 373662ae..16720931 100644 --- a/chb/peformat/PEHeader.py +++ b/chb/peformat/PEHeader.py @@ -242,6 +242,14 @@ def subsystem(self) -> str: class PEHeader: """Main entry point to access the raw data in the executable.""" + @staticmethod + def fmt_name() -> str: + return "pe32" + + @staticmethod + def get_xnode(path: str, filename: str) -> ET.Element: + return UF.get_pe_header_xnode(path, filename) + def __init__( self, pathname: str, diff --git a/chb/x86/X86Access.py b/chb/x86/X86Access.py index 4f2ee948..7b46f1ef 100644 --- a/chb/x86/X86Access.py +++ b/chb/x86/X86Access.py @@ -27,9 +27,10 @@ # SOFTWARE. 
# ------------------------------------------------------------------------------ +from chb.elfformat.ELFHeader import ELFHeader from typing import Dict, List, Mapping, Optional, Sequence -from chb.app.AppAccess import AppAccess +from chb.app.AppAccess import AppAccess, HeaderTy import chb.util.fileutil as UF @@ -38,14 +39,14 @@ from chb.x86.X86Instruction import X86Instruction -class X86Access(AppAccess): +class X86Access(AppAccess[HeaderTy]): def __init__( self, path: str, filename: str, deps: List[str] = [], - fileformat: str = "elf", + fileformat: HeaderTy = ELFHeader, arch: str = "x86") -> None: AppAccess.__init__(self, path, filename, deps, fileformat, arch) self._x86d: Optional[X86Dictionary] = None From 073934fc18370230268f2b37556dbb15cb1c29f8 Mon Sep 17 00:00:00 2001 From: Eli Davis Date: Tue, 15 Jun 2021 12:54:18 -0400 Subject: [PATCH 2/3] AppAccess doesn't need architecture fields If you want to check if it's a specific architecture, just do `isinstance(app, X86Access)` or what have you. 
This has the advantage of telling the type system the type you're looking at -- if you do that, you can do app.x86dictionary without having to assert that it's an X86Access or cast to X86Access or something --- chb/app/AppAccess.py | 21 +-------------------- chb/arm/ARMAccess.py | 5 ++--- chb/cmdline/commandutil.py | 6 +++--- chb/mips/MIPSAccess.py | 5 ++--- chb/x86/X86Access.py | 5 ++--- 5 files changed, 10 insertions(+), 32 deletions(-) diff --git a/chb/app/AppAccess.py b/chb/app/AppAccess.py index 2b5a9d00..f1ff0f9e 100644 --- a/chb/app/AppAccess.py +++ b/chb/app/AppAccess.py @@ -63,14 +63,12 @@ def __init__( path: str, filename: str, deps: List[str] = [], - fileformat: Type[HeaderTy] = ELFHeader, - arch: str = "x86") -> None: + fileformat: Type[HeaderTy] = ELFHeader) -> None: """Initializes access to analysis results.""" self._path = path self._filename = filename self._deps = deps # list of summary jars registered as dependencies self._header_ty = fileformat # currently supported: elf, pe - self._arch = arch # currently supported: arm, mips, x86 self._userdata: Optional[UserData] = None @@ -105,27 +103,10 @@ def dependencies(self) -> Sequence[str]: return self._deps # Architecture and file format --------------------------------------------- - - @property - def architecture(self) -> str: - return self._arch - @property def fileformat(self) -> str: return self._header_ty.fmt_name() - @property - def arm(self) -> bool: - return self.architecture == "arm" - - @property - def mips(self) -> bool: - return self.architecture == "mips" - - @property - def x86(self) -> bool: - return self.architecture == "x86" - @property def elf(self) -> bool: return self._header_ty == ELFHeader diff --git a/chb/arm/ARMAccess.py b/chb/arm/ARMAccess.py index 566ba5b6..8ae49c0d 100644 --- a/chb/arm/ARMAccess.py +++ b/chb/arm/ARMAccess.py @@ -43,9 +43,8 @@ def __init__( path: str, filename: str, deps: List[str] = [], - fileformat: HeaderTy = ELFHeader, - arch: str = "arm") -> None: - 
AppAccess.__init__(self, path, filename, deps, fileformat, arch) + fileformat: HeaderTy = ELFHeader) -> None: + AppAccess.__init__(self, path, filename, deps, fileformat) self._armd: Optional[ARMDictionary] = None self._functions: Dict[str, ARMFunction] = {} diff --git a/chb/cmdline/commandutil.py b/chb/cmdline/commandutil.py index 717db075..135e3557 100644 --- a/chb/cmdline/commandutil.py +++ b/chb/cmdline/commandutil.py @@ -113,11 +113,11 @@ def get_app(path: str, xfile: str, xinfo: XI.XInfo) -> AppAccess: arch = xinfo.architecture format = xinfo.format if arch == "x86": - return X86Access(path, xfile, fileformat=format, arch=arch) + return X86Access(path, xfile, fileformat=format) elif arch == "mips": - return MIPSAccess(path, xfile, fileformat=format, arch=arch) + return MIPSAccess(path, xfile, fileformat=format) elif arch == "arm": - return ARMAccess(path, xfile, fileformat=format, arch=arch) + return ARMAccess(path, xfile, fileformat=format) else: raise UF.CHBError("Archicture " + arch + " not yet supported") diff --git a/chb/mips/MIPSAccess.py b/chb/mips/MIPSAccess.py index 66147c47..722604c4 100644 --- a/chb/mips/MIPSAccess.py +++ b/chb/mips/MIPSAccess.py @@ -45,9 +45,8 @@ def __init__( path: str, filename: str, deps: List[str] = [], - fileformat: HeaderTy = ELFHeader, - arch: str = "mips") -> None: - AppAccess.__init__(self, path, filename, deps, fileformat, arch) + fileformat: HeaderTy = ELFHeader) -> None: + AppAccess.__init__(self, path, filename, deps, fileformat) self._mipsd: Optional[MIPSDictionary] = None self._functions: Dict[str, MIPSFunction] = {} diff --git a/chb/x86/X86Access.py b/chb/x86/X86Access.py index 7b46f1ef..9691cb74 100644 --- a/chb/x86/X86Access.py +++ b/chb/x86/X86Access.py @@ -46,9 +46,8 @@ def __init__( path: str, filename: str, deps: List[str] = [], - fileformat: HeaderTy = ELFHeader, - arch: str = "x86") -> None: - AppAccess.__init__(self, path, filename, deps, fileformat, arch) + fileformat: HeaderTy = ELFHeader) -> None: + 
AppAccess.__init__(self, path, filename, deps, fileformat) self._x86d: Optional[X86Dictionary] = None self._functions: Dict[str, X86Function] = {} From 15f1193b47fc17d0bd7e4df1cb4f26fc83efc10c Mon Sep 17 00:00:00 2001 From: Eli Davis Date: Tue, 15 Jun 2021 13:12:53 -0400 Subject: [PATCH 3/3] There's a lot of is_foo, cast-to-foo calls that can/should be replaced with isinstance checks I did them for X86SimulationState's set_register and set, but I think if you did it for the rest of the SimLocation stuff, you'd avoid a lot of casts. I think you could probably go so far as to remove the is_ methods entirely, but that's totally up to you. Note that if you're super concerned with performance of the Python, you might want to disregard this one? I think the bool-check cast is going to be ever so slightly faster? But if you're super concerned with performance, this stuff should probably just be in OCaml :P --- chb/x86/simulation/X86SimulationState.py | 40 ++++++++---------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/chb/x86/simulation/X86SimulationState.py b/chb/x86/simulation/X86SimulationState.py index 1fb0179f..6297efb0 100644 --- a/chb/x86/simulation/X86SimulationState.py +++ b/chb/x86/simulation/X86SimulationState.py @@ -128,8 +128,7 @@ def set_register(self, iaddr: str, reg: str, srcval: SV.SimValue) -> None: elif SU.is_half_reg(reg): fullreg = SU.fullregmap[reg] fullregval = self.get_regval(iaddr, fullreg) - if fullregval.is_literal: - fullregval = cast(SV.SimDoubleWordValue, fullregval) + if isinstance(fullregval, SV.SimDoubleWordValue): newval = fullregval.set_low_word(srcval) self.set_register(iaddr, fullreg, newval) else: @@ -137,17 +136,13 @@ def set_register(self, iaddr: str, reg: str, srcval: SV.SimValue) -> None: elif SU.is_qlow_reg(reg): fullreg = SU.fullregmap[reg] fullregval = self.get_regval(iaddr, fullreg) - if fullregval.is_literal: - fullregval = cast(SV.SimDoubleWordValue, fullregval) + if isinstance(fullregval, 
SV.SimDoubleWordValue): if srcval.is_literal: - if srcval.is_doubleword: - srcval = cast(SV.SimDoubleWordValue, srcval) + if isinstance(srcval, SV.SimDoubleWordValue): newval = fullregval.set_byte1(srcval.simbyte1) - elif srcval.is_word: - srcval = cast(SV.SimWordValue, srcval) + elif isinstance(srcval, SV.SimWordValue): newval = fullregval.set_byte1(srcval.lowbyte) - elif srcval.is_byte: - srcval = cast(SV.SimByteValue, srcval) + elif isinstance(srcval, SV.SimByteValue): newval = fullregval.set_byte1(srcval) else: raise SU.CHBSimError( @@ -160,17 +155,13 @@ def set_register(self, iaddr: str, reg: str, srcval: SV.SimValue) -> None: elif SU.is_qhigh_reg(reg): fullreg = SU.fullregmap[reg] fullregval = self.get_regval(iaddr, fullreg) - if fullregval.is_literal: - fullregval = cast(SV.SimDoubleWordValue, fullregval) + if isinstance(fullregval, SV.SimDoubleWordValue): if srcval.is_literal: - if srcval.is_doubleword: - srcval = cast(SV.SimDoubleWordValue, srcval) + if isinstance(srcval, SV.SimDoubleWordValue): newval = fullregval.set_byte2(srcval.simbyte1) - elif srcval.is_word: - srcval = cast(SV.SimWordValue, srcval) + elif isinstance(srcval, SV.SimWordValue): newval = fullregval.set_byte2(srcval.lowbyte) - elif srcval.is_byte: - srcval = cast(SV.SimByteValue, srcval) + elif isinstance(srcval, SV.SimByteValue): newval = fullregval.set_byte2(srcval) else: raise SU.CHBSimError( @@ -187,20 +178,15 @@ def set(self, iaddr: str, dstop: X86Operand, srcval: SV.SimValue) -> None: if not srcval.is_defined: self.add_logmsg(iaddr, 'Source value is undefined: ' + str(dstop)) lhs = self.get_lhs(iaddr, dstop) - if lhs.is_register: - lhs = cast(SimRegister, lhs) + if isinstance(lhs, SimRegister): self.set_register(iaddr, lhs.register, srcval) - elif (lhs.is_double_register - and srcval.is_literal - and srcval.is_quadword): - srcval = cast(SV.SimQuadWordValue, srcval) - lhs = cast(SimDoubleRegister, lhs) + elif (isinstance(lhs, SimDoubleRegister) + and isinstance(srcval, 
SV.SimQuadWordValue)): self.set_register(iaddr, lhs.lowregister, srcval.lowhalf) self.set_register(iaddr, lhs.highregister, srcval.highhalf) - elif lhs.is_memory_location: - lhs = cast(SimMemoryLocation, lhs) + elif isinstance(lhs, SimMemoryLocation): if lhs.is_global: self.globalmem.set(iaddr, lhs.simaddress, srcval) self.add_logmsg(iaddr, str(lhs) + ' := ' + str(srcval))