Skip to content
This repository was archived by the owner on Mar 9, 2026. It is now read-only.
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,10 @@ __pycache__/
# Sphinx documentation
docs/_build/

# Build files
venv/

# Temporary files
_.*
/output/
/tests/_scratch/
7 changes: 7 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Developer entry points. The relative paths below assume make is invoked
# from the repository root.

# None of these targets produce a file of the same name, so declare them
# phony: they then always run, even if such a file or directory appears.
.PHONY: default example test

default: example

# Excavate the sample artifact using the showcase excavation.
example:
	python3 -m autoarchaeologist --excavator examples.excavations.showcase examples/30001393.bin

# Run the unit tests with the interpreter from ./venv.
test:
	@./venv/bin/python3 -m unittest
111 changes: 111 additions & 0 deletions autoarchaeologist/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import argparse
import importlib
import os
import sys

from autoarchaeologist import Excavation


class MissingArgumentsError(argparse.ArgumentError):
    """Raised when the command-line arguments do not describe a usable request.

    Built on ``argparse.ArgumentError`` with no associated argument; the
    string form is the class name followed by the detail text.
    """

    def __init__(self, detail):
        super().__init__(None, detail)

    def __str__(self):
        label = type(self).__name__
        return "{}: {}".format(label, self.args[0])


class NoSuchExcavationError(RuntimeError):
    """Raised when a requested excavation name cannot be resolved.

    The string form is the class name followed by the name that was asked for.
    """

    def __init__(self, excavation_name):
        super().__init__(excavation_name)

    def __str__(self):
        label = type(self).__name__
        return "{}: {}".format(label, self.args[0])


def action_for_args(args):
    """Work out which action the parsed arguments ask for.

    Returns a ``(action_name, payload)`` tuple. The only action currently
    recognised is ``"excavator"``, whose payload is whatever
    ``load_excavator_by_name`` resolves ``args.excavator`` to.

    Raises ``MissingArgumentsError`` when ``args`` has no non-empty
    ``excavator`` attribute.
    """
    # getattr with a default tolerates namespaces that lack the attribute
    excavator_name = getattr(args, 'excavator', None)
    if not excavator_name:
        raise MissingArgumentsError("no valid action was requested")

    return ("excavator", load_excavator_by_name(excavator_name))


def load_excavator_by_name(excavator):
    """Resolve a dotted name to an excavation object.

    Two spellings are accepted, tried in this order:

    1. ``pkg.module`` - the module is imported and its ``excavation``
       attribute is returned.
    2. ``pkg.module.attr`` - ``pkg.module`` is imported and its ``attr``
       attribute is returned.

    Raises ``NoSuchExcavationError``, explicitly chained to the underlying
    failure, when neither form yields an object.
    """
    # first try to grab the conventional "excavation" within a single file
    try:
        # directly load the excavator as a named module
        excavations_module = importlib.import_module(excavator)
        return getattr(excavations_module, 'excavation')
    except AttributeError as err:
        # no excavation property found in the loaded module so error out
        raise NoSuchExcavationError(excavator) from err
    except ModuleNotFoundError:
        # no such module so proceed to try as a named property within a module
        pass

    # now try to access a named property within a module; a name without any
    # dot leaves package_name empty, which import_module rejects below
    package_name, _, excavation_name = excavator.rpartition('.')
    try:
        excavations_module = importlib.import_module(package_name)
        return getattr(excavations_module, excavation_name)
    except Exception as err:
        # importing arbitrary modules can fail in many ways; report them all
        # as "no such excavation" but keep the root cause attached
        raise NoSuchExcavationError(excavator) from err


def parse_arguments(argv=None):
    """Parse the command line of ``python -m autoarchaeologist``.

    ``argv`` is the list of argument strings to parse; when it is ``None``
    argparse falls back to the process arguments.

    Returns the argparse namespace with ``dir`` (defaulting to
    ``/tmp/_autoarchaologist``), ``excavator`` (``None`` unless given) and
    the positional ``filename``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--dir", default="/tmp/_autoarchaologist")
    # adjacent literals are concatenated verbatim, so the separating space
    # has to be part of one of them
    parser.add_argument('--excavator',
                        help="The name of a file to import that contains an excavation "
                             "which will be used to extract information from the artifact.")
    parser.add_argument('filename')

    return parser.parse_args(args=argv)


def process_arguments(args):
    """Normalise parsed arguments in place and hand the same object back.

    * A ``dir`` of exactly ``"."`` becomes ``<cwd>/output/_autoarchaologist``.
    * A non-empty ``filename`` is made absolute.
    """
    wants_local_output = args.dir == "."
    if wants_local_output:
        args.dir = os.path.join(os.getcwd(), "output", "_autoarchaologist")

    if not args.filename:
        return args

    args.filename = os.path.abspath(args.filename)
    return args


def perform_excavation(args):
match action_for_args(args):
case "excavator", AnExcavation:
assert issubclass(AnExcavation, Excavation)
ctx = AnExcavation(html_dir=args.dir)
case action, _:
raise NotImplementedError(f"action: {action}")

ctx.add_file_artifact(args.filename)

ctx.start_examination()

return ctx


def main_throwing():
    """Run the command-line tool, letting any failure propagate.

    Parses and normalises the process arguments, makes sure the output
    directory exists, performs the excavation, writes the HTML and prints
    where to find it.
    """
    args = process_arguments(parse_arguments())

    # makedirs also creates missing parent directories and, unlike a
    # swallowed FileExistsError from mkdir, still fails when the path exists
    # but is not a directory.
    os.makedirs(args.dir, exist_ok=True)

    ctx = perform_excavation(args)
    ctx.produce_html()
    print("Now point your browser at", ctx.filename_for(ctx).link)


if __name__ == "__main__":
    # Top-level boundary: report any failure as a one-line message and signal
    # it through a non-zero exit status. The message goes to stderr so that
    # it does not mix with the regular output on stdout.
    try:
        main_throwing()
    except Exception as err:
        print(str(err), file=sys.stderr)
        sys.exit(1)
4 changes: 1 addition & 3 deletions autoarchaeologist/base/interpretation.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,6 @@ def __exit__(self, exc_type, exc_value, traceback):
self.file.close()
self.file = None

def __del__(self):
os.remove(self.filename)

def write(self, *args, **kwargs):
''' ... '''
if self.file is None:
Expand All @@ -55,6 +52,7 @@ def html_interpretation(self, fo, _this):
fo.write("<H3>" + self.title + "</H3>\n")
for i in file:
fo.write(i)
os.remove(self.filename)

class Utf8Interpretation(HtmlInterpretation):
'''
Expand Down
18 changes: 17 additions & 1 deletion ddhf/ddhf/decorated_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,29 @@ def from_argv(self):
"AUTOARCHAEOLOGIST_BITSTORE_CACHE": "ddhf_bitstore_cache",
}

def main(job, html_subdir="tmp", **kwargs):
def parse_arguments(argv=None):
    """Parse the ``-o/--out`` option and return the resulting namespace.

    ``out`` defaults to ``/tmp/_autoarchaologist``; a value of exactly
    ``'.'`` is replaced by ``<cwd>/_autoarchaologist``. With ``argv`` left
    as ``None`` argparse falls back to the process arguments.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-o', '--out', default='/tmp/_autoarchaologist')
    parsed = cli.parse_args(args=argv)

    if parsed.out != '.':
        return parsed

    parsed.out = os.path.join(os.getcwd(), "_autoarchaologist")
    return parsed

def main(job, html_subdir, **kwargs):
args = parse_arguments()
kwargs["html_dir"] = args.out

''' A standard main routine to reduce boiler-plate '''
for key in os.environ:
i = OK_ENVS.get(key)
if i:
kwargs[i] = os.environ[key]

if 'html_dir' not in kwargs:
raise AttributeError("missing: html_dir")


kwargs['html_dir'] = os.path.join(kwargs['html_dir'], html_subdir)
kwargs.setdefault('download_links', True)
kwargs.setdefault('download_limit', 1 << 20)
Expand Down
1 change: 1 addition & 0 deletions examples/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import examples.excavations as excavations
18 changes: 18 additions & 0 deletions examples/excavations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from autoarchaeologist.base.excavation import Excavation
from autoarchaeologist.generic.bigtext import BigText
from autoarchaeologist.generic.samesame import SameSame
from autoarchaeologist.data_general.absbin import AbsBin
from autoarchaeologist.data_general.papertapechecksum import DGC_PaperTapeCheckSum


class ShowcaseExcacation(Excavation):
    """Excavation with a fixed set of examiners registered at construction."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # registration order is kept exactly as listed
        for examiner in (BigText, AbsBin, DGC_PaperTapeCheckSum, SameSame):
            self.add_examiner(examiner)


# alias under which the class is addressed as "examples.excavations.showcase"
showcase = ShowcaseExcacation
Empty file added output/.gitkeep
Empty file.
32 changes: 0 additions & 32 deletions run_example.py

This file was deleted.

Empty file added tests/__init__.py
Empty file.
Empty file added tests/data/__init__.py
Empty file.
1 change: 1 addition & 0 deletions tests/data/single_excavation--missing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# exporting nothing here such that we can test loading with missing property
6 changes: 6 additions & 0 deletions tests/data/single_excavation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from examples.excavations import ShowcaseExcacation

# Single-file excavations must export an "excavation" attribute whose value
# is the base Excavation class or a subclass of it.

excavation = ShowcaseExcacation
61 changes: 61 additions & 0 deletions tests/test_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import os
import shutil
from types import SimpleNamespace
import unittest

TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
SCRATCH_DIR = os.path.join(TESTS_DIR, "_scratch")

from autoarchaeologist.__main__ import perform_excavation


def _dir_to_listing(absolute_dir, /, kind):
assert os.path.isabs(absolute_dir)
_, dirs, files = list(os.walk(absolute_dir))[0]
match kind:
case 'dirs':
return sorted(dirs)
case 'files':
return set(files)
case _:
raise NotImplementedError()


class Test_Example_BasicHtml(unittest.TestCase):
    """
    Ensure the showcase excavation produces the expected HTML files for the
    example input.
    """

    # argument namespace shared by all tests; filled in by setUpClass
    ARGS = None

    @classmethod
    def setUpClass(cls):
        args = SimpleNamespace(
            dir=SCRATCH_DIR,
            filename="examples/30001393.bin",
            excavator="examples.excavations.showcase",
        )
        # begin from an empty scratch directory so earlier output cannot
        # satisfy the assertions
        shutil.rmtree(args.dir, ignore_errors=True)
        os.makedirs(args.dir, exist_ok=True)
        cls.ARGS = args

    def test_produces_top_level_index(self):
        ctx = perform_excavation(self.ARGS)
        ctx.produce_html()

        toplevel_filenames = _dir_to_listing(self.ARGS.dir, kind='files')

        # assertIn reports the actual listing when the check fails
        self.assertIn("index.html", toplevel_filenames)
        self.assertIn("index.css", toplevel_filenames)

    def test_produces_digest_directories(self):
        ctx = perform_excavation(self.ARGS)
        ctx.produce_html()

        toplevel_dirnames = _dir_to_listing(self.ARGS.dir, kind='dirs')

        self.assertEqual(toplevel_dirnames, ['08', 'bf', 'fa'])


if __name__ == '__main__':
    unittest.main()
Loading