diff --git a/.gitignore b/.gitignore
index 53bb4e6..7ae0c03 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,5 +6,10 @@ __pycache__/
# Sphinx documentation
docs/_build/
+# Build files
+venv/
+
# Temporary files
_.*
+# Ignore generated output but keep the tracked placeholder; a file cannot be
+# re-included when its parent directory itself is excluded.
+/output/*
+!/output/.gitkeep
+/tests/_scratch/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..96d0cfe
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,7 @@
+# None of these targets produce a file of the same name.
+.PHONY: default example test
+
+default: example
+
+# Run the showcase excavation against examples/30001393.bin.
+example:
+	python3 -m autoarchaeologist --excavator examples.excavations.showcase examples/30001393.bin
+
+# Run the unit tests with the interpreter from ./venv.
+test:
+	@./venv/bin/python3 -m unittest
diff --git a/autoarchaeologist/__main__.py b/autoarchaeologist/__main__.py
new file mode 100644
index 0000000..9d19bc7
--- /dev/null
+++ b/autoarchaeologist/__main__.py
@@ -0,0 +1,111 @@
+import argparse
+import importlib
+import os
+import sys
+
+from autoarchaeologist import Excavation
+
+
+class MissingArgumentsError(argparse.ArgumentError):
+    """Argument error that is not tied to one specific argparse argument.
+
+    ``None`` is handed to ``argparse.ArgumentError`` as the argument and
+    ``detail`` as the message; ``str()`` renders "<ClassName>: <detail>".
+    """
+
+    def __init__(self, detail):
+        super().__init__(None, detail)
+
+    def __str__(self):
+        label = type(self).__name__
+        return "{}: {}".format(label, self.args[0])
+
+
+class NoSuchExcavationError(RuntimeError):
+    """Runtime error carrying the excavation name it was constructed with.
+
+    ``str()`` renders "<ClassName>: <excavation_name>".
+    """
+
+    def __init__(self, excavation_name):
+        super().__init__(excavation_name)
+
+    def __str__(self):
+        label = type(self).__name__
+        return "{}: {}".format(label, self.args[0])
+
+
+def action_for_args(args):
+    """Choose the action requested by the parsed command-line arguments.
+
+    Returns a ``(name, payload)`` tuple. The only action currently known is
+    ``"excavator"``, whose payload is whatever ``load_excavator_by_name``
+    resolves ``args.excavator`` to.
+
+    Raises MissingArgumentsError when ``args`` has no truthy ``excavator``
+    attribute.
+    """
+    excavator_name = getattr(args, 'excavator', None)
+    if not excavator_name:
+        raise MissingArgumentsError("no valid action was requested")
+
+    return ("excavator", load_excavator_by_name(excavator_name))
+
+
+def load_excavator_by_name(excavator):
+    """Resolve a dotted name to the excavation object it refers to.
+
+    Two lookups are attempted, in this order:
+
+    1. ``excavator`` is imported as a module and its ``excavation``
+       attribute is returned (the single-file form).
+    2. If no such module can be found, the last dotted component is taken
+       as an attribute name and looked up on the module named by the
+       preceding components.
+
+    Raises NoSuchExcavationError, chained to the underlying failure, when
+    neither lookup succeeds.
+    """
+    # first try to grab an arbitrary excavation within a single file
+    try:
+        # directly load the excavator as a named module
+        excavations_module = importlib.import_module(excavator)
+        return excavations_module.excavation
+    except AttributeError as err:
+        # no excavation property found in the loaded module so error out
+        raise NoSuchExcavationError(excavator) from err
+    except ModuleNotFoundError:
+        # no such module so proceed to try as a named property within a module
+        pass
+
+    # now try to access a named property within a module; a name without any
+    # dot yields an empty package name, which fails to import below
+    package_name, _, excavation_name = excavator.rpartition('.')
+    try:
+        excavations_package = importlib.import_module(package_name)
+        return getattr(excavations_package, excavation_name)
+    except Exception as err:
+        # keep the original failure attached as the cause for debugging
+        raise NoSuchExcavationError(excavator) from err
+
+
+def parse_arguments(argv=None):
+    """Parse the command line of ``python -m autoarchaeologist``.
+
+    ``argv`` is the list of argument strings to parse; ``None`` lets
+    argparse read ``sys.argv``. Returns the argparse namespace with the
+    attributes ``dir``, ``excavator`` and ``filename``.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-d", "--dir", default="/tmp/_autoarchaologist")
+    parser.add_argument(
+        '--excavator',
+        # Adjacent literals are joined verbatim, so the separating space
+        # has to be part of the first literal.
+        help="The name of a file to import that contains an excavation "
+             "which will be used to extract information from the artifact.",
+    )
+    parser.add_argument('filename')
+
+    return parser.parse_args(args=argv)
+
+
+def process_arguments(args):
+    """Normalise parsed arguments in place and hand the same object back.
+
+    A ``dir`` of exactly "." is replaced by
+    ``<cwd>/output/_autoarchaologist``; a truthy ``filename`` is made
+    absolute.
+    """
+    wants_cwd_output = args.dir == "."
+    if wants_cwd_output:
+        args.dir = os.path.join(os.getcwd(), "output", "_autoarchaologist")
+
+    artifact_path = args.filename
+    if artifact_path:
+        args.filename = os.path.abspath(artifact_path)
+
+    return args
+
+
+def perform_excavation(args):
+    """Instantiate the requested excavation and examine the artifact file.
+
+    The action is chosen by ``action_for_args``. For the "excavator" action
+    the resolved class is instantiated with ``html_dir=args.dir``, then
+    ``args.filename`` is added as a file artifact and the examination is
+    started.
+
+    Returns the excavation instance.
+
+    Raises TypeError when the resolved excavator is not an ``Excavation``
+    subclass, and NotImplementedError for any other action name.
+    """
+    match action_for_args(args):
+        case "excavator", AnExcavation:
+            # The name comes from the command line, so validate it with a
+            # real exception: an assert vanishes under -O and carries no
+            # message for the user.
+            if not (isinstance(AnExcavation, type)
+                    and issubclass(AnExcavation, Excavation)):
+                raise TypeError(
+                    f"excavator is not an Excavation subclass: {AnExcavation!r}"
+                )
+            ctx = AnExcavation(html_dir=args.dir)
+        case action, _:
+            raise NotImplementedError(f"action: {action}")
+
+    ctx.add_file_artifact(args.filename)
+
+    ctx.start_examination()
+
+    return ctx
+
+
+def main_throwing():
+    """Run the command-line entry point, letting any exception propagate.
+
+    Parses and normalises the arguments, makes sure the output directory
+    exists, performs the excavation, writes the HTML and prints where to
+    find it.
+    """
+    args = process_arguments(parse_arguments())
+
+    # makedirs also creates missing parents (e.g. "<cwd>/output" when the
+    # directory was given as ".") and tolerates an existing directory
+    os.makedirs(args.dir, exist_ok=True)
+
+    ctx = perform_excavation(args)
+    ctx.produce_html()
+    print("Now point your browser at", ctx.filename_for(ctx).link)
+
+
+if __name__ == "__main__":
+    try:
+        main_throwing()
+    except Exception as e:
+        # Top-level boundary: report the failure on stderr so it does not
+        # mix with regular output, then signal it through the exit status.
+        print(str(e), file=sys.stderr)
+        sys.exit(1)
diff --git a/autoarchaeologist/base/interpretation.py b/autoarchaeologist/base/interpretation.py
index d850b9f..5503ca3 100644
--- a/autoarchaeologist/base/interpretation.py
+++ b/autoarchaeologist/base/interpretation.py
@@ -40,9 +40,6 @@ def __exit__(self, exc_type, exc_value, traceback):
self.file.close()
self.file = None
- def __del__(self):
- os.remove(self.filename)
-
def write(self, *args, **kwargs):
''' ... '''
if self.file is None:
@@ -55,6 +52,7 @@ def html_interpretation(self, fo, _this):
fo.write("
" + self.title + "
\n")
for i in file:
fo.write(i)
+ os.remove(self.filename)
class Utf8Interpretation(HtmlInterpretation):
'''
diff --git a/ddhf/ddhf/decorated_context.py b/ddhf/ddhf/decorated_context.py
index 7fe5bb6..83f3f05 100644
--- a/ddhf/ddhf/decorated_context.py
+++ b/ddhf/ddhf/decorated_context.py
@@ -116,13 +116,29 @@ def from_argv(self):
"AUTOARCHAEOLOGIST_BITSTORE_CACHE": "ddhf_bitstore_cache",
}
-def main(job, html_subdir="tmp", **kwargs):
+def parse_arguments(argv=None):
+    ''' Parse the output directory option (-o/--out) from the command line '''
+    # NOTE(review): imported locally because no module-level
+    # ``import argparse`` is visible in this change; hoist it to the top of
+    # the file if one already exists there.
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-o', '--out', default='/tmp/_autoarchaologist')
+
+    args = parser.parse_args(args=argv)
+    # "." is shorthand for a directory below the current working directory
+    if args.out == '.':
+        args.out = os.path.join(os.getcwd(), "_autoarchaologist")
+    return args
+
+def main(job, html_subdir, **kwargs):
+ args = parse_arguments()
+ kwargs["html_dir"] = args.out
+
''' A standard main routine to reduce boiler-plate '''
for key in os.environ:
i = OK_ENVS.get(key)
if i:
kwargs[i] = os.environ[key]
+ if 'html_dir' not in kwargs:
+ raise AttributeError("missing: html_dir")
+
+
kwargs['html_dir'] = os.path.join(kwargs['html_dir'], html_subdir)
kwargs.setdefault('download_links', True)
kwargs.setdefault('download_limit', 1 << 20)
diff --git a/examples/__init__.py b/examples/__init__.py
new file mode 100644
index 0000000..a85e989
--- /dev/null
+++ b/examples/__init__.py
@@ -0,0 +1 @@
+import examples.excavations as excavations
diff --git a/examples/excavations.py b/examples/excavations.py
new file mode 100644
index 0000000..7fd5c91
--- /dev/null
+++ b/examples/excavations.py
@@ -0,0 +1,18 @@
+from autoarchaeologist.base.excavation import Excavation
+from autoarchaeologist.generic.bigtext import BigText
+from autoarchaeologist.generic.samesame import SameSame
+from autoarchaeologist.data_general.absbin import AbsBin
+from autoarchaeologist.data_general.papertapechecksum import DGC_PaperTapeCheckSum
+
+
+class ShowcaseExcacation(Excavation):
+    """Excavation preloaded with the examiners used by the example run."""
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        # registered one by one, in exactly this order
+        for examiner in (BigText, AbsBin, DGC_PaperTapeCheckSum, SameSame):
+            self.add_examiner(examiner)
+
+
+# exported under a short name so the class can be selected with
+# "--excavator examples.excavations.showcase"
+showcase = ShowcaseExcacation
diff --git a/output/.gitkeep b/output/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/run_example.py b/run_example.py
deleted file mode 100644
index ad7c063..0000000
--- a/run_example.py
+++ /dev/null
@@ -1,32 +0,0 @@
-
-import os
-
-import autoarchaeologist
-
-from autoarchaeologist.generic.bigdigits import BigDigits
-from autoarchaeologist.generic.samesame import SameSame
-from autoarchaeologist.data_general.absbin import AbsBin
-from autoarchaeologist.data_general.papertapechecksum import DGC_PaperTapeCheckSum
-
-
-if __name__ == "__main__":
-
- ctx = autoarchaeologist.Excavation()
-
- ctx.add_examiner(BigDigits)
- ctx.add_examiner(AbsBin)
- ctx.add_examiner(DGC_PaperTapeCheckSum)
- ctx.add_examiner(SameSame)
-
- ff = ctx.add_file_artifact("examples/30001393.bin")
-
- ctx.start_examination()
-
- try:
- os.mkdir("/tmp/_autoarchaologist")
- except FileExistsError:
- pass
-
- ctx.produce_html(html_dir="/tmp/_autoarchaologist")
-
- print("Now point your browser at", ctx.filename_for(ctx).link)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/__init__.py b/tests/data/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/single_excavation--missing.py b/tests/data/single_excavation--missing.py
new file mode 100644
index 0000000..1c7bd42
--- /dev/null
+++ b/tests/data/single_excavation--missing.py
@@ -0,0 +1 @@
+# intentionally exports nothing so that loading a module with a missing "excavation" property can be tested
diff --git a/tests/data/single_excavation.py b/tests/data/single_excavation.py
new file mode 100644
index 0000000..2abfd75
--- /dev/null
+++ b/tests/data/single_excavation.py
@@ -0,0 +1,6 @@
+from examples.excavations import ShowcaseExcacation
+
+# single file excavations must export an "excavation" property
+# whose value is expected to be the base Excavation or a subclass of it
+
+excavation = ShowcaseExcacation
diff --git a/tests/test_example.py b/tests/test_example.py
new file mode 100644
index 0000000..8290ff9
--- /dev/null
+++ b/tests/test_example.py
@@ -0,0 +1,61 @@
+import os
+import shutil
+from types import SimpleNamespace
+import unittest
+
+TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
+SCRATCH_DIR = os.path.join(TESTS_DIR, "_scratch")
+
+from autoarchaeologist.__main__ import perform_excavation
+
+
+def _dir_to_listing(absolute_dir, /, kind):
+    """List the immediate children of ``absolute_dir``.
+
+    ``kind`` selects what is returned: 'dirs' gives the sorted list of
+    sub-directory names, 'files' gives the set of file names.
+
+    Raises FileNotFoundError when the directory cannot be listed and
+    NotImplementedError for any other ``kind``.
+    """
+    assert os.path.isabs(absolute_dir)
+    # Only the top-level entry is needed, so take the first item instead of
+    # walking the whole tree.
+    top_level = next(os.walk(absolute_dir), None)
+    if top_level is None:
+        # os.walk yields nothing at all for a path it cannot scan
+        raise FileNotFoundError(f"not a listable directory: {absolute_dir}")
+    _, dirs, files = top_level
+    match kind:
+        case 'dirs':
+            return sorted(dirs)
+        case 'files':
+            return set(files)
+        case _:
+            raise NotImplementedError()
+
+
+class Test_Example_BasicHtml(unittest.TestCase):
+    """
+    Ensure the showcase excavation produces the expected HTML files for the
+    example input.
+    """
+
+    ARGS = None
+
+    @classmethod
+    def setUpClass(cls):
+        # NOTE(review): filename is a relative path and is passed on as is;
+        # presumably the tests must be run from the repository root.
+        args = SimpleNamespace(
+            dir=SCRATCH_DIR,
+            filename="examples/30001393.bin",
+            excavator="examples.excavations.showcase",
+        )
+        # start every run of this class from an empty scratch directory
+        shutil.rmtree(args.dir, ignore_errors=True)
+        os.makedirs(args.dir, exist_ok=True)
+        cls.ARGS = args
+
+    def test_produces_top_level_index(self):
+        ctx = perform_excavation(self.ARGS)
+        ctx.produce_html()
+
+        toplevel_filenames = _dir_to_listing(self.ARGS.dir, kind='files')
+
+        # assertIn shows the actual listing when the check fails
+        self.assertIn("index.html", toplevel_filenames)
+        self.assertIn("index.css", toplevel_filenames)
+
+    def test_produces_digest_directories(self):
+        ctx = perform_excavation(self.ARGS)
+        ctx.produce_html()
+
+        toplevel_dirnames = _dir_to_listing(self.ARGS.dir, kind='dirs')
+
+        self.assertEqual(toplevel_dirnames, ['08', 'bf', 'fa'])
+
+
+# allow running this test module directly as a script
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_main.py b/tests/test_main.py
new file mode 100644
index 0000000..4bfdc15
--- /dev/null
+++ b/tests/test_main.py
@@ -0,0 +1,138 @@
+import importlib
+import os
+import shutil
+import sys
+from types import SimpleNamespace
+import unittest
+
+TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
+SCRATCH_DIR = os.path.join(TESTS_DIR, "_scratch")
+ROOT_DIR = os.path.normpath(os.path.join(TESTS_DIR, ".."))
+
+sys.path.append(TESTS_DIR)
+
+from autoarchaeologist.__main__ import process_arguments, perform_excavation, \
+ action_for_args
+from autoarchaeologist.base.artifact import ArtifactBase, ArtifactStream
+from examples.excavations import ShowcaseExcacation
+
+
+class Test_Main_arguments(unittest.TestCase):
+    """
+    Ensure the --excavator argument is resolved to the expected excavation,
+    and that unresolvable names are reported as NoSuchExcavationError.
+    """
+
+    def test_excavator_argument_loads_named_excavation(self):
+        args = process_arguments(SimpleNamespace(
+            dir=SCRATCH_DIR,
+            filename=os.path.join(ROOT_DIR, 'examples/30001393.bin'),
+            excavator='examples.excavations.ShowcaseExcacation',
+        ))
+
+        action_name, action_arg = action_for_args(args)
+
+        # strings are compared by value; identity only holds by interning
+        self.assertEqual(action_name, "excavator")
+        self.assertIs(action_arg, ShowcaseExcacation)
+
+    def test_excavator_argument_loads_single_excavation(self):
+        args = process_arguments(SimpleNamespace(
+            dir=SCRATCH_DIR,
+            filename=os.path.join(ROOT_DIR, 'examples/30001393.bin'),
+            excavator='tests.data.single_excavation',
+        ))
+
+        action_name, action_arg = action_for_args(args)
+
+        self.assertEqual(action_name, "excavator")
+        self.assertIs(action_arg, ShowcaseExcacation)
+
+    def test_excavator_argument_missing_excavation_package(self):
+        args = process_arguments(SimpleNamespace(
+            dir=SCRATCH_DIR,
+            filename=os.path.join(ROOT_DIR, 'examples/30001393.bin'),
+            excavator='nonexistent.excavations.showcase',
+        ))
+
+        with self.assertRaises(Exception) as raised:
+            action_for_args(args)
+        # the error class is not imported here, so it is checked through
+        # its rendered message
+        exception = raised.exception
+        self.assertEqual(str(exception), "NoSuchExcavationError: nonexistent.excavations.showcase")
+
+    def test_excavator_argument_missing_excavation_property(self):
+        args = process_arguments(SimpleNamespace(
+            dir=SCRATCH_DIR,
+            filename=os.path.join(ROOT_DIR, 'examples/30001393.bin'),
+            excavator='tests.data.single_excavation--missing',
+        ))
+
+        with self.assertRaises(Exception) as raised:
+            action_for_args(args)
+        exception = raised.exception
+        self.assertEqual(str(exception), "NoSuchExcavationError: tests.data.single_excavation--missing")
+
+
+class Test_Main_processing(unittest.TestCase):
+    """
+    Ensure the showcase excavation yields the expected artifacts for the
+    example input.
+    """
+
+    ARGS = None
+
+    @classmethod
+    def setUpClass(cls):
+        args = process_arguments(SimpleNamespace(
+            dir=SCRATCH_DIR,
+            filename=os.path.join(ROOT_DIR, 'examples/30001393.bin'),
+            excavator='examples.excavations.showcase',
+        ))
+        shutil.rmtree(args.dir, ignore_errors=True)
+        os.makedirs(args.dir, exist_ok=True)
+        # record the unchanging bits against the test case
+        cls.ARGS = args
+
+    def assertArtifactIsChild(self, artifact, parent):
+        # a unittest assertion still runs under -O and reports the actual type
+        self.assertIsInstance(artifact, ArtifactBase)
+        self.assertEqual(list(artifact.parents), [parent])
+
+    def test_excavated_three_total_artifacts(self):
+        excavation = perform_excavation(self.ARGS)
+
+        artifact_hash_keys = list(excavation.hashes.keys())
+        self.assertEqual(len(artifact_hash_keys), 3)
+
+    def test_excavated_one_top_level_artifact(self):
+        excavation = perform_excavation(self.ARGS)
+
+        excavation_child_count = len(excavation.children)
+        self.assertEqual(excavation_child_count, 1)
+
+    def test_produces_top_level_artifact(self):
+        excavation = perform_excavation(self.ARGS)
+
+        artifact = excavation.children[0]
+        self.assertIsInstance(artifact, ArtifactStream)
+        self.assertEqual(artifact.digest, '083a3d5e3098aec38ee5d9bc9f9880d3026e120ff8f058782d49ee3ccafd2a6c')
+        self.assertIn(artifact.digest, excavation.hashes)
+
+    def test_produces_top_level_artifact_whose_parent_is_excavation(self):
+        excavation = perform_excavation(self.ARGS)
+
+        artifact = excavation.children[0]
+        self.assertArtifactIsChild(artifact, excavation)
+
+    def test_produces_two_children_of_the_top_level(self):
+        excavation = perform_excavation(self.ARGS)
+
+        artifact = excavation.children[0]
+        # order the children by digest so the positions below are stable
+        artifact_children = sorted(artifact.children, key=lambda a: a.digest)
+        self.assertEqual(len(artifact_children), 2)
+        self.assertIn(artifact_children[0].digest, excavation.hashes)
+        self.assertEqual(artifact_children[0].digest[:2], 'bf')
+        self.assertArtifactIsChild(artifact_children[0], artifact)
+        self.assertIn(artifact_children[1].digest, excavation.hashes)
+        self.assertEqual(artifact_children[1].digest[:2], 'fa')
+        self.assertArtifactIsChild(artifact_children[1], artifact)
+
+
+# allow running this test module directly as a script
+if __name__ == '__main__':
+    unittest.main()