diff --git a/.gitignore b/.gitignore
index 53bb4e6..7ae0c03 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,5 +6,11 @@ __pycache__/
 # Sphinx documentation
 docs/_build/
 
+# Build files
+venv/
+
 # Temporary files
 _.*
+/output/*
+!/output/.gitkeep
+/tests/_scratch/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..96d0cfe
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,9 @@
+.PHONY: default example test
+
+default: example
+
+example:
+	python3 -m autoarchaeologist --excavator examples.excavations.showcase examples/30001393.bin
+
+test:
+	@./venv/bin/python3 -m unittest
diff --git a/autoarchaeologist/__main__.py b/autoarchaeologist/__main__.py
new file mode 100644
index 0000000..9d19bc7
--- /dev/null
+++ b/autoarchaeologist/__main__.py
@@ -0,0 +1,128 @@
+"""
+Command line entry point: run a named excavation against a single file.
+"""
+
+import argparse
+import importlib
+import os
+import sys
+
+from autoarchaeologist import Excavation
+
+
+class MissingArgumentsError(argparse.ArgumentError):
+    """Raised when the parsed arguments do not select any action."""
+
+    def __init__(self, detail):
+        super().__init__(None, detail)
+
+    def __str__(self):
+        return f"{self.__class__.__name__}: {self.args[0]}"
+
+
+class NoSuchExcavationError(RuntimeError):
+    """Raised when an excavator name cannot be resolved to an excavation."""
+
+    def __init__(self, excavation_name):
+        super().__init__(excavation_name)
+
+    def __str__(self):
+        return f"{self.__class__.__name__}: {self.args[0]}"
+
+
+def action_for_args(args):
+    """Return the (action name, action argument) pair selected by args."""
+    if getattr(args, 'excavator', None):
+        return ("excavator", load_excavator_by_name(args.excavator))
+
+    raise MissingArgumentsError("no valid action was requested")
+
+
+def load_excavator_by_name(excavator):
+    """
+    Resolve a dotted name to an excavation.
+
+    The name is tried first as a module exporting an "excavation" property
+    and then as a named property of its parent module.  Raises
+    NoSuchExcavationError when neither lookup succeeds.
+    """
+    # first try to grab an arbitrary excavation within a single file
+    try:
+        # directly load the excavator as a named module
+        excavations_package = importlib.import_module(excavator)
+    except ModuleNotFoundError:
+        # no such module so proceed to try as a named property within a module
+        pass
+    else:
+        try:
+            return excavations_package.excavation
+        except AttributeError as err:
+            # no excavation property found in the loaded module so error out
+            raise NoSuchExcavationError(excavator) from err
+
+    # now try to access a named property within a module
+    package_name, _, excavation_name = excavator.rpartition('.')
+    try:
+        excavations_package = importlib.import_module(package_name)
+        return getattr(excavations_package, excavation_name)
+    except Exception as err:
+        raise NoSuchExcavationError(excavator) from err
+
+
+def parse_arguments(argv=None):
+    """Parse the command line (sys.argv when argv is None) into a namespace."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-d", "--dir", default="/tmp/_autoarchaologist")
+    parser.add_argument('--excavator',
+        help="The name of a file to import that contains an excavation "
+             "which will be used to extract information from the artifact.")
+    parser.add_argument('filename')
+
+    return parser.parse_args(args=argv)
+
+
+def process_arguments(args):
+    """Expand a "." output dir under the cwd and make filename absolute."""
+    if args.dir == ".":
+        args.dir = os.path.join(os.getcwd(), "output", "_autoarchaologist")
+
+    if args.filename:
+        args.filename = os.path.abspath(args.filename)
+
+    return args
+
+
+def perform_excavation(args):
+    """Build the selected excavation, add args.filename and examine it."""
+    match action_for_args(args):
+        case "excavator", AnExcavation:
+            if not issubclass(AnExcavation, Excavation):
+                raise TypeError(f"not an Excavation: {AnExcavation!r}")
+            ctx = AnExcavation(html_dir=args.dir)
+        case action, _:
+            raise NotImplementedError(f"action: {action}")
+
+    ctx.add_file_artifact(args.filename)
+
+    ctx.start_examination()
+
+    return ctx
+
+
+def main_throwing():
+    """Run the command line tool, letting any exception propagate."""
+    args = process_arguments(parse_arguments())
+
+    os.makedirs(args.dir, exist_ok=True)
+
+    ctx = perform_excavation(args)
+    ctx.produce_html()
+    print("Now point your browser at", ctx.filename_for(ctx).link)
+
+
+if __name__ == "__main__":
+    try:
+        main_throwing()
+    except Exception as e:
+        print(str(e), file=sys.stderr)
+        sys.exit(1)
diff --git a/autoarchaeologist/base/interpretation.py b/autoarchaeologist/base/interpretation.py
index d850b9f..5503ca3 100644
--- a/autoarchaeologist/base/interpretation.py
+++ b/autoarchaeologist/base/interpretation.py
@@ -40,9 +40,6 @@ def __exit__(self, exc_type, exc_value, traceback):
         self.file.close()
         self.file = None
 
-    def __del__(self):
-        os.remove(self.filename)
-
     def write(self, *args, **kwargs):
         ''' ... '''
         if self.file is None:
@@ -55,6 +52,7 @@ def html_interpretation(self, fo, _this):
             fo.write("<H3>" + self.title + "</H3>\n")
             for i in file:
                 fo.write(i)
+        os.remove(self.filename)
 
 class Utf8Interpretation(HtmlInterpretation):
     '''
diff --git a/ddhf/ddhf/decorated_context.py b/ddhf/ddhf/decorated_context.py
index 7fe5bb6..83f3f05 100644
--- a/ddhf/ddhf/decorated_context.py
+++ b/ddhf/ddhf/decorated_context.py
@@ -116,13 +116,30 @@ def from_argv(self):
     "AUTOARCHAEOLOGIST_BITSTORE_CACHE": "ddhf_bitstore_cache",
 }
 
+def parse_arguments(argv=None):
+    import argparse  # local import: this patch adds no module-level one
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-o', '--out', default='/tmp/_autoarchaologist')
+
+    args = parser.parse_args(args=argv)
+    if args.out == '.':
+        args.out = os.path.join(os.getcwd(), "_autoarchaologist")
+    return args
+
 def main(job, html_subdir="tmp", **kwargs):
     ''' A standard main routine to reduce boiler-plate '''
+    args = parse_arguments()
+    kwargs["html_dir"] = args.out
+
     for key in os.environ:
         i = OK_ENVS.get(key)
         if i:
             kwargs[i] = os.environ[key]
 
+    if 'html_dir' not in kwargs:
+        raise AttributeError("missing: html_dir")
+
+    kwargs['html_dir'] = os.path.join(kwargs['html_dir'], html_subdir)
     kwargs.setdefault('download_links', True)
     kwargs.setdefault('download_limit', 1 << 20)
 
diff --git a/examples/__init__.py b/examples/__init__.py
new file mode 100644
index 0000000..a85e989
--- /dev/null
+++ b/examples/__init__.py
@@ -0,0 +1 @@
+from examples import excavations
diff --git a/examples/excavations.py b/examples/excavations.py
new file mode 100644
index 0000000..7fd5c91
--- /dev/null
+++ b/examples/excavations.py
@@ -0,0 +1,19 @@
+from autoarchaeologist.base.excavation import Excavation
+from autoarchaeologist.generic.bigtext import BigText
+from autoarchaeologist.generic.samesame import SameSame
+from autoarchaeologist.data_general.absbin import AbsBin
+from autoarchaeologist.data_general.papertapechecksum import DGC_PaperTapeCheckSum
+
+
+class ShowcaseExcacation(Excavation):
+    """Excavation with the BigText, AbsBin, checksum and SameSame examiners."""
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        self.add_examiner(BigText)
+        self.add_examiner(AbsBin)
+        self.add_examiner(DGC_PaperTapeCheckSum)
+        self.add_examiner(SameSame)
+
+
+showcase = ShowcaseExcacation
diff --git a/output/.gitkeep b/output/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/run_example.py b/run_example.py
deleted file mode 100644
index ad7c063..0000000
--- a/run_example.py
+++ /dev/null
@@ -1,32 +0,0 @@
-
-import os
-
-import autoarchaeologist
-
-from autoarchaeologist.generic.bigdigits import BigDigits
-from autoarchaeologist.generic.samesame import SameSame
-from autoarchaeologist.data_general.absbin import AbsBin
-from autoarchaeologist.data_general.papertapechecksum import DGC_PaperTapeCheckSum
-
-
-if __name__ == "__main__":
-
-    ctx = autoarchaeologist.Excavation()
-
-    ctx.add_examiner(BigDigits)
-    ctx.add_examiner(AbsBin)
-    ctx.add_examiner(DGC_PaperTapeCheckSum)
-    ctx.add_examiner(SameSame)
-
-    ff = ctx.add_file_artifact("examples/30001393.bin")
-
-    ctx.start_examination()
-
-    try:
-        os.mkdir("/tmp/_autoarchaologist")
-    except FileExistsError:
-        pass
-
-    ctx.produce_html(html_dir="/tmp/_autoarchaologist")
-
-    print("Now point your browser at", ctx.filename_for(ctx).link)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/__init__.py b/tests/data/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/single_excavation--missing.py b/tests/data/single_excavation--missing.py
new file mode 100644
index 0000000..1c7bd42
--- /dev/null
+++ b/tests/data/single_excavation--missing.py
@@ -0,0 +1 @@
+# exporting nothing here such that we can test loading with missing property
diff --git a/tests/data/single_excavation.py b/tests/data/single_excavation.py
new file mode 100644
index 0000000..2abfd75
--- /dev/null
+++ b/tests/data/single_excavation.py
@@ -0,0 +1,6 @@
+from examples.excavations import ShowcaseExcacation
+
+# single file excavations must export an "excavation" property
+# whose value is expected to be a subclass of the base Excavation
+
+excavation = ShowcaseExcacation
diff --git a/tests/test_example.py b/tests/test_example.py
new file mode 100644
index 0000000..8290ff9
--- /dev/null
+++ b/tests/test_example.py
@@ -0,0 +1,62 @@
+import os
+import shutil
+from types import SimpleNamespace
+import unittest
+
+TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
+SCRATCH_DIR = os.path.join(TESTS_DIR, "_scratch")
+ROOT_DIR = os.path.normpath(os.path.join(TESTS_DIR, ".."))
+
+from autoarchaeologist.__main__ import perform_excavation
+
+
+def _dir_to_listing(absolute_dir, /, kind):
+    assert os.path.isabs(absolute_dir)
+    _, dirs, files = next(os.walk(absolute_dir))
+    match kind:
+        case 'dirs':
+            return sorted(dirs)
+        case 'files':
+            return set(files)
+        case _:
+            raise NotImplementedError()
+
+
+class Test_Example_BasicHtml(unittest.TestCase):
+    """
+    Ensure the showcase excavation produces the expected HTML output files.
+    """
+
+    ARGS = None
+
+    @classmethod
+    def setUpClass(cls):
+        args = SimpleNamespace(
+            dir=SCRATCH_DIR,
+            filename=os.path.join(ROOT_DIR, "examples/30001393.bin"),
+            excavator="examples.excavations.showcase",
+        )
+        shutil.rmtree(args.dir, ignore_errors=True)
+        os.makedirs(args.dir, exist_ok=True)
+        cls.ARGS = args
+
+    def test_produces_top_level_index(self):
+        ctx = perform_excavation(self.ARGS)
+        ctx.produce_html()
+
+        toplevel_filenames = _dir_to_listing(self.ARGS.dir, kind='files')
+
+        self.assertIn("index.html", toplevel_filenames)
+        self.assertIn("index.css", toplevel_filenames)
+
+    def test_produces_digest_directories(self):
+        ctx = perform_excavation(self.ARGS)
+        ctx.produce_html()
+
+        toplevel_dirnames = _dir_to_listing(self.ARGS.dir, kind='dirs')
+
+        self.assertEqual(toplevel_dirnames, ['08', 'bf', 'fa'])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/test_main.py b/tests/test_main.py
new file mode 100644
index 0000000..4bfdc15
--- /dev/null
+++ b/tests/test_main.py
@@ -0,0 +1,138 @@
+import importlib
+import os
+import shutil
+import sys
+from types import SimpleNamespace
+import unittest
+
+TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
+SCRATCH_DIR = os.path.join(TESTS_DIR, "_scratch")
+ROOT_DIR = os.path.normpath(os.path.join(TESTS_DIR, ".."))
+
+sys.path.append(TESTS_DIR)
+
+from autoarchaeologist.__main__ import process_arguments, perform_excavation, \
+    action_for_args, NoSuchExcavationError
+from autoarchaeologist.base.artifact import ArtifactBase, ArtifactStream
+from examples.excavations import ShowcaseExcacation
+
+
+class Test_Main_arguments(unittest.TestCase):
+    """
+    Ensure the excavator argument is resolved to the expected excavation.
+    """
+
+    def test_excavator_argument_loads_named_excavation(self):
+        args = process_arguments(SimpleNamespace(
+            dir=SCRATCH_DIR,
+            filename=os.path.join(ROOT_DIR, 'examples/30001393.bin'),
+            excavator='examples.excavations.ShowcaseExcacation',
+        ))
+
+        action_name, action_arg = action_for_args(args)
+
+        self.assertEqual(action_name, "excavator")
+        self.assertIs(action_arg, ShowcaseExcacation)
+
+    def test_excavator_argument_loads_single_excavation(self):
+        args = process_arguments(SimpleNamespace(
+            dir=SCRATCH_DIR,
+            filename=os.path.join(ROOT_DIR, 'examples/30001393.bin'),
+            excavator='tests.data.single_excavation',
+        ))
+
+        action_name, action_arg = action_for_args(args)
+
+        self.assertEqual(action_name, "excavator")
+        self.assertIs(action_arg, ShowcaseExcacation)
+
+    def test_excavator_argument_missing_excavation_package(self):
+        args = process_arguments(SimpleNamespace(
+            dir=SCRATCH_DIR,
+            filename=os.path.join(ROOT_DIR, 'examples/30001393.bin'),
+            excavator='nonexistent.excavations.showcase',
+        ))
+
+        with self.assertRaises(NoSuchExcavationError) as raised:
+            action_for_args(args)
+        exception = raised.exception
+        self.assertEqual(str(exception), "NoSuchExcavationError: nonexistent.excavations.showcase")
+
+    def test_excavator_argument_missing_excavation_property(self):
+        args = process_arguments(SimpleNamespace(
+            dir=SCRATCH_DIR,
+            filename=os.path.join(ROOT_DIR, 'examples/30001393.bin'),
+            excavator='tests.data.single_excavation--missing',
+        ))
+
+        with self.assertRaises(NoSuchExcavationError) as raised:
+            action_for_args(args)
+        exception = raised.exception
+        self.assertEqual(str(exception), "NoSuchExcavationError: tests.data.single_excavation--missing")
+
+
+class Test_Main_processing(unittest.TestCase):
+    """
+    Ensure the showcase excavation yields the expected artifacts for the example input.
+    """
+
+    ARGS = None
+
+    @classmethod
+    def setUpClass(cls):
+        args = process_arguments(SimpleNamespace(
+            dir=SCRATCH_DIR,
+            filename=os.path.join(ROOT_DIR, 'examples/30001393.bin'),
+            excavator='examples.excavations.showcase',
+        ))
+        shutil.rmtree(args.dir, ignore_errors=True)
+        os.makedirs(args.dir, exist_ok=True)
+        # record the unchanging bits against the test case
+        cls.ARGS = args
+
+    def assertArtifactIsChild(self, artifact, parent):
+        self.assertIsInstance(artifact, ArtifactBase)
+        self.assertEqual(list(artifact.parents), [parent])
+
+    def test_excavated_three_total_artifacts(self):
+        excavation = perform_excavation(self.ARGS)
+
+        artifact_hash_keys = list(excavation.hashes.keys())
+        self.assertEqual(len(artifact_hash_keys), 3)
+
+    def test_excavated_one_top_level_artifact(self):
+        excavation = perform_excavation(self.ARGS)
+
+        excavation_child_count = len(excavation.children)
+        self.assertEqual(excavation_child_count, 1)
+
+    def test_produces_top_level_artifact(self):
+        excavation = perform_excavation(self.ARGS)
+
+        artifact = excavation.children[0]
+        self.assertIsInstance(artifact, ArtifactStream)
+        self.assertEqual(artifact.digest, '083a3d5e3098aec38ee5d9bc9f9880d3026e120ff8f058782d49ee3ccafd2a6c')
+        self.assertIn(artifact.digest, excavation.hashes)
+
+    def test_produces_top_level_artifact_whose_parent_is_excavation(self):
+        excavation = perform_excavation(self.ARGS)
+
+        artifact = excavation.children[0]
+        self.assertArtifactIsChild(artifact, excavation)
+
+    def test_produces_two_children_of_the_top_level(self):
+        excavation = perform_excavation(self.ARGS)
+
+        artifact = excavation.children[0]
+        artifact_children = sorted(artifact.children, key=lambda a: a.digest)
+        self.assertEqual(len(artifact_children), 2)
+        self.assertIn(artifact_children[0].digest, excavation.hashes)
+        self.assertTrue(artifact_children[0].digest.startswith('bf'))
+        self.assertArtifactIsChild(artifact_children[0], artifact)
+        self.assertIn(artifact_children[1].digest, excavation.hashes)
+        self.assertTrue(artifact_children[1].digest.startswith('fa'))
+        self.assertArtifactIsChild(artifact_children[1], artifact)
+
+
+if __name__ == '__main__':
+    unittest.main()