From d589ed41761ff9cc14de75ee60bf452e7fc79fbb Mon Sep 17 00:00:00 2001 From: Alex J Burke Date: Sat, 10 Feb 2024 13:15:39 +0100 Subject: [PATCH] Implement a basic __main__.py file able to process a --excavator arg. This commit adds a package-level main file which uses argparse to expose a CLI. Given a filename and a named excavator, it will perform an excavation and output the usual HTML files. Default to writing to the classic /tmp output path, but provide support for specifying a -d option to relocate the output directory. Allow a directory argument of "." which places the output folder in the cwd: ./output/_autoarchaologist In order to ensure things are working, include tests of the new command line as well as the basic operation of excavating the example file. The tests check the expected HTML files were written for this known input and assert some basic properties of the excavated artifacts. While here, repair the example excavation which was not functional after recent renaming. In addition, multiple runs of the test suite showed that the temporary file cleanup placed in __del__ could occur at such a point as to cause an exception when opening the file. Move the removal so that it occurs after the file's content has been consumed and it is safe to remove. 
--- .gitignore | 5 + Makefile | 7 ++ autoarchaeologist/__main__.py | 111 ++++++++++++++++++ autoarchaeologist/base/interpretation.py | 4 +- ddhf/ddhf/decorated_context.py | 18 ++- examples/__init__.py | 1 + examples/excavations.py | 18 +++ output/.gitkeep | 0 run_example.py | 32 ------ tests/__init__.py | 0 tests/data/__init__.py | 0 tests/data/single_excavation--missing.py | 1 + tests/data/single_excavation.py | 6 + tests/test_example.py | 61 ++++++++++ tests/test_main.py | 138 +++++++++++++++++++++++ 15 files changed, 366 insertions(+), 36 deletions(-) create mode 100644 Makefile create mode 100644 autoarchaeologist/__main__.py create mode 100644 examples/__init__.py create mode 100644 examples/excavations.py create mode 100644 output/.gitkeep delete mode 100644 run_example.py create mode 100644 tests/__init__.py create mode 100644 tests/data/__init__.py create mode 100644 tests/data/single_excavation--missing.py create mode 100644 tests/data/single_excavation.py create mode 100644 tests/test_example.py create mode 100644 tests/test_main.py diff --git a/.gitignore b/.gitignore index 53bb4e6..7ae0c03 100644 --- a/.gitignore +++ b/.gitignore @@ -6,5 +6,10 @@ __pycache__/ # Sphinx documentation docs/_build/ +# Build files +venv/ + # Temporary files _.* +/output/ +/tests/_scratch/ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..96d0cfe --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ +default: example + +example: + python3 -m autoarchaeologist --excavator examples.excavations.showcase examples/30001393.bin + +test: + @./venv/bin/python3 -m unittest diff --git a/autoarchaeologist/__main__.py b/autoarchaeologist/__main__.py new file mode 100644 index 0000000..9d19bc7 --- /dev/null +++ b/autoarchaeologist/__main__.py @@ -0,0 +1,111 @@ +import argparse +import importlib +import os +import sys + +from autoarchaeologist import Excavation + + +class MissingArgumentsError(argparse.ArgumentError): + def __init__(self, detail): + super().__init__(None, detail) 
+ + def __str__(self): + return f"{self.__class__.__name__}: {self.args[0]}" + + +class NoSuchExcavationError(RuntimeError): + def __init__(self, excavation_name): + super().__init__(excavation_name) + + def __str__(self): + return f"{self.__class__.__name__}: {self.args[0]}" + + +def action_for_args(args): + if getattr(args, 'excavator', None): + return ("excavator", load_excavator_by_name(args.excavator)) + + raise MissingArgumentsError("no valid action was requsted") + + +def load_excavator_by_name(excavator): + # first try to grab an arbitrary excavation within a single file + + try: + # directly load the excavator as a named module + exacations_package = importlib.import_module(excavator) + return getattr(exacations_package, 'excavation') + except AttributeError: + # no excavation property found in the loaded module so error out + raise NoSuchExcavationError(excavator) + except ModuleNotFoundError: + # no such module so proceed to try as a named property within a module + pass + + # now try to access a named property within a module + excavaor_parts = excavator.split('.') + excavation_name = excavaor_parts.pop() + package_name = '.'.join(excavaor_parts) + try: + exacations_package = importlib.import_module(package_name) + return getattr(exacations_package, excavation_name) + except Exception as e: + raise NoSuchExcavationError(excavator) + + +def parse_arguments(argv=None): + parser = argparse.ArgumentParser() + parser.add_argument("-d", "--dir", default="/tmp/_autoarchaologist") + parser.add_argument('--excavator', + help="The name of a file to import that contains an excavation" + "which will be used to extract information from the artifact.") + parser.add_argument('filename') + + return parser.parse_args(args=argv) + + +def process_arguments(args): + if args.dir == ".": + args.dir = os.path.join(os.getcwd(), "output", "_autoarchaologist") + + if args.filename: + args.filename = os.path.abspath(args.filename) + + return args + + +def 
perform_excavation(args): + match action_for_args(args): + case "excavator", AnExcavation: + assert issubclass(AnExcavation, Excavation) + ctx = AnExcavation(html_dir=args.dir) + case action, _: + raise NotImplementedError(f"action: {action}") + + ctx.add_file_artifact(args.filename) + + ctx.start_examination() + + return ctx + + +def main_throwing(): + args = process_arguments(parse_arguments()) + + try: + os.mkdir(args.dir) + except FileExistsError: + pass + + ctx = perform_excavation(args) + ctx.produce_html() + print("Now point your browser at", ctx.filename_for(ctx).link) + + +if __name__ == "__main__": + try: + main_throwing() + except Exception as e: + print(str(e)) + sys.exit(1) diff --git a/autoarchaeologist/base/interpretation.py b/autoarchaeologist/base/interpretation.py index d850b9f..5503ca3 100644 --- a/autoarchaeologist/base/interpretation.py +++ b/autoarchaeologist/base/interpretation.py @@ -40,9 +40,6 @@ def __exit__(self, exc_type, exc_value, traceback): self.file.close() self.file = None - def __del__(self): - os.remove(self.filename) - def write(self, *args, **kwargs): ''' ... ''' if self.file is None: @@ -55,6 +52,7 @@ def html_interpretation(self, fo, _this): fo.write("

" + self.title + "

\n") for i in file: fo.write(i) + os.remove(self.filename) class Utf8Interpretation(HtmlInterpretation): ''' diff --git a/ddhf/ddhf/decorated_context.py b/ddhf/ddhf/decorated_context.py index 7fe5bb6..83f3f05 100644 --- a/ddhf/ddhf/decorated_context.py +++ b/ddhf/ddhf/decorated_context.py @@ -116,13 +116,29 @@ def from_argv(self): "AUTOARCHAEOLOGIST_BITSTORE_CACHE": "ddhf_bitstore_cache", } -def main(job, html_subdir="tmp", **kwargs): +def parse_arguments(argv=None): + parser = argparse.ArgumentParser() + parser.add_argument('-o', '--out', default='/tmp/_autoarchaologist') + + args = parser.parse_args(args=argv) + if args.out == '.': + args.out = os.path.join(os.getcwd(), "_autoarchaologist") + return args + +def main(job, html_subdir, **kwargs): + args = parse_arguments() + kwargs["html_dir"] = args.out + ''' A standard main routine to reduce boiler-plate ''' for key in os.environ: i = OK_ENVS.get(key) if i: kwargs[i] = os.environ[key] + if 'html_dir' not in kwargs: + raise AttributeError("missing: html_dir") + + kwargs['html_dir'] = os.path.join(kwargs['html_dir'], html_subdir) kwargs.setdefault('download_links', True) kwargs.setdefault('download_limit', 1 << 20) diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 0000000..a85e989 --- /dev/null +++ b/examples/__init__.py @@ -0,0 +1 @@ +import examples.excavations as excavations diff --git a/examples/excavations.py b/examples/excavations.py new file mode 100644 index 0000000..7fd5c91 --- /dev/null +++ b/examples/excavations.py @@ -0,0 +1,18 @@ +from autoarchaeologist.base.excavation import Excavation +from autoarchaeologist.generic.bigtext import BigText +from autoarchaeologist.generic.samesame import SameSame +from autoarchaeologist.data_general.absbin import AbsBin +from autoarchaeologist.data_general.papertapechecksum import DGC_PaperTapeCheckSum + + +class ShowcaseExcacation(Excavation): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + self.add_examiner(BigText) 
+ self.add_examiner(AbsBin) + self.add_examiner(DGC_PaperTapeCheckSum) + self.add_examiner(SameSame) + + +showcase = ShowcaseExcacation diff --git a/output/.gitkeep b/output/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/run_example.py b/run_example.py deleted file mode 100644 index ad7c063..0000000 --- a/run_example.py +++ /dev/null @@ -1,32 +0,0 @@ - -import os - -import autoarchaeologist - -from autoarchaeologist.generic.bigdigits import BigDigits -from autoarchaeologist.generic.samesame import SameSame -from autoarchaeologist.data_general.absbin import AbsBin -from autoarchaeologist.data_general.papertapechecksum import DGC_PaperTapeCheckSum - - -if __name__ == "__main__": - - ctx = autoarchaeologist.Excavation() - - ctx.add_examiner(BigDigits) - ctx.add_examiner(AbsBin) - ctx.add_examiner(DGC_PaperTapeCheckSum) - ctx.add_examiner(SameSame) - - ff = ctx.add_file_artifact("examples/30001393.bin") - - ctx.start_examination() - - try: - os.mkdir("/tmp/_autoarchaologist") - except FileExistsError: - pass - - ctx.produce_html(html_dir="/tmp/_autoarchaologist") - - print("Now point your browser at", ctx.filename_for(ctx).link) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/__init__.py b/tests/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/single_excavation--missing.py b/tests/data/single_excavation--missing.py new file mode 100644 index 0000000..1c7bd42 --- /dev/null +++ b/tests/data/single_excavation--missing.py @@ -0,0 +1 @@ +# exporting nothing here such that we can test loading with missing property diff --git a/tests/data/single_excavation.py b/tests/data/single_excavation.py new file mode 100644 index 0000000..2abfd75 --- /dev/null +++ b/tests/data/single_excavation.py @@ -0,0 +1,6 @@ +from examples.excavations import ShowcaseExcacation + +# single file excavations must export an "excavation" property +# whose value is expected to be 
a subclass or the base Excavation + +excavation = ShowcaseExcacation diff --git a/tests/test_example.py b/tests/test_example.py new file mode 100644 index 0000000..8290ff9 --- /dev/null +++ b/tests/test_example.py @@ -0,0 +1,61 @@ +import os +import shutil +from types import SimpleNamespace +import unittest + +TESTS_DIR = os.path.dirname(os.path.abspath(__file__)) +SCRATCH_DIR = os.path.join(TESTS_DIR, "_scratch") + +from autoarchaeologist.__main__ import perform_excavation + + +def _dir_to_listing(absolute_dir, /, kind): + assert os.path.isabs(absolute_dir) + _, dirs, files = list(os.walk(absolute_dir))[0] + match kind: + case 'dirs': + return sorted(dirs) + case 'files': + return set(files) + case _: + raise NotImplementedError() + + +class Test_Example_BasicHtml(unittest.TestCase): + """ + Ensure run_example produces expected HTML files for the example input. + """ + + ARGS = None + + @classmethod + def setUpClass(cls): + args = SimpleNamespace( + dir=SCRATCH_DIR, + filename="examples/30001393.bin", + excavator="examples.excavations.showcase", + ) + shutil.rmtree(args.dir, ignore_errors=True) + os.makedirs(args.dir, exist_ok=True) + cls.ARGS = args + + def test_produces_top_level_index(self): + ctx = perform_excavation(self.ARGS) + ctx.produce_html() + + toplevel_filenames = _dir_to_listing(self.ARGS.dir, kind='files') + + self.assertTrue("index.html" in toplevel_filenames) + self.assertTrue("index.css" in toplevel_filenames) + + def test_produces_digest_directories(self): + ctx = perform_excavation(self.ARGS) + ctx.produce_html() + + toplevel_dirnames = _dir_to_listing(self.ARGS.dir, kind='dirs') + + self.assertEqual(toplevel_dirnames, ['08', 'bf', 'fa']) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_main.py b/tests/test_main.py new file mode 100644 index 0000000..4bfdc15 --- /dev/null +++ b/tests/test_main.py @@ -0,0 +1,138 @@ +import importlib +import os +import shutil +import sys +from types import SimpleNamespace +import unittest 
+ +TESTS_DIR = os.path.dirname(os.path.abspath(__file__)) +SCRATCH_DIR = os.path.join(TESTS_DIR, "_scratch") +ROOT_DIR = os.path.normpath(os.path.join(TESTS_DIR, "..")) + +sys.path.append(TESTS_DIR) + +from autoarchaeologist.__main__ import process_arguments, perform_excavation, \ + action_for_args +from autoarchaeologist.base.artifact import ArtifactBase, ArtifactStream +from examples.excavations import ShowcaseExcacation + + +class Test_Main_arguments(unittest.TestCase): + """ + Ensure run_example excavates the expected artifacts for the example input. + """ + + def test_excavator_argument_loads_named_excavation(self): + args = process_arguments(SimpleNamespace( + dir=SCRATCH_DIR, + filename=os.path.join(ROOT_DIR, 'examples/30001393.bin'), + excavator='examples.excavations.ShowcaseExcacation', + )) + + action_name, action_arg = action_for_args(args) + + self.assertIs(action_name, "excavator") + self.assertIs(action_arg, ShowcaseExcacation) + + def test_excavator_argument_loads_single_excavation(self): + args = process_arguments(SimpleNamespace( + dir=SCRATCH_DIR, + filename=os.path.join(ROOT_DIR, 'examples/30001393.bin'), + excavator='tests.data.single_excavation', + )) + + action_name, action_arg = action_for_args(args) + + self.assertIs(action_name, "excavator") + self.assertIs(action_arg, ShowcaseExcacation) + + def test_excavator_argument_missing_excavation_package(self): + args = process_arguments(SimpleNamespace( + dir=SCRATCH_DIR, + filename=os.path.join(ROOT_DIR, 'examples/30001393.bin'), + excavator='nonexistent.excavations.showcase', + )) + + with self.assertRaises(Exception) as raised: + action_for_args(args) + exception = raised.exception + self.assertEqual(str(exception), "NoSuchExcavationError: nonexistent.excavations.showcase") + + def test_excavator_argument_missing_excavation_property(self): + args = process_arguments(SimpleNamespace( + dir=SCRATCH_DIR, + filename=os.path.join(ROOT_DIR, 'examples/30001393.bin'), + 
excavator='tests.data.single_excavation--missing', + )) + + with self.assertRaises(Exception) as raised: + action_for_args(args) + exception = raised.exception + self.assertEqual(str(exception), "NoSuchExcavationError: tests.data.single_excavation--missing") + + +class Test_Main_processing(unittest.TestCase): + """ + Ensure run_example excavates the expected artifacts for the example input. + """ + + ARGS = None + + @classmethod + def setUpClass(cls): + args = process_arguments(SimpleNamespace( + dir=SCRATCH_DIR, + filename=os.path.join(ROOT_DIR, 'examples/30001393.bin'), + excavator='examples.excavations.showcase', + )) + shutil.rmtree(args.dir, ignore_errors=True) + os.makedirs(args.dir, exist_ok=True) + # record the unchanging bits against the test case + cls.ARGS = args + + def assertArtifactIsChild(self, artifact, parent): + assert issubclass(artifact.__class__, ArtifactBase) + self.assertEqual(list(artifact.parents), [parent]) + + def test_excavated_three_total_artifacts(self): + excavation = perform_excavation(self.ARGS) + + arfifact_hash_keys = list(excavation.hashes.keys()) + self.assertEqual(len(arfifact_hash_keys), 3) + + def test_excavated_one_top_level_artifact(self): + excavation = perform_excavation(self.ARGS) + + excavatoin_child_count = len(excavation.children) + self.assertEqual(excavatoin_child_count, 1) + + def test_produces_top_level_artifact(self): + excavation = perform_excavation(self.ARGS) + + artifact = excavation.children[0] + self.assertIsInstance(artifact, ArtifactStream) + self.assertEqual(artifact.digest, '083a3d5e3098aec38ee5d9bc9f9880d3026e120ff8f058782d49ee3ccafd2a6c') + self.assertTrue(artifact.digest in excavation.hashes) + + def test_produces_top_level_artifact_whose_parent_is_excavation(self): + excavation = perform_excavation(self.ARGS) + + artifact = excavation.children[0] + self.assertArtifactIsChild(artifact, excavation) + + def test_produces_two_children_of_the_top_level(self): + excavation = perform_excavation(self.ARGS) 
+ + artifact = excavation.children[0] + artifact_children = sorted(artifact.children, key=lambda a: a.digest) + self.assertEqual(len(artifact_children), 2) + self.assertTrue(artifact_children[0].digest in excavation.hashes) + self.assertTrue(artifact_children[0].digest.startswith('bf')) + self.assertArtifactIsChild(artifact_children[0], artifact) + self.assertTrue(artifact_children[1].digest in excavation.hashes) + self.assertTrue(artifact_children[1].digest.startswith('fa')) + self.assertArtifactIsChild(artifact_children[1], artifact) + + +if __name__ == '__main__': + unittest.main()