Skip to content
This repository was archived by the owner on Mar 9, 2026. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ docs/_build/

# Temporary files
_.*
_autoarchaologist/
tests/_scratch/
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
test:
@python3 -m unittest discover
@python3 -m unittest discover tests/autoarchaologist

default: test
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,35 @@ computers, in a convenient and user-friendly way.
The AutoArchaeologist is a framework where plugins can be written to take
apart and present old data media, in a browser-friendly fashion.

## Getting started

To get a feel for how this works and ensure you've got everything set up,
we've included a sample file that can be excavated as an example:

```sh
python3 run_example.py
```

When you're ready to go further and process your own images, use `run.py`.

## Using run.py

The various utilities that allow the extraction and processing of data are
designed to be composed into an Excavation that can be run against binary
images of disks and other media. Some standard excavations have been put
together in bundles and exposed via a single `run` command.

Performing an excavation of a file is as simple as the following:

```sh
python3 run.py --excavator <excavator> <filename>
```

Usage information, including the list of excavators that are available as well
as other options, will be output when running without arguments: `python3 run.py`.

## From the DDHF

# First Example: Commodore CBM900 Harddisk

Our first historic use of the AutoArchaeologist is now online:
Expand Down
1 change: 1 addition & 0 deletions autoarchaeologist/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from .base.excavation import Excavation, DuplicateArtifact
from .record import Record
from .base.namespace import NameSpace
from . import excavators
2 changes: 1 addition & 1 deletion autoarchaeologist/base/excavation.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ def summary(self, *args, **kwargs):
font-family: "Inconsolata", "Courier New", mono-space;
}
td,th {
padding: 0 10px 0;
padding: 0 10px 0;
}
th {
position: sticky; top: 0; background-color: #eeeeee;
Expand Down
6 changes: 0 additions & 6 deletions autoarchaeologist/ddhf/bitstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import mmap
import urllib.request

import ddhf_bitstore_metadata

from ..base import artifact
from ..base import excavation
from ..container import simh_tap_file
Expand Down Expand Up @@ -165,10 +163,6 @@ def fetch_single(self, arg):
print("Could not fetch", arg)
return

meta = ddhf_bitstore_metadata.internals.metadata.MetadataBase(metatxt)
if meta is None:
return

if self.media_types and not self.check_media_type(meta):
return

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
'''


from ..base import excavation
from ..base import type_case
from ..generic import samesame
from ..generic import textfiles
Expand All @@ -12,10 +13,12 @@
cpm.cpm_filename_typecase.set_slug(0x5f, '_', '_')
cpm.cpm_filename_typecase.set_slug(0x3b, ';', ';')

def std_cpm_excavation(exc):
class Cpm(excavation.Excavation):

''' Standard CP/M excavation '''

exc.type_case = type_case.DS2089Cpm()
exc.add_examiner(cpm.CpmFileSystem)
exc.add_examiner(textfiles.TextFile)
exc.add_examiner(samesame.SameSame)
def __init__(self, **kwargs):
self.type_case = type_case.DS2089Cpm()
self.add_examiner(cpm.CpmFileSystem)
self.add_examiner(textfiles.TextFile)
self.add_examiner(samesame.SameSame)
23 changes: 22 additions & 1 deletion autoarchaeologist/ddhf/decorated_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

'''

import argparse
import os

from ..base import excavation
Expand All @@ -18,6 +19,7 @@ class DDHF_Excavation(excavation.Excavation):

def __init__(
self,
Excavation,
ddhf_topic=None,
ddhf_topic_link=None,
ddhf_bitstore_cache=None,
Expand All @@ -30,6 +32,9 @@ def __init__(
self.ddhf_bitstore_cache = ddhf_bitstore_cache
super().__init__(**kwargs)

# apply the configuration of the chosen excavation
Excavation.__init__(self, **kwargs)

def html_prefix_banner(self, file, _this):
''' Emit the banner for this excavation '''
file.write('<table>\n')
Expand Down Expand Up @@ -84,13 +89,29 @@ def from_bitstore(self, *args, **kwargs):
"AUTOARCHAEOLOGIST_BITSTORE_CACHE": "ddhf_bitstore_cache",
}

def main(job, html_subdir="tmp", **kwargs):
def parse_arguments(argv=None):
    '''
    Parse the command line options understood by main().

    `argv` is the list of argument strings to parse; when it is None
    argparse falls back to the arguments of the running process.
    Returns the argparse namespace, whose `out` attribute holds the
    output directory.
    '''
    cli = argparse.ArgumentParser()
    cli.add_argument('-o', '--out', default='/tmp/_autoarchaologist')
    options = cli.parse_args(args=argv)

    # A bare "." selects a dedicated directory below the current directory
    # instead of the current directory itself.
    if options.out == '.':
        options.out = os.path.join(os.getcwd(), "_autoarchaologist")

    return options

def main(job, html_subdir, **kwargs):
args = parse_arguments()
kwargs["html_dir"] = args.out

''' A standard main routine to reduce boiler-plate '''
for key in os.environ:
i = OK_ENVS.get(key)
if i:
kwargs[i] = os.environ[key]

if 'html_dir' not in kwargs:
raise AttributeError("missing: html_dir")


kwargs['html_dir'] = os.path.join(kwargs['html_dir'], html_subdir)
kwargs.setdefault('download_links', True)
kwargs.setdefault('download_limit', 1 << 20)
Expand Down
55 changes: 55 additions & 0 deletions autoarchaeologist/excavators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import importlib.util
import os
import sys

# XXX: importing excavations this way is a kludge to avoid multiple rounds
# of file splitting churn that would pollute diffs.. important as the
# the most critical thing (for now) is avoiding changes to behaviour
def _import_name(import_name, search_dirs=None):
    '''
    Load the script `<import_name>.py` as a module and return it.

    The file is looked up in each directory of `search_dirs`, in order.
    When `search_dirs` is None the current working directory is tried
    first (the historic behaviour), followed by the directory that
    contains this package, which is where the ddhf_*.py scripts live.
    That way importing the package no longer depends on the directory
    the process was started from.

    The module is executed but not registered in sys.modules.

    Raises FileNotFoundError when none of the directories has the file.
    '''
    if search_dirs is None:
        package_parent = os.path.dirname(
            os.path.dirname(os.path.abspath(__file__))
        )
        search_dirs = (os.getcwd(), package_parent)

    file_name = f"{import_name}.py"
    for directory in search_dirs:
        import_path = os.path.join(directory, file_name)
        if os.path.isfile(import_path):
            break
    else:
        # Fail here with a readable message rather than deep inside the
        # loader; the exception type matches what a missing file raised
        # before.
        raise FileNotFoundError(
            f'cannot find "{file_name}" in any of {list(search_dirs)}'
        )

    spec = importlib.util.spec_from_file_location(import_name, import_path)
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod

from .ddhf.cpm_excavator import Cpm as cpm
cbm900 = _import_name("ddhf_cbm900").Cbm900
cr80 = _import_name("ddhf_cr80").Cr80Floppy
cr80_wang = _import_name("ddhf_cr80_wang").Cr80Wang
dask = _import_name("ddhf_dask").Dask
gier = _import_name("ddhf_gier").Gier
intel_isis = _import_name("ddhf_intel_isis").IntelISIS
r1k = _import_name("ddhf_r1k_tapes").R1k
r1k_backup = _import_name("ddhf_r1k_backup").R1kBackup
r1k_dfs = _import_name("ddhf_r1k_dfs").R1kDFS
rc3600 = _import_name("ddhf_rc3600").Rc3600
uug = _import_name("ddhf_dkuug").DkuugEuug
zilog_mcz = _import_name("ddhf_zilog_mcz").ZilogMCZ

# from .gier import configure_excavation as gier
# from .intel_isis import configure_excavation as intel_isis
# from .r1kdfs import configure_excavation as r1kdfs
# from .uug import configure_excavation as uug

__all__ = [
"cbm900",
"cpm",
"cr80",
"cr80_wang",
"dask",
"gier",
"intel_isis",
"r1k",
"r1k_backup",
"r1k_dfs",
"rc3600",
"uug",
"zilog_mcz",
]

# Registry mapping every public name listed in __all__ to the object bound to
# that name in this module; excavator_by_name() performs its lookups here.
EXCAVATORS = {name:getattr(sys.modules[__name__], name) for name in __all__}

def excavator_by_name(excavator_name):
    '''
    Return the entry registered in EXCAVATORS under `excavator_name`.

    Raises LookupError when no entry has that name.
    '''
    if excavator_name in EXCAVATORS:
        return EXCAVATORS[excavator_name]
    raise LookupError(f'no extractor named "{excavator_name}"')
12 changes: 5 additions & 7 deletions ddhf_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,22 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
'''

from autoarchaeologist import ddhf
from autoarchaeologist.ddhf import cpm_exc
from autoarchaeologist import ddhf, Excavation
from autoarchaeologist.ddhf.cpm_excavator import Cpm

class Butler(ddhf.DDHF_Excavation):
class DDHF_Butler(ddhf.DDHF_Excavation):
''' All Butler artifacts '''

def __init__(self, **kwargs):
super().__init__(**kwargs)

cpm_exc.std_cpm_excavation(self)
super().__init__(Cpm, **kwargs)

self.from_bitstore(
"COMPANY/BOGIKA",
)

if __name__ == "__main__":
ddhf.main(
Butler,
DDHF_Butler,
html_subdir="butler",
ddhf_topic = 'Bogika Butler',
ddhf_topic_link = 'https://datamuseum.dk/wiki/BDS_Butler'
Expand Down
9 changes: 7 additions & 2 deletions ddhf_cbm900.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
'''

from autoarchaeologist import ddhf
from autoarchaeologist.base import excavation

from autoarchaeologist.unix import cbm900_partition
from autoarchaeologist.unix import v7_filesystem
Expand All @@ -12,7 +13,7 @@
from autoarchaeologist.generic import textfiles
from autoarchaeologist.generic import samesame

class CBM900(ddhf.DDHF_Excavation):
class Cbm900(excavation.Excavation):

'''
Two CBM900 hard-disk images, one also contains the four distribution
Expand All @@ -29,14 +30,18 @@ def __init__(self, **kwargs):
self.add_examiner(textfiles.TextFile)
self.add_examiner(samesame.SameSame)

class DDHF_Cbm900(ddhf.DDHF_Excavation):

    ''' The Cbm900 excavation wrapped as a DDHF excavation '''

    def __init__(self, **kwargs):
        # Cbm900 is handed over as the excavation whose configuration
        # DDHF_Excavation applies to this instance.
        super().__init__(Cbm900, **kwargs)

        # NOTE(review): presumably the bitstore identifiers of the two
        # hard-disk images -- confirm against from_bitstore().
        self.from_bitstore(
            "30001199",
            "30001972",
        )

if __name__ == "__main__":
ddhf.main(
CBM900,
DDHF_Cbm900,
html_subdir="cbm900",
ddhf_topic = "Commodore CBM-900",
ddhf_topic_link = 'https://datamuseum.dk/wiki/Commodore/CBM900',
Expand Down
10 changes: 4 additions & 6 deletions ddhf_comet.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,22 @@
'''

from autoarchaeologist import ddhf
from autoarchaeologist.ddhf import cpm_exc
from autoarchaeologist.ddhf.cpm_excavator import Cpm

class Comet(ddhf.DDHF_Excavation):
class DDHF_Comet(ddhf.DDHF_Excavation):

''' All Comet artifacts '''

def __init__(self, **kwargs):
super().__init__(**kwargs)

cpm_exc.std_cpm_excavation(self)
super().__init__(Cpm, **kwargs)

self.from_bitstore(
"COMPANY/ICL/COMET",
)

if __name__ == "__main__":
ddhf.main(
Comet,
DDHF_Comet,
html_subdir="comet",
ddhf_topic = 'ICL Comet',
ddhf_topic_link = 'https://datamuseum.dk/wiki/ICL_Comet'
Expand Down
6 changes: 2 additions & 4 deletions ddhf_cpm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,14 @@
'''

from autoarchaeologist import ddhf
from autoarchaeologist.ddhf import cpm_exc
from autoarchaeologist.ddhf.cpm_excavator import Cpm

class Cpm(ddhf.DDHF_Excavation):

''' All Cpm artifacts '''

def __init__(self, **kwargs):
super().__init__(**kwargs)

cpm_exc.std_cpm_excavation(self)
super().__init__(Cpm, **kwargs)

self.from_bitstore(
"-30002875", # PASCAL
Expand Down
13 changes: 11 additions & 2 deletions ddhf_cr80.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@

from autoarchaeologist import ddhf

from autoarchaeologist.base import excavation
from autoarchaeologist.christianrovsing import cr80_sysone
from autoarchaeologist.christianrovsing import cr80_fs2
from autoarchaeologist.intel import isis
from autoarchaeologist.generic import textfiles
from autoarchaeologist.zilog import mcz

class Cr80Floppy(ddhf.DDHF_Excavation):
class Cr80Floppy(excavation.Excavation):
''' CR80 Floppy disk images'''

def __init__(self, **kwargs):
Expand All @@ -25,13 +26,21 @@ def __init__(self, **kwargs):
self.add_examiner(mcz.MCZRIO)
self.add_examiner(textfiles.TextFile)


class DDHF_Cr80Floppy(ddhf.DDHF_Excavation):

    ''' The Cr80Floppy excavation (CR80 floppy disk images) wrapped as a DDHF excavation '''

    def __init__(self, **kwargs):
        # Cr80Floppy is handed over as the excavation whose configuration
        # DDHF_Excavation applies to this instance.
        super().__init__(Cr80Floppy, **kwargs)

        # NOTE(review): presumably a bitstore keyword/path selecting the
        # artifacts to load -- confirm against from_bitstore().
        self.from_bitstore(
            "CR/CR80/SW",
        )

if __name__ == "__main__":
ddhf.main(
Cr80Floppy,
DDHF_Cr80Floppy,
html_subdir="cr80",
ddhf_topic = "CR80 Hard and Floppy Disks",
ddhf_topic_link = 'https://datamuseum.dk/wiki/CR80',
Expand Down
Loading