diff --git a/mandible/umm_generator/__init__.py b/mandible/umm_generator/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/mandible/umm_generator/base.py b/mandible/umm_generator/base.py
new file mode 100644
index 0000000..086d330
--- /dev/null
+++ b/mandible/umm_generator/base.py
@@ -0,0 +1,214 @@
+import collections
+import collections.abc
+import datetime
+import inspect
+from typing import Any, Dict, Optional, Type
+
+
+class MISSING:
+    """Sentinel type marking an attribute with no explicit default value."""
+
+    __slots__ = ()
+
+
+class Umm:
+    """Base class for declaratively defined metadata records.
+
+    Subclasses declare attributes as class-level annotations. On
+    construction each attribute is resolved as follows:
+
+    * annotated with an ``Umm`` subclass: that class is instantiated with the
+      same metadata;
+    * has a class-level default: the default is used as-is;
+    * otherwise a ``get_NAME(metadata)`` method, ``NAME`` being the attribute
+      name, supplies the value;
+    * otherwise, if annotated with a subscripted sequence type whose
+      parameters are all ``Umm`` subclasses, a tuple of instances is built.
+
+    Anything else, and defining both a default and a handler, raises
+    ``RuntimeError``.
+    """
+
+    # Maps attribute name -> (annotated type, default value or `MISSING`).
+    # Rebuilt for every subclass by `__init_subclass__`.
+    _attributes = {}
+
+    def __init_subclass__(cls, **kwargs):
+        """Build the attribute table for a newly defined subclass."""
+        super().__init_subclass__(**kwargs)
+
+        # TODO(reweeden): Make this work with multiple inheritance?
+        parent_cls = super(cls, cls)
+        attributes = {**parent_cls._attributes}
+
+        for name, typ in get_annotations(cls).items():
+            # TODO(reweeden): What if we're overwriting an attribute from the
+            # parent and the types don't match?
+            # Only a default set directly on this class counts, so
+            # re-annotating an inherited attribute drops its old default.
+            attributes[name] = (typ, cls.__dict__.get(name, MISSING))
+
+        # Update attributes with unannotated default values
+        for name, value in inspect.getmembers(cls):
+            if name.startswith("_") or inspect.isfunction(value):
+                continue
+
+            if name not in attributes:
+                attributes[name] = (Any, value)
+
+        cls._attributes = attributes
+
+    def __init__(
+        self,
+        metadata: Dict[str, Any],
+        debug_name: Optional[str] = None,
+    ):
+        """Resolve and set every declared attribute.
+
+        :param metadata: source metadata passed on to nested ``Umm``
+            instances and ``get_NAME`` handler methods
+        :param debug_name: prefix used for attribute names in error
+            messages; defaults to the class name
+        :raises RuntimeError: if an attribute could not be initialized
+        """
+        if debug_name is None:
+            debug_name = self.__class__.__name__
+        for name, (typ, default) in self._attributes.items():
+            attr_debug_name = f"{debug_name}.{name}"
+            try:
+                value = self._init_attr_value(
+                    name,
+                    attr_debug_name,
+                    typ,
+                    default,
+                    metadata,
+                )
+                setattr(self, name, value)
+            except RuntimeError:
+                # Already descriptive (possibly from a nested attribute)
+                raise
+            except Exception as e:
+                raise RuntimeError(
+                    f"Encountered an error initializing "
+                    f"'{attr_debug_name}': {e}",
+                ) from e
+
+    def _init_attr_value(
+        self,
+        attr_name: str,
+        debug_name: Optional[str],
+        typ: Any,
+        default: Any,
+        metadata: dict,
+    ) -> Any:
+        """Compute the value for a single attribute.
+
+        :param attr_name: attribute name; selects the ``get_NAME`` handler
+        :param debug_name: name of the attribute for error messages
+        :param typ: annotated type of the attribute
+        :param default: class-level default, or ``MISSING`` if there is none
+        :param metadata: source metadata
+        :returns: the resolved attribute value
+        :raises RuntimeError: on self-reference, a non-``Umm`` sequence
+            parameter, a missing value, or both a default and a handler
+        """
+        if inspect.isclass(typ) and issubclass(typ, Umm):
+            if type(self) is typ:
+                # TODO(reweeden): Error type?
+                raise RuntimeError(
+                    f"Self-reference detected for attribute '{debug_name}'",
+                )
+
+            return typ(metadata, debug_name=debug_name)
+
+        value = default
+        # TODO(reweeden): Ability to set handler function manually?
+        # For example:
+        #     class Foo(Umm):
+        #         Attribute: str = Attr()
+        #
+        #         @Attribute.getter
+        #         def get_attribute(self, metadata):
+        #             ...
+        handler_name = f"get_{attr_name}"
+        handler = getattr(self, handler_name, None)
+
+        if value is MISSING:
+            if handler is None:
+                # `__origin__` is not always a class (it is `typing.Union`
+                # for `Optional[...]`) and `__args__` may hold non-classes
+                # such as `...`, so guard every `issubclass` call.
+                if (
+                    hasattr(typ, "__origin__")
+                    and hasattr(typ, "__args__")
+                    and inspect.isclass(typ.__origin__)
+                    and issubclass(typ.__origin__, collections.abc.Sequence)
+                ):
+                    for cls in typ.__args__:
+                        if not (inspect.isclass(cls) and issubclass(cls, Umm)):
+                            # TODO(reweeden): Error type?
+                            raise RuntimeError(
+                                f"Non-Umm element of tuple type found for "
+                                f"'{debug_name}'",
+                            )
+                    return tuple(
+                        cls(metadata, debug_name=debug_name)
+                        for cls in typ.__args__
+                    )
+
+                # TODO(reweeden): Error type?
+                raise RuntimeError(
+                    f"Missing value for '{debug_name}'. "
+                    f"Try implementing a '{handler_name}' method",
+                )
+
+            return handler(metadata)
+        elif handler is not None:
+            # TODO(reweeden): Error type?
+            raise RuntimeError(
+                f"Found both explicit value and handler function for "
+                f"'{debug_name}'",
+            )
+
+        return value
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to a nested dict, leaving out ``None`` attributes."""
+        return _to_dict(self)
+
+
+def get_annotations(cls) -> Dict[str, Type[Any]]:
+    """Return the annotations declared directly on ``cls``."""
+    if hasattr(inspect, "get_annotations"):
+        return inspect.get_annotations(cls, eval_str=True)
+
+    # TODO(reweeden): String evaluation
+    # Before Python 3.10, `cls.__annotations__` on a class without its own
+    # annotations can resolve to a base class's dict, which would
+    # re-register inherited attributes and discard their defaults. Reading
+    # from `__dict__` only ever sees annotations declared on `cls` itself.
+    return dict(cls.__dict__.get("__annotations__", {}))
+
+
+def _to_dict(obj: Any) -> Any:
+    """Recursively convert ``obj`` into plain dicts and lists.
+
+    ``Umm`` instances become dicts keyed by attribute name, non-string
+    sequences become lists, and every other value is returned unchanged.
+    """
+    if isinstance(obj, Umm):
+        return {
+            name: _to_dict(value)
+            for name in obj._attributes
+            # Filter out optional keys, marked by having a `None` value
+            if (value := getattr(obj, name)) is not None
+        }
+
+    if isinstance(obj, collections.abc.Sequence) and not isinstance(obj, str):
+        return [_to_dict(item) for item in obj]
+
+    # TODO(reweeden): Serialize to string here, or do that via JSON encoder?
+    if isinstance(obj, datetime.datetime):
+        return obj
+
+    return obj
diff --git a/mandible/umm_generator/umm_g.py b/mandible/umm_generator/umm_g.py
new file mode 100644
index 0000000..bbae634
--- /dev/null
+++ b/mandible/umm_generator/umm_g.py
@@ -0,0 +1,98 @@
+from datetime import datetime
+from typing import Any, Dict, Optional, Sequence, Union
+
+from .base import Umm
+
+UMM_DATE_FORMAT = "%Y-%m-%d"
+UMM_DATETIME_FORMAT = f"{UMM_DATE_FORMAT}T%H:%M:%SZ"
+
+
+# AdditionalAttributes
+class AdditionalAttribute(Umm):
+    Name: str
+    Values: Sequence[str]
+
+
+# CollectionReference
+class CollectionReferenceShortNameVersion(Umm):
+    ShortName: str
+    Version: str
+
+
+class CollectionReferenceEntryTitle(Umm):
+    EntryTitle: str
+
+
+CollectionReference = Union[
+    CollectionReferenceShortNameVersion,
+    CollectionReferenceEntryTitle,
+]
+
+
+# DataGranule
+# ArchiveAndDistributionInformation
+class Checksum(Umm):
+    Value: str
+    Algorithm: str
+
+
+class ArchiveAndDistributionInformation(Umm):
+    Name: str
+    SizeInBytes: Optional[int] = None
+    Size: Optional[int] = None
+    SizeUnit: Optional[str] = None
+    Format: Optional[str] = None
+    FormatType: Optional[str] = None
+    MimeType: Optional[str] = None
+    Checksum: Optional[Checksum] = None
+
+
+class Identifier(Umm):
+    IdentifierType: str
+    Identifier: str
+    IdentifierName: Optional[str] = None
+
+
+class DataGranule(Umm):
+    ArchiveAndDistributionInformation: Optional[
+        Sequence[ArchiveAndDistributionInformation]
+    ] = None
+    DayNightFlag: str = "Unspecified"
+    Identifiers: Optional[Sequence[Identifier]] = None
+    ProductionDateTime: datetime
+    ReprocessingActual: Optional[str] = None
+    ReprocessingPlanned: Optional[str] = None
+
+
+# MetadataSpecification
+class MetadataSpecification(Umm):
+    Name: str = "UMM-G"
+    URL: str = "https://cdn.earthdata.nasa.gov/umm/granule/v1.6.5"
+    Version: str = "1.6.5"
+
+
+# PGEVersionClass
+class PGEVersionClass(Umm):
+    PGEName: Optional[str] = None
+    PGEVersion: str
+
+
+class UmmG(Umm):
+    # Sorted?
+    AdditionalAttributes: Optional[Sequence[AdditionalAttribute]] = None
+    CollectionReference: CollectionReference
+    DataGranule: Optional[DataGranule] = None
+    GranuleUR: str
+    MetadataSpecification: MetadataSpecification
+    # OrbitCalculatedSpatialDomains: Optional[self.get_orbit_calculated_spatial_domains()]
+    PGEVersionClass: Optional[PGEVersionClass] = None
+    # Platforms: Optional[self.get_platforms()]
+    # Projects: Optional[self.get_projects()]
+    # ProviderDates: self.get_provider_dates(),
+    # RelatedUrls: Optional[self.get_related_urls()]
+    # SpatialExtent: Optional[self.get_spatial_extent()]
+    # TemporalExtent: Optional[self.get_temporal_extent()]
+    # InputGranules: Optional[self.get_input_granules()]
+
+    def get_GranuleUR(self, metadata: Dict[str, Any]) -> str:
+        return metadata["granule"]["granuleId"]
diff --git a/tests/test_umm_generator.py b/tests/test_umm_generator.py
new file mode 100644
index 0000000..5a17332
--- /dev/null
+++ b/tests/test_umm_generator.py
@@ -0,0 +1,209 @@
+from datetime import datetime
+from typing import List, Optional, Tuple
+
+import pytest
+
+from mandible.umm_generator.base import Umm
+from mandible.umm_generator.umm_g import (
+    AdditionalAttribute,
+    CollectionReferenceShortNameVersion,
+    DataGranule,
+    Identifier,
+    PGEVersionClass,
+    UmmG,
+)
+
+
+def test_custom_umm():
+    class TestComponent(Umm):
+        Field1: str
+        Field2: int
+
+        def get_Field1(self, metadata) -> str:
+            return metadata["field_1"]
+
+        def get_Field2(self, metadata) -> int:
+            return metadata["field_2"]
+
+    class TestMain(Umm):
+        Name: str
+        Component: TestComponent
+
+        def get_Name(self, metadata) -> str:
+            return metadata["name"]
+
+    metadata = {
+        "field_1": "Value 1",
+        "field_2": "Value 2",
+        "name": "Test Name",
+    }
+    item = TestMain(metadata)
+
+    assert item.Name == "Test Name"
+    assert item.to_dict() == {
+        "Name": "Test Name",
+        "Component": {
+            "Field1": "Value 1",
+            "Field2": "Value 2",
+        }
+    }
+
+
+def test_custom_error_missing_handler():
+    class TestUmm(Umm):
+        Field1: str
+
+    with pytest.raises(
+        RuntimeError,
+        match=(
+            "Missing value for 'TestUmm.Field1'. Try implementing a "
+            "'get_Field1' method"
+        ),
+    ):
+        TestUmm({})
+
+
+def test_custom_error_missing_handler_optional():
+    class TestUmm(Umm):
+        Field1: Optional[str]
+
+    with pytest.raises(
+        RuntimeError,
+        match=(
+            "Missing value for 'TestUmm.Field1'. Try implementing a "
+            "'get_Field1' method"
+        ),
+    ):
+        TestUmm({})
+
+
+def test_custom_error_default_and_handler():
+    class TestUmm(Umm):
+        Field1: str = "default"
+
+        def get_Field1(self, metadata) -> str:
+            return metadata["field_1"]
+
+    with pytest.raises(
+        RuntimeError,
+        match=(
+            "Found both explicit value and handler function for "
+            "'TestUmm.Field1'"
+        ),
+    ):
+        TestUmm({})
+
+
+def test_custom_error_tuple_non_ummg():
+    class TestUmm(Umm):
+        Field1: Tuple[str]
+
+    with pytest.raises(
+        RuntimeError,
+        match="Non-Umm element of tuple type found for 'TestUmm.Field1'",
+    ):
+        TestUmm({})
+
+
+def test_umm_g_abstract():
+    with pytest.raises(Exception):
+        _ = UmmG({})
+
+
+def test_umm_g():
+    class CustomOrbitNumberAdditionalAttribute(AdditionalAttribute):
+        Name: str = "OrbitNumber"
+
+        def get_Values(self, metadata: dict) -> List[str]:
+            return [str(metadata["ProductMd"]["orbit_number"])]
+
+    class CustomCollectionReference(CollectionReferenceShortNameVersion):
+        ShortName: str = "FOOBAR"
+        Version: str = "10"
+
+    class CustomIdentifier(Identifier):
+        Identifier: str
+        IdentifierType: str = "ProducerGranuleId"
+
+        def get_Identifier(self, metadata: dict) -> str:
+            return metadata["granule"]["granuleId"]
+
+    class CustomDataGranule(DataGranule):
+        ArchiveAndDistributionInformation: list = []
+        Identifiers: Tuple[CustomIdentifier]
+
+        def get_ProductionDateTime(self, metadata: dict) -> datetime:
+            return datetime.strptime(
+                metadata["ProductMd"]["start_date"],
+                "%Y-%m-%d",
+            )
+
+    class CustomPGEVersionClass(PGEVersionClass):
+        PGEName: str
+        PGEVersion: str
+
+        def get_PGEName(self, metadata: dict) -> str:
+            return metadata["ProductMd"]["pge_version_string"].split()[0]
+
+        def get_PGEVersion(self, metadata: dict) -> str:
+            return metadata["ProductMd"]["pge_version_string"].split()[1]
+
+    class BasicUmmG(UmmG):
+        AdditionalAttributes: Tuple[
+            CustomOrbitNumberAdditionalAttribute,
+        ]
+        CollectionReference: CustomCollectionReference
+        DataGranule: CustomDataGranule
+        PGEVersionClass: CustomPGEVersionClass
+
+    metadata = {
+        "granule": {
+            "granuleId": "SomeGranuleId",
+        },
+        "ProductMd": {
+            "orbit_number": 1234,
+            "start_date": "2024-10-16",
+            "pge_version_string": "SomePGE 1.000.2"
+        },
+    }
+    umm_g = BasicUmmG(metadata)
+
+    assert len(umm_g.AdditionalAttributes) == 1
+    assert umm_g.AdditionalAttributes[0].Name == "OrbitNumber"
+    assert umm_g.AdditionalAttributes[0].Values == ["1234"]
+    assert umm_g.CollectionReference.ShortName == "FOOBAR"
+    assert umm_g.CollectionReference.Version == "10"
+
+    assert umm_g.to_dict() == {
+        "AdditionalAttributes": [
+            {
+                "Name": "OrbitNumber",
+                "Values": ["1234"],
+            },
+        ],
+        "CollectionReference": {
+            "ShortName": "FOOBAR",
+            "Version": "10",
+        },
+        "DataGranule": {
+            "ArchiveAndDistributionInformation": [],
+            "DayNightFlag": "Unspecified",
+            "Identifiers": [
+                {
+                    "Identifier": "SomeGranuleId",
+                    "IdentifierType": "ProducerGranuleId",
+                },
+            ],
+            "ProductionDateTime": datetime(2024, 10, 16),
+        },
+        "GranuleUR": "SomeGranuleId",
+        "MetadataSpecification": {
+            "Name": "UMM-G",
+            "URL": "https://cdn.earthdata.nasa.gov/umm/granule/v1.6.5",
+            "Version": "1.6.5",
+        },
+        "PGEVersionClass": {
+            "PGEName": "SomePGE",
+            "PGEVersion": "1.000.2",
+        },
+    }