Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[![contraqctor](./assets/logo-letter.svg)](https://allenneuraldynamics.github.io/contraqctor/)

[![contraqctor](https://tinyurl.com/zf46ufwa)](https://allenneuraldynamics.github.io/contraqctor/)
![CI](https://github.com/AllenNeuralDynamics/contraqctor/actions/workflows/ci.yml/badge.svg)
![CI](https://github.com/AllenNeuralDynamics/contraqctor/actions/workflows/contraqctor.yml/badge.svg)
[![PyPI - Version](https://img.shields.io/pypi/v/contraqctor)](https://pypi.org/project/contraqctor/)
[![License](https://img.shields.io/badge/license-MIT-brightgreen)](LICENSE)
[![ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
Expand Down
25 changes: 13 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,15 @@ build-backend = "uv_build"
[project]
name = "contraqctor"
description = "A library for managing data contracts and quality control in behavioral datasets."
authors = [
{ name = "Bruno Cruz", email = "bruno.cruz@alleninstitute.org" },
]
authors = [{ name = "Bruno Cruz", email = "bruno.cruz@alleninstitute.org" }]
requires-python = ">=3.11"
license = "MIT"

classifiers = [
"Programming Language :: Python :: 3.11",
"Operating System :: Microsoft :: Windows",
]
version = "0.4.10rc1"
version = "0.5.0rc0"
readme = {file = "README.md", content-type = "text/markdown"}

dependencies = [
Expand All @@ -42,13 +40,7 @@ Changelog = "https://github.com/AllenNeuralDynamics/contraqctor/releases"

[dependency-groups]

dev = [
'codespell',
'pytest',
'pytest-cov',
'ruff',
'interrogate'
]
dev = ['codespell', 'pytest', 'pytest-cov', 'ruff', 'interrogate']

docs = [
'mkdocs',
Expand Down Expand Up @@ -81,10 +73,19 @@ testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
env = ["MPLBACKEND=Agg"]

[tool.interrogate]
ignore-init-method = true
ignore-magic = true
ignore_module = true
fail-under = 100
exclude = ["__init__.py", "tests", "docs", "build", "setup.py", "examples", "site"]
exclude = [
"__init__.py",
"tests",
"docs",
"build",
"setup.py",
"examples",
"site",
]
50 changes: 47 additions & 3 deletions src/contraqctor/_typing.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from typing import Any, Generic, Protocol, TypeAlias, TypeVar, Union, cast, final
from typing import TYPE_CHECKING, Any, Generic, Protocol, TypeAlias, TypeVar, Union, cast, final

if TYPE_CHECKING:
from contraqctor.contract.base import DataStream
else:
DataStream = Any # type: ignore

# Type variables
TData = TypeVar("TData", bound=Union[Any, "_UnsetData"])
"""TypeVar: Type variable bound to Union[Any, "_UnsetData"] for data types."""
TData = TypeVar("TData", bound=Union[Any, "_UnsetData", "ErrorOnLoad"])
"""TypeVar: Type variable bound to Union[Any, "_UnsetData", "ErrorOnLoad"] for data types."""

TReaderParams = TypeVar("TReaderParams", contravariant=True)
"""TypeVar: Contravariant type variable for reader parameters."""
Expand Down Expand Up @@ -157,3 +162,42 @@ def is_unset(obj: Any) -> bool:
True if the object is an unset sentinel value, False otherwise.
"""
return (obj is UnsetReader) or (obj is UnsetParams) or (obj is UnsetData)


@final
class ErrorOnLoad:
"""A class representing data that failed to load due to an error.

Attributes:
datastream: The data stream that failed to load.
error: The exception that occurred during data loading.

This class is used to encapsulate information about data loading failures,
allowing for graceful handling of errors in data processing workflows.
"""

def __init__(self, data_stream: "DataStream", exception: Exception | None = None):
self._data_stream = data_stream
self._exception = exception

@property
def data_stream(self) -> "DataStream":
"""The data stream that failed to load."""
return self._data_stream

@property
def exception(self) -> Exception | None:
"""The exception that occurred during data loading, if any."""
return self._exception

def __repr__(self):
return f"<ErrorData stream={self.data_stream} error={self.exception}>"

def raise_from_error(self):
"""Raises the stored error if it exists.

Raises:
The stored exception if it is not None.
"""
if self.exception is not None:
raise self.exception
82 changes: 67 additions & 15 deletions src/contraqctor/contract/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,20 @@
import abc
import dataclasses
import os
from typing import Any, ClassVar, Dict, Generator, Generic, List, Optional, Protocol, Self, TypeVar, runtime_checkable
from typing import (
Any,
ClassVar,
Dict,
Generator,
Generic,
List,
Optional,
Protocol,
Self,
TypeVar,
cast,
runtime_checkable,
)

from semver import Version
from typing_extensions import override
Expand Down Expand Up @@ -201,7 +214,16 @@ def has_data(self) -> bool:
Returns:
bool: True if data has been loaded, False otherwise.
"""
return not _typing.is_unset(self._data)
return not (_typing.is_unset(self._data) or self.has_error)

@property
def has_error(self) -> bool:
    """Whether the last load attempt left an error sentinel behind.

    Returns:
        bool: True when the internal data slot holds an ``ErrorOnLoad``
        marker (i.e. loading failed), False otherwise.
    """
    current = self._data
    return isinstance(current, _typing.ErrorOnLoad)

@property
def data(self) -> _typing.TData:
Expand All @@ -213,9 +235,22 @@ def data(self) -> _typing.TData:
Raises:
ValueError: If data has not been loaded yet.
"""
if self.has_error:
cast(_typing.ErrorOnLoad, self._data).raise_from_error()
if not self.has_data:
raise ValueError("Data has not been loaded yet.")
return self._data
return cast(_typing.TData, self._data)

def clear(self) -> Self:
    """Discard any loaded data, returning the stream to its unset state.

    After calling this, the stream behaves as if it had never been
    loaded and can be re-loaded.

    Returns:
        Self: This data stream, to allow method chaining.
    """
    # Reset to the module-level unset sentinel (not None) so that the
    # unset-detection helpers keep recognizing the cleared state.
    self._data = _typing.UnsetData
    return self

def load(self) -> Self:
"""Load data into the data stream.
Expand All @@ -239,7 +274,10 @@ def load(self) -> Self:
print(f"Loaded {len(df)} rows")
```
"""
self._data = self.read()
try:
self._data = self.read()
except Exception as e: # pylint: disable=broad-except
self._data = _typing.ErrorOnLoad(self, exception=e)
return self

def __str__(self):
Expand All @@ -266,9 +304,27 @@ def __iter__(self) -> Generator["DataStream", None, None]:
Yields:
DataStream: Child data streams (none for base DataStream).
"""
yield
return
yield # This line is unreachable but needed for the generator type

def collect_errors(self) -> List[_typing.ErrorOnLoad]:
    """Gather every load failure in this stream and its descendants.

    Walks the hierarchy depth-first, collecting this stream's
    ``ErrorOnLoad`` sentinel (if present) followed by those of all
    child streams.

    Returns:
        List[ErrorOnLoad]: All load errors encountered in the hierarchy.
    """
    found: List[_typing.ErrorOnLoad] = []
    if self.has_error:
        found.append(cast(_typing.ErrorOnLoad, self._data))
    # Iterating self may yield None (the base stream yields nothing
    # useful); skip those and recurse into real child streams.
    found.extend(
        err
        for child in self
        if child is not None
        for err in child.collect_errors()
    )
    return found

def load_all(self, strict: bool = False) -> Self:
"""Recursively load this data stream and all child streams.

Performs depth-first traversal to load all streams in the hierarchy.
Expand All @@ -293,17 +349,13 @@ def load_all(self, strict: bool = False) -> list[tuple["DataStream", Exception],
```
"""
self.load()
exceptions = []
for stream in self:
if stream is None:
continue
try:
exceptions += stream.load_all(strict=strict)
except Exception as e:
if strict:
raise e
exceptions.append((stream, e))
return exceptions
stream.load_all(strict=strict)
if stream.has_error and strict:
cast(_typing.ErrorOnLoad, stream.data).raise_from_error()
return self


TDataStream = TypeVar("TDataStream", bound=DataStream[Any, Any])
Expand Down Expand Up @@ -411,7 +463,7 @@ def at(self) -> _At[TDataStream]:
return self._at

@override
def load(self):
def load(self) -> Self:
"""Load data for this collection.

Overrides the base method to add validation that loaded data is a list of DataStreams.
Expand Down
Loading