Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion RUFAS/data_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ def _metadata_array_validator(self, key_path: list[str], value: dict[str, Any])
"function": DataValidator._metadata_array_validator.__name__,
}
required_array_property_keys = {"type", "properties"}
optional_array_property_keys = {"description", "minimum_length", "maximum_length", "nullable"}
optional_array_property_keys = {"description", "minimum_length", "maximum_length", "nullable", "default"}
valid, message = self._validate_metadata_properties_keys(
required_array_property_keys, optional_array_property_keys, value, key_path
)
Expand Down
44 changes: 22 additions & 22 deletions RUFAS/input_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,14 @@
"user_feeds",
"tractor_dataset",
"EEE_constants",
"properties",
"feed_storage_configurations",
"feed_storage_instances",
}
PRROPERTIES_FILE_PATHS: dict[str, Path] = {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think a docstring would be helpful here? I know we don't do it anywhere else in this file, but we do document file-level constants in the manure files. Just a thought.

"default": Path("input/metadata/properties/default.json"),
"tasks_properties": Path("input/metadata/properties/tasks_properties.json"),
"commodity_properties": Path("input/metadata/properties/commodity_properties.json"),
}


class InputManager:
Expand Down Expand Up @@ -100,7 +104,12 @@ def pool(self, incoming_pool: dict[str, Any]) -> None:
self.__pool = incoming_pool

def start_data_processing(
self, metadata_path: Path, input_root: Path, task_id: Any, eager_termination: bool = True
self,
metadata_path: Path,
input_root: Path,
task_id: Any,
cross_validation_file_paths: list[str] | None,
eager_termination: bool = True,
) -> bool:
"""
Starts the pipeline for organizing metadata and input data processing.
Expand All @@ -113,6 +122,8 @@ def start_data_processing(
Root directory for all input files.
task_id : Any
Task ID for the current process.
cross_validation_file_paths : list[str] | None
A list of file paths to cross-validation files.
eager_termination : bool, default=True
If True, the process will be terminated as soon as finding invalid data and failing to fix it.
If False, the process will be terminated after going through and validating the entire data.
Expand Down Expand Up @@ -145,16 +156,20 @@ def start_data_processing(
self.om.route_logs(self.data_validator.event_logs)
raise ValueError(message)
is_input_data_valid = self._populate_pool(input_root, eager_termination)
is_input_data_valid = self._cross_validate_data(eager_termination) and is_input_data_valid
is_input_data_valid = (
self._cross_validate_data(cross_validation_file_paths, eager_termination) and is_input_data_valid
)
self.om.route_logs(self.data_validator.event_logs)
return is_input_data_valid

def _cross_validate_data(self, eager_termination: bool) -> bool:
def _cross_validate_data(self, cross_validation_file_paths: list[str] | None, eager_termination: bool) -> bool:
"""
Validates data against cross-validation rules and reports any failures.

Parameters
----------
cross_validation_file_paths : list[str] | None
A list of file paths to cross-validation rules.
eager_termination : bool
If True, the validation process stops after the first cross-validation
failure. Otherwise, it continues validating all the rules.
Expand All @@ -166,7 +181,6 @@ def _cross_validate_data(self, eager_termination: bool) -> bool:
or more rules fail.
"""
failing_cross_validation_blocks: list[str] = []
cross_validation_file_paths: list[str] | None = self.__metadata.get("cross-validation", None)
cross_validation_rules = self._load_cross_validation(cross_validation_file_paths)
if cross_validation_rules is not None and len(cross_validation_rules) > 0:
for cross_validation_ruleset in cross_validation_rules:
Expand Down Expand Up @@ -642,37 +656,23 @@ def _load_properties(self) -> None:
"function": self._load_properties.__name__,
}
try:
properties_metadata = self.__metadata["files"]["properties"]
properties_paths = properties_metadata.get("paths") or properties_metadata.get("path")

if isinstance(properties_paths, str):
properties_paths = [properties_paths]
if not isinstance(properties_paths, list) or len(properties_paths) == 0:
raise ValueError("Input Manager Error: Properties paths must be a non-empty string or list of strings")

if not all(isinstance(path, str) and path for path in properties_paths):
raise ValueError("Input Manager Error: Each properties path must be a non-empty string")

self.om.add_log(
"load_properties_attempt",
f"Attempting to load properties from {properties_paths}",
f"Attempting to load properties from {PRROPERTIES_FILE_PATHS.values()}",
info_map,
)

combined_properties: dict[str, Any] = {}
for properties_path_str in properties_paths:
properties_path = Path(properties_path_str)
for properties_path in PRROPERTIES_FILE_PATHS.values():
if not properties_path.exists():
raise FileNotFoundError(f"Input Manager Error: Properties file not found at {properties_path}")
loaded_properties = self._load_data_from_json(properties_path)
combined_properties.update(loaded_properties)

del self.__metadata["files"]["properties"]

self.__metadata["properties"] = combined_properties
self.om.add_log(
"load_properties_success",
f"Successfully loaded properties from {properties_paths}",
f"Successfully loaded properties from {PRROPERTIES_FILE_PATHS.values()}",
info_map,
)

Expand Down
11 changes: 9 additions & 2 deletions RUFAS/task_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,9 @@ def start(
"function": TaskManager.start.__name__,
}
self.output_manager.add_log("Task Manager Start", "Task Manager Started.", info_map)
is_data_valid = self.input_manager.start_data_processing(metadata_path, Path(""), task_id="TASK MANAGER")
is_data_valid = self.input_manager.start_data_processing(
metadata_path=metadata_path, input_root=Path(""), task_id="TASK MANAGER", cross_validation_file_paths=None
)
task_config: dict[str, Any] = self.input_manager.get_data("tasks")
for task in task_config.get("tasks", []):
filters_path = Path(task["filters_directory"])
Expand Down Expand Up @@ -768,8 +770,13 @@ def handle_input_data_audit(
"units": MeasurementUnits.UNITLESS,
}
output_manager.add_log("Validation start", f"Validating data for {args['metadata_file_path']}...", info_map)
cross_validation_file_paths: list[str] | None = args.get("cross_validation_file_paths", None)
is_data_valid = input_manager.start_data_processing(
Path(args["metadata_file_path"]), Path(args["input_root"]), args["task_id"], eager_termination
Path(args["metadata_file_path"]),
Path(args["input_root"]),
args["task_id"],
cross_validation_file_paths,
eager_termination,
)
output_manager.add_log(
"Validation complete", f"{args['output_prefix']} validation status: {is_data_valid}", info_map
Expand Down
1 change: 1 addition & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ v1.0.0
- [2854](https://github.com/RuminantFarmSystems/MASM/pull/2854) - [minor change] [NoInputChange] [NoOutputChange] Update `emissions.py` filtering process and remove `use_filter_key_name` option in the OM filter.
- [2872](https://github.com/RuminantFarmSystems/RuFaS/pull/2872) - [minor change] [NoInputChange] [NoOutputChange] Adds information and links for onboarding videos.
- [2850](https://github.com/RuminantFarmSystems/MASM/pull/2850) - [minor change] [NoInputChange] [NoOutputChange] Refactor `Pen.get_manure_stream()`.
- [2869](https://github.com/RuminantFarmSystems/RuFaS/pull/2869) - [minor change] [InputChange] [NoOutputChange] Removes `properties` and `cross-validation` file path definitions from metadata JSON files. Properties file paths are now defined as a module-level constant in `input_manager.py`, and cross-validation file paths are moved to the task configuration JSON files.

### v1.0.0

Expand Down
10 changes: 8 additions & 2 deletions input/data/tasks/available_simulation_tasks.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,20 @@
"metadata_file_path": "input/metadata/example_freestall_dairy_metadata.json",
"output_prefix": "example",
"log_verbosity": "errors",
"random_seed": 42
"random_seed": 42,
"cross_validation_file_paths": [
"input/metadata/cross_validation/example_cross_validation.json"
]
},
{
"task_type": "SIMULATION_SINGLE_RUN",
"metadata_file_path": "input/metadata/example_open_lot_metadata.json",
"output_prefix": "example",
"log_verbosity": "errors",
"random_seed": 42
"random_seed": 42,
"cross_validation_file_paths": [
"input/metadata/cross_validation/example_cross_validation.json"
]
}
]
}
13 changes: 10 additions & 3 deletions input/data/tasks/end_to_end_testing_task.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@
"csv_output_directory": "output/CSVs/.",
"json_output_directory": "output/JSONs/.",
"report_directory": "output/reports/.",
"graphics_directory": "output/graphics/."
"graphics_directory": "output/graphics/.",
"cross_validation_file_paths": [
"input/metadata/cross_validation/example_cross_validation.json"
]
},
{
"task_type": "END_TO_END_TESTING",
Expand Down Expand Up @@ -64,7 +67,10 @@
"csv_output_directory": "output/CSVs/.",
"json_output_directory": "output/JSONs/.",
"report_directory": "output/reports/.",
"graphics_directory": "output/graphics/."
"graphics_directory": "output/graphics/.",
"cross_validation_file_paths": [
"input/metadata/cross_validation/example_cross_validation.json"
]
},
{
"task_type": "END_TO_END_TESTING",
Expand Down Expand Up @@ -95,7 +101,8 @@
"csv_output_directory": "output/CSVs/.",
"json_output_directory": "output/JSONs/.",
"report_directory": "output/reports/.",
"graphics_directory": "output/graphics/."
"graphics_directory": "output/graphics/.",
"cross_validation_file_paths": []
}
]
}
5 changes: 4 additions & 1 deletion input/data/tasks/example_freestall_task.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
"output_prefix": "freestall",
"log_verbosity": "errors",
"random_seed": 42,
"exclude_info_maps": false
"exclude_info_maps": false,
"cross_validation_file_paths": [
"input/metadata/cross_validation/example_cross_validation.json"
]
}
]
}
3 changes: 2 additions & 1 deletion input/data/tasks/example_no_animal_task.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
"csv_output_directory": "output/CSVs/.",
"json_output_directory": "output/JSONs/.",
"report_directory": "output/reports/.",
"graphics_directory": "output/graphics/."
"graphics_directory": "output/graphics/.",
"cross_validation_file_paths": []
}
]
}
5 changes: 4 additions & 1 deletion input/data/tasks/example_open_lot_task.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
"metadata_file_path": "input/metadata/example_open_lot_metadata.json",
"output_prefix": "open_lot",
"log_verbosity": "errors",
"random_seed": 42
"random_seed": 42,
"cross_validation_file_paths": [
"input/metadata/cross_validation/example_cross_validation.json"
]
}
]
}
5 changes: 4 additions & 1 deletion input/data/tasks/single_run.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
"metadata_file_path": "input/metadata/example_freestall_dairy_metadata.json",
"output_prefix": "Task 2",
"log_verbosity": "warnings",
"random_seed": 42
"random_seed": 42,
"cross_validation_file_paths": [
"input/metadata/cross_validation/example_cross_validation.json"
]
}
]
}
10 changes: 1 addition & 9 deletions input/metadata/available_simulations_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,6 @@
"path": "input/data/tasks/available_simulation_tasks.json",
"type": "json",
"properties": "tasks_properties"
},
"properties": {
"title": "Metadata Properties",
"description": "The properties of input data.",
"paths": ["input/metadata/properties/tasks_properties.json"],
"type": "json",
"properties": "NA"
}
},
"cross-validation": null
}
}
10 changes: 0 additions & 10 deletions input/metadata/end_to_end_testing/freestall_e2e_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -231,16 +231,6 @@
"type": "json",
"properties": "EEE_constants_properties"
},
"properties": {
"title": "Metadata Properties",
"description": "The properties of input data.",
"paths": [
"input/metadata/properties/default.json",
"input/metadata/properties/commodity_properties.json"
],
"type": "json",
"properties": "NA"
},
"feed_storage_configurations": {
"title": "Feed Management",
"description": "Configurations for feed storage units.",
Expand Down
10 changes: 0 additions & 10 deletions input/metadata/end_to_end_testing/no_animal_e2e_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -231,16 +231,6 @@
"type": "json",
"properties": "EEE_constants_properties"
},
"properties": {
"title": "Metadata Properties",
"description": "The properties of input data.",
"paths": [
"input/metadata/properties/default.json",
"input/metadata/properties/commodity_properties.json"
],
"type": "json",
"properties": "NA"
},
"feed_storage_configurations": {
"title": "Feed Management",
"description": "Configurations for feed storage units.",
Expand Down
10 changes: 0 additions & 10 deletions input/metadata/end_to_end_testing/open_lot_e2e_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -231,16 +231,6 @@
"type": "json",
"properties": "EEE_constants_properties"
},
"properties": {
"title": "Metadata Properties",
"description": "The properties of input data.",
"paths": [
"input/metadata/properties/default.json",
"input/metadata/properties/commodity_properties.json"
],
"type": "json",
"properties": "NA"
},
"feed_storage_configurations": {
"title": "Feed Management",
"description": "Configurations for feed storage units.",
Expand Down
10 changes: 1 addition & 9 deletions input/metadata/end_to_end_testing_tm_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,6 @@
"path": "input/data/tasks/end_to_end_testing_task.json",
"type": "json",
"properties": "tasks_properties"
},
"properties": {
"title": "Metadata Properties",
"description": "The properties of input data.",
"paths": ["input/metadata/properties/tasks_properties.json"],
"type": "json",
"properties": "NA"
}
},
"cross-validation": null
}
}
11 changes: 0 additions & 11 deletions input/metadata/example_freestall_dairy_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -224,16 +224,6 @@
"type": "json",
"properties": "EEE_constants_properties"
},
"properties": {
"title": "Metadata Properties",
"description": "The properties of input data.",
"paths": [
"input/metadata/properties/default.json",
"input/metadata/properties/commodity_properties.json"
],
"type": "json",
"properties": "NA"
},
"feed_storage_configurations": {
"title": "Feed Management",
"description": "Configurations for feed storage units.",
Expand All @@ -249,7 +239,6 @@
"properties": "feed_storage_instances"
}
},
"cross-validation": ["input/metadata/cross_validation/example_cross_validation.json"],
"runtime_metadata": {
"EEE_econ": {
"path": "input/metadata/EEE/econ_metadata.json"
Expand Down
11 changes: 0 additions & 11 deletions input/metadata/example_no_animal_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -224,16 +224,6 @@
"type": "json",
"properties": "EEE_constants_properties"
},
"properties": {
"title": "Metadata Properties",
"description": "The properties of input data.",
"paths": [
"input/metadata/properties/default.json",
"input/metadata/properties/commodity_properties.json"
],
"type": "json",
"properties": "NA"
},
"feed_management": {
"title": "Feed Management",
"description": "Configurations for feed storage units.",
Expand All @@ -256,7 +246,6 @@
"properties": "feed_storage_instances"
}
},
"cross-validation": [],
"runtime_metadata": {
"EEE_econ": {
"path": "input/metadata/EEE/econ_metadata.json"
Expand Down
Loading