From 63f563ac9c3df072866bd99d4dfeba045d9530a4 Mon Sep 17 00:00:00 2001 From: Allister Liu Date: Fri, 20 Mar 2026 07:21:48 +0800 Subject: [PATCH 01/11] Remove `properties` and `cross_validation` FilePaths from metadata --- RUFAS/input_manager.py | 26 ++++++------------- .../available_simulations_metadata.json | 7 ----- .../freestall_e2e_metadata.json | 10 ------- .../no_animal_e2e_metadata.json | 10 ------- .../open_lot_e2e_metadata.json | 10 ------- .../end_to_end_testing_tm_metadata.json | 7 ----- .../example_freestall_dairy_metadata.json | 10 ------- .../metadata/example_no_animal_metadata.json | 10 ------- input/metadata/example_open_lot_metadata.json | 10 ------- input/metadata/herd_init_metadata.json | 7 ----- ...update_end_to_end_testing_tm_metadata.json | 7 ----- input/task_manager_metadata.json | 7 ----- 12 files changed, 8 insertions(+), 113 deletions(-) diff --git a/RUFAS/input_manager.py b/RUFAS/input_manager.py index cddacb360a..98cb7589c0 100644 --- a/RUFAS/input_manager.py +++ b/RUFAS/input_manager.py @@ -47,10 +47,14 @@ "user_feeds", "tractor_dataset", "EEE_constants", - "properties", "feed_storage_configurations", "feed_storage_instances", } +PRROPERTIES_FILE_PATHS: dict[str, Path] = { + "default": Path("input/metadata/properties/default.json"), + "tasks_properties": Path("input/metadata/properties/tasks_properties.json"), + "commodity_properties": Path("input/metadata/properties/commodity_properties.json"), +} class InputManager: @@ -642,37 +646,23 @@ def _load_properties(self) -> None: "function": self._load_properties.__name__, } try: - properties_metadata = self.__metadata["files"]["properties"] - properties_paths = properties_metadata.get("paths") or properties_metadata.get("path") - - if isinstance(properties_paths, str): - properties_paths = [properties_paths] - if not isinstance(properties_paths, list) or len(properties_paths) == 0: - raise ValueError("Input Manager Error: Properties paths must be a non-empty string or list of strings") - - 
if not all(isinstance(path, str) and path for path in properties_paths): - raise ValueError("Input Manager Error: Each properties path must be a non-empty string") - self.om.add_log( "load_properties_attempt", - f"Attempting to load properties from {properties_paths}", + f"Attempting to load properties from {PRROPERTIES_FILE_PATHS.values()}", info_map, ) combined_properties: dict[str, Any] = {} - for properties_path_str in properties_paths: - properties_path = Path(properties_path_str) + for properties_path in PRROPERTIES_FILE_PATHS.values(): if not properties_path.exists(): raise FileNotFoundError(f"Input Manager Error: Properties file not found at {properties_path}") loaded_properties = self._load_data_from_json(properties_path) combined_properties.update(loaded_properties) - del self.__metadata["files"]["properties"] - self.__metadata["properties"] = combined_properties self.om.add_log( "load_properties_success", - f"Successfully loaded properties from {properties_paths}", + f"Successfully loaded properties from {PRROPERTIES_FILE_PATHS.values()}", info_map, ) diff --git a/input/metadata/available_simulations_metadata.json b/input/metadata/available_simulations_metadata.json index 9d6d35b3ae..056abcd340 100644 --- a/input/metadata/available_simulations_metadata.json +++ b/input/metadata/available_simulations_metadata.json @@ -6,13 +6,6 @@ "path": "input/data/tasks/available_simulation_tasks.json", "type": "json", "properties": "tasks_properties" - }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": ["input/metadata/properties/tasks_properties.json"], - "type": "json", - "properties": "NA" } }, "cross-validation": null diff --git a/input/metadata/end_to_end_testing/freestall_e2e_metadata.json b/input/metadata/end_to_end_testing/freestall_e2e_metadata.json index ec906b15a7..da04f16758 100644 --- a/input/metadata/end_to_end_testing/freestall_e2e_metadata.json +++ 
b/input/metadata/end_to_end_testing/freestall_e2e_metadata.json @@ -231,16 +231,6 @@ "type": "json", "properties": "EEE_constants_properties" }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": [ - "input/metadata/properties/default.json", - "input/metadata/properties/commodity_properties.json" - ], - "type": "json", - "properties": "NA" - }, "feed_storage_configurations": { "title": "Feed Management", "description": "Configurations for feed storage units.", diff --git a/input/metadata/end_to_end_testing/no_animal_e2e_metadata.json b/input/metadata/end_to_end_testing/no_animal_e2e_metadata.json index 95bf425328..e17b518619 100644 --- a/input/metadata/end_to_end_testing/no_animal_e2e_metadata.json +++ b/input/metadata/end_to_end_testing/no_animal_e2e_metadata.json @@ -231,16 +231,6 @@ "type": "json", "properties": "EEE_constants_properties" }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": [ - "input/metadata/properties/default.json", - "input/metadata/properties/commodity_properties.json" - ], - "type": "json", - "properties": "NA" - }, "feed_storage_configurations": { "title": "Feed Management", "description": "Configurations for feed storage units.", diff --git a/input/metadata/end_to_end_testing/open_lot_e2e_metadata.json b/input/metadata/end_to_end_testing/open_lot_e2e_metadata.json index 630c44fae0..745192a557 100644 --- a/input/metadata/end_to_end_testing/open_lot_e2e_metadata.json +++ b/input/metadata/end_to_end_testing/open_lot_e2e_metadata.json @@ -231,16 +231,6 @@ "type": "json", "properties": "EEE_constants_properties" }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": [ - "input/metadata/properties/default.json", - "input/metadata/properties/commodity_properties.json" - ], - "type": "json", - "properties": "NA" - }, "feed_storage_configurations": { "title": 
"Feed Management", "description": "Configurations for feed storage units.", diff --git a/input/metadata/end_to_end_testing_tm_metadata.json b/input/metadata/end_to_end_testing_tm_metadata.json index 6c0df55ae7..3278fae38d 100644 --- a/input/metadata/end_to_end_testing_tm_metadata.json +++ b/input/metadata/end_to_end_testing_tm_metadata.json @@ -6,13 +6,6 @@ "path": "input/data/tasks/end_to_end_testing_task.json", "type": "json", "properties": "tasks_properties" - }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": ["input/metadata/properties/tasks_properties.json"], - "type": "json", - "properties": "NA" } }, "cross-validation": null diff --git a/input/metadata/example_freestall_dairy_metadata.json b/input/metadata/example_freestall_dairy_metadata.json index 566dc24af6..9f2b7fb961 100644 --- a/input/metadata/example_freestall_dairy_metadata.json +++ b/input/metadata/example_freestall_dairy_metadata.json @@ -224,16 +224,6 @@ "type": "json", "properties": "EEE_constants_properties" }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": [ - "input/metadata/properties/default.json", - "input/metadata/properties/commodity_properties.json" - ], - "type": "json", - "properties": "NA" - }, "feed_storage_configurations": { "title": "Feed Management", "description": "Configurations for feed storage units.", diff --git a/input/metadata/example_no_animal_metadata.json b/input/metadata/example_no_animal_metadata.json index 014c99e455..4d6ffaf628 100644 --- a/input/metadata/example_no_animal_metadata.json +++ b/input/metadata/example_no_animal_metadata.json @@ -224,16 +224,6 @@ "type": "json", "properties": "EEE_constants_properties" }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": [ - "input/metadata/properties/default.json", - "input/metadata/properties/commodity_properties.json" - ], - 
"type": "json", - "properties": "NA" - }, "feed_management": { "title": "Feed Management", "description": "Configurations for feed storage units.", diff --git a/input/metadata/example_open_lot_metadata.json b/input/metadata/example_open_lot_metadata.json index 16ef55ba1f..2bd42a9299 100644 --- a/input/metadata/example_open_lot_metadata.json +++ b/input/metadata/example_open_lot_metadata.json @@ -224,16 +224,6 @@ "type": "json", "properties": "EEE_constants_properties" }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": [ - "input/metadata/properties/default.json", - "input/metadata/properties/commodity_properties.json" - ], - "type": "json", - "properties": "NA" - }, "feed_management": { "title": "Feed Management", "description": "Configurations for feed storage units.", diff --git a/input/metadata/herd_init_metadata.json b/input/metadata/herd_init_metadata.json index f10e035d88..061453edf6 100644 --- a/input/metadata/herd_init_metadata.json +++ b/input/metadata/herd_init_metadata.json @@ -6,13 +6,6 @@ "path": "input/data/tasks/herd_init_task.json", "type": "json", "properties": "tasks_properties" - }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": ["input/metadata/properties/tasks_properties.json"], - "type": "json", - "properties": "NA" } }, "cross-validation": null diff --git a/input/metadata/update_end_to_end_testing_tm_metadata.json b/input/metadata/update_end_to_end_testing_tm_metadata.json index 673f5a6e06..1213d5efb0 100644 --- a/input/metadata/update_end_to_end_testing_tm_metadata.json +++ b/input/metadata/update_end_to_end_testing_tm_metadata.json @@ -6,13 +6,6 @@ "path": "input/data/tasks/update_end_to_end_expected_results.json", "type": "json", "properties": "tasks_properties" - }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": 
["input/metadata/properties/tasks_properties.json"], - "type": "json", - "properties": "NA" } }, "cross-validation": null diff --git a/input/task_manager_metadata.json b/input/task_manager_metadata.json index 54a2ad3a34..c2f7f3514c 100644 --- a/input/task_manager_metadata.json +++ b/input/task_manager_metadata.json @@ -6,13 +6,6 @@ "path": "input/data/tasks/example_freestall_task.json", "type": "json", "properties": "tasks_properties" - }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "path": "input/metadata/properties/tasks_properties.json", - "type": "json", - "properties": "NA" } }, "cross-validation": null From 953784d4edf8161a71d33f7a6fa90f33ae5f465e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 19 Mar 2026 23:24:00 +0000 Subject: [PATCH 02/11] Apply Black Formatting From 581c2392db09386f43e55ec28ce31a8dfd7be5c9 Mon Sep 17 00:00:00 2001 From: allisterakun Date: Thu, 19 Mar 2026 23:27:11 +0000 Subject: [PATCH 03/11] Update badges on README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0742e29fba..a35fc7c723 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![Flake8](https://img.shields.io/badge/Flake8-passed-brightgreen)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) -[![Pytest](https://img.shields.io/badge/Pytest-passed-brightgreen)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) -[![Coverage](https://img.shields.io/badge/Coverage-99%25-brightgreen)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) +[![Pytest](https://img.shields.io/badge/Pytest-failed-red)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) 
+[![Coverage](https://img.shields.io/badge/Coverage-%25-red)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) [![Mypy](https://img.shields.io/badge/Mypy-1213%20errors-red)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) From ec3dfa845d5d68f2313d2d99728a7a801a2a5c57 Mon Sep 17 00:00:00 2001 From: Allister Liu Date: Fri, 20 Mar 2026 14:06:20 +0800 Subject: [PATCH 04/11] move "cross_validation" and fix unit tests --- RUFAS/data_validator.py | 2 +- RUFAS/input_manager.py | 18 +++- RUFAS/task_manager.py | 14 ++- .../tasks/available_simulation_tasks.json | 10 +- input/data/tasks/end_to_end_testing_task.json | 13 ++- input/data/tasks/example_freestall_task.json | 5 +- input/data/tasks/example_no_animal_task.json | 3 +- input/data/tasks/example_open_lot_task.json | 5 +- input/data/tasks/single_run.json | 5 +- .../available_simulations_metadata.json | 3 +- .../end_to_end_testing_tm_metadata.json | 3 +- .../example_freestall_dairy_metadata.json | 1 - .../metadata/example_no_animal_metadata.json | 1 - input/metadata/example_open_lot_metadata.json | 1 - input/metadata/herd_init_metadata.json | 3 +- .../metadata/properties/tasks_properties.json | 11 +++ ...update_end_to_end_testing_tm_metadata.json | 3 +- input/task_manager_metadata.json | 3 +- tests/test_input_manager.py | 93 +++++++------------ tests/test_task_manager.py | 7 +- 20 files changed, 112 insertions(+), 92 deletions(-) diff --git a/RUFAS/data_validator.py b/RUFAS/data_validator.py index 4ec83a5dfd..b1716fb772 100644 --- a/RUFAS/data_validator.py +++ b/RUFAS/data_validator.py @@ -578,7 +578,7 @@ def _metadata_array_validator(self, key_path: list[str], value: dict[str, Any]) "function": DataValidator._metadata_array_validator.__name__, } required_array_property_keys = {"type", "properties"} - optional_array_property_keys = {"description", "minimum_length", "maximum_length", "nullable"} + optional_array_property_keys = 
{"description", "minimum_length", "maximum_length", "nullable", "default"} valid, message = self._validate_metadata_properties_keys( required_array_property_keys, optional_array_property_keys, value, key_path ) diff --git a/RUFAS/input_manager.py b/RUFAS/input_manager.py index 98cb7589c0..aa0ef8e34a 100644 --- a/RUFAS/input_manager.py +++ b/RUFAS/input_manager.py @@ -104,7 +104,12 @@ def pool(self, incoming_pool: dict[str, Any]) -> None: self.__pool = incoming_pool def start_data_processing( - self, metadata_path: Path, input_root: Path, task_id: Any, eager_termination: bool = True + self, + metadata_path: Path, + input_root: Path, + task_id: Any, + cross_validation_file_paths: list[str] | None, + eager_termination: bool = True ) -> bool: """ Starts the pipeline for organizing metadata and input data processing. @@ -117,6 +122,8 @@ def start_data_processing( Root directory for all input files. task_id : Any Task ID for the current process. + cross_validation_file_paths : list[str] | None + A list of file paths to cross-validation files. eager_termination : bool, default=True If True, the process will be terminated as soon as finding invalid data and failing to fix it. If False, the process will be terminated after going through and validating the entire data. 
@@ -149,16 +156,20 @@ def start_data_processing( self.om.route_logs(self.data_validator.event_logs) raise ValueError(message) is_input_data_valid = self._populate_pool(input_root, eager_termination) - is_input_data_valid = self._cross_validate_data(eager_termination) and is_input_data_valid + is_input_data_valid = ( + self._cross_validate_data(cross_validation_file_paths, eager_termination) and is_input_data_valid + ) self.om.route_logs(self.data_validator.event_logs) return is_input_data_valid - def _cross_validate_data(self, eager_termination: bool) -> bool: + def _cross_validate_data(self, cross_validation_file_paths: list[str] | None, eager_termination: bool) -> bool: """ Validates data against cross-validation rules and reports any failures. Parameters ---------- + cross_validation_file_paths : list[str] | None + A list of file paths to cross-validation rules. eager_termination : bool If True, the validation process stops after the first cross-validation failure. Otherwise, it continues validating all the rules. @@ -170,7 +181,6 @@ def _cross_validate_data(self, eager_termination: bool) -> bool: or more rules fail. 
""" failing_cross_validation_blocks: list[str] = [] - cross_validation_file_paths: list[str] | None = self.__metadata.get("cross-validation", None) cross_validation_rules = self._load_cross_validation(cross_validation_file_paths) if cross_validation_rules is not None and len(cross_validation_rules) > 0: for cross_validation_ruleset in cross_validation_rules: diff --git a/RUFAS/task_manager.py b/RUFAS/task_manager.py index 38e353fd12..5a3ef29204 100644 --- a/RUFAS/task_manager.py +++ b/RUFAS/task_manager.py @@ -146,7 +146,12 @@ def start( "function": TaskManager.start.__name__, } self.output_manager.add_log("Task Manager Start", "Task Manager Started.", info_map) - is_data_valid = self.input_manager.start_data_processing(metadata_path, Path(""), task_id="TASK MANAGER") + is_data_valid = self.input_manager.start_data_processing( + metadata_path=metadata_path, + input_root=Path(""), + task_id="TASK MANAGER", + cross_validation_file_paths=None + ) task_config: dict[str, Any] = self.input_manager.get_data("tasks") for task in task_config.get("tasks", []): filters_path = Path(task["filters_directory"]) @@ -768,8 +773,13 @@ def handle_input_data_audit( "units": MeasurementUnits.UNITLESS, } output_manager.add_log("Validation start", f"Validating data for {args['metadata_file_path']}...", info_map) + cross_validation_file_paths: list[str] | None = args.get("cross_validation_file_paths", None) is_data_valid = input_manager.start_data_processing( - Path(args["metadata_file_path"]), Path(args["input_root"]), args["task_id"], eager_termination + Path(args["metadata_file_path"]), + Path(args["input_root"]), + args["task_id"], + cross_validation_file_paths, + eager_termination ) output_manager.add_log( "Validation complete", f"{args['output_prefix']} validation status: {is_data_valid}", info_map diff --git a/input/data/tasks/available_simulation_tasks.json b/input/data/tasks/available_simulation_tasks.json index f1041b195b..f7669d582c 100644 --- 
a/input/data/tasks/available_simulation_tasks.json +++ b/input/data/tasks/available_simulation_tasks.json @@ -6,14 +6,20 @@ "metadata_file_path": "input/metadata/example_freestall_dairy_metadata.json", "output_prefix": "example", "log_verbosity": "errors", - "random_seed": 42 + "random_seed": 42, + "cross_validation_file_paths": [ + "input/metadata/cross_validation/example_cross_validation.json" + ] }, { "task_type": "SIMULATION_SINGLE_RUN", "metadata_file_path": "input/metadata/example_open_lot_metadata.json", "output_prefix": "example", "log_verbosity": "errors", - "random_seed": 42 + "random_seed": 42, + "cross_validation_file_paths": [ + "input/metadata/cross_validation/example_cross_validation.json" + ] } ] } \ No newline at end of file diff --git a/input/data/tasks/end_to_end_testing_task.json b/input/data/tasks/end_to_end_testing_task.json index 6aeebbee4d..27ba23818d 100644 --- a/input/data/tasks/end_to_end_testing_task.json +++ b/input/data/tasks/end_to_end_testing_task.json @@ -33,7 +33,10 @@ "csv_output_directory": "output/CSVs/.", "json_output_directory": "output/JSONs/.", "report_directory": "output/reports/.", - "graphics_directory": "output/graphics/." + "graphics_directory": "output/graphics/.", + "cross_validation_file_paths": [ + "input/metadata/cross_validation/example_cross_validation.json" + ] }, { "task_type": "END_TO_END_TESTING", @@ -64,7 +67,10 @@ "csv_output_directory": "output/CSVs/.", "json_output_directory": "output/JSONs/.", "report_directory": "output/reports/.", - "graphics_directory": "output/graphics/." + "graphics_directory": "output/graphics/.", + "cross_validation_file_paths": [ + "input/metadata/cross_validation/example_cross_validation.json" + ] }, { "task_type": "END_TO_END_TESTING", @@ -95,7 +101,8 @@ "csv_output_directory": "output/CSVs/.", "json_output_directory": "output/JSONs/.", "report_directory": "output/reports/.", - "graphics_directory": "output/graphics/." 
+ "graphics_directory": "output/graphics/.", + "cross_validation_file_paths": [] } ] } \ No newline at end of file diff --git a/input/data/tasks/example_freestall_task.json b/input/data/tasks/example_freestall_task.json index 586dc0a653..e36254fe3a 100644 --- a/input/data/tasks/example_freestall_task.json +++ b/input/data/tasks/example_freestall_task.json @@ -7,7 +7,10 @@ "output_prefix": "freestall", "log_verbosity": "errors", "random_seed": 42, - "exclude_info_maps": false + "exclude_info_maps": false, + "cross_validation_file_paths": [ + "input/metadata/cross_validation/example_cross_validation.json" + ] } ] } diff --git a/input/data/tasks/example_no_animal_task.json b/input/data/tasks/example_no_animal_task.json index f609c76a9e..91e2638aad 100644 --- a/input/data/tasks/example_no_animal_task.json +++ b/input/data/tasks/example_no_animal_task.json @@ -33,7 +33,8 @@ "csv_output_directory": "output/CSVs/.", "json_output_directory": "output/JSONs/.", "report_directory": "output/reports/.", - "graphics_directory": "output/graphics/." 
+ "graphics_directory": "output/graphics/.", + "cross_validation_file_paths": [] } ] } \ No newline at end of file diff --git a/input/data/tasks/example_open_lot_task.json b/input/data/tasks/example_open_lot_task.json index 920fc49de3..c8e9b8e5ef 100644 --- a/input/data/tasks/example_open_lot_task.json +++ b/input/data/tasks/example_open_lot_task.json @@ -6,7 +6,10 @@ "metadata_file_path": "input/metadata/example_open_lot_metadata.json", "output_prefix": "open_lot", "log_verbosity": "errors", - "random_seed": 42 + "random_seed": 42, + "cross_validation_file_paths": [ + "input/metadata/cross_validation/example_cross_validation.json" + ] } ] } \ No newline at end of file diff --git a/input/data/tasks/single_run.json b/input/data/tasks/single_run.json index bb57ae2003..b6643ddf42 100644 --- a/input/data/tasks/single_run.json +++ b/input/data/tasks/single_run.json @@ -6,7 +6,10 @@ "metadata_file_path": "input/metadata/example_freestall_dairy_metadata.json", "output_prefix": "Task 2", "log_verbosity": "warnings", - "random_seed": 42 + "random_seed": 42, + "cross_validation_file_paths": [ + "input/metadata/cross_validation/example_cross_validation.json" + ] } ] } diff --git a/input/metadata/available_simulations_metadata.json b/input/metadata/available_simulations_metadata.json index 056abcd340..5046a90bed 100644 --- a/input/metadata/available_simulations_metadata.json +++ b/input/metadata/available_simulations_metadata.json @@ -7,6 +7,5 @@ "type": "json", "properties": "tasks_properties" } - }, - "cross-validation": null + } } diff --git a/input/metadata/end_to_end_testing_tm_metadata.json b/input/metadata/end_to_end_testing_tm_metadata.json index 3278fae38d..497d2c3425 100644 --- a/input/metadata/end_to_end_testing_tm_metadata.json +++ b/input/metadata/end_to_end_testing_tm_metadata.json @@ -7,6 +7,5 @@ "type": "json", "properties": "tasks_properties" } - }, - "cross-validation": null + } } diff --git a/input/metadata/example_freestall_dairy_metadata.json 
b/input/metadata/example_freestall_dairy_metadata.json index 9f2b7fb961..3498b9c9de 100644 --- a/input/metadata/example_freestall_dairy_metadata.json +++ b/input/metadata/example_freestall_dairy_metadata.json @@ -239,7 +239,6 @@ "properties": "feed_storage_instances" } }, - "cross-validation": ["input/metadata/cross_validation/example_cross_validation.json"], "runtime_metadata": { "EEE_econ": { "path": "input/metadata/EEE/econ_metadata.json" diff --git a/input/metadata/example_no_animal_metadata.json b/input/metadata/example_no_animal_metadata.json index 4d6ffaf628..d62fc9048c 100644 --- a/input/metadata/example_no_animal_metadata.json +++ b/input/metadata/example_no_animal_metadata.json @@ -246,7 +246,6 @@ "properties": "feed_storage_instances" } }, - "cross-validation": [], "runtime_metadata": { "EEE_econ": { "path": "input/metadata/EEE/econ_metadata.json" diff --git a/input/metadata/example_open_lot_metadata.json b/input/metadata/example_open_lot_metadata.json index 2bd42a9299..9d62c9c405 100644 --- a/input/metadata/example_open_lot_metadata.json +++ b/input/metadata/example_open_lot_metadata.json @@ -246,7 +246,6 @@ "properties": "feed_storage_instances" } }, - "cross-validation": ["input/metadata/cross_validation/example_cross_validation.json"], "runtime_metadata": { "EEE_econ": { "path": "input/metadata/EEE/econ_metadata.json" diff --git a/input/metadata/herd_init_metadata.json b/input/metadata/herd_init_metadata.json index 061453edf6..31c495da56 100644 --- a/input/metadata/herd_init_metadata.json +++ b/input/metadata/herd_init_metadata.json @@ -7,6 +7,5 @@ "type": "json", "properties": "tasks_properties" } - }, - "cross-validation": null + } } diff --git a/input/metadata/properties/tasks_properties.json b/input/metadata/properties/tasks_properties.json index edfd01bce9..660576f454 100644 --- a/input/metadata/properties/tasks_properties.json +++ b/input/metadata/properties/tasks_properties.json @@ -273,6 +273,17 @@ "minimum": 0.01, "maximum": 1, "default": 1 
+ }, + "cross_validation_file_paths": { + "type": "array", + "description": "Collection of file paths to the cross validation rule JSON files.", + "properties": { + "type": "string", + "pattern": "^(?:[a-zA-Z]:[\\\\/]|/)?(?:[a-zA-Z0-9._\\-\\s]+[\\\\/])*(?:[a-zA-Z0-9._\\-\\s]+\\.json)$", + "description": "The path to the cross validation rule JSON files." + }, + "nullable": true, + "default": null } } } diff --git a/input/metadata/update_end_to_end_testing_tm_metadata.json b/input/metadata/update_end_to_end_testing_tm_metadata.json index 1213d5efb0..4d36414fac 100644 --- a/input/metadata/update_end_to_end_testing_tm_metadata.json +++ b/input/metadata/update_end_to_end_testing_tm_metadata.json @@ -7,6 +7,5 @@ "type": "json", "properties": "tasks_properties" } - }, - "cross-validation": null + } } diff --git a/input/task_manager_metadata.json b/input/task_manager_metadata.json index c2f7f3514c..5674a5fcdc 100644 --- a/input/task_manager_metadata.json +++ b/input/task_manager_metadata.json @@ -7,6 +7,5 @@ "type": "json", "properties": "tasks_properties" } - }, - "cross-validation": null + } } diff --git a/tests/test_input_manager.py b/tests/test_input_manager.py index 115f834da0..5464e47f0f 100644 --- a/tests/test_input_manager.py +++ b/tests/test_input_manager.py @@ -162,24 +162,16 @@ def test_load_properties_combines_multiple_files(mock_input_manager: InputManage mocker.patch.object(Path, "exists", return_value=True) first_properties = {"key1": "value1"} second_properties = {"key2": "value2"} + third_properties = {"key3": "value3"} mocker.patch( "RUFAS.input_manager.InputManager._load_data_from_json", - side_effect=[first_properties, second_properties], + side_effect=[first_properties, second_properties, third_properties], ) setattr( mock_input_manager, "_InputManager__metadata", - { - "files": { - "properties": { - "paths": [ - "path/to/properties.json", - "path/to/commodity_properties.json", - ] - } - } - }, + {}, ) mock_input_manager._load_properties() @@ -188,6 
+180,7 @@ def test_load_properties_combines_multiple_files(mock_input_manager: InputManage assert metadata["properties"] == { "key1": "value1", "key2": "value2", + "key3": "value3", } @@ -198,22 +191,11 @@ def test_load_properties_overlapping_keys_last_file_wins( mocker.patch.object(Path, "exists", return_value=True) first_properties = {"key1": "value1", "shared": "original"} - second_properties = {"shared": "updated"} + second_properties = {"shared": "intermediate"} + third_properties = {"shared": "updated"} load_json = mocker.patch( "RUFAS.input_manager.InputManager._load_data_from_json", - side_effect=[first_properties, second_properties], - ) - - setattr( - mock_input_manager, - "_InputManager__metadata", - { - "files": { - "properties": { - "paths": ["path/to/properties.json", "path/to/commodity_properties.json"], - } - } - }, + side_effect=[first_properties, second_properties, third_properties], ) mock_input_manager._load_properties() @@ -223,39 +205,7 @@ def test_load_properties_overlapping_keys_last_file_wins( "key1": "value1", "shared": "updated", } - assert "properties" not in metadata["files"] - assert load_json.call_count == 2 - - -def test_load_properties_empty_paths_list_raises_value_error( - mock_input_manager: InputManager, mocker: MockerFixture -) -> None: - mocker.patch.object(Path, "exists", return_value=True) - setattr( - mock_input_manager, - "_InputManager__metadata", - {"files": {"properties": {"paths": []}}}, - ) - - with patch("RUFAS.output_manager.OutputManager.add_error") as add_error: - with pytest.raises(ValueError): - mock_input_manager._load_properties() - add_error.assert_called_once() - - -def test_load_properties_rejects_non_string_paths(mock_input_manager: InputManager, mocker: MockerFixture) -> None: - """Tests the_load_properties on invalid non string paths.""" - mocker.patch.object(Path, "exists", return_value=True) - setattr( - mock_input_manager, - "_InputManager__metadata", - {"files": {"properties": {"paths": 
["valid/path.json", 123]}}}, - ) - - with patch("RUFAS.output_manager.OutputManager.add_error") as add_error: - with pytest.raises(ValueError): - mock_input_manager._load_properties() - add_error.assert_called_once() + assert load_json.call_count == 3 def test_load_properties_missing_second_file_triggers_error( @@ -466,7 +416,11 @@ def test_start_data_processing( mock_input_manager.data_validator.event_logs.clear() result = mock_input_manager.start_data_processing( - metadata_path=Path("mock/metadata/path"), input_root=Path(""), task_id="1", eager_termination=eager_termination + metadata_path=Path("mock/metadata/path"), + input_root=Path(""), + task_id="1", + cross_validation_file_paths=[], + eager_termination=eager_termination ) assert result is expected_return @@ -571,7 +525,10 @@ def test_cross_validate_data( cv_call = mocker.patch.object(cv_mock, "cross_validate_data", side_effect=side_effect) mock_input_manager.data_validator.event_logs.clear() - result = mock_input_manager._cross_validate_data(eager_termination=eager_termination) + result = mock_input_manager._cross_validate_data( + cross_validation_file_paths=["dummy_path_1", "dummy_path_2"], + eager_termination=eager_termination + ) assert result is expected_return @@ -606,7 +563,13 @@ def test_start_data_processing_invalid_metadata_raises(mock_input_manager: Input setattr(mock_input_manager, "_InputManager__metadata", {"files": {}, "cross-validation": []}) with pytest.raises(ValueError, match="bad meta"): - mock_input_manager.start_data_processing(Path("meta"), Path(""), task_id="1", eager_termination=True) + mock_input_manager.start_data_processing( + Path("meta"), + Path(""), + task_id="1", + cross_validation_file_paths=[], + eager_termination=True + ) mock_load_props.assert_not_called() mock_validate_props.assert_not_called() @@ -630,7 +593,13 @@ def test_start_data_processing_invalid_properties_routes_logs_and_raises( route_logs = mocker.patch.object(mock_input_manager.om, "route_logs") with 
pytest.raises(ValueError, match="bad props"): - mock_input_manager.start_data_processing(Path("meta"), Path(""), task_id="1", eager_termination=False) + mock_input_manager.start_data_processing( + Path("meta"), + Path(""), + task_id="1", + cross_validation_file_paths=[], + eager_termination=False + ) route_logs.assert_called_once_with(mock_input_manager.data_validator.event_logs) diff --git a/tests/test_task_manager.py b/tests/test_task_manager.py index 24eeec6edb..a09b8457d2 100644 --- a/tests/test_task_manager.py +++ b/tests/test_task_manager.py @@ -179,7 +179,12 @@ def test_task_manager_start( ] mock_add_log.assert_has_calls(expected_add_log_calls) - mock_start_data.assert_called_once_with(Path("metadata/path"), Path(""), task_id="TASK MANAGER") + mock_start_data.assert_called_once_with( + metadata_path=Path("metadata/path"), + input_root=Path(""), + task_id="TASK MANAGER", + cross_validation_file_paths=None + ) mock_get_data.assert_called_once_with("tasks") mock_parse_input_tasks.assert_called_once() mock_expand_multi_runs_to_single_runs.assert_called_once() From 8dea66487c03c330c946a7c62757ee53fa960b93 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 20 Mar 2026 06:08:20 +0000 Subject: [PATCH 05/11] Apply Black Formatting --- RUFAS/input_manager.py | 4 ++-- RUFAS/task_manager.py | 7 ++----- tests/test_input_manager.py | 17 ++++------------- tests/test_task_manager.py | 2 +- 4 files changed, 9 insertions(+), 21 deletions(-) diff --git a/RUFAS/input_manager.py b/RUFAS/input_manager.py index aa0ef8e34a..361f5e0466 100644 --- a/RUFAS/input_manager.py +++ b/RUFAS/input_manager.py @@ -109,7 +109,7 @@ def start_data_processing( input_root: Path, task_id: Any, cross_validation_file_paths: list[str] | None, - eager_termination: bool = True + eager_termination: bool = True, ) -> bool: """ Starts the pipeline for organizing metadata and input data processing. 
@@ -157,7 +157,7 @@ def start_data_processing( raise ValueError(message) is_input_data_valid = self._populate_pool(input_root, eager_termination) is_input_data_valid = ( - self._cross_validate_data(cross_validation_file_paths, eager_termination) and is_input_data_valid + self._cross_validate_data(cross_validation_file_paths, eager_termination) and is_input_data_valid ) self.om.route_logs(self.data_validator.event_logs) return is_input_data_valid diff --git a/RUFAS/task_manager.py b/RUFAS/task_manager.py index 5a3ef29204..40592070f2 100644 --- a/RUFAS/task_manager.py +++ b/RUFAS/task_manager.py @@ -147,10 +147,7 @@ def start( } self.output_manager.add_log("Task Manager Start", "Task Manager Started.", info_map) is_data_valid = self.input_manager.start_data_processing( - metadata_path=metadata_path, - input_root=Path(""), - task_id="TASK MANAGER", - cross_validation_file_paths=None + metadata_path=metadata_path, input_root=Path(""), task_id="TASK MANAGER", cross_validation_file_paths=None ) task_config: dict[str, Any] = self.input_manager.get_data("tasks") for task in task_config.get("tasks", []): @@ -779,7 +776,7 @@ def handle_input_data_audit( Path(args["input_root"]), args["task_id"], cross_validation_file_paths, - eager_termination + eager_termination, ) output_manager.add_log( "Validation complete", f"{args['output_prefix']} validation status: {is_data_valid}", info_map diff --git a/tests/test_input_manager.py b/tests/test_input_manager.py index 5464e47f0f..7613221351 100644 --- a/tests/test_input_manager.py +++ b/tests/test_input_manager.py @@ -420,7 +420,7 @@ def test_start_data_processing( input_root=Path(""), task_id="1", cross_validation_file_paths=[], - eager_termination=eager_termination + eager_termination=eager_termination, ) assert result is expected_return @@ -526,8 +526,7 @@ def test_cross_validate_data( mock_input_manager.data_validator.event_logs.clear() result = mock_input_manager._cross_validate_data( - 
cross_validation_file_paths=["dummy_path_1", "dummy_path_2"], - eager_termination=eager_termination + cross_validation_file_paths=["dummy_path_1", "dummy_path_2"], eager_termination=eager_termination ) assert result is expected_return @@ -564,11 +563,7 @@ def test_start_data_processing_invalid_metadata_raises(mock_input_manager: Input with pytest.raises(ValueError, match="bad meta"): mock_input_manager.start_data_processing( - Path("meta"), - Path(""), - task_id="1", - cross_validation_file_paths=[], - eager_termination=True + Path("meta"), Path(""), task_id="1", cross_validation_file_paths=[], eager_termination=True ) mock_load_props.assert_not_called() @@ -594,11 +589,7 @@ def test_start_data_processing_invalid_properties_routes_logs_and_raises( with pytest.raises(ValueError, match="bad props"): mock_input_manager.start_data_processing( - Path("meta"), - Path(""), - task_id="1", - cross_validation_file_paths=[], - eager_termination=False + Path("meta"), Path(""), task_id="1", cross_validation_file_paths=[], eager_termination=False ) route_logs.assert_called_once_with(mock_input_manager.data_validator.event_logs) diff --git a/tests/test_task_manager.py b/tests/test_task_manager.py index a09b8457d2..f3786422f7 100644 --- a/tests/test_task_manager.py +++ b/tests/test_task_manager.py @@ -183,7 +183,7 @@ def test_task_manager_start( metadata_path=Path("metadata/path"), input_root=Path(""), task_id="TASK MANAGER", - cross_validation_file_paths=None + cross_validation_file_paths=None, ) mock_get_data.assert_called_once_with("tasks") mock_parse_input_tasks.assert_called_once() From 6d4dbb74e91b37353781dbcf1f119c6641f69fc4 Mon Sep 17 00:00:00 2001 From: allisterakun Date: Fri, 20 Mar 2026 06:11:46 +0000 Subject: [PATCH 06/11] Update badges on README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a35fc7c723..0742e29fba 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ 
[![Flake8](https://img.shields.io/badge/Flake8-passed-brightgreen)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) -[![Pytest](https://img.shields.io/badge/Pytest-failed-red)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) -[![Coverage](https://img.shields.io/badge/Coverage-%25-red)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) +[![Pytest](https://img.shields.io/badge/Pytest-passed-brightgreen)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) +[![Coverage](https://img.shields.io/badge/Coverage-99%25-brightgreen)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) [![Mypy](https://img.shields.io/badge/Mypy-1213%20errors-red)](https://github.com/RuminantFarmSystems/MASM/actions/workflows/combined_format_lint_test_mypy.yml) From 1dd81eda52291f96621302cd26138076325b3e4e Mon Sep 17 00:00:00 2001 From: Allister Liu <39185942+allisterakun@users.noreply.github.com> Date: Tue, 24 Mar 2026 10:28:55 -0400 Subject: [PATCH 07/11] Update input/metadata/properties/tasks_properties.json Co-authored-by: Niko <70217952+ew3361zh@users.noreply.github.com> --- input/metadata/properties/tasks_properties.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/input/metadata/properties/tasks_properties.json b/input/metadata/properties/tasks_properties.json index 660576f454..ce0a98cf0b 100644 --- a/input/metadata/properties/tasks_properties.json +++ b/input/metadata/properties/tasks_properties.json @@ -276,7 +276,7 @@ }, "cross_validation_file_paths": { "type": "array", - "description": "Collection of file paths to the cross validation rule JSON files.", + "description": "List of the file path(s) to the cross validation rule JSON file(s).", "properties": { "type": "string", "pattern":
"^(?:[a-zA-Z]:[\\\\/]|/)?(?:[a-zA-Z0-9._\\-\\s]+[\\\\/])*(?:[a-zA-Z0-9._\\-\\s]+\\.json)$", From 40add3879a4b7f5d7816f64c0ae5d61ecf8cc671 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 24 Mar 2026 14:30:45 +0000 Subject: [PATCH 08/11] Apply Black Formatting From 64886e5e4e9eefe54474cc92d038e6b6f55ea1a9 Mon Sep 17 00:00:00 2001 From: Allister Liu Date: Wed, 25 Mar 2026 00:47:12 +0800 Subject: [PATCH 09/11] Update changelog.md --- changelog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/changelog.md b/changelog.md index a2ec4b10fd..8cfb10af41 100644 --- a/changelog.md +++ b/changelog.md @@ -49,6 +49,7 @@ v1.0.0 - [2863](https://github.com/RuminantFarmSystems/MASM/pull/2863) - [minor change] [NoInputChange] [NoOutputChange] Updates TaskManager to avoid using multiprocessing when running single tasks. - [2854](https://github.com/RuminantFarmSystems/MASM/pull/2854) - [minor change] [NoInputChange] [NoOutputChange] Update `emissions.py` filtering process and remove `use_filter_key_name` option in the OM filter. - [2872](https://github.com/RuminantFarmSystems/RuFaS/pull/2872) - [minor change] [NoInputChange] [NoOutputChange] Adds information and links for onboarding videos. +- [2869](https://github.com/RuminantFarmSystems/RuFaS/pull/2869) - [minor change] [InputChange] [NoOutputChange] Removes `properties` and `cross-validation` file path definitions from metadata JSON files. Properties file paths are now defined as a module-level constant in `input_manager.py`, and cross-validation file paths are moved to the task configuration JSON files. ### v1.0.0 From a7cbe1dd435f0c09938e83190ee9468ebbfe91a3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 24 Mar 2026 16:48:58 +0000 Subject: [PATCH 10/11] Apply Black Formatting From e62698293b1150630952a6bc30190adb4f7451ab Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 24 Mar 2026 17:32:50 +0000 Subject: [PATCH 11/11] Apply Black Formatting