diff --git a/RUFAS/data_validator.py b/RUFAS/data_validator.py index 4ec83a5dfd..b1716fb772 100644 --- a/RUFAS/data_validator.py +++ b/RUFAS/data_validator.py @@ -578,7 +578,7 @@ def _metadata_array_validator(self, key_path: list[str], value: dict[str, Any]) "function": DataValidator._metadata_array_validator.__name__, } required_array_property_keys = {"type", "properties"} - optional_array_property_keys = {"description", "minimum_length", "maximum_length", "nullable"} + optional_array_property_keys = {"description", "minimum_length", "maximum_length", "nullable", "default"} valid, message = self._validate_metadata_properties_keys( required_array_property_keys, optional_array_property_keys, value, key_path ) diff --git a/RUFAS/input_manager.py b/RUFAS/input_manager.py index cddacb360a..361f5e0466 100644 --- a/RUFAS/input_manager.py +++ b/RUFAS/input_manager.py @@ -47,10 +47,14 @@ "user_feeds", "tractor_dataset", "EEE_constants", - "properties", "feed_storage_configurations", "feed_storage_instances", } +PRROPERTIES_FILE_PATHS: dict[str, Path] = { + "default": Path("input/metadata/properties/default.json"), + "tasks_properties": Path("input/metadata/properties/tasks_properties.json"), + "commodity_properties": Path("input/metadata/properties/commodity_properties.json"), +} class InputManager: @@ -100,7 +104,12 @@ def pool(self, incoming_pool: dict[str, Any]) -> None: self.__pool = incoming_pool def start_data_processing( - self, metadata_path: Path, input_root: Path, task_id: Any, eager_termination: bool = True + self, + metadata_path: Path, + input_root: Path, + task_id: Any, + cross_validation_file_paths: list[str] | None, + eager_termination: bool = True, ) -> bool: """ Starts the pipeline for organizing metadata and input data processing. @@ -113,6 +122,8 @@ def start_data_processing( Root directory for all input files. task_id : Any Task ID for the current process. 
+ cross_validation_file_paths : list[str] | None + A list of file paths to cross-validation files. eager_termination : bool, default=True If True, the process will be terminated as soon as finding invalid data and failing to fix it. If False, the process will be terminated after going through and validating the entire data. @@ -145,16 +156,20 @@ def start_data_processing( self.om.route_logs(self.data_validator.event_logs) raise ValueError(message) is_input_data_valid = self._populate_pool(input_root, eager_termination) - is_input_data_valid = self._cross_validate_data(eager_termination) and is_input_data_valid + is_input_data_valid = ( + self._cross_validate_data(cross_validation_file_paths, eager_termination) and is_input_data_valid + ) self.om.route_logs(self.data_validator.event_logs) return is_input_data_valid - def _cross_validate_data(self, eager_termination: bool) -> bool: + def _cross_validate_data(self, cross_validation_file_paths: list[str] | None, eager_termination: bool) -> bool: """ Validates data against cross-validation rules and reports any failures. Parameters ---------- + cross_validation_file_paths : list[str] | None + A list of file paths to cross-validation rules. eager_termination : bool If True, the validation process stops after the first cross-validation failure. Otherwise, it continues validating all the rules. @@ -166,7 +181,6 @@ def _cross_validate_data(self, eager_termination: bool) -> bool: or more rules fail. 
""" failing_cross_validation_blocks: list[str] = [] - cross_validation_file_paths: list[str] | None = self.__metadata.get("cross-validation", None) cross_validation_rules = self._load_cross_validation(cross_validation_file_paths) if cross_validation_rules is not None and len(cross_validation_rules) > 0: for cross_validation_ruleset in cross_validation_rules: @@ -642,37 +656,23 @@ def _load_properties(self) -> None: "function": self._load_properties.__name__, } try: - properties_metadata = self.__metadata["files"]["properties"] - properties_paths = properties_metadata.get("paths") or properties_metadata.get("path") - - if isinstance(properties_paths, str): - properties_paths = [properties_paths] - if not isinstance(properties_paths, list) or len(properties_paths) == 0: - raise ValueError("Input Manager Error: Properties paths must be a non-empty string or list of strings") - - if not all(isinstance(path, str) and path for path in properties_paths): - raise ValueError("Input Manager Error: Each properties path must be a non-empty string") - self.om.add_log( "load_properties_attempt", - f"Attempting to load properties from {properties_paths}", + f"Attempting to load properties from {PRROPERTIES_FILE_PATHS.values()}", info_map, ) combined_properties: dict[str, Any] = {} - for properties_path_str in properties_paths: - properties_path = Path(properties_path_str) + for properties_path in PRROPERTIES_FILE_PATHS.values(): if not properties_path.exists(): raise FileNotFoundError(f"Input Manager Error: Properties file not found at {properties_path}") loaded_properties = self._load_data_from_json(properties_path) combined_properties.update(loaded_properties) - del self.__metadata["files"]["properties"] - self.__metadata["properties"] = combined_properties self.om.add_log( "load_properties_success", - f"Successfully loaded properties from {properties_paths}", + f"Successfully loaded properties from {PRROPERTIES_FILE_PATHS.values()}", info_map, ) diff --git 
a/RUFAS/task_manager.py b/RUFAS/task_manager.py index 38e353fd12..40592070f2 100644 --- a/RUFAS/task_manager.py +++ b/RUFAS/task_manager.py @@ -146,7 +146,9 @@ def start( "function": TaskManager.start.__name__, } self.output_manager.add_log("Task Manager Start", "Task Manager Started.", info_map) - is_data_valid = self.input_manager.start_data_processing(metadata_path, Path(""), task_id="TASK MANAGER") + is_data_valid = self.input_manager.start_data_processing( + metadata_path=metadata_path, input_root=Path(""), task_id="TASK MANAGER", cross_validation_file_paths=None + ) task_config: dict[str, Any] = self.input_manager.get_data("tasks") for task in task_config.get("tasks", []): filters_path = Path(task["filters_directory"]) @@ -768,8 +770,13 @@ def handle_input_data_audit( "units": MeasurementUnits.UNITLESS, } output_manager.add_log("Validation start", f"Validating data for {args['metadata_file_path']}...", info_map) + cross_validation_file_paths: list[str] | None = args.get("cross_validation_file_paths", None) is_data_valid = input_manager.start_data_processing( - Path(args["metadata_file_path"]), Path(args["input_root"]), args["task_id"], eager_termination + Path(args["metadata_file_path"]), + Path(args["input_root"]), + args["task_id"], + cross_validation_file_paths, + eager_termination, ) output_manager.add_log( "Validation complete", f"{args['output_prefix']} validation status: {is_data_valid}", info_map diff --git a/changelog.md b/changelog.md index fc0fd47919..ca144ea1f0 100644 --- a/changelog.md +++ b/changelog.md @@ -50,6 +50,7 @@ v1.0.0 - [2854](https://github.com/RuminantFarmSystems/MASM/pull/2854) - [minor change] [NoInputChange] [NoOutputChange] Update `emissions.py` filtering process and remove `use_filter_key_name` option in the OM filter. - [2872](https://github.com/RuminantFarmSystems/RuFaS/pull/2872) - [minor change] [NoInputChange] [NoOutputChange] Adds information and links for onboarding videos. 
- [2850](https://github.com/RuminantFarmSystems/MASM/pull/2850) - [minor change] [NoInputChange] [NoOutputChange] Refactor `Pen.get_manure_stream()`. +- [2869](https://github.com/RuminantFarmSystems/RuFaS/pull/2869) - [minor change] [InputChange] [NoOutputChange] Removes `properties` and `cross-validation` file path definitions from metadata JSON files. Properties file paths are now defined as a module-level constant in `input_manager.py`, and cross-validation file paths are moved to the task configuration JSON files. ### v1.0.0 diff --git a/input/data/tasks/available_simulation_tasks.json b/input/data/tasks/available_simulation_tasks.json index f1041b195b..f7669d582c 100644 --- a/input/data/tasks/available_simulation_tasks.json +++ b/input/data/tasks/available_simulation_tasks.json @@ -6,14 +6,20 @@ "metadata_file_path": "input/metadata/example_freestall_dairy_metadata.json", "output_prefix": "example", "log_verbosity": "errors", - "random_seed": 42 + "random_seed": 42, + "cross_validation_file_paths": [ + "input/metadata/cross_validation/example_cross_validation.json" + ] }, { "task_type": "SIMULATION_SINGLE_RUN", "metadata_file_path": "input/metadata/example_open_lot_metadata.json", "output_prefix": "example", "log_verbosity": "errors", - "random_seed": 42 + "random_seed": 42, + "cross_validation_file_paths": [ + "input/metadata/cross_validation/example_cross_validation.json" + ] } ] } \ No newline at end of file diff --git a/input/data/tasks/end_to_end_testing_task.json b/input/data/tasks/end_to_end_testing_task.json index 6aeebbee4d..27ba23818d 100644 --- a/input/data/tasks/end_to_end_testing_task.json +++ b/input/data/tasks/end_to_end_testing_task.json @@ -33,7 +33,10 @@ "csv_output_directory": "output/CSVs/.", "json_output_directory": "output/JSONs/.", "report_directory": "output/reports/.", - "graphics_directory": "output/graphics/." 
+ "graphics_directory": "output/graphics/.", + "cross_validation_file_paths": [ + "input/metadata/cross_validation/example_cross_validation.json" + ] }, { "task_type": "END_TO_END_TESTING", @@ -64,7 +67,10 @@ "csv_output_directory": "output/CSVs/.", "json_output_directory": "output/JSONs/.", "report_directory": "output/reports/.", - "graphics_directory": "output/graphics/." + "graphics_directory": "output/graphics/.", + "cross_validation_file_paths": [ + "input/metadata/cross_validation/example_cross_validation.json" + ] }, { "task_type": "END_TO_END_TESTING", @@ -95,7 +101,8 @@ "csv_output_directory": "output/CSVs/.", "json_output_directory": "output/JSONs/.", "report_directory": "output/reports/.", - "graphics_directory": "output/graphics/." + "graphics_directory": "output/graphics/.", + "cross_validation_file_paths": [] } ] } \ No newline at end of file diff --git a/input/data/tasks/example_freestall_task.json b/input/data/tasks/example_freestall_task.json index 586dc0a653..e36254fe3a 100644 --- a/input/data/tasks/example_freestall_task.json +++ b/input/data/tasks/example_freestall_task.json @@ -7,7 +7,10 @@ "output_prefix": "freestall", "log_verbosity": "errors", "random_seed": 42, - "exclude_info_maps": false + "exclude_info_maps": false, + "cross_validation_file_paths": [ + "input/metadata/cross_validation/example_cross_validation.json" + ] } ] } diff --git a/input/data/tasks/example_no_animal_task.json b/input/data/tasks/example_no_animal_task.json index f609c76a9e..91e2638aad 100644 --- a/input/data/tasks/example_no_animal_task.json +++ b/input/data/tasks/example_no_animal_task.json @@ -33,7 +33,8 @@ "csv_output_directory": "output/CSVs/.", "json_output_directory": "output/JSONs/.", "report_directory": "output/reports/.", - "graphics_directory": "output/graphics/." 
+ "graphics_directory": "output/graphics/.", + "cross_validation_file_paths": [] } ] } \ No newline at end of file diff --git a/input/data/tasks/example_open_lot_task.json b/input/data/tasks/example_open_lot_task.json index 920fc49de3..c8e9b8e5ef 100644 --- a/input/data/tasks/example_open_lot_task.json +++ b/input/data/tasks/example_open_lot_task.json @@ -6,7 +6,10 @@ "metadata_file_path": "input/metadata/example_open_lot_metadata.json", "output_prefix": "open_lot", "log_verbosity": "errors", - "random_seed": 42 + "random_seed": 42, + "cross_validation_file_paths": [ + "input/metadata/cross_validation/example_cross_validation.json" + ] } ] } \ No newline at end of file diff --git a/input/data/tasks/single_run.json b/input/data/tasks/single_run.json index bb57ae2003..b6643ddf42 100644 --- a/input/data/tasks/single_run.json +++ b/input/data/tasks/single_run.json @@ -6,7 +6,10 @@ "metadata_file_path": "input/metadata/example_freestall_dairy_metadata.json", "output_prefix": "Task 2", "log_verbosity": "warnings", - "random_seed": 42 + "random_seed": 42, + "cross_validation_file_paths": [ + "input/metadata/cross_validation/example_cross_validation.json" + ] } ] } diff --git a/input/metadata/available_simulations_metadata.json b/input/metadata/available_simulations_metadata.json index 9d6d35b3ae..5046a90bed 100644 --- a/input/metadata/available_simulations_metadata.json +++ b/input/metadata/available_simulations_metadata.json @@ -6,14 +6,6 @@ "path": "input/data/tasks/available_simulation_tasks.json", "type": "json", "properties": "tasks_properties" - }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": ["input/metadata/properties/tasks_properties.json"], - "type": "json", - "properties": "NA" } - }, - "cross-validation": null + } } diff --git a/input/metadata/end_to_end_testing/freestall_e2e_metadata.json b/input/metadata/end_to_end_testing/freestall_e2e_metadata.json index ec906b15a7..da04f16758 100644 --- 
a/input/metadata/end_to_end_testing/freestall_e2e_metadata.json +++ b/input/metadata/end_to_end_testing/freestall_e2e_metadata.json @@ -231,16 +231,6 @@ "type": "json", "properties": "EEE_constants_properties" }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": [ - "input/metadata/properties/default.json", - "input/metadata/properties/commodity_properties.json" - ], - "type": "json", - "properties": "NA" - }, "feed_storage_configurations": { "title": "Feed Management", "description": "Configurations for feed storage units.", diff --git a/input/metadata/end_to_end_testing/no_animal_e2e_metadata.json b/input/metadata/end_to_end_testing/no_animal_e2e_metadata.json index 95bf425328..e17b518619 100644 --- a/input/metadata/end_to_end_testing/no_animal_e2e_metadata.json +++ b/input/metadata/end_to_end_testing/no_animal_e2e_metadata.json @@ -231,16 +231,6 @@ "type": "json", "properties": "EEE_constants_properties" }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": [ - "input/metadata/properties/default.json", - "input/metadata/properties/commodity_properties.json" - ], - "type": "json", - "properties": "NA" - }, "feed_storage_configurations": { "title": "Feed Management", "description": "Configurations for feed storage units.", diff --git a/input/metadata/end_to_end_testing/open_lot_e2e_metadata.json b/input/metadata/end_to_end_testing/open_lot_e2e_metadata.json index 630c44fae0..745192a557 100644 --- a/input/metadata/end_to_end_testing/open_lot_e2e_metadata.json +++ b/input/metadata/end_to_end_testing/open_lot_e2e_metadata.json @@ -231,16 +231,6 @@ "type": "json", "properties": "EEE_constants_properties" }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": [ - "input/metadata/properties/default.json", - "input/metadata/properties/commodity_properties.json" - ], - "type": "json", - 
"properties": "NA" - }, "feed_storage_configurations": { "title": "Feed Management", "description": "Configurations for feed storage units.", diff --git a/input/metadata/end_to_end_testing_tm_metadata.json b/input/metadata/end_to_end_testing_tm_metadata.json index 6c0df55ae7..497d2c3425 100644 --- a/input/metadata/end_to_end_testing_tm_metadata.json +++ b/input/metadata/end_to_end_testing_tm_metadata.json @@ -6,14 +6,6 @@ "path": "input/data/tasks/end_to_end_testing_task.json", "type": "json", "properties": "tasks_properties" - }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": ["input/metadata/properties/tasks_properties.json"], - "type": "json", - "properties": "NA" } - }, - "cross-validation": null + } } diff --git a/input/metadata/example_freestall_dairy_metadata.json b/input/metadata/example_freestall_dairy_metadata.json index 566dc24af6..3498b9c9de 100644 --- a/input/metadata/example_freestall_dairy_metadata.json +++ b/input/metadata/example_freestall_dairy_metadata.json @@ -224,16 +224,6 @@ "type": "json", "properties": "EEE_constants_properties" }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": [ - "input/metadata/properties/default.json", - "input/metadata/properties/commodity_properties.json" - ], - "type": "json", - "properties": "NA" - }, "feed_storage_configurations": { "title": "Feed Management", "description": "Configurations for feed storage units.", @@ -249,7 +239,6 @@ "properties": "feed_storage_instances" } }, - "cross-validation": ["input/metadata/cross_validation/example_cross_validation.json"], "runtime_metadata": { "EEE_econ": { "path": "input/metadata/EEE/econ_metadata.json" diff --git a/input/metadata/example_no_animal_metadata.json b/input/metadata/example_no_animal_metadata.json index 014c99e455..d62fc9048c 100644 --- a/input/metadata/example_no_animal_metadata.json +++ 
b/input/metadata/example_no_animal_metadata.json @@ -224,16 +224,6 @@ "type": "json", "properties": "EEE_constants_properties" }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": [ - "input/metadata/properties/default.json", - "input/metadata/properties/commodity_properties.json" - ], - "type": "json", - "properties": "NA" - }, "feed_management": { "title": "Feed Management", "description": "Configurations for feed storage units.", @@ -256,7 +246,6 @@ "properties": "feed_storage_instances" } }, - "cross-validation": [], "runtime_metadata": { "EEE_econ": { "path": "input/metadata/EEE/econ_metadata.json" diff --git a/input/metadata/example_open_lot_metadata.json b/input/metadata/example_open_lot_metadata.json index 16ef55ba1f..9d62c9c405 100644 --- a/input/metadata/example_open_lot_metadata.json +++ b/input/metadata/example_open_lot_metadata.json @@ -224,16 +224,6 @@ "type": "json", "properties": "EEE_constants_properties" }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": [ - "input/metadata/properties/default.json", - "input/metadata/properties/commodity_properties.json" - ], - "type": "json", - "properties": "NA" - }, "feed_management": { "title": "Feed Management", "description": "Configurations for feed storage units.", @@ -256,7 +246,6 @@ "properties": "feed_storage_instances" } }, - "cross-validation": ["input/metadata/cross_validation/example_cross_validation.json"], "runtime_metadata": { "EEE_econ": { "path": "input/metadata/EEE/econ_metadata.json" diff --git a/input/metadata/herd_init_metadata.json b/input/metadata/herd_init_metadata.json index f10e035d88..31c495da56 100644 --- a/input/metadata/herd_init_metadata.json +++ b/input/metadata/herd_init_metadata.json @@ -6,14 +6,6 @@ "path": "input/data/tasks/herd_init_task.json", "type": "json", "properties": "tasks_properties" - }, - "properties": { - "title": "Metadata Properties", 
- "description": "The properties of input data.", - "paths": ["input/metadata/properties/tasks_properties.json"], - "type": "json", - "properties": "NA" } - }, - "cross-validation": null + } } diff --git a/input/metadata/properties/tasks_properties.json b/input/metadata/properties/tasks_properties.json index edfd01bce9..ce0a98cf0b 100644 --- a/input/metadata/properties/tasks_properties.json +++ b/input/metadata/properties/tasks_properties.json @@ -273,6 +273,17 @@ "minimum": 0.01, "maximum": 1, "default": 1 + }, + "cross_validation_file_paths": { + "type": "array", + "description": "List of the file path(s) to the cross validation rule JSON file(s).", + "properties": { + "type": "string", + "pattern": "^(?:[a-zA-Z]:[\\\\/]|/)?(?:[a-zA-Z0-9._\\-\\s]+[\\\\/])*(?:[a-zA-Z0-9._\\-\\s]+\\.json)$", + "description": "The path to a cross validation rule JSON file." + }, + "nullable": true, + "default": null } } } diff --git a/input/metadata/update_end_to_end_testing_tm_metadata.json b/input/metadata/update_end_to_end_testing_tm_metadata.json index 673f5a6e06..4d36414fac 100644 --- a/input/metadata/update_end_to_end_testing_tm_metadata.json +++ b/input/metadata/update_end_to_end_testing_tm_metadata.json @@ -6,14 +6,6 @@ "path": "input/data/tasks/update_end_to_end_expected_results.json", "type": "json", "properties": "tasks_properties" - }, - "properties": { - "title": "Metadata Properties", - "description": "The properties of input data.", - "paths": ["input/metadata/properties/tasks_properties.json"], - "type": "json", - "properties": "NA" } - }, - "cross-validation": null + } } diff --git a/input/task_manager_metadata.json b/input/task_manager_metadata.json index 54a2ad3a34..5674a5fcdc 100644 --- a/input/task_manager_metadata.json +++ b/input/task_manager_metadata.json @@ -6,14 +6,6 @@ "path": "input/data/tasks/example_freestall_task.json", "type": "json", "properties": "tasks_properties" - }, - "properties": { - "title": "Metadata Properties", - "description": "The
properties of input data.", - "path": "input/metadata/properties/tasks_properties.json", - "type": "json", - "properties": "NA" } - }, - "cross-validation": null + } } diff --git a/tests/test_input_manager.py b/tests/test_input_manager.py index 115f834da0..7613221351 100644 --- a/tests/test_input_manager.py +++ b/tests/test_input_manager.py @@ -162,24 +162,16 @@ def test_load_properties_combines_multiple_files(mock_input_manager: InputManage mocker.patch.object(Path, "exists", return_value=True) first_properties = {"key1": "value1"} second_properties = {"key2": "value2"} + third_properties = {"key3": "value3"} mocker.patch( "RUFAS.input_manager.InputManager._load_data_from_json", - side_effect=[first_properties, second_properties], + side_effect=[first_properties, second_properties, third_properties], ) setattr( mock_input_manager, "_InputManager__metadata", - { - "files": { - "properties": { - "paths": [ - "path/to/properties.json", - "path/to/commodity_properties.json", - ] - } - } - }, + {}, ) mock_input_manager._load_properties() @@ -188,6 +180,7 @@ def test_load_properties_combines_multiple_files(mock_input_manager: InputManage assert metadata["properties"] == { "key1": "value1", "key2": "value2", + "key3": "value3", } @@ -198,22 +191,11 @@ def test_load_properties_overlapping_keys_last_file_wins( mocker.patch.object(Path, "exists", return_value=True) first_properties = {"key1": "value1", "shared": "original"} - second_properties = {"shared": "updated"} + second_properties = {"shared": "intermediate"} + third_properties = {"shared": "updated"} load_json = mocker.patch( "RUFAS.input_manager.InputManager._load_data_from_json", - side_effect=[first_properties, second_properties], - ) - - setattr( - mock_input_manager, - "_InputManager__metadata", - { - "files": { - "properties": { - "paths": ["path/to/properties.json", "path/to/commodity_properties.json"], - } - } - }, + side_effect=[first_properties, second_properties, third_properties], ) 
mock_input_manager._load_properties() @@ -223,39 +205,7 @@ def test_load_properties_overlapping_keys_last_file_wins( "key1": "value1", "shared": "updated", } - assert "properties" not in metadata["files"] - assert load_json.call_count == 2 - - -def test_load_properties_empty_paths_list_raises_value_error( - mock_input_manager: InputManager, mocker: MockerFixture -) -> None: - mocker.patch.object(Path, "exists", return_value=True) - setattr( - mock_input_manager, - "_InputManager__metadata", - {"files": {"properties": {"paths": []}}}, - ) - - with patch("RUFAS.output_manager.OutputManager.add_error") as add_error: - with pytest.raises(ValueError): - mock_input_manager._load_properties() - add_error.assert_called_once() - - -def test_load_properties_rejects_non_string_paths(mock_input_manager: InputManager, mocker: MockerFixture) -> None: - """Tests the_load_properties on invalid non string paths.""" - mocker.patch.object(Path, "exists", return_value=True) - setattr( - mock_input_manager, - "_InputManager__metadata", - {"files": {"properties": {"paths": ["valid/path.json", 123]}}}, - ) - - with patch("RUFAS.output_manager.OutputManager.add_error") as add_error: - with pytest.raises(ValueError): - mock_input_manager._load_properties() - add_error.assert_called_once() + assert load_json.call_count == 3 def test_load_properties_missing_second_file_triggers_error( @@ -466,7 +416,11 @@ def test_start_data_processing( mock_input_manager.data_validator.event_logs.clear() result = mock_input_manager.start_data_processing( - metadata_path=Path("mock/metadata/path"), input_root=Path(""), task_id="1", eager_termination=eager_termination + metadata_path=Path("mock/metadata/path"), + input_root=Path(""), + task_id="1", + cross_validation_file_paths=[], + eager_termination=eager_termination, ) assert result is expected_return @@ -571,7 +525,9 @@ def test_cross_validate_data( cv_call = mocker.patch.object(cv_mock, "cross_validate_data", side_effect=side_effect) 
mock_input_manager.data_validator.event_logs.clear() - result = mock_input_manager._cross_validate_data(eager_termination=eager_termination) + result = mock_input_manager._cross_validate_data( + cross_validation_file_paths=["dummy_path_1", "dummy_path_2"], eager_termination=eager_termination + ) assert result is expected_return @@ -606,7 +562,9 @@ def test_start_data_processing_invalid_metadata_raises(mock_input_manager: Input setattr(mock_input_manager, "_InputManager__metadata", {"files": {}, "cross-validation": []}) with pytest.raises(ValueError, match="bad meta"): - mock_input_manager.start_data_processing(Path("meta"), Path(""), task_id="1", eager_termination=True) + mock_input_manager.start_data_processing( + Path("meta"), Path(""), task_id="1", cross_validation_file_paths=[], eager_termination=True + ) mock_load_props.assert_not_called() mock_validate_props.assert_not_called() @@ -630,7 +588,9 @@ def test_start_data_processing_invalid_properties_routes_logs_and_raises( route_logs = mocker.patch.object(mock_input_manager.om, "route_logs") with pytest.raises(ValueError, match="bad props"): - mock_input_manager.start_data_processing(Path("meta"), Path(""), task_id="1", eager_termination=False) + mock_input_manager.start_data_processing( + Path("meta"), Path(""), task_id="1", cross_validation_file_paths=[], eager_termination=False + ) route_logs.assert_called_once_with(mock_input_manager.data_validator.event_logs) diff --git a/tests/test_task_manager.py b/tests/test_task_manager.py index 24eeec6edb..f3786422f7 100644 --- a/tests/test_task_manager.py +++ b/tests/test_task_manager.py @@ -179,7 +179,12 @@ def test_task_manager_start( ] mock_add_log.assert_has_calls(expected_add_log_calls) - mock_start_data.assert_called_once_with(Path("metadata/path"), Path(""), task_id="TASK MANAGER") + mock_start_data.assert_called_once_with( + metadata_path=Path("metadata/path"), + input_root=Path(""), + task_id="TASK MANAGER", + cross_validation_file_paths=None, + ) 
mock_get_data.assert_called_once_with("tasks") mock_parse_input_tasks.assert_called_once() mock_expand_multi_runs_to_single_runs.assert_called_once()