diff --git a/RUFAS/output_manager.py b/RUFAS/output_manager.py index d79b1548af..bd55b9cca6 100644 --- a/RUFAS/output_manager.py +++ b/RUFAS/output_manager.py @@ -1336,15 +1336,23 @@ def filter_variables_pool( ) if filter_content.get("expand_data", False): + if self.time is None: + raise RuntimeError("Cannot expand data because OutputManager's 'time' attribute is not initialized.") + simulation_length = self.time.simulation_length_days fill_value = filter_content.get("fill_value", np.nan) + use_fill_value_before_start = filter_content.get("use_fill_value_before_start", True) use_fill_value_in_gaps = filter_content.get("use_fill_value_in_gaps", True) use_fill_value_at_end = filter_content.get("use_fill_value_at_end", True) + expand_data_to_observed_range = filter_content.get("expand_data_to_observed_range", False) try: results = Utility.expand_data_temporally( results, + simulation_length=simulation_length, fill_value=fill_value, + use_fill_value_before_start=use_fill_value_before_start, use_fill_value_in_gaps=use_fill_value_in_gaps, use_fill_value_at_end=use_fill_value_at_end, + expand_data_to_observed_range=expand_data_to_observed_range, ) except (TypeError, ValueError) as e: error_title = f"Error {e} raised when padding data" @@ -2567,6 +2575,8 @@ def validate_report_filters(self, filter_content: dict[Any, Any], filter_name: s "direction": self.validate_direction, "use_name": partial(self.validate_type, expected=bool, type_label="a boolean"), "use_verbose_report_name": partial(self.validate_type, expected=bool, type_label="a boolean"), + "expand_data_to_observed_range": partial(self.validate_type, expected=bool, type_label="a boolean"), + "use_fill_value_before_start": partial(self.validate_type, expected=bool, type_label="a boolean"), } for key, value in filter_content.items(): diff --git a/RUFAS/rufas_time.py b/RUFAS/rufas_time.py index 37fc15f9e8..96ccf949bf 100644 --- a/RUFAS/rufas_time.py +++ b/RUFAS/rufas_time.py @@ -24,7 +24,7 @@ def __init__(self, 
start_date: datetime = None, end_date: datetime = None, curre self.end_date: datetime = end_date or datetime.strptime(str(config_data["end_date"]), "%Y:%j") self.current_date: datetime = current_date or self.start_date - self.simulation_length_days: int = (self.end_date - self.start_date).days + self.simulation_length_days: int = (self.end_date - self.start_date).days + 1 self.simulation_length_years: int = self.end_date.year - self.start_date.year + 1 def advance(self) -> None: @@ -162,7 +162,7 @@ def convert_slice_to_simulation_day(self, slice_day: int) -> int: if slice_day == 0: return 1 if slice_day < 0: - return self.simulation_length_days + slice_day + 1 + return self.simulation_length_days + slice_day return slice_day def __str__(self) -> str: diff --git a/RUFAS/util.py b/RUFAS/util.py index f45edd677a..f849e3ad8e 100644 --- a/RUFAS/util.py +++ b/RUFAS/util.py @@ -158,9 +158,12 @@ def find_group_prefixes_from_keys( @staticmethod def expand_data_temporally( data_to_expand: dict[str, dict[str, list[Any]]], + simulation_length: int, fill_value: Any = np.nan, + use_fill_value_before_start: bool = True, use_fill_value_in_gaps: bool = True, use_fill_value_at_end: bool = True, + expand_data_to_observed_range: bool = False, ) -> dict[str, dict[str, list[Any]]]: """ Pads and expands data based on the simulation day(s) it was recorded on, relative to when other data was @@ -171,20 +174,27 @@ def expand_data_temporally( data_to_expand : dict[str, dict[str, list[Any]]] The data to be padded and expanded. The top level key is a variable name, and points to a dictionary that contains the keys "values" and optionally "info_maps". + simulation_length : int + Total number of simulation days. fill_value : Any, default numpy.nan - Value that is used to pad the front of the data values, and optionally the values in between original values - and after the last original value. + Value used when a region is configured to use fill values. 
+ use_fill_value_before_start : bool, default True + If true, days before the first known datapoint are filled with `fill_value`. If false, they are filled with + the first known value. use_fill_value_in_gaps : bool, default True - If false, values between known data points are expanded with the last known value from the data set. If - true, values between known data points are filled with `fill_value`. + If true, days between known datapoints are filled with `fill_value`. If false, they are filled with the last + known value. use_fill_value_at_end : bool, default True - If false, values after last known data point are padded with the last known value from the data set. If - true, values after the last known data point are filled with `fill_value`. + If true, days after the last known datapoint are filled with `fill_value`. If false, they are filled + with the last known value. + expand_data_to_observed_range : bool, default False + If false, expands data from simulation day 0 through `simulation_length`. If true, expands only + from the first simulation day present in the dataset through the last simulation day present in the dataset. Returns ------- dict[str, dict[str, list[Any]]] - The filled data, so that gaps in the data are filled in with the last known value or `fill_value`. + The expanded data. Raises ------ @@ -194,16 +204,96 @@ def expand_data_temporally( If there is no data to be filled. If the number of info maps does not match the number of values for a variable. If a value for "simulation_day" is not present in every info map. - - Notes - ----- - This method assumes there will never be multiple values recorded for a single variable on a single simulation - day. 
- """ if not data_to_expand: raise ValueError("Data Expansion error: Cannot fill empty dataset.") + all_simulation_days = Utility._gather_data_sim_days(data_to_expand) + filtered_simulation_days = sorted(set(all_simulation_days)) + + first_day = filtered_simulation_days[0] if expand_data_to_observed_range else 0 + last_day = filtered_simulation_days[-1] if expand_data_to_observed_range else simulation_length + + expanded_data: dict[str, dict[str, list[Any]]] = {} + for key, data in data_to_expand.items(): + expanded_variable_data: dict[str, list[Any]] = {"values": [], "info_maps": []} + original_units = data["info_maps"][0]["units"] + + indexed_data = { + info_map["simulation_day"]: (value, info_map) + for value, info_map in zip(data["values"], data["info_maps"]) + } + + first_day_of_original_data = min(indexed_data.keys()) + last_day_of_original_data = max(indexed_data.keys()) + + first_known_value, first_known_info_map = indexed_data[first_day_of_original_data] + last_known_value = fill_value + last_known_info_map = {"simulation_day": 0, "units": original_units} + + for day in range(first_day, last_day + 1): + if day in indexed_data: + value, info_map = indexed_data[day] + expanded_variable_data["values"].append(value) + expanded_variable_data["info_maps"].append(info_map.copy()) + expanded_variable_data["info_maps"][-1]["simulation_day"] = day + + last_known_value = value + last_known_info_map = info_map.copy() + + elif day < first_day_of_original_data: + value_to_add = fill_value if use_fill_value_before_start else first_known_value + info_map_to_add = first_known_info_map.copy() + info_map_to_add["simulation_day"] = day + + expanded_variable_data["values"].append(value_to_add) + expanded_variable_data["info_maps"].append(info_map_to_add) + + elif day < last_day_of_original_data: + value_to_add = fill_value if use_fill_value_in_gaps else last_known_value + info_map_to_add = last_known_info_map.copy() + info_map_to_add["simulation_day"] = day + + 
expanded_variable_data["values"].append(value_to_add) + expanded_variable_data["info_maps"].append(info_map_to_add) + + else: + value_to_add = fill_value if use_fill_value_at_end else last_known_value + info_map_to_add = last_known_info_map.copy() + info_map_to_add["simulation_day"] = day + + expanded_variable_data["values"].append(value_to_add) + expanded_variable_data["info_maps"].append(info_map_to_add) + + expanded_data[key] = expanded_variable_data + + return expanded_data + + @staticmethod + def _gather_data_sim_days(data_to_expand: dict[str, dict[str, list[Any]]]) -> list[int]: + """ + Helper function for `expand_data_temporally()`. + Validates the data structure and gathers the simulation days from the accompanying info maps. + + Parameters + ---------- + data_to_expand : dict[str, dict[str, list[Any]]] + The data to be expanded. + + Returns + ------- + list[int] + A list of simulation days from the info maps of the data_to_expand. + + Raises + ------ + TypeError + If info_maps are not present in the data_to_expand. + ValueError + If the lists of info_maps and values are not the same length. + ValueError + If `simulation_day` has not been reported in every info_maps instance. 
+ """ all_simulation_days = [] for key, value in data_to_expand.items(): info_maps = value.get("info_maps") @@ -219,37 +309,7 @@ def expand_data_temporally( ) all_simulation_days += [info_map["simulation_day"] for info_map in info_maps] - filtered_simulation_days = sorted(set(all_simulation_days)) - first_day = filtered_simulation_days[0] - last_day = filtered_simulation_days[-1] - - expanded_data: dict[str, dict[str, list[Any]]] = {} - for key, data in data_to_expand.items(): - expanded_variable_data: dict[str, list[Any]] = {"values": [], "info_maps": []} - original_units = data["info_maps"][0]["units"] - zipped_data = zip(data["values"], data["info_maps"]) - indexed_data = {data[1]["simulation_day"]: data for data in zipped_data} - last_day_of_original_data = max(indexed_data.keys()) - last_value = (fill_value, {"simulation_day": 0, "units": original_units}) - for day in range(first_day, last_day_of_original_data + 1): - if day in indexed_data.keys(): - last_value = indexed_data[day] if not use_fill_value_in_gaps else (fill_value, indexed_data[day][1]) - expanded_variable_data["values"].append(indexed_data[day][0]) - expanded_variable_data["info_maps"].append(indexed_data[day][1]) - expanded_variable_data["info_maps"][-1]["simulation_day"] = day - else: - expanded_variable_data["values"].append(last_value[0]) - expanded_variable_data["info_maps"].append(last_value[1].copy()) - expanded_variable_data["info_maps"][-1]["simulation_day"] = day - - tail_fill_value = indexed_data[last_day_of_original_data][0] if not use_fill_value_at_end else fill_value - for day in range(last_day_of_original_data + 1, last_day + 1): - expanded_variable_data["values"].append(tail_fill_value) - expanded_variable_data["info_maps"].append({"simulation_day": day, "units": original_units}) - - expanded_data[key] = expanded_variable_data - - return expanded_data + return all_simulation_days @staticmethod def deep_merge(target: Dict[Any, Any], updates: Dict[Any, Any]) -> None: diff --git 
a/changelog.md b/changelog.md index 71ebb80e04..6ef2bec27f 100644 --- a/changelog.md +++ b/changelog.md @@ -48,6 +48,7 @@ v1.0.0 - [2852](https://github.com/RuminantFarmSystems/MASM/pull/2852) - [minor change] [NoInputChange] [NoOutputChange] Fix AssertionError on `dev`. - [2866](https://github.com/RuminantFarmSystems/MASM/pull/2866) - [minor change] [NoInputChange] [NoOutputChange] Clears all mypy errors in test_field_manager.py. - [2863](https://github.com/RuminantFarmSystems/MASM/pull/2863) - [minor change] [NoInputChange] [NoOutputChange] Updates TaskManager to avoid using multiprocessing when running single tasks. +- [2867](https://github.com/RuminantFarmSystems/MASM/pull/2867) - [minor change] [NoInputChange] [NoOutputChange] Updates expand_data_temporally() util function to offer options of full simulation expansion and front-padding data. - [2854](https://github.com/RuminantFarmSystems/MASM/pull/2854) - [minor change] [Emissions][OutputManager] [NoInputChange] [NoOutputChange] Update `emissions.py` filtering process and remove `use_filter_key_name` option in the OM filter. - [2872](https://github.com/RuminantFarmSystems/RuFaS/pull/2872) - [minor change] [NoInputChange] [NoOutputChange] Adds information and links for onboarding videos. - [2876](https://github.com/RuminantFarmSystems/RuFaS/pull/2876) - [minor change] [DieselConsumption][OutputManager] [NoInputChange] [NoOutputChange] Removes `use_name` output filter option and updates `DieselConsumption` to filter properly without it. 
diff --git a/tests/test_output_manager.py b/tests/test_output_manager.py index 61556c0a06..f95d207ec3 100644 --- a/tests/test_output_manager.py +++ b/tests/test_output_manager.py @@ -2187,6 +2187,7 @@ def test_filter_variables_pool( ) -> None: """Tests filter_variables_pool in the OutputManager.""" mock_output_manager.variables_pool = mock_simple_variables_pool + mocker.patch.object(mock_output_manager, "time") expand_data_temporally = mocker.patch.object(Utility, "expand_data_temporally", side_effect=lambda _: _) assert mock_output_manager.filter_variables_pool(filter_content) == expected diff --git a/tests/test_time.py b/tests/test_time.py index 9578dad694..419406effe 100644 --- a/tests/test_time.py +++ b/tests/test_time.py @@ -26,7 +26,7 @@ def test_time_initialization() -> None: assert time.end_date == datetime(year=2000, month=1, day=1) assert time.current_date == time.start_date - assert time.simulation_length_days == (time.end_date - time.start_date).days + assert time.simulation_length_days == (time.end_date - time.start_date).days + 1 assert time.simulation_day == 0 @@ -211,8 +211,8 @@ def test_convert_year_jday_to_date( [ (0, 100, 1), (5, 100, 5), - (-1, 100, 100), - (-100, 100, 1), + (-1, 100, 99), + (-100, 100, 0), (100, 100, 100), (150, 100, 150), ], diff --git a/tests/test_util.py b/tests/test_util.py index e5947ec197..3cefbd6f12 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -457,7 +457,10 @@ def test_find_group_prefixes_multiple_suffix_filtering() -> None: @pytest.mark.parametrize( - "data_to_pad,fill_value,gap_pad,end_pad,expected", + ( + "data_to_expand,simulation_length,fill_value,use_fill_value_before_start," + "use_fill_value_in_gaps,use_fill_value_at_end,expand_data_to_observed_range,expected" + ), [ ( { @@ -478,13 +481,17 @@ def test_find_group_prefixes_multiple_suffix_filtering() -> None: ], }, }, + 6, math.nan, False, + False, True, + False, { "a": { - "values": ["a", "a", "a", "b", "c", math.nan], + "values": ["a", "a", "a", 
"a", "b", "c", math.nan], "info_maps": [ + {"simulation_day": 0, "units": "kg"}, {"simulation_day": 1, "units": "kg"}, {"simulation_day": 2, "units": "kg"}, {"simulation_day": 3, "units": "kg"}, @@ -494,8 +501,9 @@ def test_find_group_prefixes_multiple_suffix_filtering() -> None: ], }, "b": { - "values": [math.nan, math.nan, "d", "e", "e", "f"], + "values": ["d", "d", "d", "d", "e", "e", "f"], "info_maps": [ + {"simulation_day": 0, "units": "g"}, {"simulation_day": 1, "units": "g"}, {"simulation_day": 2, "units": "g"}, {"simulation_day": 3, "units": "g"}, @@ -525,13 +533,17 @@ def test_find_group_prefixes_multiple_suffix_filtering() -> None: ], }, }, + 6, math.nan, True, + True, + False, False, { "a": { - "values": ["a", math.nan, math.nan, "b", "c", "c"], + "values": [math.nan, "a", math.nan, math.nan, "b", "c", "c"], "info_maps": [ + {"simulation_day": 0, "units": "kg"}, {"simulation_day": 1, "units": "kg"}, {"simulation_day": 2, "units": "kg"}, {"simulation_day": 3, "units": "kg"}, @@ -541,8 +553,9 @@ def test_find_group_prefixes_multiple_suffix_filtering() -> None: ], }, "b": { - "values": [math.nan, math.nan, "d", "e", math.nan, "f"], + "values": [math.nan, math.nan, math.nan, "d", "e", math.nan, "f"], "info_maps": [ + {"simulation_day": 0, "units": "g"}, {"simulation_day": 1, "units": "g"}, {"simulation_day": 2, "units": "g"}, {"simulation_day": 3, "units": "g"}, @@ -561,9 +574,12 @@ def test_find_group_prefixes_multiple_suffix_filtering() -> None: "info_maps": [{"simulation_day": 3, "units": "pi"}, {"simulation_day": 4, "units": "pi"}], }, }, + 4, None, True, + True, False, + True, { "a": { "values": ["a", "a", "a"], @@ -594,17 +610,28 @@ def test_find_group_prefixes_multiple_suffix_filtering() -> None: "info_maps": [{"simulation_day": 1, "units": "ha"}, {"simulation_day": 2, "units": "ha"}], }, }, + 2, 8, False, True, + True, + False, { "a": { - "values": ["a", "b"], - "info_maps": [{"simulation_day": 1, "units": "ha"}, {"simulation_day": 2, "units": 
"ha"}], + "values": ["a", "a", "b"], + "info_maps": [ + {"simulation_day": 0, "units": "ha"}, + {"simulation_day": 1, "units": "ha"}, + {"simulation_day": 2, "units": "ha"}, + ], }, "b": { - "values": ["c", "d"], - "info_maps": [{"simulation_day": 1, "units": "ha"}, {"simulation_day": 2, "units": "ha"}], + "values": ["c", "c", "d"], + "info_maps": [ + {"simulation_day": 0, "units": "ha"}, + {"simulation_day": 1, "units": "ha"}, + {"simulation_day": 2, "units": "ha"}, + ], }, }, ), @@ -619,21 +646,26 @@ def test_find_group_prefixes_multiple_suffix_filtering() -> None: "info_maps": [{"simulation_day": 1, "units": "l"}, {"simulation_day": 3, "units": "l"}], }, }, + 3, "fill", + False, True, False, + False, { "a": { - "values": ["a", "fill", "b"], + "values": ["a", "a", "fill", "b"], "info_maps": [ + {"simulation_day": 0, "units": "ha^2"}, {"simulation_day": 1, "units": "ha^2"}, {"simulation_day": 2, "units": "ha^2"}, {"simulation_day": 3, "units": "ha^2"}, ], }, "b": { - "values": ["c", "fill", "d"], + "values": ["c", "c", "fill", "d"], "info_maps": [ + {"simulation_day": 0, "units": "l"}, {"simulation_day": 1, "units": "l"}, {"simulation_day": 2, "units": "l"}, {"simulation_day": 3, "units": "l"}, @@ -645,16 +677,20 @@ def test_find_group_prefixes_multiple_suffix_filtering() -> None: { "a": { "values": ["a", "b"], - "info_maps": [{"simulation_day": 1, "units": "GB"}, {"simulation_day": 3, "units": "GB"}], + "info_maps": [{"simulation_day": 2, "units": "GB"}, {"simulation_day": 3, "units": "GB"}], }, }, + 3, math.pi, True, True, + True, + False, { "a": { - "values": ["a", math.pi, "b"], + "values": [math.pi, math.pi, "a", "b"], "info_maps": [ + {"simulation_day": 0, "units": "GB"}, {"simulation_day": 1, "units": "GB"}, {"simulation_day": 2, "units": "GB"}, {"simulation_day": 3, "units": "GB"}, @@ -665,43 +701,150 @@ def test_find_group_prefixes_multiple_suffix_filtering() -> None: ], ) def test_expand_data_temporally( - data_to_pad: dict[str, dict[str, list[Any]]], + 
data_to_expand: dict[str, dict[str, list[Any]]], + simulation_length: int, fill_value: Any, - gap_pad: bool, - end_pad: bool, + use_fill_value_before_start: bool, + use_fill_value_in_gaps: bool, + use_fill_value_at_end: bool, + expand_data_to_observed_range: bool, expected: dict[str, dict[str, list[Any]]], ) -> None: - """Tests the utility method expand_data_temporally.""" + """Tests the util method expand_data_temporally().""" actual = Utility.expand_data_temporally( - data_to_pad, fill_value=fill_value, use_fill_value_in_gaps=gap_pad, use_fill_value_at_end=end_pad + data_to_expand=data_to_expand, + simulation_length=simulation_length, + fill_value=fill_value, + use_fill_value_before_start=use_fill_value_before_start, + use_fill_value_in_gaps=use_fill_value_in_gaps, + use_fill_value_at_end=use_fill_value_at_end, + expand_data_to_observed_range=expand_data_to_observed_range, ) assert actual == expected -def test_expand_data_temporally_errors() -> None: - """Tests that errors are correctly raised by expand_data_temporally.""" - empty_data: dict[str, dict[str, list[Any]]] = {} - with pytest.raises(ValueError, match="empty dataset"): - Utility.expand_data_temporally(empty_data) +def test_expand_data_temporally_observed_range_only() -> None: + """Tests observed range case for expand_data_temporally().""" + data_to_expand: dict[str, dict[str, list[Any]]] = { + "a": { + "values": ["x", "y"], + "info_maps": [ + {"simulation_day": 3, "units": "kg"}, + {"simulation_day": 5, "units": "kg"}, + ], + } + } - data_one = {"a": {"values": ["a", "b"]}, "b": {"values": ["c", "d"]}} - with pytest.raises(TypeError, match="no info maps"): - Utility.expand_data_temporally(data_one) + actual = Utility.expand_data_temporally( + data_to_expand=data_to_expand, + simulation_length=10, + fill_value="fill", + use_fill_value_before_start=False, + use_fill_value_in_gaps=False, + use_fill_value_at_end=False, + expand_data_to_observed_range=True, + ) - data_two: dict[str, dict[str, list[Any]]] = { 
- "a": {"values": ["a", "b"], "info_maps": [{"simulation_day": 1}]}, - "b": {"values": ["c", "d"], "info_maps": [{"simulation_day": 1}, {"simulation_day": 3}]}, + expected = { + "a": { + "values": ["x", "x", "y"], + "info_maps": [ + {"simulation_day": 3, "units": "kg"}, + {"simulation_day": 4, "units": "kg"}, + {"simulation_day": 5, "units": "kg"}, + ], + } } - with pytest.raises(ValueError, match="number of values and info maps"): - Utility.expand_data_temporally(data_two) - data_three: dict[str, dict[str, list[Any]]] = { - "a": {"values": ["a", "b"], "info_maps": [{"simulation_day": 1}, {"foo": "bar"}]}, - "b": {"values": ["c", "d"], "info_maps": [{"simulation_day": 1}, {"simulation_day": 3}]}, - } - with pytest.raises(ValueError, match="simulation day value in every info map"): - Utility.expand_data_temporally(data_three) + assert actual == expected + + +@pytest.mark.parametrize( + "data_to_expand,expected", + [ + ( + { + "a": { + "values": ["a", "b"], + "info_maps": [ + {"simulation_day": 1, "units": "kg"}, + {"simulation_day": 4, "units": "kg"}, + ], + } + }, + [1, 4], + ), + ( + { + "a": { + "values": ["a", "b"], + "info_maps": [ + {"simulation_day": 1, "units": "kg"}, + {"simulation_day": 4, "units": "kg"}, + ], + }, + "b": { + "values": ["c", "d", "e"], + "info_maps": [ + {"simulation_day": 2, "units": "g"}, + {"simulation_day": 3, "units": "g"}, + {"simulation_day": 6, "units": "g"}, + ], + }, + }, + [1, 4, 2, 3, 6], + ), + ( + {}, + [], + ), + ], +) +def test_gather_data_sim_days( + data_to_expand: dict[str, dict[str, list[Any]]], + expected: list[int], +) -> None: + """Tests _gather_data_sim_days returns the expected simulation days.""" + actual = Utility._gather_data_sim_days(data_to_expand) + + assert actual == expected + + +@pytest.mark.parametrize( + "data_to_expand,error_type,error_match", + [ + ( + {"a": {"values": ["a", "b"]}, "b": {"values": ["c", "d"]}}, + TypeError, + "Variable 'a' has no info maps", + ), + ( + { + "a": {"values": ["a", "b"], 
"info_maps": [{"simulation_day": 1}]}, + "b": {"values": ["c", "d"], "info_maps": [{"simulation_day": 1}, {"simulation_day": 3}]}, + }, + ValueError, + "Variable 'a' does not have matching number of values and info maps", + ), + ( + { + "a": {"values": ["a", "b"], "info_maps": [{"simulation_day": 1}, {"foo": "bar"}]}, + "b": {"values": ["c", "d"], "info_maps": [{"simulation_day": 1}, {"simulation_day": 3}]}, + }, + ValueError, + "Variable 'a' does not have simulation day value in every info map", + ), + ], +) +def test_gather_data_sim_days_errors( + data_to_expand: dict[str, dict[str, list[Any]]], + error_type: type[Exception], + error_match: str, +) -> None: + """Tests _gather_data_sim_days raises the expected errors for invalid input.""" + with pytest.raises(error_type, match=error_match): + Utility._gather_data_sim_days(data_to_expand) def test_deep_merge_dict() -> None: