From 6053eab46b255c8e48175ac87c70a7a0fdd1d358 Mon Sep 17 00:00:00 2001 From: Niko <70217952+ew3361zh@users.noreply.github.com> Date: Mon, 16 Mar 2026 11:08:55 -0400 Subject: [PATCH 01/20] data padder second attempt --- RUFAS/output_manager.py | 6 ++++++ RUFAS/util.py | 45 ++++++++++++++++++++++++++--------------- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/RUFAS/output_manager.py b/RUFAS/output_manager.py index e79cfbf03c..2f1f868775 100644 --- a/RUFAS/output_manager.py +++ b/RUFAS/output_manager.py @@ -1339,12 +1339,18 @@ def filter_variables_pool( fill_value = filter_content.get("fill_value", np.nan) use_fill_value_in_gaps = filter_content.get("use_fill_value_in_gaps", True) use_fill_value_at_end = filter_content.get("use_fill_value_at_end", True) + expand_data_to_full_simulation = filter_content.get("expand_data_to_full_simulation", + True) + assert self.time is not None + simulation_length = self.time.simulation_length_days try: results = Utility.expand_data_temporally( results, + simulation_length=simulation_length, fill_value=fill_value, use_fill_value_in_gaps=use_fill_value_in_gaps, use_fill_value_at_end=use_fill_value_at_end, + expand_data_to_full_simulation=expand_data_to_full_simulation, ) except (TypeError, ValueError) as e: error_title = f"Error {e} raised when padding data" diff --git a/RUFAS/util.py b/RUFAS/util.py index e2c49ceece..47db333bae 100644 --- a/RUFAS/util.py +++ b/RUFAS/util.py @@ -141,9 +141,11 @@ def find_max_index_from_keys(data: dict[str, Any]) -> int | None: @staticmethod def expand_data_temporally( data_to_expand: dict[str, dict[str, list[Any]]], + simulation_length: int, fill_value: Any = np.nan, use_fill_value_in_gaps: bool = True, use_fill_value_at_end: bool = True, + expand_data_to_full_simulation: bool = True, ) -> dict[str, dict[str, list[Any]]]: """ Pads and expands data based on the simulation day(s) it was recorded on, relative to when other data was @@ -187,24 +189,16 @@ def expand_data_temporally( if not data_to_expand: raise ValueError("Data Expansion error: Cannot fill empty dataset.") - all_simulation_days = [] - for key, value in data_to_expand.items(): - info_maps = value.get("info_maps") - if info_maps is None: - raise TypeError(f"Data Expansion error: Variable '{key}' has no info maps.") - if len(info_maps) != len(value["values"]): - raise ValueError( - f"Data Expansion error: Variable '{key}' does not have matching number of values and " "info maps." - ) - if not all("simulation_day" in info_map.keys() for info_map in info_maps): - raise ValueError( - f"Data Expansion error: Variable '{key}' does not have simulation day value in every " "info map." - ) - all_simulation_days += [info_map["simulation_day"] for info_map in info_maps] + all_simulation_days = Utility._gather_data_sim_days(data_to_expand) filtered_simulation_days = sorted(set(all_simulation_days)) - first_day = filtered_simulation_days[0] - last_day = filtered_simulation_days[-1] + # TODO Add version of first_day and last_day that represent the full simulation length? + if expand_data_to_full_simulation: + first_day = 1 + last_day = simulation_length + else: + first_day = filtered_simulation_days[0] + last_day = filtered_simulation_days[-1] expanded_data: dict[str, dict[str, list[Any]]] = {} for key, data in data_to_expand.items(): @@ -234,6 +228,25 @@ def expand_data_temporally( return expanded_data + @staticmethod + def _gather_data_sim_days(data_to_expand: dict[str, dict[str, list[Any]]]) -> list[int]: + all_simulation_days = [] + for key, value in data_to_expand.items(): + info_maps = value.get("info_maps") + if info_maps is None: + raise TypeError(f"Data Expansion error: Variable '{key}' has no info maps.") + if len(info_maps) != len(value["values"]): + raise ValueError( + f"Data Expansion error: Variable '{key}' does not have matching number of values and " "info maps." + ) + if not all("simulation_day" in info_map.keys() for info_map in info_maps): + raise ValueError( + f"Data Expansion error: Variable '{key}' does not have simulation day value in every " "info map." + ) + all_simulation_days += [info_map["simulation_day"] for info_map in info_maps] + + return all_simulation_days + @staticmethod def deep_merge(target: Dict[Any, Any], updates: Dict[Any, Any]) -> None: """ From 2c4091a291817835c2cd99c74ff1cefcb7b7c979 Mon Sep 17 00:00:00 2001 From: Niko <70217952+ew3361zh@users.noreply.github.com> Date: Mon, 16 Mar 2026 16:38:56 -0400 Subject: [PATCH 02/20] working version of expand data full simulation --- RUFAS/output_manager.py | 6 ++ RUFAS/util.py | 118 +++++++++++++++++++++++++++------------- 2 files changed, 87 insertions(+), 37 deletions(-) diff --git a/RUFAS/output_manager.py b/RUFAS/output_manager.py index 2f1f868775..faa4beb2d9 100644 --- a/RUFAS/output_manager.py +++ b/RUFAS/output_manager.py @@ -1337,17 +1337,21 @@ def filter_variables_pool( if filter_content.get("expand_data", False): fill_value = filter_content.get("fill_value", np.nan) + use_fill_value_before_start = filter_content.get("use_fill_value_before_start", True) use_fill_value_in_gaps = filter_content.get("use_fill_value_in_gaps", True) use_fill_value_at_end = filter_content.get("use_fill_value_at_end", True) expand_data_to_full_simulation = filter_content.get("expand_data_to_full_simulation", True) assert self.time is not None simulation_length = self.time.simulation_length_days + if filter_content.get("name") == "Feed Expand Full Sim, No Fill": + print("pause") try: results = Utility.expand_data_temporally( results, simulation_length=simulation_length, fill_value=fill_value, + use_fill_value_before_start=use_fill_value_before_start, use_fill_value_in_gaps=use_fill_value_in_gaps, use_fill_value_at_end=use_fill_value_at_end, expand_data_to_full_simulation=expand_data_to_full_simulation, @@ -2589,6 +2593,8 @@ def validate_report_filters(self, filter_content: dict[Any, Any], filter_name: s "use_name": partial(self.validate_type, expected=bool, type_label="a boolean"), "use_filter_key_name": partial(self.validate_type, expected=bool, type_label="a boolean"), "use_verbose_report_name": partial(self.validate_type, expected=bool, type_label="a boolean"), + "expand_data_to_full_simulation": partial(self.validate_type, expected=bool, type_label="a boolean"), + "use_fill_value_before_start": partial(self.validate_type, expected=bool, type_label="a boolean"), } for key, value in filter_content.items(): diff --git a/RUFAS/util.py b/RUFAS/util.py index 47db333bae..64a692d0a4 100644 --- a/RUFAS/util.py +++ b/RUFAS/util.py @@ -143,6 +143,7 @@ def expand_data_temporally( data_to_expand: dict[str, dict[str, list[Any]]], simulation_length: int, fill_value: Any = np.nan, + use_fill_value_before_start: bool = True, use_fill_value_in_gaps: bool = True, use_fill_value_at_end: bool = True, expand_data_to_full_simulation: bool = True, @@ -156,20 +157,27 @@ def expand_data_temporally( data_to_expand : dict[str, dict[str, list[Any]]] The data to be padded and expanded. The top level key is a variable name, and points to a dictionary that contains the keys "values" and optionally "info_maps". + simulation_length : int + Total number of simulation days. fill_value : Any, default numpy.nan - Value that is used to pad the front of the data values, and optionally the values in between original values - and after the last original value. + Value used when a region is configured to use fill values. + use_fill_value_before_start : bool, default True + If true, days before the first known datapoint are filled with `fill_value`. If false, they are filled with + the first known value. use_fill_value_in_gaps : bool, default True - If false, values between known data points are expanded with the last known value from the data set. If - true, values between known data points are filled with `fill_value`. + If true, days between known datapoints are filled with `fill_value`. If false, they are filled with the last + known value. use_fill_value_at_end : bool, default True - If false, values after last known data point are padded with the last known value from the data set. If - true, values after the last known data point are filled with `fill_value`. + If true, days after the last known datapoint are filled with `fill_value`. If false, they are filled + with the last known value. + expand_data_to_full_simulation : bool, default True + If true, expands data from simulation day 1 through `simulation_length`. If false, expands only + from the first simulation day present in the dataset through the last simulation day present in the dataset. Returns ------- dict[str, dict[str, list[Any]]] - The filled data, so that gaps in the data are filled in with the last known value or `fill_value`. + The expanded data. Raises ------ @@ -179,50 +187,63 @@ def expand_data_temporally( If there is no data to be filled. If the number of info maps does not match the number of values for a variable. If a value for "simulation_day" is not present in every info map. - - Notes - ----- - This method assumes there will never be multiple values recorded for a single variable on a single simulation - day. - """ if not data_to_expand: raise ValueError("Data Expansion error: Cannot fill empty dataset.") all_simulation_days = Utility._gather_data_sim_days(data_to_expand) - filtered_simulation_days = sorted(set(all_simulation_days)) - # TODO Add version of first_day and last_day that represent the full simulation length? - if expand_data_to_full_simulation: - first_day = 1 - last_day = simulation_length - else: - first_day = filtered_simulation_days[0] - last_day = filtered_simulation_days[-1] + + first_day = 1 if expand_data_to_full_simulation else filtered_simulation_days[0] + last_day = simulation_length if expand_data_to_full_simulation else filtered_simulation_days[-1] expanded_data: dict[str, dict[str, list[Any]]] = {} for key, data in data_to_expand.items(): expanded_variable_data: dict[str, list[Any]] = {"values": [], "info_maps": []} original_units = data["info_maps"][0]["units"] - zipped_data = zip(data["values"], data["info_maps"]) - indexed_data = {data[1]["simulation_day"]: data for data in zipped_data} + + indexed_data = { + info_map["simulation_day"]: (value, info_map) + for value, info_map in zip(data["values"], data["info_maps"]) + } + + first_day_of_original_data = min(indexed_data.keys()) last_day_of_original_data = max(indexed_data.keys()) - last_value = (fill_value, {"simulation_day": 0, "units": original_units}) - for day in range(first_day, last_day_of_original_data + 1): - if day in indexed_data.keys(): - last_value = indexed_data[day] if not use_fill_value_in_gaps else (fill_value, indexed_data[day][1]) - expanded_variable_data["values"].append(indexed_data[day][0]) - expanded_variable_data["info_maps"].append(indexed_data[day][1]) - expanded_variable_data["info_maps"][-1]["simulation_day"] = day - else: - expanded_variable_data["values"].append(last_value[0]) - expanded_variable_data["info_maps"].append(last_value[1].copy()) + + first_known_value, first_known_info_map = indexed_data[first_day_of_original_data] + last_known_value = fill_value + last_known_info_map = {"simulation_day": 0, "units": original_units} + + for day in range(first_day, last_day + 1): + if day in indexed_data: + value, info_map = indexed_data[day] + expanded_variable_data["values"].append(value) + expanded_variable_data["info_maps"].append(info_map.copy()) expanded_variable_data["info_maps"][-1]["simulation_day"] = day - tail_fill_value = indexed_data[last_day_of_original_data][0] if not use_fill_value_at_end else fill_value - for day in range(last_day_of_original_data + 1, last_day + 1): - expanded_variable_data["values"].append(tail_fill_value) - expanded_variable_data["info_maps"].append({"simulation_day": day, "units": original_units}) + last_known_value = value + last_known_info_map = info_map.copy() + + elif day < first_day_of_original_data: + value_to_add = fill_value if use_fill_value_before_start else first_known_value + info_map_to_add = first_known_info_map.copy() + info_map_to_add["simulation_day"] = day + + expanded_variable_data["values"].append(value_to_add) + expanded_variable_data["info_maps"].append(info_map_to_add) + + elif day < last_day_of_original_data: + value_to_add = fill_value if use_fill_value_in_gaps else last_known_value + info_map_to_add = last_known_info_map.copy() + info_map_to_add["simulation_day"] = day + + expanded_variable_data["values"].append(value_to_add) + expanded_variable_data["info_maps"].append(info_map_to_add) + + else: + value_to_add = fill_value if use_fill_value_at_end else last_known_value + expanded_variable_data["values"].append(value_to_add) + expanded_variable_data["info_maps"].append({"simulation_day": day, "units": original_units}) expanded_data[key] = expanded_variable_data @@ -230,6 +251,29 @@ def expand_data_temporally( @staticmethod def _gather_data_sim_days(data_to_expand: dict[str, dict[str, list[Any]]]) -> list[int]: + """ + Helper function for `expand_data_temporally()`. + Validates the data structure and gathers the simulations days from the accompanying info maps. + + Parameters + ---------- + data_to_expand : dict[str, dict[str, list[Any]]] + The data to be expanded. + + Returns + ------- + list[int] + A list of simulation days from the info maps of the data_to_expand. + + Raises + ------ + TypeError + If info_maps are not present in the data_to_expand. + ValueError + If the lists of info_maps and values are not the same length. + ValueError + If `simulation_day` has not been reported in every info_maps instance. + """ all_simulation_days = [] for key, value in data_to_expand.items(): info_maps = value.get("info_maps") From 3cac0cdab0099bb3bf62138bce9c648cc17b6128 Mon Sep 17 00:00:00 2001 From: Niko <70217952+ew3361zh@users.noreply.github.com> Date: Mon, 16 Mar 2026 16:59:37 -0400 Subject: [PATCH 03/20] fixes unit tests --- tests/test_output_manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_output_manager.py b/tests/test_output_manager.py index a65c49c932..2658014efc 100644 --- a/tests/test_output_manager.py +++ b/tests/test_output_manager.py @@ -2187,6 +2187,7 @@ def test_filter_variables_pool( ) -> None: """Tests filter_variables_pool in the OutputManager.""" mock_output_manager.variables_pool = mock_simple_variables_pool + mocker.patch.object(mock_output_manager, "time") expand_data_temporally = mocker.patch.object(Utility, "expand_data_temporally", side_effect=lambda _: _) assert mock_output_manager.filter_variables_pool(filter_content) == expected From 21f74c824ea7e09475d4fe9335fba0c066f2b2cd Mon Sep 17 00:00:00 2001 From: Niko <70217952+ew3361zh@users.noreply.github.com> Date: Mon, 16 Mar 2026 17:41:35 -0400 Subject: [PATCH 04/20] updates unit test coverage --- RUFAS/output_manager.py | 8 +- RUFAS/util.py | 10 +-- tests/test_util.py | 184 ++++++++++++++++++++++++++++++++++------ 3 files changed, 165 insertions(+), 37 deletions(-) diff --git a/RUFAS/output_manager.py b/RUFAS/output_manager.py index faa4beb2d9..fa15976caa 100644 --- a/RUFAS/output_manager.py +++ b/RUFAS/output_manager.py @@ -1340,8 +1340,8 @@ def filter_variables_pool( use_fill_value_before_start = filter_content.get("use_fill_value_before_start", True) use_fill_value_in_gaps = filter_content.get("use_fill_value_in_gaps", True) use_fill_value_at_end = filter_content.get("use_fill_value_at_end", True) - expand_data_to_full_simulation = filter_content.get("expand_data_to_full_simulation", - True) + expand_data_to_observed_range = filter_content.get("expand_data_to_observed_range", + False) assert self.time is not None simulation_length = self.time.simulation_length_days if filter_content.get("name") == "Feed Expand Full Sim, No Fill": @@ -1354,7 +1354,7 @@ def filter_variables_pool( use_fill_value_before_start=use_fill_value_before_start, use_fill_value_in_gaps=use_fill_value_in_gaps, use_fill_value_at_end=use_fill_value_at_end, - expand_data_to_full_simulation=expand_data_to_full_simulation, + expand_data_to_observed_range=expand_data_to_observed_range, ) except (TypeError, ValueError) as e: error_title = f"Error {e} raised when padding data" @@ -2593,7 +2593,7 @@ def validate_report_filters(self, filter_content: dict[Any, Any], filter_name: s "use_name": partial(self.validate_type, expected=bool, type_label="a boolean"), "use_filter_key_name": partial(self.validate_type, expected=bool, type_label="a boolean"), "use_verbose_report_name": partial(self.validate_type, expected=bool, type_label="a boolean"), - "expand_data_to_full_simulation": partial(self.validate_type, expected=bool, type_label="a boolean"), + "expand_data_to_observed_range": partial(self.validate_type, expected=bool, type_label="a boolean"), "use_fill_value_before_start": partial(self.validate_type, expected=bool, type_label="a boolean"), } diff --git a/RUFAS/util.py b/RUFAS/util.py index 64a692d0a4..32e41574fc 100644 --- a/RUFAS/util.py +++ b/RUFAS/util.py @@ -146,7 +146,7 @@ def expand_data_temporally( use_fill_value_before_start: bool = True, use_fill_value_in_gaps: bool = True, use_fill_value_at_end: bool = True, - expand_data_to_full_simulation: bool = True, + expand_data_to_observed_range: bool = False, ) -> dict[str, dict[str, list[Any]]]: """ Pads and expands data based on the simulation day(s) it was recorded on, relative to when other data was @@ -170,8 +170,8 @@ def expand_data_temporally( use_fill_value_at_end : bool, default True If true, days after the last known datapoint are filled with `fill_value`. If false, they are filled with the last known value. - expand_data_to_full_simulation : bool, default True - If true, expands data from simulation day 1 through `simulation_length`. If false, expands only + expand_data_to_observed_range : bool, default False + If false, expands data from simulation day 1 through `simulation_length`. If true, expands only from the first simulation day present in the dataset through the last simulation day present in the dataset. Returns @@ -194,8 +194,8 @@ def expand_data_temporally( all_simulation_days = Utility._gather_data_sim_days(data_to_expand) filtered_simulation_days = sorted(set(all_simulation_days)) - first_day = 1 if expand_data_to_full_simulation else filtered_simulation_days[0] - last_day = simulation_length if expand_data_to_full_simulation else filtered_simulation_days[-1] + first_day = filtered_simulation_days[0] if expand_data_to_observed_range else 1 + last_day = filtered_simulation_days[-1] if expand_data_to_observed_range else simulation_length expanded_data: dict[str, dict[str, list[Any]]] = {} for key, data in data_to_expand.items(): diff --git a/tests/test_util.py b/tests/test_util.py index 2b7e1170e0..b1c61c0700 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -344,7 +344,10 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: @pytest.mark.parametrize( - "data_to_pad,fill_value,gap_pad,end_pad,expected", + ( + "data_to_expand,simulation_length,fill_value,use_fill_value_before_start," + "use_fill_value_in_gaps,use_fill_value_at_end,expand_data_to_observed_range,expected" + ), [ ( { @@ -365,9 +368,12 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: ], }, }, + 6, math.nan, False, + False, True, + False, { "a": { "values": ["a", "a", "a", "b", "c", math.nan], @@ -381,7 +387,7 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: ], }, "b": { - "values": [math.nan, math.nan, "d", "e", "e", "f"], + "values": ["d", "d", "d", "e", "e", "f"], "info_maps": [ {"simulation_day": 1, "units": "g"}, {"simulation_day": 2, "units": "g"}, @@ -412,8 +418,11 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: ], }, }, + 6, math.nan, True, + True, + False, False, { "a": { @@ -448,9 +457,12 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: "info_maps": [{"simulation_day": 3, "units": "pi"}, {"simulation_day": 4, "units": "pi"}], }, }, + 4, None, True, + True, False, + True, { "a": { "values": ["a", "a", "a"], @@ -481,9 +493,12 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: "info_maps": [{"simulation_day": 1, "units": "ha"}, {"simulation_day": 2, "units": "ha"}], }, }, + 2, 8, False, True, + True, + False, { "a": { "values": ["a", "b"], @@ -506,9 +521,12 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: "info_maps": [{"simulation_day": 1, "units": "l"}, {"simulation_day": 3, "units": "l"}], }, }, + 3, "fill", + False, True, False, + False, { "a": { "values": ["a", "fill", "b"], @@ -532,15 +550,18 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: { "a": { "values": ["a", "b"], - "info_maps": [{"simulation_day": 1, "units": "GB"}, {"simulation_day": 3, "units": "GB"}], + "info_maps": [{"simulation_day": 2, "units": "GB"}, {"simulation_day": 3, "units": "GB"}], }, }, + 3, math.pi, True, True, + True, + False, { "a": { - "values": ["a", math.pi, "b"], + "values": [math.pi, "a", "b"], "info_maps": [ {"simulation_day": 1, "units": "GB"}, {"simulation_day": 2, "units": "GB"}, @@ -552,43 +573,150 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: ], ) def test_expand_data_temporally( - data_to_pad: dict[str, dict[str, list[Any]]], + data_to_expand: dict[str, dict[str, list[Any]]], + simulation_length: int, fill_value: Any, - gap_pad: bool, - end_pad: bool, + use_fill_value_before_start: bool, + use_fill_value_in_gaps: bool, + use_fill_value_at_end: bool, + expand_data_to_observed_range: bool, expected: dict[str, dict[str, list[Any]]], ) -> None: - """Tests the utility method expand_data_temporally.""" + """Tests the util method expand_data_temporally().""" actual = Utility.expand_data_temporally( - data_to_pad, fill_value=fill_value, use_fill_value_in_gaps=gap_pad, use_fill_value_at_end=end_pad + data_to_expand=data_to_expand, + simulation_length=simulation_length, + fill_value=fill_value, + use_fill_value_before_start=use_fill_value_before_start, + use_fill_value_in_gaps=use_fill_value_in_gaps, + use_fill_value_at_end=use_fill_value_at_end, + expand_data_to_observed_range=expand_data_to_observed_range, ) assert actual == expected -def test_expand_data_temporally_errors() -> None: - """Tests that errors are correctly raised by expand_data_temporally.""" - empty_data: dict[str, dict[str, list[Any]]] = {} - with pytest.raises(ValueError, match="empty dataset"): - Utility.expand_data_temporally(empty_data) +def test_expand_data_temporally_observed_range_only() -> None: + """Tests observed range case for expand_data_temporally().""" + data_to_expand: dict[str, dict[str, list[Any]]] = { + "a": { + "values": ["x", "y"], + "info_maps": [ + {"simulation_day": 3, "units": "kg"}, + {"simulation_day": 5, "units": "kg"}, + ], + } + } - data_one = {"a": {"values": ["a", "b"]}, "b": {"values": ["c", "d"]}} - with pytest.raises(TypeError, match="no info maps"): - Utility.expand_data_temporally(data_one) + actual = Utility.expand_data_temporally( + data_to_expand=data_to_expand, + simulation_length=10, + fill_value="fill", + use_fill_value_before_start=False, + use_fill_value_in_gaps=False, + use_fill_value_at_end=False, + expand_data_to_observed_range=True, + ) - data_two: dict[str, dict[str, list[Any]]] = { - "a": {"values": ["a", "b"], "info_maps": [{"simulation_day": 1}]}, - "b": {"values": ["c", "d"], "info_maps": [{"simulation_day": 1}, {"simulation_day": 3}]}, + expected = { + "a": { + "values": ["x", "x", "y"], + "info_maps": [ + {"simulation_day": 3, "units": "kg"}, + {"simulation_day": 4, "units": "kg"}, + {"simulation_day": 5, "units": "kg"}, + ], + } } - with pytest.raises(ValueError, match="number of values and info maps"): - Utility.expand_data_temporally(data_two) - data_three: dict[str, dict[str, list[Any]]] = { - "a": {"values": ["a", "b"], "info_maps": [{"simulation_day": 1}, {"foo": "bar"}]}, - "b": {"values": ["c", "d"], "info_maps": [{"simulation_day": 1}, {"simulation_day": 3}]}, - } - with pytest.raises(ValueError, match="simulation day value in every info map"): - Utility.expand_data_temporally(data_three) + assert actual == expected + + +@pytest.mark.parametrize( + "data_to_expand,expected", + [ + ( + { + "a": { + "values": ["a", "b"], + "info_maps": [ + {"simulation_day": 1, "units": "kg"}, + {"simulation_day": 4, "units": "kg"}, + ], + } + }, + [1, 4], + ), + ( + { + "a": { + "values": ["a", "b"], + "info_maps": [ + {"simulation_day": 1, "units": "kg"}, + {"simulation_day": 4, "units": "kg"}, + ], + }, + "b": { + "values": ["c", "d", "e"], + "info_maps": [ + {"simulation_day": 2, "units": "g"}, + {"simulation_day": 3, "units": "g"}, + {"simulation_day": 6, "units": "g"}, + ], + }, + }, + [1, 4, 2, 3, 6], + ), + ( + {}, + [], + ), + ], +) +def test_gather_data_sim_days( + data_to_expand: dict[str, dict[str, list[Any]]], + expected: list[int], +) -> None: + """Tests _gather_data_sim_days returns the expected simulation days.""" + actual = Utility._gather_data_sim_days(data_to_expand) + + assert actual == expected + + +@pytest.mark.parametrize( + "data_to_expand,error_type,error_match", + [ + ( + {"a": {"values": ["a", "b"]}, "b": {"values": ["c", "d"]}}, + TypeError, + "Variable 'a' has no info maps", + ), + ( + { + "a": {"values": ["a", "b"], "info_maps": [{"simulation_day": 1}]}, + "b": {"values": ["c", "d"], "info_maps": [{"simulation_day": 1}, {"simulation_day": 3}]}, + }, + ValueError, + "Variable 'a' does not have matching number of values and info maps", + ), + ( + { + "a": {"values": ["a", "b"], "info_maps": [{"simulation_day": 1}, {"foo": "bar"}]}, + "b": {"values": ["c", "d"], "info_maps": [{"simulation_day": 1}, {"simulation_day": 3}]}, + }, + ValueError, + "Variable 'a' does not have simulation day value in every info map", + ), + ], +) +def test_gather_data_sim_days_errors( + data_to_expand: dict[str, dict[str, list[Any]]], + error_type: type[Exception], + error_match: str, +) -> None: + """Tests _gather_data_sim_days raises the expected errors for invalid input.""" + with pytest.raises(error_type, match=error_match): + Utility._gather_data_sim_days(data_to_expand) def test_deep_merge_dict() -> None: From e8b81fa125b6c801cb7a1fa40fbd613637b6a943 Mon Sep 17 00:00:00 2001 From: Niko <70217952+ew3361zh@users.noreply.github.com> Date: Mon, 16 Mar 2026 17:43:44 -0400 Subject: [PATCH 05/20] changelog --- changelog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/changelog.md b/changelog.md index ee46440809..f92e548e4a 100644 --- a/changelog.md +++ b/changelog.md @@ -46,6 +46,7 @@ v1.0.0 - [2843](https://github.com/RuminantFarmSystems/MASM/pull/2843) - [minor change] [NoInputChange] [NoOutputChange] Fix Simple `#noqa`s in codebase. - [2852](https://github.com/RuminantFarmSystems/MASM/pull/2852) - [minor change] [NoInputChange] [NoOutputChange] Fix AssertionError on `dev`. - [2863](https://github.com/RuminantFarmSystems/MASM/pull/2863) - [minor change] [NoInputChange] [NoOutputChange] Updates TaskManager to avoid using multiprocessing when running single tasks. +- [2863](https://github.com/RuminantFarmSystems/MASM/pull/2865) - [minor change] [NoInputChange] [NoOutputChange] Updates expand_data_temporally() util function to offer options of full simulation expansion and front-padding data. ### v1.0.0 From 03f2dd15f48b54d8594ddede2de9adf997cd2378 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 16 Mar 2026 21:55:22 +0000 Subject: [PATCH 06/20] Apply Black Formatting --- RUFAS/output_manager.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/RUFAS/output_manager.py b/RUFAS/output_manager.py index fa15976caa..a692f3f870 100644 --- a/RUFAS/output_manager.py +++ b/RUFAS/output_manager.py @@ -1340,8 +1340,7 @@ def filter_variables_pool( use_fill_value_before_start = filter_content.get("use_fill_value_before_start", True) use_fill_value_in_gaps = filter_content.get("use_fill_value_in_gaps", True) use_fill_value_at_end = filter_content.get("use_fill_value_at_end", True) - expand_data_to_observed_range = filter_content.get("expand_data_to_observed_range", - False) + expand_data_to_observed_range = filter_content.get("expand_data_to_observed_range", False) assert self.time is not None simulation_length = self.time.simulation_length_days if filter_content.get("name") == "Feed Expand Full Sim, No Fill": From 146ca080118761221a231b48a62966374053b337 Mon Sep 17 00:00:00 2001 From: Niko <70217952+ew3361zh@users.noreply.github.com> Date: Mon, 16 Mar 2026 18:02:23 -0400 Subject: [PATCH 07/20] fixes changelog --- changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index 2341c744a9..b8a35cff22 100644 --- a/changelog.md +++ b/changelog.md @@ -47,7 +47,7 @@ v1.0.0 - [2852](https://github.com/RuminantFarmSystems/MASM/pull/2852) - [minor change] [NoInputChange] [NoOutputChange] Fix AssertionError on `dev`. - [2866](https://github.com/RuminantFarmSystems/MASM/pull/2866) - [minor change] [NoInputChange] [NoOutputChange] Clears all mypy errors in test_field_manager.py. - [2863](https://github.com/RuminantFarmSystems/MASM/pull/2863) - [minor change] [NoInputChange] [NoOutputChange] Updates TaskManager to avoid using multiprocessing when running single tasks. -- [2863](https://github.com/RuminantFarmSystems/MASM/pull/2865) - [minor change] [NoInputChange] [NoOutputChange] Updates expand_data_temporally() util function to offer options of full simulation expansion and front-padding data. +- [2867](https://github.com/RuminantFarmSystems/MASM/pull/2867) - [minor change] [NoInputChange] [NoOutputChange] Updates expand_data_temporally() util function to offer options of full simulation expansion and front-padding data. ### v1.0.0 From 73a5c461ccc417643eaf7fd03ccf95663ea74059 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 16 Mar 2026 22:06:10 +0000 Subject: [PATCH 08/20] Apply Black Formatting From 9699b4ad2950722cefa94fdafb0a36e91532623e Mon Sep 17 00:00:00 2001 From: Niko <70217952+ew3361zh@users.noreply.github.com> Date: Tue, 17 Mar 2026 09:37:16 -0400 Subject: [PATCH 09/20] removes print statement used for testing --- RUFAS/output_manager.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/RUFAS/output_manager.py b/RUFAS/output_manager.py index a692f3f870..a1bd1b8d39 100644 --- a/RUFAS/output_manager.py +++ b/RUFAS/output_manager.py @@ -1343,8 +1343,6 @@ def filter_variables_pool( expand_data_to_observed_range = filter_content.get("expand_data_to_observed_range", False) assert self.time is not None simulation_length = self.time.simulation_length_days - if filter_content.get("name") == "Feed Expand Full Sim, No Fill": - print("pause") try: results = Utility.expand_data_temporally( results, From 95bea4ad7d5a540e1f6cd0e5f3ebea90f4ba615f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 17 Mar 2026 13:39:08 +0000 Subject: [PATCH 10/20] Apply Black Formatting From 830fdee56897fe70e8d28306c7f6142cbad80538 Mon Sep 17 00:00:00 2001 From: Niko <70217952+ew3361zh@users.noreply.github.com> Date: Tue, 17 Mar 2026 14:16:29 -0400 Subject: [PATCH 11/20] addresses allister's fb --- RUFAS/output_manager.py | 7 +++++-- RUFAS/rufas_time.py | 4 ++-- RUFAS/util.py | 4 ++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/RUFAS/output_manager.py b/RUFAS/output_manager.py index a1bd1b8d39..c23541f8cb 100644 --- a/RUFAS/output_manager.py +++ b/RUFAS/output_manager.py @@ -1336,13 +1336,16 @@ def filter_variables_pool( ) if filter_content.get("expand_data", False): + if self.time is None: + raise RuntimeError( + "Cannot expand data because OutputManager's 'time' attribute is not initialized." + ) + simulation_length = self.time.simulation_length_days fill_value = filter_content.get("fill_value", np.nan) use_fill_value_before_start = filter_content.get("use_fill_value_before_start", True) use_fill_value_in_gaps = filter_content.get("use_fill_value_in_gaps", True) use_fill_value_at_end = filter_content.get("use_fill_value_at_end", True) expand_data_to_observed_range = filter_content.get("expand_data_to_observed_range", False) - assert self.time is not None - simulation_length = self.time.simulation_length_days try: results = Utility.expand_data_temporally( results, diff --git a/RUFAS/rufas_time.py b/RUFAS/rufas_time.py index 37fc15f9e8..96ccf949bf 100644 --- a/RUFAS/rufas_time.py +++ b/RUFAS/rufas_time.py @@ -24,7 +24,7 @@ def __init__(self, start_date: datetime = None, end_date: datetime = None, curre self.end_date: datetime = end_date or datetime.strptime(str(config_data["end_date"]), "%Y:%j") self.current_date: datetime = current_date or self.start_date - self.simulation_length_days: int = (self.end_date - self.start_date).days + self.simulation_length_days: int = (self.end_date - self.start_date).days + 1 self.simulation_length_years: int = self.end_date.year - self.start_date.year + 1 def advance(self) -> None: @@ -162,7 +162,7 @@ def convert_slice_to_simulation_day(self, slice_day: int) -> int: if slice_day == 0: return 1 if slice_day < 0: - return self.simulation_length_days + slice_day + 1 + return self.simulation_length_days + slice_day return slice_day def __str__(self) -> str: diff --git a/RUFAS/util.py b/RUFAS/util.py index 32e41574fc..c1a87d5600 100644 --- a/RUFAS/util.py +++ b/RUFAS/util.py @@ -194,8 +194,8 @@ def expand_data_temporally( all_simulation_days = Utility._gather_data_sim_days(data_to_expand) filtered_simulation_days = sorted(set(all_simulation_days)) - first_day = filtered_simulation_days[0] if expand_data_to_observed_range else 1 - last_day = filtered_simulation_days[-1] if expand_data_to_observed_range else simulation_length + first_day = filtered_simulation_days[0] if expand_data_to_observed_range else 0 + last_day = filtered_simulation_days[-1] if expand_data_to_observed_range else simulation_length - 1 expanded_data: dict[str, dict[str, list[Any]]] = {} for key, data in data_to_expand.items(): From e40281991c29509c3e7b30d2b99bf635391be8be Mon Sep 17 00:00:00 2001 From: Niko <70217952+ew3361zh@users.noreply.github.com> Date: Tue, 17 Mar 2026 14:17:05 -0400 Subject: [PATCH 12/20] one final piece of fb addressed --- RUFAS/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RUFAS/util.py b/RUFAS/util.py index c1a87d5600..ccf9ce9f66 100644 --- a/RUFAS/util.py +++ b/RUFAS/util.py @@ -195,7 +195,7 @@ def expand_data_temporally( filtered_simulation_days = sorted(set(all_simulation_days)) first_day = filtered_simulation_days[0] if expand_data_to_observed_range else 0 - last_day = filtered_simulation_days[-1] if expand_data_to_observed_range else simulation_length - 1 + last_day = filtered_simulation_days[-1] if expand_data_to_observed_range else simulation_length expanded_data: dict[str, dict[str, list[Any]]] = {} for key, data in data_to_expand.items(): From 08fb31b21e911e049b64a1a049c74cc111c3ab76 Mon Sep 17 00:00:00 2001 From: Niko <70217952+ew3361zh@users.noreply.github.com> Date: Tue, 17 Mar 2026 17:23:16 -0400 Subject: [PATCH 13/20] updated unit tests --- tests/test_time.py | 6 +++--- tests/test_util.py | 37 ++++++++++++++++++++++++++----------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/tests/test_time.py b/tests/test_time.py index 9578dad694..419406effe 100644 --- a/tests/test_time.py +++ b/tests/test_time.py @@ -26,7 +26,7 @@ def test_time_initialization() -> None: assert time.end_date == datetime(year=2000, month=1, day=1) assert time.current_date == time.start_date - assert time.simulation_length_days == (time.end_date - time.start_date).days + assert time.simulation_length_days == (time.end_date - time.start_date).days + 1 assert time.simulation_day == 0 @@ -211,8 +211,8 @@ def test_convert_year_jday_to_date( [ (0, 100, 1), (5, 100, 5), - (-1, 100, 100), - (-100, 100, 1), + (-1, 100, 99), + (-100, 100, 0), (100, 100, 100), (150, 100, 150), ], diff --git a/tests/test_util.py b/tests/test_util.py index b1c61c0700..c771963a5b 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -376,8 +376,9 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: False, { "a": { - "values": ["a", "a", "a", "b", "c", math.nan], + "values": ["a", "a", "a", "a", "b", "c", math.nan], "info_maps": [ + {"simulation_day": 0, "units": "kg"}, {"simulation_day": 1, "units": "kg"}, {"simulation_day": 2, "units": "kg"}, {"simulation_day": 3, "units": "kg"}, @@ -387,8 +388,9 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: ], }, "b": { - "values": ["d", "d", "d", "e", "e", "f"], + "values": ["d", "d", "d", "d", "e", "e", "f"], "info_maps": [ + {"simulation_day": 0, "units": "g"}, {"simulation_day": 1, "units": "g"}, {"simulation_day": 2, "units": "g"}, {"simulation_day": 3, "units": "g"}, @@ -426,8 +428,9 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: False, { "a": { - "values": ["a", math.nan, math.nan, "b", "c", "c"], + "values": [math.nan, "a", math.nan, math.nan, "b", "c", "c"], "info_maps": [ + {"simulation_day": 0, "units": "kg"}, {"simulation_day": 1, "units": "kg"}, {"simulation_day": 2, "units": "kg"}, {"simulation_day": 3, "units": "kg"}, @@ -437,8 +440,9 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: ], }, "b": { - "values": [math.nan, math.nan, "d", "e", math.nan, "f"], + "values": [math.nan, math.nan, math.nan, "d", "e", math.nan, "f"], "info_maps": [ + {"simulation_day": 0, "units": "g"}, {"simulation_day": 1, "units": "g"}, {"simulation_day": 2, "units": "g"}, {"simulation_day": 3, "units": "g"}, @@ -501,12 +505,20 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: False, { "a": { - "values": ["a", "b"], - "info_maps": [{"simulation_day": 1, "units": "ha"}, {"simulation_day": 2, "units": "ha"}], + "values": ["a", "a", "b"], + "info_maps": [ + {"simulation_day": 0, "units": "ha"}, + {"simulation_day": 1, "units": "ha"}, + {"simulation_day": 2, "units": "ha"} + ], }, "b": { - "values": ["c", "d"], - "info_maps": [{"simulation_day": 1, "units": "ha"}, {"simulation_day": 2, "units": "ha"}], + "values": ["c", "c", "d"], + "info_maps": [ + {"simulation_day": 0, "units": "ha"}, + {"simulation_day": 1, "units": "ha"}, + {"simulation_day": 2, "units": "ha"} + ], }, }, ), @@ -529,16 +541,18 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: False, { "a": { - "values": ["a", "fill", "b"], + "values": ["a", "a", "fill", "b"], "info_maps": [ + {"simulation_day": 0, "units": "ha^2"}, {"simulation_day": 1, "units": "ha^2"}, {"simulation_day": 2, "units": "ha^2"}, {"simulation_day": 3, "units": "ha^2"}, ], }, "b": { - "values": ["c", "fill", "d"], + "values": ["c", "c", "fill", "d"], "info_maps": [ + {"simulation_day": 0, "units": "l"}, {"simulation_day": 1, "units": "l"}, {"simulation_day": 2, "units": "l"}, {"simulation_day": 3, "units": "l"}, @@ -561,8 +575,9 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: False, { "a": { - "values": [math.pi, "a", "b"], + "values": [math.pi, math.pi, "a", "b"], "info_maps": [ + {"simulation_day": 0, "units": "GB"}, {"simulation_day": 1, "units": "GB"}, {"simulation_day": 2, "units": "GB"}, {"simulation_day": 3, "units": "GB"}, From 0005dbec07992b11da3f5562da555f75607d6704 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 17 Mar 2026 21:25:09 +0000 Subject: [PATCH 14/20] Apply Black Formatting --- RUFAS/output_manager.py | 4 +--- tests/test_util.py | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/RUFAS/output_manager.py b/RUFAS/output_manager.py index c23541f8cb..cbfd344f86 100644 --- a/RUFAS/output_manager.py +++ b/RUFAS/output_manager.py @@ -1337,9 +1337,7 @@ def filter_variables_pool( if filter_content.get("expand_data", False): if self.time is None: - raise RuntimeError( - "Cannot expand data because OutputManager's 'time' attribute is not initialized." - ) + raise RuntimeError("Cannot expand data because OutputManager's 'time' attribute is not initialized.") simulation_length = self.time.simulation_length_days fill_value = filter_content.get("fill_value", np.nan) use_fill_value_before_start = filter_content.get("use_fill_value_before_start", True) diff --git a/tests/test_util.py b/tests/test_util.py index c771963a5b..eb4ec48df2 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -509,7 +509,7 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: "info_maps": [ {"simulation_day": 0, "units": "ha"}, {"simulation_day": 1, "units": "ha"}, - {"simulation_day": 2, "units": "ha"} + {"simulation_day": 2, "units": "ha"}, ], }, "b": { @@ -517,7 +517,7 @@ def test_flatten_keys_to_nested_structure_dict_w_list() -> None: "info_maps": [ {"simulation_day": 0, "units": "ha"}, {"simulation_day": 1, "units": "ha"}, - {"simulation_day": 2, "units": "ha"} + {"simulation_day": 2, "units": "ha"}, ], }, }, From 72e05caa854d568e3061a4cd61625765dc938e98 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 19 Mar 2026 13:40:21 +0000 Subject: [PATCH 15/20] Apply Black Formatting From 68010e7b1c93ad726670661498e519b79ed1a342 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Mar 2026 01:56:20 +0000 Subject: [PATCH 16/20] Apply Black Formatting From ba793d3b6d4523b08ffaa2b172b14e885811be1b Mon Sep 17 00:00:00 2001 From: Niko <70217952+ew3361zh@users.noreply.github.com> Date: Tue, 17 Mar 2026 18:13:34 -0400 Subject: [PATCH 17/20] addresses final piece of fb from allister --- RUFAS/util.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/RUFAS/util.py b/RUFAS/util.py index ccf9ce9f66..b17e651f64 100644 --- a/RUFAS/util.py +++ b/RUFAS/util.py @@ -242,8 +242,11 @@ def expand_data_temporally( else: value_to_add = fill_value if use_fill_value_at_end else last_known_value + info_map_to_add = last_known_info_map.copy() + info_map_to_add["simulation_day"] = day + expanded_variable_data["values"].append(value_to_add) - expanded_variable_data["info_maps"].append({"simulation_day": day, "units": original_units}) + expanded_variable_data["info_maps"].append(info_map_to_add) expanded_data[key] = expanded_variable_data From 23ddcf6c7ccdc0cbb6dc4f39493956ed91baedd8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Mar 2026 11:48:50 +0000 Subject: [PATCH 18/20] Apply Black Formatting From 13faccde506608dbb33f019f8c8f680c20abfc06 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Mar 2026 13:09:53 +0000 Subject: [PATCH 19/20] Apply Black Formatting From 7ae954ad47900a479885ba9d697dacbfc7354919 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 25 Mar 2026 20:19:15 +0000 Subject: [PATCH 20/20] Apply Black Formatting