diff --git a/src/supy/__init__.py b/src/supy/__init__.py index 1207a10ae..c50234097 100644 --- a/src/supy/__init__.py +++ b/src/supy/__init__.py @@ -55,6 +55,8 @@ "ValidationResult", # Modern interface "SUEWSSimulation", + "SUEWSForcing", + "SUEWSOutput", # Version "show_version", "__version__", @@ -153,6 +155,24 @@ def __getattr__(name): except ImportError: return None + if name == "SUEWSForcing": + try: + from .suews_forcing import SUEWSForcing + + _lazy_cache[name] = SUEWSForcing + return _lazy_cache[name] + except ImportError: + return None + + if name == "SUEWSOutput": + try: + from .suews_output import SUEWSOutput + + _lazy_cache[name] = SUEWSOutput + return _lazy_cache[name] + except ImportError: + return None + # Version info if name == "show_version": from ._version import show_version diff --git a/src/supy/meson.build b/src/supy/meson.build index 82d3849c9..6d464220e 100644 --- a/src/supy/meson.build +++ b/src/supy/meson.build @@ -12,6 +12,8 @@ py.install_sources( '_supy_module.py', '_version.py', '_version_scm.py', + 'suews_forcing.py', + 'suews_output.py', 'suews_sim.py', 'code2file.json', 'checker_rules_indiv.json', diff --git a/src/supy/suews_forcing.py b/src/supy/suews_forcing.py new file mode 100644 index 000000000..9c5a0b95c --- /dev/null +++ b/src/supy/suews_forcing.py @@ -0,0 +1,839 @@ +""" +SUEWSForcing - OOP wrapper for SUEWS meteorological forcing data. + +Provides a structured interface for loading, validating, and analysing +meteorological forcing data for SUEWS simulations. +""" + +import warnings +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union + +import numpy as np +import pandas as pd + + +# Variable aliases for more intuitive access +FORCING_ALIASES = { + # Technical name -> Human-readable aliases + "Tair": ["temperature", "air_temperature", "temp", "t_air"], + "RH": ["relative_humidity", "humidity", "rh"], + "pres": ["pressure", "air_pressure", "p"], + "U": ["wind_speed", "wind", "u"], + "kdown": ["shortwave_down", "solar_radiation", "sw_down", "k_down"], + "ldown": ["longwave_down", "lw_down", "l_down"], + "rain": ["precipitation", "rainfall", "precip"], + "fcld": ["cloud_fraction", "cloud_cover", "clouds"], + "xsmd": ["soil_moisture", "smd"], + "qn": ["net_radiation", "qstar", "q_star"], + "qh": ["sensible_heat", "h"], + "qe": ["latent_heat", "le"], + "qf": ["anthropogenic_heat", "qf_obs"], + "qs": ["storage_heat"], + "snow": ["snowfall"], + "Wuh": ["water_use", "external_water"], + "lai": ["leaf_area_index"], + "kdiff": ["diffuse_radiation", "diffuse"], + "kdir": ["direct_radiation", "direct"], + "wdir": ["wind_direction"], +} + +# Build reverse mapping: alias -> canonical name +_ALIAS_TO_CANONICAL = {} +for canonical, aliases in FORCING_ALIASES.items(): + for alias in aliases: + _ALIAS_TO_CANONICAL[alias.lower()] = canonical + _ALIAS_TO_CANONICAL[canonical.lower()] = canonical + + +# Variable types for resampling (from _load.py) +FORCING_VAR_TYPES = { + "iy": "time", + "id": "time", + "it": "time", + "imin": "time", + "qn": "avg", + "qh": "avg", + "qe": "avg", + "qs": "avg", + "qf": "avg", + "U": "inst", + "RH": "inst", + "Tair": "inst", + "pres": "inst", + "rain": "sum", + "kdown": "avg", + "snow": "inst", + "ldown": "avg", + "fcld": "inst", + "Wuh": "sum", + "xsmd": "inst", + "lai": "inst", + "kdiff": "avg", + "kdir": "avg", + "wdir": "inst", +} + +# Required columns for SUEWS forcing +REQUIRED_COLUMNS = [ + "iy", + "id", + "it", + "imin", + "qn", + "qh", + "qe", + "qs", + "qf", + 
"U", + "RH", + "Tair", + "pres", + "rain", + "kdown", + "snow", + "ldown", + "fcld", + "Wuh", + "xsmd", + "lai", + "kdiff", + "kdir", + "wdir", +] + + +@dataclass +class ValidationResult: + """Container for forcing validation results.""" + + is_valid: bool + errors: List[str] + warnings: List[str] + + def raise_if_invalid(self): + """Raise ValueError if forcing is invalid.""" + if not self.is_valid: + error_msg = "; ".join(self.errors) + raise ValueError(f"Invalid forcing data: {error_msg}") + + def __repr__(self) -> str: + status = "Valid" if self.is_valid else "Invalid" + n_errors = len(self.errors) + n_warnings = len(self.warnings) + return f"ValidationResult({status}, {n_errors} errors, {n_warnings} warnings)" + + +class SUEWSForcing: + """ + Wrapper for meteorological forcing data with convenience functions. + + Provides intuitive access to forcing variables, validation, analysis, + and manipulation methods for SUEWS meteorological input data. + + Parameters + ---------- + data : pd.DataFrame + Forcing DataFrame with DatetimeIndex + source : str, optional + Description of data source (e.g., file path) + + Examples + -------- + Load from file: + + >>> forcing = SUEWSForcing.from_file("forcing_2023.txt") + >>> forcing + SUEWSForcing(2023-01-01 00:00 to 2023-12-31 23:00, 8760 timesteps @ 3600s) + + Access variables with intuitive names: + + >>> forcing.temperature # Same as forcing.Tair + >>> forcing.wind_speed # Same as forcing.U + + Validate forcing data: + + >>> result = forcing.validate() + >>> if not result.is_valid: + ... print(result.errors) + + Analyse data: + + >>> forcing.summary() # Statistical summary + >>> forcing.diurnal_pattern() # Mean diurnal cycle + + Manipulate data: + + >>> forcing_2023 = forcing.slice("2023-01-01", "2023-12-31") + >>> forcing_hourly = forcing.resample("1H") + """ + + def __init__(self, data: pd.DataFrame, source: Optional[str] = None): + """ + Initialise SUEWSForcing with validated DataFrame. + + Parameters + ---------- + data : pd.DataFrame + Forcing DataFrame with DatetimeIndex + source : str, optional + Description of data source (e.g., file path) + """ + self._data = data.copy() + self._source = source + self._validation_result: Optional[ValidationResult] = None + + # ========================================================================= + # Construction methods + # ========================================================================= + + @classmethod + def from_file(cls, path: Union[str, Path], tstep_mod: int = 300) -> "SUEWSForcing": + """ + Load forcing from a single file. + + Parameters + ---------- + path : str or Path + Path to forcing file + tstep_mod : int, optional + Model timestep in seconds (default 300s = 5 min) + + Returns + ------- + SUEWSForcing + Loaded forcing data + """ + from .util._io import _read_forcing_impl + + path = Path(path).expanduser().resolve() + if not path.exists(): + raise FileNotFoundError(f"Forcing file not found: {path}") + + df = _read_forcing_impl(str(path), tstep_mod=tstep_mod) + return cls(df, source=str(path)) + + @classmethod + def from_files( + cls, paths: List[Union[str, Path]], tstep_mod: int = 300 + ) -> "SUEWSForcing": + """ + Load and concatenate forcing from multiple files. 
+ + Parameters + ---------- + paths : list of str or Path + Paths to forcing files (concatenated in order) + tstep_mod : int, optional + Model timestep in seconds (default 300s = 5 min) + + Returns + ------- + SUEWSForcing + Concatenated forcing data + """ + from .util._io import _read_forcing_impl + + if not paths: + raise ValueError("Empty forcing file list provided") + + dfs = [] + for p in paths: + path = Path(p).expanduser().resolve() + if not path.exists(): + raise FileNotFoundError(f"Forcing file not found: {path}") + df = _read_forcing_impl(str(path), tstep_mod=tstep_mod) + dfs.append(df) + + combined = pd.concat(dfs, axis=0).sort_index() + # Remove any duplicates + combined = combined[~combined.index.duplicated(keep="first")] + return cls(combined, source=f"[{len(paths)} files]") + + @classmethod + def from_dataframe(cls, df: pd.DataFrame) -> "SUEWSForcing": + """ + Create from existing DataFrame. + + Parameters + ---------- + df : pd.DataFrame + DataFrame with forcing data and DatetimeIndex + + Returns + ------- + SUEWSForcing + Wrapped forcing data + """ + return cls(df, source="DataFrame") + + # ========================================================================= + # Core data access + # ========================================================================= + + @property + def df(self) -> pd.DataFrame: + """Access underlying DataFrame.""" + return self._data.copy() + + @property + def index(self) -> pd.DatetimeIndex: + """Datetime index of forcing data (pandas-compatible).""" + return self._data.index + + @property + def loc(self): + """Label-based indexer (pandas-compatible).""" + return self._data.loc + + @property + def iloc(self): + """Integer-based indexer (pandas-compatible).""" + return self._data.iloc + + @property + def times(self) -> pd.DatetimeIndex: + """Datetime index of forcing data. + + .. deprecated:: + Use :attr:`index` instead for pandas-compatible access. + """ + warnings.warn( + "SUEWSForcing.times is deprecated, use .index instead", + DeprecationWarning, + stacklevel=2, + ) + return self._data.index + + @property + def time_range(self) -> Tuple[pd.Timestamp, pd.Timestamp]: + """Return (start, end) timestamps.""" + return (self._data.index[0], self._data.index[-1]) + + @property + def timestep(self) -> pd.Timedelta: + """Timestep of forcing data.""" + freq = self._data.index.freq + if freq is not None: + return pd.Timedelta(freq) + # Calculate from data + if len(self._data) > 1: + diff = self._data.index.to_series().diff().iloc[-1] + return diff + return pd.Timedelta("5min") # Default + + @property + def timestep_seconds(self) -> int: + """Timestep in seconds.""" + return int(self.timestep.total_seconds()) + + @property + def n_timesteps(self) -> int: + """Number of timesteps. + + .. deprecated:: + Use ``len(forcing)`` instead. + """ + warnings.warn( + "SUEWSForcing.n_timesteps is deprecated, use len(forcing) instead", + DeprecationWarning, + stacklevel=2, + ) + return len(self._data) + + @property + def columns(self) -> pd.Index: + """Column names in the forcing data.""" + return self._data.columns + + @property + def source(self) -> Optional[str]: + """Data source description.""" + return self._source + + def __getattr__(self, name: str) -> pd.Series: + """ + Dynamic attribute access for variables with alias support. + + Allows access like `forcing.temperature` instead of `forcing['Tair']`. 
+ """ + # Check if it's a known alias + canonical = _ALIAS_TO_CANONICAL.get(name.lower()) + if canonical is not None and canonical in self._data.columns: + return self._data[canonical] + + # Check if it's a direct column name + if name in self._data.columns: + return self._data[name] + + raise AttributeError( + f"'{type(self).__name__}' has no attribute '{name}'. " + f"Available columns: {list(self._data.columns)}" + ) + + def __getitem__(self, key: str) -> pd.Series: + """Access variables by column name.""" + # Support alias lookup + canonical = _ALIAS_TO_CANONICAL.get(key.lower()) + if canonical is not None and canonical in self._data.columns: + return self._data[canonical] + return self._data[key] + + # ========================================================================= + # Validation + # ========================================================================= + + def validate( + self, + physics: Optional[Any] = None, + raise_on_error: bool = False, + ) -> ValidationResult: + """ + Validate forcing data comprehensively. + + Checks: + 1. Required columns present + 2. Temporal index validity (DatetimeIndex, monotonic, no duplicates) + 3. Physical range validation for each variable + 4. Physics-specific requirements (e.g., if netradiationmethod=0, qn required) + + Parameters + ---------- + physics : ModelPhysics, optional + Model physics configuration for physics-aware validation + raise_on_error : bool + If True, raise ValueError on any validation failure + + Returns + ------- + ValidationResult + Validation results with errors and warnings + """ + from ._check import check_forcing + + errors = [] + warnings = [] + + # Use existing check_forcing logic + physics_dict = None + if physics is not None: + if hasattr(physics, "model_dump"): + physics_dict = physics.model_dump() + elif isinstance(physics, dict): + physics_dict = physics + + result = check_forcing(self._data, fix=False, physics=physics_dict) + + if isinstance(result, list): + errors.extend(result) + + self._validation_result = ValidationResult( + is_valid=len(errors) == 0, + errors=errors, + warnings=warnings, + ) + + if raise_on_error: + self._validation_result.raise_if_invalid() + + return self._validation_result + + # ========================================================================= + # Analysis (convenience functions) + # ========================================================================= + + def summary(self) -> pd.DataFrame: + """ + Statistical summary of all forcing variables. + + Returns + ------- + pd.DataFrame + Summary statistics (count, mean, std, min, max, etc.) + """ + # Exclude time columns + time_cols = ["iy", "id", "it", "imin", "isec"] + data_cols = [c for c in self._data.columns if c not in time_cols] + return self._data[data_cols].describe() + + def diurnal_pattern(self, var: Optional[str] = None) -> pd.DataFrame: + """ + Calculate mean diurnal patterns. + + Parameters + ---------- + var : str, optional + Specific variable to analyse. If None, analyses all. + + Returns + ------- + pd.DataFrame + Hourly mean values grouped by hour of day + """ + df = self._data.copy() + df["hour"] = df.index.hour + + # Select columns + time_cols = ["iy", "id", "it", "imin", "isec", "hour"] + if var is not None: + canonical = _ALIAS_TO_CANONICAL.get(var.lower(), var) + cols = [canonical] if canonical in df.columns else [var] + else: + cols = [c for c in df.columns if c not in time_cols] + + return df.groupby("hour")[cols].mean() + + def check_gaps(self) -> pd.DataFrame: + """ + Report gaps in forcing data. 
+ + Returns + ------- + pd.DataFrame + DataFrame with gap information (start, end, duration) + """ + idx = self._data.index + expected_freq = self.timestep + + # Find gaps + gaps = [] + for i in range(1, len(idx)): + actual_diff = idx[i] - idx[i - 1] + if actual_diff > expected_freq: + gaps.append({ + "gap_start": idx[i - 1], + "gap_end": idx[i], + "duration": actual_diff, + "missing_steps": int(actual_diff / expected_freq) - 1, + }) + + if gaps: + return pd.DataFrame(gaps) + else: + return pd.DataFrame( + columns=["gap_start", "gap_end", "duration", "missing_steps"] + ) + + def completeness(self) -> Dict[str, float]: + """ + Calculate data completeness for each variable. + + Returns + ------- + dict + Mapping variable names to completeness percentage (0-100) + """ + time_cols = ["iy", "id", "it", "imin", "isec"] + data_cols = [c for c in self._data.columns if c not in time_cols] + + result = {} + for col in data_cols: + # Count non-null and non-missing (-999) values + valid = self._data[col].replace(-999, np.nan).notna().sum() + total = len(self._data) + result[col] = 100.0 * valid / total if total > 0 else 0.0 + + return result + + # ========================================================================= + # Manipulation + # ========================================================================= + + def slice( + self, + start: Optional[str] = None, + end: Optional[str] = None, + ) -> "SUEWSForcing": + """ + Return a new SUEWSForcing for a time slice. + + Parameters + ---------- + start : str, optional + Start date/time (inclusive) + end : str, optional + End date/time (inclusive) + + Returns + ------- + SUEWSForcing + New forcing object with sliced data + """ + sliced_data = self._data.loc[start:end] + return SUEWSForcing(sliced_data, source=f"{self._source}[{start}:{end}]") + + def resample(self, freq: str) -> "SUEWSForcing": + """ + Resample to different temporal resolution. + + Uses appropriate aggregation methods for each variable type: + - Instantaneous variables: last value + - Average variables: mean + - Sum variables: sum + + Parameters + ---------- + freq : str + Target frequency (e.g., "1H", "30min") + + Returns + ------- + SUEWSForcing + New forcing object at resampled frequency + """ + resampled = self._data.copy() + + # Build aggregation dict based on variable types + agg_dict = {} + for col in resampled.columns: + var_type = FORCING_VAR_TYPES.get(col, "inst") + if var_type == "time": + agg_dict[col] = "last" + elif var_type == "avg": + agg_dict[col] = "mean" + elif var_type == "sum": + agg_dict[col] = "sum" + else: # inst + agg_dict[col] = "last" + + resampled = resampled.resample(freq, closed="right", label="right").agg( + agg_dict + ) + + return SUEWSForcing(resampled, source=f"{self._source}@{freq}") + + def fill_gaps(self, method: str = "interpolate", **kwargs) -> "SUEWSForcing": + """ + Fill missing values in forcing data. 
+ + Parameters + ---------- + method : str + Fill method: "interpolate", "ffill", "bfill" + **kwargs + Additional arguments passed to the fill method + + Returns + ------- + SUEWSForcing + New forcing object with gaps filled + """ + filled = self._data.copy() + + # Replace -999 with NaN for filling + filled = filled.replace(-999, np.nan) + + if method == "interpolate": + filled = filled.interpolate(**kwargs) + elif method == "ffill": + filled = filled.ffill(**kwargs) + elif method == "bfill": + filled = filled.bfill(**kwargs) + else: + raise ValueError(f"Unknown fill method: {method}") + + return SUEWSForcing(filled, source=f"{self._source}[filled]") + + # ========================================================================= + # Plotting + # ========================================================================= + + def plot( + self, + var: Optional[str] = None, + ax=None, + **kwargs, + ): + """ + Quick timeseries plot. + + Parameters + ---------- + var : str, optional + Variable to plot. If None, plots temperature. + ax : matplotlib.axes.Axes, optional + Axes to plot on + **kwargs + Additional arguments passed to plot + + Returns + ------- + tuple + (fig, ax) matplotlib objects + """ + import matplotlib.pyplot as plt + + if ax is None: + fig, ax = plt.subplots(figsize=(12, 4)) + else: + fig = ax.get_figure() + + # Default to temperature + if var is None: + var = "Tair" + + # Resolve alias + canonical = _ALIAS_TO_CANONICAL.get(var.lower(), var) + if canonical not in self._data.columns: + raise ValueError(f"Variable '{var}' not found in forcing data") + + self._data[canonical].plot(ax=ax, **kwargs) + ax.set_ylabel(canonical) + ax.set_xlabel("Time") + ax.set_title(f"Forcing: {canonical}") + + return fig, ax + + def plot_availability(self, ax=None, **kwargs): + """ + Plot data availability heatmap. + + Shows which variables have data at which times. + + Parameters + ---------- + ax : matplotlib.axes.Axes, optional + Axes to plot on + **kwargs + Additional arguments passed to imshow + + Returns + ------- + tuple + (fig, ax) matplotlib objects + """ + import matplotlib.pyplot as plt + + if ax is None: + fig, ax = plt.subplots(figsize=(14, 6)) + else: + fig = ax.get_figure() + + # Exclude time columns + time_cols = ["iy", "id", "it", "imin", "isec"] + data_cols = [c for c in self._data.columns if c not in time_cols] + + # Create availability matrix (1 = valid, 0 = missing) + avail = self._data[data_cols].replace(-999, np.nan).notna().astype(int) + + # Downsample for visualisation if too many timesteps + if len(avail) > 1000: + avail = avail.resample("1D").mean() + + ax.imshow(avail.T, aspect="auto", cmap="RdYlGn", **kwargs) + ax.set_yticks(range(len(data_cols))) + ax.set_yticklabels(data_cols) + ax.set_xlabel("Time") + ax.set_title("Data Availability (green=available, red=missing)") + + return fig, ax + + # ========================================================================= + # Export + # ========================================================================= + + def to_dataframe(self) -> pd.DataFrame: + """Return copy of underlying DataFrame. + + .. deprecated:: + Use :attr:`df` property instead. + """ + warnings.warn( + "SUEWSForcing.to_dataframe() is deprecated, use .df instead", + DeprecationWarning, + stacklevel=2, + ) + return self._data.copy() + + def to_suews_format(self, path: Union[str, Path]) -> Path: + """ + Export in SUEWS native text format. 
+ + Parameters + ---------- + path : str or Path + Output file path + + Returns + ------- + Path + Path to saved file + """ + path = Path(path) + + # Generate datetime columns + df_save = self._data.copy() + + # Format and save + df_save.to_csv(path, sep="\t", index=True) + + return path + + def to_csv(self, path: Union[str, Path], **kwargs) -> Path: + """ + Export to CSV format. + + Parameters + ---------- + path : str or Path + Output file path + **kwargs + Additional arguments passed to to_csv + + Returns + ------- + Path + Path to saved file + """ + path = Path(path) + self._data.to_csv(path, **kwargs) + return path + + # ========================================================================= + # Rich display + # ========================================================================= + + def __repr__(self) -> str: + """Concise representation of forcing data.""" + start, end = self.time_range + n_steps = self.n_timesteps + tstep = self.timestep_seconds + + # Count available variables (excluding time columns) + time_cols = ["iy", "id", "it", "imin", "isec"] + n_vars = len([c for c in self._data.columns if c not in time_cols]) + + return ( + f"SUEWSForcing({start} to {end}, " + f"{n_steps} timesteps @ {tstep}s, {n_vars} variables)" + ) + + def _repr_html_(self) -> str: + """HTML representation for Jupyter notebooks.""" + start, end = self.time_range + n_steps = self.n_timesteps + tstep = self.timestep_seconds + + # Get completeness info + completeness = self.completeness() + complete_vars = sum(1 for v in completeness.values() if v > 99) + total_vars = len(completeness) + + html = f""" +
+        <div>
+            <strong>SUEWSForcing</strong>
+            <table>
+                <tr><td>Time range:</td><td>{start} to {end}</td></tr>
+                <tr><td>Timestep:</td><td>{tstep} seconds</td></tr>
+                <tr><td>Timesteps:</td><td>{n_steps}</td></tr>
+                <tr><td>Variables:</td><td>{complete_vars}/{total_vars} complete</td></tr>
+                <tr><td>Source:</td><td>{self._source or "Unknown"}</td></tr>
+            </table>
+        </div>
+ """ + return html + + def __len__(self) -> int: + """Number of timesteps.""" + return len(self._data) diff --git a/src/supy/suews_output.py b/src/supy/suews_output.py new file mode 100644 index 000000000..0f603f325 --- /dev/null +++ b/src/supy/suews_output.py @@ -0,0 +1,881 @@ +""" +SUEWSOutput - OOP wrapper for SUEWS simulation results. + +Provides a structured interface for accessing, analysing, and exporting +SUEWS model output data. +""" + +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Union + +import numpy as np +import pandas as pd + + +class SUEWSOutput: + """ + Wrapper for SUEWS simulation results with analysis convenience functions. + + Provides intuitive access to output variables, analysis methods, + and export capabilities for SUEWS simulation results. + + Parameters + ---------- + df_output : pd.DataFrame + Output DataFrame with MultiIndex columns (group, var) + df_state_final : pd.DataFrame + Final model state DataFrame + config : SUEWSConfig, optional + Configuration used for this run + metadata : dict, optional + Additional metadata (timing, version, etc.) + + Examples + -------- + Run simulation and access output: + + >>> sim = SUEWSSimulation.from_sample_data() + >>> output = sim.run() # Returns SUEWSOutput + >>> output + SUEWSOutput(2012-01-01 to 2012-12-31, 1 grid(s), 6 groups) + + Access variables directly: + + >>> output.QH # Sensible heat flux + >>> output.QE # Latent heat flux + >>> output.get_variable("Tair", group="SUEWS") + + Access by group: + + >>> output.groups # ['SUEWS', 'DailyState', 'snow', ...] + >>> output.get_group("SUEWS") + + Analysis methods: + + >>> output.energy_balance_closure() + >>> output.diurnal_average("QH") + >>> output.monthly_summary() + + Export: + + >>> output.save("output/", format="parquet") + >>> output.to_netcdf("results.nc") + + Restart runs: + + >>> initial_state = output.to_initial_state() + >>> sim2 = SUEWSSimulation.from_state(output.state_final) + """ + + def __init__( + self, + df_output: pd.DataFrame, + df_state_final: pd.DataFrame, + config: Optional[Any] = None, + metadata: Optional[Dict[str, Any]] = None, + ): + """ + Initialise SUEWSOutput. + + Parameters + ---------- + df_output : pd.DataFrame + Output DataFrame with MultiIndex columns (group, var) + df_state_final : pd.DataFrame + Final model state DataFrame + config : SUEWSConfig, optional + Configuration used for this run (for save context) + metadata : dict, optional + Additional metadata (timing, version, etc.) + """ + self._df_output = df_output.copy() + self._df_state_final = df_state_final.copy() + self._config = config + self._metadata = metadata or {} + + # ========================================================================= + # Core data access + # ========================================================================= + + @property + def df(self) -> pd.DataFrame: + """Access underlying output DataFrame.""" + return self._df_output.copy() + + @property + def empty(self) -> bool: + """Check if output is empty (pandas-compatible).""" + return self._df_output.empty + + @property + def columns(self) -> pd.Index: + """Column index of output DataFrame (pandas-compatible).""" + return self._df_output.columns + + @property + def state_final(self) -> pd.DataFrame: + """ + Final model state for restart runs. + + Use with SUEWSSimulation.from_state() to continue simulations. 
+ + Returns + ------- + pd.DataFrame + Final state DataFrame ready for restart + """ + return self._df_state_final.copy() + + @property + def times(self) -> pd.DatetimeIndex: + """Datetime index of output.""" + return self._df_output.index.get_level_values("datetime").unique() + + @property + def time_range(self) -> tuple: + """Return (start, end) timestamps.""" + times = self.times + return (times.min(), times.max()) + + @property + def grids(self) -> List: + """List of grid identifiers.""" + return self._df_output.index.get_level_values("grid").unique().tolist() + + @property + def n_grids(self) -> int: + """Number of grids. + + .. deprecated:: + Use ``len(output.grids)`` instead. + """ + warnings.warn( + "SUEWSOutput.n_grids is deprecated, use len(output.grids) instead", + DeprecationWarning, + stacklevel=2, + ) + return len(self.grids) + + @property + def n_timesteps(self) -> int: + """Number of output timesteps. + + .. deprecated:: + Use ``len(output.times)`` instead. + """ + warnings.warn( + "SUEWSOutput.n_timesteps is deprecated, use len(output.times) instead", + DeprecationWarning, + stacklevel=2, + ) + return len(self.times) + + @property + def groups(self) -> List[str]: + """List of available output groups.""" + return self._df_output.columns.get_level_values("group").unique().tolist() + + @property + def config(self) -> Optional[Any]: + """Configuration used for this run.""" + return self._config + + @property + def metadata(self) -> Dict[str, Any]: + """Metadata about the simulation.""" + return self._metadata.copy() + + @property + def loc(self): + """Label-based indexer (pandas-compatible).""" + return self._df_output.loc + + @property + def iloc(self): + """Integer-based indexer (pandas-compatible).""" + return self._df_output.iloc + + def xs(self, key, axis=0, level=None, drop_level=True): + """Cross-section from MultiIndex DataFrame (pandas-compatible).""" + return self._df_output.xs(key, axis=axis, level=level, drop_level=drop_level) + + def __getattr__(self, name: str) -> pd.DataFrame: + """ + Dynamic attribute access for output variables and groups. + + Allows access like `output.QH` instead of `output.get_variable('QH')`, + and `output.SUEWS` instead of `output.get_group('SUEWS')`. + """ + # Check if it's a group name first + all_groups = self._df_output.columns.get_level_values("group").unique() + if name in all_groups: + return self.get_group(name) + + # Check if it's a variable name in any group + all_vars = self._df_output.columns.get_level_values("var").unique() + if name in all_vars: + return self.get_variable(name) + + # Check case-insensitively for variables + name_lower = name.lower() + for var in all_vars: + if var.lower() == name_lower: + return self.get_variable(var) + + raise AttributeError( + f"'{type(self).__name__}' has no attribute '{name}'. " + f"Available groups: {list(all_groups)}, " + f"Available variables (first 10): {list(all_vars[:10])}..." + ) + + def __getitem__(self, key: str) -> pd.DataFrame: + """Access variables or groups by name.""" + # Check if it's a group name + if key in self.groups: + return self.get_group(key) + # Otherwise treat as variable + return self.get_variable(key) + + # ========================================================================= + # Group access + # ========================================================================= + + def get_group(self, group: str) -> pd.DataFrame: + """ + Get all variables for an output group. 
+ + Parameters + ---------- + group : str + Output group name (e.g., 'SUEWS', 'DailyState', 'snow') + + Returns + ------- + pd.DataFrame + All variables in the group + """ + if group not in self.groups: + raise ValueError( + f"Group '{group}' not found. Available groups: {self.groups}" + ) + return self._df_output[group].copy() + + def get_variable( + self, + var_name: str, + group: Optional[str] = None, + grid: Optional[Union[int, str]] = None, + ) -> pd.DataFrame: + """ + Extract specific variable from output. + + Parameters + ---------- + var_name : str + Variable name (e.g., 'QH', 'QE', 'Tair') + group : str, optional + Output group if ambiguous + grid : int or str, optional + Grid filter + + Returns + ------- + pd.DataFrame + Time series of requested variable + """ + # Check if variable exists + all_vars = self._df_output.columns.get_level_values("var").unique() + if var_name not in all_vars: + raise ValueError( + f"Variable '{var_name}' not found. " + f"Available variables (first 10): {list(all_vars[:10])}..." + ) + + # Find which groups contain this variable + matching_groups = [] + for grp in self.groups: + try: + _ = self._df_output.xs((grp, var_name), level=("group", "var"), axis=1) + matching_groups.append(grp) + except KeyError: + continue + + if len(matching_groups) == 0: + raise ValueError(f"Variable '{var_name}' not found in any group") + + if len(matching_groups) > 1 and group is None: + raise ValueError( + f"Variable '{var_name}' appears in multiple groups: " + f"{matching_groups}. Specify group parameter." + ) + + target_group = group or matching_groups[0] + if group is not None and group not in matching_groups: + raise ValueError( + f"Variable '{var_name}' not found in group '{group}'. " + f"Available groups for this variable: {matching_groups}" + ) + + result = self._df_output.xs( + (target_group, var_name), level=("group", "var"), axis=1 + ) + + # Filter by grid if requested + if grid is not None: + if isinstance(grid, str): + result = result.xs(grid, level="grid") + else: + result = result.iloc[:, grid] + + return result + + @property + def available_variables(self) -> Dict[str, List[str]]: + """Dict mapping groups to their variables.""" + result = {} + for group in self.groups: + try: + vars_in_group = ( + self._df_output[group] + .columns.get_level_values("var") + .unique() + .tolist() + ) + result[group] = vars_in_group + except KeyError: + result[group] = [] + return result + + # ========================================================================= + # State for restart + # ========================================================================= + + def to_initial_state(self) -> pd.DataFrame: + """ + Extract the final timestep as initial state for next simulation. + + This method handles the conversion from multi-timestep state + to single-timestep initial state format. 
+ + Returns + ------- + pd.DataFrame + Initial state ready for next simulation + """ + df = self._df_state_final + idx_names = list(df.index.names) + + if "datetime" in idx_names: + datetime_level = idx_names.index("datetime") + last_datetime = df.index.get_level_values(datetime_level).max() + if isinstance(df.index, pd.MultiIndex): + return df.xs(last_datetime, level="datetime").copy() + else: + return df.loc[[last_datetime]].copy() + return df.copy() + + # ========================================================================= + # Analysis (convenience functions) + # ========================================================================= + + def energy_balance_closure(self, method: str = "residual") -> pd.DataFrame: + """ + Calculate energy balance closure statistics. + + Energy balance: QN = QH + QE + QS (+ QF + dQs) + + Parameters + ---------- + method : str + Calculation method: + - "residual": Calculate QN - (QH + QE + QS) + - "ratio": Calculate (QH + QE + QS) / QN + + Returns + ------- + pd.DataFrame + Energy balance closure statistics + """ + try: + qn = self.get_variable("QN", group="SUEWS") + qh = self.get_variable("QH", group="SUEWS") + qe = self.get_variable("QE", group="SUEWS") + qs = self.get_variable("QS", group="SUEWS") + except ValueError as e: + raise ValueError( + f"Cannot calculate energy balance: {e}. " + "Requires QN, QH, QE, QS variables in SUEWS group." + ) + + if method == "residual": + # Residual: QN - (QH + QE + QS) + residual = qn - (qh + qe + qs) + result = pd.DataFrame({ + "QN": qn.mean(), + "QH": qh.mean(), + "QE": qe.mean(), + "QS": qs.mean(), + "Residual": residual.mean(), + "Residual_std": residual.std(), + }) + elif method == "ratio": + # Closure ratio: (QH + QE + QS) / QN + with np.errstate(divide="ignore", invalid="ignore"): + ratio = (qh + qe + qs) / qn + ratio = ratio.replace([np.inf, -np.inf], np.nan) + result = pd.DataFrame({ + "Closure_ratio_mean": ratio.mean(), + "Closure_ratio_std": ratio.std(), + }) + else: + raise ValueError(f"Unknown method: {method}. Use 'residual' or 'ratio'.") + + return result + + def diurnal_average(self, var: Optional[str] = None) -> pd.DataFrame: + """ + Calculate mean diurnal cycle. + + Parameters + ---------- + var : str, optional + Specific variable. If None, calculates for common energy variables. + + Returns + ------- + pd.DataFrame + Hourly mean values grouped by hour of day + """ + if var is not None: + data = self.get_variable(var) + else: + # Default to key energy balance variables + vars_to_use = ["QN", "QH", "QE", "QS"] + available = self.available_variables.get("SUEWS", []) + vars_to_use = [v for v in vars_to_use if v in available] + data = pd.concat( + [self.get_variable(v, group="SUEWS") for v in vars_to_use], + axis=1, + keys=vars_to_use, + ) + + # Extract hour and group + df = data.copy() + + # Handle MultiIndex + if isinstance(df.index, pd.MultiIndex): + df = df.reset_index(level="grid", drop=True) + + df["hour"] = df.index.hour + return df.groupby("hour").mean() + + def monthly_summary(self, var: Optional[str] = None) -> pd.DataFrame: + """ + Monthly statistics summary. + + Parameters + ---------- + var : str, optional + Specific variable. If None, summarises key energy variables. 
+ + Returns + ------- + pd.DataFrame + Monthly statistics + """ + if var is not None: + data = self.get_variable(var) + else: + # Default to key variables + vars_to_use = ["QN", "QH", "QE", "QS"] + available = self.available_variables.get("SUEWS", []) + vars_to_use = [v for v in vars_to_use if v in available] + data = pd.concat( + [self.get_variable(v, group="SUEWS") for v in vars_to_use], + axis=1, + keys=vars_to_use, + ) + + df = data.copy() + + # Handle MultiIndex + if isinstance(df.index, pd.MultiIndex): + df = df.reset_index(level="grid", drop=True) + + return df.resample("M").agg(["mean", "std", "min", "max"]) + + def compare_with(self, other: "SUEWSOutput") -> pd.DataFrame: + """ + Compare two output objects. + + Useful for comparing different scenarios or model runs. + + Parameters + ---------- + other : SUEWSOutput + Another output object to compare with + + Returns + ------- + pd.DataFrame + Comparison statistics (mean difference, correlation, etc.) + """ + # Get common variables + self_vars = set() + other_vars = set() + for vars in self.available_variables.values(): + self_vars.update(vars) + for vars in other.available_variables.values(): + other_vars.update(vars) + common_vars = self_vars.intersection(other_vars) + + # Focus on key energy variables + key_vars = ["QN", "QH", "QE", "QS", "Tair"] + compare_vars = [v for v in key_vars if v in common_vars] + + results = [] + for var in compare_vars: + try: + self_data = self.get_variable(var) + other_data = other.get_variable(var) + + # Align data + common_idx = self_data.index.intersection(other_data.index) + if len(common_idx) == 0: + continue + + s = self_data.loc[common_idx].values.flatten() + o = other_data.loc[common_idx].values.flatten() + + diff = s - o + results.append({ + "variable": var, + "mean_diff": np.nanmean(diff), + "rmse": np.sqrt(np.nanmean(diff**2)), + "correlation": np.corrcoef(s, o)[0, 1], + "n_common": len(common_idx), + }) + except (ValueError, KeyError): + continue + + return pd.DataFrame(results) + + # ========================================================================= + # Manipulation + # ========================================================================= + + def resample(self, freq: str) -> "SUEWSOutput": + """ + Resample output to different frequency. + + Parameters + ---------- + freq : str + Target frequency (e.g., "1H", "1D") + + Returns + ------- + SUEWSOutput + New output at resampled frequency + """ + from ._post import resample_output + + resampled = resample_output(self._df_output, freq) + return SUEWSOutput( + resampled, + self._df_state_final, + self._config, + {**self._metadata, "resampled_to": freq}, + ) + + # ========================================================================= + # Plotting + # ========================================================================= + + def plot_timeseries(self, var: Optional[str] = None, ax=None, **kwargs): + """ + Quick timeseries plot. + + Parameters + ---------- + var : str, optional + Variable to plot. If None, plots QH. 
+ ax : matplotlib.axes.Axes, optional + Axes to plot on + **kwargs + Additional arguments passed to plot + + Returns + ------- + tuple + (fig, ax) matplotlib objects + """ + import matplotlib.pyplot as plt + + if ax is None: + fig, ax = plt.subplots(figsize=(12, 4)) + else: + fig = ax.get_figure() + + var = var or "QH" + data = self.get_variable(var) + + # Handle MultiIndex + if isinstance(data.index, pd.MultiIndex): + data = data.reset_index(level="grid", drop=True) + + data.plot(ax=ax, **kwargs) + ax.set_ylabel(var) + ax.set_xlabel("Time") + ax.set_title(f"SUEWS Output: {var}") + + return fig, ax + + def plot_diurnal(self, var: Optional[str] = None, ax=None, **kwargs): + """ + Plot diurnal climatology. + + Parameters + ---------- + var : str, optional + Variable to plot. If None, plots energy balance components. + ax : matplotlib.axes.Axes, optional + Axes to plot on + **kwargs + Additional arguments passed to plot + + Returns + ------- + tuple + (fig, ax) matplotlib objects + """ + import matplotlib.pyplot as plt + + if ax is None: + fig, ax = plt.subplots(figsize=(10, 6)) + else: + fig = ax.get_figure() + + diurnal = self.diurnal_average(var) + diurnal.plot(ax=ax, **kwargs) + ax.set_xlabel("Hour of Day") + ax.set_ylabel("Flux (W/m²)" if var is None else var) + ax.set_title("Diurnal Cycle") + ax.legend() + + return fig, ax + + def plot_energy_balance(self, ax=None, **kwargs): + """ + Standard energy balance plot (QN, QH, QE, QS). + + Parameters + ---------- + ax : matplotlib.axes.Axes, optional + Axes to plot on + **kwargs + Additional arguments passed to plot + + Returns + ------- + tuple + (fig, ax) matplotlib objects + """ + import matplotlib.pyplot as plt + + if ax is None: + fig, ax = plt.subplots(figsize=(10, 6)) + else: + fig = ax.get_figure() + + diurnal = self.diurnal_average() + diurnal.plot(ax=ax, **kwargs) + ax.set_xlabel("Hour of Day") + ax.set_ylabel("Flux (W/m²)") + ax.set_title("Energy Balance - Diurnal Cycle") + ax.axhline(y=0, color="k", linestyle="--", alpha=0.3) + ax.legend() + + return fig, ax + + # ========================================================================= + # Export + # ========================================================================= + + def to_dataframe(self) -> pd.DataFrame: + """Return copy of full output DataFrame. + + .. deprecated:: + Use :attr:`df` property instead. + """ + warnings.warn( + "SUEWSOutput.to_dataframe() is deprecated, use .df instead", + DeprecationWarning, + stacklevel=2, + ) + return self._df_output.copy() + + def save( + self, + path: Union[str, Path] = ".", + format: str = "parquet", + freq_s: Optional[int] = None, + groups: Optional[List[str]] = None, + ) -> List[Path]: + """ + Save output to files. 
+ + Parameters + ---------- + path : str or Path + Output directory + format : str + 'txt' or 'parquet' + freq_s : int, optional + Output frequency in seconds (default: from config or 3600) + groups : list, optional + Groups to save (default: ['SUEWS', 'DailyState']) + + Returns + ------- + list + Paths to saved files + """ + from ._supy_module import _save_supy + + path = Path(path) + path.mkdir(parents=True, exist_ok=True) + + # Get defaults from config if available + freq = freq_s or 3600 + site = "" + output_config = None + + if self._config: + try: + if hasattr(self._config, "model"): + control = self._config.model.control + if hasattr(control, "output_file"): + output_file = control.output_file + if not isinstance(output_file, str): + if hasattr(output_file, "freq") and output_file.freq: + freq = freq_s or output_file.freq + output_config = output_file + if hasattr(self._config, "sites") and len(self._config.sites) > 0: + site = self._config.sites[0].name + except AttributeError: + pass + + return _save_supy( + df_output=self._df_output, + df_state_final=self._df_state_final, + freq_s=int(freq), + site=site, + path_dir_save=str(path), + output_config=output_config, + output_format=format, + ) + + def to_netcdf(self, path: Union[str, Path]) -> Path: + """ + Export to NetCDF format. + + Parameters + ---------- + path : str or Path + Output file path + + Returns + ------- + Path + Path to saved file + """ + try: + import xarray as xr + except ImportError as e: + raise ImportError( + "NetCDF export requires 'xarray' and 'netcdf4'. " + "Install with: pip install xarray netcdf4" + ) from e + + path = Path(path) + + # Convert to xarray Dataset + df = self._df_output.copy() + + # Reset index for cleaner xarray conversion + df_reset = df.reset_index() + + # Create Dataset + ds = xr.Dataset() + + # Add coordinates + ds["datetime"] = (["time"], df_reset["datetime"].values) + ds["grid"] = (["grid"], df_reset["grid"].unique()) + + # Add variables by group + for group in self.groups: + group_data = self._df_output[group] + for var in group_data.columns.get_level_values("var").unique(): + var_data = group_data[var] + if isinstance(var_data, pd.DataFrame): + var_data = var_data.iloc[:, 0] # Take first column if multiple + ds[f"{group}_{var}"] = (["time"], var_data.values) + + # Add metadata + ds.attrs["source"] = "SUEWS model output" + ds.attrs["created"] = pd.Timestamp.now().isoformat() + if self._metadata: + for key, val in self._metadata.items(): + if isinstance(val, (str, int, float)): + ds.attrs[key] = val + + # Save + ds.to_netcdf(path) + + return path + + # ========================================================================= + # Rich display + # ========================================================================= + + def __repr__(self) -> str: + """Concise representation of output data.""" + start, end = self.time_range + n_grids = self.n_grids + n_groups = len(self.groups) + n_timesteps = self.n_timesteps + + return ( + f"SUEWSOutput({start.date()} to {end.date()}, " + f"{n_grids} grid(s), {n_groups} groups, {n_timesteps} timesteps)" + ) + + def _repr_html_(self) -> str: + """HTML representation for Jupyter notebooks.""" + start, end = self.time_range + n_grids = self.n_grids + n_groups = len(self.groups) + n_timesteps = self.n_timesteps + + # Count variables + total_vars = sum(len(v) for v in self.available_variables.values()) + + html = f""" +
+        <div>
+            <strong>SUEWSOutput</strong>
+            <table>
+                <tr><td>Time range:</td><td>{start} to {end}</td></tr>
+                <tr><td>Grids:</td><td>{n_grids}</td></tr>
+                <tr><td>Timesteps:</td><td>{n_timesteps}</td></tr>
+                <tr><td>Groups:</td><td>{", ".join(self.groups)}</td></tr>
+                <tr><td>Variables:</td><td>{total_vars} total</td></tr>
+            </table>
+        </div>
+ """ + return html + + def __len__(self) -> int: + """Number of timesteps.""" + return self.n_timesteps diff --git a/src/supy/suews_sim.py b/src/supy/suews_sim.py index fb32af52b..20fb97ecc 100644 --- a/src/supy/suews_sim.py +++ b/src/supy/suews_sim.py @@ -19,6 +19,10 @@ from .data_model.core import SUEWSConfig from .util._io import read_forcing +# Import new OOP classes +from .suews_forcing import SUEWSForcing +from .suews_output import SUEWSOutput + # Constants DEFAULT_OUTPUT_FREQ_SECONDS = 3600 # Default hourly output frequency DEFAULT_FORCING_FILE_PATTERNS = [ @@ -203,19 +207,20 @@ def recursive_update(obj, upd): recursive_update(self._config, updates) def update_forcing( - self, forcing_data: Union[str, Path, list, pd.DataFrame] + self, forcing_data: Union[str, Path, list, pd.DataFrame, SUEWSForcing] ) -> "SUEWSSimulation": """ Update meteorological forcing data. Parameters ---------- - forcing_data : str, Path, list of paths, or pandas.DataFrame + forcing_data : str, Path, list of paths, pandas.DataFrame, or SUEWSForcing Forcing data source: - Path to a single forcing file - List of paths to forcing files (concatenated in order) - Path to directory containing forcing files (deprecated) - DataFrame with forcing data + - SUEWSForcing object Returns ------- @@ -227,11 +232,14 @@ def update_forcing( >>> sim.update_forcing("forcing_2023.txt") >>> sim.update_forcing(["forcing_2023.txt", "forcing_2024.txt"]) >>> sim.update_forcing(df_forcing) + >>> sim.update_forcing(SUEWSForcing.from_file("forcing.txt")) >>> sim.update_config(cfg).update_forcing(forcing).run() """ if isinstance(forcing_data, RefValue): forcing_data = forcing_data.value - if isinstance(forcing_data, pd.DataFrame): + if isinstance(forcing_data, SUEWSForcing): + self._df_forcing = forcing_data.df.copy() + elif isinstance(forcing_data, pd.DataFrame): self._df_forcing = forcing_data.copy() elif isinstance(forcing_data, list): # Handle list of files @@ -384,7 +392,7 @@ def _load_forcing_file(forcing_path: Path) -> pd.DataFrame: else: return read_forcing(str(forcing_path)) - def run(self, start_date=None, end_date=None, **run_kwargs) -> pd.DataFrame: + def run(self, start_date=None, end_date=None, **run_kwargs) -> SUEWSOutput: """ Run SUEWS simulation. @@ -409,8 +417,10 @@ def run(self, start_date=None, end_date=None, **run_kwargs) -> pd.DataFrame: Returns ------- - pandas.DataFrame - Simulation results with MultiIndex columns (group, variable). + SUEWSOutput + Simulation results wrapped in an OOP interface with analysis + and plotting convenience methods. Access raw DataFrame via + ``.to_dataframe()`` or ``.df``. Raises ------ @@ -422,6 +432,14 @@ def run(self, start_date=None, end_date=None, **run_kwargs) -> pd.DataFrame: The simulation runs with fixed internal settings. For advanced control over simulation parameters, consider using the lower-level functional API (though it is deprecated). 
+ + Examples + -------- + >>> sim = SUEWSSimulation.from_sample_data() + >>> output = sim.run() + >>> output.QH # Access sensible heat flux + >>> output.diurnal_average("QH") # Get diurnal pattern + >>> output.to_dataframe() # Get raw DataFrame """ # Validate inputs if self._df_state_init is None: @@ -439,7 +457,13 @@ def run(self, start_date=None, end_date=None, **run_kwargs) -> pd.DataFrame: self._df_state_final = result[1] self._run_completed = True - return self._df_output + + # Wrap results in SUEWSOutput + return SUEWSOutput( + df_output=self._df_output, + df_state_final=self._df_state_final, + config=self._config, + ) def save( self, output_path: Optional[Union[str, Path]] = None, **save_kwargs @@ -755,6 +779,50 @@ def from_state(cls, state: Union[str, Path, pd.DataFrame]): return sim + @classmethod + def from_output(cls, output: SUEWSOutput) -> "SUEWSSimulation": + """Create SUEWSSimulation from previous output for continuation runs. + + Convenience method that extracts the final state from a SUEWSOutput + object and creates a new simulation ready for continuation. + + Parameters + ---------- + output : SUEWSOutput + Output object from a previous simulation run. + + Returns + ------- + SUEWSSimulation + Simulation instance initialised with final state from output, + ready for new forcing data and run. + + Examples + -------- + Seamless continuation from previous run: + + >>> # Run first period + >>> sim1 = SUEWSSimulation("config.yaml") + >>> sim1.update_forcing("forcing_2023.txt") + >>> output1 = sim1.run() + + >>> # Continue from output state + >>> sim2 = SUEWSSimulation.from_output(output1) + >>> sim2.update_forcing("forcing_2024.txt") + >>> output2 = sim2.run() + + See Also + -------- + from_state : Create from saved state file or DataFrame + SUEWSOutput.to_initial_state : Extract state for restart + """ + df_state_init = output.to_initial_state() + sim = cls() + sim._df_state_init = df_state_init + sim._config = output._config # Preserve config if available + + return sim + def __repr__(self) -> str: """Concise representation showing simulation status. @@ -967,25 +1035,34 @@ def config(self) -> Optional[SUEWSConfig]: return self._config @property - def forcing(self) -> Optional[pd.DataFrame]: - """Access to forcing data DataFrame. + def forcing(self) -> Optional[SUEWSForcing]: + """Access to forcing data as SUEWSForcing object. Returns ------- - pandas.DataFrame or None - Meteorological forcing data with required variables. - None if no forcing loaded. + SUEWSForcing or None + Meteorological forcing data wrapped in OOP interface with + validation and analysis methods. None if no forcing loaded. See Also -------- :ref:`df_forcing_var` : Complete forcing data structure and variable descriptions update_forcing : Load forcing data from files or DataFrames + + Examples + -------- + >>> sim = SUEWSSimulation.from_sample_data() + >>> sim.forcing.summary() # Get forcing statistics + >>> sim.forcing.Tair # Access air temperature + >>> sim.forcing.df # Access raw DataFrame """ - return self._df_forcing + if self._df_forcing is None: + return None + return SUEWSForcing(self._df_forcing) @property def results(self) -> Optional[pd.DataFrame]: - """Access to simulation results DataFrame. + """Access to simulation results DataFrame (raw). 
Returns ------- @@ -995,12 +1072,46 @@ def results(self) -> Optional[pd.DataFrame]: See Also -------- + output : Access results as SUEWSOutput object with analysis methods :ref:`df_output_var` : Complete output data structure and variable descriptions get_variable : Extract specific variables from output groups save : Save results to files """ return self._df_output + @property + def output(self) -> Optional[SUEWSOutput]: + """Access to simulation results as SUEWSOutput object. + + Returns + ------- + SUEWSOutput or None + Simulation results wrapped in OOP interface with analysis + and plotting convenience methods. None if simulation hasn't + been run yet. + + See Also + -------- + results : Access raw DataFrame directly + run : Run simulation and return SUEWSOutput + :ref:`df_output_var` : Complete output data structure + + Examples + -------- + >>> sim = SUEWSSimulation.from_sample_data() + >>> sim.run() + >>> sim.output.QH # Access sensible heat flux + >>> sim.output.diurnal_average("QH") # Get diurnal pattern + >>> sim.output.energy_balance_closure() # Analyse energy balance + """ + if self._df_output is None: + return None + return SUEWSOutput( + df_output=self._df_output, + df_state_final=self._df_state_final, + config=self._config, + ) + @property def state_init(self) -> Optional[pd.DataFrame]: """Initial state DataFrame for simulation. diff --git a/src/supy/util/_io.py b/src/supy/util/_io.py index c7cf1752b..a655f9b7f 100644 --- a/src/supy/util/_io.py +++ b/src/supy/util/_io.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np from pathlib import Path import pandas as pd @@ -33,24 +35,8 @@ def read_suews(path_suews_file: str) -> pd.DataFrame: return df_suews -def read_forcing(path_suews_file: str, tstep_mod=300) -> pd.DataFrame: - """read in SUEWS forcing files as DataFrame ready for SuPy simulation. - - Parameters - ---------- - path_suews_file : str - a string that represents wildcard pattern can locate SUEWS forcing files, which should follow `SUEWS convention `_. - - tstep_mod: int or None, optional - time step [s] for resampling, by default 300. - If `None`, resampling will be skipped. - - Returns - ------- - pd.DataFrame - datetime-aware DataFrame - """ - +def _read_forcing_impl(path_suews_file: str, tstep_mod=300) -> pd.DataFrame: + """Internal implementation of read_forcing (no deprecation warning).""" path_suews_file = Path(path_suews_file) path_input = path_suews_file.parent str_pattern = path_suews_file.name @@ -73,3 +59,38 @@ def read_forcing(path_suews_file: str, tstep_mod=300) -> pd.DataFrame: df_forcing = from_nan(df_forcing) return df_forcing + + +def read_forcing(path_suews_file: str, tstep_mod=300) -> pd.DataFrame: + """Read in SUEWS forcing files as DataFrame ready for SuPy simulation. + + .. deprecated:: + Use :class:`supy.SUEWSForcing.from_file` instead for the modern + object-oriented interface with additional validation and analysis features. + + Parameters + ---------- + path_suews_file : str + a string that represents wildcard pattern can locate SUEWS forcing files, + which should follow `SUEWS convention + `_. + + tstep_mod: int or None, optional + time step [s] for resampling, by default 300. + If `None`, resampling will be skipped. 
+ + Returns + ------- + pd.DataFrame + datetime-aware DataFrame + + See Also + -------- + supy.SUEWSForcing.from_file : Modern OOP interface (recommended) + """ + warnings.warn( + "supy.util.read_forcing is deprecated, use SUEWSForcing.from_file() instead", + DeprecationWarning, + stacklevel=2, + ) + return _read_forcing_impl(path_suews_file, tstep_mod) diff --git a/test/core/test_suews_simulation.py b/test/core/test_suews_simulation.py index 687949ba5..16955f501 100644 --- a/test/core/test_suews_simulation.py +++ b/test/core/test_suews_simulation.py @@ -408,10 +408,10 @@ def test_basic_run(self): sim._df_state_init = df_state sim.update_forcing(df_forcing.iloc[:24]) # 2 hours - results = sim.run() - assert results is not None - assert len(results) > 0 - assert "QH" in results.columns.get_level_values("var") + output = sim.run() + assert output is not None + assert len(output.df) > 0 + assert "QH" in output.df.columns.get_level_values("var") def test_run_without_forcing(self): """Test run fails without forcing.""" @@ -512,7 +512,7 @@ def test_repr_ready(self): def test_repr_complete(self): """Test repr after running.""" sim = SUEWSSimulation.from_sample_data() - sim.run(end_date=sim.forcing.index[23]) # Run only 24 timesteps + sim.run(end_date=sim.forcing.times[23]) # Run only 24 timesteps repr_str = repr(sim) assert "Complete" in repr_str assert "results" in repr_str @@ -523,7 +523,7 @@ def test_state_properties(self): assert sim.state_init is not None assert sim.state_final is None - sim.run(end_date=sim.forcing.index[23]) + sim.run(end_date=sim.forcing.times[23]) assert sim.state_final is not None def test_validation_methods(self): @@ -536,7 +536,7 @@ def test_validation_methods(self): assert sim.is_ready() assert not sim.is_complete() - sim.run(end_date=sim.forcing.index[23]) + sim.run(end_date=sim.forcing.times[23]) assert sim.is_complete() @@ -560,7 +560,7 @@ def test_update_forcing_returns_self(self): def test_reset_returns_self(self): """Test reset enables chaining.""" sim = SUEWSSimulation.from_sample_data() - sim.run(end_date=sim.forcing.index[23]) + sim.run(end_date=sim.forcing.times[23]) result = sim.reset() assert result is sim assert not sim.is_complete() @@ -589,7 +589,7 @@ class TestGetVariable: def test_get_variable_with_group(self): """Test variable extraction with group specification.""" sim = SUEWSSimulation.from_sample_data() - sim.run(end_date=sim.forcing.index[23]) + sim.run(end_date=sim.forcing.times[23]) # QH appears in multiple groups, must specify which one qh = sim.get_variable("QH", group="SUEWS") @@ -599,7 +599,7 @@ def test_get_variable_with_group(self): def test_get_variable_ambiguous_raises(self): """Test get_variable raises error for ambiguous variable without group.""" sim = SUEWSSimulation.from_sample_data() - sim.run(end_date=sim.forcing.index[23]) + sim.run(end_date=sim.forcing.times[23]) # QH appears in multiple groups - should raise error with pytest.raises(ValueError, match="appears in multiple groups"): @@ -614,7 +614,7 @@ def test_get_variable_not_run(self): def test_get_variable_invalid_name(self): """Test get_variable with invalid variable name.""" sim = SUEWSSimulation.from_sample_data() - sim.run(end_date=sim.forcing.index[23]) + sim.run(end_date=sim.forcing.times[23]) with pytest.raises(ValueError, match="not found"): sim.get_variable("INVALID_VAR") @@ -622,7 +622,7 @@ def test_get_variable_invalid_name(self): def test_get_variable_wrong_group(self): """Test get_variable with wrong group specification.""" sim = 
SUEWSSimulation.from_sample_data() - sim.run(end_date=sim.forcing.index[23]) + sim.run(end_date=sim.forcing.times[23]) # QH exists but not in a non-existent group with pytest.raises(ValueError, match="not found in group"): @@ -722,7 +722,7 @@ def test_from_state_csv(self, tmp_path): sim1 = SUEWSSimulation.from_sample_data() # Save full forcing before subsetting - df_forcing_full = sim1.forcing.copy() + df_forcing_full = sim1.forcing.df.copy() df_forcing = df_forcing_full.iloc[:288] # First day only sim1.update_forcing(df_forcing) @@ -759,7 +759,7 @@ def test_from_state_parquet(self, tmp_path): sim1 = SUEWSSimulation.from_sample_data() # Save full forcing before subsetting - df_forcing_full = sim1.forcing.copy() + df_forcing_full = sim1.forcing.df.copy() df_forcing = df_forcing_full.iloc[:288] # First day only sim1.update_forcing(df_forcing) @@ -788,7 +788,7 @@ def test_from_state_dataframe(self): sim1 = SUEWSSimulation.from_sample_data() # Save full forcing before subsetting - df_forcing_full = sim1.forcing.copy() + df_forcing_full = sim1.forcing.df.copy() df_forcing = df_forcing_full.iloc[:288] sim1.update_forcing(df_forcing)
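
A minimal end-to-end sketch of the interface added by this patch, assembled from the docstring examples above; the forcing file paths are placeholders, not files shipped with the package:

    from supy import SUEWSSimulation, SUEWSForcing

    # Load and inspect forcing through the new OOP wrapper
    forcing = SUEWSForcing.from_file("forcing_2023.txt")  # placeholder path
    forcing.validate(raise_on_error=True)
    print(forcing.summary())

    # run() now returns a SUEWSOutput rather than a bare DataFrame
    sim = SUEWSSimulation.from_sample_data()
    sim.update_forcing(forcing)
    output = sim.run()
    print(output.energy_balance_closure())
    print(output.diurnal_average("QH"))

    # Continue from the final state of the previous run
    sim2 = SUEWSSimulation.from_output(output)
    sim2.update_forcing("forcing_2024.txt")  # placeholder path
    output2 = sim2.run()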