diff --git a/.github/workflows/full_test.yml b/.github/workflows/full_test.yml index 4776c572b..7c0ff5acc 100644 --- a/.github/workflows/full_test.yml +++ b/.github/workflows/full_test.yml @@ -18,7 +18,7 @@ jobs: - uses: astral-sh/ruff-action@v2 with: - version: 0.6.2 + version: 0.14.5 src: src - name: Set up uv diff --git a/pyproject.toml b/pyproject.toml index 633cd9e74..a7a619a6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ classifiers = [ dev = [ "pytest", "plotly >= 4.5", - "ruff==0.6.2", + "ruff==0.14.5", "quarto-cli==1.5.57", "quartodoc==0.11.1", "netCDF4", @@ -70,8 +70,8 @@ notebooks = ["nbformat", "nbconvert", "jupyter", "plotly", "shapely", "seaborn"] [tool.ruff.lint] -ignore = ["E501"] -select = ["E4", "E7", "E9", "F", "D200", "D205"] +ignore = ["E501", "PD011"] +select = ["E4", "E7", "E9", "F", "D200", "D205", "NPY", "PD"] [tool.mypy] python_version = "3.10" diff --git a/src/modelskill/comparison/_collection_plotter.py b/src/modelskill/comparison/_collection_plotter.py index 2122b8488..1ed831ce6 100644 --- a/src/modelskill/comparison/_collection_plotter.py +++ b/src/modelskill/comparison/_collection_plotter.py @@ -72,6 +72,7 @@ def scatter( xlabel: Optional[str] = None, ylabel: Optional[str] = None, skill_table: Optional[Union[str, List[str], Mapping[str, str], bool]] = None, + random_state: Optional[int] = None, ax: Optional[Axes] = None, **kwargs, ) -> Axes | list[Axes]: @@ -138,6 +139,8 @@ def scatter( This kword adds a box at the right of the scatter plot. mapping can be used to rename the metrics in the table. 
by default False + random_state : int, optional + random seed for point sampling, by default None ax : matplotlib axes, optional axes to plot on, by default None **kwargs @@ -176,6 +179,7 @@ xlabel=xlabel, ylabel=ylabel, skill_table=skill_table, + random_state=random_state, ax=ax, **kwargs, ) @@ -201,6 +205,7 @@ def _scatter_one_model( xlabel: Optional[str], ylabel: Optional[str], skill_table: Optional[Union[str, List[str], Mapping[str, str], bool]], + random_state: Optional[int], ax, **kwargs, ): @@ -268,6 +273,7 @@ def _scatter_one_model( skill_scores=skill_scores, skill_score_unit=skill_score_unit, ax=ax, + random_state=random_state, **kwargs, ) diff --git a/src/modelskill/comparison/_comparer_plotter.py b/src/modelskill/comparison/_comparer_plotter.py index 5467eafc2..722217da3 100644 --- a/src/modelskill/comparison/_comparer_plotter.py +++ b/src/modelskill/comparison/_comparer_plotter.py @@ -476,6 +476,7 @@ def scatter( xlabel: Optional[str] = None, ylabel: Optional[str] = None, skill_table: Optional[Union[str, List[str], Mapping[str, str], bool]] = None, + random_state: Optional[int] = None, ax: Optional[matplotlib.axes.Axes] = None, **kwargs, ) -> matplotlib.axes.Axes | list[matplotlib.axes.Axes]: @@ -542,6 +543,8 @@ def scatter( modelskill.options.metrics.list. This kword adds a box at the right of the scatter plot, by default False mapping can be used to rename the metrics in the table. 
+ random_state : int, optional + random seed for point sampling, by default None ax : matplotlib.axes.Axes, optional axes to plot on, by default None **kwargs @@ -581,6 +584,7 @@ def scatter( ylabel=ylabel, skill_table=skill_table, ax=ax, + random_state=random_state, **kwargs, ) axes.append(ax_mod) @@ -746,7 +750,14 @@ def taylor( df = df.rename(columns={"_std_obs": "obs_std", "_std_mod": "std"}) pts = [ - TaylorPoint(name=r.model, obs_std=r.obs_std, std=r.std, cc=r.cc, marker=marker, marker_size=marker_size) + TaylorPoint( + name=r.model, + obs_std=r.obs_std, + std=r.std, + cc=r.cc, + marker=marker, + marker_size=marker_size, + ) for r in df.itertuples() ] diff --git a/src/modelskill/comparison/_comparison.py b/src/modelskill/comparison/_comparison.py index b7a33a7c6..f0fdd22ec 100644 --- a/src/modelskill/comparison/_comparison.py +++ b/src/modelskill/comparison/_comparison.py @@ -56,7 +56,7 @@ def _parse_dataset(data: xr.Dataset) -> xr.Dataset: assert "Observation" in data.data_vars # no missing values allowed in Observation - if data["Observation"].isnull().any(): + if data["Observation"].isnull().any(): # noqa: PD003 raise ValueError("Observation data must not contain missing values.") # coordinates diff --git a/src/modelskill/metrics.py b/src/modelskill/metrics.py index 3f1dcdaf7..ee32d6cdd 100644 --- a/src/modelskill/metrics.py +++ b/src/modelskill/metrics.py @@ -632,7 +632,7 @@ def peak_ratio( } ) df_filter["Maximum"] = df_filter.max(axis=1) - df_filter.sort_values(by="Maximum", ascending=False, inplace=True) + df_filter = df_filter.sort_values(by="Maximum", ascending=False) # Finally we do the selection of the N- largest peaks from either model or measured df_filter = df_filter.iloc[0:top_n_peaks, :] # Rename to avoid further refactoring diff --git a/src/modelskill/model/dfsu.py b/src/modelskill/model/dfsu.py index 7ab7cf041..511e829d6 100644 --- a/src/modelskill/model/dfsu.py +++ b/src/modelskill/model/dfsu.py @@ -219,7 +219,7 @@ def _extract_point( # TODO 
not sure why we rename here assert self.name is not None - ds_model.rename({ds_model.items[0].name: self.name}, inplace=True) + ds_model = ds_model.rename({ds_model.items[0].name: self.name}) return PointModelResult( data=ds_model, @@ -268,7 +268,7 @@ def _extract_track( if isinstance(self.data, mikeio.DataArray): ds_model = self.data.extract_track(track=track, method=method) - ds_model.rename({self.data.name: self.name}, inplace=True) + ds_model = ds_model.rename({self.data.name: self.name}) aux_items = None else: if isinstance(self.data, mikeio.dfsu.Dfsu2DH): @@ -279,7 +279,7 @@ def _extract_track( ds_model = self.data[self.sel_items.all].extract_track( track=track, method=method ) - ds_model.rename({self.sel_items.values: self.name}, inplace=True) + ds_model = ds_model.rename({self.sel_items.values: self.name}) aux_items = self.sel_items.aux item_names = [i.name for i in ds_model.items] diff --git a/src/modelskill/plotting/_misc.py b/src/modelskill/plotting/_misc.py index 4716fb799..d228cde93 100644 --- a/src/modelskill/plotting/_misc.py +++ b/src/modelskill/plotting/_misc.py @@ -53,25 +53,11 @@ def _xyticks(n_sectors=8, lim=None): def sample_points( - x: np.ndarray, y: np.ndarray, show_points: bool | int | float | None = None + x: np.ndarray, + y: np.ndarray, + show_points: bool | int | float | None = None, + random_state: int | None = None, ) -> Tuple[np.ndarray, np.ndarray]: - """Sample points to be plotted - - Parameters - ---------- - x: np.ndarray, 1d - y: np.ndarray, 1d - include: bool, int or float, optional - default is subset the data to 50k points - - Returns - ------- - np.ndarray, np.ndarray - x and y arrays with sampled points - """ - - assert len(x) == len(y), "x and y must have same length" - if show_points is True: return x, y @@ -104,8 +90,8 @@ def sample_points( show_points = len(x) n_samples = show_points - np.random.seed(20) # TODO should this be a parameter? 
- ran_index = np.random.choice(range(len(x)), n_samples, replace=False) + rng = np.random.default_rng(seed=random_state) + ran_index = rng.choice(range(len(x)), n_samples, replace=False) x_sample = x[ran_index] y_sample = y[ran_index] diff --git a/src/modelskill/plotting/_scatter.py b/src/modelskill/plotting/_scatter.py index de2cd700a..1d9521fb6 100644 --- a/src/modelskill/plotting/_scatter.py +++ b/src/modelskill/plotting/_scatter.py @@ -44,6 +44,7 @@ def scatter( skill_scores: Mapping[str, float] | None = None, skill_score_unit: Optional[str] = "", ax: Optional[Axes] = None, + random_state: Optional[int] = None, **kwargs, ) -> Axes: """Scatter plot tailored for model skill comparison. @@ -120,6 +121,8 @@ def scatter( unit for skill_scores, by default None ax : matplotlib.axes.Axes, optional axes to plot on, by default None + random_state : int, optional + random seed for point sampling, by default None **kwargs Returns @@ -155,7 +158,7 @@ def scatter( if norm is None: norm = colors.PowerNorm(vmin=1, gamma=0.5) - x_sample, y_sample = sample_points(x, y, show_points) + x_sample, y_sample = sample_points(x, y, show_points, random_state=random_state) show_points = len(x_sample) > 0 xq, yq = quantiles_xy(x, y, quantiles) diff --git a/src/modelskill/skill.py b/src/modelskill/skill.py index d16bda8dd..066f90c23 100644 --- a/src/modelskill/skill.py +++ b/src/modelskill/skill.py @@ -51,7 +51,7 @@ def _name_to_title_in_kwargs(self, kwargs: Any) -> None: def _get_plot_df(self, level: int | str = 0) -> pd.DataFrame: ser = self.skillarray._ser if isinstance(ser.index, pd.MultiIndex): - df = ser.unstack(level=level) + df = ser.unstack(level=level) # noqa: PD010 else: df = ser.to_frame() return df @@ -226,7 +226,7 @@ def grid( # TODO raise error? return None # df = self.df[field] TODO: at_least_2d... 
- df = ser.unstack() + df = ser.unstack() # noqa: PD010 vmin = None vmax = None diff --git a/tests/plot/test_plot.py b/tests/plot/test_plot.py index 6a47c3c3e..16eef97a6 100644 --- a/tests/plot/test_plot.py +++ b/tests/plot/test_plot.py @@ -122,9 +122,9 @@ def test_format_skill_table(): @pytest.fixture def x_y(): - np.random.seed(42) - x = np.random.rand(100000) - y = np.random.rand(100000) + rng = np.random.default_rng(seed=42) + x = rng.random(100000) + y = rng.random(100000) return x, y diff --git a/tests/test_comparercollection.py b/tests/test_comparercollection.py index a291cd0f5..338b52a49 100644 --- a/tests/test_comparercollection.py +++ b/tests/test_comparercollection.py @@ -455,7 +455,7 @@ def test_save_and_load_preserves_raw_model_data(cc, tmp_path): def test_plot_scatter(cc): - ax = cc.plot.scatter(skill_table=True) + ax = cc.plot.scatter(skill_table=True, random_state=20) assert ax is not None