From 614b15ada3e0b6335acc8d2292e2e3cdd76b7e62 Mon Sep 17 00:00:00 2001 From: Charlelie Laurent Date: Fri, 6 Feb 2026 13:21:05 -0800 Subject: [PATCH 01/14] Initial commit of guidance Signed-off-by: Charlelie Laurent --- .importlinter | 2 +- physicsnemo/diffusion/denoisers/denoisers.py | 26 -- .../{denoisers => guidance}/__init__.py | 9 + .../diffusion/guidance/dps_guidance.py | 350 ++++++++++++++++++ 4 files changed, 360 insertions(+), 27 deletions(-) delete mode 100644 physicsnemo/diffusion/denoisers/denoisers.py rename physicsnemo/diffusion/{denoisers => guidance}/__init__.py (80%) create mode 100644 physicsnemo/diffusion/guidance/dps_guidance.py diff --git a/.importlinter b/.importlinter index 4b2567c999..2fdaf8d135 100644 --- a/.importlinter +++ b/.importlinter @@ -99,7 +99,7 @@ containers= layers = generate samplers : metrics - noise_schedulers | multi_diffusion | preconditioners | denoisers + noise_schedulers | multi_diffusion | preconditioners | guidance utils [importlinter:contract:physicsnemo-external-imports] diff --git a/physicsnemo/diffusion/denoisers/denoisers.py b/physicsnemo/diffusion/denoisers/denoisers.py deleted file mode 100644 index dc827f5fe3..0000000000 --- a/physicsnemo/diffusion/denoisers/denoisers.py +++ /dev/null @@ -1,26 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 - 2026 NVIDIA CORPORATION & AFFILIATES. -# SPDX-FileCopyrightText: All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings - -from physicsnemo.core.warnings import FutureFeatureWarning - -warnings.warn( - "The 'physicsnemo.diffusion.denoisers.denoisers' module is a placeholder for " - "future functionality that will be implemented in an upcoming release.", - FutureFeatureWarning, - stacklevel=2, -) diff --git a/physicsnemo/diffusion/denoisers/__init__.py b/physicsnemo/diffusion/guidance/__init__.py similarity index 80% rename from physicsnemo/diffusion/denoisers/__init__.py rename to physicsnemo/diffusion/guidance/__init__.py index af85283aa4..67bd8f273e 100644 --- a/physicsnemo/diffusion/denoisers/__init__.py +++ b/physicsnemo/diffusion/guidance/__init__.py @@ -13,3 +13,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +"""DPS (Diffusion Posterior Sampling) guidance for diffusion models.""" + +from .dps_guidance import DPSDenoiser, DPSGuidance + +__all__ = [ + "DPSGuidance", + "DPSDenoiser", +] diff --git a/physicsnemo/diffusion/guidance/dps_guidance.py b/physicsnemo/diffusion/guidance/dps_guidance.py new file mode 100644 index 0000000000..3621525c54 --- /dev/null +++ b/physicsnemo/diffusion/guidance/dps_guidance.py @@ -0,0 +1,350 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""DPS (Diffusion Posterior Sampling) guidance for diffusion models.""" + +from typing import Callable, Protocol, Sequence, runtime_checkable + +import torch +from jaxtyping import Float +from torch import Tensor + +from physicsnemo.diffusion.base import DiffusionDenoiser + + +@runtime_checkable +class DPSGuidance(Protocol): + r""" + Protocol defining the interface for Diffusion Posterior Sampling (DPS) + guidance. + + A DPS guidance is a callable that computes a guidance term to steer the + diffusion sampling process toward satisfying some observation constraint. + A DPSGuidance is expected to be a score-predictor, as it returns a quantity + analogous to a score. + + The typical form is: + + .. math:: + \gamma(t) \nabla_{\mathbf{x}} + \ell(A(\hat{\mathbf{x}}_0) - \mathbf{y}) + + where :math:`\gamma(t)` is a time-dependent guidance strength, + :math:`A` is a (potentially nonlinear) observation operator, + :math:`\mathbf{y}` is the observed data, and :math:`\ell` is a scalar loss + function. However, variants are possible as long as the guidance produces + a quantity similar to a score (e.g., a likelihood score). + + This is the minimal interface for guidance, and any object that implements + this interface can be used with diffusion utilities such as + :class:`DPSDenoiser` or + :meth:`~physicsnemo.diffusion.noise_schedulers.get_denoiser`. + + See Also + -------- + :class:`DPSDenoiser` : Combines a denoiser with one or more guidances. + + Examples + -------- + **Example 1:** Minimal guidance for inpainting. 
Given a binary mask and + observed pixels, guide the diffusion to match observations: + + >>> import torch + >>> from physicsnemo.diffusion.guidance import DPSGuidance + >>> + >>> class InpaintingGuidance: + ... def __init__(self, mask, y_obs, gamma=1.0): + ... self.mask = mask # Binary mask: 1 = observed, 0 = missing + ... self.y_obs = y_obs # Observed pixel values + ... self.gamma = gamma + ... + ... def __call__(self, x, t, x_0): + ... # Compute residual at observed locations + ... residual = self.mask * (x_0 - self.y_obs) + ... # Gradient of L2 loss w.r.t. x_0 is just the residual + ... # (simplified: assumes identity observation operator) + ... return -self.gamma * residual + ... + >>> mask = torch.ones(1, 3, 8, 8) + >>> y_obs = torch.randn(1, 3, 8, 8) + >>> guidance = InpaintingGuidance(mask, y_obs) + >>> isinstance(guidance, DPSGuidance) + True + + **Example 2:** Building a guided denoiser from scratch. A common pattern + is to combine an x0-predictor with a guidance to create a score predictor + that can be used for sampling. This shows the complete workflow: + + >>> import torch + >>> from physicsnemo.diffusion.guidance import DPSGuidance + >>> + >>> # Define a guidance that pushes toward observed values + >>> class MyGuidance: + ... def __init__(self, y_obs, gamma=0.1): + ... self.y_obs = y_obs + ... self.gamma = gamma + ... + ... def __call__(self, x, t, x_0): + ... return -self.gamma * (x_0 - self.y_obs) + ... + >>> # Toy x0-predictor (in practice, a trained neural network) + >>> x0_predictor = lambda x, t: x * 0.9 + >>> y_obs = torch.randn(1, 3, 8, 8) + >>> guidance = MyGuidance(y_obs, gamma=0.5) + >>> + >>> # Build a guided denoiser that combines x0-predictor + guidance + >>> def guided_denoiser(x, t): + ... # Step 1: Get x0 estimate + ... x_0 = x0_predictor(x, t) + ... # Step 2: Compute guidance term + ... guidance_term = guidance(x, t, x_0) + ... # Step 3: Convert x0 to score (for EDM: score = (x_0 - x) / t^2) + ... 
t_bc = t.reshape(-1, *([1] * (x.ndim - 1))) + ... score = (x_0 - x) / (t_bc ** 2) + ... # Step 4: Sum and return + ... return score + guidance_term + ... + >>> # guided_denoiser is now a DiffusionDenoiser (score predictor), + >>> # and can be used with any sampling utility that expects this interface + >>> x = torch.randn(1, 3, 8, 8) + >>> t = torch.tensor([1.0]) + >>> output = guided_denoiser(x, t) + >>> output.shape + torch.Size([1, 3, 8, 8]) + + Note: :class:`DPSDenoiser` provides a convenient way to apply one or more + guidances to a denoiser without manually implementing the above pattern. + """ + + def __call__( + self, + x: Float[Tensor, " B *dims"], + t: Float[Tensor, " B"], + x_0: Float[Tensor, " B *dims"], + ) -> Float[Tensor, " B *dims"]: + r""" + Compute the guidance term. + + Parameters + ---------- + x : Tensor + Noisy latent state at diffusion time ``t``, of shape :math:`(B, *)`. + Typically used to compute gradients when the guidance requires + backpropagation through the diffusion process, in which case it + needs to have ``requires_grad=True``. + t : Tensor + Batched diffusion time of shape :math:`(B,)`. + x_0 : Tensor + Estimate of the clean latent state, of shape :math:`(B, *)`. + Typically produced by an x0-predictor or clean data predictor. + + Returns + ------- + Tensor + Guidance term of the same shape as ``x``. This is analogous to a + likelihood score and is typically added to the unconditional score + to guide the sampling process. + """ + ... + + +class DPSDenoiser(DiffusionDenoiser): + r""" + Denoiser that combines an x0-predictor with DPS-style guidance. + + This class transforms a :class:`~physicsnemo.diffusion.DiffusionDenoiser` + (specifically, an **x0-predictor**) into another + :class:`~physicsnemo.diffusion.DiffusionDenoiser` (a **score predictor**) + by applying one or more DPS guidances. The resulting denoiser can be used + directly with ODE/SDE solvers and sampling utilities. 
+ + The output is the sum of the unconditional score (derived from the + x0-prediction) and all guidance terms: + + .. math:: + \nabla_{\mathbf{x}} \log p(\mathbf{x}) + + \sum_i g_i(\mathbf{x}, t, \hat{\mathbf{x}}_0) + + where :math:`g_i` are the guidance terms implementing the + :class:`DPSGuidance` interface. + + Each guidance must implement the :class:`DPSGuidance` protocol, which is a + callable with the following signature: + + .. code-block:: python + + def guidance(x: Tensor, t: Tensor, x_0: Tensor) -> Tensor: + # x: noisy latent state at time t, shape (B, *) + # t: diffusion time, shape (B,) + # x_0: estimated clean state, shape (B, *) + # returns: guidance term, shape (B, *) + ... + + Parameters + ---------- + denoiser_in : DiffusionDenoiser + Input denoiser that takes ``(x, t)`` and returns an estimate of the + clean data :math:`\hat{\mathbf{x}}_0`. This is typically an x0-predictor + obtained from a trained diffusion model. + x0_to_score_fn : Callable[[Tensor, Tensor, Tensor], Tensor] + Callback to convert x0-prediction to score. Signature: + ``x0_to_score_fn(x_0, x, t) -> score``. Typically obtained from a noise + scheduler, e.g., + :meth:`~physicsnemo.diffusion.noise_schedulers.LinearGaussianNoiseScheduler.x0_to_score`. + guidances : DPSGuidance | Sequence[DPSGuidance] + One or more guidance objects implementing the :class:`DPSGuidance` + interface. + + See Also + -------- + :class:`DPSGuidance` : Protocol for guidance implementations. + :func:`~physicsnemo.diffusion.samplers.sample` : Sampling function that + uses denoisers. + + Examples + -------- + **Example 1:** Basic usage with a single guidance for inpainting: + + >>> import torch + >>> from physicsnemo.diffusion.guidance import DPSDenoiser, DPSGuidance + >>> + >>> # Toy x0-predictor (in practice, this is a trained neural network) + >>> x0_predictor = lambda x, t: x * 0.9 + >>> + >>> # Simple x0_to_score function (for EDM: score = (x_0 - x) / t^2) + >>> def x0_to_score_fn(x_0, x, t): + ... 
t_bc = t.reshape(-1, *([1] * (x.ndim - 1))) + ... return (x_0 - x) / (t_bc ** 2) + ... + >>> # Simple inpainting guidance + >>> class InpaintGuidance: + ... def __init__(self, mask, y_obs, gamma=1.0): + ... self.mask = mask + ... self.y_obs = y_obs + ... self.gamma = gamma + ... def __call__(self, x, t, x_0): + ... return -self.gamma * self.mask * (x_0 - self.y_obs) + ... + >>> mask = torch.ones(1, 3, 8, 8) + >>> y_obs = torch.randn(1, 3, 8, 8) + >>> guidance = InpaintGuidance(mask, y_obs) + >>> + >>> # Create DPS denoiser + >>> dps_denoiser = DPSDenoiser( + ... denoiser_in=x0_predictor, + ... x0_to_score_fn=x0_to_score_fn, + ... guidances=guidance, + ... ) + >>> + >>> # Use in sampling + >>> x = torch.randn(1, 3, 8, 8) + >>> t = torch.tensor([1.0]) + >>> output = dps_denoiser(x, t) + >>> output.shape + torch.Size([1, 3, 8, 8]) + + **Example 2:** Multiple guidances for multi-constraint problems: + + >>> import torch + >>> from physicsnemo.diffusion.guidance import DPSDenoiser + >>> from physicsnemo.diffusion.noise_schedulers import EDMNoiseScheduler + >>> + >>> # Use scheduler to get x0_to_score_fn + >>> scheduler = EDMNoiseScheduler() + >>> x0_predictor = lambda x, t: x * 0.9 + >>> + >>> # Guidance 1: match observed values at specific locations + >>> class ObservationGuidance: + ... def __init__(self, mask, y_obs, gamma=1.0): + ... self.mask = mask + ... self.y_obs = y_obs + ... self.gamma = gamma + ... def __call__(self, x, t, x_0): + ... return -self.gamma * self.mask * (x_0 - self.y_obs) + ... + >>> # Guidance 2: regularization toward zero mean + >>> class ZeroMeanGuidance: + ... def __init__(self, gamma=0.1): + ... self.gamma = gamma + ... def __call__(self, x, t, x_0): + ... return -self.gamma * x_0.mean() * torch.ones_like(x_0) + ... 
+ >>> mask = torch.ones(1, 3, 8, 8) + >>> y_obs = torch.randn(1, 3, 8, 8) + >>> guidance1 = ObservationGuidance(mask, y_obs) + >>> guidance2 = ZeroMeanGuidance() + >>> + >>> # Combine multiple guidances + >>> dps_denoiser = DPSDenoiser( + ... denoiser_in=x0_predictor, + ... x0_to_score_fn=scheduler.x0_to_score, + ... guidances=[guidance1, guidance2], + ... ) + >>> + >>> x = torch.randn(2, 3, 8, 8) + >>> t = torch.tensor([1.0, 1.0]) + >>> output = dps_denoiser(x, t) + >>> output.shape + torch.Size([2, 3, 8, 8]) + """ + + def __init__( + self, + denoiser_in: DiffusionDenoiser, + x0_to_score_fn: Callable[ + [Float[Tensor, " B *dims"], Float[Tensor, " B *dims"], Float[Tensor, " B"]], + Float[Tensor, " B *dims"], + ], + guidances: DPSGuidance | Sequence[DPSGuidance], + ) -> None: + self.denoiser_in = denoiser_in + self.x0_to_score_fn = x0_to_score_fn + # Normalize guidances to a list + if isinstance(guidances, Sequence) and not isinstance(guidances, str): + self.guidances = list(guidances) + else: + self.guidances = [guidances] + + def __call__( + self, + x: Float[Tensor, " B *dims"], + t: Float[Tensor, " B"], + ) -> Float[Tensor, " B *dims"]: + r""" + Compute the guided score for sampling. + + Parameters + ---------- + x : Tensor + Noisy latent state at diffusion time ``t``, of shape :math:`(B, *)`. + t : Tensor + Batched diffusion time of shape :math:`(B,)`. + + Returns + ------- + Tensor + Guided score of shape :math:`(B, *)`, computed as the sum of the + unconditional score and all guidance terms. 
+ """ + x = x.detach().clone().requires_grad_(True) + x_0 = self.denoiser_in(x, t) + + guidance_sum = torch.zeros_like(x) + for guidance in self.guidances: + guidance_sum += guidance(x, t, x_0) + + score = self.x0_to_score_fn(x_0, x, t) + return score + guidance_sum From 26fca9d0281bc59507aee1cef7c10ba1fdc77a96 Mon Sep 17 00:00:00 2001 From: Charlelie Laurent Date: Fri, 6 Feb 2026 14:47:24 -0800 Subject: [PATCH 02/14] Added data consistency and model consistency DPS guidance Signed-off-by: Charlelie Laurent --- physicsnemo/diffusion/guidance/__init__.py | 9 +- .../diffusion/guidance/dps_guidance.py | 421 +++++++++++++++++- 2 files changed, 427 insertions(+), 3 deletions(-) diff --git a/physicsnemo/diffusion/guidance/__init__.py b/physicsnemo/diffusion/guidance/__init__.py index 67bd8f273e..81b87ea3ca 100644 --- a/physicsnemo/diffusion/guidance/__init__.py +++ b/physicsnemo/diffusion/guidance/__init__.py @@ -16,9 +16,16 @@ """DPS (Diffusion Posterior Sampling) guidance for diffusion models.""" -from .dps_guidance import DPSDenoiser, DPSGuidance +from .dps_guidance import ( + DataConsistencyDPSGuidance, + DPSDenoiser, + DPSGuidance, + ModelConsistencyDPSGuidance, +) __all__ = [ "DPSGuidance", "DPSDenoiser", + "ModelConsistencyDPSGuidance", + "DataConsistencyDPSGuidance", ] diff --git a/physicsnemo/diffusion/guidance/dps_guidance.py b/physicsnemo/diffusion/guidance/dps_guidance.py index 3621525c54..51b4f01318 100644 --- a/physicsnemo/diffusion/guidance/dps_guidance.py +++ b/physicsnemo/diffusion/guidance/dps_guidance.py @@ -336,8 +336,8 @@ def __call__( Returns ------- Tensor - Guided score of shape :math:`(B, *)`, computed as the sum of the - unconditional score and all guidance terms. + Guided score of same shape :math:`(B, *)` as ``x``. Computed as the + sum of the unconditional score and all guidance terms. 
""" x = x.detach().clone().requires_grad_(True) x_0 = self.denoiser_in(x, t) @@ -348,3 +348,420 @@ def __call__( score = self.x0_to_score_fn(x_0, x, t) return score + guidance_sum + + +class ModelConsistencyDPSGuidance: + r""" + DPS guidance for generic observation models with Gaussian noise. + + Computes the likelihood score for an observation model of the form: + + .. math:: + \mathbf{y} = A(\mathbf{x}_0) + \boldsymbol{\epsilon}, \quad + \boldsymbol{\epsilon} \sim \mathcal{N}(0, \sigma_y^2 \mathbf{I}) + + where :math:`A` is a (potentially nonlinear) observation operator, + :math:`\mathbf{y}` is the observed data, and :math:`\sigma_y` is the + measurement noise standard deviation. + + The guidance term is the likelihood score: + + .. math:: + \nabla_{\mathbf{x}} \log p(\mathbf{y} | \hat{\mathbf{x}}_0) + = -\frac{1}{\sigma_y^2} \nabla_{\mathbf{x}} + \| A(\hat{\mathbf{x}}_0) - \mathbf{y} \|_p^p + + where :math:`\| \cdot \|_p` is the :math:`L^p` norm and :math:`p` is the + ``norm_order``. This is computed via automatic differentiation. + + An optional **SDA (Score-Based Data Assimilation) scaling** can be applied, + which scales the guidance by :math:`\sigma(t)^2` to properly weight the + likelihood relative to the prior at different noise levels: + + .. math:: + \text{guidance} = \sigma(t)^2 \cdot \nabla_{\mathbf{x}} + \log p(\mathbf{y} | \hat{\mathbf{x}}_0) + + The observation operator ``A`` must be a differentiable callable with the + following signature: + + .. code-block:: python + + def A(x_0: Float[Tensor, "B *dims"]) -> Float[Tensor, "B *obs_dims"]: + # x_0: estimated clean state, shape (B, *) + # returns: predicted observations, shape (B, *obs_dims) + ... + + Parameters + ---------- + A : Callable[[Tensor], Tensor] + Observation operator mapping clean state to observations. + Must be differentiable (supports ``torch.autograd``). + y : Tensor + Observed data of shape :math:`(B, *obs\_dims)` matching the output + of ``A``. 
+ std_y : float + Standard deviation of the measurement noise :math:`\sigma_y`. + norm_order : int, default=2 + Order of the norm used to compute the residual. Use ``2`` for + standard Gaussian likelihood (L2 norm), ``1`` for L1 norm, etc. + sda_scaling : bool, default=False + If ``True``, applies SDA scaling by multiplying the guidance by + :math:`\sigma(t)^2`. Requires ``sigma_fn`` to be provided. + sigma_fn : Callable[[Tensor], Tensor] | None, default=None + Function mapping diffusion time to noise level :math:`\sigma(t)`. + Required when ``sda_scaling=True``. Typically obtained from a noise + scheduler, e.g., + :meth:`~physicsnemo.diffusion.noise_schedulers.LinearGaussianNoiseScheduler.sigma`. + + See Also + -------- + :class:`DataConsistencyDPSGuidance` : Simplified guidance for masked + observations. + :class:`DPSDenoiser` : Combines a denoiser with one or more guidances. + + Examples + -------- + **Example 1:** Guidance for a downsampling observation operator: + + >>> import torch + >>> import torch.nn.functional as F + >>> from physicsnemo.diffusion.guidance import ModelConsistencyDPSGuidance + >>> + >>> # Observation operator: 2x downsampling + >>> def downsample_2x(x): + ... return F.avg_pool2d(x, kernel_size=2, stride=2) + ... + >>> # Low-resolution observations + >>> y_obs = torch.randn(1, 3, 4, 4) # 4x4 from 8x8 original + >>> + >>> guidance = ModelConsistencyDPSGuidance( + ... A=downsample_2x, + ... y=y_obs, + ... std_y=0.1, + ... 
) + >>> + >>> # Use in DPS sampling + >>> x = torch.randn(1, 3, 8, 8, requires_grad=True) + >>> t = torch.tensor([1.0]) + >>> x_0 = x * 0.9 # Toy x0 estimate + >>> output = guidance(x, t, x_0) + >>> output.shape + torch.Size([1, 3, 8, 8]) + + **Example 2:** With SDA scaling for improved assimilation: + + >>> import torch + >>> from physicsnemo.diffusion.guidance import ModelConsistencyDPSGuidance + >>> from physicsnemo.diffusion.noise_schedulers import EDMNoiseScheduler + >>> + >>> scheduler = EDMNoiseScheduler() + >>> + >>> # Simple linear observation operator (select first channel) + >>> A = lambda x: x[:, :1] + >>> y_obs = torch.randn(1, 1, 8, 8) + >>> + >>> guidance = ModelConsistencyDPSGuidance( + ... A=A, + ... y=y_obs, + ... std_y=0.05, + ... sda_scaling=True, + ... sigma_fn=scheduler.sigma, + ... ) + >>> + >>> x = torch.randn(1, 3, 8, 8, requires_grad=True) + >>> t = torch.tensor([1.0]) + >>> x_0 = x * 0.9 + >>> output = guidance(x, t, x_0) + >>> output.shape + torch.Size([1, 3, 8, 8]) + """ + + def __init__( + self, + A: Callable[[Float[Tensor, " B *dims"]], Float[Tensor, " B *obs_dims"]], + y: Float[Tensor, " B *obs_dims"], + std_y: float, + norm_order: int = 2, + sda_scaling: bool = False, + sigma_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]] + | None = None, + ) -> None: + if sda_scaling and sigma_fn is None: + raise ValueError("sigma_fn must be provided when sda_scaling=True") + self.A = A + self.y = y + self.std_y = std_y + self.norm_order = norm_order + self.sda_scaling = sda_scaling + self.sigma_fn = sigma_fn + + def __call__( + self, + x: Float[Tensor, " B *dims"], + t: Float[Tensor, " B"], + x_0: Float[Tensor, " B *dims"], + ) -> Float[Tensor, " B *dims"]: + r""" + Compute the likelihood score guidance term. + + Parameters + ---------- + x : Tensor + Noisy latent state at diffusion time ``t``, of shape :math:`(B, *)`. + Must have ``requires_grad=True`` for gradient computation. 
+ t : Tensor + Batched diffusion time of shape :math:`(B,)`. + x_0 : Tensor + Estimate of the clean latent state, of shape :math:`(B, *)`. + + Returns + ------- + Tensor + Likelihood score guidance term of same shape as ``x``. + """ + # Ensure x_0 has gradients for autograd + x_0_grad = x_0.detach().requires_grad_(True) + + # Compute predicted observations and residual + y_pred = self.A(x_0_grad) + residual = y_pred - self.y + + # Compute norm^p of residual (summed over all dims except batch) + residual_flat = residual.reshape(residual.shape[0], -1) + norm_p = residual_flat.abs().pow(self.norm_order).sum(dim=1) + + # Compute gradient of norm w.r.t. x_0 + grad_x0 = torch.autograd.grad( + outputs=norm_p.sum(), + inputs=x_0_grad, + create_graph=False, + )[0] + + # Likelihood score: -1/std_y^2 * grad + guidance = -grad_x0 / (self.std_y**2) + + # Apply SDA scaling if enabled + if self.sda_scaling and self.sigma_fn is not None: + t_bc = t.reshape(-1, *([1] * (x.ndim - 1))) + sigma_t_sq = self.sigma_fn(t_bc) ** 2 + guidance = sigma_t_sq * guidance + + return guidance + + +class DataConsistencyDPSGuidance: + r""" + DPS guidance for masked observations with Gaussian noise. + + A simplified version of :class:`ModelConsistencyDPSGuidance` where the + observation operator is a mask applied element-wise. This is typical for + data assimilation tasks like inpainting or outpainting, where observations + are available at specific locations. + + The observation model is: + + .. math:: + \mathbf{y} = \mathbf{M} \odot \mathbf{x}_0 + \boldsymbol{\epsilon}, + \quad \boldsymbol{\epsilon} \sim \mathcal{N}(0, \sigma_y^2 \mathbf{I}) + + where :math:`\mathbf{M}` is a binary mask (1 = observed, 0 = missing), + :math:`\odot` denotes element-wise multiplication, and :math:`\sigma_y` + is the measurement noise standard deviation. + + The guidance term is the likelihood score: + + .. 
math:: + \nabla_{\mathbf{x}} \log p(\mathbf{y} | \hat{\mathbf{x}}_0) + = -\frac{1}{\sigma_y^2} \nabla_{\mathbf{x}} + \| \mathbf{M} \odot (\hat{\mathbf{x}}_0 - \mathbf{y}) \|_p^p + + An optional **SDA (Score-Based Data Assimilation) scaling** can be applied, + which scales the guidance by :math:`\sigma(t)^2`. + + Parameters + ---------- + mask : Tensor + Binary mask of shape :math:`(B, *)` or broadcastable shape. + Values should be 1 for observed locations and 0 for missing. + y : Tensor + Observed data of shape :math:`(B, *)` matching the state shape. + Values at unobserved locations (where ``mask=0``) are ignored. + std_y : float + Standard deviation of the measurement noise :math:`\sigma_y`. + norm_order : int, default=2 + Order of the norm used to compute the residual. Use ``2`` for + standard Gaussian likelihood (L2 norm), ``1`` for L1 norm, etc. + sda_scaling : bool, default=False + If ``True``, applies SDA scaling by multiplying the guidance by + :math:`\sigma(t)^2`. Requires ``sigma_fn`` to be provided. + sigma_fn : Callable[[Tensor], Tensor] | None, default=None + Function mapping diffusion time to noise level :math:`\sigma(t)`. + Required when ``sda_scaling=True``. + + See Also + -------- + :class:`ModelConsistencyDPSGuidance` : Guidance for general observation + operators. + :class:`DPSDenoiser` : Combines a denoiser with one or more guidances. + + Examples + -------- + **Example 1:** Inpainting with known pixels at specific locations: + + >>> import torch + >>> from physicsnemo.diffusion.guidance import DataConsistencyDPSGuidance + >>> + >>> # Mask: observe 50% of pixels randomly + >>> mask = (torch.rand(1, 3, 8, 8) > 0.5).float() + >>> y_obs = torch.randn(1, 3, 8, 8) # Observed values + >>> + >>> guidance = DataConsistencyDPSGuidance( + ... mask=mask, + ... y=y_obs, + ... std_y=0.1, + ... 
) + >>> + >>> x = torch.randn(1, 3, 8, 8, requires_grad=True) + >>> t = torch.tensor([1.0]) + >>> x_0 = x * 0.9 # Toy x0 estimate + >>> output = guidance(x, t, x_0) + >>> output.shape + torch.Size([1, 3, 8, 8]) + + **Example 2:** With SDA scaling and L1 norm for robustness: + + >>> import torch + >>> from physicsnemo.diffusion.guidance import DataConsistencyDPSGuidance + >>> from physicsnemo.diffusion.noise_schedulers import EDMNoiseScheduler + >>> + >>> scheduler = EDMNoiseScheduler() + >>> + >>> # Observe boundary pixels only (outpainting scenario) + >>> mask = torch.zeros(1, 3, 8, 8) + >>> mask[:, :, 0, :] = 1 # Top row + >>> mask[:, :, -1, :] = 1 # Bottom row + >>> mask[:, :, :, 0] = 1 # Left column + >>> mask[:, :, :, -1] = 1 # Right column + >>> y_obs = torch.randn(1, 3, 8, 8) + >>> + >>> guidance = DataConsistencyDPSGuidance( + ... mask=mask, + ... y=y_obs, + ... std_y=0.05, + ... norm_order=1, # L1 norm for robustness to outliers + ... sda_scaling=True, + ... sigma_fn=scheduler.sigma, + ... ) + >>> + >>> x = torch.randn(1, 3, 8, 8, requires_grad=True) + >>> t = torch.tensor([1.0]) + >>> x_0 = x * 0.9 + >>> output = guidance(x, t, x_0) + >>> output.shape + torch.Size([1, 3, 8, 8]) + + **Example 3:** Using with DPSDenoiser for complete sampling: + + >>> import torch + >>> from physicsnemo.diffusion.guidance import ( + ... DataConsistencyDPSGuidance, + ... DPSDenoiser, + ... ) + >>> from physicsnemo.diffusion.noise_schedulers import EDMNoiseScheduler + >>> + >>> scheduler = EDMNoiseScheduler() + >>> x0_predictor = lambda x, t: x * 0.9 # Toy x0-predictor + >>> + >>> mask = torch.ones(1, 3, 8, 8) + >>> y_obs = torch.randn(1, 3, 8, 8) + >>> + >>> guidance = DataConsistencyDPSGuidance( + ... mask=mask, + ... y=y_obs, + ... std_y=0.1, + ... ) + >>> + >>> dps_denoiser = DPSDenoiser( + ... denoiser_in=x0_predictor, + ... x0_to_score_fn=scheduler.x0_to_score, + ... guidances=guidance, + ... 
) + >>> + >>> x = torch.randn(1, 3, 8, 8) + >>> t = torch.tensor([1.0]) + >>> output = dps_denoiser(x, t) + >>> output.shape + torch.Size([1, 3, 8, 8]) + """ + + def __init__( + self, + mask: Float[Tensor, " *mask_shape"], + y: Float[Tensor, " B *dims"], + std_y: float, + norm_order: int = 2, + sda_scaling: bool = False, + sigma_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]] + | None = None, + ) -> None: + if sda_scaling and sigma_fn is None: + raise ValueError("sigma_fn must be provided when sda_scaling=True") + self.mask = mask + self.y = y + self.std_y = std_y + self.norm_order = norm_order + self.sda_scaling = sda_scaling + self.sigma_fn = sigma_fn + + def __call__( + self, + x: Float[Tensor, " B *dims"], + t: Float[Tensor, " B"], + x_0: Float[Tensor, " B *dims"], + ) -> Float[Tensor, " B *dims"]: + r""" + Compute the likelihood score guidance term. + + Parameters + ---------- + x : Tensor + Noisy latent state at diffusion time ``t``, of shape :math:`(B, *)`. + Must have ``requires_grad=True`` for gradient computation. + t : Tensor + Batched diffusion time of shape :math:`(B,)`. + x_0 : Tensor + Estimate of the clean latent state, of shape :math:`(B, *)`. + + Returns + ------- + Tensor + Likelihood score guidance term of same shape as ``x``. + """ + # Ensure x_0 has gradients for autograd + x_0_grad = x_0.detach().requires_grad_(True) + + # Compute masked residual + residual = self.mask * (x_0_grad - self.y) + + # Compute norm^p of residual (summed over all dims except batch) + residual_flat = residual.reshape(residual.shape[0], -1) + norm_p = residual_flat.abs().pow(self.norm_order).sum(dim=1) + + # Compute gradient of norm w.r.t. 
x_0 + grad_x0 = torch.autograd.grad( + outputs=norm_p.sum(), + inputs=x_0_grad, + create_graph=False, + )[0] + + # Likelihood score: -1/std_y^2 * grad + guidance = -grad_x0 / (self.std_y**2) + + # Apply SDA scaling if enabled + if self.sda_scaling and self.sigma_fn is not None: + t_bc = t.reshape(-1, *([1] * (x.ndim - 1))) + sigma_t_sq = self.sigma_fn(t_bc) ** 2 + guidance = sigma_t_sq * guidance + + return guidance From 308969bb3b54fbc11f8b8f397075e8ae8d9e2c98 Mon Sep 17 00:00:00 2001 From: Charlelie Laurent Date: Fri, 6 Feb 2026 20:59:03 -0800 Subject: [PATCH 03/14] Completed impl of DPS guidances Signed-off-by: Charlelie Laurent --- .../diffusion/guidance/dps_guidance.py | 438 ++++++++++-------- 1 file changed, 257 insertions(+), 181 deletions(-) diff --git a/physicsnemo/diffusion/guidance/dps_guidance.py b/physicsnemo/diffusion/guidance/dps_guidance.py index 51b4f01318..9c7fb60e61 100644 --- a/physicsnemo/diffusion/guidance/dps_guidance.py +++ b/physicsnemo/diffusion/guidance/dps_guidance.py @@ -339,57 +339,49 @@ def __call__( Guided score of same shape :math:`(B, *)` as ``x``. Computed as the sum of the unconditional score and all guidance terms. """ - x = x.detach().clone().requires_grad_(True) - x_0 = self.denoiser_in(x, t) + x = x.detach().requires_grad_(True) - guidance_sum = torch.zeros_like(x) - for guidance in self.guidances: - guidance_sum += guidance(x, t, x_0) + with torch.enable_grad(): + x_0 = self.denoiser_in(x, t) + guidance_sum = torch.zeros_like(x) + for guidance in self.guidances: + guidance_sum += guidance(x, t, x_0) score = self.x0_to_score_fn(x_0, x, t) return score + guidance_sum -class ModelConsistencyDPSGuidance: +class ModelConsistencyDPSGuidance(DPSGuidance): r""" DPS guidance for generic observation models with Gaussian noise. - Computes the likelihood score for an observation model of the form: + Implements the :class:`DPSGuidance` interface for generic (possibly + nonlinear) observation operators. - .. 
math:: - \mathbf{y} = A(\mathbf{x}_0) + \boldsymbol{\epsilon}, \quad - \boldsymbol{\epsilon} \sim \mathcal{N}(0, \sigma_y^2 \mathbf{I}) - - where :math:`A` is a (potentially nonlinear) observation operator, - :math:`\mathbf{y}` is the observed data, and :math:`\sigma_y` is the - measurement noise standard deviation. - - The guidance term is the likelihood score: - - .. math:: - \nabla_{\mathbf{x}} \log p(\mathbf{y} | \hat{\mathbf{x}}_0) - = -\frac{1}{\sigma_y^2} \nabla_{\mathbf{x}} - \| A(\hat{\mathbf{x}}_0) - \mathbf{y} \|_p^p - - where :math:`\| \cdot \|_p` is the :math:`L^p` norm and :math:`p` is the - ``norm_order``. This is computed via automatic differentiation. - - An optional **SDA (Score-Based Data Assimilation) scaling** can be applied, - which scales the guidance by :math:`\sigma(t)^2` to properly weight the - likelihood relative to the prior at different noise levels: + Computes the likelihood score assuming Gaussian measurement noise with + standard deviation ``std_y``. The guidance term is: .. math:: - \text{guidance} = \sigma(t)^2 \cdot \nabla_{\mathbf{x}} - \log p(\mathbf{y} | \hat{\mathbf{x}}_0) + \nabla_{\mathbf{x}} \log p(\mathbf{y} | \mathbf{x}_t) + = -\frac{1}{2 \left( \sigma_y^2 + \gamma \frac{\sigma(t)^2}{\alpha(t)^2} + \right)} \nabla_{\mathbf{x}} + \| A\left(\hat{\mathbf{x}}_0 (\mathbf{x}_t, t)\right) - \mathbf{y} \|_p^p + + where :math:`A` is the observation operator, :math:`\| \cdot \|_p` is the + :math:`L^p` norm, and the scaling incorporates a Score-Based Data + Assimilation (SDA) correction through + the parameter :math:`\gamma` that accounts for the variance of the + :math:`\hat{\mathbf{x}}_0(\mathbf{x}_t, t)` estimate at different diffusion + times. The observation operator ``A`` must be a differentiable callable with the following signature: .. 
code-block:: python - def A(x_0: Float[Tensor, "B *dims"]) -> Float[Tensor, "B *obs_dims"]: + def A(x_0: Tensor) -> Tensor: # x_0: estimated clean state, shape (B, *) - # returns: predicted observations, shape (B, *obs_dims) + # returns: predicted observations, same shape (B, *obs_dims) as y ... Parameters @@ -405,14 +397,30 @@ def A(x_0: Float[Tensor, "B *dims"]) -> Float[Tensor, "B *obs_dims"]: norm_order : int, default=2 Order of the norm used to compute the residual. Use ``2`` for standard Gaussian likelihood (L2 norm), ``1`` for L1 norm, etc. - sda_scaling : bool, default=False - If ``True``, applies SDA scaling by multiplying the guidance by - :math:`\sigma(t)^2`. Requires ``sigma_fn`` to be provided. + gamma : float, default=0.0 + SDA scaling parameter. When ``gamma > 0``, applies SDA correction + that accounts for the variance of the x0 estimate. Set to ``0`` for + classical DPS without SDA scaling. sigma_fn : Callable[[Tensor], Tensor] | None, default=None Function mapping diffusion time to noise level :math:`\sigma(t)`. - Required when ``sda_scaling=True``. Typically obtained from a noise + Required when ``gamma > 0``. Typically obtained from a noise scheduler, e.g., - :meth:`~physicsnemo.diffusion.noise_schedulers.LinearGaussianNoiseScheduler.sigma`. + :meth:`~physicsnemo.diffusion.noise_schedulers.LinearGaussianNoiseScheduler.sigma` + for a linear-Gaussian noise schedule. + alpha_fn : Callable[[Tensor], Tensor] | None, default=None + Function mapping diffusion time to signal coefficient :math:`\alpha(t)`. + Optional; defaults to :math:`\alpha(t) = 1` if not provided. Typically + obtained from a noise scheduler, e.g., + :meth:`~physicsnemo.diffusion.noise_schedulers.LinearGaussianNoiseScheduler.alpha` + for a linear-Gaussian noise schedule. 
+ + Note + ---- + References: + + - DPS: `Diffusion Posterior Sampling for General Noisy Inverse Problems + `_ + - SDA: `Score-based Data Assimilation `_ See Also -------- @@ -422,21 +430,30 @@ def A(x_0: Float[Tensor, "B *dims"]) -> Float[Tensor, "B *obs_dims"]: Examples -------- - **Example 1:** Guidance for a downsampling observation operator: + **Example 1:** Super-resolution with a nonlinear blur + downsampling + operator: >>> import torch >>> import torch.nn.functional as F - >>> from physicsnemo.diffusion.guidance import ModelConsistencyDPSGuidance + >>> from physicsnemo.diffusion.guidance import ( + ... ModelConsistencyDPSGuidance, + ... DPSDenoiser, + ... ) >>> - >>> # Observation operator: 2x downsampling - >>> def downsample_2x(x): - ... return F.avg_pool2d(x, kernel_size=2, stride=2) + >>> # Observation operator: Gaussian blur + 2x downsampling + >>> def blur_downsample(x): + ... # Apply 3x3 Gaussian-like blur + ... kernel = torch.ones(1, 1, 3, 3, device=x.device) / 9 + ... kernel = kernel.expand(x.shape[1], 1, 3, 3) + ... blurred = F.conv2d(x, kernel, padding=1, groups=x.shape[1]) + ... # Downsample 2x + ... return F.avg_pool2d(blurred, kernel_size=2, stride=2) ... - >>> # Low-resolution observations - >>> y_obs = torch.randn(1, 3, 4, 4) # 4x4 from 8x8 original + >>> # Low-resolution observations (4x4 from 8x8 high-res) + >>> y_obs = torch.randn(1, 3, 4, 4) >>> >>> guidance = ModelConsistencyDPSGuidance( - ... A=downsample_2x, + ... A=blur_downsample, ... y=y_obs, ... std_y=0.1, ... ) @@ -448,25 +465,45 @@ def A(x_0: Float[Tensor, "B *dims"]) -> Float[Tensor, "B *obs_dims"]: >>> output = guidance(x, t, x_0) >>> output.shape torch.Size([1, 3, 8, 8]) + >>> + >>> # Combine with DPSDenoiser for complete sampling workflow + >>> x0_predictor = lambda x, t: x * 0.9 + >>> def x0_to_score_fn(x_0, x, t): + ... t_bc = t.reshape(-1, *([1] * (x.ndim - 1))) + ... return (x_0 - x) / (t_bc ** 2) + ... + >>> dps_denoiser = DPSDenoiser( + ... 
denoiser_in=x0_predictor, + ... x0_to_score_fn=x0_to_score_fn, + ... guidances=guidance, + ... ) + >>> score = dps_denoiser(x, t) + >>> score.shape + torch.Size([1, 3, 8, 8]) - **Example 2:** With SDA scaling for improved assimilation: + **Example 2:** With SDA scaling using noise scheduler methods: >>> import torch - >>> from physicsnemo.diffusion.guidance import ModelConsistencyDPSGuidance + >>> from physicsnemo.diffusion.guidance import ( + ... ModelConsistencyDPSGuidance, + ... DPSDenoiser, + ... ) >>> from physicsnemo.diffusion.noise_schedulers import EDMNoiseScheduler >>> >>> scheduler = EDMNoiseScheduler() >>> - >>> # Simple linear observation operator (select first channel) + >>> # Linear observation operator (select first channel) >>> A = lambda x: x[:, :1] >>> y_obs = torch.randn(1, 1, 8, 8) >>> + >>> # Enable SDA scaling with gamma > 0, providing sigma and alpha functions >>> guidance = ModelConsistencyDPSGuidance( ... A=A, ... y=y_obs, - ... std_y=0.05, - ... sda_scaling=True, + ... std_y=0.075, + ... gamma=0.05, # Enable SDA scaling ... sigma_fn=scheduler.sigma, + ... alpha_fn=scheduler.alpha, ... ) >>> >>> x = torch.randn(1, 3, 8, 8, requires_grad=True) @@ -475,6 +512,17 @@ def A(x_0: Float[Tensor, "B *dims"]) -> Float[Tensor, "B *obs_dims"]: >>> output = guidance(x, t, x_0) >>> output.shape torch.Size([1, 3, 8, 8]) + >>> + >>> # Use with DPSDenoiser and scheduler's x0_to_score + >>> x0_predictor = lambda x, t: x * 0.9 + >>> dps_denoiser = DPSDenoiser( + ... denoiser_in=x0_predictor, + ... x0_to_score_fn=scheduler.x0_to_score, + ... guidances=guidance, + ... 
) + >>> score = dps_denoiser(x, t) + >>> score.shape + torch.Size([1, 3, 8, 8]) """ def __init__( @@ -483,18 +531,21 @@ def __init__( y: Float[Tensor, " B *obs_dims"], std_y: float, norm_order: int = 2, - sda_scaling: bool = False, + gamma: float = 0.0, sigma_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]] | None = None, + alpha_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]] + | None = None, ) -> None: - if sda_scaling and sigma_fn is None: - raise ValueError("sigma_fn must be provided when sda_scaling=True") + if gamma > 0 and sigma_fn is None: + raise ValueError("sigma_fn must be provided when gamma > 0") self.A = A self.y = y self.std_y = std_y self.norm_order = norm_order - self.sda_scaling = sda_scaling + self.gamma = gamma self.sigma_fn = sigma_fn + self.alpha_fn = alpha_fn def __call__( self, @@ -508,81 +559,79 @@ def __call__( Parameters ---------- x : Tensor - Noisy latent state at diffusion time ``t``, of shape :math:`(B, *)`. - Must have ``requires_grad=True`` for gradient computation. + Noisy latent state :math:`\mathbf{x}_t`, of shape :math:`(B, *)`. + Must have ``requires_grad=True`` and be part of a computational + graph connecting to ``x_0``. t : Tensor Batched diffusion time of shape :math:`(B,)`. x_0 : Tensor - Estimate of the clean latent state, of shape :math:`(B, *)`. + Estimate of the clean latent state :math:`\hat{\mathbf{x}}_0 + (\mathbf{x}_t, t)`, with same shape as ``x``. Must be computed + from ``x`` via an x0-predictor to allow gradient backpropagation. Returns ------- Tensor Likelihood score guidance term of same shape as ``x``. 
""" - # Ensure x_0 has gradients for autograd - x_0_grad = x_0.detach().requires_grad_(True) - - # Compute predicted observations and residual - y_pred = self.A(x_0_grad) - residual = y_pred - self.y - - # Compute norm^p of residual (summed over all dims except batch) - residual_flat = residual.reshape(residual.shape[0], -1) - norm_p = residual_flat.abs().pow(self.norm_order).sum(dim=1) - - # Compute gradient of norm w.r.t. x_0 - grad_x0 = torch.autograd.grad( - outputs=norm_p.sum(), - inputs=x_0_grad, - create_graph=False, - )[0] - - # Likelihood score: -1/std_y^2 * grad - guidance = -grad_x0 / (self.std_y**2) + with torch.enable_grad(): + # Compute predicted observations and residual + y_pred = self.A(x_0) + residual = y_pred - self.y + + # Compute norm^p of residual (summed over all dims except batch) + residual_flat = residual.reshape(residual.shape[0], -1) + norm_p = residual_flat.abs().pow(self.norm_order).sum(dim=1) + + # Compute gradient of norm w.r.t. x + grad_x = torch.autograd.grad( + outputs=norm_p.sum(), + inputs=x, + create_graph=False, + )[0] + + # Compute scaling factor + t_bc = t.reshape(-1, *([1] * (x.ndim - 1))) + if self.gamma > 0 and self.sigma_fn is not None: + sigma_t = self.sigma_fn(t_bc) + alpha_t = self.alpha_fn(t_bc) if self.alpha_fn is not None else 1.0 + variance = self.std_y**2 + self.gamma * (sigma_t**2) / (alpha_t**2) + else: + variance = self.std_y**2 - # Apply SDA scaling if enabled - if self.sda_scaling and self.sigma_fn is not None: - t_bc = t.reshape(-1, *([1] * (x.ndim - 1))) - sigma_t_sq = self.sigma_fn(t_bc) ** 2 - guidance = sigma_t_sq * guidance + # Likelihood score + guidance = -grad_x / (2 * variance) return guidance -class DataConsistencyDPSGuidance: +class DataConsistencyDPSGuidance(DPSGuidance): r""" DPS guidance for masked observations with Gaussian noise. - A simplified version of :class:`ModelConsistencyDPSGuidance` where the - observation operator is a mask applied element-wise. 
This is typical for - data assimilation tasks like inpainting or outpainting, where observations - are available at specific locations. - - The observation model is: - - .. math:: - \mathbf{y} = \mathbf{M} \odot \mathbf{x}_0 + \boldsymbol{\epsilon}, - \quad \boldsymbol{\epsilon} \sim \mathcal{N}(0, \sigma_y^2 \mathbf{I}) - - where :math:`\mathbf{M}` is a binary mask (1 = observed, 0 = missing), - :math:`\odot` denotes element-wise multiplication, and :math:`\sigma_y` - is the measurement noise standard deviation. + Implements the :class:`DPSGuidance` interface for masked observation + operators, a simplified version of :class:`ModelConsistencyDPSGuidance`. + This is typical for data assimilation tasks like inpainting, outpainting, + or sparse observations, where measurements are available at specific + locations. - The guidance term is the likelihood score: + Computes the likelihood score assuming Gaussian measurement noise with + standard deviation ``std_y``. The guidance term is: .. math:: - \nabla_{\mathbf{x}} \log p(\mathbf{y} | \hat{\mathbf{x}}_0) - = -\frac{1}{\sigma_y^2} \nabla_{\mathbf{x}} + \nabla_{\mathbf{x}} \log p(\mathbf{y} | \mathbf{x}_t) + = -\frac{1}{2 \left( \sigma_y^2 + \gamma \frac{\sigma(t)^2}{\alpha(t)^2} + \right)} \nabla_{\mathbf{x}} \| \mathbf{M} \odot (\hat{\mathbf{x}}_0 - \mathbf{y}) \|_p^p - An optional **SDA (Score-Based Data Assimilation) scaling** can be applied, - which scales the guidance by :math:`\sigma(t)^2`. + where :math:`\mathbf{M}` is a binary mask (1 = observed, 0 = missing), + :math:`\odot` denotes element-wise multiplication, and the scaling + incorporates an SDA correction through the parameter :math:`\gamma`. Parameters ---------- mask : Tensor - Binary mask of shape :math:`(B, *)` or broadcastable shape. + Binary mask of shape :math:`(B, *)` matching the state shape. Values should be 1 for observed locations and 0 for missing. y : Tensor Observed data of shape :math:`(B, *)` matching the state shape. 
@@ -592,12 +641,29 @@ class DataConsistencyDPSGuidance: norm_order : int, default=2 Order of the norm used to compute the residual. Use ``2`` for standard Gaussian likelihood (L2 norm), ``1`` for L1 norm, etc. - sda_scaling : bool, default=False - If ``True``, applies SDA scaling by multiplying the guidance by - :math:`\sigma(t)^2`. Requires ``sigma_fn`` to be provided. + gamma : float, default=0.0 + SDA scaling parameter. When ``gamma > 0``, applies SDA correction + that accounts for the variance of the x0 estimate. Set to ``0`` for + classical DPS without SDA scaling. sigma_fn : Callable[[Tensor], Tensor] | None, default=None Function mapping diffusion time to noise level :math:`\sigma(t)`. - Required when ``sda_scaling=True``. + Required when ``gamma > 0``. Typically obtained from a noise + scheduler. For example, use + :meth:`~physicsnemo.diffusion.noise_schedulers.LinearGaussianNoiseScheduler.sigma` + for a linear-Gaussian noise schedule. + alpha_fn : Callable[[Tensor], Tensor] | None, default=None + Function mapping diffusion time to signal coefficient :math:`\alpha(t)`. + Optional; defaults to :math:`\alpha(t) = 1` if not provided. For example, use + :meth:`~physicsnemo.diffusion.noise_schedulers.LinearGaussianNoiseScheduler.alpha` + for a linear-Gaussian noise schedule. + + Note + ---- + References: + + - DPS: `Diffusion Posterior Sampling for General Noisy Inverse Problems + `_ + - SDA: `Score-based Data Assimilation `_ See Also -------- @@ -607,13 +673,19 @@ class DataConsistencyDPSGuidance: Examples -------- - **Example 1:** Inpainting with known pixels at specific locations: + **Example 1:** Sparse observations at probe locations: >>> import torch - >>> from physicsnemo.diffusion.guidance import DataConsistencyDPSGuidance + >>> from physicsnemo.diffusion.guidance import ( + ... DataConsistencyDPSGuidance, + ... DPSDenoiser, + ... 
) >>> - >>> # Mask: observe 50% of pixels randomly - >>> mask = (torch.rand(1, 3, 8, 8) > 0.5).float() + >>> # Sparse mask: only observe a few probe locations + >>> mask = torch.zeros(1, 3, 8, 8) + >>> mask[:, :, 2, 3] = 1 # Probe at (2, 3) + >>> mask[:, :, 5, 6] = 1 # Probe at (5, 6) + >>> mask[:, :, 1, 7] = 1 # Probe at (1, 7) >>> y_obs = torch.randn(1, 3, 8, 8) # Observed values >>> >>> guidance = DataConsistencyDPSGuidance( @@ -624,74 +696,71 @@ class DataConsistencyDPSGuidance: >>> >>> x = torch.randn(1, 3, 8, 8, requires_grad=True) >>> t = torch.tensor([1.0]) - >>> x_0 = x * 0.9 # Toy x0 estimate + >>> x_0 = x * 0.9 # Toy x0 estimate (must be computed from x) >>> output = guidance(x, t, x_0) >>> output.shape torch.Size([1, 3, 8, 8]) + >>> + >>> # Use with DPSDenoiser for complete sampling workflow + >>> x0_predictor = lambda x, t: x * 0.9 + >>> def x0_to_score_fn(x_0, x, t): + ... t_bc = t.reshape(-1, *([1] * (x.ndim - 1))) + ... return (x_0 - x) / (t_bc ** 2) + ... + >>> dps_denoiser = DPSDenoiser( + ... denoiser_in=x0_predictor, + ... x0_to_score_fn=x0_to_score_fn, + ... guidances=guidance, + ... ) + >>> score = dps_denoiser(x, t) + >>> score.shape + torch.Size([1, 3, 8, 8]) - **Example 2:** With SDA scaling and L1 norm for robustness: + **Example 2:** With SDA scaling and L1 norm using noise scheduler: >>> import torch - >>> from physicsnemo.diffusion.guidance import DataConsistencyDPSGuidance + >>> from physicsnemo.diffusion.guidance import ( + ... DataConsistencyDPSGuidance, + ... DPSDenoiser, + ... 
) >>> from physicsnemo.diffusion.noise_schedulers import EDMNoiseScheduler >>> >>> scheduler = EDMNoiseScheduler() >>> - >>> # Observe boundary pixels only (outpainting scenario) + >>> # Same sparse probe locations as Example 1 >>> mask = torch.zeros(1, 3, 8, 8) - >>> mask[:, :, 0, :] = 1 # Top row - >>> mask[:, :, -1, :] = 1 # Bottom row - >>> mask[:, :, :, 0] = 1 # Left column - >>> mask[:, :, :, -1] = 1 # Right column + >>> mask[:, :, 2, 3] = 1 + >>> mask[:, :, 5, 6] = 1 + >>> mask[:, :, 1, 7] = 1 >>> y_obs = torch.randn(1, 3, 8, 8) >>> + >>> # Enable SDA scaling and use L1 norm for robustness >>> guidance = DataConsistencyDPSGuidance( ... mask=mask, ... y=y_obs, - ... std_y=0.05, - ... norm_order=1, # L1 norm for robustness to outliers - ... sda_scaling=True, + ... std_y=0.075, + ... norm_order=1, # L1 norm + ... gamma=1.0, # Enable SDA scaling ... sigma_fn=scheduler.sigma, + ... alpha_fn=scheduler.alpha, ... ) >>> >>> x = torch.randn(1, 3, 8, 8, requires_grad=True) >>> t = torch.tensor([1.0]) - >>> x_0 = x * 0.9 + >>> x_0 = x * 0.9 # Must be computed from x >>> output = guidance(x, t, x_0) >>> output.shape torch.Size([1, 3, 8, 8]) - - **Example 3:** Using with DPSDenoiser for complete sampling: - - >>> import torch - >>> from physicsnemo.diffusion.guidance import ( - ... DataConsistencyDPSGuidance, - ... DPSDenoiser, - ... ) - >>> from physicsnemo.diffusion.noise_schedulers import EDMNoiseScheduler - >>> - >>> scheduler = EDMNoiseScheduler() - >>> x0_predictor = lambda x, t: x * 0.9 # Toy x0-predictor - >>> - >>> mask = torch.ones(1, 3, 8, 8) - >>> y_obs = torch.randn(1, 3, 8, 8) - >>> - >>> guidance = DataConsistencyDPSGuidance( - ... mask=mask, - ... y=y_obs, - ... std_y=0.1, - ... ) >>> + >>> # Use with DPSDenoiser and scheduler's x0_to_score + >>> x0_predictor = lambda x, t: x * 0.9 >>> dps_denoiser = DPSDenoiser( ... denoiser_in=x0_predictor, ... x0_to_score_fn=scheduler.x0_to_score, ... guidances=guidance, ... 
) - >>> - >>> x = torch.randn(1, 3, 8, 8) - >>> t = torch.tensor([1.0]) - >>> output = dps_denoiser(x, t) - >>> output.shape + >>> score = dps_denoiser(x, t) + >>> score.shape torch.Size([1, 3, 8, 8]) """ @@ -701,18 +770,21 @@ def __init__( y: Float[Tensor, " B *dims"], std_y: float, norm_order: int = 2, - sda_scaling: bool = False, + gamma: float = 0.0, sigma_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]] | None = None, + alpha_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]] + | None = None, ) -> None: - if sda_scaling and sigma_fn is None: - raise ValueError("sigma_fn must be provided when sda_scaling=True") + if gamma > 0 and sigma_fn is None: + raise ValueError("sigma_fn must be provided when gamma > 0") self.mask = mask self.y = y self.std_y = std_y self.norm_order = norm_order - self.sda_scaling = sda_scaling + self.gamma = gamma self.sigma_fn = sigma_fn + self.alpha_fn = alpha_fn def __call__( self, @@ -726,42 +798,46 @@ def __call__( Parameters ---------- x : Tensor - Noisy latent state at diffusion time ``t``, of shape :math:`(B, *)`. - Must have ``requires_grad=True`` for gradient computation. + Noisy latent state :math:`\mathbf{x}_t`, of shape :math:`(B, *)`. + Must have ``requires_grad=True`` and be part of a computational + graph connecting to ``x_0``. t : Tensor Batched diffusion time of shape :math:`(B,)`. x_0 : Tensor - Estimate of the clean latent state, of shape :math:`(B, *)`. + Estimate of the clean latent state :math:`\hat{\mathbf{x}}_0 + (\mathbf{x}_t, t)`, with same shape as ``x``. Must be computed + from ``x`` via an x0-predictor to allow gradient backpropagation. Returns ------- Tensor Likelihood score guidance term of same shape as ``x``. 
""" - # Ensure x_0 has gradients for autograd - x_0_grad = x_0.detach().requires_grad_(True) - - # Compute masked residual - residual = self.mask * (x_0_grad - self.y) - - # Compute norm^p of residual (summed over all dims except batch) - residual_flat = residual.reshape(residual.shape[0], -1) - norm_p = residual_flat.abs().pow(self.norm_order).sum(dim=1) - - # Compute gradient of norm w.r.t. x_0 - grad_x0 = torch.autograd.grad( - outputs=norm_p.sum(), - inputs=x_0_grad, - create_graph=False, - )[0] - - # Likelihood score: -1/std_y^2 * grad - guidance = -grad_x0 / (self.std_y**2) - - # Apply SDA scaling if enabled - if self.sda_scaling and self.sigma_fn is not None: - t_bc = t.reshape(-1, *([1] * (x.ndim - 1))) - sigma_t_sq = self.sigma_fn(t_bc) ** 2 - guidance = sigma_t_sq * guidance + with torch.enable_grad(): + # Compute masked residual + residual = self.mask * (x_0 - self.y) + + # Compute norm^p of residual + residual_flat = residual.reshape(residual.shape[0], -1) + norm_p = residual_flat.abs().pow(self.norm_order).sum(dim=1) + + # Compute gradient of norm w.r.t. 
x + grad_x = torch.autograd.grad( + outputs=norm_p.sum(), + inputs=x, + create_graph=False, + )[0] + + # Compute scaling factor + t_bc = t.reshape(-1, *([1] * (x.ndim - 1))) + if self.gamma > 0 and self.sigma_fn is not None: + sigma_t = self.sigma_fn(t_bc) + alpha_t = self.alpha_fn(t_bc) if self.alpha_fn is not None else 1.0 + variance = self.std_y**2 + self.gamma * (sigma_t**2) / (alpha_t**2) + else: + variance = self.std_y**2 + + # Likelihood score + guidance = -grad_x / (2 * variance) return guidance From 709180999dfdb83dc6934438449f4c3ea04bac8d Mon Sep 17 00:00:00 2001 From: Charlelie Laurent Date: Mon, 9 Feb 2026 14:25:59 -0800 Subject: [PATCH 04/14] Added option to pass arbitrary loss functions to DPS guidance Signed-off-by: Charlelie Laurent --- .../diffusion/guidance/dps_guidance.py | 172 ++++++++++++++---- 1 file changed, 137 insertions(+), 35 deletions(-) diff --git a/physicsnemo/diffusion/guidance/dps_guidance.py b/physicsnemo/diffusion/guidance/dps_guidance.py index 9c7fb60e61..0454095860 100644 --- a/physicsnemo/diffusion/guidance/dps_guidance.py +++ b/physicsnemo/diffusion/guidance/dps_guidance.py @@ -365,14 +365,14 @@ class ModelConsistencyDPSGuidance(DPSGuidance): \nabla_{\mathbf{x}} \log p(\mathbf{y} | \mathbf{x}_t) = -\frac{1}{2 \left( \sigma_y^2 + \gamma \frac{\sigma(t)^2}{\alpha(t)^2} \right)} \nabla_{\mathbf{x}} - \| A\left(\hat{\mathbf{x}}_0 (\mathbf{x}_t, t)\right) - \mathbf{y} \|_p^p + \| A\left(\hat{\mathbf{x}}_0\right) - \mathbf{y} \|^2 - where :math:`A` is the observation operator, :math:`\| \cdot \|_p` is the - :math:`L^p` norm, and the scaling incorporates a Score-Based Data - Assimilation (SDA) correction through - the parameter :math:`\gamma` that accounts for the variance of the + where :math:`A` is the observation operator and the scaling incorporates + a Score-Based Data Assimilation (SDA) correction through the parameter + :math:`\gamma` that accounts for the variance of the :math:`\hat{\mathbf{x}}_0(\mathbf{x}_t, t)` estimate at 
different diffusion - times. + times. The L2 norm can be replaced by other Lp norms or custom loss + functions via the ``norm`` parameter. The observation operator ``A`` must be a differentiable callable with the following signature: @@ -384,6 +384,16 @@ def A(x_0: Tensor) -> Tensor: # returns: predicted observations, same shape (B, *obs_dims) as y ... + When ``norm`` is a callable, it must have the following signature: + + .. code-block:: python + + def norm( + y_pred, # Shape: (B, *obs_dims) + y_true, # Shape: (B, *obs_dims) + ) -> Tensor: # Scalar loss per batch element, shape: (B,) + ... + Parameters ---------- A : Callable[[Tensor], Tensor] @@ -394,9 +404,11 @@ def A(x_0: Tensor) -> Tensor: of ``A``. std_y : float Standard deviation of the measurement noise :math:`\sigma_y`. - norm_order : int, default=2 - Order of the norm used to compute the residual. Use ``2`` for - standard Gaussian likelihood (L2 norm), ``1`` for L1 norm, etc. + norm : int | Callable[[Tensor, Tensor], Tensor] | None, default=None + Loss function used to compute the residual. ``None`` (default) uses + the L2 norm. An ``int`` value uses the corresponding Lp norm. A + callable receives ``(y_pred, y_true)`` and returns a scalar loss per + batch element of shape :math:`(B,)`. gamma : float, default=0.0 SDA scaling parameter. When ``gamma > 0``, applies SDA correction that accounts for the variance of the x0 estimate. Set to ``0`` for @@ -523,6 +535,34 @@ def A(x_0: Tensor) -> Tensor: >>> score = dps_denoiser(x, t) >>> score.shape torch.Size([1, 3, 8, 8]) + + **Example 3:** With a custom loss function (Huber loss): + + >>> import torch + >>> import torch.nn.functional as F + >>> from physicsnemo.diffusion.guidance import ModelConsistencyDPSGuidance + >>> + >>> # Wrap torch's Huber loss to return per-batch scalars + >>> def huber_loss(y_pred, y_true): + ... per_elem = F.huber_loss(y_pred, y_true, reduction="none") + ... return per_elem.reshape(y_pred.shape[0], -1).sum(dim=1) + ... 
+ >>> A = lambda x: x[:, :1] # Select first channel + >>> y_obs = torch.randn(1, 1, 8, 8) + >>> + >>> guidance = ModelConsistencyDPSGuidance( + ... A=A, + ... y=y_obs, + ... std_y=0.1, + ... norm=huber_loss, # Custom loss function + ... ) + >>> + >>> x = torch.randn(1, 3, 8, 8, requires_grad=True) + >>> t = torch.tensor([1.0]) + >>> x_0 = x * 0.9 + >>> output = guidance(x, t, x_0) + >>> output.shape + torch.Size([1, 3, 8, 8]) """ def __init__( @@ -530,7 +570,12 @@ def __init__( A: Callable[[Float[Tensor, " B *dims"]], Float[Tensor, " B *obs_dims"]], y: Float[Tensor, " B *obs_dims"], std_y: float, - norm_order: int = 2, + norm: int + | Callable[ + [Float[Tensor, " B *obs_dims"], Float[Tensor, " B *obs_dims"]], + Float[Tensor, " B"], + ] + | None = None, gamma: float = 0.0, sigma_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]] | None = None, @@ -542,7 +587,7 @@ def __init__( self.A = A self.y = y self.std_y = std_y - self.norm_order = norm_order + self.norm = norm self.gamma = gamma self.sigma_fn = sigma_fn self.alpha_fn = alpha_fn @@ -575,17 +620,20 @@ def __call__( Likelihood score guidance term of same shape as ``x``. """ with torch.enable_grad(): - # Compute predicted observations and residual + # Compute predicted observations y_pred = self.A(x_0) - residual = y_pred - self.y - # Compute norm^p of residual (summed over all dims except batch) - residual_flat = residual.reshape(residual.shape[0], -1) - norm_p = residual_flat.abs().pow(self.norm_order).sum(dim=1) + # Compute loss + if callable(self.norm): + loss = self.norm(y_pred, self.y) + else: + p = self.norm if self.norm is not None else 2 + residual = (y_pred - self.y).reshape(y_pred.shape[0], -1) + loss = residual.abs().pow(p).sum(dim=1) - # Compute gradient of norm w.r.t. x + # Compute gradient of loss w.r.t. 
x (backprop through x_0) grad_x = torch.autograd.grad( - outputs=norm_p.sum(), + outputs=loss.sum(), inputs=x, create_graph=False, )[0] @@ -622,11 +670,23 @@ class DataConsistencyDPSGuidance(DPSGuidance): \nabla_{\mathbf{x}} \log p(\mathbf{y} | \mathbf{x}_t) = -\frac{1}{2 \left( \sigma_y^2 + \gamma \frac{\sigma(t)^2}{\alpha(t)^2} \right)} \nabla_{\mathbf{x}} - \| \mathbf{M} \odot (\hat{\mathbf{x}}_0 - \mathbf{y}) \|_p^p + \| \mathbf{M} \odot (\hat{\mathbf{x}}_0 - \mathbf{y}) \|^2 where :math:`\mathbf{M}` is a binary mask (1 = observed, 0 = missing), :math:`\odot` denotes element-wise multiplication, and the scaling - incorporates an SDA correction through the parameter :math:`\gamma`. + incorporates an SDA correction through the parameter :math:`\gamma`. The + L2 norm can be replaced by other Lp norms or custom loss functions via the + ``norm`` parameter. + + When ``norm`` is a callable, it must have the following signature: + + .. code-block:: python + + def norm( + y_pred, # Shape: (B, *obs_dims) + y_true, # Shape: (B, *obs_dims) + ) -> Tensor: # Scalar loss per batch element, shape: (B,) + ... Parameters ---------- @@ -638,9 +698,11 @@ class DataConsistencyDPSGuidance(DPSGuidance): Values at unobserved locations (where ``mask=0``) are ignored. std_y : float Standard deviation of the measurement noise :math:`\sigma_y`. - norm_order : int, default=2 - Order of the norm used to compute the residual. Use ``2`` for - standard Gaussian likelihood (L2 norm), ``1`` for L1 norm, etc. + norm : int | Callable[[Tensor, Tensor], Tensor] | None, default=None + Loss function used to compute the residual. ``None`` (default) uses + the L2 norm. An ``int`` value uses the corresponding Lp norm. A + callable receives ``(mask * x_0, mask * y)`` and returns a scalar loss + per batch element of shape :math:`(B,)`. gamma : float, default=0.0 SDA scaling parameter. When ``gamma > 0``, applies SDA correction that accounts for the variance of the x0 estimate. 
Set to ``0`` for @@ -739,7 +801,7 @@ class DataConsistencyDPSGuidance(DPSGuidance): ... mask=mask, ... y=y_obs, ... std_y=0.075, - ... norm_order=1, # L1 norm + ... norm=1, # L1 norm ... gamma=1.0, # Enable SDA scaling ... sigma_fn=scheduler.sigma, ... alpha_fn=scheduler.alpha, @@ -762,6 +824,36 @@ class DataConsistencyDPSGuidance(DPSGuidance): >>> score = dps_denoiser(x, t) >>> score.shape torch.Size([1, 3, 8, 8]) + + **Example 3:** With a custom loss function (Huber loss): + + >>> import torch + >>> import torch.nn.functional as F + >>> from physicsnemo.diffusion.guidance import DataConsistencyDPSGuidance + >>> + >>> # Wrap torch's Huber loss to return per-batch scalars + >>> def huber_loss(y_pred, y_true): + ... per_elem = F.huber_loss(y_pred, y_true, reduction="none") + ... return per_elem.reshape(y_pred.shape[0], -1).sum(dim=1) + ... + >>> mask = torch.zeros(1, 3, 8, 8) + >>> mask[:, :, 2, 3] = 1 + >>> mask[:, :, 5, 6] = 1 + >>> y_obs = torch.randn(1, 3, 8, 8) + >>> + >>> guidance = DataConsistencyDPSGuidance( + ... mask=mask, + ... y=y_obs, + ... std_y=0.1, + ... norm=huber_loss, # Custom loss function + ... 
) + >>> + >>> x = torch.randn(1, 3, 8, 8, requires_grad=True) + >>> t = torch.tensor([1.0]) + >>> x_0 = x * 0.9 + >>> output = guidance(x, t, x_0) + >>> output.shape + torch.Size([1, 3, 8, 8]) """ def __init__( @@ -769,7 +861,12 @@ def __init__( mask: Float[Tensor, " *mask_shape"], y: Float[Tensor, " B *dims"], std_y: float, - norm_order: int = 2, + norm: int + | Callable[ + [Float[Tensor, "B *dims"], Float[Tensor, "B *dims"]], # noqa: F821 + Float[Tensor, " B"], + ] + | None = None, gamma: float = 0.0, sigma_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]] | None = None, @@ -781,7 +878,7 @@ def __init__( self.mask = mask self.y = y self.std_y = std_y - self.norm_order = norm_order + self.norm = norm self.gamma = gamma self.sigma_fn = sigma_fn self.alpha_fn = alpha_fn @@ -814,16 +911,21 @@ def __call__( Likelihood score guidance term of same shape as ``x``. """ with torch.enable_grad(): - # Compute masked residual - residual = self.mask * (x_0 - self.y) - - # Compute norm^p of residual - residual_flat = residual.reshape(residual.shape[0], -1) - norm_p = residual_flat.abs().pow(self.norm_order).sum(dim=1) - - # Compute gradient of norm w.r.t. x + # Compute masked predicted and observed values + y_pred = self.mask * x_0 + y_true = self.mask * self.y + + # Compute loss + if callable(self.norm): + loss = self.norm(y_pred, y_true) + else: + p = self.norm if self.norm is not None else 2 + residual = (y_pred - y_true).reshape(x_0.shape[0], -1) + loss = residual.abs().pow(p).sum(dim=1) + + # Compute gradient of loss w.r.t. 
x (backprop through x_0) grad_x = torch.autograd.grad( - outputs=norm_p.sum(), + outputs=loss.sum(), inputs=x, create_graph=False, )[0] From 9da0f76145298b9780e57916ead344a9ef7ae1d2 Mon Sep 17 00:00:00 2001 From: Charlelie Laurent Date: Wed, 11 Feb 2026 19:27:09 -0800 Subject: [PATCH 05/14] Made norm int = 2 by default Signed-off-by: Charlelie Laurent --- .../diffusion/guidance/dps_guidance.py | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/physicsnemo/diffusion/guidance/dps_guidance.py b/physicsnemo/diffusion/guidance/dps_guidance.py index 0454095860..3ec37f6545 100644 --- a/physicsnemo/diffusion/guidance/dps_guidance.py +++ b/physicsnemo/diffusion/guidance/dps_guidance.py @@ -404,11 +404,11 @@ def norm( of ``A``. std_y : float Standard deviation of the measurement noise :math:`\sigma_y`. - norm : int | Callable[[Tensor, Tensor], Tensor] | None, default=None - Loss function used to compute the residual. ``None`` (default) uses - the L2 norm. An ``int`` value uses the corresponding Lp norm. A - callable receives ``(y_pred, y_true)`` and returns a scalar loss per - batch element of shape :math:`(B,)`. + norm : int | Callable[[Tensor, Tensor], Tensor], default=2 + Loss function used to compute the residual. An ``int`` value (default + ``2``) uses the corresponding Lp norm. A callable receives + ``(y_pred, y_true)`` and returns a scalar loss per batch element of + shape :math:`(B,)`. gamma : float, default=0.0 SDA scaling parameter. When ``gamma > 0``, applies SDA correction that accounts for the variance of the x0 estimate. 
Set to ``0`` for @@ -574,8 +574,7 @@ def __init__( | Callable[ [Float[Tensor, " B *obs_dims"], Float[Tensor, " B *obs_dims"]], Float[Tensor, " B"], - ] - | None = None, + ] = 2, gamma: float = 0.0, sigma_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]] | None = None, @@ -627,9 +626,8 @@ def __call__( if callable(self.norm): loss = self.norm(y_pred, self.y) else: - p = self.norm if self.norm is not None else 2 residual = (y_pred - self.y).reshape(y_pred.shape[0], -1) - loss = residual.abs().pow(p).sum(dim=1) + loss = residual.abs().pow(self.norm).sum(dim=1) # Compute gradient of loss w.r.t. x (backprop through x_0) grad_x = torch.autograd.grad( @@ -698,11 +696,11 @@ def norm( Values at unobserved locations (where ``mask=0``) are ignored. std_y : float Standard deviation of the measurement noise :math:`\sigma_y`. - norm : int | Callable[[Tensor, Tensor], Tensor] | None, default=None - Loss function used to compute the residual. ``None`` (default) uses - the L2 norm. An ``int`` value uses the corresponding Lp norm. A - callable receives ``(mask * x_0, mask * y)`` and returns a scalar loss - per batch element of shape :math:`(B,)`. + norm : int | Callable[[Tensor, Tensor], Tensor], default=2 + Loss function used to compute the residual. An ``int`` value (default + ``2``) uses the corresponding Lp norm. A callable receives + ``(mask * x_0, mask * y)`` and returns a scalar loss per batch element + of shape :math:`(B,)`. gamma : float, default=0.0 SDA scaling parameter. When ``gamma > 0``, applies SDA correction that accounts for the variance of the x0 estimate. 
Set to ``0`` for @@ -865,8 +863,7 @@ def __init__( | Callable[ [Float[Tensor, "B *dims"], Float[Tensor, "B *dims"]], # noqa: F821 Float[Tensor, " B"], - ] - | None = None, + ] = 2, gamma: float = 0.0, sigma_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]] | None = None, @@ -919,9 +916,8 @@ def __call__( if callable(self.norm): loss = self.norm(y_pred, y_true) else: - p = self.norm if self.norm is not None else 2 residual = (y_pred - y_true).reshape(x_0.shape[0], -1) - loss = residual.abs().pow(p).sum(dim=1) + loss = residual.abs().pow(self.norm).sum(dim=1) # Compute gradient of loss w.r.t. x (backprop through x_0) grad_x = torch.autograd.grad( From 896218d423b70dde6e8cb4871ab9d8dd959c6857 Mon Sep 17 00:00:00 2001 From: Charlelie Laurent Date: Wed, 11 Feb 2026 19:33:28 -0800 Subject: [PATCH 06/14] Fixed license header year + missing backticks in doctsring Signed-off-by: Charlelie Laurent --- physicsnemo/diffusion/guidance/dps_guidance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/physicsnemo/diffusion/guidance/dps_guidance.py b/physicsnemo/diffusion/guidance/dps_guidance.py index 3ec37f6545..ed860bfeb2 100644 --- a/physicsnemo/diffusion/guidance/dps_guidance.py +++ b/physicsnemo/diffusion/guidance/dps_guidance.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 - 2025 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2026 NVIDIA CORPORATION & AFFILIATES. # SPDX-FileCopyrightText: All rights reserved. # SPDX-License-Identifier: Apache-2.0 # @@ -33,7 +33,7 @@ class DPSGuidance(Protocol): A DPS guidance is a callable that computes a guidance term to steer the diffusion sampling process toward satisfying some observation constraint. - A DPSGuidance is expected to be a score-predictor, as it returns a quantity + A ``DPSGuidance`` is expected to be a score-predictor, as it returns a quantity analogous to a score. 
The typical form is: From 28a92328a2bdccd1711a8e9d991b1c5a8cf1b0e7 Mon Sep 17 00:00:00 2001 From: Charlelie Laurent Date: Wed, 11 Feb 2026 19:45:19 -0800 Subject: [PATCH 07/14] Renamed A into observation_operator Signed-off-by: Charlelie Laurent --- .../diffusion/guidance/dps_guidance.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/physicsnemo/diffusion/guidance/dps_guidance.py b/physicsnemo/diffusion/guidance/dps_guidance.py index ed860bfeb2..dbd9e8ecc7 100644 --- a/physicsnemo/diffusion/guidance/dps_guidance.py +++ b/physicsnemo/diffusion/guidance/dps_guidance.py @@ -374,12 +374,12 @@ class ModelConsistencyDPSGuidance(DPSGuidance): times. The L2 norm can be replaced by other Lp norms or custom loss functions via the ``norm`` parameter. - The observation operator ``A`` must be a differentiable callable with the + The ``observation_operator`` must be a differentiable callable with the following signature: .. code-block:: python - def A(x_0: Tensor) -> Tensor: + def observation_operator(x_0: Tensor) -> Tensor: # x_0: estimated clean state, shape (B, *) # returns: predicted observations, same shape (B, *obs_dims) as y ... @@ -396,7 +396,7 @@ def norm( Parameters ---------- - A : Callable[[Tensor], Tensor] + observation_operator : Callable[[Tensor], Tensor] Observation operator mapping clean state to observations. Must be differentiable (supports ``torch.autograd``). y : Tensor @@ -465,7 +465,7 @@ def norm( >>> y_obs = torch.randn(1, 3, 4, 4) >>> >>> guidance = ModelConsistencyDPSGuidance( - ... A=blur_downsample, + ... observation_operator=blur_downsample, ... y=y_obs, ... std_y=0.1, ... ) @@ -510,7 +510,7 @@ def norm( >>> >>> # Enable SDA scaling with gamma > 0, providing sigma and alpha functions >>> guidance = ModelConsistencyDPSGuidance( - ... A=A, + ... observation_operator=A, ... y=y_obs, ... std_y=0.075, ... 
gamma=0.05, # Enable SDA scaling @@ -551,7 +551,7 @@ def norm( >>> y_obs = torch.randn(1, 1, 8, 8) >>> >>> guidance = ModelConsistencyDPSGuidance( - ... A=A, + ... observation_operator=A, ... y=y_obs, ... std_y=0.1, ... norm=huber_loss, # Custom loss function @@ -567,7 +567,9 @@ def norm( def __init__( self, - A: Callable[[Float[Tensor, " B *dims"]], Float[Tensor, " B *obs_dims"]], + observation_operator: Callable[ + [Float[Tensor, " B *dims"]], Float[Tensor, " B *obs_dims"] + ], y: Float[Tensor, " B *obs_dims"], std_y: float, norm: int @@ -583,7 +585,7 @@ def __init__( ) -> None: if gamma > 0 and sigma_fn is None: raise ValueError("sigma_fn must be provided when gamma > 0") - self.A = A + self.observation_operator = observation_operator self.y = y self.std_y = std_y self.norm = norm @@ -620,7 +622,7 @@ def __call__( """ with torch.enable_grad(): # Compute predicted observations - y_pred = self.A(x_0) + y_pred = self.observation_operator(x_0) # Compute loss if callable(self.norm): From d5f3c22cc44820ce3d54737794c53acb7154188c Mon Sep 17 00:00:00 2001 From: Charlelie Laurent Date: Wed, 11 Feb 2026 19:59:57 -0800 Subject: [PATCH 08/14] Moved validations to __init__ to avoid graph breaks Signed-off-by: Charlelie Laurent --- .../diffusion/guidance/dps_guidance.py | 44 +++++++++---------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/physicsnemo/diffusion/guidance/dps_guidance.py b/physicsnemo/diffusion/guidance/dps_guidance.py index dbd9e8ecc7..8ae31eb321 100644 --- a/physicsnemo/diffusion/guidance/dps_guidance.py +++ b/physicsnemo/diffusion/guidance/dps_guidance.py @@ -590,8 +590,12 @@ def __init__( self.std_y = std_y self.norm = norm self.gamma = gamma - self.sigma_fn = sigma_fn - self.alpha_fn = alpha_fn + self.sigma_fn = ( + sigma_fn if sigma_fn is not None else lambda t: torch.zeros_like(t) + ) + self.alpha_fn = ( + alpha_fn if alpha_fn is not None else lambda t: torch.ones_like(t) + ) def __call__( self, @@ -640,17 +644,11 @@ def __call__( # 
Compute scaling factor t_bc = t.reshape(-1, *([1] * (x.ndim - 1))) - if self.gamma > 0 and self.sigma_fn is not None: - sigma_t = self.sigma_fn(t_bc) - alpha_t = self.alpha_fn(t_bc) if self.alpha_fn is not None else 1.0 - variance = self.std_y**2 + self.gamma * (sigma_t**2) / (alpha_t**2) - else: - variance = self.std_y**2 - - # Likelihood score - guidance = -grad_x / (2 * variance) + sigma_t = self.sigma_fn(t_bc) + alpha_t = self.alpha_fn(t_bc) + variance = self.std_y**2 + self.gamma * (sigma_t**2) / (alpha_t**2) - return guidance + return -grad_x / (2 * variance) class DataConsistencyDPSGuidance(DPSGuidance): @@ -879,8 +877,12 @@ def __init__( self.std_y = std_y self.norm = norm self.gamma = gamma - self.sigma_fn = sigma_fn - self.alpha_fn = alpha_fn + self.sigma_fn = ( + sigma_fn if sigma_fn is not None else lambda t: torch.zeros_like(t) + ) + self.alpha_fn = ( + alpha_fn if alpha_fn is not None else lambda t: torch.ones_like(t) + ) def __call__( self, @@ -930,14 +932,8 @@ def __call__( # Compute scaling factor t_bc = t.reshape(-1, *([1] * (x.ndim - 1))) - if self.gamma > 0 and self.sigma_fn is not None: - sigma_t = self.sigma_fn(t_bc) - alpha_t = self.alpha_fn(t_bc) if self.alpha_fn is not None else 1.0 - variance = self.std_y**2 + self.gamma * (sigma_t**2) / (alpha_t**2) - else: - variance = self.std_y**2 - - # Likelihood score - guidance = -grad_x / (2 * variance) + sigma_t = self.sigma_fn(t_bc) + alpha_t = self.alpha_fn(t_bc) + variance = self.std_y**2 + self.gamma * (sigma_t**2) / (alpha_t**2) - return guidance + return -grad_x / (2 * variance) From 3b2ab9102c9f8beaddfd8844c66d0f1c9e9db1f5 Mon Sep 17 00:00:00 2001 From: Charlelie Laurent Date: Wed, 11 Feb 2026 20:17:50 -0800 Subject: [PATCH 09/14] Fixes on mask argument in DataConsistencyDPSGuidance Signed-off-by: Charlelie Laurent --- .../diffusion/guidance/dps_guidance.py | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/physicsnemo/diffusion/guidance/dps_guidance.py 
b/physicsnemo/diffusion/guidance/dps_guidance.py index 8ae31eb321..fd568c2140 100644 --- a/physicsnemo/diffusion/guidance/dps_guidance.py +++ b/physicsnemo/diffusion/guidance/dps_guidance.py @@ -19,7 +19,7 @@ from typing import Callable, Protocol, Sequence, runtime_checkable import torch -from jaxtyping import Float +from jaxtyping import Bool, Float from torch import Tensor from physicsnemo.diffusion.base import DiffusionDenoiser @@ -689,8 +689,8 @@ def norm( Parameters ---------- mask : Tensor - Binary mask of shape :math:`(B, *)` matching the state shape. - Values should be 1 for observed locations and 0 for missing. + Boolean mask of shape :math:`(B, *)` matching the state shape. + ``True`` for observed locations, ``False`` for missing. y : Tensor Observed data of shape :math:`(B, *)` matching the state shape. Values at unobserved locations (where ``mask=0``) are ignored. @@ -741,11 +741,11 @@ def norm( ... DPSDenoiser, ... ) >>> - >>> # Sparse mask: only observe a few probe locations - >>> mask = torch.zeros(1, 3, 8, 8) - >>> mask[:, :, 2, 3] = 1 # Probe at (2, 3) - >>> mask[:, :, 5, 6] = 1 # Probe at (5, 6) - >>> mask[:, :, 1, 7] = 1 # Probe at (1, 7) + >>> # Boolean mask: only observe a few probe locations + >>> mask = torch.zeros(1, 3, 8, 8, dtype=torch.bool) + >>> mask[:, :, 2, 3] = True # Probe at (2, 3) + >>> mask[:, :, 5, 6] = True # Probe at (5, 6) + >>> mask[:, :, 1, 7] = True # Probe at (1, 7) >>> y_obs = torch.randn(1, 3, 8, 8) # Observed values >>> >>> guidance = DataConsistencyDPSGuidance( @@ -788,10 +788,10 @@ def norm( >>> scheduler = EDMNoiseScheduler() >>> >>> # Same sparse probe locations as Example 1 - >>> mask = torch.zeros(1, 3, 8, 8) - >>> mask[:, :, 2, 3] = 1 - >>> mask[:, :, 5, 6] = 1 - >>> mask[:, :, 1, 7] = 1 + >>> mask = torch.zeros(1, 3, 8, 8, dtype=torch.bool) + >>> mask[:, :, 2, 3] = True + >>> mask[:, :, 5, 6] = True + >>> mask[:, :, 1, 7] = True >>> y_obs = torch.randn(1, 3, 8, 8) >>> >>> # Enable SDA scaling and use L1 norm for 
robustness @@ -834,9 +834,9 @@ def norm( ... per_elem = F.huber_loss(y_pred, y_true, reduction="none") ... return per_elem.reshape(y_pred.shape[0], -1).sum(dim=1) ... - >>> mask = torch.zeros(1, 3, 8, 8) - >>> mask[:, :, 2, 3] = 1 - >>> mask[:, :, 5, 6] = 1 + >>> mask = torch.zeros(1, 3, 8, 8, dtype=torch.bool) + >>> mask[:, :, 2, 3] = True + >>> mask[:, :, 5, 6] = True >>> y_obs = torch.randn(1, 3, 8, 8) >>> >>> guidance = DataConsistencyDPSGuidance( @@ -856,7 +856,7 @@ def norm( def __init__( self, - mask: Float[Tensor, " *mask_shape"], + mask: Bool[Tensor, " B *dims"], y: Float[Tensor, " B *dims"], std_y: float, norm: int @@ -872,7 +872,7 @@ def __init__( ) -> None: if gamma > 0 and sigma_fn is None: raise ValueError("sigma_fn must be provided when gamma > 0") - self.mask = mask + self.mask = mask.float() self.y = y self.std_y = std_y self.norm = norm @@ -913,8 +913,9 @@ def __call__( """ with torch.enable_grad(): # Compute masked predicted and observed values - y_pred = self.mask * x_0 - y_true = self.mask * self.y + mask = self.mask.to(dtype=x_0.dtype, device=x_0.device) + y_pred = mask * x_0 + y_true = mask * self.y # Compute loss if callable(self.norm): From fe58b4bbdca9c2eac8615e83091d0fafff2108c9 Mon Sep 17 00:00:00 2001 From: Charlelie Laurent Date: Wed, 11 Feb 2026 20:33:52 -0800 Subject: [PATCH 10/14] Added automnatic dtype casting and device transfer for tensor parameters Signed-off-by: Charlelie Laurent --- .../diffusion/guidance/dps_guidance.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/physicsnemo/diffusion/guidance/dps_guidance.py b/physicsnemo/diffusion/guidance/dps_guidance.py index fd568c2140..0de3be9384 100644 --- a/physicsnemo/diffusion/guidance/dps_guidance.py +++ b/physicsnemo/diffusion/guidance/dps_guidance.py @@ -611,7 +611,8 @@ def __call__( x : Tensor Noisy latent state :math:`\mathbf{x}_t`, of shape :math:`(B, *)`. 
Must have ``requires_grad=True`` and be part of a computational - graph connecting to ``x_0``. + graph connecting to ``x_0``. Its ``dtype`` and ``device`` + determine those of all internal computations. t : Tensor Batched diffusion time of shape :math:`(B,)`. x_0 : Tensor @@ -624,15 +625,18 @@ def __call__( Tensor Likelihood score guidance term of same shape as ``x``. """ + # Ensure stored tensors match x's dtype and device + y = self.y.to(dtype=x.dtype, device=x.device) + with torch.enable_grad(): # Compute predicted observations y_pred = self.observation_operator(x_0) # Compute loss if callable(self.norm): - loss = self.norm(y_pred, self.y) + loss = self.norm(y_pred, y) else: - residual = (y_pred - self.y).reshape(y_pred.shape[0], -1) + residual = (y_pred - y).reshape(y_pred.shape[0], -1) loss = residual.abs().pow(self.norm).sum(dim=1) # Compute gradient of loss w.r.t. x (backprop through x_0) @@ -898,7 +902,8 @@ def __call__( x : Tensor Noisy latent state :math:`\mathbf{x}_t`, of shape :math:`(B, *)`. Must have ``requires_grad=True`` and be part of a computational - graph connecting to ``x_0``. + graph connecting to ``x_0``. Its ``dtype`` and ``device`` + determine those of all internal computations. t : Tensor Batched diffusion time of shape :math:`(B,)`. x_0 : Tensor @@ -911,11 +916,14 @@ def __call__( Tensor Likelihood score guidance term of same shape as ``x``. 
""" + # Ensure stored tensors match x's dtype and device + mask = self.mask.to(dtype=x.dtype, device=x.device) + y = self.y.to(dtype=x.dtype, device=x.device) + with torch.enable_grad(): # Compute masked predicted and observed values - mask = self.mask.to(dtype=x_0.dtype, device=x_0.device) y_pred = mask * x_0 - y_true = mask * self.y + y_true = mask * y # Compute loss if callable(self.norm): From 04236a267724ac08fae25809cb62894c1f3d1d03 Mon Sep 17 00:00:00 2001 From: Charlelie Laurent <84199758+CharlelieLrt@users.noreply.github.com> Date: Tue, 17 Feb 2026 18:24:48 -0800 Subject: [PATCH 11/14] Refactored diffusion sampler (#1363) * Add missing init files * Update build system and specify some deps. * Reorganize tests. * Update init files * Clean up neighbor tools. * Update testing * Fix compat tests * Move core model tests to tests/core/ * Add import lint config * Relocate layers * Move graphcast utils into model directory * Relocating util functionalities. * Further clean up and organize tests. * utils tests are passing now * Cleaning up distributed tests * Patching tests working again in nn * Fix sdf test * Fix zenith angle tests * Some organization of tests. Checkpoints is moved into utils. * Remove launch.utils and launch.config. Checkpointing is moved to phsyicsnemo.utils, launch.config is just gone. It was empty. * Most nn tests are passing * Further cleanup. Getting there! * Remove constants file * Add import linting to pre-commit. * Refactor (#1208) * Move filesystems and version_check to core * Fix version check tests * Reorganize distributed, domain_parallel, and begin nn / utils cleanup. * Move modules and meta to core. Move registry to core. No tests fixed yet. * Add missing init files * Update build system and specify some deps. * Reorganize tests. * Update init files * Clean up neighbor tools. 
* Update testing * Fix compat tests * Move core model tests to tests/core/ * Add import lint config * Relocate layers * Move graphcast utils into model directory * Relocating util functionalities. * Add FIGConvNet to crash example (#1207) * Add FIGConvNet to crash example. * Add FIGConvNet to crash example * Update model config * propose fix some typos (#1209) Signed-off-by: John E Co-authored-by: Corey adams <6619961+coreyjadams@users.noreply.github.com> * Further clean up and organize tests. * utils tests are passing now * Cleaning up distributed tests * Patching tests working again in nn * Fix sdf test * Fix zenith angle tests * Some organization of tests. Checkpoints is moved into utils. * Remove launch.utils and launch.config. Checkpointing is moved to phsyicsnemo.utils, launch.config is just gone. It was empty. * Most nn tests are passing * Further cleanup. Getting there! * Remove constants file * Add import linting to pre-commit. --------- Signed-off-by: John E Co-authored-by: Alexey Kamenev Co-authored-by: John Eismeier <42679190+jeis4wpi@users.noreply.github.com> * Unmigrate the insolation utils (#1211) * unmigrate the insolation utils * Revert test and compat map * Update importlinter * Move gnn layers and start to fix several model tests. * AFNO is now passing. * Rnn models passing. * Fix improt * Healpix tests are working * Domino and unet working * Refactor (#1216) * Move filesystems and version_check to core * Fix version check tests * Reorganize distributed, domain_parallel, and begin nn / utils cleanup. * Move modules and meta to core. Move registry to core. No tests fixed yet. * Add missing init files * Update build system and specify some deps. * Reorganize tests. * Update init files * Clean up neighbor tools. * Update testing * Fix compat tests * Move core model tests to tests/core/ * Add import lint config * Relocate layers * Move graphcast utils into model directory * Relocating util functionalities. * Further clean up and organize tests. 
* utils tests are passing now * Cleaning up distributed tests * Patching tests working again in nn * Fix sdf test * Fix zenith angle tests * Some organization of tests. Checkpoints is moved into utils. * Remove launch.utils and launch.config. Checkpointing is moved to phsyicsnemo.utils, launch.config is just gone. It was empty. * Most nn tests are passing * Further cleanup. Getting there! * Remove constants file * Add import linting to pre-commit. * Move gnn layers and start to fix several model tests. * AFNO is now passing. * Rnn models passing. * Fix improt * Healpix tests are working * Domino and unet working * Update activations path in dlwp tests (#1217) * Update activations path in dlwp tests * Update example paths * Updating to address some test issues * MGN tests passing again * Most graphcast tests passing again * Move nd conv layers. * update fengwu and pangu * Update sfno and pix2pix test * update tests for figconvnet, swinrnn, superresnet * updating more models to pass * Update distributed tests, now passing. * Domain parallel tests now passing. * Fix active learning imports so tests pass in refactor * Fix some metric imports * Remove deploy package * Remove unused test file * unmigrate these files ... again? * Update import linter. * Refactor (#1224) * Move filesystems and version_check to core * Fix version check tests * Reorganize distributed, domain_parallel, and begin nn / utils cleanup. * Move modules and meta to core. Move registry to core. No tests fixed yet. * Add missing init files * Update build system and specify some deps. * Reorganize tests. * Update init files * Clean up neighbor tools. * Update testing * Fix compat tests * Move core model tests to tests/core/ * Add import lint config * Relocate layers * Move graphcast utils into model directory * Relocating util functionalities. * Further clean up and organize tests. 
* utils tests are passing now * Cleaning up distributed tests * Patching tests working again in nn * Fix sdf test * Fix zenith angle tests * Some organization of tests. Checkpoints is moved into utils. * Remove launch.utils and launch.config. Checkpointing is moved to phsyicsnemo.utils, launch.config is just gone. It was empty. * Most nn tests are passing * Further cleanup. Getting there! * Remove constants file * Add import linting to pre-commit. * Update crash readme (#1212) * update license headers- second try * update readme * Bump multi-storage-client to v0.33.0 with rust client (#1156) * Move gnn layers and start to fix several model tests. * AFNO is now passing. * Rnn models passing. * Fix improt * Healpix tests are working * Domino and unet working * Add jaxtyping to requirements.txt for crash sample (#1218) * update license headers- second try * Update requirements.txt * Updating to address some test issues * MGN tests passing again * Most graphcast tests passing again * Move nd conv layers. * update fengwu and pangu * Update sfno and pix2pix test * update tests for figconvnet, swinrnn, superresnet * updating more models to pass * Update distributed tests, now passing. * Domain parallel tests now passing. * Fix active learning imports so tests pass in refactor * Fix some metric imports * Remove deploy package * Remove unused test file * unmigrate these files ... again? * Update import linter. --------- Co-authored-by: Mohammad Amin Nabian Co-authored-by: Yongming Ding * Cleaning up diffusion models. Not quite done yet. * Restore deleted files * Updating more tests. * Further updates to tests. Datapipes almost working. * Refactor (#1231) * Move filesystems and version_check to core * Fix version check tests * Reorganize distributed, domain_parallel, and begin nn / utils cleanup. * Move modules and meta to core. Move registry to core. No tests fixed yet. * Add missing init files * Update build system and specify some deps. * Reorganize tests. 
* Update init files * Clean up neighbor tools. * Update testing * Fix compat tests * Move core model tests to tests/core/ * Add import lint config * Relocate layers * Move graphcast utils into model directory * Relocating util functionalities. * Further clean up and organize tests. * utils tests are passing now * Cleaning up distributed tests * Patching tests working again in nn * Fix sdf test * Fix zenith angle tests * Some organization of tests. Checkpoints is moved into utils. * Remove launch.utils and launch.config. Checkpointing is moved to phsyicsnemo.utils, launch.config is just gone. It was empty. * Most nn tests are passing * Further cleanup. Getting there! * Remove constants file * Add import linting to pre-commit. * Update crash readme (#1212) * update license headers- second try * update readme * Bump multi-storage-client to v0.33.0 with rust client (#1156) * Move gnn layers and start to fix several model tests. * AFNO is now passing. * Rnn models passing. * Fix improt * Healpix tests are working * Domino and unet working * Add jaxtyping to requirements.txt for crash sample (#1218) * update license headers- second try * Update requirements.txt * Updating to address some test issues * Replace 'License' link with 'Dev blog' link (#1215) Co-authored-by: Corey adams <6619961+coreyjadams@users.noreply.github.com> * MGN tests passing again * Most graphcast tests passing again * Move nd conv layers. * update fengwu and pangu * Update sfno and pix2pix test * update tests for figconvnet, swinrnn, superresnet * updating more models to pass * Update distributed tests, now passing. * Validation fu added to examples/structural_mechanics/crash/train.py (#1204) * validation added: works for multi-node job. 
* rename and rearrange validation function * validate_every_n_epochs, save_ckpt_every_n_epochs added in config * corrected bug (args of model) in inference * args in validation code updated * val path added and args name changed * validation split added -> write_vtp=False * fixed inference bug * bug fix: write_vtp * Domain parallel tests now passing. * Fix active learning imports so tests pass in refactor * Fix some metric imports * Remove deploy package * Remove unused test file * unmigrate these files ... again? * Update import linter. * Add saikrishnanc-nv to github actors (#1225) * Integrate Curator instructions to the Crash example (#1213) * Integrate Curator instructions * Update docs * Formatting changes * Adding code of conduct (#1214) * Adding code of conduct Adopting the code of conduct from the https://www.contributor-covenant.org/ * Update CODE_OF_CONDUCT.MD Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Create .markdownlintignore * Revise README for PhysicsNeMo resources and guidance Updated the 'Getting Started' section and added new resources for learning AI Physics. * Update README.md --------- Co-authored-by: Mohammad Amin Nabian Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> Co-authored-by: Corey adams <6619961+coreyjadams@users.noreply.github.com> * Cleaning up diffusion models. Not quite done yet. * Restore deleted files * Updating more tests. * Further updates to tests. Datapipes almost working. --------- Co-authored-by: Mohammad Amin Nabian Co-authored-by: Yongming Ding Co-authored-by: ram-cherukuri <104155145+ram-cherukuri@users.noreply.github.com> Co-authored-by: Deepak Akhare Co-authored-by: Sai Krishnan Chandrasekar <157182662+saikrishnanc-nv@users.noreply.github.com> Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * update import paths * Starting to clean up dependency tree. 
* Refactor (#1233) * Move filesystems and version_check to core * Fix version check tests * Reorganize distributed, domain_parallel, and begin nn / utils cleanup. * Move modules and meta to core. Move registry to core. No tests fixed yet. * Add missing init files * Update build system and specify some deps. * Reorganize tests. * Update init files * Clean up neighbor tools. * Update testing * Fix compat tests * Move core model tests to tests/core/ * Add import lint config * Relocate layers * Move graphcast utils into model directory * Relocating util functionalities. * Further clean up and organize tests. * utils tests are passing now * Cleaning up distributed tests * Patching tests working again in nn * Fix sdf test * Fix zenith angle tests * Some organization of tests. Checkpoints is moved into utils. * Remove launch.utils and launch.config. Checkpointing is moved to phsyicsnemo.utils, launch.config is just gone. It was empty. * Most nn tests are passing * Further cleanup. Getting there! * Remove constants file * Add import linting to pre-commit. * Update crash readme (#1212) * update license headers- second try * update readme * Bump multi-storage-client to v0.33.0 with rust client (#1156) * Move gnn layers and start to fix several model tests. * AFNO is now passing. * Rnn models passing. * Fix improt * Healpix tests are working * Domino and unet working * Add jaxtyping to requirements.txt for crash sample (#1218) * update license headers- second try * Update requirements.txt * Updating to address some test issues * Replace 'License' link with 'Dev blog' link (#1215) Co-authored-by: Corey adams <6619961+coreyjadams@users.noreply.github.com> * MGN tests passing again * Most graphcast tests passing again * Move nd conv layers. * update fengwu and pangu * Update sfno and pix2pix test * update tests for figconvnet, swinrnn, superresnet * updating more models to pass * Update distributed tests, now passing. 
* Validation fu added to examples/structural_mechanics/crash/train.py (#1204) * validation added: works for multi-node job. * rename and rearrange validation function * validate_every_n_epochs, save_ckpt_every_n_epochs added in config * corrected bug (args of model) in inference * args in validation code updated * val path added and args name changed * validation split added -> write_vtp=False * fixed inference bug * bug fix: write_vtp * Domain parallel tests now passing. * Fix active learning imports so tests pass in refactor * Fix some metric imports * Remove deploy package * Remove unused test file * unmigrate these files ... again? * Update import linter. * Add saikrishnanc-nv to github actors (#1225) * Integrate Curator instructions to the Crash example (#1213) * Integrate Curator instructions * Update docs * Formatting changes * Adding code of conduct (#1214) * Adding code of conduct Adopting the code of conduct from the https://www.contributor-covenant.org/ * Update CODE_OF_CONDUCT.MD Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Create .markdownlintignore * Revise README for PhysicsNeMo resources and guidance Updated the 'Getting Started' section and added new resources for learning AI Physics. * Update README.md --------- Co-authored-by: Mohammad Amin Nabian Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> Co-authored-by: Corey adams <6619961+coreyjadams@users.noreply.github.com> * Cleaning up diffusion models. Not quite done yet. * Restore deleted files * Updating more tests. 
* Fixed minor bug in shape validation in SongUNet (#1230) Signed-off-by: Charlelie Laurent * Add Zarr reader for Crash (#1228) * Add Zarr reader for Crash * Update README * Update validation logic of point data in Zarr reader Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Update examples/structural_mechanics/crash/zarr_reader.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Add a test for 2D feature arrays * Update examples/structural_mechanics/crash/zarr_reader.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --------- Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Further updates to tests. Datapipes almost working. * update import paths * Starting to clean up dependency tree. --------- Signed-off-by: Charlelie Laurent Co-authored-by: Mohammad Amin Nabian Co-authored-by: Yongming Ding Co-authored-by: ram-cherukuri <104155145+ram-cherukuri@users.noreply.github.com> Co-authored-by: Deepak Akhare Co-authored-by: Sai Krishnan Chandrasekar <157182662+saikrishnanc-nv@users.noreply.github.com> Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> Co-authored-by: Charlelie Laurent <84199758+CharlelieLrt@users.noreply.github.com> * Added coding standards for model implementations as a custom context for greptile (#1219) * Added initial set of coding standards for model implementations Signed-off-by: Charlelie Laurent * Fixed typos + review comments + added details Signed-off-by: Charlelie Laurent * Added more rules for models Signed-off-by: Charlelie Laurent * Added model rules to PR checklist Signed-off-by: Charlelie Laurent * Added cusror rules for models Signed-off-by: Charlelie Laurent * Linked the wiki page to the PR template Signed-off-by: Charlelie Laurent * Fixed typo in PR checklist Signed-off-by: Charlelie Laurent --------- Signed-off-by: 
Charlelie Laurent * Fixing and adjusting a broad suite of tests. * Update test/domain_parallel/conftest.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Minor fix * Refactor (#1234) * Move filesystems and version_check to core * Fix version check tests * Reorganize distributed, domain_parallel, and begin nn / utils cleanup. * Move modules and meta to core. Move registry to core. No tests fixed yet. * Add missing init files * Update build system and specify some deps. * Reorganize tests. * Update init files * Clean up neighbor tools. * Update testing * Fix compat tests * Move core model tests to tests/core/ * Add import lint config * Relocate layers * Move graphcast utils into model directory * Relocating util functionalities. * Further clean up and organize tests. * utils tests are passing now * Cleaning up distributed tests * Patching tests working again in nn * Fix sdf test * Fix zenith angle tests * Some organization of tests. Checkpoints is moved into utils. * Remove launch.utils and launch.config. Checkpointing is moved to phsyicsnemo.utils, launch.config is just gone. It was empty. * Most nn tests are passing * Further cleanup. Getting there! * Remove constants file * Add import linting to pre-commit. * Update crash readme (#1212) * update license headers- second try * update readme * Bump multi-storage-client to v0.33.0 with rust client (#1156) * Move gnn layers and start to fix several model tests. * AFNO is now passing. * Rnn models passing. * Fix improt * Healpix tests are working * Domino and unet working * Add jaxtyping to requirements.txt for crash sample (#1218) * update license headers- second try * Update requirements.txt * Updating to address some test issues * Replace 'License' link with 'Dev blog' link (#1215) Co-authored-by: Corey adams <6619961+coreyjadams@users.noreply.github.com> * MGN tests passing again * Most graphcast tests passing again * Move nd conv layers. 
* update fengwu and pangu * Update sfno and pix2pix test * update tests for figconvnet, swinrnn, superresnet * updating more models to pass * Update distributed tests, now passing. * Validation fu added to examples/structural_mechanics/crash/train.py (#1204) * validation added: works for multi-node job. * rename and rearrange validation function * validate_every_n_epochs, save_ckpt_every_n_epochs added in config * corrected bug (args of model) in inference * args in validation code updated * val path added and args name changed * validation split added -> write_vtp=False * fixed inference bug * bug fix: write_vtp * Domain parallel tests now passing. * Fix active learning imports so tests pass in refactor * Fix some metric imports * Remove deploy package * Remove unused test file * unmigrate these files ... again? * Update import linter. * Add saikrishnanc-nv to github actors (#1225) * Integrate Curator instructions to the Crash example (#1213) * Integrate Curator instructions * Update docs * Formatting changes * Adding code of conduct (#1214) * Adding code of conduct Adopting the code of conduct from the https://www.contributor-covenant.org/ * Update CODE_OF_CONDUCT.MD Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Create .markdownlintignore * Revise README for PhysicsNeMo resources and guidance Updated the 'Getting Started' section and added new resources for learning AI Physics. * Update README.md --------- Co-authored-by: Mohammad Amin Nabian Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> Co-authored-by: Corey adams <6619961+coreyjadams@users.noreply.github.com> * Cleaning up diffusion models. Not quite done yet. * Restore deleted files * Updating more tests. 
* Fixed minor bug in shape validation in SongUNet (#1230) Signed-off-by: Charlelie Laurent * Add Zarr reader for Crash (#1228) * Add Zarr reader for Crash * Update README * Update validation logic of point data in Zarr reader Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Update examples/structural_mechanics/crash/zarr_reader.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Add a test for 2D feature arrays * Update examples/structural_mechanics/crash/zarr_reader.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --------- Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Further updates to tests. Datapipes almost working. * update import paths * Starting to clean up dependency tree. * Add AR RT and OT schemes to Crash FIGConvNet (#1232) * Add AR and OT schemes for FIGConvNet * Add tests * Soothe the linter * Fix the tests * Fixing and adjusting a broad suite of tests. * Update test/domain_parallel/conftest.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Minor fix --------- Signed-off-by: Charlelie Laurent Co-authored-by: Mohammad Amin Nabian Co-authored-by: Yongming Ding Co-authored-by: ram-cherukuri <104155145+ram-cherukuri@users.noreply.github.com> Co-authored-by: Deepak Akhare Co-authored-by: Sai Krishnan Chandrasekar <157182662+saikrishnanc-nv@users.noreply.github.com> Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> Co-authored-by: Charlelie Laurent <84199758+CharlelieLrt@users.noreply.github.com> Co-authored-by: Alexey Kamenev * Not seeing any errors in testing ... 
* Breakdown of rules into smaller rules (#1236) * Breakdown of rules into smaller rules Signed-off-by: Charlelie Laurent * Fix mismatches in rule IDs referenced in rule text Signed-off-by: Charlelie Laurent --------- Signed-off-by: Charlelie Laurent * Refactor (#1240) * Move filesystems and version_check to core * Fix version check tests * Reorganize distributed, domain_parallel, and begin nn / utils cleanup. * Move modules and meta to core. Move registry to core. No tests fixed yet. * Add missing init files * Update build system and specify some deps. * Reorganize tests. * Update init files * Clean up neighbor tools. * Update testing * Fix compat tests * Move core model tests to tests/core/ * Add import lint config * Relocate layers * Move graphcast utils into model directory * Relocating util functionalities. * Further clean up and organize tests. * utils tests are passing now * Cleaning up distributed tests * Patching tests working again in nn * Fix sdf test * Fix zenith angle tests * Some organization of tests. Checkpoints is moved into utils. * Remove launch.utils and launch.config. Checkpointing is moved to phsyicsnemo.utils, launch.config is just gone. It was empty. * Most nn tests are passing * Further cleanup. Getting there! * Remove constants file * Add import linting to pre-commit. * Move gnn layers and start to fix several model tests. * AFNO is now passing. * Rnn models passing. * Fix improt * Healpix tests are working * Domino and unet working * Updating to address some test issues * MGN tests passing again * Most graphcast tests passing again * Move nd conv layers. * update fengwu and pangu * Update sfno and pix2pix test * update tests for figconvnet, swinrnn, superresnet * updating more models to pass * Update distributed tests, now passing. * Domain parallel tests now passing. * Fix active learning imports so tests pass in refactor * Fix some metric imports * Remove deploy package * Remove unused test file * unmigrate these files ... again? 
* Update import linter. * Cleaning up diffusion models. Not quite done yet. * Restore deleted files * Updating more tests. * Further updates to tests. Datapipes almost working. * update import paths * Starting to clean up dependency tree. * Fixing and adjusting a broad suite of tests. * Update test/domain_parallel/conftest.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Minor fix * Not seeing any errors in testing ... * Formatting active learning module docstrings (#1238) * docs: fixing Protocol class reference formatting Signed-off-by: Kelvin Lee * docs: removing mermaid diagram from protocols Signed-off-by: Kelvin Lee * docs: adding active learning index * docs: revising docstrings for sphinx formatting * docs: fix placeholder URL for active learning main docs --------- Signed-off-by: Kelvin Lee --------- Signed-off-by: Kelvin Lee Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> Co-authored-by: Kelvin Lee * Refactor (#1247) * Move filesystems and version_check to core * Fix version check tests * Reorganize distributed, domain_parallel, and begin nn / utils cleanup. * Move modules and meta to core. Move registry to core. No tests fixed yet. * Add missing init files * Update build system and specify some deps. * Reorganize tests. * Update init files * Clean up neighbor tools. * Update testing * Fix compat tests * Move core model tests to tests/core/ * Add import lint config * Relocate layers * Move graphcast utils into model directory * Relocating util functionalities. * Further clean up and organize tests. * utils tests are passing now * Cleaning up distributed tests * Patching tests working again in nn * Fix sdf test * Fix zenith angle tests * Some organization of tests. Checkpoints is moved into utils. * Remove launch.utils and launch.config. Checkpointing is moved to phsyicsnemo.utils, launch.config is just gone. It was empty. * Most nn tests are passing * Further cleanup. 
Getting there! * Remove constants file * Add import linting to pre-commit. * Move gnn layers and start to fix several model tests. * AFNO is now passing. * Rnn models passing. * Fix improt * Healpix tests are working * Domino and unet working * Updating to address some test issues * MGN tests passing again * Most graphcast tests passing again * Move nd conv layers. * update fengwu and pangu * Update sfno and pix2pix test * update tests for figconvnet, swinrnn, superresnet * updating more models to pass * Update distributed tests, now passing. * Domain parallel tests now passing. * Fix active learning imports so tests pass in refactor * Fix some metric imports * Remove deploy package * Remove unused test file * unmigrate these files ... again? * Update import linter. * Cleaning up diffusion models. Not quite done yet. * Restore deleted files * Updating more tests. * Further updates to tests. Datapipes almost working. * update import paths * Starting to clean up dependency tree. * Fixing and adjusting a broad suite of tests. * Update test/domain_parallel/conftest.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Minor fix * Not seeing any errors in testing ... * A new X-MeshGraphNet example for reservoir simulation. 
(#1186) * X-MGN for reservoir simulation Signed-off-by: Tsubasa Onishi * installation bug fix Signed-off-by: Tsubasa Onishi * well object docstring fix Signed-off-by: Tsubasa Onishi * more well object docstring fix Signed-off-by: Tsubasa Onishi * improve path_utils Signed-off-by: Tsubasa Onishi * fix while space in config Signed-off-by: Tsubasa Onishi * fix version inconsistency in requirement.txt Signed-off-by: Tsubasa Onishi * add versions for some libs in requirement.txt Signed-off-by: Tsubasa Onishi * improve exception handling in mldlow_utils Signed-off-by: Tsubasa Onishi * improve mldlow_utils Signed-off-by: Tsubasa Onishi * improve datetiem in mlflow_utils Signed-off-by: Tsubasa Onishi * improve exception handling in inference Signed-off-by: Tsubasa Onishi * improve inference Signed-off-by: Tsubasa Onishi * improve ecl_reader Signed-off-by: Tsubasa Onishi * formatting Signed-off-by: Tsubasa Onishi * improve preprocessor Signed-off-by: Tsubasa Onishi * improve preprocessor loop Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * grad accum bug fix Signed-off-by: Tsubasa Onishi * total loss bug fix Signed-off-by: Tsubasa Onishi * added some safe guard for connection indexing Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * bug fix Signed-off-by: Tsubasa Onishi * bug fix Signed-off-by: Tsubasa Onishi * cleanup Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * update ecl_reader Signed-off-by: Tsubasa Onishi * cleanup utils Signed-off-by: Tsubasa Onishi * cleanup * cleanup * update configs * Update README.md style guide rule changes * Update README.md * fmt Signed-off-by: Tsubasa Onishi * improve docstring fmt Signed-off-by: Tsubasa Onishi * update license yr Signed-off-by: Tsubasa Onishi * cleanup well Signed-off-by: Tsubasa Onishi * cleanup preproc fmt Signed-off-by: Tsubasa Onishi * cleanup preproc fmt 
Signed-off-by: Tsubasa Onishi * cimprove infrence fmt Signed-off-by: Tsubasa Onishi * improve datetime Signed-off-by: Tsubasa Onishi * improve readme fmt Signed-off-by: Tsubasa Onishi * improve readme Signed-off-by: Tsubasa Onishi * improve train.py fmt Signed-off-by: Tsubasa Onishi * improve readme fmt Signed-off-by: Tsubasa Onishi * improve requirement Signed-off-by: Tsubasa Onishi * ilcense header Signed-off-by: Tsubasa Onishi * improve ecl reader logging Signed-off-by: Tsubasa Onishi * cleanup Signed-off-by: Tsubasa Onishi * license header Signed-off-by: Tsubasa Onishi * improve graph builder (parallel) + added results to readme Signed-off-by: Tsubasa Onishi * delete some unsed files Signed-off-by: Tsubasa Onishi * address PR comments Signed-off-by: Tsubasa Onishi * improve inference grdecl header Signed-off-by: Tsubasa Onishi * improve readme Signed-off-by: Tsubasa Onishi * improve readme Signed-off-by: Tsubasa Onishi * support time series Signed-off-by: Tsubasa Onishi * update config Signed-off-by: Tsubasa Onishi * minor update Signed-off-by: Tsubasa Onishi * improve graph builder Signed-off-by: Tsubasa Onishi * update ecl_reader logging Signed-off-by: Tsubasa Onishi * replace pickle with json Signed-off-by: Tsubasa Onishi * add license headers Signed-off-by: Tsubasa Onishi * remove unused png files Signed-off-by: Tsubasa Onishi * remove unsed import Signed-off-by: Tsubasa Onishi * remove emojis Signed-off-by: Tsubasa Onishi * replace print with logger Signed-off-by: Tsubasa Onishi * update docstring Signed-off-by: Tsubasa Onishi * update readme Signed-off-by: Tsubasa Onishi * minor updates Signed-off-by: Tsubasa Onishi * update readme Signed-off-by: Tsubasa Onishi * update header Signed-off-by: Tsubasa Onishi --------- Signed-off-by: Tsubasa Onishi Co-authored-by: megnvidia * Add knn to autodoc table. 
(#1244) --------- Signed-off-by: Tsubasa Onishi Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> Co-authored-by: tonishi-nv Co-authored-by: megnvidia * Enable import linting on internal imports. * Remove ensure_available function, it's confusing * Add logging imports to utils, and fix imports in examples. * Update imports in minimal examples * Update structural mechanics examples * Update import paths: reservoir_sim * Update import paths: additive manufacturing * Update import paths: topodiff * Update import paths: weather part 1 * Update import paths: weather part 2 * Update import paths: molecular dynamics * Update import paths: geophysics * Update import paths: cfd + external_aero 1 * Update import paths: cfd + external_aero 2 * Remove more DGL examples * Remove more DGL examples * cfd examples 3 * Last batch of example import fixes! * Enforce and protect external deps in utils. * Remove DGL. :party: * Don't force models yet * Refactor (#1249) * Move filesystems and version_check to core * Fix version check tests * Reorganize distributed, domain_parallel, and begin nn / utils cleanup. * Move modules and meta to core. Move registry to core. No tests fixed yet. * Add missing init files * Update build system and specify some deps. * Reorganize tests. * Update init files * Clean up neighbor tools. * Update testing * Fix compat tests * Move core model tests to tests/core/ * Add import lint config * Relocate layers * Move graphcast utils into model directory * Relocating util functionalities. * Further clean up and organize tests. * utils tests are passing now * Cleaning up distributed tests * Patching tests working again in nn * Fix sdf test * Fix zenith angle tests * Some organization of tests. Checkpoints is moved into utils. * Remove launch.utils and launch.config. Checkpointing is moved to phsyicsnemo.utils, launch.config is just gone. It was empty. * Most nn tests are passing * Further cleanup. Getting there! 
* Remove constants file * Add import linting to pre-commit. * Move gnn layers and start to fix several model tests. * AFNO is now passing. * Rnn models passing. * Fix improt * Healpix tests are working * Domino and unet working * Updating to address some test issues * MGN tests passing again * Most graphcast tests passing again * Move nd conv layers. * update fengwu and pangu * Update sfno and pix2pix test * update tests for figconvnet, swinrnn, superresnet * updating more models to pass * Update distributed tests, now passing. * Domain parallel tests now passing. * Fix active learning imports so tests pass in refactor * Fix some metric imports * Remove deploy package * Remove unused test file * unmigrate these files ... again? * Update import linter. * Cleaning up diffusion models. Not quite done yet. * Restore deleted files * Updating more tests. * Further updates to tests. Datapipes almost working. * update import paths * Starting to clean up dependency tree. * Fixing and adjusting a broad suite of tests. * Update test/domain_parallel/conftest.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Minor fix * Not seeing any errors in testing ... * A new X-MeshGraphNet example for reservoir simulation. 
(#1186) * X-MGN for reservoir simulation Signed-off-by: Tsubasa Onishi * installation bug fix Signed-off-by: Tsubasa Onishi * well object docstring fix Signed-off-by: Tsubasa Onishi * more well object docstring fix Signed-off-by: Tsubasa Onishi * improve path_utils Signed-off-by: Tsubasa Onishi * fix while space in config Signed-off-by: Tsubasa Onishi * fix version inconsistency in requirement.txt Signed-off-by: Tsubasa Onishi * add versions for some libs in requirement.txt Signed-off-by: Tsubasa Onishi * improve exception handling in mldlow_utils Signed-off-by: Tsubasa Onishi * improve mldlow_utils Signed-off-by: Tsubasa Onishi * improve datetiem in mlflow_utils Signed-off-by: Tsubasa Onishi * improve exception handling in inference Signed-off-by: Tsubasa Onishi * improve inference Signed-off-by: Tsubasa Onishi * improve ecl_reader Signed-off-by: Tsubasa Onishi * formatting Signed-off-by: Tsubasa Onishi * improve preprocessor Signed-off-by: Tsubasa Onishi * improve preprocessor loop Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * grad accum bug fix Signed-off-by: Tsubasa Onishi * total loss bug fix Signed-off-by: Tsubasa Onishi * added some safe guard for connection indexing Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * bug fix Signed-off-by: Tsubasa Onishi * bug fix Signed-off-by: Tsubasa Onishi * cleanup Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * update ecl_reader Signed-off-by: Tsubasa Onishi * cleanup utils Signed-off-by: Tsubasa Onishi * cleanup * cleanup * update configs * Update README.md style guide rule changes * Update README.md * fmt Signed-off-by: Tsubasa Onishi * improve docstring fmt Signed-off-by: Tsubasa Onishi * update license yr Signed-off-by: Tsubasa Onishi * cleanup well Signed-off-by: Tsubasa Onishi * cleanup preproc fmt Signed-off-by: Tsubasa Onishi * cleanup preproc fmt 
Signed-off-by: Tsubasa Onishi * cimprove infrence fmt Signed-off-by: Tsubasa Onishi * improve datetime Signed-off-by: Tsubasa Onishi * improve readme fmt Signed-off-by: Tsubasa Onishi * improve readme Signed-off-by: Tsubasa Onishi * improve train.py fmt Signed-off-by: Tsubasa Onishi * improve readme fmt Signed-off-by: Tsubasa Onishi * improve requirement Signed-off-by: Tsubasa Onishi * ilcense header Signed-off-by: Tsubasa Onishi * improve ecl reader logging Signed-off-by: Tsubasa Onishi * cleanup Signed-off-by: Tsubasa Onishi * license header Signed-off-by: Tsubasa Onishi * improve graph builder (parallel) + added results to readme Signed-off-by: Tsubasa Onishi * delete some unsed files Signed-off-by: Tsubasa Onishi * address PR comments Signed-off-by: Tsubasa Onishi * improve inference grdecl header Signed-off-by: Tsubasa Onishi * improve readme Signed-off-by: Tsubasa Onishi * improve readme Signed-off-by: Tsubasa Onishi * support time series Signed-off-by: Tsubasa Onishi * update config Signed-off-by: Tsubasa Onishi * minor update Signed-off-by: Tsubasa Onishi * improve graph builder Signed-off-by: Tsubasa Onishi * update ecl_reader logging Signed-off-by: Tsubasa Onishi * replace pickle with json Signed-off-by: Tsubasa Onishi * add license headers Signed-off-by: Tsubasa Onishi * remove unused png files Signed-off-by: Tsubasa Onishi * remove unsed import Signed-off-by: Tsubasa Onishi * remove emojis Signed-off-by: Tsubasa Onishi * replace print with logger Signed-off-by: Tsubasa Onishi * update docstring Signed-off-by: Tsubasa Onishi * update readme Signed-off-by: Tsubasa Onishi * minor updates Signed-off-by: Tsubasa Onishi * update readme Signed-off-by: Tsubasa Onishi * update header Signed-off-by: Tsubasa Onishi --------- Signed-off-by: Tsubasa Onishi Co-authored-by: megnvidia * Add knn to autodoc table. (#1244) * Enable import linting on internal imports. 
* Remove ensure_available function, it's confusing * Add logging imports to utils, and fix imports in examples. * Update imports in minimal examples * Update structural mechanics examples * Update import paths: reservoir_sim * Update import paths: additive manufacturing * Update import paths: topodiff * Update import paths: weather part 1 * Update import paths: weather part 2 * Update import paths: molecular dynamics * Update import paths: geophysics * Update import paths: cfd + external_aero 1 * Update import paths: cfd + external_aero 2 * Remove more DGL examples * Remove more DGL examples * cfd examples 3 * Last batch of example import fixes! * Enforce and protect external deps in utils. * Remove DGL. :party: * Don't force models yet --------- Signed-off-by: Tsubasa Onishi Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> Co-authored-by: tonishi-nv Co-authored-by: megnvidia * Automated model registry (#1252) * Deleted RegistreableModule Signed-off-by: Charlelie Laurent * Removed 'PhysicsNeMo' suffix in Module.from_torch method Signed-off-by: Charlelie Laurent * Implemented automatic registration for Module subclasses Signed-off-by: Charlelie Laurent * Fixed unused name Signed-off-by: Charlelie Laurent --------- Signed-off-by: Charlelie Laurent * Metadata name deprecation (#1257) * Initiated deprecation of field 'name' in ModelMetaData Signed-off-by: Charlelie Laurent * Removed all occurences of 'name' field in ModelMetaData Signed-off-by: Charlelie Laurent --------- Signed-off-by: Charlelie Laurent * Refactor (#1258) * Move filesystems and version_check to core * Fix version check tests * Reorganize distributed, domain_parallel, and begin nn / utils cleanup. * Move modules and meta to core. Move registry to core. No tests fixed yet. * Add missing init files * Update build system and specify some deps. * Reorganize tests. * Update init files * Clean up neighbor tools. 
* Update testing * Fix compat tests * Move core model tests to tests/core/ * Add import lint config * Relocate layers * Move graphcast utils into model directory * Relocating util functionalities. * Further clean up and organize tests. * utils tests are passing now * Cleaning up distributed tests * Patching tests working again in nn * Fix sdf test * Fix zenith angle tests * Some organization of tests. Checkpoints is moved into utils. * Remove launch.utils and launch.config. Checkpointing is moved to phsyicsnemo.utils, launch.config is just gone. It was empty. * Most nn tests are passing * Further cleanup. Getting there! * Remove constants file * Add import linting to pre-commit. * Move gnn layers and start to fix several model tests. * AFNO is now passing. * Rnn models passing. * Fix improt * Healpix tests are working * Domino and unet working * Updating to address some test issues * MGN tests passing again * Most graphcast tests passing again * Move nd conv layers. * update fengwu and pangu * Update sfno and pix2pix test * update tests for figconvnet, swinrnn, superresnet * updating more models to pass * Update distributed tests, now passing. * Domain parallel tests now passing. * Fix active learning imports so tests pass in refactor * Fix some metric imports * Remove deploy package * Remove unused test file * unmigrate these files ... again? * Update import linter. * Cleaning up diffusion models. Not quite done yet. * Restore deleted files * Updating more tests. * Further updates to tests. Datapipes almost working. * update import paths * Starting to clean up dependency tree. * Fixing and adjusting a broad suite of tests. * Update test/domain_parallel/conftest.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Minor fix * Not seeing any errors in testing ... * A new X-MeshGraphNet example for reservoir simulation. 
(#1186) * X-MGN for reservoir simulation Signed-off-by: Tsubasa Onishi * installation bug fix Signed-off-by: Tsubasa Onishi * well object docstring fix Signed-off-by: Tsubasa Onishi * more well object docstring fix Signed-off-by: Tsubasa Onishi * improve path_utils Signed-off-by: Tsubasa Onishi * fix while space in config Signed-off-by: Tsubasa Onishi * fix version inconsistency in requirement.txt Signed-off-by: Tsubasa Onishi * add versions for some libs in requirement.txt Signed-off-by: Tsubasa Onishi * improve exception handling in mldlow_utils Signed-off-by: Tsubasa Onishi * improve mldlow_utils Signed-off-by: Tsubasa Onishi * improve datetiem in mlflow_utils Signed-off-by: Tsubasa Onishi * improve exception handling in inference Signed-off-by: Tsubasa Onishi * improve inference Signed-off-by: Tsubasa Onishi * improve ecl_reader Signed-off-by: Tsubasa Onishi * formatting Signed-off-by: Tsubasa Onishi * improve preprocessor Signed-off-by: Tsubasa Onishi * improve preprocessor loop Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * grad accum bug fix Signed-off-by: Tsubasa Onishi * total loss bug fix Signed-off-by: Tsubasa Onishi * added some safe guard for connection indexing Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * bug fix Signed-off-by: Tsubasa Onishi * bug fix Signed-off-by: Tsubasa Onishi * cleanup Signed-off-by: Tsubasa Onishi * fmt Signed-off-by: Tsubasa Onishi * update ecl_reader Signed-off-by: Tsubasa Onishi * cleanup utils Signed-off-by: Tsubasa Onishi * cleanup * cleanup * update configs * Update README.md style guide rule changes * Update README.md * fmt Signed-off-by: Tsubasa Onishi * improve docstring fmt Signed-off-by: Tsubasa Onishi * update license yr Signed-off-by: Tsubasa Onishi * cleanup well Signed-off-by: Tsubasa Onishi * cleanup preproc fmt Signed-off-by: Tsubasa Onishi * cleanup preproc fmt 
Signed-off-by: Tsubasa Onishi * cimprove infrence fmt Signed-off-by: Tsubasa Onishi * improve datetime Signed-off-by: Tsubasa Onishi * improve readme fmt Signed-off-by: Tsubasa Onishi * improve readme Signed-off-by: Tsubasa Onishi * improve train.py fmt Signed-off-by: Tsubasa Onishi * improve readme fmt Signed-off-by: Tsubasa Onishi * improve requirement Signed-off-by: Tsubasa Onishi * ilcense header Signed-off-by: Tsubasa Onishi * improve ecl reader logging Signed-off-by: Tsubasa Onishi * cleanup Signed-off-by: Tsubasa Onishi * license header Signed-off-by: Tsubasa Onishi * improve graph builder (parallel) + added results to readme Signed-off-by: Tsubasa Onishi * delete some unsed files Signed-off-by: Tsubasa Onishi * address PR comments Signed-off-by: Tsubasa Onishi * improve inference grdecl header Signed-off-by: Tsubasa Onishi * improve readme Signed-off-by: Tsubasa Onishi * improve readme Signed-off-by: Tsubasa Onishi * support time series Signed-off-by: Tsubasa Onishi * update config Signed-off-by: Tsubasa Onishi * minor update Signed-off-by: Tsubasa Onishi * improve graph builder Signed-off-by: Tsubasa Onishi * update ecl_reader logging Signed-off-by: Tsubasa Onishi * replace pickle with json Signed-off-by: Tsubasa Onishi * add license headers Signed-off-by: Tsubasa Onishi * remove unused png files Signed-off-by: Tsubasa Onishi * remove unsed import Signed-off-by: Tsubasa Onishi * remove emojis Signed-off-by: Tsubasa Onishi * replace print with logger Signed-off-by: Tsubasa Onishi * update docstring Signed-off-by: Tsubasa Onishi * update readme Signed-off-by: Tsubasa Onishi * minor updates Signed-off-by: Tsubasa Onishi * update readme Signed-off-by: Tsubasa Onishi * update header Signed-off-by: Tsubasa Onishi --------- Signed-off-by: Tsubasa Onishi Co-authored-by: megnvidia * Add knn to autodoc table. (#1244) * Enable import linting on internal imports. 
* Remove ensure_available function, it's confusing * Add logging imports to utils, and fix imports in examples. * Update imports in minimal examples * Update structural mechanics examples * Update import paths: reservoir_sim * Update import paths: additive manufacturing * Update import paths: topodiff * Update import paths: weather part 1 * Update import paths: weather part 2 * Update import paths: molecular dynamics * Update import paths: geophysics * Update import paths: cfd + external_aero 1 * Update import paths: cfd + external_aero 2 * Remove more DGL examples * Remove more DGL examples * cfd examples 3 * Last batch of example import fixes! * Enforce and protect external deps in utils. * Remove DGL. :party: * Don't force models yet * Update version (#1193) * Fix depenedncies to enable hello world (#1195) * Remove zero-len arrays from test dataset (#1198) * Merge updates to Gray Scott example (#1239) * Remove pyevtk * update dependency * update dimensions * ci issues * Interpolation model example (#1149) * Temporal interpolation training recipe * Add README * Docs changes based on comments * Update docstrings and README * Add temporal interpolation animation * Add animation link * Add shape check in loss * Updates of configs + trainer * Update config comments * Update README.md style guide edits * Added wandb logging Signed-off-by: Charlelie Laurent * Reformated sections in docstring for GeometricL2Loss Signed-off-by: Charlelie Laurent * Update README and configs * README changes + type hint fixes * Update README.md * Draft of validation script * Update validation and README * Fixed command in README.md for temporal_interpolation example Signed-off-by: Charlelie Laurent * Removed unused import in datapipe/climate_interp.py Signed-off-by: Charlelie Laurent * Updated license headers in temporal_interpolation example Signed-off-by: Charlelie Laurent * Renamed methods to avoid implicit shadowing in Trainer class Signed-off-by: Charlelie Laurent * Cosmetic changes 
in train.py and removed unused import in validate.py Signed-off-by: Charlelie Laurent * Added clamp in validate.py to make sure step does not go out of bounds Signed-off-by: Charlelie Laurent * Added the temporal_interpolation example to the docs + updated CHANGELOG.md Signed-off-by: Charlelie Laurent * Addressing remaining comments * Merged two data source classes in climate_interp.py Signed-off-by: Charlelie Laurent --------- Signed-off-by: Charlelie Laurent Co-authored-by: Charlelie Laurent Co-authored-by: megnvidia Co-authored-by: Charlelie Laurent <84199758+CharlelieLrt@users.noreply.github.com> * update versions --------- Signed-off-by: Tsubasa Onishi Signed-off-by: Charlelie Laurent Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> Co-authored-by: tonishi-nv Co-authored-by: megnvidia Co-authored-by: Kaustubh Tangsali <71059996+ktangsali@users.noreply.github.com> Co-authored-by: Jussi Leinonen Co-authored-by: Charlelie Laurent Co-authored-by: Charlelie Laurent <84199758+CharlelieLrt@users.noreply.github.com> Co-authored-by: Kaustubh Tangsali * Remove IPDB * Few more dep fixes. * Refactor (#1261) * Move filesystems and version_check to core * Fix version check tests * Reorganize distributed, domain_parallel, and begin nn / utils cleanup. * Move modules and meta to core. Move registry to core. No tests fixed yet. * Add missing init files * Update build system and specify some deps. * Reorganize tests. * Update init files * Clean up neighbor tools. * Update testing * Fix compat tests * Move core model tests to tests/core/ * Add import lint config * Relocate layers * Move graphcast utils into model directory * Relocating util functionalities. * Further clean up and organize tests. * utils tests are passing now * Cleaning up distributed tests * Patching tests working again in nn * Fix sdf test * Fix zenith angle tests * Some organization of tests. Checkpoints is moved into utils. * Remove launch.utils and launch.config. 
Checkpointing is moved to phsyicsnemo.utils, launch.config is just gone. It was empty. * Most nn tests are passing * Further cleanup. Getting there! * Remove constants file * Add import linting to pre-commit. * Move gnn layers and start to fix several model tests. * AFNO is now passing. * Rnn models passing. * Fix improt * Healpix tests are working * Domino and unet working * Updating to address some test issues * MGN tests passing again * Most graphcast tests passing again * Move nd conv layers. * update fengwu and pangu * Update sfno and pix2pix test * update tests for figconvnet, swinrnn, superresnet * updating more models to pass * Update distributed tests, now passing. * Domain parallel tests now passing. * Fix active learning imports so tests pass in refactor * Fix some metric imports * Remove deploy package * Remove unused test file * unmigrate these files ... again? * Update import linter. * Cleaning up diffusion models. Not quite done yet. * Restore deleted files * Updating more tests. * Further updates to tests. Datapipes almost working. * update import paths * Starting to clean up dependency tree. * Fixing and adjusting a broad suite of tests. * Update test/domain_parallel/conftest.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Minor fix * Not seeing any errors in testing ... * Enable import linting on internal imports. * Remove ensure_available function, it's confusing * Add logging imports to utils, and fix imports in examples. 
* Update imports in minimal examples * Update structural mechanics examples * Update import paths: reservoir_sim * Update import paths: additive manufacturing * Update import paths: topodiff * Update import paths: weather part 1 * Update import paths: weather part 2 * Update import paths: molecular dynamics * Update import paths: geophysics * Update import paths: cfd + external_aero 1 * Update import paths: cfd + external_aero 2 * Remove more DGL examples * Remove more DGL examples * cfd examples 3 * Last batch of example import fixes! * Enforce and protect external deps in utils. * Remove DGL. :party: * Don't force models yet * Remove IPDB * Few more dep fixes. * Enhance checkpoint configuration for DLWP Healpix and GraphCast (#1253) * feat(weather): Improve configuration for DLWP Healpix and GraphCast examples - Added configurable checkpoint directory to DLWP Healpix config and training script. - Implemented Trainer logic to use specific checkpoint directory. - Updated utils.py to respect exact checkpoint path. - Made Weights & Biases entity and project configurable in GraphCast example. * fix(dlwp_healpix): remove deprecated configs - Removed the deprecated `verbose` parameter from the `CosineAnnealingLR` configuration in DLWP HEALPix, which was causing a TypeError. - Removed unused configs from examples/weather/dlwp_healpix/ * Transolver volume (#1242) * Implement transolver ++ physics attention * Enable ++ in Transolver. * Fix temperature correction terms. * Starting work adapting the domino datapipe techniques to transolver. * Working towards transolver volume training by mergeing with domino dataset. Surface dataloading is prototyped, not finished yet. * Updating * Remove printout * Enable transolver for volumetric data * Update transolver training script to support either surface or volume data. Applied some cleanup to make the datapipe similar to domino, which is a step towards unification. 
* Updating datapipe * Tweak transolver volume configs * Add transolverX model * Enable nearly-uniform sampling of very very large arrays * limit benchmarking to train epoch, enable profiler in config * Update volume config slightly * Update training scripts to properly enable data preloading * Working towards adding a muon optimzier in transolver * Add peter's implementation of muon with a combined optimizer. switch to a flat LR. * Add updated inference script that can also calculate drag and lift * Add better docstrings for typhon * Move typhon to experimental * Move forwards docstring * Adding typhon model and configs. * Update readme. * Update * Remove extra model. Update recipes. * Update cae_dataset.py Implement abstract methods in base classes. * Update Physics_Attention.py Ensure plus parameter is passed to base class. * Update test_mesh_datapipe.py Update import path for mesh datapipe. * Fix ruff issues --------- Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> Co-authored-by: Dileep Ranganathan <8152399+dran-dev@users.noreply.github.com> * Add external import coding standards. * Update external import standards. * Ensure vtk functions are protected. * Protect pyvista import * Closing more import gaps * Remove DGL from meshgraphkan * All models now comply with external import linting. * Remove DGL datapipes * cae datapipes in compliance * Update pyproject.toml * Add version numbers to deps * Refactor (#1261) * Move filesystems and version_check to core * Fix version check tests * Reorganize distributed, domain_parallel, and begin nn / utils cleanup. * Move modules and meta to core. Move registry to core. No tests fixed yet. * Add missing init files * Update build system and specify some deps. * Reorganize tests. * Update init files * Clean up neighbor tools. 
* Update testing * Fix compat tests * Move core model tests to tests/core/ * Add import lint config * Relocate layers * Move graphcast utils into model directory * Relocating util functionalities. * Further clean up and organize tests. * utils tests are passing now * Cleaning up distributed tests * Patching tests working again in nn * Fix sdf test * Fix zenith angle tests * Some organization of tests. Checkpoints is moved into utils. * Remove launch.utils and launch.config. Checkpointing is moved to phsyicsnemo.utils, launch.config is just gone. It was empty. * Most nn tests are passing * Further cleanup. Getting there! * Remove constants file * Add import linting to pre-commit. * Move gnn layers and start to fix several model tests. * AFNO is now passing. * Rnn models passing. * Fix improt * Healpix tests are working * Domino and unet working * Updating to address some test issues * MGN tests passing again * Most graphcast tests passing again * Move nd conv layers. * update fengwu and pangu * Update sfno and pix2pix test * update tests for figconvnet, swinrnn, superresnet * updating more models to pass * Update distributed tests, now passing. * Domain parallel tests now passing. * Fix active learning imports so tests pass in refactor * Fix some metric imports * Remove deploy package * Remove unused test file * unmigrate these files ... again? * Update import linter. * Cleaning up diffusion models. Not quite done yet. * Restore deleted files * Updating more tests. * Further updates to tests. Datapipes almost working. * update import paths * Starting to clean up dependency tree. * Fixing and adjusting a broad suite of tests. * Update test/domain_parallel/conftest.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Minor fix * Not seeing any errors in testing ... * Enable import linting on internal imports. * Remove ensure_available function, it's confusing * Add logging imports to utils, and fix imports in examples. 
* Update imports in minimal examples * Update structural mechanics examples * Update import paths: reservoir_sim * Update import paths: additive manufacturing * Update import paths: topodiff * Update import paths: weather part 1 * Update import paths: weather part 2 * Update import paths: molecular dynamics * Update import paths: geophysics * Update import paths: cfd + external_aero 1 * Update import paths: cfd + external_aero 2 * Remove more DGL examples * Remove more DGL examples * cfd examples 3 * Last batch of example import fixes! * Enforce and protect external deps in utils. * Remove DGL. :party: * Don't force models yet * Remove IPDB * Few more dep fixes. * Enhance checkpoint configuration for DLWP Healpix and GraphCast (#1253) * feat(weather): Improve configuration for DLWP Healpix and GraphCast examples - Added configurable checkpoint directory to DLWP Healpix config and training script. - Implemented Trainer logic to use specific checkpoint directory. - Updated utils.py to respect exact checkpoint path. - Made Weights & Biases entity and project configurable in GraphCast example. * fix(dlwp_healpix): remove deprecated configs - Removed the deprecated `verbose` parameter from the `CosineAnnealingLR` configuration in DLWP HEALPix, which was causing a TypeError. - Removed unused configs from examples/weather/dlwp_healpix/ * Transolver volume (#1242) * Implement transolver ++ physics attention * Enable ++ in Transolver. * Fix temperature correction terms. * Starting work adapting the domino datapipe techniques to transolver. * Working towards transolver volume training by mergeing with domino dataset. Surface dataloading is prototyped, not finished yet. * Updating * Remove printout * Enable transolver for volumetric data * Update transolver training script to support either surface or volume data. Applied some cleanup to make the datapipe similar to domino, which is a step towards unification. 
* Updating datapipe * Tweak transolver volume configs * Add transolverX model * Enable nearly-uniform sampling of very very large arrays * limit benchmarking to train epoch, enable profiler in config * Update volume config slightly * Update training scripts to properly enable data preloading * Working towards adding a muon optimzier in transolver * Add peter's implementation of muon with a combined optimizer. switch to a flat LR. * Add updated inference script that can also calculate drag and lift * Add better docstrings for typhon * Move typhon to experimental * Move forwards docstring * Adding typhon model and configs. * Update readme. * Update * Remove extra model. Update recipes. * Update cae_dataset.py Implement abstract methods in base classes. * Update Physics_Attention.py Ensure plus parameter is passed to base class. * Update test_mesh_datapipe.py Update import path for mesh datapipe. * Fix ruff issues --------- Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> Co-authored-by: Dileep Ranganathan <8152399+dran-dev@users.noreply.github.com> * fix import error from wandb * remove instance check * Initial restructure Signed-off-by: Charlelie Laurent * Completed restructure of diffusion package Signed-off-by: Charlelie Laurent * UV <---> Pip must stay in sync. (#1264) * Ensure that the pip dependency group exclusively matches the resolution of the dependency-groups from Pep 735 that uv enables. * Revert some dependency tracking. Now, we allow unbare external imports anywhere. But the import list is smaller, and more easily managed with uv / pip equally. extras are now chained and built in the optional deps. * Add dev deps. Add docstring for ci script. * Update pyproject with small changes from review. * Fix broken imports * Fix README links in transolver and domino examples (#1259) - Update links to physicsnemo-curator examples in DoMINO and Transolver READMEs. 
- Update link to CachedDoMINODataset and path reference for cache_data.py script. * Add xarray, timm to core deps * update import * Somehow, a number of import protections got broken * Automatically select CPU or CPU+CUDA instead of decorating every test. * ensure te installed for serialization test * All CPU tests are passing * Remove DGL/PyG equivalency tests (#1273) * Install ci (#1274) * Adding cpu ci * Make sure to get dev deps * target tests properly, maybe fix uv build * make sure radius search passes on CPU * maybe fix uv sync. Hopefully fix ubuntu testing. * Fix a test typo * Matrix to more python versions * drop support for python < 3.11 * fix link, enable nightly, target only selected branches * Only run python 3.12 on PRs, ubuntu and mac os, uv and pip * Add coverage to devs * Limit to one test path for PRs. nightly CI is consistent to all tests. uv install gets a retry. * Remove TensorFlow dependency in Vortex Shedding and Lagrangian MGN examples (#1276) * Remove TensorFlow dependency in Vortex Shedding and Lagrangian MGN examples * Update READMEs * Change registry behavior and list all models as entry points (#1278) Signed-off-by: Charlelie Laurent * Renamed LearnedSimulator into VGFNLearnedSimulator Signed-off-by: Charlelie Laurent * Fix tests + improve docs for new register arg in from_torch Signed-off-by: Charlelie Laurent * Remove physicsnemo.model.Module remaining items * Remove incorrect meta import * Remove incorrect comment * Fix linting errors * Fixing some linting errors * More linter errors * One more. * Update knn tests * Purge pylib cugraphops * Remove more cugraphops paths. * Trying to close some CI errors. * Fixing more CI issues * Fix MGN tests (#1281) * Fix apex issues on CPU with a diffusion-specific device fixture. * Fixing shard tensor import; adjusting pytorch geometric import point in tests. * Fixing more imports. 
* fix one or two more * Fix MGK, HMGN tests (#1282) * Fix import error * Remove cugraphops * Fix many tests * Add migration guide early draft. Update external imports. * Attempting to fix the last failing tests. * Add pre-commit action. (#1286) * Add pre-commit action. * Remove complexipy * Maybe fix import linting * Tweak the CI install and testing of imports / docstrings * Wow, the tests were not tied to ANY timezone. It only passes in UTC.... * fix all but 2 docstring tests * Resolve circular import + fix linting errors. * Fixed broken Group Norm Signed-off-by: Charlelie Laurent * Added diffusion.generate Signed-off-by: Charlelie Laurent * Added future feature and deprecation warnings for diffusion module Signed-off-by: Charlelie Laurent * Defined import-linter contracts for physicsnemo.diffusion Signed-off-by: Charlelie Laurent * Updated PR template with missing item Signed-off-by: Charlelie Laurent * Added missing diffusion.generate Signed-off-by: Charlelie Laurent * Fixed a few remaining paths physicsnemo.models.diffusion that does not exists anymore Signed-off-by: Charlelie Laurent * CI tests fixes Signed-off-by: Charlelie Laurent * mmiranda nvidia style guide Updates diffusion.rst I still have mixed feelings about the 'spell out any number less than 10', but it is the style guide rule that I am contractually required to make. The only way around it is for it to be in code font.... * mmiranda smol style guide Updates physicsnemo.utils.rst I want to init-cap all the 'utils' in the headings.....but it could be considered a code thing, so I am leaving it. 
* Fixed checklist in PR template Signed-off-by: Charlelie Laurent * Deleted comment in .importlinter Signed-off-by: Charlelie Laurent * Fixed references in diffusion.rst Signed-off-by: Charlelie Laurent * Fix checkpoint loading with Module subclass when known Signed-off-by: Charlelie Laurent * Deleted physicsnemo/compat Signed-off-by: Charlelie Laurent * Deleted useless comments in flow_reconstruction_diffusion example Signed-off-by: Charlelie Laurent * Renamed Attantion into UNetAttention Signed-off-by: Charlelie Laurent * Implemented BasePreconditioner Signed-off-by: Charlelie Laurent * Improvements to BaseConditioner docs Signed-off-by: Charlelie Laurent * Implemented new preconditioners based on BasePerconditioner Signed-off-by: Charlelie Laurent * Migrated legacy preconditioners to reuse new preconditioners Signed-off-by: Charlelie Laurent * Initial implementation of tests for preconditioners Signed-off-by: Charlelie Laurent * Added reference data for non-regression CI tests of preconditioners Signed-off-by: Charlelie Laurent * Improvements to preconditioners CI tests Signed-off-by: Charlelie Laurent * Adedd a few details in BasePreconditioner doctrsing Signed-off-by: Charlelie Laurent * Updated CHANGELOG.md Signed-off-by: Charlelie Laurent * Improved documentation of signature requirement in BasePreconditioner Signed-off-by: Charlelie Laurent * Renamed BasePreconditioner into BaseAffinePreconditioner Signed-off-by: Charlelie Laurent * Added DiffusionModel protocol to specify diffusion models signature Signed-off-by: Charlelie Laurent * Changed condition argument to TensorDict instead of Dict of tensors Signed-off-by: Charlelie Laurent * Moved all preconditioners scalar attributes to pytorch buffers instead of python float Signed-off-by: Charlelie Laurent * Improvements to make precondtioners tests more robust on GPU Signed-off-by: Charlelie Laurent * Initial implementation of diffusion sampler Signed-off-by: Charlelie Laurent * Some updates to samplers and 
solvers Signed-off-by: Charlelie Laurent * Some progress Signed-off-by: Charlelie Laurent * Mostly completed implementation of sampling utilities Signed-off-by: Charlelie Laurent * Added tEDM and VP noise schedulers Signed-off-by: Charlelie Laurent * Changed str to Literal Signed-off-by: Charlelie Laurent * Replaced scale s(t) with alpha in stochastic solvers Signed-off-by: Charlelie Laurent * Removed inheritance in student-t EDM noise scheduler Signed-off-by: Charlelie Laurent * Addressed PR comments Signed-off-by: Charlelie Laurent * Refactored protocols Denoiser and Predictor Signed-off-by: Charlelie Laurent * Refactored get_denoiser to use keyword arguments for the input predictor Signed-off-by: Charlelie Laurent * Fix license header Signed-off-by: Charlelie Laurent * Fixed docstring example in samplers.py Signed-off-by: Charlelie Laurent * Revert "Fixed docstring example in samplers.py" This reverts commit 79db00feb25b34211cc8f78d74bb5e88048d7fbc. * Fixed docstring example in samplers.py Signed-off-by: Charlelie Laurent * Changed alpha_fn to diffusion_fn in solvers.py Signed-off-by: Charlelie Laurent * Fixed code-blocks and missing jaxtyping Signed-off-by: Charlelie Laurent * Temporarily omit physicsnemo.diffusion from coverage Signed-off-by: Charlelie Laurent --------- Signed-off-by: John E Signed-off-by: Charlelie Laurent Signed-off-by: Kelvin Lee Signed-off-by: Tsubasa Onishi Co-authored-by: Corey Adams <6619961+coreyjadams@users.noreply.github.com> Co-authored-by: Alexey Kamenev Co-authored-by: John Eismeier <42679190+jeis4wpi@users.noreply.github.com> Co-authored-by: Peter Harrington <48932392+pzharrington@users.noreply.github.com> Co-authored-by: Mohammad Amin Nabian Co-authored-by: Yongming Ding Co-authored-by: ram-cherukuri <104155145+ram-cherukuri@users.noreply.github.com> Co-authored-by: Deepak Akhare Co-authored-by: Sai Krishnan Chandrasekar <157182662+saikrishnanc-nv@users.noreply.github.com> Co-authored-by: greptile-apps[bot] 
<165735046+greptile-apps[bot]@users.noreply.github.com> Co-authored-by: Kelvin Lee Co-authored-by: tonishi-nv Co-authored-by: megnvidia Co-authored-by: Kaustubh Tangsali <71059996+ktangsali@users.noreply.github.com> Co-authored-by: Jussi Leinonen Co-authored-by: Kaustubh Tangsali Co-authored-by: Dileep Ranganathan <8152399+dran-dev@users.noreply.github.com> --- Makefile | 3 +- physicsnemo/diffusion/__init__.py | 2 +- physicsnemo/diffusion/base.py | 210 +- .../diffusion/noise_schedulers/__init__.py | 17 +- .../noise_schedulers/noise_schedulers.py | 1905 ++++++++++++++++- physicsnemo/diffusion/samplers/__init__.py | 8 + physicsnemo/diffusion/samplers/samplers.py | 358 +++- physicsnemo/diffusion/samplers/solvers.py | 766 +++++++ 8 files changed, 3240 insertions(+), 29 deletions(-) create mode 100644 physicsnemo/diffusion/samplers/solvers.py diff --git a/Makefile b/Makefile index 19a46fc909..f33e87d9fd 100644 --- a/Makefile +++ b/Makefile @@ -48,9 +48,10 @@ pytest-internal: pytest && \ cd ../../ +# NOTE: temporarily omitting diffusion coverage until we have a better way to test it. coverage: coverage combine && \ - coverage report --show-missing --omit=*test* --omit=*internal* --omit=*experimental* --fail-under=60 && \ + coverage report --show-missing --omit=*test* --omit=*internal* --omit=*experimental* --omit=*diffusion* --fail-under=60 && \ coverage html all-ci: get-data setup-ci black interrogate lint license install pytest doctest coverage diff --git a/physicsnemo/diffusion/__init__.py b/physicsnemo/diffusion/__init__.py index 2bcfab462c..48c8022b1f 100644 --- a/physicsnemo/diffusion/__init__.py +++ b/physicsnemo/diffusion/__init__.py @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .base import DiffusionModel # noqa: F401 +from .base import Denoiser, DiffusionModel, Predictor # noqa: F401 diff --git a/physicsnemo/diffusion/base.py b/physicsnemo/diffusion/base.py index b088eb68a9..83c04df47c 100644 --- a/physicsnemo/diffusion/base.py +++ b/physicsnemo/diffusion/base.py @@ -58,11 +58,11 @@ class DiffusionModel(Protocol): >>> import torch.nn.functional as F >>> from physicsnemo.diffusion import DiffusionModel >>> - >>> class Denoiser: + >>> class Model: ... def __call__(self, x, t, condition=None, **kwargs): ... return F.relu(x) ... - >>> isinstance(Denoiser(), DiffusionModel) + >>> isinstance(Model(), DiffusionModel) True """ @@ -98,3 +98,209 @@ def __call__( Model output with the same shape as ``x``. """ ... + + +@runtime_checkable +class Predictor(Protocol): + r""" + Protocol defining a predictor interface for diffusion models. + + A predictor is any callable that takes a noisy state ``x`` + and diffusion time ``t``, and returns a prediction about the clean data or + the noise. Common types of predictors include x0-predictor (predicts the + clean data :math:`\mathbf{x}_0`), score-predictor, noise-predictor + (predicts the noise :math:`\boldsymbol{\epsilon}`), velocity-predictor etc. + + This protocol is **generic** and does not assume any specific type of + prediction. A predictor can be a trained neural network, a guidance + function (e.g., classifier-free guidance, DPS-style guidance), or any + combination thereof. The exact meaning of the output depends on the + predictor type and how it is used. Any callable that implements this + interface can be used as a predictor in sampling utilities. + + This protocol is typically used during inference. For training, which + often requires additional inputs like conditioning, use the more general + :class:`DiffusionModel` protocol instead. 
A :class:`Predictor` can be + obtained from a :class:`DiffusionModel` by partially applying the + ``condition`` and any other keyword arguments using + ``functools.partial``. + + **Relationship to Denoiser:** + + A :class:`Denoiser` is the update function used during sampling (e.g., + the right-hand side of an ODE/SDE). It is obtained from a + :class:`Predictor` via the + :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.get_denoiser` + factory. A typical case is ODE/SDE-based sampling, where one solves: + + .. math:: + \frac{d\mathbf{x}}{dt} = D(\mathbf{x}, t;\, P(\mathbf{x}, t)) + + where :math:`P` is the **predictor** and :math:`D` is the **denoiser** + that wraps it. This equation captures the essence of how these two + concepts are related in the framework. + + See Also + -------- + :class:`Denoiser` : The interface for sampling update functions. + :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.get_denoiser` : + Factory to convert a predictor into a denoiser. + + Examples + -------- + **Example 1:** Convert a trained conditional model into a predictor using + ``functools.partial``: + + >>> import torch + >>> from functools import partial + >>> from tensordict import TensorDict + >>> from physicsnemo.diffusion import Predictor + >>> + >>> class MyModel: + ... def __call__(self, x, t, condition=None): + ... # x0-predictor: returns estimate of clean data + ... # (here assumes conditional normal distribution N(x|y)) + ... t_bc = t.view(-1, *([1] * (x.ndim - 1))) + ... return x / (1 + t_bc**2) + condition["y"] + ... + >>> model = MyModel() + >>> cond = TensorDict({"y": torch.randn(2, 4)}, batch_size=[2]) + >>> x0_predictor = partial(model, condition=cond) + >>> isinstance(x0_predictor, Predictor) + True + + **Example 2:** Convert the x0-predictor above into a score-predictor + (using a simple EDM-like schedule where :math:`\sigma(t) = t` and + :math:`\alpha(t) = 1`): + + >>> def x0_to_score(x0, x_t, t): + ... 
sigma_sq = t.view(-1, 1) ** 2 + ... return (x0 - x_t) / sigma_sq + >>> + >>> def score_predictor(x, t): + ... x0_pred = x0_predictor(x, t) + ... return x0_to_score(x0_pred, x, t) + >>> + >>> isinstance(score_predictor, Predictor) + True + """ + + def __call__( + self, + x: Float[torch.Tensor, " B *dims"], + t: Float[torch.Tensor, " B"], + ) -> Float[torch.Tensor, " B *dims"]: + r""" + Forward pass of the predictor. + + Parameters + ---------- + x : torch.Tensor + Noisy latent state of shape :math:`(B, *)` where :math:`B` is the + batch size and :math:`*` denotes any number of additional + dimensions (e.g., channels and spatial dimensions). + t : torch.Tensor + Batched diffusion time tensor of shape :math:`(B,)`. + + Returns + ------- + torch.Tensor + Prediction output with the same shape as ``x``. The exact meaning + depends on the predictor type (x0, score, noise, velocity, etc.). + """ + ... + + +@runtime_checkable +class Denoiser(Protocol): + r""" + Protocol defining a denoiser interface for diffusion model sampling. + + A denoiser is the **update function** used during sampling. It takes a + noisy state ``x`` and diffusion time ``t``, and returns the update term + consumed by a :class:`~physicsnemo.diffusion.samplers.solvers.Solver`. + For continuous-time methods this is typically the right-hand side of the + ODE/SDE, but the interface is generic and can support other sampling + methods as well. + + This is the interface used by + :class:`~physicsnemo.diffusion.samplers.solvers.Solver` classes and the + :func:`~physicsnemo.diffusion.samplers.sample` function. Any callable + that implements this interface can be used as a denoiser. + + **Important distinction from Predictor:** + + - A :class:`Predictor` is any callable that outputs a raw prediction + (e.g., clean data :math:`\mathbf{x}_0`, score, guidance signal, etc.). + - A :class:`Denoiser` is the update function derived from one or more + predictors, used directly by the solver during sampling. 
+ + **Typical workflow:** + + 1. Start with one or more :class:`Predictor` instances (e.g. trained model) + 2. Optionally combine predictors (e.g., conditional + guidance scores) + 3. Convert to a :class:`Denoiser` using + :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.get_denoiser` + 4. Pass the denoiser to + :func:`~physicsnemo.diffusion.samplers.sample` together with a + :class:`~physicsnemo.diffusion.samplers.solvers.Solver` + + See Also + -------- + :class:`Predictor` : The interface for raw predictions. + :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.get_denoiser` : + Factory to convert a predictor into a denoiser. + :func:`~physicsnemo.diffusion.samplers.sample` : The sampling function + that uses this denoiser interface. + + Examples + -------- + Manually creating a denoiser from an x0-predictor using a simple EDM-like + schedule (:math:`\sigma(t)=t`, :math:`\alpha(t)=1`): + + >>> import torch + >>> from physicsnemo.diffusion import Denoiser + >>> + >>> # Start from a predictor (x0-predictor) + >>> def x0_predictor(x, t): + ... t_bc = t.view(-1, *([1] * (x.ndim - 1))) + ... return x / (1 + t_bc**2) + >>> + >>> # Build a denoiser (ODE RHS) from scratch: + >>> # score = (x0 - x) / sigma^2, ODE RHS = -0.5 * g^2 * score + >>> # For EDM: sigma = t, g^2 = 2*t, so RHS = (x0 - x) / t + >>> def my_denoiser(x, t): + ... x0 = x0_predictor(x, t) + ... t_bc = t.view(-1, *([1] * (x.ndim - 1))) + ... return (x0 - x) / t_bc + ... + >>> isinstance(my_denoiser, Denoiser) + True + """ + + def __call__( + self, + x: Float[torch.Tensor, " B *dims"], + t: Float[torch.Tensor, " B"], + ) -> Float[torch.Tensor, " B *dims"]: + r""" + Compute the denoising update at the given state and time. + + Parameters + ---------- + x : torch.Tensor + Noisy latent state of shape :math:`(B, *)` where :math:`B` is the + batch size and :math:`*` denotes any number of additional + dimensions (e.g., channels and spatial dimensions). 
+ t : torch.Tensor + Batched diffusion time tensor of shape :math:`(B,)`. + All batch elements in the latent state ``x`` typically share the + same diffusion time values, but ``t`` is still required to be a + batched tensor. + + Returns + ------- + torch.Tensor + Denoising update term with the same shape as ``x``. + """ + ... diff --git a/physicsnemo/diffusion/noise_schedulers/__init__.py b/physicsnemo/diffusion/noise_schedulers/__init__.py index 9524fd5dd9..9b0aad7788 100644 --- a/physicsnemo/diffusion/noise_schedulers/__init__.py +++ b/physicsnemo/diffusion/noise_schedulers/__init__.py @@ -14,13 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import warnings - -from physicsnemo.core.warnings import FutureFeatureWarning - -warnings.warn( - "The 'physicsnemo.diffusion.noise_schedulers' module is a placeholder for " - "future functionality that will be implemented in an upcoming release.", - FutureFeatureWarning, - stacklevel=2, +from .noise_schedulers import ( # noqa: F401 + EDMNoiseScheduler, + IDDPMNoiseScheduler, + LinearGaussianNoiseScheduler, + NoiseScheduler, + StudentTEDMNoiseScheduler, + VENoiseScheduler, + VPNoiseScheduler, ) diff --git a/physicsnemo/diffusion/noise_schedulers/noise_schedulers.py b/physicsnemo/diffusion/noise_schedulers/noise_schedulers.py index 91c579e142..d553635816 100644 --- a/physicsnemo/diffusion/noise_schedulers/noise_schedulers.py +++ b/physicsnemo/diffusion/noise_schedulers/noise_schedulers.py @@ -14,14 +14,1901 @@ # See the License for the specific language governing permissions and # limitations under the License. 
@runtime_checkable
class NoiseScheduler(Protocol):
    r"""
    Structural (duck-typed) interface for noise schedulers.

    A noise scheduler groups together everything a diffusion pipeline
    needs from the noise schedule, without assuming any particular
    functional form of that schedule.

    **Training methods:**

    - :meth:`sample_time`: draw random diffusion times
    - :meth:`add_noise`: corrupt clean data at given diffusion times

    **Sampling methods:**

    - :meth:`timesteps`: build the discrete time-step grid
    - :meth:`init_latents`: draw the initial noisy state :math:`\mathbf{x}_N`
    - :meth:`get_denoiser`: turn a predictor (clean-data, score, ...) into a
      sampling-compatible denoiser

    Any object providing these five methods can be used with the
    diffusion training and sampling utilities; no subclassing is
    required. The protocol is ``runtime_checkable``, so conformance can
    be checked with ``isinstance`` (method presence only, not
    signatures).

    See Also
    --------
    :class:`LinearGaussianNoiseScheduler` : abstract base class that
        implements this protocol for linear-Gaussian schedules.
    :func:`~physicsnemo.diffusion.samplers.sample` : sampling function
        consuming this interface.
    """

    def sample_time(
        self,
        N: int,
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> Float[Tensor, " N"]:
        r"""
        Draw ``N`` random diffusion times for training.

        Typically used by the denoising score matching loss.

        Parameters
        ----------
        N : int
            Number of time values to draw.
        device : torch.device, optional
            Device of the returned tensor.
        dtype : torch.dtype, optional
            Dtype of the returned tensor.

        Returns
        -------
        Tensor
            Diffusion times of shape :math:`(N,)`.
        """
        ...

    def add_noise(
        self,
        x0: Float[Tensor, " B *dims"],
        time: Float[Tensor, " B"],
    ) -> Float[Tensor, " B *dims"]:
        r"""
        Corrupt clean data at the given diffusion times.

        Used in training to build noisy samples from clean data.

        Parameters
        ----------
        x0 : Tensor
            Clean latent state of shape :math:`(B, *)`.
        time : Tensor
            Diffusion times of shape :math:`(B,)`.

        Returns
        -------
        Tensor
            Noisy latent state, same shape as ``x0``.
        """
        ...

    def timesteps(
        self,
        num_steps: int,
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> Float[Tensor, " N+1"]:
        r"""
        Build the discrete time-step grid used during sampling.

        Parameters
        ----------
        num_steps : int
            Number of sampling steps.
        device : torch.device, optional
            Device of the returned tensor.
        dtype : torch.dtype, optional
            Dtype of the returned tensor.

        Returns
        -------
        Tensor
            Time-steps of shape :math:`(N + 1,)` in decreasing order,
            with the last entry equal to 0.
        """
        ...

    def init_latents(
        self,
        spatial_shape: Tuple[int, ...],
        tN: Float[Tensor, " B"],
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> Float[Tensor, " B *spatial_shape"]:
        r"""
        Draw the initial noisy latent state :math:`\mathbf{x}_N` for sampling.

        Parameters
        ----------
        spatial_shape : Tuple[int, ...]
            Per-sample shape of the latent state, e.g. ``(C, H, W)``.
        tN : Tensor
            Initial diffusion time of shape :math:`(B,)`; determines the
            noise level of the initial state.
        device : torch.device, optional
            Device of the returned tensor.
        dtype : torch.dtype, optional
            Dtype of the returned tensor.

        Returns
        -------
        Tensor
            Initial latent state of shape :math:`(B, *spatial\_shape)`.
        """
        ...

    def get_denoiser(
        self,
        **kwargs: Any,
    ) -> Denoiser:
        r"""
        Factory that builds a sampling-compatible denoiser from a predictor.

        Concrete implementations typically accept keyword-only predictor
        arguments (e.g. ``score_predictor``, ``x0_predictor``) and return
        the update term consumed by the solver; see the concrete classes'
        docstrings (e.g.
        :meth:`LinearGaussianNoiseScheduler.get_denoiser`) for details.

        Parameters
        ----------
        **kwargs : Any
            Implementation-specific keyword arguments.

        Returns
        -------
        Denoiser
            A callable implementing the
            :class:`~physicsnemo.diffusion.Denoiser` interface, for use
            with solvers and the
            :func:`~physicsnemo.diffusion.samplers.sample` function.
        """
        ...
class LinearGaussianNoiseScheduler(ABC, NoiseScheduler):
    r"""
    Abstract base class for linear-Gaussian noise schedules.

    Implements the :class:`NoiseScheduler` protocol for forward
    processes of the form

    .. math::
        \mathbf{x}(t) = \alpha(t) \mathbf{x}_0
        + \sigma(t) \boldsymbol{\epsilon}

    with :math:`\boldsymbol{\epsilon} \sim \mathcal{N}(0, \mathbf{I})`,
    signal coefficient :math:`\alpha(t)` and noise level
    :math:`\sigma(t)`.

    **Training:** :meth:`add_noise` applies the formula above and
    :meth:`sample_time` draws diffusion times.

    **Sampling:** the reverse process follows either the probability
    flow ODE

    .. math::
        \frac{d\mathbf{x}}{dt} = f(\mathbf{x}, t)
        - \frac{1}{2} g^2(\mathbf{x}, t)
        \nabla_{\mathbf{x}} \log p(\mathbf{x})

    or the reverse SDE

    .. math::
        d\mathbf{x} = \left[ f(\mathbf{x}, t)
        - g^2(\mathbf{x}, t) \nabla_{\mathbf{x}} \log p(\mathbf{x})
        \right] dt + g(\mathbf{x}, t) d\mathbf{W},

    whose right-hand side is produced by the :meth:`get_denoiser`
    factory from either a score-predictor or an x0-predictor.

    **Abstract methods (must be implemented by subclasses):**
    :meth:`sigma`, :meth:`sigma_inv`, :meth:`sigma_dot`, :meth:`alpha`,
    :meth:`alpha_dot`, :meth:`timesteps`, :meth:`sample_time`.

    **Concrete methods (default implementations, overridable):**
    :meth:`drift`, :meth:`diffusion`, :meth:`x0_to_score`,
    :meth:`add_noise`, :meth:`init_latents`, :meth:`get_denoiser`.
    """

    @abstractmethod
    def sigma(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""
        Noise level :math:`\sigma(t)` at diffusion time ``t``.

        Used in both training and sampling.

        Parameters
        ----------
        t : Tensor
            Diffusion time, any shape.

        Returns
        -------
        Tensor
            :math:`\sigma(t)`, same shape as ``t``.
        """
        ...

    @abstractmethod
    def sigma_inv(
        self,
        sigma: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""
        Diffusion time corresponding to noise level ``sigma``.

        Used in both training and sampling.

        Parameters
        ----------
        sigma : Tensor
            Noise level, any shape.

        Returns
        -------
        Tensor
            Diffusion time, same shape as ``sigma``.
        """
        ...

    @abstractmethod
    def sigma_dot(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""
        Time derivative :math:`\dot{\sigma}(t)` of the noise level.

        Used in sampling.

        Parameters
        ----------
        t : Tensor
            Diffusion time, any shape.

        Returns
        -------
        Tensor
            :math:`\dot{\sigma}(t)`, same shape as ``t``.
        """
        ...

    @abstractmethod
    def alpha(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""
        Signal coefficient :math:`\alpha(t)` at diffusion time ``t``.

        Used in both training and sampling.

        Parameters
        ----------
        t : Tensor
            Diffusion time, any shape.

        Returns
        -------
        Tensor
            :math:`\alpha(t)`, same shape as ``t``.
        """
        ...

    @abstractmethod
    def alpha_dot(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""
        Time derivative :math:`\dot{\alpha}(t)` of the signal coefficient.

        Used in sampling.

        Parameters
        ----------
        t : Tensor
            Diffusion time, any shape.

        Returns
        -------
        Tensor
            :math:`\dot{\alpha}(t)`, same shape as ``t``.
        """
        ...

    @abstractmethod
    def timesteps(
        self,
        num_steps: int,
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> Float[Tensor, " N+1"]:
        r"""
        Build the discrete time-step grid used during sampling.

        Parameters
        ----------
        num_steps : int
            Number of sampling steps.
        device : torch.device, optional
            Device of the returned tensor.
        dtype : torch.dtype, optional
            Dtype of the returned tensor.

        Returns
        -------
        Tensor
            Time-steps of shape :math:`(N + 1,)` in decreasing order,
            with the last entry equal to 0.
        """
        ...

    @abstractmethod
    def sample_time(
        self,
        N: int,
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> Float[Tensor, " N"]:
        r"""
        Draw ``N`` random diffusion times for training.

        Typically used by the denoising score matching loss.

        Parameters
        ----------
        N : int
            Number of time values to draw.
        device : torch.device, optional
            Device of the returned tensor.
        dtype : torch.dtype, optional
            Dtype of the returned tensor.

        Returns
        -------
        Tensor
            Diffusion times of shape :math:`(N,)`.
        """
        ...

    def drift(
        self,
        x: Float[Tensor, " B *dims"],
        t: Float[Tensor, " B"],
    ) -> Float[Tensor, " B *dims"]:
        r"""
        Drift term :math:`f(\mathbf{x}, t)` of the reverse ODE/SDE.

        Default:
        :math:`f(\mathbf{x}, t)
        = \frac{\dot{\alpha}(t)}{\alpha(t)} \mathbf{x}`.
        Override to implement a different drift. Used by
        :meth:`get_denoiser` when assembling the ODE/SDE right-hand side.

        Parameters
        ----------
        x : Tensor
            Latent state of shape :math:`(B, *)`.
        t : Tensor
            Diffusion time of shape :math:`(B,)`.

        Returns
        -------
        Tensor
            Drift term, same shape as ``x``.
        """
        # Reshape t to (B, 1, ..., 1) so it broadcasts over x.
        t_b = t.reshape((-1,) + (1,) * (x.ndim - 1))
        return (self.alpha_dot(t_b) / self.alpha(t_b)) * x

    def diffusion(
        self,
        x: Float[Tensor, " B *dims"],
        t: Float[Tensor, " B"],
    ) -> Float[Tensor, " B *_"]:
        r"""
        Squared diffusion term :math:`g^2(\mathbf{x}, t)` of the reverse
        ODE/SDE.

        Default:
        :math:`g^2 = 2 \dot{\sigma} \sigma
        - 2 \frac{\dot{\alpha}}{\alpha} \sigma^2`.
        Override to implement a different diffusion term. Used by
        :meth:`get_denoiser` when assembling the ODE/SDE right-hand side.

        Parameters
        ----------
        x : Tensor
            Latent state of shape :math:`(B, *)`.
        t : Tensor
            Diffusion time of shape :math:`(B,)`.

        Returns
        -------
        Tensor
            Squared diffusion term, broadcastable to the shape of ``x``.
        """
        # Reshape t to (B, 1, ..., 1) so it broadcasts over x.
        t_b = t.reshape((-1,) + (1,) * (x.ndim - 1))
        sig = self.sigma(t_b)
        return 2 * self.sigma_dot(t_b) * sig - 2 * (
            self.alpha_dot(t_b) / self.alpha(t_b)
        ) * sig**2

    def x0_to_score(
        self,
        x0: Float[Tensor, " B *dims"],
        x_t: Float[Tensor, " B *dims"],
        t: Float[Tensor, " B"],
    ) -> Float[Tensor, " B *dims"]:
        r"""
        Convert an x0-prediction into a score.

        Computes
        :math:`\nabla_{\mathbf{x}_t} \log p(\mathbf{x}_t)
        = \frac{\alpha(t) \hat{\mathbf{x}}_0 - \mathbf{x}_t}{\sigma^2(t)}`.
        Called automatically by :meth:`get_denoiser` when an
        ``x0_predictor`` is supplied; it may also be called directly.
        This helper usually does not need to be overridden.

        Parameters
        ----------
        x0 : Tensor
            Predicted clean data :math:`\hat{\mathbf{x}}_0` of shape
            :math:`(B, *)`.
        x_t : Tensor
            Current noisy state :math:`\mathbf{x}_t` of shape
            :math:`(B, *)`.
        t : Tensor
            Diffusion time of shape :math:`(B,)`.

        Returns
        -------
        Tensor
            Score, same shape as ``x0``.
        """
        t_b = t.reshape((-1,) + (1,) * (x0.ndim - 1))
        return (self.alpha(t_b) * x0 - x_t) / (self.sigma(t_b) ** 2)

    def get_denoiser(
        self,
        *,
        score_predictor: Predictor | None = None,
        x0_predictor: Predictor | None = None,
        denoising_type: Literal["ode", "sde"] = "ode",
        **kwargs: Any,
    ) -> Denoiser:
        r"""
        Factory that converts a predictor into a denoiser for sampling.

        Exactly one of ``score_predictor`` or ``x0_predictor`` must be
        given. The returned denoiser evaluates the right-hand side of
        the reverse process:

        - ``"ode"``:
          :math:`\frac{d\mathbf{x}}{dt} = f(\mathbf{x}, t)
          - \frac{1}{2} g^2(t) s(\mathbf{x}, t)`
        - ``"sde"``:
          :math:`d\mathbf{x} = \left[ f(\mathbf{x}, t)
          - g^2(t) s(\mathbf{x}, t) \right] dt + g(t) d\mathbf{W}`

        where :math:`s(\mathbf{x}, t)` is the score. With an
        x0-predictor the score is derived internally via
        :meth:`x0_to_score`; with a score-predictor it is used directly.
        As usual in SDE integration, the stochastic term
        :math:`g(t) d\mathbf{W}` is handled by the solver and is not
        part of the returned value.

        Parameters
        ----------
        score_predictor : Predictor, optional
            Callable ``(x_t, t) -> score`` (e.g.
            :math:`\nabla_{\mathbf{x}} \log p(\mathbf{x}_t)`); may be
            unconditional, conditional, guidance-augmented, etc.
            Mutually exclusive with ``x0_predictor``.
        x0_predictor : Predictor, optional
            Callable ``(x_t, t) -> x0_hat`` estimating clean data.
            Mutually exclusive with ``score_predictor``.
        denoising_type : {"ode", "sde"}, default="ode"
            Deterministic (``"ode"``) or stochastic (``"sde"``) reverse
            process.
        **kwargs : Any
            Ignored.

        Returns
        -------
        Denoiser
            Callable computing the reverse ODE/SDE right-hand side;
            implements the :class:`~physicsnemo.diffusion.Denoiser`
            interface.

        Raises
        ------
        ValueError
            If both or neither predictor is provided, or if
            ``denoising_type`` is not ``"ode"`` or ``"sde"``.
        """
        # Require exactly one predictor (XOR on the two None-checks).
        if not ((score_predictor is None) ^ (x0_predictor is None)):
            raise ValueError(
                "Exactly one of 'score_predictor' or 'x0_predictor' "
                "must be provided, not both or neither."
            )

        # Bind bound methods locally so the returned closures capture
        # them directly instead of going through `self` at call time.
        drift_fn = self.drift
        diffusion_fn = self.diffusion

        if x0_predictor is None:
            score_fn = score_predictor
        else:
            convert = self.x0_to_score

            def score_fn(
                x: Float[Tensor, " B *dims"],
                t: Float[Tensor, " B"],
            ) -> Float[Tensor, " B *dims"]:
                # Derive the score from the predicted clean data.
                return convert(x0_predictor(x, t), x, t)

        if denoising_type == "ode":

            def ode_rhs(
                x: Float[Tensor, " B *dims"],
                t: Float[Tensor, " B"],
            ) -> Float[Tensor, " B *dims"]:
                s = score_fn(x, t)
                return drift_fn(x, t) - 0.5 * diffusion_fn(x, t) * s

            return ode_rhs

        elif denoising_type == "sde":

            def sde_rhs(
                x: Float[Tensor, " B *dims"],
                t: Float[Tensor, " B"],
            ) -> Float[Tensor, " B *dims"]:
                # Deterministic part only; g(t)*dW is added by the solver.
                s = score_fn(x, t)
                return drift_fn(x, t) - diffusion_fn(x, t) * s

            return sde_rhs

        else:
            raise ValueError(
                f"denoising_type must be 'ode' or 'sde', got '{denoising_type}'"
            )

    def add_noise(
        self,
        x0: Float[Tensor, " B *dims"],
        time: Float[Tensor, " B"],
    ) -> Float[Tensor, " B *dims"]:
        r"""
        Corrupt clean data at the given diffusion times.

        Implements the forward process
        :math:`\mathbf{x}(t) = \alpha(t) \mathbf{x}_0
        + \sigma(t) \boldsymbol{\epsilon}`.
        Overriding :meth:`alpha` and :meth:`sigma` is usually
        sufficient; this method rarely needs to be overridden itself.

        Parameters
        ----------
        x0 : Tensor
            Clean latent state of shape :math:`(B, *)`.
        time : Tensor
            Diffusion times of shape :math:`(B,)`.

        Returns
        -------
        Tensor
            Noisy latent state, same shape as ``x0``.
        """
        t_b = time.reshape((-1,) + (1,) * (x0.ndim - 1))
        a = self.alpha(t_b)
        s = self.sigma(t_b)
        return a * x0 + s * torch.randn_like(x0)

    def init_latents(
        self,
        spatial_shape: Tuple[int, ...],
        tN: Float[Tensor, " B"],
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> Float[Tensor, " B *spatial_shape"]:
        r"""
        Draw the initial noisy latent state :math:`\mathbf{x}_N` for sampling.

        Generates
        :math:`\mathbf{x}_N = \sigma(t_N) \cdot \boldsymbol{\epsilon}`
        with :math:`\boldsymbol{\epsilon} \sim \mathcal{N}(0, \mathbf{I})`.

        Parameters
        ----------
        spatial_shape : Tuple[int, ...]
            Per-sample shape of the latent state, e.g. ``(C, H, W)``.
        tN : Tensor
            Initial diffusion time of shape :math:`(B,)`.
        device : torch.device, optional
            Device of the returned tensor.
        dtype : torch.dtype, optional
            Dtype of the returned tensor.

        Returns
        -------
        Tensor
            Initial noisy latent of shape :math:`(B, *spatial\_shape)`.
        """
        eps = torch.randn(
            tN.shape[0], *spatial_shape, device=device, dtype=dtype
        )
        tN_b = tN.reshape((-1,) + (1,) * len(spatial_shape))
        return self.sigma(tN_b) * eps
class EDMNoiseScheduler(LinearGaussianNoiseScheduler):
    r"""
    EDM noise scheduler with identity mapping :math:`\sigma(t) = t`.

    The EDM formulation uses :math:`\alpha(t) = 1` (no signal
    attenuation) and :math:`\sigma(t) = t` (identity mapping between
    time and noise level).

    **Sampling time-steps** are computed with polynomial spacing:

    .. math::
        t_i = \left(\sigma_{\max}^{1/\rho} + \frac{i}{N-1}
        \left(\sigma_{\min}^{1/\rho} - \sigma_{\max}^{1/\rho}\right)
        \right)^{\rho}

    **Training times** are sampled log-uniformly between ``sigma_min``
    and ``sigma_max``.

    Parameters
    ----------
    sigma_min : float, optional
        Minimum noise level, by default 0.002.
    sigma_max : float, optional
        Maximum noise level, by default 80.
    rho : float, optional
        Exponent controlling time-step spacing. Larger values
        concentrate more steps at lower noise levels (better for fine
        details). By default 7.

    Note
    ----
    Reference: `Elucidating the Design Space of Diffusion-Based
    Generative Models <https://arxiv.org/abs/2206.00364>`_
    """

    def __init__(
        self,
        sigma_min: float = 0.002,
        sigma_max: float = 80.0,
        rho: float = 7.0,
    ) -> None:
        self.sigma_min = sigma_min
        self.sigma_max = sigma_max
        self.rho = rho

    def sigma(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""Identity mapping: :math:`\sigma(t) = t`."""
        return t

    def sigma_inv(
        self,
        sigma: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""Identity mapping: :math:`t = \sigma`."""
        return sigma

    def sigma_dot(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""Constant derivative: :math:`\dot{\sigma}(t) = 1`."""
        return torch.ones_like(t)

    def alpha(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""Constant signal coefficient: :math:`\alpha(t) = 1`."""
        return torch.ones_like(t)

    def alpha_dot(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""Zero derivative: :math:`\dot{\alpha}(t) = 0`."""
        return torch.zeros_like(t)

    def timesteps(
        self,
        num_steps: int,
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> Float[Tensor, " N+1"]:
        r"""
        Generate EDM time-steps with polynomial spacing.

        Parameters
        ----------
        num_steps : int
            Number of sampling steps. Must be >= 1.
        device : torch.device, optional
            Device to place the tensor on.
        dtype : torch.dtype, optional
            Data type of the tensor.

        Returns
        -------
        torch.Tensor
            Time-steps tensor of shape :math:`(N + 1,)` where :math:`N`
            is ``num_steps``.

        Raises
        ------
        ValueError
            If ``num_steps`` is smaller than 1.
        """
        if num_steps < 1:
            raise ValueError(f"num_steps must be >= 1, got {num_steps}")
        zero = torch.zeros(1, dtype=dtype, device=device)
        if num_steps == 1:
            # The polynomial formula below divides by (num_steps - 1),
            # which would produce NaN for a single step; start directly
            # at sigma_max instead.
            t0 = torch.full(
                (1,), self.sigma_max, dtype=dtype, device=device
            )
            return torch.cat([t0, zero])
        step_indices = torch.arange(num_steps, dtype=dtype, device=device)
        smax_inv_rho = self.sigma_max ** (1 / self.rho)
        smin_inv_rho = self.sigma_min ** (1 / self.rho)
        frac = step_indices / (num_steps - 1)
        interp = smax_inv_rho + frac * (smin_inv_rho - smax_inv_rho)
        t_steps = interp**self.rho
        return torch.cat([t_steps, zero])

    def sample_time(
        self,
        N: int,
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> Float[Tensor, " N"]:
        r"""
        Sample N diffusion times log-uniformly in :math:`[\sigma_{min},
        \sigma_{max}]`.

        Parameters
        ----------
        N : int
            Number of time values to sample.
        device : torch.device, optional
            Device to place the tensor on.
        dtype : torch.dtype, optional
            Data type of the tensor.

        Returns
        -------
        Tensor
            Sampled diffusion times of shape :math:`(N,)`.
        """
        u = torch.rand(N, device=device, dtype=dtype)
        log_ratio = math.log(self.sigma_max / self.sigma_min)
        return self.sigma_min * torch.exp(u * log_ratio)
class VENoiseScheduler(LinearGaussianNoiseScheduler):
    r"""
    Variance Exploding (VE) noise scheduler.

    Implements the VE formulation with :math:`\sigma(t) = \sqrt{t}` and
    :math:`\alpha(t) = 1` (no signal attenuation).

    **Sampling time-steps** use geometric spacing in :math:`\sigma^2`
    space:

    .. math::
        \sigma_i^2 = \sigma_{\max}^2 \cdot
        \left(\frac{\sigma_{\min}^2}{\sigma_{\max}^2}\right)^{i/(N-1)}

    **Training times** are sampled log-uniformly between ``sigma_min``
    and ``sigma_max``, then mapped to time via :math:`t = \sigma^2`.

    Parameters
    ----------
    sigma_min : float, optional
        Minimum noise level, by default 0.02.
    sigma_max : float, optional
        Maximum noise level, by default 100.

    Note
    ----
    Reference: `Score-Based Generative Modeling through Stochastic
    Differential Equations <https://arxiv.org/abs/2011.13456>`_
    """

    def __init__(
        self,
        sigma_min: float = 0.02,
        sigma_max: float = 100.0,
    ) -> None:
        self.sigma_min = sigma_min
        self.sigma_max = sigma_max

    def sigma(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""VE noise coefficient: :math:`\sigma(t) = \sqrt{t}`."""
        return t.sqrt()

    def sigma_inv(
        self,
        sigma: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""Inverse VE mapping: :math:`t = \sigma^2`."""
        return sigma**2

    def sigma_dot(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""Time derivative: :math:`\dot{\sigma}(t) = 1/(2\sqrt{t})`."""
        return 0.5 / t.sqrt()

    def alpha(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""Constant signal coefficient: :math:`\alpha(t) = 1`."""
        return torch.ones_like(t)

    def alpha_dot(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""Zero derivative: :math:`\dot{\alpha}(t) = 0`."""
        return torch.zeros_like(t)

    def timesteps(
        self,
        num_steps: int,
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> Float[Tensor, " N+1"]:
        r"""
        Generate VE time-steps with geometric spacing in :math:`\sigma^2`.

        Parameters
        ----------
        num_steps : int
            Number of sampling steps. Must be >= 1.
        device : torch.device, optional
            Device to place the tensor on.
        dtype : torch.dtype, optional
            Data type of the tensor.

        Returns
        -------
        torch.Tensor
            Time-steps tensor of shape :math:`(N + 1,)`.

        Raises
        ------
        ValueError
            If ``num_steps`` is smaller than 1.
        """
        if num_steps < 1:
            raise ValueError(f"num_steps must be >= 1, got {num_steps}")
        zero = torch.zeros(1, dtype=dtype, device=device)
        if num_steps == 1:
            # The geometric formula below divides by (num_steps - 1),
            # which would produce NaN for a single step; start directly
            # at the maximum time sigma_max**2.
            t0 = torch.full(
                (1,), self.sigma_max**2, dtype=dtype, device=device
            )
            return torch.cat([t0, zero])
        step_indices = torch.arange(num_steps, dtype=dtype, device=device)
        ratio = self.sigma_min**2 / self.sigma_max**2
        exponent = step_indices / (num_steps - 1)
        t_steps = (self.sigma_max**2) * (ratio**exponent)
        return torch.cat([t_steps, zero])

    def sample_time(
        self,
        N: int,
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> Float[Tensor, " N"]:
        r"""
        Sample N diffusion times log-uniformly in sigma space, mapped to
        time.

        Parameters
        ----------
        N : int
            Number of time values to sample.
        device : torch.device, optional
            Device to place the tensor on.
        dtype : torch.dtype, optional
            Data type of the tensor.

        Returns
        -------
        Tensor
            Sampled diffusion times of shape :math:`(N,)`.
        """
        u = torch.rand(N, device=device, dtype=dtype)
        log_ratio = math.log(self.sigma_max / self.sigma_min)
        sigma = self.sigma_min * torch.exp(u * log_ratio)
        return self.sigma_inv(sigma)
class IDDPMNoiseScheduler(LinearGaussianNoiseScheduler):
    r"""
    Improved DDPM (iDDPM) noise scheduler with cosine-based schedule.

    Uses identity mappings :math:`\sigma(t) = t` and
    :math:`\alpha(t) = 1`. The key feature is a precomputed noise level
    schedule derived from a cosine schedule, providing improved sample
    quality compared to the original DDPM.

    **Sampling time-steps** are selected from a precomputed schedule of
    :math:`M` discrete noise levels, subsampled to ``num_steps``.

    **Training times** are sampled uniformly from the precomputed
    schedule.

    Parameters
    ----------
    sigma_min : float, optional
        Minimum noise level for filtering, by default 0.002.
    sigma_max : float, optional
        Maximum noise level for filtering, by default 81.
    C_1 : float, optional
        Clipping threshold for alpha ratio, by default 0.001.
    C_2 : float, optional
        Cosine schedule parameter, by default 0.008.
    M : int, optional
        Number of precomputed discretization steps, by default 1000.

    Note
    ----
    Reference: `Improved Denoising Diffusion Probabilistic Models
    <https://arxiv.org/abs/2102.09672>`_
    """

    def __init__(
        self,
        sigma_min: float = 0.002,
        sigma_max: float = 81.0,
        C_1: float = 0.001,
        C_2: float = 0.008,
        M: int = 1000,
    ) -> None:
        self.sigma_min = sigma_min
        self.sigma_max = sigma_max
        self.C_1 = C_1
        self.C_2 = C_2
        self.M = M

        # Precompute the noise level schedule u_j, j = 0, ..., M.
        self._u = self._compute_u_schedule()

    def _compute_u_schedule(self) -> Tensor:
        """Precompute the iDDPM noise level schedule.

        Walks backwards from u[M] = 0, so entries increase towards
        index 0 (i.e. ``self._u`` is in decreasing noise order by
        index... largest level at index 0).
        """
        u = torch.zeros(self.M + 1)
        for j in range(self.M, 0, -1):
            angle_j = 0.5 * math.pi * j / self.M / (self.C_2 + 1)
            angle_jm1 = 0.5 * math.pi * (j - 1) / self.M / (self.C_2 + 1)
            alpha_bar_j = math.sin(angle_j) ** 2
            alpha_bar_jm1 = math.sin(angle_jm1) ** 2
            alpha_ratio = alpha_bar_jm1 / alpha_bar_j
            # Clip the ratio at C_1 to avoid blow-up near j = 0.
            val = (u[j] ** 2 + 1) / max(alpha_ratio, self.C_1) - 1
            u[j - 1] = val.sqrt()
        return u

    def sigma(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""For iDDPM, :math:`\sigma(t) = t` (identity mapping)."""
        return t

    def sigma_inv(
        self,
        sigma: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""For iDDPM, :math:`t = \sigma` (identity mapping)."""
        return sigma

    def sigma_dot(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""Constant derivative: :math:`\dot{\sigma}(t) = 1`."""
        return torch.ones_like(t)

    def alpha(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""Constant signal coefficient: :math:`\alpha(t) = 1`."""
        return torch.ones_like(t)

    def alpha_dot(
        self,
        t: Float[Tensor, " *shape"],
    ) -> Float[Tensor, " *shape"]:
        r"""Zero derivative: :math:`\dot{\alpha}(t) = 0`."""
        return torch.zeros_like(t)

    def timesteps(
        self,
        num_steps: int,
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> Float[Tensor, " N+1"]:
        r"""
        Generate iDDPM time-steps from precomputed schedule.

        Subsamples ``num_steps`` values from the precomputed schedule of
        :math:`M` noise levels.

        Parameters
        ----------
        num_steps : int
            Number of sampling steps. Must be >= 1.
        device : torch.device, optional
            Device to place the tensor on.
        dtype : torch.dtype, optional
            Data type of the tensor.

        Returns
        -------
        torch.Tensor
            Time-steps tensor of shape :math:`(N + 1,)`.

        Raises
        ------
        ValueError
            If ``num_steps`` is smaller than 1.
        """
        if num_steps < 1:
            raise ValueError(f"num_steps must be >= 1, got {num_steps}")
        u = self._u.to(device=device, dtype=dtype)
        # Filter to valid sigma range; index order (decreasing levels)
        # is preserved, so the result is already sorted for sampling.
        in_range = torch.logical_and(u >= self.sigma_min, u <= self.sigma_max)
        u_filtered = u[in_range]

        if num_steps == 1:
            # The generic subsampling below divides by (num_steps - 1),
            # which would produce an invalid index for a single step;
            # use the largest retained noise level.
            sigma_steps = u_filtered[:1]
        else:
            step_indices = torch.arange(num_steps, dtype=dtype, device=device)
            scale = (len(u_filtered) - 1) / (num_steps - 1)
            indices = (scale * step_indices).round().to(torch.int64)
            sigma_steps = u_filtered[indices]

        zero = torch.zeros(1, dtype=dtype, device=device)
        return torch.cat([sigma_steps, zero])

    def sample_time(
        self,
        N: int,
        *,
        device: torch.device | None = None,
        dtype: torch.dtype | None = None,
    ) -> Float[Tensor, " N"]:
        r"""
        Sample N diffusion times uniformly from precomputed schedule.

        Parameters
        ----------
        N : int
            Number of time values to sample.
        device : torch.device, optional
            Device to place the tensor on.
        dtype : torch.dtype, optional
            Data type of the tensor.

        Returns
        -------
        Tensor
            Sampled diffusion times of shape :math:`(N,)`.
        """
        u = self._u.to(device=device, dtype=dtype)
        in_range = torch.logical_and(u >= self.sigma_min, u <= self.sigma_max)
        u_filtered = u[in_range]
        # Draw uniform random indices into the retained schedule.
        indices = torch.randint(0, len(u_filtered), (N,), device=device)
        return u_filtered[indices]
+ + The noise and signal coefficients are: + + .. math:: + \alpha(t) = \exp\left(-\frac{1}{2} + \left(\frac{\beta_d}{2} t^2 + \beta_{\min} t\right)\right) + + .. math:: + \sigma(t) = \sqrt{1 - \alpha(t)^2} + = \sqrt{1 - \exp\left(-\frac{\beta_d}{2} t^2 + - \beta_{\min} t\right)} + + **Sampling time-steps** are linearly spaced from ``t_max`` (usually 1) to + ``epsilon_s`` (small positive value to avoid singularities). + + **Training times** are sampled uniformly between ``epsilon_s`` and + ``t_max``. + + Parameters + ---------- + beta_min : float, optional + Minimum beta value for the linear schedule, by default 0.1. + beta_d : float, optional + Beta slope (delta) for the linear schedule, by default 19.1. + epsilon_s : float, optional + Small positive value for minimum time, by default 1e-3. + t_max : float, optional + Maximum diffusion time, by default 1.0. + + Note + ---- + Reference: `Score-Based Generative Modeling through Stochastic + Differential Equations `_ + + Examples + -------- + Basic training and sampling workflow using the VP noise scheduler: + + >>> import torch + >>> from physicsnemo.diffusion.noise_schedulers import VPNoiseScheduler + >>> + >>> scheduler = VPNoiseScheduler(beta_min=0.1, beta_d=19.1) + >>> + >>> # Training: sample times and add noise + >>> x0 = torch.randn(4, 3, 8, 8) # Clean data + >>> t = scheduler.sample_time(4) # Sample diffusion times + >>> x_t = scheduler.add_noise(x0, t) # Create noisy samples + >>> x_t.shape + torch.Size([4, 3, 8, 8]) + >>> + >>> # Sampling: generate timesteps and initial latents + >>> t_steps = scheduler.timesteps(10) + >>> tN = t_steps[0].expand(4) # Initial time for batch of 4 + >>> xN = scheduler.init_latents((3, 8, 8), tN) # Initial noise + >>> xN.shape + torch.Size([4, 3, 8, 8]) + >>> + >>> # Convert x0-predictor to denoiser for sampling + >>> x0_predictor = lambda x, t: x * 0.9 # Toy x0-predictor + >>> denoiser = scheduler.get_denoiser(x0_predictor=x0_predictor) + >>> denoiser(xN, tN).shape # ODE RHS 
for sampling + torch.Size([4, 3, 8, 8]) + """ + + def __init__( + self, + beta_min: float = 0.1, + beta_d: float = 19.1, + epsilon_s: float = 1e-3, + t_max: float = 1.0, + ) -> None: + self.beta_min = beta_min + self.beta_d = beta_d + self.epsilon_s = epsilon_s + self.t_max = t_max + + def _exponent( + self, + t: Float[Tensor, " *shape"], + ) -> Float[Tensor, " *shape"]: + r"""Compute exponent: :math:`a(t) = \frac{\beta_d}{2} t^2 + \beta_{\min} t`.""" + return 0.5 * self.beta_d * t**2 + self.beta_min * t + + def alpha( + self, + t: Float[Tensor, " *shape"], + ) -> Float[Tensor, " *shape"]: + r"""Signal coefficient: :math:`\alpha(t) = \exp(-a(t)/2)`.""" + return torch.exp(-0.5 * self._exponent(t)) + + def alpha_dot( + self, + t: Float[Tensor, " *shape"], + ) -> Float[Tensor, " *shape"]: + r"""Derivative: :math:`\dot{\alpha}(t) = -\frac{\beta(t)}{2} \alpha(t)`.""" + beta_t = self.beta_min + self.beta_d * t + return -0.5 * beta_t * self.alpha(t) + + def sigma( + self, + t: Float[Tensor, " *shape"], + ) -> Float[Tensor, " *shape"]: + r"""Noise level: :math:`\sigma(t) = \sqrt{1 - \alpha(t)^2}`.""" + alpha_sq = self.alpha(t) ** 2 + return torch.sqrt(1 - alpha_sq) + + def sigma_dot( + self, + t: Float[Tensor, " *shape"], + ) -> Float[Tensor, " *shape"]: + r"""Derivative: :math:`\dot{\sigma}(t) = -\alpha(t) \dot{\alpha}(t) / \sigma(t)`.""" # noqa: E501 + alpha_t = self.alpha(t) + sigma_t = self.sigma(t) + alpha_dot_t = self.alpha_dot(t) + # d/dt sqrt(1 - alpha^2) = -alpha * alpha_dot / sqrt(1 - alpha^2) + return -alpha_t * alpha_dot_t / sigma_t + + def sigma_inv( + self, + sigma: Float[Tensor, " *shape"], + ) -> Float[Tensor, " *shape"]: + r""" + Inverse mapping from sigma to time. + + Solves: :math:`\sigma^2 = 1 - \exp(-a(t))` for :math:`t`. 
+ """ + # sigma^2 = 1 - exp(-a) => a = -log(1 - sigma^2) + # a = beta_d/2 * t^2 + beta_min * t + # Quadratic: beta_d * t^2 + 2*beta_min * t + 2*log(1-sigma^2) = 0 + log_term = torch.log(1 - sigma**2 + 1e-8) # small eps for stability + discriminant = self.beta_min**2 - 2 * self.beta_d * log_term + return (-self.beta_min + torch.sqrt(discriminant.clamp(min=0))) / self.beta_d + + def timesteps( + self, + num_steps: int, + *, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> Float[Tensor, " N+1"]: + r""" + Generate VP time-steps with linear spacing. + + Parameters + ---------- + num_steps : int + Number of sampling steps. + device : torch.device, optional + Device to place the tensor on. + dtype : torch.dtype, optional + Data type of the tensor. + + Returns + ------- + torch.Tensor + Time-steps tensor of shape :math:`(N + 1,)`. + """ + # Linear spacing from t_max to epsilon_s + step_indices = torch.arange(num_steps, dtype=dtype, device=device) + frac = step_indices / (num_steps - 1) + t_steps = self.t_max + frac * (self.epsilon_s - self.t_max) + zero = torch.zeros(1, dtype=dtype, device=device) + return torch.cat([t_steps, zero]) + + def sample_time( + self, + N: int, + *, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> Float[Tensor, " N"]: + r""" + Sample N diffusion times uniformly in :math:`[\epsilon_s, t_{max}]`. + + Parameters + ---------- + N : int + Number of time values to sample. + device : torch.device, optional + Device to place the tensor on. + dtype : torch.dtype, optional + Data type of the tensor. + + Returns + ------- + Tensor + Sampled diffusion times of shape :math:`(N,)`. + """ + u = torch.rand(N, device=device, dtype=dtype) + return self.epsilon_s + u * (self.t_max - self.epsilon_s) + + +class StudentTEDMNoiseScheduler(LinearGaussianNoiseScheduler): + r""" + Student-t EDM noise scheduler for heavy-tailed diffusion models. 
+ + This scheduler is a variant of :class:`EDMNoiseScheduler` that uses + Student-t noise instead of Gaussian noise. It is useful for modeling + heavy-tailed distributions and can improve sample quality for certain + data types. + + .. important:: + + Despite inheriting from :class:`LinearGaussianNoiseScheduler`, this + scheduler is **not truly Gaussian**. It uses the same linear structure + (identity mappings :math:`\sigma(t) = t` and :math:`\alpha(t) = 1`) but + replaces Gaussian noise with Student-t noise. The "Linear" part of + :class:`LinearGaussianNoiseScheduler` still applies, but the "Gaussian" + part does not. + + This scheduler uses a non-gaussian forward process: + + .. math:: + \mathbf{x}(t) = \mathbf{x}_0 + \sigma(t) \mathbf{n}, \quad + \mathbf{n} \sim \text{Student-}t(\nu) + + The marginal distribution :math:`p(\mathbf{x}_t | \mathbf{x}_0)` is + therefore a scaled Student-t distribution, not Gaussian. + + **Comparison with EDMNoiseScheduler:** + + This scheduler shares the same time-to-noise mappings as + :class:`EDMNoiseScheduler`. + The only differences are in :meth:`add_noise` and :meth:`init_latents`, + which use Student-t noise instead of Gaussian noise. + + Parameters + ---------- + sigma_min : float, optional + Minimum noise level, by default 0.002. + sigma_max : float, optional + Maximum noise level, by default 80. + rho : float, optional + Exponent controlling time-step spacing. Larger values concentrate more + steps at lower noise levels (better for fine details). By default 7. + nu : int, optional + Degrees of freedom for Student-t distribution. Must be > 2. + As ``nu`` increases, the distribution approaches Gaussian. Lower values + produce heavier tails. By default 10. + + Note + ---- + Reference: `Heavy-Tailed Diffusion Models + `_ + + Examples + -------- + Basic training and sampling workflow with Student-t noise: + + >>> import torch + >>> from physicsnemo.diffusion.noise_schedulers import ( + ... StudentTEDMNoiseScheduler, + ... 
) + >>> + >>> scheduler = StudentTEDMNoiseScheduler(nu=10) + >>> + >>> # Training: sample times and add Student-t noise + >>> x0 = torch.randn(4, 3, 8, 8) # Clean data + >>> t = scheduler.sample_time(4) # Sample diffusion times + >>> x_t = scheduler.add_noise(x0, t) # Adds Student-t noise + >>> x_t.shape + torch.Size([4, 3, 8, 8]) + >>> + >>> # Sampling: generate timesteps and Student-t initial latents + >>> t_steps = scheduler.timesteps(10) + >>> tN = t_steps[0].expand(4) + >>> xN = scheduler.init_latents((3, 8, 8), tN) # Student-t latents + >>> xN.shape + torch.Size([4, 3, 8, 8]) + """ + + def __init__( + self, + sigma_min: float = 0.002, + sigma_max: float = 80.0, + rho: float = 7.0, + nu: int = 10, + ) -> None: + if nu <= 2: + raise ValueError(f"nu must be > 2, got {nu}") + self.sigma_min = sigma_min + self.sigma_max = sigma_max + self.rho = rho + self.nu = nu + + def sigma( + self, + t: Float[Tensor, " *shape"], + ) -> Float[Tensor, " *shape"]: + r"""Identity mapping: :math:`\sigma(t) = t`.""" + return t + + def sigma_inv( + self, + sigma: Float[Tensor, " *shape"], + ) -> Float[Tensor, " *shape"]: + r"""Identity mapping: :math:`t = \sigma`.""" + return sigma + + def sigma_dot( + self, + t: Float[Tensor, " *shape"], + ) -> Float[Tensor, " *shape"]: + r"""Constant derivative: :math:`\dot{\sigma}(t) = 1`.""" + return torch.ones_like(t) + + def alpha( + self, + t: Float[Tensor, " *shape"], + ) -> Float[Tensor, " *shape"]: + r"""Constant signal coefficient: :math:`\alpha(t) = 1`.""" + return torch.ones_like(t) + + def alpha_dot( + self, + t: Float[Tensor, " *shape"], + ) -> Float[Tensor, " *shape"]: + r"""Zero derivative: :math:`\dot{\alpha}(t) = 0`.""" + return torch.zeros_like(t) + + def timesteps( + self, + num_steps: int, + *, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> Float[Tensor, " N+1"]: + r""" + Generate EDM time-steps with polynomial spacing. + + Parameters + ---------- + num_steps : int + Number of sampling steps. 
+ device : torch.device, optional + Device to place the tensor on. + dtype : torch.dtype, optional + Data type of the tensor. + + Returns + ------- + torch.Tensor + Time-steps tensor of shape :math:`(N + 1,)` where :math:`N` is + ``num_steps``. + """ + step_indices = torch.arange(num_steps, dtype=dtype, device=device) + smax_inv_rho = self.sigma_max ** (1 / self.rho) + smin_inv_rho = self.sigma_min ** (1 / self.rho) + frac = step_indices / (num_steps - 1) + interp = smax_inv_rho + frac * (smin_inv_rho - smax_inv_rho) + t_steps = interp**self.rho + zero = torch.zeros(1, dtype=dtype, device=device) + return torch.cat([t_steps, zero]) + + def sample_time( + self, + N: int, + *, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> Float[Tensor, " N"]: + r""" + Sample N diffusion times log-uniformly in :math:`[\sigma_{min}, + \sigma_{max}]`. + + Parameters + ---------- + N : int + Number of time values to sample. + device : torch.device, optional + Device to place the tensor on. + dtype : torch.dtype, optional + Data type of the tensor. + + Returns + ------- + Tensor + Sampled diffusion times of shape :math:`(N,)`. + """ + u = torch.rand(N, device=device, dtype=dtype) + log_ratio = math.log(self.sigma_max / self.sigma_min) + return self.sigma_min * torch.exp(u * log_ratio) + + def _sample_student_t( + self, + shape: Tuple[int, ...], + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> Tensor: + r""" + Sample from standard Student-t distribution. + + Student-t samples are generated as: :math:`X / \sqrt{V / \nu}` where + :math:`X \sim \mathcal{N}(0, 1)` and :math:`V \sim \chi^2(\nu)`. + + Parameters + ---------- + shape : Tuple[int, ...] + Shape of the output tensor. + device : torch.device, optional + Device to place the tensor on. + dtype : torch.dtype, optional + Data type of the tensor. + + Returns + ------- + Tensor + Student-t samples of the specified shape. 
+ """ + # Sample standard normal + normal = torch.randn(shape, device=device, dtype=dtype) + + # Sample chi-squared and compute scaling + chi2_dist = torch.distributions.Chi2(df=self.nu) + chi2_samples = chi2_dist.sample((shape[0],)) + if device is not None: + chi2_samples = chi2_samples.to(device) + if dtype is not None: + chi2_samples = chi2_samples.to(dtype) + + # kappa = chi2 / nu, reshape for broadcasting + kappa = chi2_samples / self.nu + kappa = kappa.view(-1, *([1] * (len(shape) - 1))) + + # Student-t = normal / sqrt(kappa) + return normal / torch.sqrt(kappa) + + def add_noise( + self, + x0: Float[Tensor, " B *dims"], + time: Float[Tensor, " B"], + ) -> Float[Tensor, " B *dims"]: + r""" + Add Student-t noise to clean data at the given diffusion times. + + Unlike the Gaussian case in :class:`LinearGaussianNoiseScheduler`, + this method uses Student-t noise: + + .. math:: + \mathbf{x}(t) = \mathbf{x}_0 + \sigma(t) \mathbf{n}, \quad + \mathbf{n} \sim \text{Student-}t(\nu) + + Parameters + ---------- + x0 : Tensor + Clean latent state of shape :math:`(B, *)`. + time : Tensor + Diffusion time values of shape :math:`(B,)`. + + Returns + ------- + Tensor + Noisy latent state of shape :math:`(B, *)`. + """ + t_bc = time.reshape(-1, *([1] * (x0.ndim - 1))) + sigma_t_bc = self.sigma(t_bc) + noise = self._sample_student_t(x0.shape, device=x0.device, dtype=x0.dtype) + return x0 + sigma_t_bc * noise + + def init_latents( + self, + spatial_shape: Tuple[int, ...], + tN: Float[Tensor, " B"], + *, + device: torch.device | None = None, + dtype: torch.dtype | None = None, + ) -> Float[Tensor, " B *spatial_shape"]: + r""" + Initialize noisy latent state with Student-t noise. + + Unlike the Gaussian case in :class:`LinearGaussianNoiseScheduler`, + this method uses Student-t noise: + + .. math:: + \mathbf{x}_N = \sigma(t_N) \cdot \mathbf{n}, \quad + \mathbf{n} \sim \text{Student-}t(\nu) + + Parameters + ---------- + spatial_shape : Tuple[int, ...] 
+ Spatial shape of the latent state, e.g., ``(C, H, W)``. + tN : Tensor + Initial diffusion time of shape :math:`(B,)`. + device : torch.device, optional + Device to place the tensor on. + dtype : torch.dtype, optional + Data type of the tensor. + + Returns + ------- + Tensor + Initial noisy latent of shape :math:`(B, *spatial\_shape)`. + """ + B = tN.shape[0] + noise = self._sample_student_t((B, *spatial_shape), device=device, dtype=dtype) + tN_bc = tN.reshape(-1, *([1] * len(spatial_shape))) + sigma_tN_bc = self.sigma(tN_bc) + return sigma_tN_bc * noise diff --git a/physicsnemo/diffusion/samplers/__init__.py b/physicsnemo/diffusion/samplers/__init__.py index 8c0ad6fd97..2ef817e33f 100644 --- a/physicsnemo/diffusion/samplers/__init__.py +++ b/physicsnemo/diffusion/samplers/__init__.py @@ -16,3 +16,11 @@ from .legacy_deterministic_sampler import deterministic_sampler # noqa: F401 from .legacy_stochastic_sampler import stochastic_sampler # noqa: F401 +from .samplers import sample # noqa: F401 +from .solvers import ( # noqa: F401 + EDMStochasticEulerSolver, + EDMStochasticHeunSolver, + EulerSolver, + HeunSolver, + Solver, +) diff --git a/physicsnemo/diffusion/samplers/samplers.py b/physicsnemo/diffusion/samplers/samplers.py index aaa7a7a2ae..b49c2e2be9 100644 --- a/physicsnemo/diffusion/samplers/samplers.py +++ b/physicsnemo/diffusion/samplers/samplers.py @@ -14,13 +14,357 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# NOTE(review): this hunk replaces the former placeholder module body
# (which imported `warnings` and raised a FutureFeatureWarning from
# physicsnemo.core.warnings on import) with the real sampling implementation.
"""Diffusion model sampling interface."""

from typing import Any, Dict, List, Literal

from jaxtyping import Float
from torch import Tensor

from physicsnemo.diffusion.base import Denoiser
from physicsnemo.diffusion.noise_schedulers import NoiseScheduler

from .solvers import (
    EDMStochasticEulerSolver,
    EDMStochasticHeunSolver,
    EulerSolver,
    HeunSolver,
    Solver,
)

# Registry mapping the string keys accepted by `sample` to the built-in
# solver classes; looked up when `solver` is passed as a string.
SOLVERS: Dict[str, type[Solver]] = {
    "euler": EulerSolver,
    "heun": HeunSolver,
    "edm_stochastic_euler": EDMStochasticEulerSolver,
    "edm_stochastic_heun": EDMStochasticHeunSolver,
}


def sample(
    denoiser: Denoiser,
    xN: Float[Tensor, " B *dims"],
    noise_scheduler: NoiseScheduler,
    num_steps: int,
    solver: Literal["euler", "heun", "edm_stochastic_euler", "edm_stochastic_heun"]
    | Solver = "heun",
    time_steps: Float[Tensor, " N_plus_1"] | None = None,
    solver_options: Dict[str, Any] | None = None,
    time_eval: list[int] | None = None,
) -> Float[Tensor, " B *dims"] | List[Float[Tensor, " B *dims"]]:
    r"""
    Generate batched samples from a diffusion model.

    This interface is quite generic and can be used to generate samples from
    any reverse diffusion process of the form:

    .. math::
        \mathbf{x}_{n-1} = G (\mathbf{x}_{i \geq n}, t_{i \geq n-1})

    This covers both ODE/SDE-based sampling (e.g. VP, VE, EDM) and discrete
    Markov chain-based sampling (e.g. DDPM). The exact expression of the
    operator :math:`G` depends on the combination of:

    - The ``solver``, which determines the numerical method to update
      the latent state :math:`\mathbf{x}_n` at each time-step.
+ - The ``denoiser``, which can be the right hand side for ODE/SDE-based + sampling, the denoised latent state for discrete Markov chain-based + sampling, etc. + + Typically, the update applied is roughly: + + .. math:: + \mathbf{x}_{n-1} = \text{Step}(D(\mathbf{x}_n, t_n); + \mathbf{x}_n, t_n, t_{n-1}) + + where :math:`D` is the ``denoiser`` and :math:`\text{Step}` is the + update rule of the solver, implemented by the + :meth:`~physicsnemo.diffusion.samplers.solvers.Solver.step` method. + Variants are possible by passing more complex solvers and denoisers. + + The ``solver`` can be specified as a string key (with optional + ``solver_options``), or as a pre-configured object implementing the + :class:`~physicsnemo.diffusion.samplers.solvers.Solver` interface (in + which case ``solver_options`` must be ``None``). The solver must implement + a ``step`` method with the following signature: + + .. code-block:: python + + def step( + self, + x: Tensor, # shape: (B, *dims) + t_cur: Tensor, # shape: (B,) + t_next: Tensor, # shape: (B,) + ) -> Tensor: ... # updated x, shape: (B, *dims) + + Any object that implements the + :class:`~physicsnemo.diffusion.samplers.solvers.Solver` interface can be + used as a solver. + + The ``denoiser`` must implement the + :class:`~physicsnemo.diffusion.Denoiser` interface, with the following + signature: + + .. code-block:: python + + def denoiser( + x: Tensor, # Noisy latent state, shape (B, *dims) + t: Tensor, # Diffusion time, shape (B,) + ) -> Tensor: # ODE/SDE RHS, same shape (B, *dims) as x + + Any object that implements the :class:`~physicsnemo.diffusion.Denoiser` + interface can be used as a denoiser. A denoiser is typically obtained from + a :class:`~physicsnemo.diffusion.Predictor` using the noise scheduler's + :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.get_denoiser` + factory. 
+ + Time-steps are generated by the ``noise_scheduler`` using its + :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.timesteps` + method with the provided ``num_steps``. To use custom time-steps, pass a + 1D tensor to ``time_steps`` which will override the schedule's time-steps. + + Parameters + ---------- + denoiser : Denoiser + A callable that takes ``(x, t)`` and returns the denoising update + term with the same shape as the latent state ``xN``. See + :class:`~physicsnemo.diffusion.Denoiser` for the expected interface. + Typically obtained via the + :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.get_denoiser` + factory, which converts a :class:`~physicsnemo.diffusion.Predictor` + (e.g., score-predictor, x0-predictor) into a denoiser. + xN : Tensor + Initial noisy latent state :math:`\mathbf{x}_N` of shape :math:`(B, *)` + where :math:`B` is the batch size. All batch elements share the same + diffusion time values. The ``dtype`` and ``device`` of ``xN`` determine + the ``dtype`` and ``device`` of the generated samples and any + internally created tensors. Can usually be obtained by using + :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.init_latents` + from a noise scheduler (typically from the same noise scheduler + instance as the ``noise_scheduler`` argument, but can be different if + desired). + noise_scheduler : NoiseScheduler + The noise scheduler instance used for generating time-steps. The + schedule's + :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.timesteps` + method is called with ``num_steps`` to produce the diffusion time + values, unless ``time_steps`` is provided to override them. + num_steps : int + Number of sampling steps. Passed to the noise scheduler's + :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.timesteps` + method. Ignored when ``time_steps`` is provided. + solver : str | Solver, default="heun" + The numerical solver to use. 
Supports three levels of customizability: + + **Basic**: Pass a string key to use a built-in solver + with default settings. + + **Moderately advanced**: Pass a string key plus + ``solver_options`` to override default solver parameters. + + **Advanced**: Pass a custom :class:`Solver` instance + implementing the + :class:`~physicsnemo.diffusion.samplers.solvers.Solver` interface. + In this case, ``solver_options`` must be empty. + + Available string keys: + + * ``"euler"``: First-order Euler method. Fast but lower quality. + See :class:`~physicsnemo.diffusion.samplers.solvers.EulerSolver`. + + * ``"heun"``: Second-order Heun method. Higher quality but requires + two denoiser evaluations per step. + See :class:`~physicsnemo.diffusion.samplers.solvers.HeunSolver`. + + * ``"edm_stochastic_euler"``: First-order stochastic sampler from + the EDM paper with configurable noise injection. See + :class:`~physicsnemo.diffusion.samplers.solvers.EDMStochasticEulerSolver`. + + * ``"edm_stochastic_heun"``: Second-order stochastic sampler from + the EDM paper with configurable noise injection. See + :class:`~physicsnemo.diffusion.samplers.solvers.EDMStochasticHeunSolver`. + + time_steps : Tensor | None, default=None + Optional 1D tensor of shape :math:`(N + 1,)` containing explicit + diffusion time values :math:`t_N, t_{N-1}, ..., t_0` in decreasing + order. If provided, overrides the time-steps from ``noise_scheduler`` + and ``num_steps`` is ignored. To produce a fully denoised latent state + :math:`\mathbf{x}_0`, the last element must be :math:`t_0 = 0`. + solver_options : Dict[str, Any], default={} + Additional options passed to the solver constructor. Only used when + ``solver`` is a string; must be empty when ``solver`` is a + :class:`Solver` instance. See individual solver classes for available + options. + time_eval : List[int] | None, default=None + Indices of time-steps at which to return intermediate samples. If + provided, returns a list of tensors. 
If ``None``, returns only the + final denoised latent state :math:`\mathbf{x}_0`. + + Returns + ------- + Tensor | List[Tensor] + If ``time_eval`` is ``None``, returns the final denoised latent state + :math:`\mathbf{x}_0` of shape :math:`(B, *)`. Otherwise, returns a list + of tensors :math:`\mathbf{x}_t` of shape :math:`(B, *)` containing + latent states at time-step indices specified in ``time_eval``. + + See Also + -------- + :mod:`~physicsnemo.diffusion.samplers.solvers` : Available ODE/SDE solvers. + :mod:`~physicsnemo.diffusion.noise_schedulers` : Available noise schedules. + + Examples + -------- + **Example 1:** Minimal usage. Just provide a denoiser, initial noise, a + scheduler, and the number of steps. + + >>> import torch + >>> from physicsnemo.diffusion.samplers import sample + >>> from physicsnemo.diffusion.noise_schedulers import EDMNoiseScheduler + >>> + >>> # Toy denoiser (in practice, this would be a trained neural network) + >>> denoiser = lambda x, t: x / (1 + t.view(-1, *([1] * (x.ndim - 1)))**2) # Toy denoiser + >>> scheduler = EDMNoiseScheduler() + >>> xN = torch.randn(2, 3, 8, 8) * 80 # Initial noise scaled by sigma_max + >>> x0 = sample(denoiser, xN, scheduler, num_steps=10) + >>> x0.shape + torch.Size([2, 3, 8, 8]) + + **Example 2:** Standard pattern using scheduler methods. Use + ``init_latents`` to generate initial noise and ``get_denoiser`` to convert + a predictor to a denoiser for sampling. 
+ + >>> import torch + >>> from physicsnemo.diffusion.samplers import sample + >>> from physicsnemo.diffusion.noise_schedulers import EDMNoiseScheduler + >>> + >>> scheduler = EDMNoiseScheduler() + >>> t_steps = scheduler.timesteps(10) + >>> tN = t_steps[0].expand(2) # Initial time for batch of 2 + >>> + >>> # Use scheduler to generate initial latents at time tN + >>> xN = scheduler.init_latents((3, 8, 8), tN) + >>> + >>> # Convert x0-predictor to denoiser (score conversion is automatic) + >>> x0_predictor = lambda x, t: x / (1 + t.view(-1, *([1] * (x.ndim - 1)))**2) # Toy x0-predictor + >>> denoiser = scheduler.get_denoiser(x0_predictor=x0_predictor) + >>> + >>> x0 = sample(denoiser, xN, scheduler, num_steps=10) + >>> x0.shape + torch.Size([2, 3, 8, 8]) + + **Example 3:** Custom time-steps and solver. Same as Example 2, but using + explicit time-steps and the faster (but lower quality) Euler solver. + + >>> import torch + >>> from physicsnemo.diffusion.samplers import sample + >>> from physicsnemo.diffusion.noise_schedulers import EDMNoiseScheduler + >>> + >>> scheduler = EDMNoiseScheduler() + >>> + >>> # Custom time-steps (fewer steps for faster sampling) + >>> custom_t = torch.tensor([80.0, 40.0, 20.0, 10.0, 5.0, 0.0]) + >>> tN = custom_t[0].expand(2) + >>> xN = scheduler.init_latents((3, 8, 8), tN) + >>> + >>> # Same denoiser setup as Example 2 + >>> x0_predictor = lambda x, t: x / (1 + t.view(-1, *([1] * (x.ndim - 1)))**2) # Toy x0-predictor + >>> denoiser = scheduler.get_denoiser(x0_predictor=x0_predictor) + >>> + >>> # Use custom time-steps and Euler solver (num_steps ignored) + >>> x0 = sample(denoiser, xN, scheduler, num_steps=0, time_steps=custom_t, + ... solver="euler") + >>> x0.shape + torch.Size([2, 3, 8, 8]) + + **Example 4:** Bare-bone custom scheduler. Define a scheduler from scratch + implementing the :class:`NoiseScheduler` protocol, without importing any + built-in scheduler class. 
+ + >>> import torch + >>> from physicsnemo.diffusion.samplers import sample + >>> + >>> # Define a minimal EDM-like scheduler from scratch + >>> class MinimalScheduler: + ... def timesteps(self, num_steps, *, device=None, dtype=None): + ... return torch.linspace(1.0, 0.0, num_steps + 1, + ... device=device, dtype=dtype) + ... def sample_time(self, N, *, device=None, dtype=None): + ... return torch.rand(N, device=device, dtype=dtype) + ... def add_noise(self, x0, time): + ... return x0 + time.view(-1, 1, 1, 1) * torch.randn_like(x0) + ... def init_latents(self, spatial_shape, tN, *, device=None, + ... dtype=None): + ... return tN.view(-1, 1, 1, 1) * torch.randn( + ... tN.shape[0], *spatial_shape, device=device, dtype=dtype) + ... def get_denoiser(self, *, x0_predictor=None, **kwargs): + ... # EDM-like: sigma=t, alpha=1, g^2=2t + ... # score = (x0 - x) / t^2, ODE RHS = (x0 - x) / t + ... def _denoiser(x, t): + ... x0 = x0_predictor(x, t) + ... t_bc = t.view(-1, *([1] * (x.ndim - 1))) + ... return (x0 - x) / t_bc + ... return _denoiser + >>> + >>> scheduler = MinimalScheduler() + >>> tN = torch.tensor([1.0, 1.0]) + >>> xN = scheduler.init_latents((3, 8, 8), tN) + >>> + >>> # x0-predictor -> denoiser via the scheduler factory + >>> x0_predictor = lambda x, t: x / (1 + t.view(-1, *([1] * (x.ndim - 1)))**2) # Toy x0-predictor + >>> denoiser = scheduler.get_denoiser(x0_predictor=x0_predictor) + >>> x0 = sample(denoiser, xN, scheduler, num_steps=10, solver="euler") + >>> x0.shape + torch.Size([2, 3, 8, 8]) + """ + if solver_options is None: + solver_options = {} + + # Validate and instantiate solver + if isinstance(solver, str): + if solver not in SOLVERS: + available = ", ".join(f'"{k}"' for k in SOLVERS.keys()) + raise ValueError( + f"Unknown solver '{solver}'. Available solvers: {available}." 
+ ) + solver_cls = SOLVERS[solver] + solver_ = solver_cls(denoiser, **solver_options) + else: + # Assume solver is a Solver-like object with a step method + if solver_options: + raise ValueError( + "solver_options must be None when solver is a Solver instance." + ) + solver_ = solver + + # Generate time-steps from noise_scheduler or use provided ones + if time_steps is not None: + t_steps = time_steps.to(device=xN.device, dtype=xN.dtype) + else: + t_steps = noise_scheduler.timesteps(num_steps, device=xN.device, dtype=xN.dtype) + + # Main sampling loop + samples: List[Tensor] = [] + x = xN + n_steps = len(t_steps) - 1 # Last element is 0 (final time) + + for i in range(n_steps): + t_cur = t_steps[i] + t_next = t_steps[i + 1] + + # Expand t to batch dimension: scalar -> (B,) + batch_size = x.shape[0] + t_cur_batch = t_cur.expand(batch_size) + t_next_batch = t_next.expand(batch_size) + + # Perform one solver step + x = solver_.step(x, t_cur_batch, t_next_batch) + + # Collect sample if requested + if time_eval is not None and i in time_eval: + samples.append(x.clone()) + + # Return based on time_eval + if time_eval is not None: + return samples + + return x diff --git a/physicsnemo/diffusion/samplers/solvers.py b/physicsnemo/diffusion/samplers/solvers.py new file mode 100644 index 0000000000..76dd49d3c3 --- /dev/null +++ b/physicsnemo/diffusion/samplers/solvers.py @@ -0,0 +1,766 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2026 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""ODE/SDE solvers for diffusion model sampling."""

import math
from typing import Callable, Protocol, runtime_checkable

import torch
from jaxtyping import Float
from torch import Tensor

from physicsnemo.diffusion.base import Denoiser


@runtime_checkable
class Solver(Protocol):
    r"""
    Protocol defining the interface for diffusion solvers.

    A solver implements a numerical method to integrate the diffusion process
    from a noisy state to a less noisy (or clean) state. Each call to
    :meth:`step` advances the state from time ``t_cur`` (:math:`t_n`) to
    ``t_next`` (:math:`t_{n-1}`).

    This is the minimal interface required for sampling from a diffusion model,
    and any object that implements this interface can be used as a solver in
    sampling utilities.

    The update rule applied by the sampler is roughly:

    .. math::
        \mathbf{x}_{n-1} = \text{Step}(F(\mathbf{x}_n, t_n);
        \mathbf{x}_n, t_n, t_{n-1})

    where :math:`F` is the denoiser (e.g. the right hand side in the case of
    ODE/SDE-based sampling, the denoised latent state in the case of discrete
    Markov chain-based sampling, etc.) and :math:`\text{Step}` is the update
    rule of the solver, implemented by the :meth:`step` method.

    See Also
    --------
    :func:`~physicsnemo.diffusion.samplers.sample` : The sampling function that
        uses solvers to generate samples.

    Examples
    --------
    >>> import torch
    >>> from physicsnemo.diffusion.samplers.solvers import Solver
    >>>
    >>> class SimpleEuler:
    ...     def __init__(self, denoiser):
    ...         self.denoiser = denoiser
    ...
    ...     def step(self, x, t_cur, t_next):
    ...         d = (x - self.denoiser(x, t_cur)) / t_cur
    ...         return x + (t_next - t_cur) * d
    ...
    >>> denoiser = lambda x, t: x / (1 + t.view(-1, 1)**2)  # Toy denoiser
    >>> solver = SimpleEuler(denoiser)
    >>> isinstance(solver, Solver)
    True
    """

    def step(
        self,
        x: Float[Tensor, " B *dims"],
        t_cur: Float[Tensor, " B"],
        t_next: Float[Tensor, " B"],
    ) -> Float[Tensor, " B *dims"]:
        r"""
        Perform one integration step from ``t_cur`` to ``t_next``.

        Parameters
        ----------
        x : Tensor
            Current noisy latent state :math:`\mathbf{x}_{n}` of shape
            :math:`(B, *)` where :math:`B` is the batch size.
        t_cur : Tensor
            Current diffusion time :math:`t_n` of shape :math:`(B,)`.
        t_next : Tensor
            Target diffusion time :math:`t_{n-1}` of shape :math:`(B,)`.

        Returns
        -------
        Tensor
            Updated latent state :math:`\mathbf{x}_{n-1}` at time
            ``t_next``, same shape as ``x``.
        """
        ...


class EulerSolver(Solver):
    r"""
    First-order Euler solver for diffusion ODEs.

    This is a fast solver with one denoiser evaluation per step, but typically
    produces lower quality samples compared to higher-order methods.

    Parameters
    ----------
    denoiser : Denoiser
        A callable implementing the
        :class:`~physicsnemo.diffusion.Denoiser` interface. Here it is
        expected to return the right hand side of the ODE. Typically obtained
        via
        :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.get_denoiser`,
        but any callable with the correct signature can be used.

    Examples
    --------
    >>> import torch
    >>> from physicsnemo.diffusion.samplers.solvers import EulerSolver
    >>>
    >>> denoiser = lambda x, t: x / (1 + t.view(-1, 1, 1, 1)**2)  # Toy denoiser
    >>> solver = EulerSolver(denoiser)
    >>> x_t = torch.randn(1, 3, 8, 8)
    >>> t_cur = torch.tensor([1.0])
    >>> t_next = torch.tensor([0.5])
    >>> x_tm1 = solver.step(x_t, t_cur, t_next)
    >>> x_tm1.shape
    torch.Size([1, 3, 8, 8])
    >>> isinstance(solver, Solver)
    True
    """

    def __init__(self, denoiser: Denoiser) -> None:
        self.denoiser = denoiser

    def step(
        self,
        x: Float[Tensor, " B *dims"],
        t_cur: Float[Tensor, " B"],
        t_next: Float[Tensor, " B"],
    ) -> Float[Tensor, " B *dims"]:
        r"""
        Perform one Euler integration step.

        Parameters
        ----------
        x : Tensor
            Current noisy latent state :math:`\mathbf{x}_{n}` of shape
            :math:`(B, *)` where :math:`B` is the batch size.
        t_cur : Tensor
            Current diffusion time :math:`t_n` of shape :math:`(B,)`.
        t_next : Tensor
            Target diffusion time :math:`t_{n-1}` of shape :math:`(B,)`.

        Returns
        -------
        Tensor
            Updated latent state :math:`\mathbf{x}_{n-1}` at time
            ``t_next``, same shape as ``x``.
        """
        # Reshape t for broadcasting: (B,) -> (B, 1, ..., 1)
        t_cur_bc = t_cur.reshape(-1, *([1] * (x.ndim - 1)))
        t_next_bc = t_next.reshape(-1, *([1] * (x.ndim - 1)))

        # RHS evaluation and step update
        d_cur = self.denoiser(x, t_cur)
        x_next = x + (t_next_bc - t_cur_bc) * d_cur

        return x_next


class HeunSolver(Solver):
    r"""
    Second-order Heun solver for diffusion ODEs.

    This method requires two denoiser evaluations per step but usually produces
    higher quality samples than :class:`EulerSolver`.

    Parameters
    ----------
    denoiser : Denoiser
        A callable implementing the
        :class:`~physicsnemo.diffusion.Denoiser` interface. Here it is
        expected to return the right hand side of the ODE. Typically obtained
        via
        :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.get_denoiser`,
        but any callable with the correct signature can be used.
    alpha : float, optional
        Interpolation parameter for the corrector step, must be in (0, 1].
        ``alpha=1`` gives the standard Heun method (trapezoidal rule),
        ``alpha=0.5`` gives the midpoint method. By default 1.

    Examples
    --------
    >>> import torch
    >>> from physicsnemo.diffusion.samplers.solvers import HeunSolver
    >>>
    >>> denoiser = lambda x, t: x / (1 + t.view(-1, 1, 1, 1)**2)  # Toy denoiser
    >>> solver = HeunSolver(denoiser)
    >>> x_t = torch.randn(1, 3, 8, 8)
    >>> t_cur = torch.tensor([1.0])
    >>> t_next = torch.tensor([0.5])
    >>> x_tm1 = solver.step(x_t, t_cur, t_next)
    >>> x_tm1.shape
    torch.Size([1, 3, 8, 8])
    """

    def __init__(
        self,
        denoiser: Denoiser,
        alpha: float = 1.0,
    ) -> None:
        self.denoiser = denoiser
        if not 0 < alpha <= 1:
            raise ValueError(f"alpha must be in (0, 1], got {alpha}")
        self.alpha = alpha

    def step(
        self,
        x: Float[Tensor, " B *dims"],
        t_cur: Float[Tensor, " B"],
        t_next: Float[Tensor, " B"],
    ) -> Float[Tensor, " B *dims"]:
        r"""
        Perform one Heun integration step.

        Parameters
        ----------
        x : Tensor
            Current noisy latent state :math:`\mathbf{x}_n` of shape
            :math:`(B, *)` where :math:`B` is the batch size.
        t_cur : Tensor
            Current diffusion time :math:`t_n` of shape :math:`(B,)`.
        t_next : Tensor
            Target diffusion time :math:`t_{n-1}` of shape :math:`(B,)`.

        Returns
        -------
        Tensor
            Updated latent state :math:`\mathbf{x}_{n-1}` at time
            ``t_next``, same shape as ``x``.
        """
        # Reshape t for broadcasting: (B,) -> (B, 1, ..., 1)
        t_cur_bc = t_cur.reshape(-1, *([1] * (x.ndim - 1)))
        t_next_bc = t_next.reshape(-1, *([1] * (x.ndim - 1)))

        h_bc = t_next_bc - t_cur_bc

        # First RHS evaluation
        d_cur = self.denoiser(x, t_cur)

        # Predictor step to intermediate point
        t_prime_bc = t_cur_bc + self.alpha * h_bc
        x_prime = x + self.alpha * h_bc * d_cur

        # Mask for elements where t_next != 0 (need 2nd order correction)
        # Shape: (B, 1, ..., 1) for broadcasting
        mask_bc = (t_next_bc != 0).float()

        # Second RHS evaluation (compute everywhere, masked later)
        # Avoid division by zero in denoiser by using t_cur where t_prime is 0
        t_prime = t_prime_bc.reshape(x.shape[0])
        t_prime_safe = torch.where(t_prime == 0, t_cur, t_prime)
        d_prime = self.denoiser(x_prime, t_prime_safe)

        # Apply 2nd order correction only where t_next != 0
        # Where t_next == 0, use first-order Euler step
        w_cur = 1 - 1 / (2 * self.alpha)
        w_prime = 1 / (2 * self.alpha)
        x_euler = x + h_bc * d_cur
        x_heun = x + h_bc * (w_cur * d_cur + w_prime * d_prime)
        x_next = mask_bc * x_heun + (1 - mask_bc) * x_euler

        return x_next


class EDMStochasticEulerSolver(Solver):
    r"""
    First-order stochastic Euler sampler from the EDM paper.

    Implements stochastic sampling with configurable noise injection
    controlled by the "churn" parameters.

    .. important::

        This is **not** a true SDE solver. It performs ad-hoc noise injection
        ("churn") at each step to improve sample diversity, but the underlying
        integration is still an ODE step. Therefore, the denoiser should return
        the right-hand side of the **ODE**, not the SDE.

    By default, noise injection is performed directly in time-step space.
    For linear-Gaussian noise schedules where diffusion time and noise level
    are not equal (e.g., VP schedule), provide ``sigma_fn`` and
    ``sigma_inv_fn`` to apply churn in noise-level space rather than
    time-step space. Optionally provide ``diffusion_fn`` to control the
    time-dependent magnitude of the injected noise.

    .. code-block:: python

        def sigma_fn(
            t: Tensor,  # shape: (B,) or broadcastable
        ) -> Tensor: ...  # noise level, same shape as t

        def sigma_inv_fn(
            sigma: Tensor,  # shape: (B,) or broadcastable
        ) -> Tensor: ...  # diffusion time, same shape as sigma

        def diffusion_fn(
            x: Tensor,  # shape: (B, *dims)
            t: Tensor,  # shape: (B,)
        ) -> Tensor: ...  # g^2(x, t), broadcastable to shape of x

    Parameters
    ----------
    denoiser : Denoiser
        A callable implementing the
        :class:`~physicsnemo.diffusion.Denoiser` interface. Should
        return the right-hand side of the **ODE** (not the SDE, since the
        stochastic noise injection is handled internally by this solver).
        Typically obtained via
        :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.get_denoiser`
        with ``denoising_type="ode"``.
    S_churn : float, optional
        Controls the amount of noise added at each step. Higher values add
        more stochasticity. By default 0 (deterministic), in which case this
        solver is equivalent to the deterministic :class:`EulerSolver`.
    S_min : float, optional
        Minimum diffusion time (or noise level if ``sigma_fn`` and
        ``sigma_inv_fn`` are provided) for applying churn. By default 0.
    S_max : float, optional
        Maximum diffusion time (or noise level if ``sigma_fn`` and
        ``sigma_inv_fn`` are provided) for applying churn. By default
        ``float("inf")``.
    S_noise : float, optional
        Noise scaling factor. Large values add more noise to the latent state.
        By default 1.
    num_steps : int, optional
        Total number of sampling steps, used to scale churn. By default 18.
    sigma_fn : Callable[[Tensor], Tensor] | None, optional
        Maps time to noise level :math:`\sigma(t)`. Useful for linear-Gaussian
        schedules where :math:`\sigma(t) \neq t`. Typically
        :meth:`~physicsnemo.diffusion.noise_schedulers.LinearGaussianNoiseScheduler.sigma`.
        If provided, ``sigma_inv_fn`` must also be provided.
        By default ``None`` (identity mapping).
    sigma_inv_fn : Callable[[Tensor], Tensor] | None, optional
        Maps noise level back to time. Typically
        :meth:`~physicsnemo.diffusion.noise_schedulers.LinearGaussianNoiseScheduler.sigma_inv`.
        If provided, ``sigma_fn`` must also be provided.
        By default ``None`` (identity mapping).
    diffusion_fn : Callable[[Tensor, Tensor], Tensor] | None, optional
        Controls the time-dependent magnitude of the injected noise, in
        addition to the ``S_noise`` scaling factor. Typically the squared
        diffusion coefficient :math:`g^2(\mathbf{x}, t)` from the reverse SDE,
        obtained from
        :meth:`~physicsnemo.diffusion.noise_schedulers.LinearGaussianNoiseScheduler.diffusion`.
        By default ``None`` (:math:`g^2 = 2t`), which corresponds to an
        EDM-like noise schedule.

    Note
    ----
    Reference: `Elucidating the Design Space of Diffusion-Based
    Generative Models <https://arxiv.org/abs/2206.00364>`_

    Examples
    --------
    Basic usage with default parameters (noise injection in time-step space):

    >>> import torch
    >>> from physicsnemo.diffusion.samplers.solvers import (
    ...     EDMStochasticEulerSolver,
    ... )
    >>> denoiser = lambda x, t: x / (1 + t.view(-1, 1, 1, 1)**2)  # Toy denoiser
    >>> solver = EDMStochasticEulerSolver(denoiser, S_churn=40, num_steps=18)
    >>> x_t = torch.randn(1, 3, 8, 8)
    >>> t_cur = torch.tensor([1.0])
    >>> t_next = torch.tensor([0.5])
    >>> x_tm1 = solver.step(x_t, t_cur, t_next)
    >>> x_tm1.shape
    torch.Size([1, 3, 8, 8])

    Using noise scheduler methods for linear-Gaussian schedules where
    :math:`\sigma(t) \neq t` (e.g., VP schedule). The callbacks map between
    time and noise level, allowing the churn to be applied in noise-level
    space before converting back to time-step space:

    >>> from physicsnemo.diffusion.noise_schedulers import VPNoiseScheduler
    >>> scheduler = VPNoiseScheduler()
    >>> num_steps = 10
    >>> solver = EDMStochasticEulerSolver(
    ...     denoiser,
    ...     S_churn=40,
    ...     num_steps=num_steps,
    ...     sigma_fn=scheduler.sigma,
    ...     sigma_inv_fn=scheduler.sigma_inv,
    ...     diffusion_fn=scheduler.diffusion,
    ... )
    >>> x_tm1 = solver.step(x_t, t_cur, t_next)
    >>> x_tm1.shape
    torch.Size([1, 3, 8, 8])
    """

    def __init__(
        self,
        denoiser: Denoiser,
        S_churn: float = 0,
        S_min: float = 0,
        S_max: float = float("inf"),
        S_noise: float = 1,
        num_steps: int = 18,
        sigma_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]]
        | None = None,
        sigma_inv_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]]
        | None = None,
        diffusion_fn: Callable[
            [Float[Tensor, " B *dims"], Float[Tensor, " B"]], Float[Tensor, " B *_"]
        ]
        | None = None,
    ) -> None:
        self.denoiser = denoiser
        self.S_churn = S_churn
        self.S_min = S_min
        self.S_max = S_max
        self.S_noise = S_noise
        self.num_steps = num_steps
        # Validate sigma_fn and sigma_inv_fn: both or neither must be given
        if (sigma_fn is None) != (sigma_inv_fn is None):
            raise ValueError(
                "sigma_fn and sigma_inv_fn must both be provided or both None."
            )
        if sigma_fn is None and sigma_inv_fn is None:
            self.sigma_fn = lambda t: t
            self.sigma_inv_fn = lambda sigma: sigma
            self._use_noise_level_space = False
        else:
            self.sigma_fn = sigma_fn
            self.sigma_inv_fn = sigma_inv_fn
            self._use_noise_level_space = True
        if diffusion_fn is None:
            self.diffusion_fn = lambda x, t: 2 * t.reshape(-1, *([1] * (x.ndim - 1)))
        else:
            self.diffusion_fn = diffusion_fn

    def step(
        self,
        x: Float[Tensor, " B *dims"],
        t_cur: Float[Tensor, " B"],
        t_next: Float[Tensor, " B"],
    ) -> Float[Tensor, " B *dims"]:
        r"""
        Perform one stochastic Euler sampling step.

        Parameters
        ----------
        x : Tensor
            Current noisy latent state :math:`\mathbf{x}_n` of shape
            :math:`(B, *)` where :math:`B` is the batch size.
        t_cur : Tensor
            Current diffusion time :math:`t_n` of shape :math:`(B,)`.
        t_next : Tensor
            Target diffusion time :math:`t_{n-1}` of shape :math:`(B,)`.

        Returns
        -------
        Tensor
            Updated latent state :math:`\mathbf{x}_{n-1}` at time
            ``t_next``, same shape as ``x``.
        """
        # Reshape t for broadcasting: (B,) -> (B, 1, ..., 1)
        t_cur_bc = t_cur.reshape(-1, *([1] * (x.ndim - 1)))
        t_next_bc = t_next.reshape(-1, *([1] * (x.ndim - 1)))

        gamma_base = min(self.S_churn / self.num_steps, math.sqrt(2) - 1)

        # Compute perturbed time t_hat with increased noise
        # NOTE: sigma_fn and sigma_inv_fn are identity if not provided (stays
        # in time-step space). diffusion_fn defaults to g^2 = 2t (EDM-like
        # noise schedule).
        sigma_cur_bc = self.sigma_fn(t_cur_bc)
        # Mask: apply churn only where S_min <= sigma <= S_max
        churn_mask = (sigma_cur_bc >= self.S_min) & (sigma_cur_bc <= self.S_max)
        gamma_bc = torch.where(churn_mask, gamma_base, 0.0)
        sigma_hat_bc = sigma_cur_bc + gamma_bc * sigma_cur_bc
        t_hat_bc = self.sigma_inv_fn(sigma_hat_bc)
        # Noise scale: sqrt(sigma_hat^2 - sigma_cur^2) * S_noise * g(x,t) / sqrt(2*t)
        # FIX: the magnitude is computed in noise-level (sigma) space, matching
        # EDM Algorithm 2 and EDMStochasticHeunSolver. (Previously this used
        # t_hat^2 - t_cur^2, which differs whenever sigma_fn is not identity.)
        g_sq_bc = self.diffusion_fn(x, t_cur)
        safe_t_cur_bc = torch.where(t_cur_bc == 0, torch.ones_like(t_cur_bc), t_cur_bc)
        noise_scale_bc = (
            (sigma_hat_bc**2 - sigma_cur_bc**2).clamp(min=0).sqrt()
            * self.S_noise
            * (g_sq_bc / (2 * safe_t_cur_bc)).sqrt()
        )
        noise_scale_bc = torch.where(
            t_cur_bc == 0, torch.zeros_like(noise_scale_bc), noise_scale_bc
        )

        # Perturb latent with noise
        x_hat = x + noise_scale_bc * torch.randn_like(x)

        # Euler step from t_hat to t_next
        t_hat = t_hat_bc.reshape(x.shape[0])
        d_cur = self.denoiser(x_hat, t_hat)
        x_next = x_hat + (t_next_bc - t_hat_bc) * d_cur

        return x_next


class EDMStochasticHeunSolver(Solver):
    r"""
    Second-order stochastic Heun sampler from the EDM paper.

    Implements stochastic sampling with configurable noise injection
    controlled by the "churn" parameters, using a second-order Heun
    correction step.

    .. important::

        This is **not** a true SDE solver. It performs ad-hoc noise injection
        ("churn") at each step to improve sample diversity, but the underlying
        integration is still an ODE step. Therefore, the denoiser should return
        the right-hand side of the **ODE**, not the SDE.

    By default, noise injection is performed directly in time-step space.
    For linear-Gaussian noise schedules where diffusion time and noise level
    are not equal (e.g., VP schedule), provide ``sigma_fn`` and
    ``sigma_inv_fn`` to apply churn in noise-level space rather than
    time-step space. Optionally provide ``diffusion_fn`` to control the
    time-dependent magnitude of the injected noise.

    .. code-block:: python

        def sigma_fn(
            t: Tensor,  # shape: (B,) or broadcastable
        ) -> Tensor: ...  # noise level, same shape as t

        def sigma_inv_fn(
            sigma: Tensor,  # shape: (B,) or broadcastable
        ) -> Tensor: ...  # diffusion time, same shape as sigma

        def diffusion_fn(
            x: Tensor,  # shape: (B, *dims)
            t: Tensor,  # shape: (B,)
        ) -> Tensor: ...  # g^2(x, t), broadcastable to shape of x

    Parameters
    ----------
    denoiser : Denoiser
        A callable implementing the
        :class:`~physicsnemo.diffusion.Denoiser` interface. Should
        return the right-hand side of the **ODE** (not the SDE, since the
        stochastic noise injection is handled internally by this solver).
        Typically obtained via
        :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.get_denoiser`
        with ``denoising_type="ode"``.
    alpha : float, optional
        Interpolation parameter for the corrector step, must be in (0, 1].
        ``alpha=1`` gives the standard Heun method (trapezoidal rule),
        ``alpha=0.5`` gives the midpoint method. By default 1.
    S_churn : float, optional
        Controls the amount of noise added at each step. Higher values add
        more stochasticity. By default 0 (deterministic), in which case this
        solver is equivalent to the deterministic :class:`HeunSolver`.
    S_min : float, optional
        Minimum diffusion time (or noise level if ``sigma_fn`` and
        ``sigma_inv_fn`` are provided) for applying churn. By default 0.
    S_max : float, optional
        Maximum diffusion time (or noise level if ``sigma_fn`` and
        ``sigma_inv_fn`` are provided) for applying churn. By default
        ``float("inf")``.
    S_noise : float, optional
        Noise scaling factor. Large values add more noise to the latent state.
        By default 1.
    num_steps : int, optional
        Total number of sampling steps, used to scale churn. By default 18.
    sigma_fn : Callable[[Tensor], Tensor] | None, optional
        Maps time to noise level :math:`\sigma(t)`. Useful for linear-Gaussian
        schedules where :math:`\sigma(t) \neq t`. Typically
        :meth:`~physicsnemo.diffusion.noise_schedulers.LinearGaussianNoiseScheduler.sigma`.
        If provided, ``sigma_inv_fn`` must also be provided.
        By default ``None`` (identity mapping).
    sigma_inv_fn : Callable[[Tensor], Tensor] | None, optional
        Maps noise level back to time. Typically
        :meth:`~physicsnemo.diffusion.noise_schedulers.LinearGaussianNoiseScheduler.sigma_inv`.
        If provided, ``sigma_fn`` must also be provided.
        By default ``None`` (identity mapping).
    diffusion_fn : Callable[[Tensor, Tensor], Tensor] | None, optional
        Controls the time-dependent magnitude of the injected noise, in
        addition to the ``S_noise`` scaling factor. Typically the squared
        diffusion coefficient :math:`g^2(\mathbf{x}, t)` from the reverse SDE,
        obtained from
        :meth:`~physicsnemo.diffusion.noise_schedulers.LinearGaussianNoiseScheduler.diffusion`.
        By default ``None`` (:math:`g^2 = 2t`), which corresponds to an
        EDM-like noise schedule.

    Note
    ----
    Reference: `Elucidating the Design Space of Diffusion-Based
    Generative Models <https://arxiv.org/abs/2206.00364>`_

    Examples
    --------
    Basic usage with default parameters (noise injection in time-step space):

    >>> import torch
    >>> from physicsnemo.diffusion.samplers.solvers import (
    ...     EDMStochasticHeunSolver,
    ... )
    >>> denoiser = lambda x, t: x / (1 + t.view(-1, 1, 1, 1)**2)  # Toy denoiser
    >>> solver = EDMStochasticHeunSolver(denoiser, S_churn=40, num_steps=18)
    >>> x_t = torch.randn(1, 3, 8, 8)
    >>> t_cur = torch.tensor([1.0])
    >>> t_next = torch.tensor([0.5])
    >>> x_tm1 = solver.step(x_t, t_cur, t_next)
    >>> x_tm1.shape
    torch.Size([1, 3, 8, 8])

    Using noise scheduler methods for linear-Gaussian schedules where
    :math:`\sigma(t) \neq t` (e.g., VP schedule). The callbacks map between
    time and noise level, allowing the churn to be applied in noise-level
    space before converting back to time-step space:

    >>> from physicsnemo.diffusion.noise_schedulers import VPNoiseScheduler
    >>> scheduler = VPNoiseScheduler()
    >>> num_steps = 10
    >>> solver = EDMStochasticHeunSolver(
    ...     denoiser,
    ...     S_churn=40,
    ...     num_steps=num_steps,
    ...     sigma_fn=scheduler.sigma,
    ...     sigma_inv_fn=scheduler.sigma_inv,
    ...     diffusion_fn=scheduler.diffusion,
    ... )
    >>> x_tm1 = solver.step(x_t, t_cur, t_next)
    >>> x_tm1.shape
    torch.Size([1, 3, 8, 8])
    """

    def __init__(
        self,
        denoiser: Denoiser,
        alpha: float = 1.0,
        S_churn: float = 0,
        S_min: float = 0,
        S_max: float = float("inf"),
        S_noise: float = 1,
        num_steps: int = 18,
        sigma_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]]
        | None = None,
        sigma_inv_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]]
        | None = None,
        diffusion_fn: Callable[
            [Float[Tensor, " B *dims"], Float[Tensor, " B"]], Float[Tensor, " B *_"]
        ]
        | None = None,
    ) -> None:
        self.denoiser = denoiser
        if not 0 < alpha <= 1:
            raise ValueError(f"alpha must be in (0, 1], got {alpha}")
        self.alpha = alpha
        self.S_churn = S_churn
        self.S_min = S_min
        self.S_max = S_max
        self.S_noise = S_noise
        self.num_steps = num_steps
        # Validate sigma_fn and sigma_inv_fn: both or neither must be given
        if (sigma_fn is None) != (sigma_inv_fn is None):
            raise ValueError(
                "sigma_fn and sigma_inv_fn must both be provided or both None."
            )
        if sigma_fn is None and sigma_inv_fn is None:
            self.sigma_fn = lambda t: t
            self.sigma_inv_fn = lambda sigma: sigma
            self._use_noise_level_space = False
        else:
            self.sigma_fn = sigma_fn
            self.sigma_inv_fn = sigma_inv_fn
            self._use_noise_level_space = True
        if diffusion_fn is None:
            self.diffusion_fn = lambda x, t: 2 * t.reshape(-1, *([1] * (x.ndim - 1)))
        else:
            self.diffusion_fn = diffusion_fn

    def step(
        self,
        x: Float[Tensor, " B *dims"],
        t_cur: Float[Tensor, " B"],
        t_next: Float[Tensor, " B"],
    ) -> Float[Tensor, " B *dims"]:
        r"""
        Perform one stochastic Heun sampling step.

        Parameters
        ----------
        x : Tensor
            Current noisy latent state :math:`\mathbf{x}_n` of shape
            :math:`(B, *)` where :math:`B` is the batch size.
        t_cur : Tensor
            Current diffusion time :math:`t_n` of shape :math:`(B,)`.
        t_next : Tensor
            Target diffusion time :math:`t_{n-1}` of shape :math:`(B,)`.

        Returns
        -------
        Tensor
            Updated latent state :math:`\mathbf{x}_{n-1}` at time
            ``t_next``, same shape as ``x``.
        """
        # Reshape t for broadcasting: (B,) -> (B, 1, ..., 1)
        t_cur_bc = t_cur.reshape(-1, *([1] * (x.ndim - 1)))
        t_next_bc = t_next.reshape(-1, *([1] * (x.ndim - 1)))

        gamma_base = min(self.S_churn / self.num_steps, math.sqrt(2) - 1)

        # Compute perturbed time t_hat with increased noise
        # NOTE: sigma_fn and sigma_inv_fn are identity if not provided (stays
        # in time-step space). diffusion_fn defaults to g^2 = 2t (EDM-like
        # noise schedule).
        sigma_cur_bc = self.sigma_fn(t_cur_bc)
        # Mask: apply churn only where S_min <= sigma <= S_max
        churn_mask = (sigma_cur_bc >= self.S_min) & (sigma_cur_bc <= self.S_max)
        gamma_bc = torch.where(churn_mask, gamma_base, 0.0)
        sigma_hat_bc = sigma_cur_bc + gamma_bc * sigma_cur_bc
        t_hat_bc = self.sigma_inv_fn(sigma_hat_bc)
        # Noise scale: sqrt(sigma_hat^2 - sigma_cur^2) * S_noise * g(x,t) / sqrt(2*t)
        g_sq_bc = self.diffusion_fn(x, t_cur)
        safe_t_cur_bc = torch.where(t_cur_bc == 0, torch.ones_like(t_cur_bc), t_cur_bc)
        noise_scale_bc = (
            (sigma_hat_bc**2 - sigma_cur_bc**2).clamp(min=0).sqrt()
            * self.S_noise
            * (g_sq_bc / (2 * safe_t_cur_bc)).sqrt()
        )
        noise_scale_bc = torch.where(
            t_cur_bc == 0, torch.zeros_like(noise_scale_bc), noise_scale_bc
        )

        # Perturb latent with noise
        x_hat = x + noise_scale_bc * torch.randn_like(x)

        # Euler step from t_hat to intermediate point (predictor)
        t_hat = t_hat_bc.reshape(x.shape[0])
        h_bc = t_next_bc - t_hat_bc
        d_cur = self.denoiser(x_hat, t_hat)
        t_prime_bc = t_hat_bc + self.alpha * h_bc
        x_prime = x_hat + self.alpha * h_bc * d_cur

        # Mask for elements where t_next != 0 (need 2nd order correction)
        mask_bc = (t_next_bc != 0).float()

        # Second RHS evaluation (compute everywhere, masked later)
        t_prime = t_prime_bc.reshape(x.shape[0])
        # Avoid issues by using t_hat where t_prime would be 0
        t_prime_safe = torch.where(t_prime == 0, t_hat, t_prime)
        d_prime = self.denoiser(x_prime, t_prime_safe)

        # Apply 2nd order correction only where t_next != 0
        w_cur = 1 - 1 / (2 * self.alpha)
        w_prime = 1 / (2 * self.alpha)
        x_euler = x_hat + h_bc * d_cur
        x_heun = x_hat + h_bc * (w_cur * d_cur + w_prime * d_prime)
        x_next = mask_bc * x_heun + (1 - mask_bc) * x_euler

        return x_next
protocols Signed-off-by: Charlelie Laurent --- .../diffusion/guidance/dps_guidance.py | 106 +++++++++--------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/physicsnemo/diffusion/guidance/dps_guidance.py b/physicsnemo/diffusion/guidance/dps_guidance.py index 0de3be9384..95ed26139b 100644 --- a/physicsnemo/diffusion/guidance/dps_guidance.py +++ b/physicsnemo/diffusion/guidance/dps_guidance.py @@ -22,7 +22,7 @@ from jaxtyping import Bool, Float from torch import Tensor -from physicsnemo.diffusion.base import DiffusionDenoiser +from physicsnemo.diffusion.base import Denoiser, Predictor @runtime_checkable @@ -33,8 +33,8 @@ class DPSGuidance(Protocol): A DPS guidance is a callable that computes a guidance term to steer the diffusion sampling process toward satisfying some observation constraint. - A ``DPSGuidance`` is expected to be a score-predictor, as it returns a quantity - analogous to a score. + It returns a quantity analogous to a likelihood score, which is typically + added to the unconditional score during sampling. The typical form is: @@ -49,13 +49,13 @@ class DPSGuidance(Protocol): a quantity similar to a score (e.g., a likelihood score). This is the minimal interface for guidance, and any object that implements - this interface can be used with diffusion utilities such as - :class:`DPSDenoiser` or - :meth:`~physicsnemo.diffusion.noise_schedulers.get_denoiser`. + this interface can be used with :class:`DPSDenoiser` to build a guided + score-predictor, which implements the + :class:`~physicsnemo.diffusion.Predictor` interface. See Also -------- - :class:`DPSDenoiser` : Combines a denoiser with one or more guidances. + :class:`DPSDenoiser` : Combines an x0-predictor with one or more guidances. Examples -------- @@ -117,8 +117,8 @@ class DPSGuidance(Protocol): ... # Step 4: Sum and return ... return score + guidance_term ... 
- >>> # guided_denoiser is now a DiffusionDenoiser (score predictor), - >>> # and can be used with any sampling utility that expects this interface + >>> # guided_denoiser is now a Predictor (score predictor); pass it to + >>> # scheduler.get_denoiser(score_predictor=...) to obtain a Denoiser >>> x = torch.randn(1, 3, 8, 8) >>> t = torch.tensor([1.0]) >>> output = guided_denoiser(x, t) @@ -126,7 +126,7 @@ class DPSGuidance(Protocol): torch.Size([1, 3, 8, 8]) Note: :class:`DPSDenoiser` provides a convenient way to apply one or more - guidances to a denoiser without manually implementing the above pattern. + guidances to an x0-predictor without manually implementing the above pattern. """ def __call__( @@ -161,15 +161,16 @@ def __call__( ... -class DPSDenoiser(DiffusionDenoiser): +class DPSDenoiser(Denoiser): r""" - Denoiser that combines an x0-predictor with DPS-style guidance. + Score predictor that combines an x0-predictor with DPS-style guidance. - This class transforms a :class:`~physicsnemo.diffusion.DiffusionDenoiser` - (specifically, an **x0-predictor**) into another - :class:`~physicsnemo.diffusion.DiffusionDenoiser` (a **score predictor**) - by applying one or more DPS guidances. The resulting denoiser can be used - directly with ODE/SDE solvers and sampling utilities. + This class transforms a :class:`~physicsnemo.diffusion.Predictor` + (specifically, an **x0-predictor**) into a score-predictor + (with the :class:`~physicsnemo.diffusion.Denoiser` interface) by applying one or more DPS + guidances. The resulting score-predictor can be passed to + :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.get_denoiser` + to obtain a :class:`~physicsnemo.diffusion.Denoiser` for sampling. The output is the sum of the unconditional score (derived from the x0-prediction) and all guidance terms: @@ -186,19 +187,20 @@ class DPSDenoiser(DiffusionDenoiser): .. 
code-block:: python - def guidance(x: Tensor, t: Tensor, x_0: Tensor) -> Tensor: - # x: noisy latent state at time t, shape (B, *) - # t: diffusion time, shape (B,) - # x_0: estimated clean state, shape (B, *) - # returns: guidance term, shape (B, *) - ... + def guidance( + x: Tensor, # shape: (B, *dims) + t: Tensor, # shape: (B,) + x_0: Tensor, # shape: (B, *dims) + ) -> Tensor: ... # guidance term, shape: (B, *dims) Parameters ---------- - denoiser_in : DiffusionDenoiser - Input denoiser that takes ``(x, t)`` and returns an estimate of the - clean data :math:`\hat{\mathbf{x}}_0`. This is typically an x0-predictor - obtained from a trained diffusion model. + x0_predictor : Predictor + A :class:`~physicsnemo.diffusion.Predictor` that takes ``(x, t)`` + and returns an estimate of the clean data + :math:`\hat{\mathbf{x}}_0`. Typically obtained from a trained + :class:`~physicsnemo.diffusion.DiffusionModel` via + ``functools.partial``. x0_to_score_fn : Callable[[Tensor, Tensor, Tensor], Tensor] Callback to convert x0-prediction to score. Signature: ``x0_to_score_fn(x_0, x, t) -> score``. Typically obtained from a noise @@ -211,8 +213,9 @@ def guidance(x: Tensor, t: Tensor, x_0: Tensor) -> Tensor: See Also -------- :class:`DPSGuidance` : Protocol for guidance implementations. - :func:`~physicsnemo.diffusion.samplers.sample` : Sampling function that - uses denoisers. + :class:`~physicsnemo.diffusion.Predictor` : Protocol satisfied by this class. + :meth:`~physicsnemo.diffusion.noise_schedulers.NoiseScheduler.get_denoiser` : + Converts the score-predictor to a denoiser for sampling. Examples -------- @@ -242,9 +245,9 @@ def guidance(x: Tensor, t: Tensor, x_0: Tensor) -> Tensor: >>> y_obs = torch.randn(1, 3, 8, 8) >>> guidance = InpaintGuidance(mask, y_obs) >>> - >>> # Create DPS denoiser + >>> # Create DPS score predictor >>> dps_denoiser = DPSDenoiser( - ... denoiser_in=x0_predictor, + ... x0_predictor=x0_predictor, ... x0_to_score_fn=x0_to_score_fn, ... 
guidances=guidance, ... ) @@ -289,7 +292,7 @@ def guidance(x: Tensor, t: Tensor, x_0: Tensor) -> Tensor: >>> >>> # Combine multiple guidances >>> dps_denoiser = DPSDenoiser( - ... denoiser_in=x0_predictor, + ... x0_predictor=x0_predictor, ... x0_to_score_fn=scheduler.x0_to_score, ... guidances=[guidance1, guidance2], ... ) @@ -303,14 +306,14 @@ def guidance(x: Tensor, t: Tensor, x_0: Tensor) -> Tensor: def __init__( self, - denoiser_in: DiffusionDenoiser, + x0_predictor: Predictor, x0_to_score_fn: Callable[ [Float[Tensor, " B *dims"], Float[Tensor, " B *dims"], Float[Tensor, " B"]], Float[Tensor, " B *dims"], ], guidances: DPSGuidance | Sequence[DPSGuidance], ) -> None: - self.denoiser_in = denoiser_in + self.x0_predictor = x0_predictor self.x0_to_score_fn = x0_to_score_fn # Normalize guidances to a list if isinstance(guidances, Sequence) and not isinstance(guidances, str): @@ -342,7 +345,7 @@ def __call__( x = x.detach().requires_grad_(True) with torch.enable_grad(): - x_0 = self.denoiser_in(x, t) + x_0 = self.x0_predictor(x, t) guidance_sum = torch.zeros_like(x) for guidance in self.guidances: guidance_sum += guidance(x, t, x_0) @@ -379,20 +382,18 @@ class ModelConsistencyDPSGuidance(DPSGuidance): .. code-block:: python - def observation_operator(x_0: Tensor) -> Tensor: - # x_0: estimated clean state, shape (B, *) - # returns: predicted observations, same shape (B, *obs_dims) as y - ... + def observation_operator( + x_0: Tensor, # shape: (B, *dims) + ) -> Tensor: ... # predicted observations, shape: (B, *obs_dims) When ``norm`` is a callable, it must have the following signature: .. code-block:: python def norm( - y_pred, # Shape: (B, *obs_dims) - y_true, # Shape: (B, *obs_dims) - ) -> Tensor: # Scalar loss per batch element, shape: (B,) - ... + y_pred: Tensor, # shape: (B, *obs_dims) + y_true: Tensor, # shape: (B, *obs_dims) + ) -> Tensor: ... 
# scalar loss per batch element, shape: (B,) Parameters ---------- @@ -438,7 +439,7 @@ def norm( -------- :class:`DataConsistencyDPSGuidance` : Simplified guidance for masked observations. - :class:`DPSDenoiser` : Combines a denoiser with one or more guidances. + :class:`DPSDenoiser` : Combines an x0-predictor with one or more guidances. Examples -------- @@ -485,7 +486,7 @@ def norm( ... return (x_0 - x) / (t_bc ** 2) ... >>> dps_denoiser = DPSDenoiser( - ... denoiser_in=x0_predictor, + ... x0_predictor=x0_predictor, ... x0_to_score_fn=x0_to_score_fn, ... guidances=guidance, ... ) @@ -528,7 +529,7 @@ def norm( >>> # Use with DPSDenoiser and scheduler's x0_to_score >>> x0_predictor = lambda x, t: x * 0.9 >>> dps_denoiser = DPSDenoiser( - ... denoiser_in=x0_predictor, + ... x0_predictor=x0_predictor, ... x0_to_score_fn=scheduler.x0_to_score, ... guidances=guidance, ... ) @@ -685,10 +686,9 @@ class DataConsistencyDPSGuidance(DPSGuidance): .. code-block:: python def norm( - y_pred, # Shape: (B, *obs_dims) - y_true, # Shape: (B, *obs_dims) - ) -> Tensor: # Scalar loss per batch element, shape: (B,) - ... + y_pred: Tensor, # shape: (B, *obs_dims) + y_true: Tensor, # shape: (B, *obs_dims) + ) -> Tensor: ... # scalar loss per batch element, shape: (B,) Parameters ---------- @@ -733,7 +733,7 @@ def norm( -------- :class:`ModelConsistencyDPSGuidance` : Guidance for general observation operators. - :class:`DPSDenoiser` : Combines a denoiser with one or more guidances. + :class:`DPSDenoiser` : Combines an x0-predictor with one or more guidances. Examples -------- @@ -772,7 +772,7 @@ def norm( ... return (x_0 - x) / (t_bc ** 2) ... >>> dps_denoiser = DPSDenoiser( - ... denoiser_in=x0_predictor, + ... x0_predictor=x0_predictor, ... x0_to_score_fn=x0_to_score_fn, ... guidances=guidance, ... ) @@ -819,7 +819,7 @@ def norm( >>> # Use with DPSDenoiser and scheduler's x0_to_score >>> x0_predictor = lambda x, t: x * 0.9 >>> dps_denoiser = DPSDenoiser( - ... 
denoiser_in=x0_predictor, + ... x0_predictor=x0_predictor, ... x0_to_score_fn=scheduler.x0_to_score, ... guidances=guidance, ... ) From 0d3e2ed978312dc785983beece182c699f894bc2 Mon Sep 17 00:00:00 2001 From: Charlelie Laurent Date: Tue, 17 Feb 2026 19:13:21 -0800 Subject: [PATCH 13/14] Added retain_graph option in dps_guidance.py to be able to chain multiple guidances into a single denoiser Signed-off-by: Charlelie Laurent --- .../diffusion/guidance/dps_guidance.py | 80 ++++++++++++++++++- 1 file changed, 78 insertions(+), 2 deletions(-) diff --git a/physicsnemo/diffusion/guidance/dps_guidance.py b/physicsnemo/diffusion/guidance/dps_guidance.py index 95ed26139b..ef970caaf2 100644 --- a/physicsnemo/diffusion/guidance/dps_guidance.py +++ b/physicsnemo/diffusion/guidance/dps_guidance.py @@ -193,6 +193,16 @@ def guidance( x_0: Tensor, # shape: (B, *dims) ) -> Tensor: ... # guidance term, shape: (B, *dims) + .. important:: + + When using **multiple guidances** that internally call + ``torch.autograd.grad`` (e.g., :class:`ModelConsistencyDPSGuidance` + or :class:`DataConsistencyDPSGuidance`), each guidance except the last + must be constructed with ``retain_graph=True``. Otherwise the + computational graph is destroyed after the first guidance computes its + gradient and subsequent guidances will fail. With a **single guidance** + this is not needed. + Parameters ---------- x0_predictor : Predictor @@ -302,6 +312,44 @@ def guidance( >>> output = dps_denoiser(x, t) >>> output.shape torch.Size([2, 3, 8, 8]) + + **Example 3:** Multiple autograd-based guidances require + ``retain_graph=True`` on all but the last: + + >>> import torch + >>> from physicsnemo.diffusion.guidance import ( + ... DPSDenoiser, + ... DataConsistencyDPSGuidance, + ... 
) + >>> from physicsnemo.diffusion.noise_schedulers import EDMNoiseScheduler + >>> + >>> scheduler = EDMNoiseScheduler() + >>> x0_predictor = lambda x, t: x * 0.9 + >>> + >>> mask1 = torch.zeros(1, 3, 8, 8, dtype=torch.bool) + >>> mask1[:, :, 2, 3] = True + >>> mask2 = torch.zeros(1, 3, 8, 8, dtype=torch.bool) + >>> mask2[:, :, 5, 6] = True + >>> y_obs = torch.randn(1, 3, 8, 8) + >>> + >>> # First guidance retains the graph for the second one + >>> g1 = DataConsistencyDPSGuidance( + ... mask=mask1, y=y_obs, std_y=0.1, retain_graph=True, + ... ) + >>> # Last guidance does not need retain_graph + >>> g2 = DataConsistencyDPSGuidance( + ... mask=mask2, y=y_obs, std_y=0.1, + ... ) + >>> + >>> dps = DPSDenoiser( + ... x0_predictor=x0_predictor, + ... x0_to_score_fn=scheduler.x0_to_score, + ... guidances=[g1, g2], + ... ) + >>> x = torch.randn(1, 3, 8, 8) + >>> t = torch.tensor([1.0]) + >>> dps(x, t).shape + torch.Size([1, 3, 8, 8]) """ def __init__( @@ -426,6 +474,15 @@ def norm( obtained from a noise scheduler, e.g., :meth:`~physicsnemo.diffusion.noise_schedulers.LinearGaussianNoiseScheduler.alpha` for a linear-Gaussian noise schedule. + retain_graph : bool, default=False + If ``True``, the computational graph is retained after computing + gradients. Required when combining multiple autograd-based guidances + in a single :class:`DPSDenoiser` — all guidances except the last + must set this to ``True``. + create_graph : bool, default=False + If ``True``, a graph of the derivative is constructed, allowing + higher-order derivatives (e.g., differentiating through the entire + sampling process). 
Note ---- @@ -583,6 +640,8 @@ def __init__( | None = None, alpha_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]] | None = None, + retain_graph: bool = False, + create_graph: bool = False, ) -> None: if gamma > 0 and sigma_fn is None: raise ValueError("sigma_fn must be provided when gamma > 0") @@ -597,6 +656,8 @@ def __init__( self.alpha_fn = ( alpha_fn if alpha_fn is not None else lambda t: torch.ones_like(t) ) + self.retain_graph = retain_graph + self.create_graph = create_graph def __call__( self, @@ -644,7 +705,8 @@ def __call__( grad_x = torch.autograd.grad( outputs=loss.sum(), inputs=x, - create_graph=False, + retain_graph=self.retain_graph, + create_graph=self.create_graph, )[0] # Compute scaling factor @@ -720,6 +782,15 @@ def norm( Optional; defaults to :math:`\alpha(t) = 1` if not provided. For example, use :meth:`~physicsnemo.diffusion.noise_schedulers.LinearGaussianNoiseScheduler.alpha` for a linear-Gaussian noise schedule. + retain_graph : bool, default=False + If ``True``, the computational graph is retained after computing + gradients. Required when combining multiple autograd-based guidances + in a single :class:`DPSDenoiser` — all guidances except the last + must set this to ``True``. + create_graph : bool, default=False + If ``True``, a graph of the derivative is constructed, allowing + higher-order derivatives (e.g., differentiating through the entire + sampling process). 
Note ---- @@ -873,6 +944,8 @@ def __init__( | None = None, alpha_fn: Callable[[Float[Tensor, " *shape"]], Float[Tensor, " *shape"]] | None = None, + retain_graph: bool = False, + create_graph: bool = False, ) -> None: if gamma > 0 and sigma_fn is None: raise ValueError("sigma_fn must be provided when gamma > 0") @@ -887,6 +960,8 @@ def __init__( self.alpha_fn = ( alpha_fn if alpha_fn is not None else lambda t: torch.ones_like(t) ) + self.retain_graph = retain_graph + self.create_graph = create_graph def __call__( self, @@ -936,7 +1011,8 @@ def __call__( grad_x = torch.autograd.grad( outputs=loss.sum(), inputs=x, - create_graph=False, + retain_graph=self.retain_graph, + create_graph=self.create_graph, )[0] # Compute scaling factor From 1afbfe438d8edab46059ada51d33f421cccb1b12 Mon Sep 17 00:00:00 2001 From: Charlelie Laurent Date: Tue, 17 Feb 2026 19:22:55 -0800 Subject: [PATCH 14/14] Minor variable renaming in noise_schedulers.py Signed-off-by: Charlelie Laurent --- .../diffusion/noise_schedulers/noise_schedulers.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/physicsnemo/diffusion/noise_schedulers/noise_schedulers.py b/physicsnemo/diffusion/noise_schedulers/noise_schedulers.py index d553635816..932d484729 100644 --- a/physicsnemo/diffusion/noise_schedulers/noise_schedulers.py +++ b/physicsnemo/diffusion/noise_schedulers/noise_schedulers.py @@ -587,11 +587,11 @@ def diffusion( sigma_dot_t_bc = self.sigma_dot(t_bc) alpha_t_bc = self.alpha(t_bc) alpha_dot_t_bc = self.alpha_dot(t_bc) - g_sq = ( + g_sq_bc = ( 2 * sigma_dot_t_bc * sigma_t_bc - 2 * (alpha_dot_t_bc / alpha_t_bc) * sigma_t_bc**2 ) - return g_sq + return g_sq_bc def x0_to_score( self, @@ -765,8 +765,8 @@ def ode_denoiser( ) -> Float[Tensor, " B *dims"]: score = score_fn(x, t) f = drift(x, t) - g_sq = diffusion(x, t) - dx_dt = f - 0.5 * g_sq * score + g_sq_bc = diffusion(x, t) + dx_dt = f - 0.5 * g_sq_bc * score return dx_dt return ode_denoiser @@ -779,10 +779,10 @@ def 
sde_denoiser( ) -> Float[Tensor, " B *dims"]: score = score_fn(x, t) f = drift(x, t) - g_sq = diffusion(x, t) + g_sq_bc = diffusion(x, t) # Deterministic part of the SDE drift # Note: stochastic term g(t)*dW is handled by the solver - dx_dt = f - g_sq * score + dx_dt = f - g_sq_bc * score return dx_dt return sde_denoiser