34 changes: 2 additions & 32 deletions README.md
@@ -40,8 +40,7 @@ All models are trained exclusively on **public academic datasets**.


## 📰 News
- **25-11-2025:** Add [Awesome DA3 Projects](#-awesome-da3-projects), a community-driven section featuring DA3-based applications.
- **14-11-2025:** Paper, project page, code and models are all released.
- **2025-11-14:** 🎉 Paper, project page, code and models are all released.

## ✨ Highlights

@@ -83,7 +82,7 @@ We introduce a new benchmark to rigorously evaluate geometry prediction models o
### 📦 Installation

```bash
pip install xformers torch\>=2 torchvision
pip install torch\>=2 torchvision
pip install -e . # Basic
pip install --no-build-isolation git+https://github.com/nerfstudio-project/gsplat.git@0b4dddf04cb687367602c01196913cde6a743d70 # for gaussian head
pip install -e ".[app]" # Gradio, python>=3.10
@@ -224,38 +223,9 @@ The Nested series uses an Any-view model to estimate pose and depth, and a monoc

## ❓ FAQ

- **Monocular Metric Depth**: To obtain metric depth in meters from `DA3METRIC-LARGE`, use `metric_depth = focal * net_output / 300.`, where `focal` is the focal length in pixels (typically the average of fx and fy from the camera intrinsic matrix K). Note that the output from `DA3NESTED-GIANT-LARGE` is already in meters.
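
  For a concrete example, a minimal sketch of this conversion (the helper name is illustrative; `net_output` and `K` follow the formula above):

  ```python
  def to_metric_depth(net_output, K):
      """Convert raw DA3METRIC-LARGE output to metric depth in meters, given the 3x3 intrinsics K."""
      focal = (K[0, 0] + K[1, 1]) / 2.0   # average of fx and fy, in pixels
      return focal * net_output / 300.0
  ```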

- **Ray Head**: Our API and CLI support a `use_ray_head` argument. When enabled, the model derives the camera pose from the ray head, which is generally slightly slower but more accurate. The default is `False` for faster inference; a usage sketch follows the results table below.
<details>
<summary>AUC3 Results for DA3NESTED-GIANT-LARGE</summary>

| Model | HiRoom | ETH3D | DTU | 7Scenes | ScanNet++ |
|-------|------|-------|-----|---------|-----------|
| `ray_head` | 84.4 | 52.6 | 93.9 | 29.5 | 89.4 |
| `cam_head` | 80.3 | 48.4 | 94.1 | 28.5 | 85.0 |

</details>
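
A minimal usage sketch is below. Only the `use_ray_head` argument is taken from the note above; the checkpoint identifier and method names are illustrative assumptions, not the confirmed API.

```python
# Hypothetical sketch: enable the ray head when estimating camera pose.
# Only `use_ray_head` is documented above; the constructor and inference
# method names here are assumptions and may differ from the actual API.
from depth_anything_3.api import DepthAnything3

image_paths = ["view_0.png", "view_1.png"]  # example multi-view inputs
model = DepthAnything3.from_pretrained("depth-anything/DA3NESTED-GIANT-LARGE")  # illustrative checkpoint id
prediction = model.inference(image_paths, use_ray_head=True)  # slower, but pose is generally more accurate
```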




- **Older GPUs without XFormers support**: See [Issue #11](https://github.com/ByteDance-Seed/Depth-Anything-3/issues/11). Thanks to [@S-Mahoney](https://github.com/S-Mahoney) for the solution!


## 🏢 Awesome DA3 Projects

A community-curated list of Depth Anything 3 integrations across 3D tools, creative pipelines, robotics, and web/VR viewers. You are welcome to submit your DA3-based project via PR; we will review it and feature it if applicable.

- [DA3-blender](https://github.com/xy-gao/DA3-blender): Blender addon for DA3-based 3D reconstruction from a set of images.

- [ComfyUI-DepthAnythingV3](https://github.com/PozzettiAndrea/ComfyUI-DepthAnythingV3): ComfyUI nodes for Depth Anything 3, supporting single/multi-view and video-consistent depth with optional point‑cloud export.

- [DA3-ROS2-Wrapper](https://github.com/GerdsenAI/GerdsenAI-Depth-Anything-3-ROS2-Wrapper): Real-time DA3 depth in ROS2 with multi-camera support.

- [VideoDepthViewer3D](https://github.com/amariichi/VideoDepthViewer3D): Streaming videos with DA3 metric depth to a Three.js/WebXR 3D viewer for VR/stereo playback.


## 📝 Citations
If you find Depth Anything 3 useful in your research or projects, please cite our work:

6 changes: 5 additions & 1 deletion pyproject.toml
@@ -7,7 +7,7 @@ name = "depth-anything-3"
version = "0.0.0"
description = "Depth Anything 3"
readme = "README.md"
requires-python = ">=3.9, <=3.13"
requires-python = ">=3.10, <=3.13"
license = { text = "Apache-2.0" }
authors = [{ name = "Your Name" }]

@@ -39,6 +39,7 @@ dependencies = [
"moviepy==1.0.3",
"typer>=0.9.0",
"pycolmap",
"pyzmq>=27.1.0",
]

[project.optional-dependencies]
@@ -69,6 +70,9 @@ include = [
[tool.hatch.metadata]
allow-direct-references = true

[tool.uv]
extra-build-dependencies = { gsplat = ["torch"] }

[tool.mypy]
plugins = ["jaxtyping.mypy_plugin"]

11 changes: 10 additions & 1 deletion src/depth_anything_3/api.py
@@ -344,13 +344,22 @@ def _align_to_input_extrinsics_intrinsics(
if extrinsics is None:
return prediction
prediction.intrinsics = intrinsics.numpy()
_, _, scale, aligned_extrinsics = align_poses_umeyama(
alignment_result = align_poses_umeyama(
prediction.extrinsics,
extrinsics.numpy(),
ransac=len(extrinsics) >= ransac_view_thresh,
return_aligned=True,
random_state=42,
)
if alignment_result is None:
# Alignment failed due to degenerate poses (insufficient camera motion)
# Fall back to using model-estimated poses without alignment
logger.warning(
"Pose alignment failed due to insufficient camera motion. "
"Using model-estimated poses and relative depth."
)
return prediction
_, _, scale, aligned_extrinsics = alignment_result
if align_to_input_ext_scale:
prediction.extrinsics = extrinsics[..., :3, :].numpy()
prediction.depth /= scale
4 changes: 4 additions & 0 deletions src/depth_anything_3/utils/logger.py
@@ -64,6 +64,10 @@ def error(self, *args, **kwargs):
def warn(self, *args, **kwargs):
self.log("WARN:", *args, **kwargs)

def warning(self, *args, **kwargs):
"""Alias for warn() to match standard logging interface."""
self.warn(*args, **kwargs)

def info(self, *args, **kwargs):
self.log("INFO:", *args, **kwargs)

51 changes: 40 additions & 11 deletions src/depth_anything_3/utils/pose_align.py
@@ -12,13 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List
import logging
from typing import List, Optional, Tuple

import numpy as np
import torch
from evo.core.geometry import GeometryException
from evo.core.trajectory import PosePath3D

from depth_anything_3.utils.geometry import affine_inverse, affine_inverse_np

logger = logging.getLogger(__name__)


def batch_apply_alignment_to_enc(
rots: torch.Tensor, trans: torch.Tensor, scales: torch.Tensor, enc_list: List[torch.Tensor]
@@ -81,12 +86,26 @@ def _poses_from_ext(ext_ref, ext_est):
return pose_ref, pose_est


def _umeyama_sim3_from_paths(pose_ref, pose_est):
path_ref = PosePath3D(poses_se3=pose_ref.copy())
path_est = PosePath3D(poses_se3=pose_est.copy())
r, t, s = path_est.align(path_ref, correct_scale=True)
pose_est_aligned = np.stack(path_est.poses_se3)
return r, t, s, pose_est_aligned
def _umeyama_sim3_from_paths(
pose_ref, pose_est
) -> Optional[Tuple[np.ndarray, np.ndarray, float, np.ndarray]]:
"""Compute Umeyama Sim(3) alignment between two pose trajectories.

Returns:
Tuple of (rotation, translation, scale, aligned_poses) if successful,
None if alignment fails due to degenerate poses (insufficient motion).
"""
try:
path_ref = PosePath3D(poses_se3=pose_ref.copy())
path_est = PosePath3D(poses_se3=pose_est.copy())
r, t, s = path_est.align(path_ref, correct_scale=True)
pose_est_aligned = np.stack(path_est.poses_se3)
return r, t, s, pose_est_aligned
except GeometryException as e:
logger.warning(
f"Umeyama alignment failed (degenerate poses - insufficient camera motion): {e}"
)
return None


def _apply_sim3_to_poses(poses, r, t, s):
@@ -149,7 +168,11 @@ def _ransac_align_sim3(

# Fit again with best inliers
if best_inliers is not None and best_inliers.sum() >= 3:
r, t, s, _ = _umeyama_sim3_from_paths(pose_ref[best_inliers], pose_est[best_inliers])
result = _umeyama_sim3_from_paths(pose_ref[best_inliers], pose_est[best_inliers])
if result is not None:
r, t, s, _ = result
else:
r, t, s = best_model
else:
r, t, s = best_model
return r, t, s
@@ -164,19 +187,25 @@ def align_poses_umeyama(
inlier_thresh=None,
ransac_max_iters=10,
random_state=None,
):
) -> Optional[Tuple]:
"""
Align estimated trajectory to reference using Umeyama Sim(3).
Default no RANSAC; if ransac=True, use RANSAC (max iterations default 10).
- sub_n defaults to half the number of frames (rounded up, at least 3)
- inlier_thresh defaults to median of "distance from each estimated pose to
nearest reference pose after pre-alignment"
Returns rotation (3x3), translation (3,), scale; optionally returns aligned extrinsics (4x4).

Returns:
If return_aligned=False: (rotation (3x3), translation (3,), scale) or None if alignment fails.
If return_aligned=True: (rotation, translation, scale, aligned_extrinsics (4x4)) or None if alignment fails.
"""
pose_ref, pose_est = _poses_from_ext(ext_ref, ext_est)

if not ransac:
r, t, s, pose_est_aligned = _umeyama_sim3_from_paths(pose_ref, pose_est)
result = _umeyama_sim3_from_paths(pose_ref, pose_est)
if result is None:
return None
r, t, s, pose_est_aligned = result
else:
r, t, s = _ransac_align_sim3(
pose_ref,
59 changes: 59 additions & 0 deletions test_imports.py
@@ -0,0 +1,59 @@
#!/usr/bin/env python3
"""Simple test to verify all imports work correctly."""

print("Testing imports...")
print("-" * 60)

try:
    import os
    print("✓ os")
except Exception as e:
    print(f"✗ os: {e}")

try:
    import numpy as np
    print(f"✓ numpy (version: {np.__version__})")
except Exception as e:
    print(f"✗ numpy: {e}")

try:
    import matplotlib
    import matplotlib.pyplot as plt
    print(f"✓ matplotlib (version: {matplotlib.__version__})")
except Exception as e:
    print(f"✗ matplotlib: {e}")
    import traceback
    traceback.print_exc()

try:
    from PIL import Image
    print("✓ PIL (Pillow)")
except Exception as e:
    print(f"✗ PIL: {e}")

try:
    import torch
    print(f"✓ torch (version: {torch.__version__})")
    print(f" - CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f" - CUDA version: {torch.version.cuda}")
        print(f" - GPU: {torch.cuda.get_device_name(0)}")
except Exception as e:
    print(f"✗ torch: {e}")

try:
    from depth_anything_3.api import DepthAnything3
    print("✓ depth_anything_3.api.DepthAnything3")
except Exception as e:
    print(f"✗ depth_anything_3.api: {e}")
    import traceback
    traceback.print_exc()

try:
    from depth_anything_3.utils.visualize import visualize_depth
    print("✓ depth_anything_3.utils.visualize.visualize_depth")
except Exception as e:
    print(f"✗ depth_anything_3.utils.visualize: {e}")

print("-" * 60)
print("All import tests completed!")