From 4b1bbaae95a89bcdab4f86fc2b0d1ab0b5f21bf3 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Wed, 25 Mar 2026 13:03:49 +0200
Subject: [PATCH 01/24] Add COCO annotation import/export support

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 dagshub/__init__.py                           |   2 +-
 dagshub/auth/token_auth.py                    |   2 +-
 dagshub/data_engine/annotation/importer.py    |  14 +-
 dagshub/data_engine/annotation/metadata.py    |  84 ++++++-
 dagshub/data_engine/model/datapoint.py        |  54 ++++-
 dagshub/data_engine/model/query_result.py     | 157 +++++++++----
 dagshub/data_engine/util/__init__.py          |   0
 dagshub/data_engine/util/not_implemented.py   |  48 ++++
 .../res/audio_annotation.json                 |  82 +++++++
 .../test_annotation_parsing.py                | 103 ++++++++-
 .../annotation_import/test_coco.py            | 218 ++++++++++++++++++
 tests/data_engine/conftest.py                 |   3 +-
 tests/mocks/repo_api.py                       |   4 +
 13 files changed, 693 insertions(+), 78 deletions(-)
 create mode 100644 dagshub/data_engine/util/__init__.py
 create mode 100644 dagshub/data_engine/util/not_implemented.py
 create mode 100644 tests/data_engine/annotation_import/res/audio_annotation.json
 create mode 100644 tests/data_engine/annotation_import/test_coco.py

diff --git a/dagshub/__init__.py b/dagshub/__init__.py
index 7f4d765d..b14e2564 100644
--- a/dagshub/__init__.py
+++ b/dagshub/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.6.5"
+__version__ = "0.6.7"
 from .logger import DAGsHubLogger, dagshub_logger
 from .common.init import init
 from .upload.wrapper import upload_files
diff --git a/dagshub/auth/token_auth.py b/dagshub/auth/token_auth.py
index 31ec32ac..7ba3a70a 100644
--- a/dagshub/auth/token_auth.py
+++ b/dagshub/auth/token_auth.py
@@ -37,7 +37,7 @@ def auth_flow(self, request: Request) -> Generator[Request, Response, None]:
 
     def can_renegotiate(self):
         # Env var tokens cannot renegotiate, every other token type can
-        return not type(self._token) is EnvVarDagshubToken
+        return type(self._token) is not EnvVarDagshubToken
 
     def renegotiate_token(self):
         if not self._token_storage.is_valid_token(self._token, self._host):
diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py
index c19212de..80e62468 100644
--- a/dagshub/data_engine/annotation/importer.py
+++ b/dagshub/data_engine/annotation/importer.py
@@ -3,6 +3,7 @@
 from tempfile import TemporaryDirectory
 from typing import TYPE_CHECKING, Literal, Optional, Union, Sequence, Mapping, Callable, List
 
+from dagshub_annotation_converter.converters.coco import load_coco_from_file
 from dagshub_annotation_converter.converters.cvat import load_cvat_from_zip
 from dagshub_annotation_converter.converters.yolo import load_yolo_from_fs
 from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask
@@ -16,7 +17,7 @@
 if TYPE_CHECKING:
     from dagshub.data_engine.model.datasource import Datasource
 
-AnnotationType = Literal["yolo", "cvat"]
+AnnotationType = Literal["yolo", "cvat", "coco"]
 AnnotationLocation = Literal["repo", "disk"]
 
 
@@ -85,6 +86,8 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]:
                 )
             elif self.annotations_type == "cvat":
                 annotation_dict = load_cvat_from_zip(annotations_file)
+            elif self.annotations_type == "coco":
+                annotation_dict, _ = load_coco_from_file(annotations_file)
 
             return annotation_dict
 
@@ -92,7 +95,6 @@ def download_annotations(self, dest_dir: Path):
         log_message("Downloading annotations from repository")
         repoApi = self.ds.source.repoApi
         if self.annotations_type == "cvat":
-            # Download just the annotation file
             repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True)
         elif self.annotations_type == "yolo":
             # Download the dataset .yaml file and the images + annotations
@@ -104,6 +106,8 @@ def download_annotations(self, dest_dir: Path):
             # Download the annotation data
             assert context.path is not None
             repoApi.download(self.annotations_file.parent / context.path, dest_dir, keep_source_prefix=True)
+        elif self.annotations_type == "coco":
+            repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True)
 
     @staticmethod
     def determine_load_location(ds: "Datasource", annotations_path: Union[str, Path]) -> AnnotationLocation:
@@ -153,8 +157,10 @@ def remap_annotations(
                 )
                 continue
             for ann in anns:
-                assert ann.filename is not None
-                ann.filename = remap_func(ann.filename)
+                if ann.filename is not None:
+                    ann.filename = remap_func(ann.filename)
+                else:
+                    ann.filename = new_filename
             remapped[new_filename] = anns
 
         return remapped
diff --git a/dagshub/data_engine/annotation/metadata.py b/dagshub/data_engine/annotation/metadata.py
index 8b5d632c..0b080e0f 100644
--- a/dagshub/data_engine/annotation/metadata.py
+++ b/dagshub/data_engine/annotation/metadata.py
@@ -1,25 +1,32 @@
-from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Union, Literal, Dict
+from typing import TYPE_CHECKING, Dict, Literal, Optional, Sequence, Tuple, Union
 
-from dagshub_annotation_converter.formats.label_studio.task import parse_ls_task, LabelStudioTask
-from dagshub_annotation_converter.formats.yolo import import_lookup, import_yolo_result, YoloContext
+from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask, parse_ls_task
+from dagshub_annotation_converter.formats.yolo import YoloContext, import_lookup, import_yolo_result
 from dagshub_annotation_converter.formats.yolo.categories import Categories
 from dagshub_annotation_converter.ir.image import (
-    IRBBoxImageAnnotation,
     CoordinateStyle,
-    IRSegmentationImageAnnotation,
-    IRSegmentationPoint,
+    IRBBoxImageAnnotation,
     IRPoseImageAnnotation,
     IRPosePoint,
+    IRSegmentationImageAnnotation,
+    IRSegmentationPoint,
 )
 from dagshub_annotation_converter.ir.image.annotations.base import IRAnnotationBase, IRImageAnnotationBase
 
 from dagshub.common.api import UserAPI
 from dagshub.common.helpers import log_message
+from dagshub.data_engine.util.not_implemented import NotImplementedMeta
 
 if TYPE_CHECKING:
-    from dagshub.data_engine.model.datapoint import Datapoint
     import ultralytics.engine.results
 
+    from dagshub.data_engine.model.datapoint import Datapoint
+
+from dagshub_annotation_converter.formats.label_studio.videorectangle import VideoRectangleAnnotation
+from dagshub_annotation_converter.formats.label_studio.task import task_lookup as _task_lookup
+
+_task_lookup["videorectangle"] = VideoRectangleAnnotation
+
 
 class AnnotationMetaDict(dict):
     def __init__(self, annotation: "MetadataAnnotations", *args, **kwargs):
@@ -269,6 +276,28 @@ def add_image_pose(
         self.annotations.append(ann)
         self._update_datapoint()
 
+    def add_coco_annotation(
+        self,
+        coco_json: str,
+    ):
+        """
+        Parse a COCO-format JSON string and attach every annotation in it to this datapoint.
+
+        Args:
+            coco_json: A COCO-format JSON string with ``categories``, ``images``, and ``annotations`` keys.
+        """
+        from dagshub_annotation_converter.converters.coco import load_coco_from_json_string
+
+        grouped, _ = load_coco_from_json_string(coco_json)
+        # Flatten the loader's grouping: every parsed annotation ends up on this datapoint
+        new_anns = [ann for anns in grouped.values() for ann in anns]
+        for ann in new_anns:
+            # Overwrite whatever filename the loader set with this datapoint's path
+            ann.filename = self.datapoint.path
+        self.annotations.extend(new_anns)
+        log_message(f"Added {len(new_anns)} COCO annotation(s) to datapoint {self.datapoint.path}")
+        self._update_datapoint()
+
     def add_yolo_annotation(
         self,
         annotation_type: Literal["bbox", "segmentation", "pose"],
@@ -315,3 +344,44 @@ def _generate_yolo_context(annotation_type, categories: Dict[int, str]) -> YoloC
         for cat_id, cat_name in categories.items():
             cats.add(cat_name, cat_id)
         return YoloContext(annotation_type=annotation_type, categories=cats)
+
+
+class UnsupportedMetadataAnnotations(MetadataAnnotations, metaclass=NotImplementedMeta):
+    def __init__(
+        self,
+        datapoint: "Datapoint",
+        field: str,
+        original_value: bytes,
+    ):
+        super().__init__(datapoint, field, None, None, original_value)
+
+    @property
+    def value(self) -> Optional[bytes]:
+        return self._original_value
+
+    def to_ls_task(self) -> Optional[bytes]:
+        return self._original_value
+
+    def __repr__(self):
+        return "Label Studio annotations of unrecognized type"
+
+
+class ErrorMetadataAnnotations(MetadataAnnotations, metaclass=NotImplementedMeta):
+    def __init__(
+        self,
+        datapoint: "Datapoint",
+        field: str,
+        error_message: str,
+    ):
+        super().__init__(datapoint, field, None, None, None)
+        self._error_message = error_message
+
+    @property
+    def value(self) -> Optional[bytes]:
+        raise ValueError(self._error_message)
+
+    def to_ls_task(self) -> Optional[bytes]:
+        raise ValueError(self._error_message)
+
+    def __repr__(self):
+        return f"Label Studio annotation download error: {self._error_message}"
diff --git a/dagshub/data_engine/model/datapoint.py b/dagshub/data_engine/model/datapoint.py
index b7aa89b5..f0c31925 100644
--- a/dagshub/data_engine/model/datapoint.py
+++ b/dagshub/data_engine/model/datapoint.py
@@ -3,14 +3,14 @@
 from dataclasses import dataclass
 from os import PathLike
 from pathlib import Path
-from typing import Optional, Union, List, Dict, Any, Callable, TYPE_CHECKING, Literal, Sequence
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Literal, Optional, Sequence, Union
 
-from tenacity import Retrying, stop_after_attempt, wait_exponential, before_sleep_log, retry_if_exception_type
+from tenacity import Retrying, before_sleep_log, retry_if_exception_type, stop_after_attempt, wait_exponential
 
 from dagshub.common.download import download_files
 from dagshub.common.helpers import http_request
 from dagshub.data_engine.annotation import MetadataAnnotations
-from dagshub.data_engine.client.models import MetadataSelectFieldSchema, DatapointHistoryResult
+from dagshub.data_engine.client.models import DatapointHistoryResult, MetadataSelectFieldSchema
 from dagshub.data_engine.dtypes import MetadataFieldType
 
 if TYPE_CHECKING:
@@ -25,6 +25,23 @@
 logger = logging.getLogger(__name__)
 
 
+@dataclass(frozen=True)
+class BlobHashMetadata:
+    hash: str
+
+    def __str__(self) -> str:
+        return self.hash
+
+    def __repr__(self) -> str:
+        return f"BlobHashMetadata(hash={self.hash!r})"
+
+
+class BlobDownloadError(Exception):
+    def __init__(self, message):
+        super().__init__(message)
+        self.message = message
+
+
 @dataclass
 class Datapoint:
     datapoint_id: int
@@ -128,6 +145,7 @@ def from_gql_edge(edge: Dict, datasource: "Datasource", fields: List[MetadataSel
 
         float_fields = {f.name for f in fields if f.valueType == MetadataFieldType.FLOAT}
         date_fields = {f.name for f in fields if f.valueType == MetadataFieldType.DATETIME}
+        blob_fields = {f.name for f in fields if f.valueType == MetadataFieldType.BLOB}
 
         for meta_dict in edge["node"]["metadata"]:
             key = meta_dict["key"]
@@ -138,6 +156,8 @@ def from_gql_edge(edge: Dict, datasource: "Datasource", fields: List[MetadataSel
                 if key in date_fields:
                     timezone = meta_dict.get("timeZone")
                     value = _datetime_from_timestamp(value / 1000, timezone or "+00:00")
+                elif key in blob_fields and isinstance(value, str):
+                    value = BlobHashMetadata(value)
             res.metadata[key] = value
         return res
 
@@ -164,7 +184,7 @@ def get_blob(self, column: str, cache_on_disk=True, store_value=False) -> bytes:
         if type(current_value) is bytes:
             # Bytes - it's already there!
             return current_value
-        if isinstance(current_value, Path):
+        elif isinstance(current_value, Path):
             # Path - assume the path exists and is already downloaded,
             #   because it's unlikely that the user has set it themselves
             with current_value.open("rb") as f:
@@ -173,18 +193,16 @@ def get_blob(self, column: str, cache_on_disk=True, store_value=False) -> bytes:
                 self.metadata[column] = content
             return content
 
-        elif type(current_value) is str:
-            # String - This is probably the hash of the blob, get that from dagshub
-            blob_url = self.blob_url(current_value)
-            blob_location = self.blob_cache_location / current_value
+        elif isinstance(current_value, BlobHashMetadata):
+            # Blob hash metadata - download blob from DagsHub
+            blob_url = self.blob_url(current_value.hash)
+            blob_location = self.blob_cache_location / current_value.hash
 
             # Make sure that the cache location exists
             if cache_on_disk:
                 self.blob_cache_location.mkdir(parents=True, exist_ok=True)
 
             content = _get_blob(blob_url, blob_location, self.datasource.source.repoApi.auth, cache_on_disk, True)
-            if type(content) is str:
-                raise RuntimeError(f"Error while downloading blob: {content}")
 
             if store_value:
                 self.metadata[column] = content
@@ -192,6 +210,11 @@ def get_blob(self, column: str, cache_on_disk=True, store_value=False) -> bytes:
                 self.metadata[column] = blob_location
 
             return content
+        elif isinstance(current_value, MetadataAnnotations):
+            ls_task = current_value.to_ls_task()
+            if ls_task is None:
+                return b""
+            return ls_task
         else:
             raise ValueError(f"Can't extract blob metadata from value {current_value} of type {type(current_value)}")
 
@@ -274,10 +297,17 @@ def _get_blob(
     """
     Args:
         url: url to download the blob from
-        cache_path: where the cache for the blob is (laods from it if exists, stores there if it doesn't)
+        cache_path: where the cache for the blob is (loads from it if exists, stores there if it doesn't)
         auth: auth to use for getting the blob
         cache_on_disk: whether to store the downloaded blob on disk. If False we also turn off the cache checking
         return_blob: if True returns the blob of the downloaded data, if False returns the path to the file with it
+        path_format: if return_blob is False, controls path representation. "path" returns Path, "str" returns str
+
+    Returns:
+        bytes, Path, or str path on success.
+
+    Raises:
+        BlobDownloadError on download failure.
     """
     if url is None:
         return None
@@ -313,7 +343,7 @@ def get():
             with attempt:
                 content = get()
     except Exception as e:
-        return f"Error while downloading binary blob: {e}"
+        raise BlobDownloadError(str(e)) from e
 
     if cache_on_disk:
         with cache_path.open("wb") as f:
diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py
index 6c326eab..6031e0bf 100644
--- a/dagshub/data_engine/model/query_result.py
+++ b/dagshub/data_engine/model/query_result.py
@@ -15,6 +15,8 @@
 import dacite
 import dagshub_annotation_converter.converters.yolo
 import rich.progress
+from dagshub_annotation_converter.converters.coco import export_to_coco_file
+from dagshub_annotation_converter.formats.coco import CocoContext
 from dagshub_annotation_converter.formats.yolo import YoloContext
 from dagshub_annotation_converter.formats.yolo.categories import Categories
 from dagshub_annotation_converter.formats.yolo.common import ir_mapping
@@ -30,6 +32,7 @@
 from dagshub.common.rich_util import get_rich_progress
 from dagshub.common.util import lazy_load, multi_urljoin
 from dagshub.data_engine.annotation import MetadataAnnotations
+from dagshub.data_engine.annotation.metadata import ErrorMetadataAnnotations, UnsupportedMetadataAnnotations
 from dagshub.data_engine.annotation.voxel_conversion import (
     add_ls_annotations,
     add_voxel_annotations,
@@ -37,7 +40,13 @@
 from dagshub.data_engine.client.loaders.base import DagsHubDataset
 from dagshub.data_engine.client.models import DatasourceType, MetadataSelectFieldSchema
 from dagshub.data_engine.dtypes import MetadataFieldType
-from dagshub.data_engine.model.datapoint import Datapoint, _generated_fields, _get_blob
+from dagshub.data_engine.model.datapoint import (
+    BlobDownloadError,
+    BlobHashMetadata,
+    Datapoint,
+    _generated_fields,
+    _get_blob,
+)
 from dagshub.data_engine.model.schema_util import dacite_config
 from dagshub.data_engine.voxel_plugin_server.utils import set_voxel_envvars
 
@@ -389,10 +398,9 @@ def get_blob_fields(
         for dp in self.entries:
             for fld in fields:
                 field_value = dp.metadata.get(fld)
-                # If field_value is a blob or a path, then ignore, means it's already been downloaded
-                if not isinstance(field_value, str):
+                if not isinstance(field_value, BlobHashMetadata):
                     continue
-                download_task = (dp, fld, dp.blob_url(field_value), dp.blob_cache_location / field_value)
+                download_task = (dp, fld, dp.blob_url(field_value.hash), dp.blob_cache_location / field_value.hash)
                 to_download.append(download_task)
 
         progress = get_rich_progress(rich.progress.MofNCompleteColumn())
@@ -402,8 +410,6 @@ def get_blob_fields(
 
         def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path):
             blob_or_path = _get_blob(url, blob_path, auth, cache_on_disk, load_into_memory, path_format)
-            if isinstance(blob_or_path, str) and path_format != "str":
-                logger.warning(f"Error while downloading blob for field {field} in datapoint {dp.path}:{blob_or_path}")
             dp.metadata[field] = blob_or_path
 
         with progress:
@@ -415,7 +421,7 @@ def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path):
                         logger.warning(f"Got exception {type(exc)} while downloading blob: {exc}")
                     progress.update(task, advance=1)
 
-        self._convert_annotation_fields(*fields, load_into_memory=load_into_memory)
+        self._convert_annotation_fields(*fields)
 
         # Convert any downloaded document fields
         document_fields = [f for f in fields if f in self.document_fields]
@@ -424,49 +430,63 @@ def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path):
         if document_fields:
             for dp in self:
                 for fld in document_fields:
-                    if fld in dp.metadata:
-                        # Override the load_into_memory flag, because we need the contents
-                        if not load_into_memory:
-                            dp.metadata[fld] = Path(dp.metadata[fld]).read_bytes()
-                        dp.metadata[fld] = dp.metadata[fld].decode("utf-8")
+                    if fld not in dp.metadata:
+                        continue
+                    try:
+                        content = dp.get_blob(fld)
+                        dp.metadata[fld] = content.decode("utf-8")
+                    except BlobDownloadError as e:
+                        logger.warning(f"Failed to download document field '{fld}' for datapoint '{dp.path}': {e}")
 
         return self
 
-    def _convert_annotation_fields(self, *fields, load_into_memory):
+    def _convert_annotation_fields(self, *fields):
         # Convert any downloaded annotation column
         annotation_fields = [f for f in fields if f in self.annotation_fields]
+        if not annotation_fields:
+            return
 
+        # List of datapoints with annotations that couldn't be parsed
         bad_annotations = defaultdict(list)
 
-        if annotation_fields:
-            # Convert them
-            for dp in self:
-                for fld in annotation_fields:
-                    if fld in dp.metadata:
-                        # Already loaded - skip
-                        if isinstance(dp.metadata[fld], MetadataAnnotations):
-                            continue
-                        # Override the load_into_memory flag, because we need the contents
-                        if not load_into_memory:
-                            dp.metadata[fld] = Path(dp.metadata[fld]).read_bytes()
-                        try:
-                            dp.metadata[fld] = MetadataAnnotations.from_ls_task(
-                                datapoint=dp, field=fld, ls_task=dp.metadata[fld]
-                            )
-                        except ValidationError:
-                            bad_annotations[fld].append(dp.path)
-                    else:
-                        dp.metadata[fld] = MetadataAnnotations(datapoint=dp, field=fld)
+        for dp in self:
+            for fld in annotation_fields:
+                metadata_value = dp.metadata.get(fld)
+                # No value - create empty annotation container
+                if metadata_value is None:
+                    dp.metadata[fld] = MetadataAnnotations(datapoint=dp, field=fld)
+                    continue
+                # Already loaded - skip
+                elif isinstance(metadata_value, MetadataAnnotations):
+                    continue
+                # Parse annotation from the content of the field
+                else:
+                    try:
+                        annotation_content = dp.get_blob(fld)
+                        dp.metadata[fld] = MetadataAnnotations.from_ls_task(
+                            datapoint=dp, field=fld, ls_task=annotation_content
+                        )
+                    except BlobDownloadError as e:
+                        dp.metadata[fld] = ErrorMetadataAnnotations(datapoint=dp, field=fld, error_message=e.message)
+                        bad_annotations[fld].append(dp.path)
+                    except ValidationError:
+                        dp.metadata[fld] = UnsupportedMetadataAnnotations(
+                            datapoint=dp, field=fld, original_value=annotation_content
+                        )
+                        bad_annotations[fld].append(dp.path)
 
         if bad_annotations:
             log_message(
-                "Warning: The following datapoints had invalid annotations, "
-                "any annotation-related operations will not work on these:"
+                "Warning: The following datapoints had unsupported or invalid annotations, "
+                "convenience functions like `add_bounding_box` won't work on these:"
             )
             err_msg = ""
             for fld, dps in bad_annotations.items():
-                err_msg += f'Field "{fld}" in datapoints:\n\t'
-                err_msg += "\n\t".join(dps)
+                err_msg += f'\nField "{fld}" in datapoints:\n\t'
+                if len(dps) > 10:
+                    err_msg += "\n\t".join(dps[:10]) + f"\n\t... and {len(dps) - 10} more"
+                else:
+                    err_msg += "\n\t".join(dps)
             log_message(err_msg)
 
     def download_binary_columns(
@@ -760,6 +780,16 @@ def _get_all_annotations(self, annotation_field: str) -> List[IRImageAnnotationB
                 annotations.extend(dp.metadata[annotation_field].annotations)
         return annotations
 
+    def _resolve_annotation_field(self, annotation_field: Optional[str]) -> str:
+        """Return ``annotation_field``, defaulting to the alphabetically first annotation field when None."""
+        if annotation_field is not None:
+            return annotation_field
+        candidates = sorted(f.name for f in self.fields if f.is_annotation())
+        if not candidates:
+            raise ValueError("No annotation fields found in the datasource")
+        log_message(f"Using annotations from field {candidates[0]}")
+        return candidates[0]
+
     def export_as_yolo(
         self,
         download_dir: Optional[Union[str, Path]] = None,
@@ -785,12 +815,7 @@ def export_as_yolo(
         Returns:
             The path to the YAML file with the metadata. Pass this path to ``YOLO.train()`` to train a model.
         """
-        if annotation_field is None:
-            annotation_fields = sorted([f.name for f in self.fields if f.is_annotation()])
-            if len(annotation_fields) == 0:
-                raise ValueError("No annotation fields found in the datasource")
-            annotation_field = annotation_fields[0]
-            log_message(f"Using annotations from field {annotation_field}")
+        annotation_field = self._resolve_annotation_field(annotation_field)
 
         if download_dir is None:
             download_dir = Path("dagshub_export")
@@ -843,6 +868,54 @@ def export_as_yolo(
         log_message(f"Done! Saved YOLO Dataset, YAML file is at {yaml_path.absolute()}")
         return yaml_path
 
+    def export_as_coco(
+        self,
+        download_dir: Optional[Union[str, Path]] = None,
+        annotation_field: Optional[str] = None,
+        output_filename: str = "annotations.json",
+        classes: Optional[Dict[int, str]] = None,
+    ) -> Path:
+        """
+        Downloads the files and exports annotations in COCO format.
+
+        Args:
+            download_dir: Where to download the files. Defaults to ``./dagshub_export``
+            annotation_field: Field with the annotations. If None, uses the first alphabetical annotation field.
+            output_filename: Name of the output COCO JSON file. Default is ``annotations.json``.
+            classes: Category mapping for the COCO dataset as ``{id: name}``.
+                If ``None``, categories will be inferred from the annotations.
+
+        Returns:
+            Path to the exported COCO JSON file.
+        """
+        annotation_field = self._resolve_annotation_field(annotation_field)
+
+        if download_dir is None:
+            download_dir = Path("dagshub_export")
+        download_dir = Path(download_dir)
+
+        annotations = self._get_all_annotations(annotation_field)
+        if not annotations:
+            raise RuntimeError("No annotations found to export")
+
+        context = CocoContext()
+        if classes is not None:
+            context.categories = dict(classes)
+
+        # Add the source prefix to all annotations
+        for ann in annotations:
+            ann.filename = os.path.join(self.datasource.source.source_prefix, ann.filename)
+
+        image_download_path = download_dir / "data"
+        log_message("Downloading image files...")
+        self.download_files(image_download_path)
+
+        output_path = download_dir / output_filename
+        log_message("Exporting COCO annotations...")
+        result_path = export_to_coco_file(annotations, output_path, context=context)
+        log_message(f"Done! Saved COCO annotations to {result_path.absolute()}")
+        return result_path
+
     def to_voxel51_dataset(self, **kwargs) -> "fo.Dataset":
         """
         Creates a voxel51 dataset that can be used with\
diff --git a/dagshub/data_engine/util/__init__.py b/dagshub/data_engine/util/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/dagshub/data_engine/util/not_implemented.py b/dagshub/data_engine/util/not_implemented.py
new file mode 100644
index 00000000..d9a81285
--- /dev/null
+++ b/dagshub/data_engine/util/not_implemented.py
@@ -0,0 +1,48 @@
+class NotImplementedMeta(type):
+    """
+    A metaclass that replaces every public (non-underscore) method and property inherited from the base classes
+    and not overridden in the subclass with a stub that raises NotImplementedError.
+    """
+
+    def __new__(mcs, name, bases, namespace, **kwargs):
+        # Inspect every attribute reachable from the base classes
+        for base in bases:
+            for attr_name in dir(base):
+                if attr_name.startswith("_"):
+                    continue
+
+                # Skip if already defined in subclass
+                if attr_name in namespace:
+                    continue
+
+                base_attr = getattr(base, attr_name)
+
+                # Handle properties
+                if isinstance(base_attr, property):
+                    # Build the stub via a factory so each closure captures its own property name
+                    def make_not_implemented_property(prop_name):
+                        def getter(self):
+                            raise NotImplementedError(f"Property '{prop_name}' not implemented")
+
+                        def setter(self, value):
+                            raise NotImplementedError(f"Property '{prop_name}' not implemented")
+
+                        def deleter(self):
+                            raise NotImplementedError(f"Property '{prop_name}' not implemented")
+
+                        return property(getter, setter, deleter)
+
+                    namespace[attr_name] = make_not_implemented_property(attr_name)
+
+                # Handle any other callable (regular, class and static methods)
+                elif callable(base_attr):
+                    # Stub takes no explicit ``self`` so class-level calls (class/static methods) raise too
+                    def make_not_implemented(method_name):
+                        def not_impl(*args, **kwargs):
+                            raise NotImplementedError(f"Method '{method_name}' not implemented")
+
+                        return not_impl
+
+                    namespace[attr_name] = make_not_implemented(attr_name)
+
+        return super().__new__(mcs, name, bases, namespace, **kwargs)
diff --git a/tests/data_engine/annotation_import/res/audio_annotation.json b/tests/data_engine/annotation_import/res/audio_annotation.json
new file mode 100644
index 00000000..adc356e2
--- /dev/null
+++ b/tests/data_engine/annotation_import/res/audio_annotation.json
@@ -0,0 +1,82 @@
+{
+  "id": 41,
+  "data": {
+    "audio": "https://example.com/some-non-existent-file.mp3",
+    "media type": "audio/mpeg",
+    "size": 111699
+  },
+  "meta": {
+    "datapoint_id": 12345678,
+    "datasource_id": 6565
+  },
+  "created_at": "2025-12-20T13:44:02.316027Z",
+  "updated_at": "2026-01-26T15:00:13.046967Z",
+  "is_labeled": true,
+  "project": 1,
+  "annotations": [
+    {
+      "completed_by": 1,
+      "result": [
+        {
+          "type": "choices",
+          "value": {
+            "choices": [
+              "true"
+            ]
+          },
+          "origin": "manual",
+          "to_name": "audio",
+          "from_name": "further_utterance",
+          "id": "deadbeef1"
+        },
+        {
+          "type": "rating",
+          "value": {
+            "rating": 1
+          },
+          "origin": "manual",
+          "to_name": "audio",
+          "from_name": "difficulty",
+          "id": "deadbeef1"
+        },
+        {
+          "type": "textarea",
+          "value": {
+            "text": [
+              "kirill@dagshub.com"
+            ]
+          },
+          "origin": "manual",
+          "to_name": "audio",
+          "from_name": "email_address",
+          "id": "deadbeef1"
+        },
+        {
+          "to_name": "audio",
+          "from_name": "first_name",
+          "id": "Qzu1dR2RQ8",
+          "type": "textarea",
+          "value": {
+            "text": [
+              "Kirill"
+            ]
+          },
+          "origin": "manual"
+        },
+        {
+          "type": "textarea",
+          "value": {
+            "text": [
+              "Bolashev"
+            ]
+          },
+          "origin": "manual",
+          "to_name": "audio",
+          "from_name": "last_name",
+          "id": "deadbeef1"
+        }
+      ],
+      "ground_truth": false
+    }
+  ]
+}
diff --git a/tests/data_engine/annotation_import/test_annotation_parsing.py b/tests/data_engine/annotation_import/test_annotation_parsing.py
index 66840ecb..c04b0d51 100644
--- a/tests/data_engine/annotation_import/test_annotation_parsing.py
+++ b/tests/data_engine/annotation_import/test_annotation_parsing.py
@@ -1,19 +1,24 @@
 import json
+from os import PathLike
 from pathlib import Path
+from typing import Union
 from unittest.mock import MagicMock
 
 import pytest
 from dagshub_annotation_converter.ir.image import IRSegmentationImageAnnotation
+from pytest import MonkeyPatch
 
 from dagshub.data_engine.annotation import MetadataAnnotations
+from dagshub.data_engine.annotation.metadata import ErrorMetadataAnnotations, UnsupportedMetadataAnnotations
 from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags
-from dagshub.data_engine.model import query_result
+from dagshub.data_engine.model import datapoint, query_result
+from dagshub.data_engine.model.datapoint import BlobDownloadError, BlobHashMetadata
 from dagshub.data_engine.model.datasource import Datasource
+from dagshub.data_engine.model.query_result import QueryResult
 from tests.data_engine.util import add_metadata_field
 
 _annotation_field_name = "annotation"
 _dp_path = "data/sample_datapoint.jpg"
-_annotation_hash = "annotation1"  # Corresponds to a resource JSON
 _res_folder = Path(__file__).parent / "res"
 
 
@@ -51,17 +56,24 @@ def mock_annotation_query_result(
     return query_result.QueryResult.from_gql_query(data_dict, ds)
 
 
-def mock_get_blob(*args, **kwargs) -> bytes:
+def mock_get_blob(*args, **kwargs) -> Union[bytes, PathLike]:
     download_url: str = args[0]
     blob_hash = download_url.split("/")[-1]
+    load_into_memory = args[4]
     blob_path = _res_folder / f"{blob_hash}.json"
-    if not blob_path.exists():
-        raise FileNotFoundError(f"Mock blob file not found: {blob_path}")
-    return blob_path.read_bytes()
 
+    try:
+        if not blob_path.exists():
+            raise FileNotFoundError(f"Blob with hash {blob_hash} not found in res folder")
+        if load_into_memory:
+            return blob_path.read_bytes()
+        else:
+            return blob_path
+    except Exception as e:
+        raise BlobDownloadError(str(e)) from e
 
-@pytest.fixture
-def ds_with_document_annotation(ds, monkeypatch):
+
+def _ds_with_annotation(ds: "Datasource", monkeypatch: MonkeyPatch, annotation_hash: str):
     add_metadata_field(
         ds,
         _annotation_field_name,
@@ -70,18 +82,89 @@ def ds_with_document_annotation(ds, monkeypatch):
     )
 
     ds.source.client.get_datapoints = MagicMock(
-        return_value=mock_annotation_query_result(ds, _annotation_field_name, _dp_path, _annotation_hash)
+        return_value=mock_annotation_query_result(ds, _annotation_field_name, _dp_path, annotation_hash)
     )
 
     monkeypatch.setattr(query_result, "_get_blob", mock_get_blob)
+    monkeypatch.setattr(datapoint, "_get_blob", mock_get_blob)
 
-    yield ds
+    return ds
+
+
+@pytest.fixture
+def ds_with_document_annotation(ds, monkeypatch):
+    yield _ds_with_annotation(ds, monkeypatch, "annotation1")
 
 
 def test_annotation_with_document_are_parsed_as_annotation(ds_with_document_annotation):
     qr = ds_with_document_annotation.all()
+    _test_annotation(qr)
+
+
+def test_double_loading_annotation_works(ds_with_document_annotation):
+    qr = ds_with_document_annotation.all()
+    qr.get_blob_fields(_annotation_field_name)
+    _test_annotation(qr)
+
+
+def _test_annotation(qr: QueryResult):
     annotation: MetadataAnnotations = qr[0].metadata[_annotation_field_name]
     assert isinstance(annotation, MetadataAnnotations)
     # Check that the annotation got parsed correctly, the JSON should have one segmentation annotation in it
     assert len(annotation.annotations) == 1
     assert isinstance(annotation.annotations[0], IRSegmentationImageAnnotation)
+
+
+@pytest.fixture
+def ds_with_unsupported_annotation(ds, monkeypatch):
+    yield _ds_with_annotation(ds, monkeypatch, "audio_annotation")
+
+
+def test_handling_unsupported_annotation(ds_with_unsupported_annotation):
+    """An annotation of an unsupported type is wrapped, rejects additions and returns its raw bytes."""
+    loaded = ds_with_unsupported_annotation.all()[0].metadata[_annotation_field_name]
+
+    assert isinstance(loaded, UnsupportedMetadataAnnotations)
+    # The wrapper has to remain a MetadataAnnotations subclass: the logic that checks whether
+    # annotation metadata was already parsed depends on it, so if this assertion starts failing,
+    # that logic will need to be changed too
+    assert isinstance(loaded, MetadataAnnotations)
+
+    # Adding annotations must be rejected
+    with pytest.raises(NotImplementedError):
+        loaded.add_image_bbox("cat", 0, 0, 10, 10, 1920, 1080)
+
+    raw_bytes = (_res_folder / "audio_annotation.json").read_bytes()
+    assert loaded.value == raw_bytes
+    assert loaded.to_ls_task() == raw_bytes
+
+
+@pytest.fixture
+def ds_with_nonexistent_annotation(ds, monkeypatch):
+    yield _ds_with_annotation(ds, monkeypatch, "nonexistent_annotation")
+
+
+def test_nonexistent_annotation(ds_with_nonexistent_annotation):
+    """A blob that cannot be fetched becomes an error annotation that raises with the download failure."""
+    qr = ds_with_nonexistent_annotation.all(load_documents=False, load_annotations=False)
+    qr.get_annotations()
+    failed = qr[0].metadata[_annotation_field_name]
+    missing_blob_message = "Blob with hash nonexistent_annotation not found in res folder"
+
+    assert isinstance(failed, ErrorMetadataAnnotations)
+    # The error wrapper has to remain a MetadataAnnotations subclass: the logic that checks whether
+    # annotation metadata was already parsed depends on it, so if this starts failing, update that logic too
+    assert isinstance(failed, MetadataAnnotations)
+
+    with pytest.raises(NotImplementedError):
+        failed.add_image_bbox("cat", 0, 0, 10, 10, 1920, 1080)
+
+    with pytest.raises(ValueError, match=missing_blob_message):
+        _ = failed.value
+    with pytest.raises(ValueError, match=missing_blob_message):
+        failed.to_ls_task()
+
+
+def test_blob_metadata_is_wrapped_from_backend(ds_with_document_annotation):
+    qr = ds_with_document_annotation.all(load_documents=False, load_annotations=False)
+    assert isinstance(qr[0].metadata[_annotation_field_name], BlobHashMetadata)
diff --git a/tests/data_engine/annotation_import/test_coco.py b/tests/data_engine/annotation_import/test_coco.py
new file mode 100644
index 00000000..9b238fd1
--- /dev/null
+++ b/tests/data_engine/annotation_import/test_coco.py
@@ -0,0 +1,218 @@
+import datetime
+import json
+from pathlib import PurePosixPath
+from unittest.mock import patch, PropertyMock
+
+import pytest
+from dagshub_annotation_converter.ir.image import (
+    IRBBoxImageAnnotation,
+    CoordinateStyle,
+)
+
+from dagshub.data_engine.annotation.importer import AnnotationImporter, AnnotationsNotFoundError
+from dagshub.data_engine.annotation.metadata import MetadataAnnotations
+from dagshub.data_engine.client.models import MetadataSelectFieldSchema
+from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags
+from dagshub.data_engine.model.datapoint import Datapoint
+from dagshub.data_engine.model.query_result import QueryResult
+
+
+@pytest.fixture(autouse=True)
+def mock_source_prefix(ds):
+    with patch.object(type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath()):
+        yield
+
+
+# --- import ---
+
+
+def test_import_coco_from_file(ds, tmp_path):
+    """A COCO file on disk is imported as exactly one bbox annotation for its only image."""
+    _write_coco(tmp_path, _make_coco_json())
+    imported = AnnotationImporter(ds, "coco", tmp_path / "annotations.json", load_from="disk").import_annotations()
+
+    assert "image1.jpg" in imported
+    assert len(imported["image1.jpg"]) == 1
+    assert isinstance(imported["image1.jpg"][0], IRBBoxImageAnnotation)
+
+
+def test_import_coco_nonexistent_raises(ds, tmp_path):
+    importer = AnnotationImporter(ds, "coco", tmp_path / "nope.json", load_from="disk")
+    with pytest.raises(AnnotationsNotFoundError):
+        importer.import_annotations()
+
+
+def test_coco_convert_to_ls_tasks(ds, tmp_path, mock_dagshub_auth):
+    """Converting one normalized bbox yields a JSON task for the file with a non-empty "annotations" list."""
+    importer = AnnotationImporter(ds, "coco", tmp_path / "ann.json", load_from="disk")
+    normalized_bbox = IRBBoxImageAnnotation(
+        filename="test.jpg", categories={"cat": 1.0},
+        top=0.1, left=0.1, width=0.2, height=0.2,
+        image_width=640, image_height=480,
+        coordinate_style=CoordinateStyle.NORMALIZED,
+    )
+    ls_tasks = importer.convert_to_ls_tasks({"test.jpg": [normalized_bbox]})
+
+    assert "test.jpg" in ls_tasks
+    decoded = json.loads(ls_tasks["test.jpg"])
+    assert "annotations" in decoded and len(decoded["annotations"]) > 0
+
+
+# --- add_coco_annotation ---
+
+
+def test_add_coco_annotation_rewrites_filename(ds, mock_dagshub_auth):
+    """The added annotation carries the datapoint's path, not the ``file_name`` from the COCO JSON."""
+    photo = Datapoint(datasource=ds, path="my_images/photo.jpg", datapoint_id=0, metadata={})
+    holder = MetadataAnnotations(datapoint=photo, field="ann")
+    holder.add_coco_annotation(json.dumps(_make_coco_json()))
+
+    assert len(holder.annotations) == 1 and isinstance(holder.annotations[0], IRBBoxImageAnnotation)
+    assert holder.annotations[0].filename == "my_images/photo.jpg"
+
+
+# --- _resolve_annotation_field ---
+
+
+def test_resolve_explicit_field(ds):
+    """An explicitly requested field name is returned as is, even though another annotation field exists."""
+    assert _make_qr(ds, [], ann_field="my_ann")._resolve_annotation_field("explicit") == "explicit"
+
+
+def test_resolve_auto_field(ds):
+    """Without an explicit name, the query result's only annotation field is picked."""
+    assert _make_qr(ds, [], ann_field="my_ann")._resolve_annotation_field(None) == "my_ann"
+
+
+def test_resolve_no_fields_raises(ds):
+    qr = _make_qr(ds, [], ann_field=None)
+    with pytest.raises(ValueError, match="No annotation fields"):
+        qr._resolve_annotation_field(None)
+
+
+def test_resolve_picks_alphabetically_first(ds):
+    """With several annotation fields and no explicit choice, the alphabetically first name wins."""
+    annotation_fields = [
+        MetadataSelectFieldSchema(
+            asOf=int(datetime.datetime.now().timestamp()), autoGenerated=False, originalName=field_name,
+            multiple=False, valueType=MetadataFieldType.BLOB, name=field_name, tags={ReservedTags.ANNOTATION.value},
+        )
+        for field_name in ("zebra_ann", "alpha_ann")  # deliberately listed out of alphabetical order
+    ]
+    qr = QueryResult(datasource=ds, _entries=[], fields=annotation_fields)
+    assert qr._resolve_annotation_field(None) == "alpha_ann"
+
+
+# --- export_as_coco ---
+
+
+def test_export_coco_bbox_coordinates(ds, tmp_path):
+    """The exported COCO bbox is ordered [left, top, width, height] and keeps the pixel values."""
+    datapoint = Datapoint(datasource=ds, path="images/test.jpg", datapoint_id=0, metadata={})
+    pixel_bbox = IRBBoxImageAnnotation(
+        filename="images/test.jpg", categories={"cat": 1.0},
+        top=20.0, left=10.0, width=30.0, height=40.0,
+        image_width=640, image_height=480,
+        coordinate_style=CoordinateStyle.DENORMALIZED,
+    )
+    datapoint.metadata["ann"] = MetadataAnnotations(datapoint=datapoint, field="ann", annotations=[pixel_bbox])
+    query = _make_qr(ds, [datapoint], ann_field="ann")
+    with patch.object(query, "download_files"):  # download_files is replaced with a mock
+        exported_path = query.export_as_coco(download_dir=tmp_path, annotation_field="ann")
+
+    exported = json.loads(exported_path.read_text())
+    assert exported["annotations"][0]["bbox"] == [10.0, 20.0, 30.0, 40.0]
+
+
+def test_export_coco_no_annotations_raises(ds, tmp_path):
+    """Exporting when the only datapoint has an empty annotation list raises RuntimeError."""
+    empty_dp = Datapoint(datasource=ds, path="test.jpg", datapoint_id=0, metadata={})
+    empty_dp.metadata["ann"] = MetadataAnnotations(datapoint=empty_dp, field="ann", annotations=[])
+    query = _make_qr(ds, [empty_dp], ann_field="ann")
+    with pytest.raises(RuntimeError, match="No annotations found"):
+        query.export_as_coco(download_dir=tmp_path, annotation_field="ann")
+
+
+def test_export_coco_explicit_classes(ds, tmp_path):
+    """Passing an explicit ``classes`` mapping still exports the annotated "cat" category."""
+    image_path = "images/test.jpg"
+    datapoint = Datapoint(datasource=ds, path=image_path, datapoint_id=0, metadata={})
+    bbox = _make_image_bbox(image_path)
+    datapoint.metadata["ann"] = MetadataAnnotations(datapoint=datapoint, field="ann", annotations=[bbox])
+
+    query = _make_qr(ds, [datapoint], ann_field="ann")
+    class_names = {1: "cat", 2: "dog"}
+    with patch.object(query, "download_files"):  # download_files is replaced with a mock
+        exported_path = query.export_as_coco(download_dir=tmp_path, annotation_field="ann", classes=class_names)
+
+    exported_names = [category["name"] for category in json.loads(exported_path.read_text())["categories"]]
+    assert "cat" in exported_names
+
+
+def test_export_coco_custom_filename(ds, tmp_path):
+    """``output_filename`` decides the name of the file returned by the export."""
+    image_path = "images/test.jpg"
+    datapoint = Datapoint(datasource=ds, path=image_path, datapoint_id=0, metadata={})
+    bbox = _make_image_bbox(image_path)
+    datapoint.metadata["ann"] = MetadataAnnotations(datapoint=datapoint, field="ann", annotations=[bbox])
+
+    query = _make_qr(ds, [datapoint], ann_field="ann")
+    export_kwargs = {"download_dir": tmp_path, "annotation_field": "ann", "output_filename": "custom.json"}
+    with patch.object(query, "download_files"):  # download_files is replaced with a mock
+        exported_path = query.export_as_coco(**export_kwargs)
+
+    assert exported_path.name == "custom.json"
+
+
+def test_export_coco_multiple_datapoints(ds, tmp_path):
+    """Two datapoints with one bbox each export as two COCO images and two COCO annotations."""
+    paths = ("a.jpg", "b.jpg")
+    datapoints = [Datapoint(datasource=ds, path=p, datapoint_id=idx, metadata={}) for idx, p in enumerate(paths)]
+    # Give every datapoint a single bbox annotation that is named after its own path
+    for datapoint, path in zip(datapoints, paths):
+        bbox = _make_image_bbox(path)
+        datapoint.metadata["ann"] = MetadataAnnotations(datapoint=datapoint, field="ann", annotations=[bbox])
+
+    query = _make_qr(ds, datapoints, ann_field="ann")
+    with patch.object(query, "download_files"):  # download_files is replaced with a mock
+        exported_path = query.export_as_coco(download_dir=tmp_path, annotation_field="ann")
+
+    exported = json.loads(exported_path.read_text())
+    assert len(exported["annotations"]) == 2
+    assert len(exported["images"]) == 2
+
+
+# --- helpers ---
+
+
+def _make_coco_json():
+    """Minimal COCO dataset: one "cat" category, one 640x480 image and one bbox annotation on it."""
+    categories = [{"id": 1, "name": "cat"}]
+    images = [{"id": 1, "width": 640, "height": 480, "file_name": "image1.jpg"}]
+    annotations = [{"id": 1, "image_id": 1, "category_id": 1, "bbox": [10, 20, 30, 40]}]
+    return {"categories": categories, "images": images, "annotations": annotations}
+
+
+def _write_coco(tmp_path, coco):
+    (tmp_path / "annotations.json").write_text(json.dumps(coco))
+
+
+def _make_image_bbox(filename="test.jpg") -> IRBBoxImageAnnotation:
+    """Return a denormalized 30x40 "cat" bbox at left=10, top=20 on a 640x480 image named ``filename``."""
+    return IRBBoxImageAnnotation(
+        filename=filename, categories={"cat": 1.0}, coordinate_style=CoordinateStyle.DENORMALIZED,
+        top=20.0, left=10.0, width=30.0, height=40.0,
+        image_width=640, image_height=480,
+    )
+
+
+def _make_qr(ds, datapoints, ann_field=None):
+    """Build a QueryResult over ``datapoints``; a truthy ``ann_field`` adds one annotation-tagged blob field."""
+    if not ann_field:
+        return QueryResult(datasource=ds, _entries=datapoints, fields=[])
+    schema = MetadataSelectFieldSchema(
+        asOf=int(datetime.datetime.now().timestamp()), autoGenerated=False, originalName=ann_field,
+        multiple=False, valueType=MetadataFieldType.BLOB,
+        name=ann_field, tags={ReservedTags.ANNOTATION.value},
+    )
+    return QueryResult(datasource=ds, _entries=datapoints, fields=[schema])
diff --git a/tests/data_engine/conftest.py b/tests/data_engine/conftest.py
index e8f0c70a..e57d1e83 100644
--- a/tests/data_engine/conftest.py
+++ b/tests/data_engine/conftest.py
@@ -5,7 +5,7 @@
 from dagshub.common.api import UserAPI
 from dagshub.common.api.responses import UserAPIResponse
 from dagshub.data_engine import datasources
-from dagshub.data_engine.client.models import MetadataSelectFieldSchema, PreprocessingStatus
+from dagshub.data_engine.client.models import DatasourceType, MetadataSelectFieldSchema, PreprocessingStatus
 from dagshub.data_engine.model.datapoint import Datapoint
 from dagshub.data_engine.model.datasource import DatasetState, Datasource
 from dagshub.data_engine.model.query_result import QueryResult
@@ -26,6 +26,7 @@ def other_ds(mocker, mock_dagshub_auth) -> Datasource:
 
 def _create_mock_datasource(mocker, id, name) -> Datasource:
     ds_state = datasources.DatasourceState(id=id, name=name, repo="kirill/repo")
+    ds_state.source_type = DatasourceType.REPOSITORY
     ds_state.path = "repo://kirill/repo/data/"
     ds_state.preprocessing_status = PreprocessingStatus.READY
     mocker.patch.object(ds_state, "client")
diff --git a/tests/mocks/repo_api.py b/tests/mocks/repo_api.py
index d457d161..22b6c94c 100644
--- a/tests/mocks/repo_api.py
+++ b/tests/mocks/repo_api.py
@@ -113,6 +113,10 @@ def generate_content_api_entry(path, is_dir=False, versioning="dvc") -> ContentA
     def default_branch(self) -> str:
         return self._default_branch
 
+    @property
+    def id(self) -> int:
+        return 1
+
     def get_connected_storages(self) -> List[StorageAPIEntry]:
         return self.storages
 

From 14350c49f82b0386c616dac18618ee6bebd45497 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Wed, 25 Mar 2026 13:03:50 +0200
Subject: [PATCH 02/24] Add MOT and CVAT video annotation import/export support

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 dagshub/__init__.py                           |   2 +-
 dagshub/auth/token_auth.py                    |   2 +-
 dagshub/data_engine/annotation/importer.py    | 167 +++++++-
 dagshub/data_engine/annotation/metadata.py    |  62 ++-
 dagshub/data_engine/model/datapoint.py        |  54 ++-
 dagshub/data_engine/model/query_result.py     | 351 ++++++++++++++--
 dagshub/data_engine/util/__init__.py          |   0
 dagshub/data_engine/util/not_implemented.py   |  48 +++
 .../res/audio_annotation.json                 |  82 ++++
 .../test_annotation_parsing.py                | 103 ++++-
 .../annotation_import/test_cvat_video.py      | 276 +++++++++++++
 .../data_engine/annotation_import/test_mot.py | 383 ++++++++++++++++++
 tests/data_engine/conftest.py                 |   3 +-
 tests/mocks/repo_api.py                       |   4 +
 14 files changed, 1454 insertions(+), 83 deletions(-)
 create mode 100644 dagshub/data_engine/util/__init__.py
 create mode 100644 dagshub/data_engine/util/not_implemented.py
 create mode 100644 tests/data_engine/annotation_import/res/audio_annotation.json
 create mode 100644 tests/data_engine/annotation_import/test_cvat_video.py
 create mode 100644 tests/data_engine/annotation_import/test_mot.py

diff --git a/dagshub/__init__.py b/dagshub/__init__.py
index 7f4d765d..b14e2564 100644
--- a/dagshub/__init__.py
+++ b/dagshub/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.6.5"
+__version__ = "0.6.7"
 from .logger import DAGsHubLogger, dagshub_logger
 from .common.init import init
 from .upload.wrapper import upload_files
diff --git a/dagshub/auth/token_auth.py b/dagshub/auth/token_auth.py
index 31ec32ac..7ba3a70a 100644
--- a/dagshub/auth/token_auth.py
+++ b/dagshub/auth/token_auth.py
@@ -37,7 +37,7 @@ def auth_flow(self, request: Request) -> Generator[Request, Response, None]:
 
     def can_renegotiate(self):
         # Env var tokens cannot renegotiate, every other token type can
-        return not type(self._token) is EnvVarDagshubToken
+        return type(self._token) is not EnvVarDagshubToken
 
     def renegotiate_token(self):
         if not self._token_storage.is_valid_token(self._token, self._host):
diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py
index c19212de..2c19a550 100644
--- a/dagshub/data_engine/annotation/importer.py
+++ b/dagshub/data_engine/annotation/importer.py
@@ -1,13 +1,20 @@
 from difflib import SequenceMatcher
 from pathlib import Path, PurePosixPath, PurePath
 from tempfile import TemporaryDirectory
-from typing import TYPE_CHECKING, Literal, Optional, Union, Sequence, Mapping, Callable, List
-
-from dagshub_annotation_converter.converters.cvat import load_cvat_from_zip
+from typing import TYPE_CHECKING, Dict, Literal, Optional, Union, Sequence, Mapping, Callable, List
+
+from dagshub_annotation_converter.converters.cvat import (
+    load_cvat_from_fs,
+    load_cvat_from_zip,
+    load_cvat_from_xml_file,
+)
+from dagshub_annotation_converter.converters.mot import load_mot_from_dir, load_mot_from_fs, load_mot_from_zip
 from dagshub_annotation_converter.converters.yolo import load_yolo_from_fs
+from dagshub_annotation_converter.converters.label_studio_video import video_ir_to_ls_video_tasks
 from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask
 from dagshub_annotation_converter.formats.yolo import YoloContext
 from dagshub_annotation_converter.ir.image.annotations.base import IRAnnotationBase
+from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation
 
 from dagshub.common.api import UserAPI
 from dagshub.common.api.repo import PathNotFoundError
@@ -16,7 +23,7 @@
 if TYPE_CHECKING:
     from dagshub.data_engine.model.datasource import Datasource
 
-AnnotationType = Literal["yolo", "cvat"]
+AnnotationType = Literal["yolo", "cvat", "mot", "cvat_video"]
 AnnotationLocation = Literal["repo", "disk"]
 
 
@@ -57,6 +64,10 @@ def __init__(
                     'Add `yolo_type="bbox"|"segmentation"|pose"` to the arguments.'
                 )
 
+    @property
+    def is_video_format(self) -> bool:
+        return self.annotations_type == "mot" or self.annotations_type == "cvat_video"  # video formats
+
     def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]:
         # Double check that the annotation file exists
         if self.load_from == "disk":
@@ -84,15 +95,128 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]:
                     annotation_type=self.additional_args["yolo_type"], meta_file=annotations_file
                 )
             elif self.annotations_type == "cvat":
-                annotation_dict = load_cvat_from_zip(annotations_file)
+                if annotations_file.is_dir():
+                    annotation_dict = self._flatten_cvat_fs_annotations(load_cvat_from_fs(annotations_file))
+                else:
+                    result = load_cvat_from_zip(annotations_file)
+                    if self._is_video_annotation_dict(result):
+                        annotation_dict = self._flatten_video_annotations(result)
+                    else:
+                        annotation_dict = result
+            elif self.annotations_type == "mot":
+                mot_kwargs = {}
+                if "image_width" in self.additional_args:
+                    mot_kwargs["image_width"] = self.additional_args["image_width"]
+                if "image_height" in self.additional_args:
+                    mot_kwargs["image_height"] = self.additional_args["image_height"]
+                if "video_name" in self.additional_args:
+                    mot_kwargs["video_file"] = self.additional_args["video_name"]
+                if annotations_file.is_dir():
+                    video_files = self.additional_args.get("video_files")
+                    raw_datasource_path = self.additional_args.get("datasource_path")
+                    if raw_datasource_path is None:
+                        raw_datasource_path = self.ds.source.source_prefix
+                    datasource_path = PurePosixPath(raw_datasource_path).as_posix().lstrip("/")
+                    if datasource_path == ".":
+                        datasource_path = ""
+                    mot_results = load_mot_from_fs(
+                        annotations_file,
+                        image_width=mot_kwargs.get("image_width"),
+                        image_height=mot_kwargs.get("image_height"),
+                        video_files=video_files,
+                        datasource_path=datasource_path,
+                    )
+                    annotation_dict = self._flatten_mot_fs_annotations(mot_results)
+                elif annotations_file.suffix == ".zip":
+                    video_anns, _ = load_mot_from_zip(annotations_file, **mot_kwargs)
+                    annotation_dict = self._flatten_video_annotations(video_anns)
+                else:
+                    video_anns, _ = load_mot_from_dir(annotations_file, **mot_kwargs)
+                    annotation_dict = self._flatten_video_annotations(video_anns)
+            elif self.annotations_type == "cvat_video":
+                cvat_kwargs = {}
+                if "image_width" in self.additional_args:
+                    cvat_kwargs["image_width"] = self.additional_args["image_width"]
+                if "image_height" in self.additional_args:
+                    cvat_kwargs["image_height"] = self.additional_args["image_height"]
+                if annotations_file.is_dir():
+                    raw = load_cvat_from_fs(annotations_file, **cvat_kwargs)
+                    annotation_dict = self._flatten_cvat_fs_annotations(raw)
+                elif annotations_file.suffix == ".zip":
+                    result = load_cvat_from_zip(annotations_file, **cvat_kwargs)
+                    if self._is_video_annotation_dict(result):
+                        annotation_dict = self._flatten_video_annotations(result)
+                    else:
+                        annotation_dict = result
+                else:
+                    result = load_cvat_from_xml_file(annotations_file, **cvat_kwargs)
+                    if self._is_video_annotation_dict(result):
+                        annotation_dict = self._flatten_video_annotations(result)
+                    else:
+                        annotation_dict = result
+            else:
+                raise ValueError(f"Unsupported annotation type: {self.annotations_type}")
 
             return annotation_dict
 
+    @staticmethod
+    def _is_video_annotation_dict(result) -> bool:
+        """Tell video loader output (int keys) apart from image loader output (str keys)."""
+        # Only the first key is inspected; a non-dict or an empty dict is never treated as video
+        if isinstance(result, dict) and result:
+            return isinstance(next(iter(result)), int)
+        return False
+
+    def _flatten_video_annotations(
+        self,
+        frame_annotations: Dict[int, Sequence[IRAnnotationBase]],
+    ) -> Dict[str, Sequence[IRAnnotationBase]]:
+        """Merge the per-frame annotation lists into one list stored under the video's name."""
+        # The key is the "video_name" additional arg, defaulting to the annotation file's stem
+        key = self.additional_args.get("video_name", self.annotations_file.stem)
+        # Frames are concatenated in the dict's iteration order
+        merged: List[IRAnnotationBase] = [ann for per_frame in frame_annotations.values() for ann in per_frame]
+        return {key: merged}
+
+    def _flatten_cvat_fs_annotations(
+        self, fs_annotations: Mapping[str, object]
+    ) -> Dict[str, Sequence[IRAnnotationBase]]:
+        """Merge per-file CVAT results into one dict; a video result is stored under the stem of its path."""
+        merged: Dict[str, List[IRAnnotationBase]] = {}
+        for rel_path, loaded in fs_annotations.items():
+            if not isinstance(loaded, dict):
+                continue
+            if not self._is_video_annotation_dict(loaded):
+                for image_name, image_anns in loaded.items():
+                    merged.setdefault(image_name, []).extend(image_anns)
+                continue
+            # Video result: the annotations of all frames are collected under a single key
+            video_anns = merged.setdefault(Path(rel_path).stem, [])
+            for per_frame in loaded.values():
+                video_anns.extend(per_frame)
+        return merged
+
+    def _flatten_mot_fs_annotations(
+        self,
+        fs_annotations: Mapping[str, object],
+    ) -> Dict[str, Sequence[IRAnnotationBase]]:
+        """Merge every sequence's per-frame annotations into one list keyed by the sequence name."""
+        merged: Dict[str, List[IRAnnotationBase]] = {}
+        for rel_path, loaded in fs_annotations.items():
+            # Entries must be 2-tuples whose first element is the frame -> annotations dict
+            if not (isinstance(loaded, tuple) and len(loaded) == 2 and isinstance(loaded[0], dict)):
+                continue
+            # A result whose relative path is "." or "" is named after the annotations path instead
+            name = self.annotations_file.stem if rel_path in (".", "") else Path(rel_path).stem
+            sequence_anns = merged.setdefault(name, [])
+            for per_frame in loaded[0].values():
+                sequence_anns.extend(per_frame)
+        return merged
+
     def download_annotations(self, dest_dir: Path):
         log_message("Downloading annotations from repository")
         repoApi = self.ds.source.repoApi
-        if self.annotations_type == "cvat":
-            # Download just the annotation file
+        if self.annotations_type in ("cvat", "cvat_video"):
             repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True)
         elif self.annotations_type == "yolo":
             # Download the dataset .yaml file and the images + annotations
@@ -104,6 +228,8 @@ def download_annotations(self, dest_dir: Path):
             # Download the annotation data
             assert context.path is not None
             repoApi.download(self.annotations_file.parent / context.path, dest_dir, keep_source_prefix=True)
+        elif self.annotations_type == "mot":
+            repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True)
 
     @staticmethod
     def determine_load_location(ds: "Datasource", annotations_path: Union[str, Path]) -> AnnotationLocation:
@@ -153,8 +279,12 @@ def remap_annotations(
                 )
                 continue
             for ann in anns:
-                assert ann.filename is not None
-                ann.filename = remap_func(ann.filename)
+                if ann.filename is not None:
+                    ann.filename = remap_func(ann.filename)
+                else:
+                    if not self.is_video_format:
+                        raise ValueError(f"Non-video annotation has no filename: {ann}")
+                    ann.filename = new_filename
             remapped[new_filename] = anns
 
         return remapped
@@ -288,6 +418,8 @@ def convert_to_ls_tasks(self, annotations: Mapping[str, Sequence[IRAnnotationBas
         """
         Converts the annotations to Label Studio tasks.
         """
+        if self.is_video_format:
+            return self._convert_to_ls_video_tasks(annotations)
         current_user_id = UserAPI.get_current_user(self.ds.source.repoApi.host).user_id
         tasks = {}
         for filename, anns in annotations.items():
@@ -296,3 +428,20 @@ def convert_to_ls_tasks(self, annotations: Mapping[str, Sequence[IRAnnotationBas
             t.add_ir_annotations(anns)
             tasks[filename] = t.model_dump_json().encode("utf-8")
         return tasks
+
+    def _convert_to_ls_video_tasks(
+        self, annotations: Mapping[str, Sequence[IRAnnotationBase]]
+    ) -> Mapping[str, bytes]:
+        """Serialize each file's video bbox annotations into the JSON bytes of a Label Studio video task."""
+        serialized = {}
+        for filename, file_anns in annotations.items():
+            # Only video bounding boxes are converted; a file without any gets no task
+            bboxes = [ann for ann in file_anns if isinstance(ann, IRVideoBBoxAnnotation)]
+            if not bboxes:
+                continue
+            converted = video_ir_to_ls_video_tasks(bboxes, video_path=self.ds.source.raw_path(filename))
+            # Just the first returned task is stored for the file
+            if converted:
+                first_task = converted[0]
+                serialized[filename] = first_task.model_dump_json().encode("utf-8")
+        return serialized
diff --git a/dagshub/data_engine/annotation/metadata.py b/dagshub/data_engine/annotation/metadata.py
index 8b5d632c..8f9ca765 100644
--- a/dagshub/data_engine/annotation/metadata.py
+++ b/dagshub/data_engine/annotation/metadata.py
@@ -1,25 +1,32 @@
-from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Union, Literal, Dict
+from typing import TYPE_CHECKING, Dict, Literal, Optional, Sequence, Tuple, Union
 
-from dagshub_annotation_converter.formats.label_studio.task import parse_ls_task, LabelStudioTask
-from dagshub_annotation_converter.formats.yolo import import_lookup, import_yolo_result, YoloContext
+from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask, parse_ls_task
+from dagshub_annotation_converter.formats.yolo import YoloContext, import_lookup, import_yolo_result
 from dagshub_annotation_converter.formats.yolo.categories import Categories
 from dagshub_annotation_converter.ir.image import (
-    IRBBoxImageAnnotation,
     CoordinateStyle,
-    IRSegmentationImageAnnotation,
-    IRSegmentationPoint,
+    IRBBoxImageAnnotation,
     IRPoseImageAnnotation,
     IRPosePoint,
+    IRSegmentationImageAnnotation,
+    IRSegmentationPoint,
 )
 from dagshub_annotation_converter.ir.image.annotations.base import IRAnnotationBase, IRImageAnnotationBase
 
 from dagshub.common.api import UserAPI
 from dagshub.common.helpers import log_message
+from dagshub.data_engine.util.not_implemented import NotImplementedMeta
 
 if TYPE_CHECKING:
-    from dagshub.data_engine.model.datapoint import Datapoint
     import ultralytics.engine.results
 
+    from dagshub.data_engine.model.datapoint import Datapoint
+
+from dagshub_annotation_converter.formats.label_studio.videorectangle import VideoRectangleAnnotation
+from dagshub_annotation_converter.formats.label_studio.task import task_lookup as _task_lookup
+
+_task_lookup["videorectangle"] = VideoRectangleAnnotation
+
 
 class AnnotationMetaDict(dict):
     def __init__(self, annotation: "MetadataAnnotations", *args, **kwargs):
@@ -315,3 +322,44 @@ def _generate_yolo_context(annotation_type, categories: Dict[int, str]) -> YoloC
         for cat_id, cat_name in categories.items():
             cats.add(cat_name, cat_id)
         return YoloContext(annotation_type=annotation_type, categories=cats)
+
+
+class UnsupportedMetadataAnnotations(MetadataAnnotations, metaclass=NotImplementedMeta):
+    """
+    Stand-in for a Label Studio task of an unrecognized type; ``value``/``to_ls_task`` return the raw payload.
+    Inherited public members that are not overridden here raise NotImplementedError (see NotImplementedMeta).
+    """
+
+    def __init__(self, datapoint: "Datapoint", field: str, original_value: bytes):
+        super().__init__(datapoint, field, None, None, original_value)
+
+    @property
+    def value(self) -> Optional[bytes]:
+        return self._original_value
+
+    def to_ls_task(self) -> Optional[bytes]:
+        return self._original_value
+
+    def __repr__(self):
+        return "Label Studio annotations of unrecognized type"
+
+
+class ErrorMetadataAnnotations(MetadataAnnotations, metaclass=NotImplementedMeta):
+    """
+    Stand-in for an annotation whose content could not be obtained; it only remembers the error message.
+    Reading ``value`` or calling ``to_ls_task`` raises ``ValueError`` with that message.
+    """
+
+    def __init__(self, datapoint: "Datapoint", field: str, error_message: str):
+        super().__init__(datapoint, field, None, None, None)
+        self._error_message = error_message
+
+    @property
+    def value(self) -> Optional[bytes]:
+        raise ValueError(self._error_message)
+
+    def to_ls_task(self) -> Optional[bytes]:
+        raise ValueError(self._error_message)
+
+    def __repr__(self):
+        return f"Label Studio annotation download error: {self._error_message}"
diff --git a/dagshub/data_engine/model/datapoint.py b/dagshub/data_engine/model/datapoint.py
index b7aa89b5..f0c31925 100644
--- a/dagshub/data_engine/model/datapoint.py
+++ b/dagshub/data_engine/model/datapoint.py
@@ -3,14 +3,14 @@
 from dataclasses import dataclass
 from os import PathLike
 from pathlib import Path
-from typing import Optional, Union, List, Dict, Any, Callable, TYPE_CHECKING, Literal, Sequence
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Literal, Optional, Sequence, Union
 
-from tenacity import Retrying, stop_after_attempt, wait_exponential, before_sleep_log, retry_if_exception_type
+from tenacity import Retrying, before_sleep_log, retry_if_exception_type, stop_after_attempt, wait_exponential
 
 from dagshub.common.download import download_files
 from dagshub.common.helpers import http_request
 from dagshub.data_engine.annotation import MetadataAnnotations
-from dagshub.data_engine.client.models import MetadataSelectFieldSchema, DatapointHistoryResult
+from dagshub.data_engine.client.models import DatapointHistoryResult, MetadataSelectFieldSchema
 from dagshub.data_engine.dtypes import MetadataFieldType
 
 if TYPE_CHECKING:
@@ -25,6 +25,23 @@
 logger = logging.getLogger(__name__)
 
 
+@dataclass(frozen=True)
+class BlobHashMetadata:  # wraps a blob metadata value that holds only the blob's hash
+    hash: str  # Datapoint.get_blob uses it to build the blob URL and the cache file name
+
+    def __str__(self) -> str:
+        return self.hash
+
+    def __repr__(self) -> str:
+        return f"BlobHashMetadata(hash={self.hash!r})"
+
+
+class BlobDownloadError(Exception):  # raised by _get_blob when downloading a blob fails
+    def __init__(self, message: str) -> None:
+        super().__init__(message)
+        self.message = message  # kept as an attribute; callers read it via ``e.message``
+
+
 @dataclass
 class Datapoint:
     datapoint_id: int
@@ -128,6 +145,7 @@ def from_gql_edge(edge: Dict, datasource: "Datasource", fields: List[MetadataSel
 
         float_fields = {f.name for f in fields if f.valueType == MetadataFieldType.FLOAT}
         date_fields = {f.name for f in fields if f.valueType == MetadataFieldType.DATETIME}
+        blob_fields = {f.name for f in fields if f.valueType == MetadataFieldType.BLOB}
 
         for meta_dict in edge["node"]["metadata"]:
             key = meta_dict["key"]
@@ -138,6 +156,8 @@ def from_gql_edge(edge: Dict, datasource: "Datasource", fields: List[MetadataSel
                 if key in date_fields:
                     timezone = meta_dict.get("timeZone")
                     value = _datetime_from_timestamp(value / 1000, timezone or "+00:00")
+                elif key in blob_fields and isinstance(value, str):
+                    value = BlobHashMetadata(value)
             res.metadata[key] = value
         return res
 
@@ -164,7 +184,7 @@ def get_blob(self, column: str, cache_on_disk=True, store_value=False) -> bytes:
         if type(current_value) is bytes:
             # Bytes - it's already there!
             return current_value
-        if isinstance(current_value, Path):
+        elif isinstance(current_value, Path):
             # Path - assume the path exists and is already downloaded,
             #   because it's unlikely that the user has set it themselves
             with current_value.open("rb") as f:
@@ -173,18 +193,16 @@ def get_blob(self, column: str, cache_on_disk=True, store_value=False) -> bytes:
                 self.metadata[column] = content
             return content
 
-        elif type(current_value) is str:
-            # String - This is probably the hash of the blob, get that from dagshub
-            blob_url = self.blob_url(current_value)
-            blob_location = self.blob_cache_location / current_value
+        elif isinstance(current_value, BlobHashMetadata):
+            # Blob hash metadata - download blob from DagsHub
+            blob_url = self.blob_url(current_value.hash)
+            blob_location = self.blob_cache_location / current_value.hash
 
             # Make sure that the cache location exists
             if cache_on_disk:
                 self.blob_cache_location.mkdir(parents=True, exist_ok=True)
 
             content = _get_blob(blob_url, blob_location, self.datasource.source.repoApi.auth, cache_on_disk, True)
-            if type(content) is str:
-                raise RuntimeError(f"Error while downloading blob: {content}")
 
             if store_value:
                 self.metadata[column] = content
@@ -192,6 +210,11 @@ def get_blob(self, column: str, cache_on_disk=True, store_value=False) -> bytes:
                 self.metadata[column] = blob_location
 
             return content
+        elif isinstance(current_value, MetadataAnnotations):
+            ls_task = current_value.to_ls_task()
+            if ls_task is None:
+                return b""
+            return ls_task
         else:
             raise ValueError(f"Can't extract blob metadata from value {current_value} of type {type(current_value)}")
 
@@ -274,10 +297,17 @@ def _get_blob(
     """
     Args:
         url: url to download the blob from
-        cache_path: where the cache for the blob is (laods from it if exists, stores there if it doesn't)
+        cache_path: where the cache for the blob is (loads from it if exists, stores there if it doesn't)
         auth: auth to use for getting the blob
         cache_on_disk: whether to store the downloaded blob on disk. If False we also turn off the cache checking
         return_blob: if True returns the blob of the downloaded data, if False returns the path to the file with it
+        path_format: if return_blob is False, controls path representation. "path" returns Path, "str" returns str
+
+    Returns:
+        bytes, Path, or str path on success.
+
+    Raises:
+        BlobDownloadError on download failure.
     """
     if url is None:
         return None
@@ -313,7 +343,7 @@ def get():
             with attempt:
                 content = get()
     except Exception as e:
-        return f"Error while downloading binary blob: {e}"
+        raise BlobDownloadError(str(e)) from e
 
     if cache_on_disk:
         with cache_path.open("wb") as f:
diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py
index 6c326eab..f30643a9 100644
--- a/dagshub/data_engine/model/query_result.py
+++ b/dagshub/data_engine/model/query_result.py
@@ -15,10 +15,14 @@
 import dacite
 import dagshub_annotation_converter.converters.yolo
 import rich.progress
+from dagshub_annotation_converter.converters.cvat import export_cvat_video_to_zip, export_cvat_videos_to_zips
+from dagshub_annotation_converter.converters.mot import export_mot_sequences_to_dirs, export_mot_to_dir
+from dagshub_annotation_converter.formats.mot import MOTContext
 from dagshub_annotation_converter.formats.yolo import YoloContext
 from dagshub_annotation_converter.formats.yolo.categories import Categories
 from dagshub_annotation_converter.formats.yolo.common import ir_mapping
 from dagshub_annotation_converter.ir.image import IRImageAnnotationBase
+from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation
 from pydantic import ValidationError
 
 from dagshub.auth import get_token
@@ -30,6 +34,7 @@
 from dagshub.common.rich_util import get_rich_progress
 from dagshub.common.util import lazy_load, multi_urljoin
 from dagshub.data_engine.annotation import MetadataAnnotations
+from dagshub.data_engine.annotation.metadata import ErrorMetadataAnnotations, UnsupportedMetadataAnnotations
 from dagshub.data_engine.annotation.voxel_conversion import (
     add_ls_annotations,
     add_voxel_annotations,
@@ -37,7 +42,13 @@
 from dagshub.data_engine.client.loaders.base import DagsHubDataset
 from dagshub.data_engine.client.models import DatasourceType, MetadataSelectFieldSchema
 from dagshub.data_engine.dtypes import MetadataFieldType
-from dagshub.data_engine.model.datapoint import Datapoint, _generated_fields, _get_blob
+from dagshub.data_engine.model.datapoint import (
+    BlobDownloadError,
+    BlobHashMetadata,
+    Datapoint,
+    _generated_fields,
+    _get_blob,
+)
 from dagshub.data_engine.model.schema_util import dacite_config
 from dagshub.data_engine.voxel_plugin_server.utils import set_voxel_envvars
 
@@ -389,10 +400,9 @@ def get_blob_fields(
         for dp in self.entries:
             for fld in fields:
                 field_value = dp.metadata.get(fld)
-                # If field_value is a blob or a path, then ignore, means it's already been downloaded
-                if not isinstance(field_value, str):
+                if not isinstance(field_value, BlobHashMetadata):
                     continue
-                download_task = (dp, fld, dp.blob_url(field_value), dp.blob_cache_location / field_value)
+                download_task = (dp, fld, dp.blob_url(field_value.hash), dp.blob_cache_location / field_value.hash)
                 to_download.append(download_task)
 
         progress = get_rich_progress(rich.progress.MofNCompleteColumn())
@@ -402,8 +412,6 @@ def get_blob_fields(
 
         def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path):
             blob_or_path = _get_blob(url, blob_path, auth, cache_on_disk, load_into_memory, path_format)
-            if isinstance(blob_or_path, str) and path_format != "str":
-                logger.warning(f"Error while downloading blob for field {field} in datapoint {dp.path}:{blob_or_path}")
             dp.metadata[field] = blob_or_path
 
         with progress:
@@ -415,7 +423,7 @@ def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path):
                         logger.warning(f"Got exception {type(exc)} while downloading blob: {exc}")
                     progress.update(task, advance=1)
 
-        self._convert_annotation_fields(*fields, load_into_memory=load_into_memory)
+        self._convert_annotation_fields(*fields)
 
         # Convert any downloaded document fields
         document_fields = [f for f in fields if f in self.document_fields]
@@ -424,49 +432,63 @@ def _get_blob_fn(dp: Datapoint, field: str, url: str, blob_path: Path):
         if document_fields:
             for dp in self:
                 for fld in document_fields:
-                    if fld in dp.metadata:
-                        # Override the load_into_memory flag, because we need the contents
-                        if not load_into_memory:
-                            dp.metadata[fld] = Path(dp.metadata[fld]).read_bytes()
-                        dp.metadata[fld] = dp.metadata[fld].decode("utf-8")
+                    if fld not in dp.metadata:
+                        continue
+                    try:
+                        content = dp.get_blob(fld)
+                        dp.metadata[fld] = content.decode("utf-8")
+                    except BlobDownloadError as e:
+                        logger.warning(f"Failed to download document field '{fld}' for datapoint '{dp.path}': {e}")
 
         return self
 
-    def _convert_annotation_fields(self, *fields, load_into_memory):
+    def _convert_annotation_fields(self, *fields) -> None:
         # Convert any downloaded annotation column
         annotation_fields = [f for f in fields if f in self.annotation_fields]
+        if not annotation_fields:
+            return
 
+        # Maps field name -> paths of datapoints whose annotations couldn't be downloaded or parsed
         bad_annotations = defaultdict(list)
 
-        if annotation_fields:
-            # Convert them
-            for dp in self:
-                for fld in annotation_fields:
-                    if fld in dp.metadata:
-                        # Already loaded - skip
-                        if isinstance(dp.metadata[fld], MetadataAnnotations):
-                            continue
-                        # Override the load_into_memory flag, because we need the contents
-                        if not load_into_memory:
-                            dp.metadata[fld] = Path(dp.metadata[fld]).read_bytes()
-                        try:
-                            dp.metadata[fld] = MetadataAnnotations.from_ls_task(
-                                datapoint=dp, field=fld, ls_task=dp.metadata[fld]
-                            )
-                        except ValidationError:
-                            bad_annotations[fld].append(dp.path)
-                    else:
-                        dp.metadata[fld] = MetadataAnnotations(datapoint=dp, field=fld)
+        for dp in self:
+            for fld in annotation_fields:
+                metadata_value = dp.metadata.get(fld)
+                # No value - create empty annotation container
+                if metadata_value is None:
+                    dp.metadata[fld] = MetadataAnnotations(datapoint=dp, field=fld)
+                    continue
+                # Already loaded - skip
+                elif isinstance(metadata_value, MetadataAnnotations):
+                    continue
+                # Parse the blob content. NOTE(review): get_blob raises ValueError for str values - verify callers
+                else:
+                    try:
+                        annotation_content = dp.get_blob(fld)
+                        dp.metadata[fld] = MetadataAnnotations.from_ls_task(
+                            datapoint=dp, field=fld, ls_task=annotation_content
+                        )
+                    except BlobDownloadError as e:
+                        dp.metadata[fld] = ErrorMetadataAnnotations(datapoint=dp, field=fld, error_message=e.message)
+                        bad_annotations[fld].append(dp.path)
+                    except ValidationError:
+                        dp.metadata[fld] = UnsupportedMetadataAnnotations(
+                            datapoint=dp, field=fld, original_value=annotation_content
+                        )
+                        bad_annotations[fld].append(dp.path)
 
         if bad_annotations:
             log_message(
-                "Warning: The following datapoints had invalid annotations, "
-                "any annotation-related operations will not work on these:"
+                "Warning: The following datapoints had unsupported or invalid annotations, "
+                "convenience functions like `add_bounding_box` won't work on these:"
             )
             err_msg = ""
             for fld, dps in bad_annotations.items():
-                err_msg += f'Field "{fld}" in datapoints:\n\t'
-                err_msg += "\n\t".join(dps)
+                err_msg += f'\nField "{fld}" in datapoints:\n\t'
+                if len(dps) > 10:
+                    err_msg += "\n\t".join(dps[:10]) + f"\n\t... and {len(dps) - 10} more"
+                else:
+                    err_msg += "\n\t".join(dps)
             log_message(err_msg)
 
     def download_binary_columns(
@@ -760,6 +782,44 @@ def _get_all_annotations(self, annotation_field: str) -> List[IRImageAnnotationB
                 annotations.extend(dp.metadata[annotation_field].annotations)
         return annotations
 
+    def _get_all_video_annotations(self, annotation_field: str) -> List[IRVideoBBoxAnnotation]:
+        # Keep only the video bounding-box annotations stored in the field; every other kind is dropped
+        return [ann for ann in self._get_all_annotations(annotation_field) if isinstance(ann, IRVideoBBoxAnnotation)]
+
+    def _prepare_video_file_for_export(self, local_root: Path, repo_relative_filename: str) -> Optional[Path]:
+        """Locate a downloaded file under ``local_root``, with or without the source prefix; None if absent."""
+        direct_candidate = local_root / Path(repo_relative_filename)
+        if direct_candidate.exists():
+            return direct_candidate
+        # Fall back to the layout where files are nested under the datasource's source prefix
+        prefixed_candidate = local_root / Path(self.datasource.source.source_prefix) / Path(repo_relative_filename)
+        if prefixed_candidate.exists():
+            return prefixed_candidate
+        return None
+
+    @staticmethod
+    def _get_annotation_filename(ann: IRVideoBBoxAnnotation) -> Optional[str]:
+        """Return the annotation's sole filename as a string, or None when it has none."""
+        raw_name = ann.filename
+        if raw_name is None:
+            return None
+        if not isinstance(raw_name, (list, tuple)):
+            return str(raw_name)
+        if len(raw_name) > 1:
+            raise ValueError(f"Annotation has multiple filenames: {raw_name}")
+        # An empty container means no filename; otherwise unwrap the only element
+        return str(raw_name[0]) if raw_name else None
+
+    def _resolve_annotation_field(self, annotation_field: Optional[str]) -> str:
+        """Return ``annotation_field`` if given, else the alphabetically first annotation field (and log it)."""
+        if annotation_field is not None:
+            return annotation_field
+        candidates = sorted(f.name for f in self.fields if f.is_annotation())
+        if not candidates:
+            raise ValueError("No annotation fields found in the datasource")
+        log_message(f"Using annotations from field {candidates[0]}")
+        return candidates[0]
+
     def export_as_yolo(
         self,
         download_dir: Optional[Union[str, Path]] = None,
@@ -785,12 +845,7 @@ def export_as_yolo(
         Returns:
             The path to the YAML file with the metadata. Pass this path to ``YOLO.train()`` to train a model.
         """
-        if annotation_field is None:
-            annotation_fields = sorted([f.name for f in self.fields if f.is_annotation()])
-            if len(annotation_fields) == 0:
-                raise ValueError("No annotation fields found in the datasource")
-            annotation_field = annotation_fields[0]
-            log_message(f"Using annotations from field {annotation_field}")
+        annotation_field = self._resolve_annotation_field(annotation_field)
 
         if download_dir is None:
             download_dir = Path("dagshub_export")
@@ -843,6 +898,218 @@ def export_as_yolo(
         log_message(f"Done! Saved YOLO Dataset, YAML file is at {yaml_path.absolute()}")
         return yaml_path
 
+    def export_as_mot(
+        self,
+        download_dir: Optional[Union[str, Path]] = None,
+        annotation_field: Optional[str] = None,
+        image_width: Optional[int] = None,
+        image_height: Optional[int] = None,
+    ) -> Path:
+        """
+        Exports video annotations in MOT (Multiple Object Tracking) format.
+
+        The output follows the MOT Challenge directory structure::
+
+            output_dir/
+              gt/
+                gt.txt
+                labels.txt
+              seqinfo.ini
+
+        Annotations are written under ``<download_dir>/labels``. When they reference at most one distinct
+        video name, the sequence goes to ``<download_dir>/labels/<video name without extension>``.
+
+        Args:
+            download_dir: Where to export. Defaults to ``./dagshub_export``
+            annotation_field: Field with the annotations. If None, uses the first alphabetical annotation field.
+            image_width: Frame width. If this or ``image_height`` is None, the datasource files are
+                downloaded to ``<download_dir>/data`` and handed to the converter for probing.
+            image_height: Frame height. Same fallback as ``image_width``.
+
+        Returns:
+            For a single video, the path returned by the converter. For several videos, the
+            ``<download_dir>/labels`` directory.
+
+        Raises:
+            ValueError: If no annotation field exists, or an annotation references several filenames.
+            RuntimeError: If the field contains no video annotations.
+            FileNotFoundError: If dimensions have to be probed and a referenced video (or any filename at all)
+                cannot be found among the downloaded files.
+        """
+        annotation_field = self._resolve_annotation_field(annotation_field)
+
+        download_dir = Path("dagshub_export") if download_dir is None else Path(download_dir)
+        labels_dir = download_dir / "labels"
+        labels_dir.mkdir(parents=True, exist_ok=True)
+
+        video_annotations = self._get_all_video_annotations(annotation_field)
+        if not video_annotations:
+            raise RuntimeError("No video annotations found to export")
+
+        # Non-empty filenames in annotation order; resolved once and reused for every lookup below
+        ann_filenames = [fn for fn in map(self._get_annotation_filename, video_annotations) if fn]
+        source_names = sorted({Path(fn).name for fn in ann_filenames})
+
+        # Videos are only downloaded when the caller did not supply both dimensions
+        local_download_root: Optional[Path] = None
+        if image_width is None or image_height is None:
+            log_message("Missing video dimensions in annotations, downloading videos for converter-side probing...")
+            local_download_root = self.download_files(download_dir / "data", keep_source_prefix=True)
+
+        # The same converter settings are used by both export paths below
+        context = MOTContext()
+        context.image_width = image_width
+        context.image_height = image_height
+
+        log_message("Exporting MOT annotations...")
+        if len(source_names) > 1:
+            # Several distinct video names: hand every located video to the multi-sequence exporter
+            video_files: Optional[Dict[str, Union[str, Path]]] = None
+            if local_download_root is not None:
+                video_files = {}
+                for ann_filename in dict.fromkeys(ann_filenames):
+                    local_video = self._prepare_video_file_for_export(local_download_root, ann_filename)
+                    if local_video is None:
+                        raise FileNotFoundError(
+                            f"Could not find local downloaded video file for '{ann_filename}' under "
+                            f"'{local_download_root}'."
+                        )
+                    # Videos are keyed by their name without extension
+                    video_files[Path(ann_filename).stem] = local_video
+            export_mot_sequences_to_dirs(video_annotations, context, labels_dir, video_files=video_files)
+            result_path = labels_dir
+        else:
+            # Zero or one distinct video name: everything is exported as one sequence
+            video_file: Optional[Path] = None
+            if local_download_root is not None:
+                # Use the first annotation that actually carries a filename; the very first one may have none
+                if not ann_filenames:
+                    raise FileNotFoundError("Missing annotation filename for MOT export.")
+                ref_filename = ann_filenames[0]
+                video_file = self._prepare_video_file_for_export(local_download_root, ref_filename)
+                if video_file is None:
+                    raise FileNotFoundError(
+                        f"Could not find local downloaded video file for '{ref_filename}' "
+                        f"under '{local_download_root}'."
+                    )
+            single_name = Path(source_names[0]).stem if source_names else "sequence"
+            output_dir = labels_dir / single_name
+            result_path = export_mot_to_dir(video_annotations, context, output_dir, video_file=video_file)
+
+        log_message(f"Done! Saved MOT annotations to {result_path.absolute()}")
+        return result_path
+
+    def export_as_cvat_video(
+        self,
+        download_dir: Optional[Union[str, Path]] = None,
+        annotation_field: Optional[str] = None,
+        video_name: str = "video.mp4",
+        image_width: Optional[int] = None,
+        image_height: Optional[int] = None,
+    ) -> Path:
+        """
+        Exports video annotations in CVAT video ZIP format.
+
+        Archives are written to ``<download_dir>/labels``. If the annotations reference at most one
+        distinct video name, a single ZIP named ``<video file name>.zip`` (``annotations.zip`` when no
+        filename is known) is requested from the converter; otherwise the multi-video converter is used.
+
+        Args:
+            download_dir: Where to export. Defaults to ``./dagshub_export``
+            annotation_field: Field with the annotations. If None, uses the first alphabetical annotation field.
+            video_name: Name of the source video to embed in the XML metadata. Only passed to the
+                converter for single-video exports.
+            image_width: Frame width. If this or ``image_height`` is None, the datasource files are
+                downloaded to ``<download_dir>/data`` and handed to the converter for probing.
+            image_height: Frame height. Same fallback as ``image_width``.
+
+        Returns:
+            Path to the exported CVAT video ZIP file for single-video exports,
+            or output directory for multi-video exports.
+
+        Raises:
+            ValueError: If no annotation field exists, or an annotation references several filenames.
+            RuntimeError: If the field contains no video annotations.
+            FileNotFoundError: If dimensions have to be probed and a referenced video (or any filename at all)
+                cannot be found among the downloaded files.
+        """
+        annotation_field = self._resolve_annotation_field(annotation_field)
+
+        download_dir = Path("dagshub_export") if download_dir is None else Path(download_dir)
+
+        video_annotations = self._get_all_video_annotations(annotation_field)
+        if not video_annotations:
+            raise RuntimeError("No video annotations found to export")
+
+        # Non-empty filenames in annotation order; resolved once and reused for every lookup below
+        ann_filenames = [fn for fn in map(self._get_annotation_filename, video_annotations) if fn]
+        source_names = sorted({Path(fn).name for fn in ann_filenames})
+        has_multiple_sources = len(source_names) > 1
+
+        log_message("Exporting CVAT video annotations...")
+        # Videos are only downloaded when the caller did not supply both dimensions
+        local_download_root: Optional[Path] = None
+        if image_width is None or image_height is None:
+            log_message("Missing video dimensions in annotations, downloading videos for converter-side probing...")
+            local_download_root = self.download_files(download_dir / "data", keep_source_prefix=True)
+
+        if has_multiple_sources:
+            video_files: Optional[Dict[str, Union[str, Path]]] = None
+            if local_download_root is not None:
+                video_files = {}
+                for ann_filename in dict.fromkeys(ann_filenames):
+                    local_video = self._prepare_video_file_for_export(local_download_root, ann_filename)
+                    if local_video is None:
+                        raise FileNotFoundError(
+                            f"Could not find local downloaded video file for '{ann_filename}' "
+                            f"under '{local_download_root}'."
+                        )
+                    ann_path = Path(ann_filename)
+                    # Register the video under its full annotation filename, its file name and its stem
+                    for key in (ann_filename, ann_path.name, ann_path.stem):
+                        video_files[key] = local_video
+
+            output_dir = download_dir / "labels"
+            output_dir.mkdir(parents=True, exist_ok=True)
+            # An empty mapping is passed on as None
+            export_cvat_videos_to_zips(
+                video_annotations,
+                output_dir,
+                image_width=image_width,
+                image_height=image_height,
+                video_files=video_files or None,
+            )
+            result_path = output_dir
+        else:
+            single_video_file: Optional[Path] = None
+            if local_download_root is not None:
+                # Use the first annotation that actually carries a filename; the very first one may have none
+                if not ann_filenames:
+                    raise FileNotFoundError("Missing annotation filename for single-video CVAT export.")
+                ref_filename = ann_filenames[0]
+                single_video_file = self._prepare_video_file_for_export(local_download_root, ref_filename)
+                if single_video_file is None:
+                    raise FileNotFoundError(
+                        f"Could not find local downloaded video file for '{ref_filename}' "
+                        f"under '{local_download_root}'."
+                    )
+
+            labels_dir = download_dir / "labels"
+            labels_dir.mkdir(parents=True, exist_ok=True)
+            # Name the archive after the video file when one is known
+            output_name = f"{Path(source_names[0]).name}.zip" if source_names else "annotations.zip"
+            result_path = export_cvat_video_to_zip(
+                video_annotations,
+                labels_dir / output_name,
+                video_name=video_name,
+                image_width=image_width,
+                image_height=image_height,
+                video_file=single_video_file,
+            )
+
+        log_message(f"Done! Saved CVAT video annotations to {result_path.absolute()}")
+        return result_path
+
     def to_voxel51_dataset(self, **kwargs) -> "fo.Dataset":
         """
         Creates a voxel51 dataset that can be used with\
diff --git a/dagshub/data_engine/util/__init__.py b/dagshub/data_engine/util/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/dagshub/data_engine/util/not_implemented.py b/dagshub/data_engine/util/not_implemented.py
new file mode 100644
index 00000000..d9a81285
--- /dev/null
+++ b/dagshub/data_engine/util/not_implemented.py
@@ -0,0 +1,48 @@
+class NotImplementedMeta(type):
+    """
+    A metaclass that replaces all parent class methods and properties that aren't overridden in the subclass
+    with NotImplementedError.
+
+    Only public names (no leading underscore) are replaced; other inherited attributes are left untouched.
+    """
+
+    @staticmethod
+    def _raising_property(prop_name):
+        # One accessor serves as getter, setter and deleter: all of them fail with the same message
+        def fail(self, *args):
+            raise NotImplementedError(f"Property '{prop_name}' not implemented")
+
+        return property(fail, fail, fail)
+
+    @staticmethod
+    def _raising_method(method_name):
+        # Stand-in that accepts any call signature and always fails
+        def not_impl(self, *args, **kwargs):
+            raise NotImplementedError(f"Method '{method_name}' not implemented")
+
+        return not_impl
+
+    def __new__(mcs, name, bases, namespace, **kwargs):
+        """
+        Create the class after shadowing every inherited public method/property it does not define itself.
+
+        Args:
+            name: Name of the class being created.
+            bases: Base classes whose public attributes are inspected.
+            namespace: Class body namespace; the raising stand-ins are inserted into it.
+            **kwargs: Class keyword arguments, forwarded unchanged to ``type.__new__``.
+        """
+        for base in bases:
+            for attr_name in dir(base):
+                # Private/dunder names and names the subclass defines itself are left alone
+                if attr_name.startswith("_") or attr_name in namespace:
+                    continue
+
+                base_attr = getattr(base, attr_name)
+                if isinstance(base_attr, property):
+                    namespace[attr_name] = mcs._raising_property(attr_name)
+                elif callable(base_attr):
+                    namespace[attr_name] = mcs._raising_method(attr_name)
+                # Anything else (e.g. plain data attributes) stays inherited as-is
+
+        return super().__new__(mcs, name, bases, namespace, **kwargs)
diff --git a/tests/data_engine/annotation_import/res/audio_annotation.json b/tests/data_engine/annotation_import/res/audio_annotation.json
new file mode 100644
index 00000000..adc356e2
--- /dev/null
+++ b/tests/data_engine/annotation_import/res/audio_annotation.json
@@ -0,0 +1,82 @@
+{
+  "id": 41,
+  "data": {
+    "audio": "https://example.com/some-non-existent-file.mp3",
+    "media type": "audio/mpeg",
+    "size": 111699
+  },
+  "meta": {
+    "datapoint_id": 12345678,
+    "datasource_id": 6565
+  },
+  "created_at": "2025-12-20T13:44:02.316027Z",
+  "updated_at": "2026-01-26T15:00:13.046967Z",
+  "is_labeled": true,
+  "project": 1,
+  "annotations": [
+    {
+      "completed_by": 1,
+      "result": [
+        {
+          "type": "choices",
+          "value": {
+            "choices": [
+              "true"
+            ]
+          },
+          "origin": "manual",
+          "to_name": "audio",
+          "from_name": "further_utterance",
+          "id": "deadbeef1"
+        },
+        {
+          "type": "rating",
+          "value": {
+            "rating": 1
+          },
+          "origin": "manual",
+          "to_name": "audio",
+          "from_name": "difficulty",
+          "id": "deadbeef1"
+        },
+        {
+          "type": "textarea",
+          "value": {
+            "text": [
+              "kirill@dagshub.com"
+            ]
+          },
+          "origin": "manual",
+          "to_name": "audio",
+          "from_name": "email_address",
+          "id": "deadbeef1"
+        },
+        {
+          "to_name": "audio",
+          "from_name": "first_name",
+          "id": "Qzu1dR2RQ8",
+          "type": "textarea",
+          "value": {
+            "text": [
+              "Kirill"
+            ]
+          },
+          "origin": "manual"
+        },
+        {
+          "type": "textarea",
+          "value": {
+            "text": [
+              "Bolashev"
+            ]
+          },
+          "origin": "manual",
+          "to_name": "audio",
+          "from_name": "last_name",
+          "id": "deadbeef1"
+        }
+      ],
+      "ground_truth": false
+    }
+  ]
+}
diff --git a/tests/data_engine/annotation_import/test_annotation_parsing.py b/tests/data_engine/annotation_import/test_annotation_parsing.py
index 66840ecb..c04b0d51 100644
--- a/tests/data_engine/annotation_import/test_annotation_parsing.py
+++ b/tests/data_engine/annotation_import/test_annotation_parsing.py
@@ -1,19 +1,24 @@
 import json
+from os import PathLike
 from pathlib import Path
+from typing import Union
 from unittest.mock import MagicMock
 
 import pytest
 from dagshub_annotation_converter.ir.image import IRSegmentationImageAnnotation
+from pytest import MonkeyPatch
 
 from dagshub.data_engine.annotation import MetadataAnnotations
+from dagshub.data_engine.annotation.metadata import ErrorMetadataAnnotations, UnsupportedMetadataAnnotations
 from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags
-from dagshub.data_engine.model import query_result
+from dagshub.data_engine.model import datapoint, query_result
+from dagshub.data_engine.model.datapoint import BlobDownloadError, BlobHashMetadata
 from dagshub.data_engine.model.datasource import Datasource
+from dagshub.data_engine.model.query_result import QueryResult
 from tests.data_engine.util import add_metadata_field
 
 _annotation_field_name = "annotation"
 _dp_path = "data/sample_datapoint.jpg"
-_annotation_hash = "annotation1"  # Corresponds to a resource JSON
 _res_folder = Path(__file__).parent / "res"
 
 
@@ -51,17 +56,24 @@ def mock_annotation_query_result(
     return query_result.QueryResult.from_gql_query(data_dict, ds)
 
 
-def mock_get_blob(*args, **kwargs) -> bytes:
+def mock_get_blob(*args, **kwargs) -> Union[bytes, PathLike]:
     download_url: str = args[0]
     blob_hash = download_url.split("/")[-1]
+    load_into_memory = args[4]
     blob_path = _res_folder / f"{blob_hash}.json"
-    if not blob_path.exists():
-        raise FileNotFoundError(f"Mock blob file not found: {blob_path}")
-    return blob_path.read_bytes()
 
+    try:
+        if not blob_path.exists():
+            raise FileNotFoundError(f"Blob with hash {blob_hash} not found in res folder")
+        if load_into_memory:
+            return blob_path.read_bytes()
+        else:
+            return blob_path
+    except Exception as e:
+        raise BlobDownloadError(str(e)) from e
 
-@pytest.fixture
-def ds_with_document_annotation(ds, monkeypatch):
+
+def _ds_with_annotation(ds: "Datasource", monkeypatch: MonkeyPatch, annotation_hash: str):
     add_metadata_field(
         ds,
         _annotation_field_name,
@@ -70,18 +82,89 @@ def ds_with_document_annotation(ds, monkeypatch):
     )
 
     ds.source.client.get_datapoints = MagicMock(
-        return_value=mock_annotation_query_result(ds, _annotation_field_name, _dp_path, _annotation_hash)
+        return_value=mock_annotation_query_result(ds, _annotation_field_name, _dp_path, annotation_hash)
     )
 
     monkeypatch.setattr(query_result, "_get_blob", mock_get_blob)
+    monkeypatch.setattr(datapoint, "_get_blob", mock_get_blob)
 
-    yield ds
+    return ds
+
+
+@pytest.fixture
+def ds_with_document_annotation(ds, monkeypatch):
+    yield _ds_with_annotation(ds, monkeypatch, annotation_hash="annotation1")  # res/annotation1.json
 
 
 def test_annotation_with_document_are_parsed_as_annotation(ds_with_document_annotation):
     qr = ds_with_document_annotation.all()
+    _test_annotation(qr)
+
+
+def test_double_loading_annotation_works(ds_with_document_annotation):
+    loaded = ds_with_document_annotation.all()
+    loaded.get_blob_fields(_annotation_field_name)  # request the blob field again after all()
+    _test_annotation(loaded)
+
+
+def _test_annotation(qr: QueryResult):
     annotation: MetadataAnnotations = qr[0].metadata[_annotation_field_name]
     assert isinstance(annotation, MetadataAnnotations)
     # Check that the annotation got parsed correctly, the JSON should have one segmentation annotation in it
     assert len(annotation.annotations) == 1
     assert isinstance(annotation.annotations[0], IRSegmentationImageAnnotation)
+
+
+@pytest.fixture
+def ds_with_unsupported_annotation(ds, monkeypatch):
+    yield _ds_with_annotation(ds, monkeypatch, annotation_hash="audio_annotation")  # res/audio_annotation.json
+
+
+def test_handling_unsupported_annotation(ds_with_unsupported_annotation):
+    """The audio fixture comes back as UnsupportedMetadataAnnotations that still exposes the raw JSON bytes."""
+    first_dp = ds_with_unsupported_annotation.all()[0]
+    parsed: MetadataAnnotations = first_dp.metadata[_annotation_field_name]
+
+    assert isinstance(parsed, UnsupportedMetadataAnnotations)
+    # The wrapper must remain a MetadataAnnotations subclass: the logic that checks whether
+    # annotation metadata was parsed already relies on it, so if this assertion starts failing,
+    # that logic will need to be changed too
+    assert isinstance(parsed, MetadataAnnotations)
+
+    with pytest.raises(NotImplementedError):
+        parsed.add_image_bbox("cat", 0, 0, 10, 10, 1920, 1080)
+
+    raw_json = (_res_folder / "audio_annotation.json").read_bytes()
+    assert parsed.value == raw_json
+    assert parsed.to_ls_task() == raw_json
+
+
+@pytest.fixture
+def ds_with_nonexistent_annotation(ds, monkeypatch):
+    yield _ds_with_annotation(ds, monkeypatch, annotation_hash="nonexistent_annotation")  # no such file in res/
+
+
+def test_nonexistent_annotation(ds_with_nonexistent_annotation):
+    """A blob missing from the res folder yields ErrorMetadataAnnotations whose accessors raise ValueError."""
+    result = ds_with_nonexistent_annotation.all(load_documents=False, load_annotations=False)
+    result.get_annotations()
+    failed: MetadataAnnotations = result[0].metadata[_annotation_field_name]
+
+    assert isinstance(failed, ErrorMetadataAnnotations)
+    # Must stay a MetadataAnnotations subclass: the "was this annotation already parsed" check
+    # depends on it, so if this assertion breaks, that logic needs changing too
+    assert isinstance(failed, MetadataAnnotations)
+
+    with pytest.raises(NotImplementedError):
+        failed.add_image_bbox("cat", 0, 0, 10, 10, 1920, 1080)
+
+    missing_blob_message = "Blob with hash nonexistent_annotation not found in res folder"
+    with pytest.raises(ValueError, match=missing_blob_message):
+        _ = failed.value
+    with pytest.raises(ValueError, match=missing_blob_message):
+        failed.to_ls_task()
+
+
+def test_blob_metadata_is_wrapped_from_backend(ds_with_document_annotation):
+    unloaded_dp = ds_with_document_annotation.all(load_documents=False, load_annotations=False)[0]
+    assert isinstance(unloaded_dp.metadata[_annotation_field_name], BlobHashMetadata)  # nothing was loaded
diff --git a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py
new file mode 100644
index 00000000..3676b82d
--- /dev/null
+++ b/tests/data_engine/annotation_import/test_cvat_video.py
@@ -0,0 +1,276 @@
+import datetime
+import zipfile
+from pathlib import PurePosixPath
+from unittest.mock import patch, PropertyMock
+
+import pytest
+from dagshub_annotation_converter.converters.cvat import export_cvat_video_to_xml_string
+from dagshub_annotation_converter.ir.image import IRBBoxImageAnnotation, CoordinateStyle
+from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation
+
+from dagshub.data_engine.annotation.importer import AnnotationImporter
+from dagshub.data_engine.annotation.metadata import MetadataAnnotations
+from dagshub.data_engine.client.models import MetadataSelectFieldSchema
+from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags
+from dagshub.data_engine.model.datapoint import Datapoint
+from dagshub.data_engine.model.query_result import QueryResult
+
+
+@pytest.fixture(autouse=True)
+def mock_source_prefix(ds):
+    with patch.object(type(ds.source), "source_prefix", new=PropertyMock(return_value=PurePosixPath())):
+        yield  # the empty prefix stays patched in while each test runs
+
+
+# --- import ---
+
+
+def test_import_cvat_video(ds, tmp_path):
+    """A CVAT video XML file on disk imports as a single entry holding two video bboxes."""
+    source_xml = tmp_path / "annotations.xml"
+    source_xml.write_bytes(_make_cvat_video_xml())
+    cvat_importer = AnnotationImporter(ds, "cvat_video", source_xml, load_from="disk")
+    result = cvat_importer.import_annotations()
+
+    assert len(result) == 1
+    anns = list(result.values())[0]
+    assert len(anns) == 2
+    assert all(isinstance(a, IRVideoBBoxAnnotation) for a in anns)
+
+
+# --- _get_all_video_annotations ---
+
+
+def test_get_all_video_filters(ds):
+    """Only the video annotation is returned when a datapoint mixes image and video annotations."""
+    dp = Datapoint(datasource=ds, path="dp_0", datapoint_id=0, metadata={})
+    still_image_box = IRBBoxImageAnnotation(
+        filename="test.jpg",
+        categories={"cat": 1.0},
+        top=0.1, left=0.1, width=0.2, height=0.2,
+        image_width=640, image_height=480,
+        coordinate_style=CoordinateStyle.NORMALIZED,
+    )
+
+    mixed = [still_image_box, _make_video_bbox()]
+    dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=mixed)
+
+    qr = _make_qr(ds, [dp], ann_field="ann")
+    result = qr._get_all_video_annotations("ann")
+    assert len(result) == 1
+    assert isinstance(result[0], IRVideoBBoxAnnotation)
+
+
+def test_get_all_video_empty(ds):
+    """A datapoint whose annotation list is empty contributes no video annotations."""
+    bare_dp = Datapoint(datasource=ds, path="dp_0", datapoint_id=0, metadata={})
+    bare_dp.metadata["ann"] = MetadataAnnotations(datapoint=bare_dp, field="ann", annotations=[])
+    collected = _make_qr(ds, [bare_dp], ann_field="ann")._get_all_video_annotations("ann")
+    assert collected == []
+
+
+def test_get_all_video_aggregates_across_datapoints(ds):
+    """Video annotations from every datapoint end up in one combined list."""
+    datapoints = [
+        Datapoint(datasource=ds, path=f"dp_{idx}", datapoint_id=idx, metadata={}) for idx in range(3)
+    ]
+    for frame_no, point in enumerate(datapoints):
+        single = [_make_video_bbox(frame=frame_no)]
+        point.metadata["ann"] = MetadataAnnotations(datapoint=point, field="ann", annotations=single)
+
+    all_video = _make_qr(ds, datapoints, ann_field="ann")._get_all_video_annotations("ann")
+    assert len(all_video) == 3
+
+
+# --- export_as_cvat_video ---
+
+
+def test_export_cvat_video_xml(ds, tmp_path, monkeypatch):
+    """Single-video export writes labels/video.mp4.zip whose annotations.xml has track and box tags."""
+    def _mock_download_files(self, target_dir, *args, **kwargs):
+        placeholder = target_dir / "video.mp4"
+        placeholder.parent.mkdir(parents=True, exist_ok=True)
+        placeholder.write_bytes(b"fake")
+        return target_dir
+
+    monkeypatch.setattr(QueryResult, "download_files", _mock_download_files)
+    archive = _make_video_qr(ds)[0].export_as_cvat_video(download_dir=tmp_path, annotation_field="ann")
+
+    assert archive.exists()
+    assert archive == tmp_path / "labels" / "video.mp4.zip"
+    with zipfile.ZipFile(archive, "r") as bundle:
+        xml_text = bundle.read("annotations.xml").decode("utf-8")
+    assert "<track" in xml_text
+    assert "<box" in xml_text
+
+
+def test_export_cvat_video_no_annotations_raises(ds, tmp_path):
+    """Exporting a query result whose only annotation list is empty raises RuntimeError."""
+    empty_dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
+    empty_dp.metadata["ann"] = MetadataAnnotations(datapoint=empty_dp, field="ann", annotations=[])
+    empty_qr = _make_qr(ds, [empty_dp], ann_field="ann")
+    with pytest.raises(RuntimeError, match="No video annotations"):
+        empty_qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann")
+
+
+def test_export_cvat_video_custom_name(ds, tmp_path, monkeypatch):
+    """The video_name override appears in the exported annotations.xml."""
+
+    def _mock_download_files(self, target_dir, *args, **kwargs):
+        fake_video = target_dir / "video.mp4"
+        fake_video.parent.mkdir(parents=True, exist_ok=True)
+        fake_video.write_bytes(b"fake")
+        return target_dir
+
+    monkeypatch.setattr(QueryResult, "download_files", _mock_download_files)
+    export_kwargs = {"download_dir": tmp_path, "annotation_field": "ann", "video_name": "my_clip.avi"}
+    archive = _make_video_qr(ds)[0].export_as_cvat_video(**export_kwargs)
+
+    with zipfile.ZipFile(archive, "r") as bundle:
+        xml_text = bundle.read("annotations.xml").decode("utf-8")
+    assert "my_clip.avi" in xml_text
+
+
+def test_export_cvat_video_image_only_raises(ds, tmp_path):
+    """A datapoint holding only an image bbox has no video annotations, so the export raises."""
+    still_box = IRBBoxImageAnnotation(
+        filename="test.jpg", categories={"cat": 1.0}, coordinate_style=CoordinateStyle.NORMALIZED,
+        top=0.1, left=0.1, width=0.2, height=0.2, image_width=640, image_height=480,
+    )
+    dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
+    # The image bbox is the only annotation attached to the datapoint
+    dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[still_box])
+
+    image_only_qr = _make_qr(ds, [dp], ann_field="ann")
+    with pytest.raises(RuntimeError, match="No video annotations"):
+        image_only_qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann")
+
+
+def test_export_cvat_video_multiple_datapoints(ds, tmp_path, monkeypatch):
+    """With two annotated videos the export returns the labels directory containing two zip files."""
+    video_paths = [f"video_{idx}.mp4" for idx in range(2)]
+
+    datapoints = []
+    for idx, video_path in enumerate(video_paths):
+        point = Datapoint(datasource=ds, path=video_path, datapoint_id=idx, metadata={})
+        box = _make_video_bbox(frame=idx, track_id=idx)
+        box.filename = point.path
+        point.metadata["ann"] = MetadataAnnotations(datapoint=point, field="ann", annotations=[box])
+        datapoints.append(point)
+
+    def _mock_download_files(self, target_dir, *args, **kwargs):
+        # Stand-in for the download: write a placeholder file for each video path
+        target_dir.mkdir(parents=True, exist_ok=True)
+        for video_path in video_paths:
+            (target_dir / video_path).write_bytes(b"fake")
+        return target_dir
+
+    monkeypatch.setattr(QueryResult, "download_files", _mock_download_files)
+    multi_qr = _make_qr(ds, datapoints, ann_field="ann")
+    export_root = multi_qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann")
+
+    assert export_root.is_dir()
+    assert export_root == tmp_path / "labels"
+    assert len(list(export_root.glob("*.zip"))) == 2
+
+
+def test_export_cvat_video_passes_video_file_when_dimensions_missing(ds, tmp_path, monkeypatch):
+    """When annotations carry zero width/height, the exporter is handed a path ending in video.mp4."""
+    dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
+    dimensionless = []
+    for frame_no in (0, 5):
+        box = _make_video_bbox(frame=frame_no, track_id=0)
+        box.image_width = box.image_height = 0
+        box.filename = "video.mp4"
+        dimensionless.append(box)
+    dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=dimensionless)
+    qr = _make_qr(ds, [dp], ann_field="ann")
+
+    seen = {}
+
+    def _mock_download_files(self, target_dir, *args, **kwargs):
+        local_video = target_dir / "video.mp4"
+        local_video.parent.mkdir(parents=True, exist_ok=True)
+        local_video.write_bytes(b"video")
+        return target_dir
+
+    def _mock_export_cvat_video_to_zip(
+        video_annotations,
+        output_path,
+        video_name,
+        image_width,
+        image_height,
+        video_file=None,
+    ):
+        seen["video_file"] = None if video_file is None else str(video_file)
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        output_path.write_text("<annotations/>")
+        return output_path
+
+    monkeypatch.setattr(QueryResult, "download_files", _mock_download_files)
+    exporter_target = "dagshub.data_engine.model.query_result.export_cvat_video_to_zip"
+    monkeypatch.setattr(exporter_target, _mock_export_cvat_video_to_zip)
+
+    qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann")
+
+    assert seen["video_file"] is not None
+    assert seen["video_file"].endswith("video.mp4")
+
+
+def test_export_cvat_video_missing_local_file_raises(ds, tmp_path, monkeypatch):
+    """With zero dimensions and no downloaded "missing.mp4", the export raises FileNotFoundError."""
+    def _mock_download_files(self, target_dir, *args, **kwargs):
+        # Creates the directory only; no video file is ever written
+        target_dir.mkdir(parents=True, exist_ok=True)
+        return target_dir
+
+    monkeypatch.setattr(QueryResult, "download_files", _mock_download_files)
+
+    orphan = _make_video_bbox(frame=0, track_id=0)
+    orphan.image_width = orphan.image_height = 0
+    orphan.filename = "missing.mp4"
+    dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
+    dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[orphan])
+    qr = _make_qr(ds, [dp], ann_field="ann")
+    with pytest.raises(FileNotFoundError, match="missing.mp4"):
+        qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann")
+
+
+# --- helpers ---
+
+
+def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxAnnotation:
+    return IRVideoBBoxAnnotation(
+        track_id=track_id, frame_number=frame,
+        left=100.0, top=150.0, width=50.0, height=80.0,
+        image_width=1920, image_height=1080,
+        categories={"person": 1.0},  # every generated box carries the single "person" category
+        coordinate_style=CoordinateStyle.DENORMALIZED,  # presumably pixel coordinates - TODO confirm
+    )
+
+
+def _make_cvat_video_xml() -> bytes:
+    two_frame_track = [_make_video_bbox(frame=frame_no, track_id=0) for frame_no in (0, 5)]
+    return export_cvat_video_to_xml_string(two_frame_track)  # track 0 with boxes on frames 0 and 5
+
+
+def _make_video_qr(ds):
+    """Return (query result, datapoint) for one "video.mp4" datapoint annotated on frames 0 and 5."""
+    video_dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
+    boxes = [_make_video_bbox(frame=frame_no, track_id=0) for frame_no in (0, 5)]
+    for box in boxes:
+        box.filename = "video.mp4"
+    video_dp.metadata["ann"] = MetadataAnnotations(datapoint=video_dp, field="ann", annotations=boxes)
+    return _make_qr(ds, [video_dp], ann_field="ann"), video_dp
+
+
+def _make_qr(ds, datapoints, ann_field=None):
+    """Wrap datapoints in a QueryResult; a truthy ann_field adds one annotation-tagged BLOB field."""
+    if not ann_field:
+        return QueryResult(datasource=ds, _entries=datapoints, fields=[])
+    annotation_schema = MetadataSelectFieldSchema(
+        name=ann_field, originalName=ann_field, valueType=MetadataFieldType.BLOB,
+        multiple=False, autoGenerated=False, tags={ReservedTags.ANNOTATION.value},
+        asOf=int(datetime.datetime.now().timestamp()),
+    )
+    return QueryResult(datasource=ds, _entries=datapoints, fields=[annotation_schema])
diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py
new file mode 100644
index 00000000..9070e676
--- /dev/null
+++ b/tests/data_engine/annotation_import/test_mot.py
@@ -0,0 +1,383 @@
+import configparser
+import datetime
+import json
+import zipfile
+from pathlib import Path, PurePosixPath
+from unittest.mock import patch, PropertyMock
+
+import pytest
+from dagshub_annotation_converter.ir.image import CoordinateStyle
+from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation
+
+from dagshub.data_engine.annotation.importer import AnnotationImporter, AnnotationsNotFoundError
+from dagshub.data_engine.annotation.metadata import MetadataAnnotations
+from dagshub.data_engine.client.models import MetadataSelectFieldSchema
+from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags
+from dagshub.data_engine.model.datapoint import Datapoint
+from dagshub.data_engine.model.query_result import QueryResult
+
+
+@pytest.fixture(autouse=True)
+def mock_source_prefix(ds):
+    with patch.object(type(ds.source), "source_prefix", new=PropertyMock(return_value=PurePosixPath())):
+        yield  # the empty prefix stays patched in while each test runs
+
+
+# --- _is_video_annotation_dict ---
+
+
+def test_is_video_dict_int_keys():
+    assert AnnotationImporter._is_video_annotation_dict({0: [], 1: []}) is True  # every key is an int
+
+
+def test_is_video_dict_str_keys():
+    assert AnnotationImporter._is_video_annotation_dict({"file.jpg": []}) is False  # the only key is a str
+
+
+def test_is_video_dict_empty():
+    assert AnnotationImporter._is_video_annotation_dict({}) is False  # an empty dict is not classified as video
+
+
+def test_is_video_dict_non_dict():
+    assert AnnotationImporter._is_video_annotation_dict([]) is False  # a list yields False instead of raising
+
+
+def test_is_video_dict_mixed_first_int():
+    assert AnnotationImporter._is_video_annotation_dict({0: [], "a": []}) is True  # int key first, str key second
+
+
+# --- is_video_format ---
+
+
+@pytest.mark.parametrize(
+    "ann_type, expected",
+    [
+        ("yolo", False),
+        ("cvat", False),
+        ("coco", False),
+        ("mot", True),
+        ("cvat_video", True),
+    ],
+)
+def test_is_video_format(ds, ann_type, expected, tmp_path):
+    """Of the listed types, only "mot" and "cvat_video" report themselves as video formats."""
+    # The yolo case is the only one that is given an extra yolo_type argument
+    extra = {"yolo_type": "bbox"} if ann_type == "yolo" else {}
+    importer = AnnotationImporter(ds, ann_type, tmp_path / "dummy", load_from="disk", **extra)
+    assert importer.is_video_format is expected
+
+
+# --- _flatten_video_annotations ---
+
+
+def test_flatten_merges_frames(ds, tmp_path):
+    """Annotations keyed by frame number are merged into one list under the "test_video" key."""
+    per_frame = {frame_no: [_make_video_bbox(frame=frame_no)] for frame_no in (0, 5)}
+    mot_importer = AnnotationImporter(ds, "mot", tmp_path / "test_video", load_from="disk")
+
+    flattened = mot_importer._flatten_video_annotations(per_frame)
+    assert "test_video" in flattened
+    assert len(flattened["test_video"]) == 2
+
+
+def test_flatten_defaults_to_file_stem(ds, tmp_path):
+    single_frame = {0: [_make_video_bbox()]}  # no video_name is passed to the importer below
+    mot_importer = AnnotationImporter(ds, "mot", tmp_path / "my_sequence", load_from="disk")
+    assert "my_sequence" in mot_importer._flatten_video_annotations(single_frame)
+
+
+def test_flatten_video_name_override(ds, tmp_path):
+    """An explicit video_name shows up as a key of the flattened result."""
+    annotation_path = tmp_path / "test_video"
+    mot_importer = AnnotationImporter(ds, "mot", annotation_path, load_from="disk", video_name="custom.mp4")
+    flattened = mot_importer._flatten_video_annotations({0: [_make_video_bbox()]})
+    assert "custom.mp4" in flattened
+
+
+# --- import ---
+
+
+def test_import_mot_from_dir(ds, tmp_path):
+    """A MOT sequence directory imports as one entry holding two video bboxes."""
+    sequence_dir = tmp_path / "mot_seq"
+    _create_mot_dir(sequence_dir)
+    mot_importer = AnnotationImporter(ds, "mot", sequence_dir, load_from="disk")
+    result = mot_importer.import_annotations()
+
+    assert len(result) == 1
+    anns = list(result.values())[0]
+    assert len(anns) == 2
+    assert all(isinstance(a, IRVideoBBoxAnnotation) for a in anns)
+
+
+def test_import_mot_from_zip(ds, tmp_path):
+    """The MOT sequence packed into a zip imports as one entry with two annotations."""
+    sequence_dir = tmp_path / "mot_seq"
+    _create_mot_dir(sequence_dir)
+    archive = _zip_mot_dir(tmp_path, sequence_dir)
+
+    imported = AnnotationImporter(ds, "mot", archive, load_from="disk").import_annotations()
+
+    assert len(imported) == 1
+    assert len(list(imported.values())[0]) == 2
+
+
+def test_import_mot_from_fs_passes_datasource_path_from_source_prefix(ds, tmp_path, monkeypatch):
+    """The loader receives source_prefix as datasource_path, plus the size and video file options."""
+    captured = {}
+
+    def _mock_load_mot_from_fs(import_dir, image_width=None, image_height=None, video_files=None, datasource_path=""):
+        # Snapshot every argument so the assertions below can inspect the call
+        captured.update(
+            import_dir=import_dir,
+            image_width=image_width,
+            image_height=image_height,
+            video_files=video_files,
+            datasource_path=datasource_path,
+        )
+        return {"seq_a": ({0: [_make_video_bbox(frame=0)]}, object())}
+
+    monkeypatch.setattr("dagshub.data_engine.annotation.importer.load_mot_from_fs", _mock_load_mot_from_fs)
+
+    # Replace the empty prefix from the autouse fixture with a real one for this test only
+    prefix_patch = patch.object(
+        type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath("data/videos")
+    )
+    importer_options = {"image_width": 1280, "image_height": 720, "video_files": {"seq_a": "dummy.mp4"}}
+
+    with prefix_patch:
+        importer = AnnotationImporter(ds, "mot", tmp_path, load_from="disk", **importer_options)
+        result = importer.import_annotations()
+
+    assert captured["datasource_path"] == "data/videos"
+    assert captured["video_files"] == {"seq_a": "dummy.mp4"}
+    assert captured["image_width"] == 1280
+    assert captured["image_height"] == 720
+    assert "seq_a" in result
+
+
+def test_import_mot_nonexistent_raises(ds, tmp_path):
+    absent_importer = AnnotationImporter(ds, "mot", tmp_path / "missing", load_from="disk")
+    with pytest.raises(AnnotationsNotFoundError):  # tmp_path / "missing" is never created
+        absent_importer.import_annotations()
+
+
+# --- convert_to_ls_tasks ---
+
+
+def test_convert_video_to_ls_tasks(ds, tmp_path):
+    """Video annotations convert to a JSON task, keyed by video name, that has an "annotations" entry."""
+    two_frames = [_make_video_bbox(frame=frame_no) for frame_no in (0, 1)]
+    mot_importer = AnnotationImporter(ds, "mot", tmp_path / "video", load_from="disk")
+    ls_tasks = mot_importer.convert_to_ls_tasks({"video.mp4": two_frames})
+
+    assert "video.mp4" in ls_tasks
+    assert "annotations" in json.loads(ls_tasks["video.mp4"])
+
+
+def test_convert_video_empty_skipped(ds, tmp_path):
+    no_annotations = {"video.mp4": []}  # a video whose annotation list is empty
+    mot_importer = AnnotationImporter(ds, "mot", tmp_path / "video", load_from="disk")
+    assert "video.mp4" not in mot_importer.convert_to_ls_tasks(no_annotations)
+
+
+# --- export_as_mot ---
+
+
+def test_export_mot_directory_structure(ds, tmp_path, monkeypatch):
+    """Single-video MOT export returns labels/video, where the mocked exporter's files are found."""
+
+    def _mock_download_files(self, target_dir, *args, **kwargs):
+        fake_video = target_dir / "video.mp4"
+        fake_video.parent.mkdir(parents=True, exist_ok=True)
+        fake_video.write_bytes(b"fake")
+        return target_dir
+
+    def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=None):
+        # Writes gt/gt.txt, gt/labels.txt and seqinfo.ini in place of the real exporter
+        gt_dir = output_dir / "gt"
+        output_dir.mkdir(parents=True, exist_ok=True)
+        gt_dir.mkdir(parents=True, exist_ok=True)
+        (gt_dir / "gt.txt").write_text("")
+        (gt_dir / "labels.txt").write_text("person\n")
+        seqinfo = configparser.ConfigParser()
+        seqinfo["Sequence"] = {"imWidth": "1920", "imHeight": "1080"}
+        with open(output_dir / "seqinfo.ini", "w") as ini_file:
+            seqinfo.write(ini_file)
+        return output_dir
+
+    monkeypatch.setattr(QueryResult, "download_files", _mock_download_files)
+    monkeypatch.setattr("dagshub.data_engine.model.query_result.export_mot_to_dir", _mock_export_mot_to_dir)
+    video_qr, _ = _make_video_qr(ds)
+    sequence_dir = video_qr.export_as_mot(download_dir=tmp_path, annotation_field="ann")
+
+    assert sequence_dir.exists()
+    assert sequence_dir == tmp_path / "labels" / "video"
+    for expected in ("gt/gt.txt", "gt/labels.txt", "seqinfo.ini"):
+        assert (sequence_dir / expected).exists()
+
+
+def test_export_mot_explicit_dimensions(ds, tmp_path, monkeypatch):
+    """Explicit image_width/image_height reach the exporter through its context argument."""
+
+    def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=None):
+        gt_dir = output_dir / "gt"
+        output_dir.mkdir(parents=True, exist_ok=True)
+        # Echo the context dimensions into seqinfo.ini so the test can read them back
+        seqinfo = configparser.ConfigParser()
+        seqinfo["Sequence"] = {
+            "imWidth": str(context.image_width),
+            "imHeight": str(context.image_height),
+        }
+        with open(output_dir / "seqinfo.ini", "w") as ini_file:
+            seqinfo.write(ini_file)
+        gt_dir.mkdir(parents=True, exist_ok=True)
+        (gt_dir / "gt.txt").write_text("")
+        (gt_dir / "labels.txt").write_text("person\n")
+        return output_dir
+
+    monkeypatch.setattr("dagshub.data_engine.model.query_result.export_mot_to_dir", _mock_export_mot_to_dir)
+
+    video_qr, _ = _make_video_qr(ds)
+    requested_size = {"image_width": 1280, "image_height": 720}
+    sequence_dir = video_qr.export_as_mot(download_dir=tmp_path, annotation_field="ann", **requested_size)
+
+    written = (sequence_dir / "seqinfo.ini").read_text()
+    assert "1280" in written
+    assert "720" in written
+
+
+def test_export_mot_no_annotations_raises(ds, tmp_path):
+    """Exporting a query result whose only annotation list is empty raises RuntimeError."""
+    empty_dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
+    empty_dp.metadata["ann"] = MetadataAnnotations(datapoint=empty_dp, field="ann", annotations=[])
+    empty_qr = _make_qr(ds, [empty_dp], ann_field="ann")
+    with pytest.raises(RuntimeError, match="No video annotations"):
+        empty_qr.export_as_mot(download_dir=tmp_path, annotation_field="ann")
+
+
+def test_export_mot_multiple_videos(ds, tmp_path, monkeypatch):
+    """With two annotated videos the export returns the labels directory holding both sequence dirs."""
+    video_paths = [f"video_{idx}.mp4" for idx in range(2)]
+
+    datapoints = []
+    for idx, video_path in enumerate(video_paths):
+        point = Datapoint(datasource=ds, path=video_path, datapoint_id=idx, metadata={})
+        box = _make_video_bbox(frame=idx, track_id=idx)
+        box.filename = point.path
+        point.metadata["ann"] = MetadataAnnotations(datapoint=point, field="ann", annotations=[box])
+        datapoints.append(point)
+
+    def _mock_download_files(self, target_dir, *args, **kwargs):
+        target_dir.mkdir(parents=True, exist_ok=True)
+        for video_path in video_paths:
+            (target_dir / video_path).write_bytes(b"fake")
+        return target_dir
+
+    def _mock_export_mot_sequences_to_dirs(video_annotations, context, labels_dir, video_files=None):
+        for sequence_name in ("video_0", "video_1"):
+            gt_dir = labels_dir / sequence_name / "gt"
+            gt_dir.mkdir(parents=True, exist_ok=True)
+            (gt_dir / "gt.txt").write_text("")
+            (gt_dir / "labels.txt").write_text("person\n")
+        return labels_dir
+
+    monkeypatch.setattr(QueryResult, "download_files", _mock_download_files)
+    exporter_target = "dagshub.data_engine.model.query_result.export_mot_sequences_to_dirs"
+    monkeypatch.setattr(exporter_target, _mock_export_mot_sequences_to_dirs)
+    multi_qr = _make_qr(ds, datapoints, ann_field="ann")
+    export_root = multi_qr.export_as_mot(download_dir=tmp_path, annotation_field="ann")
+
+    assert export_root == tmp_path / "labels"
+    assert (export_root / "video_0" / "gt" / "gt.txt").exists()
+    assert (export_root / "video_1" / "gt" / "gt.txt").exists()
+
+
+def test_export_mot_passes_video_file_when_dimensions_missing(ds, tmp_path, monkeypatch):
+    """When annotations carry zero width/height, the MOT exporter is handed a path ending in video.mp4."""
+    dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
+    dimensionless = []
+    for frame_no in (0, 1):
+        box = _make_video_bbox(frame=frame_no, track_id=1)
+        box.image_width = box.image_height = 0
+        box.filename = "video.mp4"
+        dimensionless.append(box)
+    dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=dimensionless)
+    qr = _make_qr(ds, [dp], ann_field="ann")
+
+    seen = {}
+    def _mock_download_files(self, target_dir, *args, **kwargs):
+        local_video = target_dir / "video.mp4"
+        local_video.parent.mkdir(parents=True, exist_ok=True)
+        local_video.write_bytes(b"video")
+        return target_dir
+
+    def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=None):
+        seen["video_file"] = None if video_file is None else str(video_file)
+        output_dir.mkdir(parents=True, exist_ok=True)
+        return output_dir
+
+    monkeypatch.setattr(QueryResult, "download_files", _mock_download_files)
+    monkeypatch.setattr("dagshub.data_engine.model.query_result.export_mot_to_dir", _mock_export_mot_to_dir)
+    qr.export_as_mot(download_dir=tmp_path, annotation_field="ann")
+
+    assert seen["video_file"] is not None
+    assert seen["video_file"].endswith("video.mp4")
+
+
+# --- helpers ---
+
+
+def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxAnnotation:
+    return IRVideoBBoxAnnotation(
+        track_id=track_id, frame_number=frame,
+        left=100.0, top=150.0, width=50.0, height=80.0,
+        image_width=1920, image_height=1080,
+        categories={"person": 1.0},  # every generated box carries the single "person" category
+        coordinate_style=CoordinateStyle.DENORMALIZED,  # presumably pixel coordinates - TODO confirm
+    )
+
+
+def _create_mot_dir(mot_dir: Path):
+    """Write a minimal MOT sequence (gt/gt.txt, gt/labels.txt, seqinfo.ini) under mot_dir."""
+    seqinfo = configparser.ConfigParser()
+    seqinfo["Sequence"] = {
+        "name": "test", "frameRate": "30", "seqLength": "100", "imWidth": "1920", "imHeight": "1080",
+    }
+    ground_truth_dir = mot_dir / "gt"
+    ground_truth_dir.mkdir(parents=True)  # no exist_ok: raises if the gt directory already exists
+    (ground_truth_dir / "gt.txt").write_text("1,1,100,150,50,80,1,1,1.0\n2,1,110,160,50,80,1,1,0.9\n")
+    (ground_truth_dir / "labels.txt").write_text("person\n")
+    with open(mot_dir / "seqinfo.ini", "w") as ini_file:
+        seqinfo.write(ini_file)
+
+
+def _zip_mot_dir(tmp_path: Path, mot_dir: Path) -> Path:
+    """Pack the three MOT files of mot_dir into tmp_path/mot.zip and return the archive path."""
+    archive_path = tmp_path / "mot.zip"
+    with zipfile.ZipFile(archive_path, "w") as archive:
+        for member in ("gt/gt.txt", "gt/labels.txt", "seqinfo.ini"):
+            archive.write(mot_dir / member, member)
+    return archive_path
+
+
+def _make_video_qr(ds):
+    """Return (query result, datapoint) for one "video.mp4" datapoint annotated on frames 0 and 1."""
+    video_dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
+    boxes = [_make_video_bbox(frame=frame_no, track_id=1) for frame_no in (0, 1)]
+    for box in boxes:
+        box.filename = "video.mp4"
+    video_dp.metadata["ann"] = MetadataAnnotations(datapoint=video_dp, field="ann", annotations=boxes)
+    return _make_qr(ds, [video_dp], ann_field="ann"), video_dp
+
+
+def _make_qr(ds, datapoints, ann_field=None):
+    """Wrap datapoints in a QueryResult; a truthy ann_field adds one annotation-tagged BLOB field."""
+    if not ann_field:
+        return QueryResult(datasource=ds, _entries=datapoints, fields=[])
+    annotation_schema = MetadataSelectFieldSchema(
+        name=ann_field, originalName=ann_field, valueType=MetadataFieldType.BLOB,
+        multiple=False, autoGenerated=False, tags={ReservedTags.ANNOTATION.value},
+        asOf=int(datetime.datetime.now().timestamp()),
+    )
+    return QueryResult(datasource=ds, _entries=datapoints, fields=[annotation_schema])
diff --git a/tests/data_engine/conftest.py b/tests/data_engine/conftest.py
index e8f0c70a..e57d1e83 100644
--- a/tests/data_engine/conftest.py
+++ b/tests/data_engine/conftest.py
@@ -5,7 +5,7 @@
 from dagshub.common.api import UserAPI
 from dagshub.common.api.responses import UserAPIResponse
 from dagshub.data_engine import datasources
-from dagshub.data_engine.client.models import MetadataSelectFieldSchema, PreprocessingStatus
+from dagshub.data_engine.client.models import DatasourceType, MetadataSelectFieldSchema, PreprocessingStatus
 from dagshub.data_engine.model.datapoint import Datapoint
 from dagshub.data_engine.model.datasource import DatasetState, Datasource
 from dagshub.data_engine.model.query_result import QueryResult
@@ -26,6 +26,7 @@ def other_ds(mocker, mock_dagshub_auth) -> Datasource:
 
 def _create_mock_datasource(mocker, id, name) -> Datasource:
     ds_state = datasources.DatasourceState(id=id, name=name, repo="kirill/repo")
+    ds_state.source_type = DatasourceType.REPOSITORY
     ds_state.path = "repo://kirill/repo/data/"
     ds_state.preprocessing_status = PreprocessingStatus.READY
     mocker.patch.object(ds_state, "client")
diff --git a/tests/mocks/repo_api.py b/tests/mocks/repo_api.py
index d457d161..22b6c94c 100644
--- a/tests/mocks/repo_api.py
+++ b/tests/mocks/repo_api.py
@@ -113,6 +113,10 @@ def generate_content_api_entry(path, is_dir=False, versioning="dvc") -> ContentA
     def default_branch(self) -> str:
         return self._default_branch
 
+    @property
+    def id(self) -> int:
+        return 1
+
     def get_connected_storages(self) -> List[StorageAPIEntry]:
         return self.storages
 

From 4b08cb217a4d38d0f0122a9963e1566824c893ef Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Wed, 25 Mar 2026 13:57:59 +0200
Subject: [PATCH 03/24] Rename IRVideoBBoxAnnotation to
 IRVideoBBoxFrameAnnotation per dagshub-annotation-converter refactor

---
 dagshub/data_engine/annotation/importer.py             |  4 ++--
 dagshub/data_engine/model/query_result.py              |  8 ++++----
 tests/data_engine/annotation_import/test_cvat_video.py | 10 +++++-----
 tests/data_engine/annotation_import/test_mot.py        |  8 ++++----
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py
index 2c19a550..36ec2a42 100644
--- a/dagshub/data_engine/annotation/importer.py
+++ b/dagshub/data_engine/annotation/importer.py
@@ -14,7 +14,7 @@
 from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask
 from dagshub_annotation_converter.formats.yolo import YoloContext
 from dagshub_annotation_converter.ir.image.annotations.base import IRAnnotationBase
-from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation
+from dagshub_annotation_converter.ir.video import IRVideoBBoxFrameAnnotation
 
 from dagshub.common.api import UserAPI
 from dagshub.common.api.repo import PathNotFoundError
@@ -437,7 +437,7 @@ def _convert_to_ls_video_tasks(
         """
         tasks = {}
         for filename, anns in annotations.items():
-            video_anns = [a for a in anns if isinstance(a, IRVideoBBoxAnnotation)]
+            video_anns = [a for a in anns if isinstance(a, IRVideoBBoxFrameAnnotation)]
             if not video_anns:
                 continue
             video_path = self.ds.source.raw_path(filename)
diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py
index f30643a9..19f19332 100644
--- a/dagshub/data_engine/model/query_result.py
+++ b/dagshub/data_engine/model/query_result.py
@@ -22,7 +22,7 @@
 from dagshub_annotation_converter.formats.yolo.categories import Categories
 from dagshub_annotation_converter.formats.yolo.common import ir_mapping
 from dagshub_annotation_converter.ir.image import IRImageAnnotationBase
-from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation
+from dagshub_annotation_converter.ir.video import IRVideoBBoxFrameAnnotation
 from pydantic import ValidationError
 
 from dagshub.auth import get_token
@@ -782,9 +782,9 @@ def _get_all_annotations(self, annotation_field: str) -> List[IRImageAnnotationB
                 annotations.extend(dp.metadata[annotation_field].annotations)
         return annotations
 
-    def _get_all_video_annotations(self, annotation_field: str) -> List[IRVideoBBoxAnnotation]:
+    def _get_all_video_annotations(self, annotation_field: str) -> List[IRVideoBBoxFrameAnnotation]:
         all_anns = self._get_all_annotations(annotation_field)
-        return [a for a in all_anns if isinstance(a, IRVideoBBoxAnnotation)]
+        return [a for a in all_anns if isinstance(a, IRVideoBBoxFrameAnnotation)]
 
     def _prepare_video_file_for_export(self, local_root: Path, repo_relative_filename: str) -> Optional[Path]:
         ann_path = Path(repo_relative_filename)
@@ -798,7 +798,7 @@ def _prepare_video_file_for_export(self, local_root: Path, repo_relative_filenam
         return None
 
     @staticmethod
-    def _get_annotation_filename(ann: IRVideoBBoxAnnotation) -> Optional[str]:
+    def _get_annotation_filename(ann: IRVideoBBoxFrameAnnotation) -> Optional[str]:
         filename = ann.filename
         if filename is None:
             return None
diff --git a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py
index 3676b82d..940a2428 100644
--- a/tests/data_engine/annotation_import/test_cvat_video.py
+++ b/tests/data_engine/annotation_import/test_cvat_video.py
@@ -6,7 +6,7 @@
 import pytest
 from dagshub_annotation_converter.converters.cvat import export_cvat_video_to_xml_string
 from dagshub_annotation_converter.ir.image import IRBBoxImageAnnotation, CoordinateStyle
-from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation
+from dagshub_annotation_converter.ir.video import IRVideoBBoxFrameAnnotation
 
 from dagshub.data_engine.annotation.importer import AnnotationImporter
 from dagshub.data_engine.annotation.metadata import MetadataAnnotations
@@ -35,7 +35,7 @@ def test_import_cvat_video(ds, tmp_path):
     assert len(result) == 1
     anns = list(result.values())[0]
     assert len(anns) == 2
-    assert all(isinstance(a, IRVideoBBoxAnnotation) for a in anns)
+    assert all(isinstance(a, IRVideoBBoxFrameAnnotation) for a in anns)
 
 
 # --- _get_all_video_annotations ---
@@ -58,7 +58,7 @@ def test_get_all_video_filters(ds):
     qr = _make_qr(ds, [dp], ann_field="ann")
     result = qr._get_all_video_annotations("ann")
     assert len(result) == 1
-    assert isinstance(result[0], IRVideoBBoxAnnotation)
+    assert isinstance(result[0], IRVideoBBoxFrameAnnotation)
 
 
 def test_get_all_video_empty(ds):
@@ -239,8 +239,8 @@ def _mock_download_files(self, target_dir, *args, **kwargs):
 # --- helpers ---
 
 
-def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxAnnotation:
-    return IRVideoBBoxAnnotation(
+def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxFrameAnnotation:
+    return IRVideoBBoxFrameAnnotation(
         track_id=track_id, frame_number=frame,
         left=100.0, top=150.0, width=50.0, height=80.0,
         image_width=1920, image_height=1080,
diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py
index 9070e676..40cfd637 100644
--- a/tests/data_engine/annotation_import/test_mot.py
+++ b/tests/data_engine/annotation_import/test_mot.py
@@ -7,7 +7,7 @@
 
 import pytest
 from dagshub_annotation_converter.ir.image import CoordinateStyle
-from dagshub_annotation_converter.ir.video import IRVideoBBoxAnnotation
+from dagshub_annotation_converter.ir.video import IRVideoBBoxFrameAnnotation
 
 from dagshub.data_engine.annotation.importer import AnnotationImporter, AnnotationsNotFoundError
 from dagshub.data_engine.annotation.metadata import MetadataAnnotations
@@ -107,7 +107,7 @@ def test_import_mot_from_dir(ds, tmp_path):
     assert len(result) == 1
     anns = list(result.values())[0]
     assert len(anns) == 2
-    assert all(isinstance(a, IRVideoBBoxAnnotation) for a in anns)
+    assert all(isinstance(a, IRVideoBBoxFrameAnnotation) for a in anns)
 
 
 def test_import_mot_from_zip(ds, tmp_path):
@@ -328,8 +328,8 @@ def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=N
 # --- helpers ---
 
 
-def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxAnnotation:
-    return IRVideoBBoxAnnotation(
+def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxFrameAnnotation:
+    return IRVideoBBoxFrameAnnotation(
         track_id=track_id, frame_number=frame,
         left=100.0, top=150.0, width=50.0, height=80.0,
         image_width=1920, image_height=1080,

From 3096e28b52e639ec9e64eb7081125876badd42ea Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Wed, 25 Mar 2026 17:54:28 +0200
Subject: [PATCH 04/24] Update client for dagshub-annotation-converter
 video converter refactor

---
 dagshub/data_engine/annotation/importer.py    | 75 ++++++++++++-------
 dagshub/data_engine/annotation/video.py       | 24 ++++++
 dagshub/data_engine/model/query_result.py     | 29 ++++++-
 .../annotation_import/test_cvat_video.py      | 16 ++--
 .../data_engine/annotation_import/test_mot.py | 56 +++++++++++---
 5 files changed, 153 insertions(+), 47 deletions(-)
 create mode 100644 dagshub/data_engine/annotation/video.py

diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py
index 36ec2a42..47f90573 100644
--- a/dagshub/data_engine/annotation/importer.py
+++ b/dagshub/data_engine/annotation/importer.py
@@ -19,6 +19,7 @@
 from dagshub.common.api import UserAPI
 from dagshub.common.api.repo import PathNotFoundError
 from dagshub.common.helpers import log_message
+from dagshub.data_engine.annotation.video import build_video_sequence_from_annotations
 
 if TYPE_CHECKING:
     from dagshub.data_engine.model.datasource import Datasource
@@ -99,7 +100,7 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]:
                     annotation_dict = self._flatten_cvat_fs_annotations(load_cvat_from_fs(annotations_file))
                 else:
                     result = load_cvat_from_zip(annotations_file)
-                    if self._is_video_annotation_dict(result):
+                    if self._is_video_annotation(result):
                         annotation_dict = self._flatten_video_annotations(result)
                     else:
                         annotation_dict = result
@@ -144,13 +145,13 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]:
                     annotation_dict = self._flatten_cvat_fs_annotations(raw)
                 elif annotations_file.suffix == ".zip":
                     result = load_cvat_from_zip(annotations_file, **cvat_kwargs)
-                    if self._is_video_annotation_dict(result):
+                    if self._is_video_annotation(result):
                         annotation_dict = self._flatten_video_annotations(result)
                     else:
                         annotation_dict = result
                 else:
                     result = load_cvat_from_xml_file(annotations_file, **cvat_kwargs)
-                    if self._is_video_annotation_dict(result):
+                    if self._is_video_annotation(result):
                         annotation_dict = self._flatten_video_annotations(result)
                     else:
                         annotation_dict = result
@@ -160,8 +161,12 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]:
             return annotation_dict
 
     @staticmethod
-    def _is_video_annotation_dict(result) -> bool:
-        """Check if the result from a CVAT loader is video annotations (int keys) vs image annotations (str keys)."""
+    def _is_video_annotation(result) -> bool:
+        """Check if the result from a CVAT loader is video annotations (IRVideoSequence or int keys) vs image annotations (str keys)."""
+        from dagshub_annotation_converter.ir.video import IRVideoSequence
+
+        if isinstance(result, IRVideoSequence):
+            return True
         if not isinstance(result, dict) or len(result) == 0:
             return False
         first_key = next(iter(result.keys()))
@@ -169,48 +174,62 @@ def _is_video_annotation_dict(result) -> bool:
 
     def _flatten_video_annotations(
         self,
-        frame_annotations: Dict[int, Sequence[IRAnnotationBase]],
+        video_data,
     ) -> Dict[str, Sequence[IRAnnotationBase]]:
-        """Flatten frame-indexed video annotations into a single entry keyed by video name."""
+        """Flatten video annotations (IRVideoSequence or frame-indexed dict) into a single entry keyed by video name."""
+        from dagshub_annotation_converter.ir.video import IRVideoSequence
+
         video_name = self.additional_args.get("video_name", self.annotations_file.stem)
-        all_anns: List[IRAnnotationBase] = []
-        for frame_anns in frame_annotations.values():
-            all_anns.extend(frame_anns)
-        return {video_name: all_anns}
+        if isinstance(video_data, IRVideoSequence):
+            return {video_name: video_data.to_annotations()}
+        else:
+            # Legacy dict[int, list[annotation]] format
+            all_anns: List[IRAnnotationBase] = []
+            for frame_anns in video_data.values():
+                all_anns.extend(frame_anns)
+            return {video_name: all_anns}
 
     def _flatten_cvat_fs_annotations(
         self, fs_annotations: Mapping[str, object]
     ) -> Dict[str, Sequence[IRAnnotationBase]]:
+        from dagshub_annotation_converter.ir.video import IRVideoSequence
+
         flattened: Dict[str, List[IRAnnotationBase]] = {}
         for rel_path, result in fs_annotations.items():
-            if not isinstance(result, dict):
-                continue
-            if self._is_video_annotation_dict(result):
+            if isinstance(result, IRVideoSequence):
                 video_key = Path(rel_path).stem
                 flattened.setdefault(video_key, [])
-                for frame_anns in result.values():
-                    flattened[video_key].extend(frame_anns)
-            else:
-                for filename, anns in result.items():
-                    flattened.setdefault(filename, [])
-                    flattened[filename].extend(anns)
+                flattened[video_key].extend(result.to_annotations())
+            elif isinstance(result, dict):
+                if self._is_video_annotation(result):
+                    video_key = Path(rel_path).stem
+                    flattened.setdefault(video_key, [])
+                    for frame_anns in result.values():
+                        flattened[video_key].extend(frame_anns)
+                else:
+                    for filename, anns in result.items():
+                        flattened.setdefault(filename, [])
+                        flattened[filename].extend(anns)
         return flattened
 
     def _flatten_mot_fs_annotations(
         self,
         fs_annotations: Mapping[str, object],
     ) -> Dict[str, Sequence[IRAnnotationBase]]:
+        from dagshub_annotation_converter.ir.video import IRVideoSequence
+
         flattened: Dict[str, List[IRAnnotationBase]] = {}
         for rel_path, result in fs_annotations.items():
             if not isinstance(result, tuple) or len(result) != 2:
                 continue
-            frame_annotations = result[0]
-            if not isinstance(frame_annotations, dict):
-                continue
+            sequence_or_dict = result[0]
             sequence_name = Path(rel_path).stem if rel_path not in (".", "") else self.annotations_file.stem
             flattened.setdefault(sequence_name, [])
-            for frame_anns in frame_annotations.values():
-                flattened[sequence_name].extend(frame_anns)
+            if isinstance(sequence_or_dict, IRVideoSequence):
+                flattened[sequence_name].extend(sequence_or_dict.to_annotations())
+            elif isinstance(sequence_or_dict, dict):
+                for frame_anns in sequence_or_dict.values():
+                    flattened[sequence_name].extend(frame_anns)
         return flattened
 
     def download_annotations(self, dest_dir: Path):
@@ -262,6 +281,9 @@ def remap_annotations(
             remap_func: Function that maps from an annotation path to a datapoint path. \
                 If None, we try to guess it by getting a datapoint and remapping that path
         """
+        if not annotations:
+            return {}
+
         if remap_func is None:
             first_ann = list(annotations.keys())[0]
             first_ann_filename = Path(first_ann).name
@@ -440,8 +462,9 @@ def _convert_to_ls_video_tasks(
             video_anns = [a for a in anns if isinstance(a, IRVideoBBoxFrameAnnotation)]
             if not video_anns:
                 continue
+            sequence = build_video_sequence_from_annotations(video_anns, filename=filename)
             video_path = self.ds.source.raw_path(filename)
-            ls_tasks = video_ir_to_ls_video_tasks(video_anns, video_path=video_path)
+            ls_tasks = video_ir_to_ls_video_tasks(sequence, video_path=video_path)
             if ls_tasks:
                 tasks[filename] = ls_tasks[0].model_dump_json().encode("utf-8")
         return tasks
diff --git a/dagshub/data_engine/annotation/video.py b/dagshub/data_engine/annotation/video.py
new file mode 100644
index 00000000..8482f04f
--- /dev/null
+++ b/dagshub/data_engine/annotation/video.py
@@ -0,0 +1,24 @@
+from typing import Optional, Sequence
+
+from dagshub_annotation_converter.ir.video import IRVideoBBoxFrameAnnotation, IRVideoSequence
+
+
+def build_video_sequence_from_annotations(
+    annotations: Sequence[IRVideoBBoxFrameAnnotation],
+    filename: Optional[str] = None,
+) -> IRVideoSequence:
+    sequence = IRVideoSequence.from_annotations(annotations, filename=filename)
+
+    resolved_width = sequence.resolved_video_width()
+    if sequence.video_width is None and resolved_width is not None:
+        sequence.video_width = resolved_width
+
+    resolved_height = sequence.resolved_video_height()
+    if sequence.video_height is None and resolved_height is not None:
+        sequence.video_height = resolved_height
+
+    resolved_length = sequence.resolved_sequence_length()
+    if sequence.sequence_length is None and resolved_length is not None:
+        sequence.sequence_length = resolved_length
+
+    return sequence
diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py
index 19f19332..2403f823 100644
--- a/dagshub/data_engine/model/query_result.py
+++ b/dagshub/data_engine/model/query_result.py
@@ -35,6 +35,7 @@
 from dagshub.common.util import lazy_load, multi_urljoin
 from dagshub.data_engine.annotation import MetadataAnnotations
 from dagshub.data_engine.annotation.metadata import ErrorMetadataAnnotations, UnsupportedMetadataAnnotations
+from dagshub.data_engine.annotation.video import build_video_sequence_from_annotations
 from dagshub.data_engine.annotation.voxel_conversion import (
     add_ls_annotations,
     add_voxel_annotations,
@@ -786,6 +787,22 @@ def _get_all_video_annotations(self, annotation_field: str) -> List[IRVideoBBoxF
         all_anns = self._get_all_annotations(annotation_field)
         return [a for a in all_anns if isinstance(a, IRVideoBBoxFrameAnnotation)]
 
+    @staticmethod
+    def _annotations_to_sequences(
+        video_annotations: List[IRVideoBBoxFrameAnnotation],
+    ) -> List["IRVideoSequence"]:
+        """Reconstruct IRVideoSequence objects from a flat list of frame annotations, grouped by filename."""
+        # Group annotations by source filename
+        by_source: Dict[str, List[IRVideoBBoxFrameAnnotation]] = {}
+        for ann in video_annotations:
+            filename = QueryResult._get_annotation_filename(ann) or ""
+            by_source.setdefault(filename, []).append(ann)
+
+        return [
+            build_video_sequence_from_annotations(anns, filename=source_filename or None)
+            for source_filename, anns in by_source.items()
+        ]
+
     def _prepare_video_file_for_export(self, local_root: Path, repo_relative_filename: str) -> Optional[Path]:
         ann_path = Path(repo_relative_filename)
         primary = local_root / ann_path
@@ -952,6 +969,8 @@ def export_as_mot(
             local_download_root = self.download_files(download_dir / "data", keep_source_prefix=True)
 
         log_message("Exporting MOT annotations...")
+        sequences = self._annotations_to_sequences(video_annotations)
+
         if has_multiple_sources:
             video_files: Optional[Dict[str, Union[str, Path]]] = None
             if local_download_root is not None:
@@ -974,7 +993,7 @@ def export_as_mot(
             context = MOTContext()
             context.image_width = image_width
             context.image_height = image_height
-            export_mot_sequences_to_dirs(video_annotations, context, labels_dir, video_files=video_files)
+            export_mot_sequences_to_dirs(sequences, context, labels_dir, video_files=video_files)
             result_path = labels_dir
         else:
             video_file: Optional[Path] = None
@@ -994,7 +1013,7 @@ def export_as_mot(
             context.image_height = image_height
             single_name = Path(source_names[0]).stem if source_names else "sequence"
             output_dir = labels_dir / single_name
-            result_path = export_mot_to_dir(video_annotations, context, output_dir, video_file=video_file)
+            result_path = export_mot_to_dir(sequences[0], context, output_dir, video_file=video_file)
 
         log_message(f"Done! Saved MOT annotations to {result_path.absolute()}")
         return result_path
@@ -1041,6 +1060,8 @@ def export_as_cvat_video(
         has_multiple_sources = len(source_names) > 1
 
         log_message("Exporting CVAT video annotations...")
+        sequences = self._annotations_to_sequences(video_annotations)
+
         local_download_root: Optional[Path] = None
         if not has_multiple_sources and (image_width is None or image_height is None):
             log_message("Missing video dimensions in annotations, downloading videos for converter-side probing...")
@@ -1072,7 +1093,7 @@ def export_as_cvat_video(
             output_dir = download_dir / "labels"
             output_dir.mkdir(parents=True, exist_ok=True)
             export_cvat_videos_to_zips(
-                video_annotations,
+                sequences,
                 output_dir,
                 image_width=image_width,
                 image_height=image_height,
@@ -1100,7 +1121,7 @@ def export_as_cvat_video(
                 output_name = "annotations.zip"
             output_path = labels_dir / output_name
             result_path = export_cvat_video_to_zip(
-                video_annotations,
+                sequences[0],
                 output_path,
                 video_name=video_name,
                 image_width=image_width,
diff --git a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py
index 940a2428..4738eae4 100644
--- a/tests/data_engine/annotation_import/test_cvat_video.py
+++ b/tests/data_engine/annotation_import/test_cvat_video.py
@@ -4,12 +4,13 @@
 from unittest.mock import patch, PropertyMock
 
 import pytest
-from dagshub_annotation_converter.converters.cvat import export_cvat_video_to_xml_string
+from dagshub_annotation_converter.converters.cvat import export_cvat_video_to_xml_bytes
 from dagshub_annotation_converter.ir.image import IRBBoxImageAnnotation, CoordinateStyle
 from dagshub_annotation_converter.ir.video import IRVideoBBoxFrameAnnotation
 
 from dagshub.data_engine.annotation.importer import AnnotationImporter
 from dagshub.data_engine.annotation.metadata import MetadataAnnotations
+from dagshub.data_engine.annotation.video import build_video_sequence_from_annotations
 from dagshub.data_engine.client.models import MetadataSelectFieldSchema
 from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags
 from dagshub.data_engine.model.datapoint import Datapoint
@@ -178,8 +179,8 @@ def test_export_cvat_video_passes_video_file_when_dimensions_missing(ds, tmp_pat
     dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
     anns = [_make_video_bbox(frame=0, track_id=0), _make_video_bbox(frame=5, track_id=0)]
     for ann in anns:
-        ann.image_width = 0
-        ann.image_height = 0
+        ann.video_width = 0
+        ann.video_height = 0
         ann.filename = "video.mp4"
     dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns)
     qr = _make_qr(ds, [dp], ann_field="ann")
@@ -220,8 +221,8 @@ def _mock_export_cvat_video_to_zip(
 def test_export_cvat_video_missing_local_file_raises(ds, tmp_path, monkeypatch):
     dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
     ann = _make_video_bbox(frame=0, track_id=0)
-    ann.image_width = 0
-    ann.image_height = 0
+    ann.video_width = 0
+    ann.video_height = 0
     ann.filename = "missing.mp4"
     dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann])
     qr = _make_qr(ds, [dp], ann_field="ann")
@@ -243,7 +244,7 @@ def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxFrameAnnotation:
     return IRVideoBBoxFrameAnnotation(
         track_id=track_id, frame_number=frame,
         left=100.0, top=150.0, width=50.0, height=80.0,
-        image_width=1920, image_height=1080,
+        video_width=1920, video_height=1080,
         categories={"person": 1.0},
         coordinate_style=CoordinateStyle.DENORMALIZED,
     )
@@ -251,7 +252,8 @@ def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxFrameAnnotation:
 
 def _make_cvat_video_xml() -> bytes:
     anns = [_make_video_bbox(frame=0, track_id=0), _make_video_bbox(frame=5, track_id=0)]
-    return export_cvat_video_to_xml_string(anns)
+    sequence = build_video_sequence_from_annotations(anns, filename="video.mp4")
+    return export_cvat_video_to_xml_bytes(sequence, video_name="video.mp4")
 
 
 def _make_video_qr(ds):
diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py
index 40cfd637..5b3cfb00 100644
--- a/tests/data_engine/annotation_import/test_mot.py
+++ b/tests/data_engine/annotation_import/test_mot.py
@@ -7,10 +7,11 @@
 
 import pytest
 from dagshub_annotation_converter.ir.image import CoordinateStyle
-from dagshub_annotation_converter.ir.video import IRVideoBBoxFrameAnnotation
+from dagshub_annotation_converter.ir.video import IRVideoBBoxFrameAnnotation, IRVideoSequence
 
 from dagshub.data_engine.annotation.importer import AnnotationImporter, AnnotationsNotFoundError
 from dagshub.data_engine.annotation.metadata import MetadataAnnotations
+from dagshub.data_engine.annotation.video import build_video_sequence_from_annotations
 from dagshub.data_engine.client.models import MetadataSelectFieldSchema
 from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags
 from dagshub.data_engine.model.datapoint import Datapoint
@@ -23,27 +24,31 @@ def mock_source_prefix(ds):
         yield
 
 
-# --- _is_video_annotation_dict ---
+# --- _is_video_annotation ---
 
 
 def test_is_video_dict_int_keys():
-    assert AnnotationImporter._is_video_annotation_dict({0: [], 1: []}) is True
+    assert AnnotationImporter._is_video_annotation({0: [], 1: []}) is True
 
 
 def test_is_video_dict_str_keys():
-    assert AnnotationImporter._is_video_annotation_dict({"file.jpg": []}) is False
+    assert AnnotationImporter._is_video_annotation({"file.jpg": []}) is False
 
 
 def test_is_video_dict_empty():
-    assert AnnotationImporter._is_video_annotation_dict({}) is False
+    assert AnnotationImporter._is_video_annotation({}) is False
 
 
 def test_is_video_dict_non_dict():
-    assert AnnotationImporter._is_video_annotation_dict([]) is False
+    assert AnnotationImporter._is_video_annotation([]) is False
 
 
 def test_is_video_dict_mixed_first_int():
-    assert AnnotationImporter._is_video_annotation_dict({0: [], "a": []}) is True
+    assert AnnotationImporter._is_video_annotation({0: [], "a": []}) is True
+
+
+def test_is_video_sequence():
+    assert AnnotationImporter._is_video_annotation(IRVideoSequence.from_annotations([_make_video_bbox()])) is True
 
 
 # --- is_video_format ---
@@ -94,6 +99,37 @@ def test_flatten_video_name_override(ds, tmp_path):
     assert "custom.mp4" in result
 
 
+def test_flatten_sequence(ds, tmp_path):
+    importer = AnnotationImporter(ds, "mot", tmp_path / "test_video", load_from="disk")
+    sequence = IRVideoSequence.from_annotations([_make_video_bbox(frame=0), _make_video_bbox(frame=5)])
+    result = importer._flatten_video_annotations(sequence)
+
+    assert "test_video" in result
+    assert len(result["test_video"]) == 2
+
+
+def test_build_video_sequence_without_legacy_image_dimensions():
+    anns = [
+        IRVideoBBoxFrameAnnotation(
+            track_id=0,
+            frame_number=0,
+            left=100.0,
+            top=150.0,
+            width=50.0,
+            height=80.0,
+            video_width=1920,
+            video_height=1080,
+            categories={"person": 1.0},
+            coordinate_style=CoordinateStyle.DENORMALIZED,
+        )
+    ]
+
+    sequence = build_video_sequence_from_annotations(anns, filename="video.mp4")
+
+    assert sequence.video_width == 1920
+    assert sequence.video_height == 1080
+
+
 # --- import ---
 
 
@@ -297,8 +333,8 @@ def test_export_mot_passes_video_file_when_dimensions_missing(ds, tmp_path, monk
     dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
     anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=1, track_id=1)]
     for ann in anns:
-        ann.image_width = 0
-        ann.image_height = 0
+        ann.video_width = 0
+        ann.video_height = 0
         ann.filename = "video.mp4"
     dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns)
     qr = _make_qr(ds, [dp], ann_field="ann")
@@ -332,7 +368,7 @@ def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxFrameAnnotation:
     return IRVideoBBoxFrameAnnotation(
         track_id=track_id, frame_number=frame,
         left=100.0, top=150.0, width=50.0, height=80.0,
-        image_width=1920, image_height=1080,
+        video_width=1920, video_height=1080,
         categories={"person": 1.0},
         coordinate_style=CoordinateStyle.DENORMALIZED,
     )

From 6c68a982067b63e385a053209ad2f22acef3c555 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Wed, 25 Mar 2026 17:57:52 +0200
Subject: [PATCH 05/24] Trim verbose docstrings and simplify code slightly

---
 dagshub/data_engine/annotation/importer.py      | 15 +++++++--------
 dagshub/data_engine/model/query_result.py       |  3 +--
 tests/data_engine/annotation_import/test_mot.py |  2 +-
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py
index 47f90573..a18b23c5 100644
--- a/dagshub/data_engine/annotation/importer.py
+++ b/dagshub/data_engine/annotation/importer.py
@@ -162,7 +162,7 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]:
 
     @staticmethod
     def _is_video_annotation(result) -> bool:
-        """Check if the result from a CVAT loader is video annotations (IRVideoSequence or int keys) vs image annotations (str keys)."""
+        """Check whether a loader result contains video annotations."""
         from dagshub_annotation_converter.ir.video import IRVideoSequence
 
         if isinstance(result, IRVideoSequence):
@@ -176,18 +176,17 @@ def _flatten_video_annotations(
         self,
         video_data,
     ) -> Dict[str, Sequence[IRAnnotationBase]]:
-        """Flatten video annotations (IRVideoSequence or frame-indexed dict) into a single entry keyed by video name."""
+        """Flatten video annotations into a single entry keyed by video name."""
         from dagshub_annotation_converter.ir.video import IRVideoSequence
 
         video_name = self.additional_args.get("video_name", self.annotations_file.stem)
         if isinstance(video_data, IRVideoSequence):
             return {video_name: video_data.to_annotations()}
-        else:
-            # Legacy dict[int, list[annotation]] format
-            all_anns: List[IRAnnotationBase] = []
-            for frame_anns in video_data.values():
-                all_anns.extend(frame_anns)
-            return {video_name: all_anns}
+
+        all_anns: List[IRAnnotationBase] = []
+        for frame_anns in video_data.values():
+            all_anns.extend(frame_anns)
+        return {video_name: all_anns}
 
     def _flatten_cvat_fs_annotations(
         self, fs_annotations: Mapping[str, object]
diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py
index 2403f823..c46f9c1a 100644
--- a/dagshub/data_engine/model/query_result.py
+++ b/dagshub/data_engine/model/query_result.py
@@ -791,8 +791,7 @@ def _get_all_video_annotations(self, annotation_field: str) -> List[IRVideoBBoxF
     def _annotations_to_sequences(
         video_annotations: List[IRVideoBBoxFrameAnnotation],
     ) -> List["IRVideoSequence"]:
-        """Reconstruct IRVideoSequence objects from a flat list of frame annotations, grouped by filename."""
-        # Group annotations by source filename
+        """Group frame annotations into per-source video sequences."""
         by_source: Dict[str, List[IRVideoBBoxFrameAnnotation]] = {}
         for ann in video_annotations:
             filename = QueryResult._get_annotation_filename(ann) or ""
diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py
index 5b3cfb00..aa067a64 100644
--- a/tests/data_engine/annotation_import/test_mot.py
+++ b/tests/data_engine/annotation_import/test_mot.py
@@ -108,7 +108,7 @@ def test_flatten_sequence(ds, tmp_path):
     assert len(result["test_video"]) == 2
 
 
-def test_build_video_sequence_without_legacy_image_dimensions():
+def test_build_video_sequence_sets_top_level_dimensions():
     anns = [
         IRVideoBBoxFrameAnnotation(
             track_id=0,

From e68048a4ea48d8fbb1f6176f7d56dc0de9b71d33 Mon Sep 17 00:00:00 2001
From: Kirill Bolashev <kirill@dagshub.com>
Date: Sun, 29 Mar 2026 13:44:10 +0300
Subject: [PATCH 06/24] Test: use the coco_converter branch of the annotation
 converter while PR is WIP

---
 setup.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 6cdef855..a4a08913 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,7 @@
-import setuptools
 import os.path
 
+import setuptools
+
 
 # Thank you pip contributors
 def read(rel_path: str) -> str:
@@ -41,7 +42,11 @@ def get_version(rel_path: str) -> str:
     "python-dateutil",
     "boto3",
     "semver",
-    "dagshub-annotation-converter>=0.1.12",
+    # FIXME: roll back to main after merging
+    # "dagshub-annotation-converter>=0.1.12",
+    "dagshub-annotation-converter @ "
+    + "git+https://github.com/DagsHub/"
+    + "dagshub-annotation-converter@coco_converter#egg=dagshub-annotation-converter",
 ]
 
 extras_require = {

From 4f830e466ab76dc9bca4d44d16d2d4c46a582172 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Sun, 29 Mar 2026 15:06:41 +0300
Subject: [PATCH 07/24] Fix review comments

---
 dagshub/__init__.py                           |  2 +-
 dagshub/auth/token_auth.py                    |  2 +-
 dagshub/data_engine/annotation/importer.py    |  8 +++----
 dagshub/data_engine/annotation/metadata.py    | 22 -------------------
 dagshub/data_engine/model/query_result.py     |  5 ++++-
 .../annotation_import/test_coco.py            | 22 +------------------
 tests/data_engine/conftest.py                 |  3 +++
 7 files changed, 14 insertions(+), 50 deletions(-)

diff --git a/dagshub/__init__.py b/dagshub/__init__.py
index 10f3c0cb..7f68de54 100644
--- a/dagshub/__init__.py
+++ b/dagshub/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.6.10"
+__version__ = "0.6.9"
 from .logger import DAGsHubLogger, dagshub_logger
 from .common.init import init
 from .upload.wrapper import upload_files
diff --git a/dagshub/auth/token_auth.py b/dagshub/auth/token_auth.py
index 7ba3a70a..31ec32ac 100644
--- a/dagshub/auth/token_auth.py
+++ b/dagshub/auth/token_auth.py
@@ -37,7 +37,7 @@ def auth_flow(self, request: Request) -> Generator[Request, Response, None]:
 
     def can_renegotiate(self):
         # Env var tokens cannot renegotiate, every other token type can
-        return type(self._token) is not EnvVarDagshubToken
+        return not type(self._token) is EnvVarDagshubToken
 
     def renegotiate_token(self):
         if not self._token_storage.is_valid_token(self._token, self._host):
diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py
index 80e62468..90661df1 100644
--- a/dagshub/data_engine/annotation/importer.py
+++ b/dagshub/data_engine/annotation/importer.py
@@ -95,6 +95,7 @@ def download_annotations(self, dest_dir: Path):
         log_message("Downloading annotations from repository")
         repoApi = self.ds.source.repoApi
         if self.annotations_type == "cvat":
+            # Download just the annotation file
             repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True)
         elif self.annotations_type == "yolo":
             # Download the dataset .yaml file and the images + annotations
@@ -107,6 +108,7 @@ def download_annotations(self, dest_dir: Path):
             assert context.path is not None
             repoApi.download(self.annotations_file.parent / context.path, dest_dir, keep_source_prefix=True)
         elif self.annotations_type == "coco":
+            # Download just the annotation file
             repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True)
 
     @staticmethod
@@ -157,10 +159,8 @@ def remap_annotations(
                 )
                 continue
             for ann in anns:
-                if ann.filename is not None:
-                    ann.filename = remap_func(ann.filename)
-                else:
-                    ann.filename = new_filename
+                assert ann.filename is not None
+                ann.filename = remap_func(ann.filename)
             remapped[new_filename] = anns
 
         return remapped
diff --git a/dagshub/data_engine/annotation/metadata.py b/dagshub/data_engine/annotation/metadata.py
index 140f7733..06f7bc28 100644
--- a/dagshub/data_engine/annotation/metadata.py
+++ b/dagshub/data_engine/annotation/metadata.py
@@ -271,28 +271,6 @@ def add_image_pose(
         self.annotations.append(ann)
         self._update_datapoint()
 
-    def add_coco_annotation(
-        self,
-        coco_json: str,
-    ):
-        """
-        Add annotations from a COCO-format JSON string.
-
-        Args:
-            coco_json: A COCO-format JSON string with ``categories``, ``images``, and ``annotations`` keys.
-        """
-        from dagshub_annotation_converter.converters.coco import load_coco_from_json_string
-
-        grouped, _ = load_coco_from_json_string(coco_json)
-        new_anns: list[IRAnnotationBase] = []
-        for anns in grouped.values():
-            for ann in anns:
-                ann.filename = self.datapoint.path
-                new_anns.append(ann)
-        self.annotations.extend(new_anns)
-        log_message(f"Added {len(new_anns)} COCO annotation(s) to datapoint {self.datapoint.path}")
-        self._update_datapoint()
-
     def add_yolo_annotation(
         self,
         annotation_type: Literal["bbox", "segmentation", "pose"],
diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py
index 6031e0bf..ddec542c 100644
--- a/dagshub/data_engine/model/query_result.py
+++ b/dagshub/data_engine/model/query_result.py
@@ -900,7 +900,10 @@ def export_as_coco(
 
         context = CocoContext()
         if classes is not None:
-            context.categories = dict(classes)
+            categories = Categories()
+            for category_id, category_name in classes.items():
+                categories.add(category_name, category_id)
+            context.categories = categories
 
         # Add the source prefix to all annotations
         for ann in annotations:
diff --git a/tests/data_engine/annotation_import/test_coco.py b/tests/data_engine/annotation_import/test_coco.py
index 9b238fd1..0db9cf8f 100644
--- a/tests/data_engine/annotation_import/test_coco.py
+++ b/tests/data_engine/annotation_import/test_coco.py
@@ -1,7 +1,6 @@
 import datetime
 import json
-from pathlib import PurePosixPath
-from unittest.mock import patch, PropertyMock
+from unittest.mock import patch
 
 import pytest
 from dagshub_annotation_converter.ir.image import (
@@ -17,12 +16,6 @@
 from dagshub.data_engine.model.query_result import QueryResult
 
 
-@pytest.fixture(autouse=True)
-def mock_source_prefix(ds):
-    with patch.object(type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath()):
-        yield
-
-
 # --- import ---
 
 
@@ -58,19 +51,6 @@ def test_coco_convert_to_ls_tasks(ds, tmp_path, mock_dagshub_auth):
     assert len(task_json["annotations"]) > 0
 
 
-# --- add_coco_annotation ---
-
-
-def test_add_coco_annotation_rewrites_filename(ds, mock_dagshub_auth):
-    dp = Datapoint(datasource=ds, path="my_images/photo.jpg", datapoint_id=0, metadata={})
-    meta_ann = MetadataAnnotations(datapoint=dp, field="ann")
-    meta_ann.add_coco_annotation(json.dumps(_make_coco_json()))
-
-    assert len(meta_ann.annotations) == 1
-    assert isinstance(meta_ann.annotations[0], IRBBoxImageAnnotation)
-    assert meta_ann.annotations[0].filename == "my_images/photo.jpg"
-
-
 # --- _resolve_annotation_field ---
 
 
diff --git a/tests/data_engine/conftest.py b/tests/data_engine/conftest.py
index e57d1e83..02ee8331 100644
--- a/tests/data_engine/conftest.py
+++ b/tests/data_engine/conftest.py
@@ -1,4 +1,6 @@
 import datetime
+from pathlib import PurePosixPath
+from unittest.mock import PropertyMock
 
 import pytest
 
@@ -34,6 +36,7 @@ def _create_mock_datasource(mocker, id, name) -> Datasource:
     mocker.patch.object(ds_state, "get_from_dagshub")
     # Stub out root path so all the content_path/etc work without also mocking out RepoAPI
     mocker.patch.object(ds_state, "_root_path", return_value="http://example.com")
+    mocker.patch.object(type(ds_state), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath())
     ds_state.repoApi = MockRepoAPI("kirill/repo")
     return Datasource(ds_state)
 

From 6a5a201a2c8abf37c50d53fc0f027d338be24170 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Mon, 6 Apr 2026 17:42:57 +0300
Subject: [PATCH 08/24] initial update to latest converter version

---
 dagshub/data_engine/annotation/importer.py    | 163 ++++++++++++------
 dagshub/data_engine/annotation/metadata.py    |  36 +++-
 dagshub/data_engine/annotation/video.py       |  43 +++--
 dagshub/data_engine/model/query_result.py     |  76 ++++----
 setup.py                                      |   2 +-
 .../test_annotation_parsing.py                |  45 ++++-
 .../annotation_import/test_cvat_video.py      |  41 ++++-
 .../data_engine/annotation_import/test_mot.py |  76 +++++---
 8 files changed, 344 insertions(+), 138 deletions(-)

diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py
index a18b23c5..8e09e786 100644
--- a/dagshub/data_engine/annotation/importer.py
+++ b/dagshub/data_engine/annotation/importer.py
@@ -1,7 +1,7 @@
 from difflib import SequenceMatcher
 from pathlib import Path, PurePosixPath, PurePath
 from tempfile import TemporaryDirectory
-from typing import TYPE_CHECKING, Dict, Literal, Optional, Union, Sequence, Mapping, Callable, List
+from typing import TYPE_CHECKING, Dict, Iterable, Literal, Optional, Union, Sequence, Mapping, Callable, List
 
 from dagshub_annotation_converter.converters.cvat import (
     load_cvat_from_fs,
@@ -10,11 +10,12 @@
 )
 from dagshub_annotation_converter.converters.mot import load_mot_from_dir, load_mot_from_fs, load_mot_from_zip
 from dagshub_annotation_converter.converters.yolo import load_yolo_from_fs
-from dagshub_annotation_converter.converters.label_studio_video import video_ir_to_ls_video_tasks
+from dagshub_annotation_converter.converters.label_studio_video import video_ir_to_ls_video_task
 from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask
 from dagshub_annotation_converter.formats.yolo import YoloContext
+from dagshub_annotation_converter.ir.base import IRTaskAnnotation
 from dagshub_annotation_converter.ir.image.annotations.base import IRAnnotationBase
-from dagshub_annotation_converter.ir.video import IRVideoBBoxFrameAnnotation
+from dagshub_annotation_converter.ir.video import IRVideoAnnotationTrack, IRVideoBBoxFrameAnnotation, IRVideoSequence
 
 from dagshub.common.api import UserAPI
 from dagshub.common.api.repo import PathNotFoundError
@@ -69,7 +70,7 @@ def __init__(
     def is_video_format(self) -> bool:
         return self.annotations_type in ("mot", "cvat_video")
 
-    def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]:
+    def import_annotations(self) -> Mapping[str, Sequence[IRTaskAnnotation]]:
         # Double check that the annotation file exists
         if self.load_from == "disk":
             if not self.annotations_file.exists():
@@ -90,7 +91,7 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]:
 
             # Convert annotations
             log_message("Loading annotations...")
-            annotation_dict: Mapping[str, Sequence[IRAnnotationBase]]
+            annotation_dict: Mapping[str, Sequence[IRTaskAnnotation]]
             if self.annotations_type == "yolo":
                 annotation_dict, _ = load_yolo_from_fs(
                     annotation_type=self.additional_args["yolo_type"], meta_file=annotations_file
@@ -113,21 +114,21 @@ def import_annotations(self) -> Mapping[str, Sequence[IRAnnotationBase]]:
                 if "video_name" in self.additional_args:
                     mot_kwargs["video_file"] = self.additional_args["video_name"]
                 if annotations_file.is_dir():
-                    video_files = self.additional_args.get("video_files")
-                    raw_datasource_path = self.additional_args.get("datasource_path")
-                    if raw_datasource_path is None:
-                        raw_datasource_path = self.ds.source.source_prefix
-                    datasource_path = PurePosixPath(raw_datasource_path).as_posix().lstrip("/")
-                    if datasource_path == ".":
-                        datasource_path = ""
-                    mot_results = load_mot_from_fs(
-                        annotations_file,
-                        image_width=mot_kwargs.get("image_width"),
-                        image_height=mot_kwargs.get("image_height"),
-                        video_files=video_files,
-                        datasource_path=datasource_path,
-                    )
-                    annotation_dict = self._flatten_mot_fs_annotations(mot_results)
+                    # Detect whether this is an fs layout (videos/ + labels/) or a single MOT dir
+                    video_dir_name = self.additional_args.get("video_dir_name", "videos")
+                    label_dir_name = self.additional_args.get("label_dir_name", "labels")
+                    if (annotations_file / label_dir_name).is_dir():
+                        mot_results = load_mot_from_fs(
+                            annotations_file,
+                            image_width=mot_kwargs.get("image_width"),
+                            image_height=mot_kwargs.get("image_height"),
+                            video_dir_name=video_dir_name,
+                            label_dir_name=label_dir_name,
+                        )
+                        annotation_dict = self._flatten_mot_fs_annotations(mot_results)
+                    else:
+                        video_anns, _ = load_mot_from_dir(annotations_file, **mot_kwargs)
+                        annotation_dict = self._flatten_video_annotations(video_anns)
                 elif annotations_file.suffix == ".zip":
                     video_anns, _ = load_mot_from_zip(annotations_file, **mot_kwargs)
                     annotation_dict = self._flatten_video_annotations(video_anns)
@@ -175,33 +176,36 @@ def _is_video_annotation(result) -> bool:
     def _flatten_video_annotations(
         self,
         video_data,
-    ) -> Dict[str, Sequence[IRAnnotationBase]]:
-        """Flatten video annotations into a single entry keyed by video name."""
-        from dagshub_annotation_converter.ir.video import IRVideoSequence
-
-        video_name = self.additional_args.get("video_name", self.annotations_file.stem)
+    ) -> Dict[str, Sequence[IRTaskAnnotation]]:
+        """Flatten video annotations into a single entry keyed by the source video path."""
+        video_name = self.additional_args.get("video_name")
         if isinstance(video_data, IRVideoSequence):
-            return {video_name: video_data.to_annotations()}
+            sequence_name = self._resolve_video_annotation_key(video_data.filename, fallback=video_name)
+            return {sequence_name: video_data.to_annotations()}
+
+        if video_name is None:
+            video_name = self._first_video_annotation_filename(video_data.values())
+        if video_name is None:
+            video_name = self.annotations_file.stem
 
-        all_anns: List[IRAnnotationBase] = []
+        all_anns: List[IRTaskAnnotation] = []
         for frame_anns in video_data.values():
             all_anns.extend(frame_anns)
         return {video_name: all_anns}
 
     def _flatten_cvat_fs_annotations(
         self, fs_annotations: Mapping[str, object]
-    ) -> Dict[str, Sequence[IRAnnotationBase]]:
-        from dagshub_annotation_converter.ir.video import IRVideoSequence
-
-        flattened: Dict[str, List[IRAnnotationBase]] = {}
+    ) -> Dict[str, Sequence[IRTaskAnnotation]]:
+        flattened: Dict[str, List[IRTaskAnnotation]] = {}
         for rel_path, result in fs_annotations.items():
             if isinstance(result, IRVideoSequence):
-                video_key = Path(rel_path).stem
+                video_key = self._resolve_video_annotation_key(result.filename, fallback=str(rel_path))
                 flattened.setdefault(video_key, [])
                 flattened[video_key].extend(result.to_annotations())
             elif isinstance(result, dict):
                 if self._is_video_annotation(result):
-                    video_key = Path(rel_path).stem
+                    video_key = self._first_video_annotation_filename(result.values())
+                    video_key = self._resolve_video_annotation_key(video_key, fallback=str(rel_path))
                     flattened.setdefault(video_key, [])
                     for frame_anns in result.values():
                         flattened[video_key].extend(frame_anns)
@@ -214,23 +218,59 @@ def _flatten_cvat_fs_annotations(
     def _flatten_mot_fs_annotations(
         self,
         fs_annotations: Mapping[str, object],
-    ) -> Dict[str, Sequence[IRAnnotationBase]]:
-        from dagshub_annotation_converter.ir.video import IRVideoSequence
-
-        flattened: Dict[str, List[IRAnnotationBase]] = {}
+    ) -> Dict[str, Sequence[IRTaskAnnotation]]:
+        flattened: Dict[str, List[IRTaskAnnotation]] = {}
         for rel_path, result in fs_annotations.items():
             if not isinstance(result, tuple) or len(result) != 2:
                 continue
             sequence_or_dict = result[0]
-            sequence_name = Path(rel_path).stem if rel_path not in (".", "") else self.annotations_file.stem
-            flattened.setdefault(sequence_name, [])
+            rel_path_str = self._stringify_video_path(rel_path)
+            sequence_name = self.annotations_file.stem if rel_path_str in (None, "", ".") else rel_path_str
             if isinstance(sequence_or_dict, IRVideoSequence):
+                sequence_name = self._resolve_video_annotation_key(sequence_or_dict.filename, fallback=sequence_name)
+                flattened.setdefault(sequence_name, [])
                 flattened[sequence_name].extend(sequence_or_dict.to_annotations())
             elif isinstance(sequence_or_dict, dict):
+                sequence_name = self._first_video_annotation_filename(sequence_or_dict.values()) or sequence_name
+                flattened.setdefault(sequence_name, [])
                 for frame_anns in sequence_or_dict.values():
                     flattened[sequence_name].extend(frame_anns)
         return flattened
 
+    @staticmethod
+    def _stringify_video_path(path: Optional[Union[str, Path, PurePath]]) -> Optional[str]:
+        if path is None:
+            return None
+        if isinstance(path, (Path, PurePath)):
+            return path.as_posix()
+        return str(path).replace("\\", "/")
+
+    def _resolve_video_annotation_key(
+        self,
+        filename: Optional[Union[str, Path, PurePath]],
+        fallback: Optional[str] = None,
+    ) -> str:
+        resolved = self._stringify_video_path(filename)
+        if resolved not in (None, "", "."):
+            return resolved
+
+        resolved_fallback = self._stringify_video_path(fallback)
+        if resolved_fallback not in (None, "", "."):
+            return resolved_fallback
+        return self.annotations_file.stem
+
+    @classmethod
+    def _first_video_annotation_filename(
+        cls,
+        frame_groups: Iterable[Sequence[IRAnnotationBase]],
+    ) -> Optional[str]:
+        for frame_anns in frame_groups:
+            for ann in frame_anns:
+                ann_filename = cls._stringify_video_path(getattr(ann, "filename", None))
+                if ann_filename not in (None, "", "."):
+                    return ann_filename
+        return None
+
     def download_annotations(self, dest_dir: Path):
         log_message("Downloading annotations from repository")
         repoApi = self.ds.source.repoApi
@@ -269,9 +309,9 @@ def determine_load_location(ds: "Datasource", annotations_path: Union[str, Path]
 
     def remap_annotations(
         self,
-        annotations: Mapping[str, Sequence[IRAnnotationBase]],
+        annotations: Mapping[str, Sequence[IRTaskAnnotation]],
         remap_func: Optional[Callable[[str], Optional[str]]] = None,
-    ) -> Mapping[str, Sequence[IRAnnotationBase]]:
+    ) -> Mapping[str, Sequence[IRTaskAnnotation]]:
         """
         Remaps the filenames in the annotations to the datasource's data points.
 
@@ -300,6 +340,14 @@ def remap_annotations(
                 )
                 continue
             for ann in anns:
+                if isinstance(ann, IRVideoAnnotationTrack):
+                    for track_ann in ann.annotations:
+                        if track_ann.filename is not None:
+                            track_ann.filename = remap_func(track_ann.filename)
+                        else:
+                            track_ann.filename = new_filename
+                    continue
+
                 if ann.filename is not None:
                     ann.filename = remap_func(ann.filename)
                 else:
@@ -435,7 +483,7 @@ def get_best_fit_datapoint_path(ann_path: str, datapoint_paths: List[str]) -> st
             raise ValueError(f"No good match found for annotation path {ann_path} in the datasource.")
         return best_match
 
-    def convert_to_ls_tasks(self, annotations: Mapping[str, Sequence[IRAnnotationBase]]) -> Mapping[str, bytes]:
+    def convert_to_ls_tasks(self, annotations: Mapping[str, Sequence[IRTaskAnnotation]]) -> Mapping[str, bytes]:
         """
         Converts the annotations to Label Studio tasks.
         """
@@ -451,19 +499,36 @@ def convert_to_ls_tasks(self, annotations: Mapping[str, Sequence[IRAnnotationBas
         return tasks
 
     def _convert_to_ls_video_tasks(
-        self, annotations: Mapping[str, Sequence[IRAnnotationBase]]
+        self, annotations: Mapping[str, Sequence[IRTaskAnnotation]]
     ) -> Mapping[str, bytes]:
         """
         Converts video annotations to Label Studio video tasks.
         """
         tasks = {}
         for filename, anns in annotations.items():
-            video_anns = [a for a in anns if isinstance(a, IRVideoBBoxFrameAnnotation)]
-            if not video_anns:
+            sequence = self._build_video_sequence(anns, filename)
+            if sequence is None:
                 continue
-            sequence = build_video_sequence_from_annotations(video_anns, filename=filename)
             video_path = self.ds.source.raw_path(filename)
-            ls_tasks = video_ir_to_ls_video_tasks(sequence, video_path=video_path)
-            if ls_tasks:
-                tasks[filename] = ls_tasks[0].model_dump_json().encode("utf-8")
+            ls_task = video_ir_to_ls_video_task(sequence, video_path=video_path)
+            if ls_task is not None:
+                tasks[filename] = ls_task.model_dump_json().encode("utf-8")
         return tasks
+
+    @staticmethod
+    def _build_video_sequence(
+        annotations: Sequence[IRTaskAnnotation],
+        filename: str,
+    ) -> Optional[IRVideoSequence]:
+        tracks = [ann.model_copy(deep=True) for ann in annotations if isinstance(ann, IRVideoAnnotationTrack)]
+        frame_annotations = [ann for ann in annotations if isinstance(ann, IRVideoBBoxFrameAnnotation)]
+        if frame_annotations:
+            tracks.extend(build_video_sequence_from_annotations(frame_annotations).tracks)
+        if not tracks:
+            return None
+
+        sequence = IRVideoSequence.from_annotations(tracks=tracks, filename=filename)
+        sequence.resolved_video_width()
+        sequence.resolved_video_height()
+        sequence.resolved_sequence_length()
+        return sequence
diff --git a/dagshub/data_engine/annotation/metadata.py b/dagshub/data_engine/annotation/metadata.py
index 8f9ca765..88eeb073 100644
--- a/dagshub/data_engine/annotation/metadata.py
+++ b/dagshub/data_engine/annotation/metadata.py
@@ -3,6 +3,7 @@
 from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask, parse_ls_task
 from dagshub_annotation_converter.formats.yolo import YoloContext, import_lookup, import_yolo_result
 from dagshub_annotation_converter.formats.yolo.categories import Categories
+from dagshub_annotation_converter.ir.base import IRAnnotationBase, IRTaskAnnotation
 from dagshub_annotation_converter.ir.image import (
     CoordinateStyle,
     IRBBoxImageAnnotation,
@@ -11,7 +12,8 @@
     IRSegmentationImageAnnotation,
     IRSegmentationPoint,
 )
-from dagshub_annotation_converter.ir.image.annotations.base import IRAnnotationBase, IRImageAnnotationBase
+from dagshub_annotation_converter.ir.image.annotations.base import IRImageAnnotationBase
+from dagshub_annotation_converter.ir.video import IRVideoAnnotationTrack
 
 from dagshub.common.api import UserAPI
 from dagshub.common.helpers import log_message
@@ -68,13 +70,13 @@ def __init__(
         self,
         datapoint: "Datapoint",
         field: str,
-        annotations: Optional[Sequence["IRAnnotationBase"]] = None,
+        annotations: Optional[Sequence["IRTaskAnnotation"]] = None,
         meta: Optional[Dict] = None,
         original_value: Optional[bytes] = None,
     ):
         self.datapoint = datapoint
         self.field = field
-        self.annotations: list["IRAnnotationBase"]
+        self.annotations: list["IRTaskAnnotation"]
         if annotations is None:
             annotations = []
         self.annotations = list(annotations)
@@ -99,12 +101,34 @@ def to_ls_task(self) -> Optional[bytes]:
         task = LabelStudioTask(
             user_id=UserAPI.get_current_user(self.datapoint.datasource.source.repoApi.host).user_id,
         )
-        task.data["image"] = self.datapoint.download_url
-        # TODO: need to filter out non-image annotations here maybe?
-        task.add_ir_annotations(self.annotations)
+        if any(isinstance(ann, IRVideoAnnotationTrack) for ann in self.annotations):
+            task.data["video"] = self.datapoint.download_url
+            frames_count = self._get_video_frames_count()
+            for ann in self.annotations:
+                if isinstance(ann, IRVideoAnnotationTrack):
+                    ls_ann = VideoRectangleAnnotation.from_ir_track(ann, frames_count=frames_count)
+                    if ann.__pydantic_extra__ is not None:
+                        ls_ann.__pydantic_extra__ = ann.__pydantic_extra__.copy()
+                    task.add_annotation(ls_ann)
+                else:
+                    task.add_ir_annotation(ann)
+        else:
+            task.data["image"] = self.datapoint.download_url
+            task.add_ir_annotations(self.annotations)
         task.meta.update(self.meta)
         return task.model_dump_json().encode("utf-8")
 
+    def _get_video_frames_count(self) -> Optional[int]:
+        max_frame: Optional[int] = None
+        for ann in self.annotations:
+            if not isinstance(ann, IRVideoAnnotationTrack):
+                continue
+            for track_ann in ann.annotations:
+                max_frame = track_ann.frame_number if max_frame is None else max(max_frame, track_ann.frame_number)
+        if max_frame is None:
+            return None
+        return max_frame + 1
+
     @property
     def value(self) -> Optional[bytes]:
         """
diff --git a/dagshub/data_engine/annotation/video.py b/dagshub/data_engine/annotation/video.py
index 8482f04f..25eb50f1 100644
--- a/dagshub/data_engine/annotation/video.py
+++ b/dagshub/data_engine/annotation/video.py
@@ -1,24 +1,45 @@
+from collections import defaultdict
 from typing import Optional, Sequence
 
-from dagshub_annotation_converter.ir.video import IRVideoBBoxFrameAnnotation, IRVideoSequence
+from dagshub_annotation_converter.ir.video import (
+    IRVideoBBoxFrameAnnotation,
+    IRVideoAnnotationTrack,
+    IRVideoSequence,
+)
 
 
 def build_video_sequence_from_annotations(
     annotations: Sequence[IRVideoBBoxFrameAnnotation],
     filename: Optional[str] = None,
 ) -> IRVideoSequence:
-    sequence = IRVideoSequence.from_annotations(annotations, filename=filename)
+    # Pre-group annotations into tracks (required by new from_annotations API)
+    by_track: dict[str, list[IRVideoBBoxFrameAnnotation]] = defaultdict(list)
+    for ann in annotations:
+        object_id = getattr(ann, "object_id", None)
+        if object_id is None and ann.__pydantic_extra__ is not None:
+            object_id = ann.__pydantic_extra__.get("object_id")
+        if object_id is None:
+            object_id = ann.imported_id
+        if object_id is None:
+            raise ValueError("Video annotation is missing an object identifier")
+        by_track[str(object_id)].append(ann)
 
-    resolved_width = sequence.resolved_video_width()
-    if sequence.video_width is None and resolved_width is not None:
-        sequence.video_width = resolved_width
+    tracks = [
+        IRVideoAnnotationTrack.from_annotations(anns, object_id=str(tid))
+        for tid, anns in by_track.items()
+    ]
 
-    resolved_height = sequence.resolved_video_height()
-    if sequence.video_height is None and resolved_height is not None:
-        sequence.video_height = resolved_height
+    sequence = IRVideoSequence.from_annotations(tracks=tracks, filename=filename)
 
-    resolved_length = sequence.resolved_sequence_length()
-    if sequence.sequence_length is None and resolved_length is not None:
-        sequence.sequence_length = resolved_length
+    if filename is not None:
+        for track in sequence.tracks:
+            for ann in track.annotations:
+                if ann.filename is None:
+                    ann.filename = filename
+
+    # resolved_* methods now cache results automatically
+    sequence.resolved_video_width()
+    sequence.resolved_video_height()
+    sequence.resolved_sequence_length()
 
     return sequence
diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py
index c46f9c1a..be42ef37 100644
--- a/dagshub/data_engine/model/query_result.py
+++ b/dagshub/data_engine/model/query_result.py
@@ -21,8 +21,9 @@
 from dagshub_annotation_converter.formats.yolo import YoloContext
 from dagshub_annotation_converter.formats.yolo.categories import Categories
 from dagshub_annotation_converter.formats.yolo.common import ir_mapping
+from dagshub_annotation_converter.ir.base import IRTaskAnnotation
 from dagshub_annotation_converter.ir.image import IRImageAnnotationBase
-from dagshub_annotation_converter.ir.video import IRVideoBBoxFrameAnnotation
+from dagshub_annotation_converter.ir.video import IRVideoAnnotationTrack, IRVideoBBoxFrameAnnotation
 from pydantic import ValidationError
 
 from dagshub.auth import get_token
@@ -773,8 +774,8 @@ def dp_url(dp: Datapoint):
         download_files(download_args, skip_if_exists=not redownload)
         return target_path
 
-    def _get_all_annotations(self, annotation_field: str) -> List[IRImageAnnotationBase]:
-        annotations = []
+    def _get_all_annotations(self, annotation_field: str) -> List[IRTaskAnnotation]:
+        annotations: List[IRTaskAnnotation] = []
         for dp in self.entries:
             if annotation_field in dp.metadata:
                 if not hasattr(dp.metadata[annotation_field], "annotations"):
@@ -783,9 +784,21 @@ def _get_all_annotations(self, annotation_field: str) -> List[IRImageAnnotationB
                 annotations.extend(dp.metadata[annotation_field].annotations)
         return annotations
 
+    def _get_all_image_annotations(self, annotation_field: str) -> List[IRImageAnnotationBase]:  # image-only subset
+        return [ann for ann in self._get_all_annotations(annotation_field) if isinstance(ann, IRImageAnnotationBase)]
+
     def _get_all_video_annotations(self, annotation_field: str) -> List[IRVideoBBoxFrameAnnotation]:
-        all_anns = self._get_all_annotations(annotation_field)
-        return [a for a in all_anns if isinstance(a, IRVideoBBoxFrameAnnotation)]
+        video_annotations: List[IRVideoBBoxFrameAnnotation] = []
+        for ann in self._get_all_annotations(annotation_field):  # other annotation types are ignored
+            if isinstance(ann, IRVideoBBoxFrameAnnotation):  # already a per-frame bbox
+                video_annotations.append(ann)
+            elif isinstance(ann, IRVideoAnnotationTrack):  # a track: expand it via to_annotations()
+                video_annotations.extend(
+                    track_ann
+                    for track_ann in ann.to_annotations()
+                    if isinstance(track_ann, IRVideoBBoxFrameAnnotation)  # keep only bbox frame annotations
+                )
+        return video_annotations
 
     @staticmethod
     def _annotations_to_sequences(
@@ -867,7 +880,7 @@ def export_as_yolo(
             download_dir = Path("dagshub_export")
         download_dir = Path(download_dir) / "data"
 
-        annotations = self._get_all_annotations(annotation_field)
+        annotations = self._get_all_image_annotations(annotation_field)
 
         categories = Categories()
         if classes is not None:
@@ -924,13 +937,13 @@ def export_as_mot(
         """
         Exports video annotations in MOT (Multiple Object Tracking) format.
 
-        The output follows the MOT Challenge directory structure::
+        Single-video exports write a MOT sequence directory under ``output_dir/labels/``.
+        Multi-video exports write a dataset root compatible with
+        ``load_mot_from_fs()``::
 
             output_dir/
-              gt/
-                gt.txt
-                labels.txt
-              seqinfo.ini
+              videos/
+              labels/
 
         Args:
             download_dir: Where to export. Defaults to ``./dagshub_export``
@@ -946,8 +959,6 @@ def export_as_mot(
         if download_dir is None:
             download_dir = Path("dagshub_export")
         download_dir = Path(download_dir)
-        labels_dir = download_dir / "labels"
-        labels_dir.mkdir(parents=True, exist_ok=True)
 
         video_annotations = self._get_all_video_annotations(annotation_field)
         if not video_annotations:
@@ -963,37 +974,22 @@ def export_as_mot(
         has_multiple_sources = len(source_names) > 1
 
         local_download_root: Optional[Path] = None
-        if image_width is None or image_height is None:
+        if has_multiple_sources:
+            log_message("Downloading videos into MOT dataset layout...")
+            self.download_files(download_dir / "videos")
+        elif image_width is None or image_height is None:
             log_message("Missing video dimensions in annotations, downloading videos for converter-side probing...")
-            local_download_root = self.download_files(download_dir / "data", keep_source_prefix=True)
+            local_download_root = self.download_files(download_dir / "videos")
 
         log_message("Exporting MOT annotations...")
         sequences = self._annotations_to_sequences(video_annotations)
 
         if has_multiple_sources:
-            video_files: Optional[Dict[str, Union[str, Path]]] = None
-            if local_download_root is not None:
-                video_files = {}
-                for ann_filename in {
-                    self._get_annotation_filename(ann)
-                    for ann in video_annotations
-                    if self._get_annotation_filename(ann)
-                }:
-                    assert ann_filename is not None
-                    sequence_name = Path(ann_filename).stem
-                    local_video = self._prepare_video_file_for_export(local_download_root, ann_filename)
-                    if local_video is None:
-                        raise FileNotFoundError(
-                            f"Could not find local downloaded video file for '{ann_filename}' under "
-                            f"'{local_download_root}'."
-                        )
-                    video_files[sequence_name] = local_video
-
             context = MOTContext()
-            context.image_width = image_width
-            context.image_height = image_height
-            export_mot_sequences_to_dirs(sequences, context, labels_dir, video_files=video_files)
-            result_path = labels_dir
+            context.video_width = image_width
+            context.video_height = image_height
+            export_mot_sequences_to_dirs(sequences, context, download_dir)
+            result_path = download_dir
         else:
             video_file: Optional[Path] = None
             if local_download_root is not None:
@@ -1008,8 +1004,10 @@ def export_as_mot(
                     )
 
             context = MOTContext()
-            context.image_width = image_width
-            context.image_height = image_height
+            context.video_width = image_width
+            context.video_height = image_height
+            labels_dir = download_dir / "labels"
+            labels_dir.mkdir(parents=True, exist_ok=True)
             single_name = Path(source_names[0]).stem if source_names else "sequence"
             output_dir = labels_dir / single_name
             result_path = export_mot_to_dir(sequences[0], context, output_dir, video_file=video_file)
diff --git a/setup.py b/setup.py
index 6cdef855..237e0904 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ def get_version(rel_path: str) -> str:
     "python-dateutil",
     "boto3",
     "semver",
-    "dagshub-annotation-converter>=0.1.12",
+    "dagshub-annotation-converter>=0.1.16",
 ]
 
 extras_require = {
diff --git a/tests/data_engine/annotation_import/test_annotation_parsing.py b/tests/data_engine/annotation_import/test_annotation_parsing.py
index c04b0d51..5cc5bfa7 100644
--- a/tests/data_engine/annotation_import/test_annotation_parsing.py
+++ b/tests/data_engine/annotation_import/test_annotation_parsing.py
@@ -6,13 +6,15 @@
 
 import pytest
 from dagshub_annotation_converter.ir.image import IRSegmentationImageAnnotation
+from dagshub_annotation_converter.ir.video import CoordinateStyle, IRVideoBBoxFrameAnnotation
 from pytest import MonkeyPatch
 
 from dagshub.data_engine.annotation import MetadataAnnotations
+from dagshub.data_engine.annotation.video import build_video_sequence_from_annotations
 from dagshub.data_engine.annotation.metadata import ErrorMetadataAnnotations, UnsupportedMetadataAnnotations
 from dagshub.data_engine.dtypes import MetadataFieldType, ReservedTags
 from dagshub.data_engine.model import datapoint, query_result
-from dagshub.data_engine.model.datapoint import BlobDownloadError, BlobHashMetadata
+from dagshub.data_engine.model.datapoint import BlobDownloadError, BlobHashMetadata, Datapoint
 from dagshub.data_engine.model.datasource import Datasource
 from dagshub.data_engine.model.query_result import QueryResult
 from tests.data_engine.util import add_metadata_field
@@ -168,3 +170,44 @@ def test_nonexistent_annotation(ds_with_nonexistent_annotation):
 def test_blob_metadata_is_wrapped_from_backend(ds_with_document_annotation):
     qr = ds_with_document_annotation.all(load_documents=False, load_annotations=False)
     assert isinstance(qr[0].metadata[_annotation_field_name], BlobHashMetadata)
+
+
+def test_video_tracks_to_ls_task_use_video_data_and_sequence_length(ds):
+    dp = Datapoint(datasource=ds, path="nested/video.mp4", datapoint_id=1, metadata={})
+    frame_annotations = [  # two frames (0 and 5) sharing object_id=1
+        IRVideoBBoxFrameAnnotation(
+            object_id=1,
+            frame_number=0,
+            left=100.0,
+            top=150.0,
+            width=50.0,
+            height=80.0,
+            video_width=1920,
+            video_height=1080,
+            categories={"person": 1.0},
+            coordinate_style=CoordinateStyle.DENORMALIZED,
+        ),
+        IRVideoBBoxFrameAnnotation(
+            object_id=1,
+            frame_number=5,
+            left=110.0,
+            top=155.0,
+            width=50.0,
+            height=80.0,
+            video_width=1920,
+            video_height=1080,
+            categories={"person": 1.0},
+            coordinate_style=CoordinateStyle.DENORMALIZED,
+        ),
+    ]
+    for ann in frame_annotations:
+        ann.filename = dp.path
+
+    sequence = build_video_sequence_from_annotations(frame_annotations, filename=dp.path)
+    annotations = MetadataAnnotations(dp, _annotation_field_name, annotations=sequence.tracks)  # tracks, not frames
+
+    task = json.loads(annotations.to_ls_task())  # to_ls_task() output is parsed as JSON
+
+    assert task["data"]["video"] == dp.download_url  # media URL is expected under the "video" data key
+    assert task["annotations"][0]["result"][0]["type"] == "videorectangle"
+    assert task["annotations"][0]["result"][0]["value"]["framesCount"] == sequence.sequence_length
diff --git a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py
index 4738eae4..aa1b6bbf 100644
--- a/tests/data_engine/annotation_import/test_cvat_video.py
+++ b/tests/data_engine/annotation_import/test_cvat_video.py
@@ -39,6 +39,18 @@ def test_import_cvat_video(ds, tmp_path):
     assert all(isinstance(a, IRVideoBBoxFrameAnnotation) for a in anns)
 
 
+def test_flatten_cvat_fs_preserves_sequence_filename(ds, tmp_path):
+    importer = AnnotationImporter(ds, "cvat_video", tmp_path / "dataset", load_from="disk")
+    sequence = build_video_sequence_from_annotations(
+        [_make_video_bbox(frame=0), _make_video_bbox(frame=5)],
+        filename="nested/folder/video.mp4",
+    )
+
+    result = importer._flatten_cvat_fs_annotations({"nested/annotations.xml": sequence})  # keyed by the XML path
+
+    assert "nested/folder/video.mp4" in result  # result must be keyed by the sequence's own filename
+
+
 # --- _get_all_video_annotations ---
 
 
@@ -83,6 +95,21 @@ def test_get_all_video_aggregates_across_datapoints(ds):
     assert len(qr._get_all_video_annotations("ann")) == 3
 
 
+def test_get_all_video_expands_tracks(ds):
+    dp = Datapoint(datasource=ds, path="nested/video.mp4", datapoint_id=0, metadata={})
+    sequence = build_video_sequence_from_annotations(
+        [_make_video_bbox(frame=0), _make_video_bbox(frame=5)],
+        filename=dp.path,
+    )
+    dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=sequence.tracks)  # store tracks
+
+    qr = _make_qr(ds, [dp], ann_field="ann")
+    result = qr._get_all_video_annotations("ann")
+
+    assert len(result) == 2  # one frame annotation per input frame (0 and 5)
+    assert all(ann.filename == dp.path for ann in result)  # every expanded annotation carries the filename
+
+
 # --- export_as_cvat_video ---
 
 
@@ -151,7 +178,7 @@ def test_export_cvat_video_multiple_datapoints(ds, tmp_path, monkeypatch):
     dps = []
     for i in range(2):
         dp = Datapoint(datasource=ds, path=f"video_{i}.mp4", datapoint_id=i, metadata={})
-        ann = _make_video_bbox(frame=i, track_id=i)
+        ann = _make_video_bbox(frame=i, object_id=i)
         ann.filename = dp.path
         dp.metadata["ann"] = MetadataAnnotations(
             datapoint=dp, field="ann",
@@ -177,7 +204,7 @@ def _mock_download_files(self, target_dir, *args, **kwargs):
 
 def test_export_cvat_video_passes_video_file_when_dimensions_missing(ds, tmp_path, monkeypatch):
     dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
-    anns = [_make_video_bbox(frame=0, track_id=0), _make_video_bbox(frame=5, track_id=0)]
+    anns = [_make_video_bbox(frame=0, object_id=0), _make_video_bbox(frame=5, object_id=0)]
     for ann in anns:
         ann.video_width = 0
         ann.video_height = 0
@@ -220,7 +247,7 @@ def _mock_export_cvat_video_to_zip(
 
 def test_export_cvat_video_missing_local_file_raises(ds, tmp_path, monkeypatch):
     dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
-    ann = _make_video_bbox(frame=0, track_id=0)
+    ann = _make_video_bbox(frame=0, object_id=0)
     ann.video_width = 0
     ann.video_height = 0
     ann.filename = "missing.mp4"
@@ -240,9 +267,9 @@ def _mock_download_files(self, target_dir, *args, **kwargs):
 # --- helpers ---
 
 
-def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxFrameAnnotation:
+def _make_video_bbox(frame=0, object_id=0) -> IRVideoBBoxFrameAnnotation:
     return IRVideoBBoxFrameAnnotation(
-        track_id=track_id, frame_number=frame,
+        object_id=object_id, frame_number=frame,
         left=100.0, top=150.0, width=50.0, height=80.0,
         video_width=1920, video_height=1080,
         categories={"person": 1.0},
@@ -251,14 +278,14 @@ def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxFrameAnnotation:
 
 
 def _make_cvat_video_xml() -> bytes:
-    anns = [_make_video_bbox(frame=0, track_id=0), _make_video_bbox(frame=5, track_id=0)]
+    anns = [_make_video_bbox(frame=0, object_id=0), _make_video_bbox(frame=5, object_id=0)]
     sequence = build_video_sequence_from_annotations(anns, filename="video.mp4")
     return export_cvat_video_to_xml_bytes(sequence, video_name="video.mp4")
 
 
 def _make_video_qr(ds):
     dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
-    anns = [_make_video_bbox(frame=0, track_id=0), _make_video_bbox(frame=5, track_id=0)]
+    anns = [_make_video_bbox(frame=0, object_id=0), _make_video_bbox(frame=5, object_id=0)]
     for ann in anns:
         ann.filename = "video.mp4"
     dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns)
diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py
index aa067a64..fd66ff97 100644
--- a/tests/data_engine/annotation_import/test_mot.py
+++ b/tests/data_engine/annotation_import/test_mot.py
@@ -48,7 +48,8 @@ def test_is_video_dict_mixed_first_int():
 
 
 def test_is_video_sequence():
-    assert AnnotationImporter._is_video_annotation(IRVideoSequence.from_annotations([_make_video_bbox()])) is True
+    seq = build_video_sequence_from_annotations([_make_video_bbox()])
+    assert AnnotationImporter._is_video_annotation(seq) is True
 
 
 # --- is_video_format ---
@@ -101,17 +102,41 @@ def test_flatten_video_name_override(ds, tmp_path):
 
 def test_flatten_sequence(ds, tmp_path):
     importer = AnnotationImporter(ds, "mot", tmp_path / "test_video", load_from="disk")
-    sequence = IRVideoSequence.from_annotations([_make_video_bbox(frame=0), _make_video_bbox(frame=5)])
+    sequence = build_video_sequence_from_annotations([_make_video_bbox(frame=0), _make_video_bbox(frame=5)])
     result = importer._flatten_video_annotations(sequence)
 
     assert "test_video" in result
     assert len(result["test_video"]) == 2
 
 
+def test_flatten_sequence_preserves_sequence_filename(ds, tmp_path):
+    importer = AnnotationImporter(ds, "mot", tmp_path / "dataset", load_from="disk")
+    sequence = build_video_sequence_from_annotations(
+        [_make_video_bbox(frame=0), _make_video_bbox(frame=5)],
+        filename="nested/videos/video.mp4",
+    )
+
+    result = importer._flatten_video_annotations(sequence)
+
+    assert "nested/videos/video.mp4" in result  # the sequence's filename must be used as the result key
+
+
+def test_flatten_mot_fs_preserves_relative_video_path(ds, tmp_path):
+    importer = AnnotationImporter(ds, "mot", tmp_path / "dataset", load_from="disk")
+    sequence = build_video_sequence_from_annotations(
+        [_make_video_bbox(frame=0), _make_video_bbox(frame=5)],
+        filename="nested/video.mp4",
+    )
+
+    result = importer._flatten_mot_fs_annotations({Path("nested/video.mp4"): (sequence, object())})
+
+    assert "nested/video.mp4" in result  # looked up by str: the nested relative path must be kept
+
+
 def test_build_video_sequence_sets_top_level_dimensions():
     anns = [
         IRVideoBBoxFrameAnnotation(
-            track_id=0,
+            object_id=0,
             frame_number=0,
             left=100.0,
             top=150.0,
@@ -158,16 +183,16 @@ def test_import_mot_from_zip(ds, tmp_path):
     assert len(list(result.values())[0]) == 2
 
 
-def test_import_mot_from_fs_passes_datasource_path_from_source_prefix(ds, tmp_path, monkeypatch):
+def test_import_mot_from_fs_passes_dimensions(ds, tmp_path, monkeypatch):
+    # Create the labels/ subdir so the importer takes the load_mot_from_fs path
+    (tmp_path / "labels").mkdir()
     captured = {}
 
-    def _mock_load_mot_from_fs(import_dir, image_width=None, image_height=None, video_files=None, datasource_path=""):
+    def _mock_load_mot_from_fs(import_dir, image_width=None, image_height=None, **kwargs):
         captured["import_dir"] = import_dir
         captured["image_width"] = image_width
         captured["image_height"] = image_height
-        captured["video_files"] = video_files
-        captured["datasource_path"] = datasource_path
-        return {"seq_a": ({0: [_make_video_bbox(frame=0)]}, object())}
+        return {Path("seq_a"): ({0: [_make_video_bbox(frame=0)]}, object())}
 
     monkeypatch.setattr("dagshub.data_engine.annotation.importer.load_mot_from_fs", _mock_load_mot_from_fs)
 
@@ -181,12 +206,9 @@ def _mock_load_mot_from_fs(import_dir, image_width=None, image_height=None, vide
             load_from="disk",
             image_width=1280,
             image_height=720,
-            video_files={"seq_a": "dummy.mp4"},
         )
         result = importer.import_annotations()
 
-    assert captured["datasource_path"] == "data/videos"
-    assert captured["video_files"] == {"seq_a": "dummy.mp4"}
     assert captured["image_width"] == 1280
     assert captured["image_height"] == 720
     assert "seq_a" in result
@@ -260,8 +282,8 @@ def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=N
         output_dir.mkdir(parents=True, exist_ok=True)
         config = configparser.ConfigParser()
         config["Sequence"] = {
-            "imWidth": str(context.image_width),
-            "imHeight": str(context.image_height),
+            "imWidth": str(context.video_width),
+            "imHeight": str(context.video_height),
         }
         with open(output_dir / "seqinfo.ini", "w") as f:
             config.write(f)
@@ -296,25 +318,29 @@ def test_export_mot_multiple_videos(ds, tmp_path, monkeypatch):
     dps = []
     for i in range(2):
         dp = Datapoint(datasource=ds, path=f"video_{i}.mp4", datapoint_id=i, metadata={})
-        ann = _make_video_bbox(frame=i, track_id=i)
+        ann = _make_video_bbox(frame=i, object_id=i)
         ann.filename = dp.path
         dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=[ann])
         dps.append(dp)
 
+    captured = {}
+
     def _mock_download_files(self, target_dir, *args, **kwargs):
+        captured["download_dir"] = target_dir
         target_dir.mkdir(parents=True, exist_ok=True)
         for i in range(2):
             (target_dir / f"video_{i}.mp4").write_bytes(b"fake")
         return target_dir
 
-    def _mock_export_mot_sequences_to_dirs(video_annotations, context, labels_dir, video_files=None):
+    def _mock_export_mot_sequences_to_dirs(video_annotations, context, output_dir):
+        captured["output_dir"] = output_dir
         for i in range(2):
-            seq_dir = labels_dir / f"video_{i}"
+            seq_dir = output_dir / "labels" / f"video_{i}"
             seq_dir.mkdir(parents=True, exist_ok=True)
             (seq_dir / "gt").mkdir(parents=True, exist_ok=True)
             (seq_dir / "gt" / "gt.txt").write_text("")
             (seq_dir / "gt" / "labels.txt").write_text("person\n")
-        return labels_dir
+        return output_dir / "labels"
 
     monkeypatch.setattr(QueryResult, "download_files", _mock_download_files)
     monkeypatch.setattr(
@@ -324,14 +350,16 @@ def _mock_export_mot_sequences_to_dirs(video_annotations, context, labels_dir, v
     qr = _make_qr(ds, dps, ann_field="ann")
     result = qr.export_as_mot(download_dir=tmp_path, annotation_field="ann")
 
-    assert result == tmp_path / "labels"
-    assert (result / "video_0" / "gt" / "gt.txt").exists()
-    assert (result / "video_1" / "gt" / "gt.txt").exists()
+    assert result == tmp_path
+    assert captured["download_dir"] == tmp_path / "videos"
+    assert captured["output_dir"] == tmp_path
+    assert (result / "labels" / "video_0" / "gt" / "gt.txt").exists()
+    assert (result / "labels" / "video_1" / "gt" / "gt.txt").exists()
 
 
 def test_export_mot_passes_video_file_when_dimensions_missing(ds, tmp_path, monkeypatch):
     dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
-    anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=1, track_id=1)]
+    anns = [_make_video_bbox(frame=0, object_id=1), _make_video_bbox(frame=1, object_id=1)]
     for ann in anns:
         ann.video_width = 0
         ann.video_height = 0
@@ -364,9 +392,9 @@ def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=N
 # --- helpers ---
 
 
-def _make_video_bbox(frame=0, track_id=0) -> IRVideoBBoxFrameAnnotation:
+def _make_video_bbox(frame=0, object_id=0) -> IRVideoBBoxFrameAnnotation:
     return IRVideoBBoxFrameAnnotation(
-        track_id=track_id, frame_number=frame,
+        object_id=object_id, frame_number=frame,
         left=100.0, top=150.0, width=50.0, height=80.0,
         video_width=1920, video_height=1080,
         categories={"person": 1.0},
@@ -399,7 +427,7 @@ def _zip_mot_dir(tmp_path: Path, mot_dir: Path) -> Path:
 
 def _make_video_qr(ds):
     dp = Datapoint(datasource=ds, path="video.mp4", datapoint_id=0, metadata={})
-    anns = [_make_video_bbox(frame=0, track_id=1), _make_video_bbox(frame=1, track_id=1)]
+    anns = [_make_video_bbox(frame=0, object_id=1), _make_video_bbox(frame=1, object_id=1)]
     for ann in anns:
         ann.filename = "video.mp4"
     dp.metadata["ann"] = MetadataAnnotations(datapoint=dp, field="ann", annotations=anns)

From 30ac9433a314e98628bab5995b3f3c7fcae9655d Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Tue, 7 Apr 2026 11:10:57 +0300
Subject: [PATCH 09/24] update client to latest converter version

---
 dagshub/data_engine/model/query_result.py     | 83 ++++++++++---------
 .../annotation_import/test_cvat_video.py      | 17 +++-
 .../data_engine/annotation_import/test_mot.py | 59 ++++++++++++-
 3 files changed, 114 insertions(+), 45 deletions(-)

diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py
index be42ef37..0a259579 100644
--- a/dagshub/data_engine/model/query_result.py
+++ b/dagshub/data_engine/model/query_result.py
@@ -826,6 +826,23 @@ def _prepare_video_file_for_export(self, local_root: Path, repo_relative_filenam
             return with_prefix
         return None
 
+    def _get_media_export_layout(self, download_dir: Path, media_dir_name: str) -> Tuple[Path, Path, Path]:
+        data_root = download_dir / "data"  # exports are rooted at <download_dir>/data
+        source_prefix = self.datasource.source.source_prefix  # assumes a path object with .parts - TODO confirm
+        prefix_parts = source_prefix.parts
+        if prefix_parts and prefix_parts[0] == "data":  # drop a leading "data" so it is not nested twice
+            prefix_parts = prefix_parts[1:]
+
+        media_dir = data_root
+        if prefix_parts:
+            media_dir = media_dir.joinpath(*prefix_parts)  # mirror the remaining source prefix
+        if not prefix_parts or prefix_parts[-1] != media_dir_name:  # skip if the prefix already ends with it
+            media_dir = media_dir / media_dir_name
+
+        dataset_root = media_dir.parent  # labels/ is placed next to the media dir
+        labels_dir = dataset_root / "labels"
+        return media_dir, labels_dir, dataset_root
+
     @staticmethod
     def _get_annotation_filename(ann: IRVideoBBoxFrameAnnotation) -> Optional[str]:
         filename = ann.filename
@@ -959,6 +976,7 @@ def export_as_mot(
         if download_dir is None:
             download_dir = Path("dagshub_export")
         download_dir = Path(download_dir)
+        video_dir, labels_dir, dataset_root = self._get_media_export_layout(download_dir, "videos")
 
         video_annotations = self._get_all_video_annotations(annotation_field)
         if not video_annotations:
@@ -973,13 +991,8 @@ def export_as_mot(
         )
         has_multiple_sources = len(source_names) > 1
 
-        local_download_root: Optional[Path] = None
-        if has_multiple_sources:
-            log_message("Downloading videos into MOT dataset layout...")
-            self.download_files(download_dir / "videos")
-        elif image_width is None or image_height is None:
-            log_message("Missing video dimensions in annotations, downloading videos for converter-side probing...")
-            local_download_root = self.download_files(download_dir / "videos")
+        log_message(f"Downloading videos into {video_dir}...")
+        local_download_root = self.download_files(video_dir, keep_source_prefix=False)
 
         log_message("Exporting MOT annotations...")
         sequences = self._annotations_to_sequences(video_annotations)
@@ -988,25 +1001,23 @@ def export_as_mot(
             context = MOTContext()
             context.video_width = image_width
             context.video_height = image_height
-            export_mot_sequences_to_dirs(sequences, context, download_dir)
-            result_path = download_dir
+            export_mot_sequences_to_dirs(sequences, context, dataset_root)
+            result_path = dataset_root
         else:
             video_file: Optional[Path] = None
-            if local_download_root is not None:
-                ref_filename = next((self._get_annotation_filename(a) for a in video_annotations), None)
-                if ref_filename is None:
-                    raise FileNotFoundError("Missing annotation filename for MOT export.")
-                video_file = self._prepare_video_file_for_export(local_download_root, ref_filename)
-                if video_file is None:
-                    raise FileNotFoundError(
-                        f"Could not find local downloaded video file for '{ref_filename}' "
-                        f"under '{local_download_root}'."
-                    )
+            ref_filename = next((self._get_annotation_filename(a) for a in video_annotations), None)
+            if ref_filename is None:
+                raise FileNotFoundError("Missing annotation filename for MOT export.")
+            video_file = self._prepare_video_file_for_export(local_download_root, ref_filename)
+            if video_file is None:
+                raise FileNotFoundError(
+                    f"Could not find local downloaded video file for '{ref_filename}' "
+                    f"under '{local_download_root}'."
+                )
 
             context = MOTContext()
             context.video_width = image_width
             context.video_height = image_height
-            labels_dir = download_dir / "labels"
             labels_dir.mkdir(parents=True, exist_ok=True)
             single_name = Path(source_names[0]).stem if source_names else "sequence"
             output_dir = labels_dir / single_name
@@ -1042,6 +1053,7 @@ def export_as_cvat_video(
         if download_dir is None:
             download_dir = Path("dagshub_export")
         download_dir = Path(download_dir)
+        video_dir, labels_dir, _ = self._get_media_export_layout(download_dir, "videos")
 
         video_annotations = self._get_all_video_annotations(annotation_field)
         if not video_annotations:
@@ -1059,16 +1071,12 @@ def export_as_cvat_video(
         log_message("Exporting CVAT video annotations...")
         sequences = self._annotations_to_sequences(video_annotations)
 
-        local_download_root: Optional[Path] = None
-        if not has_multiple_sources and (image_width is None or image_height is None):
-            log_message("Missing video dimensions in annotations, downloading videos for converter-side probing...")
-            local_download_root = self.download_files(download_dir / "data", keep_source_prefix=True)
+        log_message(f"Downloading videos into {video_dir}...")
+        local_download_root = self.download_files(video_dir, keep_source_prefix=False)
 
         if has_multiple_sources:
             video_files: Optional[Dict[str, Union[str, Path]]] = None
             if image_width is None or image_height is None:
-                log_message("Missing video dimensions in annotations, downloading videos for converter-side probing...")
-                local_download_root = self.download_files(download_dir / "data", keep_source_prefix=True)
                 video_files = {}
                 for ann_filename in {
                     self._get_annotation_filename(ann)
@@ -1087,7 +1095,7 @@ def export_as_cvat_video(
                     video_files[ann_path.name] = local_video
                     video_files[ann_path.stem] = local_video
 
-            output_dir = download_dir / "labels"
+            output_dir = labels_dir
             output_dir.mkdir(parents=True, exist_ok=True)
             export_cvat_videos_to_zips(
                 sequences,
@@ -1098,19 +1106,16 @@ def export_as_cvat_video(
             )
             result_path = output_dir
         else:
-            single_video_file: Optional[Path] = None
-            if local_download_root is not None:
-                ref_filename = next((self._get_annotation_filename(a) for a in video_annotations), None)
-                if ref_filename is None:
-                    raise FileNotFoundError("Missing annotation filename for single-video CVAT export.")
-                single_video_file = self._prepare_video_file_for_export(local_download_root, ref_filename)
-                if single_video_file is None:
-                    raise FileNotFoundError(
-                        f"Could not find local downloaded video file for '{ref_filename}' "
-                        f"under '{local_download_root}'."
-                    )
+            ref_filename = next((self._get_annotation_filename(a) for a in video_annotations), None)
+            if ref_filename is None:
+                raise FileNotFoundError("Missing annotation filename for single-video CVAT export.")
+            single_video_file = self._prepare_video_file_for_export(local_download_root, ref_filename)
+            if single_video_file is None:
+                raise FileNotFoundError(
+                    f"Could not find local downloaded video file for '{ref_filename}' "
+                    f"under '{local_download_root}'."
+                )
 
-            labels_dir = download_dir / "labels"
             labels_dir.mkdir(parents=True, exist_ok=True)
             if source_names:
                 output_name = f"{Path(source_names[0]).name}.zip"
diff --git a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py
index aa1b6bbf..a5b97b42 100644
--- a/tests/data_engine/annotation_import/test_cvat_video.py
+++ b/tests/data_engine/annotation_import/test_cvat_video.py
@@ -115,8 +115,11 @@ def test_get_all_video_expands_tracks(ds):
 
 def test_export_cvat_video_xml(ds, tmp_path, monkeypatch):
     qr, _ = _make_video_qr(ds)
+    captured = {}
 
     def _mock_download_files(self, target_dir, *args, **kwargs):
+        captured["download_dir"] = target_dir
+        captured["keep_source_prefix"] = kwargs.get("keep_source_prefix", True)
         (target_dir / "video.mp4").parent.mkdir(parents=True, exist_ok=True)
         (target_dir / "video.mp4").write_bytes(b"fake")
         return target_dir
@@ -125,7 +128,9 @@ def _mock_download_files(self, target_dir, *args, **kwargs):
     result = qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann")
 
     assert result.exists()
-    assert result == tmp_path / "labels" / "video.mp4.zip"
+    assert result == tmp_path / "data" / "labels" / "video.mp4.zip"
+    assert captured["download_dir"] == tmp_path / "data" / "videos"
+    assert captured["keep_source_prefix"] is False
     with zipfile.ZipFile(result, "r") as z:
         content = z.read("annotations.xml").decode("utf-8")
     assert "<track" in content
@@ -143,8 +148,10 @@ def test_export_cvat_video_no_annotations_raises(ds, tmp_path):
 
 def test_export_cvat_video_custom_name(ds, tmp_path, monkeypatch):
     qr, _ = _make_video_qr(ds)
+    captured = {}
 
     def _mock_download_files(self, target_dir, *args, **kwargs):
+        captured["download_dir"] = target_dir
         (target_dir / "video.mp4").parent.mkdir(parents=True, exist_ok=True)
         (target_dir / "video.mp4").write_bytes(b"fake")
         return target_dir
@@ -154,6 +161,7 @@ def _mock_download_files(self, target_dir, *args, **kwargs):
         download_dir=tmp_path, annotation_field="ann", video_name="my_clip.avi"
     )
 
+    assert captured["download_dir"] == tmp_path / "data" / "videos"
     with zipfile.ZipFile(result, "r") as z:
         content = z.read("annotations.xml").decode("utf-8")
     assert "my_clip.avi" in content
@@ -186,7 +194,10 @@ def test_export_cvat_video_multiple_datapoints(ds, tmp_path, monkeypatch):
         )
         dps.append(dp)
 
+    captured = {}
+
     def _mock_download_files(self, target_dir, *args, **kwargs):
+        captured["download_dir"] = target_dir
         target_dir.mkdir(parents=True, exist_ok=True)
         for i in range(2):
             (target_dir / f"video_{i}.mp4").write_bytes(b"fake")
@@ -197,7 +208,8 @@ def _mock_download_files(self, target_dir, *args, **kwargs):
     result = qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann")
 
     assert result.is_dir()
-    assert result == tmp_path / "labels"
+    assert result == tmp_path / "data" / "labels"
+    assert captured["download_dir"] == tmp_path / "data" / "videos"
     zips = list(result.glob("*.zip"))
     assert len(zips) == 2
 
@@ -242,6 +254,7 @@ def _mock_export_cvat_video_to_zip(
     qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann")
 
     assert captured["video_file"] is not None
+    assert "data/videos" in captured["video_file"]
     assert captured["video_file"].endswith("video.mp4")
 
 
diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py
index fd66ff97..eeac97d1 100644
--- a/tests/data_engine/annotation_import/test_mot.py
+++ b/tests/data_engine/annotation_import/test_mot.py
@@ -155,6 +155,42 @@ def test_build_video_sequence_sets_top_level_dimensions():
     assert sequence.video_height == 1080
 
 
+def test_video_export_layout_uses_datasource_prefix(ds):  # plain prefix is nested under <download_dir>/data
+    qr, _ = _make_video_qr(ds)
+    with patch.object(
+        type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath("my_ds_path")
+    ):  # patched on type(ds.source): a PropertyMock has to be attached to the class
+        video_dir, labels_dir, dataset_root = qr._get_media_export_layout(Path("export"), "videos")
+
+    assert video_dir == Path("export") / "data" / "my_ds_path" / "videos"  # media dir appended to the prefix
+    assert labels_dir == Path("export") / "data" / "my_ds_path" / "labels"  # labels sit beside the media dir
+    assert dataset_root == Path("export") / "data" / "my_ds_path"  # root is the shared parent of both
+
+
+def test_video_export_layout_reuses_existing_videos_suffix(ds):  # trailing "videos" in prefix is not duplicated
+    qr, _ = _make_video_qr(ds)
+    with patch.object(
+        type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath("my_ds_path/videos")
+    ):  # patched on type(ds.source): a PropertyMock has to be attached to the class
+        video_dir, labels_dir, dataset_root = qr._get_media_export_layout(Path("export"), "videos")
+
+    assert video_dir == Path("export") / "data" / "my_ds_path" / "videos"  # no ".../videos/videos"
+    assert labels_dir == Path("export") / "data" / "my_ds_path" / "labels"  # labels sit beside the media dir
+    assert dataset_root == Path("export") / "data" / "my_ds_path"  # root excludes the "videos" segment
+
+
+def test_video_export_layout_strips_leading_data_prefix(ds):  # leading "data" in the prefix is not repeated
+    qr, _ = _make_video_qr(ds)
+    with patch.object(
+        type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath("data/videos")
+    ):  # patched on type(ds.source): a PropertyMock has to be attached to the class
+        video_dir, labels_dir, dataset_root = qr._get_media_export_layout(Path("export"), "videos")
+
+    assert video_dir == Path("export") / "data" / "videos"  # neither "data/data" nor "videos/videos"
+    assert labels_dir == Path("export") / "data" / "labels"  # labels sit beside the media dir
+    assert dataset_root == Path("export") / "data"  # with the prefix fully consumed, root is the data dir
+
+
 # --- import ---
 
 
@@ -244,8 +280,11 @@ def test_convert_video_empty_skipped(ds, tmp_path):
 
 def test_export_mot_directory_structure(ds, tmp_path, monkeypatch):
     qr, _ = _make_video_qr(ds)
+    captured = {}
 
     def _mock_download_files(self, target_dir, *args, **kwargs):
+        captured["download_dir"] = target_dir
+        captured["keep_source_prefix"] = kwargs.get("keep_source_prefix", True)
         (target_dir / "video.mp4").parent.mkdir(parents=True, exist_ok=True)
         (target_dir / "video.mp4").write_bytes(b"fake")
         return target_dir
@@ -269,7 +308,9 @@ def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=N
     result = qr.export_as_mot(download_dir=tmp_path, annotation_field="ann")
 
     assert result.exists()
-    assert result == tmp_path / "labels" / "video"
+    assert result == tmp_path / "data" / "labels" / "video"
+    assert captured["download_dir"] == tmp_path / "data" / "videos"
+    assert captured["keep_source_prefix"] is False
     assert (result / "gt" / "gt.txt").exists()
     assert (result / "gt" / "labels.txt").exists()
     assert (result / "seqinfo.ini").exists()
@@ -277,6 +318,13 @@ def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=N
 
 def test_export_mot_explicit_dimensions(ds, tmp_path, monkeypatch):
     qr, _ = _make_video_qr(ds)
+    captured = {}
+
+    def _mock_download_files(self, target_dir, *args, **kwargs):
+        captured["download_dir"] = target_dir
+        (target_dir / "video.mp4").parent.mkdir(parents=True, exist_ok=True)
+        (target_dir / "video.mp4").write_bytes(b"fake")
+        return target_dir
 
     def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=None):
         output_dir.mkdir(parents=True, exist_ok=True)
@@ -292,6 +340,7 @@ def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=N
         (output_dir / "gt" / "labels.txt").write_text("person\n")
         return output_dir
 
+    monkeypatch.setattr(QueryResult, "download_files", _mock_download_files)
     monkeypatch.setattr(
         "dagshub.data_engine.model.query_result.export_mot_to_dir",
         _mock_export_mot_to_dir,
@@ -301,6 +350,7 @@ def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=N
     )
 
     seqinfo = (result / "seqinfo.ini").read_text()
+    assert captured["download_dir"] == tmp_path / "data" / "videos"
     assert "1280" in seqinfo
     assert "720" in seqinfo
 
@@ -350,9 +400,9 @@ def _mock_export_mot_sequences_to_dirs(video_annotations, context, output_dir):
     qr = _make_qr(ds, dps, ann_field="ann")
     result = qr.export_as_mot(download_dir=tmp_path, annotation_field="ann")
 
-    assert result == tmp_path
-    assert captured["download_dir"] == tmp_path / "videos"
-    assert captured["output_dir"] == tmp_path
+    assert result == tmp_path / "data"
+    assert captured["download_dir"] == tmp_path / "data" / "videos"
+    assert captured["output_dir"] == tmp_path / "data"
     assert (result / "labels" / "video_0" / "gt" / "gt.txt").exists()
     assert (result / "labels" / "video_1" / "gt" / "gt.txt").exists()
 
@@ -386,6 +436,7 @@ def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=N
     qr.export_as_mot(download_dir=tmp_path, annotation_field="ann")
 
     assert captured["video_file"] is not None
+    assert "data/videos" in captured["video_file"]
     assert captured["video_file"].endswith("video.mp4")
 
 

From 924c718f5cbfcce574894c1483e24bd148659a49 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Tue, 7 Apr 2026 11:25:46 +0300
Subject: [PATCH 10/24] fix linting

---
 dagshub/data_engine/model/query_result.py       | 2 +-
 tests/data_engine/annotation_import/test_mot.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py
index 0a259579..e9e1211a 100644
--- a/dagshub/data_engine/model/query_result.py
+++ b/dagshub/data_engine/model/query_result.py
@@ -23,7 +23,7 @@
 from dagshub_annotation_converter.formats.yolo.common import ir_mapping
 from dagshub_annotation_converter.ir.base import IRTaskAnnotation
 from dagshub_annotation_converter.ir.image import IRImageAnnotationBase
-from dagshub_annotation_converter.ir.video import IRVideoAnnotationTrack, IRVideoBBoxFrameAnnotation
+from dagshub_annotation_converter.ir.video import IRVideoAnnotationTrack, IRVideoBBoxFrameAnnotation, IRVideoSequence
 from pydantic import ValidationError
 
 from dagshub.auth import get_token
diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py
index eeac97d1..3324c185 100644
--- a/tests/data_engine/annotation_import/test_mot.py
+++ b/tests/data_engine/annotation_import/test_mot.py
@@ -7,7 +7,7 @@
 
 import pytest
 from dagshub_annotation_converter.ir.image import CoordinateStyle
-from dagshub_annotation_converter.ir.video import IRVideoBBoxFrameAnnotation, IRVideoSequence
+from dagshub_annotation_converter.ir.video import IRVideoBBoxFrameAnnotation
 
 from dagshub.data_engine.annotation.importer import AnnotationImporter, AnnotationsNotFoundError
 from dagshub.data_engine.annotation.metadata import MetadataAnnotations

From ad1c465caf7eedaba47f2dc5d6c94fcad39c9f97 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Tue, 7 Apr 2026 11:27:50 +0300
Subject: [PATCH 11/24] temporarily pin converter version for tests to pass

---
 setup.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 237e0904..deeaef43 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,11 @@ def get_version(rel_path: str) -> str:
     "python-dateutil",
     "boto3",
     "semver",
-    "dagshub-annotation-converter>=0.1.16",
+    # FIXME: roll back to main after merging
+    # "dagshub-annotation-converter>=0.1.12",
+    "dagshub-annotation-converter @ "
+    + "git+https://github.com/DagsHub/"
+    + "dagshub-annotation-converter@video_converters#egg=dagshub-annotation-converter",
 ]
 
 extras_require = {

From 7a76372aae02d04b224f3c52660279bd1c0dd61a Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Tue, 7 Apr 2026 11:33:45 +0300
Subject: [PATCH 12/24] fix review comments

---
 dagshub/__init__.py                                    | 2 +-
 tests/data_engine/annotation_import/test_cvat_video.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/dagshub/__init__.py b/dagshub/__init__.py
index 10f3c0cb..7f68de54 100644
--- a/dagshub/__init__.py
+++ b/dagshub/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.6.10"
+__version__ = "0.6.9"
 from .logger import DAGsHubLogger, dagshub_logger
 from .common.init import init
 from .upload.wrapper import upload_files
diff --git a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py
index a5b97b42..1cea5226 100644
--- a/tests/data_engine/annotation_import/test_cvat_video.py
+++ b/tests/data_engine/annotation_import/test_cvat_video.py
@@ -273,7 +273,7 @@ def _mock_download_files(self, target_dir, *args, **kwargs):
 
     monkeypatch.setattr(QueryResult, "download_files", _mock_download_files)
 
-    with pytest.raises(FileNotFoundError, match="missing.mp4"):
+    with pytest.raises(FileNotFoundError, match=r"missing\.mp4"):
         qr.export_as_cvat_video(download_dir=tmp_path, annotation_field="ann")
 
 

From eb2c6437eb9f635ed0913a030fc41c03ff30f464 Mon Sep 17 00:00:00 2001
From: Dean P <dean@dagshub.com>
Date: Mon, 13 Apr 2026 12:33:22 +0300
Subject: [PATCH 13/24] Update setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index a4a08913..a26ff6b7 100644
--- a/setup.py
+++ b/setup.py
@@ -46,7 +46,7 @@ def get_version(rel_path: str) -> str:
     # "dagshub-annotation-converter>=0.1.12",
     "dagshub-annotation-converter @ "
     + "git+https://github.com/DagsHub/"
-    + "dagshub-annotation-converter@coco_converter#egg=dagshub-annotation-converter",
+    + "dagshub-annotation-converter@main#egg=dagshub-annotation-converter",
 ]
 
 extras_require = {

From f3eb3a7c507d4f087e7bd243224141882c6e60fb Mon Sep 17 00:00:00 2001
From: Dean P <dean@dagshub.com>
Date: Mon, 13 Apr 2026 12:33:57 +0300
Subject: [PATCH 14/24] Update setup.py

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index deeaef43..93f34832 100644
--- a/setup.py
+++ b/setup.py
@@ -45,7 +45,7 @@ def get_version(rel_path: str) -> str:
     # "dagshub-annotation-converter>=0.1.12",
     "dagshub-annotation-converter @ "
     + "git+https://github.com/DagsHub/"
-    + "dagshub-annotation-converter@video_converters#egg=dagshub-annotation-converter",
+    + "dagshub-annotation-converter@main#egg=dagshub-annotation-converter",
 ]
 
 extras_require = {

From 9fc5cd4b86921ef813d63a4c259a81411a1a65c7 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Mon, 13 Apr 2026 14:23:59 +0300
Subject: [PATCH 15/24] fix build_video_sequence_from_annotations

---
 dagshub/data_engine/annotation/video.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/dagshub/data_engine/annotation/video.py b/dagshub/data_engine/annotation/video.py
index 25eb50f1..72587b6d 100644
--- a/dagshub/data_engine/annotation/video.py
+++ b/dagshub/data_engine/annotation/video.py
@@ -2,27 +2,23 @@
 from typing import Optional, Sequence
 
 from dagshub_annotation_converter.ir.video import (
-    IRVideoBBoxFrameAnnotation,
+    IRVideoFrameAnnotationBase,
     IRVideoAnnotationTrack,
     IRVideoSequence,
 )
 
 
 def build_video_sequence_from_annotations(
-    annotations: Sequence[IRVideoBBoxFrameAnnotation],
+    annotations: Sequence[IRVideoFrameAnnotationBase],
     filename: Optional[str] = None,
 ) -> IRVideoSequence:
     # Pre-group annotations into tracks (required by new from_annotations API)
-    by_track: dict[str, list[IRVideoBBoxFrameAnnotation]] = defaultdict(list)
+    by_track: dict[str, list[IRVideoFrameAnnotationBase]] = defaultdict(list)
     for ann in annotations:
-        object_id = getattr(ann, "object_id", None)
-        if object_id is None and ann.__pydantic_extra__ is not None:
-            object_id = ann.__pydantic_extra__.get("object_id")
-        if object_id is None:
-            object_id = ann.imported_id
+        object_id = ann.imported_id
         if object_id is None:
             raise ValueError("Video annotation is missing an object identifier")
-        by_track[str(object_id)].append(ann)
+        by_track[object_id].append(ann)
 
     tracks = [
         IRVideoAnnotationTrack.from_annotations(anns, object_id=str(tid))

From 5be1f0f777c3f5fed991d1aa818cf4ac4e7762f7 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Mon, 13 Apr 2026 15:13:28 +0300
Subject: [PATCH 16/24] fixing review comments

---
 dagshub/data_engine/annotation/importer.py | 47 ++++++++--------------
 dagshub/data_engine/model/datasource.py    | 10 +++++
 2 files changed, 26 insertions(+), 31 deletions(-)

diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py
index 8e09e786..bf193fa8 100644
--- a/dagshub/data_engine/annotation/importer.py
+++ b/dagshub/data_engine/annotation/importer.py
@@ -1,14 +1,16 @@
 from difflib import SequenceMatcher
 from pathlib import Path, PurePosixPath, PurePath
 from tempfile import TemporaryDirectory
-from typing import TYPE_CHECKING, Dict, Iterable, Literal, Optional, Union, Sequence, Mapping, Callable, List
+from typing import TYPE_CHECKING, Dict, Iterable, Literal, Optional, Union, Sequence, Mapping, Callable, List, Tuple
 
 from dagshub_annotation_converter.converters.cvat import (
+    CVATAnnotations,
     load_cvat_from_fs,
     load_cvat_from_zip,
     load_cvat_from_xml_file,
 )
 from dagshub_annotation_converter.converters.mot import load_mot_from_dir, load_mot_from_fs, load_mot_from_zip
+from dagshub_annotation_converter.formats.mot.context import MOTContext
 from dagshub_annotation_converter.converters.yolo import load_yolo_from_fs
 from dagshub_annotation_converter.converters.label_studio_video import video_ir_to_ls_video_task
 from dagshub_annotation_converter.formats.label_studio.task import LabelStudioTask
@@ -164,8 +166,6 @@ def import_annotations(self) -> Mapping[str, Sequence[IRTaskAnnotation]]:
     @staticmethod
     def _is_video_annotation(result) -> bool:
         """Check whether a loader result contains video annotations."""
-        from dagshub_annotation_converter.ir.video import IRVideoSequence
-
         if isinstance(result, IRVideoSequence):
             return True
         if not isinstance(result, dict) or len(result) == 0:
@@ -175,7 +175,7 @@ def _is_video_annotation(result) -> bool:
 
     def _flatten_video_annotations(
         self,
-        video_data,
+        video_data: CVATAnnotations,
     ) -> Dict[str, Sequence[IRTaskAnnotation]]:
         """Flatten video annotations into a single entry keyed by the source video path."""
         video_name = self.additional_args.get("video_name")
@@ -194,7 +194,7 @@ def _flatten_video_annotations(
         return {video_name: all_anns}
 
     def _flatten_cvat_fs_annotations(
-        self, fs_annotations: Mapping[str, object]
+        self, fs_annotations: Mapping[str, CVATAnnotations]
     ) -> Dict[str, Sequence[IRTaskAnnotation]]:
         flattened: Dict[str, List[IRTaskAnnotation]] = {}
         for rel_path, result in fs_annotations.items():
@@ -202,39 +202,23 @@ def _flatten_cvat_fs_annotations(
                 video_key = self._resolve_video_annotation_key(result.filename, fallback=str(rel_path))
                 flattened.setdefault(video_key, [])
                 flattened[video_key].extend(result.to_annotations())
-            elif isinstance(result, dict):
-                if self._is_video_annotation(result):
-                    video_key = self._first_video_annotation_filename(result.values())
-                    video_key = self._resolve_video_annotation_key(video_key, fallback=str(rel_path))
-                    flattened.setdefault(video_key, [])
-                    for frame_anns in result.values():
-                        flattened[video_key].extend(frame_anns)
-                else:
-                    for filename, anns in result.items():
-                        flattened.setdefault(filename, [])
-                        flattened[filename].extend(anns)
+            elif isinstance(result, dict):  # CVATImageAnnotations: Dict[str, Sequence[IRImageAnnotationBase]]
+                for filename, anns in result.items():
+                    flattened.setdefault(filename, [])
+                    flattened[filename].extend(anns)
         return flattened
 
     def _flatten_mot_fs_annotations(
         self,
-        fs_annotations: Mapping[str, object],
+        fs_annotations: Mapping[Path, Tuple[IRVideoSequence, MOTContext]],
     ) -> Dict[str, Sequence[IRTaskAnnotation]]:
         flattened: Dict[str, List[IRTaskAnnotation]] = {}
-        for rel_path, result in fs_annotations.items():
-            if not isinstance(result, tuple) or len(result) != 2:
-                continue
-            sequence_or_dict = result[0]
+        for rel_path, (sequence, _) in fs_annotations.items():
             rel_path_str = self._stringify_video_path(rel_path)
             sequence_name = self.annotations_file.stem if rel_path_str in (None, "", ".") else rel_path_str
-            if isinstance(sequence_or_dict, IRVideoSequence):
-                sequence_name = self._resolve_video_annotation_key(sequence_or_dict.filename, fallback=sequence_name)
-                flattened.setdefault(sequence_name, [])
-                flattened[sequence_name].extend(sequence_or_dict.to_annotations())
-            elif isinstance(sequence_or_dict, dict):
-                sequence_name = self._first_video_annotation_filename(sequence_or_dict.values()) or sequence_name
-                flattened.setdefault(sequence_name, [])
-                for frame_anns in sequence_or_dict.values():
-                    flattened[sequence_name].extend(frame_anns)
+            sequence_name = self._resolve_video_annotation_key(sequence.filename, fallback=sequence_name)
+            flattened.setdefault(sequence_name, [])
+            flattened[sequence_name].extend(sequence.to_annotations())
         return flattened
 
     @staticmethod
@@ -266,7 +250,7 @@ def _first_video_annotation_filename(
     ) -> Optional[str]:
         for frame_anns in frame_groups:
             for ann in frame_anns:
-                ann_filename = cls._stringify_video_path(getattr(ann, "filename", None))
+                ann_filename = cls._stringify_video_path(ann.filename)
                 if ann_filename not in (None, "", "."):
                     return ann_filename
         return None
@@ -275,6 +259,7 @@ def download_annotations(self, dest_dir: Path):
         log_message("Downloading annotations from repository")
         repoApi = self.ds.source.repoApi
         if self.annotations_type in ("cvat", "cvat_video"):
+            # Download just the annotation file
             repoApi.download(self.annotations_file.as_posix(), dest_dir, keep_source_prefix=True)
         elif self.annotations_type == "yolo":
             # Download the dataset .yaml file and the images + annotations
diff --git a/dagshub/data_engine/model/datasource.py b/dagshub/data_engine/model/datasource.py
index 94f78522..bbeab214 100644
--- a/dagshub/data_engine/model/datasource.py
+++ b/dagshub/data_engine/model/datasource.py
@@ -1668,6 +1668,16 @@ def import_annotations_from_files(
 
         Keyword Args:
             yolo_type: Type of YOLO annotations to import. Either ``bbox``, ``segmentation`` or ``pose``.
+            image_width: (MOT, CVAT video) Width of the video frames in pixels. \
+                Used when the annotation file does not contain dimension metadata.
+            image_height: (MOT, CVAT video) Height of the video frames in pixels. \
+                Used when the annotation file does not contain dimension metadata.
+            video_name: (MOT) Name/path of the video file these annotations belong to. \
+                Used to key the resulting annotations when it cannot be inferred from the annotation file.
+            video_dir_name: (MOT filesystem layout) Name of the subdirectory containing video files. \
+                Defaults to ``"videos"``.
+            label_dir_name: (MOT filesystem layout) Name of the subdirectory containing label files. \
+                Defaults to ``"labels"``.
 
         Example to import segmentation annotations into an ``imported_annotations`` field,
         using YOLO information from an ``annotations.yaml`` file (can be local, or in the repo)::

From 3ddb0eafd8c2e5fbcc8e1ea7ef5ac1628ea24a60 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Mon, 13 Apr 2026 15:20:37 +0300
Subject: [PATCH 17/24] finish fixing importer comments

---
 dagshub/data_engine/annotation/importer.py | 24 ++++++++--------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py
index bf193fa8..115e777c 100644
--- a/dagshub/data_engine/annotation/importer.py
+++ b/dagshub/data_engine/annotation/importer.py
@@ -103,7 +103,7 @@ def import_annotations(self) -> Mapping[str, Sequence[IRTaskAnnotation]]:
                     annotation_dict = self._flatten_cvat_fs_annotations(load_cvat_from_fs(annotations_file))
                 else:
                     result = load_cvat_from_zip(annotations_file)
-                    if self._is_video_annotation(result):
+                    if self._determine_cvat_annotation(result) == "video":
                         annotation_dict = self._flatten_video_annotations(result)
                     else:
                         annotation_dict = result
@@ -148,13 +148,13 @@ def import_annotations(self) -> Mapping[str, Sequence[IRTaskAnnotation]]:
                     annotation_dict = self._flatten_cvat_fs_annotations(raw)
                 elif annotations_file.suffix == ".zip":
                     result = load_cvat_from_zip(annotations_file, **cvat_kwargs)
-                    if self._is_video_annotation(result):
+                    if self._determine_cvat_annotation(result) == "video":
                         annotation_dict = self._flatten_video_annotations(result)
                     else:
                         annotation_dict = result
                 else:
                     result = load_cvat_from_xml_file(annotations_file, **cvat_kwargs)
-                    if self._is_video_annotation(result):
+                    if self._determine_cvat_annotation(result) == "video":
                         annotation_dict = self._flatten_video_annotations(result)
                     else:
                         annotation_dict = result
@@ -164,14 +164,11 @@ def import_annotations(self) -> Mapping[str, Sequence[IRTaskAnnotation]]:
             return annotation_dict
 
     @staticmethod
-    def _is_video_annotation(result) -> bool:
-        """Check whether a loader result contains video annotations."""
+    def _determine_cvat_annotation(result: CVATAnnotations) -> Literal["video", "image"]:
+        """Determine whether a CVAT loader result contains video or image annotations."""
         if isinstance(result, IRVideoSequence):
-            return True
-        if not isinstance(result, dict) or len(result) == 0:
-            return False
-        first_key = next(iter(result.keys()))
-        return isinstance(first_key, int)
+            return "video"
+        return "image"
 
     def _flatten_video_annotations(
         self,
@@ -327,10 +324,7 @@ def remap_annotations(
             for ann in anns:
                 if isinstance(ann, IRVideoAnnotationTrack):
                     for track_ann in ann.annotations:
-                        if track_ann.filename is not None:
-                            track_ann.filename = remap_func(track_ann.filename)
-                        else:
-                            track_ann.filename = new_filename
+                        track_ann.filename = new_filename
                     continue
 
                 if ann.filename is not None:
@@ -505,7 +499,7 @@ def _build_video_sequence(
         annotations: Sequence[IRTaskAnnotation],
         filename: str,
     ) -> Optional[IRVideoSequence]:
-        tracks = [ann.model_copy(deep=True) for ann in annotations if isinstance(ann, IRVideoAnnotationTrack)]
+        tracks = [ann for ann in annotations if isinstance(ann, IRVideoAnnotationTrack)]
         frame_annotations = [ann for ann in annotations if isinstance(ann, IRVideoBBoxFrameAnnotation)]
         if frame_annotations:
             tracks.extend(build_video_sequence_from_annotations(frame_annotations).tracks)

From 993572090b5298f29e82e8a72b7619b84f00d18d Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Mon, 13 Apr 2026 15:21:24 +0300
Subject: [PATCH 18/24] fix metadata review comment

---
 dagshub/data_engine/annotation/metadata.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/dagshub/data_engine/annotation/metadata.py b/dagshub/data_engine/annotation/metadata.py
index 88eeb073..a3e9638c 100644
--- a/dagshub/data_engine/annotation/metadata.py
+++ b/dagshub/data_engine/annotation/metadata.py
@@ -25,9 +25,6 @@
     from dagshub.data_engine.model.datapoint import Datapoint
 
 from dagshub_annotation_converter.formats.label_studio.videorectangle import VideoRectangleAnnotation
-from dagshub_annotation_converter.formats.label_studio.task import task_lookup as _task_lookup
-
-_task_lookup["videorectangle"] = VideoRectangleAnnotation
 
 
 class AnnotationMetaDict(dict):

From 51c6d21d12992965ea7738636585ada9c83c5f3a Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Mon, 13 Apr 2026 15:59:02 +0300
Subject: [PATCH 19/24] finish queryresult review comment

---
 dagshub/data_engine/model/query_result.py | 80 +++++++++++++----------
 1 file changed, 45 insertions(+), 35 deletions(-)

diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py
index e9e1211a..62383488 100644
--- a/dagshub/data_engine/model/query_result.py
+++ b/dagshub/data_engine/model/query_result.py
@@ -23,7 +23,7 @@
 from dagshub_annotation_converter.formats.yolo.common import ir_mapping
 from dagshub_annotation_converter.ir.base import IRTaskAnnotation
 from dagshub_annotation_converter.ir.image import IRImageAnnotationBase
-from dagshub_annotation_converter.ir.video import IRVideoAnnotationTrack, IRVideoBBoxFrameAnnotation, IRVideoSequence
+from dagshub_annotation_converter.ir.video import IRVideoAnnotationTrack, IRVideoFrameAnnotationBase, IRVideoSequence
 from pydantic import ValidationError
 
 from dagshub.auth import get_token
@@ -787,25 +787,21 @@ def _get_all_annotations(self, annotation_field: str) -> List[IRTaskAnnotation]:
     def _get_all_image_annotations(self, annotation_field: str) -> List[IRImageAnnotationBase]:
         return [ann for ann in self._get_all_annotations(annotation_field) if isinstance(ann, IRImageAnnotationBase)]
 
-    def _get_all_video_annotations(self, annotation_field: str) -> List[IRVideoBBoxFrameAnnotation]:
-        video_annotations: List[IRVideoBBoxFrameAnnotation] = []
+    def _get_all_video_annotations(self, annotation_field: str) -> List[IRVideoFrameAnnotationBase]:
+        video_annotations: List[IRVideoFrameAnnotationBase] = []
         for ann in self._get_all_annotations(annotation_field):
-            if isinstance(ann, IRVideoBBoxFrameAnnotation):
+            if isinstance(ann, IRVideoFrameAnnotationBase):
                 video_annotations.append(ann)
             elif isinstance(ann, IRVideoAnnotationTrack):
-                video_annotations.extend(
-                    track_ann
-                    for track_ann in ann.to_annotations()
-                    if isinstance(track_ann, IRVideoBBoxFrameAnnotation)
-                )
+                video_annotations.extend(ann.to_annotations())
         return video_annotations
 
     @staticmethod
     def _annotations_to_sequences(
-        video_annotations: List[IRVideoBBoxFrameAnnotation],
+        video_annotations: List[IRVideoFrameAnnotationBase],
     ) -> List["IRVideoSequence"]:
         """Group frame annotations into per-source video sequences."""
-        by_source: Dict[str, List[IRVideoBBoxFrameAnnotation]] = {}
+        by_source: Dict[str, List[IRVideoFrameAnnotationBase]] = {}
         for ann in video_annotations:
             filename = QueryResult._get_annotation_filename(ann) or ""
             by_source.setdefault(filename, []).append(ann)
@@ -815,7 +811,14 @@ def _annotations_to_sequences(
             for source_filename, anns in by_source.items()
         ]
 
-    def _prepare_video_file_for_export(self, local_root: Path, repo_relative_filename: str) -> Optional[Path]:
+    def _resolve_local_path(self, local_root: Path, repo_relative_filename: str) -> Optional[Path]:
+        """
+        Resolves the local path of a downloaded file given its repo-relative filename.
+
+        Tries the path directly under ``local_root`` first, then falls back to prepending
+        the datasource's source prefix (e.g. when files were downloaded with the prefix intact).
+        Returns ``None`` if the file is not found at either location.
+        """
         ann_path = Path(repo_relative_filename)
         primary = local_root / ann_path
         if primary.exists():
@@ -826,7 +829,16 @@ def _prepare_video_file_for_export(self, local_root: Path, repo_relative_filenam
             return with_prefix
         return None
 
-    def _get_media_export_layout(self, download_dir: Path, media_dir_name: str) -> Tuple[Path, Path, Path]:
+    def _resolve_export_dirs(self, download_dir: Path, media_dir_name: str) -> Tuple[Path, Path, Path]:
+        """
+        Resolves the three directory paths for an export given a download root and a media subdirectory name.
+
+        Strips any leading ``data/`` segment from the datasource's source prefix to avoid duplication,
+        then nests the media directory under ``<download_dir>/data/<prefix>/<media_dir_name>``
+        (skipping the final segment if the prefix already ends with ``media_dir_name``).
+
+        Returns ``(media_dir, labels_dir, dataset_root)`` where ``labels_dir`` is a sibling of ``media_dir``.
+        """
         data_root = download_dir / "data"
         source_prefix = self.datasource.source.source_prefix
         prefix_parts = source_prefix.parts
@@ -844,17 +856,8 @@ def _get_media_export_layout(self, download_dir: Path, media_dir_name: str) -> T
         return media_dir, labels_dir, dataset_root
 
     @staticmethod
-    def _get_annotation_filename(ann: IRVideoBBoxFrameAnnotation) -> Optional[str]:
-        filename = ann.filename
-        if filename is None:
-            return None
-        if isinstance(filename, (list, tuple)):
-            if len(filename) == 0:
-                return None
-            if len(filename) > 1:
-                raise ValueError(f"Annotation has multiple filenames: {filename}")
-            filename = filename[0]
-        return str(filename)
+    def _get_annotation_filename(ann: IRVideoFrameAnnotationBase) -> Optional[str]:
+        return ann.filename
 
     def _resolve_annotation_field(self, annotation_field: Optional[str]) -> str:
         if annotation_field is not None:
@@ -976,7 +979,7 @@ def export_as_mot(
         if download_dir is None:
             download_dir = Path("dagshub_export")
         download_dir = Path(download_dir)
-        video_dir, labels_dir, dataset_root = self._get_media_export_layout(download_dir, "videos")
+        video_dir, labels_dir, dataset_root = self._resolve_export_dirs(download_dir, "videos")
 
         video_annotations = self._get_all_video_annotations(annotation_field)
         if not video_annotations:
@@ -992,7 +995,12 @@ def export_as_mot(
         has_multiple_sources = len(source_names) > 1
 
         log_message(f"Downloading videos into {video_dir}...")
-        local_download_root = self.download_files(video_dir, keep_source_prefix=False)
+        annotated = QueryResult(
+            [dp for dp in self.entries if annotation_field in dp.metadata],
+            self.datasource,
+            self.fields,
+        )
+        local_download_root = annotated.download_files(video_dir, keep_source_prefix=False)
 
         log_message("Exporting MOT annotations...")
         sequences = self._annotations_to_sequences(video_annotations)
@@ -1004,11 +1012,12 @@ def export_as_mot(
             export_mot_sequences_to_dirs(sequences, context, dataset_root)
             result_path = dataset_root
         else:
-            video_file: Optional[Path] = None
-            ref_filename = next((self._get_annotation_filename(a) for a in video_annotations), None)
+            ref_filename = video_annotations[0].filename
+            # ``filename`` is Optional: keep the explicit guard so a missing name is reported
+            # as FileNotFoundError instead of surfacing as a TypeError from ``Path(None)``.
             if ref_filename is None:
                 raise FileNotFoundError("Missing annotation filename for MOT export.")
-            video_file = self._prepare_video_file_for_export(local_download_root, ref_filename)
+            video_file = self._resolve_local_path(local_download_root, ref_filename)
             if video_file is None:
                 raise FileNotFoundError(
                     f"Could not find local downloaded video file for '{ref_filename}' "
@@ -1053,7 +1058,7 @@ def export_as_cvat_video(
         if download_dir is None:
             download_dir = Path("dagshub_export")
         download_dir = Path(download_dir)
-        video_dir, labels_dir, _ = self._get_media_export_layout(download_dir, "videos")
+        video_dir, labels_dir, _ = self._resolve_export_dirs(download_dir, "videos")
 
         video_annotations = self._get_all_video_annotations(annotation_field)
         if not video_annotations:
@@ -1072,7 +1077,12 @@ def export_as_cvat_video(
         sequences = self._annotations_to_sequences(video_annotations)
 
         log_message(f"Downloading videos into {video_dir}...")
-        local_download_root = self.download_files(video_dir, keep_source_prefix=False)
+        annotated = QueryResult(
+            [dp for dp in self.entries if annotation_field in dp.metadata],
+            self.datasource,
+            self.fields,
+        )
+        local_download_root = annotated.download_files(video_dir, keep_source_prefix=False)
 
         if has_multiple_sources:
             video_files: Optional[Dict[str, Union[str, Path]]] = None
@@ -1084,7 +1094,7 @@ def export_as_cvat_video(
                     if self._get_annotation_filename(ann)
                 }:
                     assert ann_filename is not None
-                    local_video = self._prepare_video_file_for_export(local_download_root, ann_filename)
+                    local_video = self._resolve_local_path(local_download_root, ann_filename)
                     if local_video is None:
                         raise FileNotFoundError(
                             f"Could not find local downloaded video file for '{ann_filename}' "
@@ -1109,7 +1119,7 @@ def export_as_cvat_video(
             ref_filename = next((self._get_annotation_filename(a) for a in video_annotations), None)
             if ref_filename is None:
                 raise FileNotFoundError("Missing annotation filename for single-video CVAT export.")
-            single_video_file = self._prepare_video_file_for_export(local_download_root, ref_filename)
+            single_video_file = self._resolve_local_path(local_download_root, ref_filename)
             if single_video_file is None:
                 raise FileNotFoundError(
                     f"Could not find local downloaded video file for '{ref_filename}' "

From fbd1cb12c1011215d1003cd10e4e407974d39984 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Mon, 13 Apr 2026 16:05:41 +0300
Subject: [PATCH 20/24] finalize review comments

---
 dagshub/data_engine/annotation/importer.py    | 24 +++++-----
 .../annotation_import/test_cvat_video.py      |  2 +-
 .../data_engine/annotation_import/test_mot.py | 48 ++++---------------
 3 files changed, 23 insertions(+), 51 deletions(-)

diff --git a/dagshub/data_engine/annotation/importer.py b/dagshub/data_engine/annotation/importer.py
index 115e777c..e691fd78 100644
--- a/dagshub/data_engine/annotation/importer.py
+++ b/dagshub/data_engine/annotation/importer.py
@@ -100,11 +100,11 @@ def import_annotations(self) -> Mapping[str, Sequence[IRTaskAnnotation]]:
                 )
             elif self.annotations_type == "cvat":
                 if annotations_file.is_dir():
-                    annotation_dict = self._flatten_cvat_fs_annotations(load_cvat_from_fs(annotations_file))
+                    annotation_dict = self._key_cvat_fs_annotations_by_filename(load_cvat_from_fs(annotations_file))
                 else:
                     result = load_cvat_from_zip(annotations_file)
                     if self._determine_cvat_annotation(result) == "video":
-                        annotation_dict = self._flatten_video_annotations(result)
+                        annotation_dict = self._key_video_annotations_by_filename(result)
                     else:
                         annotation_dict = result
             elif self.annotations_type == "mot":
@@ -127,16 +127,16 @@ def import_annotations(self) -> Mapping[str, Sequence[IRTaskAnnotation]]:
                             video_dir_name=video_dir_name,
                             label_dir_name=label_dir_name,
                         )
-                        annotation_dict = self._flatten_mot_fs_annotations(mot_results)
+                        annotation_dict = self._key_mot_fs_annotations_by_filename(mot_results)
                     else:
                         video_anns, _ = load_mot_from_dir(annotations_file, **mot_kwargs)
-                        annotation_dict = self._flatten_video_annotations(video_anns)
+                        annotation_dict = self._key_video_annotations_by_filename(video_anns)
                 elif annotations_file.suffix == ".zip":
                     video_anns, _ = load_mot_from_zip(annotations_file, **mot_kwargs)
-                    annotation_dict = self._flatten_video_annotations(video_anns)
+                    annotation_dict = self._key_video_annotations_by_filename(video_anns)
                 else:
                     video_anns, _ = load_mot_from_dir(annotations_file, **mot_kwargs)
-                    annotation_dict = self._flatten_video_annotations(video_anns)
+                    annotation_dict = self._key_video_annotations_by_filename(video_anns)
             elif self.annotations_type == "cvat_video":
                 cvat_kwargs = {}
                 if "image_width" in self.additional_args:
@@ -145,17 +145,17 @@ def import_annotations(self) -> Mapping[str, Sequence[IRTaskAnnotation]]:
                     cvat_kwargs["image_height"] = self.additional_args["image_height"]
                 if annotations_file.is_dir():
                     raw = load_cvat_from_fs(annotations_file, **cvat_kwargs)
-                    annotation_dict = self._flatten_cvat_fs_annotations(raw)
+                    annotation_dict = self._key_cvat_fs_annotations_by_filename(raw)
                 elif annotations_file.suffix == ".zip":
                     result = load_cvat_from_zip(annotations_file, **cvat_kwargs)
                     if self._determine_cvat_annotation(result) == "video":
-                        annotation_dict = self._flatten_video_annotations(result)
+                        annotation_dict = self._key_video_annotations_by_filename(result)
                     else:
                         annotation_dict = result
                 else:
                     result = load_cvat_from_xml_file(annotations_file, **cvat_kwargs)
                     if self._determine_cvat_annotation(result) == "video":
-                        annotation_dict = self._flatten_video_annotations(result)
+                        annotation_dict = self._key_video_annotations_by_filename(result)
                     else:
                         annotation_dict = result
             else:
@@ -170,7 +170,7 @@ def _determine_cvat_annotation(result: CVATAnnotations) -> Literal["video", "ima
             return "video"
         return "image"
 
-    def _flatten_video_annotations(
+    def _key_video_annotations_by_filename(
         self,
         video_data: CVATAnnotations,
     ) -> Dict[str, Sequence[IRTaskAnnotation]]:
@@ -190,7 +190,7 @@ def _flatten_video_annotations(
             all_anns.extend(frame_anns)
         return {video_name: all_anns}
 
-    def _flatten_cvat_fs_annotations(
+    def _key_cvat_fs_annotations_by_filename(
         self, fs_annotations: Mapping[str, CVATAnnotations]
     ) -> Dict[str, Sequence[IRTaskAnnotation]]:
         flattened: Dict[str, List[IRTaskAnnotation]] = {}
@@ -205,7 +205,7 @@ def _flatten_cvat_fs_annotations(
                     flattened[filename].extend(anns)
         return flattened
 
-    def _flatten_mot_fs_annotations(
+    def _key_mot_fs_annotations_by_filename(
         self,
         fs_annotations: Mapping[Path, Tuple[IRVideoSequence, MOTContext]],
     ) -> Dict[str, Sequence[IRTaskAnnotation]]:
diff --git a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py
index 1cea5226..d6230d9b 100644
--- a/tests/data_engine/annotation_import/test_cvat_video.py
+++ b/tests/data_engine/annotation_import/test_cvat_video.py
@@ -46,7 +46,7 @@ def test_flatten_cvat_fs_preserves_sequence_filename(ds, tmp_path):
         filename="nested/folder/video.mp4",
     )
 
-    result = importer._flatten_cvat_fs_annotations({"nested/annotations.xml": sequence})
+    result = importer._key_cvat_fs_annotations_by_filename({"nested/annotations.xml": sequence})
 
     assert "nested/folder/video.mp4" in result
 
diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py
index 3324c185..d9c15a4b 100644
--- a/tests/data_engine/annotation_import/test_mot.py
+++ b/tests/data_engine/annotation_import/test_mot.py
@@ -24,34 +24,6 @@ def mock_source_prefix(ds):
         yield
 
 
-# --- _is_video_annotation ---
-
-
-def test_is_video_dict_int_keys():
-    assert AnnotationImporter._is_video_annotation({0: [], 1: []}) is True
-
-
-def test_is_video_dict_str_keys():
-    assert AnnotationImporter._is_video_annotation({"file.jpg": []}) is False
-
-
-def test_is_video_dict_empty():
-    assert AnnotationImporter._is_video_annotation({}) is False
-
-
-def test_is_video_dict_non_dict():
-    assert AnnotationImporter._is_video_annotation([]) is False
-
-
-def test_is_video_dict_mixed_first_int():
-    assert AnnotationImporter._is_video_annotation({0: [], "a": []}) is True
-
-
-def test_is_video_sequence():
-    seq = build_video_sequence_from_annotations([_make_video_bbox()])
-    assert AnnotationImporter._is_video_annotation(seq) is True
-
-
 # --- is_video_format ---
 
 
@@ -73,12 +45,12 @@ def test_is_video_format(ds, ann_type, expected, tmp_path):
     assert importer.is_video_format is expected
 
 
-# --- _flatten_video_annotations ---
+# --- _key_video_annotations_by_filename ---
 
 
 def test_flatten_merges_frames(ds, tmp_path):
     importer = AnnotationImporter(ds, "mot", tmp_path / "test_video", load_from="disk")
-    result = importer._flatten_video_annotations({
+    result = importer._key_video_annotations_by_filename({
         0: [_make_video_bbox(frame=0)],
         5: [_make_video_bbox(frame=5)],
     })
@@ -88,7 +60,7 @@ def test_flatten_merges_frames(ds, tmp_path):
 
 def test_flatten_defaults_to_file_stem(ds, tmp_path):
     importer = AnnotationImporter(ds, "mot", tmp_path / "my_sequence", load_from="disk")
-    result = importer._flatten_video_annotations({0: [_make_video_bbox()]})
+    result = importer._key_video_annotations_by_filename({0: [_make_video_bbox()]})
     assert "my_sequence" in result
 
 
@@ -96,14 +68,14 @@ def test_flatten_video_name_override(ds, tmp_path):
     importer = AnnotationImporter(
         ds, "mot", tmp_path / "test_video", load_from="disk", video_name="custom.mp4"
     )
-    result = importer._flatten_video_annotations({0: [_make_video_bbox()]})
+    result = importer._key_video_annotations_by_filename({0: [_make_video_bbox()]})
     assert "custom.mp4" in result
 
 
 def test_flatten_sequence(ds, tmp_path):
     importer = AnnotationImporter(ds, "mot", tmp_path / "test_video", load_from="disk")
     sequence = build_video_sequence_from_annotations([_make_video_bbox(frame=0), _make_video_bbox(frame=5)])
-    result = importer._flatten_video_annotations(sequence)
+    result = importer._key_video_annotations_by_filename(sequence)
 
     assert "test_video" in result
     assert len(result["test_video"]) == 2
@@ -116,7 +88,7 @@ def test_flatten_sequence_preserves_sequence_filename(ds, tmp_path):
         filename="nested/videos/video.mp4",
     )
 
-    result = importer._flatten_video_annotations(sequence)
+    result = importer._key_video_annotations_by_filename(sequence)
 
     assert "nested/videos/video.mp4" in result
 
@@ -128,7 +100,7 @@ def test_flatten_mot_fs_preserves_relative_video_path(ds, tmp_path):
         filename="nested/video.mp4",
     )
 
-    result = importer._flatten_mot_fs_annotations({Path("nested/video.mp4"): (sequence, object())})
+    result = importer._key_mot_fs_annotations_by_filename({Path("nested/video.mp4"): (sequence, object())})
 
     assert "nested/video.mp4" in result
 
@@ -160,7 +132,7 @@ def test_video_export_layout_uses_datasource_prefix(ds):
     with patch.object(
         type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath("my_ds_path")
     ):
-        video_dir, labels_dir, dataset_root = qr._get_media_export_layout(Path("export"), "videos")
+        video_dir, labels_dir, dataset_root = qr._resolve_export_dirs(Path("export"), "videos")
 
     assert video_dir == Path("export") / "data" / "my_ds_path" / "videos"
     assert labels_dir == Path("export") / "data" / "my_ds_path" / "labels"
@@ -172,7 +144,7 @@ def test_video_export_layout_reuses_existing_videos_suffix(ds):
     with patch.object(
         type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath("my_ds_path/videos")
     ):
-        video_dir, labels_dir, dataset_root = qr._get_media_export_layout(Path("export"), "videos")
+        video_dir, labels_dir, dataset_root = qr._resolve_export_dirs(Path("export"), "videos")
 
     assert video_dir == Path("export") / "data" / "my_ds_path" / "videos"
     assert labels_dir == Path("export") / "data" / "my_ds_path" / "labels"
@@ -184,7 +156,7 @@ def test_video_export_layout_strips_leading_data_prefix(ds):
     with patch.object(
         type(ds.source), "source_prefix", new_callable=PropertyMock, return_value=PurePosixPath("data/videos")
     ):
-        video_dir, labels_dir, dataset_root = qr._get_media_export_layout(Path("export"), "videos")
+        video_dir, labels_dir, dataset_root = qr._resolve_export_dirs(Path("export"), "videos")
 
     assert video_dir == Path("export") / "data" / "videos"
     assert labels_dir == Path("export") / "data" / "labels"

From fd9a1037c2169e3f0fa0c9217331792d7fd3daea Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Mon, 13 Apr 2026 16:33:15 +0300
Subject: [PATCH 21/24] fix tests

---
 .../annotation_import/test_annotation_parsing.py           | 4 ++--
 tests/data_engine/annotation_import/test_cvat_video.py     | 2 +-
 tests/data_engine/annotation_import/test_mot.py            | 7 ++++---
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/tests/data_engine/annotation_import/test_annotation_parsing.py b/tests/data_engine/annotation_import/test_annotation_parsing.py
index 5cc5bfa7..133311c0 100644
--- a/tests/data_engine/annotation_import/test_annotation_parsing.py
+++ b/tests/data_engine/annotation_import/test_annotation_parsing.py
@@ -176,7 +176,7 @@ def test_video_tracks_to_ls_task_use_video_data_and_sequence_length(ds):
     dp = Datapoint(datasource=ds, path="nested/video.mp4", datapoint_id=1, metadata={})
     frame_annotations = [
         IRVideoBBoxFrameAnnotation(
-            object_id=1,
+            imported_id="1",
             frame_number=0,
             left=100.0,
             top=150.0,
@@ -188,7 +188,7 @@ def test_video_tracks_to_ls_task_use_video_data_and_sequence_length(ds):
             coordinate_style=CoordinateStyle.DENORMALIZED,
         ),
         IRVideoBBoxFrameAnnotation(
-            object_id=1,
+            imported_id="1",
             frame_number=5,
             left=110.0,
             top=155.0,
diff --git a/tests/data_engine/annotation_import/test_cvat_video.py b/tests/data_engine/annotation_import/test_cvat_video.py
index d6230d9b..4bdef534 100644
--- a/tests/data_engine/annotation_import/test_cvat_video.py
+++ b/tests/data_engine/annotation_import/test_cvat_video.py
@@ -282,7 +282,7 @@ def _mock_download_files(self, target_dir, *args, **kwargs):
 
 def _make_video_bbox(frame=0, object_id=0) -> IRVideoBBoxFrameAnnotation:
     return IRVideoBBoxFrameAnnotation(
-        object_id=object_id, frame_number=frame,
+        imported_id=str(object_id), frame_number=frame,
         left=100.0, top=150.0, width=50.0, height=80.0,
         video_width=1920, video_height=1080,
         categories={"person": 1.0},
diff --git a/tests/data_engine/annotation_import/test_mot.py b/tests/data_engine/annotation_import/test_mot.py
index d9c15a4b..8de30655 100644
--- a/tests/data_engine/annotation_import/test_mot.py
+++ b/tests/data_engine/annotation_import/test_mot.py
@@ -108,7 +108,7 @@ def test_flatten_mot_fs_preserves_relative_video_path(ds, tmp_path):
 def test_build_video_sequence_sets_top_level_dimensions():
     anns = [
         IRVideoBBoxFrameAnnotation(
-            object_id=0,
+            imported_id="0",
             frame_number=0,
             left=100.0,
             top=150.0,
@@ -200,7 +200,8 @@ def _mock_load_mot_from_fs(import_dir, image_width=None, image_height=None, **kw
         captured["import_dir"] = import_dir
         captured["image_width"] = image_width
         captured["image_height"] = image_height
-        return {Path("seq_a"): ({0: [_make_video_bbox(frame=0)]}, object())}
+        seq = build_video_sequence_from_annotations([_make_video_bbox(frame=0)], filename="seq_a")
+        return {Path("seq_a"): (seq, object())}
 
     monkeypatch.setattr("dagshub.data_engine.annotation.importer.load_mot_from_fs", _mock_load_mot_from_fs)
 
@@ -417,7 +418,7 @@ def _mock_export_mot_to_dir(video_annotations, context, output_dir, video_file=N
 
 def _make_video_bbox(frame=0, object_id=0) -> IRVideoBBoxFrameAnnotation:
     return IRVideoBBoxFrameAnnotation(
-        object_id=object_id, frame_number=frame,
+        imported_id=str(object_id), frame_number=frame,
         left=100.0, top=150.0, width=50.0, height=80.0,
         video_width=1920, video_height=1080,
         categories={"person": 1.0},

From bb75f409c98dae4244bf60b8bc7fd386fe2a58c0 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Tue, 14 Apr 2026 11:26:36 +0300
Subject: [PATCH 22/24] remove redundant function

---
 dagshub/data_engine/model/query_result.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py
index 62383488..5626f800 100644
--- a/dagshub/data_engine/model/query_result.py
+++ b/dagshub/data_engine/model/query_result.py
@@ -803,7 +803,7 @@ def _annotations_to_sequences(
         """Group frame annotations into per-source video sequences."""
         by_source: Dict[str, List[IRVideoFrameAnnotationBase]] = {}
         for ann in video_annotations:
-            filename = QueryResult._get_annotation_filename(ann) or ""
+            filename = ann.filename or ""
             by_source.setdefault(filename, []).append(ann)
 
         return [
@@ -855,10 +855,6 @@ def _resolve_export_dirs(self, download_dir: Path, media_dir_name: str) -> Tuple
         labels_dir = dataset_root / "labels"
         return media_dir, labels_dir, dataset_root
 
-    @staticmethod
-    def _get_annotation_filename(ann: IRVideoFrameAnnotationBase) -> Optional[str]:
-        return ann.filename
-
     def _resolve_annotation_field(self, annotation_field: Optional[str]) -> str:
         if annotation_field is not None:
             return annotation_field
@@ -988,7 +984,7 @@ def export_as_mot(
         source_names = sorted(
             {
                 Path(ann_filename).name
-                for ann_filename in (self._get_annotation_filename(ann) for ann in video_annotations)
+                for ann_filename in (ann.filename for ann in video_annotations)
                 if ann_filename
             }
         )
@@ -1067,7 +1063,7 @@ def export_as_cvat_video(
         source_names = sorted(
             {
                 Path(ann_filename).name
-                for ann_filename in (self._get_annotation_filename(ann) for ann in video_annotations)
+                for ann_filename in (ann.filename for ann in video_annotations)
                 if ann_filename
             }
         )
@@ -1089,9 +1085,9 @@ def export_as_cvat_video(
             if image_width is None or image_height is None:
                 video_files = {}
                 for ann_filename in {
-                    self._get_annotation_filename(ann)
+                    ann.filename
                     for ann in video_annotations
-                    if self._get_annotation_filename(ann)
+                    if ann.filename
                 }:
                     assert ann_filename is not None
                     local_video = self._resolve_local_path(local_download_root, ann_filename)
@@ -1116,7 +1112,7 @@ def export_as_cvat_video(
             )
             result_path = output_dir
         else:
-            ref_filename = next((self._get_annotation_filename(a) for a in video_annotations), None)
+            ref_filename = next((a.filename for a in video_annotations), None)
             if ref_filename is None:
                 raise FileNotFoundError("Missing annotation filename for single-video CVAT export.")
             single_video_file = self._resolve_local_path(local_download_root, ref_filename)

From 38f76fa6306abea32131aa1709a828b5b4420447 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Tue, 14 Apr 2026 12:14:15 +0300
Subject: [PATCH 23/24] fix last comment and bump converter

---
 dagshub/data_engine/model/query_result.py | 23 +++--------------------
 setup.py                                  |  6 +-----
 2 files changed, 4 insertions(+), 25 deletions(-)

diff --git a/dagshub/data_engine/model/query_result.py b/dagshub/data_engine/model/query_result.py
index 5626f800..53db70d6 100644
--- a/dagshub/data_engine/model/query_result.py
+++ b/dagshub/data_engine/model/query_result.py
@@ -981,13 +981,7 @@ def export_as_mot(
         if not video_annotations:
             raise RuntimeError("No video annotations found to export")
 
-        source_names = sorted(
-            {
-                Path(ann_filename).name
-                for ann_filename in (ann.filename for ann in video_annotations)
-                if ann_filename
-            }
-        )
+        source_names = sorted({Path(ann.filename).name for ann in video_annotations if ann.filename})
         has_multiple_sources = len(source_names) > 1
 
         log_message(f"Downloading videos into {video_dir}...")
@@ -1060,13 +1054,7 @@ def export_as_cvat_video(
         if not video_annotations:
             raise RuntimeError("No video annotations found to export")
 
-        source_names = sorted(
-            {
-                Path(ann_filename).name
-                for ann_filename in (ann.filename for ann in video_annotations)
-                if ann_filename
-            }
-        )
+        source_names = sorted({Path(ann.filename).name for ann in video_annotations if ann.filename})
         has_multiple_sources = len(source_names) > 1
 
         log_message("Exporting CVAT video annotations...")
@@ -1084,12 +1072,7 @@ def export_as_cvat_video(
             video_files: Optional[Dict[str, Union[str, Path]]] = None
             if image_width is None or image_height is None:
                 video_files = {}
-                for ann_filename in {
-                    ann.filename
-                    for ann in video_annotations
-                    if ann.filename
-                }:
-                    assert ann_filename is not None
+                for ann_filename in {ann.filename for ann in video_annotations if ann.filename}:
                     local_video = self._resolve_local_path(local_download_root, ann_filename)
                     if local_video is None:
                         raise FileNotFoundError(
diff --git a/setup.py b/setup.py
index 93f34832..eec96e69 100644
--- a/setup.py
+++ b/setup.py
@@ -41,11 +41,7 @@ def get_version(rel_path: str) -> str:
     "python-dateutil",
     "boto3",
     "semver",
-    # FIXME: roll back to main after merging
-    # "dagshub-annotation-converter>=0.1.12",
-    "dagshub-annotation-converter @ "
-    + "git+https://github.com/DagsHub/"
-    + "dagshub-annotation-converter@main#egg=dagshub-annotation-converter",
+    "dagshub-annotation-converter>=0.2.0",
 ]
 
 extras_require = {

From 15289ed4f54c97e155a83516959779db96a6c1b0 Mon Sep 17 00:00:00 2001
From: Dean <dean@dagshub.com>
Date: Tue, 14 Apr 2026 12:20:10 +0300
Subject: [PATCH 24/24] bump converter version

---
 setup.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/setup.py b/setup.py
index a26ff6b7..270ae5ef 100644
--- a/setup.py
+++ b/setup.py
@@ -42,11 +42,7 @@ def get_version(rel_path: str) -> str:
     "python-dateutil",
     "boto3",
     "semver",
-    # FIXME: roll back to main after merging
-    # "dagshub-annotation-converter>=0.1.12",
-    "dagshub-annotation-converter @ "
-    + "git+https://github.com/DagsHub/"
-    + "dagshub-annotation-converter@main#egg=dagshub-annotation-converter",
+    "dagshub-annotation-converter>=0.2.0",
 ]
 
 extras_require = {