Delegate embedding generation to Qdrant (#36)

Teagan42 · web-flow · commit 63d9b4b2cddd · 2025-09-13T08:59:24.000-06:00
* feat: delegate embedding generation to Qdrant

* feat: upsert media documents for Qdrant embeddings

* test: cover loader document embeddings
diff --git a/AGENTS.md b/AGENTS.md
@@ -1,10 +1,11 @@
 # AGENTS
 
 ## Architecture
-- `mcp_plex/loader.py` ingests Plex, TMDb, and IMDb metadata, builds dense and sparse embeddings, and stores items in a Qdrant collection.
+- `mcp_plex/loader.py` ingests Plex, TMDb, and IMDb metadata, relies on Qdrant to generate dense and sparse embeddings, and stores items in a Qdrant collection.
 - `mcp_plex/server.py` exposes retrieval and search tools via FastMCP backed by Qdrant.
 - `mcp_plex/types.py` defines the Pydantic models used across the project.
 - When making architectural design decisions, add a short note here describing the decision and its rationale.
+- Embedding generation was moved from local FastEmbed models in the loader to Qdrant's document API (`models.Document` vectors passed to `upsert`) to reduce local dependencies and centralize vector creation.
 - Actor names are stored as a top-level payload field and indexed in Qdrant to enable actor and year-based filtering.
 - Dense and sparse embedding model names are configurable via `DENSE_MODEL` and
   `SPARSE_MODEL` environment variables or the corresponding CLI options.
@@ -38,6 +39,7 @@ The project should handle natural-language searches and recommendations such as:
 - Use realistic (or as realistic as possible) data in tests; avoid meaningless placeholder values.
 - Always test both positive and negative logical paths.
 - Do **not** use `# pragma: no cover`; add tests to exercise code paths instead.
+- All changes should include tests that demonstrate the new or modified behavior.
 
 ## Efficiency and Search
 - Use `rg` (ripgrep) for recursive search.
diff --git a/mcp_plex/loader.py b/mcp_plex/loader.py
@@ -9,7 +9,6 @@
 
 import click
 import httpx
-from fastembed import SparseTextEmbedding, TextEmbedding
 from qdrant_client import models
 from qdrant_client.async_qdrant_client import AsyncQdrantClient
 
@@ -311,8 +310,8 @@ async def run(
         server = PlexServer(plex_url, plex_token)
         items = await _load_from_plex(server, tmdb_api_key)
 
-    # Embed and store in Qdrant
-    texts: List[str] = []
+    # Assemble points with server-side embeddings
+    points: List[models.PointStruct] = []
     for item in items:
         parts = [
             item.plex.title,
@@ -325,13 +324,33 @@ async def run(
         ]
         if item.tmdb and hasattr(item.tmdb, "reviews"):
             parts.extend(r.get("content", "") for r in getattr(item.tmdb, "reviews", []))
-        texts.append("\n".join(p for p in parts if p))
-
-    dense_model = TextEmbedding(dense_model_name)
-    sparse_model = SparseTextEmbedding(sparse_model_name)
-
-    dense_vectors = list(dense_model.embed(texts))
-    sparse_vectors = list(sparse_model.passage_embed(texts))
+        text = "\n".join(p for p in parts if p)
+        payload = {
+            "data": item.model_dump(),
+            "title": item.plex.title,
+            "type": item.plex.type,
+        }
+        if item.plex.actors:
+            payload["actors"] = [p.tag for p in item.plex.actors]
+        if item.plex.year is not None:
+            payload["year"] = item.plex.year
+        if item.plex.added_at is not None:
+            payload["added_at"] = item.plex.added_at
+        point_id: int | str = (
+            int(item.plex.rating_key)
+            if item.plex.rating_key.isdigit()
+            else item.plex.rating_key
+        )
+        points.append(
+            models.PointStruct(
+                id=point_id,
+                vector={
+                    "dense": models.Document(text=text, model=dense_model_name),
+                    "sparse": models.Document(text=text, model=sparse_model_name),
+                },
+                payload=payload,
+            )
+        )
 
     if qdrant_url is None and qdrant_host is None:
         qdrant_url = ":memory:"
@@ -344,31 +363,14 @@ async def run(
         https=qdrant_https,
         prefer_grpc=qdrant_prefer_grpc,
     )
+    dense_size, dense_distance = client._get_model_params(dense_model_name)
     collection_name = "media-items"
-    vectors_config = {
-        "dense": models.VectorParams(
-            size=dense_model.embedding_size, distance=models.Distance.COSINE
-        )
-    }
-    sparse_vectors_config = {"sparse": models.SparseVectorParams()}
-
     created_collection = False
-    if await client.collection_exists(collection_name):
-        info = await client.get_collection(collection_name)
-        existing_size = info.config.params.vectors["dense"].size  # type: ignore[index]
-        if existing_size != dense_model.embedding_size:
-            await client.delete_collection(collection_name)
-            await client.create_collection(
-                collection_name=collection_name,
-                vectors_config=vectors_config,
-                sparse_vectors_config=sparse_vectors_config,
-            )
-            created_collection = True
-    else:
+    if not await client.collection_exists(collection_name):
         await client.create_collection(
             collection_name=collection_name,
-            vectors_config=vectors_config,
-            sparse_vectors_config=sparse_vectors_config,
+            vectors_config={"dense": models.VectorParams(size=dense_size, distance=dense_distance)},
+            sparse_vectors_config={"sparse": models.SparseVectorParams()},
         )
         created_collection = True
 
@@ -419,34 +421,8 @@ async def run(
             field_schema=models.PayloadSchemaType.INTEGER,
         )
 
-    points = []
-    for idx, (item, dense, sparse) in enumerate(zip(items, dense_vectors, sparse_vectors)):
-        sv = models.SparseVector(
-            indices=sparse.indices.tolist(), values=sparse.values.tolist()
-        )
-        payload = {
-            "data": item.model_dump(),
-            "title": item.plex.title,
-            "type": item.plex.type,
-        }
-        if item.plex.actors:
-            payload["actors"] = [p.tag for p in item.plex.actors]
-        if item.plex.year is not None:
-            payload["year"] = item.plex.year
-        if item.plex.added_at is not None:
-            payload["added_at"] = item.plex.added_at
-        points.append(
-            models.Record(
-                id=int(item.plex.rating_key)
-                if item.plex.rating_key.isdigit()
-                else item.plex.rating_key,
-                payload=payload,
-                vector={"dense": dense, "sparse": sv},
-            )
-        )
-
     if points:
-        await client.upsert(collection_name="media-items", points=points)
+        await client.upsert(collection_name=collection_name, points=points)
 
     json.dump([item.model_dump() for item in items], fp=sys.stdout, indent=2)
     sys.stdout.write("\n")
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,15 +4,15 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "mcp-plex"
-version = "0.26.1"
+version = "0.26.3"
 
 description = "Plex-Oriented Model Context Protocol Server"
-requires-python = ">=3.11,<4"
+requires-python = ">=3.11,<3.13"
 dependencies = [
   "fastmcp>=2.11.2",
   "pydantic>=2.11.7",
   "plexapi>=4.17.0",
-  "qdrant-client[fastembed-gpu]>=1.12.1",
+  "qdrant-client[fastembed-gpu]>=1.15.1",
   "rapidfuzz>=3.13.0",
   "scikit-learn>=1.7.1",
   "httpx>=0.27.0",
diff --git a/tests/test_loader_integration.py b/tests/test_loader_integration.py
@@ -4,17 +4,23 @@
 from pathlib import Path
 
 from qdrant_client.async_qdrant_client import AsyncQdrantClient
+from qdrant_client import models
 
 from mcp_plex import loader
 
 
 class CaptureClient(AsyncQdrantClient):
     instance: "CaptureClient" | None = None
+    captured_points: list[models.PointStruct] = []
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         CaptureClient.instance = self
 
+    async def upsert(self, collection_name: str, points, **kwargs):
+        CaptureClient.captured_points = points
+        return await super().upsert(collection_name=collection_name, points=points, **kwargs)
+
 
 async def _run_loader(sample_dir: Path) -> None:
     await loader.run(
@@ -36,5 +42,14 @@ def test_run_writes_points(monkeypatch):
     points, _ = asyncio.run(client.scroll("media-items", limit=10, with_payload=True))
     assert len(points) == 2
     assert all("title" in p.payload and "type" in p.payload for p in points)
+    captured = CaptureClient.captured_points
+    assert len(captured) == 2
+    assert all(isinstance(p.vector["dense"], models.Document) for p in captured)
+    assert all(p.vector["dense"].model == "BAAI/bge-small-en-v1.5" for p in captured)
+    assert all(isinstance(p.vector["sparse"], models.Document) for p in captured)
+    assert all(
+        p.vector["sparse"].model == "Qdrant/bm42-all-minilm-l6-v2-attentions"
+        for p in captured
+    )
 
 
diff --git a/uv.lock b/uv.lock