From fcee34aec94000bef9dde91399dc75f89d710753 Mon Sep 17 00:00:00 2001 From: Elliot Scribner Date: Tue, 21 Oct 2025 15:08:24 -0700 Subject: [PATCH 1/8] Add `couchbase_query`, refactor `couchbase`->`couchbase_search`, adjust language --- .../integrations/providers/all_providers.mdx | 13 +- .../vectorstores/couchbase_query.mdx | 386 ++++++++++++++++++ .../{couchbase.mdx => couchbase_search.mdx} | 11 +- .../integrations/vectorstores/index.mdx | 11 +- 4 files changed, 409 insertions(+), 12 deletions(-) create mode 100644 src/oss/javascript/integrations/vectorstores/couchbase_query.mdx rename src/oss/javascript/integrations/vectorstores/{couchbase.mdx => couchbase_search.mdx} (95%) diff --git a/src/oss/javascript/integrations/providers/all_providers.mdx b/src/oss/javascript/integrations/providers/all_providers.mdx index 5f305b01d..5b0e1b971 100644 --- a/src/oss/javascript/integrations/providers/all_providers.mdx +++ b/src/oss/javascript/integrations/providers/all_providers.mdx @@ -865,12 +865,19 @@ Browse the complete collection of integrations available for JavaScript/TypeScri - NoSQL database with vector search capabilities. + NoSQL database with vector search capabilities via the search service. + + NoSQL database with vector search capabilities via the query service. + + +This functionality is only available in Couchbase Server 8.0 and above, which introduced vector search capabilities in the Query service. 
+ + +## Key Differences from CouchbaseVectorStore + +- **Query Service**: Uses Couchbase's Query service with SQL++ instead of the Search service +- **No Index Required**: Does not require a pre-configured search index for basic operations +- **SQL++ Syntax**: Supports WHERE clauses and SQL++ query syntax for filtering +- **Vector Functions**: Uses `APPROX_VECTOR_DISTANCE` function for similarity calculations +- **Distance Strategies**: Supports multiple distance strategies (Euclidean, Cosine, Dot Product) + +## Installation + +```bash npm +npm install couchbase @langchain/openai @langchain/community @langchain/core +``` + +## Create Couchbase Connection Object + +We create a connection to the Couchbase cluster initially and then pass the cluster object to the Vector Store. Here, we are connecting using the username and password. +You can also connect using any other supported way to your cluster. + +For more information on connecting to the Couchbase cluster, please check the [Node SDK documentation](https://docs.couchbase.com/nodejs-sdk/current/hello-world/start-using-sdk.html#connect). 
+ +```typescript +import { Cluster } from "couchbase"; + +const connectionString = "couchbase://localhost"; // or couchbases://localhost if you are using TLS +const dbUsername = "Administrator"; // valid database user with read access to the bucket being queried +const dbPassword = "Password"; // password for the database user + +const couchbaseClient = await Cluster.connect(connectionString, { + username: dbUsername, + password: dbPassword, + configProfile: "wanDevelopment", +}); +``` + +## Basic Setup + +```typescript +import { CouchbaseQueryVectorStore, DistanceStrategy } from "@langchain/community/vectorstores/couchbase_query"; +import { OpenAIEmbeddings } from "@langchain/openai"; +import { Cluster } from "couchbase"; + +// Connect to Couchbase +const cluster = await Cluster.connect("couchbase://localhost", { + username: "Administrator", + password: "password", +}); + +// Initialize embeddings +const embeddings = new OpenAIEmbeddings(); + +// Configure the vector store +const vectorStore = await CouchbaseQueryVectorStore.initialize(embeddings, { + cluster, + bucketName: "my-bucket", + scopeName: "my-scope", + collectionName: "my-collection", + textKey: "text", // optional, defaults to "text" + embeddingKey: "embedding", // optional, defaults to "embedding" + distanceStrategy: DistanceStrategy.COSINE, // optional, defaults to DOT +}); +``` + +## Creating Vector Indexes + +The Query vector store supports creating vector indexes to improve search performance. 
There are two types of indexes available: + +### BHIVE Index +A specialized vector index optimized for vector operations using Couchbase's vector indexing capabilities: + +```typescript +import { IndexType } from "@langchain/community/vectorstores/couchbase_query"; + +await vectorStore.createIndex({ + indexType: IndexType.BHIVE, + indexDescription: "IVF,SQ8", + indexName: "my_vector_index", // optional + vectorDimension: 1536, // optional, auto-detected from embeddings + distanceMetric: DistanceStrategy.COSINE, // optional, uses store default + fields: ["text", "metadata"], // optional, defaults to text field + whereClause: "type = 'document'", // optional filter + indexScanNprobes: 10, // optional tuning parameter + indexTrainlist: 1000, // optional tuning parameter +}); +``` + +**Generated SQL++:** +```sql +CREATE VECTOR INDEX `my_vector_index` ON `bucket`.`scope`.`collection` +(`embedding` VECTOR) INCLUDE (`text`, `metadata`) +WHERE type = 'document' USING GSI WITH {'dimension': 1536, 'similarity': 'cosine', 'description': 'IVF,SQ8'} +``` + +### Composite Index +A general-purpose GSI index that includes vector fields alongside scalar fields: + +```typescript +await vectorStore.createIndex({ + indexType: IndexType.COMPOSITE, + indexDescription: "IVF1024,SQ8", + indexName: "my_composite_index", + vectorDimension: 1536, + fields: ["text", "metadata.category"], + whereClause: "created_date > '2023-01-01'", + indexScanNprobes: 3, + indexTrainlist: 10000, +}); +``` + +**Generated SQL++:** +```sql +CREATE INDEX `my_composite_index` ON `bucket`.`scope`.`collection` +(`text`, `metadata.category`, `embedding` VECTOR) +WHERE created_date > '2023-01-01' USING GSI +WITH {'dimension': 1536, 'similarity': 'dot', 'description': 'IVF1024,SQ8', 'scan_nprobes': 3, 'trainlist': 10000} +``` + +### Key Differences + +| Aspect | BHIVE Index | COMPOSITE Index | +|--------|-------------|-----------------| +| **SQL++ Syntax** | `CREATE VECTOR INDEX` | `CREATE INDEX` | +| **Vector Field** 
| `(field VECTOR)` with `INCLUDE` clause | `(field1, field2, vector_field VECTOR)` | +| **Vector Parameters** | Supports all vector parameters | Supports all vector parameters | +| **Optimization** | Specialized for vector operations | General-purpose GSI with vector support | +| **Use Case** | Pure vector similarity search | Mixed vector and scalar queries | +| **Performance** | Optimized for vector distance calculations | Good for hybrid queries | +| **Tuning Parameters** | Supports `indexScanNprobes`, `indexTrainlist` | Supports `indexScanNprobes`, `indexTrainlist` | +| **Limitations** | Only one vector field, uses INCLUDE for other fields | One vector field among multiple index keys | + +## Basic Vector Search Example + +The following example showcases how to use Couchbase Query vector search and perform similarity search. + +```typescript +import { OpenAIEmbeddings } from "@langchain/openai"; +import { + CouchbaseQueryVectorStore, + DistanceStrategy, +} from "@langchain/community/vectorstores/couchbase_query"; +import { Cluster } from "couchbase"; +import { Document } from "@langchain/core/documents"; + +const connectionString = process.env.COUCHBASE_DB_CONN_STR ?? "couchbase://localhost"; +const databaseUsername = process.env.COUCHBASE_DB_USERNAME ?? "Administrator"; +const databasePassword = process.env.COUCHBASE_DB_PASSWORD ?? 
"Password"; + +const couchbaseClient = await Cluster.connect(connectionString, { + username: databaseUsername, + password: databasePassword, + configProfile: "wanDevelopment", +}); + +// OpenAI API Key is required to use OpenAIEmbeddings +const embeddings = new OpenAIEmbeddings({ + apiKey: process.env.OPENAI_API_KEY, +}); + +const vectorStore = await CouchbaseQueryVectorStore.initialize(embeddings, { + cluster: couchbaseClient, + bucketName: "testing", + scopeName: "_default", + collectionName: "_default", + textKey: "text", + embeddingKey: "embedding", + distanceStrategy: DistanceStrategy.COSINE, +}); + +// Add documents +const documents = [ + new Document({ + pageContent: "Couchbase is a NoSQL database", + metadata: { category: "database", type: "document" } + }), + new Document({ + pageContent: "Vector search enables semantic similarity", + metadata: { category: "ai", type: "document" } + }) +]; + +await vectorStore.addDocuments(documents); + +// Perform similarity search +const query = "What is a NoSQL database?"; +const results = await vectorStore.similaritySearch(query, 4); +console.log("Search results:", results[0]); + +// Search with scores +const resultsWithScores = await vectorStore.similaritySearchWithScore(query, 4); +console.log("Document:", resultsWithScores[0][0]); +console.log("Score:", resultsWithScores[0][1]); +``` + +## Searching Documents + +### Basic Similarity Search + +```typescript +// Basic similarity search +const results = await vectorStore.similaritySearch( + "What is a NoSQL database?", + 4 +); +``` + +### Search with Filters + +```typescript +// Search with filters +const filteredResults = await vectorStore.similaritySearch( + "database technology", + 4, + { + where: "metadata.category = 'database'", + fields: ["text", "metadata.category"] + } +); +``` + +### Search with Scores + +```typescript +// Search with scores +const resultsWithScores = await vectorStore.similaritySearchWithScore( + "vector search capabilities", + 4 +); +``` + 
+### Complex Filtering + +```typescript +const results = await vectorStore.similaritySearch( + "search query", + 10, + { + where: "metadata.category IN ['tech', 'science'] AND metadata.rating >= 4", + fields: ["content", "metadata.title", "metadata.rating"] + } +); +``` + +## Configuration Options + +### Distance Strategies + +- `DistanceStrategy.DOT` - Dot product (default) +- `DistanceStrategy.L2` - L2 (Euclidean) distance +- `DistanceStrategy.EUCLIDEAN` - Euclidean distance +- `DistanceStrategy.COSINE` - Cosine distance +- `DistanceStrategy.L2_SQUARED` - Squared L2 distance +- `DistanceStrategy.EUCLIDEAN_SQUARED` - Squared Euclidean distance + +### Index Types + +- `IndexType.BHIVE` - Specialized vector index for optimal vector search performance +- `IndexType.COMPOSITE` - General-purpose index that can include vector and scalar fields + +## Advanced Usage + +### Custom Vector Fields + +```typescript +const vectorStore = await CouchbaseQueryVectorStore.initialize(embeddings, { + cluster, + bucketName: "my-bucket", + scopeName: "my-scope", + collectionName: "my-collection", + textKey: "content", + embeddingKey: "vector_embedding", + distanceStrategy: DistanceStrategy.L2, +}); +``` + +### Creating from Texts + +```typescript +const texts = [ + "Couchbase is a NoSQL database", + "Vector search enables semantic similarity" +]; + +const metadatas = [ + { category: "database" }, + { category: "ai" } +]; + +const vectorStore = await CouchbaseQueryVectorStore.fromTexts( + texts, + metadatas, + embeddings, + { + cluster, + bucketName: "my-bucket", + scopeName: "my-scope", + collectionName: "my-collection" + } +); +``` + +### Deleting Documents + +```typescript +const documentIds = ["doc1", "doc2", "doc3"]; +await vectorStore.delete({ ids: documentIds }); +``` + +## Performance Considerations + +1. **Create Indexes**: Use `createIndex()` to create appropriate vector indexes for better performance +2. 
**Choose Index Type**: + - Use **BHIVE indexes** for pure vector search workloads where you primarily perform similarity searches + - Use **COMPOSITE indexes** for mixed queries that combine vector similarity with scalar field filtering +3. **Tune Parameters**: Adjust `indexScanNprobes` and `indexTrainlist` based on your data size and performance requirements +4. **Filter Early**: Use WHERE clauses to reduce the search space before vector calculations +5. **Index Strategy**: + - **BHIVE**: Better for high-performance vector similarity search with minimal scalar filtering + - **COMPOSITE**: Better when you frequently filter by both vector similarity and scalar fields in the same query + +## Error Handling + +```typescript +try { + await vectorStore.createIndex({ + indexType: IndexType.BHIVE, + indexDescription: "IVF,SQ8", + }); +} catch (error) { + console.error("Index creation failed:", error.message); +} +``` + +### Common Errors + +#### Insufficient Training Data +If you see errors related to insufficient training data, you may need to: +- Increase the `indexTrainlist` parameter (default recommendation: ~50 vectors per centroid) +- Ensure you have enough documents with vector embeddings in your collection +- For collections with < 1 million vectors, use `number_of_vectors / 1000` for centroids +- For larger collections, use `sqrt(number_of_vectors)` for centroids + +## Comparison with CouchbaseVectorStore + +| Feature | CouchbaseQueryVectorStore | CouchbaseVectorStore | +|---------|---------------------------|----------------------| +| Service | Query (SQL++) | Search (FTS) | +| Index Required | Optional (for performance) | Required | +| Query Language | SQL++ WHERE clauses | Search query syntax | +| Vector Functions | APPROX_VECTOR_DISTANCE | VectorQuery API | +| Setup Complexity | Lower | Higher | +| Performance | Good with indexes | Optimized for search | + +
+
+ +# Frequently Asked Questions + +## Question: Do I need to create an index before using CouchbaseQueryVectorStore? + +No, unlike the Search-based CouchbaseVectorStore, the Query-based implementation can work without pre-created indexes. However, creating appropriate vector indexes (BHIVE or COMPOSITE) will significantly improve query performance. + +## Question: When should I use BHIVE vs COMPOSITE indexes? + +- Use **BHIVE indexes** when you primarily perform vector similarity searches with minimal filtering on other fields +- Use **COMPOSITE indexes** when you frequently combine vector similarity with filtering on scalar fields in the same query + +## Question: Can I use both CouchbaseVectorStore and CouchbaseQueryVectorStore on the same data? + +Yes, both can work on the same document structure. However, they use different services (Search vs Query) and have different indexing requirements. + +## Related + +- Vector store [conceptual guide](/docs/concepts/#vectorstores) +- Vector store [how-to guides](/docs/how_to/#vectorstores) diff --git a/src/oss/javascript/integrations/vectorstores/couchbase.mdx b/src/oss/javascript/integrations/vectorstores/couchbase_search.mdx similarity index 95% rename from src/oss/javascript/integrations/vectorstores/couchbase.mdx rename to src/oss/javascript/integrations/vectorstores/couchbase_search.mdx index 20151f9c6..06fe2acda 100644 --- a/src/oss/javascript/integrations/vectorstores/couchbase.mdx +++ b/src/oss/javascript/integrations/vectorstores/couchbase_search.mdx @@ -1,13 +1,10 @@ --- -title: Couchbase +title: Couchbase Search Vector Store --- -[Couchbase](http://couchbase.com/) is an award-winning distributed NoSQL cloud database that delivers unmatched versatility, performance, scalability, and financial value for all of your cloud, mobile, -AI, and edge computing applications. Couchbase embraces AI with coding assistance for developers and vector search for their applications. 
+The `CouchbaseSearchVectorStore` is an implementation of Vector Search that is a part of the [Full Text Search Service](https://docs.couchbase.com/server/current/learn/services-and-indexes/services/search-service.html) (Search Service) in Couchbase. -Vector Search is a part of the [Full Text Search Service](https://docs.couchbase.com/server/current/learn/services-and-indexes/services/search-service.html) (Search Service) in Couchbase. - -This tutorial explains how to use Vector Search in Couchbase. You can work with both [Couchbase Capella](https://www.couchbase.com/products/capella/) and your self-managed Couchbase Server. +This tutorial explains how to use Vector Search via the Couchbase Search Service. You can work with both [Couchbase Capella](https://www.couchbase.com/products/capella/) and your self-managed Couchbase Server. ## Installation @@ -162,7 +159,7 @@ const store = await CouchbaseVectorStore.initialize( ``` ## Basic Vector Search Example -The following example showcases how to use couchbase vector search and perform similarity search. +The following example showcases how to use couchbase vector search via the Search Service and perform similarity search. For this example, we are going to load the "state_of_the_union.txt" file via the TextLoader, chunk the text into 500 character chunks with no overlaps and index all these chunks into Couchbase. diff --git a/src/oss/javascript/integrations/vectorstores/index.mdx b/src/oss/javascript/integrations/vectorstores/index.mdx index 3ff4af49a..630f0240c 100644 --- a/src/oss/javascript/integrations/vectorstores/index.mdx +++ b/src/oss/javascript/integrations/vectorstores/index.mdx @@ -685,9 +685,16 @@ LangChain.js integrates with a variety of vector stores. 
You can check out a ful cta="View guide" /> + From 718bd98da08e955b01b232a798f9bf84d27de442 Mon Sep 17 00:00:00 2001 From: Elliot Scribner Date: Tue, 21 Oct 2025 15:14:30 -0700 Subject: [PATCH 2/8] Adjust URLs --- .../javascript/integrations/vectorstores/couchbase_query.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx b/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx index 84eedc83a..65678718a 100644 --- a/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx +++ b/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx @@ -382,5 +382,5 @@ Yes, both can work on the same document structure. However, they use different s ## Related -- Vector store [conceptual guide](/docs/concepts/#vectorstores) -- Vector store [how-to guides](/docs/how_to/#vectorstores) +- Vector store [conceptual guide](/oss/integrations/vectorstores) +- Vector store [how-to guides](/oss/integrations/vectorstores) From a45f8e0ac90e4eb09e401e162bd647b177c57788 Mon Sep 17 00:00:00 2001 From: Elliot Scribner Date: Wed, 22 Oct 2025 11:39:03 -0700 Subject: [PATCH 3/8] Re-order and adjust language --- .../integrations/providers/all_providers.mdx | 14 +++++++------- .../javascript/integrations/vectorstores/index.mdx | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/oss/javascript/integrations/providers/all_providers.mdx b/src/oss/javascript/integrations/providers/all_providers.mdx index 5b0e1b971..e275bb36b 100644 --- a/src/oss/javascript/integrations/providers/all_providers.mdx +++ b/src/oss/javascript/integrations/providers/all_providers.mdx @@ -865,17 +865,17 @@ Browse the complete collection of integrations available for JavaScript/TypeScri - NoSQL database with vector search capabilities via the search service. + Recommended vector search method in Couchbase NoSQL database via query service. 
- + - NoSQL database with vector search capabilities via the query service. + Alternative vector search method in Couchbase NoSQL database via search service. From 910c5fbd1a7f06986baf1586bd908646c86cd09d Mon Sep 17 00:00:00 2001 From: Elliot Scribner Date: Wed, 22 Oct 2025 15:41:12 -0700 Subject: [PATCH 4/8] Rename BHIVE to HYPERSCALE --- .../vectorstores/couchbase_query.mdx | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx b/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx index 65678718a..36c403d94 100644 --- a/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx +++ b/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx @@ -77,14 +77,14 @@ const vectorStore = await CouchbaseQueryVectorStore.initialize(embeddings, { The Query vector store supports creating vector indexes to improve search performance. There are two types of indexes available: -### BHIVE Index +### HYPERSCALE Index A specialized vector index optimized for vector operations using Couchbase's vector indexing capabilities: ```typescript import { IndexType } from "@langchain/community/vectorstores/couchbase_query"; await vectorStore.createIndex({ - indexType: IndexType.BHIVE, + indexType: IndexType.HYPERSCALE, indexDescription: "IVF,SQ8", indexName: "my_vector_index", // optional vectorDimension: 1536, // optional, auto-detected from embeddings @@ -129,7 +129,7 @@ WITH {'dimension': 1536, 'similarity': 'dot', 'description': 'IVF1024,SQ8', 'sca ### Key Differences -| Aspect | BHIVE Index | COMPOSITE Index | +| Aspect | HYPERSCALE Index | COMPOSITE Index | |--------|-------------|-----------------| | **SQL++ Syntax** | `CREATE VECTOR INDEX` | `CREATE INDEX` | | **Vector Field** | `(field VECTOR)` with `INCLUDE` clause | `(field1, field2, vector_field VECTOR)` | @@ -265,7 +265,7 @@ const results = await vectorStore.similaritySearch( ### Index Types -- 
`IndexType.BHIVE` - Specialized vector index for optimal vector search performance +- `IndexType.HYPERSCALE` - Specialized vector index for optimal vector search performance - `IndexType.COMPOSITE` - General-purpose index that can include vector and scalar fields ## Advanced Usage @@ -321,12 +321,12 @@ await vectorStore.delete({ ids: documentIds }); 1. **Create Indexes**: Use `createIndex()` to create appropriate vector indexes for better performance 2. **Choose Index Type**: - - Use **BHIVE indexes** for pure vector search workloads where you primarily perform similarity searches + - Use **HYPERSCALE indexes** for pure vector search workloads where you primarily perform similarity searches - Use **COMPOSITE indexes** for mixed queries that combine vector similarity with scalar field filtering 3. **Tune Parameters**: Adjust `indexScanNprobes` and `indexTrainlist` based on your data size and performance requirements 4. **Filter Early**: Use WHERE clauses to reduce the search space before vector calculations 5. **Index Strategy**: - - **BHIVE**: Better for high-performance vector similarity search with minimal scalar filtering + - **HYPERSCALE**: Better for high-performance vector similarity search with minimal scalar filtering - **COMPOSITE**: Better when you frequently filter by both vector similarity and scalar fields in the same query ## Error Handling @@ -334,7 +334,7 @@ await vectorStore.delete({ ids: documentIds }); ```typescript try { await vectorStore.createIndex({ - indexType: IndexType.BHIVE, + indexType: IndexType.HYPERSCALE, indexDescription: "IVF,SQ8", }); } catch (error) { @@ -369,11 +369,11 @@ If you see errors related to insufficient training data, you may need to: ## Question: Do I need to create an index before using CouchbaseQueryVectorStore? -No, unlike the Search-based CouchbaseVectorStore, the Query-based implementation can work without pre-created indexes. 
However, creating appropriate vector indexes (BHIVE or COMPOSITE) will significantly improve query performance. +No, unlike the Search-based CouchbaseVectorStore, the Query-based implementation can work without pre-created indexes. However, creating appropriate vector indexes (HYPERSCALE or COMPOSITE) will significantly improve query performance. -## Question: When should I use BHIVE vs COMPOSITE indexes? +## Question: When should I use HYPERSCALE vs COMPOSITE indexes? -- Use **BHIVE indexes** when you primarily perform vector similarity searches with minimal filtering on other fields +- Use **HYPERSCALE indexes** when you primarily perform vector similarity searches with minimal filtering on other fields - Use **COMPOSITE indexes** when you frequently combine vector similarity with filtering on scalar fields in the same query ## Question: Can I use both CouchbaseVectorStore and CouchbaseQueryVectorStore on the same data? From 17b9ade22fe71f58203492b4875ccbe9188f59ca Mon Sep 17 00:00:00 2001 From: Elliot Scribner Date: Wed, 22 Oct 2025 16:04:06 -0700 Subject: [PATCH 5/8] Polish --- .../vectorstores/couchbase_query.mdx | 53 +++++++++---------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx b/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx index 36c403d94..35fd3a5a8 100644 --- a/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx +++ b/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx @@ -2,17 +2,17 @@ title: Couchbase Query Vector Store --- -The `CouchbaseQueryVectorStore` is an implementation of Vector Search that uses the [Query Service](https://docs.couchbase.com/server/current/learn/services-and-indexes/services/query-service.html) (SQL++) and [Index Service](https://docs.couchbase.com/server/current/learn/services-and-indexes/services/index-service.html) for vector similarity search instead of the Search service. 
This provides an alternative approach for vector operations using SQL++ queries with vector functions. +The `CouchbaseQueryVectorStore` is the preferred implementation of Vector Search in Couchbase. It uses the [Query Service](https://docs.couchbase.com/server/current/learn/services-and-indexes/services/query-service.html) (SQL++) and [Index Service](https://docs.couchbase.com/server/current/learn/services-and-indexes/services/index-service.html) for vector similarity search, instead of the Search service. This provides a more powerful and straightforward approach for vector operations using SQL++ queries with vector functions. More information about Couchbase's vector search capabilities can be found in the official documentation: [Choose the Right Vector Index](https://docs.couchbase.com/server/current/vector-index/use-vector-indexes.html). - -This functionality is only available in Couchbase Server 8.0 and above, which introduced vector search capabilities in the Query service. - + +This functionality is only available in Couchbase Server 8.0 and above. + -## Key Differences from CouchbaseVectorStore +## Key Differences from `CouchbaseSearchVectorStore` (formerly `CouchbaseVectorStore`) -- **Query Service**: Uses Couchbase's Query service with SQL++ instead of the Search service +- **Query and Index Service**: Uses Couchbase's Query service with SQL++ instead of the Search service - **No Index Required**: Does not require a pre-configured search index for basic operations - **SQL++ Syntax**: Supports WHERE clauses and SQL++ query syntax for filtering - **Vector Functions**: Uses `APPROX_VECTOR_DISTANCE` function for similarity calculations @@ -26,15 +26,15 @@ npm install couchbase @langchain/openai @langchain/community @langchain/core ## Create Couchbase Connection Object -We create a connection to the Couchbase cluster initially and then pass the cluster object to the Vector Store. Here, we are connecting using the username and password. 
-You can also connect using any other supported way to your cluster. +We create a connection to the Couchbase cluster and then pass the cluster object to the Vector Store. Here, we are connecting using the username and password. +You can also connect to your cluster using any other supported method. For more information on connecting to the Couchbase cluster, please check the [Node SDK documentation](https://docs.couchbase.com/nodejs-sdk/current/hello-world/start-using-sdk.html#connect). ```typescript import { Cluster } from "couchbase"; -const connectionString = "couchbase://localhost"; // or couchbases://localhost if you are using TLS +const connectionString = "couchbase://localhost"; const dbUsername = "Administrator"; // valid database user with read access to the bucket being queried const dbPassword = "Password"; // password for the database user @@ -67,7 +67,7 @@ const vectorStore = await CouchbaseQueryVectorStore.initialize(embeddings, { bucketName: "my-bucket", scopeName: "my-scope", collectionName: "my-collection", - textKey: "text", // optional, defaults to "text" + textKey: "text", // optional, defaults to "text" embeddingKey: "embedding", // optional, defaults to "embedding" distanceStrategy: DistanceStrategy.COSINE, // optional, defaults to DOT }); @@ -77,7 +77,7 @@ const vectorStore = await CouchbaseQueryVectorStore.initialize(embeddings, { The Query vector store supports creating vector indexes to improve search performance. 
There are two types of indexes available: -### HYPERSCALE Index +### Hyperscale Index A specialized vector index optimized for vector operations using Couchbase's vector indexing capabilities: ```typescript @@ -129,7 +129,7 @@ WITH {'dimension': 1536, 'similarity': 'dot', 'description': 'IVF1024,SQ8', 'sca ### Key Differences -| Aspect | HYPERSCALE Index | COMPOSITE Index | +| Aspect | Hyperscale Index | Composite Index | |--------|-------------|-----------------| | **SQL++ Syntax** | `CREATE VECTOR INDEX` | `CREATE INDEX` | | **Vector Field** | `(field VECTOR)` with `INCLUDE` clause | `(field1, field2, vector_field VECTOR)` | @@ -321,13 +321,10 @@ await vectorStore.delete({ ids: documentIds }); 1. **Create Indexes**: Use `createIndex()` to create appropriate vector indexes for better performance 2. **Choose Index Type**: - - Use **HYPERSCALE indexes** for pure vector search workloads where you primarily perform similarity searches - - Use **COMPOSITE indexes** for mixed queries that combine vector similarity with scalar field filtering + - Use **Hyperscale indexes** for pure vector search workloads where you primarily perform similarity searches + - Use **Composite indexes** for mixed queries that combine vector similarity with scalar field filtering 3. **Tune Parameters**: Adjust `indexScanNprobes` and `indexTrainlist` based on your data size and performance requirements 4. **Filter Early**: Use WHERE clauses to reduce the search space before vector calculations -5. 
**Index Strategy**: - - **HYPERSCALE**: Better for high-performance vector similarity search with minimal scalar filtering - - **COMPOSITE**: Better when you frequently filter by both vector similarity and scalar fields in the same query ## Error Handling @@ -351,9 +348,9 @@ If you see errors related to insufficient training data, you may need to: - For collections with < 1 million vectors, use `number_of_vectors / 1000` for centroids - For larger collections, use `sqrt(number_of_vectors)` for centroids -## Comparison with CouchbaseVectorStore +## Comparison with `CouchbaseSearchVectorStore` -| Feature | CouchbaseQueryVectorStore | CouchbaseVectorStore | +| Feature | `CouchbaseQueryVectorStore` | `CouchbaseSearchVectorStore` | |---------|---------------------------|----------------------| | Service | Query (SQL++) | Search (FTS) | | Index Required | Optional (for performance) | Required | @@ -362,21 +359,19 @@ If you see errors related to insufficient training data, you may need to: | Setup Complexity | Lower | Higher | | Performance | Good with indexes | Optimized for search | -
-
+## Frequently Asked Questions -# Frequently Asked Questions +### Do I need to create an index before using `CouchbaseQueryVectorStore`? -## Question: Do I need to create an index before using CouchbaseQueryVectorStore? +No, unlike the Search-based `CouchbaseSearchVectorStore`, the Query-based implementation can work without pre-created indexes. However, creating appropriate vector indexes (Hyperscale or Composite) will significantly improve query performance. -No, unlike the Search-based CouchbaseVectorStore, the Query-based implementation can work without pre-created indexes. However, creating appropriate vector indexes (HYPERSCALE or COMPOSITE) will significantly improve query performance. +### When should I use Hyperscale vs. Composite indexes? -## Question: When should I use HYPERSCALE vs COMPOSITE indexes? +- Use **Hyperscale indexes** when you primarily perform vector similarity searches with minimal filtering on other fields +- Use **Composite indexes** when you frequently combine vector similarity with filtering on scalar fields in the same query +- Learn more about how to [Choose the Right Vector Index](https://docs.couchbase.com/server/current/vector-index/use-vector-indexes.html) -- Use **HYPERSCALE indexes** when you primarily perform vector similarity searches with minimal filtering on other fields -- Use **COMPOSITE indexes** when you frequently combine vector similarity with filtering on scalar fields in the same query - -## Question: Can I use both CouchbaseVectorStore and CouchbaseQueryVectorStore on the same data? +### Can I use both `CouchbaseQueryVectorStore` and `CouchbaseSearchVectorStore` on the same data? Yes, both can work on the same document structure. However, they use different services (Search vs Query) and have different indexing requirements. 
From b08a21df57aa330df7fce94dac78822eb17f4724 Mon Sep 17 00:00:00 2001 From: Elliot Scribner Date: Wed, 22 Oct 2025 16:30:34 -0700 Subject: [PATCH 6/8] Copilot PR Comments --- src/oss/javascript/integrations/providers/all_providers.mdx | 4 ++-- .../javascript/integrations/vectorstores/couchbase_query.mdx | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/oss/javascript/integrations/providers/all_providers.mdx b/src/oss/javascript/integrations/providers/all_providers.mdx index e275bb36b..a852fd587 100644 --- a/src/oss/javascript/integrations/providers/all_providers.mdx +++ b/src/oss/javascript/integrations/providers/all_providers.mdx @@ -870,14 +870,14 @@ Browse the complete collection of integrations available for JavaScript/TypeScri > Recommended vector search method in Couchbase NoSQL database via query service.
- + Alternative vector search method in Couchbase NoSQL database via search service. - + Date: Tue, 28 Oct 2025 23:39:50 -0400 Subject: [PATCH 7/8] sentence case --- .../vectorstores/couchbase_query.mdx | 62 ++++++++++--------- .../vectorstores/couchbase_search.mdx | 28 ++++----- 2 files changed, 46 insertions(+), 44 deletions(-) diff --git a/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx b/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx index 10d6e9103..43e485454 100644 --- a/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx +++ b/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx @@ -7,10 +7,12 @@ The `CouchbaseQueryVectorStore` is the preferred implementation of Vector Search More information about Couchbase's vector search capabilities can be found in the official documentation: [Choose the Right Vector Index](https://docs.couchbase.com/server/current/vector-index/use-vector-indexes.html). -This functionality is only available in Couchbase Server 8.0 and above. + This functionality is only available in Couchbase Server 8.0 and above. -## Key Differences from `CouchbaseSearchVectorStore` (formerly `CouchbaseVectorStore`) +## Key differences from `CouchbaseSearchVectorStore` + +(formerly `CouchbaseVectorStore`) - **Query and Index Service**: Uses Couchbase's Query service with SQL++ instead of the Search service - **No Index Required**: Does not require a pre-configured search index for basic operations @@ -24,7 +26,7 @@ This functionality is only available in Couchbase Server 8.0 and above. npm install couchbase @langchain/openai @langchain/community @langchain/core ``` -## Create Couchbase Connection Object +## Create Couchbase connection object We create a connection to the Couchbase cluster and then pass the cluster object to the Vector Store. Here, we are connecting using the username and password. You can also connect to your cluster using any other supported method. 
@@ -45,7 +47,7 @@ const couchbaseClient = await Cluster.connect(connectionString, { }); ``` -## Basic Setup +## Basic setup ```typescript import { CouchbaseQueryVectorStore, DistanceStrategy } from "@langchain/community/vectorstores/couchbase_query"; @@ -73,11 +75,11 @@ const vectorStore = await CouchbaseQueryVectorStore.initialize(embeddings, { }); ``` -## Creating Vector Indexes +## Creating vector indexes The Query vector store supports creating vector indexes to improve search performance. There are two types of indexes available: -### Hyperscale Index +### Hyperscale index A specialized vector index optimized for vector operations using Couchbase's vector indexing capabilities: ```typescript @@ -98,12 +100,12 @@ await vectorStore.createIndex({ **Generated SQL++:** ```sql -CREATE VECTOR INDEX `my_vector_index` ON `bucket`.`scope`.`collection` -(`embedding` VECTOR) INCLUDE (`text`, `metadata`) +CREATE VECTOR INDEX `my_vector_index` ON `bucket`.`scope`.`collection` +(`embedding` VECTOR) INCLUDE (`text`, `metadata`) WHERE type = 'document' USING GSI WITH {'dimension': 1536, 'similarity': 'cosine', 'description': 'IVF,SQ8'} ``` -### Composite Index +### Composite index A general-purpose GSI index that includes vector fields alongside scalar fields: ```typescript @@ -121,13 +123,13 @@ await vectorStore.createIndex({ **Generated SQL++:** ```sql -CREATE INDEX `my_composite_index` ON `bucket`.`scope`.`collection` -(`text`, `metadata.category`, `embedding` VECTOR) -WHERE created_date > '2023-01-01' USING GSI +CREATE INDEX `my_composite_index` ON `bucket`.`scope`.`collection` +(`text`, `metadata.category`, `embedding` VECTOR) +WHERE created_date > '2023-01-01' USING GSI WITH {'dimension': 1536, 'similarity': 'dot', 'description': 'IVF1024,SQ8', 'scan_nprobes': 3, 'trainlist': 10000} ``` -### Key Differences +### Key differences | Aspect | Hyperscale Index | Composite Index | |--------|-------------|-----------------| @@ -140,7 +142,7 @@ WITH {'dimension': 1536, 
'similarity': 'dot', 'description': 'IVF1024,SQ8', 'sca | **Tuning Parameters** | Supports `indexScanNprobes`, `indexTrainlist` | Supports `indexScanNprobes`, `indexTrainlist` | | **Limitations** | Only one vector field, uses INCLUDE for other fields | One vector field among multiple index keys | -## Basic Vector Search Example +## Basic vector search example The following example showcases how to use Couchbase Query vector search and perform similarity search. @@ -203,9 +205,9 @@ console.log("Document:", resultsWithScores[0][0]); console.log("Score:", resultsWithScores[0][1]); ``` -## Searching Documents +## Searching documents -### Basic Similarity Search +### Basic similarity search ```typescript // Basic similarity search @@ -215,7 +217,7 @@ const results = await vectorStore.similaritySearch( ); ``` -### Search with Filters +### Search with filters ```typescript // Search with filters @@ -229,7 +231,7 @@ const filteredResults = await vectorStore.similaritySearch( ); ``` -### Search with Scores +### Search with scores ```typescript // Search with scores @@ -239,7 +241,7 @@ const resultsWithScores = await vectorStore.similaritySearchWithScore( ); ``` -### Complex Filtering +### Complex filtering ```typescript const results = await vectorStore.similaritySearch( @@ -252,9 +254,9 @@ const results = await vectorStore.similaritySearch( ); ``` -## Configuration Options +## Configuration options -### Distance Strategies +### Distance strategies - `DistanceStrategy.DOT` - Dot product (default) - `DistanceStrategy.L2` - L2 (Euclidean) distance @@ -263,14 +265,14 @@ const results = await vectorStore.similaritySearch( - `DistanceStrategy.L2_SQUARED` - Squared L2 distance - `DistanceStrategy.EUCLIDEAN_SQUARED` - Squared Euclidean distance -### Index Types +### Index types - `IndexType.HYPERSCALE` - Specialized vector index for optimal vector search performance - `IndexType.COMPOSITE` - General-purpose index that can include vector and scalar fields -## Advanced Usage +## 
Advanced usage -### Custom Vector Fields +### Custom vector fields ```typescript const vectorStore = await CouchbaseQueryVectorStore.initialize(embeddings, { @@ -310,23 +312,23 @@ const vectorStore = await CouchbaseQueryVectorStore.fromTexts( ); ``` -### Deleting Documents +### Deleting documents ```typescript const documentIds = ["doc1", "doc2", "doc3"]; await vectorStore.delete({ ids: documentIds }); ``` -## Performance Considerations +## Performance considerations 1. **Create Indexes**: Use `createIndex()` to create appropriate vector indexes for better performance -2. **Choose Index Type**: +2. **Choose Index Type**: - Use **Hyperscale indexes** for pure vector search workloads where you primarily perform similarity searches - Use **Composite indexes** for mixed queries that combine vector similarity with scalar field filtering 3. **Tune Parameters**: Adjust `indexScanNprobes` and `indexTrainlist` based on your data size and performance requirements 4. **Filter Early**: Use WHERE clauses to reduce the search space before vector calculations -## Error Handling +## Error handling ```typescript try { @@ -339,9 +341,9 @@ try { } ``` -### Common Errors +### Common errors -#### Insufficient Training Data +#### Insufficient training data If you see errors related to insufficient training data, you may need to: - Increase the `indexTrainlist` parameter (default recommendation: ~50 vectors per centroid) - Ensure you have enough documents with vector embeddings in your collection diff --git a/src/oss/javascript/integrations/vectorstores/couchbase_search.mdx b/src/oss/javascript/integrations/vectorstores/couchbase_search.mdx index 06fe2acda..51e236299 100644 --- a/src/oss/javascript/integrations/vectorstores/couchbase_search.mdx +++ b/src/oss/javascript/integrations/vectorstores/couchbase_search.mdx @@ -13,7 +13,7 @@ You will need couchbase and langchain community to use couchbase vector store. 
F ```bash npm npm install couchbase @langchain/openai @langchain/community @langchain/core ``` -## Create Couchbase Connection Object +## Create Couchbase connection object We create a connection to the Couchbase cluster initially and then pass the cluster object to the Vector Store. Here, we are connecting using the username and password. You can also connect using any other supported way to your cluster. @@ -33,7 +33,7 @@ const couchbaseClient = await Cluster.connect(connectionString, { configProfile: "wanDevelopment", }); ``` -## Create the Search Index +## Create the search index Currently, the Search index needs to be created from the Couchbase Capella or Server UI or using the REST interface. @@ -43,7 +43,7 @@ Let us define a Search index with the name `vector-index` on the testing bucket. We are defining an index on the `testing` bucket's `_default` scope on the `_default` collection with the vector field set to `embedding` with 1536 dimensions and the text field set to `text`. We are also indexing and storing all the fields under `metadata` in the document as a dynamic mapping to account for varying document structures. The similarity metric is set to `dot_product`. -### How to Import an Index to the Full Text Search service? +### How to import an index to the full text search service? - [Couchbase Server](https://docs.couchbase.com/server/current/search/import-search-index.html) - Click on Search -> Add Index -> Import @@ -54,7 +54,7 @@ We are also indexing and storing all the fields under `metadata` in the document - Import the file in Capella using the instructions in the documentation. - Click on Create Index to create the index. 
-### Index Definition +### Index definition ```json { @@ -157,7 +157,7 @@ const store = await CouchbaseVectorStore.initialize( couchbaseConfig ); ``` -## Basic Vector Search Example +## Basic vector search example The following example showcases how to use couchbase vector search via the Search Service and perform similarity search. For this example, we are going to load the "state_of_the_union.txt" file via the TextLoader, @@ -236,7 +236,7 @@ const result = await store.similaritySearch(query, 1, { }); console.log(result[0]); ``` -## Specifying Fields to Return +## Specifying fields to return You can specify the fields to return from the document using `fields` parameter in the filter during searches. These fields are returned as part of the `metadata` object. You can fetch any field that is stored in the index. @@ -253,7 +253,7 @@ const result = await store.similaritySearch(query, 1, { }); console.log(result[0]); ``` -## Hybrid Search +## Hybrid search Couchbase allows you to do hybrid searches by combining vector search results with searches on non-vector fields of the document like the `metadata` object. @@ -263,7 +263,7 @@ The scores of each of the component searches are added up to get the total score To perform hybrid searches, there is an optional key, `searchOptions` in `fields` parameter that can be passed to all the similarity searches. The different search/query possibilities for the `searchOptions` can be found [here](https://docs.couchbase.com/server/current/search/search-request-params.html#query-object). -### Create Diverse Metadata for Hybrid Search +### Create diverse metadata for hybrid search In order to simulate hybrid search, let us create some random metadata from the existing documents. We uniformly add three fields to the metadata, `date` between 2010 & 2020, `rating` between 1 & 5 and `author` set to either John Doe or Jane Doe. 
@@ -285,7 +285,7 @@ const store = await CouchbaseVectorStore.fromDocuments( const query = "What did the president say about Ketanji Brown Jackson"; const independenceQuery = "Any mention about independence?"; ``` -### Example: Search by Exact Value +### Example: Search by exact value We can search for exact matches on a textual field like the author in the `metadata` object. @@ -298,7 +298,7 @@ const exactValueResult = await store.similaritySearch(query, 4, { }); console.log(exactValueResult[0]); ``` -### Example: Search by Partial Match +### Example: Search by partial match We can search for partial matches by specifying a fuzziness for the search. This is useful when you want to search for slight variations or misspellings of a search query. @@ -313,7 +313,7 @@ const partialMatchResult = await store.similaritySearch(query, 4, { }); console.log(partialMatchResult[0]); ``` -### Example: Search by Date Range Query +### Example: Search by date range query We can search for documents that are within a date range query on a date field like `metadata.date`. @@ -332,7 +332,7 @@ const dateRangeResult = await store.similaritySearch(independenceQuery, 4, { }); console.log(dateRangeResult[0]); ``` -### Example: Search by Numeric Range Query +### Example: Search by numeric range query We can search for documents that are within a range for a numeric field like `metadata.rating`. @@ -351,7 +351,7 @@ const ratingRangeResult = await store.similaritySearch(independenceQuery, 4, { }); console.log(ratingRangeResult[0]); ``` -### Example: Combining Multiple Search Conditions +### Example: Combining multiple search conditions Different queries can be combined using AND (conjuncts) or OR (disjuncts) operators.
@@ -372,7 +372,7 @@ const multipleConditionsResult = await store.similaritySearch(texts[0], 4, { console.log(multipleConditionsResult[0]); ``` -### Other Queries +### Other queries Similarly, you can use any of the supported Query methods like Geo Distance, Polygon Search, Wildcard, Regular Expressions, etc., in the `searchOptions` key of the `filter` parameter. Please refer to the documentation for more details on the available query methods and their syntax. From a5d95c5484d151d3e3f65ae6e5035525c3ab68de Mon Sep 17 00:00:00 2001 From: Elliot Scribner Date: Wed, 29 Oct 2025 12:39:28 -0700 Subject: [PATCH 8/8] Remove redundant L2 and L2 Squared Distance Metrics --- .../vectorstores/couchbase_query.mdx | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx b/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx index 43e485454..73bea17b5 100644 --- a/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx +++ b/src/oss/javascript/integrations/vectorstores/couchbase_query.mdx @@ -7,7 +7,7 @@ The `CouchbaseQueryVectorStore` is the preferred implementation of Vector Search More information about Couchbase's vector search capabilities can be found in the official documentation: [Choose the Right Vector Index](https://docs.couchbase.com/server/current/vector-index/use-vector-indexes.html). - This functionality is only available in Couchbase Server 8.0 and above. + This functionality is only available in [Couchbase 8.0](https://docs.couchbase.com/server/8.0/introduction/whats-new.html) and above.
## Key differences from `CouchbaseSearchVectorStore` @@ -18,7 +18,7 @@ More information about Couchbase's vector search capabilities can be found in th - **No Index Required**: Does not require a pre-configured search index for basic operations - **SQL++ Syntax**: Supports WHERE clauses and SQL++ query syntax for filtering - **Vector Functions**: Uses `APPROX_VECTOR_DISTANCE` function for similarity calculations -- **Distance Strategies**: Supports multiple distance strategies (Euclidean, Cosine, Dot Product) +- **Distance Strategies**: Supports multiple distance strategies (Dot Product, Cosine, Euclidean, Euclidean Squared) ## Installation @@ -258,12 +258,10 @@ const results = await vectorStore.similaritySearch( ### Distance strategies -- `DistanceStrategy.DOT` - Dot product (default) -- `DistanceStrategy.L2` - L2 (Euclidean) distance -- `DistanceStrategy.EUCLIDEAN` - Euclidean distance -- `DistanceStrategy.COSINE` - Cosine distance -- `DistanceStrategy.L2_SQUARED` - Squared L2 distance -- `DistanceStrategy.EUCLIDEAN_SQUARED` - Squared Euclidean distance +- `DistanceStrategy.DOT` - [Dot Product](https://docs.couchbase.com/server/current/vector-index/vectors-and-indexes-overview.html#dot) (default) +- `DistanceStrategy.COSINE` - [Cosine Similarity](https://docs.couchbase.com/server/current/vector-index/vectors-and-indexes-overview.html#cosine) +- `DistanceStrategy.EUCLIDEAN` - [Euclidean Distance](https://docs.couchbase.com/server/current/vector-index/vectors-and-indexes-overview.html#euclidean) (also known as L2) +- `DistanceStrategy.EUCLIDEAN_SQUARED` - [Euclidean Squared Distance](https://docs.couchbase.com/server/current/vector-index/vectors-and-indexes-overview.html#euclidean-squared) (also known as L2 Squared) ### Index types @@ -282,7 +280,7 @@ const vectorStore = await CouchbaseQueryVectorStore.initialize(embeddings, { collectionName: "my-collection", textKey: "content", embeddingKey: "vector_embedding", - distanceStrategy: DistanceStrategy.L2, + 
distanceStrategy: DistanceStrategy.EUCLIDEAN, }); ```