From 3dd536ad04235cd9e17bb4d24207d80074558858 Mon Sep 17 00:00:00 2001 From: Krause Henning Date: Thu, 30 Jan 2025 08:22:53 +0100 Subject: [PATCH 1/3] Change default document building from elasticsearch element; pass fields parameter in similarity_search* methods. --- .../_async/vectorstores.py | 15 ++++++++++++--- .../langchain_elasticsearch/_sync/vectorstores.py | 11 ++++++++++- .../langchain_elasticsearch/_utilities.py | 9 ++++++--- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py b/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py index 4dae8cd..62a2f16 100644 --- a/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py +++ b/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py @@ -389,6 +389,7 @@ async def asimilarity_search( k: int = 4, fetch_k: int = 50, filter: Optional[List[dict]] = None, + fields: Optional[List[str]] = None, *, custom_query: Optional[ Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]] @@ -413,6 +414,7 @@ async def asimilarity_search( k=k, num_candidates=fetch_k, filter=filter, + fields=fields, custom_query=custom_query, ) docs = _hits_to_docs_scores( @@ -504,7 +506,8 @@ async def asimilarity_search_with_score( query: str, k: int = 4, filter: Optional[List[dict]] = None, - *, + fields: Optional[List[str]] = None, + *, custom_query: Optional[ Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]] ] = None, @@ -528,7 +531,11 @@ async def asimilarity_search_with_score( raise ValueError("scores are currently not supported in hybrid mode") hits = await self._store.search( - query=query, k=k, filter=filter, custom_query=custom_query + query=query, + k=k, + filter=filter, + fields=fields, + custom_query=custom_query ) return _hits_to_docs_scores( hits=hits, @@ -541,7 +548,8 @@ async def asimilarity_search_by_vector_with_relevance_scores( embedding: List[float], k: int = 4, filter: Optional[List[Dict]] = None, - *, + fields: Optional[List[str]] = None, + *, custom_query: Optional[ Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]] ] = None, @@ -569,6 +577,7 @@ async def asimilarity_search_by_vector_with_relevance_scores( query_vector=embedding, k=k, filter=filter, + fields=fields, custom_query=custom_query, ) return _hits_to_docs_scores( diff --git a/libs/elasticsearch/langchain_elasticsearch/_sync/vectorstores.py b/libs/elasticsearch/langchain_elasticsearch/_sync/vectorstores.py index 168b131..fc8abf8 100644 --- a/libs/elasticsearch/langchain_elasticsearch/_sync/vectorstores.py +++ b/libs/elasticsearch/langchain_elasticsearch/_sync/vectorstores.py @@ -389,6 +389,7 @@ def similarity_search( k: int = 4, fetch_k: int = 50, filter: Optional[List[dict]] = None, + fields: Optional[List[str]] = None, *, custom_query: Optional[ Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]] @@ -413,6 +414,7 @@ def similarity_search( k=k, num_candidates=fetch_k, filter=filter, + fields=fields, custom_query=custom_query, ) docs = _hits_to_docs_scores( @@ -504,6 +506,7 @@ def similarity_search_with_score( query: str, k: int = 4, filter: Optional[List[dict]] = None, + fields: Optional[List[str]] = None, *, custom_query: Optional[ Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]] @@ -528,7 +531,11 @@ def similarity_search_with_score( raise ValueError("scores are currently not supported in hybrid mode") hits = self._store.search( - query=query, k=k, filter=filter, custom_query=custom_query + query=query, + k=k, + filter=filter, + fields=fields, + custom_query=custom_query ) return _hits_to_docs_scores( hits=hits, @@ -541,6 +548,7 @@ def similarity_search_by_vector_with_relevance_scores( embedding: List[float], k: int = 4, filter: Optional[List[Dict]] = None, + fields: Optional[List[str]] = None, *, custom_query: Optional[ Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]] @@ -569,6 +577,7 @@ def similarity_search_by_vector_with_relevance_scores( query_vector=embedding, k=k, filter=filter, + fields=fields, custom_query=custom_query, ) return _hits_to_docs_scores( diff --git a/libs/elasticsearch/langchain_elasticsearch/_utilities.py b/libs/elasticsearch/langchain_elasticsearch/_utilities.py index 7fb37a6..b4bae91 100644 --- a/libs/elasticsearch/langchain_elasticsearch/_utilities.py +++ b/libs/elasticsearch/langchain_elasticsearch/_utilities.py @@ -69,11 +69,14 @@ def _hits_to_docs_scores( documents = [] - def default_doc_builder(hit: Dict) -> Document: - return Document( + def default_doc_builder(hit: Dict, fields: List[str]) -> Document: + doc = Document( page_content=hit["_source"].get(content_field, ""), metadata=hit["_source"].get("metadata", {}), ) + for field_key in fields: + doc.metadata[field_key] = hit["_source"].get(field_key, None) + return doc doc_builder = doc_builder or default_doc_builder @@ -87,7 +90,7 @@ def default_doc_builder(hit: Dict) -> Document: ]: hit["_source"]["metadata"][field] = hit["_source"][field] - doc = doc_builder(hit) + doc = doc_builder(hit, fields) documents.append((doc, hit["_score"])) return documents From 01cfcac90b717f321c5f9a47f4a685a2f84bd485 Mon Sep 17 00:00:00 2001 From: krauhen Date: Wed, 5 Mar 2025 16:35:40 +0100 Subject: [PATCH 2/3] Remove indent. --- .../langchain_elasticsearch/_async/vectorstores.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py b/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py index 62a2f16..b12fb66 100644 --- a/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py +++ b/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py @@ -389,7 +389,7 @@ async def asimilarity_search( k: int = 4, fetch_k: int = 50, filter: Optional[List[dict]] = None, - fields: Optional[List[str]] = None, + fields: Optional[List[str]] = None, *, custom_query: Optional[ Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]] From 26f56d001fb33ac1846f24efc8508cf695b9ba9f Mon Sep 17 00:00:00 2001 From: krauhen Date: Wed, 5 Mar 2025 16:36:22 +0100 Subject: [PATCH 3/3] Pass fields to _hits_to_docs_scores. --- .../langchain_elasticsearch/_async/vectorstores.py | 3 +++ .../langchain_elasticsearch/_sync/vectorstores.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py b/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py index b12fb66..cf3c4a1 100644 --- a/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py +++ b/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py @@ -420,6 +420,7 @@ async def asimilarity_search( docs = _hits_to_docs_scores( hits=hits, content_field=self.query_field, + fields=fields, doc_builder=doc_builder, ) return [doc for doc, _score in docs] @@ -540,6 +541,7 @@ async def asimilarity_search_with_score( return _hits_to_docs_scores( hits=hits, content_field=self.query_field, + fields=fields, doc_builder=doc_builder, ) @@ -583,6 +585,7 @@ async def asimilarity_search_by_vector_with_relevance_scores( return _hits_to_docs_scores( hits=hits, content_field=self.query_field, + fields=fields, doc_builder=doc_builder, ) diff --git a/libs/elasticsearch/langchain_elasticsearch/_sync/vectorstores.py b/libs/elasticsearch/langchain_elasticsearch/_sync/vectorstores.py index fc8abf8..a7b5e9c 100644 --- a/libs/elasticsearch/langchain_elasticsearch/_sync/vectorstores.py +++ b/libs/elasticsearch/langchain_elasticsearch/_sync/vectorstores.py @@ -420,6 +420,7 @@ def similarity_search( docs = _hits_to_docs_scores( hits=hits, content_field=self.query_field, + fields=fields, doc_builder=doc_builder, ) return [doc for doc, _score in docs] @@ -540,6 +541,7 @@ def similarity_search_with_score( return _hits_to_docs_scores( hits=hits, content_field=self.query_field, + fields=fields, doc_builder=doc_builder, ) @@ -583,6 +585,7 @@ def similarity_search_by_vector_with_relevance_scores( return _hits_to_docs_scores( hits=hits, content_field=self.query_field, + fields=fields, doc_builder=doc_builder, )