Skip to content

Commit bfad7d1

Browse files
committed
Implement update_by_metadata
1 parent 05b71a7 commit bfad7d1

File tree

11 files changed

+552
-77
lines changed

11 files changed

+552
-77
lines changed

pinecone/db_control/request_factory.py

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -245,25 +245,10 @@ def __parse_index_spec(spec: Union[Dict, ServerlessSpec, PodSpec, ByocSpec]) ->
245245
if "schema" in spec["serverless"]:
246246
schema_dict = spec["serverless"]["schema"]
247247
if isinstance(schema_dict, dict):
248-
# Process fields if present, otherwise pass through as-is
249-
schema_kwargs = {}
250-
if "fields" in schema_dict:
251-
fields = {}
252-
for field_name, field_config in schema_dict["fields"].items():
253-
if isinstance(field_config, dict):
254-
# Pass through the entire field_config dict to allow future API fields
255-
fields[field_name] = BackupModelSchemaFields(**field_config)
256-
else:
257-
# If not a dict, create with default filterable=True
258-
fields[field_name] = BackupModelSchemaFields(filterable=True)
259-
schema_kwargs["fields"] = fields
260-
261-
# Pass through any other fields in schema_dict to allow future API fields
262-
for key, value in schema_dict.items():
263-
if key != "fields":
264-
schema_kwargs[key] = value
265-
266-
spec["serverless"]["schema"] = BackupModelSchema(**schema_kwargs)
248+
# Use the helper method to handle both formats correctly
249+
spec["serverless"]["schema"] = (
250+
PineconeDBControlRequestFactory.__parse_schema(schema_dict)
251+
)
267252

268253
index_spec = IndexSpec(serverless=ServerlessSpecModel(**spec["serverless"]))
269254
elif "pod" in spec:

pinecone/db_data/index.py

Lines changed: 85 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -514,25 +514,108 @@ def query_namespaces(
514514
@validate_and_convert_errors
515515
def update(
516516
self,
517-
id: str,
517+
id: Optional[str] = None,
518518
values: Optional[List[float]] = None,
519519
set_metadata: Optional[VectorMetadataTypedDict] = None,
520520
namespace: Optional[str] = None,
521521
sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
522+
filter: Optional[FilterTypedDict] = None,
523+
dry_run: Optional[bool] = None,
522524
**kwargs,
523525
) -> Dict[str, Any]:
524-
return self._vector_api.update_vector(
526+
"""Update vector(s) in a namespace by ID or metadata filter.
527+
528+
The update can be performed by vector ID or by metadata filter. When updating by ID,
529+
a single vector is updated. When updating by metadata filter, all vectors matching
530+
the filter are updated.
531+
532+
If a value is included, it will overwrite the previous value.
533+
If set_metadata is included, the values of the fields specified in it will be
534+
added or overwrite the previous value.
535+
536+
Examples:
537+
538+
Update by ID:
539+
540+
.. code-block:: python
541+
542+
>>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace')
543+
>>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace')
544+
>>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]},
545+
...     namespace='my_namespace')
546+
>>> index.update(id='id1', values=[1, 2, 3], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]),
547+
...     namespace='my_namespace')
548+
549+
Update by metadata filter:
550+
551+
.. code-block:: python
552+
553+
>>> # Update metadata for all vectors matching a filter
554+
>>> index.update(
555+
... filter={'genre': {'$eq': 'comedy'}},
556+
... set_metadata={'status': 'active'},
557+
... namespace='my_namespace'
558+
... )
559+
>>> # Preview how many vectors would be updated (dry run)
560+
>>> result = index.update(
561+
... filter={'year': {'$gte': 2020}},
562+
... set_metadata={'updated': True},
563+
... dry_run=True,
564+
... namespace='my_namespace'
565+
... )
566+
>>> print(f"Would update {result.get('matched_records', 0)} vectors")
567+
568+
Args:
569+
id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional]
570+
values (List[float]): Vector values to set. [optional]
571+
set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]):
572+
Metadata to set for vector(s). [optional]
573+
namespace (str): Namespace name where to update the vector(s). [optional]
574+
sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector.
575+
Expected to be either a SparseValues object or a dict of the form:
576+
{'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional]
577+
filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression.
578+
When provided, the update is applied to all records that match the filter. Mutually exclusive with id.
579+
See `metadata filtering <https://www.pinecone.io/docs/metadata-filtering/>`_ [optional]
580+
dry_run (bool): If True, return the number of records that match the filter without executing the update.
581+
Only meaningful when filter is provided. Defaults to False. [optional]
582+
583+
Returns:
584+
Dict[str, Any]: An empty dictionary if the update was successful when updating by ID.
585+
When updating by filter, the dictionary may contain a 'matched_records' key indicating
586+
how many records matched the filter (even when dry_run is False).
587+
588+
Raises:
589+
ValueError: If both id and filter are provided, or if neither is provided.
590+
"""
591+
if id is not None and filter is not None:
592+
raise ValueError(
593+
"Cannot provide both 'id' and 'filter'. Use 'id' to update a single vector or 'filter' to update multiple vectors."
594+
)
595+
if id is None and filter is None:
596+
raise ValueError("Either 'id' or 'filter' must be provided.")
597+
598+
response = self._vector_api.update_vector(
525599
IndexRequestFactory.update_request(
526600
id=id,
527601
values=values,
528602
set_metadata=set_metadata,
529603
namespace=namespace,
530604
sparse_values=sparse_values,
605+
filter=filter,
606+
dry_run=dry_run,
531607
**kwargs,
532608
),
533609
**self._openapi_kwargs(kwargs),
534610
)
535611

612+
# Convert UpdateResponse to dict, including matched_records if present
613+
result = {}
614+
if hasattr(response, "matched_records") and response.matched_records is not None:
615+
result["matched_records"] = response.matched_records
616+
617+
return result
618+
536619
@validate_and_convert_errors
537620
def describe_index_stats(
538621
self, filter: Optional[FilterTypedDict] = None, **kwargs

pinecone/db_data/index_asyncio.py

Lines changed: 123 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -554,25 +554,146 @@ async def query_namespaces(
554554
@validate_and_convert_errors
555555
async def update(
556556
self,
557-
id: str,
557+
id: Optional[str] = None,
558558
values: Optional[List[float]] = None,
559559
set_metadata: Optional[VectorMetadataTypedDict] = None,
560560
namespace: Optional[str] = None,
561561
sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
562+
filter: Optional[FilterTypedDict] = None,
563+
dry_run: Optional[bool] = None,
562564
**kwargs,
563565
) -> Dict[str, Any]:
564-
return await self._vector_api.update_vector(
566+
"""Update vector(s) in a namespace by ID or metadata filter.
567+
568+
The update can be performed by vector ID or by metadata filter. When updating by ID,
569+
a single vector is updated. When updating by metadata filter, all vectors matching
570+
the filter are updated.
571+
572+
If a value is included, it will overwrite the previous value.
573+
If set_metadata is included, the values of the fields specified in it will be
574+
added or overwrite the previous value.
575+
576+
Examples:
577+
578+
Update by ID:
579+
580+
.. code-block:: python
581+
582+
import asyncio
583+
from pinecone import Pinecone, Vector, SparseValues
584+
585+
async def main():
586+
pc = Pinecone()
587+
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
588+
# Update vector values
589+
await idx.update(
590+
id='id1',
591+
values=[0.1, 0.2, 0.3, ...],
592+
namespace='my_namespace'
593+
)
594+
595+
# Update metadata
596+
await idx.update(
597+
id='id1',
598+
set_metadata={'key': 'value'},
599+
namespace='my_namespace'
600+
)
601+
602+
# Update sparse values
603+
await idx.update(
604+
id='id1',
605+
sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]},
606+
namespace='my_namespace'
607+
)
608+
609+
# Update sparse values with SparseValues object
610+
await idx.update(
611+
id='id1',
612+
sparse_values=SparseValues(indices=[234781, 5432], values=[0.2, 0.4]),
613+
namespace='my_namespace'
614+
)
615+
616+
asyncio.run(main())
617+
618+
Update by metadata filter:
619+
620+
.. code-block:: python
621+
622+
import asyncio
623+
from pinecone import Pinecone
624+
625+
async def main():
626+
pc = Pinecone()
627+
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
628+
# Update metadata for all vectors matching a filter
629+
await idx.update(
630+
filter={'genre': {'$eq': 'comedy'}},
631+
set_metadata={'status': 'active'},
632+
namespace='my_namespace'
633+
)
634+
635+
# Preview how many vectors would be updated (dry run)
636+
result = await idx.update(
637+
filter={'year': {'$gte': 2020}},
638+
set_metadata={'updated': True},
639+
dry_run=True,
640+
namespace='my_namespace'
641+
)
642+
print(f"Would update {result.get('matched_records', 0)} vectors")
643+
644+
asyncio.run(main())
645+
646+
Args:
647+
id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional]
648+
values (List[float]): Vector values to set. [optional]
649+
set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]):
650+
Metadata to set for vector(s). [optional]
651+
namespace (str): Namespace name where to update the vector(s). [optional]
652+
sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector.
653+
Expected to be either a SparseValues object or a dict of the form:
654+
{'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional]
655+
filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression.
656+
When provided, the update is applied to all records that match the filter. Mutually exclusive with id.
657+
See `metadata filtering <https://www.pinecone.io/docs/metadata-filtering/>`_ [optional]
658+
dry_run (bool): If True, return the number of records that match the filter without executing the update.
659+
Only meaningful when filter is provided. Defaults to False. [optional]
660+
661+
Returns:
662+
Dict[str, Any]: An empty dictionary if the update was successful when updating by ID.
663+
When updating by filter, the dictionary may contain a 'matched_records' key indicating
664+
how many records matched the filter (even when dry_run is False).
665+
666+
Raises:
667+
ValueError: If both id and filter are provided, or if neither is provided.
668+
"""
669+
if id is not None and filter is not None:
670+
raise ValueError(
671+
"Cannot provide both 'id' and 'filter'. Use 'id' to update a single vector or 'filter' to update multiple vectors."
672+
)
673+
if id is None and filter is None:
674+
raise ValueError("Either 'id' or 'filter' must be provided.")
675+
676+
response = await self._vector_api.update_vector(
565677
IndexRequestFactory.update_request(
566678
id=id,
567679
values=values,
568680
set_metadata=set_metadata,
569681
namespace=namespace,
570682
sparse_values=sparse_values,
683+
filter=filter,
684+
dry_run=dry_run,
571685
**kwargs,
572686
),
573687
**self._openapi_kwargs(kwargs),
574688
)
575689

690+
# Convert UpdateResponse to dict, including matched_records if present
691+
result = {}
692+
if hasattr(response, "matched_records") and response.matched_records is not None:
693+
result["matched_records"] = response.matched_records
694+
695+
return result
696+
576697
@validate_and_convert_errors
577698
async def describe_index_stats(
578699
self, filter: Optional[FilterTypedDict] = None, **kwargs

pinecone/db_data/index_asyncio_interface.py

Lines changed: 61 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -520,33 +520,29 @@ async def main():
520520
@abstractmethod
521521
async def update(
522522
self,
523-
id: str,
523+
id: Optional[str] = None,
524524
values: Optional[List[float]] = None,
525525
set_metadata: Optional[VectorMetadataTypedDict] = None,
526526
namespace: Optional[str] = None,
527527
sparse_values: Optional[Union[SparseValues, SparseVectorTypedDict]] = None,
528+
filter: Optional[FilterTypedDict] = None,
529+
dry_run: Optional[bool] = None,
528530
**kwargs,
529531
) -> Dict[str, Any]:
530-
"""
531-
The Update operation updates vector in a namespace.
532+
"""The Update operation updates vector(s) in a namespace.
532533
533-
Args:
534-
id (str): Vector's unique id.
535-
values (List[float]): vector values to set. [optional]
536-
set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]):
537-
metadata to set for vector. [optional]
538-
namespace (str): Namespace name where to update the vector.. [optional]
539-
sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector.
540-
Expected to be either a SparseValues object or a dict of the form:
541-
{'indices': List[int], 'values': List[float]} where the lists each have the same length.
534+
The update can be performed by vector ID or by metadata filter. When updating by ID,
535+
a single vector is updated. When updating by metadata filter, all vectors matching
536+
the filter are updated.
542537
543538
If a value is included, it will overwrite the previous value.
544-
If a set_metadata is included,
545-
the values of the fields specified in it will be added or overwrite the previous value.
546-
539+
If set_metadata is included, the values of the fields specified in it will be
540+
added or overwrite the previous value.
547541
548542
Examples:
549543
544+
Update by ID:
545+
550546
.. code-block:: python
551547
552548
import asyncio
@@ -585,6 +581,56 @@ async def main():
585581
586582
asyncio.run(main())
587583
584+
Update by metadata filter:
585+
586+
.. code-block:: python
587+
588+
import asyncio
589+
from pinecone import Pinecone
590+
591+
async def main():
592+
pc = Pinecone()
593+
async with pc.IndexAsyncio(host="example-dojoi3u.svc.aped-4627-b74a.pinecone.io") as idx:
594+
# Update metadata for all vectors matching a filter
595+
await idx.update(
596+
filter={'genre': {'$eq': 'comedy'}},
597+
set_metadata={'status': 'active'},
598+
namespace='my_namespace'
599+
)
600+
601+
# Preview how many vectors would be updated (dry run)
602+
result = await idx.update(
603+
filter={'year': {'$gte': 2020}},
604+
set_metadata={'updated': True},
605+
dry_run=True,
606+
namespace='my_namespace'
607+
)
608+
print(f"Would update {result.get('matched_records', 0)} vectors")
609+
610+
asyncio.run(main())
611+
612+
Args:
613+
id (str): Vector's unique id. Required when updating by ID. Must be None when filter is provided. [optional]
614+
values (List[float]): Vector values to set. [optional]
615+
set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]):
616+
Metadata to set for vector(s). [optional]
617+
namespace (str): Namespace name where to update the vector(s). [optional]
618+
sparse_values (Dict[str, Union[List[float], List[int]]]): Sparse values to update for the vector.
619+
Expected to be either a SparseValues object or a dict of the form:
620+
{'indices': List[int], 'values': List[float]} where the lists each have the same length. [optional]
621+
filter (Dict[str, Union[str, float, int, bool, List, dict]]): A metadata filter expression.
622+
When provided, the update is applied to all records that match the filter. Mutually exclusive with id.
623+
See `metadata filtering <https://www.pinecone.io/docs/metadata-filtering/>`_ [optional]
624+
dry_run (bool): If True, return the number of records that match the filter without executing the update.
625+
Only meaningful when filter is provided. Defaults to False. [optional]
626+
627+
Returns:
628+
Dict[str, Any]: An empty dictionary if the update was successful when updating by ID.
629+
When updating by filter, the dictionary may contain a 'matched_records' key indicating
630+
how many records matched the filter (even when dry_run is False).
631+
632+
Raises:
633+
ValueError: If both id and filter are provided, or if neither is provided.
588634
"""
589635
pass
590636

0 commit comments

Comments
 (0)