|
| 1 | +import pytest |
| 2 | +import time |
| 3 | +from pinecone import Vector |
| 4 | +from ..helpers import poll_fetch_for_ids_in_namespace, embedding_values, random_string |
| 5 | + |
| 6 | + |
@pytest.fixture(scope="session")
def update_namespace():
    """Namespace name for the update tests, built once per session via ``random_string(10)``."""
    namespace_name = random_string(10)
    return namespace_name
| 10 | + |
| 11 | + |
def seed_for_update(idx, namespace):
    """Seed test data for update tests.

    Upserts ten vectors with ids ``"0"``..``"9"`` into ``namespace``, each with
    two-element values from ``embedding_values(2)`` and identical metadata, then
    waits via ``poll_fetch_for_ids_in_namespace`` for those same ids.

    Args:
        idx: Index client exposing ``upsert``.
        namespace: Target namespace; ``""`` is passed by callers for the default one.
    """
    # Function-scope import: this module's header does not import logging.
    import logging

    logger = logging.getLogger(__name__)
    logger.info("Seeding vectors for update tests in namespace '%s'", namespace)

    # Build the id list once so the upsert and the follow-up poll cannot drift apart.
    vector_ids = [str(i) for i in range(10)]

    idx.upsert(
        vectors=[
            Vector(
                id=vector_id,
                values=embedding_values(2),
                metadata={"genre": "action", "year": 2020, "status": "active"},
            )
            for vector_id in vector_ids
        ],
        namespace=namespace,
    )
    poll_fetch_for_ids_in_namespace(idx, ids=vector_ids, namespace=namespace)
| 28 | + |
| 29 | + |
@pytest.fixture(scope="class")
def seed_for_update_tests(idx, update_namespace):
    """Seed ``update_namespace`` and then the ``""`` namespace once per test class."""
    for target_namespace in (update_namespace, ""):
        seed_for_update(idx, target_namespace)
    # No teardown work follows the yield.
    yield
| 35 | + |
| 36 | + |
def _update_is_reflected(vec, expected_values, expected_metadata):
    """Return True when ``vec`` satisfies every expectation that was supplied.

    An expectation left as ``None`` is not checked, so with both ``None`` any
    fetched vector counts as a match.
    """
    if expected_values is not None:
        actual_values = vec.values
        if actual_values is None or len(actual_values) != len(expected_values):
            return False
        # Element-wise comparison with pytest.approx's relative tolerance of 0.01.
        if not all(
            actual == pytest.approx(expected, 0.01)
            for actual, expected in zip(actual_values, expected_values)
        ):
            return False

    if expected_metadata is not None and vec.metadata != expected_metadata:
        return False

    return True


def poll_until_update_reflected(
    idx, vector_id, namespace, expected_values=None, expected_metadata=None, timeout=180
):
    """Poll fetch until update is reflected in the vector.

    Args:
        idx: Index client exposing ``fetch(ids=..., namespace=...)``.
        vector_id: Id of the vector to watch.
        namespace: Namespace the vector lives in.
        expected_values: Values the vector must hold (compared approximately),
            or ``None`` to skip the values check.
        expected_metadata: Metadata the vector must equal, or ``None`` to skip
            the metadata check.
        timeout: Maximum wall-clock seconds to wait, including time spent
            inside ``fetch`` calls.

    Returns:
        None, as soon as the fetched vector satisfies the supplied expectations.
        When both expectations are ``None`` the vector merely has to exist.

    Raises:
        TimeoutError: If the expectations are not met within ``timeout``. When
            the most recent poll raised, that exception is attached as the cause.
    """
    # Function-scope import: this module's header does not import logging.
    import logging

    logger = logging.getLogger(__name__)
    delta_t = 2  # Start with shorter interval
    max_delta_t = 10  # Max interval
    last_error = None

    # Measure against a monotonic deadline so slow fetches count toward the timeout.
    start = time.monotonic()
    deadline = start + timeout

    while time.monotonic() < deadline:
        logger.debug(
            'Polling for update on vector "%s" in namespace "%s". Total time waited: %.1f seconds',
            vector_id,
            namespace,
            time.monotonic() - start,
        )
        try:
            results = idx.fetch(ids=[vector_id], namespace=namespace)
            if vector_id in results.vectors and _update_is_reflected(
                results.vectors[vector_id], expected_values, expected_metadata
            ):
                logger.debug("Update reflected for vector %s", vector_id)
                return
            # The fetch itself worked; a stale error must not be blamed for a later timeout.
            last_error = None
        except Exception as e:
            # Best-effort polling: remember the failure and retry until the deadline.
            last_error = e
            logger.debug("Error while polling: %s", e)

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline.
        time.sleep(min(delta_t, remaining))
        # Gradually increase interval up to max
        delta_t = min(delta_t * 1.5, max_delta_t)

    elapsed = time.monotonic() - start
    raise TimeoutError(
        f"Timed out waiting for update on vector {vector_id} in namespace {namespace} after {elapsed:.1f} seconds"
    ) from last_error
| 91 | + |
| 92 | + |
def _assert_values_close(actual, expected):
    """Assert two value sequences have equal length and match element-wise.

    Each pair is compared with ``pytest.approx`` using a relative tolerance of 0.01.
    """
    assert len(actual) == len(expected)
    for got, want in zip(actual, expected):
        assert got == pytest.approx(want, 0.01)


@pytest.mark.usefixtures("seed_for_update_tests")
class TestUpdate:
    """Update-by-id tests; each test targets its own vector id from the seeded data."""

    @pytest.mark.parametrize("use_nondefault_namespace", [True, False])
    def test_update_values(self, idx, update_namespace, use_nondefault_namespace):
        """Test updating vector values by ID."""
        target_namespace = update_namespace if use_nondefault_namespace else ""
        vector_id = "1"

        # Update values
        new_values = embedding_values(2)
        idx.update(id=vector_id, values=new_values, namespace=target_namespace)

        # Wait for update to be reflected
        poll_until_update_reflected(
            idx, vector_id, target_namespace, expected_values=new_values, timeout=180
        )

        # Verify the update
        fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace)
        _assert_values_close(fetched_vec.vectors[vector_id].values, new_values)

    @pytest.mark.parametrize("use_nondefault_namespace", [True, False])
    def test_update_metadata(self, idx, update_namespace, use_nondefault_namespace):
        """Test updating vector metadata by ID."""
        target_namespace = update_namespace if use_nondefault_namespace else ""
        vector_id = "2"

        # Update metadata (every seeded key is overwritten, so the result equals new_metadata)
        new_metadata = {"genre": "comedy", "year": 2021, "status": "inactive"}
        idx.update(id=vector_id, set_metadata=new_metadata, namespace=target_namespace)

        # Wait for update to be reflected
        poll_until_update_reflected(
            idx, vector_id, target_namespace, expected_metadata=new_metadata, timeout=180
        )

        # Verify the update
        fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace)
        assert fetched_vec.vectors[vector_id].metadata == new_metadata

    @pytest.mark.parametrize("use_nondefault_namespace", [True, False])
    def test_update_values_and_metadata(self, idx, update_namespace, use_nondefault_namespace):
        """Test updating both vector values and metadata by ID."""
        target_namespace = update_namespace if use_nondefault_namespace else ""
        vector_id = "3"

        # Update both values and metadata
        new_values = embedding_values(2)
        new_metadata = {"genre": "drama", "year": 2022, "status": "pending"}
        idx.update(
            id=vector_id, values=new_values, set_metadata=new_metadata, namespace=target_namespace
        )

        # Wait for update to be reflected
        poll_until_update_reflected(
            idx,
            vector_id,
            target_namespace,
            expected_values=new_values,
            expected_metadata=new_metadata,
            timeout=180,
        )

        # Verify the update
        fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace)
        _assert_values_close(fetched_vec.vectors[vector_id].values, new_values)
        assert fetched_vec.vectors[vector_id].metadata == new_metadata

    def test_update_only_metadata_no_values(self, idx, update_namespace):
        """Test updating only metadata without providing values.

        ``set_metadata`` adds or overwrites only the fields it names, so fields
        that are not part of the patch (the seeded ``status``) are expected to
        survive. The expectation is therefore the original metadata merged with
        the patch, not the patch alone.
        """
        target_namespace = update_namespace
        vector_id = "4"

        # Capture the state before the update
        original_vec = idx.fetch(ids=[vector_id], namespace=target_namespace).vectors[vector_id]
        original_values = original_vec.values
        original_metadata = dict(original_vec.metadata or {})

        # Update only metadata
        metadata_patch = {"genre": "thriller", "year": 2023}
        idx.update(id=vector_id, set_metadata=metadata_patch, namespace=target_namespace)
        expected_metadata = {**original_metadata, **metadata_patch}

        # Wait for update to be reflected
        poll_until_update_reflected(
            idx, vector_id, target_namespace, expected_metadata=expected_metadata, timeout=180
        )

        # Verify metadata updated but values unchanged
        fetched_vec = idx.fetch(ids=[vector_id], namespace=target_namespace)
        assert fetched_vec.vectors[vector_id].metadata == expected_metadata
        # Values should remain the same (approximately, due to floating point)
        _assert_values_close(fetched_vec.vectors[vector_id].values, original_values)
0 commit comments