From e37894be058a8baaebe3d73e444f0e4cc7f88fe2 Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Sun, 9 Nov 2025 13:15:37 -0800 Subject: [PATCH 01/79] Implement initial version of Leann KVS with semantic search functionality --- hnsw-test/leann_simple_kvs.py | 101 ++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 hnsw-test/leann_simple_kvs.py diff --git a/hnsw-test/leann_simple_kvs.py b/hnsw-test/leann_simple_kvs.py new file mode 100644 index 000000000..cccd9aa5f --- /dev/null +++ b/hnsw-test/leann_simple_kvs.py @@ -0,0 +1,101 @@ +import os +import shutil +from pathlib import Path +from leann import LeannBuilder, LeannSearcher +import pprint + +# --- 0. Setup KVS (dictionary) and Leann --- + +# 1. KVS (dictionary) setup (simulating ResilientDB) +kvs = {} + +# 2. Leann index file path +INDEX_PATH = str(Path("./").resolve() / "simple_kvs_leann_default.leann") + +# 3. Clean up previous index files (for re-running the demo) +if os.path.exists(f"{INDEX_PATH}.meta.json"): + print(f"Cleaning up existing index at '{INDEX_PATH}'...") + try: + base_name = INDEX_PATH.replace('.leann', '') + os.remove(f"{base_name}.leann.meta.json") + os.remove(f"{base_name}.leann.passages.jsonl") + os.remove(f"{base_name}.leann.passages.idx") + os.remove(f"{base_name}.index") # HNSW index file + except FileNotFoundError: + pass +print("-" * 30) + + +# --- 1. 'Set' Data to KVS and Build Leann Index --- +print("--- 1. Setting data and building Leann index (default mode) ---") + +# Initialize LeannBuilder (default settings, no memory-saving config) +# This will use recompute=True, which requires more RAM during search +builder = LeannBuilder( + backend_name="hnsw", + embedding_model="sentence-transformers/all-MiniLM-L6-v2" # Using a lightweight model +) + +# Data to store (simulating kv_client.Set(key, value)) +data_to_set = [ + ("greeting", "Hello ResilientDB! 
This is a test."), + ("doc_001", "Leann is a library for vector search."), + ("doc_002", "HNSW is a fast algorithm for approximate nearest neighbor search.") +] + +for key, value in data_to_set: + + # (A) ResilientDB 'Set' operation (store in dictionary) + print(f"SET: KVS Key = '{key}'") + kvs[key] = value + + # (B) Add to Leann index + # We pass the KVS key in the metadata 'id' + builder.add_text(value, metadata={"id": key}) + +# Build the Leann (HNSW) index +builder.build_index(INDEX_PATH) + +print("\nCurrent KVS (dictionary) state:") +pprint.pprint(kvs) +print("Leann (HNSW) index built.") +print("-" * 30) + + +# --- 2. Run Semantic Search (Leann + HNSW) --- +print("--- 2. Running semantic search (default mode) ---") + +try: + # Use 'with' to safely open the LeannSearcher + # This will start an internal server (requires more RAM) + with LeannSearcher(INDEX_PATH) as searcher: + + query = "What is a fast vector search algorithm?" + print(f"Query: '{query}'") + + # (1) Run semantic search on Leann (HNSW) + # We do NOT pass recompute_embeddings=False, using the default + results = searcher.search(query, top_k=1) + + if results: + # (2) Get the 'id' from Leann. In this mode, + # 'id' should be the original string key ("doc_002"). + found_key = results[0].id + + print(f"\n -> Leann found key: '{found_key}' (Score: {results[0].score:.4f})") + + # (3) Use the key from Leann to 'Get' the full data + # from the KVS (dictionary). This is the integration. 
+ final_value = kvs.get(found_key) + + print(" -> Final value retrieved from KVS (dictionary):") + print(f" {final_value}") + else: + print(" -> No results found.") + +except FileNotFoundError: + print(f"ERROR: Index file not found at {INDEX_PATH}.") +except Exception as e: + print(f"An error occurred during search: {e}") + +print("-" * 30) \ No newline at end of file From cd95cbb041e040c96b7c6603063cb2304e7ec4e1 Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Mon, 10 Nov 2025 07:36:27 +0900 Subject: [PATCH 02/79] Add Leann KVS implementation with semantic search and initial data entries --- hnsw-test/leann_simple_kvs.py | 179 +++++++++++----------- hnsw-test/my_hashmap.index | Bin 0 -> 1314 bytes hnsw-test/my_hashmap.leann.meta.json | 19 +++ hnsw-test/my_hashmap.leann.passages.idx | Bin 0 -> 115 bytes hnsw-test/my_hashmap.leann.passages.jsonl | 14 ++ hnsw-test/quick_start.py | 17 ++ 6 files changed, 140 insertions(+), 89 deletions(-) create mode 100644 hnsw-test/my_hashmap.index create mode 100644 hnsw-test/my_hashmap.leann.meta.json create mode 100644 hnsw-test/my_hashmap.leann.passages.idx create mode 100644 hnsw-test/my_hashmap.leann.passages.jsonl create mode 100644 hnsw-test/quick_start.py diff --git a/hnsw-test/leann_simple_kvs.py b/hnsw-test/leann_simple_kvs.py index cccd9aa5f..883730a24 100644 --- a/hnsw-test/leann_simple_kvs.py +++ b/hnsw-test/leann_simple_kvs.py @@ -1,101 +1,102 @@ -import os -import shutil -from pathlib import Path from leann import LeannBuilder, LeannSearcher -import pprint - -# --- 0. Setup KVS (dictionary) and Leann --- - -# 1. KVS (dictionary) setup (simulating ResilientDB) -kvs = {} - -# 2. Leann index file path -INDEX_PATH = str(Path("./").resolve() / "simple_kvs_leann_default.leann") - -# 3. 
Clean up previous index files (for re-running the demo) -if os.path.exists(f"{INDEX_PATH}.meta.json"): - print(f"Cleaning up existing index at '{INDEX_PATH}'...") - try: - base_name = INDEX_PATH.replace('.leann', '') - os.remove(f"{base_name}.leann.meta.json") - os.remove(f"{base_name}.leann.passages.jsonl") - os.remove(f"{base_name}.leann.passages.idx") - os.remove(f"{base_name}.index") # HNSW index file - except FileNotFoundError: - pass -print("-" * 30) - - -# --- 1. 'Set' Data to KVS and Build Leann Index --- -print("--- 1. Setting data and building Leann index (default mode) ---") +from pathlib import Path +import os -# Initialize LeannBuilder (default settings, no memory-saving config) -# This will use recompute=True, which requires more RAM during search -builder = LeannBuilder( - backend_name="hnsw", - embedding_model="sentence-transformers/all-MiniLM-L6-v2" # Using a lightweight model -) +# 1. Define the hash map (Python dictionary) to search +data_map = { + # Original entries + "doc1": "LEANN saves 97% storage compared to traditional vector databases.", + "doc2": "Tung Tung Tung Sahur called—they need their banana-crocodile hybrid back", + "doc3": "The weather in Davis is sunny today.", + "doc4": "Understanding consensus protocols is key for blockchain.", + + # New entries (Course-related) + "doc5": "ResilientDB is a high-throughput blockchain fabric designed for performance.", + "doc6": "This project explores novel techniques for sharding in distributed ledgers.", + "doc7": "DeFi applications are often built on top of smart contracts.", + "doc8": "Practical Byzantine Fault Tolerance (PBFT) is a foundational agreement protocol.", + "doc9": "Cross-chain communication enables interoperability between different blockchains.", + "doc10": "The project requires using the ResilientDB Fabric unless approved otherwise.", + + # New entries (Unrelated noise) + "doc11": "Mitochondria are the powerhouse of the cell.", + "doc12": "How to bake a perfect sourdough bread 
with a starters.", + "doc13": "The final report must be written in LaTeX using ACM templates.", + "doc14": "UC Davis is known for its agricultural studies." +} + +# 2. Create lists to map Leann's internal IDs (0, 1, 2...) +# back to our original hash map keys. +# map_keys[i] corresponds to map_values[i] +map_keys = list(data_map.keys()) +map_values = list(data_map.values()) + +INDEX_PATH = str(Path("./hnsw-test").resolve() / "my_hashmap.leann") + +# --- 3. Build the Leann Index --- +print("Building index with LeannBuilder...") +builder = LeannBuilder(backend_name="hnsw") + +# Add the text values from the hash map to the builder. +# Leann will assign internal IDs starting from 0 (0, 1, 2, 3...) +for text in map_values: + builder.add_text(text) + +# Build and save the index file +builder.build_index(INDEX_PATH) +print(f"Index built and saved to {INDEX_PATH}") -# Data to store (simulating kv_client.Set(key, value)) -data_to_set = [ - ("greeting", "Hello ResilientDB! This is a test."), - ("doc_001", "Leann is a library for vector search."), - ("doc_002", "HNSW is a fast algorithm for approximate nearest neighbor search.") -] +# --- 4. Prepare the Leann Searcher --- +searcher = LeannSearcher(INDEX_PATH) -for key, value in data_to_set: +# 5. Create the semantic search function +def semantic_search_leann(query_text, k=3): + """ + Uses LeannSearcher (vector indexing) to find the k-most + semantically similar items from the hash map. 
+ """ - # (A) ResilientDB 'Set' operation (store in dictionary) - print(f"SET: KVS Key = '{key}'") - kvs[key] = value + # searcher.search() returns a list of SearchResult objects + results_from_leann = searcher.search(query_text, top_k=k) - # (B) Add to Leann index - # We pass the KVS key in the metadata 'id' - builder.add_text(value, metadata={"id": key}) - -# Build the Leann (HNSW) index -builder.build_index(INDEX_PATH) - -print("\nCurrent KVS (dictionary) state:") -pprint.pprint(kvs) -print("Leann (HNSW) index built.") -print("-" * 30) - + final_results = [] + if not results_from_leann: + return final_results -# --- 2. Run Semantic Search (Leann + HNSW) --- -print("--- 2. Running semantic search (default mode) ---") - -try: - # Use 'with' to safely open the LeannSearcher - # This will start an internal server (requires more RAM) - with LeannSearcher(INDEX_PATH) as searcher: + # Loop through the SearchResult objects + for result in results_from_leann: + + # Get the internal ID (as an int) from the result object + item_index = int(result.id) + + # Use the ID to look up our original key and value + key = map_keys[item_index] + value = map_values[item_index] + + # Get the similarity score + score = result.score + + final_results.append({ + "key": key, + "value": value, + "similarity_score": score + }) - query = "What is a fast vector search algorithm?" - print(f"Query: '{query}'") + return final_results - # (1) Run semantic search on Leann (HNSW) - # We do NOT pass recompute_embeddings=False, using the default - results = searcher.search(query, top_k=1) +# --- 6. Run the search --- +print("\n--- Search Results (using leann) ---") - if results: - # (2) Get the 'id' from Leann. In this mode, - # 'id' should be the original string key ("doc_002"). - found_key = results[0].id - - print(f"\n -> Leann found key: '{found_key}' (Score: {results[0].score:.4f})") - - # (3) Use the key from Leann to 'Get' the full data - # from the KVS (dictionary). This is the integration. 
- final_value = kvs.get(found_key) - - print(" -> Final value retrieved from KVS (dictionary):") - print(f" {final_value}") - else: - print(" -> No results found.") +# First query +query1 = "Robust Database" +# Call the renamed function +results1 = semantic_search_leann(query1) -except FileNotFoundError: - print(f"ERROR: Index file not found at {INDEX_PATH}.") -except Exception as e: - print(f"An error occurred during search: {e}") +if results1: + print(f"Query: '{query1}'") + # Print all results (since k=3 by default) + for i, result in enumerate(results1): + print(f" Rank {i+1} ({result['key']}): {result['value']}") + print(f" (Score: {result['similarity_score']:.4f})") -print("-" * 30) \ No newline at end of file +print("---") \ No newline at end of file diff --git a/hnsw-test/my_hashmap.index b/hnsw-test/my_hashmap.index new file mode 100644 index 0000000000000000000000000000000000000000..918b8d7c6b36a035200e3d8c36e04a09e85dba68 GIT binary patch literal 1314 zcmbV~Jx)SV5QPWu2mg&Bgcy4_V2#>AgA1_G%1{tGdcp?WfC;o#cGi?GgB`d8oe$4R zu8lsu$>iMg?wmPu=e~Y#@DPoN!|*!R<_l}EEuXFN{W4)1F$Y%>H^PrDycRFuvL1Kc z?c8`><>o$Pnz6`OW~?%7jM2a1EQRGd5HX&L8_&g!7vjciapOSTcq?ukiW?{5#+kVB zQQY_>ZhRIuzKS0S4Rvicgnp!^_^xnY=t28^PxQSdYzsR=&&%`mJm2!18a&o`G8(x* z9vjcS407o&f2_cto^~?m72ReA{rV2%qa&9*YN4ebT6ETM2It&f2$^Gel+z0ku;!dKe9+cr)}c8cy-f>|r#T;(fb^(RhllI%5x`$rRsqAZ0qm_cxF- zo8p_t)Wc{##rG1BvY6uQ#oWVaImP!da}SfD!4zL}79edn#kU7U8%^ Date: Tue, 11 Nov 2025 08:07:38 +0900 Subject: [PATCH 03/79] Refactor semantic search implementation and improve error handling --- hnsw-test/leann_simple_kvs.py | 70 +++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/hnsw-test/leann_simple_kvs.py b/hnsw-test/leann_simple_kvs.py index 883730a24..51ad293fc 100644 --- a/hnsw-test/leann_simple_kvs.py +++ b/hnsw-test/leann_simple_kvs.py @@ -1,6 +1,5 @@ from leann import LeannBuilder, LeannSearcher from pathlib import Path -import os # 1. 
Define the hash map (Python dictionary) to search data_map = { @@ -25,21 +24,18 @@ "doc14": "UC Davis is known for its agricultural studies." } -# 2. Create lists to map Leann's internal IDs (0, 1, 2...) -# back to our original hash map keys. -# map_keys[i] corresponds to map_values[i] -map_keys = list(data_map.keys()) -map_values = list(data_map.values()) +# 2. Make a single source of groundtruth for ID, key, and value +docs = list(data_map.items()) -INDEX_PATH = str(Path("./hnsw-test").resolve() / "my_hashmap.leann") +INDEX_PATH = Path("./hnsw-test").resolve() / "my_hashmap.leann" +INDEX_PATH.parent.mkdir(parents=True, exist_ok=True) # --- 3. Build the Leann Index --- print("Building index with LeannBuilder...") builder = LeannBuilder(backend_name="hnsw") -# Add the text values from the hash map to the builder. -# Leann will assign internal IDs starting from 0 (0, 1, 2, 3...) -for text in map_values: +# Add the text from the docs to the builder. +for _, text in docs: builder.add_text(text) # Build and save the index file @@ -47,18 +43,22 @@ print(f"Index built and saved to {INDEX_PATH}") # --- 4. Prepare the Leann Searcher --- -searcher = LeannSearcher(INDEX_PATH) +searcher = LeannSearcher(str(INDEX_PATH)) # 5. Create the semantic search function -def semantic_search_leann(query_text, k=3): +def semantic_search_leann(query_text: str, k: int = 3): """ Uses LeannSearcher (vector indexing) to find the k-most semantically similar items from the hash map. 
""" + if not query_text: + return [] - # searcher.search() returns a list of SearchResult objects + k = max(0, min(k, len(docs))) + if k == 0: + return [] + results_from_leann = searcher.search(query_text, top_k=k) - final_results = [] if not results_from_leann: return final_results @@ -67,14 +67,18 @@ def semantic_search_leann(query_text, k=3): for result in results_from_leann: # Get the internal ID (as an int) from the result object - item_index = int(result.id) + try: + item_index = int(result.id) + except: + continue + + if not (0 <= item_index < len(docs)): + continue - # Use the ID to look up our original key and value - key = map_keys[item_index] - value = map_values[item_index] + key, value = docs[item_index] # Get the similarity score - score = result.score + score = float(result.score) final_results.append({ "key": key, @@ -84,19 +88,21 @@ def semantic_search_leann(query_text, k=3): return final_results -# --- 6. Run the search --- -print("\n--- Search Results (using leann) ---") +# --- 6. Show the result --- +def print_results(query: str, results): + if not results: + print(f"No results for query: {query!r}") + return + + print(f"Query: '{query}'") + + for i, r in enumerate(results, 1): + print(f" Rank {i} ({r['key']}): {r['value']}") + print(f" (Score: {r['similarity_score']:.4f})") -# First query +# --- 7. 
Run the search --- +print("\n--- Search Results (using leann) ---") query1 = "Robust Database" -# Call the renamed function -results1 = semantic_search_leann(query1) - -if results1: - print(f"Query: '{query1}'") - # Print all results (since k=3 by default) - for i, result in enumerate(results1): - print(f" Rank {i+1} ({result['key']}): {result['value']}") - print(f" (Score: {result['similarity_score']:.4f})") - +results1 = semantic_search_leann(query1, k=3) +print_results(query1, results1) print("---") \ No newline at end of file From e7ef14f59f3871577594b828522fd9a8ba5e8c82 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Mon, 10 Nov 2025 15:25:02 -0800 Subject: [PATCH 04/79] Add README for LEANN Semantic Search demo --- hnsw-test/README.md | 47 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 hnsw-test/README.md diff --git a/hnsw-test/README.md b/hnsw-test/README.md new file mode 100644 index 000000000..eee9169ae --- /dev/null +++ b/hnsw-test/README.md @@ -0,0 +1,47 @@ +# LEANN Semantic Search — Minimal Demo (README) +This README assumes a **single-file minimal demo** that turns a plain Python dict (`key → text`) into a semantic search using **LEANN** with an **HNSW** backend. + +## Overview +- **Goal**: Use LEANN to build an index and return the top-k semantically similar entries for a query. +- **Design**: + - Single file (no OOP, no external mapping JSON) + - Safe mapping from internal IDs back to your original `(key, text)` + - Minimal, robust checks around IDs and `k` + +## How It Works +- Prepare data: a Python dict ```data_map = { "doc1": "text...", ... }```. +- Freeze ordering: ```docs = list(data_map.items())``` so ```docs[i]``` matches LEANN’s internal ID ```i```. +- Build: feed texts into ```LeannBuilder(backend_name="hnsw")``` and save the index. +- Search: run ```LeannSearcher.search(query, top_k=k)``` to get ```(id, score)``` pairs. 
+- Map back: use ```id``` to recover the original ```(key, text)``` from ```docs[id]```. + +## Prerequisites +- Python 3.12 +- Package & venv management with **uv** + +## Setup (with uv) +1. Install uv if needed: +```bash +# macOS / Linux +curl -LsSf https://astral.sh/uv/install.sh | sh + +# Windows (PowerShell) +irm https://astral.sh/uv/install.ps1 | iex +``` + +2. Create a virtual environment and add dependencies: +```bash +uv venv +source .venv/bin/activate +uv pip install leann +``` + +## Run +```bash +python leann_simple_kvs.py +``` +or +``` +uv run python leann_simple_kvs.py +``` + From 924f16c255d338eb4f0f11742b43b72dcabcb33c Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Tue, 11 Nov 2025 01:04:10 -0800 Subject: [PATCH 05/79] Beginning of Installation instructions --- README.md | 463 +++--------------------------------------------------- 1 file changed, 19 insertions(+), 444 deletions(-) diff --git a/README.md b/README.md index 664f009ad..934bfb71d 100644 --- a/README.md +++ b/README.md @@ -17,460 +17,35 @@ under the License. --> -![](https://img.shields.io/github/v/release/resilientdb/resilientdb) -![](https://img.shields.io/badge/language-c++-orange.svg) -![](https://img.shields.io/badge/platform-Ubuntu20.0+-lightgrey.svg) -![GitHub](https://img.shields.io/github/license/resilientdb/resilientdb) -![Generated Button](https://raw.githubusercontent.com/resilientdb/resilientdb/image-data/badge.svg) -![build](https://github.com/resilientdb/resilientdb/workflows/bazel-build%20CI/badge.svg) -![build](https://github.com/resilientdb/resilientdb/workflows/Unite%20Test/badge.svg) +Forked from [this repository](https://github.com/apache/incubator-resilientdb), for more complex setup instructions, please head there. +# (In Progress) How to Setup The Repository +Hey all, Steven here, this is the quickstart guide to getting the Indexer project up and running. +1.
Clone it to your local device with `git clone https://github.com/apache/incubator-resilientdb.git` -# ResilientDB: Global-Scale Sustainable Blockchain Fabric +2. (Windows only) ResilientDB uses bash shell commands (.sh extension), which Windows doesn't support natively (pretty sure macOS _DOES_). Fortunately, Windows 11 and most versions of Windows 10 have an easy-to-use subsystem for Linux, WSL. Instructions on how to set it up are [here](https://learn.microsoft.com/en-us/windows/wsl/install). +After installing WSL, you can open a bash terminal by running the program `Ubuntu`. This will open in the home directory of your newly created WSL user, but you can still access your Windows files via `cd ~/../../mnt`, which should navigate you to the location of your C/D drive. -**[ResilientDB](https://resilientdb.com/)** is a **High Throughput Yielding Permissioned Blockchain Fabric** founded by **[ExpoLab](https://expolab.org/)** at **[UC Davis](https://www.ucdavis.edu/)** in 2018. ResilientDB advocates a **system-centric** design by adopting a **multi-threaded architecture** that encompasses **deep pipelines**. Further, ResilientDB **separates** the ordering of client transactions from their execution, which allows it to **process messages out-of-order**. +3. Navigate to the project folder and run `sudo sh INSTALL.sh` -# Downloads: -Download address for run-directly software package: https://downloads.apache.org/incubator/resilientdb/ +### Windows - Linux Mismatch -### Quick Facts on ResilientDB -1. ResilientDB orders client transactions through a highly optimized implementation of the **[PBFT](https://pmg.csail.mit.edu/papers/osdi99.pdf)** [Castro and Liskov, 1998] protocol, which helps to achieve consensus among its replicas.
ResilientDB also supports deploying other state-of-the-art consensus protocols *[release are planned]* such as **[GeoBFT](http://www.vldb.org/pvldb/vol13/p868-gupta.pdf)** [**[blog](https://blog.resilientdb.com/2023/03/07/GeoBFT.html), [released](https://github.com/resilientdb/resilientdb/releases/tag/nexres-v1.1.0)**], **[PoE](https://openproceedings.org/2021/conf/edbt/p111.pdf)**, **[RCC](https://arxiv.org/abs/1911.00837)**, **[RingBFT](https://openproceedings.org/2022/conf/edbt/paper-73.pdf)**, **[PoC](https://arxiv.org/abs/2302.02325)**, **[SpotLess](https://arxiv.org/abs/2302.02118)**, **[HotStuff](https://arxiv.org/abs/1803.05069)**, and **[DAG](https://arxiv.org/pdf/2105.11827.pdf)**. -2. ResilientDB requires deploying at least **3f+1** replicas, where **f (f > 0)** is the maximum number of arbitrary (or malicious) replicas. -3. ResilientDB supports primary-backup architecture, which designates one of the replicas as the **primary** (replica with identifier **0**). The primary replica initiates consensus on a client transaction, while backups agree to follow a non-malicious primary. -4. ResilientDB exposes a wide range of interfaces such as a **Key-Value** store, **Smart Contracts**, **UTXO**, and **Python SDK**. Following are some of the decentralized applications (DApps) built on top of ResilientDB: **[NFT Marketplace](https://nft.resilientdb.com/)** and **[Debitable](https://debitable.resilientdb.com/)**. -5. To persist blockchain, chain state, and metadata, ResilientDB provides durability through **LevelDB**. -6. ResilientDB provides access to a seamless **GUI display** for deployment and maintenance, and supports **Grafana** for plotting monitoring data. -7. 
**[Historial Facts]** The ResilientDB project was founded by **[Mohammad Sadoghi](https://expolab.org/)** along with his students ([Suyash Gupta](https://gupta-suyash.github.io/index.html) as the lead Architect, [Sajjad Rahnama](https://sajjadrahnama.com/) as the lead System Designer, and [Jelle Hellings](https://www.jhellings.nl/)) at **[UC Davis](https://www.ucdavis.edu/)** in 2018 and was open-sourced in late 2019. On September 30, 2021, we released ResilientDB v-3.0. In 2022, ResilientDB was completely re-written and re-architected ([Junchao Chen](https://github.com/cjcchen) as the lead Architect, [Dakai Kang](https://github.com/DakaiKang) as the lead Recovery Architect along with the entire [NexRes Team](https://expolab.resilientdb.com/)), paving the way for a new sustainable foundation, referred to as NexRes (Next Generation ResilientDB). Thus, on September 30, 2022, NexRes-v1.0.0 was born, marking a new beginning for **[ResilientDB](https://resilientdb.com/)**. On October 21, 2023, **[ResilientDB](https://cwiki.apache.org/confluence/display/INCUBATOR/ResilientDBProposal)** was officially accepted into **[Apache Incubation](https://incubator.apache.org/projects/resilientdb.html)**. +If you ran the command above on Windows/Ubuntu, and you received at least one error message, you're cooked. I've spent the last few hours trying to debug and figure out what happened, and I THINK this is because of a mismatch between the way Windows and Linux/Mac handle newlines vs carriage returns. As I understand it, `INSTALL.sh` effectively adds an unnecessary carriage return character (\r) to the end of every line, which messes with file names. -
- - - -
+#### The Main Fix +To clean this up, we need to make the install file Bash-friendly again. To do this, I ran both `sed -i 's/\r//g' INSTALL.sh` (a simple character replacement program) and `dos2unix INSTALL.sh` (an advanced program built for this exact purpose, you may have to install it with apt). Your mileage may vary, and likely this will work with only one of the two commands. ---- +After this, try to run `sudo sh INSTALL.sh` again and see if you get any of the following errors: +#### Missing Bazel Version +This looks something like `(specified in /mnt/c/Users/username/Desktop/indexers-ECS265-Fall2025/.bazelversion), but it wasn't found in /usr/bin.` -## Online Documentation: +This should go away if you delete the `.bazelversion` file in your `indexers-ECS265-Fall2025` folder. -The latest ResilientDB documentation, including a programming guide, is available on our **[blog repository](https://blog.resilientdb.com/archive.html?tag=NexRes)**. This README file provides basic setup instructions. +#### Invalid Filename Extension +This looks something like `Ignoring file 'bazel.list ' in directory '/etc/apt/sources.list.d/' as it has an invalid filename extension` -#### Table of Contents -1. Software Stack Architecture - - SDK, Interface/API, Platform, Execution, and Chain Layers - - Detailed API Documentation: **[Core](https://api.resilientdb.com/)** and **[SDK](https://sdk.resilientdb.com/)** -2. **SDK Layer:** **[Python SDK](https://blog.resilientdb.com/2023/02/01/UsingPythonSDK.html)** and **[Wallet - ResVault](https://blog.resilientdb.com/2023/09/21/ResVault.html)** -3. **Interface Layer:** **[Key-Value](https://blog.resilientdb.com/2022/09/28/GettingStartedNexRes.html)**, **[Solidity Smart Contract](https://blog.resilientdb.com/2023/01/15/GettingStartedSmartContract.html)**, **[Unspent Transaction Output (UTXO) Model](https://blog.resilientdb.com/2023/02/12/UtxoOnNexres.html)**, ResilientDB Database Connectivity (RDBC) API -4.
**Platform Layer:** **[Consensus Manager Architecture (ordering, recovery, network, chain management)](https://blog.resilientdb.com/2022/09/27/What_Is_NexRes.html)** - - **[Recovery & Checkpoint Design](https://blog.resilientdb.com/2023/08/22/ViewChangeInNexRes.html)** -5. **Execution Layer:** Transaction Manager Design (Runtime) -6. **Chain Layer:** Chain State & Storage Manager Design (**[durability](https://blog.resilientdb.com/2023/02/15/NexResDurabilityLayer.html)**) -7. **[Installing & Deploying ResilientDB](https://blog.resilientdb.com/2022/09/28/GettingStartedNexRes.html)** - - Build Your First Application: **[KV Service](https://blog.resilientdb.com/2022/09/28/StartYourApplication.html)**, **[UTXO](https://blog.resilientdb.com/2023/02/12/GettingStartedOnUtxo.html)** - - Dashboard: **[Monitoring](https://blog.resilientdb.com/2022/12/06/NexResGrafanaDashboardInstallation.html)**, **[Deployment](https://blog.resilientdb.com/2022/12/06/DeployGrafanaDashboardOnOracleCloud.html)**, **[Data Pipeline](https://blog.resilientdb.com/2022/12/12/NexResGrafanaDashboardPipeline.html)** - - System Parameters & Configuration +I believe this happens because bazel (the build tool used by ResDB) tries to save its version into the file `bazel.list\r`, accidentally adding a carriage return to the end. -
- -
- -## OS Requirements -Ubuntu 20+ - ---- - -## Project Structure - -``` -incubator-resilientdb/ -├── api/ # API layer and interfaces -├── benchmark/ # Performance benchmarking tools -│ └── protocols/ # Protocol-specific benchmarks -│ ├── pbft/ # PBFT protocol benchmarks -│ └── poe/ # PoE protocol benchmarks -├── chain/ # Blockchain chain management -│ ├── state/ # Chain state management -│ └── storage/ # Storage layer (LevelDB, etc.) -├── common/ # Common utilities and libraries -│ ├── crypto/ # Cryptographic functions -│ ├── lru/ # LRU cache implementation -│ ├── proto/ # Protocol buffer definitions -│ ├── test/ # Testing utilities -│ └── utils/ # General utilities -├── ecosystem/ # Ecosystem components (git subtrees) -│ ├── cache/ # Caching implementations -│ │ ├── resilient-node-cache/ # Node.js caching -│ │ └── resilient-python-cache/ # Python caching -│ ├── deployment/ # Deployment and infrastructure -│ │ ├── ansible/ # Ansible playbooks -│ │ └── orbit/ # Orbit deployment tool -│ ├── graphql/ # GraphQL service -│ ├── monitoring/ # Monitoring and observability -│ │ ├── reslens/ # ResLens monitoring tool -│ │ └── reslens-middleware/ # ResLens middleware -│ ├── sdk/ # Software Development Kits -│ │ ├── resdb-orm/ # Python ORM -│ │ ├── resvault-sdk/ # ResVault SDK -│ │ └── rust-sdk/ # Rust SDK -│ ├── smart-contract/ # Smart contract ecosystem -│ │ ├── rescontract/ # ResContract repository -│ │ ├── resilient-contract-kit/ # Contract development toolkit -│ │ └── smart-contract-graphql/ # Smart contract GraphQL service -│ └── tools/ # Development and operational tools -│ ├── create-resilient-app/ # App scaffolding tool -│ └── resvault/ # ResVault tool -├── executor/ # Transaction execution engine -│ ├── common/ # Common execution utilities -│ ├── contract/ # Smart contract execution -│ ├── kv/ # Key-value execution -│ └── utxo/ # UTXO execution -├── interface/ # Client interfaces and APIs -│ ├── common/ # Common interface utilities -│ ├── contract/ # Smart contract 
interface -│ ├── kv/ # Key-value interface -│ ├── rdbc/ # ResilientDB Database Connectivity -│ └── utxo/ # UTXO interface -├── monitoring/ # Core monitoring components -├── platform/ # Core platform components -│ ├── common/ # Common platform utilities -│ ├── config/ # Configuration management -│ ├── consensus/ # Consensus protocols -│ │ ├── checkpoint/ # Checkpoint management -│ │ ├── execution/ # Transaction execution -│ │ ├── ordering/ # Transaction ordering -│ │ └── recovery/ # Recovery mechanisms -│ ├── networkstrate/ # Network strategy layer -│ ├── proto/ # Protocol definitions -│ ├── rdbc/ # RDBC implementation -│ └── statistic/ # Statistics and metrics -├── proto/ # Protocol buffer definitions -│ ├── contract/ # Smart contract protos -│ ├── kv/ # Key-value protos -│ └── utxo/ # UTXO protos -├── scripts/ # Deployment and utility scripts -│ └── deploy/ # Deployment scripts -├── service/ # Service implementations -│ ├── contract/ # Smart contract service -│ ├── kv/ # Key-value service -│ ├── tools/ # Service tools -│ ├── utils/ # Service utilities -│ └── utxo/ # UTXO service -├── third_party/ # Third-party dependencies -└── tools/ # Development and build tools -``` - -**Note**: The `ecosystem/` directory contains git subtrees for ecosystem components. You can clone the repository without ecosystem components for a smaller, faster download. See [ecosystem/README.md](ecosystem/README.md) for details. - -## Build and Deploy ResilientDB - -Next, we show how to quickly build ResilientDB and deploy 4 replicas and 1 client proxy on your local machine. The proxy acts as an interface for all the clients. It batches client requests and forwards these batches to the replica designated as the leader. The 4 replicas participate in the PBFT consensus to order and execute these batches. Post execution, they return the response to the leader. 
- -Install dependencies: - - ./INSTALL.sh - -For non-root users, see [INSTALL/README.md](https://github.com/apache/incubator-resilientdb/blob/master/INSTALL/README.md) - -Run ResilientDB (Providing a Key-Value Service): - - ./service/tools/kv/server_tools/start_kv_service.sh - -- This script starts 4 replicas and 1 client. Each replica instantiates a key-value store. - -Build Interactive Tools: - - bazel build service/tools/kv/api_tools/kv_service_tools - -### Issues ### -If you cannot build the project successfully, try to reduce the bazel jobs [here]( -https://github.com/apache/incubator-resilientdb/blob/master/.bazelrc#L1). - -## Functions ## -ResilientDB supports two types of functions: version-based and non-version-based. -Version-based functions will leverage versions to protect each update, versions must be obtained before updating a key. - -***Note***: Version-based functions are not compatible with non-version-based functions. Do not use both in your applications. - -We show the functions below and show how to use [kv_service_tools](service/tools/kv/api_tools/kv_service_tools.cpp) to test the function. - -### Version-Based Functions ### -#### Get #### -Obtain the value of `key` with a specific version `v`. - - kv_service_tools --config config_file --cmd get_with_version --key key --version v - -| parameters | descriptions | -| ---- | ---- | -| config | the path of the client config which points to the db entrance | -| cmd | get_with_version | -| key | the key you want to obtain | -| version | the version you want to obtain. (If the `v` is 0, it will return the latest version | - - -Example: - - bazel-bin/service/tools/kv/api_tools/kv_service_tools --config service/tools/config/interface/service.config --cmd get_with_version --key key1 --version 0 - -Results: -> get key = key1, value = value: "v2" -> version: 2 - -#### Set #### -Set `value` to the key `key` based on version `v`. 
- - kv_service_tools --config config_file --cmd set_with_version --key key --version v --value value - -| parameters | descriptions | -| ---- | ---- | -| config | the path of the client config which points to the db entrance | -| cmd | set_with_version | -| key | the key you want to set | -| version | the version you have obtained. (If the version has been changed during the update, the transaction will be ignored) | -| value | the new value | - -Example: - - bazel-bin/service/tools/kv/api_tools/kv_service_tools --config service/tools/config/interface/service.config --cmd set_with_version --key key1 --version 0 --value v1 - -Results: -> set key = key1, value = v3, version = 2 done, ret = 0 -> -> current value = value: "v3" -> version: 3 - -#### Get Key History #### -Obtain the update history of key `key` within the versions [`v1`, `v2`]. - - kv_service_tools --config config_file --cmd get_history --key key --min_version v1 --max_version v2 - - -| parameters | descriptions | -| ---- | ---- | -| config | the path of the client config which points to the db entrance | -| cmd | get_history | -| key | the key you want to obtain | -| min_version | the minimum version you want to obtain | -| max_version | the maximum version you want to obtain | - -Example: - - bazel-bin/service/tools/kv/api_tools/kv_service_tools --config service/tools/config/interface/service.config --cmd get_history --key key1 --min_version 1 --max_version 2 - -Results: - -> get history key = key1, min version = 1, max version = 2
-> value =
-> item {
->   key: "key1"
->   value_info {
->    value: "v1"
->    version: 2
->  }
-> }
-> item {
->   key: "key1"
->   value_info {
->    value: "v0"
->    version: 1
->  }
-> } - -#### Get Top #### -Obtain the recent `top_number` history of the key `key`. - - kv_service_tools --config config_path --cmd get_top --key key --top top_number - -| parameters | descriptions | -| ---- | ---- | -| config | the path of the client config which points to the db entrance | -| cmd | get_top | -| key | the key you want to obtain | -| top | the number of the recent updates | - -Example: - - bazel-bin/service/tools/kv/api_tools/kv_service_tools --config service/tools/config/interface/service.config --cmd get_top --key key1 --top 1 - -Results: - ->key = key1, top 1
-> value =
-> item {
-> key: "key1"
->  value_info {
->    value: "v2"
->    version: 3
->  }
->} - -#### Get Key Range #### -Obtain the values of the keys in the ranges [`key1`, `key2`]. Do not use this function in your practice code - - kv_service_tools --config config_file --cmd get_key_range_with_version --min_key key1 --max_key key2 - -| parameters | descriptions | -| ---- | ---- | -| config | the path of the client config which points to the db entrance | -| cmd | get_key_range_with_version | -| min_key | the minimum key | -| max_key | the maximum key | - -Example: - - bazel-bin/service/tools/kv/api_tools/kv_service_tools --config service/tools/config/interface/service.config --cmd get_key_range_with_version --min_key key1 --max_key key3 - -Results: - ->min key = key1 max key = key2
-> getrange value =
-> item {
->   key: "key1"
->   value_info {
->    value: "v0"
->    version: 1
->   }
-> }
-> item {
->   key: "key2"
->   value_info {
->    value: "v1"
->    version: 1
->   }
->} - - -### Non-Version-Based Function ### -#### Set ##### -Set `value` to the key `key`. - - kv_service_tools --config config_file --cmd set --key key --value value - -| parameters | descriptions | -| ---- | ---- | -| config | the path of the client config which points to the db entrance | -| cmd | set | -| key | the key you want to set | -| value | the new value | - -Example: - - bazel-bin/service/tools/kv/api_tools/kv_service_tools --config service/tools/config/interface/service.config --cmd set --key key1 --value value1 - -Results: -> set key = key1, value = v1, done, ret = 0 - -#### Get #### -Obtain the value of `key`. - - kv_service_tools --config config_file --cmd get --key key - -| parameters | descriptions | -| ---- | ---- | -| config | the path of the client config which points to the db entrance | -| cmd | get | -| key | the key you want to obtain | - -Example: - - bazel-bin/service/tools/kv/api_tools/kv_service_tools --config service/tools/config/interface/service.config --cmd get --key key1 - -Results: -> get key = key1, value = "v2" - - -#### Get Key Range #### -Obtain the values of the keys in the ranges [`key1`, `key2`]. Do not use this function in your practice code - - kv_service_tools --config config_path --cmd get_key_range --min_key key1 --max_key key2 - -| parameters | descriptions | -| ---- | ---- | -| config | the path of the client config which points to the db entrance | -| cmd | get_key_range | -| min_key | the minimum key | -| max_key | the maximum key | - -Example: - - bazel-bin/service/tools/kv/api_tools/kv_service_tools --config service/tools/config/interface/service.config --cmd get_key_range --min_key key1 --max_key key3 - -Results: -> getrange min key = key1, max key = key3
-> value = [v3,v2,v1] - - -## Deployment Script - -We also provide access to a [deployment script](https://github.com/resilientdb/resilientdb/tree/master/scripts/deploy) that allows deployment on distinct machines. - -## Deploy via Docker - -1. **Install Docker** - Before getting started, make sure you have Docker installed on your system. If you don't have Docker already, you can download and install it from the official [Docker website](https://www.docker.com/products/docker-desktop/). - -2. **Pull the Latest ResilientDB Image** - Choose the appropriate [ResilientDB image](https://hub.docker.com/repository/docker/expolab/resdb/general) for your machine's architecture: - - - For amd architecture, run: - ```shell - docker pull expolab/resdb:amd64 - ``` - - - For Apple Silicon (M1/M2) architecture, run: - ```shell - docker pull expolab/resdb:arm64 - ``` - -3. **Run a Container with the Pulled Image** - Launch a Docker container using the ResilientDB image you just pulled: - - - For amd architecture, run: - ```shell - docker run -d --name myserver expolab/resdb:amd64 - ``` - - - For Apple Silicon (M1/M2) architecture, run: - ```shell - docker run -d --name myserver expolab/resdb:arm64 - ``` - -4. **Test with Set and Get Commands** - Exec into the running server: - ```shell - docker exec -it myserver bash - ``` - -5. **NOTE: If you encounter a Connection Refused error** - - Run the following command within the container: - ```shell - ./service/tools/kv/server_tools/start_kv_service.sh - ``` - - Verify the functionality of the service by performing set and get operations provided above [functions](README.md#functions). - - -## Custom Ports ## -When starting the service locally, current services are running on 10000 port-base with 5 services where the server config is located [here](https://github.com/apache/incubator-resilientdb/blob/master/service/tools/config/server/server.config) - -If you want to change the setting, you need to generate the certificates. 
- -Go the the workspace where the resilientdb repo is localted. - -Change the setting parameters here and run the script: - ```shell - ./service/tools/kv/server_tools/generate_config.sh - ``` - -Then re-run the start script: - ```shell - ./service/tools/kv/server_tools/start_kv_service.sh - ``` - - - -## Smart Contract ## -If you want to use smart contracts, please go to: -https://blog.resilientdb.com/2025/02/14/GettingStartedSmartContract.html +To fix this, go to the repository it listed `cd /etc/apt/sources.list.d`, and look at the files with `ls`. You should see two: `bazel.list` and another weirdly formatted one (it came out as `bazel.list^M` for me). Remove them both using `sudo rm`. \ No newline at end of file From 9ac5e378c7a2ba831180ba505e5f33477a008863 Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Thu, 13 Nov 2025 00:32:28 -0800 Subject: [PATCH 06/79] Remove bazelversion (local) and carraige returns from shell scripts --- .bazelversion | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .bazelversion diff --git a/.bazelversion b/.bazelversion deleted file mode 100644 index 09b254e90..000000000 --- a/.bazelversion +++ /dev/null @@ -1 +0,0 @@ -6.0.0 From 8b4a9549ebaa3e3a6f5e186673cdf02915a5f233 Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Fri, 14 Nov 2025 12:50:53 -0800 Subject: [PATCH 07/79] Added instructions for dealing with C++ errors to the README --- README.md | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 934bfb71d..4f9a8e385 100644 --- a/README.md +++ b/README.md @@ -48,4 +48,45 @@ This looks something like `Ignoring file 'bazel.list ' in directory '/etc/apt/so I believe this happens becuase bazel (the build tool used by ResDB) tries to save it's version into the file `bazel.list\r`, accidentally adding a cairrage return to the end. -To fix this, go to the repository it listed `cd /etc/apt/sources.list.d`, and look at the files with `ls`. 
You should see two: `bazel.list` and another weirdly formatted one (it came out as `bazel.list^M` for me). Remove them both using `sudo rm`. \ No newline at end of file +To fix this, go to the repository it listed `cd /etc/apt/sources.list.d`, and look at the files with `ls`. You should see two: `bazel.list` and another weirdly formatted one (it came out as `bazel.list^M` for me). Remove them both using `sudo rm`. + +#### Using the right version of gcc and g++ +If the install script runs, but the start_kv service tool starts to display a ton of errors with what looks like c++ code, you most likely need to change the version of c++ that bazel is using. + +**Basic Fix** +Yoshiki found that gcc and g++ 12 work best with Resilient DB. If you're getting these errors, run the following commands: + +``` +sudo apt install gcc-12 g++-12 +export CC=/usr/bin/gcc-12 +export CXX=/usr/bin/g++-12 +bazel clean +``` + +re-run the INSTALL script, then try to run the start_kv service + +**Advanced Fix** +This alone didn't work for me, and I needed to tell my whole Ubuntu distribution to use gcc-12 and g++-12 by default. + +Run these commands to let your device know that version 12 is a usable version of gcc. Note that you need to run `sudo apt install gcc-12 g++-12` to get them on your device first. + +`sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 90` +`sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 90` + +Note that the last two numbers (90) are an arbitrary priority value. You can set them to whatever you want. + +Now run both commands: + +`sudo update-alternatives --config gcc` +`sudo update-alternatives --config g++` + +Both should bring up a list of allowed versions of gcc/g++, with a pointer pointing to your Ubuntu's default version of each. For both, select the number associated with version 12. 
+ +Verify that you're now using version 12 for both: + +`gcc --version` +`g++ --version` + +Re-running the INSTALL and kv_start scripts should work now. + +This took me a couple tries to get right, and mistakes with `update-alternatives` were tough to recover from. Uninstalling WSL/Ubuntu then reinstalling it fresh always gets a fresh version of gcc / g++ that works again. Note that this will remove everything in your _Ubuntu_ distro (not everything on your computer) \ No newline at end of file From 33b218214dcfef883dfe46be543249c74f7d8531 Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Fri, 14 Nov 2025 14:49:46 -0800 Subject: [PATCH 08/79] Reorganized README and added more bugfixes for installation --- README.md | 104 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 81 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 4f9a8e385..b30824559 100644 --- a/README.md +++ b/README.md @@ -15,43 +15,95 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + + - - - - - + + This specific repository is forked from the development build + of ResilientDB for a Fall 2025 UC Davis graduate class project. + + Participants: Steven Shoemaker, Tiching Kao, Regan Yang, + Yoshiki Yamaguchi, Ritesh Patro --> +# Table of Contents +1. [Running the Indexing Project](#Running-the-Indexing-Project) +2. [ResilientDB Installation](#ResilientDB-Installation) +3. [ResilientDB Installation Bugs](#ResilientDB-Installation-Bugs) + + +## Running the Indexing Project +TODO + + +## ResilientDB Installation Forked from [this repository](https://github.com/apache/incubator-resilientdb), for more complex setup instructions, please head there. -# (In Progress) How to Setup The Repository -Hey all, Steven here, this is the quickstart guide to getting the Indexer project up and running. +Hey all, Steven here, this is the quickstart guide to getting ResDB up and running. 
If you've already set up and installed your repo, start from step 5. -1. Clone it to your local device with `git clone https://github.com/apache/incubator-resilientdb.git` +1. Clone this repo to your local device with `git clone https://github.com/apache/incubator-resilientdb.git` -2. (Windows only) ResilientDB uses bash shell commands (.sh extension), which windows doesn't support natively (pretty sure mac _DOES_). Fortunately, Windows 11 and most versions of Windows 10 have an easy to use subsystem for Linux, WSL. Link on how to setup [here](https://learn.microsoft.com/en-us/windows/wsl/install). +2. (Windows only) ResilientDB uses bash shell commands (.sh extension), which Windows doesn't support natively. Fortunately, Windows 11 and most versions of Windows 10 have an easy-to-use subsystem for Linux, WSL. Instructions on how to set it up are [here](https://learn.microsoft.com/en-us/windows/wsl/install). After installing WSL, you can open a bash terminal by running the program `Ubuntu`. This will open from the profile of your newly created User for WSL, but you can still access to your Windows files in windows via `cd ~/../../mnt`, which should navigate you to the location of your C/D drive. -3. Navigate to the project folder and run `sudo sh INSTALL.sh` +3. (Windows only?) There's a mismatch between the way Windows and Linux end lines in files. In short, on Windows machines the shell scripts will all have an unnecessary `\r` (carriage return) character at the end of every line. This _will_ cause problems with execution of these files. Use the sed command (at the top-level of the cloned repo) to remove the extraneous characters: + +``` +sudo sed -i 's/\r//g' INSTALL.sh +sudo sed -i 's/\r//g' ./service/tools/kv/server_tools/start_kv_service.sh +``` + +4. Navigate to the project folder and run `sudo sh INSTALL.sh` + +5. To start the k/v store, run `./service/tools/kv/server_tools/start_kv_service.sh` + +6. 
To start tools for the k/v store, run `bazel build service/tools/kv/api_tools/kv_service_tools` + +If you're starting from step 1, you'll more likely than not run into bugs. Here is a list of the ones we've come across and their fixes: + + +## ResilientDB Installation Bugs +### Carriage returns & running shell files on Windows +For Windows (and mac?) users, we need to make bash files friendly for your OS. To do this, we can just run a simple character replacement program on any shell files, `sed -i 's/\r//g' YOUR_SHELL_SCRIPT.sh`. We talk about doing this for INSTALL.sh and start_kv_service.sh in the Installation guide, but it will need to be done for any shell file you want to run. For issues with sed, instead run `dos2unix YOUR_SHELL_SCRIPT.sh` -### Windows - Linux Mismatch +### Socket Closed +We found that this is likely an issue of WSL not being allocated enough resources. -If you ran the command above on Windows/Ubuntu, and you recieved at least one error message, you're cooked. I've spent the last few hours trying to debug and figure out what happened, and I THINK this is because of a mismatch between the way Windows and Linux/Mac handle newlines vs carraige returns. As I understand it, `INSTALL.sh` effectively adds an unncessary carraige return character (\r) to the end of every line, which messes with file names. +(Windows Only) Shut off your WSL (`wsl --shutdown`). Navigate to %UserProfile%/.wslconfig, and replace the text in that file with the following: -#### The Main Fix -To clean this up, we need to make the install file Bash friendly again. To do this, I ran both `sed -i 's/\r//g' INSTALL.sh` (a simple character replacement program) and `dos2unix your_script.sh` (an advanced program built for this exact purpose, you may have to install it with apt). Your miles may vary, and likely this will work with only one of the two commands. 
+``` +[wsl2] +memory=6GB +processors=4 + +``` + +(or as close as you can get, in accordance with your device's capabilities) -After this, try to run `sudo sh INSTALL.sh` again and see if you get any of the following errors: -#### Missing Bazel Version +### Missing Bazel Version This looks something like `(specified in /mnt/c/Users/username/Desktop/indexers-ECS265-Fall2025/.bazelversion), but it wasn't found in /usr/bin.` -This should go away if you delete this .bazelversion in your `indexers-ECS265-Fall2025` folder +This goes away if you delete the .bazelversion in your `indexers-ECS265-Fall2025` folder. The file should no longer be there. -#### Invalid Filename Extension + +### Invalid Filename Extension This looks something like `Ignoring file 'bazel.list ' in directory '/etc/apt/sources.list.d/' as it has an invalid filename extension` -I believe this happens becuase bazel (the build tool used by ResDB) tries to save it's version into the file `bazel.list\r`, accidentally adding a cairrage return to the end. +This happens because bazel (the build tool used by ResDB) tries to save its version into the file `bazel.list\r`, accidentally adding a carriage return to the end. This is a common problem when running INSTALL.sh without deleting the carriage returns with sed. + +To fix this, go to the directory it listed, `cd /etc/apt/sources.list.d`, and look at the files with `ls`. You should see two: `bazel.list` and another weirdly formatted one (it came out as `bazel.list^M` for me). Remove them both using `sudo rm`. You'll need to run INSTALL.sh again after this step. + + +### Problems with /root/.cache/bazel +Sometimes when running `start_kv_service.sh`, you may face this problem. It encompasses any errors with /.cache/bazel. We _think_ it's a side-effect of trying to run the script with the wrong version of gcc/g++, causing bazel to cache incorrect information, and use that information even when later operating with the correct gcc/g++ versions. 
-To fix this, go to the repository it listed `cd /etc/apt/sources.list.d`, and look at the files with `ls`. You should see two: `bazel.list` and another weirdly formatted one (it came out as `bazel.list^M` for me). Remove them both using `sudo rm`. +First, from the top-level of your repo, run `bazel clean`. Then, remove the entire bazel folder from the cache, `sudo rm -r /root/.cache/bazel` (in some cases, this may be in your home/user directory, run the command in accordance with wherever your error is reporting from). Please make sure your gcc/g++ distribution is the correct version after this (the step below this one). -#### Using the right version of gcc and g++ -If the install script runs, but the start_kv service tool starts to display a ton of errors with what looks like c++ code, you most likely need to change the version of c++ that bazel is using. +Typically, the /.cache/ is safe to delete from, but we don't understand its function very well. If you have concerns, look into the purpose of the cache on your own time and determine your willingness to perform this step. + + +### Using the right version of gcc and g++ +If the install script runs, but the start_kv service tool starts to display a ton of errors with what looks like c++ code, you most likely need to change the version of gcc/g++ that bazel is using. **Basic Fix** Yoshiki found that gcc and g++ 12 work best with Resilient DB. If you're getting these errors, run the following commands: @@ -70,22 +122,28 @@ This alone didn't work for me, and I needed to tell my whole Ubuntu distribution Run these commands to let your device know that version 12 is a usable version of gcc. Note that you need to run `sudo apt install gcc-12 g++-12` to get them on your device first. 
-`sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 90` -`sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 90` +``` +sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 90 +sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 90 +``` Note that the last two numbers (90) are an arbitrary priority value. You can set them to whatever you want. Now run both commands: -`sudo update-alternatives --config gcc` -`sudo update-alternatives --config g++` +``` +sudo update-alternatives --config gcc +sudo update-alternatives --config g++ +``` Both should bring up an list of allowed versions of gcc/g++, with a pointer pointing to your Ubuntu's default version of each. For both, select the number associated with version 12. Verify that you're now using version 12 for both: -`gcc --version` -`g++ --version` +``` +gcc --version +g++ --version +``` Re-running the INSTALL and kv_start scripts should work now. From c2e2dbe59251cd60895659c6b27000a961d36883 Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Fri, 14 Nov 2025 14:53:01 -0800 Subject: [PATCH 09/79] Better markdown formatting for README --- README.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b30824559..472ca2f3e 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ ## Running the Indexing Project TODO +\ ## ResilientDB Installation Forked from [this repository](https://github.com/apache/incubator-resilientdb), for more complex setup instructions, please head there. @@ -60,11 +61,14 @@ sudo sed -i 's/\r//g' ./service/tools/kv/server_tools/start_kv_service.sh If you're starting from step 1, you'll more likely than not run into bugs. Here are a list of ones we've come across and their fixes: +\ ## ResilientDB Installation Bugs ### Carriage returns & running shell files on Windows For Windows (and mac?) users, we need to make bash files friendly for your OS. 
To do this, we can just run a simple character replacement program on any shell files, `sed -i 's/\r//g' YOUR_SHELL_SCRIPT.sh`. We talk about doing this for INSTALL.sh and start_kv_service.sh in the Installation guide, but it will need to be done for any shell file you want to run. For issues with sed, instead run and `dos2unix YOUR_SHELL_SCRIPT.sh` +\ + ### Socket Closed We found that this is likely an issue of WSL not being allocated enough resources. @@ -79,12 +83,14 @@ processors=4 (or as close as you can get, in accordance with your device's capabilities) +\ ### Missing Bazel Version This looks something like `(specified in /mnt/c/Users/username/Desktop/indexers-ECS265-Fall2025/.bazelversion), but it wasn't found in /usr/bin.` This goes away if you delete the .bazelversion in your `indexers-ECS265-Fall2025` folder. The file should no longer be there. +\ ### Invalid Filename Extension This looks something like `Ignoring file 'bazel.list ' in directory '/etc/apt/sources.list.d/' as it has an invalid filename extension` @@ -93,6 +99,7 @@ This happens becuase bazel (the build tool used by ResDB) tries to save it's ver To fix this, go to the repository it listed `cd /etc/apt/sources.list.d`, and look at the files with `ls`. You should see two: `bazel.list` and another weirdly formatted one (it came out as `bazel.list^M` for me). Remove them both using `sudo rm`. You'll need to run INSTALL.sh again after this step. +\ ### Problems with /root/.cache/bazel Sometimes when running `start_kv_service.sh`, you may face this problem. It encompasses any errors with /.cache/bazel. We _think_ it's a side-effect of trying to run the script with the wrong version of gcc/g++, causing bazel to cache incorrect information, and use that information even when later operating with gcc/g++ versions. @@ -101,11 +108,12 @@ First, from the top-level of your repo, run `bazel clean`. 
Then, remove the enti Typically, the /.cache/ is safe to delete from, but we don't understand it's function very well. If you have concerns, look into the purpose of the cache on your own time and determine your willingness to perform this step. +\ ### Using the right version of gcc and g++ If the install script runs, but the start_kv service tool starts to display a ton of errors with what looks like c++ code, you most likely need to change the version of gcc/g++ that bazel is using. -**Basic Fix** +**Basic Fix**\ Yoshiki found that gcc and g++ 12 work best with Resilient DB. If you're getting these errors, run the following commands: ``` @@ -117,7 +125,7 @@ bazel clean re-run the INSTALL script, then try to run the start_kv service -**Advanced Fix** +**Advanced Fix**\ This alone didn't work for me, and I needed to tell my whole Ubuntu distribution to use gcc-12 and g++-12 by default. Run these commands to let your device know that version 12 is a usable version of gcc. Note that you need to run `sudo apt install gcc-12 g++-12` to get them on your device first. From 7076ad79661a71f623ef5df6aaa66ea91f5c6dc5 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 19 Nov 2025 12:57:18 -0800 Subject: [PATCH 10/79] Add installation instructions for Bazel 6.0.0 Added instructions for installing Bazel 6.0.0 and reverting from newer versions. --- README.md | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 472ca2f3e..d8b8ebb54 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,33 @@ This looks something like `(specified in /mnt/c/Users/username/Desktop/indexers- This goes away if you delete the .bazelversion in your `indexers-ECS265-Fall2025` folder. The file should no longer be there. +This project strictly requires **Bazel 6.0.0**. 
If you have accidentally upgraded to a newer version (e.g., via `apt upgrade`) and need to revert it manually, follow these steps: + +1. **Remove the current Bazel version** + ```bash + sudo apt-get remove bazel + # If previously installed manually, remove the binary directly: + # sudo rm /usr/local/bin/bazel + ``` + +2. **Download Bazel 6.0.0** + ```bash + wget [https://github.com/bazelbuild/bazel/releases/download/6.0.0/bazel-6.0.0-linux-x86_64](https://github.com/bazelbuild/bazel/releases/download/6.0.0/bazel-6.0.0-linux-x86_64) -O bazel + ``` + +3. **Install to system path** + ```bash + chmod +x bazel + sudo mv bazel /usr/local/bin/bazel + ``` + +4. **Refresh shell cache and verify** + It is important to clear the shell's location cache so it finds the new binary. + ```bash + hash -r + bazel --version + # Output should be: bazel 6.0.0 + ``` \ ### Invalid Filename Extension @@ -155,4 +182,4 @@ g++ --version Re-running the INSTALL and kv_start scripts should work now. -This took me a couple tries to get right, and mistakes with `update-alternatives` were tough to recover from. Uninstalling WSL/Ubuntu then reinstalling it fresh always gets a fresh version of gcc / g++ that works again. Note that this will remove everything in your _Ubuntu_ distro (not everything on your computer) \ No newline at end of file +This took me a couple tries to get right, and mistakes with `update-alternatives` were tough to recover from. Uninstalling WSL/Ubuntu then reinstalling it fresh always gets a fresh version of gcc / g++ that works again. 
Note that this will remove everything in your _Ubuntu_ distro (not everything on your computer) From 8a2177831c6b93198a7d4569b563f47a299b8cdf Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 19 Nov 2025 15:42:33 -0800 Subject: [PATCH 11/79] Add instructions for running ResDB-ORM Added instructions on how to run ResDB-ORM, including steps to start backend services and configure the environment. --- README.md | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d8b8ebb54..777ef0509 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ 1. [Running the Indexing Project](#Running-the-Indexing-Project) 2. [ResilientDB Installation](#ResilientDB-Installation) 3. [ResilientDB Installation Bugs](#ResilientDB-Installation-Bugs) - +4. [How to Run ResDB-ORM](#How-to-Run-ResDB-ORM) ## Running the Indexing Project TODO @@ -183,3 +183,55 @@ g++ --version Re-running the INSTALL and kv_start scripts should work now. This took me a couple tries to get right, and mistakes with `update-alternatives` were tough to recover from. Uninstalling WSL/Ubuntu then reinstalling it fresh always gets a fresh version of gcc / g++ that works again. Note that this will remove everything in your _Ubuntu_ distro (not everything on your computer) + +## How to Run ResDB-ORM + +To run ResDB-ORM, you must first start the backend services (**KV Service** and **GraphQL Server**) and then connect to them using **ResDB-ORM**. 
+ +### Step 1: Start the KV Service +Run the following script in your ResilientDB directory: +```bash +./service/tools/kv/server_tools/start_kv_service.sh +``` + +### Step 2: Start the GraphQL Server +Open a new terminal tab, then setup and start the GraphQL server: +(1) Clone the repository and navigate into it: +```bash +git clone [https://github.com/apache/incubator-resilientdb-graphql.git](https://github.com/apache/incubator-resilientdb-graphql.git) +cd incubator-resilientdb-graphql +``` +(2) Create and activate a virtual environment: +```bash +python3.10 -m venv venv +source venv/bin/activate +``` +(3) Build and run the service: +```bash +bazel build service/http_server:crow_service_main +bazel-bin/service/http_server/crow_service_main service/tools/config/interface/service.config service/http_server/server_config.config +``` +***Important:*** Check the first line of the startup log and copy the displayed URL (e.g., ```http://0.0.0.0:18000```). You will need this for the configuration step. + +### Step 3: Clone ResDB-ORM repository and install dependencies: +Open another new terminal tab to set up the ORM and verify the operation. +```bash +git clone [https://github.com/apache/incubator-resilientdb-ResDB-ORM.git](https://github.com/apache/incubator-resilientdb-ResDB-ORM.git) +cd ResDB-ORM + +python3.10 -m venv venv +source venv/bin/activate + +pip install -r requirements.txt +``` + +### Step 4: Open ```config.yaml``` and update the db_root_url with the GraphQL Server URL you copied in the previous step. 
+```yaml +database: + db_root_url: +``` + +### Step 5: Run the test script to ensure everything is working correctly: +```bash +python tests/test.py +``` From c0d035b3c3400102b7111839f3b121f8f0d8d36c Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 19 Nov 2025 15:43:07 -0800 Subject: [PATCH 12/79] Update Step 4 reference to match previous steps --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 777ef0509..1ece4c041 100644 --- a/README.md +++ b/README.md @@ -225,7 +225,7 @@ source venv/bin/activate pip install -r requirements.txt ``` -### Step 4: Open ```config.yaml``` and update the db_root_url with the GraphQL Server URL you copied in the previous step. +### Step 4: Open ```config.yaml``` and update the db_root_url with the GraphQL Server URL you copied in Step 2. ```yaml database: db_root_url: From d1dec348e1e13904c140ad2949801fdc064286e7 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 19 Nov 2025 16:31:16 -0800 Subject: [PATCH 13/79] Fix wget command formatting in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1ece4c041..1d07740b6 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ This project strictly requires **Bazel 6.0.0**. If you have accidentally upgrade 2. **Download Bazel 6.0.0** ```bash - wget [https://github.com/bazelbuild/bazel/releases/download/6.0.0/bazel-6.0.0-linux-x86_64](https://github.com/bazelbuild/bazel/releases/download/6.0.0/bazel-6.0.0-linux-x86_64) -O bazel + wget https://github.com/bazelbuild/bazel/releases/download/6.0.0/bazel-6.0.0-linux-x86_64 -O bazel ``` 3. 
**Install to system path** From 62028c01766868e0f407d5dd86a1f4dd304b51cd Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 19 Nov 2025 16:38:11 -0800 Subject: [PATCH 14/79] Fix git clone commands in README Removed markdown links from git clone commands in README. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1d07740b6..162b78adf 100644 --- a/README.md +++ b/README.md @@ -198,7 +198,7 @@ Run the following script in your ResilientDB directory: Open a new terminal tab, then setup and start the GraphQL server: (1) Clone the repository and navigate into it: ```bash -git clone [https://github.com/apache/incubator-resilientdb-graphql.git](https://github.com/apache/incubator-resilientdb-graphql.git) +git clone https://github.com/apache/incubator-resilientdb-graphql.git cd incubator-resilientdb-graphql ``` (2) Create and activate a virtual environment: @@ -216,7 +216,7 @@ bazel-bin/service/http_server/crow_service_main service/tools/config/interface/s ### Step 3: Clone ResDB-ORM repository and install dependencies: Open another new terminal tab to set up the ORM and verify the operation. 
```bash -git clone [https://github.com/apache/incubator-resilientdb-ResDB-ORM.git](https://github.com/apache/incubator-resilientdb-ResDB-ORM.git) +git clone https://github.com/apache/incubator-resilientdb-ResDB-ORM.git cd ResDB-ORM python3.10 -m venv venv From b0806cfd5c36f4c00df327aa72869da23dd0f311 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 19 Nov 2025 16:39:13 -0800 Subject: [PATCH 15/79] Fix formatting in README for virtual environment setup --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 162b78adf..34a61ce1d 100644 --- a/README.md +++ b/README.md @@ -204,7 +204,6 @@ cd incubator-resilientdb-graphql (2) Create and activate a virtual environment: ```bash python3.10 -m venv venv -source venv/bin/activate ``` (3) Build and run the service: ```bash From e9edddb3cd1fa315e1a2167d24f4bb1d4d042517 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 19 Nov 2025 16:48:36 -0800 Subject: [PATCH 16/79] Add resdb-orm installation to README Added installation instruction for resdb-orm package. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 34a61ce1d..72c012a89 100644 --- a/README.md +++ b/README.md @@ -222,6 +222,7 @@ python3.10 -m venv venv source venv/bin/activate pip install -r requirements.txt +pip install resdb-orm ``` ### Step 4: Open ```config.yaml``` and update the db_root_url with the GraphQL Server URL you copied in Step 2. 
From 88bed2b244ad2e1da1877628c3d0d4747daba98c Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 19 Nov 2025 16:53:55 -0800 Subject: [PATCH 17/79] Fix clone command directory in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 72c012a89..4107aa184 100644 --- a/README.md +++ b/README.md @@ -216,7 +216,7 @@ bazel-bin/service/http_server/crow_service_main service/tools/config/interface/s Open another new terminal tab to set up the ORM and verify the operation. ```bash git clone https://github.com/apache/incubator-resilientdb-ResDB-ORM.git -cd ResDB-ORM +cd incubator-resilientdb-ResDB-ORM python3.10 -m venv venv source venv/bin/activate From 8deaef860770b06591ea5456a49f3bf8d5e17e06 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 19 Nov 2025 17:14:00 -0800 Subject: [PATCH 18/79] Update README for virtual environment instructions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4107aa184..9faab7c8d 100644 --- a/README.md +++ b/README.md @@ -201,7 +201,7 @@ Open a new terminal tab, then setup and start the GraphQL server: git clone https://github.com/apache/incubator-resilientdb-graphql.git cd incubator-resilientdb-graphql ``` -(2) Create and activate a virtual environment: +(2) Create a virtual environment: ```bash python3.10 -m venv venv ``` From 7f41ddc4b848d10e1ae5f296914a77d79312db54 Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Thu, 20 Nov 2025 12:27:31 +0900 Subject: [PATCH 19/79] Add configuration, indexing, and searching functionality for ResDB-ORM --- hnsw_orm_test/config.py | 33 +++++++ hnsw_orm_test/indexer.py | 102 ++++++++++++++++++++ hnsw_orm_test/populate.py | 51 ++++++++++ hnsw_orm_test/search.py | 72 ++++++++++++++ leann_resdb_tiny/id_mapping.json | 72 ++++++++++++++ leann_resdb_tiny/resdb.ids.txt | 14 +++ 
leann_resdb_tiny/resdb.index | Bin 0 -> 1314 bytes leann_resdb_tiny/resdb.leann.meta.json | 21 ++++ leann_resdb_tiny/resdb.leann.passages.idx | Bin 0 -> 115 bytes leann_resdb_tiny/resdb.leann.passages.jsonl | 14 +++ 10 files changed, 379 insertions(+) create mode 100644 hnsw_orm_test/config.py create mode 100644 hnsw_orm_test/indexer.py create mode 100644 hnsw_orm_test/populate.py create mode 100644 hnsw_orm_test/search.py create mode 100644 leann_resdb_tiny/id_mapping.json create mode 100644 leann_resdb_tiny/resdb.ids.txt create mode 100644 leann_resdb_tiny/resdb.index create mode 100644 leann_resdb_tiny/resdb.leann.meta.json create mode 100644 leann_resdb_tiny/resdb.leann.passages.idx create mode 100644 leann_resdb_tiny/resdb.leann.passages.jsonl diff --git a/hnsw_orm_test/config.py b/hnsw_orm_test/config.py new file mode 100644 index 000000000..fc9ba1462 --- /dev/null +++ b/hnsw_orm_test/config.py @@ -0,0 +1,33 @@ +import os +from pathlib import Path + +# --- Environment Settings --- +# Path to the ResDB-ORM configuration file (Auto-detection) +RESDB_CONFIG_PATH = Path.home() / "ResDB-ORM" / "config.yaml" +if not RESDB_CONFIG_PATH.exists(): + potential_local_path = Path("config.yaml").resolve() + if potential_local_path.exists(): + RESDB_CONFIG_PATH = potential_local_path + else: + env_path = os.getenv("RESDB_CONFIG_FILE") + if env_path: + RESDB_CONFIG_PATH = Path(env_path) + +# --- Model Settings (Lightweight!) --- +# prajjwal1/bert-tiny: Approx. 
17MB, 128 dimensions +MODEL_NAME = "prajjwal1/bert-tiny" + +# --- Directory Settings --- +# Directory where data will be saved +BASE_DIR = Path("./leann_resdb_tiny").resolve() +BASE_DIR.mkdir(parents=True, exist_ok=True) + +# Path to the index (Logical path) +# In reality, files like resdb.index will be generated +INDEX_PATH = BASE_DIR / "resdb.leann" +MAPPING_PATH = BASE_DIR / "id_mapping.json" + +# Polling interval (seconds) +POLL_INTERVAL = 5 + +print(f"Config: Using model '{MODEL_NAME}' at {BASE_DIR}") \ No newline at end of file diff --git a/hnsw_orm_test/indexer.py b/hnsw_orm_test/indexer.py new file mode 100644 index 000000000..d697d3357 --- /dev/null +++ b/hnsw_orm_test/indexer.py @@ -0,0 +1,102 @@ +import time +import json +import os +import gc +from resdb_orm.orm import ResDBORM +from leann import LeannBuilder +import config + +def main(): + # Limit parallelism for memory saving and determinism + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["TOKENIZERS_PARALLELISM"] = "false" + + print(f"Indexer started with lightweight model: {config.MODEL_NAME}") + print(f"Watching ResilientDB ({config.RESDB_CONFIG_PATH})...") + + try: + db = ResDBORM(config_path=str(config.RESDB_CONFIG_PATH)) + except Exception as e: + print(f"DB Connection failed: {e}") + return + + last_count = 0 + + while True: + try: + # 1. Fetch all data + all_txs = db.read_all() + + if not all_txs: + print(" No data yet. Waiting...") + time.sleep(config.POLL_INTERVAL) + continue + + current_count = len(all_txs) + + # 2. 
Change detection (Simple implementation) + if current_count > last_count: + print(f"\nDetected change: {last_count} -> {current_count} records.") + + # Sort by ID for determinism + sorted_txs = sorted(all_txs, key=lambda x: str(x.get('id', ''))) + + docs_text = [] + docs_mapping = [] + + for tx in sorted_txs: + try: + data = tx.get('data', {}) + if isinstance(data, str): + try: data = json.loads(data) + except: data = {"text": data} + + text = data.get('text', '') + if text: + docs_text.append(text) + docs_mapping.append({ + "resdb_id": str(tx['id']), + "original_key": data.get('original_key', 'unknown'), + "preview": text[:60] + }) + except Exception: + continue + + if docs_text: + print(f"Rebuilding index for {len(docs_text)} documents...") + + # 3. Build index (Specify lightweight model) + builder = LeannBuilder( + backend_name="hnsw", + model=config.MODEL_NAME + ) + + for t in docs_text: + builder.add_text(t) + + builder.build_index(str(config.INDEX_PATH)) + + # Save mapping + with open(config.MAPPING_PATH, 'w') as f: + json.dump(docs_mapping, f, indent=2) + + print(f"✅ Index updated at {config.INDEX_PATH}") + last_count = current_count + + # Free memory (Important) + del builder + gc.collect() + else: + print("No valid text documents found.") + + else: + # No changes + pass + + except Exception as e: + print(f"Error in polling loop: {e}") + + time.sleep(config.POLL_INTERVAL) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/hnsw_orm_test/populate.py b/hnsw_orm_test/populate.py new file mode 100644 index 000000000..0b15a5b72 --- /dev/null +++ b/hnsw_orm_test/populate.py @@ -0,0 +1,51 @@ +import time +from resdb_orm.orm import ResDBORM +import config # Import common settings + +# Data to populate (Hash map) +data_map = { + "doc1": "LEANN saves 97% storage compared to traditional vector databases.", + "doc2": "Tung Tung Tung Sahur called—they need their banana-crocodile hybrid back", + "doc3": "The weather in Davis is sunny today.", 
+ "doc4": "Understanding consensus protocols is key for blockchain.", + "doc5": "ResilientDB is a high-throughput blockchain fabric designed for performance.", + "doc6": "This project explores novel techniques for sharding in distributed ledgers.", + "doc7": "DeFi applications are often built on top of smart contracts.", + "doc8": "Practical Byzantine Fault Tolerance (PBFT) is a foundational agreement protocol.", + "doc9": "Cross-chain communication enables interoperability between different blockchains.", + "doc10": "The project requires using the ResilientDB Fabric unless approved otherwise.", + "doc11": "Mitochondria are the powerhouse of the cell.", + "doc12": "How to bake a perfect sourdough bread with a starters.", + "doc13": "The final report must be written in LaTeX using ACM templates.", + "doc14": "UC Davis is known for its agricultural studies." +} + +def main(): + print(f"Connecting to ResilientDB via {config.RESDB_CONFIG_PATH}...") + try: + db = ResDBORM(config_path=str(config.RESDB_CONFIG_PATH)) + except Exception as e: + print(f"Connection failed: {e}") + return + + print(f"Starting ingestion of {len(data_map)} documents...") + + for key, text in data_map.items(): + # Insert in a format easy for the indexer to recognize + payload = { + "text": text, + "original_key": key, + "type": "vector_source" + } + + try: + tx_id = db.create(payload) + print(f"Stored '{key}': {tx_id}") + time.sleep(0.2) # Short sleep to reduce load + except Exception as e: + print(f"Failed to store {key}: {e}") + + print("\n Data population complete!") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/hnsw_orm_test/search.py b/hnsw_orm_test/search.py new file mode 100644 index 000000000..36cb6010f --- /dev/null +++ b/hnsw_orm_test/search.py @@ -0,0 +1,72 @@ +import json +import sys +import os +from leann import LeannSearcher +import config + +def main(): + # Check index existence (Check actual existence of .index file) + real_index_file = 
config.INDEX_PATH.with_suffix(".index") + + if not real_index_file.exists() or not config.MAPPING_PATH.exists(): + print(f"Index not found at {real_index_file}") + print("Please run 'python populate.py' then 'python indexer.py' first.") + return + + print(f"Loading index from {config.INDEX_PATH}...") + print(f"Using model: {config.MODEL_NAME}") + + try: + # Initialize searcher specifying the lightweight model + searcher = LeannSearcher( + str(config.INDEX_PATH), + model=config.MODEL_NAME + ) + + with open(config.MAPPING_PATH, 'r') as f: + id_mapping = json.load(f) + + print(f"Loaded {len(id_mapping)} documents mapping.") + except Exception as e: + print(f"Failed to load index: {e}") + return + + print("\n=== LEANN (bert-tiny) x ResilientDB Search CLI ===") + print("Type 'exit' to quit.") + + while True: + try: + query = input("\nSearch Query: ").strip() + if not query: continue + if query.lower() in ['exit', 'quit']: break + + # Execute search + k = min(3, len(id_mapping)) + results = searcher.search(query, top_k=k) + + if not results: + print("No results found.") + continue + + print(f"Results for: '{query}'") + for rank, res in enumerate(results, 1): + try: + leann_id = int(res.id) + if 0 <= leann_id < len(id_mapping): + info = id_mapping[leann_id] + score = float(res.score) + + print(f" #{rank} [Score: {score:.4f}]") + print(f" Key: {info['original_key']} (ResDB ID: {info['resdb_id']})") + print(f" Text: {info['preview']}...") + except ValueError: + continue + + except KeyboardInterrupt: + print("\nBye!") + break + except Exception as e: + print(f"Search error: {e}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/leann_resdb_tiny/id_mapping.json b/leann_resdb_tiny/id_mapping.json new file mode 100644 index 000000000..814dcd3f2 --- /dev/null +++ b/leann_resdb_tiny/id_mapping.json @@ -0,0 +1,72 @@ +[ + { + "resdb_id": "0816b075f95122ef94c1ca1dca6a0fd65a45b23a5e6dd58f1cd4e8bdf298a47b", + "original_key": "doc12", + "preview": "How to 
bake a perfect sourdough bread with a starters." + }, + { + "resdb_id": "28bd1cff1b6a76325510f0a730922dbe81c031b6c18f62f8ae16ac64a56f757b", + "original_key": "doc13", + "preview": "The final report must be written in LaTeX using ACM template" + }, + { + "resdb_id": "2ef425dcb6c5511fad166226e5a039d1eff24f90abef214c5cbda5ff70f51377", + "original_key": "doc7", + "preview": "DeFi applications are often built on top of smart contracts." + }, + { + "resdb_id": "2fbfc533187926b3e57ae4f940c2bc7fea3d2b258ddb1a5da05a7bfc9a63bcc4", + "original_key": "doc5", + "preview": "ResilientDB is a high-throughput blockchain fabric designed " + }, + { + "resdb_id": "4671c1dc0f916696ae7c335e0e02c5986a25af3f135f329189c1699f3fca9c63", + "original_key": "doc6", + "preview": "This project explores novel techniques for sharding in distr" + }, + { + "resdb_id": "4736ac2450fcc29b088ae502818013129e9cc451f431c1d1172545af25e2e66e", + "original_key": "doc11", + "preview": "Mitochondria are the powerhouse of the cell." + }, + { + "resdb_id": "4b5b570779d5f5e9b28ab006c1f92b0165a1781281e85f90961dc380696b4ae3", + "original_key": "doc10", + "preview": "The project requires using the ResilientDB Fabric unless app" + }, + { + "resdb_id": "5118a58ea41404af80359dd9a360aa003ded18aad6979ed92dec66b6e6919999", + "original_key": "doc3", + "preview": "The weather in Davis is sunny today." + }, + { + "resdb_id": "7f10032c91627925177885688816876176c0d6ef458ac28424b5a7f7d72b4794", + "original_key": "doc9", + "preview": "Cross-chain communication enables interoperability between d" + }, + { + "resdb_id": "840fdb63fe531faee5472da9e468d5bd265e874d733ffeae5a9fd008b651cff7", + "original_key": "doc2", + "preview": "Tung Tung Tung Sahur called\u2014they need their banana-crocodile" + }, + { + "resdb_id": "af27a1fe2b5229ca5026bce5f2b40d1f33bbbfea4251f2361b8854d330a9c871", + "original_key": "doc4", + "preview": "Understanding consensus protocols is key for blockchain." 
+ }, + { + "resdb_id": "c378d5248bd1d346674c0533178a31e726c5358b881af95c96430c4b74444c80", + "original_key": "doc8", + "preview": "Practical Byzantine Fault Tolerance (PBFT) is a foundational" + }, + { + "resdb_id": "d7b3a8c84dc8d896b9d9675b4201224aa09ca816eee5eb7c5f7183d3c7bf3dd1", + "original_key": "doc14", + "preview": "UC Davis is known for its agricultural studies." + }, + { + "resdb_id": "e48a7190eb7c3e642d8fc1e6c0c14ad66735103ea09642e027816532a46c084f", + "original_key": "doc1", + "preview": "LEANN saves 97% storage compared to traditional vector datab" + } +] \ No newline at end of file diff --git a/leann_resdb_tiny/resdb.ids.txt b/leann_resdb_tiny/resdb.ids.txt new file mode 100644 index 000000000..6a68f6874 --- /dev/null +++ b/leann_resdb_tiny/resdb.ids.txt @@ -0,0 +1,14 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 diff --git a/leann_resdb_tiny/resdb.index b/leann_resdb_tiny/resdb.index new file mode 100644 index 0000000000000000000000000000000000000000..6e6a406b29f2e82a4f088b4b0ee587ab26590ec5 GIT binary patch literal 1314 zcmbV~Jx)SV5QPUsQSdLu7-Q(&05xi-4J^PyDq}(HUjO0-+<=L;RCd&qEQ1}m1f37h zNnXQ`-ehv_d3Vm4xpUv$ZT~U0A`ZjrSesw0!FGJMz^|)>GU6SaM+}7DJ$NlX!D&4n zdd=K;UFT+)QD%H(Of%*gE=-|+MJI*Tbs%EA5I0_m8@uAh8*yV_+;}f;d=fVn;>H(o zv_)d91kArxfqSy zA9eRK+8N~1AGP!63VNYWeh75i8T3m&IeaINg$^%#(BXyF8gg9&4QJ$&ix-}(Y8~gh z2iMp{Lk((fsv(cs?u-6?$J{RZqb|P8fHUq!-_+rZ`>^O6KRoP-j_>%h_pH~=pgK1^ ztzR_*^3eGXXsGQTsn4p;+WBb7CC9T^_CEN#AM|L~-N)YK`9Ap28y?I4gTL1K0cm9+ AEdT%j literal 0 HcmV?d00001 diff --git a/leann_resdb_tiny/resdb.leann.meta.json b/leann_resdb_tiny/resdb.leann.meta.json new file mode 100644 index 000000000..e0db38a7d --- /dev/null +++ b/leann_resdb_tiny/resdb.leann.meta.json @@ -0,0 +1,21 @@ +{ + "version": "1.0", + "backend_name": "hnsw", + "embedding_model": "facebook/contriever", + "dimensions": 768, + "backend_kwargs": { + "model": "prajjwal1/bert-tiny" + }, + "embedding_mode": "sentence-transformers", + "passage_sources": [ + { + "type": "jsonl", + "path": 
"resdb.leann.passages.jsonl", + "index_path": "resdb.leann.passages.idx", + "path_relative": "resdb.leann.passages.jsonl", + "index_path_relative": "resdb.leann.passages.idx" + } + ], + "is_compact": true, + "is_pruned": true +} \ No newline at end of file diff --git a/leann_resdb_tiny/resdb.leann.passages.idx b/leann_resdb_tiny/resdb.leann.passages.idx new file mode 100644 index 0000000000000000000000000000000000000000..a2e43c550c379bcdbb74e01100c7d0d93c7fd899 GIT binary patch literal 115 zcmZo*nVP`>0ku;!dKe9+cr)}c8cy+!>tQsS;(ert(Rhll24fGS$rRr?K+1HAFF#Wc zquCVSWFTce#rHapvY6tl%-q9h2~@(|!(?bM#g~r-NE=S^%?Hs&Q+!{5XrM_BtfhJY DJ4+y` literal 0 HcmV?d00001 diff --git a/leann_resdb_tiny/resdb.leann.passages.jsonl b/leann_resdb_tiny/resdb.leann.passages.jsonl new file mode 100644 index 000000000..85cec1ce8 --- /dev/null +++ b/leann_resdb_tiny/resdb.leann.passages.jsonl @@ -0,0 +1,14 @@ +{"id": "0", "text": "How to bake a perfect sourdough bread with a starters.", "metadata": {}} +{"id": "1", "text": "The final report must be written in LaTeX using ACM templates.", "metadata": {}} +{"id": "2", "text": "DeFi applications are often built on top of smart contracts.", "metadata": {}} +{"id": "3", "text": "ResilientDB is a high-throughput blockchain fabric designed for performance.", "metadata": {}} +{"id": "4", "text": "This project explores novel techniques for sharding in distributed ledgers.", "metadata": {}} +{"id": "5", "text": "Mitochondria are the powerhouse of the cell.", "metadata": {}} +{"id": "6", "text": "The project requires using the ResilientDB Fabric unless approved otherwise.", "metadata": {}} +{"id": "7", "text": "The weather in Davis is sunny today.", "metadata": {}} +{"id": "8", "text": "Cross-chain communication enables interoperability between different blockchains.", "metadata": {}} +{"id": "9", "text": "Tung Tung Tung Sahur called—they need their banana-crocodile hybrid back", "metadata": {}} +{"id": "10", "text": "Understanding consensus protocols is key 
for blockchain.", "metadata": {}} +{"id": "11", "text": "Practical Byzantine Fault Tolerance (PBFT) is a foundational agreement protocol.", "metadata": {}} +{"id": "12", "text": "UC Davis is known for its agricultural studies.", "metadata": {}} +{"id": "13", "text": "LEANN saves 97% storage compared to traditional vector databases.", "metadata": {}} From 60e5802d46016b3c02de29ace2efa91d025ddab1 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 19 Nov 2025 19:47:48 -0800 Subject: [PATCH 20/79] Create README for ResilientDB x LEANN integration Added README.md for ResilientDB x LEANN integration, detailing file structure, prerequisites, installation, usage guide, configuration notes, and troubleshooting steps. --- hnsw_orm_test/README.md | 66 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 hnsw_orm_test/README.md diff --git a/hnsw_orm_test/README.md b/hnsw_orm_test/README.md new file mode 100644 index 000000000..09986832e --- /dev/null +++ b/hnsw_orm_test/README.md @@ -0,0 +1,66 @@ +# ResilientDB x LEANN Vector Search Integration +Add something... + +## File Structure +- config.py: Common configuration (paths, model selection). +- indexer.py: A background service that watches ResilientDB transactions and builds the vector index. +- populate.py: A script to insert sample data into ResilientDB. +- search.py: A client CLI tool to perform semantic search using the generated index. + +## Prerequisites +- ResilientDB KV Service must be running. Ensure you can access http://localhost:18000 (or your configured port). +- Python 3.10 +- ResDB-ORM Virtual Environment + +## Installation +Once the ResDB-ORM virtual environment is activated, ensure dependencies are installed: + +```Bash +(venv) pip install resdb-orm leann +``` + +## Usage Guide +To see the system in action, you need to run the scripts in a specific order using two separate terminal windows. 
+ +***REMINDER: Activate the venv in BOTH terminals!*** + +Terminal 1: Start the Indexer +This process needs to run continuously to monitor the blockchain and update the index. + +```Bash +# 1. Activate venv +source ~/ResDB-ORM/venv/bin/activate + +# 2. Run Indexer +(venv) python indexer.py +``` +***Keep this terminal open!*** + +## Terminal 2: Insert Data & Search +### Step 1: Populate Data +Run this script to write sample documents into ResilientDB. +```Bash +# 1. Activate venv +source ~/ResDB-ORM/venv/bin/activate + +# 2. Run Populate +(venv) python populate.py +``` + +### Step 2: Perform Search +Once the indexer confirms the update, you can search the data. +```Bash +(venv) python search.py +``` + +## Interaction: +Enter a query like blockchain consensus or vector search. The system will return the most relevant documents along with their ResilientDB Transaction IDs. + +## Configuration Notes +- Model: By default, this project uses prajjwal1/bert-tiny (128 dim) to ensure low memory usage and stability. You can change this in config.py. +- Determinism: The indexer forces OMP_NUM_THREADS=1 to guarantee that all replicas build the exact same HNSW graph structure. + +## Troubleshooting +- ModuleNotFoundError: If you see this error, you likely forgot to activate the virtual environment. Run source ~/ResDB-ORM/venv/bin/activate. +- process Killed / OOM: If your process gets killed, ensure you are not running indexer.py and search.py simultaneously if your memory is limited (< 8GB). Stop the indexer (Ctrl+C) before running the search. +- Connection Error: Ensure ResilientDB is running (./start_kv_service.sh). From 6a1177e341faebece8fc09c45ef12701bfd067f5 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 19 Nov 2025 19:48:32 -0800 Subject: [PATCH 21/79] Improve README formatting and section headings Updated formatting and headings in the README for clarity. 
--- hnsw_orm_test/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hnsw_orm_test/README.md b/hnsw_orm_test/README.md index 09986832e..b6ac406b2 100644 --- a/hnsw_orm_test/README.md +++ b/hnsw_orm_test/README.md @@ -24,7 +24,7 @@ To see the system in action, you need to run the scripts in a specific order usi ***REMINDER: Activate the venv in BOTH terminals!*** -Terminal 1: Start the Indexer +### Terminal 1: Start the Indexer This process needs to run continuously to monitor the blockchain and update the index. ```Bash @@ -36,8 +36,8 @@ source ~/ResDB-ORM/venv/bin/activate ``` ***Keep this terminal open!*** -## Terminal 2: Insert Data & Search -### Step 1: Populate Data +### Terminal 2: Insert Data & Search +#### Step 1: Populate Data Run this script to write sample documents into ResilientDB. ```Bash # 1. Activate venv @@ -47,7 +47,7 @@ source ~/ResDB-ORM/venv/bin/activate (venv) python populate.py ``` -### Step 2: Perform Search +#### Step 2: Perform Search Once the indexer confirms the update, you can search the data. ```Bash (venv) python search.py From bb646f625aa9feb5dcb9b45a5b36275c30262342 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 19 Nov 2025 19:49:11 -0800 Subject: [PATCH 22/79] Format commands in troubleshooting section as code Updated troubleshooting section to format commands as code. --- hnsw_orm_test/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hnsw_orm_test/README.md b/hnsw_orm_test/README.md index b6ac406b2..d9b9e674c 100644 --- a/hnsw_orm_test/README.md +++ b/hnsw_orm_test/README.md @@ -61,6 +61,6 @@ Enter a query like blockchain consensus or vector search. The system will return - Determinism: The indexer forces OMP_NUM_THREADS=1 to guarantee that all replicas build the exact same HNSW graph structure. 
## Troubleshooting -- ModuleNotFoundError: If you see this error, you likely forgot to activate the virtual environment. Run source ~/ResDB-ORM/venv/bin/activate. +- ModuleNotFoundError: If you see this error, you likely forgot to activate the virtual environment. Run source ```~/ResDB-ORM/venv/bin/activate```. - process Killed / OOM: If your process gets killed, ensure you are not running indexer.py and search.py simultaneously if your memory is limited (< 8GB). Stop the indexer (Ctrl+C) before running the search. -- Connection Error: Ensure ResilientDB is running (./start_kv_service.sh). +- Connection Error: Ensure ResilientDB is running (```./start_kv_service.sh```). From 6ddbbef006e906dec318b280e3bf5f124d9030a0 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 19 Nov 2025 19:49:43 -0800 Subject: [PATCH 23/79] Clarify access instructions for ResilientDB KV Service Updated README to clarify access to ResilientDB KV Service. --- hnsw_orm_test/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hnsw_orm_test/README.md b/hnsw_orm_test/README.md index d9b9e674c..be56d5cf8 100644 --- a/hnsw_orm_test/README.md +++ b/hnsw_orm_test/README.md @@ -8,7 +8,7 @@ Add something... - search.py: A client CLI tool to perform semantic search using the generated index. ## Prerequisites -- ResilientDB KV Service must be running. Ensure you can access http://localhost:18000 (or your configured port). +- ResilientDB KV Service must be running. Ensure you can access your configured port. - Python 3.10 - ResDB-ORM Virtual Environment From ef8fccb9ab916a7062bd7cca00ea175ed3c57805 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 19 Nov 2025 19:54:57 -0800 Subject: [PATCH 24/79] Update README.md for clarity and conciseness Removed interaction instructions and clarified memory limitations in troubleshooting. 
--- hnsw_orm_test/README.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/hnsw_orm_test/README.md b/hnsw_orm_test/README.md index be56d5cf8..c30b0a324 100644 --- a/hnsw_orm_test/README.md +++ b/hnsw_orm_test/README.md @@ -53,14 +53,11 @@ Once the indexer confirms the update, you can search the data. (venv) python search.py ``` -## Interaction: -Enter a query like blockchain consensus or vector search. The system will return the most relevant documents along with their ResilientDB Transaction IDs. - ## Configuration Notes - Model: By default, this project uses prajjwal1/bert-tiny (128 dim) to ensure low memory usage and stability. You can change this in config.py. -- Determinism: The indexer forces OMP_NUM_THREADS=1 to guarantee that all replicas build the exact same HNSW graph structure. +- Determinism: The indexer forces OMP_NUM_THREADS=1 to guarantee that all replicas build the same HNSW graph structure. ## Troubleshooting - ModuleNotFoundError: If you see this error, you likely forgot to activate the virtual environment. Run source ```~/ResDB-ORM/venv/bin/activate```. -- process Killed / OOM: If your process gets killed, ensure you are not running indexer.py and search.py simultaneously if your memory is limited (< 8GB). Stop the indexer (Ctrl+C) before running the search. +- process Killed / OOM: If your process gets killed, ensure you are not running indexer.py and search.py simultaneously if your WSL memory is limited (< 8GB). Stop the indexer (Ctrl+C) before running the search. - Connection Error: Ensure ResilientDB is running (```./start_kv_service.sh```). 
From 58205aac7f023c253af07c6d084d7281a1ee9fd1 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 19 Nov 2025 23:12:15 -0800 Subject: [PATCH 25/79] Remove checkmark from index update print statement --- hnsw_orm_test/indexer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hnsw_orm_test/indexer.py b/hnsw_orm_test/indexer.py index d697d3357..c69168e8f 100644 --- a/hnsw_orm_test/indexer.py +++ b/hnsw_orm_test/indexer.py @@ -80,7 +80,7 @@ def main(): with open(config.MAPPING_PATH, 'w') as f: json.dump(docs_mapping, f, indent=2) - print(f"✅ Index updated at {config.INDEX_PATH}") + print(f"Index updated at {config.INDEX_PATH}") last_count = current_count # Free memory (Important) @@ -99,4 +99,4 @@ def main(): time.sleep(config.POLL_INTERVAL) if __name__ == "__main__": - main() \ No newline at end of file + main() From 4121b4789f665dff55ff8d4b96c55e555ec0394f Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Fri, 21 Nov 2025 15:57:17 +0900 Subject: [PATCH 26/79] Refactor ResDB configuration path and enhance indexer functionality with error handling and retry logic; add diagnosis tool and update data management scripts. 
--- hnsw_orm_test/config.py | 4 +- hnsw_orm_test/diagnose_db.py | 66 +++++++++ hnsw_orm_test/indexer.py | 139 ++++++++++-------- .../leann_resdb_tiny/id_mapping.json | 107 ++++++++++++++ hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt | 21 +++ hnsw_orm_test/leann_resdb_tiny/resdb.index | Bin 0 -> 2462 bytes .../leann_resdb_tiny/resdb.leann.meta.json | 21 +++ .../leann_resdb_tiny/resdb.leann.passages.idx | Bin 0 -> 171 bytes .../resdb.leann.passages.jsonl | 21 +++ hnsw_orm_test/manage_data.py | 104 +++++++++++++ hnsw_orm_test/search.py | 139 ++++++++++++------ 11 files changed, 517 insertions(+), 105 deletions(-) create mode 100644 hnsw_orm_test/diagnose_db.py create mode 100644 hnsw_orm_test/leann_resdb_tiny/id_mapping.json create mode 100644 hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt create mode 100644 hnsw_orm_test/leann_resdb_tiny/resdb.index create mode 100644 hnsw_orm_test/leann_resdb_tiny/resdb.leann.meta.json create mode 100644 hnsw_orm_test/leann_resdb_tiny/resdb.leann.passages.idx create mode 100644 hnsw_orm_test/leann_resdb_tiny/resdb.leann.passages.jsonl create mode 100644 hnsw_orm_test/manage_data.py diff --git a/hnsw_orm_test/config.py b/hnsw_orm_test/config.py index fc9ba1462..066ebd97b 100644 --- a/hnsw_orm_test/config.py +++ b/hnsw_orm_test/config.py @@ -3,7 +3,7 @@ # --- Environment Settings --- # Path to the ResDB-ORM configuration file (Auto-detection) -RESDB_CONFIG_PATH = Path.home() / "ResDB-ORM" / "config.yaml" +RESDB_CONFIG_PATH = Path.home() / "incubator-resilientdb-ResDB-ORM" / "config.yaml" if not RESDB_CONFIG_PATH.exists(): potential_local_path = Path("config.yaml").resolve() if potential_local_path.exists(): @@ -28,6 +28,6 @@ MAPPING_PATH = BASE_DIR / "id_mapping.json" # Polling interval (seconds) -POLL_INTERVAL = 5 +POLL_INTERVAL = 15 print(f"Config: Using model '{MODEL_NAME}' at {BASE_DIR}") \ No newline at end of file diff --git a/hnsw_orm_test/diagnose_db.py b/hnsw_orm_test/diagnose_db.py new file mode 100644 index 
000000000..384b5c33a --- /dev/null +++ b/hnsw_orm_test/diagnose_db.py @@ -0,0 +1,66 @@ +import requests +import json +import yaml +from pathlib import Path +import config + +def diagnose(): + print("=== ResilientDB Diagnosis Tool ===") + + # 1. 設定ファイルからURLを取得 + try: + with open(config.RESDB_CONFIG_PATH, 'r') as f: + conf = yaml.safe_load(f) + url = conf['database']['db_root_url'] + print(f"Target URL: {url}") + except Exception as e: + print(f"Error loading config: {e}") + return + + # 2. 全データ取得のエンドポイントを叩く + target_endpoint = f"{url}/v1/transactions" + print(f"Requesting: {target_endpoint} ...") + + try: + response = requests.get(target_endpoint) + print(f"Status Code: {response.status_code}") + + # 生のレスポンス内容を表示 + content = response.text + print(f"Raw Response Length: {len(content)}") + print(f"Raw Response Preview (first 500 chars):\n{content[:500]}") + + if not content: + print("\n[Error] Response body is EMPTY. The database returned no data.") + print("Check if ResilientDB is running and if data was actually persisted.") + return + + # JSONデコードを試行 + try: + data = response.json() + print(f"\nSuccess! 
Parsed JSON with {len(data)} records.") + + # doc1 があるか簡易チェック + found_keys = [] + for tx in data: + try: + if isinstance(tx.get('data'), str): + payload = json.loads(tx['data']) + else: + payload = tx.get('data') + + if isinstance(payload, dict) and 'original_key' in payload: + found_keys.append(payload['original_key']) + except: + pass + print(f"Found keys in DB: {found_keys}") + + except json.JSONDecodeError as e: + print(f"\n[Error] JSON Decode Failed: {e}") + print("The database response is not valid JSON.") + + except Exception as e: + print(f"\n[Fatal Error] Request failed: {e}") + +if __name__ == "__main__": + diagnose() \ No newline at end of file diff --git a/hnsw_orm_test/indexer.py b/hnsw_orm_test/indexer.py index c69168e8f..da699f556 100644 --- a/hnsw_orm_test/indexer.py +++ b/hnsw_orm_test/indexer.py @@ -2,101 +2,124 @@ import json import os import gc +import requests from resdb_orm.orm import ResDBORM from leann import LeannBuilder import config +class SafeResDBORM(ResDBORM): + def read_all(self): + try: + url = f'{self.db_root_url}/v1/transactions' + response = requests.get(url, timeout=10) + if response.status_code == 200 and response.content: + return response.json() + return [] + except: + return [] + def main(): - # Limit parallelism for memory saving and determinism + # Limit parallelism for deterministic behavior os.environ["OMP_NUM_THREADS"] = "1" os.environ["TOKENIZERS_PARALLELISM"] = "false" - print(f"Indexer started with lightweight model: {config.MODEL_NAME}") - print(f"Watching ResilientDB ({config.RESDB_CONFIG_PATH})...") + print(f"Indexer Service Started. Model: {config.MODEL_NAME}") + print(f"Output Index: {config.INDEX_PATH}") - try: - db = ResDBORM(config_path=str(config.RESDB_CONFIG_PATH)) - except Exception as e: - print(f"DB Connection failed: {e}") - return - - last_count = 0 + db = SafeResDBORM(config_path=str(config.RESDB_CONFIG_PATH)) + last_tx_count = 0 while True: try: - # 1. 
Fetch all data all_txs = db.read_all() - - if not all_txs: - print(" No data yet. Waiting...") - time.sleep(config.POLL_INTERVAL) - continue - current_count = len(all_txs) - # 2. Change detection (Simple implementation) - if current_count > last_count: - print(f"\nDetected change: {last_count} -> {current_count} records.") + # Start processing if transaction count increased + if current_count > last_tx_count: + print(f"\n[Change Detected] {last_tx_count} -> {current_count} transactions.") - # Sort by ID for determinism - sorted_txs = sorted(all_txs, key=lambda x: str(x.get('id', ''))) - - docs_text = [] - docs_mapping = [] - - for tx in sorted_txs: + # 1. Extract events + events = [] + for tx in all_txs: try: - data = tx.get('data', {}) + data = tx.get('data') if isinstance(data, str): try: data = json.loads(data) except: data = {"text": data} + if not isinstance(data, dict): continue + + ts = float(data.get('timestamp', 0)) + key = data.get('original_key') + op = data.get('operation', 'upsert') text = data.get('text', '') - if text: - docs_text.append(text) - docs_mapping.append({ - "resdb_id": str(tx['id']), - "original_key": data.get('original_key', 'unknown'), - "preview": text[:60] - }) - except Exception: + + if key: + events.append({"key": key, "op": op, "text": text, "ts": ts, "id": str(tx['id'])}) + except: continue + + # 2. Sort by timestamp + events.sort(key=lambda x: x['ts']) + + # 3. Replay state + active_docs = {} + for ev in events: + if ev['op'] == 'delete': + if ev['key'] in active_docs: + del active_docs[ev['key']] + else: + active_docs[ev['key']] = { + "text": ev['text'], + "resdb_id": ev['id'], + "original_key": ev['key'] + } - if docs_text: - print(f"Rebuilding index for {len(docs_text)} documents...") + # 4. Build index + valid_docs = list(active_docs.values()) + if valid_docs: + print(f"Rebuilding index for {len(valid_docs)} documents...") - # 3. 
Build index (Specify lightweight model) - builder = LeannBuilder( - backend_name="hnsw", - model=config.MODEL_NAME - ) + # --- Start timing --- + start_time = time.time() - for t in docs_text: - builder.add_text(t) + builder = LeannBuilder(backend_name="hnsw", model=config.MODEL_NAME) + for d in valid_docs: + builder.add_text(d['text']) + # Save index builder.build_index(str(config.INDEX_PATH)) + # --- End timing --- + end_time = time.time() + elapsed_time = end_time - start_time + # Save mapping + mapping_data = [{ + "resdb_id": d['resdb_id'], + "original_key": d['original_key'], + "preview": d['text'][:60] + } for d in valid_docs] + with open(config.MAPPING_PATH, 'w') as f: - json.dump(docs_mapping, f, indent=2) + json.dump(mapping_data, f, indent=2) - print(f"Index updated at {config.INDEX_PATH}") - last_count = current_count - - # Free memory (Important) - del builder - gc.collect() + print(f"Index updated successfully at {time.ctime()}") + print(f"Time taken: {elapsed_time:.4f} seconds") # Display time taken here else: - print("No valid text documents found.") + print("Index cleared (no active documents).") + with open(config.MAPPING_PATH, 'w') as f: + json.dump([], f) - else: - # No changes - pass + last_tx_count = current_count + + if 'builder' in locals(): del builder + gc.collect() except Exception as e: - print(f"Error in polling loop: {e}") + print(f"Polling error: {e}") time.sleep(config.POLL_INTERVAL) if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/hnsw_orm_test/leann_resdb_tiny/id_mapping.json b/hnsw_orm_test/leann_resdb_tiny/id_mapping.json new file mode 100644 index 000000000..1d8e2995d --- /dev/null +++ b/hnsw_orm_test/leann_resdb_tiny/id_mapping.json @@ -0,0 +1,107 @@ +[ + { + "resdb_id": "e52da17d162a6c162df93845a429a28e4dfb241d1bf7a474d0244f8d0198da91", + "original_key": "doc9", + "preview": "Cross-chain communication enables interoperability between d" + }, + { + "resdb_id": 
"8c480746ca1f45926fd740bb1672bbe889f8a11892d5250521099dbbf91293d4", + "original_key": "doc5", + "preview": "ResilientDB is a high-throughput blockchain fabric designed " + }, + { + "resdb_id": "c3591bd07094ac294450e8c472c905d65d91d5ff12af88cfafd439169d0abf26", + "original_key": "fast_key", + "preview": "State 2" + }, + { + "resdb_id": "c4d25ae9df38f234e3cfbd28f3123b04645d0ad1af7776fa636618d2fe38d135", + "original_key": "doc8", + "preview": "Practical Byzantine Fault Tolerance (PBFT) is a foundational" + }, + { + "resdb_id": "1867c657eaad65c77bd4f9e324bad24ec3bca251345b89d9d025e60df5b1965b", + "original_key": "doc4", + "preview": "Understanding consensus protocols is key for blockchain." + }, + { + "resdb_id": "7f0eb841c4f4500a1105bfcc2f9c73e75b410801242da8421847a0ee7ef781fc", + "original_key": "doc3", + "preview": "The weather in Davis is sunny today." + }, + { + "resdb_id": "b48071250116b903859d1a015a3f884e1fffdd20198b2ce08c712f67dafb0b77", + "original_key": "doc12", + "preview": "How to bake a perfect sourdough bread with a starters." + }, + { + "resdb_id": "ceb95d2f2fe5cc61867dad1cbb4b27277f4678a50450422e269573516f51cd5e", + "original_key": "doc10", + "preview": "The project requires using the ResilientDB Fabric unless app" + }, + { + "resdb_id": "58884963eade41926c7de25c960ad5d6be7e174208c88a293e6c7eb5b192d2d3", + "original_key": "doc2", + "preview": "Tung Tung Tung Sahur called\u2014they need their banana-crocodile" + }, + { + "resdb_id": "7c8bed2a8533eba5f575859d59ed4f26b1124418c9dad7157fa8773211fe445a", + "original_key": "doc11", + "preview": "Mitochondria are the powerhouse of the cell." + }, + { + "resdb_id": "0e8ff673d774b53106c1ed1ab0b9dd39d5ec30957535f0e2e8cbcd12ee2b9ee1", + "original_key": "doc14", + "preview": "UC Davis is known for its agricultural studies." 
+ }, + { + "resdb_id": "6b1d447ead2694c9fd880062bd8fd4f1650a71a3295f88ff138e62e577de705d", + "original_key": "doc6", + "preview": "This project explores novel techniques for sharding in distr" + }, + { + "resdb_id": "62feb2ce2ad62c006da8eff19aa3fc87bf78bf20d3142c267205a0df65fe5cb0", + "original_key": "doc13", + "preview": "The final report must be written in LaTeX using ACM template" + }, + { + "resdb_id": "2a59666306bc441898f41ef611dd61ea62ea78ca5e56968386bea6633b3e8005", + "original_key": "news_tech", + "preview": "Microsoft acquires a new AI startup." + }, + { + "resdb_id": "e9e5e81e392dc7dbe13ca0569c7ad4bd46ec0f242561419f9b1884066b6ff428", + "original_key": "doc7", + "preview": "DeFi applications are often built on top of smart contracts." + }, + { + "resdb_id": "ca2698663ffe694834639278c53245e57824a27488f1f5b8c47818d584deb280", + "original_key": "mykey", + "preview": "Updated content for verification" + }, + { + "resdb_id": "326b07d2102ff1877350696e3e118b0428cc1dcf079df75b48e3b623483c3b47", + "original_key": "test_doc", + "preview": "Updated content" + }, + { + "resdb_id": "856d2ae623a89cf83ad2a4add22923e75090dc49f4e6200c0e9101037bfb4317", + "original_key": "perf_test_1", + "preview": "ResilientDB is fast and secure." 
+ }, + { + "resdb_id": "f5cf0e542e17d69eef543fa61b9d7df26047e22b39d6b7d49cd1970c6c785062", + "original_key": "batch_2", + "preview": "Data B" + }, + { + "resdb_id": "27f71687bfc9df031395040b798a12c5165cae7ac5a3f5c81d6651cecb3bab30", + "original_key": "batch_3", + "preview": "Data C" + }, + { + "resdb_id": "306b6e702acaf5301d7e386f46a7a4bdbbe13be3948f6fb5b99d4297db937213", + "original_key": "batch_4", + "preview": "Data D" + } +] \ No newline at end of file diff --git a/hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt b/hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt new file mode 100644 index 000000000..5bc6c8de8 --- /dev/null +++ b/hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt @@ -0,0 +1,21 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 diff --git a/hnsw_orm_test/leann_resdb_tiny/resdb.index b/hnsw_orm_test/leann_resdb_tiny/resdb.index new file mode 100644 index 0000000000000000000000000000000000000000..bc04ff74e70c0b74661cf3db394ff9e12d958323 GIT binary patch literal 2462 zcmb`|y=v4^5C!m?7{9;bCqYC+M1uGT5>slU7WP_*SOh!4!Xgh~;TxF3-o^(==|foL z4fG9ke(M|w!Qk$)7cS>NyZ4?sXJ-Dpj~+gLHI9sNWn``P!yjA{9m{tU{PCb*dyG$T zZ;U66zc$26;|ttf9?v(9*F)B3J-n#BU3;hYZtcC=Ni5OA_uV4hzbQ=Z>$ouE26XtTbj|PIP>#@l4~n#?{7)jp5ZbG|k&7it?~{w~!%T08e>t`}#VuFV(ieDij)Ht+bHci&yS zc;rZ}@XIp|pR>^wFF56yM;{Klq7xUb@vT01(;vD2=Qs7Mqknnkt)JZPTD;Ro8y)gQ zH=WbeFFN#U|Lo}_F8q3l4}W?S-)eQRzwSg&Jx=r`&#d!;n=bcw$ITbLIB8UWb4Dk9 zw9Bt9G@Ct(2jAhr>pU)ZaN)`OCTIF4hx~G+*0{_wJHRhL}^qOZ{)PgShd6A!1 zE59%9E>a(^^iv-EYMD8pjZS*hk4Bmf>jyp@w7X}8f1Y{4M~@of#i^#29Jpzt7ymT+ ia7CZ{cr6^6%~|}Wzc}#j*GZ3B$)j%3npr&f|M44zx zr?$UU|Be`&M3l-Tp&U#y%Ex319f`>d3NnX&$pX5qO_tCrSwV-5$r^fYtWFX7auYPR iG(+z$Pu+lWwR8D;x}1eTdE2<$U0nViH1qck`^gWgI5GJE literal 0 HcmV?d00001 diff --git a/hnsw_orm_test/leann_resdb_tiny/resdb.leann.passages.jsonl b/hnsw_orm_test/leann_resdb_tiny/resdb.leann.passages.jsonl new file mode 100644 index 000000000..4bb20877d --- /dev/null +++ b/hnsw_orm_test/leann_resdb_tiny/resdb.leann.passages.jsonl @@ -0,0 +1,21 @@ 
+{"id": "0", "text": "Cross-chain communication enables interoperability between different blockchains.", "metadata": {}} +{"id": "1", "text": "ResilientDB is a high-throughput blockchain fabric designed for performance.", "metadata": {}} +{"id": "2", "text": "State 2", "metadata": {}} +{"id": "3", "text": "Practical Byzantine Fault Tolerance (PBFT) is a foundational agreement protocol.", "metadata": {}} +{"id": "4", "text": "Understanding consensus protocols is key for blockchain.", "metadata": {}} +{"id": "5", "text": "The weather in Davis is sunny today.", "metadata": {}} +{"id": "6", "text": "How to bake a perfect sourdough bread with a starters.", "metadata": {}} +{"id": "7", "text": "The project requires using the ResilientDB Fabric unless approved otherwise.", "metadata": {}} +{"id": "8", "text": "Tung Tung Tung Sahur called—they need their banana-crocodile hybrid back", "metadata": {}} +{"id": "9", "text": "Mitochondria are the powerhouse of the cell.", "metadata": {}} +{"id": "10", "text": "UC Davis is known for its agricultural studies.", "metadata": {}} +{"id": "11", "text": "This project explores novel techniques for sharding in distributed ledgers.", "metadata": {}} +{"id": "12", "text": "The final report must be written in LaTeX using ACM templates.", "metadata": {}} +{"id": "13", "text": "Microsoft acquires a new AI startup.", "metadata": {}} +{"id": "14", "text": "DeFi applications are often built on top of smart contracts.", "metadata": {}} +{"id": "15", "text": "Updated content for verification", "metadata": {}} +{"id": "16", "text": "Updated content", "metadata": {}} +{"id": "17", "text": "ResilientDB is fast and secure.", "metadata": {}} +{"id": "18", "text": "Data B", "metadata": {}} +{"id": "19", "text": "Data C", "metadata": {}} +{"id": "20", "text": "Data D", "metadata": {}} diff --git a/hnsw_orm_test/manage_data.py b/hnsw_orm_test/manage_data.py new file mode 100644 index 000000000..14d814e68 --- /dev/null +++ b/hnsw_orm_test/manage_data.py 
@@ -0,0 +1,104 @@ +import argparse +import json +import time +import requests +import re +from resdb_orm.orm import ResDBORM +import config + +class SafeResDBORM(ResDBORM): + """ + A wrapper class for ResDBORM that includes error handling + and retry logic for network requests. + """ + def read_all(self): + max_retries = 3 + for attempt in range(max_retries): + try: + url = f'{self.db_root_url}/v1/transactions' + # Set a timeout to prevent hanging on connection issues + response = requests.get(url, timeout=5) + + if response.status_code == 200: + # Return JSON if content exists, otherwise return an empty list + return response.json() if response.content else [] + elif response.status_code == 503: + # Handle server cooldown (Service Unavailable) + time.sleep(2) + continue + except Exception: + # Wait briefly before retrying on generic errors + time.sleep(1) + return [] + +def get_db(): + """Initializes and returns the SafeResDBORM instance.""" + try: + return SafeResDBORM(config_path=str(config.RESDB_CONFIG_PATH)) + except Exception as e: + print(f"Connection failed: {e}") + return None + +def add_event(key, text, op_type): + """ + Logs an event (add, update, or delete) to ResilientDB using Event Sourcing. + Instead of overwriting data, we append a new transaction with a timestamp. + """ + db = get_db() + if not db: return + + # Construct the payload. + # 'timestamp' is crucial for the indexer to determine the latest state. 
+ payload = { + "original_key": key, + "text": text, + "timestamp": time.time(), + "operation": op_type, + "type": "vector_source" + } + + # Brief pause to mitigate server load during rapid operations + time.sleep(0.5) + + try: + # Create a new transaction (append-only) + tx_id = db.create(payload) + if isinstance(tx_id, str): + print(f"[{op_type.upper()}] Key: '{key}' (Tx: {tx_id[:8]}...)") + else: + print(f"[{op_type.upper()}] Failed: {tx_id}") + except Exception as e: + print(f"Error sending transaction: {e}") + +def main(): + # Set up CLI argument parsing + parser = argparse.ArgumentParser(description="ResilientDB Vector Data Manager") + subparsers = parser.add_subparsers(dest="command", required=True) + + # Command: Add + p_add = subparsers.add_parser("add") + p_add.add_argument("key") + p_add.add_argument("text") + + # Command: Update + p_upd = subparsers.add_parser("update") + p_upd.add_argument("key") + p_upd.add_argument("text") + + # Command: Delete + p_del = subparsers.add_parser("delete") + p_del.add_argument("key") + + args = parser.parse_args() + + # Execute the corresponding event function based on the command + if args.command == "add": + add_event(args.key, args.text, "add") + elif args.command == "update": + add_event(args.key, args.text, "update") + elif args.command == "delete": + # For deletion, text content is irrelevant, so we send an empty string + add_event(args.key, "", "delete") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/hnsw_orm_test/search.py b/hnsw_orm_test/search.py index 36cb6010f..419e5e77c 100644 --- a/hnsw_orm_test/search.py +++ b/hnsw_orm_test/search.py @@ -1,48 +1,103 @@ import json -import sys import os +import time +import sys +from pathlib import Path from leann import LeannSearcher import config -def main(): - # Check index existence (Check actual existence of .index file) - real_index_file = config.INDEX_PATH.with_suffix(".index") - - if not real_index_file.exists() or not 
config.MAPPING_PATH.exists(): - print(f"Index not found at {real_index_file}") - print("Please run 'python populate.py' then 'python indexer.py' first.") - return - - print(f"Loading index from {config.INDEX_PATH}...") - print(f"Using model: {config.MODEL_NAME}") - - try: - # Initialize searcher specifying the lightweight model - searcher = LeannSearcher( - str(config.INDEX_PATH), - model=config.MODEL_NAME - ) - - with open(config.MAPPING_PATH, 'r') as f: - id_mapping = json.load(f) +class AutoSearcher: + def __init__(self): + self.searcher = None + self.id_mapping = [] + self.last_mtime = 0 + self.index_file = self._resolve_index_file() + self.mapping_file = config.MAPPING_PATH + self.reload_index() + + def _resolve_index_file(self): + """実際のインデックスファイルパスを解決する""" + # leannは .index を付与する場合があるためチェック + candidates = [ + config.INDEX_PATH, + config.INDEX_PATH.with_suffix(".index"), + Path(str(config.INDEX_PATH) + ".index") + ] + for p in candidates: + if p.exists(): + return p + return config.INDEX_PATH # デフォルト + + def _get_mtime(self): + """マッピングファイルの更新時刻を取得(これが更新トリガーとなる)""" + if self.mapping_file.exists(): + return self.mapping_file.stat().st_mtime + return 0 + + def reload_index(self): + """インデックスの再ロード""" + if not self.index_file.exists() or not self.mapping_file.exists(): + return False + + try: + # マッピングの読み込み + with open(self.mapping_file, 'r') as f: + self.id_mapping = json.load(f) - print(f"Loaded {len(id_mapping)} documents mapping.") - except Exception as e: - print(f"Failed to load index: {e}") - return + # インデックスのロード (存在しない場合はスキップ) + if self.id_mapping: + self.searcher = LeannSearcher(str(config.INDEX_PATH), model=config.MODEL_NAME) + else: + self.searcher = None + + self.last_mtime = self._get_mtime() + print(f"\n[System] Index reloaded. 
Documents: {len(self.id_mapping)}") + return True + except Exception as e: + print(f"\n[Error] Failed to reload index: {e}") + return False - print("\n=== LEANN (bert-tiny) x ResilientDB Search CLI ===") - print("Type 'exit' to quit.") + def search(self, query): + # 検索前に更新チェック + current_mtime = self._get_mtime() + if current_mtime > self.last_mtime: + print("\n[System] Detected update. Refreshing index...") + time.sleep(0.5) # 書き込み完了待ち + self.reload_index() + + if not self.searcher or not self.id_mapping: + return [] + + try: + results = self.searcher.search(query, top_k=3) + return results + except Exception as e: + print(f"Search execution error: {e}") + return [] + + def get_info(self, result_id): + try: + idx = int(result_id) + if 0 <= idx < len(self.id_mapping): + return self.id_mapping[idx] + except: + pass + return None + +def main(): + print("=== ResilientDB Auto-Reloading Search CLI ===") + engine = AutoSearcher() + + if not engine.index_file.exists(): + print("Waiting for initial index creation...") while True: try: - query = input("\nSearch Query: ").strip() + query = input("\nSearch Query ('exit' to quit): ").strip() if not query: continue if query.lower() in ['exit', 'quit']: break - # Execute search - k = min(3, len(id_mapping)) - results = searcher.search(query, top_k=k) + results = engine.search(query) if not results: print("No results found.") @@ -50,23 +105,17 @@ def main(): print(f"Results for: '{query}'") for rank, res in enumerate(results, 1): - try: - leann_id = int(res.id) - if 0 <= leann_id < len(id_mapping): - info = id_mapping[leann_id] - score = float(res.score) - - print(f" #{rank} [Score: {score:.4f}]") - print(f" Key: {info['original_key']} (ResDB ID: {info['resdb_id']})") - print(f" Text: {info['preview']}...") - except ValueError: - continue + info = engine.get_info(res.id) + if info: + print(f" #{rank} [Score: {res.score:.4f}]") + print(f" Key : {info['original_key']}") + print(f" Text: {info['preview']}...") + else: + print(f" 
#{rank} [Unknown ID]") except KeyboardInterrupt: print("\nBye!") break - except Exception as e: - print(f"Search error: {e}") if __name__ == "__main__": main() \ No newline at end of file From 97648b7cc1be74bf444c3ab5a60e8430ce45925d Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Thu, 20 Nov 2025 23:04:42 -0800 Subject: [PATCH 27/79] Revise README with version update notice Updated README to reflect version status and upcoming changes. --- hnsw_orm_test/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hnsw_orm_test/README.md b/hnsw_orm_test/README.md index c30b0a324..ae45082ab 100644 --- a/hnsw_orm_test/README.md +++ b/hnsw_orm_test/README.md @@ -1,4 +1,4 @@ -# ResilientDB x LEANN Vector Search Integration +# ResilientDB x LEANN Vector Search Integration (This does not reflect latest version. I will update it by Sunday.) Add something... ## File Structure From deb6bcf2f8bcdbdeaca6570b3d3db3a15bc0c7e1 Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Sat, 22 Nov 2025 05:48:19 +0900 Subject: [PATCH 28/79] Enhance SafeResDBORM functionality with improved error handling, soft validation for updates, and refined logging; update id_mapping and passages data for consistency. 
--- hnsw_orm_test/indexer.py | 43 +++++----- .../leann_resdb_tiny/id_mapping.json | 9 +- hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt | 1 + hnsw_orm_test/leann_resdb_tiny/resdb.index | Bin 2462 -> 2658 bytes .../leann_resdb_tiny/resdb.leann.passages.idx | Bin 171 -> 179 bytes .../resdb.leann.passages.jsonl | 3 +- hnsw_orm_test/manage_data.py | 77 +++++++++++++----- 7 files changed, 92 insertions(+), 41 deletions(-) diff --git a/hnsw_orm_test/indexer.py b/hnsw_orm_test/indexer.py index da699f556..44f919222 100644 --- a/hnsw_orm_test/indexer.py +++ b/hnsw_orm_test/indexer.py @@ -19,12 +19,10 @@ def read_all(self): return [] def main(): - # Limit parallelism for deterministic behavior os.environ["OMP_NUM_THREADS"] = "1" os.environ["TOKENIZERS_PARALLELISM"] = "false" print(f"Indexer Service Started. Model: {config.MODEL_NAME}") - print(f"Output Index: {config.INDEX_PATH}") db = SafeResDBORM(config_path=str(config.RESDB_CONFIG_PATH)) last_tx_count = 0 @@ -34,7 +32,6 @@ def main(): all_txs = db.read_all() current_count = len(all_txs) - # Start processing if transaction count increased if current_count > last_tx_count: print(f"\n[Change Detected] {last_tx_count} -> {current_count} transactions.") @@ -51,6 +48,7 @@ def main(): ts = float(data.get('timestamp', 0)) key = data.get('original_key') + # operationフィールドがない場合は 'upsert' (強制上書き) として扱う op = data.get('operation', 'upsert') text = data.get('text', '') @@ -62,17 +60,32 @@ def main(): # 2. Sort by timestamp events.sort(key=lambda x: x['ts']) - # 3. Replay state + # 3. 
Replay state (Filtering Logic) active_docs = {} for ev in events: - if ev['op'] == 'delete': - if ev['key'] in active_docs: - del active_docs[ev['key']] + key = ev['key'] + op = ev['op'] + + if op == 'delete': + if key in active_docs: + del active_docs[key] + elif op == 'update': + # キーが存在する場合のみ更新する + if key in active_docs: + active_docs[key] = { + "text": ev['text'], + "resdb_id": ev['id'], + "original_key": key + } + else: + # 存在しないキーへのupdateは無視し、ログを出す + print(f"Warning: Ignored 'update' for non-existent key: '{key}'") else: - active_docs[ev['key']] = { + # 'add' や 'upsert' は無条件で保存(新規作成または上書き) + active_docs[key] = { "text": ev['text'], "resdb_id": ev['id'], - "original_key": ev['key'] + "original_key": key } # 4. Build index @@ -80,21 +93,15 @@ def main(): if valid_docs: print(f"Rebuilding index for {len(valid_docs)} documents...") - # --- Start timing --- start_time = time.time() - builder = LeannBuilder(backend_name="hnsw", model=config.MODEL_NAME) for d in valid_docs: builder.add_text(d['text']) - # Save index builder.build_index(str(config.INDEX_PATH)) - # --- End timing --- - end_time = time.time() - elapsed_time = end_time - start_time + elapsed_time = time.time() - start_time - # Save mapping mapping_data = [{ "resdb_id": d['resdb_id'], "original_key": d['original_key'], @@ -104,15 +111,13 @@ def main(): with open(config.MAPPING_PATH, 'w') as f: json.dump(mapping_data, f, indent=2) - print(f"Index updated successfully at {time.ctime()}") - print(f"Time taken: {elapsed_time:.4f} seconds") # Display time taken here + print(f"Index updated. 
Time: {elapsed_time:.4f}s") else: print("Index cleared (no active documents).") with open(config.MAPPING_PATH, 'w') as f: json.dump([], f) last_tx_count = current_count - if 'builder' in locals(): del builder gc.collect() diff --git a/hnsw_orm_test/leann_resdb_tiny/id_mapping.json b/hnsw_orm_test/leann_resdb_tiny/id_mapping.json index 1d8e2995d..b33cfd9cc 100644 --- a/hnsw_orm_test/leann_resdb_tiny/id_mapping.json +++ b/hnsw_orm_test/leann_resdb_tiny/id_mapping.json @@ -50,9 +50,9 @@ "preview": "Mitochondria are the powerhouse of the cell." }, { - "resdb_id": "0e8ff673d774b53106c1ed1ab0b9dd39d5ec30957535f0e2e8cbcd12ee2b9ee1", + "resdb_id": "94261b3f300d544b5b0c2c15166b09ea611cb1be9a9e1e41635357402fbb6c38", "original_key": "doc14", - "preview": "UC Davis is known for its agricultural studies." + "preview": "Update completed! Doc14 was already updated." }, { "resdb_id": "6b1d447ead2694c9fd880062bd8fd4f1650a71a3295f88ff138e62e577de705d", @@ -103,5 +103,10 @@ "resdb_id": "306b6e702acaf5301d7e386f46a7a4bdbbe13be3948f6fb5b99d4297db937213", "original_key": "batch_4", "preview": "Data D" + }, + { + "resdb_id": "e51bae79bd0517816da26e15d3dbbc47baf07884677f8117e7b5fbbc3f8c1af7", + "original_key": "batch_6", + "preview": "Data R is update" } ] \ No newline at end of file diff --git a/hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt b/hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt index 5bc6c8de8..a470ceda5 100644 --- a/hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt +++ b/hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt @@ -19,3 +19,4 @@ 18 19 20 +21 diff --git a/hnsw_orm_test/leann_resdb_tiny/resdb.index b/hnsw_orm_test/leann_resdb_tiny/resdb.index index bc04ff74e70c0b74661cf3db394ff9e12d958323..ce23b6d83408c01eb138266e21230fd8f3b33c09 100644 GIT binary patch literal 2658 zcmcJQy-rm@6om);K?M<1)EGjHA;cKofCMeIQ44x#G%+D`HWr3Au;2l-u&}eXFrl=v z@CoQFc>xmL-(h{p$xXPqxfdsytg{a@Yp=Ct&gJ>D7jN2y*6uXNLLN?Wit4g|>+twV z!G3F>z@ye)2LEnSPqlC0;dI;HoT&%L(|XvcxnJ{h&BK~UHEm3z|1#H$xIdOsh>uf? 
zxM7G}r)3l0rWQY>77tR3UsH=eQj5P+i+@s!C#l87?nqh_*HVicsl`{R#n-9D_o>B? zsl_j;#qX)bU#ag#r!bv|i$O2ZmDJaQbHVwb_wQ!v+d(hNz0~)E%fXf4gW$v9YH%(1 zDCl{6Kd$Ee&q6d{Ywz9euKsw(noBj&&iz3fez@q@Yu>22SQ9Vo$2M&TFe6Po+Hj>ud!FX0SG2%C&RyzjG{f^cBXyCJeaMJDc%aXyX9O4L!CCe1 zC_M0!v-j2e?R}*m*`R?zR(_jsqRI2|n_@i!eQKw#*$?~P@kLKGv6nsYdw1yv*Qnmv zhkwrnKDa$EdW^%#et5}`{dk5Kt?EW6eaVGEre}GV&;4+DK9hLYHyY84CiIaXng{c! zS3`B0rslU7WP_*SOh!4!Xgh~;TxF3-o^(==|foL z4fG9ke(M|w!Qk$)7cS>NyZ4?sXJ-Dpj~+gLHI9sNWn``P!yjA{9m{tU{PCb*dyG$T zZ;U66zc$26;|ttf9?v(9*F)B3J-n#BU3;hYZtcC=Ni5OA_uV4hzbQ=Z>$ouE26XtTbj|PIP>#@l4~n#?{7)jp5ZbG|k&7it?~{w~!%T08e>t`}#VuFV(ieDij)Ht+bHci&yS zc;rZ}@XIp|pR>^wFF56yM;{Klq7xUb@vT01(;vD2=Qs7Mqknnkt)JZPTD;Ro8y)gQ zH=WbeFFN#U|Lo}_F8q3l4}W?S-)eQRzwSg&Jx=r`&#d!;n=bcw$ITbLIB8UWb4Dk9 zw9Bt9G@Ct(2jAhr>pU)ZaN)`OCTIF4hx~G+*0{_wJHRhL}^qOZ{)PgShd6A!1 zE59%9E>a(^^iv-EYMD8pjZS*hk4Bmf>jyp@w7X}8f1Y{4M~@of#i^#29Jpzt7ymT+ ia7CZ{cr6^6%~|}Wzc}#j*GZ3B$)j%3npr&f|M443!ImLG?kTx=y;`@ZHR1W}$aT*2y diff --git a/hnsw_orm_test/leann_resdb_tiny/resdb.leann.passages.jsonl b/hnsw_orm_test/leann_resdb_tiny/resdb.leann.passages.jsonl index 4bb20877d..83c3041ac 100644 --- a/hnsw_orm_test/leann_resdb_tiny/resdb.leann.passages.jsonl +++ b/hnsw_orm_test/leann_resdb_tiny/resdb.leann.passages.jsonl @@ -8,7 +8,7 @@ {"id": "7", "text": "The project requires using the ResilientDB Fabric unless approved otherwise.", "metadata": {}} {"id": "8", "text": "Tung Tung Tung Sahur called—they need their banana-crocodile hybrid back", "metadata": {}} {"id": "9", "text": "Mitochondria are the powerhouse of the cell.", "metadata": {}} -{"id": "10", "text": "UC Davis is known for its agricultural studies.", "metadata": {}} +{"id": "10", "text": "Update completed! 
Doc14 was already updated.", "metadata": {}} {"id": "11", "text": "This project explores novel techniques for sharding in distributed ledgers.", "metadata": {}} {"id": "12", "text": "The final report must be written in LaTeX using ACM templates.", "metadata": {}} {"id": "13", "text": "Microsoft acquires a new AI startup.", "metadata": {}} @@ -19,3 +19,4 @@ {"id": "18", "text": "Data B", "metadata": {}} {"id": "19", "text": "Data C", "metadata": {}} {"id": "20", "text": "Data D", "metadata": {}} +{"id": "21", "text": "Data R is update", "metadata": {}} diff --git a/hnsw_orm_test/manage_data.py b/hnsw_orm_test/manage_data.py index 14d814e68..2c52b8ddc 100644 --- a/hnsw_orm_test/manage_data.py +++ b/hnsw_orm_test/manage_data.py @@ -2,7 +2,6 @@ import json import time import requests -import re from resdb_orm.orm import ResDBORM import config @@ -16,39 +15,86 @@ def read_all(self): for attempt in range(max_retries): try: url = f'{self.db_root_url}/v1/transactions' - # Set a timeout to prevent hanging on connection issues response = requests.get(url, timeout=5) if response.status_code == 200: - # Return JSON if content exists, otherwise return an empty list return response.json() if response.content else [] elif response.status_code == 503: - # Handle server cooldown (Service Unavailable) time.sleep(2) continue except Exception: - # Wait briefly before retrying on generic errors time.sleep(1) return [] def get_db(): - """Initializes and returns the SafeResDBORM instance.""" try: return SafeResDBORM(config_path=str(config.RESDB_CONFIG_PATH)) except Exception as e: print(f"Connection failed: {e}") return None +def get_active_keys(db): + """ + Fetches all transactions to determine which keys currently exist. + Used for validation warnings. 
+ """ + all_txs = db.read_all() + events = [] + + for tx in all_txs: + try: + data = tx.get('data') + if isinstance(data, str): + try: data = json.loads(data) + except: data = {"text": data} + + if not isinstance(data, dict): continue + + ts = float(data.get('timestamp', 0)) + key = data.get('original_key') + op = data.get('operation', 'upsert') + + if key: + events.append({"key": key, "op": op, "ts": ts}) + except: + continue + + events.sort(key=lambda x: x['ts']) + + active_keys = set() + for ev in events: + if ev['op'] == 'delete': + active_keys.discard(ev['key']) + elif ev['op'] == 'update': + pass + else: + active_keys.add(ev['key']) + + return active_keys + def add_event(key, text, op_type): """ - Logs an event (add, update, or delete) to ResilientDB using Event Sourcing. - Instead of overwriting data, we append a new transaction with a timestamp. + Logs an event to ResilientDB. + Performs a soft validation: Warns if key is missing but allows execution + to handle eventual consistency (lag). """ db = get_db() if not db: return - # Construct the payload. - # 'timestamp' is crucial for the indexer to determine the latest state. + # --- SOFT VALIDATION LOGIC --- + if op_type in ["update", "delete"]: + print(f"Checking key status for '{key}'...") + active_keys = get_active_keys(db) + + if key not in active_keys: + print(f"\n[WARNING] Key '{key}' was not found in the current database state.") + print(" -> If you JUST created this key, this is normal (propagation lag). Proceeding...") + print(" -> If this is a typo, the update will be IGNORED by the indexer.\n") + # We do NOT return here; we proceed to send the transaction. + else: + print(f"Key '{key}' found. 
Proceeding with {op_type}.") + # ----------------------------- + payload = { "original_key": key, "text": text, @@ -57,47 +103,40 @@ def add_event(key, text, op_type): "type": "vector_source" } - # Brief pause to mitigate server load during rapid operations time.sleep(0.5) try: - # Create a new transaction (append-only) tx_id = db.create(payload) if isinstance(tx_id, str): - print(f"[{op_type.upper()}] Key: '{key}' (Tx: {tx_id[:8]}...)") + # Message changed to "Request Sent" to be accurate + print(f"[{op_type.upper()} REQUEST SENT] Key: '{key}' (Tx: {tx_id[:8]}...)") else: print(f"[{op_type.upper()}] Failed: {tx_id}") except Exception as e: print(f"Error sending transaction: {e}") def main(): - # Set up CLI argument parsing parser = argparse.ArgumentParser(description="ResilientDB Vector Data Manager") subparsers = parser.add_subparsers(dest="command", required=True) - # Command: Add p_add = subparsers.add_parser("add") p_add.add_argument("key") p_add.add_argument("text") - # Command: Update p_upd = subparsers.add_parser("update") p_upd.add_argument("key") p_upd.add_argument("text") - # Command: Delete p_del = subparsers.add_parser("delete") p_del.add_argument("key") args = parser.parse_args() - # Execute the corresponding event function based on the command if args.command == "add": add_event(args.key, args.text, "add") elif args.command == "update": add_event(args.key, args.text, "update") elif args.command == "delete": - # For deletion, text content is irrelevant, so we send an empty string add_event(args.key, "", "delete") if __name__ == "__main__": From ec4b2f452ff363ce2d993479dbd49bda152d9bd2 Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Sat, 22 Nov 2025 09:49:53 +0900 Subject: [PATCH 29/79] Refactor diagnose_db.py with English comments, enhance indexer.py operation handling, and update id_mapping.json with new entries; remove obsolete files and update data management scripts. 
--- hnsw_orm_test/diagnose_db.py | 10 +-- hnsw_orm_test/indexer.py | 8 +- .../leann_resdb_tiny/id_mapping.json | 15 ++++ hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt | 3 + hnsw_orm_test/leann_resdb_tiny/resdb.index | Bin 2658 -> 3294 bytes .../leann_resdb_tiny/resdb.leann.passages.idx | Bin 179 -> 203 bytes .../resdb.leann.passages.jsonl | 3 + leann_resdb_tiny/id_mapping.json | 72 ------------------ leann_resdb_tiny/resdb.ids.txt | 14 ---- leann_resdb_tiny/resdb.index | Bin 1314 -> 0 bytes leann_resdb_tiny/resdb.leann.meta.json | 21 ----- leann_resdb_tiny/resdb.leann.passages.idx | Bin 115 -> 0 bytes leann_resdb_tiny/resdb.leann.passages.jsonl | 14 ---- 13 files changed, 30 insertions(+), 130 deletions(-) delete mode 100644 leann_resdb_tiny/id_mapping.json delete mode 100644 leann_resdb_tiny/resdb.ids.txt delete mode 100644 leann_resdb_tiny/resdb.index delete mode 100644 leann_resdb_tiny/resdb.leann.meta.json delete mode 100644 leann_resdb_tiny/resdb.leann.passages.idx delete mode 100644 leann_resdb_tiny/resdb.leann.passages.jsonl diff --git a/hnsw_orm_test/diagnose_db.py b/hnsw_orm_test/diagnose_db.py index 384b5c33a..bc1fbb702 100644 --- a/hnsw_orm_test/diagnose_db.py +++ b/hnsw_orm_test/diagnose_db.py @@ -7,7 +7,7 @@ def diagnose(): print("=== ResilientDB Diagnosis Tool ===") - # 1. 設定ファイルからURLを取得 + # 1. Get URL from config file try: with open(config.RESDB_CONFIG_PATH, 'r') as f: conf = yaml.safe_load(f) @@ -17,7 +17,7 @@ def diagnose(): print(f"Error loading config: {e}") return - # 2. 全データ取得のエンドポイントを叩く + # 2. 
Hit the endpoint to get all the data target_endpoint = f"{url}/v1/transactions" print(f"Requesting: {target_endpoint} ...") @@ -25,7 +25,7 @@ def diagnose(): response = requests.get(target_endpoint) print(f"Status Code: {response.status_code}") - # 生のレスポンス内容を表示 + # Show raw response content content = response.text print(f"Raw Response Length: {len(content)}") print(f"Raw Response Preview (first 500 chars):\n{content[:500]}") @@ -35,12 +35,12 @@ def diagnose(): print("Check if ResilientDB is running and if data was actually persisted.") return - # JSONデコードを試行 + # Attempt to decode JSON try: data = response.json() print(f"\nSuccess! Parsed JSON with {len(data)} records.") - # doc1 があるか簡易チェック + # Simple check if doc1 exists found_keys = [] for tx in data: try: diff --git a/hnsw_orm_test/indexer.py b/hnsw_orm_test/indexer.py index 44f919222..0c340aa2b 100644 --- a/hnsw_orm_test/indexer.py +++ b/hnsw_orm_test/indexer.py @@ -48,7 +48,7 @@ def main(): ts = float(data.get('timestamp', 0)) key = data.get('original_key') - # operationフィールドがない場合は 'upsert' (強制上書き) として扱う + # If the operation field is missing, it is treated as 'upsert' (forced overwrite). op = data.get('operation', 'upsert') text = data.get('text', '') @@ -70,7 +70,7 @@ def main(): if key in active_docs: del active_docs[key] elif op == 'update': - # キーが存在する場合のみ更新する + # Reload if the key exists if key in active_docs: active_docs[key] = { "text": ev['text'], @@ -78,10 +78,10 @@ def main(): "original_key": key } else: - # 存在しないキーへのupdateは無視し、ログを出す + # Ignore updates to non-existent keys and log them. 
print(f"Warning: Ignored 'update' for non-existent key: '{key}'") else: - # 'add' や 'upsert' は無条件で保存(新規作成または上書き) + # 'add' saves unconditionally active_docs[key] = { "text": ev['text'], "resdb_id": ev['id'], diff --git a/hnsw_orm_test/leann_resdb_tiny/id_mapping.json b/hnsw_orm_test/leann_resdb_tiny/id_mapping.json index b33cfd9cc..ecddce6fa 100644 --- a/hnsw_orm_test/leann_resdb_tiny/id_mapping.json +++ b/hnsw_orm_test/leann_resdb_tiny/id_mapping.json @@ -108,5 +108,20 @@ "resdb_id": "e51bae79bd0517816da26e15d3dbbc47baf07884677f8117e7b5fbbc3f8c1af7", "original_key": "batch_6", "preview": "Data R is update" + }, + { + "resdb_id": "3f1c62a3b5a1d350445620bc9c38db3fcaa62347be29c8a47139a0517dc00f4c", + "original_key": "batch_10", + "preview": "Data is updated" + }, + { + "resdb_id": "62bb73a900b94091461a32b4db067980b5a6de1a410d94e24c72388e6d524c7d", + "original_key": "batch_11", + "preview": "Data11 is added" + }, + { + "resdb_id": "eac34c42601540521f06de3192fff087935b387631de45161e2babcc8416290a", + "original_key": "batch_12", + "preview": "Data12 is updated" } ] \ No newline at end of file diff --git a/hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt b/hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt index a470ceda5..aa4e2ba89 100644 --- a/hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt +++ b/hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt @@ -20,3 +20,6 @@ 19 20 21 +22 +23 +24 diff --git a/hnsw_orm_test/leann_resdb_tiny/resdb.index b/hnsw_orm_test/leann_resdb_tiny/resdb.index index ce23b6d83408c01eb138266e21230fd8f3b33c09..76bec36552f4ea86d371c8e80d9c1622361a83c2 100644 GIT binary patch literal 3294 zcmcK6yK0nC6b0auc#HRVk0KI8B#20`vWh8Vqn3825{qCbSmqHd?X5?U^#D~ zrJbH{vKI_P#ECN=IIMsF&RJ{kwfFhsldY$(+L6|-x2)xQ*vAFY@%-Bezd!D<+uBDs zZ0%X$uMP1+`vM1x?d8Ua{*blVAGZ76?fX;TgT9A-N0`um-w(UU_*$tTxL$E^tK#5x z#ld$K2j5p5{9JKxP;u~E#lasH2bV_nw-#KjIQXdI;PZ-uuPYAjRvi3Laqw%!!M%!u z!-|7HD-JF%?iUbTt2nq>aqvaOH>&PHy>3VAMFw!G;!}lZ3eOcT7hWv9TzIu`r7(Io 
zE522@T6nwgPT}3cwZeOa_X{5st`}y`qPN%I%Z*)c-tO$AZgqNpvTwh=(zi<^-+n3E zg}$9pKQB)At#&?~?fZP+*ZNi`FZgh&Z}lJR8#fL7;fzjyHPWK-ul#bwCs$hXk5+%2 zIe*+V;?unO!%HK1)8q#&-l(m{{LudElk<9_H|Z5!Jd~JL`PnqC-P{a*Y3Sr=R@QW6_*u{+-2vn?`1I zUirrxIgjU-`y}u5l6UFTwD%WhypK4_ZZ&{u;s z{{77T^?N#a@zZk7!r?3qI`q`ZBQ@vFAM}03i%VWPOJ`p5nc2Zf!y2C>`sz?qJV=kG zxnJ?$dFSXkXAXF;ehYWBk28Axaa7ABf4J#{ zKYt&?kDG=?1ApqLhm)2*^Gp7CaVGDB`inPb@i;Tj{>GO{4RuxmL-(h{p$xXPqxfdsytg{a@Yp=Ct&gJ>D7jN2y*6uXNLLN?Wit4g|>+twV z!G3F>z@ye)2LEnSPqlC0;dI;HoT&%L(|XvcxnJ{h&BK~UHEm3z|1#H$xIdOsh>uf? zxM7G}r)3l0rWQY>77tR3UsH=eQj5P+i+@s!C#l87?nqh_*HVicsl`{R#n-9D_o>B? zsl_j;#qX)bU#ag#r!bv|i$O2ZmDJaQbHVwb_wQ!v+d(hNz0~)E%fXf4gW$v9YH%(1 zDCl{6Kd$Ee&q6d{Ywz9euKsw(noBj&&iz3fez@q@Yu>22SQ9Vo$2M&TFe6Po+Hj>ud!FX0SG2%C&RyzjG{f^cBXyCJeaMJDc%aXyX9O4L!CCe1 zC_M0!v-j2e?R}*m*`R?zR(_jsqRI2|n_@i!eQKw#*$?~P@kLKGv6nsYdw1yv*Qnmv zhkwrnKDa$EdW^%#et5}`{dk5Kt?EW6eaVGEre}GV&;4+DK9hLYHyY84CiIaXng{c! zS3`B0rGU6SaM+}7DJ$NlX!D&4n zdd=K;UFT+)QD%H(Of%*gE=-|+MJI*Tbs%EA5I0_m8@uAh8*yV_+;}f;d=fVn;>H(o zv_)d91kArxfqSy zA9eRK+8N~1AGP!63VNYWeh75i8T3m&IeaINg$^%#(BXyF8gg9&4QJ$&ix-}(Y8~gh z2iMp{Lk((fsv(cs?u-6?$J{RZqb|P8fHUq!-_+rZ`>^O6KRoP-j_>%h_pH~=pgK1^ ztzR_*^3eGXXsGQTsn4p;+WBb7CC9T^_CEN#AM|L~-N)YK`9Ap28y?I4gTL1K0cm9+ AEdT%j diff --git a/leann_resdb_tiny/resdb.leann.meta.json b/leann_resdb_tiny/resdb.leann.meta.json deleted file mode 100644 index e0db38a7d..000000000 --- a/leann_resdb_tiny/resdb.leann.meta.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "version": "1.0", - "backend_name": "hnsw", - "embedding_model": "facebook/contriever", - "dimensions": 768, - "backend_kwargs": { - "model": "prajjwal1/bert-tiny" - }, - "embedding_mode": "sentence-transformers", - "passage_sources": [ - { - "type": "jsonl", - "path": "resdb.leann.passages.jsonl", - "index_path": "resdb.leann.passages.idx", - "path_relative": "resdb.leann.passages.jsonl", - "index_path_relative": "resdb.leann.passages.idx" - } - ], - "is_compact": true, - "is_pruned": true -} \ No newline at end of file diff --git 
a/leann_resdb_tiny/resdb.leann.passages.idx b/leann_resdb_tiny/resdb.leann.passages.idx deleted file mode 100644 index a2e43c550c379bcdbb74e01100c7d0d93c7fd899..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 115 zcmZo*nVP`>0ku;!dKe9+cr)}c8cy+!>tQsS;(ert(Rhll24fGS$rRr?K+1HAFF#Wc zquCVSWFTce#rHapvY6tl%-q9h2~@(|!(?bM#g~r-NE=S^%?Hs&Q+!{5XrM_BtfhJY DJ4+y` diff --git a/leann_resdb_tiny/resdb.leann.passages.jsonl b/leann_resdb_tiny/resdb.leann.passages.jsonl deleted file mode 100644 index 85cec1ce8..000000000 --- a/leann_resdb_tiny/resdb.leann.passages.jsonl +++ /dev/null @@ -1,14 +0,0 @@ -{"id": "0", "text": "How to bake a perfect sourdough bread with a starters.", "metadata": {}} -{"id": "1", "text": "The final report must be written in LaTeX using ACM templates.", "metadata": {}} -{"id": "2", "text": "DeFi applications are often built on top of smart contracts.", "metadata": {}} -{"id": "3", "text": "ResilientDB is a high-throughput blockchain fabric designed for performance.", "metadata": {}} -{"id": "4", "text": "This project explores novel techniques for sharding in distributed ledgers.", "metadata": {}} -{"id": "5", "text": "Mitochondria are the powerhouse of the cell.", "metadata": {}} -{"id": "6", "text": "The project requires using the ResilientDB Fabric unless approved otherwise.", "metadata": {}} -{"id": "7", "text": "The weather in Davis is sunny today.", "metadata": {}} -{"id": "8", "text": "Cross-chain communication enables interoperability between different blockchains.", "metadata": {}} -{"id": "9", "text": "Tung Tung Tung Sahur called—they need their banana-crocodile hybrid back", "metadata": {}} -{"id": "10", "text": "Understanding consensus protocols is key for blockchain.", "metadata": {}} -{"id": "11", "text": "Practical Byzantine Fault Tolerance (PBFT) is a foundational agreement protocol.", "metadata": {}} -{"id": "12", "text": "UC Davis is known for its agricultural studies.", "metadata": {}} -{"id": "13", 
"text": "LEANN saves 97% storage compared to traditional vector databases.", "metadata": {}} From 7761faf53de1d73f0e5f66f270f2fdf19564af5b Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Fri, 21 Nov 2025 17:06:35 -0800 Subject: [PATCH 30/79] Enhance README with detailed service documentation Expanded README.md to provide detailed explanations of indexer.py and manage_data.py functionalities, including usage, prerequisites, and processing flow. --- hnsw_orm_test/README.md | 152 ++++++++++++++++++++++++++++------------ 1 file changed, 106 insertions(+), 46 deletions(-) diff --git a/hnsw_orm_test/README.md b/hnsw_orm_test/README.md index ae45082ab..7979e522e 100644 --- a/hnsw_orm_test/README.md +++ b/hnsw_orm_test/README.md @@ -1,63 +1,123 @@ -# ResilientDB x LEANN Vector Search Integration (This does not reflect latest version. I will update it by Sunday.) -Add something... +# ResilientDB x LEANN Vector Search Integration +This document explains the internal logic and specifications of indexer.py (a resident index construction service) and manage_data.py (a CLI tool for data manipulation), which are core components for keeping data on ResilientDB in a vector-searchable state. -## File Structure -- config.py: Common configuration (paths, model selection). -- indexer.py: A background service that watches ResilientDB transactions and builds the vector index. -- populate.py: A script to insert sample data into ResilientDB. -- search.py: A client CLI tool to perform semantic search using the generated index. +## 1. Indexer Service (indexer.py) +indexer.py monitors the blockchain (ResilientDB) as the "Single Source of Truth" and acts as a resident process to automatically synchronize the local vector search index (HNSW). -## Prerequisites -- ResilientDB KV Service must be running. Ensure you can access your configured port. 
-- Python 3.10 -- ResDB-ORM Virtual Environment +### 1.1 Overview and Responsibilities +Polling Monitoring: Periodically fetches all transactions from the database and detects changes. -## Installation -Once the ResDB-ORM virtual environment is activated, ensure dependencies are installed: +State Restoration (Log Replay): Replays the append-only transaction log in chronological order to construct the current state of each key in memory. -```Bash -(venv) pip install resdb-orm leann -``` +Vectorization and Index Construction: Vectorizes the latest text data using an Embedding Model, constructs an HNSW graph structure, and saves it to a file. -## Usage Guide -To see the system in action, you need to run the scripts in a specific order using two separate terminal windows. +### 1.2 Main Classes and Functions +SafeResDBORM(ResDBORM) +A wrapper class inheriting from the ResDBORM class of the resdb_orm library, designed to improve network communication stability. -***REMINDER: Activate the venv in BOTH terminals!*** +read_all(self): Fetches all data from the /v1/transactions endpoint. It includes timeout settings and exception handling to prevent the process from crashing even if ResilientDB becomes temporarily unresponsive. -### Terminal 1: Start the Indexer -This process needs to run continuously to monitor the blockchain and update the index. +main(): The main service loop, which repeats the following steps every POLL_INTERVAL (configuration value, default is 15 seconds). -```Bash -# 1. Activate venv -source ~/ResDB-ORM/venv/bin/activate +### 1.3 Processing Flow Details +The index update process is executed according to the following logic: -# 2. Run Indexer -(venv) python indexer.py -``` -***Keep this terminal open!*** +Change Detection: + +Compares the number of transactions fetched in the previous loop (last_tx_count) with the number fetched this time. + +The index reconstruction process starts only if current_count > last_tx_count. 
+ +Event Extraction and Normalization: + +Extracts necessary fields (original_key, operation, text, timestamp) from the fetched raw transaction data. + +JSON parse errors or data in invalid formats are skipped. + +Chronological Sorting: + +Since the arrival order of transactions is not guaranteed in distributed systems, events are sorted in ascending order based on the timestamp within the payload. + +Log Replay (State Application): + +Applies sorted events sequentially from the beginning to update the active_docs dictionary. + +Add / Upsert: Registers the key and text in the dictionary (overwrites if it already exists). + +Update: Updates the content only if the key exists in the dictionary. Update events for non-existent keys are ignored (to prevent inconsistency). + +Delete: Removes the entry if the key exists in the dictionary. + +Index Construction via LeANN: + +Creates an index using the LeANN library for the valid documents remaining in active_docs. + +Saved Files: + +resdb.leann: The vector index body. -### Terminal 2: Insert Data & Search -#### Step 1: Populate Data -Run this script to write sample documents into ResilientDB. +id_mapping.json: Metadata linking search result IDs to the original keys (original_key) and text previews. + +## 2. Data Manager (manage_data.py) +manage_data.py is an interface that allows users to insert and manipulate data in ResilientDB from the command line. It is not just a simple HTTP client; it possesses pre-check functions to maintain data integrity. + +### 2.1 Overview +CRUD Operations: Supports adding (add), updating (update), and deleting (delete) data. + +Soft Validation: Includes a feature to check if the target key exists in the database before performing modification operations (update/delete) and issues a warning if it does not. + +### 2.2 Command Line Usage ```Bash -# 1. Activate venv -source ~/ResDB-ORM/venv/bin/activate -# 2. 
Run Populate -(venv) python populate.py +# Add new data +python3 manage_data.py add + +# Update existing data +python3 manage_data.py update + +# Delete data +python3 manage_data.py delete ``` -#### Step 2: Perform Search -Once the indexer confirms the update, you can search the data. -```Bash -(venv) python search.py +### 2.3 Internal Logic and Validation Features +SafeResDBORM.read_all() (Retry Logic) +Similar to the class in indexer.py, but this one adds logic to retry up to 3 times (max_retries = 3) in case of network errors. + +get_active_keys(db) +Fetches all transactions currently in the database and uses the same Log Replay logic as indexer.py to generate a "list of currently valid keys". + +add_event(key, text, op_type) +The core function for transaction generation. + +Integrity Check (Soft Validation): + +If the operation type is update or delete, it calls get_active_keys() to confirm whether the target key exists. + +Warning: If the key is not found, it displays a warning: [WARNING] Key '...' was not found. + +Design Intent: Due to the nature of blockchains, there is a lag between writing and reflection (Eventual Consistency). Therefore, the design does not stop on error but warns the user and proceeds with sending the transaction. + +Payload Creation: + +Creates a JSON object with the following structure: + +``` +payload = { + "original_key": key, + "text": text, + "timestamp": time.time(), # Current time for order guarantee + "operation": op_type, # "add", "update", "delete" + "type": "vector_source" +} ``` -## Configuration Notes -- Model: By default, this project uses prajjwal1/bert-tiny (128 dim) to ensure low memory usage and stability. You can change this in config.py. -- Determinism: The indexer forces OMP_NUM_THREADS=1 to guarantee that all replicas build the same HNSW graph structure. +Transaction Submission: Sends data to ResilientDB via the ORM and displays a part of the transaction ID upon success. + +### 3. 
Summary: Relationship Between the Two Scripts +These two scripts have a relationship close to the Command Query Responsibility Segregation (CQRS) pattern. + +Write Side (manage_data.py): Handles data writing (Commands). Instead of directly modifying the database state, it appends operation logs (events). + +Read Side (indexer.py): Handles data reading (Queries). It aggregates and processes the written event logs to generate a "Read Model (Vector Index)" optimized for search. -## Troubleshooting -- ModuleNotFoundError: If you see this error, you likely forgot to activate the virtual environment. Run source ```~/ResDB-ORM/venv/bin/activate```. -- process Killed / OOM: If your process gets killed, ensure you are not running indexer.py and search.py simultaneously if your WSL memory is limited (< 8GB). Stop the indexer (Ctrl+C) before running the search. -- Connection Error: Ensure ResilientDB is running (```./start_kv_service.sh```). +This architecture realizes high-speed vector search functionality while leveraging the append-only ledger characteristics of ResilientDB. From 05458e11621a9e8eea79a0e117327071ebf71469 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Fri, 21 Nov 2025 17:07:47 -0800 Subject: [PATCH 31/79] Fix formatting in README.md for data manager section --- hnsw_orm_test/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hnsw_orm_test/README.md b/hnsw_orm_test/README.md index 7979e522e..00b9e38cc 100644 --- a/hnsw_orm_test/README.md +++ b/hnsw_orm_test/README.md @@ -62,7 +62,7 @@ id_mapping.json: Metadata linking search result IDs to the original keys (origin manage_data.py is an interface that allows users to insert and manipulate data in ResilientDB from the command line. It is not just a simple HTTP client; it possesses pre-check functions to maintain data integrity. ### 2.1 Overview -CRUD Operations: Supports adding (add), updating (update), and deleting (delete) data. 
+Operations: Supports adding (add), updating (update), and deleting (delete) data. Soft Validation: Includes a feature to check if the target key exists in the database before performing modification operations (update/delete) and issues a warning if it does not. From a6852fd5e3bdbfb8c156964f13bb2b005278759b Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Sat, 22 Nov 2025 10:17:10 +0900 Subject: [PATCH 32/79] Remove obsolete id_mapping.json, resdb.ids.txt, and related files to streamline the project structure. --- .../leann_resdb_tiny/id_mapping.json | 127 ------------------ hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt | 25 ---- hnsw_orm_test/leann_resdb_tiny/resdb.index | Bin 3294 -> 0 bytes .../leann_resdb_tiny/resdb.leann.meta.json | 21 --- .../leann_resdb_tiny/resdb.leann.passages.idx | Bin 203 -> 0 bytes .../resdb.leann.passages.jsonl | 25 ---- 6 files changed, 198 deletions(-) delete mode 100644 hnsw_orm_test/leann_resdb_tiny/id_mapping.json delete mode 100644 hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt delete mode 100644 hnsw_orm_test/leann_resdb_tiny/resdb.index delete mode 100644 hnsw_orm_test/leann_resdb_tiny/resdb.leann.meta.json delete mode 100644 hnsw_orm_test/leann_resdb_tiny/resdb.leann.passages.idx delete mode 100644 hnsw_orm_test/leann_resdb_tiny/resdb.leann.passages.jsonl diff --git a/hnsw_orm_test/leann_resdb_tiny/id_mapping.json b/hnsw_orm_test/leann_resdb_tiny/id_mapping.json deleted file mode 100644 index ecddce6fa..000000000 --- a/hnsw_orm_test/leann_resdb_tiny/id_mapping.json +++ /dev/null @@ -1,127 +0,0 @@ -[ - { - "resdb_id": "e52da17d162a6c162df93845a429a28e4dfb241d1bf7a474d0244f8d0198da91", - "original_key": "doc9", - "preview": "Cross-chain communication enables interoperability between d" - }, - { - "resdb_id": "8c480746ca1f45926fd740bb1672bbe889f8a11892d5250521099dbbf91293d4", - "original_key": "doc5", - "preview": "ResilientDB is a high-throughput blockchain fabric designed " - }, - { - "resdb_id": 
"c3591bd07094ac294450e8c472c905d65d91d5ff12af88cfafd439169d0abf26", - "original_key": "fast_key", - "preview": "State 2" - }, - { - "resdb_id": "c4d25ae9df38f234e3cfbd28f3123b04645d0ad1af7776fa636618d2fe38d135", - "original_key": "doc8", - "preview": "Practical Byzantine Fault Tolerance (PBFT) is a foundational" - }, - { - "resdb_id": "1867c657eaad65c77bd4f9e324bad24ec3bca251345b89d9d025e60df5b1965b", - "original_key": "doc4", - "preview": "Understanding consensus protocols is key for blockchain." - }, - { - "resdb_id": "7f0eb841c4f4500a1105bfcc2f9c73e75b410801242da8421847a0ee7ef781fc", - "original_key": "doc3", - "preview": "The weather in Davis is sunny today." - }, - { - "resdb_id": "b48071250116b903859d1a015a3f884e1fffdd20198b2ce08c712f67dafb0b77", - "original_key": "doc12", - "preview": "How to bake a perfect sourdough bread with a starters." - }, - { - "resdb_id": "ceb95d2f2fe5cc61867dad1cbb4b27277f4678a50450422e269573516f51cd5e", - "original_key": "doc10", - "preview": "The project requires using the ResilientDB Fabric unless app" - }, - { - "resdb_id": "58884963eade41926c7de25c960ad5d6be7e174208c88a293e6c7eb5b192d2d3", - "original_key": "doc2", - "preview": "Tung Tung Tung Sahur called\u2014they need their banana-crocodile" - }, - { - "resdb_id": "7c8bed2a8533eba5f575859d59ed4f26b1124418c9dad7157fa8773211fe445a", - "original_key": "doc11", - "preview": "Mitochondria are the powerhouse of the cell." - }, - { - "resdb_id": "94261b3f300d544b5b0c2c15166b09ea611cb1be9a9e1e41635357402fbb6c38", - "original_key": "doc14", - "preview": "Update completed! Doc14 was already updated." 
- }, - { - "resdb_id": "6b1d447ead2694c9fd880062bd8fd4f1650a71a3295f88ff138e62e577de705d", - "original_key": "doc6", - "preview": "This project explores novel techniques for sharding in distr" - }, - { - "resdb_id": "62feb2ce2ad62c006da8eff19aa3fc87bf78bf20d3142c267205a0df65fe5cb0", - "original_key": "doc13", - "preview": "The final report must be written in LaTeX using ACM template" - }, - { - "resdb_id": "2a59666306bc441898f41ef611dd61ea62ea78ca5e56968386bea6633b3e8005", - "original_key": "news_tech", - "preview": "Microsoft acquires a new AI startup." - }, - { - "resdb_id": "e9e5e81e392dc7dbe13ca0569c7ad4bd46ec0f242561419f9b1884066b6ff428", - "original_key": "doc7", - "preview": "DeFi applications are often built on top of smart contracts." - }, - { - "resdb_id": "ca2698663ffe694834639278c53245e57824a27488f1f5b8c47818d584deb280", - "original_key": "mykey", - "preview": "Updated content for verification" - }, - { - "resdb_id": "326b07d2102ff1877350696e3e118b0428cc1dcf079df75b48e3b623483c3b47", - "original_key": "test_doc", - "preview": "Updated content" - }, - { - "resdb_id": "856d2ae623a89cf83ad2a4add22923e75090dc49f4e6200c0e9101037bfb4317", - "original_key": "perf_test_1", - "preview": "ResilientDB is fast and secure." 
- }, - { - "resdb_id": "f5cf0e542e17d69eef543fa61b9d7df26047e22b39d6b7d49cd1970c6c785062", - "original_key": "batch_2", - "preview": "Data B" - }, - { - "resdb_id": "27f71687bfc9df031395040b798a12c5165cae7ac5a3f5c81d6651cecb3bab30", - "original_key": "batch_3", - "preview": "Data C" - }, - { - "resdb_id": "306b6e702acaf5301d7e386f46a7a4bdbbe13be3948f6fb5b99d4297db937213", - "original_key": "batch_4", - "preview": "Data D" - }, - { - "resdb_id": "e51bae79bd0517816da26e15d3dbbc47baf07884677f8117e7b5fbbc3f8c1af7", - "original_key": "batch_6", - "preview": "Data R is update" - }, - { - "resdb_id": "3f1c62a3b5a1d350445620bc9c38db3fcaa62347be29c8a47139a0517dc00f4c", - "original_key": "batch_10", - "preview": "Data is updated" - }, - { - "resdb_id": "62bb73a900b94091461a32b4db067980b5a6de1a410d94e24c72388e6d524c7d", - "original_key": "batch_11", - "preview": "Data11 is added" - }, - { - "resdb_id": "eac34c42601540521f06de3192fff087935b387631de45161e2babcc8416290a", - "original_key": "batch_12", - "preview": "Data12 is updated" - } -] \ No newline at end of file diff --git a/hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt b/hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt deleted file mode 100644 index aa4e2ba89..000000000 --- a/hnsw_orm_test/leann_resdb_tiny/resdb.ids.txt +++ /dev/null @@ -1,25 +0,0 @@ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -10 -11 -12 -13 -14 -15 -16 -17 -18 -19 -20 -21 -22 -23 -24 diff --git a/hnsw_orm_test/leann_resdb_tiny/resdb.index b/hnsw_orm_test/leann_resdb_tiny/resdb.index deleted file mode 100644 index 76bec36552f4ea86d371c8e80d9c1622361a83c2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3294 zcmcK6yK0nC6b0auc#HRVk0KI8B#20`vWh8Vqn3825{qCbSmqHd?X5?U^#D~ zrJbH{vKI_P#ECN=IIMsF&RJ{kwfFhsldY$(+L6|-x2)xQ*vAFY@%-Bezd!D<+uBDs zZ0%X$uMP1+`vM1x?d8Ua{*blVAGZ76?fX;TgT9A-N0`um-w(UU_*$tTxL$E^tK#5x z#ld$K2j5p5{9JKxP;u~E#lasH2bV_nw-#KjIQXdI;PZ-uuPYAjRvi3Laqw%!!M%!u z!-|7HD-JF%?iUbTt2nq>aqvaOH>&PHy>3VAMFw!G;!}lZ3eOcT7hWv9TzIu`r7(Io 
zE522@T6nwgPT}3cwZeOa_X{5st`}y`qPN%I%Z*)c-tO$AZgqNpvTwh=(zi<^-+n3E zg}$9pKQB)At#&?~?fZP+*ZNi`FZgh&Z}lJR8#fL7;fzjyHPWK-ul#bwCs$hXk5+%2 zIe*+V;?unO!%HK1)8q#&-l(m{{LudElk<9_H|Z5!Jd~JL`PnqC-P{a*Y3Sr=R@QW6_*u{+-2vn?`1I zUirrxIgjU-`y}u5l6UFTwD%WhypK4_ZZ&{u;s z{{77T^?N#a@zZk7!r?3qI`q`ZBQ@vFAM}03i%VWPOJ`p5nc2Zf!y2C>`sz?qJV=kG zxnJ?$dFSXkXAXF;ehYWBk28Axaa7ABf4J#{ zKYt&?kDG=?1ApqLhm)2*^Gp7CaVGDB`inPb@i;Tj{>GO{4Ru)~=1nv}PN%iYoC??w}UZp3{2g5W?&W6i1BFXE_y7O^ diff --git a/hnsw_orm_test/leann_resdb_tiny/resdb.leann.passages.jsonl b/hnsw_orm_test/leann_resdb_tiny/resdb.leann.passages.jsonl deleted file mode 100644 index fbdceea7b..000000000 --- a/hnsw_orm_test/leann_resdb_tiny/resdb.leann.passages.jsonl +++ /dev/null @@ -1,25 +0,0 @@ -{"id": "0", "text": "Cross-chain communication enables interoperability between different blockchains.", "metadata": {}} -{"id": "1", "text": "ResilientDB is a high-throughput blockchain fabric designed for performance.", "metadata": {}} -{"id": "2", "text": "State 2", "metadata": {}} -{"id": "3", "text": "Practical Byzantine Fault Tolerance (PBFT) is a foundational agreement protocol.", "metadata": {}} -{"id": "4", "text": "Understanding consensus protocols is key for blockchain.", "metadata": {}} -{"id": "5", "text": "The weather in Davis is sunny today.", "metadata": {}} -{"id": "6", "text": "How to bake a perfect sourdough bread with a starters.", "metadata": {}} -{"id": "7", "text": "The project requires using the ResilientDB Fabric unless approved otherwise.", "metadata": {}} -{"id": "8", "text": "Tung Tung Tung Sahur called—they need their banana-crocodile hybrid back", "metadata": {}} -{"id": "9", "text": "Mitochondria are the powerhouse of the cell.", "metadata": {}} -{"id": "10", "text": "Update completed! 
Doc14 was already updated.", "metadata": {}} -{"id": "11", "text": "This project explores novel techniques for sharding in distributed ledgers.", "metadata": {}} -{"id": "12", "text": "The final report must be written in LaTeX using ACM templates.", "metadata": {}} -{"id": "13", "text": "Microsoft acquires a new AI startup.", "metadata": {}} -{"id": "14", "text": "DeFi applications are often built on top of smart contracts.", "metadata": {}} -{"id": "15", "text": "Updated content for verification", "metadata": {}} -{"id": "16", "text": "Updated content", "metadata": {}} -{"id": "17", "text": "ResilientDB is fast and secure.", "metadata": {}} -{"id": "18", "text": "Data B", "metadata": {}} -{"id": "19", "text": "Data C", "metadata": {}} -{"id": "20", "text": "Data D", "metadata": {}} -{"id": "21", "text": "Data R is update", "metadata": {}} -{"id": "22", "text": "Data is updated", "metadata": {}} -{"id": "23", "text": "Data11 is added", "metadata": {}} -{"id": "24", "text": "Data12 is updated", "metadata": {}} From 9b7e2dadd731eda3a12458bbdd95eaa3c330eef5 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Fri, 21 Nov 2025 17:31:04 -0800 Subject: [PATCH 33/79] Mark integration as under construction in README Updated the README to indicate that the integration is under construction. --- hnsw_orm_test/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hnsw_orm_test/README.md b/hnsw_orm_test/README.md index 00b9e38cc..6e9d9893e 100644 --- a/hnsw_orm_test/README.md +++ b/hnsw_orm_test/README.md @@ -1,4 +1,4 @@ -# ResilientDB x LEANN Vector Search Integration +# ResilientDB x LEANN Vector Search Integration (Under construction) This document explains the internal logic and specifications of indexer.py (a resident index construction service) and manage_data.py (a CLI tool for data manipulation), which are core components for keeping data on ResilientDB in a vector-searchable state. ## 1. 
Indexer Service (indexer.py) From 693f6bb57ec504f722254bb04f5a9dee753f700d Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Sun, 23 Nov 2025 19:58:24 -0800 Subject: [PATCH 34/79] Added bash file to recursively clear all carraige returns from all .sh files --- README.md | 11 ++++++++--- WSL_INSTALL.sh | 2 ++ 2 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 WSL_INSTALL.sh diff --git a/README.md b/README.md index 9faab7c8d..5ca674c15 100644 --- a/README.md +++ b/README.md @@ -46,14 +46,19 @@ Hey all, Steven here, this is the quickstart guide to getting ResDB up and runni 2. (Windows only) ResilientDB uses bash shell commands (.sh extension), which windows doesn't support natively. Fortunately, Windows 11 and most versions of Windows 10 have an easy to use subsystem for Linux, WSL. Link on how to setup [here](https://learn.microsoft.com/en-us/windows/wsl/install). After installing WSL, you can open a bash terminal by running the program `Ubuntu`. This will open from the profile of your newly created User for WSL, but you can still access to your Windows files in windows via `cd ~/../../mnt`, which should navigate you to the location of your C/D drive. -3. (Windows only?) There's a mismatch between the way Windows and Linux ends lines in files, in short, on Windows machines the shell scripts will all have an unnecessary `\r` (carriage return) character at the end of all shell files. This _will_ cause problems with execution of these files. Use the sed command (at the top-level of the cloned repo) to remove the extraneous characters: +3. (Windows only?) There's a mismatch between the way Windows and Linux ends lines in files, in short, on Windows machines the shell scripts will all have an unnecessary `\r` (carriage return) character at the end of all shell files. This _will_ cause problems with execution of these files. 
Use the sed command (at the top-level of the cloned repo) to remove the extraneous characters of the install file: ``` sudo sed -i 's/\r//g' INSTALL.sh -sudo sed -i 's/\r//g' ./service/tools/kv/server_tools/start_kv_service.sh ``` -4. Navigate to the project folder and run `sudo sh INSTALL.sh` +Unfortunately, this is a problem with every shell file in the repository. To fix this, we added a script that will recursively remove the CR from every file in the repo before running install: + +``` +sudo sh WSL_INSTALL.sh +``` + +4. Navigate to the project folder and run `sudo sh INSTALL.sh` (unless you've already run WSL_INSTALL) 5. To start the k/v store, run `./service/tools/kv/server_tools/start_kv_service.sh` diff --git a/WSL_INSTALL.sh b/WSL_INSTALL.sh new file mode 100644 index 000000000..fc9d1b485 --- /dev/null +++ b/WSL_INSTALL.sh @@ -0,0 +1,2 @@ +find . -type f -name '*sh' -exec sed -i 's/\r//g' {} \; +sudo sh INSTALL.sh \ No newline at end of file From e8ddf717280a81ec063e05d58f1d20ddfffd2f2e Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Mon, 24 Nov 2025 02:06:13 -0800 Subject: [PATCH 35/79] Updated the README to use the local builds of ecosystem tools / Explained how to re-run ResDB-orm after first installation --- README.md | 70 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 5ca674c15..5dec43b37 100644 --- a/README.md +++ b/README.md @@ -193,50 +193,70 @@ This took me a couple tries to get right, and mistakes with `update-alternatives To run ResDB-ORM, you must first start the backend services (**KV Service** and **GraphQL Server**) and then connect to them using **ResDB-ORM**. +### First Time Setup + +Running **ResDB-ORM** will always involve starting the **KV Service** and **GraphQL Server** as mentioned above. However, a few things must be done to create the environment first. 
+ +Note that all commands in this section are executed from the top-level indexers-ECS265-Fall2025 directory. You will never need to `cd` into any subdirectory. + ### Step 1: Start the KV Service -Run the following script in your ResilientDB directory: +Run the following script in your top-level indexers-ECS265-Fall2025 directory: ```bash ./service/tools/kv/server_tools/start_kv_service.sh ``` +Reciving one or more `nohup: redirecting stderr to stdout` messages indicates that the service is running. Note that this may take over control of your WSL instance. Do not close out of the terminal, instead continue on a new terminal. ### Step 2: Start the GraphQL Server -Open a new terminal tab, then setup and start the GraphQL server: -(1) Clone the repository and navigate into it: +(1) Run the following script in your top-level indexers-ECS265-Fall2025 directory: ```bash -git clone https://github.com/apache/incubator-resilientdb-graphql.git -cd incubator-resilientdb-graphql +bazel build ./ecosystem/graphql/service/http_server:crow_service_main +bazel-bin/service/http_server/crow_service_main ./ecosystem/graphql/service/tools/config/interface/service.config ./ecosystem/graphql/service/http_server/server_config.config ``` -(2) Create a virtual environment: +The first command may take some time to run. + +Reciving one or more `[INFO ]` messages indicates that the service is running. Note that this may take over control of your WSL instance. Do not close out of the terminal, instead continue on a new terminal. + +(2) After running bazel-bin, you should recieve a message with the format `[INFO ] Crow/1.0 server is running at http://0.0.0.0:18000 using ~ threads`. Copy and save the URL (in this case `http://0.0.0.0:18000`) for the next step. + +### Step 3 Open ```config.yaml``` and Update the db_root_url with the GraphQL Server URL you Copied in Step 2. +Open the file `indexers-ECS265-Fall2025/ecosystem/sdk/resdb-orm/config.yaml`, it will be a small config file. 
Replace `` with the exact url you copied above like this: +```yaml +database: + db_root_url: +``` + +### Step 4 Create the Python Virtual Environment +GraphQL requires python packages, so we use a virtual environment to run them (though in practice, these packages could be/may already be installed globally). Note that while we create this virtual environment at the top-level, it operates excusively on ResDB-orm and *could* be placed in there instead. + +Open a new terminal tab, then setup and start the GraphQL server: +(1) Create a virtual environment: ```bash python3.10 -m venv venv ``` -(3) Build and run the service: +(2) activate the virtual environment: ```bash -bazel build service/http_server:crow_service_main -bazel-bin/service/http_server/crow_service_main service/tools/config/interface/service.config service/http_server/server_config.config +source venv/bin/activate ``` -***Important:*** Check the first line of the startup log and copy the displayed URL (e.g., ```http://0.0.0.0:18000```). You will need this for the configuration step. - -### Step 3: Clone ResDB-ORM repository and install dependencies: -Open another new terminal tab to set up the ORM and verify the operation. +(3) install the necessary packages (may take awhile): ```bash -git clone https://github.com/apache/incubator-resilientdb-ResDB-ORM.git -cd incubator-resilientdb-ResDB-ORM - -python3.10 -m venv venv -source venv/bin/activate - -pip install -r requirements.txt +pip install -r ./ecosystem/sdk/redb-orm/requirements.txt pip install resdb-orm ``` -### Step 4: Open ```config.yaml``` and update the db_root_url with the GraphQL Server URL you copied in Step 2. -```yaml -database: - db_root_url: -``` +The first install command may take some time to run. Your terminal will be free when it's done. 
### Step 5: Run the test script to ensure everything is working correctly: ```bash python tests/test.py ``` + +### Step 6+: Re-Running ResdDB-orm in the future +As long as the setup is successful, you will only need to run these two commands to spin up the **KV Service** and **GraphQL Server** in the future: +```bash +./service/tools/kv/server_tools/start_kv_service.sh +bazel-bin/service/http_server/crow_service_main ./ecosystem/graphql/service/tools/config/interface/service.config ./ecosystem/graphql/service/http_server/server_config.config +``` + +Note that each of these commands will prevent input on the terminal you run them in. + +To interact with ResDB-orm, spin up the python instance running it: `source venv/bin/activate`. To leave this Python environment and return to bash, just type `deactivate`. \ No newline at end of file From 6760e531151d1b4e730465da91fdc83a84124084 Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Mon, 24 Nov 2025 02:48:47 -0800 Subject: [PATCH 36/79] Fixed code typos found after re-building resdb-orm --- README.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 5dec43b37..bcbc1e1ce 100644 --- a/README.md +++ b/README.md @@ -197,7 +197,7 @@ To run ResDB-ORM, you must first start the backend services (**KV Service** and Running **ResDB-ORM** will always involve starting the **KV Service** and **GraphQL Server** as mentioned above. However, a few things must be done to create the environment first. -Note that all commands in this section are executed from the top-level indexers-ECS265-Fall2025 directory. You will never need to `cd` into any subdirectory. +Note that all commands in this section are executed from the top-level indexers-ECS265-Fall2025 directory. You will never need to `cd` into any subdirectory until the 5th step, running the test code. 
### Step 1: Start the KV Service Run the following script in your top-level indexers-ECS265-Fall2025 directory: @@ -209,8 +209,8 @@ Reciving one or more `nohup: redirecting stderr to stdout` messages indicates th ### Step 2: Start the GraphQL Server (1) Run the following script in your top-level indexers-ECS265-Fall2025 directory: ```bash -bazel build ./ecosystem/graphql/service/http_server:crow_service_main -bazel-bin/service/http_server/crow_service_main ./ecosystem/graphql/service/tools/config/interface/service.config ./ecosystem/graphql/service/http_server/server_config.config +bazel build --package_path=./ecosystem/graphql //service/http_server:crow_service_main +bazel-bin/service/http_server/crow_service_main ecosystem/graphql/service/tools/config/interface/service.config ecosystem/graphql/service/http_server/server_config.config ``` The first command may take some time to run. @@ -225,10 +225,13 @@ database: db_root_url: ``` +More likely than not, it will be `http://0.0.0.0:18000` for you too, and that will match what is currently in the config file. + ### Step 4 Create the Python Virtual Environment GraphQL requires python packages, so we use a virtual environment to run them (though in practice, these packages could be/may already be installed globally). Note that while we create this virtual environment at the top-level, it operates excusively on ResDB-orm and *could* be placed in there instead. Open a new terminal tab, then setup and start the GraphQL server: + (1) Create a virtual environment: ```bash python3.10 -m venv venv @@ -239,14 +242,16 @@ source venv/bin/activate ``` (3) install the necessary packages (may take awhile): ```bash -pip install -r ./ecosystem/sdk/redb-orm/requirements.txt +pip install -r ./ecosystem/sdk/resdb-orm/requirements.txt pip install resdb-orm ``` The first install command may take some time to run. Your terminal will be free when it's done. 
### Step 5: Run the test script to ensure everything is working correctly: +To run the ResDB-orm test code, you need to change into the resdb-orm directory: ```bash +cd ./ecosystem/sdk/resdb-orm python tests/test.py ``` @@ -254,7 +259,7 @@ python tests/test.py As long as the setup is successful, you will only need to run these two commands to spin up the **KV Service** and **GraphQL Server** in the future: ```bash ./service/tools/kv/server_tools/start_kv_service.sh -bazel-bin/service/http_server/crow_service_main ./ecosystem/graphql/service/tools/config/interface/service.config ./ecosystem/graphql/service/http_server/server_config.config +bazel-bin/service/http_server/crow_service_main ecosystem/graphql/service/tools/config/interface/service.config ecosystem/graphql/service/http_server/server_config.config ``` Note that each of these commands will prevent input on the terminal you run them in. From 74d018b31457f95cc33e0fe0d2a4b9cfe5dddb90 Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Tue, 25 Nov 2025 16:32:09 -0800 Subject: [PATCH 37/79] Added script to start all helper tools + ResDB-orm, minor changes to spinup instructions --- README.md | 9 ++++---- SPINUP_RESDB_ORM.sh | 54 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 5 deletions(-) create mode 100644 SPINUP_RESDB_ORM.sh diff --git a/README.md b/README.md index bcbc1e1ce..ecc103746 100644 --- a/README.md +++ b/README.md @@ -197,8 +197,6 @@ To run ResDB-ORM, you must first start the backend services (**KV Service** and Running **ResDB-ORM** will always involve starting the **KV Service** and **GraphQL Server** as mentioned above. However, a few things must be done to create the environment first. -Note that all commands in this section are executed from the top-level indexers-ECS265-Fall2025 directory. You will never need to `cd` into any subdirectory until the 5th step, running the test code. 
- ### Step 1: Start the KV Service Run the following script in your top-level indexers-ECS265-Fall2025 directory: ```bash @@ -207,10 +205,11 @@ Run the following script in your top-level indexers-ECS265-Fall2025 directory: Reciving one or more `nohup: redirecting stderr to stdout` messages indicates that the service is running. Note that this may take over control of your WSL instance. Do not close out of the terminal, instead continue on a new terminal. ### Step 2: Start the GraphQL Server -(1) Run the following script in your top-level indexers-ECS265-Fall2025 directory: +(1) Run the following script in your ecosystem/graphql directory: ```bash -bazel build --package_path=./ecosystem/graphql //service/http_server:crow_service_main -bazel-bin/service/http_server/crow_service_main ecosystem/graphql/service/tools/config/interface/service.config ecosystem/graphql/service/http_server/server_config.config +cd ./ecosystem/graphql +bazel build /service/http_server:crow_service_main +bazel-bin/service/http_server/crow_service_main service/tools/config/interface/service.config service/http_server/server_config.config ``` The first command may take some time to run. diff --git a/SPINUP_RESDB_ORM.sh b/SPINUP_RESDB_ORM.sh new file mode 100644 index 000000000..f9c43edd5 --- /dev/null +++ b/SPINUP_RESDB_ORM.sh @@ -0,0 +1,54 @@ +# TODO: Change this to 300 (5 minutes) once done testing +max_iterator=500 + +touch ormSpinup.log +./service/tools/kv/server_tools/start_kv_service.sh 2>> ormSpinup.log & + +iterator=0 +while ! 
grep "nohup: redirecting stderr to stdout|Build completed successfully" ormSpinup.log; do + sleep 1 + iterator=$((iterator + 1)) + if [ $iterator -gt $max_iterator ]; then + echo "Timed out waiting for KV service to start" + echo "Run \`./service/tools/kv/server_tools/start_kv_service.sh\` yourself to manually diagnose errors" + # TODO: Un remove this once you diagnose the problem + # rm ormSpinup.log + return 1 + fi +done + +# Just in case the service needs a minute +sleep 2 + +echo "KV service started successfully, now starting GraphQL service" +rm ormSpinup.log +touch ormSpinup.log + +cd ecosystem/graphql +bazel-bin/service/http_server/crow_service_main ./ecosystem/graphql/service/tools/config/interface/service.config ./ecosystem/graphql/service/http_server/server_config.config > ormSpinup.log 2>&1 & + +# TODO: Delete this +sleep 5 + +iterator=0 +while ! grep "[INFO ]" ormSpinup.log; do + sleep 1 + iterator=$((iterator + 1)) + if [ $iterator -gt $max_iterator ]; then + echo "Timed out waiting for GraphQL service to start" + echo "Note that the kv service is currently running" + echo "Run \`bazel-bin/service/http_server/crow_service_main ./ecosystem/graphql/service/tools/config/interface/service.config ./ecosystem/graphql/service/http_server/server_config.config > ormSpinup.log 2>&1 &\` yourself to manually diagnose errors" + rm ormSpinup.log + return 1 + fi +done + +rm ormSpinup.log +# We wait for the SECOND [INFO] message +sleep 2 + +echo "kv_service and graphql_service started successfully" +echo "It should be safe to enable your venv and use ResDB-orm now" + +# TODO: GitHub Copilot installed itself without asking, and suggested the following line. It might actually work for pkill (??) 
+# echo "to stop them, run: ./service/tools/kv/server_tools/stop_kv_service.sh and pkill crow_service_main" \ No newline at end of file From 9246bfd41e210d05c327f64540cdc1a1a2bb63c8 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Thu, 27 Nov 2025 06:11:15 +0900 Subject: [PATCH 38/79] Correct Bazel build command in README Fix the Bazel build command path in the README. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ecc103746..cd5dd6355 100644 --- a/README.md +++ b/README.md @@ -208,7 +208,7 @@ Reciving one or more `nohup: redirecting stderr to stdout` messages indicates th (1) Run the following script in your ecosystem/graphql directory: ```bash cd ./ecosystem/graphql -bazel build /service/http_server:crow_service_main +bazel build service/http_server:crow_service_main bazel-bin/service/http_server/crow_service_main service/tools/config/interface/service.config service/http_server/server_config.config ``` The first command may take some time to run. @@ -263,4 +263,4 @@ bazel-bin/service/http_server/crow_service_main ecosystem/graphql/service/tools/ Note that each of these commands will prevent input on the terminal you run them in. -To interact with ResDB-orm, spin up the python instance running it: `source venv/bin/activate`. To leave this Python environment and return to bash, just type `deactivate`. \ No newline at end of file +To interact with ResDB-orm, spin up the python instance running it: `source venv/bin/activate`. To leave this Python environment and return to bash, just type `deactivate`. 
From a92965a76af6d5f44d8548a065f2d123a51f2838 Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Sat, 29 Nov 2025 15:22:39 -0800 Subject: [PATCH 39/79] Added the ability to generate embeddings, save them to ResDB, and search across them --- README.md | 22 +- ecosystem/sdk/vector-indexing/config.yaml | 17 ++ ecosystem/sdk/vector-indexing/hnsw_library.py | 19 ++ .../sdk/vector-indexing/resdb_orm/__init__.py | 16 ++ .../sdk/vector-indexing/resdb_orm/config.yaml | 17 ++ .../sdk/vector-indexing/resdb_orm/orm.py | 94 +++++++ ecosystem/sdk/vector-indexing/vector_add.py | 248 ++++++++++++++++++ ecosystem/sdk/vector-indexing/vector_get.py | 203 ++++++++++++++ 8 files changed, 634 insertions(+), 2 deletions(-) create mode 100644 ecosystem/sdk/vector-indexing/config.yaml create mode 100644 ecosystem/sdk/vector-indexing/hnsw_library.py create mode 100644 ecosystem/sdk/vector-indexing/resdb_orm/__init__.py create mode 100644 ecosystem/sdk/vector-indexing/resdb_orm/config.yaml create mode 100644 ecosystem/sdk/vector-indexing/resdb_orm/orm.py create mode 100644 ecosystem/sdk/vector-indexing/vector_add.py create mode 100644 ecosystem/sdk/vector-indexing/vector_get.py diff --git a/README.md b/README.md index cd5dd6355..76d199406 100644 --- a/README.md +++ b/README.md @@ -32,9 +32,27 @@ 4. [How to Run ResDB-ORM](#How-to-Run-ResDB-ORM) ## Running the Indexing Project -TODO +All user-facing code for this project is located in `ecosystem/sdk/vector-indexing`. As long as the **KV Service** and **GraphQL Server** are running, executing the python code directly through the command line will work - nothing needs to be built beforehand. 
This does need to be run with a python instance with the ResDB-orm package installed (we reccoment using a virtual environment) -\ +The project acts as a wrapper around the KV Service - (string) values added using our tools will save them to ResilientDB *in addition* to generating vector embeddings for those values (Embeddings are also saved in resilientDB). We then offer the ability to search for the k_closest embeddings based on an input value + +### Adding a Value +From `ecosystem/sdk/vector-indexing`, run the command + +```bash +python vector_add.py --value +``` +- Flag `--value`: must be immediately followed by the value a user wishes to save. Omitting this flag will prevent the program from running. Duplicate values cannot be saved. + +### Searching Across Embeddings +This is the main functionality of our project - the ability to search for the most similar values based on their embeddings. From `ecosystem/sdk/vector-indexing`, run the command + +```bash +python vector_get.py --value --k_matches +``` +- Flag `--value`: must be immediately followed by the value a user wishes to perform a similarity search for. Either this flag, or `--show_all` must be used - omitting both will prevent the program from running. +- Flag `--k_matches`: must immediately be followed by the k-most-similar matches to `--value` that a user wishes to retrieve. If this flag is omitted, a default of 1 will be used. +- Flag `--show_all`: does not require a second arguement. It will list every value that has been added to this instance of ResDB that has a correlated vector embedding. Using this arguement will override the other two flags completely. ## ResilientDB Installation Forked from [this repository](https://github.com/apache/incubator-resilientdb), for more complex setup instructions, please head there. 
diff --git a/ecosystem/sdk/vector-indexing/config.yaml b/ecosystem/sdk/vector-indexing/config.yaml new file mode 100644 index 000000000..7e8e10372 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/config.yaml @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +database: + db_root_url: http://0.0.0.0:18000 diff --git a/ecosystem/sdk/vector-indexing/hnsw_library.py b/ecosystem/sdk/vector-indexing/hnsw_library.py new file mode 100644 index 000000000..b38f208e2 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/hnsw_library.py @@ -0,0 +1,19 @@ +from resdb_orm.orm import ResDBORM +from leann import LeannBuilder, LeannSearcher +from pathlib import Path +from typing import Dict, List, Any + +db = ResDBORM() + +def create_record(value: str | Dict[str, Any]) -> str: + return db.create(value) + +def get_record(key: str) -> str | Dict[str, Any] | List[Dict[str, Any]]: + return db.read(key) + +#TODO: Replace none with successful or not successful (youll have to check the return type of update) +def put_record(key: str, value: str | Dict[str, Any]) -> None: + _ = db.update(key, value) + +# - - - - - - - - - FINAL SECTION: LONG-TERM TODO s - - - - - - - - - > +# TODO: strongly type as much as you can \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/resdb_orm/__init__.py b/ecosystem/sdk/vector-indexing/resdb_orm/__init__.py new file mode 100644 index 000000000..916f3445e --- /dev/null +++ b/ecosystem/sdk/vector-indexing/resdb_orm/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from .orm import ResDBORM diff --git a/ecosystem/sdk/vector-indexing/resdb_orm/config.yaml b/ecosystem/sdk/vector-indexing/resdb_orm/config.yaml new file mode 100644 index 000000000..7e8e10372 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/resdb_orm/config.yaml @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +database: + db_root_url: http://0.0.0.0:18000 diff --git a/ecosystem/sdk/vector-indexing/resdb_orm/orm.py b/ecosystem/sdk/vector-indexing/resdb_orm/orm.py new file mode 100644 index 000000000..1e7564c16 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/resdb_orm/orm.py @@ -0,0 +1,94 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import requests +import json +import secrets +import yaml + +class ResDBORM: + def __init__(self, config_path='config.yaml'): + with open(config_path, 'r') as config_file: + self.config = yaml.safe_load(config_file) + self.db_root_url = self.config['database']['db_root_url'] + + def generate_token(self, length=64): + """Generate a secure random hexadecimal token.""" + return secrets.token_hex(length // 2) + + def create(self, data): + """Create a new record in the DB.""" + token = self.generate_token() + payload = {"id":token, "data":data} + headers = {'Content-Type': 'application/json'} + response = requests.post(f'{self.db_root_url}/v1/transactions/commit', + data=json.dumps(payload), headers=headers) + + + # Check if response is successful and handle empty response content + if response.status_code == 201: + if response.content: + decoded_content = response.content.decode('utf-8') + id_value = decoded_content.split(': ')[1].strip() + return id_value + else: + return {"status": "create unsuccessful, no content in response"} + + def read_all(self): + """Read all records from the DB.""" + response = requests.get(f'{self.db_root_url}/v1/transactions') + return response.json() + + def read(self, key): + """Read a specific record by key from the DB.""" + response = requests.get(f'{self.db_root_url}/v1/transactions/{key}') + return response.json() + + def delete(self, key): + """Delete a specific record by key in the DB.""" + payload = {"id": key} + headers = {'Content-Type': 'application/json'} + response = 
requests.post(f'{self.db_root_url}/v1/transactions/commit', + data=json.dumps(payload), headers=headers) + + # Check if response is successful and handle empty response content + if response.status_code == 201: + if response.content: + return {"status": "delete successful"} + else: + return {"status": "delete unsuccessful, no content in response"} + + def update(self, key, new_data): + """Update a specific record by key in the DB.""" + # Delete the existing record first + delete_response = self.delete(key) + + # Handle the response accordingly + if "status" in delete_response and "no content in response" in delete_response["status"]: + print("Warning: Delete operation returned no content.") + + # Update by creating a new entry with the same key + payload = {"id": key, "data": new_data} + headers = {'Content-Type': 'application/json'} + response = requests.post(f'{self.db_root_url}/v1/transactions/commit', + data=json.dumps(payload), headers=headers) + + # Check if response is successful and handle empty response content + if response.status_code == 201: + if response.content: + return {"status": "update successful"} + else: + return {"status": "update unsuccessful, no content in response"} + diff --git a/ecosystem/sdk/vector-indexing/vector_add.py b/ecosystem/sdk/vector-indexing/vector_add.py new file mode 100644 index 000000000..b7fd34af1 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/vector_add.py @@ -0,0 +1,248 @@ +""" +Filename: vector_add.py +Author(s) / Contrubtor(s): Steven Shoemaker / Regan Yang, Ritesh Patro, Yoshiki Yamaguchi, Tiching Kao +Date: 2025-Fall +Description: (Indexers project) Run to save value to ResDB and generate a vector embedding for it +""" + +import sys +# TODO: I kept this in here bc it might be a background element that ResDB-orm needs +# but not sure. 
Delete once everything works +import requests +# TODO: I just copied the entire directory into here, I don't get how this works +# Why do we need to import it from there if it's been globally insalled through pip? +# TODO ^ ditto for config.yaml +from resdb_orm.orm import ResDBORM +import hnsw_library +from leann import LeannBuilder, LeannSearcher +from pathlib import Path +import json +import os +from typing import Dict, Any + +WORKING_DIR = Path("./").resolve() + +db = ResDBORM() + +keylist_key = "ddc7ffb0b82115d79e7c552fbe8fc434d606475375e2b3408eb2fa11ea4deeba" + +value_to_add = '' + +# TODO: I got this from a guide for parsing system input, but what does it mean? What is the proper +# way to use it? +# NOTE: It looks like this only runs when you run it as a script and not by linking it in other files +# as sucnh, you should probably keep all code in here + +# TODO: Format these section segmenters better +# - - - - - - - - - SECTION 1: Init and data cleaning - - - - - - - - - > +if __name__ == "__main__": + # Parse the value that the user is requesting to add + for i in range (len(sys.argv)): + # TODO: Consider adding an error message for when users use `--value` with no input + # TODO: Consider if you need to parse whitespace around `--value` + # TODO: Consider cases when `--value` isnt a string + if(sys.argv[i] == '--value' and (i + 1 != len(sys.argv))): + value_to_add = sys.argv[i + 1] + break + + # TODO: Add a --k_num option + + if value_to_add == '': + print("Critical error - the program requires an arguement in the form of `--value stringToSave`") + # TODO: Make sure that this is the right exit code (this WILL close the whole program) + exit(1) + +# - - - - - - - - - SECTION 2: Retrieve HNSW data or create if it doesnt exist - - - - - - - - - > + # TODO: Add notes for when you don't have any saved data + file_embedding_keys = str(WORKING_DIR / "saved_data/embedding_keys.json") + embedding_keys: Dict[str, Any] = {} + hnsw_text_entries = [value_to_add] + + # 
TODO: Look into creating/storing ResDB-orm data with a custom key instead of these autogenerated ones + # It might prevent us from having to read keys from an external file + # TODO: This will fail if the `saved_data` folder does not exist, error handle by first creating the folder if it doesnt + # If the user has never saved data before, create the file for storing it + if not os.path.exists(file_embedding_keys): + embedding_keys = { + "temp_ids_txt": "", + "temp_index_txt": "", + "temp_leann_meta_json": "", + "temp_leann_passages_txt": "", + "temp_leann_passages_json": "" + } + # If the user does have some form of prior saved data, it should have a list of keys, even if invalid + else: + try: + with open(file_embedding_keys, 'r') as file: + embedding_keys = json.load(file) + # TODO: Exit out of execution or error handle here + except FileNotFoundError: + print("file not found") + except json.JSONDecodeError: + print("File isnt valid json") + + # Ensure that keys for text files are valid. If not, create them + for field in ["temp_ids_txt", "temp_index_txt", "temp_leann_passages_txt"]: + key = embedding_keys[field] + if key is not None and key != "": + # TODO: Make sure it actually is an Error when you try to get with a key that doesnt work + try: + _ = hnsw_library.get_record(key) + # TODO: Structure this so you don't have the same line of code in two different places + except: + embedding_keys[field] = hnsw_library.create_record('') + else: + embedding_keys[field] = hnsw_library.create_record('') + + # Ensure that key for this specific json file is valid. 
If not, create it + key = embedding_keys["temp_leann_meta_json"] + if key is not None and key != "": + # TODO: Make sure it actually is an Error when you try to get with a key that doesnt work + try: + print('Getting record') + _ = hnsw_library.get_record(key) + print('got the meta json without erorr') + # TODO: Structure this so you don't have the same line of code in two different places + except: + embedding_keys["temp_leann_meta_json"] = hnsw_library.create_record({}) + else: + embedding_keys["temp_leann_meta_json"] = hnsw_library.create_record({}) + + # Ensure that key for this specific json file is valid. If not, create it + # This file contains the information we need to rebuild the HNSW tree, gather that information + # while checking to see if it exists (or use an empty array otherwise) + key = embedding_keys["temp_leann_passages_json"] + if key is not None and key != "": + # TODO: Make sure it actually is an Error when you try to get with a key that doesnt work + try: + passages_data = hnsw_library.get_record(key) + hnsw_text_entries = list(map(lambda dataPoint: dataPoint['text'], passages_data["data"])) + # TODO: Make sure that this behaves as youd expect + if value_to_add in hnsw_text_entries: + print(f"{value_to_add} is already saved as an embedding in the ResDB database") + sys.exit(0) + hnsw_text_entries.append(value_to_add) + # TODO: Structure this so you don't have the same line of code in two different places + except: + hnsw_text_entries = [value_to_add] + embedding_keys["temp_leann_passages_json"] = hnsw_library.create_record([]) + else: + hnsw_text_entries = [value_to_add] + embedding_keys["temp_leann_passages_json"] = hnsw_library.create_record([]) + + # Open the file in write mode and dump the data + try: + with open(file_embedding_keys, 'w') as file: + json.dump(embedding_keys, file) + print("Successful write") + # TODO: Handle this better or error out + except IOError as e: + print("IO Error, unsuccessful write: {e}") + +# - - - - - - - - - 
SECTION 3: Construct the HNSW data structure (leann builder) - - - - - - - - - > + file_temporary_storage = str(WORKING_DIR / "saved_data/temp/temp.leann") + + # TODO: Suppress outputs from leann algorithms, they flood the console + + # TODO: This will fail if the `saved_data/temp` folder does not exist, error handle by first creating the folder + # (after error handling way above to create the saved_data folder) if it doesnt exist + # If the user has never saved data before, create the file for storing it + builder = LeannBuilder(backend_name="hnsw") + for text in hnsw_text_entries: + builder.add_text(text) + builder.build_index(file_temporary_storage) + +# - - - - - - - - - SECTION 4: Save the new embeddings - - - - - - - - - > + # Save updated indeces for the text files to ResDB + for pairing in [ + ("temp.ids.txt", "temp_ids_txt") + ]: + + fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) + key = embedding_keys[pairing[1]] + + try: + print(fileName) + # TODO: Change this from rb to just r + with open(fileName, 'r', encoding='ascii') as file: + content = file.read() + hnsw_library.put_record(key, content) + # TODO: These are critical errors that will break the whole system + except FileNotFoundError: + print(f"Error: The file was not found.") + except Exception as e: + print(f"An error occurred: {e}") + + # Save updated indeces for the text files to ResDB + for pairing in [ + ("temp.leann.passages.idx", "temp_leann_passages_txt"), + ("temp.index", "temp_index_txt") + ]: + + fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) + key = embedding_keys[pairing[1]] + + try: + print(fileName) + # TODO: Change this from rb to just r + with open(fileName, 'r', encoding='Windows-1252') as file: + content = file.read() + print(content) + hnsw_library.put_record(key, content) + # TODO: These are critical errors that will break the whole system + except FileNotFoundError: + print(f"Error: The file was not found.") + except Exception as e: + print(f"An 
error occurred: {e}") + + + + # Save updated indeces for the json file to ResDB + fileName = str(WORKING_DIR / "saved_data/temp/" / "temp.leann.meta.json") + key = embedding_keys["temp_leann_meta_json"] + try: + with open(fileName, 'r') as file: + # TODO: Make sure that this generates JSON content correctly as expected (as a Dict[str, Any]) + content = json.load(file) + hnsw_library.put_record(key, content) + # TODO: These are critical errors that will break the whole system + except FileNotFoundError: + print(f"Error: The file was not found.") + except Exception as e: + print(f"An error occurred: {e}") + + + + # Save updated indeces for the jsonline (`.jsonl`) files to ResDB + fileName = str(WORKING_DIR / "saved_data/temp/" / "temp.leann.passages.jsonl") + key = embedding_keys["temp_leann_passages_json"] + content = [] + try: + with open(fileName, 'r') as file: + for line in file: + content.append(json.loads(line)) + hnsw_library.put_record(key, content) + # TODO: These are critical errors that will break the whole system + except FileNotFoundError: + print(f"Error: The file was not found.") + except Exception as e: + print(f"An error occurred: {e}") + + +# - - - - - - - - - SECTION 5: Cleanup: Remove temporary files - - - - - - - - - > + # TODO: Ultimately remove (and at the start of this file, create) the whole temp directory + # Remove all temporarily created files + for temp_file_path in ["temp.ids.txt", "temp.index", "temp.leann.passages.idx", + "temp.leann.meta.json", "temp.leann.passages.jsonl"]: + fileName = str(WORKING_DIR / "saved_data/temp/" / temp_file_path) + + try: + os.remove(fileName) + # TODO: These are non-critical errors, but do this better + except FileNotFoundError: + print(f"Error: The file was not found.") + # TODO: Figure out when this happens (if sudo venv/activate is enough) and warn the user appropriately + except PermissionError: + print("Permission error when deleting files in your temp directory") + except Exception as e: + print(f"An 
error occurred: {e}") \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/vector_get.py b/ecosystem/sdk/vector-indexing/vector_get.py new file mode 100644 index 000000000..63c096ebd --- /dev/null +++ b/ecosystem/sdk/vector-indexing/vector_get.py @@ -0,0 +1,203 @@ +""" +Filename: vector_get.py +Author(s) / Contrubtor(s): Steven Shoemaker / Regan Yang, Ritesh Patro, Yoshiki Yamaguchi, Tiching Kao +Date: 2025-Fall +Description: (Indexers project) Run to search ResDB for the embeddings k-closest to input string +""" + +# TODO: I kept this in here bc it might be a background element that ResDB-orm needs +# but not sure. Delete once everything works if its not needed +import requests +# TODO: I just copied the entire directory into here, I don't get how this works +# Why do we need to import it from there if it's been globally insalled through pip? +# TODO ^ ditto for config.yaml +from resdb_orm.orm import ResDBORM +from pathlib import Path +from typing import Dict, Any, List +import json +import os +import hnsw_library +from leann import LeannBuilder, LeannSearcher +import sys + +WORKING_DIR = Path("./").resolve() + +db = ResDBORM() + +search_value = "" +k_matches = 0 +return_all = False + +# TODO: Format these section segmenters better +# - - - - - - - - - SECTION 1: Init and data cleaning - - - - - - - - - > +if __name__ == "__main__": + # Parse the value that the user is requesting to add + for i in range (len(sys.argv)): + # TODO: Consider adding an error message for when users use `--value` with no input + # TODO: Consider if you need to parse whitespace around `--value` + if(sys.argv[i] == '--value' and (i + 1 != len(sys.argv))): + # TODO: For this and the next one, I think you need to NOT allow either to take + # `--k_matches` or `--value` as a possible embeddable value + search_value = sys.argv[i + 1] + + if(sys.argv[i] == '--k_matches' and (i + 1 != len(sys.argv))): + # TODO: This needs better parsing, specifically error handling for non-int 
inputs + k_matches = int(sys.argv[i + 1]) + + if(sys.argv[i] == '--show_all'): + return_all = True + + if not return_all: + if not search_value: + print('Invalid input, please use one of the two following combinations of flags:') + print('(1) Use flag `--value STRING` to find most similar terms to STRING. In addition, use') + print(' flag `--k_matches ###` to search for the k-closest strings. Leave blank to only find one') + print('(2) Use flag `--show_all` with no arguements to list ALL values that correlate with a vector embedding') + print(' this will override all other flags used') + # TODO: Possibly use `exit` (?) instead - cause the program to halt and error out + exit(1) + elif k_matches <= 0: + print('No or invalid arguement provided for --k_matches. Defaulting to finding one single most similar value') + # NOTE: You don't need this else as long as the above if statement halts execution + +# - - - - - - - - - SECTION 2: Retrieve keys to HNSW data - - - - - - - - - > + file_embedding_keys = str(WORKING_DIR / "saved_data/embedding_keys.json") + embedding_keys: Dict[str, Any] = {} + # TODO: This is copied from the read code, but if ANY ONE these keys alone are missing the whole + # "get" program will fail. 
Do not allow this path not existing / a read request turning up nothing + # to happen + if not os.path.exists(file_embedding_keys): + embedding_keys = { + "temp_ids_txt": "", + "temp_index_txt": "", + "temp_leann_meta_json": "", + "temp_leann_passages_txt": "", + "temp_leann_passages_json": "" + } + else: + try: + with open(file_embedding_keys, 'r') as file: + embedding_keys = json.load(file) + # TODO: I copied this from the add file, consider the impact of these errors and handle them appropriately + except FileNotFoundError: + print("file not found") + except json.JSONDecodeError: + print("File isnt valid json") + +# - - - - - - - - - SECTION 3: Save the embedding data to temporary files - - - - - - - - - > + # TODO: The error handling for all 4 of these blocks isnt well thoguht out + # It's correct, but it was just copied from vector_add + # Most of these aren't looped due to the way they're decoded (literally the "encoding" param) + # restructuring this to not loop over these 1-item arrays will make the code a lot more readable + embedding_data = [ + ("temp.ids.txt", "temp_ids_txt") + ] + for pairing in embedding_data: + fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) + key = embedding_keys[pairing[1]] + file_content = (hnsw_library.get_record(key))["data"] + + try: + with open(fileName, 'w', encoding="ascii") as file: + file.write(file_content) + + # TODO: Consider what this error actually means and handle it correctly + except Exception as e: + print(f"An error occurred: {e}") + + + + embedding_data = [ + ("temp.leann.passages.idx", "temp_leann_passages_txt"), + ("temp.index", "temp_index_txt") + ] + for pairing in embedding_data: + fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) + key = embedding_keys[pairing[1]] + file_content = (hnsw_library.get_record(key))["data"] + + try: + with open(fileName, 'w', encoding="Windows-1252") as file: + file.write(file_content) + + # TODO: Consider what this error actually means and handle it 
correctly + except Exception as e: + print(f"An error occurred: {e}") + + + embedding_data = [ + ("temp.leann.meta.json", "temp_leann_meta_json") + ] + + for pairing in embedding_data: + fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) + key = embedding_keys[pairing[1]] + file_content = (hnsw_library.get_record(key))["data"] + + try: + with open(fileName, 'w') as file: + json.dump(file_content, file) + + # TODO: Consider what this error actually means and handle it correctly + except Exception as e: + print(f"An error occurred: {e}") + + + embedding_data = [ + ("temp.leann.passages.jsonl", "temp_leann_passages_json") + ] + for pairing in embedding_data: + fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) + + # Since each json object is on a new line (it's a jsonl file), we append instead of overwriting + # So, we must force the file to delete/recreate to avoid appending over old data + filePath = Path(fileName) + if filePath.is_file(): + os.remove(fileName) + key = embedding_keys[pairing[1]] + file_content: List[Dict[str, Any]] = (hnsw_library.get_record(key))["data"] + + try: + with open(fileName, 'a') as file: + for i, line in enumerate(file_content): + json.dump(line, file) + if i != (len(file_content) - 1): + file.write("\n") + + # TODO: Consider what this error actually means and handle it correctly + except Exception as e: + print(f"An error occurred: {e}") + +# - - - - - - - - - SECTION 4: Re-Construct the HNSW data structure (leann searcher) - - - - - - - - - > + # TODO: Rethink the name of this and the input variables + k_searches = sys.maxsize if return_all else k_matches + # TODO: Suppress outputs from leann algorithms, they flood the console + file_temporary_storage = str(WORKING_DIR / "saved_data/temp/temp.leann") + searcher = LeannSearcher(file_temporary_storage) + results = searcher.search(search_value, top_k=k_searches) + + # Print results to the console + for i, line in enumerate(results): + # TODO: There's probably a 
faster way to just merge this into the formatting instead of if/else + if return_all: + print(f"{i+1}. {line.text}") + else: + print(f"{i+1}. {line.text} // (similarity score: {line.score})") + +# - - - - - - - - - SECTION 5: Cleanup: Remove temporary files - - - - - - - - - > + # TODO: Ultimately remove (and at the start of this file, create) the whole temp directory + # Remove all temporarily created files + for temp_file_path in ["temp.ids.txt", "temp.index", "temp.leann.passages.idx", + "temp.leann.meta.json", "temp.leann.passages.jsonl"]: + fileName = str(WORKING_DIR / "saved_data/temp/" / temp_file_path) + + try: + os.remove(fileName) + # TODO: These are non-critical errors, but do this better + except FileNotFoundError: + print(f"Error: The file was not found.") + # TODO: Figure out when this happens (if sudo venv/activate is enough) and warn the user appropriately + except PermissionError: + print("Permission error when deleting files in your temp directory") + except Exception as e: + print(f"An error occurred: {e}") \ No newline at end of file From 49939774c8fc8547dad3c219d4a415b3ee81d6a3 Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Sun, 30 Nov 2025 01:40:54 -0800 Subject: [PATCH 40/79] Cleared all TODOs in vector_add --- ecosystem/sdk/vector-indexing/vector_add.py | 298 ++++++++++---------- 1 file changed, 142 insertions(+), 156 deletions(-) diff --git a/ecosystem/sdk/vector-indexing/vector_add.py b/ecosystem/sdk/vector-indexing/vector_add.py index b7fd34af1..81c5d7e1e 100644 --- a/ecosystem/sdk/vector-indexing/vector_add.py +++ b/ecosystem/sdk/vector-indexing/vector_add.py @@ -4,64 +4,48 @@ Date: 2025-Fall Description: (Indexers project) Run to save value to ResDB and generate a vector embedding for it """ - +# Typical Python imports import sys -# TODO: I kept this in here bc it might be a background element that ResDB-orm needs -# but not sure. 
Delete once everything works -import requests -# TODO: I just copied the entire directory into here, I don't get how this works -# Why do we need to import it from there if it's been globally insalled through pip? -# TODO ^ ditto for config.yaml -from resdb_orm.orm import ResDBORM -import hnsw_library -from leann import LeannBuilder, LeannSearcher -from pathlib import Path -import json import os +import json +from pathlib import Path from typing import Dict, Any +# ResDB & HNSW imports +from resdb_orm.orm import ResDBORM +import hnsw_library +from leann import LeannBuilder +# Global variables WORKING_DIR = Path("./").resolve() - db = ResDBORM() -keylist_key = "ddc7ffb0b82115d79e7c552fbe8fc434d606475375e2b3408eb2fa11ea4deeba" - -value_to_add = '' - -# TODO: I got this from a guide for parsing system input, but what does it mean? What is the proper -# way to use it? -# NOTE: It looks like this only runs when you run it as a script and not by linking it in other files -# as sucnh, you should probably keep all code in here - -# TODO: Format these section segmenters better # - - - - - - - - - SECTION 1: Init and data cleaning - - - - - - - - - > +# This entire file is only ever intended to run from a CLI if __name__ == "__main__": + value_to_add = '' + # Parse the value that the user is requesting to add for i in range (len(sys.argv)): - # TODO: Consider adding an error message for when users use `--value` with no input # TODO: Consider if you need to parse whitespace around `--value` - # TODO: Consider cases when `--value` isnt a string if(sys.argv[i] == '--value' and (i + 1 != len(sys.argv))): value_to_add = sys.argv[i + 1] break - - # TODO: Add a --k_num option if value_to_add == '': - print("Critical error - the program requires an arguement in the form of `--value stringToSave`") - # TODO: Make sure that this is the right exit code (this WILL close the whole program) - exit(1) + print("Critical Error - the program requires an arguement in the form of `--value 
stringToSave`") + sys.exit() # - - - - - - - - - SECTION 2: Retrieve HNSW data or create if it doesnt exist - - - - - - - - - > - # TODO: Add notes for when you don't have any saved data - file_embedding_keys = str(WORKING_DIR / "saved_data/embedding_keys.json") embedding_keys: Dict[str, Any] = {} hnsw_text_entries = [value_to_add] + file_saved_directory = Path(WORKING_DIR / "saved_data") + file_embedding_keys = str(WORKING_DIR / "saved_data/embedding_keys.json") + + # Create the saved_data directory if it doesn't exist + if not os.path.exists(file_saved_directory): + file_saved_directory.mkdir() - # TODO: Look into creating/storing ResDB-orm data with a custom key instead of these autogenerated ones - # It might prevent us from having to read keys from an external file - # TODO: This will fail if the `saved_data` folder does not exist, error handle by first creating the folder if it doesnt - # If the user has never saved data before, create the file for storing it + # Create the file storing embedding keys if it doesn't exist if not os.path.exists(file_embedding_keys): embedding_keys = { "temp_ids_txt": "", @@ -75,174 +59,176 @@ try: with open(file_embedding_keys, 'r') as file: embedding_keys = json.load(file) - # TODO: Exit out of execution or error handle here except FileNotFoundError: - print("file not found") - except json.JSONDecodeError: - print("File isnt valid json") + print("The file storing saved keys could not be found.") + print("This means that the requested vector to save will be the first to recieve an embedding") + print("If this is your first time adding an embedding to this database, this is the intended behavior") + except Exception as e: + print(f"Unexpected Error - {e}") + print("The program can continue running, but it will treat this values as the first generated vector embedding") + + # Embedding information is stored in ResDB. 
The next chunk of code ensures that a place to save this information + # exists - either by retrieving it or creating it. There are 5 total files used to store vector data + + # (1/5) Create embedding information for the json passages file, which is stored in ResDB as string array + key = embedding_keys["temp_leann_passages_json"] + try: + if (key is None or key == ""): raise KeyError() - # Ensure that keys for text files are valid. If not, create them + passages_return_item = hnsw_library.get_record(key) + passages_return_data = passages_return_item["data"] + datapointToText = lambda dataPoint: dataPoint['text'] + hnsw_value_list = list(map(datapointToText, passages_return_data)) + + # This file also contains the saved VALUES, check to make sure we aren't re-saving the same data + if value_to_add in hnsw_text_entries: + print(f"{value_to_add} is already saved with an embedding in the ResDB database") + print("Duplicate embeddings yield the same result, this value will not be saved. Terminating...") + sys.exit() + + hnsw_text_entries.append(value_to_add) + except Exception: + hnsw_text_entries = [value_to_add] + embedding_keys["temp_leann_passages_json"] = hnsw_library.create_record([]) + # (4/5) Create embedding information for text files, which are stored in ResDB as string for field in ["temp_ids_txt", "temp_index_txt", "temp_leann_passages_txt"]: key = embedding_keys[field] - if key is not None and key != "": - # TODO: Make sure it actually is an Error when you try to get with a key that doesnt work - try: - _ = hnsw_library.get_record(key) - # TODO: Structure this so you don't have the same line of code in two different places - except: - embedding_keys[field] = hnsw_library.create_record('') - else: - embedding_keys[field] = hnsw_library.create_record('') - - # Ensure that key for this specific json file is valid. 
If not, create it - key = embedding_keys["temp_leann_meta_json"] - if key is not None and key != "": - # TODO: Make sure it actually is an Error when you try to get with a key that doesnt work try: - print('Getting record') + # We direct this to except instead of a typical if/else to avoid rewriting the same line of code + if (key is None or key == ""): raise KeyError() _ = hnsw_library.get_record(key) - print('got the meta json without erorr') - # TODO: Structure this so you don't have the same line of code in two different places - except: - embedding_keys["temp_leann_meta_json"] = hnsw_library.create_record({}) - else: + except Exception: + embedding_keys[field] = hnsw_library.create_record('') + # (5/5) Create embedding information for the json metadata file, which is stored in ResDB as a Dict + key = embedding_keys["temp_leann_meta_json"] + try: + if (key is None or key == ""): raise KeyError() + _ = hnsw_library.get_record(key) + except Exception: embedding_keys["temp_leann_meta_json"] = hnsw_library.create_record({}) - # Ensure that key for this specific json file is valid. 
If not, create it - # This file contains the information we need to rebuild the HNSW tree, gather that information - # while checking to see if it exists (or use an empty array otherwise) - key = embedding_keys["temp_leann_passages_json"] - if key is not None and key != "": - # TODO: Make sure it actually is an Error when you try to get with a key that doesnt work - try: - passages_data = hnsw_library.get_record(key) - hnsw_text_entries = list(map(lambda dataPoint: dataPoint['text'], passages_data["data"])) - # TODO: Make sure that this behaves as youd expect - if value_to_add in hnsw_text_entries: - print(f"{value_to_add} is already saved as an embedding in the ResDB database") - sys.exit(0) - hnsw_text_entries.append(value_to_add) - # TODO: Structure this so you don't have the same line of code in two different places - except: - hnsw_text_entries = [value_to_add] - embedding_keys["temp_leann_passages_json"] = hnsw_library.create_record([]) - else: - hnsw_text_entries = [value_to_add] - embedding_keys["temp_leann_passages_json"] = hnsw_library.create_record([]) - - # Open the file in write mode and dump the data + # Save the embedding keys to a local file try: with open(file_embedding_keys, 'w') as file: json.dump(embedding_keys, file) - print("Successful write") - # TODO: Handle this better or error out - except IOError as e: - print("IO Error, unsuccessful write: {e}") + except Exception as e: + print("Unsuccessful write: {e}") + print("Critical Error - the above error prevents the program from saving locally the keys necessary to track embedding data") + print('This prevents the program from using these embeddings in the future. 
Consequently, terminating...') + sys.exit() # - - - - - - - - - SECTION 3: Construct the HNSW data structure (leann builder) - - - - - - - - - > + file_temporary_directory = Path(WORKING_DIR / "saved_data/temp") file_temporary_storage = str(WORKING_DIR / "saved_data/temp/temp.leann") - # TODO: Suppress outputs from leann algorithms, they flood the console + # Leann is extremely noisy, prevent standard output to the console while it runs + sys.stdout = os.devnull + + # Create the temp directory if it doesn't exist + if not os.path.exists(file_temporary_directory): + file_temporary_directory.mkdir() - # TODO: This will fail if the `saved_data/temp` folder does not exist, error handle by first creating the folder - # (after error handling way above to create the saved_data folder) if it doesnt exist - # If the user has never saved data before, create the file for storing it + # Construct the HNSW Tree (creates the 5 files referenced below, saved to a temporary folder) builder = LeannBuilder(backend_name="hnsw") for text in hnsw_text_entries: builder.add_text(text) builder.build_index(file_temporary_storage) -# - - - - - - - - - SECTION 4: Save the new embeddings - - - - - - - - - > - # Save updated indeces for the text files to ResDB - for pairing in [ - ("temp.ids.txt", "temp_ids_txt") - ]: - - fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) - key = embedding_keys[pairing[1]] + # Restore standard output to the console + sys.stdout = sys.__stderr__ - try: - print(fileName) - # TODO: Change this from rb to just r - with open(fileName, 'r', encoding='ascii') as file: - content = file.read() - hnsw_library.put_record(key, content) - # TODO: These are critical errors that will break the whole system - except FileNotFoundError: - print(f"Error: The file was not found.") - except Exception as e: - print(f"An error occurred: {e}") +# - - - - - - - - - SECTION 4: Save the new embeddings - - - - - - - - - > + # Embedding information using this library is split 
across 5 files. The next chunk of code saves each of + # these files as a kv store value in ResDB, storing text data as a string, and JSON data as a Dict or Dict[] - # Save updated indeces for the text files to ResDB + # (2/5) Create embedding information for the txt passages file, which are Win-1252 byte data for pairing in [ ("temp.leann.passages.idx", "temp_leann_passages_txt"), ("temp.index", "temp_index_txt") ]: - fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) key = embedding_keys[pairing[1]] - try: - print(fileName) - # TODO: Change this from rb to just r with open(fileName, 'r', encoding='Windows-1252') as file: content = file.read() - print(content) - hnsw_library.put_record(key, content) - # TODO: These are critical errors that will break the whole system - except FileNotFoundError: - print(f"Error: The file was not found.") + _ = hnsw_library.put_record(key, content) except Exception as e: - print(f"An error occurred: {e}") - - - - # Save updated indeces for the json file to ResDB - fileName = str(WORKING_DIR / "saved_data/temp/" / "temp.leann.meta.json") + print("Unsuccessful save: {e}") + print("Critical Error - the above error completely prevents this embedding from saving to ResDB") + print("this likely has ruined the entire embedding system. Please try to add your value again. 
If you face") + print("the same error, delete all your saved data by deleting `vector-indexing/saved_data` and start fresh.") + print("Terminating...") + sys.exit() + # (3/5) Create embedding information for the index passages file, which is ascii text data + fileName = str(WORKING_DIR / "saved_data/temp/temp.ids.txt") + key = embedding_keys["temp_ids_txt"] + try: + print(fileName) + with open(fileName, 'r', encoding='ascii') as file: + content = file.read() + _ = hnsw_library.put_record(key, content) + except Exception as e: + print("Unsuccessful save: {e}") + print("Critical Error - the above error completely prevents this embedding from saving to ResDB") + print("this likely has ruined the entire embedding system. Please try to add your value again. If you face") + print("the same error, delete all your saved data by deleting `vector-indexing/saved_data` and start fresh.") + print("Terminating...") + sys.exit() + # (4/5) Create embedding information for the metadata file, which is a single json object + fileName = str(WORKING_DIR / "saved_data/temp/temp.leann.meta.json") key = embedding_keys["temp_leann_meta_json"] try: with open(fileName, 'r') as file: - # TODO: Make sure that this generates JSON content correctly as expected (as a Dict[str, Any]) content = json.load(file) - hnsw_library.put_record(key, content) - # TODO: These are critical errors that will break the whole system - except FileNotFoundError: - print(f"Error: The file was not found.") + _ =hnsw_library.put_record(key, content) except Exception as e: - print(f"An error occurred: {e}") - - - - # Save updated indeces for the jsonline (`.jsonl`) files to ResDB - fileName = str(WORKING_DIR / "saved_data/temp/" / "temp.leann.passages.jsonl") + print("Unsuccessful save: {e}") + print("Critical Error - the above error completely prevents this embedding from saving to ResDB") + print("this likely has ruined the entire embedding system. Please try to add your value again. 
If you face") + print("the same error, delete all your saved data by deleting `vector-indexing/saved_data` and start fresh.") + print("Terminating...") + sys.exit() + # (5/5) Create embedding information for the passages file, which is a jsonLine file + # consisting of a single json object on each line + fileName = str(WORKING_DIR / "saved_data/temp/temp.leann.passages.jsonl") key = embedding_keys["temp_leann_passages_json"] content = [] try: with open(fileName, 'r') as file: + # We load each json object line-by-line, saving each as an entry in an array for line in file: content.append(json.loads(line)) - hnsw_library.put_record(key, content) - # TODO: These are critical errors that will break the whole system - except FileNotFoundError: - print(f"Error: The file was not found.") + _ = hnsw_library.put_record(key, content) except Exception as e: - print(f"An error occurred: {e}") - + print("Unsuccessful save: {e}") + print("Critical Error - the above error completely prevents this embedding from saving to ResDB") + print("this likely has ruined the entire embedding system. Please try to add your value again. 
If you face") + print("the same error, delete all your saved data by deleting `vector-indexing/saved_data` and start fresh.") + print("Terminating...") + sys.exit() # - - - - - - - - - SECTION 5: Cleanup: Remove temporary files - - - - - - - - - > - # TODO: Ultimately remove (and at the start of this file, create) the whole temp directory - # Remove all temporarily created files - for temp_file_path in ["temp.ids.txt", "temp.index", "temp.leann.passages.idx", + # Remove all temporary files created during HNSW Tree creation + for file_temp_embedding in ["temp.ids.txt", "temp.index", "temp.leann.passages.idx", "temp.leann.meta.json", "temp.leann.passages.jsonl"]: - fileName = str(WORKING_DIR / "saved_data/temp/" / temp_file_path) - + fileName = str(WORKING_DIR / "saved_data/temp/" / file_temp_embedding) try: os.remove(fileName) - # TODO: These are non-critical errors, but do this better - except FileNotFoundError: - print(f"Error: The file was not found.") - # TODO: Figure out when this happens (if sudo venv/activate is enough) and warn the user appropriately - except PermissionError: - print("Permission error when deleting files in your temp directory") except Exception as e: - print(f"An error occurred: {e}") \ No newline at end of file + print(f"Error - A problem occurred while deleting temporary data: {e}") + print("This is non-critical. It is reccomended you delete the folder `vector-indexing/saved_data/temp` to save space") + + # Remove the whole temp directory + if os.path.exists(file_temporary_directory): + file_temporary_directory.rmdir() + + + + +# = = = = = = = = = EXTRA SECTION: Future TODOs = = = = = = = = = > +# > The whole resdb_orm and config.yaml file had to be copied into the vector-indexing directory +# See if there is a way to run this without them. 
One had to be installed as a package, it's +# weird that it had to be duped in here to work +# > Look into the possibility of saving a value with custom keys in resdb-orm, instead of using +# the random/autogenerated ones. This could ultimately lead to not needing to use a +# saved_data/embedding_keys.json file at all \ No newline at end of file From 40b48a5ca3d82037b5e6727dc9a31d6dd79aef9f Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Sun, 30 Nov 2025 02:13:34 -0800 Subject: [PATCH 41/79] Progress on TODOs in vector_get, fixed issue in vector_Add that would erroneously prevent values from being saved --- ecosystem/sdk/vector-indexing/vector_add.py | 22 ++--- ecosystem/sdk/vector-indexing/vector_get.py | 94 ++++++++++----------- 2 files changed, 55 insertions(+), 61 deletions(-) diff --git a/ecosystem/sdk/vector-indexing/vector_add.py b/ecosystem/sdk/vector-indexing/vector_add.py index 81c5d7e1e..25e5fba2b 100644 --- a/ecosystem/sdk/vector-indexing/vector_add.py +++ b/ecosystem/sdk/vector-indexing/vector_add.py @@ -15,13 +15,14 @@ import hnsw_library from leann import LeannBuilder -# Global variables +# Global Variables WORKING_DIR = Path("./").resolve() db = ResDBORM() # - - - - - - - - - SECTION 1: Init and data cleaning - - - - - - - - - > # This entire file is only ever intended to run from a CLI if __name__ == "__main__": + # Input Variable value_to_add = '' # Parse the value that the user is requesting to add @@ -37,7 +38,7 @@ # - - - - - - - - - SECTION 2: Retrieve HNSW data or create if it doesnt exist - - - - - - - - - > embedding_keys: Dict[str, Any] = {} - hnsw_text_entries = [value_to_add] + hnsw_text_entries = [] file_saved_directory = Path(WORKING_DIR / "saved_data") file_embedding_keys = str(WORKING_DIR / "saved_data/embedding_keys.json") @@ -78,7 +79,7 @@ passages_return_item = hnsw_library.get_record(key) passages_return_data = passages_return_item["data"] datapointToText = lambda dataPoint: dataPoint['text'] - hnsw_value_list = 
list(map(datapointToText, passages_return_data)) + hnsw_text_entries = list(map(datapointToText, passages_return_data)) # This file also contains the saved VALUES, check to make sure we aren't re-saving the same data if value_to_add in hnsw_text_entries: @@ -112,7 +113,7 @@ with open(file_embedding_keys, 'w') as file: json.dump(embedding_keys, file) except Exception as e: - print("Unsuccessful write: {e}") + print(f"Unsuccessful write: {e}") print("Critical Error - the above error prevents the program from saving locally the keys necessary to track embedding data") print('This prevents the program from using these embeddings in the future. Consequently, terminating...') sys.exit() @@ -122,7 +123,7 @@ file_temporary_storage = str(WORKING_DIR / "saved_data/temp/temp.leann") # Leann is extremely noisy, prevent standard output to the console while it runs - sys.stdout = os.devnull + # sys.stdout = os.devnull # TODO # Create the temp directory if it doesn't exist if not os.path.exists(file_temporary_directory): @@ -135,7 +136,7 @@ builder.build_index(file_temporary_storage) # Restore standard output to the console - sys.stdout = sys.__stderr__ + # sys.stdout = sys.__stdout__ # TODO # - - - - - - - - - SECTION 4: Save the new embeddings - - - - - - - - - > # Embedding information using this library is split across 5 files. The next chunk of code saves each of @@ -153,7 +154,8 @@ content = file.read() _ = hnsw_library.put_record(key, content) except Exception as e: - print("Unsuccessful save: {e}") + print(pairing) + print(f"Unsuccessful save: {e}") print("Critical Error - the above error completely prevents this embedding from saving to ResDB") print("this likely has ruined the entire embedding system. Please try to add your value again. 
If you face") print("the same error, delete all your saved data by deleting `vector-indexing/saved_data` and start fresh.") @@ -168,7 +170,7 @@ content = file.read() _ = hnsw_library.put_record(key, content) except Exception as e: - print("Unsuccessful save: {e}") + print(f"Unsuccessful save: {e}") print("Critical Error - the above error completely prevents this embedding from saving to ResDB") print("this likely has ruined the entire embedding system. Please try to add your value again. If you face") print("the same error, delete all your saved data by deleting `vector-indexing/saved_data` and start fresh.") @@ -182,7 +184,7 @@ content = json.load(file) _ =hnsw_library.put_record(key, content) except Exception as e: - print("Unsuccessful save: {e}") + print(f"Unsuccessful save: {e}") print("Critical Error - the above error completely prevents this embedding from saving to ResDB") print("this likely has ruined the entire embedding system. Please try to add your value again. If you face") print("the same error, delete all your saved data by deleting `vector-indexing/saved_data` and start fresh.") @@ -200,7 +202,7 @@ content.append(json.loads(line)) _ = hnsw_library.put_record(key, content) except Exception as e: - print("Unsuccessful save: {e}") + print(f"Unsuccessful save: {e}") print("Critical Error - the above error completely prevents this embedding from saving to ResDB") print("this likely has ruined the entire embedding system. Please try to add your value again. 
If you face") print("the same error, delete all your saved data by deleting `vector-indexing/saved_data` and start fresh.") diff --git a/ecosystem/sdk/vector-indexing/vector_get.py b/ecosystem/sdk/vector-indexing/vector_get.py index 63c096ebd..dce3bd7f7 100644 --- a/ecosystem/sdk/vector-indexing/vector_get.py +++ b/ecosystem/sdk/vector-indexing/vector_get.py @@ -4,85 +4,77 @@ Date: 2025-Fall Description: (Indexers project) Run to search ResDB for the embeddings k-closest to input string """ - -# TODO: I kept this in here bc it might be a background element that ResDB-orm needs -# but not sure. Delete once everything works if its not needed -import requests -# TODO: I just copied the entire directory into here, I don't get how this works -# Why do we need to import it from there if it's been globally insalled through pip? -# TODO ^ ditto for config.yaml -from resdb_orm.orm import ResDBORM -from pathlib import Path -from typing import Dict, Any, List -import json +# Typical Python imports +import sys import os +import json +from pathlib import Path +from typing import Dict, List, Any +# ResDB & HNSW imports +from resdb_orm.orm import ResDBORM import hnsw_library -from leann import LeannBuilder, LeannSearcher -import sys +from leann import LeannSearcher +# Global Variables WORKING_DIR = Path("./").resolve() - db = ResDBORM() -search_value = "" -k_matches = 0 -return_all = False - -# TODO: Format these section segmenters better # - - - - - - - - - SECTION 1: Init and data cleaning - - - - - - - - - > if __name__ == "__main__": - # Parse the value that the user is requesting to add + # Input Variables + search_value = "" + k_matches = 0 + return_all = False + + # Parse the values that the user is requesting to add for i in range (len(sys.argv)): - # TODO: Consider adding an error message for when users use `--value` with no input - # TODO: Consider if you need to parse whitespace around `--value` if(sys.argv[i] == '--value' and (i + 1 != len(sys.argv))): - # TODO: For 
this and the next one, I think you need to NOT allow either to take - # `--k_matches` or `--value` as a possible embeddable value search_value = sys.argv[i + 1] if(sys.argv[i] == '--k_matches' and (i + 1 != len(sys.argv))): - # TODO: This needs better parsing, specifically error handling for non-int inputs - k_matches = int(sys.argv[i + 1]) + # Ensure that k_matches is in the form of a nondecimal number + try: + k_matches = int(sys.argv[i + 1]) + except ValueError: + print("Invalid input - The input to `--k_matches` must be an integer number") + sys.exit() if(sys.argv[i] == '--show_all'): return_all = True if not return_all: + # If the user doesn't request to return everything OR search on something specific, error out if not search_value: - print('Invalid input, please use one of the two following combinations of flags:') + print('Invalid input - please use one of the two following combinations of flags:') print('(1) Use flag `--value STRING` to find most similar terms to STRING. In addition, use') print(' flag `--k_matches ###` to search for the k-closest strings. Leave blank to only find one') print('(2) Use flag `--show_all` with no arguements to list ALL values that correlate with a vector embedding') print(' this will override all other flags used') - # TODO: Possibly use `exit` (?) instead - cause the program to halt and error out - exit(1) - elif k_matches <= 0: + sys.exit() + + # If the user is searching on a specific string, ensure that the requested number of matches is a whole number + if k_matches <= 0: print('No or invalid arguement provided for --k_matches. 
Defaulting to finding one single most similar value') - # NOTE: You don't need this else as long as the above if statement halts execution + k_matches = 1 # - - - - - - - - - SECTION 2: Retrieve keys to HNSW data - - - - - - - - - > file_embedding_keys = str(WORKING_DIR / "saved_data/embedding_keys.json") embedding_keys: Dict[str, Any] = {} - # TODO: This is copied from the read code, but if ANY ONE these keys alone are missing the whole - # "get" program will fail. Do not allow this path not existing / a read request turning up nothing - # to happen - if not os.path.exists(file_embedding_keys): - embedding_keys = { - "temp_ids_txt": "", - "temp_index_txt": "", - "temp_leann_meta_json": "", - "temp_leann_passages_txt": "", - "temp_leann_passages_json": "" - } - else: - try: - with open(file_embedding_keys, 'r') as file: - embedding_keys = json.load(file) - # TODO: I copied this from the add file, consider the impact of these errors and handle them appropriately - except FileNotFoundError: - print("file not found") - except json.JSONDecodeError: - print("File isnt valid json") + + # Retrieve the keys saving the location of embedding data + try: + # We direct this to except instead of a typical if/else to avoid rewriting the same line of code + if (not os.path.exists(file_embedding_keys)): raise FileNotFoundError() + with open(file_embedding_keys, 'r') as file: + embedding_keys = json.load(file) + except FileNotFoundError: + print("Critical Error - The file listing key embeddings does not exist. Please add a vector value before trying to retrieve similar values") + print("Terminating...") + os.exit() + except Exception as e: + print(f"Critical Error - {e}") + print("There is no protocol for handling this error, but it is known it will prevent retrieval of embedding data. 
Terminating...") + os.exit() # - - - - - - - - - SECTION 3: Save the embedding data to temporary files - - - - - - - - - > # TODO: The error handling for all 4 of these blocks isnt well thoguht out From e7ab97e192d49cd65bcb2bae27f869dd27e4ea89 Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Mon, 1 Dec 2025 02:13:08 -0800 Subject: [PATCH 42/79] Complete all TODOs in add/get/library, fix major bug with encoding/decoding untyped embedding files --- ecosystem/sdk/vector-indexing/hnsw_library.py | 29 ++- ecosystem/sdk/vector-indexing/vector_add.py | 11 +- ecosystem/sdk/vector-indexing/vector_get.py | 172 +++++++++--------- 3 files changed, 114 insertions(+), 98 deletions(-) diff --git a/ecosystem/sdk/vector-indexing/hnsw_library.py b/ecosystem/sdk/vector-indexing/hnsw_library.py index b38f208e2..2d78a71e4 100644 --- a/ecosystem/sdk/vector-indexing/hnsw_library.py +++ b/ecosystem/sdk/vector-indexing/hnsw_library.py @@ -1,19 +1,32 @@ -from resdb_orm.orm import ResDBORM -from leann import LeannBuilder, LeannSearcher -from pathlib import Path +""" +Filename: hnsw_library.py +Author(s) / Contrubtor(s): Steven Shoemaker / Regan Yang, Ritesh Patro, Yoshiki Yamaguchi, Tiching Kao +Date: 2025-Fall +Description: (Indexers project) Simple library to strongly type Indexer-Project embedding saves +""" +# Typical Python imports from typing import Dict, List, Any +# ResDB & HNSW imports +from resdb_orm.orm import ResDBORM + db = ResDBORM() +# RETURNS: Key of the newly created record def create_record(value: str | Dict[str, Any]) -> str: return db.create(value) +# RETURNS: Retrieved value (any of the 5 filetypes used by leann) def get_record(key: str) -> str | Dict[str, Any] | List[Dict[str, Any]]: return db.read(key) -#TODO: Replace none with successful or not successful (youll have to check the return type of update) -def put_record(key: str, value: str | Dict[str, Any]) -> None: - _ = db.update(key, value) +# RETURNS: True if update is successful, False otherwise +def 
put_record(key: str, value: str | Dict[str, Any]) -> bool: + update_data = db.update(key, value) + return update_data['status'] == 'update successful' + + + -# - - - - - - - - - FINAL SECTION: LONG-TERM TODO s - - - - - - - - - > -# TODO: strongly type as much as you can \ No newline at end of file +# = = = = = = = = = EXTRA SECTION: Future TODOs = = = = = = = = = > +# > Find a PyDoc / formal method of defining return type in function header \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/vector_add.py b/ecosystem/sdk/vector-indexing/vector_add.py index 25e5fba2b..194820fd4 100644 --- a/ecosystem/sdk/vector-indexing/vector_add.py +++ b/ecosystem/sdk/vector-indexing/vector_add.py @@ -122,13 +122,13 @@ file_temporary_directory = Path(WORKING_DIR / "saved_data/temp") file_temporary_storage = str(WORKING_DIR / "saved_data/temp/temp.leann") - # Leann is extremely noisy, prevent standard output to the console while it runs - # sys.stdout = os.devnull # TODO - # Create the temp directory if it doesn't exist if not os.path.exists(file_temporary_directory): file_temporary_directory.mkdir() + # Leann is extremely noisy, prevent standard output to the console while it runs + # sys.stdout = os.devnull # TODO + # Construct the HNSW Tree (creates the 5 files referenced below, saved to a temporary folder) builder = LeannBuilder(backend_name="hnsw") for text in hnsw_text_entries: @@ -142,7 +142,7 @@ # Embedding information using this library is split across 5 files. 
The next chunk of code saves each of # these files as a kv store value in ResDB, storing text data as a string, and JSON data as a Dict or Dict[] - # (2/5) Create embedding information for the txt passages file, which are Win-1252 byte data + # (2/5) Create embedding information for the txt passages file, which are latin-1 byte data for pairing in [ ("temp.leann.passages.idx", "temp_leann_passages_txt"), ("temp.index", "temp_index_txt") @@ -150,7 +150,7 @@ fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) key = embedding_keys[pairing[1]] try: - with open(fileName, 'r', encoding='Windows-1252') as file: + with open(fileName, 'r', encoding='latin-1') as file: content = file.read() _ = hnsw_library.put_record(key, content) except Exception as e: @@ -165,7 +165,6 @@ fileName = str(WORKING_DIR / "saved_data/temp/temp.ids.txt") key = embedding_keys["temp_ids_txt"] try: - print(fileName) with open(fileName, 'r', encoding='ascii') as file: content = file.read() _ = hnsw_library.put_record(key, content) diff --git a/ecosystem/sdk/vector-indexing/vector_get.py b/ecosystem/sdk/vector-indexing/vector_get.py index dce3bd7f7..4e2d97b9b 100644 --- a/ecosystem/sdk/vector-indexing/vector_get.py +++ b/ecosystem/sdk/vector-indexing/vector_get.py @@ -77,28 +77,16 @@ os.exit() # - - - - - - - - - SECTION 3: Save the embedding data to temporary files - - - - - - - - - > - # TODO: The error handling for all 4 of these blocks isnt well thoguht out - # It's correct, but it was just copied from vector_add - # Most of these aren't looped due to the way they're decoded (literally the "encoding" param) - # restructuring this to not loop over these 1-item arrays will make the code a lot more readable - embedding_data = [ - ("temp.ids.txt", "temp_ids_txt") - ] - for pairing in embedding_data: - fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) - key = embedding_keys[pairing[1]] - file_content = (hnsw_library.get_record(key))["data"] - - try: - with open(fileName, 'w', 
encoding="ascii") as file: - file.write(file_content) - - # TODO: Consider what this error actually means and handle it correctly - except Exception as e: - print(f"An error occurred: {e}") + file_temporary_directory = Path(WORKING_DIR / "saved_data/temp") + # Create the temp directory if it doesn't exist + if not os.path.exists(file_temporary_directory): + file_temporary_directory.mkdir() + # Embedding information using this library is split across 5 files. The next chunk of code retrieves + # each file from ResDB, temporarily saving it + # (2/5) Save embedding information for the untyped files, which are latin-1 byte data embedding_data = [ ("temp.leann.passages.idx", "temp_leann_passages_txt"), ("temp.index", "temp_index_txt") @@ -106,90 +94,106 @@ for pairing in embedding_data: fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) key = embedding_keys[pairing[1]] - file_content = (hnsw_library.get_record(key))["data"] - - try: - with open(fileName, 'w', encoding="Windows-1252") as file: - file.write(file_content) - - # TODO: Consider what this error actually means and handle it correctly - except Exception as e: - print(f"An error occurred: {e}") - - - embedding_data = [ - ("temp.leann.meta.json", "temp_leann_meta_json") - ] - - for pairing in embedding_data: - fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) - key = embedding_keys[pairing[1]] - file_content = (hnsw_library.get_record(key))["data"] - + file_return_item = hnsw_library.get_record(key) + file_return_data = file_return_item["data"] try: - with open(fileName, 'w') as file: - json.dump(file_content, file) - - # TODO: Consider what this error actually means and handle it correctly + with open(fileName, 'w', encoding="latin-1") as file: + file.write(file_return_data) except Exception as e: - print(f"An error occurred: {e}") - - - embedding_data = [ - ("temp.leann.passages.jsonl", "temp_leann_passages_json") - ] - for pairing in embedding_data: - fileName = str(WORKING_DIR / 
"saved_data/temp/" / pairing[0]) - - # Since each json object is on a new line (it's a jsonl file), we append instead of overwriting - # So, we must force the file to delete/recreate to avoid appending over old data - filePath = Path(fileName) - if filePath.is_file(): - os.remove(fileName) - key = embedding_keys[pairing[1]] - file_content: List[Dict[str, Any]] = (hnsw_library.get_record(key))["data"] + print(f"Unsuccessful ResDB retrieval for untyped file: {e}") + print("Critical Error - the above error indicates that a file used for vector embeddings is improperly saved") + print("This likely has ruined the entire embedding system. If you face the same error, delete all your saved") + print("data by deleting `vector-indexing/saved_data` and start fresh.") + print("Terminating...") + sys.exit() + # (3/5) Save embedding information for the ID text file, which is ascii data + fileName = str(WORKING_DIR / "saved_data/temp/temp.ids.txt") + key = embedding_keys["temp_ids_txt"] + file_return_item = hnsw_library.get_record(key) + file_return_data = file_return_item["data"] + try: + with open(fileName, 'w', encoding="ascii") as file: + file.write(file_return_data) + except Exception as e: + print(f"Unsuccessful ResDB retrieval for text file: {e}") + print("Critical Error - the above error indicates that a file used for vector embeddings is improperly saved") + print("This likely has ruined the entire embedding system. 
If you face the same error, delete all your saved") + print("data by deleting `vector-indexing/saved_data` and start fresh.") + print("Terminating...") + sys.exit() + # (4/5) Save embedding information for the json file + fileName = str(WORKING_DIR / "saved_data/temp/temp.leann.meta.json") + key = embedding_keys["temp_leann_meta_json"] + file_return_item = hnsw_library.get_record(key) + file_return_data = file_return_item["data"] + try: + with open(fileName, 'w') as file: + json.dump(file_return_data, file) + except Exception as e: + print(f"Unsuccessful ResDB retrieval for json file: {e}") + print("Critical Error - the above error indicates that a file used for vector embeddings is improperly saved") + print("This likely has ruined the entire embedding system. If you face the same error, delete all your saved") + print("data by deleting `vector-indexing/saved_data` and start fresh.") + print("Terminating...") + sys.exit() + # (5/5) Save embedding information for the jsonLine file + fileName = str(WORKING_DIR / "saved_data/temp/temp.leann.passages.jsonl") + + # Since each json object is on a new line (it's a jsonl file), we append instead of overwriting + # So we must force the file to delete/recreate to avoid appending over old data + filePath = Path(fileName) + if filePath.is_file(): + os.remove(fileName) + key = embedding_keys["temp_leann_passages_json"] + file_return_item = hnsw_library.get_record(key) + file_return_data: List[Dict[str, Any]] = file_return_item["data"] + + # Delimit each json object with lines, instead of just as entires in a list + try: + with open(fileName, 'a') as file: + for i, line in enumerate(file_return_data): + json.dump(line, file) + if i != (len(file_return_data) - 1): + file.write("\n") + except Exception as e: + print(f"Unsuccessful ResDB retrieval for jsonLine file: {e}") + print("Critical Error - the above error indicates that a file used for vector embeddings is improperly saved") + print("This likely has ruined the entire 
embedding system. If you face the same error, delete all your saved") + print("data by deleting `vector-indexing/saved_data` and start fresh.") + print("Terminating...") + sys.exit() - try: - with open(fileName, 'a') as file: - for i, line in enumerate(file_content): - json.dump(line, file) - if i != (len(file_content) - 1): - file.write("\n") +# - - - - - - - - - SECTION 4: Re-Construct the HNSW data structure (leann searcher) - - - - - - - - - > + total_searches = sys.maxsize if return_all else k_matches - # TODO: Consider what this error actually means and handle it correctly - except Exception as e: - print(f"An error occurred: {e}") + # Leann is extremely noisy, prevent standard output to the console while it runs + # sys.stdout = os.devnull # TODO -# - - - - - - - - - SECTION 4: Re-Construct the HNSW data structure (leann searcher) - - - - - - - - - > - # TODO: Rethink the name of this and the input variables - k_searches = sys.maxsize if return_all else k_matches - # TODO: Suppress outputs from leann algorithms, they flood the console file_temporary_storage = str(WORKING_DIR / "saved_data/temp/temp.leann") searcher = LeannSearcher(file_temporary_storage) - results = searcher.search(search_value, top_k=k_searches) + results = searcher.search(search_value, top_k=total_searches) + + # Restore standard output to the console + # sys.stdout = sys.__stdout__ # TODO # Print results to the console for i, line in enumerate(results): - # TODO: There's probably a faster way to just merge this into the formatting instead of if/else if return_all: print(f"{i+1}. {line.text}") else: print(f"{i+1}. 
{line.text} // (similarity score: {line.score})") # - - - - - - - - - SECTION 5: Cleanup: Remove temporary files - - - - - - - - - > - # TODO: Ultimately remove (and at the start of this file, create) the whole temp directory - # Remove all temporarily created files + # Remove all temporary files created during HNSW Tree Search for temp_file_path in ["temp.ids.txt", "temp.index", "temp.leann.passages.idx", "temp.leann.meta.json", "temp.leann.passages.jsonl"]: fileName = str(WORKING_DIR / "saved_data/temp/" / temp_file_path) - try: os.remove(fileName) - # TODO: These are non-critical errors, but do this better - except FileNotFoundError: - print(f"Error: The file was not found.") - # TODO: Figure out when this happens (if sudo venv/activate is enough) and warn the user appropriately - except PermissionError: - print("Permission error when deleting files in your temp directory") except Exception as e: - print(f"An error occurred: {e}") \ No newline at end of file + print(f"Error - A problem occurred while deleting temporary data: {e}") + print("This is non-critical. It is reccomended you delete the folder `vector-indexing/saved_data/temp` to save space") + + # Remove the whole temp directory + if os.path.exists(file_temporary_directory): + file_temporary_directory.rmdir() \ No newline at end of file From 164558855942b91daa1afc7f62f78d7226f2ef97 Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Tue, 2 Dec 2025 11:41:58 +0900 Subject: [PATCH 43/79] Add stress test script and update vector add/get for binary file handling - Introduced a new stress test script to sequentially add and verify data entries. - Modified vector_add.py to read files in binary mode and encode content to base64 before saving. - Updated vector_get.py to decode base64 content when retrieving files from ResDB. 
--- ecosystem/sdk/vector-indexing/stress_test.sh | 53 ++++++++++++++++++++ ecosystem/sdk/vector-indexing/vector_add.py | 6 ++- ecosystem/sdk/vector-indexing/vector_get.py | 6 ++- 3 files changed, 61 insertions(+), 4 deletions(-) create mode 100755 ecosystem/sdk/vector-indexing/stress_test.sh diff --git a/ecosystem/sdk/vector-indexing/stress_test.sh b/ecosystem/sdk/vector-indexing/stress_test.sh new file mode 100755 index 000000000..d33daecd6 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/stress_test.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# Configuration: Do not stop on error immediately to allow capturing failure state +# set -e is intentionally omitted + +echo "=== Stress Test Started: Adding 15 items sequentially ===" + +# Base word list for generating random unique sentences +words=("Blockchain" "Database" "Resilient" "Consensus" "Python" "Vector" "Search" "Index" "Node" "Performance" "Latency" "Throughput" "Security" "Encryption" "Network") + +for i in {1..15} +do + # Generate a unique text string using a random word and the current loop index + rand_idx=$((RANDOM % 15)) + text="Test entry #$i: ${words[$rand_idx]} related data with random seed $RANDOM" + + echo "---------------------------------------------------" + echo "[Step $i/15] Adding data: '$text'" + + # 1. Add data to ResilientDB (vector_add.py) + # This generates the HNSW index locally and uploads the binary files to ResDB + python3 vector_add.py --value "$text" + + # Check if the python script executed successfully + if [ $? -ne 0 ]; then + echo "❌ [CRITICAL FAIL] vector_add.py crashed at step $i." + echo " -> The upload process likely failed." + exit 1 + fi + + # 2. Retrieve and verify the data immediately (vector_get.py) + # This downloads the binary files from ResDB and attempts to load the index + echo "[Check] Verifying index integrity..." + python3 vector_get.py --value "Test entry #$i" --k_matches 1 > /dev/null + + # Check if the retrieval script executed successfully + if [ $? 
-ne 0 ]; then + echo "❌ [CRITICAL FAIL] vector_get.py crashed at step $i!" + echo " -> The index file retrieved from ResDB is likely corrupted." + echo " -> This confirms the 'binary-to-text' saving issue." + exit 1 + else + echo "✅ [OK] Retrieve successful. Index is valid." + fi + + # Wait slightly to be gentle on the local server + sleep 1 +done + +echo "===================================================" +echo "🎉 Congratulations! The system survived the stress test." +echo " It successfully handled 15 sequential adds and reloads." +echo "===================================================" diff --git a/ecosystem/sdk/vector-indexing/vector_add.py b/ecosystem/sdk/vector-indexing/vector_add.py index 194820fd4..f384ca98e 100644 --- a/ecosystem/sdk/vector-indexing/vector_add.py +++ b/ecosystem/sdk/vector-indexing/vector_add.py @@ -10,6 +10,7 @@ import json from pathlib import Path from typing import Dict, Any +import base64 # ResDB & HNSW imports from resdb_orm.orm import ResDBORM import hnsw_library @@ -150,8 +151,9 @@ fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) key = embedding_keys[pairing[1]] try: - with open(fileName, 'r', encoding='latin-1') as file: - content = file.read() + with open(fileName, 'rb') as file: + binary_content = file.read() + content = base64.b64encode(binary_content).decode('utf-8') _ = hnsw_library.put_record(key, content) except Exception as e: print(pairing) diff --git a/ecosystem/sdk/vector-indexing/vector_get.py b/ecosystem/sdk/vector-indexing/vector_get.py index 4e2d97b9b..a3aa984e6 100644 --- a/ecosystem/sdk/vector-indexing/vector_get.py +++ b/ecosystem/sdk/vector-indexing/vector_get.py @@ -10,6 +10,7 @@ import json from pathlib import Path from typing import Dict, List, Any +import base64 # ResDB & HNSW imports from resdb_orm.orm import ResDBORM import hnsw_library @@ -97,8 +98,9 @@ file_return_item = hnsw_library.get_record(key) file_return_data = file_return_item["data"] try: - with open(fileName, 'w', 
encoding="latin-1") as file: - file.write(file_return_data) + with open(fileName, 'wb') as file: + binary_content = base64.b64decode(file_return_data) + file.write(binary_content) except Exception as e: print(f"Unsuccessful ResDB retrieval for untyped file: {e}") print("Critical Error - the above error indicates that a file used for vector embeddings is improperly saved") From f35d0ae2f5941d8f3ef31f51dfc0c12424de4665 Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Mon, 1 Dec 2025 21:33:57 -0800 Subject: [PATCH 44/79] Check for saved_data folder before trying to create embedding files --- ecosystem/sdk/vector-indexing/vector_add.py | 2 +- ecosystem/sdk/vector-indexing/vector_get.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ecosystem/sdk/vector-indexing/vector_add.py b/ecosystem/sdk/vector-indexing/vector_add.py index f384ca98e..93f13252b 100644 --- a/ecosystem/sdk/vector-indexing/vector_add.py +++ b/ecosystem/sdk/vector-indexing/vector_add.py @@ -143,7 +143,7 @@ # Embedding information using this library is split across 5 files. 
The next chunk of code saves each of # these files as a kv store value in ResDB, storing text data as a string, and JSON data as a Dict or Dict[] - # (2/5) Create embedding information for the txt passages file, which are latin-1 byte data + # (2/5) Create embedding information for the txt passages file, which are raw byte data for pairing in [ ("temp.leann.passages.idx", "temp_leann_passages_txt"), ("temp.index", "temp_index_txt") diff --git a/ecosystem/sdk/vector-indexing/vector_get.py b/ecosystem/sdk/vector-indexing/vector_get.py index a3aa984e6..d08a67152 100644 --- a/ecosystem/sdk/vector-indexing/vector_get.py +++ b/ecosystem/sdk/vector-indexing/vector_get.py @@ -59,13 +59,16 @@ k_matches = 1 # - - - - - - - - - SECTION 2: Retrieve keys to HNSW data - - - - - - - - - > + file_saved_directory = Path(WORKING_DIR / "saved_data") file_embedding_keys = str(WORKING_DIR / "saved_data/embedding_keys.json") + file_embedding_keys_path = Path(WORKING_DIR / "saved_data/embedding_keys.json") embedding_keys: Dict[str, Any] = {} # Retrieve the keys saving the location of embedding data try: # We direct this to except instead of a typical if/else to avoid rewriting the same line of code - if (not os.path.exists(file_embedding_keys)): raise FileNotFoundError() + if (not os.path.exists(file_saved_directory)): raise FileNotFoundError() + if (not file_embedding_keys_path.is_file()): raise FileNotFoundError() with open(file_embedding_keys, 'r') as file: embedding_keys = json.load(file) except FileNotFoundError: @@ -87,7 +90,7 @@ # Embedding information using this library is split across 5 files. 
The next chunk of code retrieves # each file from ResDB, temporarily saving it - # (2/5) Save embedding information for the untyped files, which are latin-1 byte data + # (2/5) Save embedding information for the untyped files, which are raw byte data embedding_data = [ ("temp.leann.passages.idx", "temp_leann_passages_txt"), ("temp.index", "temp_index_txt") From 14e0bfcf414bdf0c377150d96062b74df087b181 Mon Sep 17 00:00:00 2001 From: Ritesh Patro Date: Mon, 1 Dec 2025 23:56:24 -0800 Subject: [PATCH 45/79] Added hard delete functionality along with writing embeddings as bytes --- .bazelrc | 40 +++- WORKSPACE | 10 + ecosystem/sdk/vector-indexing/vector_add.py | 13 +- .../sdk/vector-indexing/vector_delete.py | 182 ++++++++++++++++++ ecosystem/sdk/vector-indexing/vector_get.py | 21 +- 5 files changed, 249 insertions(+), 17 deletions(-) create mode 100644 ecosystem/sdk/vector-indexing/vector_delete.py diff --git a/.bazelrc b/.bazelrc index cc3d1af5c..fc41e7499 100644 --- a/.bazelrc +++ b/.bazelrc @@ -1,4 +1,38 @@ -build --cxxopt='-std=c++17' --copt=-O3 --jobs=40 -#build --action_env=PYTHON_BIN_PATH="/usr/bin/python3.10" -#build --action_env=PYTHON_LIB_PATH="/usr/include/python3.10" +# ============================================ +# Bazel Configuration for MacBook Air M4 +# ============================================ +# Architecture settings for Apple Silicon +build --cpu=darwin_arm64 +build --host_cpu=darwin_arm64 + +# M4 specific optimizations +build --copt=-march=armv8.6-a +build --copt=-O3 +build --copt=-DNDEBUG + +# C++ standard +build --cxxopt=-std=c++17 +build --cxxopt=-stdlib=libc++ + +# OpenSSL configuration for Apple Silicon +build --action_env=OPENSSL_ROOT_DIR=/opt/homebrew/opt/openssl@3 +build --action_env=OPENSSL_INCLUDE_DIR=/opt/homebrew/opt/openssl@3/include +build --action_env=OPENSSL_LIB_DIR=/opt/homebrew/opt/openssl@3/lib + +# Compiler warnings +build --cxxopt=-Wno-deprecated-declarations +build --cxxopt=-Wno-unused-parameter +build 
--cxxopt=-Wno-unused-variable + +# Performance settings (adjust for M4's 8 cores: 4P + 4E) +build --jobs=6 +build --local_cpu_resources=6 + +# Build output +build --verbose_failures +build --compilation_mode=opt + +# Test settings +test --test_output=errors +test --test_timeout=300 diff --git a/WORKSPACE b/WORKSPACE index e05017f5c..0638128c6 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -20,6 +20,16 @@ workspace(name = "com_resdb_nexres") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +http_archive( + name = "bazel_skylib", + sha256 = "74d544d96f4a5bb630d465ca8bbcfe231e3594e5aae57e1edbf17a6eb3ca2506", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz", + "https://github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz", + ], +) +load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace") +bazel_skylib_workspace() http_archive( name = "hedron_compile_commands", #Replace the commit hash (4f28899228fb3ad0126897876f147ca15026151e) with the latest commit hash from the repo diff --git a/ecosystem/sdk/vector-indexing/vector_add.py b/ecosystem/sdk/vector-indexing/vector_add.py index 93f13252b..04290b8b2 100644 --- a/ecosystem/sdk/vector-indexing/vector_add.py +++ b/ecosystem/sdk/vector-indexing/vector_add.py @@ -1,6 +1,6 @@ """ Filename: vector_add.py -Author(s) / Contrubtor(s): Steven Shoemaker / Regan Yang, Ritesh Patro, Yoshiki Yamaguchi, Tiching Kao +Author(s) / Contributor(s): Steven Shoemaker / Regan Yang, Ritesh Patro, Yoshiki Yamaguchi, Tiching Kao Date: 2025-Fall Description: (Indexers project) Run to save value to ResDB and generate a vector embedding for it """ @@ -32,7 +32,7 @@ if(sys.argv[i] == '--value' and (i + 1 != len(sys.argv))): value_to_add = sys.argv[i + 1] break - + if value_to_add == '': print("Critical Error - the program requires an arguement in the form of `--value stringToSave`") sys.exit() @@ -143,7 +143,8 @@ # 
Embedding information using this library is split across 5 files. The next chunk of code saves each of # these files as a kv store value in ResDB, storing text data as a string, and JSON data as a Dict or Dict[] - # (2/5) Create embedding information for the txt passages file, which are raw byte data + + # (2/5) Create embedding information for the txt passages file, which are latin-1 byte data for pairing in [ ("temp.leann.passages.idx", "temp_leann_passages_txt"), ("temp.index", "temp_index_txt") @@ -151,9 +152,11 @@ fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) key = embedding_keys[pairing[1]] try: + # **CORRECTED LINE: Open in binary read mode ('rb')** with open(fileName, 'rb') as file: - binary_content = file.read() - content = base64.b64encode(binary_content).decode('utf-8') + content_bytes = file.read() + # Decode the bytes to a latin-1 string for ResDB storage + content = content_bytes.decode('latin-1') _ = hnsw_library.put_record(key, content) except Exception as e: print(pairing) diff --git a/ecosystem/sdk/vector-indexing/vector_delete.py b/ecosystem/sdk/vector-indexing/vector_delete.py new file mode 100644 index 000000000..ddd2dcb63 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/vector_delete.py @@ -0,0 +1,182 @@ +""" +Filename: vector_delete.py +Author(s) / Contributor(s): Steven Shoemaker / Regan Yang, Ritesh Patro, Yoshiki Yamaguchi, Tiching Kao +Date: 2025-Fall +Description: (Indexers project) Run to delete a value from the HNSW index by removing it + from the ResDB passages list and triggering a full index rebuild. 
+""" +# Typical Python imports +import sys +import os +import json +from pathlib import Path +from typing import Dict, Any, List +# ResDB & HNSW imports +from resdb_orm.orm import ResDBORM +import hnsw_library +from leann import LeannBuilder + +# Global Variables +WORKING_DIR = Path("./").resolve() +db = ResDBORM() + +# --- Utility Function to Rebuild and Save the Index --- +def rebuild_and_save_index(embedding_keys: Dict[str, Any], hnsw_text_entries: List[str]): + """ + Constructs the HNSW index using the current list of text entries and + saves the resulting files back to ResDB using the stored keys. + + This function incorporates all necessary corrections for file encoding + and data structure integrity. + """ + file_temporary_directory = Path(WORKING_DIR / "saved_data/temp") + file_temporary_storage = str(WORKING_DIR / "saved_data/temp/temp.leann") + + # Create the temp directory if it doesn't exist + if not os.path.exists(file_temporary_directory): + file_temporary_directory.mkdir() + + print("Rebuilding HNSW index with remaining entries (This can take a moment)...") + + # Construct the HNSW Tree (creates the 5 files) + builder = LeannBuilder(backend_name="hnsw") + for text in hnsw_text_entries: + builder.add_text(text) + builder.build_index(file_temporary_storage) + + # --- SECTION 4: Save the new embeddings (All corrections applied here) --- + + # (2/5) Fix 1: Handle binary files correctly to prevent "ASCII characters" error. 
+ for pairing in [ + ("temp.leann.passages.idx", "temp_leann_passages_txt"), + ("temp.index", "temp_index_txt") + ]: + fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) + key = embedding_keys[pairing[1]] + try: + # **CRITICAL FIX:** Open in binary read mode ('rb') + with open(fileName, 'rb') as file: + content_bytes = file.read() + # Decode the bytes to a latin-1 string for ResDB storage + content = content_bytes.decode('latin-1') + hnsw_library.put_record(key, content) + except Exception as e: + print(f"Critical Save Error (2/5 - {pairing[0]}): {e}") + sys.exit() + + # (3/5) Create embedding information for the index passages file, which is ascii text data + fileName = str(WORKING_DIR / "saved_data/temp/temp.ids.txt") + key = embedding_keys["temp_ids_txt"] + try: + with open(fileName, 'r', encoding='ascii') as file: + content = file.read() + hnsw_library.put_record(key, content) + except Exception as e: + print(f"Critical Save Error (3/5 - temp.ids.txt): {e}") + sys.exit() + + # (4/5) Create embedding information for the metadata file, which is a single json object + fileName = str(WORKING_DIR / "saved_data/temp/temp.leann.meta.json") + key = embedding_keys["temp_leann_meta_json"] + try: + with open(fileName, 'r') as file: + content = json.load(file) + hnsw_library.put_record(key, content) + except Exception as e: + print(f"Critical Save Error (4/5 - temp.leann.meta.json): {e}") + sys.exit() + + # (5/5) Fix 2: Read the newly generated JSONL file and save it as a List[Dict] + fileName = str(WORKING_DIR / "saved_data/temp/temp.leann.passages.jsonl") + key = embedding_keys["temp_leann_passages_json"] + content = [] + try: + with open(fileName, 'r') as file: + # We load each json object line-by-line, saving each as an entry in an array + for line in file: + content.append(json.loads(line)) + # The result is a list of dictionaries, correcting the prior corruption issue + hnsw_library.put_record(key, content) + except Exception as e: + print(f"Critical Save 
Error (5/5 - passages_jsonl): {e}") + sys.exit() + + print("Index rebuild and save complete.") + + # --- SECTION 5: Cleanup: Remove temporary files --- + for file_temp_embedding in ["temp.ids.txt", "temp.index", "temp.leann.passages.idx", + "temp.leann.meta.json", "temp.leann.passages.jsonl"]: + fileName = str(WORKING_DIR / "saved_data/temp/" / file_temp_embedding) + try: + os.remove(fileName) + except Exception: + pass + + if os.path.exists(file_temporary_directory): + file_temporary_directory.rmdir() + + +# - - - - - - - - - SECTION 1: Init and data cleaning - - - - - - - - - > +if __name__ == "__main__": + # Input Variable + value_to_delete = '' + + # Parse the value that the user is requesting to delete + for i in range (len(sys.argv)): + if(sys.argv[i] == '--value' and (i + 1 != len(sys.argv))): + value_to_delete = sys.argv[i + 1] + break + + if value_to_delete == '': + print("Critical Error - the program requires an argument in the form of `--value stringToDelete`") + sys.exit() + +# - - - - - - - - - SECTION 2: Retrieve HNSW data keys and passages list - - - - - - - - - > + file_embedding_keys = str(WORKING_DIR / "saved_data/embedding_keys.json") + embedding_keys: Dict[str, Any] = {} + + # Retrieve the keys saving the location of embedding data + try: + if (not os.path.exists(file_embedding_keys)): raise FileNotFoundError() + with open(file_embedding_keys, 'r') as file: + embedding_keys = json.load(file) + except FileNotFoundError: + print("Critical Error - The file listing key embeddings does not exist. 
Cannot delete.") + sys.exit() + except Exception as e: + print(f"Critical Error - {e}") + sys.exit() + + # Get the current list of text passages from ResDB + try: + key = embedding_keys["temp_leann_passages_json"] + if (key is None or key == ""): raise KeyError() + + passages_return_item = hnsw_library.get_record(key) + passages_return_data = passages_return_item["data"] + + # Extract just the text from the list of dictionaries + datapointToText = lambda dataPoint: dataPoint['text'] + current_text_entries = list(map(datapointToText, passages_return_data)) + except Exception as e: + print(f"Critical Error retrieving passages list: {e}") + print("Cannot find or access the list of saved values. Terminating...") + sys.exit() + +# - - - - - - - - - SECTION 3: Delete the value and trigger rebuild - - - - - - - - - > + if value_to_delete not in current_text_entries: + print(f"Error: '{value_to_delete}' not found in the indexed entries. Nothing deleted.") + # NOTE: Even if nothing is deleted, this still proceeds to rebuild and save, which + # is necessary to fix the corruption caused by prior runs of vector_add.py + pass + else: + # Create the new list of entries, excluding the one to delete + current_text_entries = [text for text in current_text_entries if text != value_to_delete] + print(f"Value '{value_to_delete}' removed from text entries list.") + + # Trigger the rebuild and save + rebuild_and_save_index(embedding_keys, current_text_entries) + + print(f"\nSUCCESS: The HNSW index has been rebuilt and saved with correct file encodings.") + if value_to_delete in current_text_entries: + print(f"The value '{value_to_delete}' was not in the index, but the system files have been repaired.") \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/vector_get.py b/ecosystem/sdk/vector-indexing/vector_get.py index d08a67152..a52d38f2c 100644 --- a/ecosystem/sdk/vector-indexing/vector_get.py +++ b/ecosystem/sdk/vector-indexing/vector_get.py @@ -1,6 +1,6 @@ """ 
Filename: vector_get.py -Author(s) / Contrubtor(s): Steven Shoemaker / Regan Yang, Ritesh Patro, Yoshiki Yamaguchi, Tiching Kao +Author(s) / Contributor(s): Steven Shoemaker / Regan Yang, Ritesh Patro, Yoshiki Yamaguchi, Tiching Kao Date: 2025-Fall Description: (Indexers project) Run to search ResDB for the embeddings k-closest to input string """ @@ -57,7 +57,7 @@ if k_matches <= 0: print('No or invalid arguement provided for --k_matches. Defaulting to finding one single most similar value') k_matches = 1 - + # - - - - - - - - - SECTION 2: Retrieve keys to HNSW data - - - - - - - - - > file_saved_directory = Path(WORKING_DIR / "saved_data") file_embedding_keys = str(WORKING_DIR / "saved_data/embedding_keys.json") @@ -74,12 +74,12 @@ except FileNotFoundError: print("Critical Error - The file listing key embeddings does not exist. Please add a vector value before trying to retrieve similar values") print("Terminating...") - os.exit() + sys.exit() except Exception as e: print(f"Critical Error - {e}") print("There is no protocol for handling this error, but it is known it will prevent retrieval of embedding data. 
Terminating...") - os.exit() - + sys.exit() + # - - - - - - - - - SECTION 3: Save the embedding data to temporary files - - - - - - - - - > file_temporary_directory = Path(WORKING_DIR / "saved_data/temp") @@ -95,15 +95,18 @@ ("temp.leann.passages.idx", "temp_leann_passages_txt"), ("temp.index", "temp_index_txt") ] - for pairing in embedding_data: + for pairing in embedding_data: fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) key = embedding_keys[pairing[1]] file_return_item = hnsw_library.get_record(key) file_return_data = file_return_item["data"] try: + # Encode the latin-1 string back to bytes + content_bytes = file_return_data.encode("latin-1") + + # **CRITICAL FIX: Open in binary write mode ('wb')** with open(fileName, 'wb') as file: - binary_content = base64.b64decode(file_return_data) - file.write(binary_content) + file.write(content_bytes) # Write the raw bytes except Exception as e: print(f"Unsuccessful ResDB retrieval for untyped file: {e}") print("Critical Error - the above error indicates that a file used for vector embeddings is improperly saved") @@ -111,7 +114,7 @@ print("data by deleting `vector-indexing/saved_data` and start fresh.") print("Terminating...") sys.exit() - # (3/5) Save embedding information for the ID text file, which is ascii data + # (3/5) Save embedding information for the ID text file, which is ascii data fileName = str(WORKING_DIR / "saved_data/temp/temp.ids.txt") key = embedding_keys["temp_ids_txt"] file_return_item = hnsw_library.get_record(key) From 3a5a08ee86f896b139eb18025060293d5d8e5e53 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 3 Dec 2025 12:38:13 -0800 Subject: [PATCH 46/79] Update encoding method for text passages in vector_add.py --- ecosystem/sdk/vector-indexing/vector_add.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ecosystem/sdk/vector-indexing/vector_add.py b/ecosystem/sdk/vector-indexing/vector_add.py 
index 04290b8b2..f3c086675 100644 --- a/ecosystem/sdk/vector-indexing/vector_add.py +++ b/ecosystem/sdk/vector-indexing/vector_add.py @@ -144,7 +144,7 @@ # these files as a kv store value in ResDB, storing text data as a string, and JSON data as a Dict or Dict[] - # (2/5) Create embedding information for the txt passages file, which are latin-1 byte data + # (2/5) Create embedding information for the txt passages file, which are base64 byte data for pairing in [ ("temp.leann.passages.idx", "temp_leann_passages_txt"), ("temp.index", "temp_index_txt") @@ -155,8 +155,8 @@ # **CORRECTED LINE: Open in binary read mode ('rb')** with open(fileName, 'rb') as file: content_bytes = file.read() - # Decode the bytes to a latin-1 string for ResDB storage - content = content_bytes.decode('latin-1') + # Decode the bytes to a base64 string for ResDB storage + content = base64.b64encode(binary_content).decode('utf-8') _ = hnsw_library.put_record(key, content) except Exception as e: print(pairing) @@ -237,4 +237,4 @@ # weird that it had to be duped in here to work # > Look into the possibility of saving a value with custom keys in resdb-orm, instead of using # the random/autogenerated ones. This could ultimately lead to not needing to use a -# saved_data/embedding_keys.json file at all \ No newline at end of file +# saved_data/embedding_keys.json file at all From fadec907d9e8ae571284fe5ee408add5f880940d Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 3 Dec 2025 12:38:42 -0800 Subject: [PATCH 47/79] Change content decoding to base64 for ResDB Updated file reading to decode bytes as base64 for ResDB storage. 
--- ecosystem/sdk/vector-indexing/vector_delete.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ecosystem/sdk/vector-indexing/vector_delete.py b/ecosystem/sdk/vector-indexing/vector_delete.py index ddd2dcb63..9c0526750 100644 --- a/ecosystem/sdk/vector-indexing/vector_delete.py +++ b/ecosystem/sdk/vector-indexing/vector_delete.py @@ -57,8 +57,8 @@ def rebuild_and_save_index(embedding_keys: Dict[str, Any], hnsw_text_entries: Li # **CRITICAL FIX:** Open in binary read mode ('rb') with open(fileName, 'rb') as file: content_bytes = file.read() - # Decode the bytes to a latin-1 string for ResDB storage - content = content_bytes.decode('latin-1') + # Decode the bytes to a base64 string for ResDB storage + content = base64.b64encode(binary_content).decode('utf-8') hnsw_library.put_record(key, content) except Exception as e: print(f"Critical Save Error (2/5 - {pairing[0]}): {e}") @@ -179,4 +179,4 @@ def rebuild_and_save_index(embedding_keys: Dict[str, Any], hnsw_text_entries: Li print(f"\nSUCCESS: The HNSW index has been rebuilt and saved with correct file encodings.") if value_to_delete in current_text_entries: - print(f"The value '{value_to_delete}' was not in the index, but the system files have been repaired.") \ No newline at end of file + print(f"The value '{value_to_delete}' was not in the index, but the system files have been repaired.") From 3ed5d4a593c0a13db942d495bab7f042957d23d2 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 3 Dec 2025 12:39:12 -0800 Subject: [PATCH 48/79] Add base64 import to vector_delete.py --- ecosystem/sdk/vector-indexing/vector_delete.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ecosystem/sdk/vector-indexing/vector_delete.py b/ecosystem/sdk/vector-indexing/vector_delete.py index 9c0526750..2123aa1b1 100644 --- a/ecosystem/sdk/vector-indexing/vector_delete.py +++ b/ecosystem/sdk/vector-indexing/vector_delete.py @@ -11,6 +11,7 @@ import json from 
pathlib import Path from typing import Dict, Any, List +import base64 # ResDB & HNSW imports from resdb_orm.orm import ResDBORM import hnsw_library From b631ecdfddfb8ff25b19c25be2da28829e69eb1b Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Wed, 3 Dec 2025 12:39:41 -0800 Subject: [PATCH 49/79] Update encoding method for file writing Replaced latin-1 encoding with base64 encoding for binary content. --- ecosystem/sdk/vector-indexing/vector_get.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ecosystem/sdk/vector-indexing/vector_get.py b/ecosystem/sdk/vector-indexing/vector_get.py index a52d38f2c..f0e58e877 100644 --- a/ecosystem/sdk/vector-indexing/vector_get.py +++ b/ecosystem/sdk/vector-indexing/vector_get.py @@ -101,8 +101,8 @@ file_return_item = hnsw_library.get_record(key) file_return_data = file_return_item["data"] try: - # Encode the latin-1 string back to bytes - content_bytes = file_return_data.encode("latin-1") + # Encode the base64 string back to bytes + content_bytes = base64.b64encode(binary_content).decode('utf-8') # **CRITICAL FIX: Open in binary write mode ('wb')** with open(fileName, 'wb') as file: @@ -204,4 +204,4 @@ # Remove the whole temp directory if os.path.exists(file_temporary_directory): - file_temporary_directory.rmdir() \ No newline at end of file + file_temporary_directory.rmdir() From e04f98f4dc1ae788698409eeae57ff75a34a6e29 Mon Sep 17 00:00:00 2001 From: Tiching Kao Date: Thu, 4 Dec 2025 17:34:07 -0800 Subject: [PATCH 50/79] Moved JSONScalar to separate file allowing app.py to run. Added test query. Should ask if main repo app.py can run. 
--- ecosystem/graphql/app.py | 40 +++++++++++++++++++++++--------- ecosystem/graphql/json_scalar.py | 9 +++++++ 2 files changed, 38 insertions(+), 11 deletions(-) create mode 100644 ecosystem/graphql/json_scalar.py diff --git a/ecosystem/graphql/app.py b/ecosystem/graphql/app.py index c5e6900bd..f33d67407 100644 --- a/ecosystem/graphql/app.py +++ b/ecosystem/graphql/app.py @@ -18,6 +18,8 @@ # # +import tempfile +import os from resdb_driver import Resdb from resdb_driver.crypto import generate_keypair @@ -35,21 +37,13 @@ from flask import Flask from flask_cors import CORS +from json_scalar import JSONScalar + app = Flask(__name__) CORS(app) # This will enable CORS for all routes from strawberry.flask.views import GraphQLView -@strawberry.scalar(description="Custom JSON scalar") -class JSONScalar: - @staticmethod - def serialize(value: Any) -> Any: - return value # Directly return the JSON object - - @staticmethod - def parse_value(value: Any) -> Any: - return value # Accept JSON as is - @strawberry.type class RetrieveTransaction: id: str @@ -94,6 +88,30 @@ def getTransaction(self, id: strawberry.ID) -> RetrieveTransaction: asset=data["asset"] ) return payload + + @strawberry.field + def count_cats(self) -> str: + # Create a temporary file + with tempfile.NamedTemporaryFile(mode="w+", delete=False) as tmp_file: + tmp_path = tmp_file.name + + #Write to file + lines = ["cat", "cat", "cat", "mouse", "cat"] + for line in lines: + tmp_file.write(line + "\n") + + # Count number of cats + cat_count = 0 + with open(tmp_path, "r") as f: + for line in f: + if "cat" in line.strip(): + cat_count += 1 + + #Delete temporary file + os.remove(tmp_path) + + #return number of cats + return f'The word "cat" appears {cat_count} times' @strawberry.type class Mutation: @@ -123,4 +141,4 @@ def postTransaction(self, data: PrepareAsset) -> CommitTransaction: ) if __name__ == "__main__": - app.run(port="8000") + app.run(port="8000") \ No newline at end of file diff --git 
a/ecosystem/graphql/json_scalar.py b/ecosystem/graphql/json_scalar.py new file mode 100644 index 000000000..5f5eb64f0 --- /dev/null +++ b/ecosystem/graphql/json_scalar.py @@ -0,0 +1,9 @@ +import strawberry +from typing import Any + +@strawberry.scalar( + name="JSONScalar", + description="Custom JSON scalar" +) +def JSONScalar(value: Any) -> Any: + return value From fe8bd06052837ecb608c8afd63ff41c3952f41df Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Fri, 5 Dec 2025 12:59:01 +0900 Subject: [PATCH 51/79] Fix base64 encoding for content bytes in vector_add.py, vector_delete.py, and vector_get.py --- ecosystem/sdk/vector-indexing/vector_add.py | 2 +- ecosystem/sdk/vector-indexing/vector_delete.py | 2 +- ecosystem/sdk/vector-indexing/vector_get.py | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ecosystem/sdk/vector-indexing/vector_add.py b/ecosystem/sdk/vector-indexing/vector_add.py index f3c086675..c52f01835 100644 --- a/ecosystem/sdk/vector-indexing/vector_add.py +++ b/ecosystem/sdk/vector-indexing/vector_add.py @@ -156,7 +156,7 @@ with open(fileName, 'rb') as file: content_bytes = file.read() # Decode the bytes to a base64 string for ResDB storage - content = base64.b64encode(binary_content).decode('utf-8') + content = base64.b64encode(content_bytes).decode('utf-8') _ = hnsw_library.put_record(key, content) except Exception as e: print(pairing) diff --git a/ecosystem/sdk/vector-indexing/vector_delete.py b/ecosystem/sdk/vector-indexing/vector_delete.py index 2123aa1b1..07349354b 100644 --- a/ecosystem/sdk/vector-indexing/vector_delete.py +++ b/ecosystem/sdk/vector-indexing/vector_delete.py @@ -59,7 +59,7 @@ def rebuild_and_save_index(embedding_keys: Dict[str, Any], hnsw_text_entries: Li with open(fileName, 'rb') as file: content_bytes = file.read() # Decode the bytes to a base64 string for ResDB storage - content = base64.b64encode(binary_content).decode('utf-8') + content = base64.b64encode(content_bytes).decode('utf-8') 
hnsw_library.put_record(key, content) except Exception as e: print(f"Critical Save Error (2/5 - {pairing[0]}): {e}") diff --git a/ecosystem/sdk/vector-indexing/vector_get.py b/ecosystem/sdk/vector-indexing/vector_get.py index f0e58e877..0653de790 100644 --- a/ecosystem/sdk/vector-indexing/vector_get.py +++ b/ecosystem/sdk/vector-indexing/vector_get.py @@ -102,7 +102,8 @@ file_return_data = file_return_item["data"] try: # Encode the base64 string back to bytes - content_bytes = base64.b64encode(binary_content).decode('utf-8') + content_bytes = file.read() + content = base64.b64encode(content_bytes).decode('utf-8') # **CRITICAL FIX: Open in binary write mode ('wb')** with open(fileName, 'wb') as file: From dc5a7a6920f9764cb58256f66edd7a75379ae403 Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Fri, 5 Dec 2025 13:04:42 +0900 Subject: [PATCH 52/79] After converting to base64, the behavior became abnormal, so I reverted it back to latin-1 for now. --- ecosystem/sdk/vector-indexing/vector_add.py | 8 ++++---- ecosystem/sdk/vector-indexing/vector_delete.py | 7 +++---- ecosystem/sdk/vector-indexing/vector_get.py | 7 +++---- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/ecosystem/sdk/vector-indexing/vector_add.py b/ecosystem/sdk/vector-indexing/vector_add.py index c52f01835..04290b8b2 100644 --- a/ecosystem/sdk/vector-indexing/vector_add.py +++ b/ecosystem/sdk/vector-indexing/vector_add.py @@ -144,7 +144,7 @@ # these files as a kv store value in ResDB, storing text data as a string, and JSON data as a Dict or Dict[] - # (2/5) Create embedding information for the txt passages file, which are base64 byte data + # (2/5) Create embedding information for the txt passages file, which are latin-1 byte data for pairing in [ ("temp.leann.passages.idx", "temp_leann_passages_txt"), ("temp.index", "temp_index_txt") @@ -155,8 +155,8 @@ # **CORRECTED LINE: Open in binary read mode ('rb')** with open(fileName, 'rb') as file: content_bytes = file.read() - # Decode the 
bytes to a base64 string for ResDB storage - content = base64.b64encode(content_bytes).decode('utf-8') + # Decode the bytes to a latin-1 string for ResDB storage + content = content_bytes.decode('latin-1') _ = hnsw_library.put_record(key, content) except Exception as e: print(pairing) @@ -237,4 +237,4 @@ # weird that it had to be duped in here to work # > Look into the possibility of saving a value with custom keys in resdb-orm, instead of using # the random/autogenerated ones. This could ultimately lead to not needing to use a -# saved_data/embedding_keys.json file at all +# saved_data/embedding_keys.json file at all \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/vector_delete.py b/ecosystem/sdk/vector-indexing/vector_delete.py index 07349354b..ddd2dcb63 100644 --- a/ecosystem/sdk/vector-indexing/vector_delete.py +++ b/ecosystem/sdk/vector-indexing/vector_delete.py @@ -11,7 +11,6 @@ import json from pathlib import Path from typing import Dict, Any, List -import base64 # ResDB & HNSW imports from resdb_orm.orm import ResDBORM import hnsw_library @@ -58,8 +57,8 @@ def rebuild_and_save_index(embedding_keys: Dict[str, Any], hnsw_text_entries: Li # **CRITICAL FIX:** Open in binary read mode ('rb') with open(fileName, 'rb') as file: content_bytes = file.read() - # Decode the bytes to a base64 string for ResDB storage - content = base64.b64encode(content_bytes).decode('utf-8') + # Decode the bytes to a latin-1 string for ResDB storage + content = content_bytes.decode('latin-1') hnsw_library.put_record(key, content) except Exception as e: print(f"Critical Save Error (2/5 - {pairing[0]}): {e}") @@ -180,4 +179,4 @@ def rebuild_and_save_index(embedding_keys: Dict[str, Any], hnsw_text_entries: Li print(f"\nSUCCESS: The HNSW index has been rebuilt and saved with correct file encodings.") if value_to_delete in current_text_entries: - print(f"The value '{value_to_delete}' was not in the index, but the system files have been repaired.") + print(f"The value 
'{value_to_delete}' was not in the index, but the system files have been repaired.") \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/vector_get.py b/ecosystem/sdk/vector-indexing/vector_get.py index 0653de790..a52d38f2c 100644 --- a/ecosystem/sdk/vector-indexing/vector_get.py +++ b/ecosystem/sdk/vector-indexing/vector_get.py @@ -101,9 +101,8 @@ file_return_item = hnsw_library.get_record(key) file_return_data = file_return_item["data"] try: - # Encode the base64 string back to bytes - content_bytes = file.read() - content = base64.b64encode(content_bytes).decode('utf-8') + # Encode the latin-1 string back to bytes + content_bytes = file_return_data.encode("latin-1") # **CRITICAL FIX: Open in binary write mode ('wb')** with open(fileName, 'wb') as file: @@ -205,4 +204,4 @@ # Remove the whole temp directory if os.path.exists(file_temporary_directory): - file_temporary_directory.rmdir() + file_temporary_directory.rmdir() \ No newline at end of file From 8f35987e4fc8a7e5d25201ada924b68b64d048b9 Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Fri, 5 Dec 2025 13:23:16 +0900 Subject: [PATCH 53/79] Update encoding to base64 for content bytes in vector_add.py, vector_delete.py, and vector_get.py --- ecosystem/sdk/vector-indexing/vector_add.py | 6 +++--- ecosystem/sdk/vector-indexing/vector_delete.py | 9 +++++---- ecosystem/sdk/vector-indexing/vector_get.py | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/ecosystem/sdk/vector-indexing/vector_add.py b/ecosystem/sdk/vector-indexing/vector_add.py index 04290b8b2..c3f996726 100644 --- a/ecosystem/sdk/vector-indexing/vector_add.py +++ b/ecosystem/sdk/vector-indexing/vector_add.py @@ -144,7 +144,7 @@ # these files as a kv store value in ResDB, storing text data as a string, and JSON data as a Dict or Dict[] - # (2/5) Create embedding information for the txt passages file, which are latin-1 byte data + # (2/5) Create embedding information for the txt passages file, which are base64 byte 
data for pairing in [ ("temp.leann.passages.idx", "temp_leann_passages_txt"), ("temp.index", "temp_index_txt") @@ -155,8 +155,8 @@ # **CORRECTED LINE: Open in binary read mode ('rb')** with open(fileName, 'rb') as file: content_bytes = file.read() - # Decode the bytes to a latin-1 string for ResDB storage - content = content_bytes.decode('latin-1') + # Decode the bytes to a base64 string for ResDB storage + content = base64.b64encode(content_bytes).decode('utf-8') _ = hnsw_library.put_record(key, content) except Exception as e: print(pairing) diff --git a/ecosystem/sdk/vector-indexing/vector_delete.py b/ecosystem/sdk/vector-indexing/vector_delete.py index ddd2dcb63..abf740e08 100644 --- a/ecosystem/sdk/vector-indexing/vector_delete.py +++ b/ecosystem/sdk/vector-indexing/vector_delete.py @@ -11,6 +11,7 @@ import json from pathlib import Path from typing import Dict, Any, List +import base64 # ResDB & HNSW imports from resdb_orm.orm import ResDBORM import hnsw_library @@ -54,12 +55,12 @@ def rebuild_and_save_index(embedding_keys: Dict[str, Any], hnsw_text_entries: Li fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) key = embedding_keys[pairing[1]] try: - # **CRITICAL FIX:** Open in binary read mode ('rb') + # **CORRECTED LINE: Open in binary read mode ('rb')** with open(fileName, 'rb') as file: content_bytes = file.read() - # Decode the bytes to a latin-1 string for ResDB storage - content = content_bytes.decode('latin-1') - hnsw_library.put_record(key, content) + # Decode the bytes to a base64 string for ResDB storage + content = base64.b64encode(content_bytes).decode('utf-8') + _ = hnsw_library.put_record(key, content) except Exception as e: print(f"Critical Save Error (2/5 - {pairing[0]}): {e}") sys.exit() diff --git a/ecosystem/sdk/vector-indexing/vector_get.py b/ecosystem/sdk/vector-indexing/vector_get.py index a52d38f2c..9aa1ff5fe 100644 --- a/ecosystem/sdk/vector-indexing/vector_get.py +++ b/ecosystem/sdk/vector-indexing/vector_get.py @@ -102,7 
+102,7 @@ file_return_data = file_return_item["data"] try: # Encode the latin-1 string back to bytes - content_bytes = file_return_data.encode("latin-1") + content_bytes = base64.b64decode(file_return_data) # **CRITICAL FIX: Open in binary write mode ('wb')** with open(fileName, 'wb') as file: From fa292c7acc2a11eebb576d776b26cb24c269513f Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Fri, 5 Dec 2025 13:56:33 +0900 Subject: [PATCH 54/79] Add vector client and proxy server implementation for vector indexing --- .../sdk/vector-indexing/vector_client.py | 83 ++++++++++++++ ecosystem/sdk/vector-indexing/vector_proxy.py | 105 ++++++++++++++++++ 2 files changed, 188 insertions(+) create mode 100644 ecosystem/sdk/vector-indexing/vector_client.py create mode 100644 ecosystem/sdk/vector-indexing/vector_proxy.py diff --git a/ecosystem/sdk/vector-indexing/vector_client.py b/ecosystem/sdk/vector-indexing/vector_client.py new file mode 100644 index 000000000..a06590e86 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/vector_client.py @@ -0,0 +1,83 @@ +# vector_client.py +import argparse +import requests +import sys + +# Proxy Server URL +# Use 'localhost' for local testing +# Use the external IP (e.g., "http://34.xx.xx.xx:5000") for cloud deployment +PROXY_URL = "http://localhost:5000" + +def cmd_add(args): + """Send add request to proxy""" + print(f"Adding value: '{args.value}'...") + try: + resp = requests.post(f"{PROXY_URL}/add", json={"text": args.value}) + data = resp.json() + + if resp.status_code == 200: + print(f"[SUCCESS] {data.get('message')}") + else: + print(f"[ERROR] {data.get('message') or data.get('error')}") + except Exception as e: + print(f"Connection failed: {e}") + +def cmd_delete(args): + """Send delete request to proxy""" + print(f"Deleting value: '{args.value}'...") + try: + resp = requests.post(f"{PROXY_URL}/delete", json={"text": args.value}) + data = resp.json() + + if resp.status_code == 200: + print(f"[SUCCESS] {data.get('message')}") + else: + 
print(f"[ERROR] {data.get('error')}") + except Exception as e: + print(f"Connection failed: {e}") + +def cmd_search(args): + """Send search request to proxy""" + print(f"Searching for: '{args.value}' (Top {args.k_matches})...") + try: + resp = requests.post(f"{PROXY_URL}/search", json={"value": args.value, "k": args.k_matches}) + data = resp.json() + + if resp.status_code == 200: + results = data.get("results", []) + print(f"\n--- Found {len(results)} results ---") + if not results: + print("No matches found.") + for i, item in enumerate(results, 1): + print(f"{i}. {item['text']} (Score: {item['score']:.4f})") + print("----------------------------") + else: + print(f"[ERROR] {data.get('error')}") + except Exception as e: + print(f"Connection failed: {e}") + +def main(): + parser = argparse.ArgumentParser(description="Vector Search Client") + subparsers = parser.add_subparsers(dest="command", required=True) + + # Add command + p_add = subparsers.add_parser("add") + p_add.add_argument("--value", required=True) + p_add.set_defaults(func=cmd_add) + + # Delete command + p_del = subparsers.add_parser("delete") + p_del.add_argument("--value", required=True) + p_del.set_defaults(func=cmd_delete) + + # Search command + p_search = subparsers.add_parser("search") + p_search.add_argument("--value", required=True) + p_search.add_argument("--k_matches", type=int, default=3) + p_search.set_defaults(func=cmd_search) + + args = parser.parse_args() + args.func(args) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/vector_proxy.py b/ecosystem/sdk/vector-indexing/vector_proxy.py new file mode 100644 index 000000000..36a67f929 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/vector_proxy.py @@ -0,0 +1,105 @@ +# vector_proxy.py +from flask import Flask, request, jsonify +import subprocess +import sys +import re +import os + +app = Flask(__name__) + +# Get the path of the currently running Python interpreter +# (To ensure 
scripts run within the same environment/venv) +PYTHON_EXE = sys.executable +CWD = os.path.dirname(os.path.abspath(__file__)) + +def run_script(script_name, args): + """ + Executes a Python script as a subprocess and captures its output. + """ + command = [PYTHON_EXE, script_name] + args + try: + # Run the script and capture stdout/stderr + result = subprocess.run( + command, + capture_output=True, + text=True, + cwd=CWD + ) + + # Check return code (0 means success) + if result.returncode != 0: + return False, result.stderr + "\n" + result.stdout + return True, result.stdout + except Exception as e: + return False, str(e) + +# --- API Endpoints --- + +@app.route('/', methods=['GET']) +def health_check(): + return jsonify({"status": "online", "message": "Vector Indexing Proxy is running"}), 200 + +@app.route('/add', methods=['POST']) +def add_vector(): + text = request.json.get('text') + if not text: + return jsonify({"error": "No text provided"}), 400 + + # Command: python vector_add.py --value "text" + success, output = run_script("vector_add.py", ["--value", text]) + + if success: + return jsonify({"status": "success", "message": "Added successfully", "raw_output": output.strip()}) + else: + # Handle specific errors like duplicates + if "already saved" in output: + return jsonify({"status": "skipped", "message": "Value already exists"}) + return jsonify({"status": "error", "error": output.strip()}), 500 + +@app.route('/delete', methods=['POST']) +def delete_vector(): + text = request.json.get('text') + if not text: + return jsonify({"error": "No text provided"}), 400 + + # Command: python vector_delete.py --value "text" + success, output = run_script("vector_delete.py", ["--value", text]) + + if success: + return jsonify({"status": "success", "message": "Deleted successfully", "raw_output": output.strip()}) + else: + return jsonify({"status": "error", "error": output.strip()}), 500 + +@app.route('/search', methods=['POST']) +def search_vector(): + text = 
request.json.get('value') + k = str(request.json.get('k', 3)) # Default to top 3 results + if not text: + return jsonify({"error": "No text provided"}), 400 + + # Command: python vector_get.py --value "text" --k_matches K + success, output = run_script("vector_get.py", ["--value", text, "--k_matches", k]) + + if not success: + return jsonify({"status": "error", "error": output.strip()}), 500 + + # Parse the stdout from vector_get.py to create a JSON response + # Expected format example: "1. hello world // (similarity score: 0.1234)" + results = [] + for line in output.splitlines(): + # Regex to extract text and score + match = re.search(r'^\d+\.\s+(.*?)\s+//\s+\(similarity score:\s+([0-9.]+)\)', line) + if match: + results.append({ + "text": match.group(1), + "score": float(match.group(2)) + }) + # Capture other informational lines if necessary + elif line.strip() and "Critical Error" not in line: + pass + + return jsonify({"status": "success", "results": results}) + +if __name__ == '__main__': + # Run on port 5000, accessible externally + app.run(host='0.0.0.0', port=5000) \ No newline at end of file From 561852d4dba8f2979d7459ed92d7697c51b0c045 Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Fri, 5 Dec 2025 14:19:45 +0900 Subject: [PATCH 55/79] Fix exit behavior on failed deletion in vector_delete.py --- ecosystem/sdk/vector-indexing/vector_delete.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ecosystem/sdk/vector-indexing/vector_delete.py b/ecosystem/sdk/vector-indexing/vector_delete.py index abf740e08..75b942aaf 100644 --- a/ecosystem/sdk/vector-indexing/vector_delete.py +++ b/ecosystem/sdk/vector-indexing/vector_delete.py @@ -169,7 +169,8 @@ def rebuild_and_save_index(embedding_keys: Dict[str, Any], hnsw_text_entries: Li print(f"Error: '{value_to_delete}' not found in the indexed entries. 
Nothing deleted.") # NOTE: Even if nothing is deleted, this still proceeds to rebuild and save, which # is necessary to fix the corruption caused by prior runs of vector_add.py - pass + # pass + sys.exit() else: # Create the new list of entries, excluding the one to delete current_text_entries = [text for text in current_text_entries if text != value_to_delete] From 7767fa7fdd3112444cbe3cfab1c4720914dea6bc Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Sat, 6 Dec 2025 13:04:32 +0900 Subject: [PATCH 56/79] Remove unnecessary configurations from .bazelrc for cleaner build setup --- .bazelrc | 40 +++------------------------------------- 1 file changed, 3 insertions(+), 37 deletions(-) diff --git a/.bazelrc b/.bazelrc index fc41e7499..cc3d1af5c 100644 --- a/.bazelrc +++ b/.bazelrc @@ -1,38 +1,4 @@ -# ============================================ -# Bazel Configuration for MacBook Air M4 -# ============================================ +build --cxxopt='-std=c++17' --copt=-O3 --jobs=40 +#build --action_env=PYTHON_BIN_PATH="/usr/bin/python3.10" +#build --action_env=PYTHON_LIB_PATH="/usr/include/python3.10" -# Architecture settings for Apple Silicon -build --cpu=darwin_arm64 -build --host_cpu=darwin_arm64 - -# M4 specific optimizations -build --copt=-march=armv8.6-a -build --copt=-O3 -build --copt=-DNDEBUG - -# C++ standard -build --cxxopt=-std=c++17 -build --cxxopt=-stdlib=libc++ - -# OpenSSL configuration for Apple Silicon -build --action_env=OPENSSL_ROOT_DIR=/opt/homebrew/opt/openssl@3 -build --action_env=OPENSSL_INCLUDE_DIR=/opt/homebrew/opt/openssl@3/include -build --action_env=OPENSSL_LIB_DIR=/opt/homebrew/opt/openssl@3/lib - -# Compiler warnings -build --cxxopt=-Wno-deprecated-declarations -build --cxxopt=-Wno-unused-parameter -build --cxxopt=-Wno-unused-variable - -# Performance settings (adjust for M4's 8 cores: 4P + 4E) -build --jobs=6 -build --local_cpu_resources=6 - -# Build output -build --verbose_failures -build --compilation_mode=opt - -# Test settings -test 
--test_output=errors -test --test_timeout=300 From ab1bfa9be65f1dab1ab5bcfd662f16e2451dfff1 Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Sat, 6 Dec 2025 13:34:58 +0900 Subject: [PATCH 57/79] feat: Implement vector indexing and search functionality - Added vector search capabilities to the GraphQL API with `searchVector` query. - Introduced mutations for adding (`addVector`) and deleting (`deleteVector`) vectors in the index. - Created helper function `run_vector_script` to execute vector indexing scripts. - Updated `vector_add.py` to handle embedding generation and index building using SentenceTransformers and hnswlib. - Refactored `vector_delete.py` to rebuild the index after deletion of a vector. - Enhanced `vector_get.py` to retrieve and search for vectors using the HNSW index. - Improved error handling and logging throughout the vector indexing process. --- ecosystem/graphql/app.py | 93 ++++++ ecosystem/sdk/vector-indexing/vector_add.py | 277 ++++++------------ .../sdk/vector-indexing/vector_delete.py | 225 +++++--------- ecosystem/sdk/vector-indexing/vector_get.py | 253 ++++++---------- 4 files changed, 344 insertions(+), 504 deletions(-) diff --git a/ecosystem/graphql/app.py b/ecosystem/graphql/app.py index f33d67407..cee6005c1 100644 --- a/ecosystem/graphql/app.py +++ b/ecosystem/graphql/app.py @@ -20,14 +20,50 @@ import tempfile import os +import sys +import subprocess +import re +from pathlib import Path + from resdb_driver import Resdb from resdb_driver.crypto import generate_keypair +# --- Configuration --- db_root_url = "localhost:18000" protocol = "http://" fetch_all_endpoint = "/v1/transactions" db = Resdb(db_root_url) +# --- Vector Indexing Scripts Path Configuration --- +# app.py is in ecosystem/graphql/ +# scripts are in ecosystem/sdk/vector-indexing/ +CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) +VECTOR_SCRIPT_DIR = os.path.abspath(os.path.join(CURRENT_DIR, "../sdk/vector-indexing")) +PYTHON_EXE = sys.executable + +def 
run_vector_script(script_name: str, args: list) -> tuple[bool, str]: + """Helper to run python scripts located in the vector-indexing directory.""" + script_path = os.path.join(VECTOR_SCRIPT_DIR, script_name) + + if not os.path.exists(script_path): + return False, f"Script not found: {script_path}" + + command = [PYTHON_EXE, script_path] + args + try: + # Run script with the working directory set to where the script is + # (because the scripts rely on relative paths like ./saved_data) + result = subprocess.run( + command, + capture_output=True, + text=True, + cwd=VECTOR_SCRIPT_DIR + ) + if result.returncode != 0: + return False, result.stderr + "\n" + result.stdout + return True, result.stdout.strip() + except Exception as e: + return False, str(e) + import strawberry import typing import ast @@ -44,6 +80,8 @@ from strawberry.flask.views import GraphQLView +# --- GraphQL Types --- + @strawberry.type class RetrieveTransaction: id: str @@ -70,6 +108,14 @@ class PrepareAsset: recipientPublicKey: str asset: JSONScalar +# New Type for Vector Search Results +@strawberry.type +class VectorSearchResult: + text: str + score: float + +# --- Query --- + @strawberry.type class Query: @strawberry.field @@ -113,6 +159,30 @@ def count_cats(self) -> str: #return number of cats return f'The word "cat" appears {cat_count} times' + # --- New: Vector Search Query --- + @strawberry.field + def searchVector(self, text: str, k: int = 3) -> List[VectorSearchResult]: + """Search for similar texts using the HNSW index.""" + success, output = run_vector_script("vector_get.py", ["--value", text, "--k_matches", str(k)]) + + results = [] + if not success: + # Log error internally if needed, returning empty list or raising error + print(f"Vector search failed: {output}") + return [] + + # Parse the output from vector_get.py (e.g. "1. 
hello // (similarity score: 0.123)") + for line in output.splitlines(): + match = re.search(r'^\d+\.\s+(.*?)\s+//\s+\(similarity score:\s+([0-9.]+)\)', line) + if match: + results.append(VectorSearchResult( + text=match.group(1), + score=float(match.group(2)) + )) + return results + +# --- Mutation --- + @strawberry.type class Mutation: @strawberry.mutation @@ -133,6 +203,29 @@ def postTransaction(self, data: PrepareAsset) -> CommitTransaction: ) return payload + # --- New: Vector Add Mutation --- + @strawberry.mutation + def addVector(self, text: str) -> str: + """Add a text to the vector index.""" + success, output = run_vector_script("vector_add.py", ["--value", text]) + if success: + return "Success: Added to index." + elif "already saved" in output: + return "Skipped: Value already exists." + else: + return f"Error: {output}" + + # --- New: Vector Delete Mutation --- + @strawberry.mutation + def deleteVector(self, text: str) -> str: + """Delete a text from the vector index.""" + success, output = run_vector_script("vector_delete.py", ["--value", text]) + if success: + return "Success: Deleted from index." + else: + return f"Error: {output}" + + schema = strawberry.Schema(query=Query, mutation=Mutation) app.add_url_rule( diff --git a/ecosystem/sdk/vector-indexing/vector_add.py b/ecosystem/sdk/vector-indexing/vector_add.py index c3f996726..854a0d96e 100644 --- a/ecosystem/sdk/vector-indexing/vector_add.py +++ b/ecosystem/sdk/vector-indexing/vector_add.py @@ -1,240 +1,147 @@ """ Filename: vector_add.py -Author(s) / Contributor(s): Steven Shoemaker / Regan Yang, Ritesh Patro, Yoshiki Yamaguchi, Tiching Kao -Date: 2025-Fall -Description: (Indexers project) Run to save value to ResDB and generate a vector embedding for it +Description: Save value to ResDB, generate embeddings using SentenceTransformers, and build HNSW index via hnswlib. 
""" -# Typical Python imports import sys import os import json -from pathlib import Path -from typing import Dict, Any import base64 -# ResDB & HNSW imports +import numpy as np +from pathlib import Path +from typing import Dict, Any, List + +# ML & Search libraries +import hnswlib +from sentence_transformers import SentenceTransformer + +# ResDB & Local imports from resdb_orm.orm import ResDBORM import hnsw_library -from leann import LeannBuilder # Global Variables WORKING_DIR = Path("./").resolve() db = ResDBORM() +MODEL_NAME = 'all-MiniLM-L6-v2' # Lightweight and fast model -# - - - - - - - - - SECTION 1: Init and data cleaning - - - - - - - - - > -# This entire file is only ever intended to run from a CLI if __name__ == "__main__": - # Input Variable + # --- SECTION 1: Input Parsing --- value_to_add = '' - - # Parse the value that the user is requesting to add - for i in range (len(sys.argv)): - # TODO: Consider if you need to parse whitespace around `--value` - if(sys.argv[i] == '--value' and (i + 1 != len(sys.argv))): + for i in range(len(sys.argv)): + if sys.argv[i] == '--value' and (i + 1 != len(sys.argv)): value_to_add = sys.argv[i + 1] break if value_to_add == '': - print("Critical Error - the program requires an arguement in the form of `--value stringToSave`") + print("Critical Error - requires argument `--value stringToSave`") sys.exit() -# - - - - - - - - - SECTION 2: Retrieve HNSW data or create if it doesnt exist - - - - - - - - - > - embedding_keys: Dict[str, Any] = {} - hnsw_text_entries = [] + # --- SECTION 2: Retrieve/Init Keys & Data --- file_saved_directory = Path(WORKING_DIR / "saved_data") file_embedding_keys = str(WORKING_DIR / "saved_data/embedding_keys.json") + embedding_keys: Dict[str, Any] = {} + hnsw_text_entries = [] - # Create the saved_data directory if it doesn't exist if not os.path.exists(file_saved_directory): file_saved_directory.mkdir() - # Create the file storing embedding keys if it doesn't exist + # Load or initialize keys 
if not os.path.exists(file_embedding_keys): embedding_keys = { + "temp_index_txt": "", # Stores the binary HNSW index (base64) + "temp_leann_passages_json": "", # Stores the List[Dict] of text data + # Unused keys kept for compatibility "temp_ids_txt": "", - "temp_index_txt": "", "temp_leann_meta_json": "", - "temp_leann_passages_txt": "", - "temp_leann_passages_json": "" + "temp_leann_passages_txt": "" } - # If the user does have some form of prior saved data, it should have a list of keys, even if invalid else: try: with open(file_embedding_keys, 'r') as file: embedding_keys = json.load(file) - except FileNotFoundError: - print("The file storing saved keys could not be found.") - print("This means that the requested vector to save will be the first to recieve an embedding") - print("If this is your first time adding an embedding to this database, this is the intended behavior") - except Exception as e: - print(f"Unexpected Error - {e}") - print("The program can continue running, but it will treat this values as the first generated vector embedding") - - # Embedding information is stored in ResDB. The next chunk of code ensures that a place to save this information - # exists - either by retrieving it or creating it. 
There are 5 total files used to store vector data - - # (1/5) Create embedding information for the json passages file, which is stored in ResDB as string array - key = embedding_keys["temp_leann_passages_json"] - try: - if (key is None or key == ""): raise KeyError() + except Exception: + pass # Use default empty keys if fail - passages_return_item = hnsw_library.get_record(key) - passages_return_data = passages_return_item["data"] - datapointToText = lambda dataPoint: dataPoint['text'] - hnsw_text_entries = list(map(datapointToText, passages_return_data)) + # (A) Retrieve existing text entries + key_passages = embedding_keys.get("temp_leann_passages_json", "") + try: + if not key_passages: raise KeyError() + passages_return = hnsw_library.get_record(key_passages) + # Expecting data to be List[Dict] -> [{"text": "..."}] + current_data = passages_return["data"] + hnsw_text_entries = [item['text'] for item in current_data] - # This file also contains the saved VALUES, check to make sure we aren't re-saving the same data if value_to_add in hnsw_text_entries: - print(f"{value_to_add} is already saved with an embedding in the ResDB database") - print("Duplicate embeddings yield the same result, this value will not be saved. Terminating...") + print(f"'{value_to_add}' is already saved. 
Skipping.") sys.exit() - + hnsw_text_entries.append(value_to_add) except Exception: + # If fetch fails or key doesn't exist, start fresh hnsw_text_entries = [value_to_add] + # Create initial record placeholder embedding_keys["temp_leann_passages_json"] = hnsw_library.create_record([]) - # (4/5) Create embedding information for text files, which are stored in ResDB as string - for field in ["temp_ids_txt", "temp_index_txt", "temp_leann_passages_txt"]: - key = embedding_keys[field] - try: - # We direct this to except instead of a typical if/else to avoid rewriting the same line of code - if (key is None or key == ""): raise KeyError() - _ = hnsw_library.get_record(key) - except Exception: - embedding_keys[field] = hnsw_library.create_record('') - # (5/5) Create embedding information for the json metadata file, which is stored in ResDB as a Dict - key = embedding_keys["temp_leann_meta_json"] - try: - if (key is None or key == ""): raise KeyError() - _ = hnsw_library.get_record(key) - except Exception: - embedding_keys["temp_leann_meta_json"] = hnsw_library.create_record({}) - # Save the embedding keys to a local file + # (B) Ensure index key exists + if not embedding_keys.get("temp_index_txt"): + embedding_keys["temp_index_txt"] = hnsw_library.create_record('') + + # Save keys locally + with open(file_embedding_keys, 'w') as file: + json.dump(embedding_keys, file) + + # --- SECTION 3: Build Index (HNSW + SentenceTransformers) --- + print("Generating embeddings and building index...") + + # 1. Vectorize text + model = SentenceTransformer(MODEL_NAME) + embeddings = model.encode(hnsw_text_entries) + + # 2. Build HNSW Index + num_elements = len(embeddings) + dim = embeddings.shape[1] + + # Init HNSW index + p = hnswlib.Index(space='cosine', dim=dim) + p.init_index(max_elements=num_elements, ef_construction=200, M=16) + p.add_items(embeddings, np.arange(num_elements)) # IDs are 0, 1, 2... + + # 3. 
Save index to temp file + file_temp_dir = Path(WORKING_DIR / "saved_data/temp") + if not os.path.exists(file_temp_dir): + file_temp_dir.mkdir() + + index_path = str(file_temp_dir / "hnsw_index.bin") + p.save_index(index_path) + + # --- SECTION 4: Save to ResDB --- + print("Saving data to ResDB...") + + # (1) Save Index Binary (Base64 Encoded) + key_index = embedding_keys["temp_index_txt"] try: - with open(file_embedding_keys, 'w') as file: - json.dump(embedding_keys, file) + with open(index_path, 'rb') as f: + content_bytes = f.read() + # Encode binary to base64 string for JSON transport + content_b64 = base64.b64encode(content_bytes).decode('utf-8') + hnsw_library.put_record(key_index, content_b64) except Exception as e: - print(f"Unsuccessful write: {e}") - print("Critical Error - the above error prevents the program from saving locally the keys necessary to track embedding data") - print('This prevents the program from using these embeddings in the future. Consequently, terminating...') + print(f"Error saving index: {e}") sys.exit() -# - - - - - - - - - SECTION 3: Construct the HNSW data structure (leann builder) - - - - - - - - - > - file_temporary_directory = Path(WORKING_DIR / "saved_data/temp") - file_temporary_storage = str(WORKING_DIR / "saved_data/temp/temp.leann") - - # Create the temp directory if it doesn't exist - if not os.path.exists(file_temporary_directory): - file_temporary_directory.mkdir() - - # Leann is extremely noisy, prevent standard output to the console while it runs - # sys.stdout = os.devnull # TODO - - # Construct the HNSW Tree (creates the 5 files referenced below, saved to a temporary folder) - builder = LeannBuilder(backend_name="hnsw") - for text in hnsw_text_entries: - builder.add_text(text) - builder.build_index(file_temporary_storage) - - # Restore standard output to the console - # sys.stdout = sys.__stdout__ # TODO - -# - - - - - - - - - SECTION 4: Save the new embeddings - - - - - - - - - > - # Embedding information using 
this library is split across 5 files. The next chunk of code saves each of - # these files as a kv store value in ResDB, storing text data as a string, and JSON data as a Dict or Dict[] - - - # (2/5) Create embedding information for the txt passages file, which are base64 byte data - for pairing in [ - ("temp.leann.passages.idx", "temp_leann_passages_txt"), - ("temp.index", "temp_index_txt") - ]: - fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) - key = embedding_keys[pairing[1]] - try: - # **CORRECTED LINE: Open in binary read mode ('rb')** - with open(fileName, 'rb') as file: - content_bytes = file.read() - # Decode the bytes to a base64 string for ResDB storage - content = base64.b64encode(content_bytes).decode('utf-8') - _ = hnsw_library.put_record(key, content) - except Exception as e: - print(pairing) - print(f"Unsuccessful save: {e}") - print("Critical Error - the above error completely prevents this embedding from saving to ResDB") - print("this likely has ruined the entire embedding system. Please try to add your value again. If you face") - print("the same error, delete all your saved data by deleting `vector-indexing/saved_data` and start fresh.") - print("Terminating...") - sys.exit() - # (3/5) Create embedding information for the index passages file, which is ascii text data - fileName = str(WORKING_DIR / "saved_data/temp/temp.ids.txt") - key = embedding_keys["temp_ids_txt"] - try: - with open(fileName, 'r', encoding='ascii') as file: - content = file.read() - _ = hnsw_library.put_record(key, content) - except Exception as e: - print(f"Unsuccessful save: {e}") - print("Critical Error - the above error completely prevents this embedding from saving to ResDB") - print("this likely has ruined the entire embedding system. Please try to add your value again. 
If you face") - print("the same error, delete all your saved data by deleting `vector-indexing/saved_data` and start fresh.") - print("Terminating...") - sys.exit() - # (4/5) Create embedding information for the metadata file, which is a single json object - fileName = str(WORKING_DIR / "saved_data/temp/temp.leann.meta.json") - key = embedding_keys["temp_leann_meta_json"] + # (2) Save Text Passages (List of Dicts) + key_passages = embedding_keys["temp_leann_passages_json"] try: - with open(fileName, 'r') as file: - content = json.load(file) - _ =hnsw_library.put_record(key, content) + # Format: [{"text": "val1"}, {"text": "val2"}] + save_data = [{"text": t} for t in hnsw_text_entries] + hnsw_library.put_record(key_passages, save_data) except Exception as e: - print(f"Unsuccessful save: {e}") - print("Critical Error - the above error completely prevents this embedding from saving to ResDB") - print("this likely has ruined the entire embedding system. Please try to add your value again. If you face") - print("the same error, delete all your saved data by deleting `vector-indexing/saved_data` and start fresh.") - print("Terminating...") + print(f"Error saving passages: {e}") sys.exit() - # (5/5) Create embedding information for the passages file, which is a jsonLine file - # consisting of a single json object on each line - fileName = str(WORKING_DIR / "saved_data/temp/temp.leann.passages.jsonl") - key = embedding_keys["temp_leann_passages_json"] - content = [] - try: - with open(fileName, 'r') as file: - # We load each json object line-by-line, saving each as an entry in an array - for line in file: - content.append(json.loads(line)) - _ = hnsw_library.put_record(key, content) - except Exception as e: - print(f"Unsuccessful save: {e}") - print("Critical Error - the above error completely prevents this embedding from saving to ResDB") - print("this likely has ruined the entire embedding system. Please try to add your value again. 
If you face") - print("the same error, delete all your saved data by deleting `vector-indexing/saved_data` and start fresh.") - print("Terminating...") - sys.exit() - -# - - - - - - - - - SECTION 5: Cleanup: Remove temporary files - - - - - - - - - > - # Remove all temporary files created during HNSW Tree creation - for file_temp_embedding in ["temp.ids.txt", "temp.index", "temp.leann.passages.idx", - "temp.leann.meta.json", "temp.leann.passages.jsonl"]: - fileName = str(WORKING_DIR / "saved_data/temp/" / file_temp_embedding) - try: - os.remove(fileName) - except Exception as e: - print(f"Error - A problem occurred while deleting temporary data: {e}") - print("This is non-critical. It is reccomended you delete the folder `vector-indexing/saved_data/temp` to save space") - - # Remove the whole temp directory - if os.path.exists(file_temporary_directory): - file_temporary_directory.rmdir() - - + # --- Cleanup --- + if os.path.exists(index_path): + os.remove(index_path) + if os.path.exists(file_temp_dir): + file_temp_dir.rmdir() -# = = = = = = = = = EXTRA SECTION: Future TODOs = = = = = = = = = > -# > The whole resdb_orm and config.yaml file had to be copied into the vector-indexing directory -# See if there is a way to run this without them. One had to be installed as a package, it's -# weird that it had to be duped in here to work -# > Look into the possibility of saving a value with custom keys in resdb-orm, instead of using -# the random/autogenerated ones. 
This could ultimately lead to not needing to use a -# saved_data/embedding_keys.json file at all \ No newline at end of file + print("Success: Value added and index rebuilt.") \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/vector_delete.py b/ecosystem/sdk/vector-indexing/vector_delete.py index 75b942aaf..5abfb8aa9 100644 --- a/ecosystem/sdk/vector-indexing/vector_delete.py +++ b/ecosystem/sdk/vector-indexing/vector_delete.py @@ -1,184 +1,107 @@ """ Filename: vector_delete.py -Author(s) / Contributor(s): Steven Shoemaker / Regan Yang, Ritesh Patro, Yoshiki Yamaguchi, Tiching Kao -Date: 2025-Fall -Description: (Indexers project) Run to delete a value from the HNSW index by removing it - from the ResDB passages list and triggering a full index rebuild. +Description: Remove value from ResDB list and rebuild HNSW index via hnswlib. """ -# Typical Python imports import sys import os import json -from pathlib import Path -from typing import Dict, Any, List import base64 -# ResDB & HNSW imports +import numpy as np +from pathlib import Path +from typing import Dict, Any + +import hnswlib +from sentence_transformers import SentenceTransformer from resdb_orm.orm import ResDBORM import hnsw_library -from leann import LeannBuilder # Global Variables WORKING_DIR = Path("./").resolve() -db = ResDBORM() - -# --- Utility Function to Rebuild and Save the Index --- -def rebuild_and_save_index(embedding_keys: Dict[str, Any], hnsw_text_entries: List[str]): - """ - Constructs the HNSW index using the current list of text entries and - saves the resulting files back to ResDB using the stored keys. - - This function incorporates all necessary corrections for file encoding - and data structure integrity. 
- """ - file_temporary_directory = Path(WORKING_DIR / "saved_data/temp") - file_temporary_storage = str(WORKING_DIR / "saved_data/temp/temp.leann") - - # Create the temp directory if it doesn't exist - if not os.path.exists(file_temporary_directory): - file_temporary_directory.mkdir() - - print("Rebuilding HNSW index with remaining entries (This can take a moment)...") - - # Construct the HNSW Tree (creates the 5 files) - builder = LeannBuilder(backend_name="hnsw") - for text in hnsw_text_entries: - builder.add_text(text) - builder.build_index(file_temporary_storage) - - # --- SECTION 4: Save the new embeddings (All corrections applied here) --- - - # (2/5) Fix 1: Handle binary files correctly to prevent "ASCII characters" error. - for pairing in [ - ("temp.leann.passages.idx", "temp_leann_passages_txt"), - ("temp.index", "temp_index_txt") - ]: - fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) - key = embedding_keys[pairing[1]] - try: - # **CORRECTED LINE: Open in binary read mode ('rb')** - with open(fileName, 'rb') as file: - content_bytes = file.read() - # Decode the bytes to a base64 string for ResDB storage - content = base64.b64encode(content_bytes).decode('utf-8') - _ = hnsw_library.put_record(key, content) - except Exception as e: - print(f"Critical Save Error (2/5 - {pairing[0]}): {e}") - sys.exit() - - # (3/5) Create embedding information for the index passages file, which is ascii text data - fileName = str(WORKING_DIR / "saved_data/temp/temp.ids.txt") - key = embedding_keys["temp_ids_txt"] - try: - with open(fileName, 'r', encoding='ascii') as file: - content = file.read() - hnsw_library.put_record(key, content) - except Exception as e: - print(f"Critical Save Error (3/5 - temp.ids.txt): {e}") - sys.exit() - - # (4/5) Create embedding information for the metadata file, which is a single json object - fileName = str(WORKING_DIR / "saved_data/temp/temp.leann.meta.json") - key = embedding_keys["temp_leann_meta_json"] - try: - with 
open(fileName, 'r') as file: - content = json.load(file) - hnsw_library.put_record(key, content) - except Exception as e: - print(f"Critical Save Error (4/5 - temp.leann.meta.json): {e}") - sys.exit() +MODEL_NAME = 'all-MiniLM-L6-v2' - # (5/5) Fix 2: Read the newly generated JSONL file and save it as a List[Dict] - fileName = str(WORKING_DIR / "saved_data/temp/temp.leann.passages.jsonl") - key = embedding_keys["temp_leann_passages_json"] - content = [] - try: - with open(fileName, 'r') as file: - # We load each json object line-by-line, saving each as an entry in an array - for line in file: - content.append(json.loads(line)) - # The result is a list of dictionaries, correcting the prior corruption issue - hnsw_library.put_record(key, content) - except Exception as e: - print(f"Critical Save Error (5/5 - passages_jsonl): {e}") - sys.exit() - - print("Index rebuild and save complete.") - - # --- SECTION 5: Cleanup: Remove temporary files --- - for file_temp_embedding in ["temp.ids.txt", "temp.index", "temp.leann.passages.idx", - "temp.leann.meta.json", "temp.leann.passages.jsonl"]: - fileName = str(WORKING_DIR / "saved_data/temp/" / file_temp_embedding) - try: - os.remove(fileName) - except Exception: - pass - - if os.path.exists(file_temporary_directory): - file_temporary_directory.rmdir() - - -# - - - - - - - - - SECTION 1: Init and data cleaning - - - - - - - - - > if __name__ == "__main__": - # Input Variable + # --- SECTION 1: Input Parsing --- value_to_delete = '' - - # Parse the value that the user is requesting to delete - for i in range (len(sys.argv)): - if(sys.argv[i] == '--value' and (i + 1 != len(sys.argv))): + for i in range(len(sys.argv)): + if sys.argv[i] == '--value' and (i + 1 != len(sys.argv)): value_to_delete = sys.argv[i + 1] break - if value_to_delete == '': - print("Critical Error - the program requires an argument in the form of `--value stringToDelete`") + if not value_to_delete: + print("Error: Requires argument `--value stringToDelete`") 
sys.exit() -# - - - - - - - - - SECTION 2: Retrieve HNSW data keys and passages list - - - - - - - - - > + # --- SECTION 2: Retrieve Data --- file_embedding_keys = str(WORKING_DIR / "saved_data/embedding_keys.json") - embedding_keys: Dict[str, Any] = {} - - # Retrieve the keys saving the location of embedding data try: - if (not os.path.exists(file_embedding_keys)): raise FileNotFoundError() with open(file_embedding_keys, 'r') as file: embedding_keys = json.load(file) - except FileNotFoundError: - print("Critical Error - The file listing key embeddings does not exist. Cannot delete.") - sys.exit() - except Exception as e: - print(f"Critical Error - {e}") + except Exception: + print("Error: keys file missing.") sys.exit() - # Get the current list of text passages from ResDB + hnsw_text_entries = [] try: - key = embedding_keys["temp_leann_passages_json"] - if (key is None or key == ""): raise KeyError() - - passages_return_item = hnsw_library.get_record(key) - passages_return_data = passages_return_item["data"] - - # Extract just the text from the list of dictionaries - datapointToText = lambda dataPoint: dataPoint['text'] - current_text_entries = list(map(datapointToText, passages_return_data)) + key_passages = embedding_keys["temp_leann_passages_json"] + ret = hnsw_library.get_record(key_passages) + current_data = ret["data"] + hnsw_text_entries = [item['text'] for item in current_data] except Exception as e: - print(f"Critical Error retrieving passages list: {e}") - print("Cannot find or access the list of saved values. Terminating...") + print(f"Error retrieving data: {e}") sys.exit() -# - - - - - - - - - SECTION 3: Delete the value and trigger rebuild - - - - - - - - - > - if value_to_delete not in current_text_entries: - print(f"Error: '{value_to_delete}' not found in the indexed entries. 
Nothing deleted.") - # NOTE: Even if nothing is deleted, this still proceeds to rebuild and save, which - # is necessary to fix the corruption caused by prior runs of vector_add.py - # pass + # --- SECTION 3: Modify Data --- + if value_to_delete not in hnsw_text_entries: + print(f"Warning: '{value_to_delete}' not found. Nothing deleted.") sys.exit() - else: - # Create the new list of entries, excluding the one to delete - current_text_entries = [text for text in current_text_entries if text != value_to_delete] - print(f"Value '{value_to_delete}' removed from text entries list.") + + hnsw_text_entries = [t for t in hnsw_text_entries if t != value_to_delete] + print(f"Removed '{value_to_delete}'. Rebuilding index...") - # Trigger the rebuild and save - rebuild_and_save_index(embedding_keys, current_text_entries) + if not hnsw_text_entries: + print("List is now empty. Please add new data to rebuild index.") + # Consider clearing the remote record here if desired + sys.exit() + + # --- SECTION 4: Rebuild & Save (Same logic as vector_add) --- + + # 1. Vectorize + model = SentenceTransformer(MODEL_NAME) + embeddings = model.encode(hnsw_text_entries) + + # 2. Build Index + num_elements = len(embeddings) + dim = embeddings.shape[1] + p = hnswlib.Index(space='cosine', dim=dim) + p.init_index(max_elements=num_elements, ef_construction=200, M=16) + p.add_items(embeddings, np.arange(num_elements)) + + # 3. Save Index Temp + file_temp_dir = Path(WORKING_DIR / "saved_data/temp") + if not os.path.exists(file_temp_dir): + file_temp_dir.mkdir() + index_path = str(file_temp_dir / "hnsw_index.bin") + p.save_index(index_path) + + # 4. 
Upload to ResDB + try: + # Save Index + key_index = embedding_keys["temp_index_txt"] + with open(index_path, 'rb') as f: + content_b64 = base64.b64encode(f.read()).decode('utf-8') + hnsw_library.put_record(key_index, content_b64) + + # Save Passages + key_passages = embedding_keys["temp_leann_passages_json"] + save_data = [{"text": t} for t in hnsw_text_entries] + hnsw_library.put_record(key_passages, save_data) + + print("Success: Index rebuilt and saved.") + except Exception as e: + print(f"Error saving updates: {e}") - print(f"\nSUCCESS: The HNSW index has been rebuilt and saved with correct file encodings.") - if value_to_delete in current_text_entries: - print(f"The value '{value_to_delete}' was not in the index, but the system files have been repaired.") \ No newline at end of file + # Cleanup + if os.path.exists(index_path): + os.remove(index_path) + if os.path.exists(file_temp_dir): + file_temp_dir.rmdir() \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/vector_get.py b/ecosystem/sdk/vector-indexing/vector_get.py index 9aa1ff5fe..4ba647dbd 100644 --- a/ecosystem/sdk/vector-indexing/vector_get.py +++ b/ecosystem/sdk/vector-indexing/vector_get.py @@ -1,207 +1,124 @@ """ Filename: vector_get.py -Author(s) / Contributor(s): Steven Shoemaker / Regan Yang, Ritesh Patro, Yoshiki Yamaguchi, Tiching Kao -Date: 2025-Fall -Description: (Indexers project) Run to search ResDB for the embeddings k-closest to input string +Description: Retrieve index/data from ResDB and search using hnswlib. 
""" -# Typical Python imports import sys import os import json -from pathlib import Path -from typing import Dict, List, Any import base64 -# ResDB & HNSW imports +import numpy as np +from pathlib import Path +from typing import Dict, Any + +import hnswlib +from sentence_transformers import SentenceTransformer from resdb_orm.orm import ResDBORM import hnsw_library -from leann import LeannSearcher # Global Variables WORKING_DIR = Path("./").resolve() -db = ResDBORM() +MODEL_NAME = 'all-MiniLM-L6-v2' -# - - - - - - - - - SECTION 1: Init and data cleaning - - - - - - - - - > if __name__ == "__main__": - # Input Variables + # --- SECTION 1: Input Parsing --- search_value = "" - k_matches = 0 + k_matches = 1 return_all = False - # Parse the values that the user is requesting to add - for i in range (len(sys.argv)): - if(sys.argv[i] == '--value' and (i + 1 != len(sys.argv))): + for i in range(len(sys.argv)): + if sys.argv[i] == '--value' and (i + 1 != len(sys.argv)): search_value = sys.argv[i + 1] - - if(sys.argv[i] == '--k_matches' and (i + 1 != len(sys.argv))): - # Ensure that k_matches is in the form of a nondecimal number + if sys.argv[i] == '--k_matches' and (i + 1 != len(sys.argv)): try: k_matches = int(sys.argv[i + 1]) except ValueError: - print("Invalid input - The input to `--k_matches` must be an integer number") - sys.exit() - - if(sys.argv[i] == '--show_all'): + k_matches = 1 + if sys.argv[i] == '--show_all': return_all = True - if not return_all: - # If the user doesn't request to return everything OR search on something specific, error out - if not search_value: - print('Invalid input - please use one of the two following combinations of flags:') - print('(1) Use flag `--value STRING` to find most similar terms to STRING. In addition, use') - print(' flag `--k_matches ###` to search for the k-closest strings. 
Leave blank to only find one') - print('(2) Use flag `--show_all` with no arguements to list ALL values that correlate with a vector embedding') - print(' this will override all other flags used') - sys.exit() - - # If the user is searching on a specific string, ensure that the requested number of matches is a whole number - if k_matches <= 0: - print('No or invalid arguement provided for --k_matches. Defaulting to finding one single most similar value') - k_matches = 1 + if not search_value and not return_all: + print("Error: Provide --value 'query' or --show_all") + sys.exit() -# - - - - - - - - - SECTION 2: Retrieve keys to HNSW data - - - - - - - - - > - file_saved_directory = Path(WORKING_DIR / "saved_data") + # --- SECTION 2: Retrieve Keys --- file_embedding_keys = str(WORKING_DIR / "saved_data/embedding_keys.json") - file_embedding_keys_path = Path(WORKING_DIR / "saved_data/embedding_keys.json") - embedding_keys: Dict[str, Any] = {} - - # Retrieve the keys saving the location of embedding data try: - # We direct this to except instead of a typical if/else to avoid rewriting the same line of code - if (not os.path.exists(file_saved_directory)): raise FileNotFoundError() - if (not file_embedding_keys_path.is_file()): raise FileNotFoundError() with open(file_embedding_keys, 'r') as file: embedding_keys = json.load(file) - except FileNotFoundError: - print("Critical Error - The file listing key embeddings does not exist. Please add a vector value before trying to retrieve similar values") - print("Terminating...") - sys.exit() - except Exception as e: - print(f"Critical Error - {e}") - print("There is no protocol for handling this error, but it is known it will prevent retrieval of embedding data. Terminating...") + except Exception: + print("Error: Could not load embedding keys. 
Add data first.") sys.exit() -# - - - - - - - - - SECTION 3: Save the embedding data to temporary files - - - - - - - - - > - file_temporary_directory = Path(WORKING_DIR / "saved_data/temp") - - # Create the temp directory if it doesn't exist - if not os.path.exists(file_temporary_directory): - file_temporary_directory.mkdir() - - # Embedding information using this library is split across 5 files. The next chunk of code retrieves - # each file from ResDB, temporarily saving it - - # (2/5) Save embedding information for the untyped files, which are raw byte data - embedding_data = [ - ("temp.leann.passages.idx", "temp_leann_passages_txt"), - ("temp.index", "temp_index_txt") - ] - for pairing in embedding_data: - fileName = str(WORKING_DIR / "saved_data/temp/" / pairing[0]) - key = embedding_keys[pairing[1]] - file_return_item = hnsw_library.get_record(key) - file_return_data = file_return_item["data"] - try: - # Encode the latin-1 string back to bytes - content_bytes = base64.b64decode(file_return_data) - - # **CRITICAL FIX: Open in binary write mode ('wb')** - with open(fileName, 'wb') as file: - file.write(content_bytes) # Write the raw bytes - except Exception as e: - print(f"Unsuccessful ResDB retrieval for untyped file: {e}") - print("Critical Error - the above error indicates that a file used for vector embeddings is improperly saved") - print("This likely has ruined the entire embedding system. 
If you face the same error, delete all your saved") - print("data by deleting `vector-indexing/saved_data` and start fresh.") - print("Terminating...") - sys.exit() - # (3/5) Save embedding information for the ID text file, which is ascii data - fileName = str(WORKING_DIR / "saved_data/temp/temp.ids.txt") - key = embedding_keys["temp_ids_txt"] - file_return_item = hnsw_library.get_record(key) - file_return_data = file_return_item["data"] + # --- SECTION 3: Fetch Data from ResDB --- + file_temp_dir = Path(WORKING_DIR / "saved_data/temp") + if not os.path.exists(file_temp_dir): + file_temp_dir.mkdir() + + # (A) Fetch Passages (Text Data) + passages_data = [] try: - with open(fileName, 'w', encoding="ascii") as file: - file.write(file_return_data) + key_passages = embedding_keys["temp_leann_passages_json"] + ret = hnsw_library.get_record(key_passages) + passages_data = ret["data"] # Expecting List[Dict] except Exception as e: - print(f"Unsuccessful ResDB retrieval for text file: {e}") - print("Critical Error - the above error indicates that a file used for vector embeddings is improperly saved") - print("This likely has ruined the entire embedding system. If you face the same error, delete all your saved") - print("data by deleting `vector-indexing/saved_data` and start fresh.") - print("Terminating...") + print(f"Error retrieving passages: {e}") + sys.exit() + + if return_all: + print(f"--- All Stored Values ({len(passages_data)}) ---") + for i, item in enumerate(passages_data): + print(f"{i+1}. 
{item['text']}") sys.exit() - # (4/5) Save embedding information for the json file - fileName = str(WORKING_DIR / "saved_data/temp/temp.leann.meta.json") - key = embedding_keys["temp_leann_meta_json"] - file_return_item = hnsw_library.get_record(key) - file_return_data = file_return_item["data"] + + # (B) Fetch Index (Binary) + index_path = str(file_temp_dir / "hnsw_index.bin") try: - with open(fileName, 'w') as file: - json.dump(file_return_data, file) + key_index = embedding_keys["temp_index_txt"] + ret = hnsw_library.get_record(key_index) + content_b64 = ret["data"] + content_bytes = base64.b64decode(content_b64) + with open(index_path, 'wb') as f: + f.write(content_bytes) except Exception as e: - print(f"Unsuccessful ResDB retrieval for json file: {e}") - print("Critical Error - the above error indicates that a file used for vector embeddings is improperly saved") - print("This likely has ruined the entire embedding system. If you face the same error, delete all your saved") - print("data by deleting `vector-indexing/saved_data` and start fresh.") - print("Terminating...") + print(f"Error retrieving index: {e}") sys.exit() - # (5/5) Save embedding information for the jsonLine file - fileName = str(WORKING_DIR / "saved_data/temp/temp.leann.passages.jsonl") - - # Since each json object is on a new line (it's a jsonl file), we append instead of overwriting - # So we must force the file to delete/recreate to avoid appending over old data - filePath = Path(fileName) - if filePath.is_file(): - os.remove(fileName) - key = embedding_keys["temp_leann_passages_json"] - file_return_item = hnsw_library.get_record(key) - file_return_data: List[Dict[str, Any]] = file_return_item["data"] - - # Delimit each json object with lines, instead of just as entires in a list + + # --- SECTION 4: Search --- try: - with open(fileName, 'a') as file: - for i, line in enumerate(file_return_data): - json.dump(line, file) - if i != (len(file_return_data) - 1): - file.write("\n") + # 1. 
Embed Query + model = SentenceTransformer(MODEL_NAME) + query_vector = model.encode([search_value]) + + # 2. Load Index + dim = query_vector.shape[1] + num_elements = len(passages_data) + + # Safety check: if index is empty but code ran + if num_elements == 0: + print("Index is empty.") + sys.exit() + + p = hnswlib.Index(space='cosine', dim=dim) + # allow slightly more elements to prevent load error if sizes mismatch slightly + p.load_index(index_path, max_elements=num_elements + 100) + + # 3. Query + real_k = min(k_matches, num_elements) + labels, distances = p.knn_query(query_vector, k=real_k) + + # 4. Output + # print(f"--- Search Results for '{search_value}' ---") + for i, (idx, dist) in enumerate(zip(labels[0], distances[0])): + text = passages_data[idx]['text'] + # Convert cosine distance to similarity score approx (1 - dist) + score = 1.0 - dist + print(f"{i+1}. {text} // (similarity score: {score:.4f})") + except Exception as e: - print(f"Unsuccessful ResDB retrieval for jsonLine file: {e}") - print("Critical Error - the above error indicates that a file used for vector embeddings is improperly saved") - print("This likely has ruined the entire embedding system. 
If you face the same error, delete all your saved") - print("data by deleting `vector-indexing/saved_data` and start fresh.") - print("Terminating...") - sys.exit() + print(f"Search failed: {e}") -# - - - - - - - - - SECTION 4: Re-Construct the HNSW data structure (leann searcher) - - - - - - - - - > - total_searches = sys.maxsize if return_all else k_matches - - # Leann is extremely noisy, prevent standard output to the console while it runs - # sys.stdout = os.devnull # TODO - - file_temporary_storage = str(WORKING_DIR / "saved_data/temp/temp.leann") - searcher = LeannSearcher(file_temporary_storage) - results = searcher.search(search_value, top_k=total_searches) - - # Restore standard output to the console - # sys.stdout = sys.__stdout__ # TODO - - # Print results to the console - for i, line in enumerate(results): - if return_all: - print(f"{i+1}. {line.text}") - else: - print(f"{i+1}. {line.text} // (similarity score: {line.score})") - -# - - - - - - - - - SECTION 5: Cleanup: Remove temporary files - - - - - - - - - > - # Remove all temporary files created during HNSW Tree Search - for temp_file_path in ["temp.ids.txt", "temp.index", "temp.leann.passages.idx", - "temp.leann.meta.json", "temp.leann.passages.jsonl"]: - fileName = str(WORKING_DIR / "saved_data/temp/" / temp_file_path) - try: - os.remove(fileName) - except Exception as e: - print(f"Error - A problem occurred while deleting temporary data: {e}") - print("This is non-critical. 
It is reccomended you delete the folder `vector-indexing/saved_data/temp` to save space") - - # Remove the whole temp directory - if os.path.exists(file_temporary_directory): - file_temporary_directory.rmdir() \ No newline at end of file + # --- Cleanup --- + if os.path.exists(index_path): + os.remove(index_path) + if os.path.exists(file_temp_dir): + file_temp_dir.rmdir() \ No newline at end of file From ac4cb5516af647fb3e1ce93f838b77b91988eb26 Mon Sep 17 00:00:00 2001 From: SideCoin Date: Fri, 5 Dec 2025 22:23:47 -0800 Subject: [PATCH 58/79] stress test KV --- hnsw-test/index_test/README.md | 54 +++++++++++++ hnsw-test/index_test/benchmark_results.txt | 64 +++++++++++++++ hnsw-test/index_test/benchmark_set.py | 74 ++++++++++++++++++ hnsw-test/index_test/gen_files.py | 32 ++++++++ hnsw-test/index_test/multi_benchmarks.py | 78 +++++++++++++++++++ hnsw-test/index_test/size.txt | 1 + .../tools/kv/api_tools/kv_service_tools.cpp | 18 ++++- 7 files changed, 320 insertions(+), 1 deletion(-) create mode 100644 hnsw-test/index_test/README.md create mode 100644 hnsw-test/index_test/benchmark_results.txt create mode 100644 hnsw-test/index_test/benchmark_set.py create mode 100644 hnsw-test/index_test/gen_files.py create mode 100644 hnsw-test/index_test/multi_benchmarks.py create mode 100644 hnsw-test/index_test/size.txt diff --git a/hnsw-test/index_test/README.md b/hnsw-test/index_test/README.md new file mode 100644 index 000000000..be563dd16 --- /dev/null +++ b/hnsw-test/index_test/README.md @@ -0,0 +1,54 @@ +This program aims to stress test the KV store using the configuration below. We try to find at roughly what value size (MB) does the KV store start experiencing problems e.g extremely long set time or infinite wait time. For the setup below we experienced problems around the 150MB - 200MB mark. Note we are only testing the set function. + +Note, all commands assume you're in the indexers-ECS265-Fall2025 directory, basically the starting directory to the repo. 
Also have the kv service running correctly in the first place. To run this, first go to service\tools\kv\api_tools\kv_service_tools.cpp and find all comments with #SIZE TEST and uncomment those blocks. You'll need to rerun INSTALL.sh. If you're getting problems for this try running bazel build service/tools/kv/api_tools/kv_service_tools. +Once the KV is up and running, use the Test File Generate Command to create a random test file of specific size. Afterwards you may use any of the benchmark commands. + +# KV Store Stress Test + +This program is designed to stress test the KV store's `set` function. The goal is to identify the value size (in MB) at which the KV store begins to experience performance degradation (e.g., extremely long set times or infinite wait times). + +> **Findings:** Using the configuration below, we experienced problems around the **150MB - 200MB** mark. + +## Prerequisites & Setup + +**Important:** All commands assume you are in the root directory of the repo: `indexers-ECS265-Fall2025`. You must also have the KV service running. + +1. **Modify Source:** Navigate to `service/tools/kv/api_tools/kv_service_tools.cpp`. +2. **Enable Test:** Find all comments marked with `#SIZE TEST` and uncomment those code blocks. +3. **Build:** Rerun the install script: + ```bash + ./INSTALL.sh + ``` +4. 
If you run into bazel problems + ``` + bazel build service/tools/kv/api_tools/kv_service_tools + ``` +## Example Configuration: + 8GB RAM Shell + Standard 5 replica config from `./service/tools/kv/server_tools/start_kv_service.sh` + +### SET Command: + bazel-bin/service/tools/kv/api_tools/kv_service_tools \ + --config service/tools/config/interface/service.config \ + --cmd set_with_version \ + --key key1 \ + --version 0 \ + --value_path hnsw-test/index_test/FILE + EX: bazel-bin/service/tools/kv/api_tools/kv_service_tools \ + --config service/tools/config/interface/service.config \ + --cmd set_with_version \ + --key key1 \ + --version 0 \ + --value_path hnsw-test/index_test/size.txt + +### Benchmark command: + python3 hnsw-test/index_test/benchmark_set.py KEY FILE + EX: python3 hnsw-test/index_test/benchmark_set.py key1 hnsw-test/index_test/val_50mb.txt + +### Multiple Benchmark command: + python3 hnsw-test/index_test/multi_benchmarks.py KEY FILE TEST_AMOUNT(INT) + EX: python3 hnsw-test/index_test/multi_benchmarks.py key1 hnsw-test/index_test/val_200mb.txt 5 + +### Test File Generate command: + python3 hnsw-test/index_test/gen_files.py SIZE_IN_MB + EX: python3 hnsw-test/index_test/gen_files.py 100 diff --git a/hnsw-test/index_test/benchmark_results.txt b/hnsw-test/index_test/benchmark_results.txt new file mode 100644 index 000000000..0df7c18ab --- /dev/null +++ b/hnsw-test/index_test/benchmark_results.txt @@ -0,0 +1,64 @@ + + +======================================== +BENCHMARK RUN: 2025-12-05 20:00:24.201759 +File: hnsw-test/index_test/val_100mb.txt | Iterations: 5 +======================================== +Iteration 1: 18.2446s | 5.48 MB/s +Iteration 2: 17.7338s | 5.64 MB/s +Iteration 3: 12.0109s | 8.33 MB/s +Iteration 4: 12.1792s | 8.21 MB/s +Iteration 5: 11.7862s | 8.48 MB/s + +---------------------------------------- +SUMMARY: +Average Time: 14.3909 seconds +Average Throughput: 7.23 MB/s +---------------------------------------- + 
+======================================== +BENCHMARK RUN: 2025-12-05 20:03:50.255545 +File: hnsw-test/index_test/val_100mb.txt | Iterations: 5 +======================================== +Iteration 1: 12.2164s | 8.19 MB/s +Iteration 2: 11.8955s | 8.41 MB/s +Iteration 3: 12.3887s | 8.07 MB/s +Iteration 4: 13.6550s | 7.32 MB/s +Iteration 5: 11.7795s | 8.49 MB/s + +---------------------------------------- +SUMMARY: +Average Time: 12.3870 seconds +Average Throughput: 8.10 MB/s +---------------------------------------- + +======================================== +BENCHMARK RUN: 2025-12-05 20:41:15.569589 +File: hnsw-test/index_test/val_150mb.txt | Iterations: 5 +======================================== +Iteration 1: 29.0940s | 5.16 MB/s +Iteration 2: 28.9977s | 5.17 MB/s +Iteration 3: 25.4595s | 5.89 MB/s +Iteration 4: 24.7560s | 6.06 MB/s +Iteration 5: 24.7177s | 6.07 MB/s + +---------------------------------------- +SUMMARY: +Average Time: 26.6050 seconds +Average Throughput: 5.67 MB/s +---------------------------------------- + +======================================== +BENCHMARK RUN: 2025-12-05 21:01:05.072369 +File: hnsw-test/index_test/val_200mb.txt | Iterations: 5 +======================================== +Iteration 1: 45.9341s | 4.35 MB/s + +======================================== +BENCHMARK RUN: 2025-12-05 21:09:13.801146 +File: hnsw-test/index_test/val_200mb.txt | Iterations: 5 +======================================== +Iteration 1: 24.8878s | 8.04 MB/s +Iteration 2: 37.1333s | 5.39 MB/s +Iteration 3: 40.9425s | 4.88 MB/s +Iteration 4: 37.0935s | 5.39 MB/s diff --git a/hnsw-test/index_test/benchmark_set.py b/hnsw-test/index_test/benchmark_set.py new file mode 100644 index 000000000..ca1edb3a4 --- /dev/null +++ b/hnsw-test/index_test/benchmark_set.py @@ -0,0 +1,74 @@ +import subprocess +import time +import os +import sys + +# --- CONFIGURATION --- +# Update these paths if your script is not in the root of the repo +BINARY_PATH = 
"bazel-bin/service/tools/kv/api_tools/kv_service_tools" +CONFIG_PATH = "service/tools/config/interface/service.config" +# --------------------- + +def run_benchmark(key, value_file_path): + # Check if file exists + if not os.path.exists(value_file_path): + print(f"Error: File {value_file_path} not found.") + return + + file_size_mb = os.path.getsize(value_file_path) / (1024 * 1024) + print(f"Preparing to set key='{key}' with file='{value_file_path}' ({file_size_mb:.2f} MB)...") + + # Construct the command + # Note: Using the 'value_path' flag (short flag -p) we added earlier + cmd = [ + BINARY_PATH, + "--config", CONFIG_PATH, + "--cmd", "set_with_version", + "--key", key, + "--version", "0", + "--value_path", value_file_path + ] + + print("Running command...") + + # Start Timer + start_time = time.time() + + try: + # Run the process + result = subprocess.run(cmd, capture_output=True, text=True) + + # Stop Timer + end_time = time.time() + duration = end_time - start_time + + if result.returncode != 0: + print("\n❌ Command failed!") + print("Error output:", result.stderr) + print("Standard output:", result.stdout) + else: + print(f"\n✅ Success!") + print(f"Time taken: {duration:.4f} seconds") + + # Calculate throughput if duration is non-zero + if duration > 0: + throughput = file_size_mb / duration + print(f"Throughput: {throughput:.2f} MB/s") + + # Optional: Print program output (truncated) + # print("Output:", result.stdout) + + except FileNotFoundError: + print(f"\n❌ Error: Could not find binary at {BINARY_PATH}") + print("Did you run 'bazel build ...'?") + +if __name__ == "__main__": + if len(sys.argv) < 3: + print("Usage: python3 benchmark_set.py ") + print("Example: python3 benchmark_set.py key1 val_10mb.txt") + sys.exit(1) + + target_key = sys.argv[1] + target_file = sys.argv[2] + + run_benchmark(target_key, target_file) \ No newline at end of file diff --git a/hnsw-test/index_test/gen_files.py b/hnsw-test/index_test/gen_files.py new file mode 100644 index 
000000000..d86beffc5 --- /dev/null +++ b/hnsw-test/index_test/gen_files.py @@ -0,0 +1,32 @@ +import sys +import os + +def create_file(size_mb): + # 1. Get the directory where this script is currently living + script_dir = os.path.dirname(os.path.abspath(__file__)) + + filename = f"val_{size_mb}mb.txt" + + # 2. Combine that directory with the filename + full_path = os.path.join(script_dir, filename) + + # Calculate size in bytes + size_bytes = size_mb * 1024 * 1024 + + print(f"Generating {full_path}...") + + # 3. Write the random data + with open(full_path, "wb") as f: + # Note: For extremely large files (e.g. > RAM size), + # you might want to write in chunks. + # For typical testing (up to a few GB), this is fine. + f.write(os.urandom(size_bytes)) + + print(f"Done! Created {full_path}") + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: python3 gen_files.py ") + sys.exit(1) + + create_file(int(sys.argv[1])) \ No newline at end of file diff --git a/hnsw-test/index_test/multi_benchmarks.py b/hnsw-test/index_test/multi_benchmarks.py new file mode 100644 index 000000000..83492da9d --- /dev/null +++ b/hnsw-test/index_test/multi_benchmarks.py @@ -0,0 +1,78 @@ +import subprocess +import sys +import os +import re +from datetime import datetime + +# --- CONFIGURATION --- +BENCHMARK_SCRIPT = "benchmark_set.py" +OUTPUT_FILE = "benchmark_results.txt" +# --------------------- + +def run_tests(key, value_file, iterations): + # 1. Setup paths + script_dir = os.path.dirname(os.path.abspath(__file__)) + output_path = os.path.join(script_dir, OUTPUT_FILE) + benchmark_script_path = os.path.join(script_dir, BENCHMARK_SCRIPT) + + # 2. 
Open the file to write results + with open(output_path, "a") as f: # 'a' for append mode + header = f"\n{'='*40}\nBENCHMARK RUN: {datetime.now()}\nFile: {value_file} | Iterations: {iterations}\n{'='*40}\n" + print(header) + f.write(header) + + times = [] + throughputs = [] + + for i in range(1, iterations + 1): + print(f"Running iteration {i}/{iterations}...", end="", flush=True) + + # Run the existing benchmark_set.py + cmd = ["python3", benchmark_script_path, key, value_file] + result = subprocess.run(cmd, capture_output=True, text=True) + + if result.returncode != 0: + print(" ❌ Failed") + f.write(f"Iteration {i}: FAILED\nError: {result.stderr}\n") + else: + output = result.stdout + + # Extract numbers using Regex to store for stats + # Looking for: "Time taken: 1.2345 seconds" and "Throughput: 45.67 MB/s" + time_match = re.search(r"Time taken:\s+([\d\.]+)", output) + thpt_match = re.search(r"Throughput:\s+([\d\.]+)", output) + + if time_match and thpt_match: + t_val = float(time_match.group(1)) + tp_val = float(thpt_match.group(1)) + times.append(t_val) + throughputs.append(tp_val) + + log_line = f"Iteration {i}: {t_val:.4f}s | {tp_val:.2f} MB/s\n" + print(f" ✅ ({t_val:.4f}s)") + f.write(log_line) + else: + print(" ⚠️ Output format unexpected") + f.write(f"Iteration {i}: Output format unexpected\nRaw: {output}\n") + + # 3. 
Calculate and write averages + if times: + avg_time = sum(times) / len(times) + avg_thpt = sum(throughputs) / len(throughputs) + summary = (f"\n{'-'*40}\n" + f"SUMMARY:\n" + f"Average Time: {avg_time:.4f} seconds\n" + f"Average Throughput: {avg_thpt:.2f} MB/s\n" + f"{'-'*40}\n") + print(summary) + f.write(summary) + + print(f"Results saved to: {output_path}") + +if __name__ == "__main__": + if len(sys.argv) < 4: + print("Usage: python3 run_multiple_benchmarks.py ") + print("Example: python3 run_multiple_benchmarks.py key1 val_50mb.txt 5") + sys.exit(1) + + run_tests(sys.argv[1], sys.argv[2], int(sys.argv[3])) \ No newline at end of file diff --git a/hnsw-test/index_test/size.txt b/hnsw-test/index_test/size.txt new file mode 100644 index 000000000..c34cc7553 --- /dev/null +++ b/hnsw-test/index_test/size.txt @@ -0,0 +1 @@ +12321312313123313123 \ No newline at end of file diff --git a/service/tools/kv/api_tools/kv_service_tools.cpp b/service/tools/kv/api_tools/kv_service_tools.cpp index b5cd53444..e2f3c1f88 100644 --- a/service/tools/kv/api_tools/kv_service_tools.cpp +++ b/service/tools/kv/api_tools/kv_service_tools.cpp @@ -65,6 +65,7 @@ static struct option long_options[] = { {"min_key", required_argument, NULL, 'y'}, {"max_key", required_argument, NULL, 'Y'}, {"top", required_argument, NULL, 't'}, + //{"value_path", required_argument, NULL, 'p'}, //#SIZE TEST }; void OldAPI(char** argv) { @@ -174,6 +175,19 @@ int main(int argc, char** argv) { case 'h': ShowUsage(); break; + /* //#SIZE TEST + case 'p': { + std::ifstream t(optarg); + if (!t.is_open()) { + printf("Error: Could not open value file: %s\n", optarg); + return -1; + } + std::string str((std::istreambuf_iterator(t)), + std::istreambuf_iterator()); + value = str; + break; + } + */ } } @@ -188,7 +202,9 @@ int main(int argc, char** argv) { } int ret = client.Set(key, value, version); printf("set key = %s, value = %s, version = %d done, ret = %d\n", - key.c_str(), value.c_str(), version, ret); + key.c_str(), 
value.c_str(), version, ret); + /*printf("set key = %s, value_size = %lu, version = %d done, ret = %d\n", //#SIZE TEST + key.c_str(), value.size(), version, ret);*/ if (ret == 0) { usleep(100000); auto res = client.Get(key, 0); From 32b7ce0d08e2e893306edb70d5865628b36efd2e Mon Sep 17 00:00:00 2001 From: SideCoin Date: Fri, 5 Dec 2025 22:28:47 -0800 Subject: [PATCH 59/79] bazel --- .bazelrc | 39 +-------------------------------------- 1 file changed, 1 insertion(+), 38 deletions(-) diff --git a/.bazelrc b/.bazelrc index fc41e7499..037e0e10a 100644 --- a/.bazelrc +++ b/.bazelrc @@ -1,38 +1 @@ -# ============================================ -# Bazel Configuration for MacBook Air M4 -# ============================================ - -# Architecture settings for Apple Silicon -build --cpu=darwin_arm64 -build --host_cpu=darwin_arm64 - -# M4 specific optimizations -build --copt=-march=armv8.6-a -build --copt=-O3 -build --copt=-DNDEBUG - -# C++ standard -build --cxxopt=-std=c++17 -build --cxxopt=-stdlib=libc++ - -# OpenSSL configuration for Apple Silicon -build --action_env=OPENSSL_ROOT_DIR=/opt/homebrew/opt/openssl@3 -build --action_env=OPENSSL_INCLUDE_DIR=/opt/homebrew/opt/openssl@3/include -build --action_env=OPENSSL_LIB_DIR=/opt/homebrew/opt/openssl@3/lib - -# Compiler warnings -build --cxxopt=-Wno-deprecated-declarations -build --cxxopt=-Wno-unused-parameter -build --cxxopt=-Wno-unused-variable - -# Performance settings (adjust for M4's 8 cores: 4P + 4E) -build --jobs=6 -build --local_cpu_resources=6 - -# Build output -build --verbose_failures -build --compilation_mode=opt - -# Test settings -test --test_output=errors -test --test_timeout=300 +build --cxxopt='-std=c++17' From 692f772e575730e25eab278ab25b149b5bbb4cef Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Sat, 6 Dec 2025 01:28:53 -0800 Subject: [PATCH 60/79] Added kv_vector, a CLI tool that interfaces with the GraphQL proxy --- ecosystem/graphql/app.py | 11 ++- ecosystem/sdk/vector-indexing/kv_vector.py | 
87 +++++++++++++++++++ .../sdk/vector-indexing/kv_vector_library.py | 71 +++++++++++++++ ecosystem/sdk/vector-indexing/vector_add.py | 3 +- 4 files changed, 168 insertions(+), 4 deletions(-) create mode 100644 ecosystem/sdk/vector-indexing/kv_vector.py create mode 100644 ecosystem/sdk/vector-indexing/kv_vector_library.py diff --git a/ecosystem/graphql/app.py b/ecosystem/graphql/app.py index cee6005c1..81032f1a8 100644 --- a/ecosystem/graphql/app.py +++ b/ecosystem/graphql/app.py @@ -161,10 +161,15 @@ def count_cats(self) -> str: # --- New: Vector Search Query --- @strawberry.field - def searchVector(self, text: str, k: int = 3) -> List[VectorSearchResult]: + def searchVector(self, text: str = None, k: int = 1) -> List[VectorSearchResult]: """Search for similar texts using the HNSW index.""" - success, output = run_vector_script("vector_get.py", ["--value", text, "--k_matches", str(k)]) - + success = False + output = "" + if text is None: + success, output = run_vector_script("vector_get.py", ["--show_all"]) + else: + success, output = run_vector_script("vector_get.py", ["--value", text, "--k_matches", str(k)]) + results = [] if not success: # Log error internally if needed, returning empty list or raising error diff --git a/ecosystem/sdk/vector-indexing/kv_vector.py b/ecosystem/sdk/vector-indexing/kv_vector.py new file mode 100644 index 000000000..e564d5256 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/kv_vector.py @@ -0,0 +1,87 @@ +""" +Filename: kv_vector.py +Description: CLI interface to interact with Vector Functions running in the GraphQL proxy +""" +# Typical Python imports +from pathlib import Path +import sys +# ResDB & HNSW imports +from kv_vector_library import add_value, delete_value, get_value, get_values + +# Global Variables +WORKING_DIR = Path("./").resolve() + +def help_message(): + print("kv_vector.py --help") + print("To get instructions on use") + sys.exit() + +if __name__ == "__main__": + # Suggestion to use help if user's input is 
completely misguided + if ((len(sys.argv) < 2) or ((sys.argv[1] != "--add") and (sys.argv[1] != "--delete") and (sys.argv[1] != "--get") and (sys.argv[1] != "--getAll") and (sys.argv[1] != "--help")) ): + print('Invalid formatting of request! Use:') + help_message() + + # help, a flag that instructs users how to use the tool + if (sys.argv[1] == '--help'): + print("This is a tool provided to enable client-side interaction with vector indexing") + print("All vectors added via this tool will have an embedding generated for them, and will be") + print("stored (embedding and value) in ResDB. It effectively serves as a wrapper around the") + print("k/v store - adding and removing key/embedding pairs as instructed. All commands listed below:") + print('-----------------------------------------------------------------------') + print("kv_vector.py --add ") + print(" add the string value to ResDB, and generate an embedding for it") + print("kv_vector.py --delete ") + print(" delete the string value from ResDB, as well as its embedding") + print("kv_vector.py --get , or:") + print("kv_vector.py --get --k_matches ") + print(" get the k-closest values to the input string value, using HNSW.") + print(" if no k is provided, a default of k=1 will be used") + print("kv_vector.py --getAll") + print(" retrieve all values that have a correlated embedding") + print('-----------------------------------------------------------------------') + + if (sys.argv[1] == "--add"): + if (len(sys.argv) != 3): + print("Invalid formatting of request! Use:") + print("kv_vector.py --add ") + print("To save and generate an embedding for the chosen string. Alternatively, use:") + help_message() + else: + add_value(sys.argv[2]) + + if (sys.argv[1] == "--delete"): + if (len(sys.argv) != 3): + print("Invalid formatting of request! Use:") + print("kv_vector.py --delete ") + print("To delete a value and embedding for the chosen string. 
Alternatively, use:") + help_message() + else: + delete_value(sys.argv[2]) + + if (sys.argv[1] == "--getAll"): + if (len(sys.argv) != 2): + print("Invalid formatting of request! Use:") + print("kv_vector.py --getAll") + print("To get a list of every VALUE that currently has a generated embedding. Alternatively, use:") + help_message() + else: + get_values() + + if (sys.argv[1] == "--get"): + if ((len(sys.argv) == 3)): + get_value(sys.argv[2]) + elif ((len(sys.argv) == 5) and (sys.argv[3] == "--k_matches")): + try: + k_matches = int(sys.argv[4]) + get_value(sys.argv[2], k_matches) + except ValueError: + print("Invalid formatting of request! k_matches must be an integer") + sys.exit() + else: + print("Invalid formatting of request! Use:") + print("kv_vector.py --get , or:") + print("kv_vector.py --get --k_matches ") + print("to find the k-most similar vectors to your input. The default case where k_matches isn't provided, 1 is used") + print("To save and generate an embedding for the chosen string. Alternatively, use:") + help_message() \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/kv_vector_library.py b/ecosystem/sdk/vector-indexing/kv_vector_library.py new file mode 100644 index 000000000..2ffb940e6 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/kv_vector_library.py @@ -0,0 +1,71 @@ +# Typical Python imports +from typing import Any +import requests + +url = "http://127.0.0.1:8000/graphql" + +def format_get_responses(json: Any) -> None: + results = json["data"]["searchVector"] + if (len(results) == 0): + print('No values with embeddings stored in ResDB!') + else: + for i, pairing in enumerate(results): + text = pairing["text"] + # There's probably a better way of telling if there is a score, but thats okay + try: + score = pairing["score"] + score *= 100 + print(f"{i+1}. {text} // (similarity score: {score:.2f}%)") + except KeyError: + print(f"{i+1}. 
{text}") + +# Returns TRUE for success, FALSE otherwise +def add_value(value: str) -> bool: + query = f""" + mutation {{ + addVector(text: "{value}") + }} + """ + response = requests.post(url, json={"query": query}) + return ((199 < response.status_code) and (response.status_code < 300)) + +# Returns TRUE for success, FALSE otherwise +def delete_value(value: str) -> bool: + query = f""" + mutation {{ + deleteVector(text: "{value}") + }} + """ + response = requests.post(url, json={"query": query}) + return ((199 < response.status_code) and (response.status_code < 300)) + +# Returns TRUE for success, FALSE otherwise +def get_value(value: str, k: int = 1) -> bool: + query = f""" + query {{ + searchVector(text: "{value}", k: {k}) {{ + text + score + }} + }} + """ + response = requests.post(url, json={"query": query}) + success_response = (199 < response.status_code) and (response.status_code < 300) + if success_response: + format_get_responses(response.json()) + return success_response + +# Returns TRUE for success, FALSE otherwise +def get_values() -> bool: + query = f""" + query {{ + searchVector(text: "") {{ + text + }} + }} + """ + response = requests.post(url, json={"query": query}) + success_response = (199 < response.status_code) and (response.status_code < 300) + if success_response: + format_get_responses(response.json()) + return success_response \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/vector_add.py b/ecosystem/sdk/vector-indexing/vector_add.py index 854a0d96e..60a995a31 100644 --- a/ecosystem/sdk/vector-indexing/vector_add.py +++ b/ecosystem/sdk/vector-indexing/vector_add.py @@ -20,8 +20,8 @@ # Global Variables WORKING_DIR = Path("./").resolve() -db = ResDBORM() MODEL_NAME = 'all-MiniLM-L6-v2' # Lightweight and fast model +db = ResDBORM() if __name__ == "__main__": # --- SECTION 1: Input Parsing --- @@ -32,6 +32,7 @@ break if value_to_add == '': + # TODO: Check up and make sure that this gets sent out to the GQL print("Critical 
Error - requires argument `--value stringToSave`") sys.exit() From da66f0f3efa3b87ac58da0be4006f5199c1eaf47 Mon Sep 17 00:00:00 2001 From: SideCoin Date: Sat, 6 Dec 2025 17:53:33 -0800 Subject: [PATCH 61/79] readme update --- README.md | 10 +++++++ ecosystem/sdk/vector-indexing/stress_test.sh | 31 ++++++++++++++------ 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 76d199406..8f78d8985 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ 2. [ResilientDB Installation](#ResilientDB-Installation) 3. [ResilientDB Installation Bugs](#ResilientDB-Installation-Bugs) 4. [How to Run ResDB-ORM](#How-to-Run-ResDB-ORM) +5. [Stress Test KV](#Stress-Testing-KV) ## Running the Indexing Project All user-facing code for this project is located in `ecosystem/sdk/vector-indexing`. As long as the **KV Service** and **GraphQL Server** are running, executing the python code directly through the command line will work - nothing needs to be built beforehand. This does need to be run with a python instance with the ResDB-orm package installed (we reccoment using a virtual environment) @@ -282,3 +283,12 @@ bazel-bin/service/http_server/crow_service_main ecosystem/graphql/service/tools/ Note that each of these commands will prevent input on the terminal you run them in. To interact with ResDB-orm, spin up the python instance running it: `source venv/bin/activate`. To leave this Python environment and return to bash, just type `deactivate`. + + +## Stress Testing KV + +We tested for the storage limit of big values. In this configuration: +1. 8GB RAM Shell +2. Standard 5 replica config from `./service/tools/kv/server_tools/start_kv_service.sh` + +The results was that around 150-200mb values will cause the KV store to have long delays on operations. You can read more in `hnsw-test/index_test/README.md` along with the testing kit. 
\ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/stress_test.sh b/ecosystem/sdk/vector-indexing/stress_test.sh index d33daecd6..9d7d36c4b 100755 --- a/ecosystem/sdk/vector-indexing/stress_test.sh +++ b/ecosystem/sdk/vector-indexing/stress_test.sh @@ -8,6 +8,9 @@ echo "=== Stress Test Started: Adding 15 items sequentially ===" # Base word list for generating random unique sentences words=("Blockchain" "Database" "Resilient" "Consensus" "Python" "Vector" "Search" "Index" "Node" "Performance" "Latency" "Throughput" "Security" "Encryption" "Network") +# Initialize total time counter +total_duration=0 + for i in {1..15} do # Generate a unique text string using a random word and the current loop index @@ -17,27 +20,37 @@ do echo "---------------------------------------------------" echo "[Step $i/15] Adding data: '$text'" + # --- TIMING START --- + start_time=$(date +%s%3N) + # 1. Add data to ResilientDB (vector_add.py) - # This generates the HNSW index locally and uploads the binary files to ResDB - python3 vector_add.py --value "$text" + python3 kv_vector.py --add "$text" - # Check if the python script executed successfully - if [ $? -ne 0 ]; then + # Capture exit code immediately + exit_code=$? + + # --- TIMING END --- + end_time=$(date +%s%3N) + duration=$((end_time - start_time)) + total_duration=$((total_duration + duration)) + + # Check execution success + if [ $exit_code -ne 0 ]; then echo "❌ [CRITICAL FAIL] vector_add.py crashed at step $i." echo " -> The upload process likely failed." exit 1 + else + echo "⏱️ Add operation took: ${duration} ms" fi # 2. Retrieve and verify the data immediately (vector_get.py) - # This downloads the binary files from ResDB and attempts to load the index echo "[Check] Verifying index integrity..." - python3 vector_get.py --value "Test entry #$i" --k_matches 1 > /dev/null + python3 kv_vector.py --get "Test entry #$i" --k_matches 1 > /dev/null # Check if the retrieval script executed successfully if [ $? 
-ne 0 ]; then echo "❌ [CRITICAL FAIL] vector_get.py crashed at step $i!" echo " -> The index file retrieved from ResDB is likely corrupted." - echo " -> This confirms the 'binary-to-text' saving issue." exit 1 else echo "✅ [OK] Retrieve successful. Index is valid." @@ -49,5 +62,5 @@ done echo "===================================================" echo "🎉 Congratulations! The system survived the stress test." -echo " It successfully handled 15 sequential adds and reloads." -echo "===================================================" +echo " Total time spent on 'add' operations: ${total_duration} ms" +echo "===================================================" \ No newline at end of file From b4e1ab8bc36e03c7c19f77049c5a381def6e3210 Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Sun, 7 Dec 2025 11:34:01 +0900 Subject: [PATCH 62/79] feat: Refactor vector management for persistent execution and optimize search/add/delete functionality --- ecosystem/graphql/app.py | 142 ++++++---- ecosystem/sdk/vector-indexing/hnsw_library.py | 9 +- ecosystem/sdk/vector-indexing/vector_add.py | 268 ++++++++++-------- .../sdk/vector-indexing/vector_delete.py | 210 ++++++++------ ecosystem/sdk/vector-indexing/vector_get.py | 227 +++++++++------ 5 files changed, 514 insertions(+), 342 deletions(-) diff --git a/ecosystem/graphql/app.py b/ecosystem/graphql/app.py index cee6005c1..d4c13ed5f 100644 --- a/ecosystem/graphql/app.py +++ b/ecosystem/graphql/app.py @@ -23,10 +23,23 @@ import sys import subprocess import re +import json +import strawberry +import typing +import ast from pathlib import Path +from typing import Optional, List, Any +from flask import Flask +from flask_cors import CORS +from strawberry.flask.views import GraphQLView +# --- Local Imports --- from resdb_driver import Resdb from resdb_driver.crypto import generate_keypair +from json_scalar import JSONScalar + +# --- Vector Indexing Imports --- +from sentence_transformers import SentenceTransformer # --- Configuration --- 
db_root_url = "localhost:18000" @@ -35,51 +48,53 @@ db = Resdb(db_root_url) # --- Vector Indexing Scripts Path Configuration --- -# app.py is in ecosystem/graphql/ -# scripts are in ecosystem/sdk/vector-indexing/ CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) VECTOR_SCRIPT_DIR = os.path.abspath(os.path.join(CURRENT_DIR, "../sdk/vector-indexing")) PYTHON_EXE = sys.executable -def run_vector_script(script_name: str, args: list) -> tuple[bool, str]: - """Helper to run python scripts located in the vector-indexing directory.""" - script_path = os.path.join(VECTOR_SCRIPT_DIR, script_name) +# Add vector script dir to sys.path to allow imports +sys.path.append(VECTOR_SCRIPT_DIR) + +# Try importing the manager classes +try: + from vector_add import VectorIndexManager + from vector_get import VectorSearchManager + from vector_delete import VectorDeleteManager +except ImportError as e: + print(f"Warning: Could not import vector modules. Error: {e}") + VectorIndexManager = None + VectorSearchManager = None + VectorDeleteManager = None + +# --- Initialize AI Model & Managers (Run Once) --- +print("Initializing Vector Managers...") +vector_index_manager = None +vector_search_manager = None +vector_delete_manager = None + +try: + # Load model into memory once at startup to avoid per-request overhead + GLOBAL_MODEL = SentenceTransformer('all-MiniLM-L6-v2') - if not os.path.exists(script_path): - return False, f"Script not found: {script_path}" - - command = [PYTHON_EXE, script_path] + args - try: - # Run script with the working directory set to where the script is - # (because the scripts rely on relative paths like ./saved_data) - result = subprocess.run( - command, - capture_output=True, - text=True, - cwd=VECTOR_SCRIPT_DIR - ) - if result.returncode != 0: - return False, result.stderr + "\n" + result.stdout - return True, result.stdout.strip() - except Exception as e: - return False, str(e) - -import strawberry -import typing -import ast -import json + script_path = 
Path(VECTOR_SCRIPT_DIR) + + if VectorIndexManager: + vector_index_manager = VectorIndexManager(script_path, GLOBAL_MODEL) + + if VectorSearchManager: + vector_search_manager = VectorSearchManager(script_path, GLOBAL_MODEL) + + if VectorDeleteManager: + vector_delete_manager = VectorDeleteManager(script_path, GLOBAL_MODEL) + + print("Vector Managers initialized successfully.") +except Exception as e: + print(f"Error initializing vector managers: {e}") -from typing import Optional, List, Any -from flask import Flask -from flask_cors import CORS - -from json_scalar import JSONScalar app = Flask(__name__) CORS(app) # This will enable CORS for all routes -from strawberry.flask.views import GraphQLView - # --- GraphQL Types --- @strawberry.type @@ -159,26 +174,31 @@ def count_cats(self) -> str: #return number of cats return f'The word "cat" appears {cat_count} times' - # --- New: Vector Search Query --- + # --- New: Vector Search Query (Optimized) --- @strawberry.field - def searchVector(self, text: str, k: int = 3) -> List[VectorSearchResult]: - """Search for similar texts using the HNSW index.""" - success, output = run_vector_script("vector_get.py", ["--value", text, "--k_matches", str(k)]) - + def searchVector(self, text: str = None, k: int = 1) -> List[VectorSearchResult]: + """Search for similar texts using the in-memory manager.""" results = [] - if not success: - # Log error internally if needed, returning empty list or raising error - print(f"Vector search failed: {output}") + + if not vector_search_manager: + print("Error: Vector search manager not initialized.") return [] - # Parse the output from vector_get.py (e.g. "1. 
hello // (similarity score: 0.123)") - for line in output.splitlines(): - match = re.search(r'^\d+\.\s+(.*?)\s+//\s+\(similarity score:\s+([0-9.]+)\)', line) - if match: + if text is None: + # Show all functionality + raw_values = vector_search_manager.get_all_values() + for val in raw_values: + # For 'show all', we typically don't have a similarity score, or it's N/A + results.append(VectorSearchResult(text=val, score=1.0)) + else: + # Search functionality + search_results = vector_search_manager.search(text, k) + for item in search_results: results.append(VectorSearchResult( - text=match.group(1), - score=float(match.group(2)) + text=item['text'], + score=item['score'] )) + return results # --- Mutation --- @@ -203,27 +223,23 @@ def postTransaction(self, data: PrepareAsset) -> CommitTransaction: ) return payload - # --- New: Vector Add Mutation --- + # --- New: Vector Add Mutation (Optimized) --- @strawberry.mutation def addVector(self, text: str) -> str: - """Add a text to the vector index.""" - success, output = run_vector_script("vector_add.py", ["--value", text]) - if success: - return "Success: Added to index." - elif "already saved" in output: - return "Skipped: Value already exists." + """Add a text to the vector index using the in-memory manager.""" + if vector_index_manager: + return vector_index_manager.add_value(text) else: - return f"Error: {output}" + return "Error: Vector index manager not initialized." - # --- New: Vector Delete Mutation --- + # --- New: Vector Delete Mutation (Optimized) --- @strawberry.mutation def deleteVector(self, text: str) -> str: - """Delete a text from the vector index.""" - success, output = run_vector_script("vector_delete.py", ["--value", text]) - if success: - return "Success: Deleted from index." 
+ """Delete a text from the vector index using the in-memory manager.""" + if vector_delete_manager: + return vector_delete_manager.delete_value(text) else: - return f"Error: {output}" + return "Error: Vector delete manager not initialized." schema = strawberry.Schema(query=Query, mutation=Mutation) diff --git a/ecosystem/sdk/vector-indexing/hnsw_library.py b/ecosystem/sdk/vector-indexing/hnsw_library.py index 2d78a71e4..3aa322a34 100644 --- a/ecosystem/sdk/vector-indexing/hnsw_library.py +++ b/ecosystem/sdk/vector-indexing/hnsw_library.py @@ -6,11 +6,18 @@ """ # Typical Python imports from typing import Dict, List, Any +from pathlib import Path # ResDB & HNSW imports from resdb_orm.orm import ResDBORM +current_dir = Path(__file__).resolve().parent +# Abs path to resdb_orm/config.yaml +config_path = current_dir / "resdb_orm" / "config.yaml" -db = ResDBORM() +if not config_path.exists(): + print(f"Warning: Config file not found at {config_path}") + +db = ResDBORM(str(config_path)) # RETURNS: Key of the newly created record def create_record(value: str | Dict[str, Any]) -> str: diff --git a/ecosystem/sdk/vector-indexing/vector_add.py b/ecosystem/sdk/vector-indexing/vector_add.py index 854a0d96e..12ec5f2c1 100644 --- a/ecosystem/sdk/vector-indexing/vector_add.py +++ b/ecosystem/sdk/vector-indexing/vector_add.py @@ -1,6 +1,7 @@ """ Filename: vector_add.py Description: Save value to ResDB, generate embeddings using SentenceTransformers, and build HNSW index via hnswlib. +Refactored to support both CLI execution and import as a module for persistent server usage. """ import sys import os @@ -12,19 +13,161 @@ # ML & Search libraries import hnswlib -from sentence_transformers import SentenceTransformer +# Note: SentenceTransformer is imported inside the block or passed in to avoid overhead if just checking structure, +# but for the class, we assume the model object is passed. 
# ResDB & Local imports +# Ensure this script can find its dependencies when run from different contexts +current_dir = Path(__file__).resolve().parent +sys.path.append(str(current_dir)) + from resdb_orm.orm import ResDBORM import hnsw_library -# Global Variables -WORKING_DIR = Path("./").resolve() -db = ResDBORM() -MODEL_NAME = 'all-MiniLM-L6-v2' # Lightweight and fast model +class VectorIndexManager: + """ + Manages the HNSW index and ResDB storage for vector embeddings. + Designed to be initialized once with a loaded model to avoid overhead. + """ + def __init__(self, working_dir: Path, model: Any): + """ + Initialize the manager. + Args: + working_dir: The directory where local keys and temp files are stored. + model: A loaded SentenceTransformer model instance. + """ + self.working_dir = working_dir + self.model = model + # self.db = ResDBORM() + + # Setup directories + self.file_saved_directory = self.working_dir / "saved_data" + self.file_embedding_keys = self.file_saved_directory / "embedding_keys.json" + + if not os.path.exists(self.file_saved_directory): + os.makedirs(self.file_saved_directory, exist_ok=True) + + def add_value(self, value_to_add: str) -> str: + """ + Add a string value to the index. + 1. Checks if already exists. + 2. Updates local list. + 3. Re-calculates embeddings (naive approach for PoC). + 4. Re-builds HNSW index. + 5. Saves everything to ResDB. 
+ """ + if not value_to_add: + return "Critical Error - value is empty" + + embedding_keys: Dict[str, Any] = {} + hnsw_text_entries = [] + + # Load or initialize keys + if not os.path.exists(self.file_embedding_keys): + embedding_keys = { + "temp_index_txt": "", # Stores the binary HNSW index (base64) + "temp_leann_passages_json": "", # Stores the List[Dict] of text data + # Unused keys kept for compatibility + "temp_ids_txt": "", + "temp_leann_meta_json": "", + "temp_leann_passages_txt": "" + } + else: + try: + with open(self.file_embedding_keys, 'r') as file: + embedding_keys = json.load(file) + except Exception: + pass # Use default empty keys if fail + + # (A) Retrieve existing text entries + key_passages = embedding_keys.get("temp_leann_passages_json", "") + try: + if not key_passages: raise KeyError() + passages_return = hnsw_library.get_record(key_passages) + # Expecting data to be List[Dict] -> [{"text": "..."}] + current_data = passages_return["data"] + hnsw_text_entries = [item['text'] for item in current_data] + + if value_to_add in hnsw_text_entries: + return f"'{value_to_add}' is already saved. Skipping." + + hnsw_text_entries.append(value_to_add) + except Exception: + # If fetch fails or key doesn't exist, start fresh + hnsw_text_entries = [value_to_add] + # Create initial record placeholder + embedding_keys["temp_leann_passages_json"] = hnsw_library.create_record([]) + + # (B) Ensure index key exists + if not embedding_keys.get("temp_index_txt"): + embedding_keys["temp_index_txt"] = hnsw_library.create_record('') + + # Save keys locally + with open(self.file_embedding_keys, 'w') as file: + json.dump(embedding_keys, file) + + # --- Build Index (HNSW + SentenceTransformers) --- + # print("Generating embeddings and building index...") + + # 1. Vectorize text (using the pre-loaded model) + embeddings = self.model.encode(hnsw_text_entries) + + # 2. 
Build HNSW Index + num_elements = len(embeddings) + dim = embeddings.shape[1] + + # Init HNSW index + p = hnswlib.Index(space='cosine', dim=dim) + p.init_index(max_elements=num_elements, ef_construction=200, M=16) + p.add_items(embeddings, np.arange(num_elements)) # IDs are 0, 1, 2... + + # 3. Save index to temp file + file_temp_dir = self.working_dir / "saved_data/temp" + if not os.path.exists(file_temp_dir): + os.makedirs(file_temp_dir, exist_ok=True) + + index_path = str(file_temp_dir / "hnsw_index.bin") + p.save_index(index_path) + + # --- Save to ResDB --- + # print("Saving data to ResDB...") + + # (1) Save Index Binary (Base64 Encoded) + key_index = embedding_keys["temp_index_txt"] + try: + with open(index_path, 'rb') as f: + content_bytes = f.read() + # Encode binary to base64 string for JSON transport + content_b64 = base64.b64encode(content_bytes).decode('utf-8') + hnsw_library.put_record(key_index, content_b64) + except Exception as e: + return f"Error saving index: {e}" + + # (2) Save Text Passages (List of Dicts) + key_passages = embedding_keys["temp_leann_passages_json"] + try: + # Format: [{"text": "val1"}, {"text": "val2"}] + save_data = [{"text": t} for t in hnsw_text_entries] + hnsw_library.put_record(key_passages, save_data) + except Exception as e: + return f"Error saving passages: {e}" + + # --- Cleanup --- + if os.path.exists(index_path): + os.remove(index_path) + # We don't remove the dir here to be safe, or we can if empty. + + return "Success: Value added and index rebuilt." 
if __name__ == "__main__": - # --- SECTION 1: Input Parsing --- + # --- CLI Execution Support --- + # This block allows the script to still be run from the command line if needed + from sentence_transformers import SentenceTransformer + + WORKING_DIR = Path("./").resolve() + MODEL_NAME = 'all-MiniLM-L6-v2' # Lightweight and fast model + + # Input Parsing value_to_add = '' for i in range(len(sys.argv)): if sys.argv[i] == '--value' and (i + 1 != len(sys.argv)): @@ -35,113 +178,10 @@ print("Critical Error - requires argument `--value stringToSave`") sys.exit() - # --- SECTION 2: Retrieve/Init Keys & Data --- - file_saved_directory = Path(WORKING_DIR / "saved_data") - file_embedding_keys = str(WORKING_DIR / "saved_data/embedding_keys.json") - embedding_keys: Dict[str, Any] = {} - hnsw_text_entries = [] - - if not os.path.exists(file_saved_directory): - file_saved_directory.mkdir() - - # Load or initialize keys - if not os.path.exists(file_embedding_keys): - embedding_keys = { - "temp_index_txt": "", # Stores the binary HNSW index (base64) - "temp_leann_passages_json": "", # Stores the List[Dict] of text data - # Unused keys kept for compatibility - "temp_ids_txt": "", - "temp_leann_meta_json": "", - "temp_leann_passages_txt": "" - } - else: - try: - with open(file_embedding_keys, 'r') as file: - embedding_keys = json.load(file) - except Exception: - pass # Use default empty keys if fail - - # (A) Retrieve existing text entries - key_passages = embedding_keys.get("temp_leann_passages_json", "") - try: - if not key_passages: raise KeyError() - passages_return = hnsw_library.get_record(key_passages) - # Expecting data to be List[Dict] -> [{"text": "..."}] - current_data = passages_return["data"] - hnsw_text_entries = [item['text'] for item in current_data] - - if value_to_add in hnsw_text_entries: - print(f"'{value_to_add}' is already saved. 
Skipping.") - sys.exit() - - hnsw_text_entries.append(value_to_add) - except Exception: - # If fetch fails or key doesn't exist, start fresh - hnsw_text_entries = [value_to_add] - # Create initial record placeholder - embedding_keys["temp_leann_passages_json"] = hnsw_library.create_record([]) - - # (B) Ensure index key exists - if not embedding_keys.get("temp_index_txt"): - embedding_keys["temp_index_txt"] = hnsw_library.create_record('') - - # Save keys locally - with open(file_embedding_keys, 'w') as file: - json.dump(embedding_keys, file) - - # --- SECTION 3: Build Index (HNSW + SentenceTransformers) --- - print("Generating embeddings and building index...") - - # 1. Vectorize text + # Load Model (Expensive operation) model = SentenceTransformer(MODEL_NAME) - embeddings = model.encode(hnsw_text_entries) - # 2. Build HNSW Index - num_elements = len(embeddings) - dim = embeddings.shape[1] - - # Init HNSW index - p = hnswlib.Index(space='cosine', dim=dim) - p.init_index(max_elements=num_elements, ef_construction=200, M=16) - p.add_items(embeddings, np.arange(num_elements)) # IDs are 0, 1, 2... - - # 3. 
Save index to temp file - file_temp_dir = Path(WORKING_DIR / "saved_data/temp") - if not os.path.exists(file_temp_dir): - file_temp_dir.mkdir() - - index_path = str(file_temp_dir / "hnsw_index.bin") - p.save_index(index_path) - - # --- SECTION 4: Save to ResDB --- - print("Saving data to ResDB...") - - # (1) Save Index Binary (Base64 Encoded) - key_index = embedding_keys["temp_index_txt"] - try: - with open(index_path, 'rb') as f: - content_bytes = f.read() - # Encode binary to base64 string for JSON transport - content_b64 = base64.b64encode(content_bytes).decode('utf-8') - hnsw_library.put_record(key_index, content_b64) - except Exception as e: - print(f"Error saving index: {e}") - sys.exit() - - # (2) Save Text Passages (List of Dicts) - key_passages = embedding_keys["temp_leann_passages_json"] - try: - # Format: [{"text": "val1"}, {"text": "val2"}] - save_data = [{"text": t} for t in hnsw_text_entries] - hnsw_library.put_record(key_passages, save_data) - except Exception as e: - print(f"Error saving passages: {e}") - sys.exit() - - # --- Cleanup --- - if os.path.exists(index_path): - os.remove(index_path) - if os.path.exists(file_temp_dir): - file_temp_dir.rmdir() - - print("Success: Value added and index rebuilt.") \ No newline at end of file + # Run Manager + manager = VectorIndexManager(WORKING_DIR, model) + result = manager.add_value(value_to_add) + print(result) \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/vector_delete.py b/ecosystem/sdk/vector-indexing/vector_delete.py index 5abfb8aa9..4a618083a 100644 --- a/ecosystem/sdk/vector-indexing/vector_delete.py +++ b/ecosystem/sdk/vector-indexing/vector_delete.py @@ -1,6 +1,7 @@ """ Filename: vector_delete.py Description: Remove value from ResDB list and rebuild HNSW index via hnswlib. +Refactored to support persistent execution via VectorDeleteManager to avoid model reload overhead. 
""" import sys import os @@ -8,19 +9,137 @@ import base64 import numpy as np from pathlib import Path -from typing import Dict, Any +from typing import Dict, Any, List import hnswlib -from sentence_transformers import SentenceTransformer +# SentenceTransformer is imported in __main__ or passed in via the class + +# ResDB & Local imports +# Ensure this script can find its dependencies +current_dir = Path(__file__).resolve().parent +sys.path.append(str(current_dir)) + from resdb_orm.orm import ResDBORM import hnsw_library -# Global Variables -WORKING_DIR = Path("./").resolve() -MODEL_NAME = 'all-MiniLM-L6-v2' +class VectorDeleteManager: + """ + Manages the deletion of values from the vector index and ResDB. + Uses a shared persistent model to rebuild the index efficiently. + """ + def __init__(self, working_dir: Path, model: Any): + """ + Initialize with working directory and a pre-loaded model. + """ + self.working_dir = working_dir + self.model = model + self.file_saved_directory = self.working_dir / "saved_data" + self.file_embedding_keys = self.file_saved_directory / "embedding_keys.json" + + # Ensure directories exist (though delete implies they should) + if not os.path.exists(self.file_saved_directory): + os.makedirs(self.file_saved_directory, exist_ok=True) + + def delete_value(self, value_to_delete: str) -> str: + """ + Remove a string value from the index. + 1. Retrieve current data. + 2. Filter out the value. + 3. Re-calculate embeddings for remaining data. + 4. Re-build and save the HNSW index. + """ + if not value_to_delete: + return "Error: Value to delete is empty." + + # --- Retrieve Keys --- + if not os.path.exists(self.file_embedding_keys): + return "Error: keys file missing. No data to delete." + + try: + with open(self.file_embedding_keys, 'r') as file: + embedding_keys = json.load(file) + except Exception: + return "Error: Failed to load embedding keys." 
+ + # --- Fetch Data --- + hnsw_text_entries = [] + try: + key_passages = embedding_keys.get("temp_leann_passages_json") + if not key_passages: + return "Error: Passages key not found." + + ret = hnsw_library.get_record(key_passages) + current_data = ret["data"] + hnsw_text_entries = [item['text'] for item in current_data] + except Exception as e: + return f"Error retrieving data: {e}" + + # --- Modify Data --- + if value_to_delete not in hnsw_text_entries: + return f"Warning: '{value_to_delete}' not found. Nothing deleted." + + hnsw_text_entries = [t for t in hnsw_text_entries if t != value_to_delete] + + if not hnsw_text_entries: + # If list is empty, we might want to clear the record or handle it gracefully + # For now, we update it to an empty list and assume index is empty + # Note: hnswlib might complain if we try to build an index with 0 elements. + try: + hnsw_library.put_record(key_passages, []) + # Also potentially clear the index key, but let's just return + return "Success: Removed value. List is now empty." + except Exception as e: + return f"Error clearing data: {e}" + + # --- Rebuild & Save (Similar to add) --- + try: + # 1. Vectorize (using resident model) + embeddings = self.model.encode(hnsw_text_entries) + + # 2. Build Index + num_elements = len(embeddings) + dim = embeddings.shape[1] + p = hnswlib.Index(space='cosine', dim=dim) + p.init_index(max_elements=num_elements, ef_construction=200, M=16) + p.add_items(embeddings, np.arange(num_elements)) + + # 3. Save Index Temp + file_temp_dir = self.working_dir / "saved_data/temp" + if not os.path.exists(file_temp_dir): + os.makedirs(file_temp_dir, exist_ok=True) + + index_path = str(file_temp_dir / "hnsw_index.bin") + p.save_index(index_path) + + # 4. 
Upload to ResDB + # Save Index + key_index = embedding_keys["temp_index_txt"] + with open(index_path, 'rb') as f: + content_b64 = base64.b64encode(f.read()).decode('utf-8') + hnsw_library.put_record(key_index, content_b64) + + # Save Passages + save_data = [{"text": t} for t in hnsw_text_entries] + hnsw_library.put_record(key_passages, save_data) + + # Cleanup + if os.path.exists(index_path): + os.remove(index_path) + + return f"Success: Removed '{value_to_delete}'. Index rebuilt." + + except Exception as e: + return f"Error rebuilding index: {e}" + if __name__ == "__main__": - # --- SECTION 1: Input Parsing --- + # --- CLI Execution Support --- + from sentence_transformers import SentenceTransformer + + WORKING_DIR = Path("./").resolve() + MODEL_NAME = 'all-MiniLM-L6-v2' + + # Input Parsing value_to_delete = '' for i in range(len(sys.argv)): if sys.argv[i] == '--value' and (i + 1 != len(sys.argv)): @@ -31,77 +150,10 @@ print("Error: Requires argument `--value stringToDelete`") sys.exit() - # --- SECTION 2: Retrieve Data --- - file_embedding_keys = str(WORKING_DIR / "saved_data/embedding_keys.json") - try: - with open(file_embedding_keys, 'r') as file: - embedding_keys = json.load(file) - except Exception: - print("Error: keys file missing.") - sys.exit() - - hnsw_text_entries = [] - try: - key_passages = embedding_keys["temp_leann_passages_json"] - ret = hnsw_library.get_record(key_passages) - current_data = ret["data"] - hnsw_text_entries = [item['text'] for item in current_data] - except Exception as e: - print(f"Error retrieving data: {e}") - sys.exit() - - # --- SECTION 3: Modify Data --- - if value_to_delete not in hnsw_text_entries: - print(f"Warning: '{value_to_delete}' not found. Nothing deleted.") - sys.exit() - - hnsw_text_entries = [t for t in hnsw_text_entries if t != value_to_delete] - print(f"Removed '{value_to_delete}'. Rebuilding index...") - - if not hnsw_text_entries: - print("List is now empty. 
Please add new data to rebuild index.") - # Consider clearing the remote record here if desired - sys.exit() - - # --- SECTION 4: Rebuild & Save (Same logic as vector_add) --- - - # 1. Vectorize + # Load Model (Expensive) model = SentenceTransformer(MODEL_NAME) - embeddings = model.encode(hnsw_text_entries) - - # 2. Build Index - num_elements = len(embeddings) - dim = embeddings.shape[1] - p = hnswlib.Index(space='cosine', dim=dim) - p.init_index(max_elements=num_elements, ef_construction=200, M=16) - p.add_items(embeddings, np.arange(num_elements)) - - # 3. Save Index Temp - file_temp_dir = Path(WORKING_DIR / "saved_data/temp") - if not os.path.exists(file_temp_dir): - file_temp_dir.mkdir() - index_path = str(file_temp_dir / "hnsw_index.bin") - p.save_index(index_path) - - # 4. Upload to ResDB - try: - # Save Index - key_index = embedding_keys["temp_index_txt"] - with open(index_path, 'rb') as f: - content_b64 = base64.b64encode(f.read()).decode('utf-8') - hnsw_library.put_record(key_index, content_b64) - - # Save Passages - key_passages = embedding_keys["temp_leann_passages_json"] - save_data = [{"text": t} for t in hnsw_text_entries] - hnsw_library.put_record(key_passages, save_data) - - print("Success: Index rebuilt and saved.") - except Exception as e: - print(f"Error saving updates: {e}") - - # Cleanup - if os.path.exists(index_path): - os.remove(index_path) - if os.path.exists(file_temp_dir): - file_temp_dir.rmdir() \ No newline at end of file + + # Run Manager + manager = VectorDeleteManager(WORKING_DIR, model) + result = manager.delete_value(value_to_delete) + print(result) \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/vector_get.py b/ecosystem/sdk/vector-indexing/vector_get.py index 4ba647dbd..b84566d38 100644 --- a/ecosystem/sdk/vector-indexing/vector_get.py +++ b/ecosystem/sdk/vector-indexing/vector_get.py @@ -1,6 +1,7 @@ """ Filename: vector_get.py Description: Retrieve index/data from ResDB and search using hnswlib. 
+Refactored to support persistent execution via VectorSearchManager to avoid model reload overhead. """ import sys import os @@ -8,19 +9,143 @@ import base64 import numpy as np from pathlib import Path -from typing import Dict, Any +from typing import Dict, Any, List, Tuple, Optional import hnswlib -from sentence_transformers import SentenceTransformer +# SentenceTransformer is imported in __main__ or passed in via the class + +# ResDB & Local imports +# Ensure this script can find its dependencies +current_dir = Path(__file__).resolve().parent +sys.path.append(str(current_dir)) + from resdb_orm.orm import ResDBORM import hnsw_library -# Global Variables -WORKING_DIR = Path("./").resolve() -MODEL_NAME = 'all-MiniLM-L6-v2' +class VectorSearchManager: + """ + Manages retrieval and search operations using a persistent model. + """ + def __init__(self, working_dir: Path, model: Any): + """ + Initialize with working directory and a pre-loaded model. + """ + self.working_dir = working_dir + self.model = model + self.file_embedding_keys = self.working_dir / "saved_data/embedding_keys.json" + self.file_temp_dir = self.working_dir / "saved_data/temp" + + if not os.path.exists(self.file_temp_dir): + os.makedirs(self.file_temp_dir, exist_ok=True) + + def _get_keys(self) -> Dict[str, Any]: + if not os.path.exists(self.file_embedding_keys): + raise FileNotFoundError("Embedding keys not found. Add data first.") + with open(self.file_embedding_keys, 'r') as file: + return json.load(file) + + def _fetch_data(self) -> Tuple[List[Dict[str, str]], str]: + """ + Fetches passages and binary index from ResDB (or cache logic could be added here). + Writes the binary index to a temp file because hnswlib loads from disk. 
+ Returns: (passages_data, path_to_temp_index_file) + """ + keys = self._get_keys() + + # (A) Fetch Passages (Text Data) + key_passages = keys.get("temp_leann_passages_json") + if not key_passages: + raise KeyError("Passages key missing in local records.") + + # In a real scenario, you might cache this instead of fetching from DB every time + ret_passages = hnsw_library.get_record(key_passages) + passages_data = ret_passages["data"] # Expecting List[Dict] + + # (B) Fetch Index (Binary) + key_index = keys.get("temp_index_txt") + if not key_index: + raise KeyError("Index key missing in local records.") + + ret_index = hnsw_library.get_record(key_index) + content_b64 = ret_index["data"] + content_bytes = base64.b64decode(content_b64) + + index_path = str(self.file_temp_dir / "hnsw_index_search.bin") + with open(index_path, 'wb') as f: + f.write(content_bytes) + + return passages_data, index_path + + def get_all_values(self) -> List[str]: + """Returns all stored text values.""" + try: + keys = self._get_keys() + key_passages = keys.get("temp_leann_passages_json") + if not key_passages: + return [] + + ret = hnsw_library.get_record(key_passages) + data = ret["data"] + return [item['text'] for item in data] + except Exception as e: + print(f"Error fetching all values: {e}") + return [] + + def search(self, search_value: str, k: int = 1) -> List[Dict[str, Any]]: + """ + Executes the search using the pre-loaded model. + Returns a list of dicts: {'text': str, 'score': float} + """ + index_path = None + try: + # 1. Fetch current data & index from DB + passages_data, index_path = self._fetch_data() + + # 2. Embed Query (using resident model) + query_vector = self.model.encode([search_value]) + + # 3. 
Load Index + dim = query_vector.shape[1] + num_elements = len(passages_data) + + if num_elements == 0: + return [] + + p = hnswlib.Index(space='cosine', dim=dim) + # Allow slightly more elements to prevent load error if sizes mismatch slightly + p.load_index(index_path, max_elements=num_elements + 100) + + # 4. Query + real_k = min(k, num_elements) + labels, distances = p.knn_query(query_vector, k=real_k) + + # 5. Format Results + results = [] + for idx, dist in zip(labels[0], distances[0]): + text = passages_data[idx]['text'] + # Convert cosine distance to similarity score approx (1 - dist) + score = 1.0 - dist + results.append({'text': text, 'score': float(score)}) + + return results + + except Exception as e: + print(f"Search failed: {e}") + return [] + finally: + # Cleanup temp file + if index_path and os.path.exists(index_path): + os.remove(index_path) + if __name__ == "__main__": - # --- SECTION 1: Input Parsing --- + # --- CLI Execution Support --- + from sentence_transformers import SentenceTransformer + + WORKING_DIR = Path("./").resolve() + MODEL_NAME = 'all-MiniLM-L6-v2' + + # Input Parsing search_value = "" k_matches = 1 return_all = False @@ -40,85 +165,17 @@ print("Error: Provide --value 'query' or --show_all") sys.exit() - # --- SECTION 2: Retrieve Keys --- - file_embedding_keys = str(WORKING_DIR / "saved_data/embedding_keys.json") - try: - with open(file_embedding_keys, 'r') as file: - embedding_keys = json.load(file) - except Exception: - print("Error: Could not load embedding keys. 
Add data first.") - sys.exit() - - # --- SECTION 3: Fetch Data from ResDB --- - file_temp_dir = Path(WORKING_DIR / "saved_data/temp") - if not os.path.exists(file_temp_dir): - file_temp_dir.mkdir() - - # (A) Fetch Passages (Text Data) - passages_data = [] - try: - key_passages = embedding_keys["temp_leann_passages_json"] - ret = hnsw_library.get_record(key_passages) - passages_data = ret["data"] # Expecting List[Dict] - except Exception as e: - print(f"Error retrieving passages: {e}") - sys.exit() + # Load Model (Expensive) + model = SentenceTransformer(MODEL_NAME) + manager = VectorSearchManager(WORKING_DIR, model) if return_all: - print(f"--- All Stored Values ({len(passages_data)}) ---") - for i, item in enumerate(passages_data): - print(f"{i+1}. {item['text']}") - sys.exit() - - # (B) Fetch Index (Binary) - index_path = str(file_temp_dir / "hnsw_index.bin") - try: - key_index = embedding_keys["temp_index_txt"] - ret = hnsw_library.get_record(key_index) - content_b64 = ret["data"] - content_bytes = base64.b64decode(content_b64) - with open(index_path, 'wb') as f: - f.write(content_bytes) - except Exception as e: - print(f"Error retrieving index: {e}") - sys.exit() - - # --- SECTION 4: Search --- - try: - # 1. Embed Query - model = SentenceTransformer(MODEL_NAME) - query_vector = model.encode([search_value]) - - # 2. Load Index - dim = query_vector.shape[1] - num_elements = len(passages_data) - - # Safety check: if index is empty but code ran - if num_elements == 0: - print("Index is empty.") - sys.exit() - - p = hnswlib.Index(space='cosine', dim=dim) - # allow slightly more elements to prevent load error if sizes mismatch slightly - p.load_index(index_path, max_elements=num_elements + 100) - - # 3. Query - real_k = min(k_matches, num_elements) - labels, distances = p.knn_query(query_vector, k=real_k) - - # 4. Output + results = manager.get_all_values() + print(f"--- All Stored Values ({len(results)}) ---") + for i, text in enumerate(results): + print(f"{i+1}. 
{text}") + else: + results = manager.search(search_value, k_matches) # print(f"--- Search Results for '{search_value}' ---") - for i, (idx, dist) in enumerate(zip(labels[0], distances[0])): - text = passages_data[idx]['text'] - # Convert cosine distance to similarity score approx (1 - dist) - score = 1.0 - dist - print(f"{i+1}. {text} // (similarity score: {score:.4f})") - - except Exception as e: - print(f"Search failed: {e}") - - # --- Cleanup --- - if os.path.exists(index_path): - os.remove(index_path) - if os.path.exists(file_temp_dir): - file_temp_dir.rmdir() \ No newline at end of file + for i, item in enumerate(results): + print(f"{i+1}. {item['text']} // (similarity score: {item['score']:.4f})") \ No newline at end of file From 6fc137e292d7be3c6b7158fb2a4c9175209877af Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Sun, 7 Dec 2025 11:39:22 +0900 Subject: [PATCH 63/79] feat: Optimize vector search query and clean up vector_add script --- ecosystem/graphql/app.py | 11 ----------- ecosystem/sdk/vector-indexing/vector_add.py | 7 ------- 2 files changed, 18 deletions(-) diff --git a/ecosystem/graphql/app.py b/ecosystem/graphql/app.py index af38c94fe..d4c13ed5f 100644 --- a/ecosystem/graphql/app.py +++ b/ecosystem/graphql/app.py @@ -177,18 +177,7 @@ def count_cats(self) -> str: # --- New: Vector Search Query (Optimized) --- @strawberry.field def searchVector(self, text: str = None, k: int = 1) -> List[VectorSearchResult]: -<<<<<<< HEAD """Search for similar texts using the in-memory manager.""" -======= - """Search for similar texts using the HNSW index.""" - success = False - output = "" - if text is None: - success, output = run_vector_script("vector_get.py", ["--show_all"]) - else: - success, output = run_vector_script("vector_get.py", ["--value", text, "--k_matches", str(k)]) - ->>>>>>> da66f0f3efa3b87ac58da0be4006f5199c1eaf47 results = [] if not vector_search_manager: diff --git a/ecosystem/sdk/vector-indexing/vector_add.py 
b/ecosystem/sdk/vector-indexing/vector_add.py index 36e335392..0a6aa4cc5 100644 --- a/ecosystem/sdk/vector-indexing/vector_add.py +++ b/ecosystem/sdk/vector-indexing/vector_add.py @@ -24,7 +24,6 @@ from resdb_orm.orm import ResDBORM import hnsw_library -<<<<<<< HEAD class VectorIndexManager: """ Manages the HNSW index and ResDB storage for vector embeddings. @@ -159,12 +158,6 @@ def add_value(self, value_to_add: str) -> str: # We don't remove the dir here to be safe, or we can if empty. return "Success: Value added and index rebuilt." -======= -# Global Variables -WORKING_DIR = Path("./").resolve() -MODEL_NAME = 'all-MiniLM-L6-v2' # Lightweight and fast model -db = ResDBORM() ->>>>>>> da66f0f3efa3b87ac58da0be4006f5199c1eaf47 if __name__ == "__main__": # --- CLI Execution Support --- From badd412c68d9e133a5e4c9ebbf87affc39eda51b Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Sat, 6 Dec 2025 21:22:24 -0800 Subject: [PATCH 64/79] getAll now works when calling the proxy --- ecosystem/graphql/app.py | 10 ++++++++++ ecosystem/sdk/vector-indexing/kv_vector_library.py | 14 ++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/ecosystem/graphql/app.py b/ecosystem/graphql/app.py index d4c13ed5f..396ae84f5 100644 --- a/ecosystem/graphql/app.py +++ b/ecosystem/graphql/app.py @@ -173,6 +173,16 @@ def count_cats(self) -> str: #return number of cats return f'The word "cat" appears {cat_count} times' + + @strawberry.field + def getAllVectors(self) -> List[VectorSearchResult]: + """Search for all texts""" + results = [] + raw_values = vector_search_manager.get_all_values() + for val in raw_values: + # For 'show all', we typically don't have a similarity score, or it's N/A + results.append(VectorSearchResult(text=val, score=1.0)) + return results # --- New: Vector Search Query (Optimized) --- @strawberry.field diff --git a/ecosystem/sdk/vector-indexing/kv_vector_library.py b/ecosystem/sdk/vector-indexing/kv_vector_library.py index 2ffb940e6..93d412650 
100644 --- a/ecosystem/sdk/vector-indexing/kv_vector_library.py +++ b/ecosystem/sdk/vector-indexing/kv_vector_library.py @@ -1,11 +1,11 @@ # Typical Python imports from typing import Any import requests +import json url = "http://127.0.0.1:8000/graphql" -def format_get_responses(json: Any) -> None: - results = json["data"]["searchVector"] +def format_get_responses(results: Any) -> None: if (len(results) == 0): print('No values with embeddings stored in ResDB!') else: @@ -50,22 +50,24 @@ def get_value(value: str, k: int = 1) -> bool: }} """ response = requests.post(url, json={"query": query}) + responseDestructured = (response.json())["data"]["searchVector"] success_response = (199 < response.status_code) and (response.status_code < 300) if success_response: - format_get_responses(response.json()) + format_get_responses(responseDestructured) return success_response # Returns TRUE for success, FALSE otherwise def get_values() -> bool: query = f""" query {{ - searchVector(text: "") {{ + getAllVectors {{ text }} }} """ - response = requests.post(url, json={"query": query}) + response = requests.post(url, json={"query": query}) + responseDestructured = (response.json())["data"]["getAllVectors"] success_response = (199 < response.status_code) and (response.status_code < 300) if success_response: - format_get_responses(response.json()) + format_get_responses(responseDestructured) return success_response \ No newline at end of file From d30b862af59e5bd9b101da63f63e5d2745763d0e Mon Sep 17 00:00:00 2001 From: SideCoin Date: Sun, 7 Dec 2025 14:04:11 -0800 Subject: [PATCH 65/79] Stress test --- ecosystem/sdk/vector-indexing/ST_add_csv.py | 53 + ecosystem/sdk/vector-indexing/ST_get_csv.py | 93 + .../sdk/vector-indexing/add_csv_to_avg_csv.py | 61 + .../vector-indexing/averaged_intervals.csv | 18 + .../sdk/vector-indexing/get_csv_to_avg_csv.py | 108 + .../sdk/vector-indexing/parsed_log_data.csv | 842 ++ .../saved_data/embedding_keys.json | 1 + 
.../sdk/vector-indexing/stress_test_add.sh | 93 + .../sdk/vector-indexing/stress_test_get.sh | 87 + .../stress_test_get_interval_averages.csv | 11 + .../stress_test_get_results.txt | 7047 +++++++++++++++++ .../vector-indexing/stress_test_results.txt | 2047 +++++ .../stress_test_resultsCSV.csv | 501 ++ 13 files changed, 10962 insertions(+) create mode 100644 ecosystem/sdk/vector-indexing/ST_add_csv.py create mode 100644 ecosystem/sdk/vector-indexing/ST_get_csv.py create mode 100644 ecosystem/sdk/vector-indexing/add_csv_to_avg_csv.py create mode 100644 ecosystem/sdk/vector-indexing/averaged_intervals.csv create mode 100644 ecosystem/sdk/vector-indexing/get_csv_to_avg_csv.py create mode 100644 ecosystem/sdk/vector-indexing/parsed_log_data.csv create mode 100644 ecosystem/sdk/vector-indexing/saved_data/embedding_keys.json create mode 100644 ecosystem/sdk/vector-indexing/stress_test_add.sh create mode 100644 ecosystem/sdk/vector-indexing/stress_test_get.sh create mode 100644 ecosystem/sdk/vector-indexing/stress_test_get_interval_averages.csv create mode 100644 ecosystem/sdk/vector-indexing/stress_test_get_results.txt create mode 100644 ecosystem/sdk/vector-indexing/stress_test_results.txt create mode 100644 ecosystem/sdk/vector-indexing/stress_test_resultsCSV.csv diff --git a/ecosystem/sdk/vector-indexing/ST_add_csv.py b/ecosystem/sdk/vector-indexing/ST_add_csv.py new file mode 100644 index 000000000..3357d9273 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/ST_add_csv.py @@ -0,0 +1,53 @@ +import re +import pandas as pd + +# 1. Define the input and output filenames +input_filename = 'stress_test_results.txt' +output_filename = 'parsed_log_data.csv' + +# 2. Initialize a list to store the extracted data +extracted_data = [] + +# 3. 
Open and read the file +try: + with open(input_filename, 'r', encoding='utf-8') as f: + current_step = None + + for line in f: + line = line.strip() + + # Check for the Step line using Regex + # Pattern looks for "[Step " followed by digits + step_match = re.search(r'\[Step (\d+)/\d+\]', line) + if step_match: + current_step = int(step_match.group(1)) + continue + + # Check for the Time line + # Pattern looks for "Add operation took: " followed by digits and " ms" + # We skip the emoji check to make it more robust against encoding issues + if "Add operation took:" in line: + time_match = re.search(r'Add operation took:\s+(\d+)\s+ms', line) + if time_match and current_step is not None: + duration = int(time_match.group(1)) + + # Append the found pair to our list + extracted_data.append({ + 'Step': current_step, + 'Duration_ms': duration + }) + + # Reset current_step to ensure we don't duplicate if format is broken + current_step = None + + # 4. Convert to DataFrame and Save to CSV + if extracted_data: + df = pd.DataFrame(extracted_data) + df.to_csv(output_filename, index=False) + print(f"Successfully processed {len(df)} entries.") + print(df.head()) + else: + print("No matching data found in the file.") + +except FileNotFoundError: + print(f"Error: The file '{input_filename}' was not found.") \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/ST_get_csv.py b/ecosystem/sdk/vector-indexing/ST_get_csv.py new file mode 100644 index 000000000..f048c2117 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/ST_get_csv.py @@ -0,0 +1,93 @@ +import re +import csv +import sys + +def parse_stress_test_log(input_file_path, output_csv_path): + """ + Parses a stress test log file and exports specific metrics to a CSV. + + Args: + input_file_path (str): Path to the input text file. + output_csv_path (str): Path where the CSV file will be saved. 
+ """ + + # List to store the extracted data rows + data_rows = [] + + # State variables to keep track of where we are in the file + current_step = None + first_similarity = None + + # compiled regex patterns for efficiency + # Pattern to find [Step X/500] + step_pattern = re.compile(r'\[Step (\d+)/\d+\]') + + # Pattern to find the first entry: starts with "1. " and grabs the percentage + # Note: We look for lines starting strictly with "1." to avoid other entries + first_entry_pattern = re.compile(r'^1\..*similarity score:\s+([\d\.]+)\%') + + # Pattern to find the operation time: looks for "Get operation took: X ms" + # Handling the potential emoji or whitespace before "Get" + time_pattern = re.compile(r'Get operation took:\s+(\d+)\s+ms') + + try: + with open(input_file_path, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + + # 1. Check if line marks the start of a new step + step_match = step_pattern.search(line) + if step_match: + current_step = step_match.group(1) + first_similarity = None # Reset for the new step + continue + + # 2. Check for the first data entry (only if we are inside a step) + if current_step is not None and first_similarity is None: + entry_match = first_entry_pattern.search(line) + if entry_match: + first_similarity = entry_match.group(1) + continue + + # 3. 
Check for the timing line, which usually ends the block + time_match = time_pattern.search(line) + if time_match and current_step is not None: + time_took = time_match.group(1) + + # We have all three pieces of data, add to our list + # We use 'NA' if similarity wasn't found for some reason + sim_val = first_similarity if first_similarity else "NA" + + data_rows.append([current_step, time_took, sim_val]) + + # Reset step to avoid stale data carrying over (optional safety) + current_step = None + + # Write the results to a CSV file + with open(output_csv_path, 'w', newline='', encoding='utf-8') as csvfile: + writer = csv.writer(csvfile) + # Write header + writer.writerow(['Step Number', 'Operation Time (ms)', 'First Entry Similarity (%)']) + # Write data + writer.writerows(data_rows) + + print(f"Successfully processed {len(data_rows)} steps.") + print(f"Output saved to: {output_csv_path}") + + except FileNotFoundError: + print(f"Error: The file '{input_file_path}' was not found.") + except Exception as e: + print(f"An unexpected error occurred: {e}") + +if __name__ == "__main__": + # You can change these filenames as needed + input_filename = 'stress_test_get_results.txt' + output_filename = 'stress_test_resultsCSV.csv' + + # Create a dummy file for demonstration if it doesn't exist + # (You should replace this with your actual file) + import os + if not os.path.exists(input_filename): + print(f"'{input_filename}' not found. Please ensure your text file is in the same folder.") + else: + parse_stress_test_log(input_filename, output_filename) \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/add_csv_to_avg_csv.py b/ecosystem/sdk/vector-indexing/add_csv_to_avg_csv.py new file mode 100644 index 000000000..3aafb207e --- /dev/null +++ b/ecosystem/sdk/vector-indexing/add_csv_to_avg_csv.py @@ -0,0 +1,61 @@ +import csv +from statistics import mean + +# 1. 
Define file names +input_file = 'parsed_log_data.csv' # Input file +output_file = 'averaged_intervals.csv' # Output file +interval_size = 50 + +# Data structure to hold durations for each batch +# Format: { batch_id: [duration1, duration2, ...] } +batches = {} + +try: + print("Processing...") + with open(input_file, 'r', encoding='utf-8') as f: + reader = csv.DictReader(f) + + for row in reader: + # Convert strings to integers + try: + step = int(row['Step']) + duration = int(row['Duration_ms']) + except ValueError: + continue # Skip bad rows + + # Calculate which batch this step belongs to + # Steps 1-50 -> Batch 0, Steps 51-100 -> Batch 1 + batch_id = (step - 1) // interval_size + + if batch_id not in batches: + batches[batch_id] = [] + + batches[batch_id].append(duration) + + # Write the results + with open(output_file, 'w', newline='', encoding='utf-8') as f: + writer = csv.writer(f) + # Write Header + writer.writerow(['Start_Step', 'End_Step', 'Average_Duration_ms']) + + # Sort by batch_id so the output is in order (1-50, 51-100...) + sorted_batch_ids = sorted(batches.keys()) + + for b_id in sorted_batch_ids: + durations = batches[b_id] + + # Calculate Average + avg_val = mean(durations) + + # Calculate Start/End labels for clarity + start_step = (b_id * interval_size) + 1 + end_step = (b_id + 1) * interval_size + + writer.writerow([start_step, end_step, f"{avg_val:.2f}"]) + + print(f"Success! 
import csv
import statistics
import os


def _parse_row(row):
    """Convert one ``csv.DictReader`` row into ``(step, time_ms, similarity)``.

    Returns ``None`` when the row cannot be used: a required column is
    missing (``KeyError``), the row is shorter than the header so a field is
    ``None`` (``TypeError``), or a value is not numeric (``ValueError``).
    A similarity that is empty or the literal ``'NA'`` is valid and is
    reported as ``None``.
    """
    try:
        step = int(row['Step Number'])
        time_val = float(row['Operation Time (ms)'])
        sim_str = row['First Entry Similarity (%)']
        sim_val = float(sim_str) if sim_str and sim_str != 'NA' else None
    except (KeyError, TypeError, ValueError):
        return None
    return step, time_val, sim_val


def process_batches(input_csv_path, output_csv_path, batch_size=50):
    """
    Reads a CSV with step data, aggregates it into batches,
    and calculates averages for time and similarity.

    Args:
        input_csv_path: CSV with the columns 'Step Number',
            'Operation Time (ms)' and 'First Entry Similarity (%)'.
        output_csv_path: Destination CSV; one row per batch with the columns
            'Batch Range', 'Avg Operation Time (ms)' and
            'Avg Similarity Score (%)'.
        batch_size: Number of consecutive steps per batch (steps 1..batch_size
            form the first batch). Must be a positive integer.

    Returns:
        None. Problems with the input/output files are reported on stdout.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        # Previously this surfaced as a bare ZeroDivisionError (0) or as
        # nonsensical negative ranges (< 0).
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    print(f"Reading from: {input_csv_path}")

    # 1. Read the data, silently skipping malformed lines
    try:
        # newline='' lets the csv module handle embedded newlines itself.
        with open(input_csv_path, 'r', newline='', encoding='utf-8') as f:
            parsed_rows = [_parse_row(row) for row in csv.DictReader(f)]
    except FileNotFoundError:
        print(f"Error: Could not find file '{input_csv_path}'")
        return

    data = [parsed for parsed in parsed_rows if parsed is not None]
    if not data:
        print("No valid data found to process.")
        return

    # 2. Group into batches, keyed by batch index
    #    (e.g., Step 1-50 is batch 0, 51-100 is batch 1)
    batches = {}
    for step, time_val, sim_val in data:
        batch = batches.setdefault(
            (step - 1) // batch_size, {'times': [], 'similarities': []}
        )
        batch['times'].append(time_val)
        if sim_val is not None:
            batch['similarities'].append(sim_val)

    # 3. Calculate averages and prepare output rows
    output_rows = []
    for idx in sorted(batches):
        batch = batches[idx]
        start_step = (idx * batch_size) + 1
        end_step = (idx + 1) * batch_size

        avg_time = statistics.mean(batch['times'])
        # A batch without any usable similarity value is reported as 0.
        sims = batch['similarities']
        avg_sim = statistics.mean(sims) if sims else 0

        output_rows.append({
            'Batch Range': f"{start_step}-{end_step}",
            'Avg Operation Time (ms)': round(avg_time, 2),
            'Avg Similarity Score (%)': round(avg_sim, 2)
        })

    # 4. Write to new CSV
    fieldnames = ['Batch Range', 'Avg Operation Time (ms)', 'Avg Similarity Score (%)']

    try:
        with open(output_csv_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(output_rows)

        print(f"Success! Processed {len(output_rows)} batches.")
        print(f"Averages saved to: {output_csv_path}")

    except IOError as e:
        print(f"Error writing to file: {e}")


if __name__ == "__main__":
    # Settings
    input_file = 'stress_test_resultsCSV.csv'  # Must match the output of the previous script
    output_file = 'stress_test_get_interval_averages.csv'

    process_batches(input_file, output_file)
+123,999 +124,799 +125,849 +126,1018 +127,903 +128,850 +129,849 +130,902 +131,911 +132,929 +133,857 +134,921 +135,863 +136,848 +137,990 +138,882 +139,893 +140,885 +141,904 +142,889 +143,856 +144,862 +145,1056 +146,821 +147,855 +148,1042 +149,882 +150,907 +151,829 +152,1084 +153,840 +154,798 +155,874 +156,979 +157,835 +158,849 +159,786 +160,897 +161,977 +162,828 +163,844 +164,942 +165,985 +166,858 +167,849 +168,985 +169,853 +170,1022 +171,1167 +172,977 +173,900 +174,866 +175,882 +176,946 +177,897 +178,881 +179,857 +180,853 +181,962 +182,878 +183,848 +184,842 +185,986 +186,847 +187,850 +188,1033 +189,923 +190,1050 +191,869 +192,938 +193,1056 +194,1026 +195,998 +196,861 +197,851 +198,915 +199,936 +200,958 +201,900 +202,896 +203,1011 +204,838 +205,827 +206,924 +207,933 +208,838 +209,846 +210,943 +211,910 +212,842 +213,1060 +214,863 +215,879 +216,934 +217,893 +218,831 +219,838 +220,879 +221,867 +222,792 +223,1012 +224,833 +225,862 +226,965 +227,916 +228,841 +229,862 +230,964 +231,924 +232,889 +233,845 +234,910 +235,1056 +236,864 +237,845 +238,942 +239,792 +240,860 +241,1039 +242,890 +243,857 +244,995 +245,963 +246,855 +247,841 +248,878 +249,863 +250,902 +251,959 +252,858 +253,845 +254,990 +255,850 +256,841 +257,937 +258,928 +259,851 +260,869 +261,989 +262,870 +263,876 +264,1040 +265,916 +266,894 +267,1103 +268,1016 +269,1077 +270,1063 +271,1163 +272,1102 +273,1066 +274,1038 +275,1116 +276,1112 +277,1096 +278,907 +279,1449 +280,1034 +281,1218 +282,1341 +283,1118 +284,1013 +285,987 +286,1010 +287,1065 +288,1068 +289,957 +290,1040 +291,973 +292,1089 +293,916 +294,1106 +295,1017 +296,1101 +297,935 +298,1057 +299,987 +300,1177 +301,1131 +302,1007 +303,1027 +304,1034 +305,903 +306,898 +307,1039 +308,1177 +309,982 +310,1050 +311,973 +312,1189 +313,1002 +314,1106 +315,960 +316,1179 +317,1199 +318,1177 +319,1095 +320,1074 +321,1063 +322,957 +323,1015 +324,1049 +325,1167 +326,1027 +327,1020 +328,932 +329,932 +330,1038 +331,909 +332,951 +333,1033 +334,1079 +335,990 +336,953 
+337,1092 +338,997 +339,925 +340,991 +341,1053 +342,1043 +343,1142 +344,1189 +345,1143 +346,1008 +347,985 +348,1144 +349,1026 +350,1162 +351,964 +352,1028 +353,954 +354,1042 +355,1012 +356,966 +357,1083 +358,1014 +359,1066 +360,931 +361,1019 +362,1021 +363,1053 +364,1161 +365,1044 +366,1163 +367,1089 +368,1164 +369,1015 +370,1136 +371,1054 +372,1045 +373,960 +374,1111 +375,1085 +376,1129 +377,1004 +378,962 +379,1155 +380,1324 +381,1114 +382,1227 +383,1065 +384,1206 +385,1175 +386,1357 +387,1290 +388,1054 +389,1170 +390,1137 +391,1196 +392,1170 +393,1126 +394,1344 +395,1407 +396,1224 +397,1286 +398,1212 +399,1272 +400,1144 +401,1176 +402,1015 +403,1102 +404,1005 +405,998 +406,1135 +407,1077 +408,1105 +409,1071 +410,1107 +411,1186 +412,1120 +413,1213 +414,1097 +415,1272 +416,1143 +417,1227 +418,1207 +419,1180 +420,973 +421,1146 +422,1140 +423,1082 +424,998 +425,1262 +426,1204 +427,1325 +428,1010 +429,1133 +430,935 +431,1083 +432,1252 +433,1032 +434,1166 +435,1211 +436,1076 +437,1125 +438,1098 +439,1105 +440,1076 +441,1077 +442,1119 +443,1150 +444,1187 +445,1102 +446,1167 +447,1266 +448,1523 +449,1171 +450,1198 +451,1228 +452,1225 +453,1259 +454,1206 +455,1142 +456,1152 +457,1190 +458,1063 +459,1062 +460,1032 +461,1285 +462,1089 +463,1122 +464,999 +465,1008 +466,1127 +467,1183 +468,1139 +469,1120 +470,1151 +471,1142 +472,1098 +473,998 +474,1192 +475,1269 +476,1441 +477,1153 +478,1193 +479,1311 +480,1264 +481,1067 +482,1223 +483,1235 +484,1135 +485,1122 +486,1120 +487,1264 +488,1244 +489,1027 +490,1021 +491,1094 +492,1035 +493,1274 +494,1081 +495,1171 +496,1093 +497,1165 +498,1020 +499,1201 +500,1092 +501,1179 +502,1121 +503,1094 +504,1063 +505,1218 +506,1029 +507,1045 +508,1042 +509,1087 +510,1086 +511,992 +512,1056 +513,1131 +514,959 +515,1020 +516,1013 +517,1098 +518,1010 +519,969 +520,1337 +521,1031 +522,1084 +523,1035 +524,981 +525,1092 +526,1066 +527,1088 +528,973 +529,1089 +530,1198 +531,1143 +532,1055 +533,1129 +534,1073 +535,1186 +536,1047 +537,1161 +538,1117 
+539,1158 +540,1073 +541,1193 +542,1111 +543,1077 +544,1199 +545,1180 +546,1139 +547,1057 +548,1178 +549,1161 +550,1174 +551,1174 +552,1220 +553,1181 +554,1163 +555,1183 +556,1098 +557,1131 +558,1185 +559,1258 +560,1080 +561,1167 +562,1030 +563,1020 +564,1105 +565,1118 +566,1099 +567,1009 +568,1148 +569,1064 +570,1164 +571,1078 +572,1077 +573,1142 +574,1120 +575,1103 +576,1165 +577,1092 +578,1191 +579,1056 +580,1103 +581,1190 +582,1086 +583,1102 +584,1250 +585,1369 +586,1127 +587,1066 +588,1165 +589,1105 +590,1099 +591,1208 +592,1177 +593,1160 +594,1084 +595,1030 +596,1160 +597,1135 +598,1116 +599,1049 +600,1125 +601,1180 +602,1138 +603,1114 +604,1209 +605,1047 +606,1073 +607,1090 +608,1045 +609,1051 +610,1266 +611,1160 +612,1292 +613,1120 +614,1546 +615,1067 +616,1316 +617,1193 +618,1329 +619,1224 +620,1199 +621,1112 +622,1149 +623,1179 +624,1140 +625,1311 +626,1258 +627,1044 +628,1092 +629,1053 +630,1079 +631,1008 +632,1102 +633,1038 +634,1127 +635,1079 +636,1224 +637,1233 +638,1206 +639,1111 +640,1035 +641,1187 +642,1105 +643,1016 +644,1118 +645,1148 +646,3248 +647,1170 +648,1073 +649,1014 +650,1133 +651,1171 +652,1204 +653,1086 +654,1031 +655,1076 +656,1075 +657,1084 +658,1090 +659,988 +660,1036 +661,1106 +662,1021 +663,1144 +664,1081 +665,1055 +666,1132 +667,1072 +668,1105 +669,1089 +670,1118 +671,1078 +672,1147 +673,1119 +674,1062 +675,1035 +676,1201 +677,1020 +678,1043 +679,1105 +680,989 +681,1085 +682,1101 +683,1010 +684,1061 +685,2494 +686,1199 +687,1124 +688,1142 +689,1034 +690,1044 +691,1065 +692,1022 +693,1015 +694,1101 +695,1215 +696,1516 +697,1177 +698,1382 +699,1180 +700,1480 +701,1462 +702,1179 +703,1190 +704,1075 +705,1403 +706,1249 +707,1429 +708,1208 +709,1478 +710,1192 +711,1155 +712,1560 +713,1418 +714,1177 +715,1048 +716,1140 +717,1054 +718,1165 +719,1137 +720,1292 +721,1257 +722,3967 +723,3122 +724,5568 +725,3786 +726,1744 +727,1512 +728,1321 +729,1368 +730,1192 +731,1077 +732,1142 +733,1129 +734,1218 +735,1171 +736,1175 +737,1472 +738,1640 
#!/bin/bash
#
# Stress test for the "add" path of kv_vector.py.
#
# Adds NUM_ITERATIONS pseudo-random text entries one after another via
# `python3 kv_vector.py --add`, timing every call in milliseconds.
# The complete log (including the Python output) goes to OUTPUT_FILE; the
# console only shows periodic progress, batch reports and the final summary.
# The script stops with exit status 1 at the first failing add.

# Configuration
OUTPUT_FILE="stress_test_results.txt"
NUM_ITERATIONS=500
BATCH_SIZE=50            # Number of items summarized by each batch report
PROGRESS_EVERY=25        # Console progress is printed every N steps
TARGET_SCRIPT="kv_vector.py"

# The timer below needs GNU date (%N = nanoseconds, %3N = milliseconds).
# Fail once, clearly, instead of producing arithmetic errors on every step.
if ! [[ "$(date +%s%3N)" =~ ^[0-9]+$ ]]; then
    echo "Error: 'date +%s%3N' is not supported by this system's date command (GNU date required)." >&2
    exit 1
fi

# Clear the result file initially
: > "$OUTPUT_FILE"

echo "=== Stress Test Started: Adding $NUM_ITERATIONS items sequentially ===" | tee -a "$OUTPUT_FILE"
echo "Date: $(date)" | tee -a "$OUTPUT_FILE"

# Base word list
words=("Blockchain" "Database" "Resilient" "Consensus" "Python" "Vector" "Search" "Index" "Node" "Performance" "Latency" "Throughput" "Security" "Encryption" "Network" "Scalability" "Fault-tolerance" "Replication" "Sharding" "Caching" "Load-balancing" "Monitoring" "Logging" "Alerting" "Backup" "Recovery" "Cloud" "Container" "Orchestration" "Microservices" "API" "SDK" "Framework" "Library" "Algorithm" "Data-structure" "Optimization" "Parallelism" "Concurrency" "Threading" "Asynchronous" "Synchronous" "Event-driven" "Message-queue" "Pub-sub" "Websocket" "RESTful" "GraphQL" "JSON" "XML" "YAML" "CSV" "SQL" "NoSQL" "ORM" "CLI" "GUI" "UX" "UI" "DevOps" "CI/CD" "Testing" "Unit-test" "Integration-test" "E2E-test" "Mocking" "Stubbing" "Profiling" "Debugging" "Version-control" "Git" "Branching" "Merging" "Pull-request" "Code-review" "Documentation" "Tutorial" "Example" "Sample" "Template" "Boilerplate" "Best-practices" "Design-patterns" "Architecture" "UML" "ERD" "Flowchart" "Diagram")

# Initialize counters (all durations are in milliseconds)
total_duration=0
batch_duration=0

for ((i = 1; i <= NUM_ITERATIONS; i++))
do
    # Generate unique text: the step number makes every entry distinct,
    # two random words plus a random seed vary the content.
    rand_idx=$((RANDOM % ${#words[@]}))
    rand_idx2=$((RANDOM % ${#words[@]}))
    text="Test entry #$i: ${words[$rand_idx]} ${words[$rand_idx2]} related data with random seed $RANDOM"

    # ---------------------------------------------------------
    # LOGGING LOGIC
    # ---------------------------------------------------------

    # 1. Write Step info to FILE (Always do this so the log is complete)
    echo "---------------------------------------------------" >> "$OUTPUT_FILE"
    echo "[Step $i/$NUM_ITERATIONS] Processing..." >> "$OUTPUT_FILE"
    echo "Adding data: '$text'" >> "$OUTPUT_FILE"

    # 2. Write Step info to CONSOLE (only every PROGRESS_EVERY steps)
    if (( i % PROGRESS_EVERY == 0 )); then
        echo "[Step $i/$NUM_ITERATIONS] Processing..."
    fi

    # ---------------------------------------------------------
    # EXECUTION
    # ---------------------------------------------------------

    # --- TIMING START ---
    start_time=$(date +%s%3N)

    # Run Python script
    # Redirect standard output (>>) and errors (2>&1) ONLY to the file
    python3 "$TARGET_SCRIPT" --add "$text" >> "$OUTPUT_FILE" 2>&1

    # Capture the status immediately; the next command would overwrite $?.
    exit_code=$?

    # --- TIMING END ---
    end_time=$(date +%s%3N)
    duration=$((end_time - start_time))

    total_duration=$((total_duration + duration))
    batch_duration=$((batch_duration + duration))

    # Check execution success
    if [ "$exit_code" -ne 0 ]; then
        # If it fails, print to BOTH screen and file immediately
        msg="❌ [CRITICAL FAIL] $TARGET_SCRIPT --add crashed at step $i (exit code $exit_code)."
        echo "$msg" | tee -a "$OUTPUT_FILE"
        exit 1
    else
        # Log the individual duration ONLY to the file
        echo "⏱️ Add operation took: ${duration} ms" >> "$OUTPUT_FILE"
    fi

    # --- BATCH REPORT (every BATCH_SIZE items) ---
    # Print this to BOTH screen and file
    if (( i % BATCH_SIZE == 0 )); then
        avg_batch=$((batch_duration / BATCH_SIZE))
        echo "" | tee -a "$OUTPUT_FILE"
        echo "📊 [BATCH REPORT] Items $((i - BATCH_SIZE + 1)) to $i" | tee -a "$OUTPUT_FILE"
        echo " -> Average Latency: ${avg_batch} ms" | tee -a "$OUTPUT_FILE"
        echo "" | tee -a "$OUTPUT_FILE"

        # Reset batch counter
        batch_duration=0
    fi
done

# Final Results to BOTH screen and file
echo "===================================================" | tee -a "$OUTPUT_FILE"
echo "🎉 Congratulations! The system survived the stress test." | tee -a "$OUTPUT_FILE"
echo " Total time spent on 'add' operations: ${total_duration} ms" | tee -a "$OUTPUT_FILE"

overall_avg=$((total_duration / NUM_ITERATIONS))
echo " Overall Average Latency: ${overall_avg} ms" | tee -a "$OUTPUT_FILE"
echo "===================================================" | tee -a "$OUTPUT_FILE"
#!/bin/bash
#
# Stress test for the "get" (similarity search) path of kv_vector.py.
#
# Runs NUM_ITERATIONS sequential queries via
# `python3 kv_vector.py --get <word> --k_matches K_MATCHES_INT`, timing every
# call in milliseconds. The complete log (including the search results printed
# by the Python script) goes to OUTPUT_FILE; the console only shows periodic
# progress, batch reports and the final summary.
# The script stops with exit status 1 at the first failing query.

# Configuration
OUTPUT_FILE="stress_test_get_results.txt"
NUM_ITERATIONS=500
K_MATCHES_INT=10         # Number of matches requested per query
BATCH_SIZE=50            # Number of queries summarized by each batch report
PROGRESS_EVERY=25        # Console progress is printed every N steps
TARGET_SCRIPT="kv_vector.py"

# The timer below needs GNU date (%N = nanoseconds, %3N = milliseconds).
# Fail once, clearly, instead of producing arithmetic errors on every step.
if ! [[ "$(date +%s%3N)" =~ ^[0-9]+$ ]]; then
    echo "Error: 'date +%s%3N' is not supported by this system's date command (GNU date required)." >&2
    exit 1
fi

# Clear the result file initially
: > "$OUTPUT_FILE"

echo "=== Stress Test Started: Getting $NUM_ITERATIONS items sequentially ===" | tee -a "$OUTPUT_FILE"
echo "Date: $(date)" | tee -a "$OUTPUT_FILE"

# Base word list
words=("Blockchain" "Database" "Resilient" "Consensus" "Python" "Vector" "Search" "Index" "Node" "Performance" "Latency" "Throughput" "Security" "Encryption" "Network" "Scalability" "Fault-tolerance" "Replication" "Sharding" "Caching" "Load-balancing" "Monitoring" "Logging" "Alerting" "Backup" "Recovery" "Cloud" "Container" "Orchestration" "Microservices" "API" "SDK" "Framework" "Library" "Algorithm" "Data-structure" "Optimization" "Parallelism" "Concurrency" "Threading" "Asynchronous" "Synchronous" "Event-driven" "Message-queue" "Pub-sub" "Websocket" "RESTful" "GraphQL" "JSON" "XML" "YAML" "CSV" "SQL" "NoSQL" "ORM" "CLI" "GUI" "UX" "UI" "DevOps" "CI/CD" "Testing" "Unit-test" "Integration-test" "E2E-test" "Mocking" "Stubbing" "Profiling" "Debugging" "Version-control" "Git" "Branching" "Merging" "Pull-request" "Code-review" "Documentation" "Tutorial" "Example" "Sample" "Template" "Boilerplate" "Best-practices" "Design-patterns" "Architecture" "UML" "ERD" "Flowchart" "Diagram")

# Initialize counters (all durations are in milliseconds)
total_duration=0
batch_duration=0

for ((i = 1; i <= NUM_ITERATIONS; i++))
do
    # Pick one random word as the query. Queries are NOT unique: the same
    # word can (and will) be searched several times during a run.
    rand_idx=$((RANDOM % ${#words[@]}))
    text="${words[$rand_idx]}"

    # 1. Write Step info to FILE (Always do this so the log is complete)
    echo "---------------------------------------------------" >> "$OUTPUT_FILE"
    echo "[Step $i/$NUM_ITERATIONS] Processing..." >> "$OUTPUT_FILE"
    echo "Getting data: '$text'" >> "$OUTPUT_FILE"

    # 2. Write Step info to CONSOLE (only every PROGRESS_EVERY steps)
    if (( i % PROGRESS_EVERY == 0 )); then
        echo "[Step $i/$NUM_ITERATIONS] Processing..."
    fi

    # ---------------------------------------------------------
    # EXECUTION
    # ---------------------------------------------------------

    # --- TIMING START ---
    start_time=$(date +%s%3N)

    # Run Python script
    # Redirect standard output (>>) and errors (2>&1) ONLY to the file
    python3 "$TARGET_SCRIPT" --get "$text" --k_matches "$K_MATCHES_INT" >> "$OUTPUT_FILE" 2>&1

    # Capture the status immediately; the next command would overwrite $?.
    exit_code=$?

    # --- TIMING END ---
    end_time=$(date +%s%3N)
    duration=$((end_time - start_time))

    total_duration=$((total_duration + duration))
    batch_duration=$((batch_duration + duration))

    # Check execution success
    if [ "$exit_code" -ne 0 ]; then
        # If it fails, print to BOTH screen and file immediately
        msg="❌ [CRITICAL FAIL] $TARGET_SCRIPT --get crashed at step $i (exit code $exit_code)."
        echo "$msg" | tee -a "$OUTPUT_FILE"
        exit 1
    else
        # Log the individual duration ONLY to the file
        echo "⏱️ Get operation took: ${duration} ms" >> "$OUTPUT_FILE"
    fi

    # --- BATCH REPORT (every BATCH_SIZE items) ---
    # Print this to BOTH screen and file
    if (( i % BATCH_SIZE == 0 )); then
        avg_batch=$((batch_duration / BATCH_SIZE))
        echo "" | tee -a "$OUTPUT_FILE"
        echo "📊 [BATCH REPORT] Items $((i - BATCH_SIZE + 1)) to $i" | tee -a "$OUTPUT_FILE"
        echo " -> Average Latency: ${avg_batch} ms" | tee -a "$OUTPUT_FILE"
        echo "" | tee -a "$OUTPUT_FILE"

        # Reset batch counter
        batch_duration=0
    fi
done

# Final Results to BOTH screen and file
echo "===================================================" | tee -a "$OUTPUT_FILE"
echo "🎉 Congratulations! The system survived the stress test." | tee -a "$OUTPUT_FILE"
echo " Total time spent on 'get' operations: ${total_duration} ms" | tee -a "$OUTPUT_FILE"

overall_avg=$((total_duration / NUM_ITERATIONS))
echo " Overall Average Latency: ${overall_avg} ms" | tee -a "$OUTPUT_FILE"
echo "===================================================" | tee -a "$OUTPUT_FILE"
Stress Test Started: Getting 500 items sequentially === +Date: Sun Dec 7 12:14:54 PST 2025 +--------------------------------------------------- +[Step 1/500] Processing... +Getting data: 'Performance' +1. Test entry #452: Event-driven Performance related data with random seed 26374 // (similarity score: 39.07%) +2. Test entry #451: Library Performance related data with random seed 31802 // (similarity score: 35.13%) +3. Test entry #192: Performance Threading related data with random seed 6244 // (similarity score: 33.15%) +4. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 32.63%) +5. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 31.90%) +6. Test entry #481: Tutorial Throughput related data with random seed 11888 // (similarity score: 31.89%) +7. Test entry #273: Data-structure Performance related data with random seed 1219 // (similarity score: 31.74%) +8. Test entry #394: Sharding Performance related data with random seed 1607 // (similarity score: 31.69%) +9. Test entry #332: Performance YAML related data with random seed 23826 // (similarity score: 30.69%) +10. Test entry #291: YAML Performance related data with random seed 3598 // (similarity score: 28.81%) +⏱️ Get operation took: 1003 ms +--------------------------------------------------- +[Step 2/500] Processing... +Getting data: 'Security' +1. Test entry #411: Security Synchronous related data with random seed 11940 // (similarity score: 37.71%) +2. Test entry #174: Best-practices Security related data with random seed 18653 // (similarity score: 35.63%) +3. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 29.96%) +4. Test entry #268: GUI Security related data with random seed 8853 // (similarity score: 27.74%) +5. Test entry #315: Encryption Best-practices related data with random seed 23350 // (similarity score: 27.15%) +6. 
Test entry #123: Encryption Alerting related data with random seed 20180 // (similarity score: 25.45%) +7. Test entry #234: Security CI/CD related data with random seed 25834 // (similarity score: 25.03%) +8. Test entry #149: Encryption Logging related data with random seed 27963 // (similarity score: 24.16%) +9. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 24.08%) +10. Test entry #382: Encryption Algorithm related data with random seed 9600 // (similarity score: 23.77%) +⏱️ Get operation took: 931 ms +--------------------------------------------------- +[Step 3/500] Processing... +Getting data: 'Cloud' +1. Test entry #497: Cloud Profiling related data with random seed 11981 // (similarity score: 38.74%) +2. Test entry #270: Cloud Consensus related data with random seed 15128 // (similarity score: 36.08%) +3. Test entry #173: Sample Cloud related data with random seed 3561 // (similarity score: 36.02%) +4. Test entry #154: Cloud Search related data with random seed 2185 // (similarity score: 35.82%) +5. Test entry #58: Cloud Index related data with random seed 16697 // (similarity score: 33.17%) +6. Test entry #122: Cloud Websocket related data with random seed 14024 // (similarity score: 31.07%) +7. Test entry #116: NoSQL Cloud related data with random seed 22178 // (similarity score: 30.67%) +8. Test entry #178: Cloud XML related data with random seed 27579 // (similarity score: 30.58%) +9. Test entry #44: Cloud Index related data with random seed 12754 // (similarity score: 30.20%) +10. Test entry #187: Documentation Cloud related data with random seed 9545 // (similarity score: 26.80%) +⏱️ Get operation took: 932 ms +--------------------------------------------------- +[Step 4/500] Processing... +Getting data: 'Security' +1. Test entry #411: Security Synchronous related data with random seed 11940 // (similarity score: 37.71%) +2. 
Test entry #174: Best-practices Security related data with random seed 18653 // (similarity score: 35.63%) +3. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 29.96%) +4. Test entry #268: GUI Security related data with random seed 8853 // (similarity score: 27.74%) +5. Test entry #315: Encryption Best-practices related data with random seed 23350 // (similarity score: 27.15%) +6. Test entry #123: Encryption Alerting related data with random seed 20180 // (similarity score: 25.45%) +7. Test entry #234: Security CI/CD related data with random seed 25834 // (similarity score: 25.03%) +8. Test entry #149: Encryption Logging related data with random seed 27963 // (similarity score: 24.16%) +9. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 24.08%) +10. Test entry #382: Encryption Algorithm related data with random seed 9600 // (similarity score: 23.77%) +⏱️ Get operation took: 1161 ms +--------------------------------------------------- +[Step 5/500] Processing... +Getting data: 'GUI' +1. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 43.56%) +2. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 39.19%) +3. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 38.99%) +4. Test entry #268: GUI Security related data with random seed 8853 // (similarity score: 38.70%) +5. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 37.90%) +6. Test entry #90: Network GUI related data with random seed 6917 // (similarity score: 37.45%) +7. Test entry #400: GUI Container related data with random seed 26968 // (similarity score: 36.86%) +8. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 36.84%) +9. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 35.99%) +10. 
Test entry #87: GUI Encryption related data with random seed 24527 // (similarity score: 35.25%) +⏱️ Get operation took: 1206 ms +--------------------------------------------------- +[Step 6/500] Processing... +Getting data: 'Recovery' +1. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 39.10%) +2. Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 37.60%) +3. Test entry #306: Recovery Library related data with random seed 21298 // (similarity score: 36.53%) +4. Test entry #430: Recovery Diagram related data with random seed 24192 // (similarity score: 36.20%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 35.07%) +6. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 33.87%) +7. Test entry #367: SDK Recovery related data with random seed 20345 // (similarity score: 33.28%) +8. Test entry #88: Recovery JSON related data with random seed 28249 // (similarity score: 32.39%) +9. Test entry #321: Recovery Git related data with random seed 3971 // (similarity score: 32.26%) +10. Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 30.40%) +⏱️ Get operation took: 1141 ms +--------------------------------------------------- +[Step 7/500] Processing... +Getting data: 'Git' +1. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 26.09%) +2. Test entry #429: Version-control Code-review related data with random seed 14304 // (similarity score: 24.27%) +3. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 23.51%) +4. Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 23.09%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 22.94%) +6. 
Test entry #338: Version-control Monitoring related data with random seed 11905 // (similarity score: 22.82%) +7. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 22.52%) +8. Test entry #155: Sharding Version-control related data with random seed 4158 // (similarity score: 21.79%) +9. Test entry #340: Version-control Sample related data with random seed 23846 // (similarity score: 21.68%) +10. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 21.45%) +⏱️ Get operation took: 1189 ms +--------------------------------------------------- +[Step 8/500] Processing... +Getting data: 'UI' +1. Test entry #108: Code-review UI related data with random seed 5301 // (similarity score: 33.71%) +2. Test entry #249: SDK UI related data with random seed 20519 // (similarity score: 32.43%) +3. Test entry #210: Asynchronous UI related data with random seed 16307 // (similarity score: 31.84%) +4. Test entry #141: Sample UI related data with random seed 12909 // (similarity score: 28.72%) +5. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 28.18%) +6. Test entry #449: UI Load-balancing related data with random seed 29998 // (similarity score: 27.95%) +7. Test entry #39: UI Sharding related data with random seed 26238 // (similarity score: 26.65%) +8. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 25.47%) +9. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 25.01%) +10. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 24.21%) +⏱️ Get operation took: 1217 ms +--------------------------------------------------- +[Step 9/500] Processing... +Getting data: 'Search' +1. Test entry #75: UX Search related data with random seed 11487 // (similarity score: 42.09%) +2. 
Test entry #372: Best-practices Search related data with random seed 17128 // (similarity score: 41.11%) +3. Test entry #202: Search ORM related data with random seed 9657 // (similarity score: 38.48%) +4. Test entry #154: Cloud Search related data with random seed 2185 // (similarity score: 38.09%) +5. Test entry #280: Boilerplate Search related data with random seed 32715 // (similarity score: 37.50%) +6. Test entry #408: CI/CD Search related data with random seed 26877 // (similarity score: 35.64%) +7. Test entry #78: Search Git related data with random seed 20971 // (similarity score: 35.37%) +8. Test entry #397: Search Fault-tolerance related data with random seed 20761 // (similarity score: 35.23%) +9. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 29.18%) +10. Test entry #305: Index Database related data with random seed 10951 // (similarity score: 26.45%) +⏱️ Get operation took: 1316 ms +--------------------------------------------------- +[Step 10/500] Processing... +Getting data: 'Synchronous' +1. Test entry #196: Boilerplate Synchronous related data with random seed 18887 // (similarity score: 42.32%) +2. Test entry #287: Example Synchronous related data with random seed 18676 // (similarity score: 40.82%) +3. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 32.63%) +4. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 32.34%) +5. Test entry #411: Security Synchronous related data with random seed 11940 // (similarity score: 31.90%) +6. Test entry #258: XML Synchronous related data with random seed 9079 // (similarity score: 29.62%) +7. Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 29.18%) +8. Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 28.48%) +9. 
Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 27.98%) +10. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 27.69%) +⏱️ Get operation took: 1028 ms +--------------------------------------------------- +[Step 11/500] Processing... +Getting data: 'Asynchronous' +1. Test entry #74: Asynchronous Asynchronous related data with random seed 32283 // (similarity score: 46.54%) +2. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 44.22%) +3. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 43.79%) +4. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 43.65%) +5. Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 43.29%) +6. Test entry #311: Asynchronous Best-practices related data with random seed 24139 // (similarity score: 41.57%) +7. Test entry #210: Asynchronous UI related data with random seed 16307 // (similarity score: 40.59%) +8. Test entry #79: Algorithm Asynchronous related data with random seed 18478 // (similarity score: 40.11%) +9. Test entry #240: Algorithm Asynchronous related data with random seed 6330 // (similarity score: 38.61%) +10. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 36.49%) +⏱️ Get operation took: 1045 ms +--------------------------------------------------- +[Step 12/500] Processing... +Getting data: 'CSV' +1. Test entry #118: CSV Data-structure related data with random seed 20709 // (similarity score: 60.86%) +2. Test entry #439: CSV Library related data with random seed 9941 // (similarity score: 57.72%) +3. Test entry #21: CSV Example related data with random seed 2897 // (similarity score: 57.43%) +4. 
Test entry #454: CSV Best-practices related data with random seed 21695 // (similarity score: 56.93%) +5. Test entry #453: CSV Profiling related data with random seed 24443 // (similarity score: 53.45%) +6. Test entry #391: CSV Resilient related data with random seed 1245 // (similarity score: 51.75%) +7. Test entry #260: CSV Event-driven related data with random seed 23556 // (similarity score: 51.60%) +8. Test entry #119: CSV Resilient related data with random seed 21012 // (similarity score: 50.55%) +9. Test entry #392: Algorithm CSV related data with random seed 18964 // (similarity score: 50.33%) +10. Test entry #458: Encryption CSV related data with random seed 22642 // (similarity score: 49.73%) +⏱️ Get operation took: 960 ms +--------------------------------------------------- +[Step 13/500] Processing... +Getting data: 'Node' +1. Test entry #114: Node Optimization related data with random seed 20946 // (similarity score: 44.96%) +2. Test entry #72: Node Optimization related data with random seed 10019 // (similarity score: 41.61%) +3. Test entry #246: Node Documentation related data with random seed 8868 // (similarity score: 39.92%) +4. Test entry #175: Node UX related data with random seed 24087 // (similarity score: 39.28%) +5. Test entry #341: Node Vector related data with random seed 30034 // (similarity score: 39.03%) +6. Test entry #93: Node Documentation related data with random seed 10928 // (similarity score: 38.72%) +7. Test entry #11: Code-review Node related data with random seed 32027 // (similarity score: 35.38%) +8. Test entry #398: Backup Node related data with random seed 21714 // (similarity score: 34.34%) +9. Test entry #328: Node DevOps related data with random seed 7412 // (similarity score: 30.61%) +10. Test entry #16: Node Unit-test related data with random seed 4007 // (similarity score: 29.56%) +⏱️ Get operation took: 1139 ms +--------------------------------------------------- +[Step 14/500] Processing... 
+Getting data: 'Profiling' +1. Test entry #110: Boilerplate Profiling related data with random seed 6487 // (similarity score: 60.75%) +2. Test entry #333: Profiling Debugging related data with random seed 25431 // (similarity score: 54.23%) +3. Test entry #497: Cloud Profiling related data with random seed 11981 // (similarity score: 49.55%) +4. Test entry #453: CSV Profiling related data with random seed 24443 // (similarity score: 49.26%) +5. Test entry #146: Profiling DevOps related data with random seed 14561 // (similarity score: 46.96%) +6. Test entry #208: Blockchain Profiling related data with random seed 31906 // (similarity score: 46.76%) +7. Test entry #256: Profiling GraphQL related data with random seed 4781 // (similarity score: 43.45%) +8. Test entry #38: Profiling E2E-test related data with random seed 30992 // (similarity score: 43.10%) +9. Test entry #106: Profiling NoSQL related data with random seed 23156 // (similarity score: 42.71%) +10. Test entry #255: Profiling Replication related data with random seed 7369 // (similarity score: 42.39%) +⏱️ Get operation took: 990 ms +--------------------------------------------------- +[Step 15/500] Processing... +Getting data: 'Optimization' +1. Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 39.36%) +2. Test entry #114: Node Optimization related data with random seed 20946 // (similarity score: 37.49%) +3. Test entry #281: Optimization Resilient related data with random seed 24028 // (similarity score: 37.11%) +4. Test entry #27: Optimization Load-balancing related data with random seed 27403 // (similarity score: 35.53%) +5. Test entry #139: Load-balancing Optimization related data with random seed 31776 // (similarity score: 35.52%) +6. Test entry #403: Optimization Code-review related data with random seed 25707 // (similarity score: 35.52%) +7. Test entry #72: Node Optimization related data with random seed 10019 // (similarity score: 34.85%) +8. 
Test entry #191: Optimization Threading related data with random seed 15850 // (similarity score: 33.38%) +9. Test entry #466: UML Optimization related data with random seed 21437 // (similarity score: 32.36%) +10. Test entry #437: Microservices Optimization related data with random seed 8916 // (similarity score: 31.30%) +⏱️ Get operation took: 1055 ms +--------------------------------------------------- +[Step 16/500] Processing... +Getting data: 'Orchestration' +1. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 55.54%) +2. Test entry #309: Event-driven Orchestration related data with random seed 21023 // (similarity score: 51.76%) +3. Test entry #161: Orchestration Diagram related data with random seed 20102 // (similarity score: 50.18%) +4. Test entry #337: Algorithm Orchestration related data with random seed 15318 // (similarity score: 47.14%) +5. Test entry #77: Orchestration Code-review related data with random seed 28098 // (similarity score: 46.32%) +6. Test entry #163: Network Orchestration related data with random seed 17906 // (similarity score: 44.84%) +7. Test entry #275: Framework Orchestration related data with random seed 12664 // (similarity score: 44.25%) +8. Test entry #483: Orchestration Microservices related data with random seed 31494 // (similarity score: 42.41%) +9. Test entry #195: API Orchestration related data with random seed 17599 // (similarity score: 41.25%) +10. Test entry #412: YAML Orchestration related data with random seed 30910 // (similarity score: 40.32%) +⏱️ Get operation took: 986 ms +--------------------------------------------------- +[Step 17/500] Processing... +Getting data: 'CI/CD' +1. Test entry #324: Boilerplate CI/CD related data with random seed 11105 // (similarity score: 54.77%) +2. Test entry #479: CI/CD Data-structure related data with random seed 13652 // (similarity score: 54.51%) +3. 
Test entry #6: CI/CD Throughput related data with random seed 5729 // (similarity score: 53.92%) +4. Test entry #472: Library CI/CD related data with random seed 18363 // (similarity score: 53.79%) +5. Test entry #450: CI/CD Data-structure related data with random seed 9170 // (similarity score: 52.85%) +6. Test entry #317: Stubbing CI/CD related data with random seed 3418 // (similarity score: 52.74%) +7. Test entry #234: Security CI/CD related data with random seed 25834 // (similarity score: 52.50%) +8. Test entry #326: CI/CD Sample related data with random seed 22568 // (similarity score: 52.47%) +9. Test entry #408: CI/CD Search related data with random seed 26877 // (similarity score: 51.42%) +10. Test entry #489: Load-balancing CI/CD related data with random seed 9733 // (similarity score: 49.63%) +⏱️ Get operation took: 944 ms +--------------------------------------------------- +[Step 18/500] Processing... +Getting data: 'Library' +1. Test entry #451: Library Performance related data with random seed 31802 // (similarity score: 38.26%) +2. Test entry #357: Monitoring Library related data with random seed 91 // (similarity score: 36.58%) +3. Test entry #496: Library Database related data with random seed 13646 // (similarity score: 33.79%) +4. Test entry #320: Library UX related data with random seed 29021 // (similarity score: 31.34%) +5. Test entry #486: Library Example related data with random seed 9207 // (similarity score: 30.25%) +6. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 29.56%) +7. Test entry #95: Library Fault-tolerance related data with random seed 987 // (similarity score: 27.92%) +8. Test entry #472: Library CI/CD related data with random seed 18363 // (similarity score: 27.01%) +9. Test entry #30: API Library related data with random seed 1930 // (similarity score: 26.55%) +10. 
Test entry #250: Index Library related data with random seed 2980 // (similarity score: 25.80%) +⏱️ Get operation took: 996 ms +--------------------------------------------------- +[Step 19/500] Processing... +Getting data: 'Boilerplate' +1. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 41.22%) +2. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 36.21%) +3. Test entry #280: Boilerplate Search related data with random seed 32715 // (similarity score: 30.77%) +4. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 30.65%) +5. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 29.65%) +6. Test entry #12: Boilerplate YAML related data with random seed 29263 // (similarity score: 29.38%) +7. Test entry #196: Boilerplate Synchronous related data with random seed 18887 // (similarity score: 29.35%) +8. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 28.89%) +9. Test entry #443: Threading Boilerplate related data with random seed 23641 // (similarity score: 28.42%) +10. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 27.41%) +⏱️ Get operation took: 945 ms +--------------------------------------------------- +[Step 20/500] Processing... +Getting data: 'Scalability' +1. Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 43.17%) +2. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 40.71%) +3. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 38.07%) +4. Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 37.55%) +5. 
Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 37.23%) +6. Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 35.78%) +7. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 35.62%) +8. Test entry #51: Scalability SDK related data with random seed 3268 // (similarity score: 33.46%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 32.11%) +10. Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 31.32%) +⏱️ Get operation took: 995 ms +--------------------------------------------------- +[Step 21/500] Processing... +Getting data: 'Resilient' +1. Test entry #368: Stubbing Resilient related data with random seed 618 // (similarity score: 43.92%) +2. Test entry #383: Python Resilient related data with random seed 20296 // (similarity score: 42.70%) +3. Test entry #281: Optimization Resilient related data with random seed 24028 // (similarity score: 39.95%) +4. Test entry #71: Replication Resilient related data with random seed 6058 // (similarity score: 39.12%) +5. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 37.87%) +6. Test entry #490: Threading Resilient related data with random seed 825 // (similarity score: 37.61%) +7. Test entry #193: Database Resilient related data with random seed 3286 // (similarity score: 37.33%) +8. Test entry #165: Resilient Python related data with random seed 3195 // (similarity score: 37.32%) +9. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 36.10%) +10. Test entry #119: CSV Resilient related data with random seed 21012 // (similarity score: 35.60%) +⏱️ Get operation took: 1036 ms +--------------------------------------------------- +[Step 22/500] Processing... +Getting data: 'Concurrency' +1. 
Test entry #164: Threading Parallelism related data with random seed 13640 // (similarity score: 47.53%) +2. Test entry #441: Code-review Concurrency related data with random seed 21085 // (similarity score: 47.35%) +3. Test entry #405: Throughput Concurrency related data with random seed 5148 // (similarity score: 46.52%) +4. Test entry #54: Concurrency Mocking related data with random seed 20846 // (similarity score: 42.38%) +5. Test entry #26: Concurrency NoSQL related data with random seed 4796 // (similarity score: 42.23%) +6. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 41.66%) +7. Test entry #350: XML Concurrency related data with random seed 12519 // (similarity score: 41.43%) +8. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 41.41%) +9. Test entry #97: Testing Parallelism related data with random seed 6045 // (similarity score: 37.64%) +10. Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 36.83%) +⏱️ Get operation took: 1097 ms +--------------------------------------------------- +[Step 23/500] Processing... +Getting data: 'Best-practices' +1. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 32.60%) +2. Test entry #311: Asynchronous Best-practices related data with random seed 24139 // (similarity score: 31.44%) +3. Test entry #373: NoSQL Best-practices related data with random seed 20318 // (similarity score: 31.35%) +4. Test entry #454: CSV Best-practices related data with random seed 21695 // (similarity score: 30.63%) +5. Test entry #462: Best-practices CLI related data with random seed 19922 // (similarity score: 30.42%) +6. Test entry #487: XML Best-practices related data with random seed 19330 // (similarity score: 29.42%) +7. Test entry #372: Best-practices Search related data with random seed 17128 // (similarity score: 28.31%) +8. 
Test entry #315: Encryption Best-practices related data with random seed 23350 // (similarity score: 27.13%) +9. Test entry #174: Best-practices Security related data with random seed 18653 // (similarity score: 25.77%) +10. Test entry #148: Threading Best-practices related data with random seed 22158 // (similarity score: 25.48%) +⏱️ Get operation took: 896 ms +--------------------------------------------------- +[Step 24/500] Processing... +Getting data: 'Alerting' +1. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 51.72%) +2. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 44.93%) +3. Test entry #371: Code-review Alerting related data with random seed 6709 // (similarity score: 40.15%) +4. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 40.12%) +5. Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 35.73%) +6. Test entry #336: API Alerting related data with random seed 704 // (similarity score: 35.52%) +7. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 34.88%) +8. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 34.33%) +9. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 33.48%) +10. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 33.22%) +⏱️ Get operation took: 965 ms +--------------------------------------------------- +[Step 25/500] Processing... +Getting data: 'Pull-request' +1. Test entry #438: Pull-request API related data with random seed 19781 // (similarity score: 51.53%) +2. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 50.75%) +3. Test entry #107: Resilient Pull-request related data with random seed 9028 // (similarity score: 49.75%) +4. 
Test entry #488: Pull-request Git related data with random seed 13085 // (similarity score: 47.13%) +5. Test entry #424: Pull-request YAML related data with random seed 9732 // (similarity score: 46.59%) +6. Test entry #288: Index Pull-request related data with random seed 4855 // (similarity score: 46.42%) +7. Test entry #28: Pull-request Pub-sub related data with random seed 15070 // (similarity score: 45.59%) +8. Test entry #166: Pull-request Unit-test related data with random seed 18963 // (similarity score: 40.27%) +9. Test entry #314: Threading RESTful related data with random seed 12875 // (similarity score: 19.17%) +10. Test entry #203: Recovery JSON related data with random seed 8776 // (similarity score: 17.99%) +⏱️ Get operation took: 982 ms +--------------------------------------------------- +[Step 26/500] Processing... +Getting data: 'Stubbing' +1. Test entry #368: Stubbing Resilient related data with random seed 618 // (similarity score: 56.85%) +2. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 52.11%) +3. Test entry #406: Debugging Stubbing related data with random seed 28773 // (similarity score: 52.05%) +4. Test entry #18: UX Stubbing related data with random seed 14060 // (similarity score: 50.48%) +5. Test entry #2: Database Stubbing related data with random seed 28361 // (similarity score: 48.35%) +6. Test entry #263: CLI Stubbing related data with random seed 13486 // (similarity score: 46.44%) +7. Test entry #53: Blockchain Stubbing related data with random seed 10771 // (similarity score: 46.20%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 46.19%) +9. Test entry #317: Stubbing CI/CD related data with random seed 3418 // (similarity score: 43.34%) +10. 
Test entry #358: Index Stubbing related data with random seed 6263 // (similarity score: 40.32%) +⏱️ Get operation took: 990 ms +--------------------------------------------------- +[Step 27/500] Processing... +Getting data: 'JSON' +1. Test entry #389: Data-structure JSON related data with random seed 23144 // (similarity score: 51.74%) +2. Test entry #156: JSON Optimization related data with random seed 28003 // (similarity score: 49.61%) +3. Test entry #413: JSON Alerting related data with random seed 8852 // (similarity score: 47.02%) +4. Test entry #113: JSON API related data with random seed 11466 // (similarity score: 46.74%) +5. Test entry #85: JSON Vector related data with random seed 23519 // (similarity score: 46.52%) +6. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 43.98%) +7. Test entry #218: JSON Caching related data with random seed 29229 // (similarity score: 43.91%) +8. Test entry #143: JSON XML related data with random seed 20028 // (similarity score: 42.01%) +9. Test entry #57: YAML JSON related data with random seed 19740 // (similarity score: 40.00%) +10. Test entry #420: JSON Microservices related data with random seed 9271 // (similarity score: 39.90%) +⏱️ Get operation took: 944 ms +--------------------------------------------------- +[Step 28/500] Processing... +Getting data: 'RESTful' +1. Test entry #482: Code-review RESTful related data with random seed 18863 // (similarity score: 41.46%) +2. Test entry #289: RESTful Data-structure related data with random seed 1227 // (similarity score: 41.33%) +3. Test entry #446: Container RESTful related data with random seed 10831 // (similarity score: 40.34%) +4. Test entry #215: API RESTful related data with random seed 29014 // (similarity score: 40.32%) +5. Test entry #310: RESTful UML related data with random seed 5709 // (similarity score: 40.14%) +6. 
Test entry #314: Threading RESTful related data with random seed 12875 // (similarity score: 37.42%) +7. Test entry #296: ERD RESTful related data with random seed 19760 // (similarity score: 36.03%) +8. Test entry #200: NoSQL RESTful related data with random seed 23279 // (similarity score: 31.68%) +9. Test entry #423: Diagram RESTful related data with random seed 14813 // (similarity score: 29.93%) +10. Test entry #345: CI/CD RESTful related data with random seed 23698 // (similarity score: 29.68%) +⏱️ Get operation took: 947 ms +--------------------------------------------------- +[Step 29/500] Processing... +Getting data: 'Example' +1. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 21.02%) +2. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 20.91%) +3. Test entry #81: Example Documentation related data with random seed 4554 // (similarity score: 20.87%) +4. Test entry #43: Documentation Example related data with random seed 5875 // (similarity score: 19.56%) +5. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 19.26%) +6. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 18.68%) +7. Test entry #319: Documentation Example related data with random seed 22093 // (similarity score: 18.61%) +8. Test entry #297: Event-driven Code-review related data with random seed 23440 // (similarity score: 17.50%) +9. Test entry #138: Boilerplate ERD related data with random seed 17973 // (similarity score: 17.39%) +10. Test entry #183: Code-review Data-structure related data with random seed 20935 // (similarity score: 16.97%) +⏱️ Get operation took: 952 ms +--------------------------------------------------- +[Step 30/500] Processing... +Getting data: 'Profiling' +1. Test entry #110: Boilerplate Profiling related data with random seed 6487 // (similarity score: 60.75%) +2. 
Test entry #333: Profiling Debugging related data with random seed 25431 // (similarity score: 54.23%) +3. Test entry #497: Cloud Profiling related data with random seed 11981 // (similarity score: 49.55%) +4. Test entry #453: CSV Profiling related data with random seed 24443 // (similarity score: 49.26%) +5. Test entry #146: Profiling DevOps related data with random seed 14561 // (similarity score: 46.96%) +6. Test entry #208: Blockchain Profiling related data with random seed 31906 // (similarity score: 46.76%) +7. Test entry #256: Profiling GraphQL related data with random seed 4781 // (similarity score: 43.45%) +8. Test entry #38: Profiling E2E-test related data with random seed 30992 // (similarity score: 43.10%) +9. Test entry #106: Profiling NoSQL related data with random seed 23156 // (similarity score: 42.71%) +10. Test entry #255: Profiling Replication related data with random seed 7369 // (similarity score: 42.39%) +⏱️ Get operation took: 890 ms +--------------------------------------------------- +[Step 31/500] Processing... +Getting data: 'Logging' +1. Test entry #168: Event-driven Logging related data with random seed 22637 // (similarity score: 52.57%) +2. Test entry #172: Logging Architecture related data with random seed 7305 // (similarity score: 49.56%) +3. Test entry #222: Logging Library related data with random seed 32039 // (similarity score: 46.22%) +4. Test entry #361: Merging Logging related data with random seed 12323 // (similarity score: 45.44%) +5. Test entry #98: Asynchronous Logging related data with random seed 30841 // (similarity score: 45.06%) +6. Test entry #476: Logging API related data with random seed 8268 // (similarity score: 42.63%) +7. Test entry #245: Logging Testing related data with random seed 32263 // (similarity score: 42.44%) +8. Test entry #399: Logging Encryption related data with random seed 25211 // (similarity score: 37.28%) +9. 
Test entry #149: Encryption Logging related data with random seed 27963 // (similarity score: 35.73%) +10. Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 28.02%) +⏱️ Get operation took: 1150 ms +--------------------------------------------------- +[Step 32/500] Processing... +Getting data: 'Framework' +1. Test entry #227: Framework Algorithm related data with random seed 19206 // (similarity score: 31.85%) +2. Test entry #275: Framework Orchestration related data with random seed 12664 // (similarity score: 31.36%) +3. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 29.40%) +4. Test entry #126: Architecture Framework related data with random seed 1304 // (similarity score: 28.78%) +5. Test entry #386: Framework Testing related data with random seed 14924 // (similarity score: 28.77%) +6. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 26.61%) +7. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 24.69%) +8. Test entry #365: Framework ERD related data with random seed 21870 // (similarity score: 20.97%) +9. Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 20.94%) +10. Test entry #228: Branching Framework related data with random seed 10213 // (similarity score: 19.21%) +⏱️ Get operation took: 993 ms +--------------------------------------------------- +[Step 33/500] Processing... +Getting data: 'Template' +1. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 46.79%) +2. Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 41.61%) +3. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 40.40%) +4. Test entry #134: Template Vector related data with random seed 24421 // (similarity score: 38.73%) +5. 
Test entry #83: Template Sharding related data with random seed 32241 // (similarity score: 34.86%) +6. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 33.67%) +7. Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 33.18%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 30.02%) +9. Test entry #322: ORM Template related data with random seed 28110 // (similarity score: 29.98%) +10. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 27.79%) +⏱️ Get operation took: 951 ms +--------------------------------------------------- +[Step 34/500] Processing... +Getting data: 'Websocket' +1. Test entry #236: Websocket Debugging related data with random seed 18729 // (similarity score: 61.95%) +2. Test entry #92: Websocket Integration-test related data with random seed 32561 // (similarity score: 60.89%) +3. Test entry #13: Websocket Caching related data with random seed 27250 // (similarity score: 58.67%) +4. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 58.47%) +5. Test entry #122: Cloud Websocket related data with random seed 14024 // (similarity score: 57.43%) +6. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 15.29%) +7. Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 14.60%) +8. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 14.35%) +9. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 14.31%) +10. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 13.59%) +⏱️ Get operation took: 991 ms +--------------------------------------------------- +[Step 35/500] Processing... +Getting data: 'Stubbing' +1. 
Test entry #368: Stubbing Resilient related data with random seed 618 // (similarity score: 56.85%) +2. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 52.11%) +3. Test entry #406: Debugging Stubbing related data with random seed 28773 // (similarity score: 52.05%) +4. Test entry #18: UX Stubbing related data with random seed 14060 // (similarity score: 50.48%) +5. Test entry #2: Database Stubbing related data with random seed 28361 // (similarity score: 48.35%) +6. Test entry #263: CLI Stubbing related data with random seed 13486 // (similarity score: 46.44%) +7. Test entry #53: Blockchain Stubbing related data with random seed 10771 // (similarity score: 46.20%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 46.19%) +9. Test entry #317: Stubbing CI/CD related data with random seed 3418 // (similarity score: 43.34%) +10. Test entry #358: Index Stubbing related data with random seed 6263 // (similarity score: 40.32%) +⏱️ Get operation took: 947 ms +--------------------------------------------------- +[Step 36/500] Processing... +Getting data: 'Debugging' +1. Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 51.51%) +2. Test entry #333: Profiling Debugging related data with random seed 25431 // (similarity score: 49.97%) +3. Test entry #387: Index Debugging related data with random seed 29125 // (similarity score: 43.23%) +4. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 42.05%) +5. Test entry #67: Debugging Blockchain related data with random seed 23179 // (similarity score: 42.03%) +6. Test entry #236: Websocket Debugging related data with random seed 18729 // (similarity score: 41.95%) +7. Test entry #444: Vector Debugging related data with random seed 5646 // (similarity score: 41.67%) +8. 
Test entry #406: Debugging Stubbing related data with random seed 28773 // (similarity score: 41.49%) +9. Test entry #339: API Debugging related data with random seed 14456 // (similarity score: 39.09%) +10. Test entry #34: Debugging CI/CD related data with random seed 7455 // (similarity score: 33.14%) +⏱️ Get operation took: 1050 ms +--------------------------------------------------- +[Step 37/500] Processing... +Getting data: 'Scalability' +1. Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 43.17%) +2. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 40.71%) +3. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 38.07%) +4. Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 37.55%) +5. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 37.23%) +6. Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 35.78%) +7. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 35.62%) +8. Test entry #51: Scalability SDK related data with random seed 3268 // (similarity score: 33.46%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 32.11%) +10. Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 31.32%) +⏱️ Get operation took: 992 ms +--------------------------------------------------- +[Step 38/500] Processing... +Getting data: 'Design-patterns' +1. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 63.80%) +2. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 62.09%) +3. 
Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 61.76%) +4. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 57.41%) +5. Test entry #115: UX Design-patterns related data with random seed 14554 // (similarity score: 56.30%) +6. Test entry #4: Design-patterns UML related data with random seed 16565 // (similarity score: 50.76%) +7. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 49.55%) +8. Test entry #498: Design-patterns Python related data with random seed 16866 // (similarity score: 49.27%) +9. Test entry #212: Design-patterns DevOps related data with random seed 19247 // (similarity score: 46.55%) +10. Test entry #117: Design-patterns Blockchain related data with random seed 16711 // (similarity score: 45.78%) +⏱️ Get operation took: 950 ms +--------------------------------------------------- +[Step 39/500] Processing... +Getting data: 'Example' +1. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 21.02%) +2. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 20.91%) +3. Test entry #81: Example Documentation related data with random seed 4554 // (similarity score: 20.87%) +4. Test entry #43: Documentation Example related data with random seed 5875 // (similarity score: 19.56%) +5. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 19.26%) +6. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 18.68%) +7. Test entry #319: Documentation Example related data with random seed 22093 // (similarity score: 18.61%) +8. Test entry #297: Event-driven Code-review related data with random seed 23440 // (similarity score: 17.50%) +9. Test entry #138: Boilerplate ERD related data with random seed 17973 // (similarity score: 17.39%) +10. 
Test entry #183: Code-review Data-structure related data with random seed 20935 // (similarity score: 16.97%) +⏱️ Get operation took: 945 ms +--------------------------------------------------- +[Step 40/500] Processing... +Getting data: 'Unit-test' +1. Test entry #232: Tutorial Unit-test related data with random seed 11672 // (similarity score: 51.61%) +2. Test entry #448: Tutorial Unit-test related data with random seed 7829 // (similarity score: 51.60%) +3. Test entry #186: Tutorial Unit-test related data with random seed 2934 // (similarity score: 48.49%) +4. Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 47.92%) +5. Test entry #233: Unit-test Container related data with random seed 19623 // (similarity score: 46.29%) +6. Test entry #442: Unit-test Framework related data with random seed 4836 // (similarity score: 45.43%) +7. Test entry #477: Unit-test CLI related data with random seed 17091 // (similarity score: 45.02%) +8. Test entry #120: Algorithm Unit-test related data with random seed 14608 // (similarity score: 44.87%) +9. Test entry #422: Unit-test Example related data with random seed 16994 // (similarity score: 44.07%) +10. Test entry #16: Node Unit-test related data with random seed 4007 // (similarity score: 41.87%) +⏱️ Get operation took: 946 ms +--------------------------------------------------- +[Step 41/500] Processing... +Getting data: 'Scalability' +1. Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 43.17%) +2. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 40.71%) +3. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 38.07%) +4. Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 37.55%) +5. 
Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 37.23%) +6. Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 35.78%) +7. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 35.62%) +8. Test entry #51: Scalability SDK related data with random seed 3268 // (similarity score: 33.46%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 32.11%) +10. Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 31.32%) +⏱️ Get operation took: 956 ms +--------------------------------------------------- +[Step 42/500] Processing... +Getting data: 'Encryption' +1. Test entry #382: Encryption Algorithm related data with random seed 9600 // (similarity score: 49.22%) +2. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 48.64%) +3. Test entry #315: Encryption Best-practices related data with random seed 23350 // (similarity score: 48.26%) +4. Test entry #123: Encryption Alerting related data with random seed 20180 // (similarity score: 44.90%) +5. Test entry #65: Encryption Container related data with random seed 20902 // (similarity score: 44.62%) +6. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 43.65%) +7. Test entry #480: Encryption Version-control related data with random seed 3281 // (similarity score: 43.50%) +8. Test entry #331: Encryption Sample related data with random seed 30916 // (similarity score: 43.06%) +9. Test entry #465: Encryption Vector related data with random seed 11764 // (similarity score: 42.15%) +10. Test entry #225: Encryption Message-queue related data with random seed 3677 // (similarity score: 41.11%) +⏱️ Get operation took: 1098 ms +--------------------------------------------------- +[Step 43/500] Processing... 
+Getting data: 'Git' +1. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 26.09%) +2. Test entry #429: Version-control Code-review related data with random seed 14304 // (similarity score: 24.27%) +3. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 23.51%) +4. Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 23.09%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 22.94%) +6. Test entry #338: Version-control Monitoring related data with random seed 11905 // (similarity score: 22.82%) +7. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 22.52%) +8. Test entry #155: Sharding Version-control related data with random seed 4158 // (similarity score: 21.79%) +9. Test entry #340: Version-control Sample related data with random seed 23846 // (similarity score: 21.68%) +10. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 21.45%) +⏱️ Get operation took: 982 ms +--------------------------------------------------- +[Step 44/500] Processing... +Getting data: 'CI/CD' +1. Test entry #324: Boilerplate CI/CD related data with random seed 11105 // (similarity score: 54.77%) +2. Test entry #479: CI/CD Data-structure related data with random seed 13652 // (similarity score: 54.51%) +3. Test entry #6: CI/CD Throughput related data with random seed 5729 // (similarity score: 53.92%) +4. Test entry #472: Library CI/CD related data with random seed 18363 // (similarity score: 53.79%) +5. Test entry #450: CI/CD Data-structure related data with random seed 9170 // (similarity score: 52.85%) +6. Test entry #317: Stubbing CI/CD related data with random seed 3418 // (similarity score: 52.74%) +7. 
Test entry #234: Security CI/CD related data with random seed 25834 // (similarity score: 52.50%) +8. Test entry #326: CI/CD Sample related data with random seed 22568 // (similarity score: 52.47%) +9. Test entry #408: CI/CD Search related data with random seed 26877 // (similarity score: 51.42%) +10. Test entry #489: Load-balancing CI/CD related data with random seed 9733 // (similarity score: 49.63%) +⏱️ Get operation took: 952 ms +--------------------------------------------------- +[Step 45/500] Processing... +Getting data: 'YAML' +1. Test entry #295: YAML Tutorial related data with random seed 7734 // (similarity score: 62.26%) +2. Test entry #12: Boilerplate YAML related data with random seed 29263 // (similarity score: 61.60%) +3. Test entry #291: YAML Performance related data with random seed 3598 // (similarity score: 57.08%) +4. Test entry #332: Performance YAML related data with random seed 23826 // (similarity score: 54.83%) +5. Test entry #412: YAML Orchestration related data with random seed 30910 // (similarity score: 53.55%) +6. Test entry #395: YAML Threading related data with random seed 20711 // (similarity score: 53.16%) +7. Test entry #424: Pull-request YAML related data with random seed 9732 // (similarity score: 52.19%) +8. Test entry #62: YAML CSV related data with random seed 16118 // (similarity score: 51.86%) +9. Test entry #185: Mocking YAML related data with random seed 11811 // (similarity score: 51.82%) +10. Test entry #57: YAML JSON related data with random seed 19740 // (similarity score: 50.92%) +⏱️ Get operation took: 938 ms +--------------------------------------------------- +[Step 46/500] Processing... +Getting data: 'Boilerplate' +1. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 41.22%) +2. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 36.21%) +3. 
Test entry #280: Boilerplate Search related data with random seed 32715 // (similarity score: 30.77%) +4. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 30.65%) +5. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 29.65%) +6. Test entry #12: Boilerplate YAML related data with random seed 29263 // (similarity score: 29.38%) +7. Test entry #196: Boilerplate Synchronous related data with random seed 18887 // (similarity score: 29.35%) +8. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 28.89%) +9. Test entry #443: Threading Boilerplate related data with random seed 23641 // (similarity score: 28.42%) +10. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 27.41%) +⏱️ Get operation took: 956 ms +--------------------------------------------------- +[Step 47/500] Processing... +Getting data: 'Event-driven' +1. Test entry #354: Load-balancing Event-driven related data with random seed 18844 // (similarity score: 45.07%) +2. Test entry #452: Event-driven Performance related data with random seed 26374 // (similarity score: 44.21%) +3. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 44.20%) +4. Test entry #316: GUI Event-driven related data with random seed 24961 // (similarity score: 43.28%) +5. Test entry #168: Event-driven Logging related data with random seed 22637 // (similarity score: 41.27%) +6. Test entry #260: CSV Event-driven related data with random seed 23556 // (similarity score: 40.58%) +7. Test entry #309: Event-driven Orchestration related data with random seed 21023 // (similarity score: 40.56%) +8. Test entry #142: Event-driven Microservices related data with random seed 7351 // (similarity score: 40.55%) +9. 
Test entry #45: Event-driven Load-balancing related data with random seed 5105 // (similarity score: 39.29%) +10. Test entry #297: Event-driven Code-review related data with random seed 23440 // (similarity score: 39.27%) +⏱️ Get operation took: 952 ms +--------------------------------------------------- +[Step 48/500] Processing... +Getting data: 'Container' +1. Test entry #182: Testing Container related data with random seed 11294 // (similarity score: 44.10%) +2. Test entry #446: Container RESTful related data with random seed 10831 // (similarity score: 40.01%) +3. Test entry #233: Unit-test Container related data with random seed 19623 // (similarity score: 39.50%) +4. Test entry #400: GUI Container related data with random seed 26968 // (similarity score: 39.49%) +5. Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 38.28%) +6. Test entry #160: Container NoSQL related data with random seed 15570 // (similarity score: 37.91%) +7. Test entry #86: Index Container related data with random seed 813 // (similarity score: 37.74%) +8. Test entry #262: Container Branching related data with random seed 22849 // (similarity score: 37.24%) +9. Test entry #65: Encryption Container related data with random seed 20902 // (similarity score: 36.61%) +10. Test entry #170: ORM Container related data with random seed 30245 // (similarity score: 32.73%) +⏱️ Get operation took: 990 ms +--------------------------------------------------- +[Step 49/500] Processing... +Getting data: 'Architecture' +1. Test entry #126: Architecture Framework related data with random seed 1304 // (similarity score: 36.09%) +2. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 32.91%) +3. Test entry #140: UX Architecture related data with random seed 15002 // (similarity score: 31.89%) +4. Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 31.79%) +5. 
Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 30.07%) +6. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 27.81%) +7. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 26.98%) +8. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 26.33%) +9. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 25.30%) +10. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 24.95%) +⏱️ Get operation took: 892 ms +--------------------------------------------------- +[Step 50/500] Processing... +Getting data: 'Parallelism' +1. Test entry #164: Threading Parallelism related data with random seed 13640 // (similarity score: 53.13%) +2. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 51.21%) +3. Test entry #7: Mocking Parallelism related data with random seed 19010 // (similarity score: 51.18%) +4. Test entry #97: Testing Parallelism related data with random seed 6045 // (similarity score: 49.12%) +5. Test entry #136: Parallelism Sample related data with random seed 18078 // (similarity score: 48.54%) +6. Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 47.79%) +7. Test entry #201: Sample Parallelism related data with random seed 8912 // (similarity score: 46.65%) +8. Test entry #252: ERD Parallelism related data with random seed 23325 // (similarity score: 45.15%) +9. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 44.59%) +10. 
Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 44.19%) +⏱️ Get operation took: 1000 ms + +📊 [BATCH REPORT] Items 1 to 50 + -> Average Latency: 1010 ms + +--------------------------------------------------- +[Step 51/500] Processing... +Getting data: 'Optimization' +1. Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 39.36%) +2. Test entry #114: Node Optimization related data with random seed 20946 // (similarity score: 37.49%) +3. Test entry #281: Optimization Resilient related data with random seed 24028 // (similarity score: 37.11%) +4. Test entry #27: Optimization Load-balancing related data with random seed 27403 // (similarity score: 35.53%) +5. Test entry #139: Load-balancing Optimization related data with random seed 31776 // (similarity score: 35.52%) +6. Test entry #403: Optimization Code-review related data with random seed 25707 // (similarity score: 35.52%) +7. Test entry #72: Node Optimization related data with random seed 10019 // (similarity score: 34.85%) +8. Test entry #191: Optimization Threading related data with random seed 15850 // (similarity score: 33.38%) +9. Test entry #466: UML Optimization related data with random seed 21437 // (similarity score: 32.36%) +10. Test entry #437: Microservices Optimization related data with random seed 8916 // (similarity score: 31.30%) +⏱️ Get operation took: 976 ms +--------------------------------------------------- +[Step 52/500] Processing... +Getting data: 'UX' +1. Test entry #463: Microservices UX related data with random seed 32421 // (similarity score: 28.20%) +2. Test entry #177: UX Monitoring related data with random seed 16167 // (similarity score: 26.37%) +3. Test entry #133: Microservices UX related data with random seed 13570 // (similarity score: 25.58%) +4. Test entry #140: UX Architecture related data with random seed 15002 // (similarity score: 25.04%) +5. 
Test entry #66: UX UML related data with random seed 3512 // (similarity score: 22.74%) +6. Test entry #402: UX Load-balancing related data with random seed 5781 // (similarity score: 21.76%) +7. Test entry #115: UX Design-patterns related data with random seed 14554 // (similarity score: 21.69%) +8. Test entry #175: Node UX related data with random seed 24087 // (similarity score: 20.22%) +9. Test entry #75: UX Search related data with random seed 11487 // (similarity score: 20.03%) +10. Test entry #237: UX Blockchain related data with random seed 8734 // (similarity score: 19.35%) +⏱️ Get operation took: 962 ms +--------------------------------------------------- +[Step 53/500] Processing... +Getting data: 'Flowchart' +1. Test entry #9: Diagram Flowchart related data with random seed 14766 // (similarity score: 63.49%) +2. Test entry #495: Best-practices Flowchart related data with random seed 507 // (similarity score: 59.08%) +3. Test entry #121: Flowchart SDK related data with random seed 24836 // (similarity score: 56.62%) +4. Test entry #226: Flowchart Testing related data with random seed 32215 // (similarity score: 56.46%) +5. Test entry #342: Python Flowchart related data with random seed 32467 // (similarity score: 56.34%) +6. Test entry #209: Index Flowchart related data with random seed 16113 // (similarity score: 54.06%) +7. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 49.88%) +8. Test entry #41: Index Flowchart related data with random seed 29094 // (similarity score: 49.84%) +9. Test entry #213: GUI GraphQL related data with random seed 9145 // (similarity score: 30.82%) +10. Test entry #469: Branching GraphQL related data with random seed 16900 // (similarity score: 24.78%) +⏱️ Get operation took: 984 ms +--------------------------------------------------- +[Step 54/500] Processing... +Getting data: 'Index' +1. 
Test entry #250: Index Library related data with random seed 2980 // (similarity score: 46.82%) +2. Test entry #473: Index Python related data with random seed 3534 // (similarity score: 44.57%) +3. Test entry #86: Index Container related data with random seed 813 // (similarity score: 41.90%) +4. Test entry #58: Cloud Index related data with random seed 16697 // (similarity score: 41.66%) +5. Test entry #1: Data-structure Index related data with random seed 16730 // (similarity score: 41.23%) +6. Test entry #44: Cloud Index related data with random seed 12754 // (similarity score: 38.93%) +7. Test entry #305: Index Database related data with random seed 10951 // (similarity score: 38.05%) +8. Test entry #31: ORM Index related data with random seed 2163 // (similarity score: 38.00%) +9. Test entry #288: Index Pull-request related data with random seed 4855 // (similarity score: 37.81%) +10. Test entry #358: Index Stubbing related data with random seed 6263 // (similarity score: 37.00%) +⏱️ Get operation took: 997 ms +--------------------------------------------------- +[Step 55/500] Processing... +Getting data: 'Framework' +1. Test entry #227: Framework Algorithm related data with random seed 19206 // (similarity score: 31.85%) +2. Test entry #275: Framework Orchestration related data with random seed 12664 // (similarity score: 31.36%) +3. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 29.40%) +4. Test entry #126: Architecture Framework related data with random seed 1304 // (similarity score: 28.78%) +5. Test entry #386: Framework Testing related data with random seed 14924 // (similarity score: 28.77%) +6. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 26.61%) +7. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 24.69%) +8. Test entry #365: Framework ERD related data with random seed 21870 // (similarity score: 20.97%) +9. 
Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 20.94%) +10. Test entry #228: Branching Framework related data with random seed 10213 // (similarity score: 19.21%) +⏱️ Get operation took: 1063 ms +--------------------------------------------------- +[Step 56/500] Processing... +Getting data: 'Boilerplate' +1. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 41.22%) +2. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 36.21%) +3. Test entry #280: Boilerplate Search related data with random seed 32715 // (similarity score: 30.77%) +4. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 30.65%) +5. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 29.65%) +6. Test entry #12: Boilerplate YAML related data with random seed 29263 // (similarity score: 29.38%) +7. Test entry #196: Boilerplate Synchronous related data with random seed 18887 // (similarity score: 29.35%) +8. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 28.89%) +9. Test entry #443: Threading Boilerplate related data with random seed 23641 // (similarity score: 28.42%) +10. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 27.41%) +⏱️ Get operation took: 1066 ms +--------------------------------------------------- +[Step 57/500] Processing... +Getting data: 'YAML' +1. Test entry #295: YAML Tutorial related data with random seed 7734 // (similarity score: 62.26%) +2. Test entry #12: Boilerplate YAML related data with random seed 29263 // (similarity score: 61.60%) +3. Test entry #291: YAML Performance related data with random seed 3598 // (similarity score: 57.08%) +4. Test entry #332: Performance YAML related data with random seed 23826 // (similarity score: 54.83%) +5. 
Test entry #412: YAML Orchestration related data with random seed 30910 // (similarity score: 53.55%) +6. Test entry #395: YAML Threading related data with random seed 20711 // (similarity score: 53.16%) +7. Test entry #424: Pull-request YAML related data with random seed 9732 // (similarity score: 52.19%) +8. Test entry #62: YAML CSV related data with random seed 16118 // (similarity score: 51.86%) +9. Test entry #185: Mocking YAML related data with random seed 11811 // (similarity score: 51.82%) +10. Test entry #57: YAML JSON related data with random seed 19740 // (similarity score: 50.92%) +⏱️ Get operation took: 1066 ms +--------------------------------------------------- +[Step 58/500] Processing... +Getting data: 'Network' +1. Test entry #283: Network Network related data with random seed 21734 // (similarity score: 44.63%) +2. Test entry #163: Network Orchestration related data with random seed 17906 // (similarity score: 36.14%) +3. Test entry #29: Mocking Network related data with random seed 20303 // (similarity score: 35.95%) +4. Test entry #190: Mocking Network related data with random seed 32119 // (similarity score: 35.84%) +5. Test entry #189: UML Network related data with random seed 6297 // (similarity score: 32.43%) +6. Test entry #90: Network GUI related data with random seed 6917 // (similarity score: 30.73%) +7. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 29.31%) +8. Test entry #49: ORM Network related data with random seed 23604 // (similarity score: 26.22%) +9. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 21.70%) +10. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 19.05%) +⏱️ Get operation took: 981 ms +--------------------------------------------------- +[Step 59/500] Processing... +Getting data: 'Container' +1. 
Test entry #182: Testing Container related data with random seed 11294 // (similarity score: 44.10%) +2. Test entry #446: Container RESTful related data with random seed 10831 // (similarity score: 40.01%) +3. Test entry #233: Unit-test Container related data with random seed 19623 // (similarity score: 39.50%) +4. Test entry #400: GUI Container related data with random seed 26968 // (similarity score: 39.49%) +5. Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 38.28%) +6. Test entry #160: Container NoSQL related data with random seed 15570 // (similarity score: 37.91%) +7. Test entry #86: Index Container related data with random seed 813 // (similarity score: 37.74%) +8. Test entry #262: Container Branching related data with random seed 22849 // (similarity score: 37.24%) +9. Test entry #65: Encryption Container related data with random seed 20902 // (similarity score: 36.61%) +10. Test entry #170: ORM Container related data with random seed 30245 // (similarity score: 32.73%) +⏱️ Get operation took: 1009 ms +--------------------------------------------------- +[Step 60/500] Processing... +Getting data: 'Documentation' +1. Test entry #468: Documentation Monitoring related data with random seed 13294 // (similarity score: 41.51%) +2. Test entry #81: Example Documentation related data with random seed 4554 // (similarity score: 34.95%) +3. Test entry #187: Documentation Cloud related data with random seed 9545 // (similarity score: 34.85%) +4. Test entry #390: Documentation Diagram related data with random seed 8258 // (similarity score: 32.91%) +5. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 30.49%) +6. Test entry #176: Microservices Documentation related data with random seed 28218 // (similarity score: 28.00%) +7. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 27.51%) +8. 
Test entry #93: Node Documentation related data with random seed 10928 // (similarity score: 27.08%) +9. Test entry #319: Documentation Example related data with random seed 22093 // (similarity score: 26.81%) +10. Test entry #246: Node Documentation related data with random seed 8868 // (similarity score: 26.21%) +⏱️ Get operation took: 932 ms +--------------------------------------------------- +[Step 61/500] Processing... +Getting data: 'Branching' +1. Test entry #436: Example Branching related data with random seed 25763 // (similarity score: 54.14%) +2. Test entry #228: Branching Framework related data with random seed 10213 // (similarity score: 51.21%) +3. Test entry #384: Branching Sample related data with random seed 29822 // (similarity score: 50.90%) +4. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 47.89%) +5. Test entry #262: Container Branching related data with random seed 22849 // (similarity score: 47.76%) +6. Test entry #469: Branching GraphQL related data with random seed 16900 // (similarity score: 40.80%) +7. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 40.19%) +8. Test entry #23: Branching Microservices related data with random seed 24428 // (similarity score: 38.88%) +9. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 29.49%) +10. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 26.91%) +⏱️ Get operation took: 992 ms +--------------------------------------------------- +[Step 62/500] Processing... +Getting data: 'SQL' +1. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 41.94%) +2. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 41.80%) +3. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 37.96%) +4. 
Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 37.76%) +5. Test entry #152: SQL Load-balancing related data with random seed 9262 // (similarity score: 37.07%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 36.37%) +7. Test entry #137: SQL Parallelism related data with random seed 8672 // (similarity score: 35.23%) +8. Test entry #431: SQL Version-control related data with random seed 505 // (similarity score: 32.86%) +9. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 30.91%) +10. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 30.51%) +⏱️ Get operation took: 1001 ms +--------------------------------------------------- +[Step 63/500] Processing... +Getting data: 'Synchronous' +1. Test entry #196: Boilerplate Synchronous related data with random seed 18887 // (similarity score: 42.32%) +2. Test entry #287: Example Synchronous related data with random seed 18676 // (similarity score: 40.82%) +3. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 32.63%) +4. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 32.34%) +5. Test entry #411: Security Synchronous related data with random seed 11940 // (similarity score: 31.90%) +6. Test entry #258: XML Synchronous related data with random seed 9079 // (similarity score: 29.62%) +7. Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 29.18%) +8. Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 28.48%) +9. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 27.98%) +10. 
Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 27.69%) +⏱️ Get operation took: 946 ms +--------------------------------------------------- +[Step 64/500] Processing... +Getting data: 'XML' +1. Test entry #303: XML Tutorial related data with random seed 29551 // (similarity score: 54.70%) +2. Test entry #42: Resilient XML related data with random seed 15654 // (similarity score: 53.29%) +3. Test entry #487: XML Best-practices related data with random seed 19330 // (similarity score: 52.93%) +4. Test entry #298: XML Example related data with random seed 18435 // (similarity score: 52.15%) +5. Test entry #258: XML Synchronous related data with random seed 9079 // (similarity score: 51.57%) +6. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 50.79%) +7. Test entry #129: XML Code-review related data with random seed 19652 // (similarity score: 50.22%) +8. Test entry #475: Fault-tolerance XML related data with random seed 13158 // (similarity score: 46.25%) +9. Test entry #457: XML Recovery related data with random seed 2790 // (similarity score: 46.16%) +10. Test entry #350: XML Concurrency related data with random seed 12519 // (similarity score: 44.91%) +⏱️ Get operation took: 961 ms +--------------------------------------------------- +[Step 65/500] Processing... +Getting data: 'Flowchart' +1. Test entry #9: Diagram Flowchart related data with random seed 14766 // (similarity score: 63.49%) +2. Test entry #495: Best-practices Flowchart related data with random seed 507 // (similarity score: 59.08%) +3. Test entry #121: Flowchart SDK related data with random seed 24836 // (similarity score: 56.62%) +4. Test entry #226: Flowchart Testing related data with random seed 32215 // (similarity score: 56.46%) +5. Test entry #342: Python Flowchart related data with random seed 32467 // (similarity score: 56.34%) +6. 
Test entry #209: Index Flowchart related data with random seed 16113 // (similarity score: 54.06%) +7. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 49.88%) +8. Test entry #41: Index Flowchart related data with random seed 29094 // (similarity score: 49.84%) +9. Test entry #213: GUI GraphQL related data with random seed 9145 // (similarity score: 30.82%) +10. Test entry #469: Branching GraphQL related data with random seed 16900 // (similarity score: 24.78%) +⏱️ Get operation took: 988 ms +--------------------------------------------------- +[Step 66/500] Processing... +Getting data: 'Vector' +1. Test entry #325: Vector UX related data with random seed 2934 // (similarity score: 40.50%) +2. Test entry #444: Vector Debugging related data with random seed 5646 // (similarity score: 36.83%) +3. Test entry #134: Template Vector related data with random seed 24421 // (similarity score: 36.57%) +4. Test entry #341: Node Vector related data with random seed 30034 // (similarity score: 36.54%) +5. Test entry #334: Sharding Vector related data with random seed 29020 // (similarity score: 33.49%) +6. Test entry #499: Vector CLI related data with random seed 18419 // (similarity score: 33.05%) +7. Test entry #433: Vector E2E-test related data with random seed 22356 // (similarity score: 31.29%) +8. Test entry #461: Vector Pub-sub related data with random seed 31094 // (similarity score: 31.17%) +9. Test entry #100: Latency Vector related data with random seed 28112 // (similarity score: 30.14%) +10. Test entry #385: ORM Vector related data with random seed 277 // (similarity score: 29.36%) +⏱️ Get operation took: 987 ms +--------------------------------------------------- +[Step 67/500] Processing... +Getting data: 'Merging' +1. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 47.50%) +2. 
Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 46.78%) +3. Test entry #417: Event-driven Merging related data with random seed 13881 // (similarity score: 45.04%) +4. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 44.14%) +5. Test entry #361: Merging Logging related data with random seed 12323 // (similarity score: 38.53%) +6. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 38.19%) +7. Test entry #150: Diagram Integration-test related data with random seed 19409 // (similarity score: 21.00%) +8. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 19.25%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 18.92%) +10. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 16.25%) +⏱️ Get operation took: 1009 ms +--------------------------------------------------- +[Step 68/500] Processing... +Getting data: 'Mocking' +1. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 53.76%) +2. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 52.01%) +3. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 50.43%) +4. Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 49.94%) +5. Test entry #7: Mocking Parallelism related data with random seed 19010 // (similarity score: 48.48%) +6. Test entry #54: Concurrency Mocking related data with random seed 20846 // (similarity score: 45.54%) +7. Test entry #330: Boilerplate Mocking related data with random seed 5771 // (similarity score: 45.53%) +8. Test entry #190: Mocking Network related data with random seed 32119 // (similarity score: 44.81%) +9. 
Test entry #29: Mocking Network related data with random seed 20303 // (similarity score: 44.44%) +10. Test entry #185: Mocking YAML related data with random seed 11811 // (similarity score: 42.85%) +⏱️ Get operation took: 934 ms +--------------------------------------------------- +[Step 69/500] Processing... +Getting data: 'UML' +1. Test entry #466: UML Optimization related data with random seed 21437 // (similarity score: 51.87%) +2. Test entry #66: UX UML related data with random seed 3512 // (similarity score: 50.20%) +3. Test entry #17: Tutorial UML related data with random seed 12772 // (similarity score: 49.59%) +4. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 47.11%) +5. Test entry #189: UML Network related data with random seed 6297 // (similarity score: 45.74%) +6. Test entry #247: UML Integration-test related data with random seed 29544 // (similarity score: 45.62%) +7. Test entry #474: UML Latency related data with random seed 18011 // (similarity score: 45.46%) +8. Test entry #377: UML GUI related data with random seed 13474 // (similarity score: 44.68%) +9. Test entry #4: Design-patterns UML related data with random seed 16565 // (similarity score: 44.27%) +10. Test entry #310: RESTful UML related data with random seed 5709 // (similarity score: 43.88%) +⏱️ Get operation took: 999 ms +--------------------------------------------------- +[Step 70/500] Processing... +Getting data: 'Event-driven' +1. Test entry #354: Load-balancing Event-driven related data with random seed 18844 // (similarity score: 45.07%) +2. Test entry #452: Event-driven Performance related data with random seed 26374 // (similarity score: 44.21%) +3. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 44.20%) +4. Test entry #316: GUI Event-driven related data with random seed 24961 // (similarity score: 43.28%) +5. 
Test entry #168: Event-driven Logging related data with random seed 22637 // (similarity score: 41.27%) +6. Test entry #260: CSV Event-driven related data with random seed 23556 // (similarity score: 40.58%) +7. Test entry #309: Event-driven Orchestration related data with random seed 21023 // (similarity score: 40.56%) +8. Test entry #142: Event-driven Microservices related data with random seed 7351 // (similarity score: 40.55%) +9. Test entry #45: Event-driven Load-balancing related data with random seed 5105 // (similarity score: 39.29%) +10. Test entry #297: Event-driven Code-review related data with random seed 23440 // (similarity score: 39.27%) +⏱️ Get operation took: 950 ms +--------------------------------------------------- +[Step 71/500] Processing... +Getting data: 'Event-driven' +1. Test entry #354: Load-balancing Event-driven related data with random seed 18844 // (similarity score: 45.07%) +2. Test entry #452: Event-driven Performance related data with random seed 26374 // (similarity score: 44.21%) +3. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 44.20%) +4. Test entry #316: GUI Event-driven related data with random seed 24961 // (similarity score: 43.28%) +5. Test entry #168: Event-driven Logging related data with random seed 22637 // (similarity score: 41.27%) +6. Test entry #260: CSV Event-driven related data with random seed 23556 // (similarity score: 40.58%) +7. Test entry #309: Event-driven Orchestration related data with random seed 21023 // (similarity score: 40.56%) +8. Test entry #142: Event-driven Microservices related data with random seed 7351 // (similarity score: 40.55%) +9. Test entry #45: Event-driven Load-balancing related data with random seed 5105 // (similarity score: 39.29%) +10. 
Test entry #297: Event-driven Code-review related data with random seed 23440 // (similarity score: 39.27%) +⏱️ Get operation took: 989 ms +--------------------------------------------------- +[Step 72/500] Processing... +Getting data: 'JSON' +1. Test entry #389: Data-structure JSON related data with random seed 23144 // (similarity score: 51.74%) +2. Test entry #156: JSON Optimization related data with random seed 28003 // (similarity score: 49.61%) +3. Test entry #413: JSON Alerting related data with random seed 8852 // (similarity score: 47.02%) +4. Test entry #113: JSON API related data with random seed 11466 // (similarity score: 46.74%) +5. Test entry #85: JSON Vector related data with random seed 23519 // (similarity score: 46.52%) +6. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 43.98%) +7. Test entry #218: JSON Caching related data with random seed 29229 // (similarity score: 43.91%) +8. Test entry #143: JSON XML related data with random seed 20028 // (similarity score: 42.01%) +9. Test entry #57: YAML JSON related data with random seed 19740 // (similarity score: 40.00%) +10. Test entry #420: JSON Microservices related data with random seed 9271 // (similarity score: 39.90%) +⏱️ Get operation took: 950 ms +--------------------------------------------------- +[Step 73/500] Processing... +Getting data: 'Backup' +1. Test entry #434: Tutorial Backup related data with random seed 6053 // (similarity score: 46.60%) +2. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 44.75%) +3. Test entry #398: Backup Node related data with random seed 21714 // (similarity score: 39.31%) +4. Test entry #410: Backup Parallelism related data with random seed 5421 // (similarity score: 38.11%) +5. Test entry #15: Backup Orchestration related data with random seed 32162 // (similarity score: 37.94%) +6. 
Test entry #145: CI/CD Backup related data with random seed 22567 // (similarity score: 35.01%) +7. Test entry #104: Backup E2E-test related data with random seed 3184 // (similarity score: 29.08%) +8. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 25.60%) +9. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 24.70%) +10. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 19.74%) +⏱️ Get operation took: 988 ms +--------------------------------------------------- +[Step 74/500] Processing... +Getting data: 'Message-queue' +1. Test entry #147: Message-queue Container related data with random seed 31302 // (similarity score: 55.00%) +2. Test entry #396: Sample Message-queue related data with random seed 1074 // (similarity score: 54.96%) +3. Test entry #445: Message-queue Consensus related data with random seed 22969 // (similarity score: 51.62%) +4. Test entry #447: Message-queue Unit-test related data with random seed 12353 // (similarity score: 48.23%) +5. Test entry #225: Encryption Message-queue related data with random seed 3677 // (similarity score: 45.23%) +6. Test entry #264: SQL Message-queue related data with random seed 315 // (similarity score: 44.77%) +7. Test entry #56: Replication Message-queue related data with random seed 23394 // (similarity score: 44.68%) +8. Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 30.91%) +9. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 27.49%) +10. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 24.44%) +⏱️ Get operation took: 954 ms +--------------------------------------------------- +[Step 75/500] Processing... +Getting data: 'UML' +1. 
Test entry #466: UML Optimization related data with random seed 21437 // (similarity score: 51.87%) +2. Test entry #66: UX UML related data with random seed 3512 // (similarity score: 50.20%) +3. Test entry #17: Tutorial UML related data with random seed 12772 // (similarity score: 49.59%) +4. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 47.11%) +5. Test entry #189: UML Network related data with random seed 6297 // (similarity score: 45.74%) +6. Test entry #247: UML Integration-test related data with random seed 29544 // (similarity score: 45.62%) +7. Test entry #474: UML Latency related data with random seed 18011 // (similarity score: 45.46%) +8. Test entry #377: UML GUI related data with random seed 13474 // (similarity score: 44.68%) +9. Test entry #4: Design-patterns UML related data with random seed 16565 // (similarity score: 44.27%) +10. Test entry #310: RESTful UML related data with random seed 5709 // (similarity score: 43.88%) +⏱️ Get operation took: 1049 ms +--------------------------------------------------- +[Step 76/500] Processing... +Getting data: 'Blockchain' +1. Test entry #117: Design-patterns Blockchain related data with random seed 16711 // (similarity score: 41.64%) +2. Test entry #231: Blockchain Data-structure related data with random seed 2611 // (similarity score: 38.85%) +3. Test entry #61: Blockchain Microservices related data with random seed 13172 // (similarity score: 38.28%) +4. Test entry #171: Blockchain Data-structure related data with random seed 29630 // (similarity score: 33.73%) +5. Test entry #237: UX Blockchain related data with random seed 8734 // (similarity score: 31.74%) +6. Test entry #360: GUI Blockchain related data with random seed 10277 // (similarity score: 31.69%) +7. Test entry #67: Debugging Blockchain related data with random seed 23179 // (similarity score: 30.30%) +8. 
Test entry #229: Orchestration Blockchain related data with random seed 2138 // (similarity score: 30.22%) +9. Test entry #348: Blockchain SDK related data with random seed 25254 // (similarity score: 29.17%) +10. Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 27.19%) +⏱️ Get operation took: 940 ms +--------------------------------------------------- +[Step 77/500] Processing... +Getting data: 'Orchestration' +1. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 55.54%) +2. Test entry #309: Event-driven Orchestration related data with random seed 21023 // (similarity score: 51.76%) +3. Test entry #161: Orchestration Diagram related data with random seed 20102 // (similarity score: 50.18%) +4. Test entry #337: Algorithm Orchestration related data with random seed 15318 // (similarity score: 47.14%) +5. Test entry #77: Orchestration Code-review related data with random seed 28098 // (similarity score: 46.32%) +6. Test entry #163: Network Orchestration related data with random seed 17906 // (similarity score: 44.84%) +7. Test entry #275: Framework Orchestration related data with random seed 12664 // (similarity score: 44.25%) +8. Test entry #483: Orchestration Microservices related data with random seed 31494 // (similarity score: 42.41%) +9. Test entry #195: API Orchestration related data with random seed 17599 // (similarity score: 41.25%) +10. Test entry #412: YAML Orchestration related data with random seed 30910 // (similarity score: 40.32%) +⏱️ Get operation took: 1150 ms +--------------------------------------------------- +[Step 78/500] Processing... +Getting data: 'YAML' +1. Test entry #295: YAML Tutorial related data with random seed 7734 // (similarity score: 62.26%) +2. Test entry #12: Boilerplate YAML related data with random seed 29263 // (similarity score: 61.60%) +3. 
Test entry #291: YAML Performance related data with random seed 3598 // (similarity score: 57.08%) +4. Test entry #332: Performance YAML related data with random seed 23826 // (similarity score: 54.83%) +5. Test entry #412: YAML Orchestration related data with random seed 30910 // (similarity score: 53.55%) +6. Test entry #395: YAML Threading related data with random seed 20711 // (similarity score: 53.16%) +7. Test entry #424: Pull-request YAML related data with random seed 9732 // (similarity score: 52.19%) +8. Test entry #62: YAML CSV related data with random seed 16118 // (similarity score: 51.86%) +9. Test entry #185: Mocking YAML related data with random seed 11811 // (similarity score: 51.82%) +10. Test entry #57: YAML JSON related data with random seed 19740 // (similarity score: 50.92%) +⏱️ Get operation took: 999 ms +--------------------------------------------------- +[Step 79/500] Processing... +Getting data: 'Logging' +1. Test entry #168: Event-driven Logging related data with random seed 22637 // (similarity score: 52.57%) +2. Test entry #172: Logging Architecture related data with random seed 7305 // (similarity score: 49.56%) +3. Test entry #222: Logging Library related data with random seed 32039 // (similarity score: 46.22%) +4. Test entry #361: Merging Logging related data with random seed 12323 // (similarity score: 45.44%) +5. Test entry #98: Asynchronous Logging related data with random seed 30841 // (similarity score: 45.06%) +6. Test entry #476: Logging API related data with random seed 8268 // (similarity score: 42.63%) +7. Test entry #245: Logging Testing related data with random seed 32263 // (similarity score: 42.44%) +8. Test entry #399: Logging Encryption related data with random seed 25211 // (similarity score: 37.28%) +9. Test entry #149: Encryption Logging related data with random seed 27963 // (similarity score: 35.73%) +10. 
Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 28.02%) +⏱️ Get operation took: 940 ms +--------------------------------------------------- +[Step 80/500] Processing... +Getting data: 'Flowchart' +1. Test entry #9: Diagram Flowchart related data with random seed 14766 // (similarity score: 63.49%) +2. Test entry #495: Best-practices Flowchart related data with random seed 507 // (similarity score: 59.08%) +3. Test entry #121: Flowchart SDK related data with random seed 24836 // (similarity score: 56.62%) +4. Test entry #226: Flowchart Testing related data with random seed 32215 // (similarity score: 56.46%) +5. Test entry #342: Python Flowchart related data with random seed 32467 // (similarity score: 56.34%) +6. Test entry #209: Index Flowchart related data with random seed 16113 // (similarity score: 54.06%) +7. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 49.88%) +8. Test entry #41: Index Flowchart related data with random seed 29094 // (similarity score: 49.84%) +9. Test entry #213: GUI GraphQL related data with random seed 9145 // (similarity score: 30.82%) +10. Test entry #469: Branching GraphQL related data with random seed 16900 // (similarity score: 24.78%) +⏱️ Get operation took: 1002 ms +--------------------------------------------------- +[Step 81/500] Processing... +Getting data: 'Branching' +1. Test entry #436: Example Branching related data with random seed 25763 // (similarity score: 54.14%) +2. Test entry #228: Branching Framework related data with random seed 10213 // (similarity score: 51.21%) +3. Test entry #384: Branching Sample related data with random seed 29822 // (similarity score: 50.90%) +4. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 47.89%) +5. Test entry #262: Container Branching related data with random seed 22849 // (similarity score: 47.76%) +6. 
Test entry #469: Branching GraphQL related data with random seed 16900 // (similarity score: 40.80%) +7. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 40.19%) +8. Test entry #23: Branching Microservices related data with random seed 24428 // (similarity score: 38.88%) +9. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 29.49%) +10. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 26.91%) +⏱️ Get operation took: 1008 ms +--------------------------------------------------- +[Step 82/500] Processing... +Getting data: 'JSON' +1. Test entry #389: Data-structure JSON related data with random seed 23144 // (similarity score: 51.74%) +2. Test entry #156: JSON Optimization related data with random seed 28003 // (similarity score: 49.61%) +3. Test entry #413: JSON Alerting related data with random seed 8852 // (similarity score: 47.02%) +4. Test entry #113: JSON API related data with random seed 11466 // (similarity score: 46.74%) +5. Test entry #85: JSON Vector related data with random seed 23519 // (similarity score: 46.52%) +6. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 43.98%) +7. Test entry #218: JSON Caching related data with random seed 29229 // (similarity score: 43.91%) +8. Test entry #143: JSON XML related data with random seed 20028 // (similarity score: 42.01%) +9. Test entry #57: YAML JSON related data with random seed 19740 // (similarity score: 40.00%) +10. Test entry #420: JSON Microservices related data with random seed 9271 // (similarity score: 39.90%) +⏱️ Get operation took: 979 ms +--------------------------------------------------- +[Step 83/500] Processing... +Getting data: 'UML' +1. Test entry #466: UML Optimization related data with random seed 21437 // (similarity score: 51.87%) +2. 
Test entry #66: UX UML related data with random seed 3512 // (similarity score: 50.20%) +3. Test entry #17: Tutorial UML related data with random seed 12772 // (similarity score: 49.59%) +4. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 47.11%) +5. Test entry #189: UML Network related data with random seed 6297 // (similarity score: 45.74%) +6. Test entry #247: UML Integration-test related data with random seed 29544 // (similarity score: 45.62%) +7. Test entry #474: UML Latency related data with random seed 18011 // (similarity score: 45.46%) +8. Test entry #377: UML GUI related data with random seed 13474 // (similarity score: 44.68%) +9. Test entry #4: Design-patterns UML related data with random seed 16565 // (similarity score: 44.27%) +10. Test entry #310: RESTful UML related data with random seed 5709 // (similarity score: 43.88%) +⏱️ Get operation took: 948 ms +--------------------------------------------------- +[Step 84/500] Processing... +Getting data: 'Asynchronous' +1. Test entry #74: Asynchronous Asynchronous related data with random seed 32283 // (similarity score: 46.54%) +2. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 44.22%) +3. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 43.79%) +4. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 43.65%) +5. Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 43.29%) +6. Test entry #311: Asynchronous Best-practices related data with random seed 24139 // (similarity score: 41.57%) +7. Test entry #210: Asynchronous UI related data with random seed 16307 // (similarity score: 40.59%) +8. Test entry #79: Algorithm Asynchronous related data with random seed 18478 // (similarity score: 40.11%) +9. 
Test entry #240: Algorithm Asynchronous related data with random seed 6330 // (similarity score: 38.61%) +10. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 36.49%) +⏱️ Get operation took: 948 ms +--------------------------------------------------- +[Step 85/500] Processing... +Getting data: 'Example' +1. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 21.02%) +2. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 20.91%) +3. Test entry #81: Example Documentation related data with random seed 4554 // (similarity score: 20.87%) +4. Test entry #43: Documentation Example related data with random seed 5875 // (similarity score: 19.56%) +5. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 19.26%) +6. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 18.68%) +7. Test entry #319: Documentation Example related data with random seed 22093 // (similarity score: 18.61%) +8. Test entry #297: Event-driven Code-review related data with random seed 23440 // (similarity score: 17.50%) +9. Test entry #138: Boilerplate ERD related data with random seed 17973 // (similarity score: 17.39%) +10. Test entry #183: Code-review Data-structure related data with random seed 20935 // (similarity score: 16.97%) +⏱️ Get operation took: 893 ms +--------------------------------------------------- +[Step 86/500] Processing... +Getting data: 'Database' +1. Test entry #426: Database Database related data with random seed 19208 // (similarity score: 39.68%) +2. Test entry #496: Library Database related data with random seed 13646 // (similarity score: 36.63%) +3. Test entry #193: Database Resilient related data with random seed 3286 // (similarity score: 35.20%) +4. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 34.25%) +5. 
Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 33.82%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 33.21%) +7. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 32.30%) +8. Test entry #82: Database Cloud related data with random seed 12343 // (similarity score: 32.29%) +9. Test entry #346: Database Microservices related data with random seed 1068 // (similarity score: 32.20%) +10. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 32.14%) +⏱️ Get operation took: 943 ms +--------------------------------------------------- +[Step 87/500] Processing... +Getting data: 'DevOps' +1. Test entry #259: Pub-sub DevOps related data with random seed 23944 // (similarity score: 58.76%) +2. Test entry #212: Design-patterns DevOps related data with random seed 19247 // (similarity score: 57.73%) +3. Test entry #146: Profiling DevOps related data with random seed 14561 // (similarity score: 55.98%) +4. Test entry #46: Data-structure DevOps related data with random seed 24000 // (similarity score: 51.31%) +5. Test entry #269: Database DevOps related data with random seed 15436 // (similarity score: 45.93%) +6. Test entry #328: Node DevOps related data with random seed 7412 // (similarity score: 44.08%) +7. Test entry #207: DevOps JSON related data with random seed 15591 // (similarity score: 41.62%) +8. Test entry #271: Encryption DevOps related data with random seed 2715 // (similarity score: 40.67%) +9. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 29.13%) +10. Test entry #243: SDK Orchestration related data with random seed 30501 // (similarity score: 27.73%) +⏱️ Get operation took: 947 ms +--------------------------------------------------- +[Step 88/500] Processing... +Getting data: 'Performance' +1. 
Test entry #452: Event-driven Performance related data with random seed 26374 // (similarity score: 39.07%) +2. Test entry #451: Library Performance related data with random seed 31802 // (similarity score: 35.13%) +3. Test entry #192: Performance Threading related data with random seed 6244 // (similarity score: 33.15%) +4. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 32.63%) +5. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 31.90%) +6. Test entry #481: Tutorial Throughput related data with random seed 11888 // (similarity score: 31.89%) +7. Test entry #273: Data-structure Performance related data with random seed 1219 // (similarity score: 31.74%) +8. Test entry #394: Sharding Performance related data with random seed 1607 // (similarity score: 31.69%) +9. Test entry #332: Performance YAML related data with random seed 23826 // (similarity score: 30.69%) +10. Test entry #291: YAML Performance related data with random seed 3598 // (similarity score: 28.81%) +⏱️ Get operation took: 1024 ms +--------------------------------------------------- +[Step 89/500] Processing... +Getting data: 'Orchestration' +1. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 55.54%) +2. Test entry #309: Event-driven Orchestration related data with random seed 21023 // (similarity score: 51.76%) +3. Test entry #161: Orchestration Diagram related data with random seed 20102 // (similarity score: 50.18%) +4. Test entry #337: Algorithm Orchestration related data with random seed 15318 // (similarity score: 47.14%) +5. Test entry #77: Orchestration Code-review related data with random seed 28098 // (similarity score: 46.32%) +6. Test entry #163: Network Orchestration related data with random seed 17906 // (similarity score: 44.84%) +7. 
Test entry #275: Framework Orchestration related data with random seed 12664 // (similarity score: 44.25%) +8. Test entry #483: Orchestration Microservices related data with random seed 31494 // (similarity score: 42.41%) +9. Test entry #195: API Orchestration related data with random seed 17599 // (similarity score: 41.25%) +10. Test entry #412: YAML Orchestration related data with random seed 30910 // (similarity score: 40.32%) +⏱️ Get operation took: 970 ms +--------------------------------------------------- +[Step 90/500] Processing... +Getting data: 'Database' +1. Test entry #426: Database Database related data with random seed 19208 // (similarity score: 39.68%) +2. Test entry #496: Library Database related data with random seed 13646 // (similarity score: 36.63%) +3. Test entry #193: Database Resilient related data with random seed 3286 // (similarity score: 35.20%) +4. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 34.25%) +5. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 33.82%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 33.21%) +7. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 32.30%) +8. Test entry #82: Database Cloud related data with random seed 12343 // (similarity score: 32.29%) +9. Test entry #346: Database Microservices related data with random seed 1068 // (similarity score: 32.20%) +10. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 32.14%) +⏱️ Get operation took: 956 ms +--------------------------------------------------- +[Step 91/500] Processing... +Getting data: 'Architecture' +1. Test entry #126: Architecture Framework related data with random seed 1304 // (similarity score: 36.09%) +2. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 32.91%) +3. 
Test entry #140: UX Architecture related data with random seed 15002 // (similarity score: 31.89%) +4. Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 31.79%) +5. Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 30.07%) +6. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 27.81%) +7. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 26.98%) +8. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 26.33%) +9. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 25.30%) +10. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 24.95%) +⏱️ Get operation took: 933 ms +--------------------------------------------------- +[Step 92/500] Processing... +Getting data: 'Replication' +1. Test entry #71: Replication Resilient related data with random seed 6058 // (similarity score: 57.23%) +2. Test entry #109: Replication Architecture related data with random seed 27244 // (similarity score: 55.71%) +3. Test entry #24: Replication Fault-tolerance related data with random seed 9951 // (similarity score: 51.79%) +4. Test entry #374: Replication Example related data with random seed 6291 // (similarity score: 50.42%) +5. Test entry #40: Replication UI related data with random seed 1010 // (similarity score: 47.96%) +6. Test entry #56: Replication Message-queue related data with random seed 23394 // (similarity score: 46.60%) +7. Test entry #255: Profiling Replication related data with random seed 7369 // (similarity score: 45.83%) +8. Test entry #111: Replication SDK related data with random seed 8170 // (similarity score: 45.33%) +9. Test entry #470: Consensus Replication related data with random seed 9153 // (similarity score: 44.87%) +10. 
Test entry #293: Replication CLI related data with random seed 2540 // (similarity score: 43.02%) +⏱️ Get operation took: 897 ms +--------------------------------------------------- +[Step 93/500] Processing... +Getting data: 'Architecture' +1. Test entry #126: Architecture Framework related data with random seed 1304 // (similarity score: 36.09%) +2. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 32.91%) +3. Test entry #140: UX Architecture related data with random seed 15002 // (similarity score: 31.89%) +4. Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 31.79%) +5. Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 30.07%) +6. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 27.81%) +7. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 26.98%) +8. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 26.33%) +9. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 25.30%) +10. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 24.95%) +⏱️ Get operation took: 945 ms +--------------------------------------------------- +[Step 94/500] Processing... +Getting data: 'Mocking' +1. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 53.76%) +2. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 52.01%) +3. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 50.43%) +4. Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 49.94%) +5. Test entry #7: Mocking Parallelism related data with random seed 19010 // (similarity score: 48.48%) +6. 
Test entry #54: Concurrency Mocking related data with random seed 20846 // (similarity score: 45.54%) +7. Test entry #330: Boilerplate Mocking related data with random seed 5771 // (similarity score: 45.53%) +8. Test entry #190: Mocking Network related data with random seed 32119 // (similarity score: 44.81%) +9. Test entry #29: Mocking Network related data with random seed 20303 // (similarity score: 44.44%) +10. Test entry #185: Mocking YAML related data with random seed 11811 // (similarity score: 42.85%) +⏱️ Get operation took: 995 ms +--------------------------------------------------- +[Step 95/500] Processing... +Getting data: 'XML' +1. Test entry #303: XML Tutorial related data with random seed 29551 // (similarity score: 54.70%) +2. Test entry #42: Resilient XML related data with random seed 15654 // (similarity score: 53.29%) +3. Test entry #487: XML Best-practices related data with random seed 19330 // (similarity score: 52.93%) +4. Test entry #298: XML Example related data with random seed 18435 // (similarity score: 52.15%) +5. Test entry #258: XML Synchronous related data with random seed 9079 // (similarity score: 51.57%) +6. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 50.79%) +7. Test entry #129: XML Code-review related data with random seed 19652 // (similarity score: 50.22%) +8. Test entry #475: Fault-tolerance XML related data with random seed 13158 // (similarity score: 46.25%) +9. Test entry #457: XML Recovery related data with random seed 2790 // (similarity score: 46.16%) +10. Test entry #350: XML Concurrency related data with random seed 12519 // (similarity score: 44.91%) +⏱️ Get operation took: 949 ms +--------------------------------------------------- +[Step 96/500] Processing... +Getting data: 'Load-balancing' +1. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 63.40%) +2. 
Test entry #139: Load-balancing Optimization related data with random seed 31776 // (similarity score: 57.41%) +3. Test entry #27: Optimization Load-balancing related data with random seed 27403 // (similarity score: 55.07%) +4. Test entry #402: UX Load-balancing related data with random seed 5781 // (similarity score: 52.94%) +5. Test entry #302: Load-balancing Diagram related data with random seed 1050 // (similarity score: 52.56%) +6. Test entry #45: Event-driven Load-balancing related data with random seed 5105 // (similarity score: 52.48%) +7. Test entry #354: Load-balancing Event-driven related data with random seed 18844 // (similarity score: 47.54%) +8. Test entry #449: UI Load-balancing related data with random seed 29998 // (similarity score: 45.02%) +9. Test entry #242: Load-balancing CLI related data with random seed 21278 // (similarity score: 44.58%) +10. Test entry #216: Load-balancing Sample related data with random seed 25835 // (similarity score: 43.77%) +⏱️ Get operation took: 943 ms +--------------------------------------------------- +[Step 97/500] Processing... +Getting data: 'Encryption' +1. Test entry #382: Encryption Algorithm related data with random seed 9600 // (similarity score: 49.22%) +2. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 48.64%) +3. Test entry #315: Encryption Best-practices related data with random seed 23350 // (similarity score: 48.26%) +4. Test entry #123: Encryption Alerting related data with random seed 20180 // (similarity score: 44.90%) +5. Test entry #65: Encryption Container related data with random seed 20902 // (similarity score: 44.62%) +6. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 43.65%) +7. Test entry #480: Encryption Version-control related data with random seed 3281 // (similarity score: 43.50%) +8. Test entry #331: Encryption Sample related data with random seed 30916 // (similarity score: 43.06%) +9. 
Test entry #465: Encryption Vector related data with random seed 11764 // (similarity score: 42.15%) +10. Test entry #225: Encryption Message-queue related data with random seed 3677 // (similarity score: 41.11%) +⏱️ Get operation took: 949 ms +--------------------------------------------------- +[Step 98/500] Processing... +Getting data: 'Pull-request' +1. Test entry #438: Pull-request API related data with random seed 19781 // (similarity score: 51.53%) +2. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 50.75%) +3. Test entry #107: Resilient Pull-request related data with random seed 9028 // (similarity score: 49.75%) +4. Test entry #488: Pull-request Git related data with random seed 13085 // (similarity score: 47.13%) +5. Test entry #424: Pull-request YAML related data with random seed 9732 // (similarity score: 46.59%) +6. Test entry #288: Index Pull-request related data with random seed 4855 // (similarity score: 46.42%) +7. Test entry #28: Pull-request Pub-sub related data with random seed 15070 // (similarity score: 45.59%) +8. Test entry #166: Pull-request Unit-test related data with random seed 18963 // (similarity score: 40.27%) +9. Test entry #314: Threading RESTful related data with random seed 12875 // (similarity score: 19.17%) +10. Test entry #203: Recovery JSON related data with random seed 8776 // (similarity score: 17.99%) +⏱️ Get operation took: 1021 ms +--------------------------------------------------- +[Step 99/500] Processing... +Getting data: 'Optimization' +1. Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 39.36%) +2. Test entry #114: Node Optimization related data with random seed 20946 // (similarity score: 37.49%) +3. Test entry #281: Optimization Resilient related data with random seed 24028 // (similarity score: 37.11%) +4. Test entry #27: Optimization Load-balancing related data with random seed 27403 // (similarity score: 35.53%) +5. 
Test entry #139: Load-balancing Optimization related data with random seed 31776 // (similarity score: 35.52%) +6. Test entry #403: Optimization Code-review related data with random seed 25707 // (similarity score: 35.52%) +7. Test entry #72: Node Optimization related data with random seed 10019 // (similarity score: 34.85%) +8. Test entry #191: Optimization Threading related data with random seed 15850 // (similarity score: 33.38%) +9. Test entry #466: UML Optimization related data with random seed 21437 // (similarity score: 32.36%) +10. Test entry #437: Microservices Optimization related data with random seed 8916 // (similarity score: 31.30%) +⏱️ Get operation took: 1028 ms +--------------------------------------------------- +[Step 100/500] Processing... +Getting data: 'Diagram' +1. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 44.83%) +2. Test entry #423: Diagram RESTful related data with random seed 14813 // (similarity score: 44.09%) +3. Test entry #150: Diagram Integration-test related data with random seed 19409 // (similarity score: 43.81%) +4. Test entry #9: Diagram Flowchart related data with random seed 14766 // (similarity score: 43.28%) +5. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 40.49%) +6. Test entry #390: Documentation Diagram related data with random seed 8258 // (similarity score: 39.71%) +7. Test entry #64: Diagram NoSQL related data with random seed 3428 // (similarity score: 36.31%) +8. Test entry #161: Orchestration Diagram related data with random seed 20102 // (similarity score: 32.78%) +9. Test entry #302: Load-balancing Diagram related data with random seed 1050 // (similarity score: 31.56%) +10. 
Test entry #430: Recovery Diagram related data with random seed 24192 // (similarity score: 29.49%) +⏱️ Get operation took: 1247 ms + +📊 [BATCH REPORT] Items 51 to 100 + -> Average Latency: 985 ms + +--------------------------------------------------- +[Step 101/500] Processing... +Getting data: 'Template' +1. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 46.79%) +2. Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 41.61%) +3. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 40.40%) +4. Test entry #134: Template Vector related data with random seed 24421 // (similarity score: 38.73%) +5. Test entry #83: Template Sharding related data with random seed 32241 // (similarity score: 34.86%) +6. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 33.67%) +7. Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 33.18%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 30.02%) +9. Test entry #322: ORM Template related data with random seed 28110 // (similarity score: 29.98%) +10. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 27.79%) +⏱️ Get operation took: 1028 ms +--------------------------------------------------- +[Step 102/500] Processing... +Getting data: 'DevOps' +1. Test entry #259: Pub-sub DevOps related data with random seed 23944 // (similarity score: 58.76%) +2. Test entry #212: Design-patterns DevOps related data with random seed 19247 // (similarity score: 57.73%) +3. Test entry #146: Profiling DevOps related data with random seed 14561 // (similarity score: 55.98%) +4. Test entry #46: Data-structure DevOps related data with random seed 24000 // (similarity score: 51.31%) +5. 
Test entry #269: Database DevOps related data with random seed 15436 // (similarity score: 45.93%) +6. Test entry #328: Node DevOps related data with random seed 7412 // (similarity score: 44.08%) +7. Test entry #207: DevOps JSON related data with random seed 15591 // (similarity score: 41.62%) +8. Test entry #271: Encryption DevOps related data with random seed 2715 // (similarity score: 40.67%) +9. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 29.13%) +10. Test entry #243: SDK Orchestration related data with random seed 30501 // (similarity score: 27.73%) +⏱️ Get operation took: 1038 ms +--------------------------------------------------- +[Step 103/500] Processing... +Getting data: 'Cloud' +1. Test entry #497: Cloud Profiling related data with random seed 11981 // (similarity score: 38.74%) +2. Test entry #270: Cloud Consensus related data with random seed 15128 // (similarity score: 36.08%) +3. Test entry #173: Sample Cloud related data with random seed 3561 // (similarity score: 36.02%) +4. Test entry #154: Cloud Search related data with random seed 2185 // (similarity score: 35.82%) +5. Test entry #58: Cloud Index related data with random seed 16697 // (similarity score: 33.17%) +6. Test entry #122: Cloud Websocket related data with random seed 14024 // (similarity score: 31.07%) +7. Test entry #116: NoSQL Cloud related data with random seed 22178 // (similarity score: 30.67%) +8. Test entry #178: Cloud XML related data with random seed 27579 // (similarity score: 30.58%) +9. Test entry #44: Cloud Index related data with random seed 12754 // (similarity score: 30.20%) +10. Test entry #187: Documentation Cloud related data with random seed 9545 // (similarity score: 26.80%) +⏱️ Get operation took: 999 ms +--------------------------------------------------- +[Step 104/500] Processing... +Getting data: 'Throughput' +1. 
Test entry #481: Tutorial Throughput related data with random seed 11888 // (similarity score: 48.81%) +2. Test entry #80: Fault-tolerance Throughput related data with random seed 27910 // (similarity score: 45.02%) +3. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 44.96%) +4. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 40.15%) +5. Test entry #355: Throughput Sharding related data with random seed 6245 // (similarity score: 39.86%) +6. Test entry #6: CI/CD Throughput related data with random seed 5729 // (similarity score: 38.72%) +7. Test entry #144: Throughput ORM related data with random seed 18100 // (similarity score: 35.68%) +8. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 35.66%) +9. Test entry #335: Throughput E2E-test related data with random seed 11728 // (similarity score: 34.04%) +10. Test entry #294: ORM Throughput related data with random seed 31130 // (similarity score: 33.56%) +⏱️ Get operation took: 990 ms +--------------------------------------------------- +[Step 105/500] Processing... +Getting data: 'YAML' +1. Test entry #295: YAML Tutorial related data with random seed 7734 // (similarity score: 62.26%) +2. Test entry #12: Boilerplate YAML related data with random seed 29263 // (similarity score: 61.60%) +3. Test entry #291: YAML Performance related data with random seed 3598 // (similarity score: 57.08%) +4. Test entry #332: Performance YAML related data with random seed 23826 // (similarity score: 54.83%) +5. Test entry #412: YAML Orchestration related data with random seed 30910 // (similarity score: 53.55%) +6. Test entry #395: YAML Threading related data with random seed 20711 // (similarity score: 53.16%) +7. Test entry #424: Pull-request YAML related data with random seed 9732 // (similarity score: 52.19%) +8. 
Test entry #62: YAML CSV related data with random seed 16118 // (similarity score: 51.86%) +9. Test entry #185: Mocking YAML related data with random seed 11811 // (similarity score: 51.82%) +10. Test entry #57: YAML JSON related data with random seed 19740 // (similarity score: 50.92%) +⏱️ Get operation took: 1039 ms +--------------------------------------------------- +[Step 106/500] Processing... +Getting data: 'Design-patterns' +1. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 63.80%) +2. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 62.09%) +3. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 61.76%) +4. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 57.41%) +5. Test entry #115: UX Design-patterns related data with random seed 14554 // (similarity score: 56.30%) +6. Test entry #4: Design-patterns UML related data with random seed 16565 // (similarity score: 50.76%) +7. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 49.55%) +8. Test entry #498: Design-patterns Python related data with random seed 16866 // (similarity score: 49.27%) +9. Test entry #212: Design-patterns DevOps related data with random seed 19247 // (similarity score: 46.55%) +10. Test entry #117: Design-patterns Blockchain related data with random seed 16711 // (similarity score: 45.78%) +⏱️ Get operation took: 993 ms +--------------------------------------------------- +[Step 107/500] Processing... +Getting data: 'Message-queue' +1. Test entry #147: Message-queue Container related data with random seed 31302 // (similarity score: 55.00%) +2. Test entry #396: Sample Message-queue related data with random seed 1074 // (similarity score: 54.96%) +3. 
Test entry #445: Message-queue Consensus related data with random seed 22969 // (similarity score: 51.62%) +4. Test entry #447: Message-queue Unit-test related data with random seed 12353 // (similarity score: 48.23%) +5. Test entry #225: Encryption Message-queue related data with random seed 3677 // (similarity score: 45.23%) +6. Test entry #264: SQL Message-queue related data with random seed 315 // (similarity score: 44.77%) +7. Test entry #56: Replication Message-queue related data with random seed 23394 // (similarity score: 44.68%) +8. Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 30.91%) +9. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 27.49%) +10. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 24.44%) +⏱️ Get operation took: 992 ms +--------------------------------------------------- +[Step 108/500] Processing... +Getting data: 'Encryption' +1. Test entry #382: Encryption Algorithm related data with random seed 9600 // (similarity score: 49.22%) +2. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 48.64%) +3. Test entry #315: Encryption Best-practices related data with random seed 23350 // (similarity score: 48.26%) +4. Test entry #123: Encryption Alerting related data with random seed 20180 // (similarity score: 44.90%) +5. Test entry #65: Encryption Container related data with random seed 20902 // (similarity score: 44.62%) +6. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 43.65%) +7. Test entry #480: Encryption Version-control related data with random seed 3281 // (similarity score: 43.50%) +8. Test entry #331: Encryption Sample related data with random seed 30916 // (similarity score: 43.06%) +9. Test entry #465: Encryption Vector related data with random seed 11764 // (similarity score: 42.15%) +10. 
Test entry #225: Encryption Message-queue related data with random seed 3677 // (similarity score: 41.11%) +⏱️ Get operation took: 949 ms +--------------------------------------------------- +[Step 109/500] Processing... +Getting data: 'Integration-test' +1. Test entry #464: Example Integration-test related data with random seed 5874 // (similarity score: 51.54%) +2. Test entry #63: Testing Integration-test related data with random seed 21446 // (similarity score: 48.72%) +3. Test entry #150: Diagram Integration-test related data with random seed 19409 // (similarity score: 41.04%) +4. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 40.67%) +5. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 40.11%) +6. Test entry #455: E2E-test Integration-test related data with random seed 4838 // (similarity score: 39.24%) +7. Test entry #485: Integration-test Pub-sub related data with random seed 29641 // (similarity score: 38.47%) +8. Test entry #247: UML Integration-test related data with random seed 29544 // (similarity score: 38.16%) +9. Test entry #307: NoSQL Integration-test related data with random seed 5046 // (similarity score: 33.52%) +10. Test entry #92: Websocket Integration-test related data with random seed 32561 // (similarity score: 27.10%) +⏱️ Get operation took: 1054 ms +--------------------------------------------------- +[Step 110/500] Processing... +Getting data: 'Stubbing' +1. Test entry #368: Stubbing Resilient related data with random seed 618 // (similarity score: 56.85%) +2. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 52.11%) +3. Test entry #406: Debugging Stubbing related data with random seed 28773 // (similarity score: 52.05%) +4. Test entry #18: UX Stubbing related data with random seed 14060 // (similarity score: 50.48%) +5. 
Test entry #2: Database Stubbing related data with random seed 28361 // (similarity score: 48.35%) +6. Test entry #263: CLI Stubbing related data with random seed 13486 // (similarity score: 46.44%) +7. Test entry #53: Blockchain Stubbing related data with random seed 10771 // (similarity score: 46.20%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 46.19%) +9. Test entry #317: Stubbing CI/CD related data with random seed 3418 // (similarity score: 43.34%) +10. Test entry #358: Index Stubbing related data with random seed 6263 // (similarity score: 40.32%) +⏱️ Get operation took: 1093 ms +--------------------------------------------------- +[Step 111/500] Processing... +Getting data: 'Mocking' +1. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 53.76%) +2. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 52.01%) +3. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 50.43%) +4. Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 49.94%) +5. Test entry #7: Mocking Parallelism related data with random seed 19010 // (similarity score: 48.48%) +6. Test entry #54: Concurrency Mocking related data with random seed 20846 // (similarity score: 45.54%) +7. Test entry #330: Boilerplate Mocking related data with random seed 5771 // (similarity score: 45.53%) +8. Test entry #190: Mocking Network related data with random seed 32119 // (similarity score: 44.81%) +9. Test entry #29: Mocking Network related data with random seed 20303 // (similarity score: 44.44%) +10. Test entry #185: Mocking YAML related data with random seed 11811 // (similarity score: 42.85%) +⏱️ Get operation took: 1139 ms +--------------------------------------------------- +[Step 112/500] Processing... +Getting data: 'Security' +1. 
Test entry #411: Security Synchronous related data with random seed 11940 // (similarity score: 37.71%) +2. Test entry #174: Best-practices Security related data with random seed 18653 // (similarity score: 35.63%) +3. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 29.96%) +4. Test entry #268: GUI Security related data with random seed 8853 // (similarity score: 27.74%) +5. Test entry #315: Encryption Best-practices related data with random seed 23350 // (similarity score: 27.15%) +6. Test entry #123: Encryption Alerting related data with random seed 20180 // (similarity score: 25.45%) +7. Test entry #234: Security CI/CD related data with random seed 25834 // (similarity score: 25.03%) +8. Test entry #149: Encryption Logging related data with random seed 27963 // (similarity score: 24.16%) +9. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 24.08%) +10. Test entry #382: Encryption Algorithm related data with random seed 9600 // (similarity score: 23.77%) +⏱️ Get operation took: 994 ms +--------------------------------------------------- +[Step 113/500] Processing... +Getting data: 'Encryption' +1. Test entry #382: Encryption Algorithm related data with random seed 9600 // (similarity score: 49.22%) +2. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 48.64%) +3. Test entry #315: Encryption Best-practices related data with random seed 23350 // (similarity score: 48.26%) +4. Test entry #123: Encryption Alerting related data with random seed 20180 // (similarity score: 44.90%) +5. Test entry #65: Encryption Container related data with random seed 20902 // (similarity score: 44.62%) +6. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 43.65%) +7. Test entry #480: Encryption Version-control related data with random seed 3281 // (similarity score: 43.50%) +8. 
Test entry #331: Encryption Sample related data with random seed 30916 // (similarity score: 43.06%) +9. Test entry #465: Encryption Vector related data with random seed 11764 // (similarity score: 42.15%) +10. Test entry #225: Encryption Message-queue related data with random seed 3677 // (similarity score: 41.11%) +⏱️ Get operation took: 897 ms +--------------------------------------------------- +[Step 114/500] Processing... +Getting data: 'Vector' +1. Test entry #325: Vector UX related data with random seed 2934 // (similarity score: 40.50%) +2. Test entry #444: Vector Debugging related data with random seed 5646 // (similarity score: 36.83%) +3. Test entry #134: Template Vector related data with random seed 24421 // (similarity score: 36.57%) +4. Test entry #341: Node Vector related data with random seed 30034 // (similarity score: 36.54%) +5. Test entry #334: Sharding Vector related data with random seed 29020 // (similarity score: 33.49%) +6. Test entry #499: Vector CLI related data with random seed 18419 // (similarity score: 33.05%) +7. Test entry #433: Vector E2E-test related data with random seed 22356 // (similarity score: 31.29%) +8. Test entry #461: Vector Pub-sub related data with random seed 31094 // (similarity score: 31.17%) +9. Test entry #100: Latency Vector related data with random seed 28112 // (similarity score: 30.14%) +10. Test entry #385: ORM Vector related data with random seed 277 // (similarity score: 29.36%) +⏱️ Get operation took: 999 ms +--------------------------------------------------- +[Step 115/500] Processing... +Getting data: 'Synchronous' +1. Test entry #196: Boilerplate Synchronous related data with random seed 18887 // (similarity score: 42.32%) +2. Test entry #287: Example Synchronous related data with random seed 18676 // (similarity score: 40.82%) +3. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 32.63%) +4. 
Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 32.34%) +5. Test entry #411: Security Synchronous related data with random seed 11940 // (similarity score: 31.90%) +6. Test entry #258: XML Synchronous related data with random seed 9079 // (similarity score: 29.62%) +7. Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 29.18%) +8. Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 28.48%) +9. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 27.98%) +10. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 27.69%) +⏱️ Get operation took: 946 ms +--------------------------------------------------- +[Step 116/500] Processing... +Getting data: 'Unit-test' +1. Test entry #232: Tutorial Unit-test related data with random seed 11672 // (similarity score: 51.61%) +2. Test entry #448: Tutorial Unit-test related data with random seed 7829 // (similarity score: 51.60%) +3. Test entry #186: Tutorial Unit-test related data with random seed 2934 // (similarity score: 48.49%) +4. Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 47.92%) +5. Test entry #233: Unit-test Container related data with random seed 19623 // (similarity score: 46.29%) +6. Test entry #442: Unit-test Framework related data with random seed 4836 // (similarity score: 45.43%) +7. Test entry #477: Unit-test CLI related data with random seed 17091 // (similarity score: 45.02%) +8. Test entry #120: Algorithm Unit-test related data with random seed 14608 // (similarity score: 44.87%) +9. Test entry #422: Unit-test Example related data with random seed 16994 // (similarity score: 44.07%) +10. 
Test entry #16: Node Unit-test related data with random seed 4007 // (similarity score: 41.87%) +⏱️ Get operation took: 996 ms +--------------------------------------------------- +[Step 117/500] Processing... +Getting data: 'Orchestration' +1. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 55.54%) +2. Test entry #309: Event-driven Orchestration related data with random seed 21023 // (similarity score: 51.76%) +3. Test entry #161: Orchestration Diagram related data with random seed 20102 // (similarity score: 50.18%) +4. Test entry #337: Algorithm Orchestration related data with random seed 15318 // (similarity score: 47.14%) +5. Test entry #77: Orchestration Code-review related data with random seed 28098 // (similarity score: 46.32%) +6. Test entry #163: Network Orchestration related data with random seed 17906 // (similarity score: 44.84%) +7. Test entry #275: Framework Orchestration related data with random seed 12664 // (similarity score: 44.25%) +8. Test entry #483: Orchestration Microservices related data with random seed 31494 // (similarity score: 42.41%) +9. Test entry #195: API Orchestration related data with random seed 17599 // (similarity score: 41.25%) +10. Test entry #412: YAML Orchestration related data with random seed 30910 // (similarity score: 40.32%) +⏱️ Get operation took: 952 ms +--------------------------------------------------- +[Step 118/500] Processing... +Getting data: 'E2E-test' +1. Test entry #455: E2E-test Integration-test related data with random seed 4838 // (similarity score: 65.81%) +2. Test entry #241: Alerting E2E-test related data with random seed 11551 // (similarity score: 63.85%) +3. Test entry #335: Throughput E2E-test related data with random seed 11728 // (similarity score: 63.35%) +4. Test entry #38: Profiling E2E-test related data with random seed 30992 // (similarity score: 63.16%) +5. 
Test entry #278: UML E2E-test related data with random seed 2723 // (similarity score: 61.63%) +6. Test entry #433: Vector E2E-test related data with random seed 22356 // (similarity score: 60.28%) +7. Test entry #304: E2E-test Unit-test related data with random seed 19213 // (similarity score: 59.91%) +8. Test entry #104: Backup E2E-test related data with random seed 3184 // (similarity score: 55.45%) +9. Test entry #414: GraphQL E2E-test related data with random seed 14354 // (similarity score: 53.19%) +10. Test entry #102: ERD ERD related data with random seed 20954 // (similarity score: 40.31%) +⏱️ Get operation took: 947 ms +--------------------------------------------------- +[Step 119/500] Processing... +Getting data: 'Design-patterns' +1. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 63.80%) +2. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 62.09%) +3. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 61.76%) +4. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 57.41%) +5. Test entry #115: UX Design-patterns related data with random seed 14554 // (similarity score: 56.30%) +6. Test entry #4: Design-patterns UML related data with random seed 16565 // (similarity score: 50.76%) +7. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 49.55%) +8. Test entry #498: Design-patterns Python related data with random seed 16866 // (similarity score: 49.27%) +9. Test entry #212: Design-patterns DevOps related data with random seed 19247 // (similarity score: 46.55%) +10. Test entry #117: Design-patterns Blockchain related data with random seed 16711 // (similarity score: 45.78%) +⏱️ Get operation took: 946 ms +--------------------------------------------------- +[Step 120/500] Processing... 
+Getting data: 'Alerting' +1. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 51.72%) +2. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 44.93%) +3. Test entry #371: Code-review Alerting related data with random seed 6709 // (similarity score: 40.15%) +4. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 40.12%) +5. Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 35.73%) +6. Test entry #336: API Alerting related data with random seed 704 // (similarity score: 35.52%) +7. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 34.88%) +8. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 34.33%) +9. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 33.48%) +10. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 33.22%) +⏱️ Get operation took: 948 ms +--------------------------------------------------- +[Step 121/500] Processing... +Getting data: 'Algorithm' +1. Test entry #248: Algorithm Data-structure related data with random seed 18077 // (similarity score: 46.27%) +2. Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 38.05%) +3. Test entry #392: Algorithm CSV related data with random seed 18964 // (similarity score: 36.06%) +4. Test entry #79: Algorithm Asynchronous related data with random seed 18478 // (similarity score: 34.57%) +5. Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 33.78%) +6. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 33.09%) +7. Test entry #240: Algorithm Asynchronous related data with random seed 6330 // (similarity score: 31.98%) +8. 
Test entry #227: Framework Algorithm related data with random seed 19206 // (similarity score: 31.96%) +9. Test entry #337: Algorithm Orchestration related data with random seed 15318 // (similarity score: 31.60%) +10. Test entry #120: Algorithm Unit-test related data with random seed 14608 // (similarity score: 30.48%) +⏱️ Get operation took: 947 ms +--------------------------------------------------- +[Step 122/500] Processing... +Getting data: 'Blockchain' +1. Test entry #117: Design-patterns Blockchain related data with random seed 16711 // (similarity score: 41.64%) +2. Test entry #231: Blockchain Data-structure related data with random seed 2611 // (similarity score: 38.85%) +3. Test entry #61: Blockchain Microservices related data with random seed 13172 // (similarity score: 38.28%) +4. Test entry #171: Blockchain Data-structure related data with random seed 29630 // (similarity score: 33.73%) +5. Test entry #237: UX Blockchain related data with random seed 8734 // (similarity score: 31.74%) +6. Test entry #360: GUI Blockchain related data with random seed 10277 // (similarity score: 31.69%) +7. Test entry #67: Debugging Blockchain related data with random seed 23179 // (similarity score: 30.30%) +8. Test entry #229: Orchestration Blockchain related data with random seed 2138 // (similarity score: 30.22%) +9. Test entry #348: Blockchain SDK related data with random seed 25254 // (similarity score: 29.17%) +10. Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 27.19%) +⏱️ Get operation took: 1009 ms +--------------------------------------------------- +[Step 123/500] Processing... +Getting data: 'Performance' +1. Test entry #452: Event-driven Performance related data with random seed 26374 // (similarity score: 39.07%) +2. Test entry #451: Library Performance related data with random seed 31802 // (similarity score: 35.13%) +3. 
Test entry #192: Performance Threading related data with random seed 6244 // (similarity score: 33.15%) +4. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 32.63%) +5. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 31.90%) +6. Test entry #481: Tutorial Throughput related data with random seed 11888 // (similarity score: 31.89%) +7. Test entry #273: Data-structure Performance related data with random seed 1219 // (similarity score: 31.74%) +8. Test entry #394: Sharding Performance related data with random seed 1607 // (similarity score: 31.69%) +9. Test entry #332: Performance YAML related data with random seed 23826 // (similarity score: 30.69%) +10. Test entry #291: YAML Performance related data with random seed 3598 // (similarity score: 28.81%) +⏱️ Get operation took: 1084 ms +--------------------------------------------------- +[Step 124/500] Processing... +Getting data: 'Concurrency' +1. Test entry #164: Threading Parallelism related data with random seed 13640 // (similarity score: 47.53%) +2. Test entry #441: Code-review Concurrency related data with random seed 21085 // (similarity score: 47.35%) +3. Test entry #405: Throughput Concurrency related data with random seed 5148 // (similarity score: 46.52%) +4. Test entry #54: Concurrency Mocking related data with random seed 20846 // (similarity score: 42.38%) +5. Test entry #26: Concurrency NoSQL related data with random seed 4796 // (similarity score: 42.23%) +6. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 41.66%) +7. Test entry #350: XML Concurrency related data with random seed 12519 // (similarity score: 41.43%) +8. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 41.41%) +9. Test entry #97: Testing Parallelism related data with random seed 6045 // (similarity score: 37.64%) +10. 
Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 36.83%) +⏱️ Get operation took: 998 ms +--------------------------------------------------- +[Step 125/500] Processing... +Getting data: 'Tutorial' +1. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 43.89%) +2. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 38.60%) +3. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 34.87%) +4. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 34.64%) +5. Test entry #279: Tutorial SDK related data with random seed 4966 // (similarity score: 33.42%) +6. Test entry #303: XML Tutorial related data with random seed 29551 // (similarity score: 32.40%) +7. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 32.34%) +8. Test entry #125: GraphQL Tutorial related data with random seed 2640 // (similarity score: 30.17%) +9. Test entry #448: Tutorial Unit-test related data with random seed 7829 // (similarity score: 29.62%) +10. Test entry #295: YAML Tutorial related data with random seed 7734 // (similarity score: 29.59%) +⏱️ Get operation took: 1173 ms +--------------------------------------------------- +[Step 126/500] Processing... +Getting data: 'CI/CD' +1. Test entry #324: Boilerplate CI/CD related data with random seed 11105 // (similarity score: 54.77%) +2. Test entry #479: CI/CD Data-structure related data with random seed 13652 // (similarity score: 54.51%) +3. Test entry #6: CI/CD Throughput related data with random seed 5729 // (similarity score: 53.92%) +4. Test entry #472: Library CI/CD related data with random seed 18363 // (similarity score: 53.79%) +5. Test entry #450: CI/CD Data-structure related data with random seed 9170 // (similarity score: 52.85%) +6. 
Test entry #317: Stubbing CI/CD related data with random seed 3418 // (similarity score: 52.74%) +7. Test entry #234: Security CI/CD related data with random seed 25834 // (similarity score: 52.50%) +8. Test entry #326: CI/CD Sample related data with random seed 22568 // (similarity score: 52.47%) +9. Test entry #408: CI/CD Search related data with random seed 26877 // (similarity score: 51.42%) +10. Test entry #489: Load-balancing CI/CD related data with random seed 9733 // (similarity score: 49.63%) +⏱️ Get operation took: 1123 ms +--------------------------------------------------- +[Step 127/500] Processing... +Getting data: 'Framework' +1. Test entry #227: Framework Algorithm related data with random seed 19206 // (similarity score: 31.85%) +2. Test entry #275: Framework Orchestration related data with random seed 12664 // (similarity score: 31.36%) +3. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 29.40%) +4. Test entry #126: Architecture Framework related data with random seed 1304 // (similarity score: 28.78%) +5. Test entry #386: Framework Testing related data with random seed 14924 // (similarity score: 28.77%) +6. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 26.61%) +7. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 24.69%) +8. Test entry #365: Framework ERD related data with random seed 21870 // (similarity score: 20.97%) +9. Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 20.94%) +10. Test entry #228: Branching Framework related data with random seed 10213 // (similarity score: 19.21%) +⏱️ Get operation took: 1035 ms +--------------------------------------------------- +[Step 128/500] Processing... +Getting data: 'DevOps' +1. Test entry #259: Pub-sub DevOps related data with random seed 23944 // (similarity score: 58.76%) +2. 
Test entry #212: Design-patterns DevOps related data with random seed 19247 // (similarity score: 57.73%) +3. Test entry #146: Profiling DevOps related data with random seed 14561 // (similarity score: 55.98%) +4. Test entry #46: Data-structure DevOps related data with random seed 24000 // (similarity score: 51.31%) +5. Test entry #269: Database DevOps related data with random seed 15436 // (similarity score: 45.93%) +6. Test entry #328: Node DevOps related data with random seed 7412 // (similarity score: 44.08%) +7. Test entry #207: DevOps JSON related data with random seed 15591 // (similarity score: 41.62%) +8. Test entry #271: Encryption DevOps related data with random seed 2715 // (similarity score: 40.67%) +9. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 29.13%) +10. Test entry #243: SDK Orchestration related data with random seed 30501 // (similarity score: 27.73%) +⏱️ Get operation took: 1051 ms +--------------------------------------------------- +[Step 129/500] Processing... +Getting data: 'Concurrency' +1. Test entry #164: Threading Parallelism related data with random seed 13640 // (similarity score: 47.53%) +2. Test entry #441: Code-review Concurrency related data with random seed 21085 // (similarity score: 47.35%) +3. Test entry #405: Throughput Concurrency related data with random seed 5148 // (similarity score: 46.52%) +4. Test entry #54: Concurrency Mocking related data with random seed 20846 // (similarity score: 42.38%) +5. Test entry #26: Concurrency NoSQL related data with random seed 4796 // (similarity score: 42.23%) +6. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 41.66%) +7. Test entry #350: XML Concurrency related data with random seed 12519 // (similarity score: 41.43%) +8. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 41.41%) +9. 
Test entry #97: Testing Parallelism related data with random seed 6045 // (similarity score: 37.64%) +10. Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 36.83%) +⏱️ Get operation took: 1050 ms +--------------------------------------------------- +[Step 130/500] Processing... +Getting data: 'Node' +1. Test entry #114: Node Optimization related data with random seed 20946 // (similarity score: 44.96%) +2. Test entry #72: Node Optimization related data with random seed 10019 // (similarity score: 41.61%) +3. Test entry #246: Node Documentation related data with random seed 8868 // (similarity score: 39.92%) +4. Test entry #175: Node UX related data with random seed 24087 // (similarity score: 39.28%) +5. Test entry #341: Node Vector related data with random seed 30034 // (similarity score: 39.03%) +6. Test entry #93: Node Documentation related data with random seed 10928 // (similarity score: 38.72%) +7. Test entry #11: Code-review Node related data with random seed 32027 // (similarity score: 35.38%) +8. Test entry #398: Backup Node related data with random seed 21714 // (similarity score: 34.34%) +9. Test entry #328: Node DevOps related data with random seed 7412 // (similarity score: 30.61%) +10. Test entry #16: Node Unit-test related data with random seed 4007 // (similarity score: 29.56%) +⏱️ Get operation took: 1102 ms +--------------------------------------------------- +[Step 131/500] Processing... +Getting data: 'Logging' +1. Test entry #168: Event-driven Logging related data with random seed 22637 // (similarity score: 52.57%) +2. Test entry #172: Logging Architecture related data with random seed 7305 // (similarity score: 49.56%) +3. Test entry #222: Logging Library related data with random seed 32039 // (similarity score: 46.22%) +4. Test entry #361: Merging Logging related data with random seed 12323 // (similarity score: 45.44%) +5. 
Test entry #98: Asynchronous Logging related data with random seed 30841 // (similarity score: 45.06%) +6. Test entry #476: Logging API related data with random seed 8268 // (similarity score: 42.63%) +7. Test entry #245: Logging Testing related data with random seed 32263 // (similarity score: 42.44%) +8. Test entry #399: Logging Encryption related data with random seed 25211 // (similarity score: 37.28%) +9. Test entry #149: Encryption Logging related data with random seed 27963 // (similarity score: 35.73%) +10. Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 28.02%) +⏱️ Get operation took: 1034 ms +--------------------------------------------------- +[Step 132/500] Processing... +Getting data: 'SQL' +1. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 41.94%) +2. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 41.80%) +3. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 37.96%) +4. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 37.76%) +5. Test entry #152: SQL Load-balancing related data with random seed 9262 // (similarity score: 37.07%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 36.37%) +7. Test entry #137: SQL Parallelism related data with random seed 8672 // (similarity score: 35.23%) +8. Test entry #431: SQL Version-control related data with random seed 505 // (similarity score: 32.86%) +9. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 30.91%) +10. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 30.51%) +⏱️ Get operation took: 1002 ms +--------------------------------------------------- +[Step 133/500] Processing... +Getting data: 'Boilerplate' +1. 
Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 41.22%) +2. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 36.21%) +3. Test entry #280: Boilerplate Search related data with random seed 32715 // (similarity score: 30.77%) +4. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 30.65%) +5. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 29.65%) +6. Test entry #12: Boilerplate YAML related data with random seed 29263 // (similarity score: 29.38%) +7. Test entry #196: Boilerplate Synchronous related data with random seed 18887 // (similarity score: 29.35%) +8. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 28.89%) +9. Test entry #443: Threading Boilerplate related data with random seed 23641 // (similarity score: 28.42%) +10. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 27.41%) +⏱️ Get operation took: 1038 ms +--------------------------------------------------- +[Step 134/500] Processing... +Getting data: 'UML' +1. Test entry #466: UML Optimization related data with random seed 21437 // (similarity score: 51.87%) +2. Test entry #66: UX UML related data with random seed 3512 // (similarity score: 50.20%) +3. Test entry #17: Tutorial UML related data with random seed 12772 // (similarity score: 49.59%) +4. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 47.11%) +5. Test entry #189: UML Network related data with random seed 6297 // (similarity score: 45.74%) +6. Test entry #247: UML Integration-test related data with random seed 29544 // (similarity score: 45.62%) +7. Test entry #474: UML Latency related data with random seed 18011 // (similarity score: 45.46%) +8. 
Test entry #377: UML GUI related data with random seed 13474 // (similarity score: 44.68%) +9. Test entry #4: Design-patterns UML related data with random seed 16565 // (similarity score: 44.27%) +10. Test entry #310: RESTful UML related data with random seed 5709 // (similarity score: 43.88%) +⏱️ Get operation took: 1096 ms +--------------------------------------------------- +[Step 135/500] Processing... +Getting data: 'CI/CD' +1. Test entry #324: Boilerplate CI/CD related data with random seed 11105 // (similarity score: 54.77%) +2. Test entry #479: CI/CD Data-structure related data with random seed 13652 // (similarity score: 54.51%) +3. Test entry #6: CI/CD Throughput related data with random seed 5729 // (similarity score: 53.92%) +4. Test entry #472: Library CI/CD related data with random seed 18363 // (similarity score: 53.79%) +5. Test entry #450: CI/CD Data-structure related data with random seed 9170 // (similarity score: 52.85%) +6. Test entry #317: Stubbing CI/CD related data with random seed 3418 // (similarity score: 52.74%) +7. Test entry #234: Security CI/CD related data with random seed 25834 // (similarity score: 52.50%) +8. Test entry #326: CI/CD Sample related data with random seed 22568 // (similarity score: 52.47%) +9. Test entry #408: CI/CD Search related data with random seed 26877 // (similarity score: 51.42%) +10. Test entry #489: Load-balancing CI/CD related data with random seed 9733 // (similarity score: 49.63%) +⏱️ Get operation took: 1086 ms +--------------------------------------------------- +[Step 136/500] Processing... +Getting data: 'Version-control' +1. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 57.25%) +2. Test entry #338: Version-control Monitoring related data with random seed 11905 // (similarity score: 50.77%) +3. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 49.77%) +4. 
Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 48.91%) +5. Test entry #340: Version-control Sample related data with random seed 23846 // (similarity score: 47.51%) +6. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 46.96%) +7. Test entry #155: Sharding Version-control related data with random seed 4158 // (similarity score: 43.99%) +8. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 43.17%) +9. Test entry #429: Version-control Code-review related data with random seed 14304 // (similarity score: 40.29%) +10. Test entry #431: SQL Version-control related data with random seed 505 // (similarity score: 36.46%) +⏱️ Get operation took: 1041 ms +--------------------------------------------------- +[Step 137/500] Processing... +Getting data: 'Design-patterns' +1. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 63.80%) +2. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 62.09%) +3. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 61.76%) +4. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 57.41%) +5. Test entry #115: UX Design-patterns related data with random seed 14554 // (similarity score: 56.30%) +6. Test entry #4: Design-patterns UML related data with random seed 16565 // (similarity score: 50.76%) +7. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 49.55%) +8. Test entry #498: Design-patterns Python related data with random seed 16866 // (similarity score: 49.27%) +9. Test entry #212: Design-patterns DevOps related data with random seed 19247 // (similarity score: 46.55%) +10. 
Test entry #117: Design-patterns Blockchain related data with random seed 16711 // (similarity score: 45.78%) +⏱️ Get operation took: 1005 ms +--------------------------------------------------- +[Step 138/500] Processing... +Getting data: 'GUI' +1. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 43.56%) +2. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 39.19%) +3. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 38.99%) +4. Test entry #268: GUI Security related data with random seed 8853 // (similarity score: 38.70%) +5. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 37.90%) +6. Test entry #90: Network GUI related data with random seed 6917 // (similarity score: 37.45%) +7. Test entry #400: GUI Container related data with random seed 26968 // (similarity score: 36.86%) +8. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 36.84%) +9. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 35.99%) +10. Test entry #87: GUI Encryption related data with random seed 24527 // (similarity score: 35.25%) +⏱️ Get operation took: 987 ms +--------------------------------------------------- +[Step 139/500] Processing... +Getting data: 'Code-review' +1. Test entry #297: Event-driven Code-review related data with random seed 23440 // (similarity score: 58.60%) +2. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 56.39%) +3. Test entry #371: Code-review Alerting related data with random seed 6709 // (similarity score: 56.16%) +4. Test entry #478: Code-review Index related data with random seed 24809 // (similarity score: 54.70%) +5. Test entry #108: Code-review UI related data with random seed 5301 // (similarity score: 54.03%) +6. 
Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 53.59%) +7. Test entry #429: Version-control Code-review related data with random seed 14304 // (similarity score: 49.96%) +8. Test entry #183: Code-review Data-structure related data with random seed 20935 // (similarity score: 49.10%) +9. Test entry #11: Code-review Node related data with random seed 32027 // (similarity score: 48.29%) +10. Test entry #129: XML Code-review related data with random seed 19652 // (similarity score: 44.99%) +⏱️ Get operation took: 999 ms +--------------------------------------------------- +[Step 140/500] Processing... +Getting data: 'ERD' +1. Test entry #102: ERD ERD related data with random seed 20954 // (similarity score: 51.00%) +2. Test entry #138: Boilerplate ERD related data with random seed 17973 // (similarity score: 46.57%) +3. Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 44.39%) +4. Test entry #379: Code-review ERD related data with random seed 18047 // (similarity score: 43.54%) +5. Test entry #365: Framework ERD related data with random seed 21870 // (similarity score: 42.68%) +6. Test entry #135: UI ERD related data with random seed 9172 // (similarity score: 41.81%) +7. Test entry #378: ERD Parallelism related data with random seed 25512 // (similarity score: 39.79%) +8. Test entry #252: ERD Parallelism related data with random seed 23325 // (similarity score: 39.25%) +9. Test entry #296: ERD RESTful related data with random seed 19760 // (similarity score: 36.51%) +10. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 23.26%) +⏱️ Get operation took: 992 ms +--------------------------------------------------- +[Step 141/500] Processing... +Getting data: 'YAML' +1. Test entry #295: YAML Tutorial related data with random seed 7734 // (similarity score: 62.26%) +2. 
Test entry #12: Boilerplate YAML related data with random seed 29263 // (similarity score: 61.60%) +3. Test entry #291: YAML Performance related data with random seed 3598 // (similarity score: 57.08%) +4. Test entry #332: Performance YAML related data with random seed 23826 // (similarity score: 54.83%) +5. Test entry #412: YAML Orchestration related data with random seed 30910 // (similarity score: 53.55%) +6. Test entry #395: YAML Threading related data with random seed 20711 // (similarity score: 53.16%) +7. Test entry #424: Pull-request YAML related data with random seed 9732 // (similarity score: 52.19%) +8. Test entry #62: YAML CSV related data with random seed 16118 // (similarity score: 51.86%) +9. Test entry #185: Mocking YAML related data with random seed 11811 // (similarity score: 51.82%) +10. Test entry #57: YAML JSON related data with random seed 19740 // (similarity score: 50.92%) +⏱️ Get operation took: 948 ms +--------------------------------------------------- +[Step 142/500] Processing... +Getting data: 'Fault-tolerance' +1. Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 62.09%) +2. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 57.76%) +3. Test entry #80: Fault-tolerance Throughput related data with random seed 27910 // (similarity score: 54.33%) +4. Test entry #95: Library Fault-tolerance related data with random seed 987 // (similarity score: 47.09%) +5. Test entry #197: Fault-tolerance ORM related data with random seed 21855 // (similarity score: 45.57%) +6. Test entry #397: Search Fault-tolerance related data with random seed 20761 // (similarity score: 44.70%) +7. Test entry #428: Fault-tolerance ORM related data with random seed 17392 // (similarity score: 44.01%) +8. Test entry #24: Replication Fault-tolerance related data with random seed 9951 // (similarity score: 41.75%) +9. 
Test entry #91: Fault-tolerance ORM related data with random seed 3476 // (similarity score: 41.55%) +10. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 39.60%) +⏱️ Get operation took: 1082 ms +--------------------------------------------------- +[Step 143/500] Processing... +Getting data: 'Recovery' +1. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 39.10%) +2. Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 37.60%) +3. Test entry #306: Recovery Library related data with random seed 21298 // (similarity score: 36.53%) +4. Test entry #430: Recovery Diagram related data with random seed 24192 // (similarity score: 36.20%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 35.07%) +6. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 33.87%) +7. Test entry #367: SDK Recovery related data with random seed 20345 // (similarity score: 33.28%) +8. Test entry #88: Recovery JSON related data with random seed 28249 // (similarity score: 32.39%) +9. Test entry #321: Recovery Git related data with random seed 3971 // (similarity score: 32.26%) +10. Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 30.40%) +⏱️ Get operation took: 1049 ms +--------------------------------------------------- +[Step 144/500] Processing... +Getting data: 'Logging' +1. Test entry #168: Event-driven Logging related data with random seed 22637 // (similarity score: 52.57%) +2. Test entry #172: Logging Architecture related data with random seed 7305 // (similarity score: 49.56%) +3. Test entry #222: Logging Library related data with random seed 32039 // (similarity score: 46.22%) +4. Test entry #361: Merging Logging related data with random seed 12323 // (similarity score: 45.44%) +5. 
Test entry #98: Asynchronous Logging related data with random seed 30841 // (similarity score: 45.06%) +6. Test entry #476: Logging API related data with random seed 8268 // (similarity score: 42.63%) +7. Test entry #245: Logging Testing related data with random seed 32263 // (similarity score: 42.44%) +8. Test entry #399: Logging Encryption related data with random seed 25211 // (similarity score: 37.28%) +9. Test entry #149: Encryption Logging related data with random seed 27963 // (similarity score: 35.73%) +10. Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 28.02%) +⏱️ Get operation took: 1042 ms +--------------------------------------------------- +[Step 145/500] Processing... +Getting data: 'CI/CD' +1. Test entry #324: Boilerplate CI/CD related data with random seed 11105 // (similarity score: 54.77%) +2. Test entry #479: CI/CD Data-structure related data with random seed 13652 // (similarity score: 54.51%) +3. Test entry #6: CI/CD Throughput related data with random seed 5729 // (similarity score: 53.92%) +4. Test entry #472: Library CI/CD related data with random seed 18363 // (similarity score: 53.79%) +5. Test entry #450: CI/CD Data-structure related data with random seed 9170 // (similarity score: 52.85%) +6. Test entry #317: Stubbing CI/CD related data with random seed 3418 // (similarity score: 52.74%) +7. Test entry #234: Security CI/CD related data with random seed 25834 // (similarity score: 52.50%) +8. Test entry #326: CI/CD Sample related data with random seed 22568 // (similarity score: 52.47%) +9. Test entry #408: CI/CD Search related data with random seed 26877 // (similarity score: 51.42%) +10. Test entry #489: Load-balancing CI/CD related data with random seed 9733 // (similarity score: 49.63%) +⏱️ Get operation took: 994 ms +--------------------------------------------------- +[Step 146/500] Processing... +Getting data: 'Optimization' +1. 
Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 39.36%) +2. Test entry #114: Node Optimization related data with random seed 20946 // (similarity score: 37.49%) +3. Test entry #281: Optimization Resilient related data with random seed 24028 // (similarity score: 37.11%) +4. Test entry #27: Optimization Load-balancing related data with random seed 27403 // (similarity score: 35.53%) +5. Test entry #139: Load-balancing Optimization related data with random seed 31776 // (similarity score: 35.52%) +6. Test entry #403: Optimization Code-review related data with random seed 25707 // (similarity score: 35.52%) +7. Test entry #72: Node Optimization related data with random seed 10019 // (similarity score: 34.85%) +8. Test entry #191: Optimization Threading related data with random seed 15850 // (similarity score: 33.38%) +9. Test entry #466: UML Optimization related data with random seed 21437 // (similarity score: 32.36%) +10. Test entry #437: Microservices Optimization related data with random seed 8916 // (similarity score: 31.30%) +⏱️ Get operation took: 1030 ms +--------------------------------------------------- +[Step 147/500] Processing... +Getting data: 'Best-practices' +1. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 32.60%) +2. Test entry #311: Asynchronous Best-practices related data with random seed 24139 // (similarity score: 31.44%) +3. Test entry #373: NoSQL Best-practices related data with random seed 20318 // (similarity score: 31.35%) +4. Test entry #454: CSV Best-practices related data with random seed 21695 // (similarity score: 30.63%) +5. Test entry #462: Best-practices CLI related data with random seed 19922 // (similarity score: 30.42%) +6. Test entry #487: XML Best-practices related data with random seed 19330 // (similarity score: 29.42%) +7. 
Test entry #372: Best-practices Search related data with random seed 17128 // (similarity score: 28.31%) +8. Test entry #315: Encryption Best-practices related data with random seed 23350 // (similarity score: 27.13%) +9. Test entry #174: Best-practices Security related data with random seed 18653 // (similarity score: 25.77%) +10. Test entry #148: Threading Best-practices related data with random seed 22158 // (similarity score: 25.48%) +⏱️ Get operation took: 1020 ms +--------------------------------------------------- +[Step 148/500] Processing... +Getting data: 'Scalability' +1. Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 43.17%) +2. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 40.71%) +3. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 38.07%) +4. Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 37.55%) +5. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 37.23%) +6. Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 35.78%) +7. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 35.62%) +8. Test entry #51: Scalability SDK related data with random seed 3268 // (similarity score: 33.46%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 32.11%) +10. Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 31.32%) +⏱️ Get operation took: 1047 ms +--------------------------------------------------- +[Step 149/500] Processing... +Getting data: 'Websocket' +1. Test entry #236: Websocket Debugging related data with random seed 18729 // (similarity score: 61.95%) +2. 
Test entry #92: Websocket Integration-test related data with random seed 32561 // (similarity score: 60.89%) +3. Test entry #13: Websocket Caching related data with random seed 27250 // (similarity score: 58.67%) +4. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 58.47%) +5. Test entry #122: Cloud Websocket related data with random seed 14024 // (similarity score: 57.43%) +6. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 15.29%) +7. Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 14.60%) +8. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 14.35%) +9. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 14.31%) +10. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 13.59%) +⏱️ Get operation took: 1040 ms +--------------------------------------------------- +[Step 150/500] Processing... +Getting data: 'Mocking' +1. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 53.76%) +2. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 52.01%) +3. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 50.43%) +4. Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 49.94%) +5. Test entry #7: Mocking Parallelism related data with random seed 19010 // (similarity score: 48.48%) +6. Test entry #54: Concurrency Mocking related data with random seed 20846 // (similarity score: 45.54%) +7. Test entry #330: Boilerplate Mocking related data with random seed 5771 // (similarity score: 45.53%) +8. Test entry #190: Mocking Network related data with random seed 32119 // (similarity score: 44.81%) +9. 
Test entry #29: Mocking Network related data with random seed 20303 // (similarity score: 44.44%) +10. Test entry #185: Mocking YAML related data with random seed 11811 // (similarity score: 42.85%) +⏱️ Get operation took: 1041 ms + +📊 [BATCH REPORT] Items 101 to 150 + -> Average Latency: 1021 ms + +--------------------------------------------------- +[Step 151/500] Processing... +Getting data: 'Search' +1. Test entry #75: UX Search related data with random seed 11487 // (similarity score: 42.09%) +2. Test entry #372: Best-practices Search related data with random seed 17128 // (similarity score: 41.11%) +3. Test entry #202: Search ORM related data with random seed 9657 // (similarity score: 38.48%) +4. Test entry #154: Cloud Search related data with random seed 2185 // (similarity score: 38.09%) +5. Test entry #280: Boilerplate Search related data with random seed 32715 // (similarity score: 37.50%) +6. Test entry #408: CI/CD Search related data with random seed 26877 // (similarity score: 35.64%) +7. Test entry #78: Search Git related data with random seed 20971 // (similarity score: 35.37%) +8. Test entry #397: Search Fault-tolerance related data with random seed 20761 // (similarity score: 35.23%) +9. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 29.18%) +10. Test entry #305: Index Database related data with random seed 10951 // (similarity score: 26.45%) +⏱️ Get operation took: 1049 ms +--------------------------------------------------- +[Step 152/500] Processing... +Getting data: 'CLI' +1. Test entry #284: UX CLI related data with random seed 10670 // (similarity score: 47.12%) +2. Test entry #492: Testing CLI related data with random seed 20014 // (similarity score: 46.52%) +3. Test entry #462: Best-practices CLI related data with random seed 19922 // (similarity score: 45.07%) +4. Test entry #242: Load-balancing CLI related data with random seed 21278 // (similarity score: 44.28%) +5. 
Test entry #425: CLI Data-structure related data with random seed 7062 // (similarity score: 42.94%) +6. Test entry #499: Vector CLI related data with random seed 18419 // (similarity score: 41.47%) +7. Test entry #103: CLI Sharding related data with random seed 23375 // (similarity score: 40.66%) +8. Test entry #477: Unit-test CLI related data with random seed 17091 // (similarity score: 39.09%) +9. Test entry #263: CLI Stubbing related data with random seed 13486 // (similarity score: 37.57%) +10. Test entry #299: CLI Git related data with random seed 18823 // (similarity score: 34.54%) +⏱️ Get operation took: 980 ms +--------------------------------------------------- +[Step 153/500] Processing... +Getting data: 'Python' +1. Test entry #181: UX Python related data with random seed 26197 // (similarity score: 37.24%) +2. Test entry #383: Python Resilient related data with random seed 20296 // (similarity score: 35.13%) +3. Test entry #473: Index Python related data with random seed 3534 // (similarity score: 33.84%) +4. Test entry #165: Resilient Python related data with random seed 3195 // (similarity score: 33.43%) +5. Test entry #498: Design-patterns Python related data with random seed 16866 // (similarity score: 33.09%) +6. Test entry #70: Python Testing related data with random seed 5317 // (similarity score: 31.71%) +7. Test entry #194: Python Consensus related data with random seed 2579 // (similarity score: 29.30%) +8. Test entry #286: Python SDK related data with random seed 14667 // (similarity score: 29.01%) +9. Test entry #381: Python Pub-sub related data with random seed 29514 // (similarity score: 24.48%) +10. Test entry #392: Algorithm CSV related data with random seed 18964 // (similarity score: 24.38%) +⏱️ Get operation took: 999 ms +--------------------------------------------------- +[Step 154/500] Processing... +Getting data: 'SQL' +1. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 41.94%) +2. 
Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 41.80%) +3. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 37.96%) +4. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 37.76%) +5. Test entry #152: SQL Load-balancing related data with random seed 9262 // (similarity score: 37.07%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 36.37%) +7. Test entry #137: SQL Parallelism related data with random seed 8672 // (similarity score: 35.23%) +8. Test entry #431: SQL Version-control related data with random seed 505 // (similarity score: 32.86%) +9. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 30.91%) +10. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 30.51%) +⏱️ Get operation took: 1039 ms +--------------------------------------------------- +[Step 155/500] Processing... +Getting data: 'SQL' +1. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 41.94%) +2. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 41.80%) +3. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 37.96%) +4. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 37.76%) +5. Test entry #152: SQL Load-balancing related data with random seed 9262 // (similarity score: 37.07%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 36.37%) +7. Test entry #137: SQL Parallelism related data with random seed 8672 // (similarity score: 35.23%) +8. Test entry #431: SQL Version-control related data with random seed 505 // (similarity score: 32.86%) +9. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 30.91%) +10. 
Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 30.51%) +⏱️ Get operation took: 1052 ms +--------------------------------------------------- +[Step 156/500] Processing... +Getting data: 'Template' +1. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 46.79%) +2. Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 41.61%) +3. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 40.40%) +4. Test entry #134: Template Vector related data with random seed 24421 // (similarity score: 38.73%) +5. Test entry #83: Template Sharding related data with random seed 32241 // (similarity score: 34.86%) +6. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 33.67%) +7. Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 33.18%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 30.02%) +9. Test entry #322: ORM Template related data with random seed 28110 // (similarity score: 29.98%) +10. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 27.79%) +⏱️ Get operation took: 1029 ms +--------------------------------------------------- +[Step 157/500] Processing... +Getting data: 'Scalability' +1. Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 43.17%) +2. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 40.71%) +3. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 38.07%) +4. Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 37.55%) +5. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 37.23%) +6. 
Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 35.78%) +7. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 35.62%) +8. Test entry #51: Scalability SDK related data with random seed 3268 // (similarity score: 33.46%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 32.11%) +10. Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 31.32%) +⏱️ Get operation took: 905 ms +--------------------------------------------------- +[Step 158/500] Processing... +Getting data: 'Search' +1. Test entry #75: UX Search related data with random seed 11487 // (similarity score: 42.09%) +2. Test entry #372: Best-practices Search related data with random seed 17128 // (similarity score: 41.11%) +3. Test entry #202: Search ORM related data with random seed 9657 // (similarity score: 38.48%) +4. Test entry #154: Cloud Search related data with random seed 2185 // (similarity score: 38.09%) +5. Test entry #280: Boilerplate Search related data with random seed 32715 // (similarity score: 37.50%) +6. Test entry #408: CI/CD Search related data with random seed 26877 // (similarity score: 35.64%) +7. Test entry #78: Search Git related data with random seed 20971 // (similarity score: 35.37%) +8. Test entry #397: Search Fault-tolerance related data with random seed 20761 // (similarity score: 35.23%) +9. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 29.18%) +10. Test entry #305: Index Database related data with random seed 10951 // (similarity score: 26.45%) +⏱️ Get operation took: 1037 ms +--------------------------------------------------- +[Step 159/500] Processing... +Getting data: 'Git' +1. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 26.09%) +2. 
Test entry #429: Version-control Code-review related data with random seed 14304 // (similarity score: 24.27%) +3. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 23.51%) +4. Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 23.09%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 22.94%) +6. Test entry #338: Version-control Monitoring related data with random seed 11905 // (similarity score: 22.82%) +7. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 22.52%) +8. Test entry #155: Sharding Version-control related data with random seed 4158 // (similarity score: 21.79%) +9. Test entry #340: Version-control Sample related data with random seed 23846 // (similarity score: 21.68%) +10. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 21.45%) +⏱️ Get operation took: 999 ms +--------------------------------------------------- +[Step 160/500] Processing... +Getting data: 'Pub-sub' +1. Test entry #418: Pub-sub API related data with random seed 23035 // (similarity score: 38.66%) +2. Test entry #381: Python Pub-sub related data with random seed 29514 // (similarity score: 38.60%) +3. Test entry #28: Pull-request Pub-sub related data with random seed 15070 // (similarity score: 38.34%) +4. Test entry #362: Pub-sub SQL related data with random seed 11044 // (similarity score: 36.25%) +5. Test entry #461: Vector Pub-sub related data with random seed 31094 // (similarity score: 34.99%) +6. Test entry #259: Pub-sub DevOps related data with random seed 23944 // (similarity score: 34.61%) +7. Test entry #318: Vector Pub-sub related data with random seed 10169 // (similarity score: 33.83%) +8. Test entry #485: Integration-test Pub-sub related data with random seed 29641 // (similarity score: 31.78%) +9. 
Test entry #36: Threading Sharding related data with random seed 23643 // (similarity score: 14.79%) +10. Test entry #355: Throughput Sharding related data with random seed 6245 // (similarity score: 14.25%) +⏱️ Get operation took: 1092 ms +--------------------------------------------------- +[Step 161/500] Processing... +Getting data: 'Git' +1. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 26.09%) +2. Test entry #429: Version-control Code-review related data with random seed 14304 // (similarity score: 24.27%) +3. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 23.51%) +4. Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 23.09%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 22.94%) +6. Test entry #338: Version-control Monitoring related data with random seed 11905 // (similarity score: 22.82%) +7. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 22.52%) +8. Test entry #155: Sharding Version-control related data with random seed 4158 // (similarity score: 21.79%) +9. Test entry #340: Version-control Sample related data with random seed 23846 // (similarity score: 21.68%) +10. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 21.45%) +⏱️ Get operation took: 988 ms +--------------------------------------------------- +[Step 162/500] Processing... +Getting data: 'E2E-test' +1. Test entry #455: E2E-test Integration-test related data with random seed 4838 // (similarity score: 65.81%) +2. Test entry #241: Alerting E2E-test related data with random seed 11551 // (similarity score: 63.85%) +3. Test entry #335: Throughput E2E-test related data with random seed 11728 // (similarity score: 63.35%) +4. 
Test entry #38: Profiling E2E-test related data with random seed 30992 // (similarity score: 63.16%) +5. Test entry #278: UML E2E-test related data with random seed 2723 // (similarity score: 61.63%) +6. Test entry #433: Vector E2E-test related data with random seed 22356 // (similarity score: 60.28%) +7. Test entry #304: E2E-test Unit-test related data with random seed 19213 // (similarity score: 59.91%) +8. Test entry #104: Backup E2E-test related data with random seed 3184 // (similarity score: 55.45%) +9. Test entry #414: GraphQL E2E-test related data with random seed 14354 // (similarity score: 53.19%) +10. Test entry #102: ERD ERD related data with random seed 20954 // (similarity score: 40.31%) +⏱️ Get operation took: 846 ms +--------------------------------------------------- +[Step 163/500] Processing... +Getting data: 'GraphQL' +1. Test entry #308: GraphQL GraphQL related data with random seed 25902 // (similarity score: 67.68%) +2. Test entry #125: GraphQL Tutorial related data with random seed 2640 // (similarity score: 66.75%) +3. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 64.60%) +4. Test entry #343: GraphQL UX related data with random seed 26726 // (similarity score: 62.29%) +5. Test entry #48: GraphQL Asynchronous related data with random seed 4509 // (similarity score: 60.43%) +6. Test entry #35: GraphQL YAML related data with random seed 19655 // (similarity score: 59.13%) +7. Test entry #213: GUI GraphQL related data with random seed 9145 // (similarity score: 58.32%) +8. Test entry #256: Profiling GraphQL related data with random seed 4781 // (similarity score: 58.12%) +9. Test entry #10: Framework GraphQL related data with random seed 10276 // (similarity score: 57.78%) +10. Test entry #25: GraphQL CI/CD related data with random seed 7557 // (similarity score: 56.63%) +⏱️ Get operation took: 951 ms +--------------------------------------------------- +[Step 164/500] Processing... 
+Getting data: 'Pull-request' +1. Test entry #438: Pull-request API related data with random seed 19781 // (similarity score: 51.53%) +2. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 50.75%) +3. Test entry #107: Resilient Pull-request related data with random seed 9028 // (similarity score: 49.75%) +4. Test entry #488: Pull-request Git related data with random seed 13085 // (similarity score: 47.13%) +5. Test entry #424: Pull-request YAML related data with random seed 9732 // (similarity score: 46.59%) +6. Test entry #288: Index Pull-request related data with random seed 4855 // (similarity score: 46.42%) +7. Test entry #28: Pull-request Pub-sub related data with random seed 15070 // (similarity score: 45.59%) +8. Test entry #166: Pull-request Unit-test related data with random seed 18963 // (similarity score: 40.27%) +9. Test entry #314: Threading RESTful related data with random seed 12875 // (similarity score: 19.17%) +10. Test entry #203: Recovery JSON related data with random seed 8776 // (similarity score: 17.99%) +⏱️ Get operation took: 1043 ms +--------------------------------------------------- +[Step 165/500] Processing... +Getting data: 'Diagram' +1. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 44.83%) +2. Test entry #423: Diagram RESTful related data with random seed 14813 // (similarity score: 44.09%) +3. Test entry #150: Diagram Integration-test related data with random seed 19409 // (similarity score: 43.81%) +4. Test entry #9: Diagram Flowchart related data with random seed 14766 // (similarity score: 43.28%) +5. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 40.49%) +6. Test entry #390: Documentation Diagram related data with random seed 8258 // (similarity score: 39.71%) +7. Test entry #64: Diagram NoSQL related data with random seed 3428 // (similarity score: 36.31%) +8. 
Test entry #161: Orchestration Diagram related data with random seed 20102 // (similarity score: 32.78%) +9. Test entry #302: Load-balancing Diagram related data with random seed 1050 // (similarity score: 31.56%) +10. Test entry #430: Recovery Diagram related data with random seed 24192 // (similarity score: 29.49%) +⏱️ Get operation took: 894 ms +--------------------------------------------------- +[Step 166/500] Processing... +Getting data: 'API' +1. Test entry #336: API Alerting related data with random seed 704 // (similarity score: 39.28%) +2. Test entry #215: API RESTful related data with random seed 29014 // (similarity score: 36.10%) +3. Test entry #230: Threading API related data with random seed 20630 // (similarity score: 33.40%) +4. Test entry #30: API Library related data with random seed 1930 // (similarity score: 33.27%) +5. Test entry #195: API Orchestration related data with random seed 17599 // (similarity score: 32.44%) +6. Test entry #157: API Threading related data with random seed 18440 // (similarity score: 32.22%) +7. Test entry #84: API Optimization related data with random seed 7997 // (similarity score: 32.19%) +8. Test entry #438: Pull-request API related data with random seed 19781 // (similarity score: 32.08%) +9. Test entry #476: Logging API related data with random seed 8268 // (similarity score: 31.38%) +10. Test entry #68: API Encryption related data with random seed 30427 // (similarity score: 30.50%) +⏱️ Get operation took: 901 ms +--------------------------------------------------- +[Step 167/500] Processing... +Getting data: 'Search' +1. Test entry #75: UX Search related data with random seed 11487 // (similarity score: 42.09%) +2. Test entry #372: Best-practices Search related data with random seed 17128 // (similarity score: 41.11%) +3. Test entry #202: Search ORM related data with random seed 9657 // (similarity score: 38.48%) +4. 
Test entry #154: Cloud Search related data with random seed 2185 // (similarity score: 38.09%) +5. Test entry #280: Boilerplate Search related data with random seed 32715 // (similarity score: 37.50%) +6. Test entry #408: CI/CD Search related data with random seed 26877 // (similarity score: 35.64%) +7. Test entry #78: Search Git related data with random seed 20971 // (similarity score: 35.37%) +8. Test entry #397: Search Fault-tolerance related data with random seed 20761 // (similarity score: 35.23%) +9. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 29.18%) +10. Test entry #305: Index Database related data with random seed 10951 // (similarity score: 26.45%) +⏱️ Get operation took: 989 ms +--------------------------------------------------- +[Step 168/500] Processing... +Getting data: 'GUI' +1. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 43.56%) +2. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 39.19%) +3. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 38.99%) +4. Test entry #268: GUI Security related data with random seed 8853 // (similarity score: 38.70%) +5. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 37.90%) +6. Test entry #90: Network GUI related data with random seed 6917 // (similarity score: 37.45%) +7. Test entry #400: GUI Container related data with random seed 26968 // (similarity score: 36.86%) +8. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 36.84%) +9. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 35.99%) +10. Test entry #87: GUI Encryption related data with random seed 24527 // (similarity score: 35.25%) +⏱️ Get operation took: 902 ms +--------------------------------------------------- +[Step 169/500] Processing... +Getting data: 'Vector' +1. 
Test entry #325: Vector UX related data with random seed 2934 // (similarity score: 40.50%) +2. Test entry #444: Vector Debugging related data with random seed 5646 // (similarity score: 36.83%) +3. Test entry #134: Template Vector related data with random seed 24421 // (similarity score: 36.57%) +4. Test entry #341: Node Vector related data with random seed 30034 // (similarity score: 36.54%) +5. Test entry #334: Sharding Vector related data with random seed 29020 // (similarity score: 33.49%) +6. Test entry #499: Vector CLI related data with random seed 18419 // (similarity score: 33.05%) +7. Test entry #433: Vector E2E-test related data with random seed 22356 // (similarity score: 31.29%) +8. Test entry #461: Vector Pub-sub related data with random seed 31094 // (similarity score: 31.17%) +9. Test entry #100: Latency Vector related data with random seed 28112 // (similarity score: 30.14%) +10. Test entry #385: ORM Vector related data with random seed 277 // (similarity score: 29.36%) +⏱️ Get operation took: 898 ms +--------------------------------------------------- +[Step 170/500] Processing... +Getting data: 'Websocket' +1. Test entry #236: Websocket Debugging related data with random seed 18729 // (similarity score: 61.95%) +2. Test entry #92: Websocket Integration-test related data with random seed 32561 // (similarity score: 60.89%) +3. Test entry #13: Websocket Caching related data with random seed 27250 // (similarity score: 58.67%) +4. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 58.47%) +5. Test entry #122: Cloud Websocket related data with random seed 14024 // (similarity score: 57.43%) +6. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 15.29%) +7. Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 14.60%) +8. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 14.35%) +9. 
Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 14.31%) +10. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 13.59%) +⏱️ Get operation took: 892 ms +--------------------------------------------------- +[Step 171/500] Processing... +Getting data: 'Recovery' +1. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 39.10%) +2. Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 37.60%) +3. Test entry #306: Recovery Library related data with random seed 21298 // (similarity score: 36.53%) +4. Test entry #430: Recovery Diagram related data with random seed 24192 // (similarity score: 36.20%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 35.07%) +6. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 33.87%) +7. Test entry #367: SDK Recovery related data with random seed 20345 // (similarity score: 33.28%) +8. Test entry #88: Recovery JSON related data with random seed 28249 // (similarity score: 32.39%) +9. Test entry #321: Recovery Git related data with random seed 3971 // (similarity score: 32.26%) +10. Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 30.40%) +⏱️ Get operation took: 845 ms +--------------------------------------------------- +[Step 172/500] Processing... +Getting data: 'Container' +1. Test entry #182: Testing Container related data with random seed 11294 // (similarity score: 44.10%) +2. Test entry #446: Container RESTful related data with random seed 10831 // (similarity score: 40.01%) +3. Test entry #233: Unit-test Container related data with random seed 19623 // (similarity score: 39.50%) +4. Test entry #400: GUI Container related data with random seed 26968 // (similarity score: 39.49%) +5. 
Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 38.28%) +6. Test entry #160: Container NoSQL related data with random seed 15570 // (similarity score: 37.91%) +7. Test entry #86: Index Container related data with random seed 813 // (similarity score: 37.74%) +8. Test entry #262: Container Branching related data with random seed 22849 // (similarity score: 37.24%) +9. Test entry #65: Encryption Container related data with random seed 20902 // (similarity score: 36.61%) +10. Test entry #170: ORM Container related data with random seed 30245 // (similarity score: 32.73%) +⏱️ Get operation took: 997 ms +--------------------------------------------------- +[Step 173/500] Processing... +Getting data: 'Database' +1. Test entry #426: Database Database related data with random seed 19208 // (similarity score: 39.68%) +2. Test entry #496: Library Database related data with random seed 13646 // (similarity score: 36.63%) +3. Test entry #193: Database Resilient related data with random seed 3286 // (similarity score: 35.20%) +4. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 34.25%) +5. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 33.82%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 33.21%) +7. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 32.30%) +8. Test entry #82: Database Cloud related data with random seed 12343 // (similarity score: 32.29%) +9. Test entry #346: Database Microservices related data with random seed 1068 // (similarity score: 32.20%) +10. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 32.14%) +⏱️ Get operation took: 943 ms +--------------------------------------------------- +[Step 174/500] Processing... +Getting data: 'Fault-tolerance' +1. 
Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 62.09%) +2. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 57.76%) +3. Test entry #80: Fault-tolerance Throughput related data with random seed 27910 // (similarity score: 54.33%) +4. Test entry #95: Library Fault-tolerance related data with random seed 987 // (similarity score: 47.09%) +5. Test entry #197: Fault-tolerance ORM related data with random seed 21855 // (similarity score: 45.57%) +6. Test entry #397: Search Fault-tolerance related data with random seed 20761 // (similarity score: 44.70%) +7. Test entry #428: Fault-tolerance ORM related data with random seed 17392 // (similarity score: 44.01%) +8. Test entry #24: Replication Fault-tolerance related data with random seed 9951 // (similarity score: 41.75%) +9. Test entry #91: Fault-tolerance ORM related data with random seed 3476 // (similarity score: 41.55%) +10. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 39.60%) +⏱️ Get operation took: 1055 ms +--------------------------------------------------- +[Step 175/500] Processing... +Getting data: 'Alerting' +1. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 51.72%) +2. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 44.93%) +3. Test entry #371: Code-review Alerting related data with random seed 6709 // (similarity score: 40.15%) +4. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 40.12%) +5. Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 35.73%) +6. Test entry #336: API Alerting related data with random seed 704 // (similarity score: 35.52%) +7. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 34.88%) +8. 
Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 34.33%) +9. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 33.48%) +10. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 33.22%) +⏱️ Get operation took: 1010 ms +--------------------------------------------------- +[Step 176/500] Processing... +Getting data: 'ERD' +1. Test entry #102: ERD ERD related data with random seed 20954 // (similarity score: 51.00%) +2. Test entry #138: Boilerplate ERD related data with random seed 17973 // (similarity score: 46.57%) +3. Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 44.39%) +4. Test entry #379: Code-review ERD related data with random seed 18047 // (similarity score: 43.54%) +5. Test entry #365: Framework ERD related data with random seed 21870 // (similarity score: 42.68%) +6. Test entry #135: UI ERD related data with random seed 9172 // (similarity score: 41.81%) +7. Test entry #378: ERD Parallelism related data with random seed 25512 // (similarity score: 39.79%) +8. Test entry #252: ERD Parallelism related data with random seed 23325 // (similarity score: 39.25%) +9. Test entry #296: ERD RESTful related data with random seed 19760 // (similarity score: 36.51%) +10. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 23.26%) +⏱️ Get operation took: 976 ms +--------------------------------------------------- +[Step 177/500] Processing... +Getting data: 'Algorithm' +1. Test entry #248: Algorithm Data-structure related data with random seed 18077 // (similarity score: 46.27%) +2. Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 38.05%) +3. Test entry #392: Algorithm CSV related data with random seed 18964 // (similarity score: 36.06%) +4. 
Test entry #79: Algorithm Asynchronous related data with random seed 18478 // (similarity score: 34.57%) +5. Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 33.78%) +6. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 33.09%) +7. Test entry #240: Algorithm Asynchronous related data with random seed 6330 // (similarity score: 31.98%) +8. Test entry #227: Framework Algorithm related data with random seed 19206 // (similarity score: 31.96%) +9. Test entry #337: Algorithm Orchestration related data with random seed 15318 // (similarity score: 31.60%) +10. Test entry #120: Algorithm Unit-test related data with random seed 14608 // (similarity score: 30.48%) +⏱️ Get operation took: 894 ms +--------------------------------------------------- +[Step 178/500] Processing... +Getting data: 'Monitoring' +1. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 43.08%) +2. Test entry #357: Monitoring Library related data with random seed 91 // (similarity score: 35.99%) +3. Test entry #338: Version-control Monitoring related data with random seed 11905 // (similarity score: 34.42%) +4. Test entry #313: Data-structure Monitoring related data with random seed 10551 // (similarity score: 33.26%) +5. Test entry #494: Monitoring Threading related data with random seed 28404 // (similarity score: 32.92%) +6. Test entry #177: UX Monitoring related data with random seed 16167 // (similarity score: 31.57%) +7. Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 30.48%) +8. Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 29.69%) +9. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 27.09%) +10. 
Test entry #371: Code-review Alerting related data with random seed 6709 // (similarity score: 26.04%) +⏱️ Get operation took: 901 ms +--------------------------------------------------- +[Step 179/500] Processing... +Getting data: 'Pub-sub' +1. Test entry #418: Pub-sub API related data with random seed 23035 // (similarity score: 38.66%) +2. Test entry #381: Python Pub-sub related data with random seed 29514 // (similarity score: 38.60%) +3. Test entry #28: Pull-request Pub-sub related data with random seed 15070 // (similarity score: 38.34%) +4. Test entry #362: Pub-sub SQL related data with random seed 11044 // (similarity score: 36.25%) +5. Test entry #461: Vector Pub-sub related data with random seed 31094 // (similarity score: 34.99%) +6. Test entry #259: Pub-sub DevOps related data with random seed 23944 // (similarity score: 34.61%) +7. Test entry #318: Vector Pub-sub related data with random seed 10169 // (similarity score: 33.83%) +8. Test entry #485: Integration-test Pub-sub related data with random seed 29641 // (similarity score: 31.78%) +9. Test entry #36: Threading Sharding related data with random seed 23643 // (similarity score: 14.79%) +10. Test entry #355: Throughput Sharding related data with random seed 6245 // (similarity score: 14.25%) +⏱️ Get operation took: 941 ms +--------------------------------------------------- +[Step 180/500] Processing... +Getting data: 'Backup' +1. Test entry #434: Tutorial Backup related data with random seed 6053 // (similarity score: 46.60%) +2. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 44.75%) +3. Test entry #398: Backup Node related data with random seed 21714 // (similarity score: 39.31%) +4. Test entry #410: Backup Parallelism related data with random seed 5421 // (similarity score: 38.11%) +5. Test entry #15: Backup Orchestration related data with random seed 32162 // (similarity score: 37.94%) +6. 
Test entry #145: CI/CD Backup related data with random seed 22567 // (similarity score: 35.01%) +7. Test entry #104: Backup E2E-test related data with random seed 3184 // (similarity score: 29.08%) +8. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 25.60%) +9. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 24.70%) +10. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 19.74%) +⏱️ Get operation took: 946 ms +--------------------------------------------------- +[Step 181/500] Processing... +Getting data: 'Debugging' +1. Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 51.51%) +2. Test entry #333: Profiling Debugging related data with random seed 25431 // (similarity score: 49.97%) +3. Test entry #387: Index Debugging related data with random seed 29125 // (similarity score: 43.23%) +4. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 42.05%) +5. Test entry #67: Debugging Blockchain related data with random seed 23179 // (similarity score: 42.03%) +6. Test entry #236: Websocket Debugging related data with random seed 18729 // (similarity score: 41.95%) +7. Test entry #444: Vector Debugging related data with random seed 5646 // (similarity score: 41.67%) +8. Test entry #406: Debugging Stubbing related data with random seed 28773 // (similarity score: 41.49%) +9. Test entry #339: API Debugging related data with random seed 14456 // (similarity score: 39.09%) +10. Test entry #34: Debugging CI/CD related data with random seed 7455 // (similarity score: 33.14%) +⏱️ Get operation took: 954 ms +--------------------------------------------------- +[Step 182/500] Processing... +Getting data: 'Tutorial' +1. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 43.89%) +2. 
Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 38.60%) +3. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 34.87%) +4. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 34.64%) +5. Test entry #279: Tutorial SDK related data with random seed 4966 // (similarity score: 33.42%) +6. Test entry #303: XML Tutorial related data with random seed 29551 // (similarity score: 32.40%) +7. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 32.34%) +8. Test entry #125: GraphQL Tutorial related data with random seed 2640 // (similarity score: 30.17%) +9. Test entry #448: Tutorial Unit-test related data with random seed 7829 // (similarity score: 29.62%) +10. Test entry #295: YAML Tutorial related data with random seed 7734 // (similarity score: 29.59%) +⏱️ Get operation took: 954 ms +--------------------------------------------------- +[Step 183/500] Processing... +Getting data: 'Container' +1. Test entry #182: Testing Container related data with random seed 11294 // (similarity score: 44.10%) +2. Test entry #446: Container RESTful related data with random seed 10831 // (similarity score: 40.01%) +3. Test entry #233: Unit-test Container related data with random seed 19623 // (similarity score: 39.50%) +4. Test entry #400: GUI Container related data with random seed 26968 // (similarity score: 39.49%) +5. Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 38.28%) +6. Test entry #160: Container NoSQL related data with random seed 15570 // (similarity score: 37.91%) +7. Test entry #86: Index Container related data with random seed 813 // (similarity score: 37.74%) +8. Test entry #262: Container Branching related data with random seed 22849 // (similarity score: 37.24%) +9. 
Test entry #65: Encryption Container related data with random seed 20902 // (similarity score: 36.61%) +10. Test entry #170: ORM Container related data with random seed 30245 // (similarity score: 32.73%) +⏱️ Get operation took: 897 ms +--------------------------------------------------- +[Step 184/500] Processing... +Getting data: 'UML' +1. Test entry #466: UML Optimization related data with random seed 21437 // (similarity score: 51.87%) +2. Test entry #66: UX UML related data with random seed 3512 // (similarity score: 50.20%) +3. Test entry #17: Tutorial UML related data with random seed 12772 // (similarity score: 49.59%) +4. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 47.11%) +5. Test entry #189: UML Network related data with random seed 6297 // (similarity score: 45.74%) +6. Test entry #247: UML Integration-test related data with random seed 29544 // (similarity score: 45.62%) +7. Test entry #474: UML Latency related data with random seed 18011 // (similarity score: 45.46%) +8. Test entry #377: UML GUI related data with random seed 13474 // (similarity score: 44.68%) +9. Test entry #4: Design-patterns UML related data with random seed 16565 // (similarity score: 44.27%) +10. Test entry #310: RESTful UML related data with random seed 5709 // (similarity score: 43.88%) +⏱️ Get operation took: 925 ms +--------------------------------------------------- +[Step 185/500] Processing... +Getting data: 'SDK' +1. Test entry #279: Tutorial SDK related data with random seed 4966 // (similarity score: 56.68%) +2. Test entry #51: Scalability SDK related data with random seed 3268 // (similarity score: 51.89%) +3. Test entry #249: SDK UI related data with random seed 20519 // (similarity score: 49.85%) +4. Test entry #243: SDK Orchestration related data with random seed 30501 // (similarity score: 47.08%) +5. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 46.35%) +6. 
Test entry #367: SDK Recovery related data with random seed 20345 // (similarity score: 45.44%) +7. Test entry #162: SDK Testing related data with random seed 18191 // (similarity score: 44.04%) +8. Test entry #364: Database SDK related data with random seed 22809 // (similarity score: 40.07%) +9. Test entry #286: Python SDK related data with random seed 14667 // (similarity score: 39.90%) +10. Test entry #348: Blockchain SDK related data with random seed 25254 // (similarity score: 39.13%) +⏱️ Get operation took: 949 ms +--------------------------------------------------- +[Step 186/500] Processing... +Getting data: 'Branching' +1. Test entry #436: Example Branching related data with random seed 25763 // (similarity score: 54.14%) +2. Test entry #228: Branching Framework related data with random seed 10213 // (similarity score: 51.21%) +3. Test entry #384: Branching Sample related data with random seed 29822 // (similarity score: 50.90%) +4. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 47.89%) +5. Test entry #262: Container Branching related data with random seed 22849 // (similarity score: 47.76%) +6. Test entry #469: Branching GraphQL related data with random seed 16900 // (similarity score: 40.80%) +7. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 40.19%) +8. Test entry #23: Branching Microservices related data with random seed 24428 // (similarity score: 38.88%) +9. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 29.49%) +10. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 26.91%) +⏱️ Get operation took: 946 ms +--------------------------------------------------- +[Step 187/500] Processing... +Getting data: 'Consensus' +1. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 37.37%) +2. 
Test entry #270: Cloud Consensus related data with random seed 15128 // (similarity score: 34.38%) +3. Test entry #194: Python Consensus related data with random seed 2579 // (similarity score: 32.73%) +4. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 32.01%) +5. Test entry #460: Consensus YAML related data with random seed 16140 // (similarity score: 29.09%) +6. Test entry #470: Consensus Replication related data with random seed 9153 // (similarity score: 27.78%) +7. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 27.72%) +8. Test entry #445: Message-queue Consensus related data with random seed 22969 // (similarity score: 25.59%) +9. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 22.21%) +10. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 16.66%) +⏱️ Get operation took: 952 ms +--------------------------------------------------- +[Step 188/500] Processing... +Getting data: 'Replication' +1. Test entry #71: Replication Resilient related data with random seed 6058 // (similarity score: 57.23%) +2. Test entry #109: Replication Architecture related data with random seed 27244 // (similarity score: 55.71%) +3. Test entry #24: Replication Fault-tolerance related data with random seed 9951 // (similarity score: 51.79%) +4. Test entry #374: Replication Example related data with random seed 6291 // (similarity score: 50.42%) +5. Test entry #40: Replication UI related data with random seed 1010 // (similarity score: 47.96%) +6. Test entry #56: Replication Message-queue related data with random seed 23394 // (similarity score: 46.60%) +7. Test entry #255: Profiling Replication related data with random seed 7369 // (similarity score: 45.83%) +8. Test entry #111: Replication SDK related data with random seed 8170 // (similarity score: 45.33%) +9. 
Test entry #470: Consensus Replication related data with random seed 9153 // (similarity score: 44.87%) +10. Test entry #293: Replication CLI related data with random seed 2540 // (similarity score: 43.02%) +⏱️ Get operation took: 892 ms +--------------------------------------------------- +[Step 189/500] Processing... +Getting data: 'Optimization' +1. Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 39.36%) +2. Test entry #114: Node Optimization related data with random seed 20946 // (similarity score: 37.49%) +3. Test entry #281: Optimization Resilient related data with random seed 24028 // (similarity score: 37.11%) +4. Test entry #27: Optimization Load-balancing related data with random seed 27403 // (similarity score: 35.53%) +5. Test entry #139: Load-balancing Optimization related data with random seed 31776 // (similarity score: 35.52%) +6. Test entry #403: Optimization Code-review related data with random seed 25707 // (similarity score: 35.52%) +7. Test entry #72: Node Optimization related data with random seed 10019 // (similarity score: 34.85%) +8. Test entry #191: Optimization Threading related data with random seed 15850 // (similarity score: 33.38%) +9. Test entry #466: UML Optimization related data with random seed 21437 // (similarity score: 32.36%) +10. Test entry #437: Microservices Optimization related data with random seed 8916 // (similarity score: 31.30%) +⏱️ Get operation took: 941 ms +--------------------------------------------------- +[Step 190/500] Processing... +Getting data: 'Throughput' +1. Test entry #481: Tutorial Throughput related data with random seed 11888 // (similarity score: 48.81%) +2. Test entry #80: Fault-tolerance Throughput related data with random seed 27910 // (similarity score: 45.02%) +3. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 44.96%) +4. 
Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 40.15%) +5. Test entry #355: Throughput Sharding related data with random seed 6245 // (similarity score: 39.86%) +6. Test entry #6: CI/CD Throughput related data with random seed 5729 // (similarity score: 38.72%) +7. Test entry #144: Throughput ORM related data with random seed 18100 // (similarity score: 35.68%) +8. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 35.66%) +9. Test entry #335: Throughput E2E-test related data with random seed 11728 // (similarity score: 34.04%) +10. Test entry #294: ORM Throughput related data with random seed 31130 // (similarity score: 33.56%) +⏱️ Get operation took: 952 ms +--------------------------------------------------- +[Step 191/500] Processing... +Getting data: 'Stubbing' +1. Test entry #368: Stubbing Resilient related data with random seed 618 // (similarity score: 56.85%) +2. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 52.11%) +3. Test entry #406: Debugging Stubbing related data with random seed 28773 // (similarity score: 52.05%) +4. Test entry #18: UX Stubbing related data with random seed 14060 // (similarity score: 50.48%) +5. Test entry #2: Database Stubbing related data with random seed 28361 // (similarity score: 48.35%) +6. Test entry #263: CLI Stubbing related data with random seed 13486 // (similarity score: 46.44%) +7. Test entry #53: Blockchain Stubbing related data with random seed 10771 // (similarity score: 46.20%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 46.19%) +9. Test entry #317: Stubbing CI/CD related data with random seed 3418 // (similarity score: 43.34%) +10. 
Test entry #358: Index Stubbing related data with random seed 6263 // (similarity score: 40.32%) +⏱️ Get operation took: 937 ms +--------------------------------------------------- +[Step 192/500] Processing... +Getting data: 'Template' +1. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 46.79%) +2. Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 41.61%) +3. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 40.40%) +4. Test entry #134: Template Vector related data with random seed 24421 // (similarity score: 38.73%) +5. Test entry #83: Template Sharding related data with random seed 32241 // (similarity score: 34.86%) +6. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 33.67%) +7. Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 33.18%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 30.02%) +9. Test entry #322: ORM Template related data with random seed 28110 // (similarity score: 29.98%) +10. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 27.79%) +⏱️ Get operation took: 956 ms +--------------------------------------------------- +[Step 193/500] Processing... +Getting data: 'Fault-tolerance' +1. Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 62.09%) +2. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 57.76%) +3. Test entry #80: Fault-tolerance Throughput related data with random seed 27910 // (similarity score: 54.33%) +4. Test entry #95: Library Fault-tolerance related data with random seed 987 // (similarity score: 47.09%) +5. Test entry #197: Fault-tolerance ORM related data with random seed 21855 // (similarity score: 45.57%) +6. 
Test entry #397: Search Fault-tolerance related data with random seed 20761 // (similarity score: 44.70%) +7. Test entry #428: Fault-tolerance ORM related data with random seed 17392 // (similarity score: 44.01%) +8. Test entry #24: Replication Fault-tolerance related data with random seed 9951 // (similarity score: 41.75%) +9. Test entry #91: Fault-tolerance ORM related data with random seed 3476 // (similarity score: 41.55%) +10. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 39.60%) +⏱️ Get operation took: 980 ms +--------------------------------------------------- +[Step 194/500] Processing... +Getting data: 'Framework' +1. Test entry #227: Framework Algorithm related data with random seed 19206 // (similarity score: 31.85%) +2. Test entry #275: Framework Orchestration related data with random seed 12664 // (similarity score: 31.36%) +3. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 29.40%) +4. Test entry #126: Architecture Framework related data with random seed 1304 // (similarity score: 28.78%) +5. Test entry #386: Framework Testing related data with random seed 14924 // (similarity score: 28.77%) +6. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 26.61%) +7. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 24.69%) +8. Test entry #365: Framework ERD related data with random seed 21870 // (similarity score: 20.97%) +9. Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 20.94%) +10. Test entry #228: Branching Framework related data with random seed 10213 // (similarity score: 19.21%) +⏱️ Get operation took: 896 ms +--------------------------------------------------- +[Step 195/500] Processing... +Getting data: 'API' +1. Test entry #336: API Alerting related data with random seed 704 // (similarity score: 39.28%) +2. 
Test entry #215: API RESTful related data with random seed 29014 // (similarity score: 36.10%) +3. Test entry #230: Threading API related data with random seed 20630 // (similarity score: 33.40%) +4. Test entry #30: API Library related data with random seed 1930 // (similarity score: 33.27%) +5. Test entry #195: API Orchestration related data with random seed 17599 // (similarity score: 32.44%) +6. Test entry #157: API Threading related data with random seed 18440 // (similarity score: 32.22%) +7. Test entry #84: API Optimization related data with random seed 7997 // (similarity score: 32.19%) +8. Test entry #438: Pull-request API related data with random seed 19781 // (similarity score: 32.08%) +9. Test entry #476: Logging API related data with random seed 8268 // (similarity score: 31.38%) +10. Test entry #68: API Encryption related data with random seed 30427 // (similarity score: 30.50%) +⏱️ Get operation took: 898 ms +--------------------------------------------------- +[Step 196/500] Processing... +Getting data: 'Code-review' +1. Test entry #297: Event-driven Code-review related data with random seed 23440 // (similarity score: 58.60%) +2. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 56.39%) +3. Test entry #371: Code-review Alerting related data with random seed 6709 // (similarity score: 56.16%) +4. Test entry #478: Code-review Index related data with random seed 24809 // (similarity score: 54.70%) +5. Test entry #108: Code-review UI related data with random seed 5301 // (similarity score: 54.03%) +6. Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 53.59%) +7. Test entry #429: Version-control Code-review related data with random seed 14304 // (similarity score: 49.96%) +8. Test entry #183: Code-review Data-structure related data with random seed 20935 // (similarity score: 49.10%) +9. 
Test entry #11: Code-review Node related data with random seed 32027 // (similarity score: 48.29%) +10. Test entry #129: XML Code-review related data with random seed 19652 // (similarity score: 44.99%) +⏱️ Get operation took: 893 ms +--------------------------------------------------- +[Step 197/500] Processing... +Getting data: 'Search' +1. Test entry #75: UX Search related data with random seed 11487 // (similarity score: 42.09%) +2. Test entry #372: Best-practices Search related data with random seed 17128 // (similarity score: 41.11%) +3. Test entry #202: Search ORM related data with random seed 9657 // (similarity score: 38.48%) +4. Test entry #154: Cloud Search related data with random seed 2185 // (similarity score: 38.09%) +5. Test entry #280: Boilerplate Search related data with random seed 32715 // (similarity score: 37.50%) +6. Test entry #408: CI/CD Search related data with random seed 26877 // (similarity score: 35.64%) +7. Test entry #78: Search Git related data with random seed 20971 // (similarity score: 35.37%) +8. Test entry #397: Search Fault-tolerance related data with random seed 20761 // (similarity score: 35.23%) +9. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 29.18%) +10. Test entry #305: Index Database related data with random seed 10951 // (similarity score: 26.45%) +⏱️ Get operation took: 899 ms +--------------------------------------------------- +[Step 198/500] Processing... +Getting data: 'Algorithm' +1. Test entry #248: Algorithm Data-structure related data with random seed 18077 // (similarity score: 46.27%) +2. Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 38.05%) +3. Test entry #392: Algorithm CSV related data with random seed 18964 // (similarity score: 36.06%) +4. Test entry #79: Algorithm Asynchronous related data with random seed 18478 // (similarity score: 34.57%) +5. 
Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 33.78%) +6. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 33.09%) +7. Test entry #240: Algorithm Asynchronous related data with random seed 6330 // (similarity score: 31.98%) +8. Test entry #227: Framework Algorithm related data with random seed 19206 // (similarity score: 31.96%) +9. Test entry #337: Algorithm Orchestration related data with random seed 15318 // (similarity score: 31.60%) +10. Test entry #120: Algorithm Unit-test related data with random seed 14608 // (similarity score: 30.48%) +⏱️ Get operation took: 893 ms +--------------------------------------------------- +[Step 199/500] Processing... +Getting data: 'Index' +1. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 46.82%) +2. Test entry #473: Index Python related data with random seed 3534 // (similarity score: 44.57%) +3. Test entry #86: Index Container related data with random seed 813 // (similarity score: 41.90%) +4. Test entry #58: Cloud Index related data with random seed 16697 // (similarity score: 41.66%) +5. Test entry #1: Data-structure Index related data with random seed 16730 // (similarity score: 41.23%) +6. Test entry #44: Cloud Index related data with random seed 12754 // (similarity score: 38.93%) +7. Test entry #305: Index Database related data with random seed 10951 // (similarity score: 38.05%) +8. Test entry #31: ORM Index related data with random seed 2163 // (similarity score: 38.00%) +9. Test entry #288: Index Pull-request related data with random seed 4855 // (similarity score: 37.81%) +10. Test entry #358: Index Stubbing related data with random seed 6263 // (similarity score: 37.00%) +⏱️ Get operation took: 955 ms +--------------------------------------------------- +[Step 200/500] Processing... +Getting data: 'Consensus' +1. 
Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 37.37%) +2. Test entry #270: Cloud Consensus related data with random seed 15128 // (similarity score: 34.38%) +3. Test entry #194: Python Consensus related data with random seed 2579 // (similarity score: 32.73%) +4. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 32.01%) +5. Test entry #460: Consensus YAML related data with random seed 16140 // (similarity score: 29.09%) +6. Test entry #470: Consensus Replication related data with random seed 9153 // (similarity score: 27.78%) +7. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 27.72%) +8. Test entry #445: Message-queue Consensus related data with random seed 22969 // (similarity score: 25.59%) +9. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 22.21%) +10. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 16.66%) +⏱️ Get operation took: 1003 ms + +📊 [BATCH REPORT] Items 151 to 200 + -> Average Latency: 953 ms + +--------------------------------------------------- +[Step 201/500] Processing... +Getting data: 'Resilient' +1. Test entry #368: Stubbing Resilient related data with random seed 618 // (similarity score: 43.92%) +2. Test entry #383: Python Resilient related data with random seed 20296 // (similarity score: 42.70%) +3. Test entry #281: Optimization Resilient related data with random seed 24028 // (similarity score: 39.95%) +4. Test entry #71: Replication Resilient related data with random seed 6058 // (similarity score: 39.12%) +5. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 37.87%) +6. Test entry #490: Threading Resilient related data with random seed 825 // (similarity score: 37.61%) +7. 
Test entry #193: Database Resilient related data with random seed 3286 // (similarity score: 37.33%) +8. Test entry #165: Resilient Python related data with random seed 3195 // (similarity score: 37.32%) +9. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 36.10%) +10. Test entry #119: CSV Resilient related data with random seed 21012 // (similarity score: 35.60%) +⏱️ Get operation took: 1019 ms +--------------------------------------------------- +[Step 202/500] Processing... +Getting data: 'JSON' +1. Test entry #389: Data-structure JSON related data with random seed 23144 // (similarity score: 51.74%) +2. Test entry #156: JSON Optimization related data with random seed 28003 // (similarity score: 49.61%) +3. Test entry #413: JSON Alerting related data with random seed 8852 // (similarity score: 47.02%) +4. Test entry #113: JSON API related data with random seed 11466 // (similarity score: 46.74%) +5. Test entry #85: JSON Vector related data with random seed 23519 // (similarity score: 46.52%) +6. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 43.98%) +7. Test entry #218: JSON Caching related data with random seed 29229 // (similarity score: 43.91%) +8. Test entry #143: JSON XML related data with random seed 20028 // (similarity score: 42.01%) +9. Test entry #57: YAML JSON related data with random seed 19740 // (similarity score: 40.00%) +10. Test entry #420: JSON Microservices related data with random seed 9271 // (similarity score: 39.90%) +⏱️ Get operation took: 959 ms +--------------------------------------------------- +[Step 203/500] Processing... +Getting data: 'CI/CD' +1. Test entry #324: Boilerplate CI/CD related data with random seed 11105 // (similarity score: 54.77%) +2. Test entry #479: CI/CD Data-structure related data with random seed 13652 // (similarity score: 54.51%) +3. 
Test entry #6: CI/CD Throughput related data with random seed 5729 // (similarity score: 53.92%) +4. Test entry #472: Library CI/CD related data with random seed 18363 // (similarity score: 53.79%) +5. Test entry #450: CI/CD Data-structure related data with random seed 9170 // (similarity score: 52.85%) +6. Test entry #317: Stubbing CI/CD related data with random seed 3418 // (similarity score: 52.74%) +7. Test entry #234: Security CI/CD related data with random seed 25834 // (similarity score: 52.50%) +8. Test entry #326: CI/CD Sample related data with random seed 22568 // (similarity score: 52.47%) +9. Test entry #408: CI/CD Search related data with random seed 26877 // (similarity score: 51.42%) +10. Test entry #489: Load-balancing CI/CD related data with random seed 9733 // (similarity score: 49.63%) +⏱️ Get operation took: 898 ms +--------------------------------------------------- +[Step 204/500] Processing... +Getting data: 'Recovery' +1. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 39.10%) +2. Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 37.60%) +3. Test entry #306: Recovery Library related data with random seed 21298 // (similarity score: 36.53%) +4. Test entry #430: Recovery Diagram related data with random seed 24192 // (similarity score: 36.20%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 35.07%) +6. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 33.87%) +7. Test entry #367: SDK Recovery related data with random seed 20345 // (similarity score: 33.28%) +8. Test entry #88: Recovery JSON related data with random seed 28249 // (similarity score: 32.39%) +9. Test entry #321: Recovery Git related data with random seed 3971 // (similarity score: 32.26%) +10. 
Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 30.40%) +⏱️ Get operation took: 917 ms +--------------------------------------------------- +[Step 205/500] Processing... +Getting data: 'CLI' +1. Test entry #284: UX CLI related data with random seed 10670 // (similarity score: 47.12%) +2. Test entry #492: Testing CLI related data with random seed 20014 // (similarity score: 46.52%) +3. Test entry #462: Best-practices CLI related data with random seed 19922 // (similarity score: 45.07%) +4. Test entry #242: Load-balancing CLI related data with random seed 21278 // (similarity score: 44.28%) +5. Test entry #425: CLI Data-structure related data with random seed 7062 // (similarity score: 42.94%) +6. Test entry #499: Vector CLI related data with random seed 18419 // (similarity score: 41.47%) +7. Test entry #103: CLI Sharding related data with random seed 23375 // (similarity score: 40.66%) +8. Test entry #477: Unit-test CLI related data with random seed 17091 // (similarity score: 39.09%) +9. Test entry #263: CLI Stubbing related data with random seed 13486 // (similarity score: 37.57%) +10. Test entry #299: CLI Git related data with random seed 18823 // (similarity score: 34.54%) +⏱️ Get operation took: 933 ms +--------------------------------------------------- +[Step 206/500] Processing... +Getting data: 'Asynchronous' +1. Test entry #74: Asynchronous Asynchronous related data with random seed 32283 // (similarity score: 46.54%) +2. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 44.22%) +3. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 43.79%) +4. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 43.65%) +5. Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 43.29%) +6. 
Test entry #311: Asynchronous Best-practices related data with random seed 24139 // (similarity score: 41.57%) +7. Test entry #210: Asynchronous UI related data with random seed 16307 // (similarity score: 40.59%) +8. Test entry #79: Algorithm Asynchronous related data with random seed 18478 // (similarity score: 40.11%) +9. Test entry #240: Algorithm Asynchronous related data with random seed 6330 // (similarity score: 38.61%) +10. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 36.49%) +⏱️ Get operation took: 1008 ms +--------------------------------------------------- +[Step 207/500] Processing... +Getting data: 'ORM' +1. Test entry #235: ORM Boilerplate related data with random seed 17103 // (similarity score: 52.37%) +2. Test entry #322: ORM Template related data with random seed 28110 // (similarity score: 49.94%) +3. Test entry #428: Fault-tolerance ORM related data with random seed 17392 // (similarity score: 47.01%) +4. Test entry #294: ORM Throughput related data with random seed 31130 // (similarity score: 46.99%) +5. Test entry #202: Search ORM related data with random seed 9657 // (similarity score: 46.57%) +6. Test entry #144: Throughput ORM related data with random seed 18100 // (similarity score: 45.56%) +7. Test entry #22: ORM UX related data with random seed 24344 // (similarity score: 45.31%) +8. Test entry #91: Fault-tolerance ORM related data with random seed 3476 // (similarity score: 44.50%) +9. Test entry #197: Fault-tolerance ORM related data with random seed 21855 // (similarity score: 44.44%) +10. Test entry #49: ORM Network related data with random seed 23604 // (similarity score: 44.40%) +⏱️ Get operation took: 978 ms +--------------------------------------------------- +[Step 208/500] Processing... +Getting data: 'Consensus' +1. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 37.37%) +2. 
Test entry #270: Cloud Consensus related data with random seed 15128 // (similarity score: 34.38%) +3. Test entry #194: Python Consensus related data with random seed 2579 // (similarity score: 32.73%) +4. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 32.01%) +5. Test entry #460: Consensus YAML related data with random seed 16140 // (similarity score: 29.09%) +6. Test entry #470: Consensus Replication related data with random seed 9153 // (similarity score: 27.78%) +7. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 27.72%) +8. Test entry #445: Message-queue Consensus related data with random seed 22969 // (similarity score: 25.59%) +9. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 22.21%) +10. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 16.66%) +⏱️ Get operation took: 897 ms +--------------------------------------------------- +[Step 209/500] Processing... +Getting data: 'Merging' +1. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 47.50%) +2. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 46.78%) +3. Test entry #417: Event-driven Merging related data with random seed 13881 // (similarity score: 45.04%) +4. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 44.14%) +5. Test entry #361: Merging Logging related data with random seed 12323 // (similarity score: 38.53%) +6. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 38.19%) +7. Test entry #150: Diagram Integration-test related data with random seed 19409 // (similarity score: 21.00%) +8. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 19.25%) +9. 
Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 18.92%) +10. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 16.25%) +⏱️ Get operation took: 903 ms +--------------------------------------------------- +[Step 210/500] Processing... +Getting data: 'Optimization' +1. Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 39.36%) +2. Test entry #114: Node Optimization related data with random seed 20946 // (similarity score: 37.49%) +3. Test entry #281: Optimization Resilient related data with random seed 24028 // (similarity score: 37.11%) +4. Test entry #27: Optimization Load-balancing related data with random seed 27403 // (similarity score: 35.53%) +5. Test entry #139: Load-balancing Optimization related data with random seed 31776 // (similarity score: 35.52%) +6. Test entry #403: Optimization Code-review related data with random seed 25707 // (similarity score: 35.52%) +7. Test entry #72: Node Optimization related data with random seed 10019 // (similarity score: 34.85%) +8. Test entry #191: Optimization Threading related data with random seed 15850 // (similarity score: 33.38%) +9. Test entry #466: UML Optimization related data with random seed 21437 // (similarity score: 32.36%) +10. Test entry #437: Microservices Optimization related data with random seed 8916 // (similarity score: 31.30%) +⏱️ Get operation took: 998 ms +--------------------------------------------------- +[Step 211/500] Processing... +Getting data: 'Tutorial' +1. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 43.89%) +2. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 38.60%) +3. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 34.87%) +4. 
Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 34.64%) +5. Test entry #279: Tutorial SDK related data with random seed 4966 // (similarity score: 33.42%) +6. Test entry #303: XML Tutorial related data with random seed 29551 // (similarity score: 32.40%) +7. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 32.34%) +8. Test entry #125: GraphQL Tutorial related data with random seed 2640 // (similarity score: 30.17%) +9. Test entry #448: Tutorial Unit-test related data with random seed 7829 // (similarity score: 29.62%) +10. Test entry #295: YAML Tutorial related data with random seed 7734 // (similarity score: 29.59%) +⏱️ Get operation took: 980 ms +--------------------------------------------------- +[Step 212/500] Processing... +Getting data: 'UI' +1. Test entry #108: Code-review UI related data with random seed 5301 // (similarity score: 33.71%) +2. Test entry #249: SDK UI related data with random seed 20519 // (similarity score: 32.43%) +3. Test entry #210: Asynchronous UI related data with random seed 16307 // (similarity score: 31.84%) +4. Test entry #141: Sample UI related data with random seed 12909 // (similarity score: 28.72%) +5. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 28.18%) +6. Test entry #449: UI Load-balancing related data with random seed 29998 // (similarity score: 27.95%) +7. Test entry #39: UI Sharding related data with random seed 26238 // (similarity score: 26.65%) +8. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 25.47%) +9. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 25.01%) +10. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 24.21%) +⏱️ Get operation took: 899 ms +--------------------------------------------------- +[Step 213/500] Processing... +Getting data: 'Websocket' +1. 
Test entry #236: Websocket Debugging related data with random seed 18729 // (similarity score: 61.95%) +2. Test entry #92: Websocket Integration-test related data with random seed 32561 // (similarity score: 60.89%) +3. Test entry #13: Websocket Caching related data with random seed 27250 // (similarity score: 58.67%) +4. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 58.47%) +5. Test entry #122: Cloud Websocket related data with random seed 14024 // (similarity score: 57.43%) +6. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 15.29%) +7. Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 14.60%) +8. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 14.35%) +9. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 14.31%) +10. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 13.59%) +⏱️ Get operation took: 895 ms +--------------------------------------------------- +[Step 214/500] Processing... +Getting data: 'SQL' +1. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 41.94%) +2. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 41.80%) +3. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 37.96%) +4. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 37.76%) +5. Test entry #152: SQL Load-balancing related data with random seed 9262 // (similarity score: 37.07%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 36.37%) +7. Test entry #137: SQL Parallelism related data with random seed 8672 // (similarity score: 35.23%) +8. 
Test entry #431: SQL Version-control related data with random seed 505 // (similarity score: 32.86%) +9. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 30.91%) +10. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 30.51%) +⏱️ Get operation took: 952 ms +--------------------------------------------------- +[Step 215/500] Processing... +Getting data: 'Backup' +1. Test entry #434: Tutorial Backup related data with random seed 6053 // (similarity score: 46.60%) +2. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 44.75%) +3. Test entry #398: Backup Node related data with random seed 21714 // (similarity score: 39.31%) +4. Test entry #410: Backup Parallelism related data with random seed 5421 // (similarity score: 38.11%) +5. Test entry #15: Backup Orchestration related data with random seed 32162 // (similarity score: 37.94%) +6. Test entry #145: CI/CD Backup related data with random seed 22567 // (similarity score: 35.01%) +7. Test entry #104: Backup E2E-test related data with random seed 3184 // (similarity score: 29.08%) +8. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 25.60%) +9. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 24.70%) +10. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 19.74%) +⏱️ Get operation took: 890 ms +--------------------------------------------------- +[Step 216/500] Processing... +Getting data: 'DevOps' +1. Test entry #259: Pub-sub DevOps related data with random seed 23944 // (similarity score: 58.76%) +2. Test entry #212: Design-patterns DevOps related data with random seed 19247 // (similarity score: 57.73%) +3. Test entry #146: Profiling DevOps related data with random seed 14561 // (similarity score: 55.98%) +4. 
Test entry #46: Data-structure DevOps related data with random seed 24000 // (similarity score: 51.31%) +5. Test entry #269: Database DevOps related data with random seed 15436 // (similarity score: 45.93%) +6. Test entry #328: Node DevOps related data with random seed 7412 // (similarity score: 44.08%) +7. Test entry #207: DevOps JSON related data with random seed 15591 // (similarity score: 41.62%) +8. Test entry #271: Encryption DevOps related data with random seed 2715 // (similarity score: 40.67%) +9. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 29.13%) +10. Test entry #243: SDK Orchestration related data with random seed 30501 // (similarity score: 27.73%) +⏱️ Get operation took: 949 ms +--------------------------------------------------- +[Step 217/500] Processing... +Getting data: 'API' +1. Test entry #336: API Alerting related data with random seed 704 // (similarity score: 39.28%) +2. Test entry #215: API RESTful related data with random seed 29014 // (similarity score: 36.10%) +3. Test entry #230: Threading API related data with random seed 20630 // (similarity score: 33.40%) +4. Test entry #30: API Library related data with random seed 1930 // (similarity score: 33.27%) +5. Test entry #195: API Orchestration related data with random seed 17599 // (similarity score: 32.44%) +6. Test entry #157: API Threading related data with random seed 18440 // (similarity score: 32.22%) +7. Test entry #84: API Optimization related data with random seed 7997 // (similarity score: 32.19%) +8. Test entry #438: Pull-request API related data with random seed 19781 // (similarity score: 32.08%) +9. Test entry #476: Logging API related data with random seed 8268 // (similarity score: 31.38%) +10. Test entry #68: API Encryption related data with random seed 30427 // (similarity score: 30.50%) +⏱️ Get operation took: 897 ms +--------------------------------------------------- +[Step 218/500] Processing... 
+Getting data: 'Cloud' +1. Test entry #497: Cloud Profiling related data with random seed 11981 // (similarity score: 38.74%) +2. Test entry #270: Cloud Consensus related data with random seed 15128 // (similarity score: 36.08%) +3. Test entry #173: Sample Cloud related data with random seed 3561 // (similarity score: 36.02%) +4. Test entry #154: Cloud Search related data with random seed 2185 // (similarity score: 35.82%) +5. Test entry #58: Cloud Index related data with random seed 16697 // (similarity score: 33.17%) +6. Test entry #122: Cloud Websocket related data with random seed 14024 // (similarity score: 31.07%) +7. Test entry #116: NoSQL Cloud related data with random seed 22178 // (similarity score: 30.67%) +8. Test entry #178: Cloud XML related data with random seed 27579 // (similarity score: 30.58%) +9. Test entry #44: Cloud Index related data with random seed 12754 // (similarity score: 30.20%) +10. Test entry #187: Documentation Cloud related data with random seed 9545 // (similarity score: 26.80%) +⏱️ Get operation took: 893 ms +--------------------------------------------------- +[Step 219/500] Processing... +Getting data: 'Load-balancing' +1. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 63.40%) +2. Test entry #139: Load-balancing Optimization related data with random seed 31776 // (similarity score: 57.41%) +3. Test entry #27: Optimization Load-balancing related data with random seed 27403 // (similarity score: 55.07%) +4. Test entry #402: UX Load-balancing related data with random seed 5781 // (similarity score: 52.94%) +5. Test entry #302: Load-balancing Diagram related data with random seed 1050 // (similarity score: 52.56%) +6. Test entry #45: Event-driven Load-balancing related data with random seed 5105 // (similarity score: 52.48%) +7. Test entry #354: Load-balancing Event-driven related data with random seed 18844 // (similarity score: 47.54%) +8. 
Test entry #449: UI Load-balancing related data with random seed 29998 // (similarity score: 45.02%) +9. Test entry #242: Load-balancing CLI related data with random seed 21278 // (similarity score: 44.58%) +10. Test entry #216: Load-balancing Sample related data with random seed 25835 // (similarity score: 43.77%) +⏱️ Get operation took: 912 ms +--------------------------------------------------- +[Step 220/500] Processing... +Getting data: 'Fault-tolerance' +1. Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 62.09%) +2. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 57.76%) +3. Test entry #80: Fault-tolerance Throughput related data with random seed 27910 // (similarity score: 54.33%) +4. Test entry #95: Library Fault-tolerance related data with random seed 987 // (similarity score: 47.09%) +5. Test entry #197: Fault-tolerance ORM related data with random seed 21855 // (similarity score: 45.57%) +6. Test entry #397: Search Fault-tolerance related data with random seed 20761 // (similarity score: 44.70%) +7. Test entry #428: Fault-tolerance ORM related data with random seed 17392 // (similarity score: 44.01%) +8. Test entry #24: Replication Fault-tolerance related data with random seed 9951 // (similarity score: 41.75%) +9. Test entry #91: Fault-tolerance ORM related data with random seed 3476 // (similarity score: 41.55%) +10. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 39.60%) +⏱️ Get operation took: 931 ms +--------------------------------------------------- +[Step 221/500] Processing... +Getting data: 'Caching' +1. Test entry #290: Caching Sample related data with random seed 12446 // (similarity score: 47.89%) +2. Test entry #112: Caching Sharding related data with random seed 9211 // (similarity score: 46.65%) +3. 
Test entry #274: Testing Caching related data with random seed 1136 // (similarity score: 46.39%) +4. Test entry #218: JSON Caching related data with random seed 29229 // (similarity score: 43.15%) +5. Test entry #13: Websocket Caching related data with random seed 27250 // (similarity score: 42.09%) +6. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 21.31%) +7. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 20.98%) +8. Test entry #139: Load-balancing Optimization related data with random seed 31776 // (similarity score: 20.79%) +9. Test entry #311: Asynchronous Best-practices related data with random seed 24139 // (similarity score: 20.69%) +10. Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 19.76%) +⏱️ Get operation took: 1116 ms +--------------------------------------------------- +[Step 222/500] Processing... +Getting data: 'Framework' +1. Test entry #227: Framework Algorithm related data with random seed 19206 // (similarity score: 31.85%) +2. Test entry #275: Framework Orchestration related data with random seed 12664 // (similarity score: 31.36%) +3. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 29.40%) +4. Test entry #126: Architecture Framework related data with random seed 1304 // (similarity score: 28.78%) +5. Test entry #386: Framework Testing related data with random seed 14924 // (similarity score: 28.77%) +6. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 26.61%) +7. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 24.69%) +8. Test entry #365: Framework ERD related data with random seed 21870 // (similarity score: 20.97%) +9. Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 20.94%) +10. 
Test entry #228: Branching Framework related data with random seed 10213 // (similarity score: 19.21%) +⏱️ Get operation took: 1030 ms +--------------------------------------------------- +[Step 223/500] Processing... +Getting data: 'API' +1. Test entry #336: API Alerting related data with random seed 704 // (similarity score: 39.28%) +2. Test entry #215: API RESTful related data with random seed 29014 // (similarity score: 36.10%) +3. Test entry #230: Threading API related data with random seed 20630 // (similarity score: 33.40%) +4. Test entry #30: API Library related data with random seed 1930 // (similarity score: 33.27%) +5. Test entry #195: API Orchestration related data with random seed 17599 // (similarity score: 32.44%) +6. Test entry #157: API Threading related data with random seed 18440 // (similarity score: 32.22%) +7. Test entry #84: API Optimization related data with random seed 7997 // (similarity score: 32.19%) +8. Test entry #438: Pull-request API related data with random seed 19781 // (similarity score: 32.08%) +9. Test entry #476: Logging API related data with random seed 8268 // (similarity score: 31.38%) +10. Test entry #68: API Encryption related data with random seed 30427 // (similarity score: 30.50%) +⏱️ Get operation took: 896 ms +--------------------------------------------------- +[Step 224/500] Processing... +Getting data: 'Scalability' +1. Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 43.17%) +2. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 40.71%) +3. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 38.07%) +4. Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 37.55%) +5. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 37.23%) +6. 
Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 35.78%) +7. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 35.62%) +8. Test entry #51: Scalability SDK related data with random seed 3268 // (similarity score: 33.46%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 32.11%) +10. Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 31.32%) +⏱️ Get operation took: 844 ms +--------------------------------------------------- +[Step 225/500] Processing... +Getting data: 'Tutorial' +1. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 43.89%) +2. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 38.60%) +3. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 34.87%) +4. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 34.64%) +5. Test entry #279: Tutorial SDK related data with random seed 4966 // (similarity score: 33.42%) +6. Test entry #303: XML Tutorial related data with random seed 29551 // (similarity score: 32.40%) +7. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 32.34%) +8. Test entry #125: GraphQL Tutorial related data with random seed 2640 // (similarity score: 30.17%) +9. Test entry #448: Tutorial Unit-test related data with random seed 7829 // (similarity score: 29.62%) +10. Test entry #295: YAML Tutorial related data with random seed 7734 // (similarity score: 29.59%) +⏱️ Get operation took: 900 ms +--------------------------------------------------- +[Step 226/500] Processing... +Getting data: 'Logging' +1. Test entry #168: Event-driven Logging related data with random seed 22637 // (similarity score: 52.57%) +2. 
Test entry #172: Logging Architecture related data with random seed 7305 // (similarity score: 49.56%) +3. Test entry #222: Logging Library related data with random seed 32039 // (similarity score: 46.22%) +4. Test entry #361: Merging Logging related data with random seed 12323 // (similarity score: 45.44%) +5. Test entry #98: Asynchronous Logging related data with random seed 30841 // (similarity score: 45.06%) +6. Test entry #476: Logging API related data with random seed 8268 // (similarity score: 42.63%) +7. Test entry #245: Logging Testing related data with random seed 32263 // (similarity score: 42.44%) +8. Test entry #399: Logging Encryption related data with random seed 25211 // (similarity score: 37.28%) +9. Test entry #149: Encryption Logging related data with random seed 27963 // (similarity score: 35.73%) +10. Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 28.02%) +⏱️ Get operation took: 937 ms +--------------------------------------------------- +[Step 227/500] Processing... +Getting data: 'RESTful' +1. Test entry #482: Code-review RESTful related data with random seed 18863 // (similarity score: 41.46%) +2. Test entry #289: RESTful Data-structure related data with random seed 1227 // (similarity score: 41.33%) +3. Test entry #446: Container RESTful related data with random seed 10831 // (similarity score: 40.34%) +4. Test entry #215: API RESTful related data with random seed 29014 // (similarity score: 40.32%) +5. Test entry #310: RESTful UML related data with random seed 5709 // (similarity score: 40.14%) +6. Test entry #314: Threading RESTful related data with random seed 12875 // (similarity score: 37.42%) +7. Test entry #296: ERD RESTful related data with random seed 19760 // (similarity score: 36.03%) +8. Test entry #200: NoSQL RESTful related data with random seed 23279 // (similarity score: 31.68%) +9. 
Test entry #423: Diagram RESTful related data with random seed 14813 // (similarity score: 29.93%) +10. Test entry #345: CI/CD RESTful related data with random seed 23698 // (similarity score: 29.68%) +⏱️ Get operation took: 900 ms +--------------------------------------------------- +[Step 228/500] Processing... +Getting data: 'Version-control' +1. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 57.25%) +2. Test entry #338: Version-control Monitoring related data with random seed 11905 // (similarity score: 50.77%) +3. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 49.77%) +4. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 48.91%) +5. Test entry #340: Version-control Sample related data with random seed 23846 // (similarity score: 47.51%) +6. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 46.96%) +7. Test entry #155: Sharding Version-control related data with random seed 4158 // (similarity score: 43.99%) +8. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 43.17%) +9. Test entry #429: Version-control Code-review related data with random seed 14304 // (similarity score: 40.29%) +10. Test entry #431: SQL Version-control related data with random seed 505 // (similarity score: 36.46%) +⏱️ Get operation took: 895 ms +--------------------------------------------------- +[Step 229/500] Processing... +Getting data: 'Asynchronous' +1. Test entry #74: Asynchronous Asynchronous related data with random seed 32283 // (similarity score: 46.54%) +2. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 44.22%) +3. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 43.79%) +4. 
Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 43.65%) +5. Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 43.29%) +6. Test entry #311: Asynchronous Best-practices related data with random seed 24139 // (similarity score: 41.57%) +7. Test entry #210: Asynchronous UI related data with random seed 16307 // (similarity score: 40.59%) +8. Test entry #79: Algorithm Asynchronous related data with random seed 18478 // (similarity score: 40.11%) +9. Test entry #240: Algorithm Asynchronous related data with random seed 6330 // (similarity score: 38.61%) +10. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 36.49%) +⏱️ Get operation took: 898 ms +--------------------------------------------------- +[Step 230/500] Processing... +Getting data: 'Cloud' +1. Test entry #497: Cloud Profiling related data with random seed 11981 // (similarity score: 38.74%) +2. Test entry #270: Cloud Consensus related data with random seed 15128 // (similarity score: 36.08%) +3. Test entry #173: Sample Cloud related data with random seed 3561 // (similarity score: 36.02%) +4. Test entry #154: Cloud Search related data with random seed 2185 // (similarity score: 35.82%) +5. Test entry #58: Cloud Index related data with random seed 16697 // (similarity score: 33.17%) +6. Test entry #122: Cloud Websocket related data with random seed 14024 // (similarity score: 31.07%) +7. Test entry #116: NoSQL Cloud related data with random seed 22178 // (similarity score: 30.67%) +8. Test entry #178: Cloud XML related data with random seed 27579 // (similarity score: 30.58%) +9. Test entry #44: Cloud Index related data with random seed 12754 // (similarity score: 30.20%) +10. 
Test entry #187: Documentation Cloud related data with random seed 9545 // (similarity score: 26.80%) +⏱️ Get operation took: 893 ms +--------------------------------------------------- +[Step 231/500] Processing... +Getting data: 'Resilient' +1. Test entry #368: Stubbing Resilient related data with random seed 618 // (similarity score: 43.92%) +2. Test entry #383: Python Resilient related data with random seed 20296 // (similarity score: 42.70%) +3. Test entry #281: Optimization Resilient related data with random seed 24028 // (similarity score: 39.95%) +4. Test entry #71: Replication Resilient related data with random seed 6058 // (similarity score: 39.12%) +5. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 37.87%) +6. Test entry #490: Threading Resilient related data with random seed 825 // (similarity score: 37.61%) +7. Test entry #193: Database Resilient related data with random seed 3286 // (similarity score: 37.33%) +8. Test entry #165: Resilient Python related data with random seed 3195 // (similarity score: 37.32%) +9. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 36.10%) +10. Test entry #119: CSV Resilient related data with random seed 21012 // (similarity score: 35.60%) +⏱️ Get operation took: 800 ms +--------------------------------------------------- +[Step 232/500] Processing... +Getting data: 'XML' +1. Test entry #303: XML Tutorial related data with random seed 29551 // (similarity score: 54.70%) +2. Test entry #42: Resilient XML related data with random seed 15654 // (similarity score: 53.29%) +3. Test entry #487: XML Best-practices related data with random seed 19330 // (similarity score: 52.93%) +4. Test entry #298: XML Example related data with random seed 18435 // (similarity score: 52.15%) +5. Test entry #258: XML Synchronous related data with random seed 9079 // (similarity score: 51.57%) +6. 
Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 50.79%) +7. Test entry #129: XML Code-review related data with random seed 19652 // (similarity score: 50.22%) +8. Test entry #475: Fault-tolerance XML related data with random seed 13158 // (similarity score: 46.25%) +9. Test entry #457: XML Recovery related data with random seed 2790 // (similarity score: 46.16%) +10. Test entry #350: XML Concurrency related data with random seed 12519 // (similarity score: 44.91%) +⏱️ Get operation took: 892 ms +--------------------------------------------------- +[Step 233/500] Processing... +Getting data: 'Alerting' +1. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 51.72%) +2. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 44.93%) +3. Test entry #371: Code-review Alerting related data with random seed 6709 // (similarity score: 40.15%) +4. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 40.12%) +5. Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 35.73%) +6. Test entry #336: API Alerting related data with random seed 704 // (similarity score: 35.52%) +7. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 34.88%) +8. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 34.33%) +9. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 33.48%) +10. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 33.22%) +⏱️ Get operation took: 897 ms +--------------------------------------------------- +[Step 234/500] Processing... +Getting data: 'Security' +1. Test entry #411: Security Synchronous related data with random seed 11940 // (similarity score: 37.71%) +2. 
Test entry #174: Best-practices Security related data with random seed 18653 // (similarity score: 35.63%) +3. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 29.96%) +4. Test entry #268: GUI Security related data with random seed 8853 // (similarity score: 27.74%) +5. Test entry #315: Encryption Best-practices related data with random seed 23350 // (similarity score: 27.15%) +6. Test entry #123: Encryption Alerting related data with random seed 20180 // (similarity score: 25.45%) +7. Test entry #234: Security CI/CD related data with random seed 25834 // (similarity score: 25.03%) +8. Test entry #149: Encryption Logging related data with random seed 27963 // (similarity score: 24.16%) +9. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 24.08%) +10. Test entry #382: Encryption Algorithm related data with random seed 9600 // (similarity score: 23.77%) +⏱️ Get operation took: 893 ms +--------------------------------------------------- +[Step 235/500] Processing... +Getting data: 'Architecture' +1. Test entry #126: Architecture Framework related data with random seed 1304 // (similarity score: 36.09%) +2. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 32.91%) +3. Test entry #140: UX Architecture related data with random seed 15002 // (similarity score: 31.89%) +4. Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 31.79%) +5. Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 30.07%) +6. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 27.81%) +7. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 26.98%) +8. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 26.33%) +9. 
Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 25.30%) +10. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 24.95%) +⏱️ Get operation took: 915 ms +--------------------------------------------------- +[Step 236/500] Processing... +Getting data: 'Stubbing' +1. Test entry #368: Stubbing Resilient related data with random seed 618 // (similarity score: 56.85%) +2. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 52.11%) +3. Test entry #406: Debugging Stubbing related data with random seed 28773 // (similarity score: 52.05%) +4. Test entry #18: UX Stubbing related data with random seed 14060 // (similarity score: 50.48%) +5. Test entry #2: Database Stubbing related data with random seed 28361 // (similarity score: 48.35%) +6. Test entry #263: CLI Stubbing related data with random seed 13486 // (similarity score: 46.44%) +7. Test entry #53: Blockchain Stubbing related data with random seed 10771 // (similarity score: 46.20%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 46.19%) +9. Test entry #317: Stubbing CI/CD related data with random seed 3418 // (similarity score: 43.34%) +10. Test entry #358: Index Stubbing related data with random seed 6263 // (similarity score: 40.32%) +⏱️ Get operation took: 931 ms +--------------------------------------------------- +[Step 237/500] Processing... +Getting data: 'Consensus' +1. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 37.37%) +2. Test entry #270: Cloud Consensus related data with random seed 15128 // (similarity score: 34.38%) +3. Test entry #194: Python Consensus related data with random seed 2579 // (similarity score: 32.73%) +4. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 32.01%) +5. 
Test entry #460: Consensus YAML related data with random seed 16140 // (similarity score: 29.09%) +6. Test entry #470: Consensus Replication related data with random seed 9153 // (similarity score: 27.78%) +7. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 27.72%) +8. Test entry #445: Message-queue Consensus related data with random seed 22969 // (similarity score: 25.59%) +9. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 22.21%) +10. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 16.66%) +⏱️ Get operation took: 899 ms +--------------------------------------------------- +[Step 238/500] Processing... +Getting data: 'Consensus' +1. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 37.37%) +2. Test entry #270: Cloud Consensus related data with random seed 15128 // (similarity score: 34.38%) +3. Test entry #194: Python Consensus related data with random seed 2579 // (similarity score: 32.73%) +4. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 32.01%) +5. Test entry #460: Consensus YAML related data with random seed 16140 // (similarity score: 29.09%) +6. Test entry #470: Consensus Replication related data with random seed 9153 // (similarity score: 27.78%) +7. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 27.72%) +8. Test entry #445: Message-queue Consensus related data with random seed 22969 // (similarity score: 25.59%) +9. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 22.21%) +10. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 16.66%) +⏱️ Get operation took: 897 ms +--------------------------------------------------- +[Step 239/500] Processing... +Getting data: 'Flowchart' +1. 
Test entry #9: Diagram Flowchart related data with random seed 14766 // (similarity score: 63.49%) +2. Test entry #495: Best-practices Flowchart related data with random seed 507 // (similarity score: 59.08%) +3. Test entry #121: Flowchart SDK related data with random seed 24836 // (similarity score: 56.62%) +4. Test entry #226: Flowchart Testing related data with random seed 32215 // (similarity score: 56.46%) +5. Test entry #342: Python Flowchart related data with random seed 32467 // (similarity score: 56.34%) +6. Test entry #209: Index Flowchart related data with random seed 16113 // (similarity score: 54.06%) +7. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 49.88%) +8. Test entry #41: Index Flowchart related data with random seed 29094 // (similarity score: 49.84%) +9. Test entry #213: GUI GraphQL related data with random seed 9145 // (similarity score: 30.82%) +10. Test entry #469: Branching GraphQL related data with random seed 16900 // (similarity score: 24.78%) +⏱️ Get operation took: 939 ms +--------------------------------------------------- +[Step 240/500] Processing... +Getting data: 'Unit-test' +1. Test entry #232: Tutorial Unit-test related data with random seed 11672 // (similarity score: 51.61%) +2. Test entry #448: Tutorial Unit-test related data with random seed 7829 // (similarity score: 51.60%) +3. Test entry #186: Tutorial Unit-test related data with random seed 2934 // (similarity score: 48.49%) +4. Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 47.92%) +5. Test entry #233: Unit-test Container related data with random seed 19623 // (similarity score: 46.29%) +6. Test entry #442: Unit-test Framework related data with random seed 4836 // (similarity score: 45.43%) +7. Test entry #477: Unit-test CLI related data with random seed 17091 // (similarity score: 45.02%) +8. 
Test entry #120: Algorithm Unit-test related data with random seed 14608 // (similarity score: 44.87%) +9. Test entry #422: Unit-test Example related data with random seed 16994 // (similarity score: 44.07%) +10. Test entry #16: Node Unit-test related data with random seed 4007 // (similarity score: 41.87%) +⏱️ Get operation took: 950 ms +--------------------------------------------------- +[Step 241/500] Processing... +Getting data: 'Recovery' +1. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 39.10%) +2. Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 37.60%) +3. Test entry #306: Recovery Library related data with random seed 21298 // (similarity score: 36.53%) +4. Test entry #430: Recovery Diagram related data with random seed 24192 // (similarity score: 36.20%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 35.07%) +6. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 33.87%) +7. Test entry #367: SDK Recovery related data with random seed 20345 // (similarity score: 33.28%) +8. Test entry #88: Recovery JSON related data with random seed 28249 // (similarity score: 32.39%) +9. Test entry #321: Recovery Git related data with random seed 3971 // (similarity score: 32.26%) +10. Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 30.40%) +⏱️ Get operation took: 936 ms +--------------------------------------------------- +[Step 242/500] Processing... +Getting data: 'Boilerplate' +1. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 41.22%) +2. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 36.21%) +3. Test entry #280: Boilerplate Search related data with random seed 32715 // (similarity score: 30.77%) +4. 
Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 30.65%) +5. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 29.65%) +6. Test entry #12: Boilerplate YAML related data with random seed 29263 // (similarity score: 29.38%) +7. Test entry #196: Boilerplate Synchronous related data with random seed 18887 // (similarity score: 29.35%) +8. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 28.89%) +9. Test entry #443: Threading Boilerplate related data with random seed 23641 // (similarity score: 28.42%) +10. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 27.41%) +⏱️ Get operation took: 899 ms +--------------------------------------------------- +[Step 243/500] Processing... +Getting data: 'GUI' +1. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 43.56%) +2. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 39.19%) +3. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 38.99%) +4. Test entry #268: GUI Security related data with random seed 8853 // (similarity score: 38.70%) +5. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 37.90%) +6. Test entry #90: Network GUI related data with random seed 6917 // (similarity score: 37.45%) +7. Test entry #400: GUI Container related data with random seed 26968 // (similarity score: 36.86%) +8. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 36.84%) +9. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 35.99%) +10. Test entry #87: GUI Encryption related data with random seed 24527 // (similarity score: 35.25%) +⏱️ Get operation took: 893 ms +--------------------------------------------------- +[Step 244/500] Processing... 
+Getting data: 'Template' +1. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 46.79%) +2. Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 41.61%) +3. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 40.40%) +4. Test entry #134: Template Vector related data with random seed 24421 // (similarity score: 38.73%) +5. Test entry #83: Template Sharding related data with random seed 32241 // (similarity score: 34.86%) +6. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 33.67%) +7. Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 33.18%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 30.02%) +9. Test entry #322: ORM Template related data with random seed 28110 // (similarity score: 29.98%) +10. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 27.79%) +⏱️ Get operation took: 849 ms +--------------------------------------------------- +[Step 245/500] Processing... +Getting data: 'Synchronous' +1. Test entry #196: Boilerplate Synchronous related data with random seed 18887 // (similarity score: 42.32%) +2. Test entry #287: Example Synchronous related data with random seed 18676 // (similarity score: 40.82%) +3. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 32.63%) +4. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 32.34%) +5. Test entry #411: Security Synchronous related data with random seed 11940 // (similarity score: 31.90%) +6. Test entry #258: XML Synchronous related data with random seed 9079 // (similarity score: 29.62%) +7. Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 29.18%) +8. 
Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 28.48%) +9. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 27.98%) +10. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 27.69%) +⏱️ Get operation took: 795 ms +--------------------------------------------------- +[Step 246/500] Processing... +Getting data: 'Database' +1. Test entry #426: Database Database related data with random seed 19208 // (similarity score: 39.68%) +2. Test entry #496: Library Database related data with random seed 13646 // (similarity score: 36.63%) +3. Test entry #193: Database Resilient related data with random seed 3286 // (similarity score: 35.20%) +4. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 34.25%) +5. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 33.82%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 33.21%) +7. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 32.30%) +8. Test entry #82: Database Cloud related data with random seed 12343 // (similarity score: 32.29%) +9. Test entry #346: Database Microservices related data with random seed 1068 // (similarity score: 32.20%) +10. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 32.14%) +⏱️ Get operation took: 896 ms +--------------------------------------------------- +[Step 247/500] Processing... +Getting data: 'Mocking' +1. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 53.76%) +2. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 52.01%) +3. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 50.43%) +4. 
Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 49.94%) +5. Test entry #7: Mocking Parallelism related data with random seed 19010 // (similarity score: 48.48%) +6. Test entry #54: Concurrency Mocking related data with random seed 20846 // (similarity score: 45.54%) +7. Test entry #330: Boilerplate Mocking related data with random seed 5771 // (similarity score: 45.53%) +8. Test entry #190: Mocking Network related data with random seed 32119 // (similarity score: 44.81%) +9. Test entry #29: Mocking Network related data with random seed 20303 // (similarity score: 44.44%) +10. Test entry #185: Mocking YAML related data with random seed 11811 // (similarity score: 42.85%) +⏱️ Get operation took: 897 ms +--------------------------------------------------- +[Step 248/500] Processing... +Getting data: 'Logging' +1. Test entry #168: Event-driven Logging related data with random seed 22637 // (similarity score: 52.57%) +2. Test entry #172: Logging Architecture related data with random seed 7305 // (similarity score: 49.56%) +3. Test entry #222: Logging Library related data with random seed 32039 // (similarity score: 46.22%) +4. Test entry #361: Merging Logging related data with random seed 12323 // (similarity score: 45.44%) +5. Test entry #98: Asynchronous Logging related data with random seed 30841 // (similarity score: 45.06%) +6. Test entry #476: Logging API related data with random seed 8268 // (similarity score: 42.63%) +7. Test entry #245: Logging Testing related data with random seed 32263 // (similarity score: 42.44%) +8. Test entry #399: Logging Encryption related data with random seed 25211 // (similarity score: 37.28%) +9. Test entry #149: Encryption Logging related data with random seed 27963 // (similarity score: 35.73%) +10. 
Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 28.02%) +⏱️ Get operation took: 943 ms +--------------------------------------------------- +[Step 249/500] Processing... +Getting data: 'Message-queue' +1. Test entry #147: Message-queue Container related data with random seed 31302 // (similarity score: 55.00%) +2. Test entry #396: Sample Message-queue related data with random seed 1074 // (similarity score: 54.96%) +3. Test entry #445: Message-queue Consensus related data with random seed 22969 // (similarity score: 51.62%) +4. Test entry #447: Message-queue Unit-test related data with random seed 12353 // (similarity score: 48.23%) +5. Test entry #225: Encryption Message-queue related data with random seed 3677 // (similarity score: 45.23%) +6. Test entry #264: SQL Message-queue related data with random seed 315 // (similarity score: 44.77%) +7. Test entry #56: Replication Message-queue related data with random seed 23394 // (similarity score: 44.68%) +8. Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 30.91%) +9. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 27.49%) +10. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 24.44%) +⏱️ Get operation took: 950 ms +--------------------------------------------------- +[Step 250/500] Processing... +Getting data: 'Pull-request' +1. Test entry #438: Pull-request API related data with random seed 19781 // (similarity score: 51.53%) +2. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 50.75%) +3. Test entry #107: Resilient Pull-request related data with random seed 9028 // (similarity score: 49.75%) +4. Test entry #488: Pull-request Git related data with random seed 13085 // (similarity score: 47.13%) +5. 
Test entry #424: Pull-request YAML related data with random seed 9732 // (similarity score: 46.59%) +6. Test entry #288: Index Pull-request related data with random seed 4855 // (similarity score: 46.42%) +7. Test entry #28: Pull-request Pub-sub related data with random seed 15070 // (similarity score: 45.59%) +8. Test entry #166: Pull-request Unit-test related data with random seed 18963 // (similarity score: 40.27%) +9. Test entry #314: Threading RESTful related data with random seed 12875 // (similarity score: 19.17%) +10. Test entry #203: Recovery JSON related data with random seed 8776 // (similarity score: 17.99%) +⏱️ Get operation took: 864 ms + +📊 [BATCH REPORT] Items 201 to 250 + -> Average Latency: 919 ms + +--------------------------------------------------- +[Step 251/500] Processing... +Getting data: 'Data-structure' +1. Test entry #273: Data-structure Performance related data with random seed 1219 // (similarity score: 50.19%) +2. Test entry #183: Code-review Data-structure related data with random seed 20935 // (similarity score: 49.27%) +3. Test entry #1: Data-structure Index related data with random seed 16730 // (similarity score: 49.07%) +4. Test entry #248: Algorithm Data-structure related data with random seed 18077 // (similarity score: 47.47%) +5. Test entry #313: Data-structure Monitoring related data with random seed 10551 // (similarity score: 47.31%) +6. Test entry #118: CSV Data-structure related data with random seed 20709 // (similarity score: 45.72%) +7. Test entry #289: RESTful Data-structure related data with random seed 1227 // (similarity score: 42.01%) +8. Test entry #389: Data-structure JSON related data with random seed 23144 // (similarity score: 40.81%) +9. Test entry #425: CLI Data-structure related data with random seed 7062 // (similarity score: 39.72%) +10. 
Test entry #171: Blockchain Data-structure related data with random seed 29630 // (similarity score: 38.81%) +⏱️ Get operation took: 914 ms +--------------------------------------------------- +[Step 252/500] Processing... +Getting data: 'Data-structure' +1. Test entry #273: Data-structure Performance related data with random seed 1219 // (similarity score: 50.19%) +2. Test entry #183: Code-review Data-structure related data with random seed 20935 // (similarity score: 49.27%) +3. Test entry #1: Data-structure Index related data with random seed 16730 // (similarity score: 49.07%) +4. Test entry #248: Algorithm Data-structure related data with random seed 18077 // (similarity score: 47.47%) +5. Test entry #313: Data-structure Monitoring related data with random seed 10551 // (similarity score: 47.31%) +6. Test entry #118: CSV Data-structure related data with random seed 20709 // (similarity score: 45.72%) +7. Test entry #289: RESTful Data-structure related data with random seed 1227 // (similarity score: 42.01%) +8. Test entry #389: Data-structure JSON related data with random seed 23144 // (similarity score: 40.81%) +9. Test entry #425: CLI Data-structure related data with random seed 7062 // (similarity score: 39.72%) +10. Test entry #171: Blockchain Data-structure related data with random seed 29630 // (similarity score: 38.81%) +⏱️ Get operation took: 951 ms +--------------------------------------------------- +[Step 253/500] Processing... +Getting data: 'GraphQL' +1. Test entry #308: GraphQL GraphQL related data with random seed 25902 // (similarity score: 67.68%) +2. Test entry #125: GraphQL Tutorial related data with random seed 2640 // (similarity score: 66.75%) +3. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 64.60%) +4. Test entry #343: GraphQL UX related data with random seed 26726 // (similarity score: 62.29%) +5. 
Test entry #48: GraphQL Asynchronous related data with random seed 4509 // (similarity score: 60.43%) +6. Test entry #35: GraphQL YAML related data with random seed 19655 // (similarity score: 59.13%) +7. Test entry #213: GUI GraphQL related data with random seed 9145 // (similarity score: 58.32%) +8. Test entry #256: Profiling GraphQL related data with random seed 4781 // (similarity score: 58.12%) +9. Test entry #10: Framework GraphQL related data with random seed 10276 // (similarity score: 57.78%) +10. Test entry #25: GraphQL CI/CD related data with random seed 7557 // (similarity score: 56.63%) +⏱️ Get operation took: 888 ms +--------------------------------------------------- +[Step 254/500] Processing... +Getting data: 'Debugging' +1. Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 51.51%) +2. Test entry #333: Profiling Debugging related data with random seed 25431 // (similarity score: 49.97%) +3. Test entry #387: Index Debugging related data with random seed 29125 // (similarity score: 43.23%) +4. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 42.05%) +5. Test entry #67: Debugging Blockchain related data with random seed 23179 // (similarity score: 42.03%) +6. Test entry #236: Websocket Debugging related data with random seed 18729 // (similarity score: 41.95%) +7. Test entry #444: Vector Debugging related data with random seed 5646 // (similarity score: 41.67%) +8. Test entry #406: Debugging Stubbing related data with random seed 28773 // (similarity score: 41.49%) +9. Test entry #339: API Debugging related data with random seed 14456 // (similarity score: 39.09%) +10. Test entry #34: Debugging CI/CD related data with random seed 7455 // (similarity score: 33.14%) +⏱️ Get operation took: 989 ms +--------------------------------------------------- +[Step 255/500] Processing... +Getting data: 'RESTful' +1. 
Test entry #482: Code-review RESTful related data with random seed 18863 // (similarity score: 41.46%) +2. Test entry #289: RESTful Data-structure related data with random seed 1227 // (similarity score: 41.33%) +3. Test entry #446: Container RESTful related data with random seed 10831 // (similarity score: 40.34%) +4. Test entry #215: API RESTful related data with random seed 29014 // (similarity score: 40.32%) +5. Test entry #310: RESTful UML related data with random seed 5709 // (similarity score: 40.14%) +6. Test entry #314: Threading RESTful related data with random seed 12875 // (similarity score: 37.42%) +7. Test entry #296: ERD RESTful related data with random seed 19760 // (similarity score: 36.03%) +8. Test entry #200: NoSQL RESTful related data with random seed 23279 // (similarity score: 31.68%) +9. Test entry #423: Diagram RESTful related data with random seed 14813 // (similarity score: 29.93%) +10. Test entry #345: CI/CD RESTful related data with random seed 23698 // (similarity score: 29.68%) +⏱️ Get operation took: 922 ms +--------------------------------------------------- +[Step 256/500] Processing... +Getting data: 'Monitoring' +1. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 43.08%) +2. Test entry #357: Monitoring Library related data with random seed 91 // (similarity score: 35.99%) +3. Test entry #338: Version-control Monitoring related data with random seed 11905 // (similarity score: 34.42%) +4. Test entry #313: Data-structure Monitoring related data with random seed 10551 // (similarity score: 33.26%) +5. Test entry #494: Monitoring Threading related data with random seed 28404 // (similarity score: 32.92%) +6. Test entry #177: UX Monitoring related data with random seed 16167 // (similarity score: 31.57%) +7. Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 30.48%) +8. 
Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 29.69%) +9. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 27.09%) +10. Test entry #371: Code-review Alerting related data with random seed 6709 // (similarity score: 26.04%) +⏱️ Get operation took: 939 ms +--------------------------------------------------- +[Step 257/500] Processing... +Getting data: 'Index' +1. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 46.82%) +2. Test entry #473: Index Python related data with random seed 3534 // (similarity score: 44.57%) +3. Test entry #86: Index Container related data with random seed 813 // (similarity score: 41.90%) +4. Test entry #58: Cloud Index related data with random seed 16697 // (similarity score: 41.66%) +5. Test entry #1: Data-structure Index related data with random seed 16730 // (similarity score: 41.23%) +6. Test entry #44: Cloud Index related data with random seed 12754 // (similarity score: 38.93%) +7. Test entry #305: Index Database related data with random seed 10951 // (similarity score: 38.05%) +8. Test entry #31: ORM Index related data with random seed 2163 // (similarity score: 38.00%) +9. Test entry #288: Index Pull-request related data with random seed 4855 // (similarity score: 37.81%) +10. Test entry #358: Index Stubbing related data with random seed 6263 // (similarity score: 37.00%) +⏱️ Get operation took: 920 ms +--------------------------------------------------- +[Step 258/500] Processing... +Getting data: 'CSV' +1. Test entry #118: CSV Data-structure related data with random seed 20709 // (similarity score: 60.86%) +2. Test entry #439: CSV Library related data with random seed 9941 // (similarity score: 57.72%) +3. Test entry #21: CSV Example related data with random seed 2897 // (similarity score: 57.43%) +4. 
Test entry #454: CSV Best-practices related data with random seed 21695 // (similarity score: 56.93%) +5. Test entry #453: CSV Profiling related data with random seed 24443 // (similarity score: 53.45%) +6. Test entry #391: CSV Resilient related data with random seed 1245 // (similarity score: 51.75%) +7. Test entry #260: CSV Event-driven related data with random seed 23556 // (similarity score: 51.60%) +8. Test entry #119: CSV Resilient related data with random seed 21012 // (similarity score: 50.55%) +9. Test entry #392: Algorithm CSV related data with random seed 18964 // (similarity score: 50.33%) +10. Test entry #458: Encryption CSV related data with random seed 22642 // (similarity score: 49.73%) +⏱️ Get operation took: 983 ms +--------------------------------------------------- +[Step 259/500] Processing... +Getting data: 'Logging' +1. Test entry #168: Event-driven Logging related data with random seed 22637 // (similarity score: 52.57%) +2. Test entry #172: Logging Architecture related data with random seed 7305 // (similarity score: 49.56%) +3. Test entry #222: Logging Library related data with random seed 32039 // (similarity score: 46.22%) +4. Test entry #361: Merging Logging related data with random seed 12323 // (similarity score: 45.44%) +5. Test entry #98: Asynchronous Logging related data with random seed 30841 // (similarity score: 45.06%) +6. Test entry #476: Logging API related data with random seed 8268 // (similarity score: 42.63%) +7. Test entry #245: Logging Testing related data with random seed 32263 // (similarity score: 42.44%) +8. Test entry #399: Logging Encryption related data with random seed 25211 // (similarity score: 37.28%) +9. Test entry #149: Encryption Logging related data with random seed 27963 // (similarity score: 35.73%) +10. 
Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 28.02%) +⏱️ Get operation took: 908 ms +--------------------------------------------------- +[Step 260/500] Processing... +Getting data: 'Websocket' +1. Test entry #236: Websocket Debugging related data with random seed 18729 // (similarity score: 61.95%) +2. Test entry #92: Websocket Integration-test related data with random seed 32561 // (similarity score: 60.89%) +3. Test entry #13: Websocket Caching related data with random seed 27250 // (similarity score: 58.67%) +4. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 58.47%) +5. Test entry #122: Cloud Websocket related data with random seed 14024 // (similarity score: 57.43%) +6. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 15.29%) +7. Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 14.60%) +8. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 14.35%) +9. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 14.31%) +10. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 13.59%) +⏱️ Get operation took: 965 ms +--------------------------------------------------- +[Step 261/500] Processing... +Getting data: 'Debugging' +1. Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 51.51%) +2. Test entry #333: Profiling Debugging related data with random seed 25431 // (similarity score: 49.97%) +3. Test entry #387: Index Debugging related data with random seed 29125 // (similarity score: 43.23%) +4. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 42.05%) +5. Test entry #67: Debugging Blockchain related data with random seed 23179 // (similarity score: 42.03%) +6. 
Test entry #236: Websocket Debugging related data with random seed 18729 // (similarity score: 41.95%) +7. Test entry #444: Vector Debugging related data with random seed 5646 // (similarity score: 41.67%) +8. Test entry #406: Debugging Stubbing related data with random seed 28773 // (similarity score: 41.49%) +9. Test entry #339: API Debugging related data with random seed 14456 // (similarity score: 39.09%) +10. Test entry #34: Debugging CI/CD related data with random seed 7455 // (similarity score: 33.14%) +⏱️ Get operation took: 891 ms +--------------------------------------------------- +[Step 262/500] Processing... +Getting data: 'Sharding' +1. Test entry #394: Sharding Performance related data with random seed 1607 // (similarity score: 54.43%) +2. Test entry #355: Throughput Sharding related data with random seed 6245 // (similarity score: 53.68%) +3. Test entry #130: Resilient Sharding related data with random seed 12002 // (similarity score: 53.30%) +4. Test entry #334: Sharding Vector related data with random seed 29020 // (similarity score: 51.99%) +5. Test entry #155: Sharding Version-control related data with random seed 4158 // (similarity score: 51.36%) +6. Test entry #36: Threading Sharding related data with random seed 23643 // (similarity score: 50.25%) +7. Test entry #83: Template Sharding related data with random seed 32241 // (similarity score: 49.35%) +8. Test entry #103: CLI Sharding related data with random seed 23375 // (similarity score: 47.66%) +9. Test entry #39: UI Sharding related data with random seed 26238 // (similarity score: 45.29%) +10. Test entry #112: Caching Sharding related data with random seed 9211 // (similarity score: 44.08%) +⏱️ Get operation took: 981 ms +--------------------------------------------------- +[Step 263/500] Processing... +Getting data: 'Recovery' +1. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 39.10%) +2. 
Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 37.60%) +3. Test entry #306: Recovery Library related data with random seed 21298 // (similarity score: 36.53%) +4. Test entry #430: Recovery Diagram related data with random seed 24192 // (similarity score: 36.20%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 35.07%) +6. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 33.87%) +7. Test entry #367: SDK Recovery related data with random seed 20345 // (similarity score: 33.28%) +8. Test entry #88: Recovery JSON related data with random seed 28249 // (similarity score: 32.39%) +9. Test entry #321: Recovery Git related data with random seed 3971 // (similarity score: 32.26%) +10. Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 30.40%) +⏱️ Get operation took: 949 ms +--------------------------------------------------- +[Step 264/500] Processing... +Getting data: 'API' +1. Test entry #336: API Alerting related data with random seed 704 // (similarity score: 39.28%) +2. Test entry #215: API RESTful related data with random seed 29014 // (similarity score: 36.10%) +3. Test entry #230: Threading API related data with random seed 20630 // (similarity score: 33.40%) +4. Test entry #30: API Library related data with random seed 1930 // (similarity score: 33.27%) +5. Test entry #195: API Orchestration related data with random seed 17599 // (similarity score: 32.44%) +6. Test entry #157: API Threading related data with random seed 18440 // (similarity score: 32.22%) +7. Test entry #84: API Optimization related data with random seed 7997 // (similarity score: 32.19%) +8. Test entry #438: Pull-request API related data with random seed 19781 // (similarity score: 32.08%) +9. Test entry #476: Logging API related data with random seed 8268 // (similarity score: 31.38%) +10. 
Test entry #68: API Encryption related data with random seed 30427 // (similarity score: 30.50%) +⏱️ Get operation took: 887 ms +--------------------------------------------------- +[Step 265/500] Processing... +Getting data: 'Websocket' +1. Test entry #236: Websocket Debugging related data with random seed 18729 // (similarity score: 61.95%) +2. Test entry #92: Websocket Integration-test related data with random seed 32561 // (similarity score: 60.89%) +3. Test entry #13: Websocket Caching related data with random seed 27250 // (similarity score: 58.67%) +4. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 58.47%) +5. Test entry #122: Cloud Websocket related data with random seed 14024 // (similarity score: 57.43%) +6. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 15.29%) +7. Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 14.60%) +8. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 14.35%) +9. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 14.31%) +10. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 13.59%) +⏱️ Get operation took: 905 ms +--------------------------------------------------- +[Step 266/500] Processing... +Getting data: 'Alerting' +1. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 51.72%) +2. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 44.93%) +3. Test entry #371: Code-review Alerting related data with random seed 6709 // (similarity score: 40.15%) +4. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 40.12%) +5. Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 35.73%) +6. 
Test entry #336: API Alerting related data with random seed 704 // (similarity score: 35.52%) +7. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 34.88%) +8. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 34.33%) +9. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 33.48%) +10. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 33.22%) +⏱️ Get operation took: 890 ms +--------------------------------------------------- +[Step 267/500] Processing... +Getting data: 'Recovery' +1. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 39.10%) +2. Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 37.60%) +3. Test entry #306: Recovery Library related data with random seed 21298 // (similarity score: 36.53%) +4. Test entry #430: Recovery Diagram related data with random seed 24192 // (similarity score: 36.20%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 35.07%) +6. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 33.87%) +7. Test entry #367: SDK Recovery related data with random seed 20345 // (similarity score: 33.28%) +8. Test entry #88: Recovery JSON related data with random seed 28249 // (similarity score: 32.39%) +9. Test entry #321: Recovery Git related data with random seed 3971 // (similarity score: 32.26%) +10. Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 30.40%) +⏱️ Get operation took: 906 ms +--------------------------------------------------- +[Step 268/500] Processing... +Getting data: 'ERD' +1. Test entry #102: ERD ERD related data with random seed 20954 // (similarity score: 51.00%) +2. 
Test entry #138: Boilerplate ERD related data with random seed 17973 // (similarity score: 46.57%) +3. Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 44.39%) +4. Test entry #379: Code-review ERD related data with random seed 18047 // (similarity score: 43.54%) +5. Test entry #365: Framework ERD related data with random seed 21870 // (similarity score: 42.68%) +6. Test entry #135: UI ERD related data with random seed 9172 // (similarity score: 41.81%) +7. Test entry #378: ERD Parallelism related data with random seed 25512 // (similarity score: 39.79%) +8. Test entry #252: ERD Parallelism related data with random seed 23325 // (similarity score: 39.25%) +9. Test entry #296: ERD RESTful related data with random seed 19760 // (similarity score: 36.51%) +10. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 23.26%) +⏱️ Get operation took: 839 ms +--------------------------------------------------- +[Step 269/500] Processing... +Getting data: 'Stubbing' +1. Test entry #368: Stubbing Resilient related data with random seed 618 // (similarity score: 56.85%) +2. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 52.11%) +3. Test entry #406: Debugging Stubbing related data with random seed 28773 // (similarity score: 52.05%) +4. Test entry #18: UX Stubbing related data with random seed 14060 // (similarity score: 50.48%) +5. Test entry #2: Database Stubbing related data with random seed 28361 // (similarity score: 48.35%) +6. Test entry #263: CLI Stubbing related data with random seed 13486 // (similarity score: 46.44%) +7. Test entry #53: Blockchain Stubbing related data with random seed 10771 // (similarity score: 46.20%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 46.19%) +9. Test entry #317: Stubbing CI/CD related data with random seed 3418 // (similarity score: 43.34%) +10. 
Test entry #358: Index Stubbing related data with random seed 6263 // (similarity score: 40.32%) +⏱️ Get operation took: 953 ms +--------------------------------------------------- +[Step 270/500] Processing... +Getting data: 'Performance' +1. Test entry #452: Event-driven Performance related data with random seed 26374 // (similarity score: 39.07%) +2. Test entry #451: Library Performance related data with random seed 31802 // (similarity score: 35.13%) +3. Test entry #192: Performance Threading related data with random seed 6244 // (similarity score: 33.15%) +4. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 32.63%) +5. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 31.90%) +6. Test entry #481: Tutorial Throughput related data with random seed 11888 // (similarity score: 31.89%) +7. Test entry #273: Data-structure Performance related data with random seed 1219 // (similarity score: 31.74%) +8. Test entry #394: Sharding Performance related data with random seed 1607 // (similarity score: 31.69%) +9. Test entry #332: Performance YAML related data with random seed 23826 // (similarity score: 30.69%) +10. Test entry #291: YAML Performance related data with random seed 3598 // (similarity score: 28.81%) +⏱️ Get operation took: 1044 ms +--------------------------------------------------- +[Step 271/500] Processing... +Getting data: 'Network' +1. Test entry #283: Network Network related data with random seed 21734 // (similarity score: 44.63%) +2. Test entry #163: Network Orchestration related data with random seed 17906 // (similarity score: 36.14%) +3. Test entry #29: Mocking Network related data with random seed 20303 // (similarity score: 35.95%) +4. Test entry #190: Mocking Network related data with random seed 32119 // (similarity score: 35.84%) +5. Test entry #189: UML Network related data with random seed 6297 // (similarity score: 32.43%) +6. 
Test entry #90: Network GUI related data with random seed 6917 // (similarity score: 30.73%) +7. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 29.31%) +8. Test entry #49: ORM Network related data with random seed 23604 // (similarity score: 26.22%) +9. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 21.70%) +10. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 19.05%) +⏱️ Get operation took: 882 ms +--------------------------------------------------- +[Step 272/500] Processing... +Getting data: 'Node' +1. Test entry #114: Node Optimization related data with random seed 20946 // (similarity score: 44.96%) +2. Test entry #72: Node Optimization related data with random seed 10019 // (similarity score: 41.61%) +3. Test entry #246: Node Documentation related data with random seed 8868 // (similarity score: 39.92%) +4. Test entry #175: Node UX related data with random seed 24087 // (similarity score: 39.28%) +5. Test entry #341: Node Vector related data with random seed 30034 // (similarity score: 39.03%) +6. Test entry #93: Node Documentation related data with random seed 10928 // (similarity score: 38.72%) +7. Test entry #11: Code-review Node related data with random seed 32027 // (similarity score: 35.38%) +8. Test entry #398: Backup Node related data with random seed 21714 // (similarity score: 34.34%) +9. Test entry #328: Node DevOps related data with random seed 7412 // (similarity score: 30.61%) +10. Test entry #16: Node Unit-test related data with random seed 4007 // (similarity score: 29.56%) +⏱️ Get operation took: 853 ms +--------------------------------------------------- +[Step 273/500] Processing... +Getting data: 'Throughput' +1. Test entry #481: Tutorial Throughput related data with random seed 11888 // (similarity score: 48.81%) +2. 
Test entry #80: Fault-tolerance Throughput related data with random seed 27910 // (similarity score: 45.02%) +3. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 44.96%) +4. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 40.15%) +5. Test entry #355: Throughput Sharding related data with random seed 6245 // (similarity score: 39.86%) +6. Test entry #6: CI/CD Throughput related data with random seed 5729 // (similarity score: 38.72%) +7. Test entry #144: Throughput ORM related data with random seed 18100 // (similarity score: 35.68%) +8. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 35.66%) +9. Test entry #335: Throughput E2E-test related data with random seed 11728 // (similarity score: 34.04%) +10. Test entry #294: ORM Throughput related data with random seed 31130 // (similarity score: 33.56%) +⏱️ Get operation took: 887 ms +--------------------------------------------------- +[Step 274/500] Processing... +Getting data: 'Alerting' +1. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 51.72%) +2. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 44.93%) +3. Test entry #371: Code-review Alerting related data with random seed 6709 // (similarity score: 40.15%) +4. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 40.12%) +5. Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 35.73%) +6. Test entry #336: API Alerting related data with random seed 704 // (similarity score: 35.52%) +7. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 34.88%) +8. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 34.33%) +9. 
Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 33.48%) +10. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 33.22%) +⏱️ Get operation took: 869 ms +--------------------------------------------------- +[Step 275/500] Processing... +Getting data: 'Documentation' +1. Test entry #468: Documentation Monitoring related data with random seed 13294 // (similarity score: 41.51%) +2. Test entry #81: Example Documentation related data with random seed 4554 // (similarity score: 34.95%) +3. Test entry #187: Documentation Cloud related data with random seed 9545 // (similarity score: 34.85%) +4. Test entry #390: Documentation Diagram related data with random seed 8258 // (similarity score: 32.91%) +5. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 30.49%) +6. Test entry #176: Microservices Documentation related data with random seed 28218 // (similarity score: 28.00%) +7. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 27.51%) +8. Test entry #93: Node Documentation related data with random seed 10928 // (similarity score: 27.08%) +9. Test entry #319: Documentation Example related data with random seed 22093 // (similarity score: 26.81%) +10. Test entry #246: Node Documentation related data with random seed 8868 // (similarity score: 26.21%) +⏱️ Get operation took: 927 ms +--------------------------------------------------- +[Step 276/500] Processing... +Getting data: 'Blockchain' +1. Test entry #117: Design-patterns Blockchain related data with random seed 16711 // (similarity score: 41.64%) +2. Test entry #231: Blockchain Data-structure related data with random seed 2611 // (similarity score: 38.85%) +3. Test entry #61: Blockchain Microservices related data with random seed 13172 // (similarity score: 38.28%) +4. 
Test entry #171: Blockchain Data-structure related data with random seed 29630 // (similarity score: 33.73%) +5. Test entry #237: UX Blockchain related data with random seed 8734 // (similarity score: 31.74%) +6. Test entry #360: GUI Blockchain related data with random seed 10277 // (similarity score: 31.69%) +7. Test entry #67: Debugging Blockchain related data with random seed 23179 // (similarity score: 30.30%) +8. Test entry #229: Orchestration Blockchain related data with random seed 2138 // (similarity score: 30.22%) +9. Test entry #348: Blockchain SDK related data with random seed 25254 // (similarity score: 29.17%) +10. Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 27.19%) +⏱️ Get operation took: 1052 ms +--------------------------------------------------- +[Step 277/500] Processing... +Getting data: 'Node' +1. Test entry #114: Node Optimization related data with random seed 20946 // (similarity score: 44.96%) +2. Test entry #72: Node Optimization related data with random seed 10019 // (similarity score: 41.61%) +3. Test entry #246: Node Documentation related data with random seed 8868 // (similarity score: 39.92%) +4. Test entry #175: Node UX related data with random seed 24087 // (similarity score: 39.28%) +5. Test entry #341: Node Vector related data with random seed 30034 // (similarity score: 39.03%) +6. Test entry #93: Node Documentation related data with random seed 10928 // (similarity score: 38.72%) +7. Test entry #11: Code-review Node related data with random seed 32027 // (similarity score: 35.38%) +8. Test entry #398: Backup Node related data with random seed 21714 // (similarity score: 34.34%) +9. Test entry #328: Node DevOps related data with random seed 7412 // (similarity score: 30.61%) +10. 
Test entry #16: Node Unit-test related data with random seed 4007 // (similarity score: 29.56%) +⏱️ Get operation took: 940 ms +--------------------------------------------------- +[Step 278/500] Processing... +Getting data: 'Template' +1. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 46.79%) +2. Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 41.61%) +3. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 40.40%) +4. Test entry #134: Template Vector related data with random seed 24421 // (similarity score: 38.73%) +5. Test entry #83: Template Sharding related data with random seed 32241 // (similarity score: 34.86%) +6. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 33.67%) +7. Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 33.18%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 30.02%) +9. Test entry #322: ORM Template related data with random seed 28110 // (similarity score: 29.98%) +10. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 27.79%) +⏱️ Get operation took: 910 ms +--------------------------------------------------- +[Step 279/500] Processing... +Getting data: 'Pub-sub' +1. Test entry #418: Pub-sub API related data with random seed 23035 // (similarity score: 38.66%) +2. Test entry #381: Python Pub-sub related data with random seed 29514 // (similarity score: 38.60%) +3. Test entry #28: Pull-request Pub-sub related data with random seed 15070 // (similarity score: 38.34%) +4. Test entry #362: Pub-sub SQL related data with random seed 11044 // (similarity score: 36.25%) +5. Test entry #461: Vector Pub-sub related data with random seed 31094 // (similarity score: 34.99%) +6. 
Test entry #259: Pub-sub DevOps related data with random seed 23944 // (similarity score: 34.61%) +7. Test entry #318: Vector Pub-sub related data with random seed 10169 // (similarity score: 33.83%) +8. Test entry #485: Integration-test Pub-sub related data with random seed 29641 // (similarity score: 31.78%) +9. Test entry #36: Threading Sharding related data with random seed 23643 // (similarity score: 14.79%) +10. Test entry #355: Throughput Sharding related data with random seed 6245 // (similarity score: 14.25%) +⏱️ Get operation took: 825 ms +--------------------------------------------------- +[Step 280/500] Processing... +Getting data: 'Data-structure' +1. Test entry #273: Data-structure Performance related data with random seed 1219 // (similarity score: 50.19%) +2. Test entry #183: Code-review Data-structure related data with random seed 20935 // (similarity score: 49.27%) +3. Test entry #1: Data-structure Index related data with random seed 16730 // (similarity score: 49.07%) +4. Test entry #248: Algorithm Data-structure related data with random seed 18077 // (similarity score: 47.47%) +5. Test entry #313: Data-structure Monitoring related data with random seed 10551 // (similarity score: 47.31%) +6. Test entry #118: CSV Data-structure related data with random seed 20709 // (similarity score: 45.72%) +7. Test entry #289: RESTful Data-structure related data with random seed 1227 // (similarity score: 42.01%) +8. Test entry #389: Data-structure JSON related data with random seed 23144 // (similarity score: 40.81%) +9. Test entry #425: CLI Data-structure related data with random seed 7062 // (similarity score: 39.72%) +10. Test entry #171: Blockchain Data-structure related data with random seed 29630 // (similarity score: 38.81%) +⏱️ Get operation took: 895 ms +--------------------------------------------------- +[Step 281/500] Processing... +Getting data: 'Boilerplate' +1. 
Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 41.22%) +2. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 36.21%) +3. Test entry #280: Boilerplate Search related data with random seed 32715 // (similarity score: 30.77%) +4. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 30.65%) +5. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 29.65%) +6. Test entry #12: Boilerplate YAML related data with random seed 29263 // (similarity score: 29.38%) +7. Test entry #196: Boilerplate Synchronous related data with random seed 18887 // (similarity score: 29.35%) +8. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 28.89%) +9. Test entry #443: Threading Boilerplate related data with random seed 23641 // (similarity score: 28.42%) +10. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 27.41%) +⏱️ Get operation took: 995 ms +--------------------------------------------------- +[Step 282/500] Processing... +Getting data: 'Recovery' +1. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 39.10%) +2. Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 37.60%) +3. Test entry #306: Recovery Library related data with random seed 21298 // (similarity score: 36.53%) +4. Test entry #430: Recovery Diagram related data with random seed 24192 // (similarity score: 36.20%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 35.07%) +6. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 33.87%) +7. Test entry #367: SDK Recovery related data with random seed 20345 // (similarity score: 33.28%) +8. 
Test entry #88: Recovery JSON related data with random seed 28249 // (similarity score: 32.39%) +9. Test entry #321: Recovery Git related data with random seed 3971 // (similarity score: 32.26%) +10. Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 30.40%) +⏱️ Get operation took: 893 ms +--------------------------------------------------- +[Step 283/500] Processing... +Getting data: 'Vector' +1. Test entry #325: Vector UX related data with random seed 2934 // (similarity score: 40.50%) +2. Test entry #444: Vector Debugging related data with random seed 5646 // (similarity score: 36.83%) +3. Test entry #134: Template Vector related data with random seed 24421 // (similarity score: 36.57%) +4. Test entry #341: Node Vector related data with random seed 30034 // (similarity score: 36.54%) +5. Test entry #334: Sharding Vector related data with random seed 29020 // (similarity score: 33.49%) +6. Test entry #499: Vector CLI related data with random seed 18419 // (similarity score: 33.05%) +7. Test entry #433: Vector E2E-test related data with random seed 22356 // (similarity score: 31.29%) +8. Test entry #461: Vector Pub-sub related data with random seed 31094 // (similarity score: 31.17%) +9. Test entry #100: Latency Vector related data with random seed 28112 // (similarity score: 30.14%) +10. Test entry #385: ORM Vector related data with random seed 277 // (similarity score: 29.36%) +⏱️ Get operation took: 847 ms +--------------------------------------------------- +[Step 284/500] Processing... +Getting data: 'Unit-test' +1. Test entry #232: Tutorial Unit-test related data with random seed 11672 // (similarity score: 51.61%) +2. Test entry #448: Tutorial Unit-test related data with random seed 7829 // (similarity score: 51.60%) +3. Test entry #186: Tutorial Unit-test related data with random seed 2934 // (similarity score: 48.49%) +4. 
Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 47.92%) +5. Test entry #233: Unit-test Container related data with random seed 19623 // (similarity score: 46.29%) +6. Test entry #442: Unit-test Framework related data with random seed 4836 // (similarity score: 45.43%) +7. Test entry #477: Unit-test CLI related data with random seed 17091 // (similarity score: 45.02%) +8. Test entry #120: Algorithm Unit-test related data with random seed 14608 // (similarity score: 44.87%) +9. Test entry #422: Unit-test Example related data with random seed 16994 // (similarity score: 44.07%) +10. Test entry #16: Node Unit-test related data with random seed 4007 // (similarity score: 41.87%) +⏱️ Get operation took: 847 ms +--------------------------------------------------- +[Step 285/500] Processing... +Getting data: 'UX' +1. Test entry #463: Microservices UX related data with random seed 32421 // (similarity score: 28.20%) +2. Test entry #177: UX Monitoring related data with random seed 16167 // (similarity score: 26.37%) +3. Test entry #133: Microservices UX related data with random seed 13570 // (similarity score: 25.58%) +4. Test entry #140: UX Architecture related data with random seed 15002 // (similarity score: 25.04%) +5. Test entry #66: UX UML related data with random seed 3512 // (similarity score: 22.74%) +6. Test entry #402: UX Load-balancing related data with random seed 5781 // (similarity score: 21.76%) +7. Test entry #115: UX Design-patterns related data with random seed 14554 // (similarity score: 21.69%) +8. Test entry #175: Node UX related data with random seed 24087 // (similarity score: 20.22%) +9. Test entry #75: UX Search related data with random seed 11487 // (similarity score: 20.03%) +10. Test entry #237: UX Blockchain related data with random seed 8734 // (similarity score: 19.35%) +⏱️ Get operation took: 900 ms +--------------------------------------------------- +[Step 286/500] Processing... 
+Getting data: 'Consensus' +1. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 37.37%) +2. Test entry #270: Cloud Consensus related data with random seed 15128 // (similarity score: 34.38%) +3. Test entry #194: Python Consensus related data with random seed 2579 // (similarity score: 32.73%) +4. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 32.01%) +5. Test entry #460: Consensus YAML related data with random seed 16140 // (similarity score: 29.09%) +6. Test entry #470: Consensus Replication related data with random seed 9153 // (similarity score: 27.78%) +7. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 27.72%) +8. Test entry #445: Message-queue Consensus related data with random seed 22969 // (similarity score: 25.59%) +9. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 22.21%) +10. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 16.66%) +⏱️ Get operation took: 892 ms +--------------------------------------------------- +[Step 287/500] Processing... +Getting data: 'Performance' +1. Test entry #452: Event-driven Performance related data with random seed 26374 // (similarity score: 39.07%) +2. Test entry #451: Library Performance related data with random seed 31802 // (similarity score: 35.13%) +3. Test entry #192: Performance Threading related data with random seed 6244 // (similarity score: 33.15%) +4. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 32.63%) +5. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 31.90%) +6. Test entry #481: Tutorial Throughput related data with random seed 11888 // (similarity score: 31.89%) +7. Test entry #273: Data-structure Performance related data with random seed 1219 // (similarity score: 31.74%) +8. 
Test entry #394: Sharding Performance related data with random seed 1607 // (similarity score: 31.69%) +9. Test entry #332: Performance YAML related data with random seed 23826 // (similarity score: 30.69%) +10. Test entry #291: YAML Performance related data with random seed 3598 // (similarity score: 28.81%) +⏱️ Get operation took: 918 ms +--------------------------------------------------- +[Step 288/500] Processing... +Getting data: 'RESTful' +1. Test entry #482: Code-review RESTful related data with random seed 18863 // (similarity score: 41.46%) +2. Test entry #289: RESTful Data-structure related data with random seed 1227 // (similarity score: 41.33%) +3. Test entry #446: Container RESTful related data with random seed 10831 // (similarity score: 40.34%) +4. Test entry #215: API RESTful related data with random seed 29014 // (similarity score: 40.32%) +5. Test entry #310: RESTful UML related data with random seed 5709 // (similarity score: 40.14%) +6. Test entry #314: Threading RESTful related data with random seed 12875 // (similarity score: 37.42%) +7. Test entry #296: ERD RESTful related data with random seed 19760 // (similarity score: 36.03%) +8. Test entry #200: NoSQL RESTful related data with random seed 23279 // (similarity score: 31.68%) +9. Test entry #423: Diagram RESTful related data with random seed 14813 // (similarity score: 29.93%) +10. Test entry #345: CI/CD RESTful related data with random seed 23698 // (similarity score: 29.68%) +⏱️ Get operation took: 968 ms +--------------------------------------------------- +[Step 289/500] Processing... +Getting data: 'ORM' +1. Test entry #235: ORM Boilerplate related data with random seed 17103 // (similarity score: 52.37%) +2. Test entry #322: ORM Template related data with random seed 28110 // (similarity score: 49.94%) +3. Test entry #428: Fault-tolerance ORM related data with random seed 17392 // (similarity score: 47.01%) +4. 
Test entry #294: ORM Throughput related data with random seed 31130 // (similarity score: 46.99%) +5. Test entry #202: Search ORM related data with random seed 9657 // (similarity score: 46.57%) +6. Test entry #144: Throughput ORM related data with random seed 18100 // (similarity score: 45.56%) +7. Test entry #22: ORM UX related data with random seed 24344 // (similarity score: 45.31%) +8. Test entry #91: Fault-tolerance ORM related data with random seed 3476 // (similarity score: 44.50%) +9. Test entry #197: Fault-tolerance ORM related data with random seed 21855 // (similarity score: 44.44%) +10. Test entry #49: ORM Network related data with random seed 23604 // (similarity score: 44.40%) +⏱️ Get operation took: 954 ms +--------------------------------------------------- +[Step 290/500] Processing... +Getting data: 'ERD' +1. Test entry #102: ERD ERD related data with random seed 20954 // (similarity score: 51.00%) +2. Test entry #138: Boilerplate ERD related data with random seed 17973 // (similarity score: 46.57%) +3. Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 44.39%) +4. Test entry #379: Code-review ERD related data with random seed 18047 // (similarity score: 43.54%) +5. Test entry #365: Framework ERD related data with random seed 21870 // (similarity score: 42.68%) +6. Test entry #135: UI ERD related data with random seed 9172 // (similarity score: 41.81%) +7. Test entry #378: ERD Parallelism related data with random seed 25512 // (similarity score: 39.79%) +8. Test entry #252: ERD Parallelism related data with random seed 23325 // (similarity score: 39.25%) +9. Test entry #296: ERD RESTful related data with random seed 19760 // (similarity score: 36.51%) +10. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 23.26%) +⏱️ Get operation took: 903 ms +--------------------------------------------------- +[Step 291/500] Processing... +Getting data: 'JSON' +1. 
Test entry #389: Data-structure JSON related data with random seed 23144 // (similarity score: 51.74%) +2. Test entry #156: JSON Optimization related data with random seed 28003 // (similarity score: 49.61%) +3. Test entry #413: JSON Alerting related data with random seed 8852 // (similarity score: 47.02%) +4. Test entry #113: JSON API related data with random seed 11466 // (similarity score: 46.74%) +5. Test entry #85: JSON Vector related data with random seed 23519 // (similarity score: 46.52%) +6. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 43.98%) +7. Test entry #218: JSON Caching related data with random seed 29229 // (similarity score: 43.91%) +8. Test entry #143: JSON XML related data with random seed 20028 // (similarity score: 42.01%) +9. Test entry #57: YAML JSON related data with random seed 19740 // (similarity score: 40.00%) +10. Test entry #420: JSON Microservices related data with random seed 9271 // (similarity score: 39.90%) +⏱️ Get operation took: 931 ms +--------------------------------------------------- +[Step 292/500] Processing... +Getting data: 'Stubbing' +1. Test entry #368: Stubbing Resilient related data with random seed 618 // (similarity score: 56.85%) +2. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 52.11%) +3. Test entry #406: Debugging Stubbing related data with random seed 28773 // (similarity score: 52.05%) +4. Test entry #18: UX Stubbing related data with random seed 14060 // (similarity score: 50.48%) +5. Test entry #2: Database Stubbing related data with random seed 28361 // (similarity score: 48.35%) +6. Test entry #263: CLI Stubbing related data with random seed 13486 // (similarity score: 46.44%) +7. Test entry #53: Blockchain Stubbing related data with random seed 10771 // (similarity score: 46.20%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 46.19%) +9. 
Test entry #317: Stubbing CI/CD related data with random seed 3418 // (similarity score: 43.34%) +10. Test entry #358: Index Stubbing related data with random seed 6263 // (similarity score: 40.32%) +⏱️ Get operation took: 900 ms +--------------------------------------------------- +[Step 293/500] Processing... +Getting data: 'GraphQL' +1. Test entry #308: GraphQL GraphQL related data with random seed 25902 // (similarity score: 67.68%) +2. Test entry #125: GraphQL Tutorial related data with random seed 2640 // (similarity score: 66.75%) +3. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 64.60%) +4. Test entry #343: GraphQL UX related data with random seed 26726 // (similarity score: 62.29%) +5. Test entry #48: GraphQL Asynchronous related data with random seed 4509 // (similarity score: 60.43%) +6. Test entry #35: GraphQL YAML related data with random seed 19655 // (similarity score: 59.13%) +7. Test entry #213: GUI GraphQL related data with random seed 9145 // (similarity score: 58.32%) +8. Test entry #256: Profiling GraphQL related data with random seed 4781 // (similarity score: 58.12%) +9. Test entry #10: Framework GraphQL related data with random seed 10276 // (similarity score: 57.78%) +10. Test entry #25: GraphQL CI/CD related data with random seed 7557 // (similarity score: 56.63%) +⏱️ Get operation took: 945 ms +--------------------------------------------------- +[Step 294/500] Processing... +Getting data: 'CLI' +1. Test entry #284: UX CLI related data with random seed 10670 // (similarity score: 47.12%) +2. Test entry #492: Testing CLI related data with random seed 20014 // (similarity score: 46.52%) +3. Test entry #462: Best-practices CLI related data with random seed 19922 // (similarity score: 45.07%) +4. Test entry #242: Load-balancing CLI related data with random seed 21278 // (similarity score: 44.28%) +5. 
Test entry #425: CLI Data-structure related data with random seed 7062 // (similarity score: 42.94%) +6. Test entry #499: Vector CLI related data with random seed 18419 // (similarity score: 41.47%) +7. Test entry #103: CLI Sharding related data with random seed 23375 // (similarity score: 40.66%) +8. Test entry #477: Unit-test CLI related data with random seed 17091 // (similarity score: 39.09%) +9. Test entry #263: CLI Stubbing related data with random seed 13486 // (similarity score: 37.57%) +10. Test entry #299: CLI Git related data with random seed 18823 // (similarity score: 34.54%) +⏱️ Get operation took: 952 ms +--------------------------------------------------- +[Step 295/500] Processing... +Getting data: 'Sharding' +1. Test entry #394: Sharding Performance related data with random seed 1607 // (similarity score: 54.43%) +2. Test entry #355: Throughput Sharding related data with random seed 6245 // (similarity score: 53.68%) +3. Test entry #130: Resilient Sharding related data with random seed 12002 // (similarity score: 53.30%) +4. Test entry #334: Sharding Vector related data with random seed 29020 // (similarity score: 51.99%) +5. Test entry #155: Sharding Version-control related data with random seed 4158 // (similarity score: 51.36%) +6. Test entry #36: Threading Sharding related data with random seed 23643 // (similarity score: 50.25%) +7. Test entry #83: Template Sharding related data with random seed 32241 // (similarity score: 49.35%) +8. Test entry #103: CLI Sharding related data with random seed 23375 // (similarity score: 47.66%) +9. Test entry #39: UI Sharding related data with random seed 26238 // (similarity score: 45.29%) +10. Test entry #112: Caching Sharding related data with random seed 9211 // (similarity score: 44.08%) +⏱️ Get operation took: 913 ms +--------------------------------------------------- +[Step 296/500] Processing... +Getting data: 'UI' +1. 
Test entry #108: Code-review UI related data with random seed 5301 // (similarity score: 33.71%) +2. Test entry #249: SDK UI related data with random seed 20519 // (similarity score: 32.43%) +3. Test entry #210: Asynchronous UI related data with random seed 16307 // (similarity score: 31.84%) +4. Test entry #141: Sample UI related data with random seed 12909 // (similarity score: 28.72%) +5. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 28.18%) +6. Test entry #449: UI Load-balancing related data with random seed 29998 // (similarity score: 27.95%) +7. Test entry #39: UI Sharding related data with random seed 26238 // (similarity score: 26.65%) +8. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 25.47%) +9. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 25.01%) +10. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 24.21%) +⏱️ Get operation took: 926 ms +--------------------------------------------------- +[Step 297/500] Processing... +Getting data: 'Consensus' +1. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 37.37%) +2. Test entry #270: Cloud Consensus related data with random seed 15128 // (similarity score: 34.38%) +3. Test entry #194: Python Consensus related data with random seed 2579 // (similarity score: 32.73%) +4. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 32.01%) +5. Test entry #460: Consensus YAML related data with random seed 16140 // (similarity score: 29.09%) +6. Test entry #470: Consensus Replication related data with random seed 9153 // (similarity score: 27.78%) +7. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 27.72%) +8. Test entry #445: Message-queue Consensus related data with random seed 22969 // (similarity score: 25.59%) +9. 
Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 22.21%) +10. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 16.66%) +⏱️ Get operation took: 955 ms +--------------------------------------------------- +[Step 298/500] Processing... +Getting data: 'Template' +1. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 46.79%) +2. Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 41.61%) +3. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 40.40%) +4. Test entry #134: Template Vector related data with random seed 24421 // (similarity score: 38.73%) +5. Test entry #83: Template Sharding related data with random seed 32241 // (similarity score: 34.86%) +6. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 33.67%) +7. Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 33.18%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 30.02%) +9. Test entry #322: ORM Template related data with random seed 28110 // (similarity score: 29.98%) +10. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 27.79%) +⏱️ Get operation took: 940 ms +--------------------------------------------------- +[Step 299/500] Processing... +Getting data: 'Websocket' +1. Test entry #236: Websocket Debugging related data with random seed 18729 // (similarity score: 61.95%) +2. Test entry #92: Websocket Integration-test related data with random seed 32561 // (similarity score: 60.89%) +3. Test entry #13: Websocket Caching related data with random seed 27250 // (similarity score: 58.67%) +4. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 58.47%) +5. 
Test entry #122: Cloud Websocket related data with random seed 14024 // (similarity score: 57.43%) +6. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 15.29%) +7. Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 14.60%) +8. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 14.35%) +9. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 14.31%) +10. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 13.59%) +⏱️ Get operation took: 948 ms +--------------------------------------------------- +[Step 300/500] Processing... +Getting data: 'CI/CD' +1. Test entry #324: Boilerplate CI/CD related data with random seed 11105 // (similarity score: 54.77%) +2. Test entry #479: CI/CD Data-structure related data with random seed 13652 // (similarity score: 54.51%) +3. Test entry #6: CI/CD Throughput related data with random seed 5729 // (similarity score: 53.92%) +4. Test entry #472: Library CI/CD related data with random seed 18363 // (similarity score: 53.79%) +5. Test entry #450: CI/CD Data-structure related data with random seed 9170 // (similarity score: 52.85%) +6. Test entry #317: Stubbing CI/CD related data with random seed 3418 // (similarity score: 52.74%) +7. Test entry #234: Security CI/CD related data with random seed 25834 // (similarity score: 52.50%) +8. Test entry #326: CI/CD Sample related data with random seed 22568 // (similarity score: 52.47%) +9. Test entry #408: CI/CD Search related data with random seed 26877 // (similarity score: 51.42%) +10. Test entry #489: Load-balancing CI/CD related data with random seed 9733 // (similarity score: 49.63%) +⏱️ Get operation took: 992 ms + +📊 [BATCH REPORT] Items 251 to 300 + -> Average Latency: 923 ms + +--------------------------------------------------- +[Step 301/500] Processing... 
+Getting data: 'Design-patterns' +1. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 63.80%) +2. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 62.09%) +3. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 61.76%) +4. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 57.41%) +5. Test entry #115: UX Design-patterns related data with random seed 14554 // (similarity score: 56.30%) +6. Test entry #4: Design-patterns UML related data with random seed 16565 // (similarity score: 50.76%) +7. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 49.55%) +8. Test entry #498: Design-patterns Python related data with random seed 16866 // (similarity score: 49.27%) +9. Test entry #212: Design-patterns DevOps related data with random seed 19247 // (similarity score: 46.55%) +10. Test entry #117: Design-patterns Blockchain related data with random seed 16711 // (similarity score: 45.78%) +⏱️ Get operation took: 929 ms +--------------------------------------------------- +[Step 302/500] Processing... +Getting data: 'Index' +1. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 46.82%) +2. Test entry #473: Index Python related data with random seed 3534 // (similarity score: 44.57%) +3. Test entry #86: Index Container related data with random seed 813 // (similarity score: 41.90%) +4. Test entry #58: Cloud Index related data with random seed 16697 // (similarity score: 41.66%) +5. Test entry #1: Data-structure Index related data with random seed 16730 // (similarity score: 41.23%) +6. Test entry #44: Cloud Index related data with random seed 12754 // (similarity score: 38.93%) +7. Test entry #305: Index Database related data with random seed 10951 // (similarity score: 38.05%) +8. 
Test entry #31: ORM Index related data with random seed 2163 // (similarity score: 38.00%) +9. Test entry #288: Index Pull-request related data with random seed 4855 // (similarity score: 37.81%) +10. Test entry #358: Index Stubbing related data with random seed 6263 // (similarity score: 37.00%) +⏱️ Get operation took: 939 ms +--------------------------------------------------- +[Step 303/500] Processing... +Getting data: 'Backup' +1. Test entry #434: Tutorial Backup related data with random seed 6053 // (similarity score: 46.60%) +2. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 44.75%) +3. Test entry #398: Backup Node related data with random seed 21714 // (similarity score: 39.31%) +4. Test entry #410: Backup Parallelism related data with random seed 5421 // (similarity score: 38.11%) +5. Test entry #15: Backup Orchestration related data with random seed 32162 // (similarity score: 37.94%) +6. Test entry #145: CI/CD Backup related data with random seed 22567 // (similarity score: 35.01%) +7. Test entry #104: Backup E2E-test related data with random seed 3184 // (similarity score: 29.08%) +8. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 25.60%) +9. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 24.70%) +10. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 19.74%) +⏱️ Get operation took: 956 ms +--------------------------------------------------- +[Step 304/500] Processing... +Getting data: 'Merging' +1. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 47.50%) +2. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 46.78%) +3. Test entry #417: Event-driven Merging related data with random seed 13881 // (similarity score: 45.04%) +4. 
Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 44.14%) +5. Test entry #361: Merging Logging related data with random seed 12323 // (similarity score: 38.53%) +6. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 38.19%) +7. Test entry #150: Diagram Integration-test related data with random seed 19409 // (similarity score: 21.00%) +8. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 19.25%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 18.92%) +10. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 16.25%) +⏱️ Get operation took: 931 ms +--------------------------------------------------- +[Step 305/500] Processing... +Getting data: 'Pub-sub' +1. Test entry #418: Pub-sub API related data with random seed 23035 // (similarity score: 38.66%) +2. Test entry #381: Python Pub-sub related data with random seed 29514 // (similarity score: 38.60%) +3. Test entry #28: Pull-request Pub-sub related data with random seed 15070 // (similarity score: 38.34%) +4. Test entry #362: Pub-sub SQL related data with random seed 11044 // (similarity score: 36.25%) +5. Test entry #461: Vector Pub-sub related data with random seed 31094 // (similarity score: 34.99%) +6. Test entry #259: Pub-sub DevOps related data with random seed 23944 // (similarity score: 34.61%) +7. Test entry #318: Vector Pub-sub related data with random seed 10169 // (similarity score: 33.83%) +8. Test entry #485: Integration-test Pub-sub related data with random seed 29641 // (similarity score: 31.78%) +9. Test entry #36: Threading Sharding related data with random seed 23643 // (similarity score: 14.79%) +10. 
Test entry #355: Throughput Sharding related data with random seed 6245 // (similarity score: 14.25%) +⏱️ Get operation took: 901 ms +--------------------------------------------------- +[Step 306/500] Processing... +Getting data: 'GUI' +1. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 43.56%) +2. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 39.19%) +3. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 38.99%) +4. Test entry #268: GUI Security related data with random seed 8853 // (similarity score: 38.70%) +5. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 37.90%) +6. Test entry #90: Network GUI related data with random seed 6917 // (similarity score: 37.45%) +7. Test entry #400: GUI Container related data with random seed 26968 // (similarity score: 36.86%) +8. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 36.84%) +9. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 35.99%) +10. Test entry #87: GUI Encryption related data with random seed 24527 // (similarity score: 35.25%) +⏱️ Get operation took: 895 ms +--------------------------------------------------- +[Step 307/500] Processing... +Getting data: 'NoSQL' +1. Test entry #373: NoSQL Best-practices related data with random seed 20318 // (similarity score: 55.91%) +2. Test entry #106: Profiling NoSQL related data with random seed 23156 // (similarity score: 52.58%) +3. Test entry #37: Resilient NoSQL related data with random seed 4721 // (similarity score: 50.97%) +4. Test entry #116: NoSQL Cloud related data with random seed 22178 // (similarity score: 49.77%) +5. Test entry #327: NoSQL Architecture related data with random seed 14507 // (similarity score: 49.58%) +6. Test entry #69: Asynchronous NoSQL related data with random seed 15850 // (similarity score: 48.77%) +7. 
Test entry #64: Diagram NoSQL related data with random seed 3428 // (similarity score: 48.28%) +8. Test entry #254: NoSQL Architecture related data with random seed 8978 // (similarity score: 45.02%) +9. Test entry #388: NoSQL SDK related data with random seed 18178 // (similarity score: 44.14%) +10. Test entry #200: NoSQL RESTful related data with random seed 23279 // (similarity score: 44.04%) +⏱️ Get operation took: 907 ms +--------------------------------------------------- +[Step 308/500] Processing... +Getting data: 'Python' +1. Test entry #181: UX Python related data with random seed 26197 // (similarity score: 37.24%) +2. Test entry #383: Python Resilient related data with random seed 20296 // (similarity score: 35.13%) +3. Test entry #473: Index Python related data with random seed 3534 // (similarity score: 33.84%) +4. Test entry #165: Resilient Python related data with random seed 3195 // (similarity score: 33.43%) +5. Test entry #498: Design-patterns Python related data with random seed 16866 // (similarity score: 33.09%) +6. Test entry #70: Python Testing related data with random seed 5317 // (similarity score: 31.71%) +7. Test entry #194: Python Consensus related data with random seed 2579 // (similarity score: 29.30%) +8. Test entry #286: Python SDK related data with random seed 14667 // (similarity score: 29.01%) +9. Test entry #381: Python Pub-sub related data with random seed 29514 // (similarity score: 24.48%) +10. Test entry #392: Algorithm CSV related data with random seed 18964 // (similarity score: 24.38%) +⏱️ Get operation took: 936 ms +--------------------------------------------------- +[Step 309/500] Processing... +Getting data: 'Algorithm' +1. Test entry #248: Algorithm Data-structure related data with random seed 18077 // (similarity score: 46.27%) +2. Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 38.05%) +3. 
Test entry #392: Algorithm CSV related data with random seed 18964 // (similarity score: 36.06%) +4. Test entry #79: Algorithm Asynchronous related data with random seed 18478 // (similarity score: 34.57%) +5. Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 33.78%) +6. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 33.09%) +7. Test entry #240: Algorithm Asynchronous related data with random seed 6330 // (similarity score: 31.98%) +8. Test entry #227: Framework Algorithm related data with random seed 19206 // (similarity score: 31.96%) +9. Test entry #337: Algorithm Orchestration related data with random seed 15318 // (similarity score: 31.60%) +10. Test entry #120: Algorithm Unit-test related data with random seed 14608 // (similarity score: 30.48%) +⏱️ Get operation took: 896 ms +--------------------------------------------------- +[Step 310/500] Processing... +Getting data: 'SQL' +1. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 41.94%) +2. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 41.80%) +3. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 37.96%) +4. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 37.76%) +5. Test entry #152: SQL Load-balancing related data with random seed 9262 // (similarity score: 37.07%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 36.37%) +7. Test entry #137: SQL Parallelism related data with random seed 8672 // (similarity score: 35.23%) +8. Test entry #431: SQL Version-control related data with random seed 505 // (similarity score: 32.86%) +9. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 30.91%) +10. 
Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 30.51%) +⏱️ Get operation took: 987 ms +--------------------------------------------------- +[Step 311/500] Processing... +Getting data: 'ERD' +1. Test entry #102: ERD ERD related data with random seed 20954 // (similarity score: 51.00%) +2. Test entry #138: Boilerplate ERD related data with random seed 17973 // (similarity score: 46.57%) +3. Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 44.39%) +4. Test entry #379: Code-review ERD related data with random seed 18047 // (similarity score: 43.54%) +5. Test entry #365: Framework ERD related data with random seed 21870 // (similarity score: 42.68%) +6. Test entry #135: UI ERD related data with random seed 9172 // (similarity score: 41.81%) +7. Test entry #378: ERD Parallelism related data with random seed 25512 // (similarity score: 39.79%) +8. Test entry #252: ERD Parallelism related data with random seed 23325 // (similarity score: 39.25%) +9. Test entry #296: ERD RESTful related data with random seed 19760 // (similarity score: 36.51%) +10. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 23.26%) +⏱️ Get operation took: 898 ms +--------------------------------------------------- +[Step 312/500] Processing... +Getting data: 'Node' +1. Test entry #114: Node Optimization related data with random seed 20946 // (similarity score: 44.96%) +2. Test entry #72: Node Optimization related data with random seed 10019 // (similarity score: 41.61%) +3. Test entry #246: Node Documentation related data with random seed 8868 // (similarity score: 39.92%) +4. Test entry #175: Node UX related data with random seed 24087 // (similarity score: 39.28%) +5. Test entry #341: Node Vector related data with random seed 30034 // (similarity score: 39.03%) +6. Test entry #93: Node Documentation related data with random seed 10928 // (similarity score: 38.72%) +7. 
Test entry #11: Code-review Node related data with random seed 32027 // (similarity score: 35.38%) +8. Test entry #398: Backup Node related data with random seed 21714 // (similarity score: 34.34%) +9. Test entry #328: Node DevOps related data with random seed 7412 // (similarity score: 30.61%) +10. Test entry #16: Node Unit-test related data with random seed 4007 // (similarity score: 29.56%) +⏱️ Get operation took: 843 ms +--------------------------------------------------- +[Step 313/500] Processing... +Getting data: 'Branching' +1. Test entry #436: Example Branching related data with random seed 25763 // (similarity score: 54.14%) +2. Test entry #228: Branching Framework related data with random seed 10213 // (similarity score: 51.21%) +3. Test entry #384: Branching Sample related data with random seed 29822 // (similarity score: 50.90%) +4. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 47.89%) +5. Test entry #262: Container Branching related data with random seed 22849 // (similarity score: 47.76%) +6. Test entry #469: Branching GraphQL related data with random seed 16900 // (similarity score: 40.80%) +7. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 40.19%) +8. Test entry #23: Branching Microservices related data with random seed 24428 // (similarity score: 38.88%) +9. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 29.49%) +10. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 26.91%) +⏱️ Get operation took: 899 ms +--------------------------------------------------- +[Step 314/500] Processing... +Getting data: 'CLI' +1. Test entry #284: UX CLI related data with random seed 10670 // (similarity score: 47.12%) +2. Test entry #492: Testing CLI related data with random seed 20014 // (similarity score: 46.52%) +3. 
Test entry #462: Best-practices CLI related data with random seed 19922 // (similarity score: 45.07%) +4. Test entry #242: Load-balancing CLI related data with random seed 21278 // (similarity score: 44.28%) +5. Test entry #425: CLI Data-structure related data with random seed 7062 // (similarity score: 42.94%) +6. Test entry #499: Vector CLI related data with random seed 18419 // (similarity score: 41.47%) +7. Test entry #103: CLI Sharding related data with random seed 23375 // (similarity score: 40.66%) +8. Test entry #477: Unit-test CLI related data with random seed 17091 // (similarity score: 39.09%) +9. Test entry #263: CLI Stubbing related data with random seed 13486 // (similarity score: 37.57%) +10. Test entry #299: CLI Git related data with random seed 18823 // (similarity score: 34.54%) +⏱️ Get operation took: 887 ms +--------------------------------------------------- +[Step 315/500] Processing... +Getting data: 'UI' +1. Test entry #108: Code-review UI related data with random seed 5301 // (similarity score: 33.71%) +2. Test entry #249: SDK UI related data with random seed 20519 // (similarity score: 32.43%) +3. Test entry #210: Asynchronous UI related data with random seed 16307 // (similarity score: 31.84%) +4. Test entry #141: Sample UI related data with random seed 12909 // (similarity score: 28.72%) +5. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 28.18%) +6. Test entry #449: UI Load-balancing related data with random seed 29998 // (similarity score: 27.95%) +7. Test entry #39: UI Sharding related data with random seed 26238 // (similarity score: 26.65%) +8. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 25.47%) +9. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 25.01%) +10. 
Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 24.21%) +⏱️ Get operation took: 907 ms +--------------------------------------------------- +[Step 316/500] Processing... +Getting data: 'GraphQL' +1. Test entry #308: GraphQL GraphQL related data with random seed 25902 // (similarity score: 67.68%) +2. Test entry #125: GraphQL Tutorial related data with random seed 2640 // (similarity score: 66.75%) +3. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 64.60%) +4. Test entry #343: GraphQL UX related data with random seed 26726 // (similarity score: 62.29%) +5. Test entry #48: GraphQL Asynchronous related data with random seed 4509 // (similarity score: 60.43%) +6. Test entry #35: GraphQL YAML related data with random seed 19655 // (similarity score: 59.13%) +7. Test entry #213: GUI GraphQL related data with random seed 9145 // (similarity score: 58.32%) +8. Test entry #256: Profiling GraphQL related data with random seed 4781 // (similarity score: 58.12%) +9. Test entry #10: Framework GraphQL related data with random seed 10276 // (similarity score: 57.78%) +10. Test entry #25: GraphQL CI/CD related data with random seed 7557 // (similarity score: 56.63%) +⏱️ Get operation took: 989 ms +--------------------------------------------------- +[Step 317/500] Processing... +Getting data: 'SQL' +1. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 41.94%) +2. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 41.80%) +3. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 37.96%) +4. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 37.76%) +5. Test entry #152: SQL Load-balancing related data with random seed 9262 // (similarity score: 37.07%) +6. 
Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 36.37%) +7. Test entry #137: SQL Parallelism related data with random seed 8672 // (similarity score: 35.23%) +8. Test entry #431: SQL Version-control related data with random seed 505 // (similarity score: 32.86%) +9. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 30.91%) +10. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 30.51%) +⏱️ Get operation took: 915 ms +--------------------------------------------------- +[Step 318/500] Processing... +Getting data: 'Threading' +1. Test entry #164: Threading Parallelism related data with random seed 13640 // (similarity score: 52.54%) +2. Test entry #148: Threading Best-practices related data with random seed 22158 // (similarity score: 50.45%) +3. Test entry #415: Threading Algorithm related data with random seed 8994 // (similarity score: 48.42%) +4. Test entry #191: Optimization Threading related data with random seed 15850 // (similarity score: 48.17%) +5. Test entry #494: Monitoring Threading related data with random seed 28404 // (similarity score: 45.10%) +6. Test entry #36: Threading Sharding related data with random seed 23643 // (similarity score: 44.95%) +7. Test entry #490: Threading Resilient related data with random seed 825 // (similarity score: 44.64%) +8. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 44.04%) +9. Test entry #192: Performance Threading related data with random seed 6244 // (similarity score: 43.42%) +10. Test entry #230: Threading API related data with random seed 20630 // (similarity score: 42.47%) +⏱️ Get operation took: 930 ms +--------------------------------------------------- +[Step 319/500] Processing... +Getting data: 'Flowchart' +1. Test entry #9: Diagram Flowchart related data with random seed 14766 // (similarity score: 63.49%) +2. 
Test entry #495: Best-practices Flowchart related data with random seed 507 // (similarity score: 59.08%) +3. Test entry #121: Flowchart SDK related data with random seed 24836 // (similarity score: 56.62%) +4. Test entry #226: Flowchart Testing related data with random seed 32215 // (similarity score: 56.46%) +5. Test entry #342: Python Flowchart related data with random seed 32467 // (similarity score: 56.34%) +6. Test entry #209: Index Flowchart related data with random seed 16113 // (similarity score: 54.06%) +7. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 49.88%) +8. Test entry #41: Index Flowchart related data with random seed 29094 // (similarity score: 49.84%) +9. Test entry #213: GUI GraphQL related data with random seed 9145 // (similarity score: 30.82%) +10. Test entry #469: Branching GraphQL related data with random seed 16900 // (similarity score: 24.78%) +⏱️ Get operation took: 888 ms +--------------------------------------------------- +[Step 320/500] Processing... +Getting data: 'Mocking' +1. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 53.76%) +2. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 52.01%) +3. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 50.43%) +4. Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 49.94%) +5. Test entry #7: Mocking Parallelism related data with random seed 19010 // (similarity score: 48.48%) +6. Test entry #54: Concurrency Mocking related data with random seed 20846 // (similarity score: 45.54%) +7. Test entry #330: Boilerplate Mocking related data with random seed 5771 // (similarity score: 45.53%) +8. Test entry #190: Mocking Network related data with random seed 32119 // (similarity score: 44.81%) +9. 
Test entry #29: Mocking Network related data with random seed 20303 // (similarity score: 44.44%) +10. Test entry #185: Mocking YAML related data with random seed 11811 // (similarity score: 42.85%) +⏱️ Get operation took: 894 ms +--------------------------------------------------- +[Step 321/500] Processing... +Getting data: 'Parallelism' +1. Test entry #164: Threading Parallelism related data with random seed 13640 // (similarity score: 53.13%) +2. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 51.21%) +3. Test entry #7: Mocking Parallelism related data with random seed 19010 // (similarity score: 51.18%) +4. Test entry #97: Testing Parallelism related data with random seed 6045 // (similarity score: 49.12%) +5. Test entry #136: Parallelism Sample related data with random seed 18078 // (similarity score: 48.54%) +6. Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 47.79%) +7. Test entry #201: Sample Parallelism related data with random seed 8912 // (similarity score: 46.65%) +8. Test entry #252: ERD Parallelism related data with random seed 23325 // (similarity score: 45.15%) +9. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 44.59%) +10. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 44.19%) +⏱️ Get operation took: 901 ms +--------------------------------------------------- +[Step 322/500] Processing... +Getting data: 'Node' +1. Test entry #114: Node Optimization related data with random seed 20946 // (similarity score: 44.96%) +2. Test entry #72: Node Optimization related data with random seed 10019 // (similarity score: 41.61%) +3. Test entry #246: Node Documentation related data with random seed 8868 // (similarity score: 39.92%) +4. Test entry #175: Node UX related data with random seed 24087 // (similarity score: 39.28%) +5. 
Test entry #341: Node Vector related data with random seed 30034 // (similarity score: 39.03%) +6. Test entry #93: Node Documentation related data with random seed 10928 // (similarity score: 38.72%) +7. Test entry #11: Code-review Node related data with random seed 32027 // (similarity score: 35.38%) +8. Test entry #398: Backup Node related data with random seed 21714 // (similarity score: 34.34%) +9. Test entry #328: Node DevOps related data with random seed 7412 // (similarity score: 30.61%) +10. Test entry #16: Node Unit-test related data with random seed 4007 // (similarity score: 29.56%) +⏱️ Get operation took: 944 ms +--------------------------------------------------- +[Step 323/500] Processing... +Getting data: 'Version-control' +1. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 57.25%) +2. Test entry #338: Version-control Monitoring related data with random seed 11905 // (similarity score: 50.77%) +3. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 49.77%) +4. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 48.91%) +5. Test entry #340: Version-control Sample related data with random seed 23846 // (similarity score: 47.51%) +6. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 46.96%) +7. Test entry #155: Sharding Version-control related data with random seed 4158 // (similarity score: 43.99%) +8. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 43.17%) +9. Test entry #429: Version-control Code-review related data with random seed 14304 // (similarity score: 40.29%) +10. Test entry #431: SQL Version-control related data with random seed 505 // (similarity score: 36.46%) +⏱️ Get operation took: 889 ms +--------------------------------------------------- +[Step 324/500] Processing... 
+Getting data: 'Tutorial' +1. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 43.89%) +2. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 38.60%) +3. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 34.87%) +4. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 34.64%) +5. Test entry #279: Tutorial SDK related data with random seed 4966 // (similarity score: 33.42%) +6. Test entry #303: XML Tutorial related data with random seed 29551 // (similarity score: 32.40%) +7. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 32.34%) +8. Test entry #125: GraphQL Tutorial related data with random seed 2640 // (similarity score: 30.17%) +9. Test entry #448: Tutorial Unit-test related data with random seed 7829 // (similarity score: 29.62%) +10. Test entry #295: YAML Tutorial related data with random seed 7734 // (similarity score: 29.59%) +⏱️ Get operation took: 896 ms +--------------------------------------------------- +[Step 325/500] Processing... +Getting data: 'Unit-test' +1. Test entry #232: Tutorial Unit-test related data with random seed 11672 // (similarity score: 51.61%) +2. Test entry #448: Tutorial Unit-test related data with random seed 7829 // (similarity score: 51.60%) +3. Test entry #186: Tutorial Unit-test related data with random seed 2934 // (similarity score: 48.49%) +4. Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 47.92%) +5. Test entry #233: Unit-test Container related data with random seed 19623 // (similarity score: 46.29%) +6. Test entry #442: Unit-test Framework related data with random seed 4836 // (similarity score: 45.43%) +7. Test entry #477: Unit-test CLI related data with random seed 17091 // (similarity score: 45.02%) +8. 
Test entry #120: Algorithm Unit-test related data with random seed 14608 // (similarity score: 44.87%) +9. Test entry #422: Unit-test Example related data with random seed 16994 // (similarity score: 44.07%) +10. Test entry #16: Node Unit-test related data with random seed 4007 // (similarity score: 41.87%) +⏱️ Get operation took: 902 ms +--------------------------------------------------- +[Step 326/500] Processing... +Getting data: 'Threading' +1. Test entry #164: Threading Parallelism related data with random seed 13640 // (similarity score: 52.54%) +2. Test entry #148: Threading Best-practices related data with random seed 22158 // (similarity score: 50.45%) +3. Test entry #415: Threading Algorithm related data with random seed 8994 // (similarity score: 48.42%) +4. Test entry #191: Optimization Threading related data with random seed 15850 // (similarity score: 48.17%) +5. Test entry #494: Monitoring Threading related data with random seed 28404 // (similarity score: 45.10%) +6. Test entry #36: Threading Sharding related data with random seed 23643 // (similarity score: 44.95%) +7. Test entry #490: Threading Resilient related data with random seed 825 // (similarity score: 44.64%) +8. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 44.04%) +9. Test entry #192: Performance Threading related data with random seed 6244 // (similarity score: 43.42%) +10. Test entry #230: Threading API related data with random seed 20630 // (similarity score: 42.47%) +⏱️ Get operation took: 944 ms +--------------------------------------------------- +[Step 327/500] Processing... +Getting data: 'Flowchart' +1. Test entry #9: Diagram Flowchart related data with random seed 14766 // (similarity score: 63.49%) +2. Test entry #495: Best-practices Flowchart related data with random seed 507 // (similarity score: 59.08%) +3. Test entry #121: Flowchart SDK related data with random seed 24836 // (similarity score: 56.62%) +4. 
Test entry #226: Flowchart Testing related data with random seed 32215 // (similarity score: 56.46%) +5. Test entry #342: Python Flowchart related data with random seed 32467 // (similarity score: 56.34%) +6. Test entry #209: Index Flowchart related data with random seed 16113 // (similarity score: 54.06%) +7. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 49.88%) +8. Test entry #41: Index Flowchart related data with random seed 29094 // (similarity score: 49.84%) +9. Test entry #213: GUI GraphQL related data with random seed 9145 // (similarity score: 30.82%) +10. Test entry #469: Branching GraphQL related data with random seed 16900 // (similarity score: 24.78%) +⏱️ Get operation took: 891 ms +--------------------------------------------------- +[Step 328/500] Processing... +Getting data: 'CLI' +1. Test entry #284: UX CLI related data with random seed 10670 // (similarity score: 47.12%) +2. Test entry #492: Testing CLI related data with random seed 20014 // (similarity score: 46.52%) +3. Test entry #462: Best-practices CLI related data with random seed 19922 // (similarity score: 45.07%) +4. Test entry #242: Load-balancing CLI related data with random seed 21278 // (similarity score: 44.28%) +5. Test entry #425: CLI Data-structure related data with random seed 7062 // (similarity score: 42.94%) +6. Test entry #499: Vector CLI related data with random seed 18419 // (similarity score: 41.47%) +7. Test entry #103: CLI Sharding related data with random seed 23375 // (similarity score: 40.66%) +8. Test entry #477: Unit-test CLI related data with random seed 17091 // (similarity score: 39.09%) +9. Test entry #263: CLI Stubbing related data with random seed 13486 // (similarity score: 37.57%) +10. Test entry #299: CLI Git related data with random seed 18823 // (similarity score: 34.54%) +⏱️ Get operation took: 895 ms +--------------------------------------------------- +[Step 329/500] Processing... 
+Getting data: 'Library' +1. Test entry #451: Library Performance related data with random seed 31802 // (similarity score: 38.26%) +2. Test entry #357: Monitoring Library related data with random seed 91 // (similarity score: 36.58%) +3. Test entry #496: Library Database related data with random seed 13646 // (similarity score: 33.79%) +4. Test entry #320: Library UX related data with random seed 29021 // (similarity score: 31.34%) +5. Test entry #486: Library Example related data with random seed 9207 // (similarity score: 30.25%) +6. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 29.56%) +7. Test entry #95: Library Fault-tolerance related data with random seed 987 // (similarity score: 27.92%) +8. Test entry #472: Library CI/CD related data with random seed 18363 // (similarity score: 27.01%) +9. Test entry #30: API Library related data with random seed 1930 // (similarity score: 26.55%) +10. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 25.80%) +⏱️ Get operation took: 853 ms +--------------------------------------------------- +[Step 330/500] Processing... +Getting data: 'Framework' +1. Test entry #227: Framework Algorithm related data with random seed 19206 // (similarity score: 31.85%) +2. Test entry #275: Framework Orchestration related data with random seed 12664 // (similarity score: 31.36%) +3. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 29.40%) +4. Test entry #126: Architecture Framework related data with random seed 1304 // (similarity score: 28.78%) +5. Test entry #386: Framework Testing related data with random seed 14924 // (similarity score: 28.77%) +6. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 26.61%) +7. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 24.69%) +8. 
Test entry #365: Framework ERD related data with random seed 21870 // (similarity score: 20.97%) +9. Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 20.94%) +10. Test entry #228: Branching Framework related data with random seed 10213 // (similarity score: 19.21%) +⏱️ Get operation took: 944 ms +--------------------------------------------------- +[Step 331/500] Processing... +Getting data: 'Database' +1. Test entry #426: Database Database related data with random seed 19208 // (similarity score: 39.68%) +2. Test entry #496: Library Database related data with random seed 13646 // (similarity score: 36.63%) +3. Test entry #193: Database Resilient related data with random seed 3286 // (similarity score: 35.20%) +4. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 34.25%) +5. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 33.82%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 33.21%) +7. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 32.30%) +8. Test entry #82: Database Cloud related data with random seed 12343 // (similarity score: 32.29%) +9. Test entry #346: Database Microservices related data with random seed 1068 // (similarity score: 32.20%) +10. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 32.14%) +⏱️ Get operation took: 849 ms +--------------------------------------------------- +[Step 332/500] Processing... +Getting data: 'Threading' +1. Test entry #164: Threading Parallelism related data with random seed 13640 // (similarity score: 52.54%) +2. Test entry #148: Threading Best-practices related data with random seed 22158 // (similarity score: 50.45%) +3. Test entry #415: Threading Algorithm related data with random seed 8994 // (similarity score: 48.42%) +4. 
Test entry #191: Optimization Threading related data with random seed 15850 // (similarity score: 48.17%) +5. Test entry #494: Monitoring Threading related data with random seed 28404 // (similarity score: 45.10%) +6. Test entry #36: Threading Sharding related data with random seed 23643 // (similarity score: 44.95%) +7. Test entry #490: Threading Resilient related data with random seed 825 // (similarity score: 44.64%) +8. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 44.04%) +9. Test entry #192: Performance Threading related data with random seed 6244 // (similarity score: 43.42%) +10. Test entry #230: Threading API related data with random seed 20630 // (similarity score: 42.47%) +⏱️ Get operation took: 898 ms +--------------------------------------------------- +[Step 333/500] Processing... +Getting data: 'Mocking' +1. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 53.76%) +2. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 52.01%) +3. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 50.43%) +4. Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 49.94%) +5. Test entry #7: Mocking Parallelism related data with random seed 19010 // (similarity score: 48.48%) +6. Test entry #54: Concurrency Mocking related data with random seed 20846 // (similarity score: 45.54%) +7. Test entry #330: Boilerplate Mocking related data with random seed 5771 // (similarity score: 45.53%) +8. Test entry #190: Mocking Network related data with random seed 32119 // (similarity score: 44.81%) +9. Test entry #29: Mocking Network related data with random seed 20303 // (similarity score: 44.44%) +10. 
Test entry #185: Mocking YAML related data with random seed 11811 // (similarity score: 42.85%) +⏱️ Get operation took: 935 ms +--------------------------------------------------- +[Step 334/500] Processing... +Getting data: 'Tutorial' +1. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 43.89%) +2. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 38.60%) +3. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 34.87%) +4. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 34.64%) +5. Test entry #279: Tutorial SDK related data with random seed 4966 // (similarity score: 33.42%) +6. Test entry #303: XML Tutorial related data with random seed 29551 // (similarity score: 32.40%) +7. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 32.34%) +8. Test entry #125: GraphQL Tutorial related data with random seed 2640 // (similarity score: 30.17%) +9. Test entry #448: Tutorial Unit-test related data with random seed 7829 // (similarity score: 29.62%) +10. Test entry #295: YAML Tutorial related data with random seed 7734 // (similarity score: 29.59%) +⏱️ Get operation took: 899 ms +--------------------------------------------------- +[Step 335/500] Processing... +Getting data: 'GUI' +1. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 43.56%) +2. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 39.19%) +3. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 38.99%) +4. Test entry #268: GUI Security related data with random seed 8853 // (similarity score: 38.70%) +5. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 37.90%) +6. 
Test entry #90: Network GUI related data with random seed 6917 // (similarity score: 37.45%) +7. Test entry #400: GUI Container related data with random seed 26968 // (similarity score: 36.86%) +8. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 36.84%) +9. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 35.99%) +10. Test entry #87: GUI Encryption related data with random seed 24527 // (similarity score: 35.25%) +⏱️ Get operation took: 994 ms +--------------------------------------------------- +[Step 336/500] Processing... +Getting data: 'Alerting' +1. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 51.72%) +2. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 44.93%) +3. Test entry #371: Code-review Alerting related data with random seed 6709 // (similarity score: 40.15%) +4. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 40.12%) +5. Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 35.73%) +6. Test entry #336: API Alerting related data with random seed 704 // (similarity score: 35.52%) +7. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 34.88%) +8. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 34.33%) +9. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 33.48%) +10. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 33.22%) +⏱️ Get operation took: 1007 ms +--------------------------------------------------- +[Step 337/500] Processing... +Getting data: 'Testing' +1. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 48.75%) +2. 
Test entry #179: Testing Testing related data with random seed 8212 // (similarity score: 47.91%) +3. Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 47.89%) +4. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 47.21%) +5. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 46.88%) +6. Test entry #54: Concurrency Mocking related data with random seed 20846 // (similarity score: 44.52%) +7. Test entry #274: Testing Caching related data with random seed 1136 // (similarity score: 44.39%) +8. Test entry #330: Boilerplate Mocking related data with random seed 5771 // (similarity score: 44.01%) +9. Test entry #448: Tutorial Unit-test related data with random seed 7829 // (similarity score: 43.86%) +10. Test entry #232: Tutorial Unit-test related data with random seed 11672 // (similarity score: 43.56%) +⏱️ Get operation took: 944 ms +--------------------------------------------------- +[Step 338/500] Processing... +Getting data: 'Alerting' +1. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 51.72%) +2. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 44.93%) +3. Test entry #371: Code-review Alerting related data with random seed 6709 // (similarity score: 40.15%) +4. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 40.12%) +5. Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 35.73%) +6. Test entry #336: API Alerting related data with random seed 704 // (similarity score: 35.52%) +7. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 34.88%) +8. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 34.33%) +9. 
Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 33.48%) +10. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 33.22%) +⏱️ Get operation took: 937 ms +--------------------------------------------------- +[Step 339/500] Processing... +Getting data: 'Consensus' +1. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 37.37%) +2. Test entry #270: Cloud Consensus related data with random seed 15128 // (similarity score: 34.38%) +3. Test entry #194: Python Consensus related data with random seed 2579 // (similarity score: 32.73%) +4. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 32.01%) +5. Test entry #460: Consensus YAML related data with random seed 16140 // (similarity score: 29.09%) +6. Test entry #470: Consensus Replication related data with random seed 9153 // (similarity score: 27.78%) +7. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 27.72%) +8. Test entry #445: Message-queue Consensus related data with random seed 22969 // (similarity score: 25.59%) +9. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 22.21%) +10. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 16.66%) +⏱️ Get operation took: 913 ms +--------------------------------------------------- +[Step 340/500] Processing... +Getting data: 'Caching' +1. Test entry #290: Caching Sample related data with random seed 12446 // (similarity score: 47.89%) +2. Test entry #112: Caching Sharding related data with random seed 9211 // (similarity score: 46.65%) +3. Test entry #274: Testing Caching related data with random seed 1136 // (similarity score: 46.39%) +4. Test entry #218: JSON Caching related data with random seed 29229 // (similarity score: 43.15%) +5. 
Test entry #13: Websocket Caching related data with random seed 27250 // (similarity score: 42.09%) +6. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 21.31%) +7. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 20.98%) +8. Test entry #139: Load-balancing Optimization related data with random seed 31776 // (similarity score: 20.79%) +9. Test entry #311: Asynchronous Best-practices related data with random seed 24139 // (similarity score: 20.69%) +10. Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 19.76%) +⏱️ Get operation took: 933 ms +--------------------------------------------------- +[Step 341/500] Processing... +Getting data: 'UI' +1. Test entry #108: Code-review UI related data with random seed 5301 // (similarity score: 33.71%) +2. Test entry #249: SDK UI related data with random seed 20519 // (similarity score: 32.43%) +3. Test entry #210: Asynchronous UI related data with random seed 16307 // (similarity score: 31.84%) +4. Test entry #141: Sample UI related data with random seed 12909 // (similarity score: 28.72%) +5. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 28.18%) +6. Test entry #449: UI Load-balancing related data with random seed 29998 // (similarity score: 27.95%) +7. Test entry #39: UI Sharding related data with random seed 26238 // (similarity score: 26.65%) +8. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 25.47%) +9. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 25.01%) +10. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 24.21%) +⏱️ Get operation took: 943 ms +--------------------------------------------------- +[Step 342/500] Processing... +Getting data: 'Database' +1. 
Test entry #426: Database Database related data with random seed 19208 // (similarity score: 39.68%) +2. Test entry #496: Library Database related data with random seed 13646 // (similarity score: 36.63%) +3. Test entry #193: Database Resilient related data with random seed 3286 // (similarity score: 35.20%) +4. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 34.25%) +5. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 33.82%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 33.21%) +7. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 32.30%) +8. Test entry #82: Database Cloud related data with random seed 12343 // (similarity score: 32.29%) +9. Test entry #346: Database Microservices related data with random seed 1068 // (similarity score: 32.20%) +10. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 32.14%) +⏱️ Get operation took: 954 ms +--------------------------------------------------- +[Step 343/500] Processing... +Getting data: 'Template' +1. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 46.79%) +2. Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 41.61%) +3. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 40.40%) +4. Test entry #134: Template Vector related data with random seed 24421 // (similarity score: 38.73%) +5. Test entry #83: Template Sharding related data with random seed 32241 // (similarity score: 34.86%) +6. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 33.67%) +7. Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 33.18%) +8. 
Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 30.02%) +9. Test entry #322: ORM Template related data with random seed 28110 // (similarity score: 29.98%) +10. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 27.79%) +⏱️ Get operation took: 948 ms +--------------------------------------------------- +[Step 344/500] Processing... +Getting data: 'Merging' +1. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 47.50%) +2. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 46.78%) +3. Test entry #417: Event-driven Merging related data with random seed 13881 // (similarity score: 45.04%) +4. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 44.14%) +5. Test entry #361: Merging Logging related data with random seed 12323 // (similarity score: 38.53%) +6. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 38.19%) +7. Test entry #150: Diagram Integration-test related data with random seed 19409 // (similarity score: 21.00%) +8. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 19.25%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 18.92%) +10. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 16.25%) +⏱️ Get operation took: 935 ms +--------------------------------------------------- +[Step 345/500] Processing... +Getting data: 'API' +1. Test entry #336: API Alerting related data with random seed 704 // (similarity score: 39.28%) +2. Test entry #215: API RESTful related data with random seed 29014 // (similarity score: 36.10%) +3. Test entry #230: Threading API related data with random seed 20630 // (similarity score: 33.40%) +4. 
Test entry #30: API Library related data with random seed 1930 // (similarity score: 33.27%) +5. Test entry #195: API Orchestration related data with random seed 17599 // (similarity score: 32.44%) +6. Test entry #157: API Threading related data with random seed 18440 // (similarity score: 32.22%) +7. Test entry #84: API Optimization related data with random seed 7997 // (similarity score: 32.19%) +8. Test entry #438: Pull-request API related data with random seed 19781 // (similarity score: 32.08%) +9. Test entry #476: Logging API related data with random seed 8268 // (similarity score: 31.38%) +10. Test entry #68: API Encryption related data with random seed 30427 // (similarity score: 30.50%) +⏱️ Get operation took: 897 ms +--------------------------------------------------- +[Step 346/500] Processing... +Getting data: 'Alerting' +1. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 51.72%) +2. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 44.93%) +3. Test entry #371: Code-review Alerting related data with random seed 6709 // (similarity score: 40.15%) +4. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 40.12%) +5. Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 35.73%) +6. Test entry #336: API Alerting related data with random seed 704 // (similarity score: 35.52%) +7. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 34.88%) +8. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 34.33%) +9. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 33.48%) +10. 
Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 33.22%) +⏱️ Get operation took: 1001 ms +--------------------------------------------------- +[Step 347/500] Processing... +Getting data: 'Example' +1. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 21.02%) +2. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 20.91%) +3. Test entry #81: Example Documentation related data with random seed 4554 // (similarity score: 20.87%) +4. Test entry #43: Documentation Example related data with random seed 5875 // (similarity score: 19.56%) +5. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 19.26%) +6. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 18.68%) +7. Test entry #319: Documentation Example related data with random seed 22093 // (similarity score: 18.61%) +8. Test entry #297: Event-driven Code-review related data with random seed 23440 // (similarity score: 17.50%) +9. Test entry #138: Boilerplate ERD related data with random seed 17973 // (similarity score: 17.39%) +10. Test entry #183: Code-review Data-structure related data with random seed 20935 // (similarity score: 16.97%) +⏱️ Get operation took: 997 ms +--------------------------------------------------- +[Step 348/500] Processing... +Getting data: 'Architecture' +1. Test entry #126: Architecture Framework related data with random seed 1304 // (similarity score: 36.09%) +2. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 32.91%) +3. Test entry #140: UX Architecture related data with random seed 15002 // (similarity score: 31.89%) +4. Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 31.79%) +5. 
Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 30.07%) +6. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 27.81%) +7. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 26.98%) +8. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 26.33%) +9. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 25.30%) +10. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 24.95%) +⏱️ Get operation took: 939 ms +--------------------------------------------------- +[Step 349/500] Processing... +Getting data: 'YAML' +1. Test entry #295: YAML Tutorial related data with random seed 7734 // (similarity score: 62.26%) +2. Test entry #12: Boilerplate YAML related data with random seed 29263 // (similarity score: 61.60%) +3. Test entry #291: YAML Performance related data with random seed 3598 // (similarity score: 57.08%) +4. Test entry #332: Performance YAML related data with random seed 23826 // (similarity score: 54.83%) +5. Test entry #412: YAML Orchestration related data with random seed 30910 // (similarity score: 53.55%) +6. Test entry #395: YAML Threading related data with random seed 20711 // (similarity score: 53.16%) +7. Test entry #424: Pull-request YAML related data with random seed 9732 // (similarity score: 52.19%) +8. Test entry #62: YAML CSV related data with random seed 16118 // (similarity score: 51.86%) +9. Test entry #185: Mocking YAML related data with random seed 11811 // (similarity score: 51.82%) +10. Test entry #57: YAML JSON related data with random seed 19740 // (similarity score: 50.92%) +⏱️ Get operation took: 898 ms +--------------------------------------------------- +[Step 350/500] Processing... +Getting data: 'Pub-sub' +1. 
Test entry #418: Pub-sub API related data with random seed 23035 // (similarity score: 38.66%) +2. Test entry #381: Python Pub-sub related data with random seed 29514 // (similarity score: 38.60%) +3. Test entry #28: Pull-request Pub-sub related data with random seed 15070 // (similarity score: 38.34%) +4. Test entry #362: Pub-sub SQL related data with random seed 11044 // (similarity score: 36.25%) +5. Test entry #461: Vector Pub-sub related data with random seed 31094 // (similarity score: 34.99%) +6. Test entry #259: Pub-sub DevOps related data with random seed 23944 // (similarity score: 34.61%) +7. Test entry #318: Vector Pub-sub related data with random seed 10169 // (similarity score: 33.83%) +8. Test entry #485: Integration-test Pub-sub related data with random seed 29641 // (similarity score: 31.78%) +9. Test entry #36: Threading Sharding related data with random seed 23643 // (similarity score: 14.79%) +10. Test entry #355: Throughput Sharding related data with random seed 6245 // (similarity score: 14.25%) +⏱️ Get operation took: 946 ms + +📊 [BATCH REPORT] Items 301 to 350 + -> Average Latency: 923 ms + +--------------------------------------------------- +[Step 351/500] Processing... +Getting data: 'DevOps' +1. Test entry #259: Pub-sub DevOps related data with random seed 23944 // (similarity score: 58.76%) +2. Test entry #212: Design-patterns DevOps related data with random seed 19247 // (similarity score: 57.73%) +3. Test entry #146: Profiling DevOps related data with random seed 14561 // (similarity score: 55.98%) +4. Test entry #46: Data-structure DevOps related data with random seed 24000 // (similarity score: 51.31%) +5. Test entry #269: Database DevOps related data with random seed 15436 // (similarity score: 45.93%) +6. Test entry #328: Node DevOps related data with random seed 7412 // (similarity score: 44.08%) +7. Test entry #207: DevOps JSON related data with random seed 15591 // (similarity score: 41.62%) +8. 
Test entry #271: Encryption DevOps related data with random seed 2715 // (similarity score: 40.67%) +9. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 29.13%) +10. Test entry #243: SDK Orchestration related data with random seed 30501 // (similarity score: 27.73%) +⏱️ Get operation took: 879 ms +--------------------------------------------------- +[Step 352/500] Processing... +Getting data: 'Merging' +1. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 47.50%) +2. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 46.78%) +3. Test entry #417: Event-driven Merging related data with random seed 13881 // (similarity score: 45.04%) +4. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 44.14%) +5. Test entry #361: Merging Logging related data with random seed 12323 // (similarity score: 38.53%) +6. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 38.19%) +7. Test entry #150: Diagram Integration-test related data with random seed 19409 // (similarity score: 21.00%) +8. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 19.25%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 18.92%) +10. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 16.25%) +⏱️ Get operation took: 945 ms +--------------------------------------------------- +[Step 353/500] Processing... +Getting data: 'Architecture' +1. Test entry #126: Architecture Framework related data with random seed 1304 // (similarity score: 36.09%) +2. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 32.91%) +3. 
Test entry #140: UX Architecture related data with random seed 15002 // (similarity score: 31.89%) +4. Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 31.79%) +5. Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 30.07%) +6. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 27.81%) +7. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 26.98%) +8. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 26.33%) +9. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 25.30%) +10. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 24.95%) +⏱️ Get operation took: 946 ms +--------------------------------------------------- +[Step 354/500] Processing... +Getting data: 'Caching' +1. Test entry #290: Caching Sample related data with random seed 12446 // (similarity score: 47.89%) +2. Test entry #112: Caching Sharding related data with random seed 9211 // (similarity score: 46.65%) +3. Test entry #274: Testing Caching related data with random seed 1136 // (similarity score: 46.39%) +4. Test entry #218: JSON Caching related data with random seed 29229 // (similarity score: 43.15%) +5. Test entry #13: Websocket Caching related data with random seed 27250 // (similarity score: 42.09%) +6. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 21.31%) +7. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 20.98%) +8. Test entry #139: Load-balancing Optimization related data with random seed 31776 // (similarity score: 20.79%) +9. Test entry #311: Asynchronous Best-practices related data with random seed 24139 // (similarity score: 20.69%) +10. 
Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 19.76%) +⏱️ Get operation took: 944 ms +--------------------------------------------------- +[Step 355/500] Processing... +Getting data: 'Merging' +1. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 47.50%) +2. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 46.78%) +3. Test entry #417: Event-driven Merging related data with random seed 13881 // (similarity score: 45.04%) +4. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 44.14%) +5. Test entry #361: Merging Logging related data with random seed 12323 // (similarity score: 38.53%) +6. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 38.19%) +7. Test entry #150: Diagram Integration-test related data with random seed 19409 // (similarity score: 21.00%) +8. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 19.25%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 18.92%) +10. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 16.25%) +⏱️ Get operation took: 894 ms +--------------------------------------------------- +[Step 356/500] Processing... +Getting data: 'Recovery' +1. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 39.10%) +2. Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 37.60%) +3. Test entry #306: Recovery Library related data with random seed 21298 // (similarity score: 36.53%) +4. Test entry #430: Recovery Diagram related data with random seed 24192 // (similarity score: 36.20%) +5. 
Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 35.07%) +6. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 33.87%) +7. Test entry #367: SDK Recovery related data with random seed 20345 // (similarity score: 33.28%) +8. Test entry #88: Recovery JSON related data with random seed 28249 // (similarity score: 32.39%) +9. Test entry #321: Recovery Git related data with random seed 3971 // (similarity score: 32.26%) +10. Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 30.40%) +⏱️ Get operation took: 894 ms +--------------------------------------------------- +[Step 357/500] Processing... +Getting data: 'E2E-test' +1. Test entry #455: E2E-test Integration-test related data with random seed 4838 // (similarity score: 65.81%) +2. Test entry #241: Alerting E2E-test related data with random seed 11551 // (similarity score: 63.85%) +3. Test entry #335: Throughput E2E-test related data with random seed 11728 // (similarity score: 63.35%) +4. Test entry #38: Profiling E2E-test related data with random seed 30992 // (similarity score: 63.16%) +5. Test entry #278: UML E2E-test related data with random seed 2723 // (similarity score: 61.63%) +6. Test entry #433: Vector E2E-test related data with random seed 22356 // (similarity score: 60.28%) +7. Test entry #304: E2E-test Unit-test related data with random seed 19213 // (similarity score: 59.91%) +8. Test entry #104: Backup E2E-test related data with random seed 3184 // (similarity score: 55.45%) +9. Test entry #414: GraphQL E2E-test related data with random seed 14354 // (similarity score: 53.19%) +10. Test entry #102: ERD ERD related data with random seed 20954 // (similarity score: 40.31%) +⏱️ Get operation took: 896 ms +--------------------------------------------------- +[Step 358/500] Processing... +Getting data: 'UX' +1. 
Test entry #463: Microservices UX related data with random seed 32421 // (similarity score: 28.20%) +2. Test entry #177: UX Monitoring related data with random seed 16167 // (similarity score: 26.37%) +3. Test entry #133: Microservices UX related data with random seed 13570 // (similarity score: 25.58%) +4. Test entry #140: UX Architecture related data with random seed 15002 // (similarity score: 25.04%) +5. Test entry #66: UX UML related data with random seed 3512 // (similarity score: 22.74%) +6. Test entry #402: UX Load-balancing related data with random seed 5781 // (similarity score: 21.76%) +7. Test entry #115: UX Design-patterns related data with random seed 14554 // (similarity score: 21.69%) +8. Test entry #175: Node UX related data with random seed 24087 // (similarity score: 20.22%) +9. Test entry #75: UX Search related data with random seed 11487 // (similarity score: 20.03%) +10. Test entry #237: UX Blockchain related data with random seed 8734 // (similarity score: 19.35%) +⏱️ Get operation took: 903 ms +--------------------------------------------------- +[Step 359/500] Processing... +Getting data: 'JSON' +1. Test entry #389: Data-structure JSON related data with random seed 23144 // (similarity score: 51.74%) +2. Test entry #156: JSON Optimization related data with random seed 28003 // (similarity score: 49.61%) +3. Test entry #413: JSON Alerting related data with random seed 8852 // (similarity score: 47.02%) +4. Test entry #113: JSON API related data with random seed 11466 // (similarity score: 46.74%) +5. Test entry #85: JSON Vector related data with random seed 23519 // (similarity score: 46.52%) +6. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 43.98%) +7. Test entry #218: JSON Caching related data with random seed 29229 // (similarity score: 43.91%) +8. Test entry #143: JSON XML related data with random seed 20028 // (similarity score: 42.01%) +9. 
Test entry #57: YAML JSON related data with random seed 19740 // (similarity score: 40.00%) +10. Test entry #420: JSON Microservices related data with random seed 9271 // (similarity score: 39.90%) +⏱️ Get operation took: 944 ms +--------------------------------------------------- +[Step 360/500] Processing... +Getting data: 'Event-driven' +1. Test entry #354: Load-balancing Event-driven related data with random seed 18844 // (similarity score: 45.07%) +2. Test entry #452: Event-driven Performance related data with random seed 26374 // (similarity score: 44.21%) +3. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 44.20%) +4. Test entry #316: GUI Event-driven related data with random seed 24961 // (similarity score: 43.28%) +5. Test entry #168: Event-driven Logging related data with random seed 22637 // (similarity score: 41.27%) +6. Test entry #260: CSV Event-driven related data with random seed 23556 // (similarity score: 40.58%) +7. Test entry #309: Event-driven Orchestration related data with random seed 21023 // (similarity score: 40.56%) +8. Test entry #142: Event-driven Microservices related data with random seed 7351 // (similarity score: 40.55%) +9. Test entry #45: Event-driven Load-balancing related data with random seed 5105 // (similarity score: 39.29%) +10. Test entry #297: Event-driven Code-review related data with random seed 23440 // (similarity score: 39.27%) +⏱️ Get operation took: 841 ms +--------------------------------------------------- +[Step 361/500] Processing... +Getting data: 'Index' +1. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 46.82%) +2. Test entry #473: Index Python related data with random seed 3534 // (similarity score: 44.57%) +3. Test entry #86: Index Container related data with random seed 813 // (similarity score: 41.90%) +4. Test entry #58: Cloud Index related data with random seed 16697 // (similarity score: 41.66%) +5. 
Test entry #1: Data-structure Index related data with random seed 16730 // (similarity score: 41.23%) +6. Test entry #44: Cloud Index related data with random seed 12754 // (similarity score: 38.93%) +7. Test entry #305: Index Database related data with random seed 10951 // (similarity score: 38.05%) +8. Test entry #31: ORM Index related data with random seed 2163 // (similarity score: 38.00%) +9. Test entry #288: Index Pull-request related data with random seed 4855 // (similarity score: 37.81%) +10. Test entry #358: Index Stubbing related data with random seed 6263 // (similarity score: 37.00%) +⏱️ Get operation took: 906 ms +--------------------------------------------------- +[Step 362/500] Processing... +Getting data: 'UML' +1. Test entry #466: UML Optimization related data with random seed 21437 // (similarity score: 51.87%) +2. Test entry #66: UX UML related data with random seed 3512 // (similarity score: 50.20%) +3. Test entry #17: Tutorial UML related data with random seed 12772 // (similarity score: 49.59%) +4. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 47.11%) +5. Test entry #189: UML Network related data with random seed 6297 // (similarity score: 45.74%) +6. Test entry #247: UML Integration-test related data with random seed 29544 // (similarity score: 45.62%) +7. Test entry #474: UML Latency related data with random seed 18011 // (similarity score: 45.46%) +8. Test entry #377: UML GUI related data with random seed 13474 // (similarity score: 44.68%) +9. Test entry #4: Design-patterns UML related data with random seed 16565 // (similarity score: 44.27%) +10. Test entry #310: RESTful UML related data with random seed 5709 // (similarity score: 43.88%) +⏱️ Get operation took: 944 ms +--------------------------------------------------- +[Step 363/500] Processing... +Getting data: 'Event-driven' +1. 
Test entry #354: Load-balancing Event-driven related data with random seed 18844 // (similarity score: 45.07%) +2. Test entry #452: Event-driven Performance related data with random seed 26374 // (similarity score: 44.21%) +3. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 44.20%) +4. Test entry #316: GUI Event-driven related data with random seed 24961 // (similarity score: 43.28%) +5. Test entry #168: Event-driven Logging related data with random seed 22637 // (similarity score: 41.27%) +6. Test entry #260: CSV Event-driven related data with random seed 23556 // (similarity score: 40.58%) +7. Test entry #309: Event-driven Orchestration related data with random seed 21023 // (similarity score: 40.56%) +8. Test entry #142: Event-driven Microservices related data with random seed 7351 // (similarity score: 40.55%) +9. Test entry #45: Event-driven Load-balancing related data with random seed 5105 // (similarity score: 39.29%) +10. Test entry #297: Event-driven Code-review related data with random seed 23440 // (similarity score: 39.27%) +⏱️ Get operation took: 896 ms +--------------------------------------------------- +[Step 364/500] Processing... +Getting data: 'Blockchain' +1. Test entry #117: Design-patterns Blockchain related data with random seed 16711 // (similarity score: 41.64%) +2. Test entry #231: Blockchain Data-structure related data with random seed 2611 // (similarity score: 38.85%) +3. Test entry #61: Blockchain Microservices related data with random seed 13172 // (similarity score: 38.28%) +4. Test entry #171: Blockchain Data-structure related data with random seed 29630 // (similarity score: 33.73%) +5. Test entry #237: UX Blockchain related data with random seed 8734 // (similarity score: 31.74%) +6. Test entry #360: GUI Blockchain related data with random seed 10277 // (similarity score: 31.69%) +7. 
Test entry #67: Debugging Blockchain related data with random seed 23179 // (similarity score: 30.30%) +8. Test entry #229: Orchestration Blockchain related data with random seed 2138 // (similarity score: 30.22%) +9. Test entry #348: Blockchain SDK related data with random seed 25254 // (similarity score: 29.17%) +10. Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 27.19%) +⏱️ Get operation took: 916 ms +--------------------------------------------------- +[Step 365/500] Processing... +Getting data: 'Library' +1. Test entry #451: Library Performance related data with random seed 31802 // (similarity score: 38.26%) +2. Test entry #357: Monitoring Library related data with random seed 91 // (similarity score: 36.58%) +3. Test entry #496: Library Database related data with random seed 13646 // (similarity score: 33.79%) +4. Test entry #320: Library UX related data with random seed 29021 // (similarity score: 31.34%) +5. Test entry #486: Library Example related data with random seed 9207 // (similarity score: 30.25%) +6. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 29.56%) +7. Test entry #95: Library Fault-tolerance related data with random seed 987 // (similarity score: 27.92%) +8. Test entry #472: Library CI/CD related data with random seed 18363 // (similarity score: 27.01%) +9. Test entry #30: API Library related data with random seed 1930 // (similarity score: 26.55%) +10. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 25.80%) +⏱️ Get operation took: 923 ms +--------------------------------------------------- +[Step 366/500] Processing... +Getting data: 'E2E-test' +1. Test entry #455: E2E-test Integration-test related data with random seed 4838 // (similarity score: 65.81%) +2. Test entry #241: Alerting E2E-test related data with random seed 11551 // (similarity score: 63.85%) +3. 
Test entry #335: Throughput E2E-test related data with random seed 11728 // (similarity score: 63.35%) +4. Test entry #38: Profiling E2E-test related data with random seed 30992 // (similarity score: 63.16%) +5. Test entry #278: UML E2E-test related data with random seed 2723 // (similarity score: 61.63%) +6. Test entry #433: Vector E2E-test related data with random seed 22356 // (similarity score: 60.28%) +7. Test entry #304: E2E-test Unit-test related data with random seed 19213 // (similarity score: 59.91%) +8. Test entry #104: Backup E2E-test related data with random seed 3184 // (similarity score: 55.45%) +9. Test entry #414: GraphQL E2E-test related data with random seed 14354 // (similarity score: 53.19%) +10. Test entry #102: ERD ERD related data with random seed 20954 // (similarity score: 40.31%) +⏱️ Get operation took: 907 ms +--------------------------------------------------- +[Step 367/500] Processing... +Getting data: 'Throughput' +1. Test entry #481: Tutorial Throughput related data with random seed 11888 // (similarity score: 48.81%) +2. Test entry #80: Fault-tolerance Throughput related data with random seed 27910 // (similarity score: 45.02%) +3. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 44.96%) +4. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 40.15%) +5. Test entry #355: Throughput Sharding related data with random seed 6245 // (similarity score: 39.86%) +6. Test entry #6: CI/CD Throughput related data with random seed 5729 // (similarity score: 38.72%) +7. Test entry #144: Throughput ORM related data with random seed 18100 // (similarity score: 35.68%) +8. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 35.66%) +9. Test entry #335: Throughput E2E-test related data with random seed 11728 // (similarity score: 34.04%) +10. 
Test entry #294: ORM Throughput related data with random seed 31130 // (similarity score: 33.56%) +⏱️ Get operation took: 942 ms +--------------------------------------------------- +[Step 368/500] Processing... +Getting data: 'Database' +1. Test entry #426: Database Database related data with random seed 19208 // (similarity score: 39.68%) +2. Test entry #496: Library Database related data with random seed 13646 // (similarity score: 36.63%) +3. Test entry #193: Database Resilient related data with random seed 3286 // (similarity score: 35.20%) +4. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 34.25%) +5. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 33.82%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 33.21%) +7. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 32.30%) +8. Test entry #82: Database Cloud related data with random seed 12343 // (similarity score: 32.29%) +9. Test entry #346: Database Microservices related data with random seed 1068 // (similarity score: 32.20%) +10. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 32.14%) +⏱️ Get operation took: 895 ms +--------------------------------------------------- +[Step 369/500] Processing... +Getting data: 'UI' +1. Test entry #108: Code-review UI related data with random seed 5301 // (similarity score: 33.71%) +2. Test entry #249: SDK UI related data with random seed 20519 // (similarity score: 32.43%) +3. Test entry #210: Asynchronous UI related data with random seed 16307 // (similarity score: 31.84%) +4. Test entry #141: Sample UI related data with random seed 12909 // (similarity score: 28.72%) +5. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 28.18%) +6. 
Test entry #449: UI Load-balancing related data with random seed 29998 // (similarity score: 27.95%) +7. Test entry #39: UI Sharding related data with random seed 26238 // (similarity score: 26.65%) +8. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 25.47%) +9. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 25.01%) +10. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 24.21%) +⏱️ Get operation took: 953 ms +--------------------------------------------------- +[Step 370/500] Processing... +Getting data: 'Websocket' +1. Test entry #236: Websocket Debugging related data with random seed 18729 // (similarity score: 61.95%) +2. Test entry #92: Websocket Integration-test related data with random seed 32561 // (similarity score: 60.89%) +3. Test entry #13: Websocket Caching related data with random seed 27250 // (similarity score: 58.67%) +4. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 58.47%) +5. Test entry #122: Cloud Websocket related data with random seed 14024 // (similarity score: 57.43%) +6. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 15.29%) +7. Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 14.60%) +8. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 14.35%) +9. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 14.31%) +10. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 13.59%) +⏱️ Get operation took: 905 ms +--------------------------------------------------- +[Step 371/500] Processing... +Getting data: 'Index' +1. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 46.82%) +2. 
Test entry #473: Index Python related data with random seed 3534 // (similarity score: 44.57%) +3. Test entry #86: Index Container related data with random seed 813 // (similarity score: 41.90%) +4. Test entry #58: Cloud Index related data with random seed 16697 // (similarity score: 41.66%) +5. Test entry #1: Data-structure Index related data with random seed 16730 // (similarity score: 41.23%) +6. Test entry #44: Cloud Index related data with random seed 12754 // (similarity score: 38.93%) +7. Test entry #305: Index Database related data with random seed 10951 // (similarity score: 38.05%) +8. Test entry #31: ORM Index related data with random seed 2163 // (similarity score: 38.00%) +9. Test entry #288: Index Pull-request related data with random seed 4855 // (similarity score: 37.81%) +10. Test entry #358: Index Stubbing related data with random seed 6263 // (similarity score: 37.00%) +⏱️ Get operation took: 978 ms +--------------------------------------------------- +[Step 372/500] Processing... +Getting data: 'CSV' +1. Test entry #118: CSV Data-structure related data with random seed 20709 // (similarity score: 60.86%) +2. Test entry #439: CSV Library related data with random seed 9941 // (similarity score: 57.72%) +3. Test entry #21: CSV Example related data with random seed 2897 // (similarity score: 57.43%) +4. Test entry #454: CSV Best-practices related data with random seed 21695 // (similarity score: 56.93%) +5. Test entry #453: CSV Profiling related data with random seed 24443 // (similarity score: 53.45%) +6. Test entry #391: CSV Resilient related data with random seed 1245 // (similarity score: 51.75%) +7. Test entry #260: CSV Event-driven related data with random seed 23556 // (similarity score: 51.60%) +8. Test entry #119: CSV Resilient related data with random seed 21012 // (similarity score: 50.55%) +9. Test entry #392: Algorithm CSV related data with random seed 18964 // (similarity score: 50.33%) +10. 
Test entry #458: Encryption CSV related data with random seed 22642 // (similarity score: 49.73%) +⏱️ Get operation took: 1067 ms +--------------------------------------------------- +[Step 373/500] Processing... +Getting data: 'Cloud' +1. Test entry #497: Cloud Profiling related data with random seed 11981 // (similarity score: 38.74%) +2. Test entry #270: Cloud Consensus related data with random seed 15128 // (similarity score: 36.08%) +3. Test entry #173: Sample Cloud related data with random seed 3561 // (similarity score: 36.02%) +4. Test entry #154: Cloud Search related data with random seed 2185 // (similarity score: 35.82%) +5. Test entry #58: Cloud Index related data with random seed 16697 // (similarity score: 33.17%) +6. Test entry #122: Cloud Websocket related data with random seed 14024 // (similarity score: 31.07%) +7. Test entry #116: NoSQL Cloud related data with random seed 22178 // (similarity score: 30.67%) +8. Test entry #178: Cloud XML related data with random seed 27579 // (similarity score: 30.58%) +9. Test entry #44: Cloud Index related data with random seed 12754 // (similarity score: 30.20%) +10. Test entry #187: Documentation Cloud related data with random seed 9545 // (similarity score: 26.80%) +⏱️ Get operation took: 974 ms +--------------------------------------------------- +[Step 374/500] Processing... +Getting data: 'Index' +1. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 46.82%) +2. Test entry #473: Index Python related data with random seed 3534 // (similarity score: 44.57%) +3. Test entry #86: Index Container related data with random seed 813 // (similarity score: 41.90%) +4. Test entry #58: Cloud Index related data with random seed 16697 // (similarity score: 41.66%) +5. Test entry #1: Data-structure Index related data with random seed 16730 // (similarity score: 41.23%) +6. Test entry #44: Cloud Index related data with random seed 12754 // (similarity score: 38.93%) +7. 
Test entry #305: Index Database related data with random seed 10951 // (similarity score: 38.05%) +8. Test entry #31: ORM Index related data with random seed 2163 // (similarity score: 38.00%) +9. Test entry #288: Index Pull-request related data with random seed 4855 // (similarity score: 37.81%) +10. Test entry #358: Index Stubbing related data with random seed 6263 // (similarity score: 37.00%) +⏱️ Get operation took: 990 ms +--------------------------------------------------- +[Step 375/500] Processing... +Getting data: 'Data-structure' +1. Test entry #273: Data-structure Performance related data with random seed 1219 // (similarity score: 50.19%) +2. Test entry #183: Code-review Data-structure related data with random seed 20935 // (similarity score: 49.27%) +3. Test entry #1: Data-structure Index related data with random seed 16730 // (similarity score: 49.07%) +4. Test entry #248: Algorithm Data-structure related data with random seed 18077 // (similarity score: 47.47%) +5. Test entry #313: Data-structure Monitoring related data with random seed 10551 // (similarity score: 47.31%) +6. Test entry #118: CSV Data-structure related data with random seed 20709 // (similarity score: 45.72%) +7. Test entry #289: RESTful Data-structure related data with random seed 1227 // (similarity score: 42.01%) +8. Test entry #389: Data-structure JSON related data with random seed 23144 // (similarity score: 40.81%) +9. Test entry #425: CLI Data-structure related data with random seed 7062 // (similarity score: 39.72%) +10. Test entry #171: Blockchain Data-structure related data with random seed 29630 // (similarity score: 38.81%) +⏱️ Get operation took: 1106 ms +--------------------------------------------------- +[Step 376/500] Processing... +Getting data: 'Flowchart' +1. Test entry #9: Diagram Flowchart related data with random seed 14766 // (similarity score: 63.49%) +2. 
Test entry #495: Best-practices Flowchart related data with random seed 507 // (similarity score: 59.08%) +3. Test entry #121: Flowchart SDK related data with random seed 24836 // (similarity score: 56.62%) +4. Test entry #226: Flowchart Testing related data with random seed 32215 // (similarity score: 56.46%) +5. Test entry #342: Python Flowchart related data with random seed 32467 // (similarity score: 56.34%) +6. Test entry #209: Index Flowchart related data with random seed 16113 // (similarity score: 54.06%) +7. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 49.88%) +8. Test entry #41: Index Flowchart related data with random seed 29094 // (similarity score: 49.84%) +9. Test entry #213: GUI GraphQL related data with random seed 9145 // (similarity score: 30.82%) +10. Test entry #469: Branching GraphQL related data with random seed 16900 // (similarity score: 24.78%) +⏱️ Get operation took: 990 ms +--------------------------------------------------- +[Step 377/500] Processing... +Getting data: 'Message-queue' +1. Test entry #147: Message-queue Container related data with random seed 31302 // (similarity score: 55.00%) +2. Test entry #396: Sample Message-queue related data with random seed 1074 // (similarity score: 54.96%) +3. Test entry #445: Message-queue Consensus related data with random seed 22969 // (similarity score: 51.62%) +4. Test entry #447: Message-queue Unit-test related data with random seed 12353 // (similarity score: 48.23%) +5. Test entry #225: Encryption Message-queue related data with random seed 3677 // (similarity score: 45.23%) +6. Test entry #264: SQL Message-queue related data with random seed 315 // (similarity score: 44.77%) +7. Test entry #56: Replication Message-queue related data with random seed 23394 // (similarity score: 44.68%) +8. Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 30.91%) +9. 
Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 27.49%) +10. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 24.44%) +⏱️ Get operation took: 1094 ms +--------------------------------------------------- +[Step 378/500] Processing... +Getting data: 'Sharding' +1. Test entry #394: Sharding Performance related data with random seed 1607 // (similarity score: 54.43%) +2. Test entry #355: Throughput Sharding related data with random seed 6245 // (similarity score: 53.68%) +3. Test entry #130: Resilient Sharding related data with random seed 12002 // (similarity score: 53.30%) +4. Test entry #334: Sharding Vector related data with random seed 29020 // (similarity score: 51.99%) +5. Test entry #155: Sharding Version-control related data with random seed 4158 // (similarity score: 51.36%) +6. Test entry #36: Threading Sharding related data with random seed 23643 // (similarity score: 50.25%) +7. Test entry #83: Template Sharding related data with random seed 32241 // (similarity score: 49.35%) +8. Test entry #103: CLI Sharding related data with random seed 23375 // (similarity score: 47.66%) +9. Test entry #39: UI Sharding related data with random seed 26238 // (similarity score: 45.29%) +10. Test entry #112: Caching Sharding related data with random seed 9211 // (similarity score: 44.08%) +⏱️ Get operation took: 950 ms +--------------------------------------------------- +[Step 379/500] Processing... +Getting data: 'CSV' +1. Test entry #118: CSV Data-structure related data with random seed 20709 // (similarity score: 60.86%) +2. Test entry #439: CSV Library related data with random seed 9941 // (similarity score: 57.72%) +3. Test entry #21: CSV Example related data with random seed 2897 // (similarity score: 57.43%) +4. Test entry #454: CSV Best-practices related data with random seed 21695 // (similarity score: 56.93%) +5. 
Test entry #453: CSV Profiling related data with random seed 24443 // (similarity score: 53.45%) +6. Test entry #391: CSV Resilient related data with random seed 1245 // (similarity score: 51.75%) +7. Test entry #260: CSV Event-driven related data with random seed 23556 // (similarity score: 51.60%) +8. Test entry #119: CSV Resilient related data with random seed 21012 // (similarity score: 50.55%) +9. Test entry #392: Algorithm CSV related data with random seed 18964 // (similarity score: 50.33%) +10. Test entry #458: Encryption CSV related data with random seed 22642 // (similarity score: 49.73%) +⏱️ Get operation took: 891 ms +--------------------------------------------------- +[Step 380/500] Processing... +Getting data: 'ORM' +1. Test entry #235: ORM Boilerplate related data with random seed 17103 // (similarity score: 52.37%) +2. Test entry #322: ORM Template related data with random seed 28110 // (similarity score: 49.94%) +3. Test entry #428: Fault-tolerance ORM related data with random seed 17392 // (similarity score: 47.01%) +4. Test entry #294: ORM Throughput related data with random seed 31130 // (similarity score: 46.99%) +5. Test entry #202: Search ORM related data with random seed 9657 // (similarity score: 46.57%) +6. Test entry #144: Throughput ORM related data with random seed 18100 // (similarity score: 45.56%) +7. Test entry #22: ORM UX related data with random seed 24344 // (similarity score: 45.31%) +8. Test entry #91: Fault-tolerance ORM related data with random seed 3476 // (similarity score: 44.50%) +9. Test entry #197: Fault-tolerance ORM related data with random seed 21855 // (similarity score: 44.44%) +10. Test entry #49: ORM Network related data with random seed 23604 // (similarity score: 44.40%) +⏱️ Get operation took: 972 ms +--------------------------------------------------- +[Step 381/500] Processing... +Getting data: 'Search' +1. Test entry #75: UX Search related data with random seed 11487 // (similarity score: 42.09%) +2. 
Test entry #372: Best-practices Search related data with random seed 17128 // (similarity score: 41.11%) +3. Test entry #202: Search ORM related data with random seed 9657 // (similarity score: 38.48%) +4. Test entry #154: Cloud Search related data with random seed 2185 // (similarity score: 38.09%) +5. Test entry #280: Boilerplate Search related data with random seed 32715 // (similarity score: 37.50%) +6. Test entry #408: CI/CD Search related data with random seed 26877 // (similarity score: 35.64%) +7. Test entry #78: Search Git related data with random seed 20971 // (similarity score: 35.37%) +8. Test entry #397: Search Fault-tolerance related data with random seed 20761 // (similarity score: 35.23%) +9. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 29.18%) +10. Test entry #305: Index Database related data with random seed 10951 // (similarity score: 26.45%) +⏱️ Get operation took: 977 ms +--------------------------------------------------- +[Step 382/500] Processing... +Getting data: 'CSV' +1. Test entry #118: CSV Data-structure related data with random seed 20709 // (similarity score: 60.86%) +2. Test entry #439: CSV Library related data with random seed 9941 // (similarity score: 57.72%) +3. Test entry #21: CSV Example related data with random seed 2897 // (similarity score: 57.43%) +4. Test entry #454: CSV Best-practices related data with random seed 21695 // (similarity score: 56.93%) +5. Test entry #453: CSV Profiling related data with random seed 24443 // (similarity score: 53.45%) +6. Test entry #391: CSV Resilient related data with random seed 1245 // (similarity score: 51.75%) +7. Test entry #260: CSV Event-driven related data with random seed 23556 // (similarity score: 51.60%) +8. Test entry #119: CSV Resilient related data with random seed 21012 // (similarity score: 50.55%) +9. Test entry #392: Algorithm CSV related data with random seed 18964 // (similarity score: 50.33%) +10. 
Test entry #458: Encryption CSV related data with random seed 22642 // (similarity score: 49.73%) +⏱️ Get operation took: 944 ms +--------------------------------------------------- +[Step 383/500] Processing... +Getting data: 'Performance' +1. Test entry #452: Event-driven Performance related data with random seed 26374 // (similarity score: 39.07%) +2. Test entry #451: Library Performance related data with random seed 31802 // (similarity score: 35.13%) +3. Test entry #192: Performance Threading related data with random seed 6244 // (similarity score: 33.15%) +4. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 32.63%) +5. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 31.90%) +6. Test entry #481: Tutorial Throughput related data with random seed 11888 // (similarity score: 31.89%) +7. Test entry #273: Data-structure Performance related data with random seed 1219 // (similarity score: 31.74%) +8. Test entry #394: Sharding Performance related data with random seed 1607 // (similarity score: 31.69%) +9. Test entry #332: Performance YAML related data with random seed 23826 // (similarity score: 30.69%) +10. Test entry #291: YAML Performance related data with random seed 3598 // (similarity score: 28.81%) +⏱️ Get operation took: 948 ms +--------------------------------------------------- +[Step 384/500] Processing... +Getting data: 'CLI' +1. Test entry #284: UX CLI related data with random seed 10670 // (similarity score: 47.12%) +2. Test entry #492: Testing CLI related data with random seed 20014 // (similarity score: 46.52%) +3. Test entry #462: Best-practices CLI related data with random seed 19922 // (similarity score: 45.07%) +4. Test entry #242: Load-balancing CLI related data with random seed 21278 // (similarity score: 44.28%) +5. Test entry #425: CLI Data-structure related data with random seed 7062 // (similarity score: 42.94%) +6. 
Test entry #499: Vector CLI related data with random seed 18419 // (similarity score: 41.47%) +7. Test entry #103: CLI Sharding related data with random seed 23375 // (similarity score: 40.66%) +8. Test entry #477: Unit-test CLI related data with random seed 17091 // (similarity score: 39.09%) +9. Test entry #263: CLI Stubbing related data with random seed 13486 // (similarity score: 37.57%) +10. Test entry #299: CLI Git related data with random seed 18823 // (similarity score: 34.54%) +⏱️ Get operation took: 996 ms +--------------------------------------------------- +[Step 385/500] Processing... +Getting data: 'UX' +1. Test entry #463: Microservices UX related data with random seed 32421 // (similarity score: 28.20%) +2. Test entry #177: UX Monitoring related data with random seed 16167 // (similarity score: 26.37%) +3. Test entry #133: Microservices UX related data with random seed 13570 // (similarity score: 25.58%) +4. Test entry #140: UX Architecture related data with random seed 15002 // (similarity score: 25.04%) +5. Test entry #66: UX UML related data with random seed 3512 // (similarity score: 22.74%) +6. Test entry #402: UX Load-balancing related data with random seed 5781 // (similarity score: 21.76%) +7. Test entry #115: UX Design-patterns related data with random seed 14554 // (similarity score: 21.69%) +8. Test entry #175: Node UX related data with random seed 24087 // (similarity score: 20.22%) +9. Test entry #75: UX Search related data with random seed 11487 // (similarity score: 20.03%) +10. Test entry #237: UX Blockchain related data with random seed 8734 // (similarity score: 19.35%) +⏱️ Get operation took: 943 ms +--------------------------------------------------- +[Step 386/500] Processing... +Getting data: 'Sample' +1. Test entry #216: Load-balancing Sample related data with random seed 25835 // (similarity score: 42.97%) +2. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 42.71%) +3. 
Test entry #141: Sample UI related data with random seed 12909 // (similarity score: 39.55%) +4. Test entry #290: Caching Sample related data with random seed 12446 // (similarity score: 38.92%) +5. Test entry #396: Sample Message-queue related data with random seed 1074 // (similarity score: 38.37%) +6. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 38.23%) +7. Test entry #81: Example Documentation related data with random seed 4554 // (similarity score: 37.78%) +8. Test entry #384: Branching Sample related data with random seed 29822 // (similarity score: 37.59%) +9. Test entry #340: Version-control Sample related data with random seed 23846 // (similarity score: 36.93%) +10. Test entry #173: Sample Cloud related data with random seed 3561 // (similarity score: 36.71%) +⏱️ Get operation took: 902 ms +--------------------------------------------------- +[Step 387/500] Processing... +Getting data: 'UX' +1. Test entry #463: Microservices UX related data with random seed 32421 // (similarity score: 28.20%) +2. Test entry #177: UX Monitoring related data with random seed 16167 // (similarity score: 26.37%) +3. Test entry #133: Microservices UX related data with random seed 13570 // (similarity score: 25.58%) +4. Test entry #140: UX Architecture related data with random seed 15002 // (similarity score: 25.04%) +5. Test entry #66: UX UML related data with random seed 3512 // (similarity score: 22.74%) +6. Test entry #402: UX Load-balancing related data with random seed 5781 // (similarity score: 21.76%) +7. Test entry #115: UX Design-patterns related data with random seed 14554 // (similarity score: 21.69%) +8. Test entry #175: Node UX related data with random seed 24087 // (similarity score: 20.22%) +9. Test entry #75: UX Search related data with random seed 11487 // (similarity score: 20.03%) +10. 
Test entry #237: UX Blockchain related data with random seed 8734 // (similarity score: 19.35%) +⏱️ Get operation took: 881 ms +--------------------------------------------------- +[Step 388/500] Processing... +Getting data: 'Sample' +1. Test entry #216: Load-balancing Sample related data with random seed 25835 // (similarity score: 42.97%) +2. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 42.71%) +3. Test entry #141: Sample UI related data with random seed 12909 // (similarity score: 39.55%) +4. Test entry #290: Caching Sample related data with random seed 12446 // (similarity score: 38.92%) +5. Test entry #396: Sample Message-queue related data with random seed 1074 // (similarity score: 38.37%) +6. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 38.23%) +7. Test entry #81: Example Documentation related data with random seed 4554 // (similarity score: 37.78%) +8. Test entry #384: Branching Sample related data with random seed 29822 // (similarity score: 37.59%) +9. Test entry #340: Version-control Sample related data with random seed 23846 // (similarity score: 36.93%) +10. Test entry #173: Sample Cloud related data with random seed 3561 // (similarity score: 36.71%) +⏱️ Get operation took: 958 ms +--------------------------------------------------- +[Step 389/500] Processing... +Getting data: 'Threading' +1. Test entry #164: Threading Parallelism related data with random seed 13640 // (similarity score: 52.54%) +2. Test entry #148: Threading Best-practices related data with random seed 22158 // (similarity score: 50.45%) +3. Test entry #415: Threading Algorithm related data with random seed 8994 // (similarity score: 48.42%) +4. Test entry #191: Optimization Threading related data with random seed 15850 // (similarity score: 48.17%) +5. Test entry #494: Monitoring Threading related data with random seed 28404 // (similarity score: 45.10%) +6. 
Test entry #36: Threading Sharding related data with random seed 23643 // (similarity score: 44.95%) +7. Test entry #490: Threading Resilient related data with random seed 825 // (similarity score: 44.64%) +8. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 44.04%) +9. Test entry #192: Performance Threading related data with random seed 6244 // (similarity score: 43.42%) +10. Test entry #230: Threading API related data with random seed 20630 // (similarity score: 42.47%) +⏱️ Get operation took: 951 ms +--------------------------------------------------- +[Step 390/500] Processing... +Getting data: 'XML' +1. Test entry #303: XML Tutorial related data with random seed 29551 // (similarity score: 54.70%) +2. Test entry #42: Resilient XML related data with random seed 15654 // (similarity score: 53.29%) +3. Test entry #487: XML Best-practices related data with random seed 19330 // (similarity score: 52.93%) +4. Test entry #298: XML Example related data with random seed 18435 // (similarity score: 52.15%) +5. Test entry #258: XML Synchronous related data with random seed 9079 // (similarity score: 51.57%) +6. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 50.79%) +7. Test entry #129: XML Code-review related data with random seed 19652 // (similarity score: 50.22%) +8. Test entry #475: Fault-tolerance XML related data with random seed 13158 // (similarity score: 46.25%) +9. Test entry #457: XML Recovery related data with random seed 2790 // (similarity score: 46.16%) +10. Test entry #350: XML Concurrency related data with random seed 12519 // (similarity score: 44.91%) +⏱️ Get operation took: 1019 ms +--------------------------------------------------- +[Step 391/500] Processing... +Getting data: 'RESTful' +1. Test entry #482: Code-review RESTful related data with random seed 18863 // (similarity score: 41.46%) +2. 
Test entry #289: RESTful Data-structure related data with random seed 1227 // (similarity score: 41.33%) +3. Test entry #446: Container RESTful related data with random seed 10831 // (similarity score: 40.34%) +4. Test entry #215: API RESTful related data with random seed 29014 // (similarity score: 40.32%) +5. Test entry #310: RESTful UML related data with random seed 5709 // (similarity score: 40.14%) +6. Test entry #314: Threading RESTful related data with random seed 12875 // (similarity score: 37.42%) +7. Test entry #296: ERD RESTful related data with random seed 19760 // (similarity score: 36.03%) +8. Test entry #200: NoSQL RESTful related data with random seed 23279 // (similarity score: 31.68%) +9. Test entry #423: Diagram RESTful related data with random seed 14813 // (similarity score: 29.93%) +10. Test entry #345: CI/CD RESTful related data with random seed 23698 // (similarity score: 29.68%) +⏱️ Get operation took: 1109 ms +--------------------------------------------------- +[Step 392/500] Processing... +Getting data: 'Synchronous' +1. Test entry #196: Boilerplate Synchronous related data with random seed 18887 // (similarity score: 42.32%) +2. Test entry #287: Example Synchronous related data with random seed 18676 // (similarity score: 40.82%) +3. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 32.63%) +4. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 32.34%) +5. Test entry #411: Security Synchronous related data with random seed 11940 // (similarity score: 31.90%) +6. Test entry #258: XML Synchronous related data with random seed 9079 // (similarity score: 29.62%) +7. Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 29.18%) +8. Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 28.48%) +9. 
Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 27.98%) +10. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 27.69%) +⏱️ Get operation took: 903 ms +--------------------------------------------------- +[Step 393/500] Processing... +Getting data: 'Network' +1. Test entry #283: Network Network related data with random seed 21734 // (similarity score: 44.63%) +2. Test entry #163: Network Orchestration related data with random seed 17906 // (similarity score: 36.14%) +3. Test entry #29: Mocking Network related data with random seed 20303 // (similarity score: 35.95%) +4. Test entry #190: Mocking Network related data with random seed 32119 // (similarity score: 35.84%) +5. Test entry #189: UML Network related data with random seed 6297 // (similarity score: 32.43%) +6. Test entry #90: Network GUI related data with random seed 6917 // (similarity score: 30.73%) +7. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 29.31%) +8. Test entry #49: ORM Network related data with random seed 23604 // (similarity score: 26.22%) +9. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 21.70%) +10. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 19.05%) +⏱️ Get operation took: 900 ms +--------------------------------------------------- +[Step 394/500] Processing... +Getting data: 'ERD' +1. Test entry #102: ERD ERD related data with random seed 20954 // (similarity score: 51.00%) +2. Test entry #138: Boilerplate ERD related data with random seed 17973 // (similarity score: 46.57%) +3. Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 44.39%) +4. Test entry #379: Code-review ERD related data with random seed 18047 // (similarity score: 43.54%) +5. 
Test entry #365: Framework ERD related data with random seed 21870 // (similarity score: 42.68%) +6. Test entry #135: UI ERD related data with random seed 9172 // (similarity score: 41.81%) +7. Test entry #378: ERD Parallelism related data with random seed 25512 // (similarity score: 39.79%) +8. Test entry #252: ERD Parallelism related data with random seed 23325 // (similarity score: 39.25%) +9. Test entry #296: ERD RESTful related data with random seed 19760 // (similarity score: 36.51%) +10. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 23.26%) +⏱️ Get operation took: 1087 ms +--------------------------------------------------- +[Step 395/500] Processing... +Getting data: 'XML' +1. Test entry #303: XML Tutorial related data with random seed 29551 // (similarity score: 54.70%) +2. Test entry #42: Resilient XML related data with random seed 15654 // (similarity score: 53.29%) +3. Test entry #487: XML Best-practices related data with random seed 19330 // (similarity score: 52.93%) +4. Test entry #298: XML Example related data with random seed 18435 // (similarity score: 52.15%) +5. Test entry #258: XML Synchronous related data with random seed 9079 // (similarity score: 51.57%) +6. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 50.79%) +7. Test entry #129: XML Code-review related data with random seed 19652 // (similarity score: 50.22%) +8. Test entry #475: Fault-tolerance XML related data with random seed 13158 // (similarity score: 46.25%) +9. Test entry #457: XML Recovery related data with random seed 2790 // (similarity score: 46.16%) +10. Test entry #350: XML Concurrency related data with random seed 12519 // (similarity score: 44.91%) +⏱️ Get operation took: 946 ms +--------------------------------------------------- +[Step 396/500] Processing... +Getting data: 'Template' +1. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 46.79%) +2. 
Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 41.61%) +3. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 40.40%) +4. Test entry #134: Template Vector related data with random seed 24421 // (similarity score: 38.73%) +5. Test entry #83: Template Sharding related data with random seed 32241 // (similarity score: 34.86%) +6. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 33.67%) +7. Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 33.18%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 30.02%) +9. Test entry #322: ORM Template related data with random seed 28110 // (similarity score: 29.98%) +10. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 27.79%) +⏱️ Get operation took: 903 ms +--------------------------------------------------- +[Step 397/500] Processing... +Getting data: 'SQL' +1. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 41.94%) +2. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 41.80%) +3. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 37.96%) +4. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 37.76%) +5. Test entry #152: SQL Load-balancing related data with random seed 9262 // (similarity score: 37.07%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 36.37%) +7. Test entry #137: SQL Parallelism related data with random seed 8672 // (similarity score: 35.23%) +8. Test entry #431: SQL Version-control related data with random seed 505 // (similarity score: 32.86%) +9. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 30.91%) +10. 
Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 30.51%) +⏱️ Get operation took: 891 ms +--------------------------------------------------- +[Step 398/500] Processing... +Getting data: 'Debugging' +1. Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 51.51%) +2. Test entry #333: Profiling Debugging related data with random seed 25431 // (similarity score: 49.97%) +3. Test entry #387: Index Debugging related data with random seed 29125 // (similarity score: 43.23%) +4. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 42.05%) +5. Test entry #67: Debugging Blockchain related data with random seed 23179 // (similarity score: 42.03%) +6. Test entry #236: Websocket Debugging related data with random seed 18729 // (similarity score: 41.95%) +7. Test entry #444: Vector Debugging related data with random seed 5646 // (similarity score: 41.67%) +8. Test entry #406: Debugging Stubbing related data with random seed 28773 // (similarity score: 41.49%) +9. Test entry #339: API Debugging related data with random seed 14456 // (similarity score: 39.09%) +10. Test entry #34: Debugging CI/CD related data with random seed 7455 // (similarity score: 33.14%) +⏱️ Get operation took: 1306 ms +--------------------------------------------------- +[Step 399/500] Processing... +Getting data: 'Asynchronous' +1. Test entry #74: Asynchronous Asynchronous related data with random seed 32283 // (similarity score: 46.54%) +2. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 44.22%) +3. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 43.79%) +4. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 43.65%) +5. Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 43.29%) +6. 
Test entry #311: Asynchronous Best-practices related data with random seed 24139 // (similarity score: 41.57%) +7. Test entry #210: Asynchronous UI related data with random seed 16307 // (similarity score: 40.59%) +8. Test entry #79: Algorithm Asynchronous related data with random seed 18478 // (similarity score: 40.11%) +9. Test entry #240: Algorithm Asynchronous related data with random seed 6330 // (similarity score: 38.61%) +10. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 36.49%) +⏱️ Get operation took: 990 ms +--------------------------------------------------- +[Step 400/500] Processing... +Getting data: 'Data-structure' +1. Test entry #273: Data-structure Performance related data with random seed 1219 // (similarity score: 50.19%) +2. Test entry #183: Code-review Data-structure related data with random seed 20935 // (similarity score: 49.27%) +3. Test entry #1: Data-structure Index related data with random seed 16730 // (similarity score: 49.07%) +4. Test entry #248: Algorithm Data-structure related data with random seed 18077 // (similarity score: 47.47%) +5. Test entry #313: Data-structure Monitoring related data with random seed 10551 // (similarity score: 47.31%) +6. Test entry #118: CSV Data-structure related data with random seed 20709 // (similarity score: 45.72%) +7. Test entry #289: RESTful Data-structure related data with random seed 1227 // (similarity score: 42.01%) +8. Test entry #389: Data-structure JSON related data with random seed 23144 // (similarity score: 40.81%) +9. Test entry #425: CLI Data-structure related data with random seed 7062 // (similarity score: 39.72%) +10. Test entry #171: Blockchain Data-structure related data with random seed 29630 // (similarity score: 38.81%) +⏱️ Get operation took: 953 ms + +📊 [BATCH REPORT] Items 351 to 400 + -> Average Latency: 955 ms + +--------------------------------------------------- +[Step 401/500] Processing... +Getting data: 'Profiling' +1. 
Test entry #110: Boilerplate Profiling related data with random seed 6487 // (similarity score: 60.75%) +2. Test entry #333: Profiling Debugging related data with random seed 25431 // (similarity score: 54.23%) +3. Test entry #497: Cloud Profiling related data with random seed 11981 // (similarity score: 49.55%) +4. Test entry #453: CSV Profiling related data with random seed 24443 // (similarity score: 49.26%) +5. Test entry #146: Profiling DevOps related data with random seed 14561 // (similarity score: 46.96%) +6. Test entry #208: Blockchain Profiling related data with random seed 31906 // (similarity score: 46.76%) +7. Test entry #256: Profiling GraphQL related data with random seed 4781 // (similarity score: 43.45%) +8. Test entry #38: Profiling E2E-test related data with random seed 30992 // (similarity score: 43.10%) +9. Test entry #106: Profiling NoSQL related data with random seed 23156 // (similarity score: 42.71%) +10. Test entry #255: Profiling Replication related data with random seed 7369 // (similarity score: 42.39%) +⏱️ Get operation took: 1113 ms +--------------------------------------------------- +[Step 402/500] Processing... +Getting data: 'Merging' +1. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 47.50%) +2. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 46.78%) +3. Test entry #417: Event-driven Merging related data with random seed 13881 // (similarity score: 45.04%) +4. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 44.14%) +5. Test entry #361: Merging Logging related data with random seed 12323 // (similarity score: 38.53%) +6. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 38.19%) +7. Test entry #150: Diagram Integration-test related data with random seed 19409 // (similarity score: 21.00%) +8. 
Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 19.25%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 18.92%) +10. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 16.25%) +⏱️ Get operation took: 1049 ms +--------------------------------------------------- +[Step 403/500] Processing... +Getting data: 'Template' +1. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 46.79%) +2. Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 41.61%) +3. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 40.40%) +4. Test entry #134: Template Vector related data with random seed 24421 // (similarity score: 38.73%) +5. Test entry #83: Template Sharding related data with random seed 32241 // (similarity score: 34.86%) +6. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 33.67%) +7. Test entry #55: Optimization Template related data with random seed 11116 // (similarity score: 33.18%) +8. Test entry #169: Template Stubbing related data with random seed 28565 // (similarity score: 30.02%) +9. Test entry #322: ORM Template related data with random seed 28110 // (similarity score: 29.98%) +10. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 27.79%) +⏱️ Get operation took: 991 ms +--------------------------------------------------- +[Step 404/500] Processing... +Getting data: 'Load-balancing' +1. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 63.40%) +2. Test entry #139: Load-balancing Optimization related data with random seed 31776 // (similarity score: 57.41%) +3. 
Test entry #27: Optimization Load-balancing related data with random seed 27403 // (similarity score: 55.07%) +4. Test entry #402: UX Load-balancing related data with random seed 5781 // (similarity score: 52.94%) +5. Test entry #302: Load-balancing Diagram related data with random seed 1050 // (similarity score: 52.56%) +6. Test entry #45: Event-driven Load-balancing related data with random seed 5105 // (similarity score: 52.48%) +7. Test entry #354: Load-balancing Event-driven related data with random seed 18844 // (similarity score: 47.54%) +8. Test entry #449: UI Load-balancing related data with random seed 29998 // (similarity score: 45.02%) +9. Test entry #242: Load-balancing CLI related data with random seed 21278 // (similarity score: 44.58%) +10. Test entry #216: Load-balancing Sample related data with random seed 25835 // (similarity score: 43.77%) +⏱️ Get operation took: 904 ms +--------------------------------------------------- +[Step 405/500] Processing... +Getting data: 'JSON' +1. Test entry #389: Data-structure JSON related data with random seed 23144 // (similarity score: 51.74%) +2. Test entry #156: JSON Optimization related data with random seed 28003 // (similarity score: 49.61%) +3. Test entry #413: JSON Alerting related data with random seed 8852 // (similarity score: 47.02%) +4. Test entry #113: JSON API related data with random seed 11466 // (similarity score: 46.74%) +5. Test entry #85: JSON Vector related data with random seed 23519 // (similarity score: 46.52%) +6. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 43.98%) +7. Test entry #218: JSON Caching related data with random seed 29229 // (similarity score: 43.91%) +8. Test entry #143: JSON XML related data with random seed 20028 // (similarity score: 42.01%) +9. Test entry #57: YAML JSON related data with random seed 19740 // (similarity score: 40.00%) +10. 
Test entry #420: JSON Microservices related data with random seed 9271 // (similarity score: 39.90%) +⏱️ Get operation took: 936 ms +--------------------------------------------------- +[Step 406/500] Processing... +Getting data: 'Blockchain' +1. Test entry #117: Design-patterns Blockchain related data with random seed 16711 // (similarity score: 41.64%) +2. Test entry #231: Blockchain Data-structure related data with random seed 2611 // (similarity score: 38.85%) +3. Test entry #61: Blockchain Microservices related data with random seed 13172 // (similarity score: 38.28%) +4. Test entry #171: Blockchain Data-structure related data with random seed 29630 // (similarity score: 33.73%) +5. Test entry #237: UX Blockchain related data with random seed 8734 // (similarity score: 31.74%) +6. Test entry #360: GUI Blockchain related data with random seed 10277 // (similarity score: 31.69%) +7. Test entry #67: Debugging Blockchain related data with random seed 23179 // (similarity score: 30.30%) +8. Test entry #229: Orchestration Blockchain related data with random seed 2138 // (similarity score: 30.22%) +9. Test entry #348: Blockchain SDK related data with random seed 25254 // (similarity score: 29.17%) +10. Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 27.19%) +⏱️ Get operation took: 896 ms +--------------------------------------------------- +[Step 407/500] Processing... +Getting data: 'JSON' +1. Test entry #389: Data-structure JSON related data with random seed 23144 // (similarity score: 51.74%) +2. Test entry #156: JSON Optimization related data with random seed 28003 // (similarity score: 49.61%) +3. Test entry #413: JSON Alerting related data with random seed 8852 // (similarity score: 47.02%) +4. Test entry #113: JSON API related data with random seed 11466 // (similarity score: 46.74%) +5. Test entry #85: JSON Vector related data with random seed 23519 // (similarity score: 46.52%) +6. 
Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 43.98%) +7. Test entry #218: JSON Caching related data with random seed 29229 // (similarity score: 43.91%) +8. Test entry #143: JSON XML related data with random seed 20028 // (similarity score: 42.01%) +9. Test entry #57: YAML JSON related data with random seed 19740 // (similarity score: 40.00%) +10. Test entry #420: JSON Microservices related data with random seed 9271 // (similarity score: 39.90%) +⏱️ Get operation took: 881 ms +--------------------------------------------------- +[Step 408/500] Processing... +Getting data: 'Microservices' +1. Test entry #142: Event-driven Microservices related data with random seed 7351 // (similarity score: 59.38%) +2. Test entry #251: Resilient Microservices related data with random seed 1673 // (similarity score: 56.33%) +3. Test entry #437: Microservices Optimization related data with random seed 8916 // (similarity score: 55.90%) +4. Test entry #239: Data-structure Microservices related data with random seed 11589 // (similarity score: 55.78%) +5. Test entry #292: Asynchronous Microservices related data with random seed 8204 // (similarity score: 55.67%) +6. Test entry #483: Orchestration Microservices related data with random seed 31494 // (similarity score: 51.77%) +7. Test entry #176: Microservices Documentation related data with random seed 28218 // (similarity score: 50.61%) +8. Test entry #23: Branching Microservices related data with random seed 24428 // (similarity score: 49.91%) +9. Test entry #463: Microservices UX related data with random seed 32421 // (similarity score: 49.69%) +10. Test entry #353: Microservices YAML related data with random seed 3068 // (similarity score: 48.52%) +⏱️ Get operation took: 916 ms +--------------------------------------------------- +[Step 409/500] Processing... +Getting data: 'Git' +1. 
Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 26.09%) +2. Test entry #429: Version-control Code-review related data with random seed 14304 // (similarity score: 24.27%) +3. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 23.51%) +4. Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 23.09%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 22.94%) +6. Test entry #338: Version-control Monitoring related data with random seed 11905 // (similarity score: 22.82%) +7. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 22.52%) +8. Test entry #155: Sharding Version-control related data with random seed 4158 // (similarity score: 21.79%) +9. Test entry #340: Version-control Sample related data with random seed 23846 // (similarity score: 21.68%) +10. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 21.45%) +⏱️ Get operation took: 882 ms +--------------------------------------------------- +[Step 410/500] Processing... +Getting data: 'Synchronous' +1. Test entry #196: Boilerplate Synchronous related data with random seed 18887 // (similarity score: 42.32%) +2. Test entry #287: Example Synchronous related data with random seed 18676 // (similarity score: 40.82%) +3. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 32.63%) +4. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 32.34%) +5. Test entry #411: Security Synchronous related data with random seed 11940 // (similarity score: 31.90%) +6. Test entry #258: XML Synchronous related data with random seed 9079 // (similarity score: 29.62%) +7. 
Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 29.18%) +8. Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 28.48%) +9. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 27.98%) +10. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 27.69%) +⏱️ Get operation took: 961 ms +--------------------------------------------------- +[Step 411/500] Processing... +Getting data: 'XML' +1. Test entry #303: XML Tutorial related data with random seed 29551 // (similarity score: 54.70%) +2. Test entry #42: Resilient XML related data with random seed 15654 // (similarity score: 53.29%) +3. Test entry #487: XML Best-practices related data with random seed 19330 // (similarity score: 52.93%) +4. Test entry #298: XML Example related data with random seed 18435 // (similarity score: 52.15%) +5. Test entry #258: XML Synchronous related data with random seed 9079 // (similarity score: 51.57%) +6. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 50.79%) +7. Test entry #129: XML Code-review related data with random seed 19652 // (similarity score: 50.22%) +8. Test entry #475: Fault-tolerance XML related data with random seed 13158 // (similarity score: 46.25%) +9. Test entry #457: XML Recovery related data with random seed 2790 // (similarity score: 46.16%) +10. Test entry #350: XML Concurrency related data with random seed 12519 // (similarity score: 44.91%) +⏱️ Get operation took: 934 ms +--------------------------------------------------- +[Step 412/500] Processing... +Getting data: 'GUI' +1. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 43.56%) +2. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 39.19%) +3. 
Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 38.99%) +4. Test entry #268: GUI Security related data with random seed 8853 // (similarity score: 38.70%) +5. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 37.90%) +6. Test entry #90: Network GUI related data with random seed 6917 // (similarity score: 37.45%) +7. Test entry #400: GUI Container related data with random seed 26968 // (similarity score: 36.86%) +8. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 36.84%) +9. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 35.99%) +10. Test entry #87: GUI Encryption related data with random seed 24527 // (similarity score: 35.25%) +⏱️ Get operation took: 1059 ms +--------------------------------------------------- +[Step 413/500] Processing... +Getting data: 'GUI' +1. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 43.56%) +2. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 39.19%) +3. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 38.99%) +4. Test entry #268: GUI Security related data with random seed 8853 // (similarity score: 38.70%) +5. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 37.90%) +6. Test entry #90: Network GUI related data with random seed 6917 // (similarity score: 37.45%) +7. Test entry #400: GUI Container related data with random seed 26968 // (similarity score: 36.86%) +8. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 36.84%) +9. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 35.99%) +10. 
Test entry #87: GUI Encryption related data with random seed 24527 // (similarity score: 35.25%) +⏱️ Get operation took: 952 ms +--------------------------------------------------- +[Step 414/500] Processing... +Getting data: 'Debugging' +1. Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 51.51%) +2. Test entry #333: Profiling Debugging related data with random seed 25431 // (similarity score: 49.97%) +3. Test entry #387: Index Debugging related data with random seed 29125 // (similarity score: 43.23%) +4. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 42.05%) +5. Test entry #67: Debugging Blockchain related data with random seed 23179 // (similarity score: 42.03%) +6. Test entry #236: Websocket Debugging related data with random seed 18729 // (similarity score: 41.95%) +7. Test entry #444: Vector Debugging related data with random seed 5646 // (similarity score: 41.67%) +8. Test entry #406: Debugging Stubbing related data with random seed 28773 // (similarity score: 41.49%) +9. Test entry #339: API Debugging related data with random seed 14456 // (similarity score: 39.09%) +10. Test entry #34: Debugging CI/CD related data with random seed 7455 // (similarity score: 33.14%) +⏱️ Get operation took: 1110 ms +--------------------------------------------------- +[Step 415/500] Processing... +Getting data: 'SDK' +1. Test entry #279: Tutorial SDK related data with random seed 4966 // (similarity score: 56.68%) +2. Test entry #51: Scalability SDK related data with random seed 3268 // (similarity score: 51.89%) +3. Test entry #249: SDK UI related data with random seed 20519 // (similarity score: 49.85%) +4. Test entry #243: SDK Orchestration related data with random seed 30501 // (similarity score: 47.08%) +5. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 46.35%) +6. 
Test entry #367: SDK Recovery related data with random seed 20345 // (similarity score: 45.44%) +7. Test entry #162: SDK Testing related data with random seed 18191 // (similarity score: 44.04%) +8. Test entry #364: Database SDK related data with random seed 22809 // (similarity score: 40.07%) +9. Test entry #286: Python SDK related data with random seed 14667 // (similarity score: 39.90%) +10. Test entry #348: Blockchain SDK related data with random seed 25254 // (similarity score: 39.13%) +⏱️ Get operation took: 960 ms +--------------------------------------------------- +[Step 416/500] Processing... +Getting data: 'Framework' +1. Test entry #227: Framework Algorithm related data with random seed 19206 // (similarity score: 31.85%) +2. Test entry #275: Framework Orchestration related data with random seed 12664 // (similarity score: 31.36%) +3. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 29.40%) +4. Test entry #126: Architecture Framework related data with random seed 1304 // (similarity score: 28.78%) +5. Test entry #386: Framework Testing related data with random seed 14924 // (similarity score: 28.77%) +6. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 26.61%) +7. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 24.69%) +8. Test entry #365: Framework ERD related data with random seed 21870 // (similarity score: 20.97%) +9. Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 20.94%) +10. Test entry #228: Branching Framework related data with random seed 10213 // (similarity score: 19.21%) +⏱️ Get operation took: 1001 ms +--------------------------------------------------- +[Step 417/500] Processing... +Getting data: 'Algorithm' +1. Test entry #248: Algorithm Data-structure related data with random seed 18077 // (similarity score: 46.27%) +2. 
Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 38.05%) +3. Test entry #392: Algorithm CSV related data with random seed 18964 // (similarity score: 36.06%) +4. Test entry #79: Algorithm Asynchronous related data with random seed 18478 // (similarity score: 34.57%) +5. Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 33.78%) +6. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 33.09%) +7. Test entry #240: Algorithm Asynchronous related data with random seed 6330 // (similarity score: 31.98%) +8. Test entry #227: Framework Algorithm related data with random seed 19206 // (similarity score: 31.96%) +9. Test entry #337: Algorithm Orchestration related data with random seed 15318 // (similarity score: 31.60%) +10. Test entry #120: Algorithm Unit-test related data with random seed 14608 // (similarity score: 30.48%) +⏱️ Get operation took: 992 ms +--------------------------------------------------- +[Step 418/500] Processing... +Getting data: 'Parallelism' +1. Test entry #164: Threading Parallelism related data with random seed 13640 // (similarity score: 53.13%) +2. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 51.21%) +3. Test entry #7: Mocking Parallelism related data with random seed 19010 // (similarity score: 51.18%) +4. Test entry #97: Testing Parallelism related data with random seed 6045 // (similarity score: 49.12%) +5. Test entry #136: Parallelism Sample related data with random seed 18078 // (similarity score: 48.54%) +6. Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 47.79%) +7. Test entry #201: Sample Parallelism related data with random seed 8912 // (similarity score: 46.65%) +8. Test entry #252: ERD Parallelism related data with random seed 23325 // (similarity score: 45.15%) +9. 
Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 44.59%) +10. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 44.19%) +⏱️ Get operation took: 947 ms +--------------------------------------------------- +[Step 419/500] Processing... +Getting data: 'Best-practices' +1. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 32.60%) +2. Test entry #311: Asynchronous Best-practices related data with random seed 24139 // (similarity score: 31.44%) +3. Test entry #373: NoSQL Best-practices related data with random seed 20318 // (similarity score: 31.35%) +4. Test entry #454: CSV Best-practices related data with random seed 21695 // (similarity score: 30.63%) +5. Test entry #462: Best-practices CLI related data with random seed 19922 // (similarity score: 30.42%) +6. Test entry #487: XML Best-practices related data with random seed 19330 // (similarity score: 29.42%) +7. Test entry #372: Best-practices Search related data with random seed 17128 // (similarity score: 28.31%) +8. Test entry #315: Encryption Best-practices related data with random seed 23350 // (similarity score: 27.13%) +9. Test entry #174: Best-practices Security related data with random seed 18653 // (similarity score: 25.77%) +10. Test entry #148: Threading Best-practices related data with random seed 22158 // (similarity score: 25.48%) +⏱️ Get operation took: 946 ms +--------------------------------------------------- +[Step 420/500] Processing... +Getting data: 'SQL' +1. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 41.94%) +2. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 41.80%) +3. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 37.96%) +4. 
Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 37.76%) +5. Test entry #152: SQL Load-balancing related data with random seed 9262 // (similarity score: 37.07%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 36.37%) +7. Test entry #137: SQL Parallelism related data with random seed 8672 // (similarity score: 35.23%) +8. Test entry #431: SQL Version-control related data with random seed 505 // (similarity score: 32.86%) +9. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 30.91%) +10. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 30.51%) +⏱️ Get operation took: 951 ms +--------------------------------------------------- +[Step 421/500] Processing... +Getting data: 'CLI' +1. Test entry #284: UX CLI related data with random seed 10670 // (similarity score: 47.12%) +2. Test entry #492: Testing CLI related data with random seed 20014 // (similarity score: 46.52%) +3. Test entry #462: Best-practices CLI related data with random seed 19922 // (similarity score: 45.07%) +4. Test entry #242: Load-balancing CLI related data with random seed 21278 // (similarity score: 44.28%) +5. Test entry #425: CLI Data-structure related data with random seed 7062 // (similarity score: 42.94%) +6. Test entry #499: Vector CLI related data with random seed 18419 // (similarity score: 41.47%) +7. Test entry #103: CLI Sharding related data with random seed 23375 // (similarity score: 40.66%) +8. Test entry #477: Unit-test CLI related data with random seed 17091 // (similarity score: 39.09%) +9. Test entry #263: CLI Stubbing related data with random seed 13486 // (similarity score: 37.57%) +10. Test entry #299: CLI Git related data with random seed 18823 // (similarity score: 34.54%) +⏱️ Get operation took: 950 ms +--------------------------------------------------- +[Step 422/500] Processing... +Getting data: 'Event-driven' +1. 
Test entry #354: Load-balancing Event-driven related data with random seed 18844 // (similarity score: 45.07%) +2. Test entry #452: Event-driven Performance related data with random seed 26374 // (similarity score: 44.21%) +3. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 44.20%) +4. Test entry #316: GUI Event-driven related data with random seed 24961 // (similarity score: 43.28%) +5. Test entry #168: Event-driven Logging related data with random seed 22637 // (similarity score: 41.27%) +6. Test entry #260: CSV Event-driven related data with random seed 23556 // (similarity score: 40.58%) +7. Test entry #309: Event-driven Orchestration related data with random seed 21023 // (similarity score: 40.56%) +8. Test entry #142: Event-driven Microservices related data with random seed 7351 // (similarity score: 40.55%) +9. Test entry #45: Event-driven Load-balancing related data with random seed 5105 // (similarity score: 39.29%) +10. Test entry #297: Event-driven Code-review related data with random seed 23440 // (similarity score: 39.27%) +⏱️ Get operation took: 941 ms +--------------------------------------------------- +[Step 423/500] Processing... +Getting data: 'Threading' +1. Test entry #164: Threading Parallelism related data with random seed 13640 // (similarity score: 52.54%) +2. Test entry #148: Threading Best-practices related data with random seed 22158 // (similarity score: 50.45%) +3. Test entry #415: Threading Algorithm related data with random seed 8994 // (similarity score: 48.42%) +4. Test entry #191: Optimization Threading related data with random seed 15850 // (similarity score: 48.17%) +5. Test entry #494: Monitoring Threading related data with random seed 28404 // (similarity score: 45.10%) +6. Test entry #36: Threading Sharding related data with random seed 23643 // (similarity score: 44.95%) +7. 
Test entry #490: Threading Resilient related data with random seed 825 // (similarity score: 44.64%) +8. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 44.04%) +9. Test entry #192: Performance Threading related data with random seed 6244 // (similarity score: 43.42%) +10. Test entry #230: Threading API related data with random seed 20630 // (similarity score: 42.47%) +⏱️ Get operation took: 946 ms +--------------------------------------------------- +[Step 424/500] Processing... +Getting data: 'E2E-test' +1. Test entry #455: E2E-test Integration-test related data with random seed 4838 // (similarity score: 65.81%) +2. Test entry #241: Alerting E2E-test related data with random seed 11551 // (similarity score: 63.85%) +3. Test entry #335: Throughput E2E-test related data with random seed 11728 // (similarity score: 63.35%) +4. Test entry #38: Profiling E2E-test related data with random seed 30992 // (similarity score: 63.16%) +5. Test entry #278: UML E2E-test related data with random seed 2723 // (similarity score: 61.63%) +6. Test entry #433: Vector E2E-test related data with random seed 22356 // (similarity score: 60.28%) +7. Test entry #304: E2E-test Unit-test related data with random seed 19213 // (similarity score: 59.91%) +8. Test entry #104: Backup E2E-test related data with random seed 3184 // (similarity score: 55.45%) +9. Test entry #414: GraphQL E2E-test related data with random seed 14354 // (similarity score: 53.19%) +10. Test entry #102: ERD ERD related data with random seed 20954 // (similarity score: 40.31%) +⏱️ Get operation took: 943 ms +--------------------------------------------------- +[Step 425/500] Processing... +Getting data: 'Design-patterns' +1. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 63.80%) +2. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 62.09%) +3. 
Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 61.76%) +4. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 57.41%) +5. Test entry #115: UX Design-patterns related data with random seed 14554 // (similarity score: 56.30%) +6. Test entry #4: Design-patterns UML related data with random seed 16565 // (similarity score: 50.76%) +7. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 49.55%) +8. Test entry #498: Design-patterns Python related data with random seed 16866 // (similarity score: 49.27%) +9. Test entry #212: Design-patterns DevOps related data with random seed 19247 // (similarity score: 46.55%) +10. Test entry #117: Design-patterns Blockchain related data with random seed 16711 // (similarity score: 45.78%) +⏱️ Get operation took: 956 ms +--------------------------------------------------- +[Step 426/500] Processing... +Getting data: 'Documentation' +1. Test entry #468: Documentation Monitoring related data with random seed 13294 // (similarity score: 41.51%) +2. Test entry #81: Example Documentation related data with random seed 4554 // (similarity score: 34.95%) +3. Test entry #187: Documentation Cloud related data with random seed 9545 // (similarity score: 34.85%) +4. Test entry #390: Documentation Diagram related data with random seed 8258 // (similarity score: 32.91%) +5. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 30.49%) +6. Test entry #176: Microservices Documentation related data with random seed 28218 // (similarity score: 28.00%) +7. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 27.51%) +8. Test entry #93: Node Documentation related data with random seed 10928 // (similarity score: 27.08%) +9. 
Test entry #319: Documentation Example related data with random seed 22093 // (similarity score: 26.81%) +10. Test entry #246: Node Documentation related data with random seed 8868 // (similarity score: 26.21%) +⏱️ Get operation took: 991 ms +--------------------------------------------------- +[Step 427/500] Processing... +Getting data: 'Sharding' +1. Test entry #394: Sharding Performance related data with random seed 1607 // (similarity score: 54.43%) +2. Test entry #355: Throughput Sharding related data with random seed 6245 // (similarity score: 53.68%) +3. Test entry #130: Resilient Sharding related data with random seed 12002 // (similarity score: 53.30%) +4. Test entry #334: Sharding Vector related data with random seed 29020 // (similarity score: 51.99%) +5. Test entry #155: Sharding Version-control related data with random seed 4158 // (similarity score: 51.36%) +6. Test entry #36: Threading Sharding related data with random seed 23643 // (similarity score: 50.25%) +7. Test entry #83: Template Sharding related data with random seed 32241 // (similarity score: 49.35%) +8. Test entry #103: CLI Sharding related data with random seed 23375 // (similarity score: 47.66%) +9. Test entry #39: UI Sharding related data with random seed 26238 // (similarity score: 45.29%) +10. Test entry #112: Caching Sharding related data with random seed 9211 // (similarity score: 44.08%) +⏱️ Get operation took: 944 ms +--------------------------------------------------- +[Step 428/500] Processing... +Getting data: 'Documentation' +1. Test entry #468: Documentation Monitoring related data with random seed 13294 // (similarity score: 41.51%) +2. Test entry #81: Example Documentation related data with random seed 4554 // (similarity score: 34.95%) +3. Test entry #187: Documentation Cloud related data with random seed 9545 // (similarity score: 34.85%) +4. Test entry #390: Documentation Diagram related data with random seed 8258 // (similarity score: 32.91%) +5. 
Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 30.49%) +6. Test entry #176: Microservices Documentation related data with random seed 28218 // (similarity score: 28.00%) +7. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 27.51%) +8. Test entry #93: Node Documentation related data with random seed 10928 // (similarity score: 27.08%) +9. Test entry #319: Documentation Example related data with random seed 22093 // (similarity score: 26.81%) +10. Test entry #246: Node Documentation related data with random seed 8868 // (similarity score: 26.21%) +⏱️ Get operation took: 948 ms +--------------------------------------------------- +[Step 429/500] Processing... +Getting data: 'Synchronous' +1. Test entry #196: Boilerplate Synchronous related data with random seed 18887 // (similarity score: 42.32%) +2. Test entry #287: Example Synchronous related data with random seed 18676 // (similarity score: 40.82%) +3. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 32.63%) +4. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 32.34%) +5. Test entry #411: Security Synchronous related data with random seed 11940 // (similarity score: 31.90%) +6. Test entry #258: XML Synchronous related data with random seed 9079 // (similarity score: 29.62%) +7. Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 29.18%) +8. Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 28.48%) +9. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 27.98%) +10. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 27.69%) +⏱️ Get operation took: 1001 ms +--------------------------------------------------- +[Step 430/500] Processing... 
+Getting data: 'Vector' +1. Test entry #325: Vector UX related data with random seed 2934 // (similarity score: 40.50%) +2. Test entry #444: Vector Debugging related data with random seed 5646 // (similarity score: 36.83%) +3. Test entry #134: Template Vector related data with random seed 24421 // (similarity score: 36.57%) +4. Test entry #341: Node Vector related data with random seed 30034 // (similarity score: 36.54%) +5. Test entry #334: Sharding Vector related data with random seed 29020 // (similarity score: 33.49%) +6. Test entry #499: Vector CLI related data with random seed 18419 // (similarity score: 33.05%) +7. Test entry #433: Vector E2E-test related data with random seed 22356 // (similarity score: 31.29%) +8. Test entry #461: Vector Pub-sub related data with random seed 31094 // (similarity score: 31.17%) +9. Test entry #100: Latency Vector related data with random seed 28112 // (similarity score: 30.14%) +10. Test entry #385: ORM Vector related data with random seed 277 // (similarity score: 29.36%) +⏱️ Get operation took: 946 ms +--------------------------------------------------- +[Step 431/500] Processing... +Getting data: 'Integration-test' +1. Test entry #464: Example Integration-test related data with random seed 5874 // (similarity score: 51.54%) +2. Test entry #63: Testing Integration-test related data with random seed 21446 // (similarity score: 48.72%) +3. Test entry #150: Diagram Integration-test related data with random seed 19409 // (similarity score: 41.04%) +4. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 40.67%) +5. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 40.11%) +6. Test entry #455: E2E-test Integration-test related data with random seed 4838 // (similarity score: 39.24%) +7. Test entry #485: Integration-test Pub-sub related data with random seed 29641 // (similarity score: 38.47%) +8. 
Test entry #247: UML Integration-test related data with random seed 29544 // (similarity score: 38.16%) +9. Test entry #307: NoSQL Integration-test related data with random seed 5046 // (similarity score: 33.52%) +10. Test entry #92: Websocket Integration-test related data with random seed 32561 // (similarity score: 27.10%) +⏱️ Get operation took: 939 ms +--------------------------------------------------- +[Step 432/500] Processing... +Getting data: 'SQL' +1. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 41.94%) +2. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 41.80%) +3. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 37.96%) +4. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 37.76%) +5. Test entry #152: SQL Load-balancing related data with random seed 9262 // (similarity score: 37.07%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 36.37%) +7. Test entry #137: SQL Parallelism related data with random seed 8672 // (similarity score: 35.23%) +8. Test entry #431: SQL Version-control related data with random seed 505 // (similarity score: 32.86%) +9. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 30.91%) +10. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 30.51%) +⏱️ Get operation took: 948 ms +--------------------------------------------------- +[Step 433/500] Processing... +Getting data: 'ERD' +1. Test entry #102: ERD ERD related data with random seed 20954 // (similarity score: 51.00%) +2. Test entry #138: Boilerplate ERD related data with random seed 17973 // (similarity score: 46.57%) +3. Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 44.39%) +4. 
Test entry #379: Code-review ERD related data with random seed 18047 // (similarity score: 43.54%) +5. Test entry #365: Framework ERD related data with random seed 21870 // (similarity score: 42.68%) +6. Test entry #135: UI ERD related data with random seed 9172 // (similarity score: 41.81%) +7. Test entry #378: ERD Parallelism related data with random seed 25512 // (similarity score: 39.79%) +8. Test entry #252: ERD Parallelism related data with random seed 23325 // (similarity score: 39.25%) +9. Test entry #296: ERD RESTful related data with random seed 19760 // (similarity score: 36.51%) +10. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 23.26%) +⏱️ Get operation took: 1047 ms +--------------------------------------------------- +[Step 434/500] Processing... +Getting data: 'Pull-request' +1. Test entry #438: Pull-request API related data with random seed 19781 // (similarity score: 51.53%) +2. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 50.75%) +3. Test entry #107: Resilient Pull-request related data with random seed 9028 // (similarity score: 49.75%) +4. Test entry #488: Pull-request Git related data with random seed 13085 // (similarity score: 47.13%) +5. Test entry #424: Pull-request YAML related data with random seed 9732 // (similarity score: 46.59%) +6. Test entry #288: Index Pull-request related data with random seed 4855 // (similarity score: 46.42%) +7. Test entry #28: Pull-request Pub-sub related data with random seed 15070 // (similarity score: 45.59%) +8. Test entry #166: Pull-request Unit-test related data with random seed 18963 // (similarity score: 40.27%) +9. Test entry #314: Threading RESTful related data with random seed 12875 // (similarity score: 19.17%) +10. 
Test entry #203: Recovery JSON related data with random seed 8776 // (similarity score: 17.99%) +⏱️ Get operation took: 946 ms +--------------------------------------------------- +[Step 435/500] Processing... +Getting data: 'Scalability' +1. Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 43.17%) +2. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 40.71%) +3. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 38.07%) +4. Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 37.55%) +5. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 37.23%) +6. Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 35.78%) +7. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 35.62%) +8. Test entry #51: Scalability SDK related data with random seed 3268 // (similarity score: 33.46%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 32.11%) +10. Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 31.32%) +⏱️ Get operation took: 952 ms +--------------------------------------------------- +[Step 436/500] Processing... +Getting data: 'Node' +1. Test entry #114: Node Optimization related data with random seed 20946 // (similarity score: 44.96%) +2. Test entry #72: Node Optimization related data with random seed 10019 // (similarity score: 41.61%) +3. Test entry #246: Node Documentation related data with random seed 8868 // (similarity score: 39.92%) +4. Test entry #175: Node UX related data with random seed 24087 // (similarity score: 39.28%) +5. Test entry #341: Node Vector related data with random seed 30034 // (similarity score: 39.03%) +6. 
Test entry #93: Node Documentation related data with random seed 10928 // (similarity score: 38.72%) +7. Test entry #11: Code-review Node related data with random seed 32027 // (similarity score: 35.38%) +8. Test entry #398: Backup Node related data with random seed 21714 // (similarity score: 34.34%) +9. Test entry #328: Node DevOps related data with random seed 7412 // (similarity score: 30.61%) +10. Test entry #16: Node Unit-test related data with random seed 4007 // (similarity score: 29.56%) +⏱️ Get operation took: 994 ms +--------------------------------------------------- +[Step 437/500] Processing... +Getting data: 'Merging' +1. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 47.50%) +2. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 46.78%) +3. Test entry #417: Event-driven Merging related data with random seed 13881 // (similarity score: 45.04%) +4. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 44.14%) +5. Test entry #361: Merging Logging related data with random seed 12323 // (similarity score: 38.53%) +6. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 38.19%) +7. Test entry #150: Diagram Integration-test related data with random seed 19409 // (similarity score: 21.00%) +8. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 19.25%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 18.92%) +10. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 16.25%) +⏱️ Get operation took: 948 ms +--------------------------------------------------- +[Step 438/500] Processing... +Getting data: 'Security' +1. Test entry #411: Security Synchronous related data with random seed 11940 // (similarity score: 37.71%) +2. 
Test entry #174: Best-practices Security related data with random seed 18653 // (similarity score: 35.63%) +3. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 29.96%) +4. Test entry #268: GUI Security related data with random seed 8853 // (similarity score: 27.74%) +5. Test entry #315: Encryption Best-practices related data with random seed 23350 // (similarity score: 27.15%) +6. Test entry #123: Encryption Alerting related data with random seed 20180 // (similarity score: 25.45%) +7. Test entry #234: Security CI/CD related data with random seed 25834 // (similarity score: 25.03%) +8. Test entry #149: Encryption Logging related data with random seed 27963 // (similarity score: 24.16%) +9. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 24.08%) +10. Test entry #382: Encryption Algorithm related data with random seed 9600 // (similarity score: 23.77%) +⏱️ Get operation took: 944 ms +--------------------------------------------------- +[Step 439/500] Processing... +Getting data: 'Synchronous' +1. Test entry #196: Boilerplate Synchronous related data with random seed 18887 // (similarity score: 42.32%) +2. Test entry #287: Example Synchronous related data with random seed 18676 // (similarity score: 40.82%) +3. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 32.63%) +4. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 32.34%) +5. Test entry #411: Security Synchronous related data with random seed 11940 // (similarity score: 31.90%) +6. Test entry #258: XML Synchronous related data with random seed 9079 // (similarity score: 29.62%) +7. Test entry #221: Best-practices Asynchronous related data with random seed 27295 // (similarity score: 29.18%) +8. Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 28.48%) +9. 
Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 27.98%) +10. Test entry #276: Backup Asynchronous related data with random seed 23970 // (similarity score: 27.69%) +⏱️ Get operation took: 949 ms +--------------------------------------------------- +[Step 440/500] Processing... +Getting data: 'Git' +1. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 26.09%) +2. Test entry #429: Version-control Code-review related data with random seed 14304 // (similarity score: 24.27%) +3. Test entry #238: Merging Version-control related data with random seed 642 // (similarity score: 23.51%) +4. Test entry #205: Synchronous Code-review related data with random seed 1490 // (similarity score: 23.09%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 22.94%) +6. Test entry #338: Version-control Monitoring related data with random seed 11905 // (similarity score: 22.82%) +7. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 22.52%) +8. Test entry #155: Sharding Version-control related data with random seed 4158 // (similarity score: 21.79%) +9. Test entry #340: Version-control Sample related data with random seed 23846 // (similarity score: 21.68%) +10. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 21.45%) +⏱️ Get operation took: 949 ms +--------------------------------------------------- +[Step 441/500] Processing... +Getting data: 'Flowchart' +1. Test entry #9: Diagram Flowchart related data with random seed 14766 // (similarity score: 63.49%) +2. Test entry #495: Best-practices Flowchart related data with random seed 507 // (similarity score: 59.08%) +3. Test entry #121: Flowchart SDK related data with random seed 24836 // (similarity score: 56.62%) +4. 
Test entry #226: Flowchart Testing related data with random seed 32215 // (similarity score: 56.46%) +5. Test entry #342: Python Flowchart related data with random seed 32467 // (similarity score: 56.34%) +6. Test entry #209: Index Flowchart related data with random seed 16113 // (similarity score: 54.06%) +7. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 49.88%) +8. Test entry #41: Index Flowchart related data with random seed 29094 // (similarity score: 49.84%) +9. Test entry #213: GUI GraphQL related data with random seed 9145 // (similarity score: 30.82%) +10. Test entry #469: Branching GraphQL related data with random seed 16900 // (similarity score: 24.78%) +⏱️ Get operation took: 1003 ms +--------------------------------------------------- +[Step 442/500] Processing... +Getting data: 'Library' +1. Test entry #451: Library Performance related data with random seed 31802 // (similarity score: 38.26%) +2. Test entry #357: Monitoring Library related data with random seed 91 // (similarity score: 36.58%) +3. Test entry #496: Library Database related data with random seed 13646 // (similarity score: 33.79%) +4. Test entry #320: Library UX related data with random seed 29021 // (similarity score: 31.34%) +5. Test entry #486: Library Example related data with random seed 9207 // (similarity score: 30.25%) +6. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 29.56%) +7. Test entry #95: Library Fault-tolerance related data with random seed 987 // (similarity score: 27.92%) +8. Test entry #472: Library CI/CD related data with random seed 18363 // (similarity score: 27.01%) +9. Test entry #30: API Library related data with random seed 1930 // (similarity score: 26.55%) +10. 
Test entry #250: Index Library related data with random seed 2980 // (similarity score: 25.80%) +⏱️ Get operation took: 941 ms +--------------------------------------------------- +[Step 443/500] Processing... +Getting data: 'Algorithm' +1. Test entry #248: Algorithm Data-structure related data with random seed 18077 // (similarity score: 46.27%) +2. Test entry #153: Template Algorithm related data with random seed 17570 // (similarity score: 38.05%) +3. Test entry #392: Algorithm CSV related data with random seed 18964 // (similarity score: 36.06%) +4. Test entry #79: Algorithm Asynchronous related data with random seed 18478 // (similarity score: 34.57%) +5. Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 33.78%) +6. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 33.09%) +7. Test entry #240: Algorithm Asynchronous related data with random seed 6330 // (similarity score: 31.98%) +8. Test entry #227: Framework Algorithm related data with random seed 19206 // (similarity score: 31.96%) +9. Test entry #337: Algorithm Orchestration related data with random seed 15318 // (similarity score: 31.60%) +10. Test entry #120: Algorithm Unit-test related data with random seed 14608 // (similarity score: 30.48%) +⏱️ Get operation took: 943 ms +--------------------------------------------------- +[Step 444/500] Processing... +Getting data: 'Data-structure' +1. Test entry #273: Data-structure Performance related data with random seed 1219 // (similarity score: 50.19%) +2. Test entry #183: Code-review Data-structure related data with random seed 20935 // (similarity score: 49.27%) +3. Test entry #1: Data-structure Index related data with random seed 16730 // (similarity score: 49.07%) +4. Test entry #248: Algorithm Data-structure related data with random seed 18077 // (similarity score: 47.47%) +5. 
Test entry #313: Data-structure Monitoring related data with random seed 10551 // (similarity score: 47.31%) +6. Test entry #118: CSV Data-structure related data with random seed 20709 // (similarity score: 45.72%) +7. Test entry #289: RESTful Data-structure related data with random seed 1227 // (similarity score: 42.01%) +8. Test entry #389: Data-structure JSON related data with random seed 23144 // (similarity score: 40.81%) +9. Test entry #425: CLI Data-structure related data with random seed 7062 // (similarity score: 39.72%) +10. Test entry #171: Blockchain Data-structure related data with random seed 29630 // (similarity score: 38.81%) +⏱️ Get operation took: 968 ms +--------------------------------------------------- +[Step 445/500] Processing... +Getting data: 'Example' +1. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 21.02%) +2. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 20.91%) +3. Test entry #81: Example Documentation related data with random seed 4554 // (similarity score: 20.87%) +4. Test entry #43: Documentation Example related data with random seed 5875 // (similarity score: 19.56%) +5. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 19.26%) +6. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 18.68%) +7. Test entry #319: Documentation Example related data with random seed 22093 // (similarity score: 18.61%) +8. Test entry #297: Event-driven Code-review related data with random seed 23440 // (similarity score: 17.50%) +9. Test entry #138: Boilerplate ERD related data with random seed 17973 // (similarity score: 17.39%) +10. Test entry #183: Code-review Data-structure related data with random seed 20935 // (similarity score: 16.97%) +⏱️ Get operation took: 978 ms +--------------------------------------------------- +[Step 446/500] Processing... 
+Getting data: 'Branching' +1. Test entry #436: Example Branching related data with random seed 25763 // (similarity score: 54.14%) +2. Test entry #228: Branching Framework related data with random seed 10213 // (similarity score: 51.21%) +3. Test entry #384: Branching Sample related data with random seed 29822 // (similarity score: 50.90%) +4. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 47.89%) +5. Test entry #262: Container Branching related data with random seed 22849 // (similarity score: 47.76%) +6. Test entry #469: Branching GraphQL related data with random seed 16900 // (similarity score: 40.80%) +7. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 40.19%) +8. Test entry #23: Branching Microservices related data with random seed 24428 // (similarity score: 38.88%) +9. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 29.49%) +10. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 26.91%) +⏱️ Get operation took: 948 ms +--------------------------------------------------- +[Step 447/500] Processing... +Getting data: 'SQL' +1. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 41.94%) +2. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 41.80%) +3. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 37.96%) +4. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 37.76%) +5. Test entry #152: SQL Load-balancing related data with random seed 9262 // (similarity score: 37.07%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 36.37%) +7. Test entry #137: SQL Parallelism related data with random seed 8672 // (similarity score: 35.23%) +8. 
Test entry #431: SQL Version-control related data with random seed 505 // (similarity score: 32.86%) +9. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 30.91%) +10. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 30.51%) +⏱️ Get operation took: 946 ms +--------------------------------------------------- +[Step 448/500] Processing... +Getting data: 'Fault-tolerance' +1. Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 62.09%) +2. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 57.76%) +3. Test entry #80: Fault-tolerance Throughput related data with random seed 27910 // (similarity score: 54.33%) +4. Test entry #95: Library Fault-tolerance related data with random seed 987 // (similarity score: 47.09%) +5. Test entry #197: Fault-tolerance ORM related data with random seed 21855 // (similarity score: 45.57%) +6. Test entry #397: Search Fault-tolerance related data with random seed 20761 // (similarity score: 44.70%) +7. Test entry #428: Fault-tolerance ORM related data with random seed 17392 // (similarity score: 44.01%) +8. Test entry #24: Replication Fault-tolerance related data with random seed 9951 // (similarity score: 41.75%) +9. Test entry #91: Fault-tolerance ORM related data with random seed 3476 // (similarity score: 41.55%) +10. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 39.60%) +⏱️ Get operation took: 1000 ms +--------------------------------------------------- +[Step 449/500] Processing... +Getting data: 'UX' +1. Test entry #463: Microservices UX related data with random seed 32421 // (similarity score: 28.20%) +2. Test entry #177: UX Monitoring related data with random seed 16167 // (similarity score: 26.37%) +3. Test entry #133: Microservices UX related data with random seed 13570 // (similarity score: 25.58%) +4. 
Test entry #140: UX Architecture related data with random seed 15002 // (similarity score: 25.04%) +5. Test entry #66: UX UML related data with random seed 3512 // (similarity score: 22.74%) +6. Test entry #402: UX Load-balancing related data with random seed 5781 // (similarity score: 21.76%) +7. Test entry #115: UX Design-patterns related data with random seed 14554 // (similarity score: 21.69%) +8. Test entry #175: Node UX related data with random seed 24087 // (similarity score: 20.22%) +9. Test entry #75: UX Search related data with random seed 11487 // (similarity score: 20.03%) +10. Test entry #237: UX Blockchain related data with random seed 8734 // (similarity score: 19.35%) +⏱️ Get operation took: 1086 ms +--------------------------------------------------- +[Step 450/500] Processing... +Getting data: 'Design-patterns' +1. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 63.80%) +2. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 62.09%) +3. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 61.76%) +4. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 57.41%) +5. Test entry #115: UX Design-patterns related data with random seed 14554 // (similarity score: 56.30%) +6. Test entry #4: Design-patterns UML related data with random seed 16565 // (similarity score: 50.76%) +7. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 49.55%) +8. Test entry #498: Design-patterns Python related data with random seed 16866 // (similarity score: 49.27%) +9. Test entry #212: Design-patterns DevOps related data with random seed 19247 // (similarity score: 46.55%) +10. 
Test entry #117: Design-patterns Blockchain related data with random seed 16711 // (similarity score: 45.78%) +⏱️ Get operation took: 1066 ms + +📊 [BATCH REPORT] Items 401 to 450 + -> Average Latency: 968 ms + +--------------------------------------------------- +[Step 451/500] Processing... +Getting data: 'Branching' +1. Test entry #436: Example Branching related data with random seed 25763 // (similarity score: 54.14%) +2. Test entry #228: Branching Framework related data with random seed 10213 // (similarity score: 51.21%) +3. Test entry #384: Branching Sample related data with random seed 29822 // (similarity score: 50.90%) +4. Test entry #188: Branching Synchronous related data with random seed 11262 // (similarity score: 47.89%) +5. Test entry #262: Container Branching related data with random seed 22849 // (similarity score: 47.76%) +6. Test entry #469: Branching GraphQL related data with random seed 16900 // (similarity score: 40.80%) +7. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 40.19%) +8. Test entry #23: Branching Microservices related data with random seed 24428 // (similarity score: 38.88%) +9. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 29.49%) +10. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 26.91%) +⏱️ Get operation took: 970 ms +--------------------------------------------------- +[Step 452/500] Processing... +Getting data: 'Network' +1. Test entry #283: Network Network related data with random seed 21734 // (similarity score: 44.63%) +2. Test entry #163: Network Orchestration related data with random seed 17906 // (similarity score: 36.14%) +3. Test entry #29: Mocking Network related data with random seed 20303 // (similarity score: 35.95%) +4. Test entry #190: Mocking Network related data with random seed 32119 // (similarity score: 35.84%) +5. 
Test entry #189: UML Network related data with random seed 6297 // (similarity score: 32.43%) +6. Test entry #90: Network GUI related data with random seed 6917 // (similarity score: 30.73%) +7. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 29.31%) +8. Test entry #49: ORM Network related data with random seed 23604 // (similarity score: 26.22%) +9. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 21.70%) +10. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 19.05%) +⏱️ Get operation took: 995 ms +--------------------------------------------------- +[Step 453/500] Processing... +Getting data: 'Parallelism' +1. Test entry #164: Threading Parallelism related data with random seed 13640 // (similarity score: 53.13%) +2. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 51.21%) +3. Test entry #7: Mocking Parallelism related data with random seed 19010 // (similarity score: 51.18%) +4. Test entry #97: Testing Parallelism related data with random seed 6045 // (similarity score: 49.12%) +5. Test entry #136: Parallelism Sample related data with random seed 18078 // (similarity score: 48.54%) +6. Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 47.79%) +7. Test entry #201: Sample Parallelism related data with random seed 8912 // (similarity score: 46.65%) +8. Test entry #252: ERD Parallelism related data with random seed 23325 // (similarity score: 45.15%) +9. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 44.59%) +10. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 44.19%) +⏱️ Get operation took: 1015 ms +--------------------------------------------------- +[Step 454/500] Processing... +Getting data: 'Scalability' +1. 
Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 43.17%) +2. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 40.71%) +3. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 38.07%) +4. Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 37.55%) +5. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 37.23%) +6. Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 35.78%) +7. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 35.62%) +8. Test entry #51: Scalability SDK related data with random seed 3268 // (similarity score: 33.46%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 32.11%) +10. Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 31.32%) +⏱️ Get operation took: 990 ms +--------------------------------------------------- +[Step 455/500] Processing... +Getting data: 'Consensus' +1. Test entry #131: Consensus Version-control related data with random seed 2743 // (similarity score: 37.37%) +2. Test entry #270: Cloud Consensus related data with random seed 15128 // (similarity score: 34.38%) +3. Test entry #194: Python Consensus related data with random seed 2579 // (similarity score: 32.73%) +4. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 32.01%) +5. Test entry #460: Consensus YAML related data with random seed 16140 // (similarity score: 29.09%) +6. Test entry #470: Consensus Replication related data with random seed 9153 // (similarity score: 27.78%) +7. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 27.72%) +8. 
Test entry #445: Message-queue Consensus related data with random seed 22969 // (similarity score: 25.59%) +9. Test entry #204: Merging Best-practices related data with random seed 6339 // (similarity score: 22.21%) +10. Test entry #456: Sample Merging related data with random seed 5260 // (similarity score: 16.66%) +⏱️ Get operation took: 1209 ms +--------------------------------------------------- +[Step 456/500] Processing... +Getting data: 'Index' +1. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 46.82%) +2. Test entry #473: Index Python related data with random seed 3534 // (similarity score: 44.57%) +3. Test entry #86: Index Container related data with random seed 813 // (similarity score: 41.90%) +4. Test entry #58: Cloud Index related data with random seed 16697 // (similarity score: 41.66%) +5. Test entry #1: Data-structure Index related data with random seed 16730 // (similarity score: 41.23%) +6. Test entry #44: Cloud Index related data with random seed 12754 // (similarity score: 38.93%) +7. Test entry #305: Index Database related data with random seed 10951 // (similarity score: 38.05%) +8. Test entry #31: ORM Index related data with random seed 2163 // (similarity score: 38.00%) +9. Test entry #288: Index Pull-request related data with random seed 4855 // (similarity score: 37.81%) +10. Test entry #358: Index Stubbing related data with random seed 6263 // (similarity score: 37.00%) +⏱️ Get operation took: 1025 ms +--------------------------------------------------- +[Step 457/500] Processing... +Getting data: 'Recovery' +1. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 39.10%) +2. Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 37.60%) +3. Test entry #306: Recovery Library related data with random seed 21298 // (similarity score: 36.53%) +4. 
Test entry #430: Recovery Diagram related data with random seed 24192 // (similarity score: 36.20%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 35.07%) +6. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 33.87%) +7. Test entry #367: SDK Recovery related data with random seed 20345 // (similarity score: 33.28%) +8. Test entry #88: Recovery JSON related data with random seed 28249 // (similarity score: 32.39%) +9. Test entry #321: Recovery Git related data with random seed 3971 // (similarity score: 32.26%) +10. Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 30.40%) +⏱️ Get operation took: 990 ms +--------------------------------------------------- +[Step 458/500] Processing... +Getting data: 'Framework' +1. Test entry #227: Framework Algorithm related data with random seed 19206 // (similarity score: 31.85%) +2. Test entry #275: Framework Orchestration related data with random seed 12664 // (similarity score: 31.36%) +3. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 29.40%) +4. Test entry #126: Architecture Framework related data with random seed 1304 // (similarity score: 28.78%) +5. Test entry #386: Framework Testing related data with random seed 14924 // (similarity score: 28.77%) +6. Test entry #329: Code-review Framework related data with random seed 1817 // (similarity score: 26.61%) +7. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 24.69%) +8. Test entry #365: Framework ERD related data with random seed 21870 // (similarity score: 20.97%) +9. Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 20.94%) +10. 
Test entry #228: Branching Framework related data with random seed 10213 // (similarity score: 19.21%) +⏱️ Get operation took: 948 ms +--------------------------------------------------- +[Step 459/500] Processing... +Getting data: 'Example' +1. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 21.02%) +2. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 20.91%) +3. Test entry #81: Example Documentation related data with random seed 4554 // (similarity score: 20.87%) +4. Test entry #43: Documentation Example related data with random seed 5875 // (similarity score: 19.56%) +5. Test entry #312: Event-driven Tutorial related data with random seed 31631 // (similarity score: 19.26%) +6. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 18.68%) +7. Test entry #319: Documentation Example related data with random seed 22093 // (similarity score: 18.61%) +8. Test entry #297: Event-driven Code-review related data with random seed 23440 // (similarity score: 17.50%) +9. Test entry #138: Boilerplate ERD related data with random seed 17973 // (similarity score: 17.39%) +10. Test entry #183: Code-review Data-structure related data with random seed 20935 // (similarity score: 16.97%) +⏱️ Get operation took: 957 ms +--------------------------------------------------- +[Step 460/500] Processing... +Getting data: 'Pull-request' +1. Test entry #438: Pull-request API related data with random seed 19781 // (similarity score: 51.53%) +2. Test entry #206: Pull-request Template related data with random seed 2208 // (similarity score: 50.75%) +3. Test entry #107: Resilient Pull-request related data with random seed 9028 // (similarity score: 49.75%) +4. Test entry #488: Pull-request Git related data with random seed 13085 // (similarity score: 47.13%) +5. Test entry #424: Pull-request YAML related data with random seed 9732 // (similarity score: 46.59%) +6. 
Test entry #288: Index Pull-request related data with random seed 4855 // (similarity score: 46.42%) +7. Test entry #28: Pull-request Pub-sub related data with random seed 15070 // (similarity score: 45.59%) +8. Test entry #166: Pull-request Unit-test related data with random seed 18963 // (similarity score: 40.27%) +9. Test entry #314: Threading RESTful related data with random seed 12875 // (similarity score: 19.17%) +10. Test entry #203: Recovery JSON related data with random seed 8776 // (similarity score: 17.99%) +⏱️ Get operation took: 993 ms +--------------------------------------------------- +[Step 461/500] Processing... +Getting data: 'Search' +1. Test entry #75: UX Search related data with random seed 11487 // (similarity score: 42.09%) +2. Test entry #372: Best-practices Search related data with random seed 17128 // (similarity score: 41.11%) +3. Test entry #202: Search ORM related data with random seed 9657 // (similarity score: 38.48%) +4. Test entry #154: Cloud Search related data with random seed 2185 // (similarity score: 38.09%) +5. Test entry #280: Boilerplate Search related data with random seed 32715 // (similarity score: 37.50%) +6. Test entry #408: CI/CD Search related data with random seed 26877 // (similarity score: 35.64%) +7. Test entry #78: Search Git related data with random seed 20971 // (similarity score: 35.37%) +8. Test entry #397: Search Fault-tolerance related data with random seed 20761 // (similarity score: 35.23%) +9. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 29.18%) +10. Test entry #305: Index Database related data with random seed 10951 // (similarity score: 26.45%) +⏱️ Get operation took: 987 ms +--------------------------------------------------- +[Step 462/500] Processing... +Getting data: 'Concurrency' +1. Test entry #164: Threading Parallelism related data with random seed 13640 // (similarity score: 47.53%) +2. 
Test entry #441: Code-review Concurrency related data with random seed 21085 // (similarity score: 47.35%) +3. Test entry #405: Throughput Concurrency related data with random seed 5148 // (similarity score: 46.52%) +4. Test entry #54: Concurrency Mocking related data with random seed 20846 // (similarity score: 42.38%) +5. Test entry #26: Concurrency NoSQL related data with random seed 4796 // (similarity score: 42.23%) +6. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 41.66%) +7. Test entry #350: XML Concurrency related data with random seed 12519 // (similarity score: 41.43%) +8. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 41.41%) +9. Test entry #97: Testing Parallelism related data with random seed 6045 // (similarity score: 37.64%) +10. Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 36.83%) +⏱️ Get operation took: 944 ms +--------------------------------------------------- +[Step 463/500] Processing... +Getting data: 'GUI' +1. Test entry #124: GUI Parallelism related data with random seed 24581 // (similarity score: 43.56%) +2. Test entry #267: GUI Threading related data with random seed 19721 // (similarity score: 39.19%) +3. Test entry #101: XML GUI related data with random seed 23123 // (similarity score: 38.99%) +4. Test entry #268: GUI Security related data with random seed 8853 // (similarity score: 38.70%) +5. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 37.90%) +6. Test entry #90: Network GUI related data with random seed 6917 // (similarity score: 37.45%) +7. Test entry #400: GUI Container related data with random seed 26968 // (similarity score: 36.86%) +8. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 36.84%) +9. Test entry #427: GUI Template related data with random seed 25503 // (similarity score: 35.99%) +10. 
Test entry #87: GUI Encryption related data with random seed 24527 // (similarity score: 35.25%) +⏱️ Get operation took: 1007 ms +--------------------------------------------------- +[Step 464/500] Processing... +Getting data: 'SQL' +1. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 41.94%) +2. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 41.80%) +3. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 37.96%) +4. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 37.76%) +5. Test entry #152: SQL Load-balancing related data with random seed 9262 // (similarity score: 37.07%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 36.37%) +7. Test entry #137: SQL Parallelism related data with random seed 8672 // (similarity score: 35.23%) +8. Test entry #431: SQL Version-control related data with random seed 505 // (similarity score: 32.86%) +9. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 30.91%) +10. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 30.51%) +⏱️ Get operation took: 1096 ms +--------------------------------------------------- +[Step 465/500] Processing... +Getting data: 'DevOps' +1. Test entry #259: Pub-sub DevOps related data with random seed 23944 // (similarity score: 58.76%) +2. Test entry #212: Design-patterns DevOps related data with random seed 19247 // (similarity score: 57.73%) +3. Test entry #146: Profiling DevOps related data with random seed 14561 // (similarity score: 55.98%) +4. Test entry #46: Data-structure DevOps related data with random seed 24000 // (similarity score: 51.31%) +5. Test entry #269: Database DevOps related data with random seed 15436 // (similarity score: 45.93%) +6. 
Test entry #328: Node DevOps related data with random seed 7412 // (similarity score: 44.08%) +7. Test entry #207: DevOps JSON related data with random seed 15591 // (similarity score: 41.62%) +8. Test entry #271: Encryption DevOps related data with random seed 2715 // (similarity score: 40.67%) +9. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 29.13%) +10. Test entry #243: SDK Orchestration related data with random seed 30501 // (similarity score: 27.73%) +⏱️ Get operation took: 938 ms +--------------------------------------------------- +[Step 466/500] Processing... +Getting data: 'Design-patterns' +1. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 63.80%) +2. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 62.09%) +3. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 61.76%) +4. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 57.41%) +5. Test entry #115: UX Design-patterns related data with random seed 14554 // (similarity score: 56.30%) +6. Test entry #4: Design-patterns UML related data with random seed 16565 // (similarity score: 50.76%) +7. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 49.55%) +8. Test entry #498: Design-patterns Python related data with random seed 16866 // (similarity score: 49.27%) +9. Test entry #212: Design-patterns DevOps related data with random seed 19247 // (similarity score: 46.55%) +10. Test entry #117: Design-patterns Blockchain related data with random seed 16711 // (similarity score: 45.78%) +⏱️ Get operation took: 996 ms +--------------------------------------------------- +[Step 467/500] Processing... +Getting data: 'Fault-tolerance' +1. 
Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 62.09%) +2. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 57.76%) +3. Test entry #80: Fault-tolerance Throughput related data with random seed 27910 // (similarity score: 54.33%) +4. Test entry #95: Library Fault-tolerance related data with random seed 987 // (similarity score: 47.09%) +5. Test entry #197: Fault-tolerance ORM related data with random seed 21855 // (similarity score: 45.57%) +6. Test entry #397: Search Fault-tolerance related data with random seed 20761 // (similarity score: 44.70%) +7. Test entry #428: Fault-tolerance ORM related data with random seed 17392 // (similarity score: 44.01%) +8. Test entry #24: Replication Fault-tolerance related data with random seed 9951 // (similarity score: 41.75%) +9. Test entry #91: Fault-tolerance ORM related data with random seed 3476 // (similarity score: 41.55%) +10. Test entry #3: Fault-tolerance SQL related data with random seed 10390 // (similarity score: 39.60%) +⏱️ Get operation took: 999 ms +--------------------------------------------------- +[Step 468/500] Processing... +Getting data: 'Database' +1. Test entry #426: Database Database related data with random seed 19208 // (similarity score: 39.68%) +2. Test entry #496: Library Database related data with random seed 13646 // (similarity score: 36.63%) +3. Test entry #193: Database Resilient related data with random seed 3286 // (similarity score: 35.20%) +4. Test entry #366: Sample Database related data with random seed 18353 // (similarity score: 34.25%) +5. Test entry #20: SQL Data-structure related data with random seed 15443 // (similarity score: 33.82%) +6. Test entry #261: SQL Database related data with random seed 22537 // (similarity score: 33.21%) +7. Test entry #50: Framework Database related data with random seed 22053 // (similarity score: 32.30%) +8. 
Test entry #82: Database Cloud related data with random seed 12343 // (similarity score: 32.29%) +9. Test entry #346: Database Microservices related data with random seed 1068 // (similarity score: 32.20%) +10. Test entry #211: Resilient SQL related data with random seed 21527 // (similarity score: 32.14%) +⏱️ Get operation took: 953 ms +--------------------------------------------------- +[Step 469/500] Processing... +Getting data: 'Microservices' +1. Test entry #142: Event-driven Microservices related data with random seed 7351 // (similarity score: 59.38%) +2. Test entry #251: Resilient Microservices related data with random seed 1673 // (similarity score: 56.33%) +3. Test entry #437: Microservices Optimization related data with random seed 8916 // (similarity score: 55.90%) +4. Test entry #239: Data-structure Microservices related data with random seed 11589 // (similarity score: 55.78%) +5. Test entry #292: Asynchronous Microservices related data with random seed 8204 // (similarity score: 55.67%) +6. Test entry #483: Orchestration Microservices related data with random seed 31494 // (similarity score: 51.77%) +7. Test entry #176: Microservices Documentation related data with random seed 28218 // (similarity score: 50.61%) +8. Test entry #23: Branching Microservices related data with random seed 24428 // (similarity score: 49.91%) +9. Test entry #463: Microservices UX related data with random seed 32421 // (similarity score: 49.69%) +10. Test entry #353: Microservices YAML related data with random seed 3068 // (similarity score: 48.52%) +⏱️ Get operation took: 905 ms +--------------------------------------------------- +[Step 470/500] Processing... +Getting data: 'RESTful' +1. Test entry #482: Code-review RESTful related data with random seed 18863 // (similarity score: 41.46%) +2. Test entry #289: RESTful Data-structure related data with random seed 1227 // (similarity score: 41.33%) +3. 
Test entry #446: Container RESTful related data with random seed 10831 // (similarity score: 40.34%) +4. Test entry #215: API RESTful related data with random seed 29014 // (similarity score: 40.32%) +5. Test entry #310: RESTful UML related data with random seed 5709 // (similarity score: 40.14%) +6. Test entry #314: Threading RESTful related data with random seed 12875 // (similarity score: 37.42%) +7. Test entry #296: ERD RESTful related data with random seed 19760 // (similarity score: 36.03%) +8. Test entry #200: NoSQL RESTful related data with random seed 23279 // (similarity score: 31.68%) +9. Test entry #423: Diagram RESTful related data with random seed 14813 // (similarity score: 29.93%) +10. Test entry #345: CI/CD RESTful related data with random seed 23698 // (similarity score: 29.68%) +⏱️ Get operation took: 1001 ms +--------------------------------------------------- +[Step 471/500] Processing... +Getting data: 'Network' +1. Test entry #283: Network Network related data with random seed 21734 // (similarity score: 44.63%) +2. Test entry #163: Network Orchestration related data with random seed 17906 // (similarity score: 36.14%) +3. Test entry #29: Mocking Network related data with random seed 20303 // (similarity score: 35.95%) +4. Test entry #190: Mocking Network related data with random seed 32119 // (similarity score: 35.84%) +5. Test entry #189: UML Network related data with random seed 6297 // (similarity score: 32.43%) +6. Test entry #90: Network GUI related data with random seed 6917 // (similarity score: 30.73%) +7. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 29.31%) +8. Test entry #49: ORM Network related data with random seed 23604 // (similarity score: 26.22%) +9. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 21.70%) +10. 
Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 19.05%) +⏱️ Get operation took: 928 ms +--------------------------------------------------- +[Step 472/500] Processing... +Getting data: 'Architecture' +1. Test entry #126: Architecture Framework related data with random seed 1304 // (similarity score: 36.09%) +2. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 32.91%) +3. Test entry #140: UX Architecture related data with random seed 15002 // (similarity score: 31.89%) +4. Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 31.79%) +5. Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 30.07%) +6. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 27.81%) +7. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 26.98%) +8. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 26.33%) +9. Test entry #73: Design-patterns Network related data with random seed 21388 // (similarity score: 25.30%) +10. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 24.95%) +⏱️ Get operation took: 960 ms +--------------------------------------------------- +[Step 473/500] Processing... +Getting data: 'Latency' +1. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 64.40%) +2. Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 55.53%) +3. Test entry #474: UML Latency related data with random seed 18011 // (similarity score: 53.35%) +4. Test entry #370: Latency Library related data with random seed 12394 // (similarity score: 52.31%) +5. 
Test entry #167: YAML Latency related data with random seed 19573 // (similarity score: 51.30%) +6. Test entry #100: Latency Vector related data with random seed 28112 // (similarity score: 51.14%) +7. Test entry #352: Latency JSON related data with random seed 17683 // (similarity score: 46.23%) +8. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 33.10%) +9. Test entry #80: Fault-tolerance Throughput related data with random seed 27910 // (similarity score: 31.48%) +10. Test entry #481: Tutorial Throughput related data with random seed 11888 // (similarity score: 28.88%) +⏱️ Get operation took: 882 ms +--------------------------------------------------- +[Step 474/500] Processing... +Getting data: 'Throughput' +1. Test entry #481: Tutorial Throughput related data with random seed 11888 // (similarity score: 48.81%) +2. Test entry #80: Fault-tolerance Throughput related data with random seed 27910 // (similarity score: 45.02%) +3. Test entry #47: Asynchronous Throughput related data with random seed 4006 // (similarity score: 44.96%) +4. Test entry #60: Design-patterns Throughput related data with random seed 13831 // (similarity score: 40.15%) +5. Test entry #355: Throughput Sharding related data with random seed 6245 // (similarity score: 39.86%) +6. Test entry #6: CI/CD Throughput related data with random seed 5729 // (similarity score: 38.72%) +7. Test entry #144: Throughput ORM related data with random seed 18100 // (similarity score: 35.68%) +8. Test entry #96: Tutorial Latency related data with random seed 9789 // (similarity score: 35.66%) +9. Test entry #335: Throughput E2E-test related data with random seed 11728 // (similarity score: 34.04%) +10. Test entry #294: ORM Throughput related data with random seed 31130 // (similarity score: 33.56%) +⏱️ Get operation took: 945 ms +--------------------------------------------------- +[Step 475/500] Processing... +Getting data: 'Container' +1. 
Test entry #182: Testing Container related data with random seed 11294 // (similarity score: 44.10%) +2. Test entry #446: Container RESTful related data with random seed 10831 // (similarity score: 40.01%) +3. Test entry #233: Unit-test Container related data with random seed 19623 // (similarity score: 39.50%) +4. Test entry #400: GUI Container related data with random seed 26968 // (similarity score: 39.49%) +5. Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 38.28%) +6. Test entry #160: Container NoSQL related data with random seed 15570 // (similarity score: 37.91%) +7. Test entry #86: Index Container related data with random seed 813 // (similarity score: 37.74%) +8. Test entry #262: Container Branching related data with random seed 22849 // (similarity score: 37.24%) +9. Test entry #65: Encryption Container related data with random seed 20902 // (similarity score: 36.61%) +10. Test entry #170: ORM Container related data with random seed 30245 // (similarity score: 32.73%) +⏱️ Get operation took: 946 ms +--------------------------------------------------- +[Step 476/500] Processing... +Getting data: 'Message-queue' +1. Test entry #147: Message-queue Container related data with random seed 31302 // (similarity score: 55.00%) +2. Test entry #396: Sample Message-queue related data with random seed 1074 // (similarity score: 54.96%) +3. Test entry #445: Message-queue Consensus related data with random seed 22969 // (similarity score: 51.62%) +4. Test entry #447: Message-queue Unit-test related data with random seed 12353 // (similarity score: 48.23%) +5. Test entry #225: Encryption Message-queue related data with random seed 3677 // (similarity score: 45.23%) +6. Test entry #264: SQL Message-queue related data with random seed 315 // (similarity score: 44.77%) +7. Test entry #56: Replication Message-queue related data with random seed 23394 // (similarity score: 44.68%) +8. 
Test entry #180: Alerting Latency related data with random seed 5286 // (similarity score: 30.91%) +9. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 27.49%) +10. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 24.44%) +⏱️ Get operation took: 1092 ms +--------------------------------------------------- +[Step 477/500] Processing... +Getting data: 'Load-balancing' +1. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 63.40%) +2. Test entry #139: Load-balancing Optimization related data with random seed 31776 // (similarity score: 57.41%) +3. Test entry #27: Optimization Load-balancing related data with random seed 27403 // (similarity score: 55.07%) +4. Test entry #402: UX Load-balancing related data with random seed 5781 // (similarity score: 52.94%) +5. Test entry #302: Load-balancing Diagram related data with random seed 1050 // (similarity score: 52.56%) +6. Test entry #45: Event-driven Load-balancing related data with random seed 5105 // (similarity score: 52.48%) +7. Test entry #354: Load-balancing Event-driven related data with random seed 18844 // (similarity score: 47.54%) +8. Test entry #449: UI Load-balancing related data with random seed 29998 // (similarity score: 45.02%) +9. Test entry #242: Load-balancing CLI related data with random seed 21278 // (similarity score: 44.58%) +10. Test entry #216: Load-balancing Sample related data with random seed 25835 // (similarity score: 43.77%) +⏱️ Get operation took: 895 ms +--------------------------------------------------- +[Step 478/500] Processing... +Getting data: 'Diagram' +1. Test entry #266: Merging Diagram related data with random seed 4694 // (similarity score: 44.83%) +2. Test entry #423: Diagram RESTful related data with random seed 14813 // (similarity score: 44.09%) +3. 
Test entry #150: Diagram Integration-test related data with random seed 19409 // (similarity score: 43.81%) +4. Test entry #9: Diagram Flowchart related data with random seed 14766 // (similarity score: 43.28%) +5. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 40.49%) +6. Test entry #390: Documentation Diagram related data with random seed 8258 // (similarity score: 39.71%) +7. Test entry #64: Diagram NoSQL related data with random seed 3428 // (similarity score: 36.31%) +8. Test entry #161: Orchestration Diagram related data with random seed 20102 // (similarity score: 32.78%) +9. Test entry #302: Load-balancing Diagram related data with random seed 1050 // (similarity score: 31.56%) +10. Test entry #430: Recovery Diagram related data with random seed 24192 // (similarity score: 29.49%) +⏱️ Get operation took: 896 ms +--------------------------------------------------- +[Step 479/500] Processing... +Getting data: 'Data-structure' +1. Test entry #273: Data-structure Performance related data with random seed 1219 // (similarity score: 50.19%) +2. Test entry #183: Code-review Data-structure related data with random seed 20935 // (similarity score: 49.27%) +3. Test entry #1: Data-structure Index related data with random seed 16730 // (similarity score: 49.07%) +4. Test entry #248: Algorithm Data-structure related data with random seed 18077 // (similarity score: 47.47%) +5. Test entry #313: Data-structure Monitoring related data with random seed 10551 // (similarity score: 47.31%) +6. Test entry #118: CSV Data-structure related data with random seed 20709 // (similarity score: 45.72%) +7. Test entry #289: RESTful Data-structure related data with random seed 1227 // (similarity score: 42.01%) +8. Test entry #389: Data-structure JSON related data with random seed 23144 // (similarity score: 40.81%) +9. Test entry #425: CLI Data-structure related data with random seed 7062 // (similarity score: 39.72%) +10. 
Test entry #171: Blockchain Data-structure related data with random seed 29630 // (similarity score: 38.81%) +⏱️ Get operation took: 942 ms +--------------------------------------------------- +[Step 480/500] Processing... +Getting data: 'GraphQL' +1. Test entry #308: GraphQL GraphQL related data with random seed 25902 // (similarity score: 67.68%) +2. Test entry #125: GraphQL Tutorial related data with random seed 2640 // (similarity score: 66.75%) +3. Test entry #224: Synchronous GraphQL related data with random seed 6657 // (similarity score: 64.60%) +4. Test entry #343: GraphQL UX related data with random seed 26726 // (similarity score: 62.29%) +5. Test entry #48: GraphQL Asynchronous related data with random seed 4509 // (similarity score: 60.43%) +6. Test entry #35: GraphQL YAML related data with random seed 19655 // (similarity score: 59.13%) +7. Test entry #213: GUI GraphQL related data with random seed 9145 // (similarity score: 58.32%) +8. Test entry #256: Profiling GraphQL related data with random seed 4781 // (similarity score: 58.12%) +9. Test entry #10: Framework GraphQL related data with random seed 10276 // (similarity score: 57.78%) +10. Test entry #25: GraphQL CI/CD related data with random seed 7557 // (similarity score: 56.63%) +⏱️ Get operation took: 947 ms +--------------------------------------------------- +[Step 481/500] Processing... +Getting data: 'Scalability' +1. Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 43.17%) +2. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 40.71%) +3. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 38.07%) +4. Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 37.55%) +5. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 37.23%) +6. 
Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 35.78%) +7. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 35.62%) +8. Test entry #51: Scalability SDK related data with random seed 3268 // (similarity score: 33.46%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 32.11%) +10. Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 31.32%) +⏱️ Get operation took: 1011 ms +--------------------------------------------------- +[Step 482/500] Processing... +Getting data: 'UML' +1. Test entry #466: UML Optimization related data with random seed 21437 // (similarity score: 51.87%) +2. Test entry #66: UX UML related data with random seed 3512 // (similarity score: 50.20%) +3. Test entry #17: Tutorial UML related data with random seed 12772 // (similarity score: 49.59%) +4. Test entry #158: Consensus UML related data with random seed 10660 // (similarity score: 47.11%) +5. Test entry #189: UML Network related data with random seed 6297 // (similarity score: 45.74%) +6. Test entry #247: UML Integration-test related data with random seed 29544 // (similarity score: 45.62%) +7. Test entry #474: UML Latency related data with random seed 18011 // (similarity score: 45.46%) +8. Test entry #377: UML GUI related data with random seed 13474 // (similarity score: 44.68%) +9. Test entry #4: Design-patterns UML related data with random seed 16565 // (similarity score: 44.27%) +10. Test entry #310: RESTful UML related data with random seed 5709 // (similarity score: 43.88%) +⏱️ Get operation took: 1028 ms +--------------------------------------------------- +[Step 483/500] Processing... +Getting data: 'E2E-test' +1. Test entry #455: E2E-test Integration-test related data with random seed 4838 // (similarity score: 65.81%) +2. 
Test entry #241: Alerting E2E-test related data with random seed 11551 // (similarity score: 63.85%) +3. Test entry #335: Throughput E2E-test related data with random seed 11728 // (similarity score: 63.35%) +4. Test entry #38: Profiling E2E-test related data with random seed 30992 // (similarity score: 63.16%) +5. Test entry #278: UML E2E-test related data with random seed 2723 // (similarity score: 61.63%) +6. Test entry #433: Vector E2E-test related data with random seed 22356 // (similarity score: 60.28%) +7. Test entry #304: E2E-test Unit-test related data with random seed 19213 // (similarity score: 59.91%) +8. Test entry #104: Backup E2E-test related data with random seed 3184 // (similarity score: 55.45%) +9. Test entry #414: GraphQL E2E-test related data with random seed 14354 // (similarity score: 53.19%) +10. Test entry #102: ERD ERD related data with random seed 20954 // (similarity score: 40.31%) +⏱️ Get operation took: 910 ms +--------------------------------------------------- +[Step 484/500] Processing... +Getting data: 'Recovery' +1. Test entry #471: Recovery Version-control related data with random seed 2051 // (similarity score: 39.10%) +2. Test entry #33: Recovery Container related data with random seed 11930 // (similarity score: 37.60%) +3. Test entry #306: Recovery Library related data with random seed 21298 // (similarity score: 36.53%) +4. Test entry #430: Recovery Diagram related data with random seed 24192 // (similarity score: 36.20%) +5. Test entry #484: Version-control Recovery related data with random seed 20954 // (similarity score: 35.07%) +6. Test entry #493: Stubbing Recovery related data with random seed 28424 // (similarity score: 33.87%) +7. Test entry #367: SDK Recovery related data with random seed 20345 // (similarity score: 33.28%) +8. Test entry #88: Recovery JSON related data with random seed 28249 // (similarity score: 32.39%) +9. 
Test entry #321: Recovery Git related data with random seed 3971 // (similarity score: 32.26%) +10. Test entry #407: Recovery Blockchain related data with random seed 694 // (similarity score: 30.40%) +⏱️ Get operation took: 894 ms +--------------------------------------------------- +[Step 485/500] Processing... +Getting data: 'Debugging' +1. Test entry #14: Monitoring Debugging related data with random seed 9592 // (similarity score: 51.51%) +2. Test entry #333: Profiling Debugging related data with random seed 25431 // (similarity score: 49.97%) +3. Test entry #387: Index Debugging related data with random seed 29125 // (similarity score: 43.23%) +4. Test entry #253: Encryption Debugging related data with random seed 3566 // (similarity score: 42.05%) +5. Test entry #67: Debugging Blockchain related data with random seed 23179 // (similarity score: 42.03%) +6. Test entry #236: Websocket Debugging related data with random seed 18729 // (similarity score: 41.95%) +7. Test entry #444: Vector Debugging related data with random seed 5646 // (similarity score: 41.67%) +8. Test entry #406: Debugging Stubbing related data with random seed 28773 // (similarity score: 41.49%) +9. Test entry #339: API Debugging related data with random seed 14456 // (similarity score: 39.09%) +10. Test entry #34: Debugging CI/CD related data with random seed 7455 // (similarity score: 33.14%) +⏱️ Get operation took: 956 ms +--------------------------------------------------- +[Step 486/500] Processing... +Getting data: 'Flowchart' +1. Test entry #9: Diagram Flowchart related data with random seed 14766 // (similarity score: 63.49%) +2. Test entry #495: Best-practices Flowchart related data with random seed 507 // (similarity score: 59.08%) +3. Test entry #121: Flowchart SDK related data with random seed 24836 // (similarity score: 56.62%) +4. Test entry #226: Flowchart Testing related data with random seed 32215 // (similarity score: 56.46%) +5. 
Test entry #342: Python Flowchart related data with random seed 32467 // (similarity score: 56.34%) +6. Test entry #209: Index Flowchart related data with random seed 16113 // (similarity score: 54.06%) +7. Test entry #344: Architecture Flowchart related data with random seed 2645 // (similarity score: 49.88%) +8. Test entry #41: Index Flowchart related data with random seed 29094 // (similarity score: 49.84%) +9. Test entry #213: GUI GraphQL related data with random seed 9145 // (similarity score: 30.82%) +10. Test entry #469: Branching GraphQL related data with random seed 16900 // (similarity score: 24.78%) +⏱️ Get operation took: 893 ms +--------------------------------------------------- +[Step 487/500] Processing... +Getting data: 'Orchestration' +1. Test entry #257: Orchestration Architecture related data with random seed 19866 // (similarity score: 55.54%) +2. Test entry #309: Event-driven Orchestration related data with random seed 21023 // (similarity score: 51.76%) +3. Test entry #161: Orchestration Diagram related data with random seed 20102 // (similarity score: 50.18%) +4. Test entry #337: Algorithm Orchestration related data with random seed 15318 // (similarity score: 47.14%) +5. Test entry #77: Orchestration Code-review related data with random seed 28098 // (similarity score: 46.32%) +6. Test entry #163: Network Orchestration related data with random seed 17906 // (similarity score: 44.84%) +7. Test entry #275: Framework Orchestration related data with random seed 12664 // (similarity score: 44.25%) +8. Test entry #483: Orchestration Microservices related data with random seed 31494 // (similarity score: 42.41%) +9. Test entry #195: API Orchestration related data with random seed 17599 // (similarity score: 41.25%) +10. Test entry #412: YAML Orchestration related data with random seed 30910 // (similarity score: 40.32%) +⏱️ Get operation took: 931 ms +--------------------------------------------------- +[Step 488/500] Processing... 
+Getting data: 'Boilerplate' +1. Test entry #277: Boilerplate Tutorial related data with random seed 24276 // (similarity score: 41.22%) +2. Test entry #363: Tutorial Template related data with random seed 30949 // (similarity score: 36.21%) +3. Test entry #280: Boilerplate Search related data with random seed 32715 // (similarity score: 30.77%) +4. Test entry #151: Design-patterns Tutorial related data with random seed 32202 // (similarity score: 30.65%) +5. Test entry #217: Tutorial Design-patterns related data with random seed 26687 // (similarity score: 29.65%) +6. Test entry #12: Boilerplate YAML related data with random seed 29263 // (similarity score: 29.38%) +7. Test entry #196: Boilerplate Synchronous related data with random seed 18887 // (similarity score: 29.35%) +8. Test entry #89: Resilient Template related data with random seed 6643 // (similarity score: 28.89%) +9. Test entry #443: Threading Boilerplate related data with random seed 23641 // (similarity score: 28.42%) +10. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 27.41%) +⏱️ Get operation took: 959 ms +--------------------------------------------------- +[Step 489/500] Processing... +Getting data: 'Concurrency' +1. Test entry #164: Threading Parallelism related data with random seed 13640 // (similarity score: 47.53%) +2. Test entry #441: Code-review Concurrency related data with random seed 21085 // (similarity score: 47.35%) +3. Test entry #405: Throughput Concurrency related data with random seed 5148 // (similarity score: 46.52%) +4. Test entry #54: Concurrency Mocking related data with random seed 20846 // (similarity score: 42.38%) +5. Test entry #26: Concurrency NoSQL related data with random seed 4796 // (similarity score: 42.23%) +6. Test entry #300: Parallelism Asynchronous related data with random seed 2571 // (similarity score: 41.66%) +7. 
Test entry #350: XML Concurrency related data with random seed 12519 // (similarity score: 41.43%) +8. Test entry #265: Version-control Parallelism related data with random seed 26444 // (similarity score: 41.41%) +9. Test entry #97: Testing Parallelism related data with random seed 6045 // (similarity score: 37.64%) +10. Test entry #432: Parallelism Architecture related data with random seed 13977 // (similarity score: 36.83%) +⏱️ Get operation took: 932 ms +--------------------------------------------------- +[Step 490/500] Processing... +Getting data: 'Scalability' +1. Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 43.17%) +2. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 40.71%) +3. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 38.07%) +4. Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 37.55%) +5. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 37.23%) +6. Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 35.78%) +7. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 35.62%) +8. Test entry #51: Scalability SDK related data with random seed 3268 // (similarity score: 33.46%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 32.11%) +10. Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 31.32%) +⏱️ Get operation took: 904 ms +--------------------------------------------------- +[Step 491/500] Processing... +Getting data: 'Testing' +1. Test entry #132: Mocking Framework related data with random seed 24854 // (similarity score: 48.75%) +2. 
Test entry #179: Testing Testing related data with random seed 8212 // (similarity score: 47.91%) +3. Test entry #491: Framework Mocking related data with random seed 10031 // (similarity score: 47.89%) +4. Test entry #409: SQL Mocking related data with random seed 18821 // (similarity score: 47.21%) +5. Test entry #220: Mocking GUI related data with random seed 16639 // (similarity score: 46.88%) +6. Test entry #54: Concurrency Mocking related data with random seed 20846 // (similarity score: 44.52%) +7. Test entry #274: Testing Caching related data with random seed 1136 // (similarity score: 44.39%) +8. Test entry #330: Boilerplate Mocking related data with random seed 5771 // (similarity score: 44.01%) +9. Test entry #448: Tutorial Unit-test related data with random seed 7829 // (similarity score: 43.86%) +10. Test entry #232: Tutorial Unit-test related data with random seed 11672 // (similarity score: 43.56%) +⏱️ Get operation took: 851 ms +--------------------------------------------------- +[Step 492/500] Processing... +Getting data: 'Load-balancing' +1. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 63.40%) +2. Test entry #139: Load-balancing Optimization related data with random seed 31776 // (similarity score: 57.41%) +3. Test entry #27: Optimization Load-balancing related data with random seed 27403 // (similarity score: 55.07%) +4. Test entry #402: UX Load-balancing related data with random seed 5781 // (similarity score: 52.94%) +5. Test entry #302: Load-balancing Diagram related data with random seed 1050 // (similarity score: 52.56%) +6. Test entry #45: Event-driven Load-balancing related data with random seed 5105 // (similarity score: 52.48%) +7. Test entry #354: Load-balancing Event-driven related data with random seed 18844 // (similarity score: 47.54%) +8. Test entry #449: UI Load-balancing related data with random seed 29998 // (similarity score: 45.02%) +9. 
Test entry #242: Load-balancing CLI related data with random seed 21278 // (similarity score: 44.58%) +10. Test entry #216: Load-balancing Sample related data with random seed 25835 // (similarity score: 43.77%) +⏱️ Get operation took: 1041 ms +--------------------------------------------------- +[Step 493/500] Processing... +Getting data: 'RESTful' +1. Test entry #482: Code-review RESTful related data with random seed 18863 // (similarity score: 41.46%) +2. Test entry #289: RESTful Data-structure related data with random seed 1227 // (similarity score: 41.33%) +3. Test entry #446: Container RESTful related data with random seed 10831 // (similarity score: 40.34%) +4. Test entry #215: API RESTful related data with random seed 29014 // (similarity score: 40.32%) +5. Test entry #310: RESTful UML related data with random seed 5709 // (similarity score: 40.14%) +6. Test entry #314: Threading RESTful related data with random seed 12875 // (similarity score: 37.42%) +7. Test entry #296: ERD RESTful related data with random seed 19760 // (similarity score: 36.03%) +8. Test entry #200: NoSQL RESTful related data with random seed 23279 // (similarity score: 31.68%) +9. Test entry #423: Diagram RESTful related data with random seed 14813 // (similarity score: 29.93%) +10. Test entry #345: CI/CD RESTful related data with random seed 23698 // (similarity score: 29.68%) +⏱️ Get operation took: 953 ms +--------------------------------------------------- +[Step 494/500] Processing... +Getting data: 'Library' +1. Test entry #451: Library Performance related data with random seed 31802 // (similarity score: 38.26%) +2. Test entry #357: Monitoring Library related data with random seed 91 // (similarity score: 36.58%) +3. Test entry #496: Library Database related data with random seed 13646 // (similarity score: 33.79%) +4. Test entry #320: Library UX related data with random seed 29021 // (similarity score: 31.34%) +5. 
Test entry #486: Library Example related data with random seed 9207 // (similarity score: 30.25%) +6. Test entry #214: Design-patterns Library related data with random seed 23324 // (similarity score: 29.56%) +7. Test entry #95: Library Fault-tolerance related data with random seed 987 // (similarity score: 27.92%) +8. Test entry #472: Library CI/CD related data with random seed 18363 // (similarity score: 27.01%) +9. Test entry #30: API Library related data with random seed 1930 // (similarity score: 26.55%) +10. Test entry #250: Index Library related data with random seed 2980 // (similarity score: 25.80%) +⏱️ Get operation took: 942 ms +--------------------------------------------------- +[Step 495/500] Processing... +Getting data: 'Alerting' +1. Test entry #419: Monitoring Alerting related data with random seed 23527 // (similarity score: 51.72%) +2. Test entry #351: Alerting Algorithm related data with random seed 24557 // (similarity score: 44.93%) +3. Test entry #371: Code-review Alerting related data with random seed 6709 // (similarity score: 40.15%) +4. Test entry #52: Alerting Diagram related data with random seed 516 // (similarity score: 40.12%) +5. Test entry #198: Architecture Alerting related data with random seed 32659 // (similarity score: 35.73%) +6. Test entry #336: API Alerting related data with random seed 704 // (similarity score: 35.52%) +7. Test entry #159: Consensus Alerting related data with random seed 3048 // (similarity score: 34.88%) +8. Test entry #94: SQL Alerting related data with random seed 10343 // (similarity score: 34.33%) +9. Test entry #105: Alerting Websocket related data with random seed 4059 // (similarity score: 33.48%) +10. Test entry #359: Encryption Alerting related data with random seed 12354 // (similarity score: 33.22%) +⏱️ Get operation took: 887 ms +--------------------------------------------------- +[Step 496/500] Processing... +Getting data: 'Integration-test' +1. 
Test entry #464: Example Integration-test related data with random seed 5874 // (similarity score: 51.54%) +2. Test entry #63: Testing Integration-test related data with random seed 21446 // (similarity score: 48.72%) +3. Test entry #150: Diagram Integration-test related data with random seed 19409 // (similarity score: 41.04%) +4. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 40.67%) +5. Test entry #375: Integration-test Branching related data with random seed 18965 // (similarity score: 40.11%) +6. Test entry #455: E2E-test Integration-test related data with random seed 4838 // (similarity score: 39.24%) +7. Test entry #485: Integration-test Pub-sub related data with random seed 29641 // (similarity score: 38.47%) +8. Test entry #247: UML Integration-test related data with random seed 29544 // (similarity score: 38.16%) +9. Test entry #307: NoSQL Integration-test related data with random seed 5046 // (similarity score: 33.52%) +10. Test entry #92: Websocket Integration-test related data with random seed 32561 // (similarity score: 27.10%) +⏱️ Get operation took: 902 ms +--------------------------------------------------- +[Step 497/500] Processing... +Getting data: 'Cloud' +1. Test entry #497: Cloud Profiling related data with random seed 11981 // (similarity score: 38.74%) +2. Test entry #270: Cloud Consensus related data with random seed 15128 // (similarity score: 36.08%) +3. Test entry #173: Sample Cloud related data with random seed 3561 // (similarity score: 36.02%) +4. Test entry #154: Cloud Search related data with random seed 2185 // (similarity score: 35.82%) +5. Test entry #58: Cloud Index related data with random seed 16697 // (similarity score: 33.17%) +6. Test entry #122: Cloud Websocket related data with random seed 14024 // (similarity score: 31.07%) +7. Test entry #116: NoSQL Cloud related data with random seed 22178 // (similarity score: 30.67%) +8. 
Test entry #178: Cloud XML related data with random seed 27579 // (similarity score: 30.58%) +9. Test entry #44: Cloud Index related data with random seed 12754 // (similarity score: 30.20%) +10. Test entry #187: Documentation Cloud related data with random seed 9545 // (similarity score: 26.80%) +⏱️ Get operation took: 890 ms +--------------------------------------------------- +[Step 498/500] Processing... +Getting data: 'Scalability' +1. Test entry #244: Scalability Algorithm related data with random seed 31613 // (similarity score: 43.17%) +2. Test entry #99: Scalability Example related data with random seed 25883 // (similarity score: 40.71%) +3. Test entry #393: Scalability Load-balancing related data with random seed 32178 // (similarity score: 38.07%) +4. Test entry #223: Fault-tolerance Scalability related data with random seed 4603 // (similarity score: 37.55%) +5. Test entry #19: Asynchronous Scalability related data with random seed 22862 // (similarity score: 37.23%) +6. Test entry #467: Scalability ERD related data with random seed 14991 // (similarity score: 35.78%) +7. Test entry #199: Fault-tolerance Scalability related data with random seed 22961 // (similarity score: 35.62%) +8. Test entry #51: Scalability SDK related data with random seed 3268 // (similarity score: 33.46%) +9. Test entry #376: Scalability Integration-test related data with random seed 12414 // (similarity score: 32.11%) +10. Test entry #416: Scalability Unit-test related data with random seed 5162 // (similarity score: 31.32%) +⏱️ Get operation took: 1000 ms +--------------------------------------------------- +[Step 499/500] Processing... +Getting data: 'SDK' +1. Test entry #279: Tutorial SDK related data with random seed 4966 // (similarity score: 56.68%) +2. Test entry #51: Scalability SDK related data with random seed 3268 // (similarity score: 51.89%) +3. Test entry #249: SDK UI related data with random seed 20519 // (similarity score: 49.85%) +4. 
Test entry #243: SDK Orchestration related data with random seed 30501 // (similarity score: 47.08%) +5. Test entry #5: SDK GUI related data with random seed 24418 // (similarity score: 46.35%) +6. Test entry #367: SDK Recovery related data with random seed 20345 // (similarity score: 45.44%) +7. Test entry #162: SDK Testing related data with random seed 18191 // (similarity score: 44.04%) +8. Test entry #364: Database SDK related data with random seed 22809 // (similarity score: 40.07%) +9. Test entry #286: Python SDK related data with random seed 14667 // (similarity score: 39.90%) +10. Test entry #348: Blockchain SDK related data with random seed 25254 // (similarity score: 39.13%) +⏱️ Get operation took: 946 ms +--------------------------------------------------- +[Step 500/500] Processing... +Getting data: 'Microservices' +1. Test entry #142: Event-driven Microservices related data with random seed 7351 // (similarity score: 59.38%) +2. Test entry #251: Resilient Microservices related data with random seed 1673 // (similarity score: 56.33%) +3. Test entry #437: Microservices Optimization related data with random seed 8916 // (similarity score: 55.90%) +4. Test entry #239: Data-structure Microservices related data with random seed 11589 // (similarity score: 55.78%) +5. Test entry #292: Asynchronous Microservices related data with random seed 8204 // (similarity score: 55.67%) +6. Test entry #483: Orchestration Microservices related data with random seed 31494 // (similarity score: 51.77%) +7. Test entry #176: Microservices Documentation related data with random seed 28218 // (similarity score: 50.61%) +8. Test entry #23: Branching Microservices related data with random seed 24428 // (similarity score: 49.91%) +9. Test entry #463: Microservices UX related data with random seed 32421 // (similarity score: 49.69%) +10. 
Test entry #353: Microservices YAML related data with random seed 3068 // (similarity score: 48.52%) +⏱️ Get operation took: 1020 ms + +📊 [BATCH REPORT] Items 451 to 500 + -> Average Latency: 964 ms + +=================================================== +🎉 Congratulations! The system survived the stress test. + Total time spent on 'get' operations: 481268 ms + Overall Average Latency: 962 ms +=================================================== diff --git a/ecosystem/sdk/vector-indexing/stress_test_results.txt b/ecosystem/sdk/vector-indexing/stress_test_results.txt new file mode 100644 index 000000000..9be475c38 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/stress_test_results.txt @@ -0,0 +1,2047 @@ +=== Stress Test Started: Adding 500 items sequentially === +Date: Sun Dec 7 11:51:12 PST 2025 +--------------------------------------------------- +[Step 1/500] Processing... +Adding data: 'Test entry #1: Data-structure Index related data with random seed 16730' +⏱️ Add operation took: 2867 ms +--------------------------------------------------- +[Step 2/500] Processing... +Adding data: 'Test entry #2: Database Stubbing related data with random seed 28361' +⏱️ Add operation took: 781 ms +--------------------------------------------------- +[Step 3/500] Processing... +Adding data: 'Test entry #3: Fault-tolerance SQL related data with random seed 10390' +⏱️ Add operation took: 699 ms +--------------------------------------------------- +[Step 4/500] Processing... +Adding data: 'Test entry #4: Design-patterns UML related data with random seed 16565' +⏱️ Add operation took: 967 ms +--------------------------------------------------- +[Step 5/500] Processing... +Adding data: 'Test entry #5: SDK GUI related data with random seed 24418' +⏱️ Add operation took: 884 ms +--------------------------------------------------- +[Step 6/500] Processing... 
+Adding data: 'Test entry #6: CI/CD Throughput related data with random seed 5729' +⏱️ Add operation took: 779 ms +--------------------------------------------------- +[Step 7/500] Processing... +Adding data: 'Test entry #7: Mocking Parallelism related data with random seed 19010' +⏱️ Add operation took: 692 ms +--------------------------------------------------- +[Step 8/500] Processing... +Adding data: 'Test entry #8: GraphQL Index related data with random seed 15029' +⏱️ Add operation took: 703 ms +--------------------------------------------------- +[Step 9/500] Processing... +Adding data: 'Test entry #9: Diagram Flowchart related data with random seed 14766' +⏱️ Add operation took: 849 ms +--------------------------------------------------- +[Step 10/500] Processing... +Adding data: 'Test entry #10: Framework GraphQL related data with random seed 10276' +⏱️ Add operation took: 693 ms +--------------------------------------------------- +[Step 11/500] Processing... +Adding data: 'Test entry #11: Code-review Node related data with random seed 32027' +⏱️ Add operation took: 749 ms +--------------------------------------------------- +[Step 12/500] Processing... +Adding data: 'Test entry #12: Boilerplate YAML related data with random seed 29263' +⏱️ Add operation took: 697 ms +--------------------------------------------------- +[Step 13/500] Processing... +Adding data: 'Test entry #13: Websocket Caching related data with random seed 27250' +⏱️ Add operation took: 696 ms +--------------------------------------------------- +[Step 14/500] Processing... +Adding data: 'Test entry #14: Monitoring Debugging related data with random seed 9592' +⏱️ Add operation took: 693 ms +--------------------------------------------------- +[Step 15/500] Processing... +Adding data: 'Test entry #15: Backup Orchestration related data with random seed 32162' +⏱️ Add operation took: 748 ms +--------------------------------------------------- +[Step 16/500] Processing... 
+Adding data: 'Test entry #16: Node Unit-test related data with random seed 4007' +⏱️ Add operation took: 744 ms +--------------------------------------------------- +[Step 17/500] Processing... +Adding data: 'Test entry #17: Tutorial UML related data with random seed 12772' +⏱️ Add operation took: 702 ms +--------------------------------------------------- +[Step 18/500] Processing... +Adding data: 'Test entry #18: UX Stubbing related data with random seed 14060' +⏱️ Add operation took: 744 ms +--------------------------------------------------- +[Step 19/500] Processing... +Adding data: 'Test entry #19: Asynchronous Scalability related data with random seed 22862' +⏱️ Add operation took: 770 ms +--------------------------------------------------- +[Step 20/500] Processing... +Adding data: 'Test entry #20: SQL Data-structure related data with random seed 15443' +⏱️ Add operation took: 726 ms +--------------------------------------------------- +[Step 21/500] Processing... +Adding data: 'Test entry #21: CSV Example related data with random seed 2897' +⏱️ Add operation took: 701 ms +--------------------------------------------------- +[Step 22/500] Processing... +Adding data: 'Test entry #22: ORM UX related data with random seed 24344' +⏱️ Add operation took: 749 ms +--------------------------------------------------- +[Step 23/500] Processing... +Adding data: 'Test entry #23: Branching Microservices related data with random seed 24428' +⏱️ Add operation took: 739 ms +--------------------------------------------------- +[Step 24/500] Processing... +Adding data: 'Test entry #24: Replication Fault-tolerance related data with random seed 9951' +⏱️ Add operation took: 910 ms +--------------------------------------------------- +[Step 25/500] Processing... +Adding data: 'Test entry #25: GraphQL CI/CD related data with random seed 7557' +⏱️ Add operation took: 843 ms +--------------------------------------------------- +[Step 26/500] Processing... 
+Adding data: 'Test entry #26: Concurrency NoSQL related data with random seed 4796' +⏱️ Add operation took: 849 ms +--------------------------------------------------- +[Step 27/500] Processing... +Adding data: 'Test entry #27: Optimization Load-balancing related data with random seed 27403' +⏱️ Add operation took: 835 ms +--------------------------------------------------- +[Step 28/500] Processing... +Adding data: 'Test entry #28: Pull-request Pub-sub related data with random seed 15070' +⏱️ Add operation took: 1023 ms +--------------------------------------------------- +[Step 29/500] Processing... +Adding data: 'Test entry #29: Mocking Network related data with random seed 20303' +⏱️ Add operation took: 752 ms +--------------------------------------------------- +[Step 30/500] Processing... +Adding data: 'Test entry #30: API Library related data with random seed 1930' +⏱️ Add operation took: 796 ms +--------------------------------------------------- +[Step 31/500] Processing... +Adding data: 'Test entry #31: ORM Index related data with random seed 2163' +⏱️ Add operation took: 795 ms +--------------------------------------------------- +[Step 32/500] Processing... +Adding data: 'Test entry #32: Example Git related data with random seed 8370' +⏱️ Add operation took: 960 ms +--------------------------------------------------- +[Step 33/500] Processing... +Adding data: 'Test entry #33: Recovery Container related data with random seed 11930' +⏱️ Add operation took: 935 ms +--------------------------------------------------- +[Step 34/500] Processing... +Adding data: 'Test entry #34: Debugging CI/CD related data with random seed 7455' +⏱️ Add operation took: 896 ms +--------------------------------------------------- +[Step 35/500] Processing... +Adding data: 'Test entry #35: GraphQL YAML related data with random seed 19655' +⏱️ Add operation took: 886 ms +--------------------------------------------------- +[Step 36/500] Processing... 
+Adding data: 'Test entry #36: Threading Sharding related data with random seed 23643' +⏱️ Add operation took: 1022 ms +--------------------------------------------------- +[Step 37/500] Processing... +Adding data: 'Test entry #37: Resilient NoSQL related data with random seed 4721' +⏱️ Add operation took: 885 ms +--------------------------------------------------- +[Step 38/500] Processing... +Adding data: 'Test entry #38: Profiling E2E-test related data with random seed 30992' +⏱️ Add operation took: 789 ms +--------------------------------------------------- +[Step 39/500] Processing... +Adding data: 'Test entry #39: UI Sharding related data with random seed 26238' +⏱️ Add operation took: 895 ms +--------------------------------------------------- +[Step 40/500] Processing... +Adding data: 'Test entry #40: Replication UI related data with random seed 1010' +⏱️ Add operation took: 852 ms +--------------------------------------------------- +[Step 41/500] Processing... +Adding data: 'Test entry #41: Index Flowchart related data with random seed 29094' +⏱️ Add operation took: 1119 ms +--------------------------------------------------- +[Step 42/500] Processing... +Adding data: 'Test entry #42: Resilient XML related data with random seed 15654' +⏱️ Add operation took: 1015 ms +--------------------------------------------------- +[Step 43/500] Processing... +Adding data: 'Test entry #43: Documentation Example related data with random seed 5875' +⏱️ Add operation took: 1014 ms +--------------------------------------------------- +[Step 44/500] Processing... +Adding data: 'Test entry #44: Cloud Index related data with random seed 12754' +⏱️ Add operation took: 778 ms +--------------------------------------------------- +[Step 45/500] Processing... +Adding data: 'Test entry #45: Event-driven Load-balancing related data with random seed 5105' +⏱️ Add operation took: 923 ms +--------------------------------------------------- +[Step 46/500] Processing... 
+Adding data: 'Test entry #46: Data-structure DevOps related data with random seed 24000' +⏱️ Add operation took: 834 ms +--------------------------------------------------- +[Step 47/500] Processing... +Adding data: 'Test entry #47: Asynchronous Throughput related data with random seed 4006' +⏱️ Add operation took: 845 ms +--------------------------------------------------- +[Step 48/500] Processing... +Adding data: 'Test entry #48: GraphQL Asynchronous related data with random seed 4509' +⏱️ Add operation took: 794 ms +--------------------------------------------------- +[Step 49/500] Processing... +Adding data: 'Test entry #49: ORM Network related data with random seed 23604' +⏱️ Add operation took: 975 ms +--------------------------------------------------- +[Step 50/500] Processing... +Adding data: 'Test entry #50: Framework Database related data with random seed 22053' +⏱️ Add operation took: 1005 ms + +📊 [BATCH REPORT] Items 1 to 50 + -> Average Latency: 871 ms + +--------------------------------------------------- +[Step 51/500] Processing... +Adding data: 'Test entry #51: Scalability SDK related data with random seed 3268' +⏱️ Add operation took: 880 ms +--------------------------------------------------- +[Step 52/500] Processing... +Adding data: 'Test entry #52: Alerting Diagram related data with random seed 516' +⏱️ Add operation took: 842 ms +--------------------------------------------------- +[Step 53/500] Processing... +Adding data: 'Test entry #53: Blockchain Stubbing related data with random seed 10771' +⏱️ Add operation took: 848 ms +--------------------------------------------------- +[Step 54/500] Processing... +Adding data: 'Test entry #54: Concurrency Mocking related data with random seed 20846' +⏱️ Add operation took: 853 ms +--------------------------------------------------- +[Step 55/500] Processing... 
+Adding data: 'Test entry #55: Optimization Template related data with random seed 11116' +⏱️ Add operation took: 877 ms +--------------------------------------------------- +[Step 56/500] Processing... +Adding data: 'Test entry #56: Replication Message-queue related data with random seed 23394' +⏱️ Add operation took: 1328 ms +--------------------------------------------------- +[Step 57/500] Processing... +Adding data: 'Test entry #57: YAML JSON related data with random seed 19740' +⏱️ Add operation took: 1213 ms +--------------------------------------------------- +[Step 58/500] Processing... +Adding data: 'Test entry #58: Cloud Index related data with random seed 16697' +⏱️ Add operation took: 999 ms +--------------------------------------------------- +[Step 59/500] Processing... +Adding data: 'Test entry #59: API Vector related data with random seed 15476' +⏱️ Add operation took: 856 ms +--------------------------------------------------- +[Step 60/500] Processing... +Adding data: 'Test entry #60: Design-patterns Throughput related data with random seed 13831' +⏱️ Add operation took: 823 ms +--------------------------------------------------- +[Step 61/500] Processing... +Adding data: 'Test entry #61: Blockchain Microservices related data with random seed 13172' +⏱️ Add operation took: 746 ms +--------------------------------------------------- +[Step 62/500] Processing... +Adding data: 'Test entry #62: YAML CSV related data with random seed 16118' +⏱️ Add operation took: 850 ms +--------------------------------------------------- +[Step 63/500] Processing... +Adding data: 'Test entry #63: Testing Integration-test related data with random seed 21446' +⏱️ Add operation took: 824 ms +--------------------------------------------------- +[Step 64/500] Processing... +Adding data: 'Test entry #64: Diagram NoSQL related data with random seed 3428' +⏱️ Add operation took: 816 ms +--------------------------------------------------- +[Step 65/500] Processing... 
+Adding data: 'Test entry #65: Encryption Container related data with random seed 20902' +⏱️ Add operation took: 810 ms +--------------------------------------------------- +[Step 66/500] Processing... +Adding data: 'Test entry #66: UX UML related data with random seed 3512' +⏱️ Add operation took: 796 ms +--------------------------------------------------- +[Step 67/500] Processing... +Adding data: 'Test entry #67: Debugging Blockchain related data with random seed 23179' +⏱️ Add operation took: 821 ms +--------------------------------------------------- +[Step 68/500] Processing... +Adding data: 'Test entry #68: API Encryption related data with random seed 30427' +⏱️ Add operation took: 812 ms +--------------------------------------------------- +[Step 69/500] Processing... +Adding data: 'Test entry #69: Asynchronous NoSQL related data with random seed 15850' +⏱️ Add operation took: 866 ms +--------------------------------------------------- +[Step 70/500] Processing... +Adding data: 'Test entry #70: Python Testing related data with random seed 5317' +⏱️ Add operation took: 829 ms +--------------------------------------------------- +[Step 71/500] Processing... +Adding data: 'Test entry #71: Replication Resilient related data with random seed 6058' +⏱️ Add operation took: 899 ms +--------------------------------------------------- +[Step 72/500] Processing... +Adding data: 'Test entry #72: Node Optimization related data with random seed 10019' +⏱️ Add operation took: 783 ms +--------------------------------------------------- +[Step 73/500] Processing... +Adding data: 'Test entry #73: Design-patterns Network related data with random seed 21388' +⏱️ Add operation took: 890 ms +--------------------------------------------------- +[Step 74/500] Processing... +Adding data: 'Test entry #74: Asynchronous Asynchronous related data with random seed 32283' +⏱️ Add operation took: 816 ms +--------------------------------------------------- +[Step 75/500] Processing... 
+Adding data: 'Test entry #75: UX Search related data with random seed 11487' +⏱️ Add operation took: 797 ms +--------------------------------------------------- +[Step 76/500] Processing... +Adding data: 'Test entry #76: Git Unit-test related data with random seed 805' +⏱️ Add operation took: 853 ms +--------------------------------------------------- +[Step 77/500] Processing... +Adding data: 'Test entry #77: Orchestration Code-review related data with random seed 28098' +⏱️ Add operation took: 806 ms +--------------------------------------------------- +[Step 78/500] Processing... +Adding data: 'Test entry #78: Search Git related data with random seed 20971' +⏱️ Add operation took: 718 ms +--------------------------------------------------- +[Step 79/500] Processing... +Adding data: 'Test entry #79: Algorithm Asynchronous related data with random seed 18478' +⏱️ Add operation took: 749 ms +--------------------------------------------------- +[Step 80/500] Processing... +Adding data: 'Test entry #80: Fault-tolerance Throughput related data with random seed 27910' +⏱️ Add operation took: 809 ms +--------------------------------------------------- +[Step 81/500] Processing... +Adding data: 'Test entry #81: Example Documentation related data with random seed 4554' +⏱️ Add operation took: 846 ms +--------------------------------------------------- +[Step 82/500] Processing... +Adding data: 'Test entry #82: Database Cloud related data with random seed 12343' +⏱️ Add operation took: 800 ms +--------------------------------------------------- +[Step 83/500] Processing... +Adding data: 'Test entry #83: Template Sharding related data with random seed 32241' +⏱️ Add operation took: 847 ms +--------------------------------------------------- +[Step 84/500] Processing... +Adding data: 'Test entry #84: API Optimization related data with random seed 7997' +⏱️ Add operation took: 841 ms +--------------------------------------------------- +[Step 85/500] Processing... 
+Adding data: 'Test entry #85: JSON Vector related data with random seed 23519' +⏱️ Add operation took: 732 ms +--------------------------------------------------- +[Step 86/500] Processing... +Adding data: 'Test entry #86: Index Container related data with random seed 813' +⏱️ Add operation took: 791 ms +--------------------------------------------------- +[Step 87/500] Processing... +Adding data: 'Test entry #87: GUI Encryption related data with random seed 24527' +⏱️ Add operation took: 886 ms +--------------------------------------------------- +[Step 88/500] Processing... +Adding data: 'Test entry #88: Recovery JSON related data with random seed 28249' +⏱️ Add operation took: 874 ms +--------------------------------------------------- +[Step 89/500] Processing... +Adding data: 'Test entry #89: Resilient Template related data with random seed 6643' +⏱️ Add operation took: 851 ms +--------------------------------------------------- +[Step 90/500] Processing... +Adding data: 'Test entry #90: Network GUI related data with random seed 6917' +⏱️ Add operation took: 772 ms +--------------------------------------------------- +[Step 91/500] Processing... +Adding data: 'Test entry #91: Fault-tolerance ORM related data with random seed 3476' +⏱️ Add operation took: 748 ms +--------------------------------------------------- +[Step 92/500] Processing... +Adding data: 'Test entry #92: Websocket Integration-test related data with random seed 32561' +⏱️ Add operation took: 832 ms +--------------------------------------------------- +[Step 93/500] Processing... +Adding data: 'Test entry #93: Node Documentation related data with random seed 10928' +⏱️ Add operation took: 939 ms +--------------------------------------------------- +[Step 94/500] Processing... +Adding data: 'Test entry #94: SQL Alerting related data with random seed 10343' +⏱️ Add operation took: 842 ms +--------------------------------------------------- +[Step 95/500] Processing... 
+Adding data: 'Test entry #95: Library Fault-tolerance related data with random seed 987' +⏱️ Add operation took: 846 ms +--------------------------------------------------- +[Step 96/500] Processing... +Adding data: 'Test entry #96: Tutorial Latency related data with random seed 9789' +⏱️ Add operation took: 770 ms +--------------------------------------------------- +[Step 97/500] Processing... +Adding data: 'Test entry #97: Testing Parallelism related data with random seed 6045' +⏱️ Add operation took: 804 ms +--------------------------------------------------- +[Step 98/500] Processing... +Adding data: 'Test entry #98: Asynchronous Logging related data with random seed 30841' +⏱️ Add operation took: 924 ms +--------------------------------------------------- +[Step 99/500] Processing... +Adding data: 'Test entry #99: Scalability Example related data with random seed 25883' +⏱️ Add operation took: 783 ms +--------------------------------------------------- +[Step 100/500] Processing... +Adding data: 'Test entry #100: Latency Vector related data with random seed 28112' +⏱️ Add operation took: 778 ms + +📊 [BATCH REPORT] Items 51 to 100 + -> Average Latency: 846 ms + +--------------------------------------------------- +[Step 101/500] Processing... +Adding data: 'Test entry #101: XML GUI related data with random seed 23123' +⏱️ Add operation took: 780 ms +--------------------------------------------------- +[Step 102/500] Processing... +Adding data: 'Test entry #102: ERD ERD related data with random seed 20954' +⏱️ Add operation took: 818 ms +--------------------------------------------------- +[Step 103/500] Processing... +Adding data: 'Test entry #103: CLI Sharding related data with random seed 23375' +⏱️ Add operation took: 725 ms +--------------------------------------------------- +[Step 104/500] Processing... 
+Adding data: 'Test entry #104: Backup E2E-test related data with random seed 3184' +⏱️ Add operation took: 793 ms +--------------------------------------------------- +[Step 105/500] Processing... +Adding data: 'Test entry #105: Alerting Websocket related data with random seed 4059' +⏱️ Add operation took: 928 ms +--------------------------------------------------- +[Step 106/500] Processing... +Adding data: 'Test entry #106: Profiling NoSQL related data with random seed 23156' +⏱️ Add operation took: 900 ms +--------------------------------------------------- +[Step 107/500] Processing... +Adding data: 'Test entry #107: Resilient Pull-request related data with random seed 9028' +⏱️ Add operation took: 756 ms +--------------------------------------------------- +[Step 108/500] Processing... +Adding data: 'Test entry #108: Code-review UI related data with random seed 5301' +⏱️ Add operation took: 801 ms +--------------------------------------------------- +[Step 109/500] Processing... +Adding data: 'Test entry #109: Replication Architecture related data with random seed 27244' +⏱️ Add operation took: 874 ms +--------------------------------------------------- +[Step 110/500] Processing... +Adding data: 'Test entry #110: Boilerplate Profiling related data with random seed 6487' +⏱️ Add operation took: 890 ms +--------------------------------------------------- +[Step 111/500] Processing... +Adding data: 'Test entry #111: Replication SDK related data with random seed 8170' +⏱️ Add operation took: 740 ms +--------------------------------------------------- +[Step 112/500] Processing... +Adding data: 'Test entry #112: Caching Sharding related data with random seed 9211' +⏱️ Add operation took: 744 ms +--------------------------------------------------- +[Step 113/500] Processing... 
+Adding data: 'Test entry #113: JSON API related data with random seed 11466' +⏱️ Add operation took: 912 ms +--------------------------------------------------- +[Step 114/500] Processing... +Adding data: 'Test entry #114: Node Optimization related data with random seed 20946' +⏱️ Add operation took: 838 ms +--------------------------------------------------- +[Step 115/500] Processing... +Adding data: 'Test entry #115: UX Design-patterns related data with random seed 14554' +⏱️ Add operation took: 732 ms +--------------------------------------------------- +[Step 116/500] Processing... +Adding data: 'Test entry #116: NoSQL Cloud related data with random seed 22178' +⏱️ Add operation took: 796 ms +--------------------------------------------------- +[Step 117/500] Processing... +Adding data: 'Test entry #117: Design-patterns Blockchain related data with random seed 16711' +⏱️ Add operation took: 782 ms +--------------------------------------------------- +[Step 118/500] Processing... +Adding data: 'Test entry #118: CSV Data-structure related data with random seed 20709' +⏱️ Add operation took: 935 ms +--------------------------------------------------- +[Step 119/500] Processing... +Adding data: 'Test entry #119: CSV Resilient related data with random seed 21012' +⏱️ Add operation took: 958 ms +--------------------------------------------------- +[Step 120/500] Processing... +Adding data: 'Test entry #120: Algorithm Unit-test related data with random seed 14608' +⏱️ Add operation took: 797 ms +--------------------------------------------------- +[Step 121/500] Processing... +Adding data: 'Test entry #121: Flowchart SDK related data with random seed 24836' +⏱️ Add operation took: 929 ms +--------------------------------------------------- +[Step 122/500] Processing... +Adding data: 'Test entry #122: Cloud Websocket related data with random seed 14024' +⏱️ Add operation took: 872 ms +--------------------------------------------------- +[Step 123/500] Processing... 
+Adding data: 'Test entry #123: Encryption Alerting related data with random seed 20180' +⏱️ Add operation took: 826 ms +--------------------------------------------------- +[Step 124/500] Processing... +Adding data: 'Test entry #124: GUI Parallelism related data with random seed 24581' +⏱️ Add operation took: 946 ms +--------------------------------------------------- +[Step 125/500] Processing... +Adding data: 'Test entry #125: GraphQL Tutorial related data with random seed 2640' +⏱️ Add operation took: 862 ms +--------------------------------------------------- +[Step 126/500] Processing... +Adding data: 'Test entry #126: Architecture Framework related data with random seed 1304' +⏱️ Add operation took: 840 ms +--------------------------------------------------- +[Step 127/500] Processing... +Adding data: 'Test entry #127: Template SQL related data with random seed 14183' +⏱️ Add operation took: 963 ms +--------------------------------------------------- +[Step 128/500] Processing... +Adding data: 'Test entry #128: Code-review Documentation related data with random seed 15051' +⏱️ Add operation took: 1061 ms +--------------------------------------------------- +[Step 129/500] Processing... +Adding data: 'Test entry #129: XML Code-review related data with random seed 19652' +⏱️ Add operation took: 1110 ms +--------------------------------------------------- +[Step 130/500] Processing... +Adding data: 'Test entry #130: Resilient Sharding related data with random seed 12002' +⏱️ Add operation took: 881 ms +--------------------------------------------------- +[Step 131/500] Processing... +Adding data: 'Test entry #131: Consensus Version-control related data with random seed 2743' +⏱️ Add operation took: 908 ms +--------------------------------------------------- +[Step 132/500] Processing... 
+Adding data: 'Test entry #132: Mocking Framework related data with random seed 24854' +⏱️ Add operation took: 890 ms +--------------------------------------------------- +[Step 133/500] Processing... +Adding data: 'Test entry #133: Microservices UX related data with random seed 13570' +⏱️ Add operation took: 1106 ms +--------------------------------------------------- +[Step 134/500] Processing... +Adding data: 'Test entry #134: Template Vector related data with random seed 24421' +⏱️ Add operation took: 934 ms +--------------------------------------------------- +[Step 135/500] Processing... +Adding data: 'Test entry #135: UI ERD related data with random seed 9172' +⏱️ Add operation took: 938 ms +--------------------------------------------------- +[Step 136/500] Processing... +Adding data: 'Test entry #136: Parallelism Sample related data with random seed 18078' +⏱️ Add operation took: 920 ms +--------------------------------------------------- +[Step 137/500] Processing... +Adding data: 'Test entry #137: SQL Parallelism related data with random seed 8672' +⏱️ Add operation took: 1000 ms +--------------------------------------------------- +[Step 138/500] Processing... +Adding data: 'Test entry #138: Boilerplate ERD related data with random seed 17973' +⏱️ Add operation took: 965 ms +--------------------------------------------------- +[Step 139/500] Processing... +Adding data: 'Test entry #139: Load-balancing Optimization related data with random seed 31776' +⏱️ Add operation took: 851 ms +--------------------------------------------------- +[Step 140/500] Processing... +Adding data: 'Test entry #140: UX Architecture related data with random seed 15002' +⏱️ Add operation took: 842 ms +--------------------------------------------------- +[Step 141/500] Processing... +Adding data: 'Test entry #141: Sample UI related data with random seed 12909' +⏱️ Add operation took: 1046 ms +--------------------------------------------------- +[Step 142/500] Processing... 
+Adding data: 'Test entry #142: Event-driven Microservices related data with random seed 7351' +⏱️ Add operation took: 828 ms +--------------------------------------------------- +[Step 143/500] Processing... +Adding data: 'Test entry #143: JSON XML related data with random seed 20028' +⏱️ Add operation took: 845 ms +--------------------------------------------------- +[Step 144/500] Processing... +Adding data: 'Test entry #144: Throughput ORM related data with random seed 18100' +⏱️ Add operation took: 974 ms +--------------------------------------------------- +[Step 145/500] Processing... +Adding data: 'Test entry #145: CI/CD Backup related data with random seed 22567' +⏱️ Add operation took: 1029 ms +--------------------------------------------------- +[Step 146/500] Processing... +Adding data: 'Test entry #146: Profiling DevOps related data with random seed 14561' +⏱️ Add operation took: 982 ms +--------------------------------------------------- +[Step 147/500] Processing... +Adding data: 'Test entry #147: Message-queue Container related data with random seed 31302' +⏱️ Add operation took: 929 ms +--------------------------------------------------- +[Step 148/500] Processing... +Adding data: 'Test entry #148: Threading Best-practices related data with random seed 22158' +⏱️ Add operation took: 992 ms +--------------------------------------------------- +[Step 149/500] Processing... +Adding data: 'Test entry #149: Encryption Logging related data with random seed 27963' +⏱️ Add operation took: 959 ms +--------------------------------------------------- +[Step 150/500] Processing... +Adding data: 'Test entry #150: Diagram Integration-test related data with random seed 19409' +⏱️ Add operation took: 1243 ms + +📊 [BATCH REPORT] Items 101 to 150 + -> Average Latency: 899 ms + +--------------------------------------------------- +[Step 151/500] Processing... 
+Adding data: 'Test entry #151: Design-patterns Tutorial related data with random seed 32202' +⏱️ Add operation took: 845 ms +--------------------------------------------------- +[Step 152/500] Processing... +Adding data: 'Test entry #152: SQL Load-balancing related data with random seed 9262' +⏱️ Add operation took: 854 ms +--------------------------------------------------- +[Step 153/500] Processing... +Adding data: 'Test entry #153: Template Algorithm related data with random seed 17570' +⏱️ Add operation took: 1055 ms +--------------------------------------------------- +[Step 154/500] Processing... +Adding data: 'Test entry #154: Cloud Search related data with random seed 2185' +⏱️ Add operation took: 1139 ms +--------------------------------------------------- +[Step 155/500] Processing... +Adding data: 'Test entry #155: Sharding Version-control related data with random seed 4158' +⏱️ Add operation took: 1017 ms +--------------------------------------------------- +[Step 156/500] Processing... +Adding data: 'Test entry #156: JSON Optimization related data with random seed 28003' +⏱️ Add operation took: 997 ms +--------------------------------------------------- +[Step 157/500] Processing... +Adding data: 'Test entry #157: API Threading related data with random seed 18440' +⏱️ Add operation took: 1049 ms +--------------------------------------------------- +[Step 158/500] Processing... +Adding data: 'Test entry #158: Consensus UML related data with random seed 10660' +⏱️ Add operation took: 933 ms +--------------------------------------------------- +[Step 159/500] Processing... +Adding data: 'Test entry #159: Consensus Alerting related data with random seed 3048' +⏱️ Add operation took: 962 ms +--------------------------------------------------- +[Step 160/500] Processing... 
+Adding data: 'Test entry #160: Container NoSQL related data with random seed 15570' +⏱️ Add operation took: 998 ms +--------------------------------------------------- +[Step 161/500] Processing... +Adding data: 'Test entry #161: Orchestration Diagram related data with random seed 20102' +⏱️ Add operation took: 881 ms +--------------------------------------------------- +[Step 162/500] Processing... +Adding data: 'Test entry #162: SDK Testing related data with random seed 18191' +⏱️ Add operation took: 838 ms +--------------------------------------------------- +[Step 163/500] Processing... +Adding data: 'Test entry #163: Network Orchestration related data with random seed 17906' +⏱️ Add operation took: 883 ms +--------------------------------------------------- +[Step 164/500] Processing... +Adding data: 'Test entry #164: Threading Parallelism related data with random seed 13640' +⏱️ Add operation took: 924 ms +--------------------------------------------------- +[Step 165/500] Processing... +Adding data: 'Test entry #165: Resilient Python related data with random seed 3195' +⏱️ Add operation took: 1019 ms +--------------------------------------------------- +[Step 166/500] Processing... +Adding data: 'Test entry #166: Pull-request Unit-test related data with random seed 18963' +⏱️ Add operation took: 943 ms +--------------------------------------------------- +[Step 167/500] Processing... +Adding data: 'Test entry #167: YAML Latency related data with random seed 19573' +⏱️ Add operation took: 843 ms +--------------------------------------------------- +[Step 168/500] Processing... +Adding data: 'Test entry #168: Event-driven Logging related data with random seed 22637' +⏱️ Add operation took: 1006 ms +--------------------------------------------------- +[Step 169/500] Processing... 
+Adding data: 'Test entry #169: Template Stubbing related data with random seed 28565' +⏱️ Add operation took: 883 ms +--------------------------------------------------- +[Step 170/500] Processing... +Adding data: 'Test entry #170: ORM Container related data with random seed 30245' +⏱️ Add operation took: 838 ms +--------------------------------------------------- +[Step 171/500] Processing... +Adding data: 'Test entry #171: Blockchain Data-structure related data with random seed 29630' +⏱️ Add operation took: 910 ms +--------------------------------------------------- +[Step 172/500] Processing... +Adding data: 'Test entry #172: Logging Architecture related data with random seed 7305' +⏱️ Add operation took: 851 ms +--------------------------------------------------- +[Step 173/500] Processing... +Adding data: 'Test entry #173: Sample Cloud related data with random seed 3561' +⏱️ Add operation took: 869 ms +--------------------------------------------------- +[Step 174/500] Processing... +Adding data: 'Test entry #174: Best-practices Security related data with random seed 18653' +⏱️ Add operation took: 812 ms +--------------------------------------------------- +[Step 175/500] Processing... +Adding data: 'Test entry #175: Node UX related data with random seed 24087' +⏱️ Add operation took: 839 ms +--------------------------------------------------- +[Step 176/500] Processing... +Adding data: 'Test entry #176: Microservices Documentation related data with random seed 28218' +⏱️ Add operation took: 968 ms +--------------------------------------------------- +[Step 177/500] Processing... +Adding data: 'Test entry #177: UX Monitoring related data with random seed 16167' +⏱️ Add operation took: 875 ms +--------------------------------------------------- +[Step 178/500] Processing... 
+Adding data: 'Test entry #178: Cloud XML related data with random seed 27579' +⏱️ Add operation took: 849 ms +--------------------------------------------------- +[Step 179/500] Processing... +Adding data: 'Test entry #179: Testing Testing related data with random seed 8212' +⏱️ Add operation took: 947 ms +--------------------------------------------------- +[Step 180/500] Processing... +Adding data: 'Test entry #180: Alerting Latency related data with random seed 5286' +⏱️ Add operation took: 933 ms +--------------------------------------------------- +[Step 181/500] Processing... +Adding data: 'Test entry #181: UX Python related data with random seed 26197' +⏱️ Add operation took: 854 ms +--------------------------------------------------- +[Step 182/500] Processing... +Adding data: 'Test entry #182: Testing Container related data with random seed 11294' +⏱️ Add operation took: 846 ms +--------------------------------------------------- +[Step 183/500] Processing... +Adding data: 'Test entry #183: Code-review Data-structure related data with random seed 20935' +⏱️ Add operation took: 862 ms +--------------------------------------------------- +[Step 184/500] Processing... +Adding data: 'Test entry #184: Encryption Template related data with random seed 22013' +⏱️ Add operation took: 970 ms +--------------------------------------------------- +[Step 185/500] Processing... +Adding data: 'Test entry #185: Mocking YAML related data with random seed 11811' +⏱️ Add operation took: 838 ms +--------------------------------------------------- +[Step 186/500] Processing... +Adding data: 'Test entry #186: Tutorial Unit-test related data with random seed 2934' +⏱️ Add operation took: 794 ms +--------------------------------------------------- +[Step 187/500] Processing... +Adding data: 'Test entry #187: Documentation Cloud related data with random seed 9545' +⏱️ Add operation took: 982 ms +--------------------------------------------------- +[Step 188/500] Processing... 
+Adding data: 'Test entry #188: Branching Synchronous related data with random seed 11262' +⏱️ Add operation took: 859 ms +--------------------------------------------------- +[Step 189/500] Processing... +Adding data: 'Test entry #189: UML Network related data with random seed 6297' +⏱️ Add operation took: 768 ms +--------------------------------------------------- +[Step 190/500] Processing... +Adding data: 'Test entry #190: Mocking Network related data with random seed 32119' +⏱️ Add operation took: 1015 ms +--------------------------------------------------- +[Step 191/500] Processing... +Adding data: 'Test entry #191: Optimization Threading related data with random seed 15850' +⏱️ Add operation took: 860 ms +--------------------------------------------------- +[Step 192/500] Processing... +Adding data: 'Test entry #192: Performance Threading related data with random seed 6244' +⏱️ Add operation took: 745 ms +--------------------------------------------------- +[Step 193/500] Processing... +Adding data: 'Test entry #193: Database Resilient related data with random seed 3286' +⏱️ Add operation took: 862 ms +--------------------------------------------------- +[Step 194/500] Processing... +Adding data: 'Test entry #194: Python Consensus related data with random seed 2579' +⏱️ Add operation took: 1062 ms +--------------------------------------------------- +[Step 195/500] Processing... +Adding data: 'Test entry #195: API Orchestration related data with random seed 17599' +⏱️ Add operation took: 907 ms +--------------------------------------------------- +[Step 196/500] Processing... +Adding data: 'Test entry #196: Boilerplate Synchronous related data with random seed 18887' +⏱️ Add operation took: 850 ms +--------------------------------------------------- +[Step 197/500] Processing... 
+Adding data: 'Test entry #197: Fault-tolerance ORM related data with random seed 21855' +⏱️ Add operation took: 912 ms +--------------------------------------------------- +[Step 198/500] Processing... +Adding data: 'Test entry #198: Architecture Alerting related data with random seed 32659' +⏱️ Add operation took: 928 ms +--------------------------------------------------- +[Step 199/500] Processing... +Adding data: 'Test entry #199: Fault-tolerance Scalability related data with random seed 22961' +⏱️ Add operation took: 959 ms +--------------------------------------------------- +[Step 200/500] Processing... +Adding data: 'Test entry #200: NoSQL RESTful related data with random seed 23279' +⏱️ Add operation took: 948 ms + +📊 [BATCH REPORT] Items 151 to 200 + -> Average Latency: 911 ms + +--------------------------------------------------- +[Step 201/500] Processing... +Adding data: 'Test entry #201: Sample Parallelism related data with random seed 8912' +⏱️ Add operation took: 1045 ms +--------------------------------------------------- +[Step 202/500] Processing... +Adding data: 'Test entry #202: Search ORM related data with random seed 9657' +⏱️ Add operation took: 1138 ms +--------------------------------------------------- +[Step 203/500] Processing... +Adding data: 'Test entry #203: Recovery JSON related data with random seed 8776' +⏱️ Add operation took: 1189 ms +--------------------------------------------------- +[Step 204/500] Processing... +Adding data: 'Test entry #204: Merging Best-practices related data with random seed 6339' +⏱️ Add operation took: 1121 ms +--------------------------------------------------- +[Step 205/500] Processing... +Adding data: 'Test entry #205: Synchronous Code-review related data with random seed 1490' +⏱️ Add operation took: 1166 ms +--------------------------------------------------- +[Step 206/500] Processing... 
+Adding data: 'Test entry #206: Pull-request Template related data with random seed 2208' +⏱️ Add operation took: 1033 ms +--------------------------------------------------- +[Step 207/500] Processing... +Adding data: 'Test entry #207: DevOps JSON related data with random seed 15591' +⏱️ Add operation took: 1222 ms +--------------------------------------------------- +[Step 208/500] Processing... +Adding data: 'Test entry #208: Blockchain Profiling related data with random seed 31906' +⏱️ Add operation took: 1168 ms +--------------------------------------------------- +[Step 209/500] Processing... +Adding data: 'Test entry #209: Index Flowchart related data with random seed 16113' +⏱️ Add operation took: 1153 ms +--------------------------------------------------- +[Step 210/500] Processing... +Adding data: 'Test entry #210: Asynchronous UI related data with random seed 16307' +⏱️ Add operation took: 951 ms +--------------------------------------------------- +[Step 211/500] Processing... +Adding data: 'Test entry #211: Resilient SQL related data with random seed 21527' +⏱️ Add operation took: 888 ms +--------------------------------------------------- +[Step 212/500] Processing... +Adding data: 'Test entry #212: Design-patterns DevOps related data with random seed 19247' +⏱️ Add operation took: 1019 ms +--------------------------------------------------- +[Step 213/500] Processing... +Adding data: 'Test entry #213: GUI GraphQL related data with random seed 9145' +⏱️ Add operation took: 954 ms +--------------------------------------------------- +[Step 214/500] Processing... +Adding data: 'Test entry #214: Design-patterns Library related data with random seed 23324' +⏱️ Add operation took: 972 ms +--------------------------------------------------- +[Step 215/500] Processing... 
+Adding data: 'Test entry #215: API RESTful related data with random seed 29014' +⏱️ Add operation took: 939 ms +--------------------------------------------------- +[Step 216/500] Processing... +Adding data: 'Test entry #216: Load-balancing Sample related data with random seed 25835' +⏱️ Add operation took: 853 ms +--------------------------------------------------- +[Step 217/500] Processing... +Adding data: 'Test entry #217: Tutorial Design-patterns related data with random seed 26687' +⏱️ Add operation took: 788 ms +--------------------------------------------------- +[Step 218/500] Processing... +Adding data: 'Test entry #218: JSON Caching related data with random seed 29229' +⏱️ Add operation took: 1010 ms +--------------------------------------------------- +[Step 219/500] Processing... +Adding data: 'Test entry #219: SQL Cloud related data with random seed 31203' +⏱️ Add operation took: 950 ms +--------------------------------------------------- +[Step 220/500] Processing... +Adding data: 'Test entry #220: Mocking GUI related data with random seed 16639' +⏱️ Add operation took: 873 ms +--------------------------------------------------- +[Step 221/500] Processing... +Adding data: 'Test entry #221: Best-practices Asynchronous related data with random seed 27295' +⏱️ Add operation took: 852 ms +--------------------------------------------------- +[Step 222/500] Processing... +Adding data: 'Test entry #222: Logging Library related data with random seed 32039' +⏱️ Add operation took: 921 ms +--------------------------------------------------- +[Step 223/500] Processing... +Adding data: 'Test entry #223: Fault-tolerance Scalability related data with random seed 4603' +⏱️ Add operation took: 1211 ms +--------------------------------------------------- +[Step 224/500] Processing... 
+Adding data: 'Test entry #224: Synchronous GraphQL related data with random seed 6657' +⏱️ Add operation took: 909 ms +--------------------------------------------------- +[Step 225/500] Processing... +Adding data: 'Test entry #225: Encryption Message-queue related data with random seed 3677' +⏱️ Add operation took: 865 ms +--------------------------------------------------- +[Step 226/500] Processing... +Adding data: 'Test entry #226: Flowchart Testing related data with random seed 32215' +⏱️ Add operation took: 1039 ms +--------------------------------------------------- +[Step 227/500] Processing... +Adding data: 'Test entry #227: Framework Algorithm related data with random seed 19206' +⏱️ Add operation took: 1088 ms +--------------------------------------------------- +[Step 228/500] Processing... +Adding data: 'Test entry #228: Branching Framework related data with random seed 10213' +⏱️ Add operation took: 872 ms +--------------------------------------------------- +[Step 229/500] Processing... +Adding data: 'Test entry #229: Orchestration Blockchain related data with random seed 2138' +⏱️ Add operation took: 950 ms +--------------------------------------------------- +[Step 230/500] Processing... +Adding data: 'Test entry #230: Threading API related data with random seed 20630' +⏱️ Add operation took: 883 ms +--------------------------------------------------- +[Step 231/500] Processing... +Adding data: 'Test entry #231: Blockchain Data-structure related data with random seed 2611' +⏱️ Add operation took: 1011 ms +--------------------------------------------------- +[Step 232/500] Processing... +Adding data: 'Test entry #232: Tutorial Unit-test related data with random seed 11672' +⏱️ Add operation took: 907 ms +--------------------------------------------------- +[Step 233/500] Processing... 
+Adding data: 'Test entry #233: Unit-test Container related data with random seed 19623' +⏱️ Add operation took: 888 ms +--------------------------------------------------- +[Step 234/500] Processing... +Adding data: 'Test entry #234: Security CI/CD related data with random seed 25834' +⏱️ Add operation took: 845 ms +--------------------------------------------------- +[Step 235/500] Processing... +Adding data: 'Test entry #235: ORM Boilerplate related data with random seed 17103' +⏱️ Add operation took: 901 ms +--------------------------------------------------- +[Step 236/500] Processing... +Adding data: 'Test entry #236: Websocket Debugging related data with random seed 18729' +⏱️ Add operation took: 839 ms +--------------------------------------------------- +[Step 237/500] Processing... +Adding data: 'Test entry #237: UX Blockchain related data with random seed 8734' +⏱️ Add operation took: 794 ms +--------------------------------------------------- +[Step 238/500] Processing... +Adding data: 'Test entry #238: Merging Version-control related data with random seed 642' +⏱️ Add operation took: 876 ms +--------------------------------------------------- +[Step 239/500] Processing... +Adding data: 'Test entry #239: Data-structure Microservices related data with random seed 11589' +⏱️ Add operation took: 813 ms +--------------------------------------------------- +[Step 240/500] Processing... +Adding data: 'Test entry #240: Algorithm Asynchronous related data with random seed 6330' +⏱️ Add operation took: 843 ms +--------------------------------------------------- +[Step 241/500] Processing... +Adding data: 'Test entry #241: Alerting E2E-test related data with random seed 11551' +⏱️ Add operation took: 976 ms +--------------------------------------------------- +[Step 242/500] Processing... 
+Adding data: 'Test entry #242: Load-balancing CLI related data with random seed 21278' +⏱️ Add operation took: 917 ms +--------------------------------------------------- +[Step 243/500] Processing... +Adding data: 'Test entry #243: SDK Orchestration related data with random seed 30501' +⏱️ Add operation took: 850 ms +--------------------------------------------------- +[Step 244/500] Processing... +Adding data: 'Test entry #244: Scalability Algorithm related data with random seed 31613' +⏱️ Add operation took: 1329 ms +--------------------------------------------------- +[Step 245/500] Processing... +Adding data: 'Test entry #245: Logging Testing related data with random seed 32263' +⏱️ Add operation took: 860 ms +--------------------------------------------------- +[Step 246/500] Processing... +Adding data: 'Test entry #246: Node Documentation related data with random seed 8868' +⏱️ Add operation took: 843 ms +--------------------------------------------------- +[Step 247/500] Processing... +Adding data: 'Test entry #247: UML Integration-test related data with random seed 29544' +⏱️ Add operation took: 991 ms +--------------------------------------------------- +[Step 248/500] Processing... +Adding data: 'Test entry #248: Algorithm Data-structure related data with random seed 18077' +⏱️ Add operation took: 830 ms +--------------------------------------------------- +[Step 249/500] Processing... +Adding data: 'Test entry #249: SDK UI related data with random seed 20519' +⏱️ Add operation took: 806 ms +--------------------------------------------------- +[Step 250/500] Processing... +Adding data: 'Test entry #250: Index Library related data with random seed 2980' +⏱️ Add operation took: 925 ms + +📊 [BATCH REPORT] Items 201 to 250 + -> Average Latency: 961 ms + +--------------------------------------------------- +[Step 251/500] Processing... 
+Adding data: 'Test entry #251: Resilient Microservices related data with random seed 1673' +⏱️ Add operation took: 802 ms +--------------------------------------------------- +[Step 252/500] Processing... +Adding data: 'Test entry #252: ERD Parallelism related data with random seed 23325' +⏱️ Add operation took: 812 ms +--------------------------------------------------- +[Step 253/500] Processing... +Adding data: 'Test entry #253: Encryption Debugging related data with random seed 3566' +⏱️ Add operation took: 1021 ms +--------------------------------------------------- +[Step 254/500] Processing... +Adding data: 'Test entry #254: NoSQL Architecture related data with random seed 8978' +⏱️ Add operation took: 817 ms +--------------------------------------------------- +[Step 255/500] Processing... +Adding data: 'Test entry #255: Profiling Replication related data with random seed 7369' +⏱️ Add operation took: 842 ms +--------------------------------------------------- +[Step 256/500] Processing... +Adding data: 'Test entry #256: Profiling GraphQL related data with random seed 4781' +⏱️ Add operation took: 965 ms +--------------------------------------------------- +[Step 257/500] Processing... +Adding data: 'Test entry #257: Orchestration Architecture related data with random seed 19866' +⏱️ Add operation took: 971 ms +--------------------------------------------------- +[Step 258/500] Processing... +Adding data: 'Test entry #258: XML Synchronous related data with random seed 9079' +⏱️ Add operation took: 927 ms +--------------------------------------------------- +[Step 259/500] Processing... +Adding data: 'Test entry #259: Pub-sub DevOps related data with random seed 23944' +⏱️ Add operation took: 859 ms +--------------------------------------------------- +[Step 260/500] Processing... 
+Adding data: 'Test entry #260: CSV Event-driven related data with random seed 23556' +⏱️ Add operation took: 848 ms +--------------------------------------------------- +[Step 261/500] Processing... +Adding data: 'Test entry #261: SQL Database related data with random seed 22537' +⏱️ Add operation took: 984 ms +--------------------------------------------------- +[Step 262/500] Processing... +Adding data: 'Test entry #262: Container Branching related data with random seed 22849' +⏱️ Add operation took: 1118 ms +--------------------------------------------------- +[Step 263/500] Processing... +Adding data: 'Test entry #263: CLI Stubbing related data with random seed 13486' +⏱️ Add operation took: 917 ms +--------------------------------------------------- +[Step 264/500] Processing... +Adding data: 'Test entry #264: SQL Message-queue related data with random seed 315' +⏱️ Add operation took: 956 ms +--------------------------------------------------- +[Step 265/500] Processing... +Adding data: 'Test entry #265: Version-control Parallelism related data with random seed 26444' +⏱️ Add operation took: 1132 ms +--------------------------------------------------- +[Step 266/500] Processing... +Adding data: 'Test entry #266: Merging Diagram related data with random seed 4694' +⏱️ Add operation took: 931 ms +--------------------------------------------------- +[Step 267/500] Processing... +Adding data: 'Test entry #267: GUI Threading related data with random seed 19721' +⏱️ Add operation took: 1044 ms +--------------------------------------------------- +[Step 268/500] Processing... +Adding data: 'Test entry #268: GUI Security related data with random seed 8853' +⏱️ Add operation took: 837 ms +--------------------------------------------------- +[Step 269/500] Processing... +Adding data: 'Test entry #269: Database DevOps related data with random seed 15436' +⏱️ Add operation took: 1538 ms +--------------------------------------------------- +[Step 270/500] Processing... 
+Adding data: 'Test entry #270: Cloud Consensus related data with random seed 15128' +⏱️ Add operation took: 1227 ms +--------------------------------------------------- +[Step 271/500] Processing... +Adding data: 'Test entry #271: Encryption DevOps related data with random seed 2715' +⏱️ Add operation took: 1122 ms +--------------------------------------------------- +[Step 272/500] Processing... +Adding data: 'Test entry #272: Git Data-structure related data with random seed 29885' +⏱️ Add operation took: 915 ms +--------------------------------------------------- +[Step 273/500] Processing... +Adding data: 'Test entry #273: Data-structure Performance related data with random seed 1219' +⏱️ Add operation took: 894 ms +--------------------------------------------------- +[Step 274/500] Processing... +Adding data: 'Test entry #274: Testing Caching related data with random seed 1136' +⏱️ Add operation took: 1046 ms +--------------------------------------------------- +[Step 275/500] Processing... +Adding data: 'Test entry #275: Framework Orchestration related data with random seed 12664' +⏱️ Add operation took: 827 ms +--------------------------------------------------- +[Step 276/500] Processing... +Adding data: 'Test entry #276: Backup Asynchronous related data with random seed 23970' +⏱️ Add operation took: 909 ms +--------------------------------------------------- +[Step 277/500] Processing... +Adding data: 'Test entry #277: Boilerplate Tutorial related data with random seed 24276' +⏱️ Add operation took: 945 ms +--------------------------------------------------- +[Step 278/500] Processing... +Adding data: 'Test entry #278: UML E2E-test related data with random seed 2723' +⏱️ Add operation took: 873 ms +--------------------------------------------------- +[Step 279/500] Processing... 
+Adding data: 'Test entry #279: Tutorial SDK related data with random seed 4966' +⏱️ Add operation took: 875 ms +--------------------------------------------------- +[Step 280/500] Processing... +Adding data: 'Test entry #280: Boilerplate Search related data with random seed 32715' +⏱️ Add operation took: 907 ms +--------------------------------------------------- +[Step 281/500] Processing... +Adding data: 'Test entry #281: Optimization Resilient related data with random seed 24028' +⏱️ Add operation took: 867 ms +--------------------------------------------------- +[Step 282/500] Processing... +Adding data: 'Test entry #282: UML Testing related data with random seed 21001' +⏱️ Add operation took: 972 ms +--------------------------------------------------- +[Step 283/500] Processing... +Adding data: 'Test entry #283: Network Network related data with random seed 21734' +⏱️ Add operation took: 1156 ms +--------------------------------------------------- +[Step 284/500] Processing... +Adding data: 'Test entry #284: UX CLI related data with random seed 10670' +⏱️ Add operation took: 907 ms +--------------------------------------------------- +[Step 285/500] Processing... +Adding data: 'Test entry #285: UML Sharding related data with random seed 24334' +⏱️ Add operation took: 922 ms +--------------------------------------------------- +[Step 286/500] Processing... +Adding data: 'Test entry #286: Python SDK related data with random seed 14667' +⏱️ Add operation took: 1017 ms +--------------------------------------------------- +[Step 287/500] Processing... +Adding data: 'Test entry #287: Example Synchronous related data with random seed 18676' +⏱️ Add operation took: 1048 ms +--------------------------------------------------- +[Step 288/500] Processing... +Adding data: 'Test entry #288: Index Pull-request related data with random seed 4855' +⏱️ Add operation took: 837 ms +--------------------------------------------------- +[Step 289/500] Processing... 
+Adding data: 'Test entry #289: RESTful Data-structure related data with random seed 1227' +⏱️ Add operation took: 962 ms +--------------------------------------------------- +[Step 290/500] Processing... +Adding data: 'Test entry #290: Caching Sample related data with random seed 12446' +⏱️ Add operation took: 1105 ms +--------------------------------------------------- +[Step 291/500] Processing... +Adding data: 'Test entry #291: YAML Performance related data with random seed 3598' +⏱️ Add operation took: 1047 ms +--------------------------------------------------- +[Step 292/500] Processing... +Adding data: 'Test entry #292: Asynchronous Microservices related data with random seed 8204' +⏱️ Add operation took: 997 ms +--------------------------------------------------- +[Step 293/500] Processing... +Adding data: 'Test entry #293: Replication CLI related data with random seed 2540' +⏱️ Add operation took: 1114 ms +--------------------------------------------------- +[Step 294/500] Processing... +Adding data: 'Test entry #294: ORM Throughput related data with random seed 31130' +⏱️ Add operation took: 983 ms +--------------------------------------------------- +[Step 295/500] Processing... +Adding data: 'Test entry #295: YAML Tutorial related data with random seed 7734' +⏱️ Add operation took: 920 ms +--------------------------------------------------- +[Step 296/500] Processing... +Adding data: 'Test entry #296: ERD RESTful related data with random seed 19760' +⏱️ Add operation took: 925 ms +--------------------------------------------------- +[Step 297/500] Processing... +Adding data: 'Test entry #297: Event-driven Code-review related data with random seed 23440' +⏱️ Add operation took: 851 ms +--------------------------------------------------- +[Step 298/500] Processing... 
+Adding data: 'Test entry #298: XML Example related data with random seed 18435' +⏱️ Add operation took: 972 ms +--------------------------------------------------- +[Step 299/500] Processing... +Adding data: 'Test entry #299: CLI Git related data with random seed 18823' +⏱️ Add operation took: 1021 ms +--------------------------------------------------- +[Step 300/500] Processing... +Adding data: 'Test entry #300: Parallelism Asynchronous related data with random seed 2571' +⏱️ Add operation took: 916 ms + +📊 [BATCH REPORT] Items 251 to 300 + -> Average Latency: 968 ms + +--------------------------------------------------- +[Step 301/500] Processing... +Adding data: 'Test entry #301: Index ORM related data with random seed 30920' +⏱️ Add operation took: 926 ms +--------------------------------------------------- +[Step 302/500] Processing... +Adding data: 'Test entry #302: Load-balancing Diagram related data with random seed 1050' +⏱️ Add operation took: 889 ms +--------------------------------------------------- +[Step 303/500] Processing... +Adding data: 'Test entry #303: XML Tutorial related data with random seed 29551' +⏱️ Add operation took: 893 ms +--------------------------------------------------- +[Step 304/500] Processing... +Adding data: 'Test entry #304: E2E-test Unit-test related data with random seed 19213' +⏱️ Add operation took: 981 ms +--------------------------------------------------- +[Step 305/500] Processing... +Adding data: 'Test entry #305: Index Database related data with random seed 10951' +⏱️ Add operation took: 986 ms +--------------------------------------------------- +[Step 306/500] Processing... +Adding data: 'Test entry #306: Recovery Library related data with random seed 21298' +⏱️ Add operation took: 1006 ms +--------------------------------------------------- +[Step 307/500] Processing... 
+Adding data: 'Test entry #307: NoSQL Integration-test related data with random seed 5046' +⏱️ Add operation took: 924 ms +--------------------------------------------------- +[Step 308/500] Processing... +Adding data: 'Test entry #308: GraphQL GraphQL related data with random seed 25902' +⏱️ Add operation took: 880 ms +--------------------------------------------------- +[Step 309/500] Processing... +Adding data: 'Test entry #309: Event-driven Orchestration related data with random seed 21023' +⏱️ Add operation took: 1029 ms +--------------------------------------------------- +[Step 310/500] Processing... +Adding data: 'Test entry #310: RESTful UML related data with random seed 5709' +⏱️ Add operation took: 1069 ms +--------------------------------------------------- +[Step 311/500] Processing... +Adding data: 'Test entry #311: Asynchronous Best-practices related data with random seed 24139' +⏱️ Add operation took: 1174 ms +--------------------------------------------------- +[Step 312/500] Processing... +Adding data: 'Test entry #312: Event-driven Tutorial related data with random seed 31631' +⏱️ Add operation took: 957 ms +--------------------------------------------------- +[Step 313/500] Processing... +Adding data: 'Test entry #313: Data-structure Monitoring related data with random seed 10551' +⏱️ Add operation took: 1158 ms +--------------------------------------------------- +[Step 314/500] Processing... +Adding data: 'Test entry #314: Threading RESTful related data with random seed 12875' +⏱️ Add operation took: 1148 ms +--------------------------------------------------- +[Step 315/500] Processing... +Adding data: 'Test entry #315: Encryption Best-practices related data with random seed 23350' +⏱️ Add operation took: 1219 ms +--------------------------------------------------- +[Step 316/500] Processing... 
+Adding data: 'Test entry #316: GUI Event-driven related data with random seed 24961' +⏱️ Add operation took: 2146 ms +--------------------------------------------------- +[Step 317/500] Processing... +Adding data: 'Test entry #317: Stubbing CI/CD related data with random seed 3418' +⏱️ Add operation took: 2723 ms +--------------------------------------------------- +[Step 318/500] Processing... +Adding data: 'Test entry #318: Vector Pub-sub related data with random seed 10169' +⏱️ Add operation took: 2119 ms +--------------------------------------------------- +[Step 319/500] Processing... +Adding data: 'Test entry #319: Documentation Example related data with random seed 22093' +⏱️ Add operation took: 1268 ms +--------------------------------------------------- +[Step 320/500] Processing... +Adding data: 'Test entry #320: Library UX related data with random seed 29021' +⏱️ Add operation took: 1088 ms +--------------------------------------------------- +[Step 321/500] Processing... +Adding data: 'Test entry #321: Recovery Git related data with random seed 3971' +⏱️ Add operation took: 1114 ms +--------------------------------------------------- +[Step 322/500] Processing... +Adding data: 'Test entry #322: ORM Template related data with random seed 28110' +⏱️ Add operation took: 1119 ms +--------------------------------------------------- +[Step 323/500] Processing... +Adding data: 'Test entry #323: Unit-test YAML related data with random seed 16167' +⏱️ Add operation took: 953 ms +--------------------------------------------------- +[Step 324/500] Processing... +Adding data: 'Test entry #324: Boilerplate CI/CD related data with random seed 11105' +⏱️ Add operation took: 942 ms +--------------------------------------------------- +[Step 325/500] Processing... +Adding data: 'Test entry #325: Vector UX related data with random seed 2934' +⏱️ Add operation took: 1093 ms +--------------------------------------------------- +[Step 326/500] Processing... 
+Adding data: 'Test entry #326: CI/CD Sample related data with random seed 22568' +⏱️ Add operation took: 933 ms +--------------------------------------------------- +[Step 327/500] Processing... +Adding data: 'Test entry #327: NoSQL Architecture related data with random seed 14507' +⏱️ Add operation took: 1044 ms +--------------------------------------------------- +[Step 328/500] Processing... +Adding data: 'Test entry #328: Node DevOps related data with random seed 7412' +⏱️ Add operation took: 953 ms +--------------------------------------------------- +[Step 329/500] Processing... +Adding data: 'Test entry #329: Code-review Framework related data with random seed 1817' +⏱️ Add operation took: 984 ms +--------------------------------------------------- +[Step 330/500] Processing... +Adding data: 'Test entry #330: Boilerplate Mocking related data with random seed 5771' +⏱️ Add operation took: 878 ms +--------------------------------------------------- +[Step 331/500] Processing... +Adding data: 'Test entry #331: Encryption Sample related data with random seed 30916' +⏱️ Add operation took: 876 ms +--------------------------------------------------- +[Step 332/500] Processing... +Adding data: 'Test entry #332: Performance YAML related data with random seed 23826' +⏱️ Add operation took: 885 ms +--------------------------------------------------- +[Step 333/500] Processing... +Adding data: 'Test entry #333: Profiling Debugging related data with random seed 25431' +⏱️ Add operation took: 1026 ms +--------------------------------------------------- +[Step 334/500] Processing... +Adding data: 'Test entry #334: Sharding Vector related data with random seed 29020' +⏱️ Add operation took: 860 ms +--------------------------------------------------- +[Step 335/500] Processing... 
+Adding data: 'Test entry #335: Throughput E2E-test related data with random seed 11728' +⏱️ Add operation took: 883 ms +--------------------------------------------------- +[Step 336/500] Processing... +Adding data: 'Test entry #336: API Alerting related data with random seed 704' +⏱️ Add operation took: 945 ms +--------------------------------------------------- +[Step 337/500] Processing... +Adding data: 'Test entry #337: Algorithm Orchestration related data with random seed 15318' +⏱️ Add operation took: 962 ms +--------------------------------------------------- +[Step 338/500] Processing... +Adding data: 'Test entry #338: Version-control Monitoring related data with random seed 11905' +⏱️ Add operation took: 1067 ms +--------------------------------------------------- +[Step 339/500] Processing... +Adding data: 'Test entry #339: API Debugging related data with random seed 14456' +⏱️ Add operation took: 1005 ms +--------------------------------------------------- +[Step 340/500] Processing... +Adding data: 'Test entry #340: Version-control Sample related data with random seed 23846' +⏱️ Add operation took: 1043 ms +--------------------------------------------------- +[Step 341/500] Processing... +Adding data: 'Test entry #341: Node Vector related data with random seed 30034' +⏱️ Add operation took: 985 ms +--------------------------------------------------- +[Step 342/500] Processing... +Adding data: 'Test entry #342: Python Flowchart related data with random seed 32467' +⏱️ Add operation took: 1223 ms +--------------------------------------------------- +[Step 343/500] Processing... +Adding data: 'Test entry #343: GraphQL UX related data with random seed 26726' +⏱️ Add operation took: 1143 ms +--------------------------------------------------- +[Step 344/500] Processing... 
+Adding data: 'Test entry #344: Architecture Flowchart related data with random seed 2645' +⏱️ Add operation took: 1355 ms +--------------------------------------------------- +[Step 345/500] Processing... +Adding data: 'Test entry #345: CI/CD RESTful related data with random seed 23698' +⏱️ Add operation took: 1105 ms +--------------------------------------------------- +[Step 346/500] Processing... +Adding data: 'Test entry #346: Database Microservices related data with random seed 1068' +⏱️ Add operation took: 1342 ms +--------------------------------------------------- +[Step 347/500] Processing... +Adding data: 'Test entry #347: Index Git related data with random seed 12917' +⏱️ Add operation took: 906 ms +--------------------------------------------------- +[Step 348/500] Processing... +Adding data: 'Test entry #348: Blockchain SDK related data with random seed 25254' +⏱️ Add operation took: 840 ms +--------------------------------------------------- +[Step 349/500] Processing... +Adding data: 'Test entry #349: Parallelism Git related data with random seed 27215' +⏱️ Add operation took: 1181 ms +--------------------------------------------------- +[Step 350/500] Processing... +Adding data: 'Test entry #350: XML Concurrency related data with random seed 12519' +⏱️ Add operation took: 972 ms + +📊 [BATCH REPORT] Items 301 to 350 + -> Average Latency: 1104 ms + +--------------------------------------------------- +[Step 351/500] Processing... +Adding data: 'Test entry #351: Alerting Algorithm related data with random seed 24557' +⏱️ Add operation took: 1073 ms +--------------------------------------------------- +[Step 352/500] Processing... +Adding data: 'Test entry #352: Latency JSON related data with random seed 17683' +⏱️ Add operation took: 945 ms +--------------------------------------------------- +[Step 353/500] Processing... 
+Adding data: 'Test entry #353: Microservices YAML related data with random seed 3068' +⏱️ Add operation took: 914 ms +--------------------------------------------------- +[Step 354/500] Processing... +Adding data: 'Test entry #354: Load-balancing Event-driven related data with random seed 18844' +⏱️ Add operation took: 962 ms +--------------------------------------------------- +[Step 355/500] Processing... +Adding data: 'Test entry #355: Throughput Sharding related data with random seed 6245' +⏱️ Add operation took: 936 ms +--------------------------------------------------- +[Step 356/500] Processing... +Adding data: 'Test entry #356: NoSQL Encryption related data with random seed 18580' +⏱️ Add operation took: 930 ms +--------------------------------------------------- +[Step 357/500] Processing... +Adding data: 'Test entry #357: Monitoring Library related data with random seed 91' +⏱️ Add operation took: 898 ms +--------------------------------------------------- +[Step 358/500] Processing... +Adding data: 'Test entry #358: Index Stubbing related data with random seed 6263' +⏱️ Add operation took: 941 ms +--------------------------------------------------- +[Step 359/500] Processing... +Adding data: 'Test entry #359: Encryption Alerting related data with random seed 12354' +⏱️ Add operation took: 902 ms +--------------------------------------------------- +[Step 360/500] Processing... +Adding data: 'Test entry #360: GUI Blockchain related data with random seed 10277' +⏱️ Add operation took: 940 ms +--------------------------------------------------- +[Step 361/500] Processing... +Adding data: 'Test entry #361: Merging Logging related data with random seed 12323' +⏱️ Add operation took: 1001 ms +--------------------------------------------------- +[Step 362/500] Processing... 
+Adding data: 'Test entry #362: Pub-sub SQL related data with random seed 11044' +⏱️ Add operation took: 883 ms +--------------------------------------------------- +[Step 363/500] Processing... +Adding data: 'Test entry #363: Tutorial Template related data with random seed 30949' +⏱️ Add operation took: 1016 ms +--------------------------------------------------- +[Step 364/500] Processing... +Adding data: 'Test entry #364: Database SDK related data with random seed 22809' +⏱️ Add operation took: 946 ms +--------------------------------------------------- +[Step 365/500] Processing... +Adding data: 'Test entry #365: Framework ERD related data with random seed 21870' +⏱️ Add operation took: 929 ms +--------------------------------------------------- +[Step 366/500] Processing... +Adding data: 'Test entry #366: Sample Database related data with random seed 18353' +⏱️ Add operation took: 982 ms +--------------------------------------------------- +[Step 367/500] Processing... +Adding data: 'Test entry #367: SDK Recovery related data with random seed 20345' +⏱️ Add operation took: 891 ms +--------------------------------------------------- +[Step 368/500] Processing... +Adding data: 'Test entry #368: Stubbing Resilient related data with random seed 618' +⏱️ Add operation took: 849 ms +--------------------------------------------------- +[Step 369/500] Processing... +Adding data: 'Test entry #369: Replication CI/CD related data with random seed 23286' +⏱️ Add operation took: 917 ms +--------------------------------------------------- +[Step 370/500] Processing... +Adding data: 'Test entry #370: Latency Library related data with random seed 12394' +⏱️ Add operation took: 1104 ms +--------------------------------------------------- +[Step 371/500] Processing... +Adding data: 'Test entry #371: Code-review Alerting related data with random seed 6709' +⏱️ Add operation took: 1033 ms +--------------------------------------------------- +[Step 372/500] Processing... 
+Adding data: 'Test entry #372: Best-practices Search related data with random seed 17128' +⏱️ Add operation took: 1095 ms +--------------------------------------------------- +[Step 373/500] Processing... +Adding data: 'Test entry #373: NoSQL Best-practices related data with random seed 20318' +⏱️ Add operation took: 969 ms +--------------------------------------------------- +[Step 374/500] Processing... +Adding data: 'Test entry #374: Replication Example related data with random seed 6291' +⏱️ Add operation took: 1132 ms +--------------------------------------------------- +[Step 375/500] Processing... +Adding data: 'Test entry #375: Integration-test Branching related data with random seed 18965' +⏱️ Add operation took: 1010 ms +--------------------------------------------------- +[Step 376/500] Processing... +Adding data: 'Test entry #376: Scalability Integration-test related data with random seed 12414' +⏱️ Add operation took: 1095 ms +--------------------------------------------------- +[Step 377/500] Processing... +Adding data: 'Test entry #377: UML GUI related data with random seed 13474' +⏱️ Add operation took: 912 ms +--------------------------------------------------- +[Step 378/500] Processing... +Adding data: 'Test entry #378: ERD Parallelism related data with random seed 25512' +⏱️ Add operation took: 948 ms +--------------------------------------------------- +[Step 379/500] Processing... +Adding data: 'Test entry #379: Code-review ERD related data with random seed 18047' +⏱️ Add operation took: 978 ms +--------------------------------------------------- +[Step 380/500] Processing... +Adding data: 'Test entry #380: CI/CD Replication related data with random seed 744' +⏱️ Add operation took: 896 ms +--------------------------------------------------- +[Step 381/500] Processing... 
+Adding data: 'Test entry #381: Python Pub-sub related data with random seed 29514' +⏱️ Add operation took: 1028 ms +--------------------------------------------------- +[Step 382/500] Processing... +Adding data: 'Test entry #382: Encryption Algorithm related data with random seed 9600' +⏱️ Add operation took: 1108 ms +--------------------------------------------------- +[Step 383/500] Processing... +Adding data: 'Test entry #383: Python Resilient related data with random seed 20296' +⏱️ Add operation took: 939 ms +--------------------------------------------------- +[Step 384/500] Processing... +Adding data: 'Test entry #384: Branching Sample related data with random seed 29822' +⏱️ Add operation took: 1025 ms +--------------------------------------------------- +[Step 385/500] Processing... +Adding data: 'Test entry #385: ORM Vector related data with random seed 277' +⏱️ Add operation took: 962 ms +--------------------------------------------------- +[Step 386/500] Processing... +Adding data: 'Test entry #386: Framework Testing related data with random seed 14924' +⏱️ Add operation took: 939 ms +--------------------------------------------------- +[Step 387/500] Processing... +Adding data: 'Test entry #387: Index Debugging related data with random seed 29125' +⏱️ Add operation took: 1020 ms +--------------------------------------------------- +[Step 388/500] Processing... +Adding data: 'Test entry #388: NoSQL SDK related data with random seed 18178' +⏱️ Add operation took: 932 ms +--------------------------------------------------- +[Step 389/500] Processing... +Adding data: 'Test entry #389: Data-structure JSON related data with random seed 23144' +⏱️ Add operation took: 938 ms +--------------------------------------------------- +[Step 390/500] Processing... +Adding data: 'Test entry #390: Documentation Diagram related data with random seed 8258' +⏱️ Add operation took: 1139 ms +--------------------------------------------------- +[Step 391/500] Processing... 
+Adding data: 'Test entry #391: CSV Resilient related data with random seed 1245' +⏱️ Add operation took: 921 ms +--------------------------------------------------- +[Step 392/500] Processing... +Adding data: 'Test entry #392: Algorithm CSV related data with random seed 18964' +⏱️ Add operation took: 978 ms +--------------------------------------------------- +[Step 393/500] Processing... +Adding data: 'Test entry #393: Scalability Load-balancing related data with random seed 32178' +⏱️ Add operation took: 1182 ms +--------------------------------------------------- +[Step 394/500] Processing... +Adding data: 'Test entry #394: Sharding Performance related data with random seed 1607' +⏱️ Add operation took: 1032 ms +--------------------------------------------------- +[Step 395/500] Processing... +Adding data: 'Test entry #395: YAML Threading related data with random seed 20711' +⏱️ Add operation took: 1062 ms +--------------------------------------------------- +[Step 396/500] Processing... +Adding data: 'Test entry #396: Sample Message-queue related data with random seed 1074' +⏱️ Add operation took: 942 ms +--------------------------------------------------- +[Step 397/500] Processing... +Adding data: 'Test entry #397: Search Fault-tolerance related data with random seed 20761' +⏱️ Add operation took: 1074 ms +--------------------------------------------------- +[Step 398/500] Processing... +Adding data: 'Test entry #398: Backup Node related data with random seed 21714' +⏱️ Add operation took: 960 ms +--------------------------------------------------- +[Step 399/500] Processing... +Adding data: 'Test entry #399: Logging Encryption related data with random seed 25211' +⏱️ Add operation took: 1068 ms +--------------------------------------------------- +[Step 400/500] Processing... 
+Adding data: 'Test entry #400: GUI Container related data with random seed 26968' +⏱️ Add operation took: 892 ms + +📊 [BATCH REPORT] Items 351 to 400 + -> Average Latency: 983 ms + +--------------------------------------------------- +[Step 401/500] Processing... +Adding data: 'Test entry #401: Asynchronous ORM related data with random seed 14345' +⏱️ Add operation took: 1042 ms +--------------------------------------------------- +[Step 402/500] Processing... +Adding data: 'Test entry #402: UX Load-balancing related data with random seed 5781' +⏱️ Add operation took: 986 ms +--------------------------------------------------- +[Step 403/500] Processing... +Adding data: 'Test entry #403: Optimization Code-review related data with random seed 25707' +⏱️ Add operation took: 1148 ms +--------------------------------------------------- +[Step 404/500] Processing... +Adding data: 'Test entry #404: Unit-test Fault-tolerance related data with random seed 25177' +⏱️ Add operation took: 1266 ms +--------------------------------------------------- +[Step 405/500] Processing... +Adding data: 'Test entry #405: Throughput Concurrency related data with random seed 5148' +⏱️ Add operation took: 1073 ms +--------------------------------------------------- +[Step 406/500] Processing... +Adding data: 'Test entry #406: Debugging Stubbing related data with random seed 28773' +⏱️ Add operation took: 1128 ms +--------------------------------------------------- +[Step 407/500] Processing... +Adding data: 'Test entry #407: Recovery Blockchain related data with random seed 694' +⏱️ Add operation took: 949 ms +--------------------------------------------------- +[Step 408/500] Processing... +Adding data: 'Test entry #408: CI/CD Search related data with random seed 26877' +⏱️ Add operation took: 991 ms +--------------------------------------------------- +[Step 409/500] Processing... 
+Adding data: 'Test entry #409: SQL Mocking related data with random seed 18821' +⏱️ Add operation took: 978 ms +--------------------------------------------------- +[Step 410/500] Processing... +Adding data: 'Test entry #410: Backup Parallelism related data with random seed 5421' +⏱️ Add operation took: 2301 ms +--------------------------------------------------- +[Step 411/500] Processing... +Adding data: 'Test entry #411: Security Synchronous related data with random seed 11940' +⏱️ Add operation took: 1003 ms +--------------------------------------------------- +[Step 412/500] Processing... +Adding data: 'Test entry #412: YAML Orchestration related data with random seed 30910' +⏱️ Add operation took: 1191 ms +--------------------------------------------------- +[Step 413/500] Processing... +Adding data: 'Test entry #413: JSON Alerting related data with random seed 8852' +⏱️ Add operation took: 1177 ms +--------------------------------------------------- +[Step 414/500] Processing... +Adding data: 'Test entry #414: GraphQL E2E-test related data with random seed 14354' +⏱️ Add operation took: 1049 ms +--------------------------------------------------- +[Step 415/500] Processing... +Adding data: 'Test entry #415: Threading Algorithm related data with random seed 8994' +⏱️ Add operation took: 1118 ms +--------------------------------------------------- +[Step 416/500] Processing... +Adding data: 'Test entry #416: Scalability Unit-test related data with random seed 5162' +⏱️ Add operation took: 913 ms +--------------------------------------------------- +[Step 417/500] Processing... +Adding data: 'Test entry #417: Event-driven Merging related data with random seed 13881' +⏱️ Add operation took: 951 ms +--------------------------------------------------- +[Step 418/500] Processing... 
+Adding data: 'Test entry #418: Pub-sub API related data with random seed 23035' +⏱️ Add operation took: 1148 ms +--------------------------------------------------- +[Step 419/500] Processing... +Adding data: 'Test entry #419: Monitoring Alerting related data with random seed 23527' +⏱️ Add operation took: 990 ms +--------------------------------------------------- +[Step 420/500] Processing... +Adding data: 'Test entry #420: JSON Microservices related data with random seed 9271' +⏱️ Add operation took: 1181 ms +--------------------------------------------------- +[Step 421/500] Processing... +Adding data: 'Test entry #421: Git Sample related data with random seed 22798' +⏱️ Add operation took: 1005 ms +--------------------------------------------------- +[Step 422/500] Processing... +Adding data: 'Test entry #422: Unit-test Example related data with random seed 16994' +⏱️ Add operation took: 1024 ms +--------------------------------------------------- +[Step 423/500] Processing... +Adding data: 'Test entry #423: Diagram RESTful related data with random seed 14813' +⏱️ Add operation took: 1163 ms +--------------------------------------------------- +[Step 424/500] Processing... +Adding data: 'Test entry #424: Pull-request YAML related data with random seed 9732' +⏱️ Add operation took: 949 ms +--------------------------------------------------- +[Step 425/500] Processing... +Adding data: 'Test entry #425: CLI Data-structure related data with random seed 7062' +⏱️ Add operation took: 935 ms +--------------------------------------------------- +[Step 426/500] Processing... +Adding data: 'Test entry #426: Database Database related data with random seed 19208' +⏱️ Add operation took: 968 ms +--------------------------------------------------- +[Step 427/500] Processing... +Adding data: 'Test entry #427: GUI Template related data with random seed 25503' +⏱️ Add operation took: 1159 ms +--------------------------------------------------- +[Step 428/500] Processing... 
+Adding data: 'Test entry #428: Fault-tolerance ORM related data with random seed 17392' +⏱️ Add operation took: 1043 ms +--------------------------------------------------- +[Step 429/500] Processing... +Adding data: 'Test entry #429: Version-control Code-review related data with random seed 14304' +⏱️ Add operation took: 1144 ms +--------------------------------------------------- +[Step 430/500] Processing... +Adding data: 'Test entry #430: Recovery Diagram related data with random seed 24192' +⏱️ Add operation took: 1024 ms +--------------------------------------------------- +[Step 431/500] Processing... +Adding data: 'Test entry #431: SQL Version-control related data with random seed 505' +⏱️ Add operation took: 1143 ms +--------------------------------------------------- +[Step 432/500] Processing... +Adding data: 'Test entry #432: Parallelism Architecture related data with random seed 13977' +⏱️ Add operation took: 963 ms +--------------------------------------------------- +[Step 433/500] Processing... +Adding data: 'Test entry #433: Vector E2E-test related data with random seed 22356' +⏱️ Add operation took: 1033 ms +--------------------------------------------------- +[Step 434/500] Processing... +Adding data: 'Test entry #434: Tutorial Backup related data with random seed 6053' +⏱️ Add operation took: 951 ms +--------------------------------------------------- +[Step 435/500] Processing... +Adding data: 'Test entry #435: Architecture CSV related data with random seed 22129' +⏱️ Add operation took: 917 ms +--------------------------------------------------- +[Step 436/500] Processing... +Adding data: 'Test entry #436: Example Branching related data with random seed 25763' +⏱️ Add operation took: 967 ms +--------------------------------------------------- +[Step 437/500] Processing... 
+Adding data: 'Test entry #437: Microservices Optimization related data with random seed 8916' +⏱️ Add operation took: 1014 ms +--------------------------------------------------- +[Step 438/500] Processing... +Adding data: 'Test entry #438: Pull-request API related data with random seed 19781' +⏱️ Add operation took: 929 ms +--------------------------------------------------- +[Step 439/500] Processing... +Adding data: 'Test entry #439: CSV Library related data with random seed 9941' +⏱️ Add operation took: 1002 ms +--------------------------------------------------- +[Step 440/500] Processing... +Adding data: 'Test entry #440: Index Git related data with random seed 8279' +⏱️ Add operation took: 923 ms +--------------------------------------------------- +[Step 441/500] Processing... +Adding data: 'Test entry #441: Code-review Concurrency related data with random seed 21085' +⏱️ Add operation took: 883 ms +--------------------------------------------------- +[Step 442/500] Processing... +Adding data: 'Test entry #442: Unit-test Framework related data with random seed 4836' +⏱️ Add operation took: 951 ms +--------------------------------------------------- +[Step 443/500] Processing... +Adding data: 'Test entry #443: Threading Boilerplate related data with random seed 23641' +⏱️ Add operation took: 909 ms +--------------------------------------------------- +[Step 444/500] Processing... +Adding data: 'Test entry #444: Vector Debugging related data with random seed 5646' +⏱️ Add operation took: 950 ms +--------------------------------------------------- +[Step 445/500] Processing... +Adding data: 'Test entry #445: Message-queue Consensus related data with random seed 22969' +⏱️ Add operation took: 935 ms +--------------------------------------------------- +[Step 446/500] Processing... 
+Adding data: 'Test entry #446: Container RESTful related data with random seed 10831' +⏱️ Add operation took: 945 ms +--------------------------------------------------- +[Step 447/500] Processing... +Adding data: 'Test entry #447: Message-queue Unit-test related data with random seed 12353' +⏱️ Add operation took: 867 ms +--------------------------------------------------- +[Step 448/500] Processing... +Adding data: 'Test entry #448: Tutorial Unit-test related data with random seed 7829' +⏱️ Add operation took: 900 ms +--------------------------------------------------- +[Step 449/500] Processing... +Adding data: 'Test entry #449: UI Load-balancing related data with random seed 29998' +⏱️ Add operation took: 972 ms +--------------------------------------------------- +[Step 450/500] Processing... +Adding data: 'Test entry #450: CI/CD Data-structure related data with random seed 9170' +⏱️ Add operation took: 1303 ms + +📊 [BATCH REPORT] Items 401 to 450 + -> Average Latency: 1049 ms + +--------------------------------------------------- +[Step 451/500] Processing... +Adding data: 'Test entry #451: Library Performance related data with random seed 31802' +⏱️ Add operation took: 1022 ms +--------------------------------------------------- +[Step 452/500] Processing... +Adding data: 'Test entry #452: Event-driven Performance related data with random seed 26374' +⏱️ Add operation took: 1128 ms +--------------------------------------------------- +[Step 453/500] Processing... +Adding data: 'Test entry #453: CSV Profiling related data with random seed 24443' +⏱️ Add operation took: 959 ms +--------------------------------------------------- +[Step 454/500] Processing... +Adding data: 'Test entry #454: CSV Best-practices related data with random seed 21695' +⏱️ Add operation took: 988 ms +--------------------------------------------------- +[Step 455/500] Processing... 
+Adding data: 'Test entry #455: E2E-test Integration-test related data with random seed 4838' +⏱️ Add operation took: 1069 ms +--------------------------------------------------- +[Step 456/500] Processing... +Adding data: 'Test entry #456: Sample Merging related data with random seed 5260' +⏱️ Add operation took: 1041 ms +--------------------------------------------------- +[Step 457/500] Processing... +Adding data: 'Test entry #457: XML Recovery related data with random seed 2790' +⏱️ Add operation took: 1339 ms +--------------------------------------------------- +[Step 458/500] Processing... +Adding data: 'Test entry #458: Encryption CSV related data with random seed 22642' +⏱️ Add operation took: 1015 ms +--------------------------------------------------- +[Step 459/500] Processing... +Adding data: 'Test entry #459: CSV YAML related data with random seed 8225' +⏱️ Add operation took: 1058 ms +--------------------------------------------------- +[Step 460/500] Processing... +Adding data: 'Test entry #460: Consensus YAML related data with random seed 16140' +⏱️ Add operation took: 987 ms +--------------------------------------------------- +[Step 461/500] Processing... +Adding data: 'Test entry #461: Vector Pub-sub related data with random seed 31094' +⏱️ Add operation took: 1127 ms +--------------------------------------------------- +[Step 462/500] Processing... +Adding data: 'Test entry #462: Best-practices CLI related data with random seed 19922' +⏱️ Add operation took: 1398 ms +--------------------------------------------------- +[Step 463/500] Processing... +Adding data: 'Test entry #463: Microservices UX related data with random seed 32421' +⏱️ Add operation took: 1211 ms +--------------------------------------------------- +[Step 464/500] Processing... 
+Adding data: 'Test entry #464: Example Integration-test related data with random seed 5874' +⏱️ Add operation took: 1174 ms +--------------------------------------------------- +[Step 465/500] Processing... +Adding data: 'Test entry #465: Encryption Vector related data with random seed 11764' +⏱️ Add operation took: 1069 ms +--------------------------------------------------- +[Step 466/500] Processing... +Adding data: 'Test entry #466: UML Optimization related data with random seed 21437' +⏱️ Add operation took: 1144 ms +--------------------------------------------------- +[Step 467/500] Processing... +Adding data: 'Test entry #467: Scalability ERD related data with random seed 14991' +⏱️ Add operation took: 1017 ms +--------------------------------------------------- +[Step 468/500] Processing... +Adding data: 'Test entry #468: Documentation Monitoring related data with random seed 13294' +⏱️ Add operation took: 1181 ms +--------------------------------------------------- +[Step 469/500] Processing... +Adding data: 'Test entry #469: Branching GraphQL related data with random seed 16900' +⏱️ Add operation took: 1014 ms +--------------------------------------------------- +[Step 470/500] Processing... +Adding data: 'Test entry #470: Consensus Replication related data with random seed 9153' +⏱️ Add operation took: 1107 ms +--------------------------------------------------- +[Step 471/500] Processing... +Adding data: 'Test entry #471: Recovery Version-control related data with random seed 2051' +⏱️ Add operation took: 921 ms +--------------------------------------------------- +[Step 472/500] Processing... +Adding data: 'Test entry #472: Library CI/CD related data with random seed 18363' +⏱️ Add operation took: 1030 ms +--------------------------------------------------- +[Step 473/500] Processing... 
+Adding data: 'Test entry #473: Index Python related data with random seed 3534' +⏱️ Add operation took: 922 ms +--------------------------------------------------- +[Step 474/500] Processing... +Adding data: 'Test entry #474: UML Latency related data with random seed 18011' +⏱️ Add operation took: 952 ms +--------------------------------------------------- +[Step 475/500] Processing... +Adding data: 'Test entry #475: Fault-tolerance XML related data with random seed 13158' +⏱️ Add operation took: 956 ms +--------------------------------------------------- +[Step 476/500] Processing... +Adding data: 'Test entry #476: Logging API related data with random seed 8268' +⏱️ Add operation took: 986 ms +--------------------------------------------------- +[Step 477/500] Processing... +Adding data: 'Test entry #477: Unit-test CLI related data with random seed 17091' +⏱️ Add operation took: 922 ms +--------------------------------------------------- +[Step 478/500] Processing... +Adding data: 'Test entry #478: Code-review Index related data with random seed 24809' +⏱️ Add operation took: 962 ms +--------------------------------------------------- +[Step 479/500] Processing... +Adding data: 'Test entry #479: CI/CD Data-structure related data with random seed 13652' +⏱️ Add operation took: 1025 ms +--------------------------------------------------- +[Step 480/500] Processing... +Adding data: 'Test entry #480: Encryption Version-control related data with random seed 3281' +⏱️ Add operation took: 931 ms +--------------------------------------------------- +[Step 481/500] Processing... +Adding data: 'Test entry #481: Tutorial Throughput related data with random seed 11888' +⏱️ Add operation took: 996 ms +--------------------------------------------------- +[Step 482/500] Processing... 
+Adding data: 'Test entry #482: Code-review RESTful related data with random seed 18863' +⏱️ Add operation took: 986 ms +--------------------------------------------------- +[Step 483/500] Processing... +Adding data: 'Test entry #483: Orchestration Microservices related data with random seed 31494' +⏱️ Add operation took: 955 ms +--------------------------------------------------- +[Step 484/500] Processing... +Adding data: 'Test entry #484: Version-control Recovery related data with random seed 20954' +⏱️ Add operation took: 983 ms +--------------------------------------------------- +[Step 485/500] Processing... +Adding data: 'Test entry #485: Integration-test Pub-sub related data with random seed 29641' +⏱️ Add operation took: 936 ms +--------------------------------------------------- +[Step 486/500] Processing... +Adding data: 'Test entry #486: Library Example related data with random seed 9207' +⏱️ Add operation took: 955 ms +--------------------------------------------------- +[Step 487/500] Processing... +Adding data: 'Test entry #487: XML Best-practices related data with random seed 19330' +⏱️ Add operation took: 1035 ms +--------------------------------------------------- +[Step 488/500] Processing... +Adding data: 'Test entry #488: Pull-request Git related data with random seed 13085' +⏱️ Add operation took: 1023 ms +--------------------------------------------------- +[Step 489/500] Processing... +Adding data: 'Test entry #489: Load-balancing CI/CD related data with random seed 9733' +⏱️ Add operation took: 997 ms +--------------------------------------------------- +[Step 490/500] Processing... +Adding data: 'Test entry #490: Threading Resilient related data with random seed 825' +⏱️ Add operation took: 948 ms +--------------------------------------------------- +[Step 491/500] Processing... 
+Adding data: 'Test entry #491: Framework Mocking related data with random seed 10031' +⏱️ Add operation took: 925 ms +--------------------------------------------------- +[Step 492/500] Processing... +Adding data: 'Test entry #492: Testing CLI related data with random seed 20014' +⏱️ Add operation took: 929 ms +--------------------------------------------------- +[Step 493/500] Processing... +Adding data: 'Test entry #493: Stubbing Recovery related data with random seed 28424' +⏱️ Add operation took: 1007 ms +--------------------------------------------------- +[Step 494/500] Processing... +Adding data: 'Test entry #494: Monitoring Threading related data with random seed 28404' +⏱️ Add operation took: 917 ms +--------------------------------------------------- +[Step 495/500] Processing... +Adding data: 'Test entry #495: Best-practices Flowchart related data with random seed 507' +⏱️ Add operation took: 890 ms +--------------------------------------------------- +[Step 496/500] Processing... +Adding data: 'Test entry #496: Library Database related data with random seed 13646' +⏱️ Add operation took: 969 ms +--------------------------------------------------- +[Step 497/500] Processing... +Adding data: 'Test entry #497: Cloud Profiling related data with random seed 11981' +⏱️ Add operation took: 928 ms +--------------------------------------------------- +[Step 498/500] Processing... +Adding data: 'Test entry #498: Design-patterns Python related data with random seed 16866' +⏱️ Add operation took: 947 ms +--------------------------------------------------- +[Step 499/500] Processing... +Adding data: 'Test entry #499: Vector CLI related data with random seed 18419' +⏱️ Add operation took: 1037 ms +--------------------------------------------------- +[Step 500/500] Processing... 
+Adding data: 'Test entry #500: CSV Replication related data with random seed 2550' +⏱️ Add operation took: 967 ms + +📊 [BATCH REPORT] Items 451 to 500 + -> Average Latency: 1019 ms + +=================================================== +🎉 Congratulations! The system survived the stress test. + Total time spent on 'add' operations: 480736 ms + Overall Average Latency: 961 ms +=================================================== diff --git a/ecosystem/sdk/vector-indexing/stress_test_resultsCSV.csv b/ecosystem/sdk/vector-indexing/stress_test_resultsCSV.csv new file mode 100644 index 000000000..1be4404cd --- /dev/null +++ b/ecosystem/sdk/vector-indexing/stress_test_resultsCSV.csv @@ -0,0 +1,501 @@ +Step Number,Operation Time (ms),First Entry Similarity (%) +1,1003,39.07 +2,931,37.71 +3,932,38.74 +4,1161,37.71 +5,1206,43.56 +6,1141,39.10 +7,1189,26.09 +8,1217,33.71 +9,1316,42.09 +10,1028,42.32 +11,1045,46.54 +12,960,60.86 +13,1139,44.96 +14,990,60.75 +15,1055,39.36 +16,986,55.54 +17,944,54.77 +18,996,38.26 +19,945,41.22 +20,995,43.17 +21,1036,43.92 +22,1097,47.53 +23,896,32.60 +24,965,51.72 +25,982,51.53 +26,990,56.85 +27,944,51.74 +28,947,41.46 +29,952,21.02 +30,890,60.75 +31,1150,52.57 +32,993,31.85 +33,951,46.79 +34,991,61.95 +35,947,56.85 +36,1050,51.51 +37,992,43.17 +38,950,63.80 +39,945,21.02 +40,946,51.61 +41,956,43.17 +42,1098,49.22 +43,982,26.09 +44,952,54.77 +45,938,62.26 +46,956,41.22 +47,952,45.07 +48,990,44.10 +49,892,36.09 +50,1000,53.13 +51,976,39.36 +52,962,28.20 +53,984,63.49 +54,997,46.82 +55,1063,31.85 +56,1066,41.22 +57,1066,62.26 +58,981,44.63 +59,1009,44.10 +60,932,41.51 +61,992,54.14 +62,1001,41.94 +63,946,42.32 +64,961,54.70 +65,988,63.49 +66,987,40.50 +67,1009,47.50 +68,934,53.76 +69,999,51.87 +70,950,45.07 +71,989,45.07 +72,950,51.74 +73,988,46.60 +74,954,55.00 +75,1049,51.87 +76,940,41.64 +77,1150,55.54 +78,999,62.26 +79,940,52.57 +80,1002,63.49 +81,1008,54.14 +82,979,51.74 +83,948,51.87 +84,948,46.54 +85,893,21.02 +86,943,39.68 +87,947,58.76 
+88,1024,39.07 +89,970,55.54 +90,956,39.68 +91,933,36.09 +92,897,57.23 +93,945,36.09 +94,995,53.76 +95,949,54.70 +96,943,63.40 +97,949,49.22 +98,1021,51.53 +99,1028,39.36 +100,1247,44.83 +101,1028,46.79 +102,1038,58.76 +103,999,38.74 +104,990,48.81 +105,1039,62.26 +106,993,63.80 +107,992,55.00 +108,949,49.22 +109,1054,51.54 +110,1093,56.85 +111,1139,53.76 +112,994,37.71 +113,897,49.22 +114,999,40.50 +115,946,42.32 +116,996,51.61 +117,952,55.54 +118,947,65.81 +119,946,63.80 +120,948,51.72 +121,947,46.27 +122,1009,41.64 +123,1084,39.07 +124,998,47.53 +125,1173,43.89 +126,1123,54.77 +127,1035,31.85 +128,1051,58.76 +129,1050,47.53 +130,1102,44.96 +131,1034,52.57 +132,1002,41.94 +133,1038,41.22 +134,1096,51.87 +135,1086,54.77 +136,1041,57.25 +137,1005,63.80 +138,987,43.56 +139,999,58.60 +140,992,51.00 +141,948,62.26 +142,1082,62.09 +143,1049,39.10 +144,1042,52.57 +145,994,54.77 +146,1030,39.36 +147,1020,32.60 +148,1047,43.17 +149,1040,61.95 +150,1041,53.76 +151,1049,42.09 +152,980,47.12 +153,999,37.24 +154,1039,41.94 +155,1052,41.94 +156,1029,46.79 +157,905,43.17 +158,1037,42.09 +159,999,26.09 +160,1092,38.66 +161,988,26.09 +162,846,65.81 +163,951,67.68 +164,1043,51.53 +165,894,44.83 +166,901,39.28 +167,989,42.09 +168,902,43.56 +169,898,40.50 +170,892,61.95 +171,845,39.10 +172,997,44.10 +173,943,39.68 +174,1055,62.09 +175,1010,51.72 +176,976,51.00 +177,894,46.27 +178,901,43.08 +179,941,38.66 +180,946,46.60 +181,954,51.51 +182,954,43.89 +183,897,44.10 +184,925,51.87 +185,949,56.68 +186,946,54.14 +187,952,37.37 +188,892,57.23 +189,941,39.36 +190,952,48.81 +191,937,56.85 +192,956,46.79 +193,980,62.09 +194,896,31.85 +195,898,39.28 +196,893,58.60 +197,899,42.09 +198,893,46.27 +199,955,46.82 +200,1003,37.37 +201,1019,43.92 +202,959,51.74 +203,898,54.77 +204,917,39.10 +205,933,47.12 +206,1008,46.54 +207,978,52.37 +208,897,37.37 +209,903,47.50 +210,998,39.36 +211,980,43.89 +212,899,33.71 +213,895,61.95 +214,952,41.94 +215,890,46.60 +216,949,58.76 +217,897,39.28 +218,893,38.74 
+219,912,63.40 +220,931,62.09 +221,1116,47.89 +222,1030,31.85 +223,896,39.28 +224,844,43.17 +225,900,43.89 +226,937,52.57 +227,900,41.46 +228,895,57.25 +229,898,46.54 +230,893,38.74 +231,800,43.92 +232,892,54.70 +233,897,51.72 +234,893,37.71 +235,915,36.09 +236,931,56.85 +237,899,37.37 +238,897,37.37 +239,939,63.49 +240,950,51.61 +241,936,39.10 +242,899,41.22 +243,893,43.56 +244,849,46.79 +245,795,42.32 +246,896,39.68 +247,897,53.76 +248,943,52.57 +249,950,55.00 +250,864,51.53 +251,914,50.19 +252,951,50.19 +253,888,67.68 +254,989,51.51 +255,922,41.46 +256,939,43.08 +257,920,46.82 +258,983,60.86 +259,908,52.57 +260,965,61.95 +261,891,51.51 +262,981,54.43 +263,949,39.10 +264,887,39.28 +265,905,61.95 +266,890,51.72 +267,906,39.10 +268,839,51.00 +269,953,56.85 +270,1044,39.07 +271,882,44.63 +272,853,44.96 +273,887,48.81 +274,869,51.72 +275,927,41.51 +276,1052,41.64 +277,940,44.96 +278,910,46.79 +279,825,38.66 +280,895,50.19 +281,995,41.22 +282,893,39.10 +283,847,40.50 +284,847,51.61 +285,900,28.20 +286,892,37.37 +287,918,39.07 +288,968,41.46 +289,954,52.37 +290,903,51.00 +291,931,51.74 +292,900,56.85 +293,945,67.68 +294,952,47.12 +295,913,54.43 +296,926,33.71 +297,955,37.37 +298,940,46.79 +299,948,61.95 +300,992,54.77 +301,929,63.80 +302,939,46.82 +303,956,46.60 +304,931,47.50 +305,901,38.66 +306,895,43.56 +307,907,55.91 +308,936,37.24 +309,896,46.27 +310,987,41.94 +311,898,51.00 +312,843,44.96 +313,899,54.14 +314,887,47.12 +315,907,33.71 +316,989,67.68 +317,915,41.94 +318,930,52.54 +319,888,63.49 +320,894,53.76 +321,901,53.13 +322,944,44.96 +323,889,57.25 +324,896,43.89 +325,902,51.61 +326,944,52.54 +327,891,63.49 +328,895,47.12 +329,853,38.26 +330,944,31.85 +331,849,39.68 +332,898,52.54 +333,935,53.76 +334,899,43.89 +335,994,43.56 +336,1007,51.72 +337,944,48.75 +338,937,51.72 +339,913,37.37 +340,933,47.89 +341,943,33.71 +342,954,39.68 +343,948,46.79 +344,935,47.50 +345,897,39.28 +346,1001,51.72 +347,997,21.02 +348,939,36.09 +349,898,62.26 +350,946,38.66 
+351,879,58.76 +352,945,47.50 +353,946,36.09 +354,944,47.89 +355,894,47.50 +356,894,39.10 +357,896,65.81 +358,903,28.20 +359,944,51.74 +360,841,45.07 +361,906,46.82 +362,944,51.87 +363,896,45.07 +364,916,41.64 +365,923,38.26 +366,907,65.81 +367,942,48.81 +368,895,39.68 +369,953,33.71 +370,905,61.95 +371,978,46.82 +372,1067,60.86 +373,974,38.74 +374,990,46.82 +375,1106,50.19 +376,990,63.49 +377,1094,55.00 +378,950,54.43 +379,891,60.86 +380,972,52.37 +381,977,42.09 +382,944,60.86 +383,948,39.07 +384,996,47.12 +385,943,28.20 +386,902,42.97 +387,881,28.20 +388,958,42.97 +389,951,52.54 +390,1019,54.70 +391,1109,41.46 +392,903,42.32 +393,900,44.63 +394,1087,51.00 +395,946,54.70 +396,903,46.79 +397,891,41.94 +398,1306,51.51 +399,990,46.54 +400,953,50.19 +401,1113,60.75 +402,1049,47.50 +403,991,46.79 +404,904,63.40 +405,936,51.74 +406,896,41.64 +407,881,51.74 +408,916,59.38 +409,882,26.09 +410,961,42.32 +411,934,54.70 +412,1059,43.56 +413,952,43.56 +414,1110,51.51 +415,960,56.68 +416,1001,31.85 +417,992,46.27 +418,947,53.13 +419,946,32.60 +420,951,41.94 +421,950,47.12 +422,941,45.07 +423,946,52.54 +424,943,65.81 +425,956,63.80 +426,991,41.51 +427,944,54.43 +428,948,41.51 +429,1001,42.32 +430,946,40.50 +431,939,51.54 +432,948,41.94 +433,1047,51.00 +434,946,51.53 +435,952,43.17 +436,994,44.96 +437,948,47.50 +438,944,37.71 +439,949,42.32 +440,949,26.09 +441,1003,63.49 +442,941,38.26 +443,943,46.27 +444,968,50.19 +445,978,21.02 +446,948,54.14 +447,946,41.94 +448,1000,62.09 +449,1086,28.20 +450,1066,63.80 +451,970,54.14 +452,995,44.63 +453,1015,53.13 +454,990,43.17 +455,1209,37.37 +456,1025,46.82 +457,990,39.10 +458,948,31.85 +459,957,21.02 +460,993,51.53 +461,987,42.09 +462,944,47.53 +463,1007,43.56 +464,1096,41.94 +465,938,58.76 +466,996,63.80 +467,999,62.09 +468,953,39.68 +469,905,59.38 +470,1001,41.46 +471,928,44.63 +472,960,36.09 +473,882,64.40 +474,945,48.81 +475,946,44.10 +476,1092,55.00 +477,895,63.40 +478,896,44.83 +479,942,50.19 +480,947,67.68 +481,1011,43.17 
+482,1028,51.87 +483,910,65.81 +484,894,39.10 +485,956,51.51 +486,893,63.49 +487,931,55.54 +488,959,41.22 +489,932,47.53 +490,904,43.17 +491,851,48.75 +492,1041,63.40 +493,953,41.46 +494,942,38.26 +495,887,51.72 +496,902,51.54 +497,890,38.74 +498,1000,43.17 +499,946,56.68 +500,1020,59.38 From c733d42e537c53ccd420e78852bcf24496fe6575 Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Mon, 8 Dec 2025 10:29:02 +0900 Subject: [PATCH 66/79] fix: Adjust similarity score formatting in response output --- ecosystem/sdk/vector-indexing/kv_vector_library.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ecosystem/sdk/vector-indexing/kv_vector_library.py b/ecosystem/sdk/vector-indexing/kv_vector_library.py index 93d412650..3c37904e4 100644 --- a/ecosystem/sdk/vector-indexing/kv_vector_library.py +++ b/ecosystem/sdk/vector-indexing/kv_vector_library.py @@ -14,8 +14,8 @@ def format_get_responses(results: Any) -> None: # There's probably a better way of telling if there is a score, but thats okay try: score = pairing["score"] - score *= 100 - print(f"{i+1}. {text} // (similarity score: {score:.2f}%)") + ## score *= 100 + print(f"{i+1}. {text} // (similarity score: {score})") except KeyError: print(f"{i+1}. {text}") From bae91c10e6c3e4c1e789a4f7ca8da4871dd722d0 Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Wed, 10 Dec 2025 06:19:00 +0900 Subject: [PATCH 67/79] Add stress test results CSV and remove vector client and proxy scripts - Added a new CSV file containing stress test results for vector indexing operations. - Removed the vector_client.py script which handled command-line interactions for adding, deleting, and searching vectors. - Removed the vector_proxy.py script which provided a Flask API for vector operations. 
--- ecosystem/README.md | 3 +- .../saved_data/embedding_keys.json | 1 - .../{ => test_codes}/ST_add_csv.py | 0 .../{ => test_codes}/ST_get_csv.py | 0 .../{ => test_codes}/add_csv_to_avg_csv.py | 0 .../{ => test_codes}/averaged_intervals.csv | 0 .../vector-indexing/test_codes/demo_add.sh | 26 +++++ .../{ => test_codes}/get_csv_to_avg_csv.py | 0 .../{ => test_codes}/parsed_log_data.csv | 0 .../{ => test_codes}/stress_test.sh | 0 .../{ => test_codes}/stress_test_add.sh | 0 .../{ => test_codes}/stress_test_get.sh | 0 .../stress_test_get_interval_averages.csv | 0 .../stress_test_get_results.txt | 0 .../{ => test_codes}/stress_test_results.txt | 0 .../stress_test_resultsCSV.csv | 0 .../sdk/vector-indexing/vector_client.py | 83 -------------- ecosystem/sdk/vector-indexing/vector_proxy.py | 105 ------------------ 18 files changed, 28 insertions(+), 190 deletions(-) delete mode 100644 ecosystem/sdk/vector-indexing/saved_data/embedding_keys.json rename ecosystem/sdk/vector-indexing/{ => test_codes}/ST_add_csv.py (100%) rename ecosystem/sdk/vector-indexing/{ => test_codes}/ST_get_csv.py (100%) rename ecosystem/sdk/vector-indexing/{ => test_codes}/add_csv_to_avg_csv.py (100%) rename ecosystem/sdk/vector-indexing/{ => test_codes}/averaged_intervals.csv (100%) create mode 100755 ecosystem/sdk/vector-indexing/test_codes/demo_add.sh rename ecosystem/sdk/vector-indexing/{ => test_codes}/get_csv_to_avg_csv.py (100%) rename ecosystem/sdk/vector-indexing/{ => test_codes}/parsed_log_data.csv (100%) rename ecosystem/sdk/vector-indexing/{ => test_codes}/stress_test.sh (100%) rename ecosystem/sdk/vector-indexing/{ => test_codes}/stress_test_add.sh (100%) rename ecosystem/sdk/vector-indexing/{ => test_codes}/stress_test_get.sh (100%) rename ecosystem/sdk/vector-indexing/{ => test_codes}/stress_test_get_interval_averages.csv (100%) rename ecosystem/sdk/vector-indexing/{ => test_codes}/stress_test_get_results.txt (100%) rename ecosystem/sdk/vector-indexing/{ => 
test_codes}/stress_test_results.txt (100%) rename ecosystem/sdk/vector-indexing/{ => test_codes}/stress_test_resultsCSV.csv (100%) delete mode 100644 ecosystem/sdk/vector-indexing/vector_client.py delete mode 100644 ecosystem/sdk/vector-indexing/vector_proxy.py diff --git a/ecosystem/README.md b/ecosystem/README.md index cd05368d9..b4c0c1f73 100644 --- a/ecosystem/README.md +++ b/ecosystem/README.md @@ -39,7 +39,8 @@ ecosystem/ ├── sdk/ # Software Development Kits │ ├── rust-sdk/ # Rust SDK │ ├── resvault-sdk/ # ResVault SDK -│ └── resdb-orm/ # Python ORM +│ ├── resdb-orm/ # Python ORM +│ └── vector-indexing/ # Vector indexing for semantic search ├── deployment/ # Deployment and infrastructure │ ├── ansible/ # Ansible playbooks │ └── orbit/ # Orbit deployment tool diff --git a/ecosystem/sdk/vector-indexing/saved_data/embedding_keys.json b/ecosystem/sdk/vector-indexing/saved_data/embedding_keys.json deleted file mode 100644 index 15947f86b..000000000 --- a/ecosystem/sdk/vector-indexing/saved_data/embedding_keys.json +++ /dev/null @@ -1 +0,0 @@ -{"temp_index_txt": "66ae9d430bacb699df63c3a73a98b84a95dee78a8ba17c45539f607be6d81b82", "temp_leann_passages_json": "e9be6fd8ef2892bc914fec6a3ee8d7992dfded8e8432216d03d76c0c0c531be1", "temp_ids_txt": "", "temp_leann_meta_json": "", "temp_leann_passages_txt": ""} \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/ST_add_csv.py b/ecosystem/sdk/vector-indexing/test_codes/ST_add_csv.py similarity index 100% rename from ecosystem/sdk/vector-indexing/ST_add_csv.py rename to ecosystem/sdk/vector-indexing/test_codes/ST_add_csv.py diff --git a/ecosystem/sdk/vector-indexing/ST_get_csv.py b/ecosystem/sdk/vector-indexing/test_codes/ST_get_csv.py similarity index 100% rename from ecosystem/sdk/vector-indexing/ST_get_csv.py rename to ecosystem/sdk/vector-indexing/test_codes/ST_get_csv.py diff --git a/ecosystem/sdk/vector-indexing/add_csv_to_avg_csv.py b/ecosystem/sdk/vector-indexing/test_codes/add_csv_to_avg_csv.py 
similarity index 100% rename from ecosystem/sdk/vector-indexing/add_csv_to_avg_csv.py rename to ecosystem/sdk/vector-indexing/test_codes/add_csv_to_avg_csv.py diff --git a/ecosystem/sdk/vector-indexing/averaged_intervals.csv b/ecosystem/sdk/vector-indexing/test_codes/averaged_intervals.csv similarity index 100% rename from ecosystem/sdk/vector-indexing/averaged_intervals.csv rename to ecosystem/sdk/vector-indexing/test_codes/averaged_intervals.csv diff --git a/ecosystem/sdk/vector-indexing/test_codes/demo_add.sh b/ecosystem/sdk/vector-indexing/test_codes/demo_add.sh new file mode 100755 index 000000000..ea2b2d511 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/test_codes/demo_add.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +echo "=== Adding 10 demo texts to ResilientDB ===" + +texts=( +"Large language models can generate human-like text and assist with tasks such as summarization, translation, and code generation." +"Photosynthesis allows plants to convert sunlight into chemical energy, producing oxygen as a byproduct." +"Kyoto is known for its ancient temples, traditional wooden houses, and beautiful seasonal landscapes." +"Strong branding helps companies build customer trust and differentiate themselves in competitive markets." +"Regular exercise improves cardiovascular health, increases muscle strength, and reduces stress levels." +"Active learning encourages students to participate, discuss ideas, and apply knowledge rather than passively listen." +"Sourdough bread develops its unique flavor through natural fermentation using wild yeast and lactic acid bacteria." +"Reducing plastic waste requires better recycling systems and increased use of biodegradable materials." +"Impressionist painters focused on capturing light and movement rather than creating precise, realistic details." +"Basketball requires teamwork, quick decision-making, and precise coordination between players on the court." 
+) + +for text in "${texts[@]}" +do + echo "→ Adding:" + echo " \"$text\"" + python3 kv_vector.py --add "$text" + echo "" +done + +echo "=== Done: All demo texts added ===" diff --git a/ecosystem/sdk/vector-indexing/get_csv_to_avg_csv.py b/ecosystem/sdk/vector-indexing/test_codes/get_csv_to_avg_csv.py similarity index 100% rename from ecosystem/sdk/vector-indexing/get_csv_to_avg_csv.py rename to ecosystem/sdk/vector-indexing/test_codes/get_csv_to_avg_csv.py diff --git a/ecosystem/sdk/vector-indexing/parsed_log_data.csv b/ecosystem/sdk/vector-indexing/test_codes/parsed_log_data.csv similarity index 100% rename from ecosystem/sdk/vector-indexing/parsed_log_data.csv rename to ecosystem/sdk/vector-indexing/test_codes/parsed_log_data.csv diff --git a/ecosystem/sdk/vector-indexing/stress_test.sh b/ecosystem/sdk/vector-indexing/test_codes/stress_test.sh similarity index 100% rename from ecosystem/sdk/vector-indexing/stress_test.sh rename to ecosystem/sdk/vector-indexing/test_codes/stress_test.sh diff --git a/ecosystem/sdk/vector-indexing/stress_test_add.sh b/ecosystem/sdk/vector-indexing/test_codes/stress_test_add.sh similarity index 100% rename from ecosystem/sdk/vector-indexing/stress_test_add.sh rename to ecosystem/sdk/vector-indexing/test_codes/stress_test_add.sh diff --git a/ecosystem/sdk/vector-indexing/stress_test_get.sh b/ecosystem/sdk/vector-indexing/test_codes/stress_test_get.sh similarity index 100% rename from ecosystem/sdk/vector-indexing/stress_test_get.sh rename to ecosystem/sdk/vector-indexing/test_codes/stress_test_get.sh diff --git a/ecosystem/sdk/vector-indexing/stress_test_get_interval_averages.csv b/ecosystem/sdk/vector-indexing/test_codes/stress_test_get_interval_averages.csv similarity index 100% rename from ecosystem/sdk/vector-indexing/stress_test_get_interval_averages.csv rename to ecosystem/sdk/vector-indexing/test_codes/stress_test_get_interval_averages.csv diff --git a/ecosystem/sdk/vector-indexing/stress_test_get_results.txt 
b/ecosystem/sdk/vector-indexing/test_codes/stress_test_get_results.txt similarity index 100% rename from ecosystem/sdk/vector-indexing/stress_test_get_results.txt rename to ecosystem/sdk/vector-indexing/test_codes/stress_test_get_results.txt diff --git a/ecosystem/sdk/vector-indexing/stress_test_results.txt b/ecosystem/sdk/vector-indexing/test_codes/stress_test_results.txt similarity index 100% rename from ecosystem/sdk/vector-indexing/stress_test_results.txt rename to ecosystem/sdk/vector-indexing/test_codes/stress_test_results.txt diff --git a/ecosystem/sdk/vector-indexing/stress_test_resultsCSV.csv b/ecosystem/sdk/vector-indexing/test_codes/stress_test_resultsCSV.csv similarity index 100% rename from ecosystem/sdk/vector-indexing/stress_test_resultsCSV.csv rename to ecosystem/sdk/vector-indexing/test_codes/stress_test_resultsCSV.csv diff --git a/ecosystem/sdk/vector-indexing/vector_client.py b/ecosystem/sdk/vector-indexing/vector_client.py deleted file mode 100644 index a06590e86..000000000 --- a/ecosystem/sdk/vector-indexing/vector_client.py +++ /dev/null @@ -1,83 +0,0 @@ -# vector_client.py -import argparse -import requests -import sys - -# Proxy Server URL -# Use 'localhost' for local testing -# Use the external IP (e.g., "http://34.xx.xx.xx:5000") for cloud deployment -PROXY_URL = "http://localhost:5000" - -def cmd_add(args): - """Send add request to proxy""" - print(f"Adding value: '{args.value}'...") - try: - resp = requests.post(f"{PROXY_URL}/add", json={"text": args.value}) - data = resp.json() - - if resp.status_code == 200: - print(f"[SUCCESS] {data.get('message')}") - else: - print(f"[ERROR] {data.get('message') or data.get('error')}") - except Exception as e: - print(f"Connection failed: {e}") - -def cmd_delete(args): - """Send delete request to proxy""" - print(f"Deleting value: '{args.value}'...") - try: - resp = requests.post(f"{PROXY_URL}/delete", json={"text": args.value}) - data = resp.json() - - if resp.status_code == 200: - print(f"[SUCCESS] 
{data.get('message')}") - else: - print(f"[ERROR] {data.get('error')}") - except Exception as e: - print(f"Connection failed: {e}") - -def cmd_search(args): - """Send search request to proxy""" - print(f"Searching for: '{args.value}' (Top {args.k_matches})...") - try: - resp = requests.post(f"{PROXY_URL}/search", json={"value": args.value, "k": args.k_matches}) - data = resp.json() - - if resp.status_code == 200: - results = data.get("results", []) - print(f"\n--- Found {len(results)} results ---") - if not results: - print("No matches found.") - for i, item in enumerate(results, 1): - print(f"{i}. {item['text']} (Score: {item['score']:.4f})") - print("----------------------------") - else: - print(f"[ERROR] {data.get('error')}") - except Exception as e: - print(f"Connection failed: {e}") - -def main(): - parser = argparse.ArgumentParser(description="Vector Search Client") - subparsers = parser.add_subparsers(dest="command", required=True) - - # Add command - p_add = subparsers.add_parser("add") - p_add.add_argument("--value", required=True) - p_add.set_defaults(func=cmd_add) - - # Delete command - p_del = subparsers.add_parser("delete") - p_del.add_argument("--value", required=True) - p_del.set_defaults(func=cmd_delete) - - # Search command - p_search = subparsers.add_parser("search") - p_search.add_argument("--value", required=True) - p_search.add_argument("--k_matches", type=int, default=3) - p_search.set_defaults(func=cmd_search) - - args = parser.parse_args() - args.func(args) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/ecosystem/sdk/vector-indexing/vector_proxy.py b/ecosystem/sdk/vector-indexing/vector_proxy.py deleted file mode 100644 index 36a67f929..000000000 --- a/ecosystem/sdk/vector-indexing/vector_proxy.py +++ /dev/null @@ -1,105 +0,0 @@ -# vector_proxy.py -from flask import Flask, request, jsonify -import subprocess -import sys -import re -import os - -app = Flask(__name__) - -# Get the path of the currently running 
Python interpreter -# (To ensure scripts run within the same environment/venv) -PYTHON_EXE = sys.executable -CWD = os.path.dirname(os.path.abspath(__file__)) - -def run_script(script_name, args): - """ - Executes a Python script as a subprocess and captures its output. - """ - command = [PYTHON_EXE, script_name] + args - try: - # Run the script and capture stdout/stderr - result = subprocess.run( - command, - capture_output=True, - text=True, - cwd=CWD - ) - - # Check return code (0 means success) - if result.returncode != 0: - return False, result.stderr + "\n" + result.stdout - return True, result.stdout - except Exception as e: - return False, str(e) - -# --- API Endpoints --- - -@app.route('/', methods=['GET']) -def health_check(): - return jsonify({"status": "online", "message": "Vector Indexing Proxy is running"}), 200 - -@app.route('/add', methods=['POST']) -def add_vector(): - text = request.json.get('text') - if not text: - return jsonify({"error": "No text provided"}), 400 - - # Command: python vector_add.py --value "text" - success, output = run_script("vector_add.py", ["--value", text]) - - if success: - return jsonify({"status": "success", "message": "Added successfully", "raw_output": output.strip()}) - else: - # Handle specific errors like duplicates - if "already saved" in output: - return jsonify({"status": "skipped", "message": "Value already exists"}) - return jsonify({"status": "error", "error": output.strip()}), 500 - -@app.route('/delete', methods=['POST']) -def delete_vector(): - text = request.json.get('text') - if not text: - return jsonify({"error": "No text provided"}), 400 - - # Command: python vector_delete.py --value "text" - success, output = run_script("vector_delete.py", ["--value", text]) - - if success: - return jsonify({"status": "success", "message": "Deleted successfully", "raw_output": output.strip()}) - else: - return jsonify({"status": "error", "error": output.strip()}), 500 - -@app.route('/search', methods=['POST']) -def 
search_vector(): - text = request.json.get('value') - k = str(request.json.get('k', 3)) # Default to top 3 results - if not text: - return jsonify({"error": "No text provided"}), 400 - - # Command: python vector_get.py --value "text" --k_matches K - success, output = run_script("vector_get.py", ["--value", text, "--k_matches", k]) - - if not success: - return jsonify({"status": "error", "error": output.strip()}), 500 - - # Parse the stdout from vector_get.py to create a JSON response - # Expected format example: "1. hello world // (similarity score: 0.1234)" - results = [] - for line in output.splitlines(): - # Regex to extract text and score - match = re.search(r'^\d+\.\s+(.*?)\s+//\s+\(similarity score:\s+([0-9.]+)\)', line) - if match: - results.append({ - "text": match.group(1), - "score": float(match.group(2)) - }) - # Capture other informational lines if necessary - elif line.strip() and "Critical Error" not in line: - pass - - return jsonify({"status": "success", "results": results}) - -if __name__ == '__main__': - # Run on port 5000, accessible externally - app.run(host='0.0.0.0', port=5000) \ No newline at end of file From b19ecbf7e26ef289cf98d92bb5a6bede29114337 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Tue, 9 Dec 2025 14:33:35 -0800 Subject: [PATCH 68/79] Add README for Vector Indexing SDK This README provides an overview of the Vector Indexing SDK, including architecture, prerequisites, installation instructions, and usage examples for the CLI tool. 
--- ecosystem/sdk/vector-indexing/README.md | 66 +++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 ecosystem/sdk/vector-indexing/README.md diff --git a/ecosystem/sdk/vector-indexing/README.md b/ecosystem/sdk/vector-indexing/README.md new file mode 100644 index 000000000..36c84d7b8 --- /dev/null +++ b/ecosystem/sdk/vector-indexing/README.md @@ -0,0 +1,66 @@ +# Vector Indexing SDK for ResilientDB +This directory contains a Python SDK for performing vector indexing and similarity search using ResilientDB as the storage backend. + +The primary interface for users is the ```kv_vector.py``` CLI tool, which interacts with the ResilientDB GraphQL service to manage vector embeddings. + +## Architecture +- ```kv_vector.py```: The CLI frontend. It sends GraphQL mutations and queries to the proxy. +- ```kv_vector_library.py```: Handles the HTTP requests to the GraphQL endpoint. +### Backend Scripts +- ```vector_add.py```, ```vector_get.py```, ```vector_delete.py```: These scripts reside on the server side (or strictly connected environment) to handle embedding generation (via SentenceTransformers) and HNSW index management. + +## Prerequisites +Before using this SDK, please ensure the entire ResilientDB stack is up and running. Specifically, you need: +1. ResilientDB KV Store: The core blockchain storage service must be running. [How to Setup](https://github.com/apache/incubator-resilientdb) +2. GraphQL Server (```ecosystem/graphql```): The backend service handling GraphQL schemas and resolvers. [How to Setup](https://github.com/apache/incubator-resilientdb/tree/master/ecosystem/graphql) +3. GraphQL Application (```ecosystem/graphql/app.py```): The Python web server (Ariadne/Flask) that exposes the GraphQL endpoint. [How to Setup](https://github.com/apache/incubator-resilientdb/tree/master/ecosystem/graphql) + - Default Endpoint: http://127.0.0.1:8000/graphql +4. 
In a terminal where the current directory is ecosystem/sdk/vector-indexing, activate the GraphQL virtual environment. + +## Installation +Install the required Python dependencies: +``` +pip install requests pyyaml numpy hnswlib sentence-transformers +``` + +## Quick Start: Demo Data +A shell script is provided to quickly populate the database with sample data for testing purposes. This is the fastest way to verify your environment is set up correctly. +1. Make sure you are in the ```ecosystem/sdk/vector-indexing``` directory. +2. Run the demo script: + ``` + bash demo_add.sh + ``` + **What this does:** The script iterates through a predefined list of sentences (covering topics like biology, sports, and art) and adds them to the ResilientDB vector index one by one using ```kv_vector.py```. + +## Usage (CLI) +The ```kv_vector.py``` script is the main entry point. It allows you to add text (which is automatically vectorized), search for similar text, and manage records via the GraphQL endpoint. + +### 1. Adding Data +To add a text string. This will generate an embedding and store it in ResilientDB. +``` +python3 kv_vector.py --add "" +``` + +### 2. Searching +To find the ```k``` most similar strings to your query using HNSW similarity search. +``` +# Get the single most similar record (default k=1) +python3 kv_vector.py --get "" + +# Get the top 3 matches +python3 kv_vector.py --get "" --k_matches 3 + +### 3. Listing All Data +To retrieve all text values currently stored in the index. +``` +python3 kv_vector.py --getAll +``` + +### 4. Deleting Data +To remove a specific value and its embedding from the index. +``` +python3 kv_vector.py --delete "" +``` + +## Configuration +If your GraphQL service is running on a different host or port, you may need to modify the configuration in ```kv_vector_library.py``` or the ```config.yaml``` file depending on your deployment mode. 
From 4475cd3e65e3b3f42d3c300744e4f01564baa86a Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Tue, 9 Dec 2025 14:33:57 -0800 Subject: [PATCH 69/79] Update README to remove default endpoint information Removed the default GraphQL endpoint from the README. --- ecosystem/sdk/vector-indexing/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/ecosystem/sdk/vector-indexing/README.md b/ecosystem/sdk/vector-indexing/README.md index 36c84d7b8..15ba1dce1 100644 --- a/ecosystem/sdk/vector-indexing/README.md +++ b/ecosystem/sdk/vector-indexing/README.md @@ -14,7 +14,6 @@ Before using this SDK, please ensure the entire ResilientDB stack is up and runn 1. ResilientDB KV Store: The core blockchain storage service must be running. [How to Setup](https://github.com/apache/incubator-resilientdb) 2. GraphQL Server (```ecosystem/graphql```): The backend service handling GraphQL schemas and resolvers. [How to Setup](https://github.com/apache/incubator-resilientdb/tree/master/ecosystem/graphql) 3. GraphQL Application (```ecosystem/graphql/app.py```): The Python web server (Ariadne/Flask) that exposes the GraphQL endpoint. [How to Setup](https://github.com/apache/incubator-resilientdb/tree/master/ecosystem/graphql) - - Default Endpoint: http://127.0.0.1:8000/graphql 4. In a terminal where the current directory is ecosystem/sdk/vector-indexing, activate the GraphQL virtual environment. ## Installation From 7c0f3558e415bf7802a53a4f1d6a30f1928d04e3 Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Tue, 9 Dec 2025 14:35:17 -0800 Subject: [PATCH 70/79] Fix demo script command in README Updated the command to run the demo script in the README. 
--- ecosystem/sdk/vector-indexing/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ecosystem/sdk/vector-indexing/README.md b/ecosystem/sdk/vector-indexing/README.md index 15ba1dce1..a538a4e2f 100644 --- a/ecosystem/sdk/vector-indexing/README.md +++ b/ecosystem/sdk/vector-indexing/README.md @@ -27,7 +27,7 @@ A shell script is provided to quickly populate the database with sample data for 1. Make sure you are in the ```ecosystem/sdk/vector-indexing``` directory. 2. Run the demo script: ``` - bash demo_add.sh + ./demo_add.sh ``` **What this does:** The script iterates through a predefined list of sentences (covering topics like biology, sports, and art) and adds them to the ResilientDB vector index one by one using ```kv_vector.py```. @@ -48,6 +48,7 @@ python3 kv_vector.py --get "" # Get the top 3 matches python3 kv_vector.py --get "" --k_matches 3 +``` ### 3. Listing All Data To retrieve all text values currently stored in the index. From 21fe92c59fb3d3c8ec883d5fa64be3fb3d63d7e3 Mon Sep 17 00:00:00 2001 From: Yoshiki0319 Date: Wed, 10 Dec 2025 07:38:02 +0900 Subject: [PATCH 71/79] Add demo script to add texts to ResilientDB --- ecosystem/sdk/vector-indexing/{test_codes => }/demo_add.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename ecosystem/sdk/vector-indexing/{test_codes => }/demo_add.sh (100%) diff --git a/ecosystem/sdk/vector-indexing/test_codes/demo_add.sh b/ecosystem/sdk/vector-indexing/demo_add.sh similarity index 100% rename from ecosystem/sdk/vector-indexing/test_codes/demo_add.sh rename to ecosystem/sdk/vector-indexing/demo_add.sh From 4e6e2b607b89585cc8c5355831eab62e623f553a Mon Sep 17 00:00:00 2001 From: Yoshiki Yamaguchi <82981913+Yoshiki0319@users.noreply.github.com> Date: Tue, 9 Dec 2025 14:44:04 -0800 Subject: [PATCH 72/79] Update README with demo script execution instructions Added instructions to make demo script executable. 
--- ecosystem/sdk/vector-indexing/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/ecosystem/sdk/vector-indexing/README.md b/ecosystem/sdk/vector-indexing/README.md index a538a4e2f..59064316e 100644 --- a/ecosystem/sdk/vector-indexing/README.md +++ b/ecosystem/sdk/vector-indexing/README.md @@ -27,6 +27,7 @@ A shell script is provided to quickly populate the database with sample data for 1. Make sure you are in the ```ecosystem/sdk/vector-indexing``` directory. 2. Run the demo script: ``` + chmod +x demo_add.sh ./demo_add.sh ``` **What this does:** The script iterates through a predefined list of sentences (covering topics like biology, sports, and art) and adds them to the ResilientDB vector index one by one using ```kv_vector.py```. From 54d3920f7380fa0d43e64d7965c1a287f2310eb9 Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Wed, 10 Dec 2025 13:30:11 -0800 Subject: [PATCH 73/79] Changed the commands in the instructions to run the project --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8f78d8985..2afb9bc74 100644 --- a/README.md +++ b/README.md @@ -276,8 +276,10 @@ python tests/test.py ### Step 6+: Re-Running ResdDB-orm in the future As long as the setup is successful, you will only need to run these two commands to spin up the **KV Service** and **GraphQL Server** in the future: ```bash +# From the top-level directory ./service/tools/kv/server_tools/start_kv_service.sh -bazel-bin/service/http_server/crow_service_main ecosystem/graphql/service/tools/config/interface/service.config ecosystem/graphql/service/http_server/server_config.config +# From the ecosystem/graphql directory +bazel-bin/service/http_server/crow_service_main service/tools/config/interface/service.config service/http_server/server_config.config ``` Note that each of these commands will prevent input on the terminal you run them in. 
From 4253b823327e77f37b1f49d99a548ade2d34f3b8 Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Fri, 12 Dec 2025 03:10:05 -0800 Subject: [PATCH 74/79] Added a better installation guide to the README --- README.md | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/README.md b/README.md index 8f78d8985..251a2dacd 100644 --- a/README.md +++ b/README.md @@ -26,12 +26,89 @@ --> # Table of Contents +1. [First-Time Installation](#First-Time-Installation) +2. [Using the Project] +3. [Stress testing the Project] +4. [(Appendix) Common Installation bugs] + 1. [Running the Indexing Project](#Running-the-Indexing-Project) 2. [ResilientDB Installation](#ResilientDB-Installation) 3. [ResilientDB Installation Bugs](#ResilientDB-Installation-Bugs) 4. [How to Run ResDB-ORM](#How-to-Run-ResDB-ORM) 5. [Stress Test KV](#Stress-Testing-KV) +## First-Time Installation +Forked from [this repository](https://github.com/apache/incubator-resilientdb), for more complex setup instructions, please head there. + +Hey all, Steven here. This is the quickstart guide to getting this project up and running. Vector Indexing itself only requires one line of setup (`pip install hnswlib sentence-transformers numpy`, from your venv or with python), but it is built on top of ResDB tooling that _does_ require setup (kv_store, graphQL server, and graphQL itself). This guide will walk you through setting those elements up for the first time. + +1. Clone this repo to your local device with `git clone https://github.com/apache/incubator-resilientdb.git` + +2. (Windows only) ResilientDB uses bash shell commands (.sh extension), which windows doesn't support natively. Fortunately, Windows 11 and most versions of Windows 10 have an easy to use subsystem for Linux, WSL. Link on how to setup [here](https://learn.microsoft.com/en-us/windows/wsl/install). +After installing WSL, you can open a bash terminal by running the program `Ubuntu`. 
This will open from the profile of your newly created User for WSL, but you can still access your Windows files via `cd ~/../../mnt`, which should navigate you to the location of your C/D drive. + +3. (Windows/WSL only) There's a mismatch between the way Windows and Linux end lines in files: in short, on Windows machines the shell scripts will all have an unnecessary `\r` (carriage return) character at the end of all shell files. This _will_ cause problems with execution of these files. Use the sed command (at the top-level of the cloned repo) to remove the extraneous characters of the install file: + +```bash +sudo sed -i 's/\r//g' INSTALL.sh +``` + +Unfortunately, this is a problem with every shell file in the repository. Instead of just running the above command, we recommend running it in the top-level directory and having the change propagate to all shell files: + +```bash +find . -type f -name '*sh' -exec sed -i 's/\r//g' {} \; +``` + +4. Navigate to the project folder and run the install script +```bash +sudo sh INSTALL.sh +``` + +5. The first component the indexing project is built upon is the kv_store. It is recommended that you do this step in a separate command line, as it can take control of the command line while running. From the top level of your project directory, run: + +```bash +./service/tools/kv/server_tools/start_kv_service.sh +``` + +Receiving one or more `nohup: redirecting stderr to stdout` messages indicates that the service is running. + +6. The second component the indexing project is built on is the graphql server. It is recommended that you do this step in a separate command line, as it can take control of the command line while running. 
Navigate to `ecosystem/graphql`, and run the following commands: + +```bash +# First-time installation only +bazel build service/http_server: +# Start the server +bazel-bin/service/http_server/crow_service_main service/tools/config/interface/service.config service/http_server/server_config.config +``` + +The first command may take some time to run. Receiving one or more `[INFO ]` messages indicates that the service is running. + +7. (optional) The third component that the indexing project is built on is the graphql tool itself. This server runs on python. It is a requirement of this project that python3.10 is used (see appendix for help using different python versions). While you can run this from your device's global python distribution, it is recommended that you use a venv, as follows: + +```bash +python3.10 -m venv venv +source venv/bin/activate +``` + +to leave the virtual environment, just run `deactivate` + +8. We need to run installation for graphql tooling. First navigate to `ecosystem/graphql`. Then, from your python venv or global python distribution, run: + +```bash +pip install -r requirements.txt +pip install hnswlib sentence-transformers numpy +``` + +Note that these commands can take a very long time to run (10+ minutes) + +Next, we need to start graphql. It is recommended that you do this step in a separate command line, as it can take control of the command line while running. While still in `ecosystem/graphql`, run the following command: + +```bash +python app.py +``` + + + ## Running the Indexing Project All user-facing code for this project is located in `ecosystem/sdk/vector-indexing`. As long as the **KV Service** and **GraphQL Server** are running, executing the python code directly through the command line will work - nothing needs to be built beforehand. 
This does need to be run with a python instance with the ResDB-orm package installed (we reccoment using a virtual environment) From 6a9b85fcdadf0fb3e5af0c03c61f16ff6c62e41b Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Fri, 12 Dec 2025 03:31:04 -0800 Subject: [PATCH 75/79] Added a correct commands for using the tooling --- README.md | 75 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index a922eee95..7de028a79 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ # Table of Contents 1. [First-Time Installation](#First-Time-Installation) -2. [Using the Project] +2. [Using the Project](#Using-the-Project) 3. [Stress testing the Project] 4. [(Appendix) Common Installation bugs] @@ -107,62 +107,67 @@ Next, we need to start graphql. It is reccomended that you do this step in a sep python app.py ``` +## Using the Project +### Starting Back Up Again +Every time you would like to use the project again, you need to again run the kv_store, graphql server and graphql itself: +```bash +# From the top-level directory +./service/tools/kv/server_tools/start_kv_service.sh +# From ecosystem/graphql +bazel-bin/service/http_server/crow_service_main service/tools/config/interface/service.config service/http_server/server_config.config +# From ecosystem/graphql, AND using your python virtual environment (if applicable) +python app.py +``` -## Running the Indexing Project -All user-facing code for this project is located in `ecosystem/sdk/vector-indexing`. As long as the **KV Service** and **GraphQL Server** are running, executing the python code directly through the command line will work - nothing needs to be built beforehand. This does need to be run with a python instance with the ResDB-orm package installed (we reccoment using a virtual environment) +Note that running each of these will most likely take away control of the terminal. 
-The project acts as a wrapper around the KV Service - (string) values added using our tools will save them to ResilientDB *in addition* to generating vector embeddings for those values (Embeddings are also saved in resilientDB). We then offer the ability to search for the k_closest embeddings based on an input value +### Commands +All of the functionality of this program is accessible through a CLI tool located in `ecosystem/sdk/vector-indexing/kv_vector.py` -### Adding a Value +#### Adding a Value From `ecosystem/sdk/vector-indexing`, run the command ```bash -python vector_add.py --value +python kv_vector.py --add <YOUR_STRING> ``` -- Flag `--value`: must be immediately followed by the value a user wishes to save. Omitting this flag will prevent the program from running. Duplicate values cannot be saved. -### Searching Across Embeddings -This is the main functionality of our project - the ability to search for the most similar values based on their embeddings. From `ecosystem/sdk/vector-indexing`, run the command +This will save both the value, as well as an embedding representing the value, to ResDB. + +- `YOUR_STRING` will be saved as a string, regardless of the format it is sent as. Duplicate values cannot be saved. + +#### Deleting a Value +From `ecosystem/sdk/vector-indexing`, run the command ```bash -python vector_get.py --value --k_matches +python kv_vector.py --delete <YOUR_STRING> ``` -- Flag `--value`: must be immediately followed by the value a user wishes to perform a similarity search for. Either this flag, or `--show_all` must be used - omitting both will prevent the program from running. -- Flag `--k_matches`: must immediately be followed by the k-most-similar matches to `--value` that a user wishes to retrieve. If this flag is omitted, a default of 1 will be used. -- Flag `--show_all`: does not require a second arguement. It will list every value that has been added to this instance of ResDB that has a correlated vector embedding.
Using this arguement will override the other two flags completely. -## ResilientDB Installation -Forked from [this repository](https://github.com/apache/incubator-resilientdb), for more complex setup instructions, please head there. +This removes both the value, as well as the embedding representing the value, from ResDB. If `YOUR_STRING` has not been saved through our tooling, nothing will happen. -Hey all, Steven here, this is the quickstart guide to getting ResDB up and running. If you've already setup and installed your repo, start from step 5. +#### Searching Across Embeddings +This is the main functionality of our project - the ability to search for the most similar values based on their embeddings. To get the k-closest values to a queried string, run: -1. Clone this repo to your local device with `git clone https://github.com/apache/incubator-resilientdb.git` - -2. (Windows only) ResilientDB uses bash shell commands (.sh extension), which windows doesn't support natively. Fortunately, Windows 11 and most versions of Windows 10 have an easy to use subsystem for Linux, WSL. Link on how to setup [here](https://learn.microsoft.com/en-us/windows/wsl/install). -After installing WSL, you can open a bash terminal by running the program `Ubuntu`. This will open from the profile of your newly created User for WSL, but you can still access to your Windows files in windows via `cd ~/../../mnt`, which should navigate you to the location of your C/D drive. - -3. (Windows only?) There's a mismatch between the way Windows and Linux ends lines in files, in short, on Windows machines the shell scripts will all have an unnecessary `\r` (carriage return) character at the end of all shell files. This _will_ cause problems with execution of these files.
Use the sed command (at the top-level of the cloned repo) to remove the extraneous characters of the install file: - -``` -sudo sed -i 's/\r//g' INSTALL.sh +```bash +python kv_vector.py --get <YOUR_STRING> --k_matches <YOUR_INTEGER> ``` -Unfortunately, this is a problem with every shell file in the repository. To fix this, we added a script that will recursively remove the CR from every file in the repo before running install: +This will return the `YOUR_INTEGER` most similar currently stored values to `YOUR_STRING`, along with their similarity scores. -``` -sudo sh WSL_INSTALL.sh -``` +- If `--k_matches` is omitted, a value of k=1 will be used. If a non-integer value is used for this input, the program will terminate. -4. Navigate to the project folder and run `sudo sh INSTALL.sh` (unless you've already run WSL_INSTALL) +There is also a command to see all values saved to ResDB that this tool has generated an embedding for: -5. To start the k/v store, run `./service/tools/kv/server_tools/start_kv_service.sh` - -6. To start tools for the k/v store, run `bazel build service/tools/kv/api_tools/kv_service_tools` +```bash +python kv_vector.py --getAll +``` -If you're starting from step 1, you'll more likely than not run into bugs. Here are a list of ones we've come across and their fixes: +#### Other +To get a brief recap of all of this functionality, you can run: -\ +```bash +python kv_vector.py --help +``` ## ResilientDB Installation Bugs ### Carriage returns & running shell files on Windows From 685853467a722c2d9fef38746e77cc8b6598b59a Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Fri, 12 Dec 2025 03:36:22 -0800 Subject: [PATCH 76/79] Extra changes to the readme --- README.md | 115 ++++++++++++------------------------------------------ 1 file changed, 25 insertions(+), 90 deletions(-) diff --git a/README.md b/README.md index 7de028a79..e1dc568b8 100644 --- a/README.md +++ b/README.md @@ -28,8 +28,8 @@ # Table of Contents 1. [First-Time Installation](#First-Time-Installation) 2.
[Using the Project](#Using-the-Project) -3. [Stress testing the Project] -4. [(Appendix) Common Installation bugs] +3. [Stress testing the Project](#Stress-testing-the-Project) +4. [(Appendix) Common Installation bugs](#(Appendix)-Common-Installation-Bugs) 1. [Running the Indexing Project](#Running-the-Indexing-Project) 2. [ResilientDB Installation](#ResilientDB-Installation) @@ -169,7 +169,29 @@ To get a brief recap of all of this functionality, you can run: python kv_vector.py --help ``` -## ResilientDB Installation Bugs +## Stress Testing the Project + +We tested for the storage limit of big values. In this configuration: +1. 8GB RAM Shell +2. Standard 5 replica config from `./service/tools/kv/server_tools/start_kv_service.sh` + +The results was that around 150-200mb values will cause the KV store to have long delays on operations. You can read more in `hnsw-test/index_test/README.md` along with the testing kit. + +## (Appendix) Common Installation Bugs + +### Using Python3.10 +The project will not be able to install the correct dependencies for graphql if a version aside from python3.10 is used. Specifically, python3.10 needs to create the virtual environment, or be the globally installed version of python if all commands are run outside of a venv. + +There are several ways of doing this, but we recommend using deadsnakes: + +```bash +sudo apt install software-properties-common +sudo add-apt-repository ppa:deadsnakes/ppa +sudo apt install python3.10 python3.10-dev python3.10-venv +``` + +This will create a command in your terminal, `python3.10`, which can be used to create the venv. + ### Carriage returns & running shell files on Windows For Windows (and mac?) users, we need to make bash files friendly for your OS. To do this, we can just run a simple character replacement program on any shell files, `sed -i 's/\r//g' YOUR_SHELL_SCRIPT.sh`.
We talk about doing this for INSTALL.sh and start_kv_service.sh in the Installation guide, but it will need to be done for any shell file you want to run. For issues with sed, instead run and `dos2unix YOUR_SHELL_SCRIPT.sh` @@ -289,90 +311,3 @@ g++ --version Re-running the INSTALL and kv_start scripts should work now. This took me a couple tries to get right, and mistakes with `update-alternatives` were tough to recover from. Uninstalling WSL/Ubuntu then reinstalling it fresh always gets a fresh version of gcc / g++ that works again. Note that this will remove everything in your _Ubuntu_ distro (not everything on your computer) - -## How to Run ResDB-ORM - -To run ResDB-ORM, you must first start the backend services (**KV Service** and **GraphQL Server**) and then connect to them using **ResDB-ORM**. - -### First Time Setup - -Running **ResDB-ORM** will always involve starting the **KV Service** and **GraphQL Server** as mentioned above. However, a few things must be done to create the environment first. - -### Step 1: Start the KV Service -Run the following script in your top-level indexers-ECS265-Fall2025 directory: -```bash -./service/tools/kv/server_tools/start_kv_service.sh -``` -Reciving one or more `nohup: redirecting stderr to stdout` messages indicates that the service is running. Note that this may take over control of your WSL instance. Do not close out of the terminal, instead continue on a new terminal. - -### Step 2: Start the GraphQL Server -(1) Run the following script in your ecosystem/graphql directory: -```bash -cd ./ecosystem/graphql -bazel build service/http_server:crow_service_main -bazel-bin/service/http_server/crow_service_main service/tools/config/interface/service.config service/http_server/server_config.config -``` -The first command may take some time to run. - -Reciving one or more `[INFO ]` messages indicates that the service is running. Note that this may take over control of your WSL instance. 
Do not close out of the terminal, instead continue on a new terminal. - -(2) After running bazel-bin, you should recieve a message with the format `[INFO ] Crow/1.0 server is running at http://0.0.0.0:18000 using ~ threads`. Copy and save the URL (in this case `http://0.0.0.0:18000`) for the next step. - -### Step 3 Open ```config.yaml``` and Update the db_root_url with the GraphQL Server URL you Copied in Step 2. -Open the file `indexers-ECS265-Fall2025/ecosystem/sdk/resdb-orm/config.yaml`, it will be a small config file. Replace `` with the exact url you copied above like this: -```yaml -database: - db_root_url: -``` - -More likely than not, it will be `http://0.0.0.0:18000` for you too, and that will match what is currently in the config file. - -### Step 4 Create the Python Virtual Environment -GraphQL requires python packages, so we use a virtual environment to run them (though in practice, these packages could be/may already be installed globally). Note that while we create this virtual environment at the top-level, it operates excusively on ResDB-orm and *could* be placed in there instead. - -Open a new terminal tab, then setup and start the GraphQL server: - -(1) Create a virtual environment: -```bash -python3.10 -m venv venv -``` -(2) activate the virtual environment: -```bash -source venv/bin/activate -``` -(3) install the necessary packages (may take awhile): -```bash -pip install -r ./ecosystem/sdk/resdb-orm/requirements.txt -pip install resdb-orm -``` - -The first install command may take some time to run. Your terminal will be free when it's done. 
- -### Step 5: Run the test script to ensure everything is working correctly: -To run the ResDB-orm test code, you need to change into the resdb-orm directory: -```bash -cd ./ecosystem/sdk/resdb-orm -python tests/test.py -``` - -### Step 6+: Re-Running ResdDB-orm in the future -As long as the setup is successful, you will only need to run these two commands to spin up the **KV Service** and **GraphQL Server** in the future: -```bash -# From the top-level directory -./service/tools/kv/server_tools/start_kv_service.sh -# From the ecosystem/graphql directory -bazel-bin/service/http_server/crow_service_main service/tools/config/interface/service.config service/http_server/server_config.config -``` - -Note that each of these commands will prevent input on the terminal you run them in. - -To interact with ResDB-orm, spin up the python instance running it: `source venv/bin/activate`. To leave this Python environment and return to bash, just type `deactivate`. - - -## Stress Testing KV - -We tested for the storage limit of big values. In this configuration: -1. 8GB RAM Shell -2. Standard 5 replica config from `./service/tools/kv/server_tools/start_kv_service.sh` - -The results was that around 150-200mb values will cause the KV store to have long delays on operations. You can read more in `hnsw-test/index_test/README.md` along with the testing kit. \ No newline at end of file From d1c7177a70436a2344ebf13ff4ab29550fe502df Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Fri, 12 Dec 2025 03:37:41 -0800 Subject: [PATCH 77/79] Cleaning up unclear code in the ReadME --- README.md | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/README.md b/README.md index e1dc568b8..25d66bd99 100644 --- a/README.md +++ b/README.md @@ -31,12 +31,6 @@ 3. [Stress testing the Project](#Stress-testing-the-Project) 4. [(Appendix) Common Installation bugs](#(Appendix)-Common-Installation-Bugs) -1. [Running the Indexing Project](#Running-the-Indexing-Project) -2. 
[ResilientDB Installation](#ResilientDB-Installation) -3. [ResilientDB Installation Bugs](#ResilientDB-Installation-Bugs) -4. [How to Run ResDB-ORM](#How-to-Run-ResDB-ORM) -5. [Stress Test KV](#Stress-Testing-KV) - ## First-Time Installation Forked from [this repository](https://github.com/apache/incubator-resilientdb), for more complex setup instructions, please head there. @@ -83,7 +77,7 @@ bazel-bin/service/http_server/crow_service_main service/tools/config/interface/s The first command may take some time to run. Reciving one or more `[INFO ]` messages indicates that the service is running. -7. (optional) The third component that the indexing project is built on is the graphql tool itself. This server runs on python. It is a requirement of this project that python3.10 is used (see appendix for help using different python versions). While you can run this from your device's global python distribution, it is reccomended that you use a venv, as following: +7. (optional) The third component that the indexing project is built on is the graphql tool itself. This server runs on python. It is a requirement of this project that python3.10 is used (see [the appendix](#(Appendix)-Common-Installation-Bugs) for help using different python versions). 
While you can run this from your device's global python distribution, it is reccomended that you use a venv, as following: ```bash python3.10 -m venv venv From 651f22c631b58e92576c5b7fedcfd347cf39bbb5 Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Fri, 12 Dec 2025 18:26:04 -0800 Subject: [PATCH 78/79] Moved all indexing files to the ecosystem folder or deleted unused top-level files --- README.md | 2 +- WSL_INSTALL.sh | 2 - .../sdk/vector-indexing/hnsw-test}/README.md | 0 .../hnsw-test}/index_test/README.md | 0 .../index_test/benchmark_results.txt | 0 .../hnsw-test}/index_test/benchmark_set.py | 0 .../hnsw-test}/index_test/gen_files.py | 0 .../hnsw-test}/index_test/multi_benchmarks.py | 0 .../hnsw-test}/index_test/size.txt | 0 .../hnsw-test}/leann_simple_kvs.py | 0 .../hnsw-test}/my_hashmap.index | Bin .../hnsw-test}/my_hashmap.leann.meta.json | 0 .../hnsw-test}/my_hashmap.leann.passages.idx | Bin .../my_hashmap.leann.passages.jsonl | 0 .../vector-indexing/hnsw-test}/quick_start.py | 0 hnsw_orm_test/README.md | 123 --------------- hnsw_orm_test/config.py | 33 ---- hnsw_orm_test/diagnose_db.py | 66 -------- hnsw_orm_test/indexer.py | 130 ---------------- hnsw_orm_test/manage_data.py | 143 ------------------ hnsw_orm_test/populate.py | 51 ------- hnsw_orm_test/search.py | 121 --------------- 22 files changed, 1 insertion(+), 670 deletions(-) delete mode 100644 WSL_INSTALL.sh rename {hnsw-test => ecosystem/sdk/vector-indexing/hnsw-test}/README.md (100%) rename {hnsw-test => ecosystem/sdk/vector-indexing/hnsw-test}/index_test/README.md (100%) rename {hnsw-test => ecosystem/sdk/vector-indexing/hnsw-test}/index_test/benchmark_results.txt (100%) rename {hnsw-test => ecosystem/sdk/vector-indexing/hnsw-test}/index_test/benchmark_set.py (100%) rename {hnsw-test => ecosystem/sdk/vector-indexing/hnsw-test}/index_test/gen_files.py (100%) rename {hnsw-test => ecosystem/sdk/vector-indexing/hnsw-test}/index_test/multi_benchmarks.py (100%) rename {hnsw-test => 
ecosystem/sdk/vector-indexing/hnsw-test}/index_test/size.txt (100%) rename {hnsw-test => ecosystem/sdk/vector-indexing/hnsw-test}/leann_simple_kvs.py (100%) rename {hnsw-test => ecosystem/sdk/vector-indexing/hnsw-test}/my_hashmap.index (100%) rename {hnsw-test => ecosystem/sdk/vector-indexing/hnsw-test}/my_hashmap.leann.meta.json (100%) rename {hnsw-test => ecosystem/sdk/vector-indexing/hnsw-test}/my_hashmap.leann.passages.idx (100%) rename {hnsw-test => ecosystem/sdk/vector-indexing/hnsw-test}/my_hashmap.leann.passages.jsonl (100%) rename {hnsw-test => ecosystem/sdk/vector-indexing/hnsw-test}/quick_start.py (100%) delete mode 100644 hnsw_orm_test/README.md delete mode 100644 hnsw_orm_test/config.py delete mode 100644 hnsw_orm_test/diagnose_db.py delete mode 100644 hnsw_orm_test/indexer.py delete mode 100644 hnsw_orm_test/manage_data.py delete mode 100644 hnsw_orm_test/populate.py delete mode 100644 hnsw_orm_test/search.py diff --git a/README.md b/README.md index 25d66bd99..9b7f43b13 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ We tested for the storage limit of big values. In this configuration: 1. 8GB RAM Shell 2. Standard 5 replica config from `./service/tools/kv/server_tools/start_kv_service.sh` -The results was that around 150-200mb values will cause the KV store to have long delays on operations. You can read more in `hnsw-test/index_test/README.md` along with the testing kit. +The results was that around 150-200mb values will cause the KV store to have long delays on operations. You can read more in `ecosystem/sdk/vector-indexing/hnsw-test/index_test/README.md` along with the testing kit. ## (Appendix) Common Installation Bugs diff --git a/WSL_INSTALL.sh b/WSL_INSTALL.sh deleted file mode 100644 index fc9d1b485..000000000 --- a/WSL_INSTALL.sh +++ /dev/null @@ -1,2 +0,0 @@ -find . 
-type f -name '*sh' -exec sed -i 's/\r//g' {} \; -sudo sh INSTALL.sh \ No newline at end of file diff --git a/hnsw-test/README.md b/ecosystem/sdk/vector-indexing/hnsw-test/README.md similarity index 100% rename from hnsw-test/README.md rename to ecosystem/sdk/vector-indexing/hnsw-test/README.md diff --git a/hnsw-test/index_test/README.md b/ecosystem/sdk/vector-indexing/hnsw-test/index_test/README.md similarity index 100% rename from hnsw-test/index_test/README.md rename to ecosystem/sdk/vector-indexing/hnsw-test/index_test/README.md diff --git a/hnsw-test/index_test/benchmark_results.txt b/ecosystem/sdk/vector-indexing/hnsw-test/index_test/benchmark_results.txt similarity index 100% rename from hnsw-test/index_test/benchmark_results.txt rename to ecosystem/sdk/vector-indexing/hnsw-test/index_test/benchmark_results.txt diff --git a/hnsw-test/index_test/benchmark_set.py b/ecosystem/sdk/vector-indexing/hnsw-test/index_test/benchmark_set.py similarity index 100% rename from hnsw-test/index_test/benchmark_set.py rename to ecosystem/sdk/vector-indexing/hnsw-test/index_test/benchmark_set.py diff --git a/hnsw-test/index_test/gen_files.py b/ecosystem/sdk/vector-indexing/hnsw-test/index_test/gen_files.py similarity index 100% rename from hnsw-test/index_test/gen_files.py rename to ecosystem/sdk/vector-indexing/hnsw-test/index_test/gen_files.py diff --git a/hnsw-test/index_test/multi_benchmarks.py b/ecosystem/sdk/vector-indexing/hnsw-test/index_test/multi_benchmarks.py similarity index 100% rename from hnsw-test/index_test/multi_benchmarks.py rename to ecosystem/sdk/vector-indexing/hnsw-test/index_test/multi_benchmarks.py diff --git a/hnsw-test/index_test/size.txt b/ecosystem/sdk/vector-indexing/hnsw-test/index_test/size.txt similarity index 100% rename from hnsw-test/index_test/size.txt rename to ecosystem/sdk/vector-indexing/hnsw-test/index_test/size.txt diff --git a/hnsw-test/leann_simple_kvs.py b/ecosystem/sdk/vector-indexing/hnsw-test/leann_simple_kvs.py similarity index 
100% rename from hnsw-test/leann_simple_kvs.py rename to ecosystem/sdk/vector-indexing/hnsw-test/leann_simple_kvs.py diff --git a/hnsw-test/my_hashmap.index b/ecosystem/sdk/vector-indexing/hnsw-test/my_hashmap.index similarity index 100% rename from hnsw-test/my_hashmap.index rename to ecosystem/sdk/vector-indexing/hnsw-test/my_hashmap.index diff --git a/hnsw-test/my_hashmap.leann.meta.json b/ecosystem/sdk/vector-indexing/hnsw-test/my_hashmap.leann.meta.json similarity index 100% rename from hnsw-test/my_hashmap.leann.meta.json rename to ecosystem/sdk/vector-indexing/hnsw-test/my_hashmap.leann.meta.json diff --git a/hnsw-test/my_hashmap.leann.passages.idx b/ecosystem/sdk/vector-indexing/hnsw-test/my_hashmap.leann.passages.idx similarity index 100% rename from hnsw-test/my_hashmap.leann.passages.idx rename to ecosystem/sdk/vector-indexing/hnsw-test/my_hashmap.leann.passages.idx diff --git a/hnsw-test/my_hashmap.leann.passages.jsonl b/ecosystem/sdk/vector-indexing/hnsw-test/my_hashmap.leann.passages.jsonl similarity index 100% rename from hnsw-test/my_hashmap.leann.passages.jsonl rename to ecosystem/sdk/vector-indexing/hnsw-test/my_hashmap.leann.passages.jsonl diff --git a/hnsw-test/quick_start.py b/ecosystem/sdk/vector-indexing/hnsw-test/quick_start.py similarity index 100% rename from hnsw-test/quick_start.py rename to ecosystem/sdk/vector-indexing/hnsw-test/quick_start.py diff --git a/hnsw_orm_test/README.md b/hnsw_orm_test/README.md deleted file mode 100644 index 6e9d9893e..000000000 --- a/hnsw_orm_test/README.md +++ /dev/null @@ -1,123 +0,0 @@ -# ResilientDB x LEANN Vector Search Integration (Under construction) -This document explains the internal logic and specifications of indexer.py (a resident index construction service) and manage_data.py (a CLI tool for data manipulation), which are core components for keeping data on ResilientDB in a vector-searchable state. - -## 1. 
Indexer Service (indexer.py) -indexer.py monitors the blockchain (ResilientDB) as the "Single Source of Truth" and acts as a resident process to automatically synchronize the local vector search index (HNSW). - -### 1.1 Overview and Responsibilities -Polling Monitoring: Periodically fetches all transactions from the database and detects changes. - -State Restoration (Log Replay): Replays the append-only transaction log in chronological order to construct the current state of each key in memory. - -Vectorization and Index Construction: Vectorizes the latest text data using an Embedding Model, constructs an HNSW graph structure, and saves it to a file. - -### 1.2 Main Classes and Functions -SafeResDBORM(ResDBORM) -A wrapper class inheriting from the ResDBORM class of the resdb_orm library, designed to improve network communication stability. - -read_all(self): Fetches all data from the /v1/transactions endpoint. It includes timeout settings and exception handling to prevent the process from crashing even if ResilientDB becomes temporarily unresponsive. - -main(): The main service loop, which repeats the following steps every POLL_INTERVAL (configuration value, default is 15 seconds). - -### 1.3 Processing Flow Details -The index update process is executed according to the following logic: - -Change Detection: - -Compares the number of transactions fetched in the previous loop (last_tx_count) with the number fetched this time. - -The index reconstruction process starts only if current_count > last_tx_count. - -Event Extraction and Normalization: - -Extracts necessary fields (original_key, operation, text, timestamp) from the fetched raw transaction data. - -JSON parse errors or data in invalid formats are skipped. - -Chronological Sorting: - -Since the arrival order of transactions is not guaranteed in distributed systems, events are sorted in ascending order based on the timestamp within the payload. 
- -Log Replay (State Application): - -Applies sorted events sequentially from the beginning to update the active_docs dictionary. - -Add / Upsert: Registers the key and text in the dictionary (overwrites if it already exists). - -Update: Updates the content only if the key exists in the dictionary. Update events for non-existent keys are ignored (to prevent inconsistency). - -Delete: Removes the entry if the key exists in the dictionary. - -Index Construction via LeANN: - -Creates an index using the LeANN library for the valid documents remaining in active_docs. - -Saved Files: - -resdb.leann: The vector index body. - -id_mapping.json: Metadata linking search result IDs to the original keys (original_key) and text previews. - -## 2. Data Manager (manage_data.py) -manage_data.py is an interface that allows users to insert and manipulate data in ResilientDB from the command line. It is not just a simple HTTP client; it possesses pre-check functions to maintain data integrity. - -### 2.1 Overview -Operations: Supports adding (add), updating (update), and deleting (delete) data. - -Soft Validation: Includes a feature to check if the target key exists in the database before performing modification operations (update/delete) and issues a warning if it does not. - -### 2.2 Command Line Usage -```Bash - -# Add new data -python3 manage_data.py add - -# Update existing data -python3 manage_data.py update - -# Delete data -python3 manage_data.py delete -``` - -### 2.3 Internal Logic and Validation Features -SafeResDBORM.read_all() (Retry Logic) -Similar to the class in indexer.py, but this one adds logic to retry up to 3 times (max_retries = 3) in case of network errors. - -get_active_keys(db) -Fetches all transactions currently in the database and uses the same Log Replay logic as indexer.py to generate a "list of currently valid keys". - -add_event(key, text, op_type) -The core function for transaction generation. 
- -Integrity Check (Soft Validation): - -If the operation type is update or delete, it calls get_active_keys() to confirm whether the target key exists. - -Warning: If the key is not found, it displays a warning: [WARNING] Key '...' was not found. - -Design Intent: Due to the nature of blockchains, there is a lag between writing and reflection (Eventual Consistency). Therefore, the design does not stop on error but warns the user and proceeds with sending the transaction. - -Payload Creation: - -Creates a JSON object with the following structure: - -``` -payload = { - "original_key": key, - "text": text, - "timestamp": time.time(), # Current time for order guarantee - "operation": op_type, # "add", "update", "delete" - "type": "vector_source" -} -``` - -Transaction Submission: Sends data to ResilientDB via the ORM and displays a part of the transaction ID upon success. - -### 3. Summary: Relationship Between the Two Scripts -These two scripts have a relationship close to the Command Query Responsibility Segregation (CQRS) pattern. - -Write Side (manage_data.py): Handles data writing (Commands). Instead of directly modifying the database state, it appends operation logs (events). - -Read Side (indexer.py): Handles data reading (Queries). It aggregates and processes the written event logs to generate a "Read Model (Vector Index)" optimized for search. - -This architecture realizes high-speed vector search functionality while leveraging the append-only ledger characteristics of ResilientDB. 
diff --git a/hnsw_orm_test/config.py b/hnsw_orm_test/config.py deleted file mode 100644 index 066ebd97b..000000000 --- a/hnsw_orm_test/config.py +++ /dev/null @@ -1,33 +0,0 @@ -import os -from pathlib import Path - -# --- Environment Settings --- -# Path to the ResDB-ORM configuration file (Auto-detection) -RESDB_CONFIG_PATH = Path.home() / "incubator-resilientdb-ResDB-ORM" / "config.yaml" -if not RESDB_CONFIG_PATH.exists(): - potential_local_path = Path("config.yaml").resolve() - if potential_local_path.exists(): - RESDB_CONFIG_PATH = potential_local_path - else: - env_path = os.getenv("RESDB_CONFIG_FILE") - if env_path: - RESDB_CONFIG_PATH = Path(env_path) - -# --- Model Settings (Lightweight!) --- -# prajjwal1/bert-tiny: Approx. 17MB, 128 dimensions -MODEL_NAME = "prajjwal1/bert-tiny" - -# --- Directory Settings --- -# Directory where data will be saved -BASE_DIR = Path("./leann_resdb_tiny").resolve() -BASE_DIR.mkdir(parents=True, exist_ok=True) - -# Path to the index (Logical path) -# In reality, files like resdb.index will be generated -INDEX_PATH = BASE_DIR / "resdb.leann" -MAPPING_PATH = BASE_DIR / "id_mapping.json" - -# Polling interval (seconds) -POLL_INTERVAL = 15 - -print(f"Config: Using model '{MODEL_NAME}' at {BASE_DIR}") \ No newline at end of file diff --git a/hnsw_orm_test/diagnose_db.py b/hnsw_orm_test/diagnose_db.py deleted file mode 100644 index bc1fbb702..000000000 --- a/hnsw_orm_test/diagnose_db.py +++ /dev/null @@ -1,66 +0,0 @@ -import requests -import json -import yaml -from pathlib import Path -import config - -def diagnose(): - print("=== ResilientDB Diagnosis Tool ===") - - # 1. Get URL from config file - try: - with open(config.RESDB_CONFIG_PATH, 'r') as f: - conf = yaml.safe_load(f) - url = conf['database']['db_root_url'] - print(f"Target URL: {url}") - except Exception as e: - print(f"Error loading config: {e}") - return - - # 2. 
Hit the endpoint to get all the data - target_endpoint = f"{url}/v1/transactions" - print(f"Requesting: {target_endpoint} ...") - - try: - response = requests.get(target_endpoint) - print(f"Status Code: {response.status_code}") - - # Show raw response content - content = response.text - print(f"Raw Response Length: {len(content)}") - print(f"Raw Response Preview (first 500 chars):\n{content[:500]}") - - if not content: - print("\n[Error] Response body is EMPTY. The database returned no data.") - print("Check if ResilientDB is running and if data was actually persisted.") - return - - # Attempt to decode JSON - try: - data = response.json() - print(f"\nSuccess! Parsed JSON with {len(data)} records.") - - # Simple check if doc1 exists - found_keys = [] - for tx in data: - try: - if isinstance(tx.get('data'), str): - payload = json.loads(tx['data']) - else: - payload = tx.get('data') - - if isinstance(payload, dict) and 'original_key' in payload: - found_keys.append(payload['original_key']) - except: - pass - print(f"Found keys in DB: {found_keys}") - - except json.JSONDecodeError as e: - print(f"\n[Error] JSON Decode Failed: {e}") - print("The database response is not valid JSON.") - - except Exception as e: - print(f"\n[Fatal Error] Request failed: {e}") - -if __name__ == "__main__": - diagnose() \ No newline at end of file diff --git a/hnsw_orm_test/indexer.py b/hnsw_orm_test/indexer.py deleted file mode 100644 index 0c340aa2b..000000000 --- a/hnsw_orm_test/indexer.py +++ /dev/null @@ -1,130 +0,0 @@ -import time -import json -import os -import gc -import requests -from resdb_orm.orm import ResDBORM -from leann import LeannBuilder -import config - -class SafeResDBORM(ResDBORM): - def read_all(self): - try: - url = f'{self.db_root_url}/v1/transactions' - response = requests.get(url, timeout=10) - if response.status_code == 200 and response.content: - return response.json() - return [] - except: - return [] - -def main(): - os.environ["OMP_NUM_THREADS"] = "1" - 
os.environ["TOKENIZERS_PARALLELISM"] = "false" - - print(f"Indexer Service Started. Model: {config.MODEL_NAME}") - - db = SafeResDBORM(config_path=str(config.RESDB_CONFIG_PATH)) - last_tx_count = 0 - - while True: - try: - all_txs = db.read_all() - current_count = len(all_txs) - - if current_count > last_tx_count: - print(f"\n[Change Detected] {last_tx_count} -> {current_count} transactions.") - - # 1. Extract events - events = [] - for tx in all_txs: - try: - data = tx.get('data') - if isinstance(data, str): - try: data = json.loads(data) - except: data = {"text": data} - - if not isinstance(data, dict): continue - - ts = float(data.get('timestamp', 0)) - key = data.get('original_key') - # If the operation field is missing, it is treated as 'upsert' (forced overwrite). - op = data.get('operation', 'upsert') - text = data.get('text', '') - - if key: - events.append({"key": key, "op": op, "text": text, "ts": ts, "id": str(tx['id'])}) - except: - continue - - # 2. Sort by timestamp - events.sort(key=lambda x: x['ts']) - - # 3. Replay state (Filtering Logic) - active_docs = {} - for ev in events: - key = ev['key'] - op = ev['op'] - - if op == 'delete': - if key in active_docs: - del active_docs[key] - elif op == 'update': - # Reload if the key exists - if key in active_docs: - active_docs[key] = { - "text": ev['text'], - "resdb_id": ev['id'], - "original_key": key - } - else: - # Ignore updates to non-existent keys and log them. - print(f"Warning: Ignored 'update' for non-existent key: '{key}'") - else: - # 'add' saves unconditionally - active_docs[key] = { - "text": ev['text'], - "resdb_id": ev['id'], - "original_key": key - } - - # 4. 
Build index - valid_docs = list(active_docs.values()) - if valid_docs: - print(f"Rebuilding index for {len(valid_docs)} documents...") - - start_time = time.time() - builder = LeannBuilder(backend_name="hnsw", model=config.MODEL_NAME) - for d in valid_docs: - builder.add_text(d['text']) - - builder.build_index(str(config.INDEX_PATH)) - - elapsed_time = time.time() - start_time - - mapping_data = [{ - "resdb_id": d['resdb_id'], - "original_key": d['original_key'], - "preview": d['text'][:60] - } for d in valid_docs] - - with open(config.MAPPING_PATH, 'w') as f: - json.dump(mapping_data, f, indent=2) - - print(f"Index updated. Time: {elapsed_time:.4f}s") - else: - print("Index cleared (no active documents).") - with open(config.MAPPING_PATH, 'w') as f: - json.dump([], f) - - last_tx_count = current_count - if 'builder' in locals(): del builder - gc.collect() - - except Exception as e: - print(f"Polling error: {e}") - - time.sleep(config.POLL_INTERVAL) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/hnsw_orm_test/manage_data.py b/hnsw_orm_test/manage_data.py deleted file mode 100644 index 2c52b8ddc..000000000 --- a/hnsw_orm_test/manage_data.py +++ /dev/null @@ -1,143 +0,0 @@ -import argparse -import json -import time -import requests -from resdb_orm.orm import ResDBORM -import config - -class SafeResDBORM(ResDBORM): - """ - A wrapper class for ResDBORM that includes error handling - and retry logic for network requests. 
- """ - def read_all(self): - max_retries = 3 - for attempt in range(max_retries): - try: - url = f'{self.db_root_url}/v1/transactions' - response = requests.get(url, timeout=5) - - if response.status_code == 200: - return response.json() if response.content else [] - elif response.status_code == 503: - time.sleep(2) - continue - except Exception: - time.sleep(1) - return [] - -def get_db(): - try: - return SafeResDBORM(config_path=str(config.RESDB_CONFIG_PATH)) - except Exception as e: - print(f"Connection failed: {e}") - return None - -def get_active_keys(db): - """ - Fetches all transactions to determine which keys currently exist. - Used for validation warnings. - """ - all_txs = db.read_all() - events = [] - - for tx in all_txs: - try: - data = tx.get('data') - if isinstance(data, str): - try: data = json.loads(data) - except: data = {"text": data} - - if not isinstance(data, dict): continue - - ts = float(data.get('timestamp', 0)) - key = data.get('original_key') - op = data.get('operation', 'upsert') - - if key: - events.append({"key": key, "op": op, "ts": ts}) - except: - continue - - events.sort(key=lambda x: x['ts']) - - active_keys = set() - for ev in events: - if ev['op'] == 'delete': - active_keys.discard(ev['key']) - elif ev['op'] == 'update': - pass - else: - active_keys.add(ev['key']) - - return active_keys - -def add_event(key, text, op_type): - """ - Logs an event to ResilientDB. - Performs a soft validation: Warns if key is missing but allows execution - to handle eventual consistency (lag). - """ - db = get_db() - if not db: return - - # --- SOFT VALIDATION LOGIC --- - if op_type in ["update", "delete"]: - print(f"Checking key status for '{key}'...") - active_keys = get_active_keys(db) - - if key not in active_keys: - print(f"\n[WARNING] Key '{key}' was not found in the current database state.") - print(" -> If you JUST created this key, this is normal (propagation lag). 
Proceeding...") - print(" -> If this is a typo, the update will be IGNORED by the indexer.\n") - # We do NOT return here; we proceed to send the transaction. - else: - print(f"Key '{key}' found. Proceeding with {op_type}.") - # ----------------------------- - - payload = { - "original_key": key, - "text": text, - "timestamp": time.time(), - "operation": op_type, - "type": "vector_source" - } - - time.sleep(0.5) - - try: - tx_id = db.create(payload) - if isinstance(tx_id, str): - # Message changed to "Request Sent" to be accurate - print(f"[{op_type.upper()} REQUEST SENT] Key: '{key}' (Tx: {tx_id[:8]}...)") - else: - print(f"[{op_type.upper()}] Failed: {tx_id}") - except Exception as e: - print(f"Error sending transaction: {e}") - -def main(): - parser = argparse.ArgumentParser(description="ResilientDB Vector Data Manager") - subparsers = parser.add_subparsers(dest="command", required=True) - - p_add = subparsers.add_parser("add") - p_add.add_argument("key") - p_add.add_argument("text") - - p_upd = subparsers.add_parser("update") - p_upd.add_argument("key") - p_upd.add_argument("text") - - p_del = subparsers.add_parser("delete") - p_del.add_argument("key") - - args = parser.parse_args() - - if args.command == "add": - add_event(args.key, args.text, "add") - elif args.command == "update": - add_event(args.key, args.text, "update") - elif args.command == "delete": - add_event(args.key, "", "delete") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/hnsw_orm_test/populate.py b/hnsw_orm_test/populate.py deleted file mode 100644 index 0b15a5b72..000000000 --- a/hnsw_orm_test/populate.py +++ /dev/null @@ -1,51 +0,0 @@ -import time -from resdb_orm.orm import ResDBORM -import config # Import common settings - -# Data to populate (Hash map) -data_map = { - "doc1": "LEANN saves 97% storage compared to traditional vector databases.", - "doc2": "Tung Tung Tung Sahur called—they need their banana-crocodile hybrid back", - "doc3": "The weather in 
Davis is sunny today.", - "doc4": "Understanding consensus protocols is key for blockchain.", - "doc5": "ResilientDB is a high-throughput blockchain fabric designed for performance.", - "doc6": "This project explores novel techniques for sharding in distributed ledgers.", - "doc7": "DeFi applications are often built on top of smart contracts.", - "doc8": "Practical Byzantine Fault Tolerance (PBFT) is a foundational agreement protocol.", - "doc9": "Cross-chain communication enables interoperability between different blockchains.", - "doc10": "The project requires using the ResilientDB Fabric unless approved otherwise.", - "doc11": "Mitochondria are the powerhouse of the cell.", - "doc12": "How to bake a perfect sourdough bread with a starters.", - "doc13": "The final report must be written in LaTeX using ACM templates.", - "doc14": "UC Davis is known for its agricultural studies." -} - -def main(): - print(f"Connecting to ResilientDB via {config.RESDB_CONFIG_PATH}...") - try: - db = ResDBORM(config_path=str(config.RESDB_CONFIG_PATH)) - except Exception as e: - print(f"Connection failed: {e}") - return - - print(f"Starting ingestion of {len(data_map)} documents...") - - for key, text in data_map.items(): - # Insert in a format easy for the indexer to recognize - payload = { - "text": text, - "original_key": key, - "type": "vector_source" - } - - try: - tx_id = db.create(payload) - print(f"Stored '{key}': {tx_id}") - time.sleep(0.2) # Short sleep to reduce load - except Exception as e: - print(f"Failed to store {key}: {e}") - - print("\n Data population complete!") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/hnsw_orm_test/search.py b/hnsw_orm_test/search.py deleted file mode 100644 index 419e5e77c..000000000 --- a/hnsw_orm_test/search.py +++ /dev/null @@ -1,121 +0,0 @@ -import json -import os -import time -import sys -from pathlib import Path -from leann import LeannSearcher -import config - -class AutoSearcher: - def 
__init__(self): - self.searcher = None - self.id_mapping = [] - self.last_mtime = 0 - self.index_file = self._resolve_index_file() - self.mapping_file = config.MAPPING_PATH - self.reload_index() - - def _resolve_index_file(self): - """実際のインデックスファイルパスを解決する""" - # leannは .index を付与する場合があるためチェック - candidates = [ - config.INDEX_PATH, - config.INDEX_PATH.with_suffix(".index"), - Path(str(config.INDEX_PATH) + ".index") - ] - for p in candidates: - if p.exists(): - return p - return config.INDEX_PATH # デフォルト - - def _get_mtime(self): - """マッピングファイルの更新時刻を取得(これが更新トリガーとなる)""" - if self.mapping_file.exists(): - return self.mapping_file.stat().st_mtime - return 0 - - def reload_index(self): - """インデックスの再ロード""" - if not self.index_file.exists() or not self.mapping_file.exists(): - return False - - try: - # マッピングの読み込み - with open(self.mapping_file, 'r') as f: - self.id_mapping = json.load(f) - - # インデックスのロード (存在しない場合はスキップ) - if self.id_mapping: - self.searcher = LeannSearcher(str(config.INDEX_PATH), model=config.MODEL_NAME) - else: - self.searcher = None - - self.last_mtime = self._get_mtime() - print(f"\n[System] Index reloaded. Documents: {len(self.id_mapping)}") - return True - except Exception as e: - print(f"\n[Error] Failed to reload index: {e}") - return False - - def search(self, query): - # 検索前に更新チェック - current_mtime = self._get_mtime() - if current_mtime > self.last_mtime: - print("\n[System] Detected update. 
Refreshing index...") - time.sleep(0.5) # 書き込み完了待ち - self.reload_index() - - if not self.searcher or not self.id_mapping: - return [] - - try: - results = self.searcher.search(query, top_k=3) - return results - except Exception as e: - print(f"Search execution error: {e}") - return [] - - def get_info(self, result_id): - try: - idx = int(result_id) - if 0 <= idx < len(self.id_mapping): - return self.id_mapping[idx] - except: - pass - return None - -def main(): - print("=== ResilientDB Auto-Reloading Search CLI ===") - engine = AutoSearcher() - - if not engine.index_file.exists(): - print("Waiting for initial index creation...") - - while True: - try: - query = input("\nSearch Query ('exit' to quit): ").strip() - if not query: continue - if query.lower() in ['exit', 'quit']: break - - results = engine.search(query) - - if not results: - print("No results found.") - continue - - print(f"Results for: '{query}'") - for rank, res in enumerate(results, 1): - info = engine.get_info(res.id) - if info: - print(f" #{rank} [Score: {res.score:.4f}]") - print(f" Key : {info['original_key']}") - print(f" Text: {info['preview']}...") - else: - print(f" #{rank} [Unknown ID]") - - except KeyboardInterrupt: - print("\nBye!") - break - -if __name__ == "__main__": - main() \ No newline at end of file From 3f44eaa923a137bbba41687bb6c45884c12fc0b1 Mon Sep 17 00:00:00 2001 From: Steven Shoemaker Date: Fri, 12 Dec 2025 18:28:23 -0800 Subject: [PATCH 79/79] Removed outdated spinup file --- SPINUP_RESDB_ORM.sh | 54 --------------------------------------------- 1 file changed, 54 deletions(-) delete mode 100644 SPINUP_RESDB_ORM.sh diff --git a/SPINUP_RESDB_ORM.sh b/SPINUP_RESDB_ORM.sh deleted file mode 100644 index f9c43edd5..000000000 --- a/SPINUP_RESDB_ORM.sh +++ /dev/null @@ -1,54 +0,0 @@ -# TODO: Change this to 300 (5 minutes) once done testing -max_iterator=500 - -touch ormSpinup.log -./service/tools/kv/server_tools/start_kv_service.sh 2>> ormSpinup.log & - -iterator=0 -while ! 
grep "nohup: redirecting stderr to stdout|Build completed successfully" ormSpinup.log; do - sleep 1 - iterator=$((iterator + 1)) - if [ $iterator -gt $max_iterator ]; then - echo "Timed out waiting for KV service to start" - echo "Run \`./service/tools/kv/server_tools/start_kv_service.sh\` yourself to manually diagnose errors" - # TODO: Un remove this once you diagnose the problem - # rm ormSpinup.log - return 1 - fi -done - -# Just in case the service needs a minute -sleep 2 - -echo "KV service started successfully, now starting GraphQL service" -rm ormSpinup.log -touch ormSpinup.log - -cd ecosystem/graphql -bazel-bin/service/http_server/crow_service_main ./ecosystem/graphql/service/tools/config/interface/service.config ./ecosystem/graphql/service/http_server/server_config.config > ormSpinup.log 2>&1 & - -# TODO: Delete this -sleep 5 - -iterator=0 -while ! grep "[INFO ]" ormSpinup.log; do - sleep 1 - iterator=$((iterator + 1)) - if [ $iterator -gt $max_iterator ]; then - echo "Timed out waiting for GraphQL service to start" - echo "Note that the kv service is currently running" - echo "Run \`bazel-bin/service/http_server/crow_service_main ./ecosystem/graphql/service/tools/config/interface/service.config ./ecosystem/graphql/service/http_server/server_config.config > ormSpinup.log 2>&1 &\` yourself to manually diagnose errors" - rm ormSpinup.log - return 1 - fi -done - -rm ormSpinup.log -# We wait for the SECOND [INFO] message -sleep 2 - -echo "kv_service and graphql_service started successfully" -echo "It should be safe to enable your venv and use ResDB-orm now" - -# TODO: GitHub Copilot installed itself without asking, and suggested the following line. It might actually work for pkill (??) -# echo "to stop them, run: ./service/tools/kv/server_tools/stop_kv_service.sh and pkill crow_service_main" \ No newline at end of file