apache · steveoVivo · Nov 9, 2025 · Nov 9, 2025 · Nov 10, 2025 · Nov 10, 2025
diff --git a/.bazelrc b/.bazelrc
@@ -1,4 +1,3 @@
 build --cxxopt='-std=c++17' --copt=-O3 --jobs=40
 #build --action_env=PYTHON_BIN_PATH="/usr/bin/python3.10"
 #build --action_env=PYTHON_LIB_PATH="/usr/include/python3.10"
-
diff --git a/.bazelversion b/.bazelversion
diff --git a/README.md b/README.md
diff --git a/WORKSPACE b/WORKSPACE
@@ -20,6 +20,16 @@ workspace(name = "com_resdb_nexres")
 
 load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
 
+http_archive(
+    name = "bazel_skylib",
+    sha256 = "74d544d96f4a5bb630d465ca8bbcfe231e3594e5aae57e1edbf17a6eb3ca2506",
+    urls = [
+        "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz",
+        "https://github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz",
+    ],
+)
+load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace")
+bazel_skylib_workspace()
 http_archive(
     name = "hedron_compile_commands",
     #Replace the commit hash (4f28899228fb3ad0126897876f147ca15026151e) with the latest commit hash from the repo

diff --git a/ecosystem/README.md b/ecosystem/README.md
@@ -39,7 +39,8 @@ ecosystem/
 ├── sdk/                             # Software Development Kits
 │   ├── rust-sdk/                    # Rust SDK
 │   ├── resvault-sdk/                # ResVault SDK
-│   └── resdb-orm/                   # Python ORM
+│   ├── resdb-orm/                   # Python ORM
+│   └── vector-indexing/             # Vector indexing for semantic search
 ├── deployment/                      # Deployment and infrastructure
 │   ├── ansible/                     # Ansible playbooks
 │   └── orbit/                       # Orbit deployment tool

diff --git a/ecosystem/graphql/app.py b/ecosystem/graphql/app.py
@@ -18,37 +18,84 @@
 #
 #
 
+import tempfile
+import os
+import sys
+import subprocess
+import re
+import json
+import strawberry
+import typing
+import ast
+from pathlib import Path
+from typing import Optional, List, Any
+from flask import Flask
+from flask_cors import CORS
+from strawberry.flask.views import GraphQLView
+
+# --- Local Imports ---
 from resdb_driver import Resdb
 from resdb_driver.crypto import generate_keypair
+from json_scalar import JSONScalar 
 
+# --- Vector Indexing Imports ---
+from sentence_transformers import SentenceTransformer
+
+# --- Configuration ---
 db_root_url = "localhost:18000"
 protocol = "http://"
 fetch_all_endpoint = "/v1/transactions"
 db = Resdb(db_root_url)
 
-import strawberry
-import typing
-import ast
-import json
+# --- Vector Indexing Scripts Path Configuration (resolved from this file's location, not the working directory) ---
+CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))  # directory containing this file
+VECTOR_SCRIPT_DIR = os.path.abspath(os.path.join(CURRENT_DIR, "../sdk/vector-indexing"))  # ../sdk/vector-indexing, made absolute
+PYTHON_EXE = sys.executable  # interpreter running this process; NOTE(review): no use of this name is visible in this diff
 
-from typing import Optional, List, Any
-from flask import Flask
-from flask_cors import CORS
+# Append the vector script dir to sys.path so vector_add / vector_get / vector_delete can be imported below
+sys.path.append(VECTOR_SCRIPT_DIR)
 
-app = Flask(__name__)
-CORS(app) # This will enable CORS for all routes
+# Import the manager classes from the script dir added to sys.path above; on failure, warn and keep loading.
+try:
+    from vector_add import VectorIndexManager
+    from vector_get import VectorSearchManager
+    from vector_delete import VectorDeleteManager
+except ImportError as e:
+    print(f"Warning: Could not import vector modules. Error: {e}")
+    VectorIndexManager = None  # any single failed import resets all three names to None
+    VectorSearchManager = None
+    VectorDeleteManager = None
 
-from strawberry.flask.views import GraphQLView
+# --- Initialize AI Model & Managers (Run Once) ---
+print("Initializing Vector Managers...")
+vector_index_manager = None
+vector_search_manager = None
+vector_delete_manager = None
+# Each name above stays None unless its constructor call below completes.
+try:
+    # Load model into memory once at startup to avoid per-request overhead
+    GLOBAL_MODEL = SentenceTransformer('all-MiniLM-L6-v2')
+
+    script_path = Path(VECTOR_SCRIPT_DIR)
+
+    if VectorIndexManager:  # None when the guarded import of the manager classes failed
+        vector_index_manager = VectorIndexManager(script_path, GLOBAL_MODEL)
+
+    if VectorSearchManager:
+        vector_search_manager = VectorSearchManager(script_path, GLOBAL_MODEL)
+
+    if VectorDeleteManager:
+        vector_delete_manager = VectorDeleteManager(script_path, GLOBAL_MODEL)
+    # NOTE(review): the message below is printed even when all three classes were None.
+    print("Vector Managers initialized successfully.")
+except Exception as e:  # report and continue; managers assigned before the failure keep their values
+    print(f"Error initializing vector managers: {e}")
 
-@strawberry.scalar(description="Custom JSON scalar")
-class JSONScalar:
-    @staticmethod
-    def serialize(value: Any) -> Any:
-        return value  # Directly return the JSON object
 
-    @staticmethod
-    def parse_value(value: Any) -> Any:
-        return value  # Accept JSON as is
+app = Flask(__name__)
+CORS(app) # This will enable CORS for all routes
+
+# --- GraphQL Types ---
 
 @strawberry.type
 class RetrieveTransaction:
@@ -76,6 +123,14 @@ class PrepareAsset:
     recipientPublicKey: str
     asset: JSONScalar
 
+# GraphQL type for one vector query result: a stored text and the score reported with it.
+@strawberry.type
+class VectorSearchResult:
+    text: str  # the stored text value
+    score: float  # search score; the "list all" paths in this file fill in 1.0 as a placeholder
+
+# --- Query ---
+
 @strawberry.type
 class Query:
     @strawberry.field
@@ -94,6 +149,69 @@ def getTransaction(self, id: strawberry.ID) -> RetrieveTransaction:
             asset=data["asset"]
         )
         return payload
+
+    @strawberry.field
+    def count_cats(self) -> str:
+        """Write a fixed word list to a temporary file and count its "cat" lines.
+
+        Returns:
+            A sentence stating how many lines contain the substring "cat".
+        """
+        words = ["cat", "cat", "cat", "mouse", "cat"]
+
+        # delete=False keeps the file on disk once the handle is closed so it
+        # can be reopened by path; the finally block below removes it instead.
+        tmp_file = tempfile.NamedTemporaryFile(mode="w+", delete=False)
+        tmp_path = tmp_file.name
+        try:
+            with tmp_file:
+                tmp_file.writelines(word + "\n" for word in words)
+
+            with open(tmp_path, "r") as f:
+                # Substring test per line, so e.g. "category" would also count.
+                cat_count = sum(1 for line in f if "cat" in line)
+        finally:
+            os.remove(tmp_path)
+        return f'The word "cat" appears {cat_count} times'
+
+    @strawberry.field
+    def getAllVectors(self) -> List[VectorSearchResult]:
+        """Return every stored text, or an empty list if the search manager is unavailable."""
+        if not vector_search_manager:
+            print("Error: Vector search manager not initialized.")
+            return []
+        # A full listing has no similarity ranking, so 1.0 is a placeholder score.
+        stored_values = vector_search_manager.get_all_values()
+        return [VectorSearchResult(text=val, score=1.0) for val in stored_values]
+
+    # --- New: Vector Search Query (Optimized) ---
+    @strawberry.field
+    def searchVector(self, text: Optional[str] = None, k: int = 1) -> List[VectorSearchResult]:
+        """Search for similar texts using the in-memory manager.
+
+        Args:
+            text: Query string; when omitted (None) every stored text is returned.
+            k: Match count passed through to the manager's search call.
+
+        Returns:
+            The results, or an empty list when the search manager is unavailable.
+        """
+        if not vector_search_manager:
+            print("Error: Vector search manager not initialized.")
+            return []
+
+        if text is None:
+            # "Show all" mode has no similarity ranking; 1.0 is a placeholder score.
+            return [
+                VectorSearchResult(text=val, score=1.0)
+                for val in vector_search_manager.get_all_values()
+            ]
+        return [
+            VectorSearchResult(text=item['text'], score=item['score'])
+            for item in vector_search_manager.search(text, k)
+        ]
+
+# --- Mutation ---
 
 @strawberry.type
 class Mutation:
@@ -115,6 +233,25 @@ def postTransaction(self, data: PrepareAsset) -> CommitTransaction:
         )
         return payload
 
+    # --- New: Vector Add Mutation (Optimized) ---
+    @strawberry.mutation
+    def addVector(self, text: str) -> str:
+        """Index `text` through the shared index manager and return its reply."""
+        # The manager is still None if its import or start-up construction failed.
+        if not vector_index_manager:
+            return "Error: Vector index manager not initialized."
+        return vector_index_manager.add_value(text)
+
+    # --- New: Vector Delete Mutation (Optimized) ---
+    @strawberry.mutation
+    def deleteVector(self, text: str) -> str:
+        """Remove `text` through the shared delete manager and return its reply."""
+        # The manager is still None if its import or start-up construction failed.
+        if not vector_delete_manager:
+            return "Error: Vector delete manager not initialized."
+        return vector_delete_manager.delete_value(text)
+
+
 schema = strawberry.Schema(query=Query, mutation=Mutation)
 
 app.add_url_rule(
@@ -123,4 +260,4 @@ def postTransaction(self, data: PrepareAsset) -> CommitTransaction:
 )
 
 if __name__ == "__main__":
-    app.run(port="8000")
+    app.run(port="8000")
diff --git a/ecosystem/graphql/json_scalar.py b/ecosystem/graphql/json_scalar.py
@@ -0,0 +1,9 @@
+import strawberry
+from typing import Any
+
+@strawberry.scalar(name="JSONScalar", description="Custom JSON scalar")
+def JSONScalar(value: Any) -> Any:
+    """Return ``value`` unchanged (identity pass-through for JSON data)."""
+    # No serialize/parse_value hooks are passed to the decorator above, so
+    # this file configures no conversion of its own.
+    return value
diff --git a/ecosystem/sdk/vector-indexing/README.md b/ecosystem/sdk/vector-indexing/README.md
@@ -0,0 +1,67 @@
+# Vector Indexing SDK for ResilientDB
+This directory contains a Python SDK for performing vector indexing and similarity search using ResilientDB as the storage backend.
+
+The primary interface for users is the ```kv_vector.py``` CLI tool, which interacts with the ResilientDB GraphQL service to manage vector embeddings.
+
+## Architecture
+- ```kv_vector.py```: The CLI frontend. It sends GraphQL mutations and queries to the proxy.
+- ```kv_vector_library.py```: Handles the HTTP requests to the GraphQL endpoint.
+### Backend Scripts
+- ```vector_add.py```, ```vector_get.py```, ```vector_delete.py```: These scripts reside on the server side (or in a tightly coupled environment) and handle embedding generation (via SentenceTransformers) and HNSW index management.
+
+## Prerequisites
+Before using this SDK, please ensure the entire ResilientDB stack is up and running. Specifically, you need:
+1. ResilientDB KV Store: The core blockchain storage service must be running. [How to Setup](https://github.com/apache/incubator-resilientdb)
+2. GraphQL Server (```ecosystem/graphql```): The backend service handling GraphQL schemas and resolvers. [How to Setup](https://github.com/apache/incubator-resilientdb/tree/master/ecosystem/graphql)
+3. GraphQL Application (```ecosystem/graphql/app.py```): The Python web server (Strawberry/Flask) that exposes the GraphQL endpoint. [How to Setup](https://github.com/apache/incubator-resilientdb/tree/master/ecosystem/graphql)
+4. A terminal whose current directory is ```ecosystem/sdk/vector-indexing```, with the GraphQL virtual environment activated.
+
+## Installation
+Install the required Python dependencies:
+```
+pip install requests pyyaml numpy hnswlib sentence-transformers
+```
+
+## Quick Start: Demo Data
+A shell script is provided to quickly populate the database with sample data for testing purposes. This is the fastest way to verify your environment is set up correctly.
+1. Make sure you are in the ```ecosystem/sdk/vector-indexing``` directory.
+2. Run the demo script:
+   ```
+   chmod +x demo_add.sh
+   ./demo_add.sh
+   ```
+   **What this does:** The script iterates through a predefined list of sentences (covering topics like biology, sports, and art) and adds them to the ResilientDB vector index one by one using ```kv_vector.py```.
+
+## Usage (CLI)
+The ```kv_vector.py``` script is the main entry point. It allows you to add text (which is automatically vectorized), search for similar text, and manage records via the GraphQL endpoint.
+
+### 1. Adding Data
+Add a text string to the index. This generates an embedding and stores it in ResilientDB.
+```
+python3 kv_vector.py --add "<TEXT>"
+```
+
+### 2. Searching
+Find the ```k``` strings most similar to your query using HNSW similarity search.
+```
+# Get the single most similar record (default k=1)
+python3 kv_vector.py --get "<SEARCH WORDS>"
+
+# Get the top 3 matches
+python3 kv_vector.py --get "<SEARCH WORDS>" --k_matches 3
+```
+
+### 3. Listing All Data
+Retrieve all text values currently stored in the index.
+```
+python3 kv_vector.py --getAll
+```
+
+### 4. Deleting Data
+Remove a specific value and its embedding from the index.
+```
+python3 kv_vector.py --delete "<TEXT>"
+```
+
+## Configuration
+If your GraphQL service is running on a different host or port, you may need to modify the configuration in ```kv_vector_library.py``` or the ```config.yaml``` file depending on your deployment mode.
diff --git a/ecosystem/sdk/vector-indexing/config.yaml b/ecosystem/sdk/vector-indexing/config.yaml
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+database:
+  db_root_url: http://0.0.0.0:18000
diff --git a/ecosystem/sdk/vector-indexing/demo_add.sh b/ecosystem/sdk/vector-indexing/demo_add.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Adds each demo sentence below to the vector index via kv_vector.py (run from this directory).
+texts=(
+"Large language models can generate human-like text and assist with tasks such as summarization, translation, and code generation."
+"Photosynthesis allows plants to convert sunlight into chemical energy, producing oxygen as a byproduct."
+"Kyoto is known for its ancient temples, traditional wooden houses, and beautiful seasonal landscapes."
+"Strong branding helps companies build customer trust and differentiate themselves in competitive markets."
+"Regular exercise improves cardiovascular health, increases muscle strength, and reduces stress levels."
+"Active learning encourages students to participate, discuss ideas, and apply knowledge rather than passively listen."
+"Sourdough bread develops its unique flavor through natural fermentation using wild yeast and lactic acid bacteria."
+"Reducing plastic waste requires better recycling systems and increased use of biodegradable materials."
+"Impressionist painters focused on capturing light and movement rather than creating precise, realistic details."
+"Basketball requires teamwork, quick decision-making, and precise coordination between players on the court."
+)
+echo "=== Adding ${#texts[@]} demo texts to ResilientDB ==="
+failed=0
+for text in "${texts[@]}"; do
+    echo "→ Adding:"
+    echo "   \"$text\""
+    # Keep going after a failed add, but remember it so the summary stays truthful.
+    python3 kv_vector.py --add "$text" || failed=$((failed + 1))
+    echo ""
+done
+# Only claim success when every python3 invocation exited with status 0.
+if [ "$failed" -ne 0 ]; then echo "=== Error: $failed demo text(s) could not be added ===" >&2; exit 1; fi
+echo "=== Done: All demo texts added ==="