Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
f4cc018
embedding api improvements
ricopinazo Aug 28, 2025
8bf8e8c
disable embeddings by default
ricopinazo Aug 28, 2025
6fa9962
fix compilation error for enable_embeddings
ricopinazo Aug 28, 2025
3da0307
fix compilation error in main.rs
ricopinazo Aug 28, 2025
06640c0
sort milvus integration
ricopinazo Sep 5, 2025
2b5b5f0
lancedb vector storage implementation and rename score to distance
ricopinazo Sep 12, 2025
f906e94
like this almost compiles but not yet
ricopinazo Sep 16, 2025
976ff92
force lancedb to work with other version of chrono but still polars-a…
ricopinazo Sep 17, 2025
8e1975a
this seems to compile with all features but storage
ricopinazo Sep 17, 2025
e942d0e
put pometry-storage back into place
ricopinazo Sep 17, 2025
5eeb703
sort new multi-embedding vector cache
ricopinazo Sep 17, 2025
f637544
still some rust tests failing
ricopinazo Sep 19, 2025
8b999da
remove outdated comment
ricopinazo Sep 19, 2025
98f2555
all rust tests now passing with the new custom openai server
ricopinazo Sep 24, 2025
b1c66c7
some compilation errors caused by teh server future not being Sync
ricopinazo Sep 26, 2025
9952530
fixing some python tests
ricopinazo Sep 29, 2025
d7fe667
wip
ricopinazo Oct 10, 2025
f0af2a9
trying to avoid the drop of the tempdir but still not working
ricopinazo Oct 17, 2025
78e7701
fix compilation error
ricopinazo Oct 17, 2025
3701786
fix bug caused by a temp dir being dropped too soon
ricopinazo Oct 23, 2025
705d588
fix python tests
ricopinazo Oct 24, 2025
ed89cea
Merge branch 'master' into embedding-api
ricopinazo Oct 24, 2025
f4ba9cc
fix dependency conflicts
ricopinazo Oct 24, 2025
857c5ab
format
ricopinazo Oct 24, 2025
656b314
fix rust test
ricopinazo Oct 27, 2025
517c322
change rust version
ricopinazo Oct 27, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4,184 changes: 3,250 additions & 934 deletions Cargo.lock

Large diffs are not rendered by default.

7 changes: 3 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ readme = "README.md"
homepage = "https://github.com/Raphtory/raphtory/"
keywords = ["graph", "temporal-graph", "temporal"]
authors = ["Pometry"]
rust-version = "1.86.0"
rust-version = "1.88.0"
edition = "2021"

# debug symbols are using a lot of resources
Expand Down Expand Up @@ -87,7 +87,7 @@ parking_lot = { version = "0.12.1", features = [
"send_guard",
] }
ordered-float = "4.2.0"
chrono = { version = "0.4.42", features = ["serde"] }
chrono = { version = "0.4.41", features = ["serde"] }
tempfile = "3.10.0"
futures-util = "0.3.30"
thiserror = "2.0.0"
Expand Down Expand Up @@ -154,6 +154,7 @@ minijinja = "2.2.0"
minijinja-contrib = { version = "2.2.0", features = ["datetime"] }
datafusion = { version = "50.0.0" }
arroy = "0.6.1"
lancedb = "0.22.2" # this is the latest and asks for chrono 0.4.41
heed = "0.22.0"
sqlparser = "0.58.0"
futures = "0.3"
Expand All @@ -170,5 +171,3 @@ indexmap = { version = "2.7.0", features = ["rayon"] }
fake = { version = "3.1.0", features = ["chrono"] }
strsim = { version = "0.11.1" }
uuid = { version = "1.16.0", features = ["v4"] }


26 changes: 26 additions & 0 deletions milvus/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
version: "3.9"

services:
milvus:
image: milvusdb/milvus:v2.6.1
container_name: milvus
command: ["milvus", "run", "standalone"]
security_opt:
- seccomp:unconfined
environment:
ETCD_USE_EMBED: "true"
COMMON_STORAGETYPE: "local"
DEPLOY_MODE: "STANDALONE"
ports:
- "9091:9091"
- "19530:19530"

attu:
image: zilliz/attu:v2.6
container_name: attu
environment:
MILVUS_URL: "http://milvus:19530"
ports:
- "8000:3000"
depends_on:
- milvus
64 changes: 38 additions & 26 deletions python/tests/test_base_install/test_vectors.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from time import sleep
import pytest
from raphtory import Graph
from raphtory.vectors import VectorisedGraph
from raphtory.vectors import VectorisedGraph, OpenAIEmbeddings, embedding_server

embedding_map = {
"raphtory": [1.0, 0.0, 0.0], # this is now needed,
"node1": [1.0, 0.0, 0.0],
"node2": [0.0, 1.0, 0.0],
"node3": [0.0, 0.0, 1.0],
Expand All @@ -11,20 +14,20 @@
"edge3": [0.0, 1.0, 1.0],
}


def single_embedding(text: str):
try:
@pytest.fixture(autouse=True)
def test_server():
@embedding_server(address="0.0.0.0:7340") # TODO: ask only for PORT!!!
def custom_embeddings(text: str):
return embedding_map[text]
except:
raise Exception(f"unexpected document content: {text}")


def embedding(texts):
return [single_embedding(text) for text in texts]
custom_embeddings.start()
sleep(1)
yield
custom_embeddings.stop()


def floats_are_equals(float1: float, float2: float) -> bool:
return float1 + 0.001 > float2 and float1 - 0.001 < float2
return float1 + 0.00001 > float2 and float1 - 0.01 < float2


# the graph generated by this function looks like this:
Expand All @@ -48,26 +51,28 @@ def create_graph() -> VectorisedGraph:
g.add_edge(3, "node1", "node3", {"name": "edge2"})
g.add_edge(4, "node3", "node4", {"name": "edge3"})

vg = g.vectorise(embedding, nodes="{{ name }}", edges="{{ properties.name }}")
# FIXME: I should not need to write /v1?, change that in rust, so the route is /embeddings, not v1/embeddings
embeddings = OpenAIEmbeddings(api_base="http://localhost:7340/v1")
vg = g.vectorise(embeddings, nodes="{{ name }}", edges="{{ properties.name }}")

return vg


def test_selection():
vg = create_graph()

################################
selection = vg.empty_selection()
nodes_to_select = ["node1", "node2"]
edges_to_select = [("node1", "node2"), ("node1", "node3")]
selection = vg.empty_selection()
selection.add_nodes(nodes_to_select)
selection.add_edges(edges_to_select)
nodes = selection.nodes()
###########################
# ################################
# selection = vg.empty_selection()
# nodes_to_select = ["node1", "node2"]
# edges_to_select = [("node1", "node2"), ("node1", "node3")]
# selection = vg.empty_selection()
# selection.add_nodes(nodes_to_select)
# selection.add_edges(edges_to_select)
# nodes = selection.nodes()
# ###########################

assert len(vg.empty_selection().get_documents()) == 0
assert len(vg.empty_selection().get_documents_with_scores()) == 0
assert len(vg.empty_selection().get_documents_with_distances()) == 0

nodes_to_select = ["node1", "node2"]
edges_to_select = [("node1", "node2"), ("node1", "node3")]
Expand All @@ -77,7 +82,9 @@ def test_selection():
nodes = selection.nodes()
node_names_returned = [node.name for node in nodes]
assert node_names_returned == nodes_to_select
print("before get documents")
docs = [doc.content for doc in selection.get_documents()]
print("after get documents")
assert docs == ["node1", "node2"]

selection = vg.empty_selection()
Expand Down Expand Up @@ -113,8 +120,8 @@ def test_search():
assert edge_names_returned == [("node1", "node2")]
# TODO: same for edges ?

[(doc1, score1)] = vg.entities_by_similarity("node1", 1).get_documents_with_scores()
assert floats_are_equals(score1, 1.0)
[(doc1, distance1)] = vg.entities_by_similarity("node1", 1).get_documents_with_distances()
assert floats_are_equals(distance1, 0.0)
assert (doc1.entity.name, doc1.content) == ("node1", "node1")

# chained search
Expand Down Expand Up @@ -205,16 +212,19 @@ def test_filtering_by_entity_type():
assert contents == ["edge1", "edge2", "edge3"]


def constant_embedding(texts):
return [[1.0, 0.0, 0.0] for text in texts]

@embedding_server(address="0.0.0.0:7341")
def constant_embedding(_text):
return [1.0, 0.0, 0.0]

def test_default_template():
g = Graph()
g.add_node(1, "node1")
g.add_edge(2, "node1", "node1")

vg = g.vectorise(constant_embedding)
constant_embedding.start()

vg = g.vectorise(OpenAIEmbeddings(api_base="http://localhost:7341/v1"))

node_docs = vg.nodes_by_similarity(query="whatever", limit=10).get_documents()
assert len(node_docs) == 1
Expand All @@ -226,3 +236,5 @@ def test_default_template():
edge_docs[0].content
== "There is an edge from node1 to node1 with events at:\n- Jan 1 1970 00:00\n"
)

constant_embedding.stop()
2 changes: 1 addition & 1 deletion raphtory-benchmark/src/common/vectors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub fn create_graph_for_vector_bench(size: usize) -> Graph {
}

pub async fn vectorise_graph_for_bench_async(graph: Graph) -> VectorisedGraph<Graph> {
let cache = VectorCache::in_memory(embedding_model);
let cache = VectorCache::in_memory(embedding_model).await.unwrap();
let template = DocumentTemplate {
node_template: Some("{{name}}".to_owned()),
edge_template: None,
Expand Down
93 changes: 35 additions & 58 deletions raphtory-graphql/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,29 @@ use itertools::Itertools;
use moka::future::Cache;
use raphtory::{
db::api::view::MaterializedGraph,
errors::{GraphError, InvalidPathReason},
errors::{GraphError, GraphResult, InvalidPathReason},
prelude::CacheOps,
vectors::{
cache::VectorCache, template::DocumentTemplate, vectorisable::Vectorisable,
cache::{CachedEmbeddingModel, VectorCache},
template::DocumentTemplate,
vectorisable::Vectorisable,
vectorised_graph::VectorisedGraph,
},
};
use std::{
collections::HashMap,
path::{Path, PathBuf},
sync::Arc,
};
use tokio::fs;
use tracing::{error, warn};
use walkdir::WalkDir;

#[derive(Clone)]
pub struct EmbeddingConf {
pub(crate) cache: VectorCache,
pub(crate) global_template: Option<DocumentTemplate>,
pub(crate) individual_templates: HashMap<PathBuf, DocumentTemplate>,
}
// #[derive(Clone)]
// pub struct EmbeddingConf {
// pub(crate) cache: VectorCache,
// // pub(crate) global_template: Option<DocumentTemplate>,
// // pub(crate) individual_templates: HashMap<PathBuf, DocumentTemplate>,
// }

pub(crate) fn get_relative_path(
work_dir: PathBuf,
Expand All @@ -54,14 +55,14 @@ pub(crate) fn get_relative_path(

#[derive(Clone)]
pub struct Data {
pub(crate) work_dir: PathBuf,
pub(crate) work_dir: PathBuf, // TODO: move this to config?
cache: Cache<PathBuf, GraphWithVectors>,
pub(crate) create_index: bool,
pub(crate) embedding_conf: Option<EmbeddingConf>,
pub(crate) create_index: bool, // TODO: move this to config?
pub(crate) vector_cache: VectorCache,
}

impl Data {
pub fn new(work_dir: &Path, configs: &AppConfig) -> Self {
pub async fn new(work_dir: &Path, configs: &AppConfig) -> GraphResult<Self> {
let cache_configs = &configs.cache;

let cache = Cache::<PathBuf, GraphWithVectors>::builder()
Expand All @@ -80,12 +81,14 @@ impl Data {
#[cfg(not(feature = "search"))]
let create_index = false;

Self {
// TODO: make vector feature optional?

Ok(Self {
work_dir: work_dir.to_path_buf(),
cache,
create_index,
embedding_conf: Default::default(),
}
vector_cache: VectorCache::on_disk(&work_dir.join(".vector-cache")).await?, // FIXME: need to disable graph names starting with a dot
})
}

pub async fn get_graph(
Expand Down Expand Up @@ -116,8 +119,8 @@ impl Data {
let folder_clone = folder.clone();
let graph_clone = graph.clone();
blocking_io(move || graph_clone.cache(folder_clone)).await?;
let vectors = self.vectorise(graph.clone(), &folder).await;
let graph = GraphWithVectors::new(graph, vectors);
// let vectors = self.vectorise(graph.clone(), &folder).await;
let graph = GraphWithVectors::new(graph, None);
graph
.folder
.get_or_try_init(|| Ok::<_, GraphError>(folder.into()))?;
Expand All @@ -134,23 +137,16 @@ impl Data {
Ok(())
}

fn resolve_template(&self, graph: &Path) -> Option<&DocumentTemplate> {
let conf = self.embedding_conf.as_ref()?;
conf.individual_templates
.get(graph)
.or(conf.global_template.as_ref())
}

async fn vectorise_with_template(
&self,
graph: MaterializedGraph,
folder: &ValidGraphFolder,
template: &DocumentTemplate,
model: CachedEmbeddingModel,
) -> Option<VectorisedGraph<MaterializedGraph>> {
let conf = self.embedding_conf.as_ref()?;
let vectors = graph
.vectorise(
conf.cache.clone(),
model,
template.clone(),
Some(&folder.get_vectors_path()),
true, // verbose
Expand All @@ -166,34 +162,15 @@ impl Data {
}
}

async fn vectorise(
pub(crate) async fn vectorise_folder(
&self,
graph: MaterializedGraph,
folder: &ValidGraphFolder,
) -> Option<VectorisedGraph<MaterializedGraph>> {
let template = self.resolve_template(folder.get_original_path())?;
self.vectorise_with_template(graph, folder, template).await
}

async fn vectorise_folder(&self, folder: &ExistingGraphFolder) -> Option<()> {
// it's important that we check if there is a valid template set for this graph path
// before actually loading the graph, otherwise we are loading the graph for no reason
let template = self.resolve_template(folder.get_original_path())?;
let graph = self
.read_graph_from_folder(folder.clone())
.await
.ok()?
.graph;
self.vectorise_with_template(graph, folder, template).await;
Some(())
}

pub(crate) async fn vectorise_all_graphs_that_are_not(&self) -> Result<(), GraphError> {
for folder in self.get_all_graph_folders() {
if !folder.get_vectors_path().exists() {
self.vectorise_folder(&folder).await;
}
}
folder: &ExistingGraphFolder,
template: &DocumentTemplate,
model: CachedEmbeddingModel,
) -> GraphResult<()> {
let graph = self.read_graph_from_folder(folder.clone()).await?.graph;
self.vectorise_with_template(graph, folder, template, model)
.await;
Ok(())
}

Expand All @@ -216,9 +193,9 @@ impl Data {
&self,
folder: ExistingGraphFolder,
) -> Result<GraphWithVectors, GraphError> {
let cache = self.embedding_conf.as_ref().map(|conf| conf.cache.clone());
let create_index = self.create_index;
blocking_io(move || GraphWithVectors::read_from_folder(&folder, cache, create_index)).await
GraphWithVectors::read_from_folder(&folder, &self.vector_cache, create_index).await
// FIXME: I need some blocking_io inside of GraphWithVectors::read_from_folder
}
}

Expand Down Expand Up @@ -276,12 +253,12 @@ pub(crate) mod data_tests {
File::create(path.join("graph")).unwrap();
}

pub(crate) fn save_graphs_to_work_dir(
pub(crate) async fn save_graphs_to_work_dir(
work_dir: &Path,
graphs: &HashMap<String, MaterializedGraph>,
) -> Result<(), GraphError> {
for (name, graph) in graphs.into_iter() {
let data = Data::new(work_dir, &AppConfig::default());
let data = Data::new(work_dir, &AppConfig::default()).await?;
let folder = ValidGraphFolder::try_from(data.work_dir, name)?;

#[cfg(feature = "storage")]
Expand Down
16 changes: 0 additions & 16 deletions raphtory-graphql/src/embeddings.rs

This file was deleted.

Loading
Loading