From b971def1f18c9ea3149c3355856b78306dce0f5e Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 17 Nov 2025 14:54:16 +0100 Subject: [PATCH 01/39] attempt at fixing the graph writing logic for windows --- raphtory-graphql/src/data.rs | 28 +++++++++------------------- raphtory-graphql/src/paths.rs | 8 ++++++-- raphtory/src/errors.rs | 2 ++ 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index 8205a1abc5..9b9be70909 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -21,6 +21,7 @@ use std::{ path::{Path, PathBuf}, sync::Arc, }; +use std::ffi::OsStr; use tokio::fs; use tracing::warn; use walkdir::WalkDir; @@ -282,28 +283,17 @@ impl Data { /// Serializes a graph to disk, overwriting any existing data in its folder. fn encode_graph_to_disk(graph: GraphWithVectors) -> Result<(), GraphError> { let folder_path = graph.folder.get_base_path(); + let bak_name = "_".to_string() + folder_path.file_name().and_then(|n| n.to_str()).unwrap_or(""); + let bak_path = folder_path.with_file_name(bak_name); - // Create a backup of the existing folder + // Write to backup path first + graph.graph.encode(&bak_path)?; + if folder_path.exists() { - let bak_path = folder_path.with_extension("bak"); - - // Remove any old backups - if bak_path.exists() { - std::fs::remove_dir_all(&bak_path)?; - } - - std::fs::rename(&folder_path, &bak_path)?; - } - - // Serialize the graph to the original folder path - graph.graph.encode(&folder_path)?; - - // Delete the backup on success - let bak_path = folder_path.with_extension("bak"); - - if bak_path.exists() { - std::fs::remove_dir_all(&bak_path)?; + // delete old data + std::fs::remove_dir_all(folder_path)?; } + std::fs::rename(&bak_path, folder_path)?; Ok(()) } diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index aa21703882..a4a059e357 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -125,8 +125,12 @@ pub(crate) fn valid_path( } full_path.push(component); //check if the path with the component is a graph - if namespace && full_path.join(META_PATH).exists() { - return Err(InvalidPathReason::ParentIsGraph(user_facing_path)); + if full_path.join(META_PATH).exists() { + if namespace { + return Err(InvalidPathReason::ParentIsGraph(user_facing_path)); + } else if component.to_str().ok_or(InvalidPathReason::NonUTFCharacters)?.starts_with("_") { + return Err(InvalidPathReason::GraphNamePrefix) + } } //check for symlinks if full_path.is_symlink() { diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index f7d790b29b..98dbbcb75f 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -54,6 +54,8 @@ pub enum InvalidPathReason { PathNotParsable(PathBuf), #[error("The path to the graph contains a subpath to an existing graph: {0}")] ParentIsGraph(PathBuf), + #[error("Graph name cannot start with _")] + GraphNamePrefix, #[error("The path provided does not exists as a namespace: {0}")] NamespaceDoesNotExist(String), #[error("The path provided contains non-UTF8 characters.")] From 9b0d94f0b9f32426f6b8d27e5f27569e4277ca5d Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 17 Nov 2025 15:13:00 +0100 Subject: [PATCH 02/39] fmt --- raphtory-graphql/src/data.rs | 10 +++++++--- raphtory-graphql/src/paths.rs | 8 ++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index 9b9be70909..f17a40c897 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -17,11 +17,11 @@ use raphtory::{ }; use std::{ collections::HashMap, + ffi::OsStr, io::{Read, Seek}, path::{Path, PathBuf}, sync::Arc, }; -use std::ffi::OsStr; use tokio::fs; use tracing::warn; use walkdir::WalkDir; @@ -283,12 +283,16 @@ impl Data { /// Serializes a graph to disk, overwriting any existing data in its folder. fn encode_graph_to_disk(graph: GraphWithVectors) -> Result<(), GraphError> { let folder_path = graph.folder.get_base_path(); - let bak_name = "_".to_string() + folder_path.file_name().and_then(|n| n.to_str()).unwrap_or(""); + let bak_name = "_".to_string() + + folder_path + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or(""); let bak_path = folder_path.with_file_name(bak_name); // Write to backup path first graph.graph.encode(&bak_path)?; - + if folder_path.exists() { // delete old data std::fs::remove_dir_all(folder_path)?; diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index a4a059e357..c2d7366719 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -128,8 +128,12 @@ pub(crate) fn valid_path( if full_path.join(META_PATH).exists() { if namespace { return Err(InvalidPathReason::ParentIsGraph(user_facing_path)); - } else if component.to_str().ok_or(InvalidPathReason::NonUTFCharacters)?.starts_with("_") { - return Err(InvalidPathReason::GraphNamePrefix) + } else if component + .to_str() + .ok_or(InvalidPathReason::NonUTFCharacters)? + .starts_with("_") + { + return Err(InvalidPathReason::GraphNamePrefix); } } //check for symlinks From 3e578bb17882592bcf063f3f53d7d48766e3f4ee Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Tue, 18 Nov 2025 09:26:30 +0100 Subject: [PATCH 03/39] It should never be possible for a graph to be de-cached while it is being mutated. If we allow that to happen, reinserting it doesn't fix anything as the graph might already have been re-cached as well and state is inconsistent. --- raphtory-graphql/src/data.rs | 2 - raphtory-graphql/src/graph.rs | 15 ++- .../src/model/graph/mutable_graph.rs | 92 ++++--------------- raphtory-graphql/src/model/mod.rs | 8 +- 4 files changed, 30 insertions(+), 87 deletions(-) diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index f17a40c897..fd1d6ffc21 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -17,7 +17,6 @@ use raphtory::{ }; use std::{ collections::HashMap, - ffi::OsStr, io::{Read, Seek}, path::{Path, PathBuf}, sync::Arc, @@ -54,7 +53,6 @@ pub(crate) fn get_relative_path( Ok(path_str) } -#[derive(Clone)] pub struct Data { pub(crate) work_dir: PathBuf, cache: Cache, diff --git a/raphtory-graphql/src/graph.rs b/raphtory-graphql/src/graph.rs index 12b5b63902..e0c041e1da 100644 --- a/raphtory-graphql/src/graph.rs +++ b/raphtory-graphql/src/graph.rs @@ -1,24 +1,17 @@ -use std::sync::{ - atomic::{AtomicBool, Ordering}, - Arc, -}; - use crate::paths::ExistingGraphFolder; -use once_cell::sync::OnceCell; use raphtory::{ core::entities::nodes::node_ref::AsNodeRef, db::{ api::view::{ internal::{ - InheritEdgeHistoryFilter, InheritNodeHistoryFilter, InheritStorageOps, - InternalStorageOps, Static, + InheritEdgeHistoryFilter, InheritNodeHistoryFilter, InheritStorageOps, Static, }, Base, InheritViewOps, MaterializedGraph, }, graph::{edge::EdgeView, node::NodeView, views::deletion_graph::PersistentGraph}, }, errors::{GraphError, GraphResult}, - prelude::{EdgeViewOps, Graph, NodeViewOps, StableDecode}, + prelude::{EdgeViewOps, Graph, StableDecode}, serialise::GraphFolder, vectors::{cache::VectorCache, vectorised_graph::VectorisedGraph}, }; @@ -26,6 +19,10 @@ use raphtory_api::GraphType; use raphtory_storage::{ core_ops::InheritCoreGraphOps, layer_ops::InheritLayerOps, mutation::InheritMutationOps, }; +use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, +}; use tracing::info; #[cfg(feature = "search")] diff --git a/raphtory-graphql/src/model/graph/mutable_graph.rs b/raphtory-graphql/src/model/graph/mutable_graph.rs index 27c9f7b732..852c668b04 100644 --- a/raphtory-graphql/src/model/graph/mutable_graph.rs +++ b/raphtory-graphql/src/model/graph/mutable_graph.rs @@ -1,5 +1,4 @@ use crate::{ - data::Data, graph::{GraphWithVectors, UpdateEmbeddings}, model::graph::{edge::GqlEdge, graph::GqlGraph, node::GqlNode, property::Value}, paths::ExistingGraphFolder, @@ -116,15 +115,13 @@ pub struct EdgeAddition { pub struct GqlMutableGraph { path: ExistingGraphFolder, graph: GraphWithVectors, - data: Data, } impl GqlMutableGraph { - pub(crate) fn new(path: ExistingGraphFolder, graph: GraphWithVectors, data: Data) -> Self { + pub(crate) fn new(path: ExistingGraphFolder, graph: GraphWithVectors) -> Self { Self { path: path.into(), graph, - data, } } } @@ -152,9 +149,7 @@ impl GqlMutableGraph { /// Get mutable existing node. async fn node(&self, name: String) -> Option { - self.graph - .node(name) - .map(|n| GqlMutableNode::new(n, self.path.clone(), self.data.clone())) + self.graph.node(name).map(|n| GqlMutableNode::new(n)) } /// Add a new node or add updates to an existing node. @@ -179,11 +174,7 @@ impl GqlMutableGraph { self.post_mutation_ops().await; let _ = node.update_embeddings().await; - Ok(GqlMutableNode::new( - node, - self.path.clone(), - self.data.clone(), - )) + Ok(GqlMutableNode::new(node)) } /// Create a new node or fail if it already exists. @@ -208,11 +199,7 @@ impl GqlMutableGraph { self.post_mutation_ops().await; let _ = node.update_embeddings().await; - Ok(GqlMutableNode::new( - node, - self.path.clone(), - self.data.clone(), - )) + Ok(GqlMutableNode::new(node)) } /// Add a batch of nodes @@ -261,9 +248,7 @@ impl GqlMutableGraph { /// Get a mutable existing edge. async fn edge(&self, src: String, dst: String) -> Option { - self.graph - .edge(src, dst) - .map(|e| GqlMutableEdge::new(e, self.path.clone(), self.data.clone())) + self.graph.edge(src, dst).map(|e| GqlMutableEdge::new(e)) } /// Add a new edge or add updates to an existing edge. @@ -289,11 +274,7 @@ impl GqlMutableGraph { self.post_mutation_ops().await; let _ = edge.update_embeddings().await; - Ok(GqlMutableEdge::new( - edge, - self.path.clone(), - self.data.clone(), - )) + Ok(GqlMutableEdge::new(edge)) } /// Add a batch of edges @@ -358,11 +339,7 @@ impl GqlMutableGraph { self.post_mutation_ops().await; let _ = edge.update_embeddings().await; - Ok(GqlMutableEdge::new( - edge, - self.path.clone(), - self.data.clone(), - )) + Ok(GqlMutableEdge::new(edge)) } /// Add temporal properties to graph. @@ -439,13 +416,6 @@ impl GqlMutableGraph { /// Post mutation operations. async fn post_mutation_ops(&self) { self.graph.set_dirty(true); - - // Reinsert the graph into the cache to reset eviction priority. - // This prevents data loss if the graph is evicted after the mutation but before - // the `set_dirty` call. - self.data - .insert_graph_into_cache(self.path.get_original_path_str(), self.graph.clone()) - .await; } } @@ -453,17 +423,11 @@ impl GqlMutableGraph { #[graphql(name = "MutableNode")] pub struct GqlMutableNode { node: NodeView<'static, GraphWithVectors>, - path: ExistingGraphFolder, - data: Data, } impl GqlMutableNode { - pub fn new( - node: NodeView<'static, GraphWithVectors>, - path: ExistingGraphFolder, - data: Data, - ) -> Self { - Self { node, path, data } + pub fn new(node: NodeView<'static, GraphWithVectors>) -> Self { + Self { node } } } @@ -550,13 +514,6 @@ impl GqlMutableNode { /// Post mutation operations. async fn post_mutation_ops(&self) { self.node.graph.set_dirty(true); - - // Reinsert the graph into the cache to reset eviction priority. - // This prevents data loss if the graph is evicted after the mutation but before - // the `set_dirty` call. - self.data - .insert_graph_into_cache(self.path.get_original_path_str(), self.node.graph.clone()) - .await; } } @@ -564,13 +521,11 @@ impl GqlMutableNode { #[graphql(name = "MutableEdge")] pub struct GqlMutableEdge { edge: EdgeView, - path: ExistingGraphFolder, - data: Data, } impl GqlMutableEdge { - pub fn new(edge: EdgeView, path: ExistingGraphFolder, data: Data) -> Self { - Self { edge, path, data } + pub fn new(edge: EdgeView) -> Self { + Self { edge } } } @@ -588,12 +543,12 @@ impl GqlMutableEdge { /// Get the mutable source node of the edge. async fn src(&self) -> GqlMutableNode { - GqlMutableNode::new(self.edge.src(), self.path.clone(), self.data.clone()) + GqlMutableNode::new(self.edge.src()) } /// Get the mutable destination node of the edge. async fn dst(&self) -> GqlMutableNode { - GqlMutableNode::new(self.edge.dst(), self.path.clone(), self.data.clone()) + GqlMutableNode::new(self.edge.dst()) } /// Mark the edge as deleted at time time. @@ -694,13 +649,6 @@ impl GqlMutableEdge { /// Post mutation operations. async fn post_mutation_ops(&self) { self.edge.graph.set_dirty(true); - - // Reinsert the graph into the cache to reset eviction priority. - // This prevents data loss if the graph is evicted after the mutation but before - // the `set_dirty` call. - self.data - .insert_graph_into_cache(self.path.get_original_path_str(), self.edge.graph.clone()) - .await; } } @@ -740,7 +688,7 @@ mod tests { graph.into() } - async fn create_mutable_graph() -> (GqlMutableGraph, tempfile::TempDir) { + async fn create_mutable_graph() -> (GqlMutableGraph, Data, tempfile::TempDir) { let graph = create_test_graph(); let tmp_dir = tempdir().unwrap(); @@ -761,14 +709,14 @@ mod tests { data.insert_graph(folder, graph).await.unwrap(); let (graph_with_vectors, path) = data.get_graph("test_graph").await.unwrap(); - let mutable_graph = GqlMutableGraph::new(path, graph_with_vectors, data.clone()); + let mutable_graph = GqlMutableGraph::new(path, graph_with_vectors); - (mutable_graph, tmp_dir) + (mutable_graph, data, tmp_dir) } #[tokio::test] async fn test_add_nodes_empty_list() { - let (mutable_graph, _tmp_dir) = create_mutable_graph().await; + let (mutable_graph, _data, _tmp_dir) = create_mutable_graph().await; let nodes = vec![]; let result = mutable_graph.add_nodes(nodes).await; @@ -780,7 +728,7 @@ mod tests { #[tokio::test] #[ignore = "TODO: #2384"] async fn test_add_nodes_simple() { - let (mutable_graph, _tmp_dir) = create_mutable_graph().await; + let (mutable_graph, _data, _tmp_dir) = create_mutable_graph().await; let nodes = vec![ NodeAddition { @@ -825,7 +773,7 @@ mod tests { #[tokio::test] #[ignore = "TODO: #2384"] async fn test_add_nodes_with_properties() { - let (mutable_graph, _tmp_dir) = create_mutable_graph().await; + let (mutable_graph, _data, _tmp_dir) = create_mutable_graph().await; let nodes = vec![ NodeAddition { @@ -897,7 +845,7 @@ mod tests { #[tokio::test] #[ignore = "TODO: #2384"] async fn test_add_edges_simple() { - let (mutable_graph, _tmp_dir) = create_mutable_graph().await; + let (mutable_graph, _data, _tmp_dir) = create_mutable_graph().await; // First add some nodes. let nodes = vec![ diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index 85b724927c..5ab20774c1 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -20,7 +20,7 @@ use dynamic_graphql::{ }; use raphtory::{ db::{ - api::view::{internal::InternalStorageOps, MaterializedGraph}, + api::view::{ MaterializedGraph}, graph::views::deletion_graph::PersistentGraph, }, errors::{GraphError, InvalidPathReason}, @@ -30,8 +30,8 @@ use raphtory::{ }; use std::{ error::Error, - fmt::{Display, Formatter}, - path::PathBuf, + fmt::{Display, Formatter} + , sync::Arc, }; @@ -113,7 +113,7 @@ impl QueryRoot { let graph = data .get_graph(path.as_ref()) .await - .map(|(g, folder)| GqlMutableGraph::new(folder, g, data.clone()))?; + .map(|(g, folder)| GqlMutableGraph::new(folder, g))?; Ok(graph) } From ada3afb55956f8b87fd6639600b65fd49ebe0ffa Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Tue, 18 Nov 2025 10:36:46 +0100 Subject: [PATCH 04/39] fmt --- raphtory-graphql/src/model/mod.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index 5ab20774c1..87f7e98462 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -19,10 +19,7 @@ use dynamic_graphql::{ Result, Upload, }; use raphtory::{ - db::{ - api::view::{ MaterializedGraph}, - graph::views::deletion_graph::PersistentGraph, - }, + db::{api::view::MaterializedGraph, graph::views::deletion_graph::PersistentGraph}, errors::{GraphError, InvalidPathReason}, prelude::*, serialise::*, @@ -30,8 +27,7 @@ use raphtory::{ }; use std::{ error::Error, - fmt::{Display, Formatter} - , + fmt::{Display, Formatter}, sync::Arc, }; From fbde175674402e07514b4fe2b6a3eb001a25198c Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 1 Dec 2025 13:13:16 +0100 Subject: [PATCH 05/39] initial implementation of dirty path tracking --- raphtory-graphql/src/data.rs | 338 +++++---- raphtory-graphql/src/graph.rs | 20 +- raphtory-graphql/src/lib.rs | 55 +- raphtory-graphql/src/model/graph/graph.rs | 18 +- .../src/model/graph/mutable_graph.rs | 40 +- raphtory-graphql/src/model/mod.rs | 80 +-- raphtory-graphql/src/paths.rs | 678 +++++++++++++++--- raphtory/src/serialise/graph_folder.rs | 10 +- raphtory/src/serialise/metadata.rs | 16 + 9 files changed, 905 insertions(+), 350 deletions(-) diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index fd1d6ffc21..56ceb03581 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -2,14 +2,22 @@ use crate::{ config::app_config::AppConfig, graph::GraphWithVectors, model::blocking_io, - paths::{valid_path, ExistingGraphFolder, ValidGraphFolder}, + paths::{ + mark_dirty, read_data_path, valid_path, valid_relative_graph_path, ExistingGraphFolder, + InternalPathValidationError, PathValidationError, ValidGraphFolder, WriteableGraphFolder, + }, + rayon::blocking_compute, + GQLError, + GQLError::Insertion, }; -use itertools::Itertools; +use futures_util::FutureExt; +use itertools::{fold, Itertools}; use moka::future::Cache; use raphtory::{ db::api::view::{internal::InternalStorageOps, MaterializedGraph}, errors::{GraphError, InvalidPathReason}, prelude::StableEncode, + serialise::{GraphFolder, META_PATH}, vectors::{ cache::VectorCache, template::DocumentTemplate, vectorisable::Vectorisable, vectorised_graph::VectorisedGraph, @@ -17,14 +25,19 @@ use raphtory::{ }; use std::{ collections::HashMap, - io::{Read, Seek}, + fs, + fs::File, + io, + io::{ErrorKind, Read, Seek}, path::{Path, PathBuf}, sync::Arc, }; -use tokio::fs; -use tracing::warn; +use tempfile::{spooled_tempfile_in, tempfile_in, NamedTempFile}; +use tracing::{error, warn}; use walkdir::WalkDir; +pub const DIRTY_PATH: &'static str = ".dirty"; + #[derive(Clone)] pub struct EmbeddingConf { pub(crate) cache: VectorCache, @@ -32,6 +45,73 @@ pub struct EmbeddingConf { pub(crate) individual_templates: HashMap, } +#[derive(thiserror::Error, Debug)] +enum MutationErrorInner { + #[error(transparent)] + GraphError(#[from] GraphError), + #[error(transparent)] + IO(#[from] io::Error), + #[error(transparent)] + InvalidInternal(#[from] InternalPathValidationError), +} + +#[derive(thiserror::Error, Debug)] +pub enum InsertionError { + #[error("Failed to insert graph {graph}: {error}")] + Insertion { + graph: String, + error: MutationErrorInner, + }, + #[error(transparent)] + PathValidation(#[from] PathValidationError), + #[error("Failed to insert graph {graph}: {error}")] + GraphError { graph: String, error: GraphError }, +} + +impl InsertionError { + pub fn from_inner(graph: &str, error: MutationErrorInner) -> Self { + InsertionError::Insertion { + graph: graph.to_string(), + error, + } + } + + pub fn from_graph_err(graph: &str, error: GraphError) -> Self { + InsertionError::GraphError { + graph: graph.to_string(), + error, + } + } +} + +#[derive(thiserror::Error, Debug)] +pub enum DeletionError { + #[error("Failed to delete graph {graph}: {error}")] + Insertion { + graph: String, + error: MutationErrorInner, + }, + #[error(transparent)] + PathValidation(#[from] PathValidationError), +} + +#[derive(thiserror::Error, Debug)] +pub enum MoveError { + #[error("Failed to move graph: {0}")] + Insertion(#[from] InsertionError), + #[error("Failed to move graph: {0}")] + Deletion(#[from] DeletionError), +} + +impl DeletionError { + fn from_inner(graph: &str, error: MutationErrorInner) -> Self { + DeletionError::Insertion { + graph: graph.to_string(), + error, + } + } +} + pub(crate) fn get_relative_path( work_dir: PathBuf, path: &Path, @@ -55,7 +135,7 @@ pub(crate) fn get_relative_path( pub struct Data { pub(crate) work_dir: PathBuf, - cache: Cache, + cache: Cache, pub(crate) create_index: bool, pub(crate) embedding_conf: Option, } @@ -64,23 +144,23 @@ impl Data { pub fn new(work_dir: &Path, configs: &AppConfig) -> Self { let cache_configs = &configs.cache; - let cache = Cache::::builder() + let cache = Cache::::builder() .max_capacity(cache_configs.capacity) .time_to_idle(std::time::Duration::from_secs(cache_configs.tti_seconds)) - .eviction_listener(|_, graph, cause| { + .async_eviction_listener(|_, graph, cause| { // The eviction listener gets called any time a graph is removed from the cache, // not just when it is evicted. Only serialize on evictions. - if !cause.was_evicted() { - return; - } - - // On eviction, serialize graphs that don't have underlying storage. - // FIXME: don't have currently a way to know which embedding updates are pending - if !graph.graph.disk_storage_enabled() && graph.is_dirty() { - if let Err(e) = Self::encode_graph_to_disk(graph.clone()) { - warn!("Error encoding graph to disk on eviction: {e}"); + async move { + if !cause.was_evicted() { + return; + } + if let Err(e) = + blocking_compute(move || graph.folder.write_graph_data(graph.graph)).await + { + error!("Error encoding graph to disk on eviction: {e}"); } } + .boxed() }) .build(); @@ -97,97 +177,90 @@ impl Data { } } - pub async fn get_graph( + pub fn validate_path_for_insert( &self, path: &str, - ) -> Result<(GraphWithVectors, ExistingGraphFolder), Arc> { - let graph_folder = ExistingGraphFolder::try_from(self.work_dir.clone(), path)?; - let graph_folder_clone = graph_folder.clone(); - self.cache - .try_get_with(path.into(), self.read_graph_from_folder(graph_folder_clone)) - .await - .map(|graph| (graph, graph_folder)) + overwrite: bool, + ) -> Result { + if overwrite { + WriteableGraphFolder::try_existing_or_new(self.work_dir.clone(), path) + } else { + WriteableGraphFolder::try_new(self.work_dir.clone(), path) + } } - pub async fn has_graph(&self, path: &str) -> bool { - ExistingGraphFolder::try_from(self.work_dir.clone(), path).is_ok() + pub async fn get_graph(&self, path: &str) -> Result> { + self.cache + .try_get_with(path.into(), self.read_graph_from_disk(path)) + .await } - pub fn validate_path_for_insert( - &self, - path: &str, - overwrite: bool, - ) -> Result { - let folder = ValidGraphFolder::try_from(self.work_dir.clone(), path)?; - - match ExistingGraphFolder::try_from(self.work_dir.clone(), path) { - Ok(_) => { - if overwrite { - Ok(folder) - } else { - Err(GraphError::GraphNameAlreadyExists(folder.to_error_path())) - } - } - Err(_) => Ok(folder), - } + pub fn has_graph(&self, path: &str) -> bool { + self.cache.contains_key(path) + || ExistingGraphFolder::try_from(self.work_dir.clone(), path).is_ok() } pub async fn insert_graph( &self, - folder: ValidGraphFolder, + writeable_folder: WriteableGraphFolder, graph: MaterializedGraph, - ) -> Result<(), GraphError> { - let vectors = self.vectorise(graph.clone(), &folder).await; - let graph = GraphWithVectors::new(graph, vectors, folder.clone().into()); - - let graph_clone = graph.clone(); - let folder_clone = folder.clone(); - - blocking_io(move || { - // Graphs with underlying storage already write data to disk. - // They just need to write metadata, primarily to infer the graph type. - // Graphs without storage are encoded to the folder. - if graph_clone.disk_storage_enabled() { - folder_clone.write_metadata(&graph_clone)?; - } else { - Self::encode_graph_to_disk(graph_clone)?; - } - - Ok::<(), GraphError>(()) + ) -> Result<(), InsertionError> { + let vectors = self.vectorise(graph.clone(), &writeable_folder).await; + let graph = blocking_compute(move || { + writeable_folder.write_graph_data(graph.clone())?; + let folder = writeable_folder.finish()?; + let graph = GraphWithVectors::new(graph, vectors, folder.as_existing()?); + Ok::<_, InsertionError>(graph) }) .await?; - let path = folder.get_original_path_str(); - self.cache.insert(path.into(), graph).await; - + self.cache + .insert(graph.folder.local_path.clone(), graph) + .await; Ok(()) } - /// Insert a graph into the cache without writing to disk. - pub async fn insert_graph_into_cache(&self, path: &str, graph: GraphWithVectors) { - let path = path.into(); - self.cache.insert(path, graph).await; - } - /// Insert a graph serialized from a graph folder. - pub async fn insert_graph_as_bytes( + pub async fn insert_graph_as_bytes( &self, - folder: ValidGraphFolder, + folder: WriteableGraphFolder, bytes: R, - ) -> Result<(), GraphError> { - let path = folder.get_original_path_str(); - folder.unzip_to_folder(bytes)?; - - let existing_folder = ExistingGraphFolder::try_from(self.work_dir.clone(), path)?; - self.vectorise_folder(&existing_folder).await; + ) -> Result<(), InsertionError> { + let folder_clone = folder.clone(); + blocking_io(move || { + folder_clone + .data_path() + .unzip_to_folder(bytes) + .map_err(|err| { + InsertionError::from_graph_err(folder_clone.get_original_path_str(), err) + }) + }) + .await?; + self.vectorise_folder(folder.as_existing()?).await; + blocking_io(move || folder.finish()).await?; + Ok(()) + } + async fn delete_graph_inner( + &self, + graph_folder: ExistingGraphFolder, + ) -> Result<(), MutationErrorInner> { + blocking_io(move || { + mark_dirty(&graph_folder.path)?; + fs::remove_dir_all(&graph_folder.path)?; + fs::remove_file(graph_folder.data_folder.join(DIRTY_PATH))?; + Ok::<_, MutationErrorInner>(()) + }) + .await?; Ok(()) } - pub async fn delete_graph(&self, path: &str) -> Result<(), GraphError> { + pub async fn delete_graph(&self, path: &str) -> Result<(), DeletionError> { let graph_folder = ExistingGraphFolder::try_from(self.work_dir.clone(), path)?; - fs::remove_dir_all(graph_folder.get_base_path()).await?; - self.cache.remove(&PathBuf::from(path)).await; + self.delete_graph_inner(graph_folder) + .await + .map_err(|err| DeletionError::from_inner(path, err))?; + self.cache.remove(path).await; Ok(()) } @@ -232,72 +305,57 @@ impl Data { self.vectorise_with_template(graph, folder, template).await } - async fn vectorise_folder(&self, folder: &ExistingGraphFolder) -> Option<()> { + async fn vectorise_folder(&self, folder: ExistingGraphFolder) -> Option<()> { // it's important that we check if there is a valid template set for this graph path // before actually loading the graph, otherwise we are loading the graph for no reason let template = self.resolve_template(folder.get_original_path())?; let graph = self - .read_graph_from_folder(folder.clone()) + .read_graph_from_disk_inner(folder.clone()) .await .ok()? .graph; - self.vectorise_with_template(graph, folder, template).await; + self.vectorise_with_template(graph, &folder, template).await; Some(()) } pub(crate) async fn vectorise_all_graphs_that_are_not(&self) -> Result<(), GraphError> { for folder in self.get_all_graph_folders() { - if !folder.get_vectors_path().exists() { - self.vectorise_folder(&folder).await; + if !folder.data_path().get_vectors_path().exists() { + self.vectorise_folder(folder).await; } } Ok(()) } // TODO: return iter - pub fn get_all_graph_folders(&self) -> Vec { + pub fn get_all_graph_folders(&self) -> impl Iterator { let base_path = self.work_dir.clone(); WalkDir::new(&self.work_dir) .into_iter() - .filter_map(|e| { + .filter_map(move |e| { let entry = e.ok()?; let path = entry.path(); let relative = get_relative_path(base_path.clone(), path, false).ok()?; let folder = ExistingGraphFolder::try_from(base_path.clone(), &relative).ok()?; Some(folder) }) - .collect() } - async fn read_graph_from_folder( + async fn read_graph_from_disk_inner( &self, folder: ExistingGraphFolder, - ) -> Result { + ) -> Result { let cache = self.embedding_conf.as_ref().map(|conf| conf.cache.clone()); let create_index = self.create_index; - blocking_io(move || GraphWithVectors::read_from_folder(&folder, cache, create_index)).await + Ok( + blocking_io(move || GraphWithVectors::read_from_folder(&folder, cache, create_index)) + .await?, + ) } - /// Serializes a graph to disk, overwriting any existing data in its folder. - fn encode_graph_to_disk(graph: GraphWithVectors) -> Result<(), GraphError> { - let folder_path = graph.folder.get_base_path(); - let bak_name = "_".to_string() - + folder_path - .file_name() - .and_then(|n| n.to_str()) - .unwrap_or(""); - let bak_path = folder_path.with_file_name(bak_name); - - // Write to backup path first - graph.graph.encode(&bak_path)?; - - if folder_path.exists() { - // delete old data - std::fs::remove_dir_all(folder_path)?; - } - std::fs::rename(&bak_path, folder_path)?; - - Ok(()) + async fn read_graph_from_disk(&self, path: &str) -> Result { + let folder = ExistingGraphFolder::try_from(self.work_dir.clone(), path)?; + self.read_graph_from_disk_inner(folder).await } } @@ -305,10 +363,8 @@ impl Drop for Data { fn drop(&mut self) { // On drop, serialize graphs that don't have underlying storage. for (_, graph) in self.cache.iter() { - if !graph.graph.disk_storage_enabled() && graph.is_dirty() { - if let Err(e) = Self::encode_graph_to_disk(graph.clone()) { - warn!("Error encoding graph to disk on drop: {e}"); - } + if let Err(e) = graph.folder.write_graph_data(graph.graph) { + error!("Error encoding graph to disk on drop: {e}"); } } } @@ -316,15 +372,11 @@ impl Drop for Data { #[cfg(test)] pub(crate) mod data_tests { - use super::ValidGraphFolder; - use crate::{ - config::app_config::{AppConfig, AppConfigBuilder}, - data::Data, - }; + use super::InsertionError; + use crate::{config::app_config::AppConfigBuilder, data::Data}; use itertools::Itertools; use raphtory::{ db::api::view::{internal::InternalStorageOps, MaterializedGraph}, - errors::GraphError, prelude::*, }; use std::{collections::HashMap, fs, path::Path, time::Duration}; @@ -336,14 +388,13 @@ pub(crate) mod data_tests { graph.encode(path).unwrap(); } - pub(crate) fn save_graphs_to_work_dir( - work_dir: &Path, + pub(crate) async fn save_graphs_to_work_dir( + data: &Data, graphs: &HashMap, - ) -> Result<(), GraphError> { + ) -> Result<(), InsertionError> { for (name, graph) in graphs.into_iter() { - let data = Data::new(work_dir, &AppConfig::default()); - let folder = ValidGraphFolder::try_from(data.work_dir.clone(), name)?; - graph.encode(folder)?; + let folder = data.validate_path_for_insert(name, true)?; + data.insert_graph(folder, graph.clone()).await?; } Ok(()) } @@ -366,11 +417,10 @@ pub(crate) mod data_tests { let mut graphs = HashMap::new(); graphs.insert("test_g".to_string(), graph); - - save_graphs_to_work_dir(tmp_work_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_work_dir.path(), &Default::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); + for graph in graphs.keys() { assert!(data.get_graph(graph).await.is_ok(), "could not get {graph}") } @@ -398,21 +448,21 @@ pub(crate) mod data_tests { let data = Data::new(tmp_work_dir.path(), &configs); - assert!(!data.cache.contains_key(Path::new("test_g"))); - assert!(!data.cache.contains_key(Path::new("test_g2"))); + assert!(!data.cache.contains_key("test_g")); + assert!(!data.cache.contains_key("test_g2")); // Test size based eviction data.get_graph("test_g2").await.unwrap(); - assert!(data.cache.contains_key(Path::new("test_g2"))); - assert!(!data.cache.contains_key(Path::new("test_g"))); + assert!(data.cache.contains_key("test_g2")); + assert!(!data.cache.contains_key("test_g")); data.get_graph("test_g").await.unwrap(); // wait for any eviction data.cache.run_pending_tasks().await; assert_eq!(data.cache.iter().count(), 1); sleep(Duration::from_secs(3)).await; - assert!(!data.cache.contains_key(Path::new("test_g"))); - assert!(!data.cache.contains_key(Path::new("test_g2"))); + assert!(!data.cache.contains_key("test_g")); + assert!(!data.cache.contains_key("test_g2")); } #[tokio::test] @@ -451,7 +501,7 @@ pub(crate) mod data_tests { let paths = data .get_all_graph_folders() .into_iter() - .map(|folder| folder.get_base_path().to_path_buf()) + .map(|folder| folder.0.path) .collect_vec(); assert_eq!(paths.len(), 5); @@ -510,8 +560,8 @@ pub(crate) mod data_tests { let data = Data::new(tmp_work_dir.path(), &configs); - let (loaded_graph1, _) = data.get_graph("test_graph1").await.unwrap(); - let (loaded_graph2, _) = data.get_graph("test_graph2").await.unwrap(); + let loaded_graph1 = data.get_graph("test_graph1").await.unwrap(); + let loaded_graph2 = data.get_graph("test_graph2").await.unwrap(); // TODO: This test doesn't work with disk storage right now, make sure modification dates actually update correctly! if loaded_graph1.graph.disk_storage_enabled() { @@ -595,7 +645,7 @@ pub(crate) mod data_tests { let data = Data::new(tmp_work_dir.path(), &configs); // Load first graph - let (loaded_graph1, _) = data.get_graph("test_graph1").await.unwrap(); + let loaded_graph1 = data.get_graph("test_graph1").await.unwrap(); assert!( !loaded_graph1.is_dirty(), "Graph1 should not be dirty when loaded from disk" @@ -610,7 +660,7 @@ pub(crate) mod data_tests { // Load second graph println!("Loading second graph"); - let (loaded_graph2, _) = data.get_graph("test_graph2").await.unwrap(); + let loaded_graph2 = data.get_graph("test_graph2").await.unwrap(); assert!( !loaded_graph2.is_dirty(), "Graph2 should not be dirty when loaded from disk" diff --git a/raphtory-graphql/src/graph.rs b/raphtory-graphql/src/graph.rs index e0c041e1da..e7f7b833f6 100644 --- a/raphtory-graphql/src/graph.rs +++ b/raphtory-graphql/src/graph.rs @@ -32,7 +32,7 @@ use raphtory::prelude::IndexMutationOps; pub struct GraphWithVectors { pub graph: MaterializedGraph, pub vectors: Option>, - pub(crate) folder: GraphFolder, + pub(crate) folder: ExistingGraphFolder, pub(crate) is_dirty: Arc, } @@ -40,12 +40,12 @@ impl GraphWithVectors { pub(crate) fn new( graph: MaterializedGraph, vectors: Option>, - folder: GraphFolder, + folder: ExistingGraphFolder, ) -> Self { Self { graph, vectors, - folder: folder, + folder, is_dirty: Arc::new(AtomicBool::new(false)), } } @@ -88,26 +88,26 @@ impl GraphWithVectors { create_index: bool, ) -> Result { let graph = { + let data_path = folder.data_path(); // Either decode a graph serialized using encode or load using underlying storage. - if MaterializedGraph::is_decodable(folder.get_graph_path()) { + if MaterializedGraph::is_decodable(data_path.get_graph_path()) { let path_for_decoded_graph = None; - MaterializedGraph::decode(folder.clone(), path_for_decoded_graph)? + MaterializedGraph::decode(data_path, path_for_decoded_graph)? } else { - let metadata = folder.read_metadata()?; + let metadata = data_path.read_metadata()?; let graph = match metadata.graph_type { GraphType::EventGraph => { - let graph = Graph::load_from_path(folder.get_graph_path()); + let graph = Graph::load_from_path(data_path.get_graph_path()); MaterializedGraph::EventGraph(graph) } GraphType::PersistentGraph => { - let graph = PersistentGraph::load_from_path(folder.get_graph_path()); + let graph = PersistentGraph::load_from_path(data_path.get_graph_path()); MaterializedGraph::PersistentGraph(graph) } }; #[cfg(feature = "search")] - graph.load_index(&folder)?; - + graph.load_index(&data_path)?; graph } }; diff --git a/raphtory-graphql/src/lib.rs b/raphtory-graphql/src/lib.rs index be5eab06c4..7aeb6975d6 100644 --- a/raphtory-graphql/src/lib.rs +++ b/raphtory-graphql/src/lib.rs @@ -1,4 +1,8 @@ pub use crate::server::GraphServer; +use crate::{data::InsertionError, paths::PathValidationError}; +use raphtory::errors::GraphError; +use std::{panic::Location, sync::Arc}; + mod auth; pub mod data; mod embeddings; @@ -15,6 +19,21 @@ pub mod config; pub mod python; pub mod rayon; +#[derive(thiserror::Error, Debug)] +pub enum GQLError { + #[error(transparent)] + GraphError(#[from] GraphError), + #[error(transparent)] + Validation(#[from] PathValidationError), + #[error("Insertion failed for Graph {graph}: {error}")] + Insertion { + graph: String, + error: InsertionError, + }, + #[error(transparent)] + Arc(#[from] Arc), +} + #[cfg(test)] mod graphql_test { use crate::{ @@ -99,10 +118,9 @@ mod graphql_test { let graphs = HashMap::from([("master".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let config = AppConfigBuilder::new().with_create_index(true).build(); let data = Data::new(tmp_dir.path(), &config); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); let schema = App::create_schema().data(data).finish().unwrap(); @@ -199,9 +217,8 @@ mod graphql_test { let graph: MaterializedGraph = graph.into(); let graphs = HashMap::from([("lotr".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); let schema = App::create_schema().data(data).finish().unwrap(); @@ -310,9 +327,9 @@ mod graphql_test { let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); + let schema = App::create_schema().data(data).finish().unwrap(); let prop_has_key_filter = r#" { @@ -408,9 +425,9 @@ mod graphql_test { let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); + let schema = App::create_schema().data(data).finish().unwrap(); let prop_has_key_filter = r#" { @@ -449,6 +466,7 @@ mod graphql_test { #[tokio::test] async fn test_unique_temporal_properties() { + // TODO: this doesn't test anything? let g = Graph::new(); g.add_metadata([("name", "graph")]).unwrap(); g.add_properties(1, [("state", "abc")]).unwrap(); @@ -473,7 +491,8 @@ mod graphql_test { let graph: MaterializedGraph = g.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); + let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); let expected = json!({ "graph": { @@ -624,9 +643,9 @@ mod graphql_test { let g = g.into(); let graphs = HashMap::from([("graph".to_string(), g)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); + let schema = App::create_schema().data(data).finish().unwrap(); let prop_has_key_filter = r#" @@ -868,9 +887,9 @@ mod graphql_test { let graph = graph.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); + let schema = App::create_schema().data(data).finish().unwrap(); let prop_has_key_filter = r#" { @@ -1047,9 +1066,9 @@ mod graphql_test { let graph = graph.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); + let schema = App::create_schema().data(data).finish().unwrap(); let req = r#" @@ -1188,9 +1207,8 @@ mod graphql_test { ("graph6".to_string(), graph6.into()), ]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); let schema = App::create_schema().data(data).finish().unwrap(); let req = r#" @@ -1409,9 +1427,8 @@ mod graphql_test { let graph = graph.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); let schema = App::create_schema().data(data).finish().unwrap(); let req = r#" diff --git a/raphtory-graphql/src/model/graph/graph.rs b/raphtory-graphql/src/model/graph/graph.rs index 8b6222a614..068addc202 100644 --- a/raphtory-graphql/src/model/graph/graph.rs +++ b/raphtory-graphql/src/model/graph/graph.rs @@ -17,6 +17,7 @@ use crate::{ }, paths::ExistingGraphFolder, rayon::blocking_compute, + GQLError, }; use async_graphql::Context; use dynamic_graphql::{ResolvedObject, ResolvedObjectFields}; @@ -47,6 +48,7 @@ use std::{ sync::Arc, }; +use crate::{graph::GraphWithVectors, paths::PathValidationError}; #[cfg(feature = "search")] use raphtory::db::api::view::SearchableGraphOps; @@ -57,6 +59,12 @@ pub(crate) struct GqlGraph { graph: DynamicGraph, } +impl From for GqlGraph { + fn from(value: GraphWithVectors) -> Self { + GqlGraph::new(value.folder, value.graph) + } +} + impl GqlGraph { pub fn new(path: ExistingGraphFolder, graph: G) -> Self { Self { @@ -415,7 +423,7 @@ impl GqlGraph { //if someone write non-utf characters as a filename /// Returns the graph name. - async fn name(&self) -> Result { + async fn name(&self) -> Result { self.path.get_graph_name() } @@ -486,13 +494,9 @@ impl GqlGraph { } /// Export all nodes and edges from this graph view to another existing graph - async fn export_to<'a>( - &self, - ctx: &Context<'a>, - path: String, - ) -> Result> { + async fn export_to<'a>(&self, ctx: &Context<'a>, path: String) -> Result { let data = ctx.data_unchecked::(); - let other_g = data.get_graph(path.as_ref()).await?.0; + let other_g = data.get_graph(path.as_ref()).await?.graph; let g = self.graph.clone(); blocking_compute(move || { other_g.import_nodes(g.nodes(), true)?; diff --git a/raphtory-graphql/src/model/graph/mutable_graph.rs b/raphtory-graphql/src/model/graph/mutable_graph.rs index 852c668b04..1350883d19 100644 --- a/raphtory-graphql/src/model/graph/mutable_graph.rs +++ b/raphtory-graphql/src/model/graph/mutable_graph.rs @@ -113,16 +113,12 @@ pub struct EdgeAddition { #[derive(ResolvedObject, Clone)] #[graphql(name = "MutableGraph")] pub struct GqlMutableGraph { - path: ExistingGraphFolder, graph: GraphWithVectors, } -impl GqlMutableGraph { - pub(crate) fn new(path: ExistingGraphFolder, graph: GraphWithVectors) -> Self { - Self { - path: path.into(), - graph, - } +impl From for GqlMutableGraph { + fn from(graph: GraphWithVectors) -> Self { + Self { graph } } } @@ -144,7 +140,7 @@ fn as_properties( impl GqlMutableGraph { /// Get the non-mutable graph. async fn graph(&self) -> GqlGraph { - GqlGraph::new(self.path.clone(), self.graph.graph.clone()) + GqlGraph::new(self.graph.folder.clone(), self.graph.graph.clone()) } /// Get mutable existing node. @@ -706,10 +702,10 @@ mod tests { let folder = data .validate_path_for_insert("test_graph", overwrite) .unwrap(); - data.insert_graph(folder, graph).await.unwrap(); + data.insert_graph(folder.clone(), graph).await.unwrap(); - let (graph_with_vectors, path) = data.get_graph("test_graph").await.unwrap(); - let mutable_graph = GqlMutableGraph::new(path, graph_with_vectors); + let graph_with_vectors = data.get_graph("test_graph").await.unwrap(); + let mutable_graph = GqlMutableGraph::from(graph_with_vectors); (mutable_graph, data, tmp_dir) } @@ -757,17 +753,17 @@ mod tests { assert!(result.unwrap()); // TODO: #2380 (embeddings aren't working right now) - // let query = "node1".to_string(); - // let embedding = &fake_embedding(vec![query]).await.unwrap().remove(0); - // let limit = 5; - // let result = mutable_graph - // .graph - // .vectors - // .unwrap() - // .nodes_by_similarity(embedding, limit, None); - // - // assert!(result.is_ok()); - // assert!(result.unwrap().get_documents().unwrap().len() == 2); + let query = "node1".to_string(); + let embedding = &fake_embedding(vec![query]).await.unwrap().remove(0); + let limit = 5; + let result = mutable_graph + .graph + .vectors + .unwrap() + .nodes_by_similarity(embedding, limit, None); + + assert!(result.is_ok()); + assert!(result.unwrap().get_documents().unwrap().len() == 2); } #[tokio::test] diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index 87f7e98462..449b040123 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -1,6 +1,7 @@ use crate::{ auth::ContextValidation, - data::Data, + data::{Data, DeletionError, InsertionError}, + graph::GraphWithVectors, model::{ graph::{ collection::GqlCollection, graph::GqlGraph, index::IndexSpecInput, @@ -9,7 +10,7 @@ use crate::{ }, plugins::{mutation_plugin::MutationPlugin, query_plugin::QueryPlugin}, }, - paths::valid_path, + paths::{valid_path, ValidGraphFolder, WriteableGraphFolder}, rayon::blocking_compute, url_encode::{url_decode_graph, url_encode_graph}, }; @@ -30,6 +31,7 @@ use std::{ fmt::{Display, Formatter}, sync::Arc, }; +use tempfile::env::override_temp_dir; pub(crate) mod graph; pub mod plugins; @@ -93,10 +95,7 @@ impl QueryRoot { /// Returns a graph async fn graph<'a>(ctx: &Context<'a>, path: &str) -> Result { let data = ctx.data_unchecked::(); - Ok(data - .get_graph(path) - .await - .map(|(g, folder)| GqlGraph::new(folder, g.graph))?) + Ok(data.get_graph(path).await?.into()) } /// Update graph query, has side effects to update graph state @@ -106,10 +105,7 @@ impl QueryRoot { ctx.require_write_access()?; let data = ctx.data_unchecked::(); - let graph = data - .get_graph(path.as_ref()) - .await - .map(|(g, folder)| GqlMutableGraph::new(folder, g))?; + let graph = data.get_graph(path.as_ref()).await?.into(); Ok(graph) } @@ -119,7 +115,7 @@ impl QueryRoot { /// Returns:: GqlVectorisedGraph async fn vectorised_graph<'a>(ctx: &Context<'a>, path: &str) -> Option { let data = ctx.data_unchecked::(); - let g = data.get_graph(path).await.ok()?.0.vectors?; + let g = data.get_graph(path).await.ok()?.vectors?; Some(g.into()) } @@ -165,10 +161,10 @@ impl QueryRoot { /// Encodes graph and returns as string /// /// Returns:: Base64 url safe encoded string - async fn receive_graph<'a>(ctx: &Context<'a>, path: String) -> Result> { + async fn receive_graph<'a>(ctx: &Context<'a>, path: String) -> Result { let path = path.as_ref(); let data = ctx.data_unchecked::(); - let g = data.get_graph(path).await?.0.graph.clone(); + let g = data.get_graph(path).await?.graph.clone(); let res = url_encode_graph(g)?; Ok(res) } @@ -193,7 +189,7 @@ impl Mut { /// Delete graph from a path on the server. // If namespace is not provided, it will be set to the current working directory. - async fn delete_graph<'a>(ctx: &Context<'a>, path: String) -> Result { + async fn delete_graph<'a>(ctx: &Context<'a>, path: String) -> Result { let data = ctx.data_unchecked::(); data.delete_graph(&path).await?; Ok(true) @@ -204,14 +200,14 @@ impl Mut { ctx: &Context<'a>, path: String, graph_type: GqlGraphType, - ) -> Result { + ) -> Result { let data = ctx.data_unchecked::(); let overwrite = false; let folder = data.validate_path_for_insert(&path, overwrite)?; - let path = folder.get_graph_path(); + let graph_path = folder.data_path().get_graph_path(); let graph: MaterializedGraph = match graph_type { - GqlGraphType::Persistent => PersistentGraph::new_at_path(path).into(), - GqlGraphType::Event => Graph::new_at_path(path).into(), + GqlGraphType::Persistent => PersistentGraph::new_at_path(graph_path).into(), + GqlGraphType::Event => Graph::new_at_path(graph_path).into(), }; data.insert_graph(folder, graph).await?; @@ -219,29 +215,33 @@ impl Mut { Ok(true) } - /// Move graph from a path path on the server to a new_path on the server. - /// - /// If namespace is not provided, it will be set to the current working directory. - /// This applies to both the graph namespace and new graph namespace. - async fn move_graph<'a>(ctx: &Context<'a>, path: &str, new_path: &str) -> Result { - Self::copy_graph(ctx, path, new_path).await?; + /// Move graph from a path on the server to a new_path on the server. + async fn move_graph<'a>( + ctx: &Context<'a>, + path: &str, + new_path: &str, + overwrite: Option, + ) -> Result { + Self::copy_graph(ctx, path, new_path, overwrite).await?; let data = ctx.data_unchecked::(); data.delete_graph(path).await?; Ok(true) } - /// Copy graph from a path path on the server to a new_path on the server. - /// - /// If namespace is not provided, it will be set to the current working directory. - /// This applies to both the graph namespace and new graph namespace. - async fn copy_graph<'a>(ctx: &Context<'a>, path: &str, new_path: &str) -> Result { + /// Copy graph from a path on the server to a new_path on the server. + async fn copy_graph<'a>( + ctx: &Context<'a>, + path: &str, + new_path: &str, + overwrite: Option, + ) -> Result { // doing this in a more efficient way is not trivial, this at least is correct // there are questions like, maybe the new vectorised graph have different rules // for the templates or if it needs to be vectorised at all + let overwrite = overwrite.unwrap_or(false); let data = ctx.data_unchecked::(); - let overwrite = false; let folder = data.validate_path_for_insert(new_path, overwrite)?; - let graph = data.get_graph(path).await?.0.graph; + let graph = data.get_graph(path).await?.graph; data.insert_graph(folder, graph).await?; Ok(true) @@ -281,13 +281,13 @@ impl Mut { overwrite: bool, ) -> Result { let data = ctx.data_unchecked::(); - let folder = data.validate_path_for_insert(path, overwrite)?; - let path_for_decoded_graph = Some(folder.get_graph_path()); - let g: MaterializedGraph = url_decode_graph(graph, path_for_decoded_graph.as_deref())?; - - if overwrite { - let _ignored = data.delete_graph(path).await; - } + let folder = if overwrite { + WriteableGraphFolder::try_existing_or_new(data.work_dir.clone(), path)? + } else { + WriteableGraphFolder::try_new(data.work_dir.clone(), path)? + }; + let g: MaterializedGraph = + url_decode_graph(graph, Some(&folder.data_path().get_graph_path()))?; data.insert_graph(folder, g).await?; Ok(path.to_owned()) @@ -305,7 +305,7 @@ impl Mut { overwrite: bool, ) -> Result { let data = ctx.data_unchecked::(); - let parent_graph = data.get_graph(parent_path).await?.0.graph; + let parent_graph = data.get_graph(parent_path).await?.graph; let new_subgraph = blocking_compute(move || parent_graph.subgraph(nodes).materialize()).await?; let folder = data.validate_path_for_insert(&new_path, overwrite)?; @@ -328,7 +328,7 @@ impl Mut { #[cfg(feature = "search")] { let data = ctx.data_unchecked::(); - let graph = data.get_graph(path).await?.0.graph; + let graph = data.get_graph(path).await?.graph; match index_spec { Some(index_spec) => { let index_spec = index_spec.to_index_spec(graph.clone())?; diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index c2d7366719..16142f0575 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -1,206 +1,686 @@ -use crate::rayon::blocking_compute; +use crate::{ + data::DIRTY_PATH, + model::{blocking_io, GqlGraphError}, + rayon::blocking_compute, + GQLError, +}; +use futures_util::io; use raphtory::{ + db::api::view::{internal::InternalStorageOps, MaterializedGraph}, errors::{GraphError, InvalidPathReason}, + prelude::ParquetEncoder, serialise::{metadata::GraphMetadata, GraphFolder, META_PATH}, }; +use serde::{Deserialize, Serialize}; use std::{ + cmp::Ordering, fs, + fs::File, + io::{ErrorKind, Read, Write}, ops::Deref, path::{Component, Path, PathBuf}, time::{SystemTime, UNIX_EPOCH}, }; +use tokio::io::AsyncReadExt; +use tracing::{error, warn}; #[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] -pub struct ExistingGraphFolder { - folder: ValidGraphFolder, -} +pub struct ExistingGraphFolder(pub(crate) ValidGraphFolder); impl Deref for ExistingGraphFolder { type Target = ValidGraphFolder; fn deref(&self) -> &Self::Target { - &self.folder - } -} - -impl From for GraphFolder { - fn from(value: ValidGraphFolder) -> Self { - value.folder + &self.0 } } -impl From for GraphFolder { - fn from(value: ExistingGraphFolder) -> Self { - value.folder.folder - } -} impl ExistingGraphFolder { - pub(crate) fn try_from(base_path: PathBuf, relative_path: &str) -> Result { - let graph_folder = ValidGraphFolder::try_from(base_path, relative_path)?; + pub(crate) fn try_from( + base_path: PathBuf, + relative_path: &str, + ) -> Result { + let path = valid_path(base_path, relative_path, false)?; + let graph_folder: GraphFolder = get_full_data_path(&path) + .map_err(|error| match error { + InternalPathValidationError::MissingMetadataFile => { + PathValidationError::GraphNotExistsError(relative_path.to_string()) + } + _ => PathValidationError::InternalError { + graph: relative_path.to_string(), + error, + }, + })? + .into(); if graph_folder.is_reserved() { - Ok(Self { - folder: graph_folder, - }) + Ok(Self(ValidGraphFolder { + path, + data_folder: graph_folder.root_folder, + local_path: relative_path.to_string(), + })) } else { - Err(GraphError::GraphNotFound(graph_folder.to_error_path())) + Err(PathValidationError::GraphNotExistsError( + relative_path.to_string(), + )) } } +} - pub(crate) fn get_graph_name(&self) -> Result { - let path = &self.get_base_path(); - let last_component: Component = path.components().last().ok_or_else(|| { - GraphError::from(InvalidPathReason::PathNotParsable(self.to_error_path())) - })?; - match last_component { - Component::Normal(value) => { - value +#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] +pub struct ValidGraphFolder { + pub path: PathBuf, + pub data_folder: PathBuf, + pub local_path: String, +} + +fn extend_and_validate( + full_path: &mut PathBuf, + component: Component, + namespace: bool, + user_facing_path: &str, +) -> Result<(), InvalidPathReason> { + match component { + Component::Prefix(_) => { + return Err(InvalidPathReason::RootNotAllowed(user_facing_path.into())) + } + Component::RootDir => { + return Err(InvalidPathReason::RootNotAllowed(user_facing_path.into())) + } + Component::CurDir => { + return Err(InvalidPathReason::CurDirNotAllowed(user_facing_path.into())) + } + Component::ParentDir => { + return Err(InvalidPathReason::ParentDirNotAllowed( + user_facing_path.into(), + )) + } + Component::Normal(component) => { + // check if some intermediate path is already a graph + if full_path.join(META_PATH).exists() { + return Err(InvalidPathReason::ParentIsGraph(user_facing_path.into())); + } + full_path.push(component); + //check if the path with the component is a graph + if full_path.join(META_PATH).exists() { + if namespace { + return Err(InvalidPathReason::ParentIsGraph(user_facing_path.into())); + } else if component .to_str() - .map(|s| s.to_string()) - .ok_or(GraphError::from(InvalidPathReason::PathNotParsable( - self.to_error_path(), - ))) + .ok_or(InvalidPathReason::NonUTFCharacters)? + .starts_with("_") + { + return Err(InvalidPathReason::GraphNamePrefix); + } + } + //check for symlinks + if full_path.is_symlink() { + return Err(InvalidPathReason::SymlinkNotAllowed( + user_facing_path.into(), + )); } - Component::Prefix(_) - | Component::RootDir - | Component::CurDir - | Component::ParentDir => Err(GraphError::from(InvalidPathReason::PathNotParsable( - self.to_error_path(), - ))), } } + Ok(()) } -#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] -pub struct ValidGraphFolder { - pub folder: GraphFolder, - original_path: String, -} +pub(crate) fn valid_path( + base_path: PathBuf, + relative_path: &str, + namespace: bool, +) -> Result { + let user_facing_path = PathBuf::from(relative_path); + + if relative_path.contains(r"//") { + return Err(InvalidPathReason::DoubleForwardSlash(user_facing_path)); + } + if relative_path.contains(r"\") { + return Err(InvalidPathReason::BackslashError(user_facing_path)); + } -impl From for ValidGraphFolder { - fn from(value: ExistingGraphFolder) -> Self { - value.folder + let mut full_path = base_path.clone(); + // fail if any component is a Prefix (C://), tries to access root, + // tries to access a parent dir or is a symlink which could break out of the working dir + for component in user_facing_path.components() { + extend_and_validate(&mut full_path, component, namespace, relative_path)?; } + Ok(full_path) +} + +#[derive(Clone, Debug)] +struct NewPath { + path: PathBuf, + cleanup: Option, } -impl Deref for ValidGraphFolder { - type Target = GraphFolder; +impl PartialEq for NewPath { + fn eq(&self, other: &Self) -> bool { + self.path.eq(&other.path) + } +} - fn deref(&self) -> &Self::Target { - &self.folder +impl PartialOrd for NewPath { + fn partial_cmp(&self, other: &Self) -> Option { + self.path.partial_cmp(&other.path) } } -pub(crate) fn valid_path( +pub(crate) fn create_valid_path( base_path: PathBuf, relative_path: &str, namespace: bool, -) -> Result { +) -> Result { let user_facing_path = PathBuf::from(relative_path); if relative_path.contains(r"//") { - return Err(InvalidPathReason::DoubleForwardSlash(user_facing_path)); + return Err(InvalidPathReason::DoubleForwardSlash(user_facing_path).into()); } if relative_path.contains(r"\") { - return Err(InvalidPathReason::BackslashError(user_facing_path)); + return Err(InvalidPathReason::BackslashError(user_facing_path).into()); } let mut full_path = base_path.clone(); + let mut cleanup_marker = None; // fail if any component is a Prefix (C://), tries to access root, // tries to access a parent dir or is a symlink which could break out of the working dir for component in user_facing_path.components() { + match extend_and_validate(&mut full_path, component, namespace, relative_path) { + Ok(_) => { + if !full_path.exists() { + if cleanup_marker.is_none() { + cleanup_marker = Some(CleanupPath { + path: full_path.clone(), + dirty_marker: mark_dirty(&full_path)?, + }); + fs::create_dir(&full_path)?; + } + } + } + Err(error) => { + if let Some(created_path) = cleanup_marker { + created_path.cleanup()?; + } + return Err(error.into()); + } + } + } + Ok(NewPath { + path: full_path, + cleanup: cleanup_marker, + }) +} + +#[derive(Debug, Clone)] +struct CleanupPath { + path: PathBuf, + dirty_marker: PathBuf, +} + +impl CleanupPath { + fn persist(&self) -> Result<(), InternalPathValidationError> { + fs::remove_file(&self.dirty_marker)?; + Ok(()) + } + + fn cleanup(&self) -> Result<(), InternalPathValidationError> { + fs::remove_dir_all(&self.path)?; + fs::remove_file(&self.dirty_marker)?; + Ok(()) + } +} + +#[derive(Clone, Debug)] +pub(crate) struct WriteableGraphFolder { + folder: ValidGraphFolder, + dirty_marker: Option, +} + +impl Deref for WriteableGraphFolder { + type Target = ValidGraphFolder; + + fn deref(&self) -> &Self::Target { + &self.folder + } +} + +impl WriteableGraphFolder { + fn new_inner( + valid_path: NewPath, + graph_name: &str, + prefix: &str, + meta: Option, + ) -> Result { + let next_path = make_data_path(&valid_path.path, prefix)?; + let data_folder = valid_path.path.join(&next_path); + fs::create_dir(&data_folder)?; + + fs::write( + valid_path.path.join(DIRTY_PATH), + &serde_json::to_vec(&Metadata { + path: next_path, + meta, + })?, + )?; + let folder = ValidGraphFolder { + path: valid_path.path, + data_folder, + local_path: graph_name.to_string(), + }; + Ok(Self { + folder: folder, + dirty_marker: valid_path.cleanup, + }) + } + fn new( + valid_path: NewPath, + graph_name: &str, + prefix: &str, + meta: Option, + ) -> Result { + Self::new_inner(valid_path, graph_name, prefix, meta).map_err(|error| { + PathValidationError::InternalError { + graph: graph_name.to_string(), + error, + } + }) + } + + pub(crate) fn try_new( + base_path: PathBuf, + relative_path: &str, + ) -> Result { + let path = create_valid_path(base_path, relative_path, false).map_err(|error| { + PathValidationError::InternalError { + graph: relative_path.to_string(), + error, + } + })?; + if !path.cleanup.is_some() { + return Err(PathValidationError::GraphExistsError( + relative_path.to_string(), + )); + } + Self::new(path, relative_path, "data_", None) + } + + pub(crate) fn try_existing_or_new( + base_path: PathBuf, + relative_path: &str, + ) -> Result { + let path = create_valid_path(base_path, relative_path, false).map_err(|error| { + PathValidationError::InternalError { + graph: relative_path.to_string(), + error, + } + })?; + Self::new(path, relative_path, "data_", None) + } + + /// Used for swapping out only the graph parquet data + fn new_inner_graph_folder( + outer: &ValidGraphFolder, + metadata: GraphMetadata, + ) -> Result { + let graph_path = outer.data_folder.clone(); + Self::new_inner( + NewPath { + path: graph_path, + cleanup: None, + }, + &outer.local_path, + "graph_", + Some(metadata), + ) + } + + fn finish_inner(&self) -> Result<(), InternalPathValidationError> { + let old_path = get_full_data_path(&self.folder.path).ok(); + fs::rename( + self.folder.path.join(".dirty"), + self.folder.path.join(META_PATH), + )?; + if let Some(old_path) = old_path { + if old_path.exists() { + fs::remove_dir_all(old_path)?; + } + } + if let Some(cleanup) = self.dirty_marker.as_ref() { + cleanup.persist()?; + } + Ok(()) + } + + /// Swap old and new data and delete the old graph + pub fn finish(self) -> Result { + match self.finish_inner() { + Ok(_) => Ok(self.folder), + Err(error) => Err(PathValidationError::InternalError { + graph: self.folder.local_path, + error, + }), + } + } +} + +#[derive(thiserror::Error, Debug)] +pub enum InternalPathValidationError { + #[error("Path from metadata is invalid: {0}")] + InvalidPath(#[from] InvalidPathReason), + #[error(transparent)] + IOError(io::Error), + #[error("Graph path should not be nested: {0}")] + NestedPath(PathBuf), + #[error("Graph metadata file does not exist")] + MissingMetadataFile, + #[error("Reading path from metadata failed: {0}")] + InvalidMetadata(#[from] serde_json::Error), + #[error(transparent)] + GraphError(#[from] GraphError), + #[error("Graph path should always have a parent")] + MissingParent, +} + +impl From for InternalPathValidationError { + fn from(value: io::Error) -> Self { + error!("Unexpected IO failure: {}", value); + InternalPathValidationError::IOError(value) + } +} + +#[derive(thiserror::Error, Debug)] +pub enum PathValidationError { + #[error("Graph {0} already exists")] + GraphExistsError(String), + #[error("Graph {0} does not exist")] + GraphNotExistsError(String), + #[error(transparent)] + InvalidPath(#[from] InvalidPathReason), + #[error("Graph {graph} is corrupted: {error}")] + InternalError { + graph: String, + error: InternalPathValidationError, + }, + #[error("Unexpected IO error for graph {graph}: {error}")] + IOError { graph: String, error: io::Error }, +} + +pub(crate) fn valid_relative_graph_path( + mut full_path: PathBuf, + relative_path: &Path, +) -> Result { + let mut components = relative_path.components(); + if let Some(component) = components.next() { match component { Component::Prefix(_) => { - return Err(InvalidPathReason::RootNotAllowed(user_facing_path)) - } - Component::RootDir => return Err(InvalidPathReason::RootNotAllowed(user_facing_path)), - Component::CurDir => return Err(InvalidPathReason::CurDirNotAllowed(user_facing_path)), - Component::ParentDir => { - return Err(InvalidPathReason::ParentDirNotAllowed(user_facing_path)) + Err(InvalidPathReason::RootNotAllowed( + relative_path.to_path_buf(), + ))?; } + Component::RootDir => Err(InvalidPathReason::RootNotAllowed( + relative_path.to_path_buf(), + ))?, + Component::CurDir => Err(InvalidPathReason::CurDirNotAllowed( + relative_path.to_path_buf(), + ))?, + Component::ParentDir => Err(InvalidPathReason::ParentDirNotAllowed( + relative_path.to_path_buf(), + ))?, Component::Normal(component) => { - // check if some intermediate path is already a graph - if full_path.join(META_PATH).exists() { - return Err(InvalidPathReason::ParentIsGraph(user_facing_path)); - } full_path.push(component); - //check if the path with the component is a graph - if full_path.join(META_PATH).exists() { - if namespace { - return Err(InvalidPathReason::ParentIsGraph(user_facing_path)); - } else if component - .to_str() - .ok_or(InvalidPathReason::NonUTFCharacters)? - .starts_with("_") - { - return Err(InvalidPathReason::GraphNamePrefix); - } - } //check for symlinks if full_path.is_symlink() { - return Err(InvalidPathReason::SymlinkNotAllowed(user_facing_path)); + Err(InvalidPathReason::SymlinkNotAllowed( + relative_path.to_path_buf(), + ))? } } } } + if components.next().is_some() { + Err(InternalPathValidationError::NestedPath( + relative_path.to_path_buf(), + ))? + } Ok(full_path) } -impl ValidGraphFolder { - pub(crate) fn try_from( - base_path: PathBuf, - relative_path: &str, - ) -> Result { - let full_path = valid_path(base_path, relative_path, false)?; - Ok(Self { - original_path: relative_path.to_owned(), - folder: GraphFolder::from(full_path), - }) +fn is_graph(path: &Path) -> bool { + path.join(META_PATH).is_file() +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct RelativePath { + path: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Metadata { + path: String, + meta: Option, +} + +pub(crate) fn read_path_pointer( + base_path: &Path, + file_name: &str, +) -> Result { + let mut file = File::open(base_path.join(file_name)).map_err(|error| match error.kind() { + ErrorKind::NotFound => InternalPathValidationError::MissingMetadataFile, + _ => InternalPathValidationError::IOError(error), + })?; + let mut value = String::new(); + file.read_to_string(&mut value)?; + let path: RelativePath = serde_json::from_str(&value)?; + Ok(path.path) +} + +pub(crate) fn read_data_path(base_path: &Path) -> Result { + read_path_pointer(base_path, META_PATH) +} + +pub(crate) fn read_dirty_path(base_path: &Path) -> Result { + read_path_pointer(base_path, DIRTY_PATH) +} + +pub(crate) fn ensure_clean_folder( + base_path: &Path, + expected_dirty: bool, +) -> Result<(), InternalPathValidationError> { + match read_dirty_path(base_path) { + Ok(path) => { + if !expected_dirty { + warn!("Found dirty path {path}, cleaning..."); + fs::remove_dir_all(base_path.join(path))?; + } + } + Err(InternalPathValidationError::MissingMetadataFile) => { + return if expected_dirty { + Err(InternalPathValidationError::MissingMetadataFile) + } else { + Ok(()) + } + } + Err(error) => { + if expected_dirty { + return Err(error); + } else { + warn!("Found dirty file with invalid path: {error}, cleaning...") + } + } } + fs::remove_file(base_path.join(DIRTY_PATH))?; + Ok(()) +} + +/// Mark path as dirty +/// - ensure parent is clean +/// - create dirty file and fsync it +pub(crate) fn mark_dirty(path: &Path) -> Result { + let cleanup_path = path + .file_name() + .ok_or(InternalPathValidationError::MissingParent)? + .to_str() + .ok_or(InvalidPathReason::NonUTFCharacters)? + .to_string(); + let parent = path + .parent() + .ok_or(InternalPathValidationError::MissingParent)?; + ensure_clean_folder(parent, false)?; + let dirty_file_path = parent.join(DIRTY_PATH); + let mut dirty_file = File::create_new(&dirty_file_path)?; + dirty_file.write_all(&serde_json::to_vec(&RelativePath { path: cleanup_path })?)?; + // make sure the dirty path is properly recorded before we proceed! + dirty_file.sync_all()?; + Ok(dirty_file_path) +} + +fn get_full_data_path(base_path: &Path) -> Result { + let relative_path = read_data_path(base_path)?; + valid_relative_graph_path(base_path.to_path_buf(), relative_path.as_ref()) +} + +fn make_data_path(base_path: &Path, prefix: &str) -> Result { + let old_id: Option = match read_data_path(base_path) { + Ok(path) => path.strip_prefix(prefix).and_then(|id| id.parse().ok()), + Err(InternalPathValidationError::MissingMetadataFile) => None, + Err(error) => return Err(error), + }; + let mut id = match old_id { + None => 0, + Some(id) => id + 1, + }; + let mut path = format!("{prefix}{id}"); + while base_path.join(&path).exists() { + id += 1; + path = format!("{prefix}{id}"); + } + Ok(path) +} +impl ValidGraphFolder { pub fn created(&self) -> Result { - fs::metadata(self.get_graph_path())?.created()?.to_millis() + fs::metadata(self.meta_path())?.created()?.to_millis() } pub fn last_opened(&self) -> Result { - fs::metadata(self.get_graph_path())?.accessed()?.to_millis() + fs::metadata(self.data_path().get_meta_path())? + .accessed()? + .to_millis() } pub fn last_updated(&self) -> Result { - fs::metadata(self.get_graph_path())?.modified()?.to_millis() + fs::metadata(self.data_path().get_meta_path())? + .modified()? + .to_millis() } pub async fn created_async(&self) -> Result { - let metadata = tokio::fs::metadata(self.get_graph_path()).await?; + let metadata = tokio::fs::metadata(self.meta_path()).await?; metadata.created()?.to_millis() } pub async fn last_opened_async(&self) -> Result { - let metadata = tokio::fs::metadata(self.get_graph_path()).await?; + let metadata = tokio::fs::metadata(self.data_path().get_meta_path()).await?; metadata.accessed()?.to_millis() } pub async fn last_updated_async(&self) -> Result { - let metadata = tokio::fs::metadata(self.get_graph_path()).await?; + let metadata = tokio::fs::metadata(self.data_path().get_meta_path()).await?; metadata.modified()?.to_millis() } pub async fn read_metadata_async(&self) -> Result { - let folder = self.folder.clone(); + let folder: GraphFolder = self.data_folder.clone().into(); blocking_compute(move || folder.read_metadata()).await } pub fn get_original_path_str(&self) -> &str { - &self.original_path + &self.local_path } pub fn get_original_path(&self) -> &Path { - &Path::new(&self.original_path) + &Path::new(&self.local_path) } /// This returns the PathBuf used to build multiple GraphError types pub fn to_error_path(&self) -> PathBuf { - self.original_path.to_owned().into() + self.local_path.to_owned().into() + } + + pub fn get_graph_name(&self) -> Result { + let path: &Path = self.local_path.as_ref(); + let last_component: Component = path + .components() + .last() + .ok_or_else(|| InvalidPathReason::PathNotParsable(self.to_error_path()))?; + let name = match last_component { + Component::Normal(value) => value + .to_str() + .map(|s| s.to_string()) + .ok_or_else(|| InvalidPathReason::PathNotParsable(self.to_error_path()))?, + Component::Prefix(_) + | Component::RootDir + | Component::CurDir + | Component::ParentDir => { + Err(InvalidPathReason::PathNotParsable(self.to_error_path()))? + } + }; + Ok(name) + } + + fn write_graph_data_inner( + &self, + graph: MaterializedGraph, + ) -> Result<(), InternalPathValidationError> { + let metadata = GraphMetadata::from_graph(&graph); + if graph.disk_storage_enabled() { + let data_folder = &self.data_folder; + let path = read_data_path(data_folder)?; + let meta_json = serde_json::to_string(&Metadata { + path, + meta: Some(metadata), + })?; + let dirty_path = data_folder.join(DIRTY_PATH); + fs::write(&dirty_path, &meta_json)?; + fs::rename(&dirty_path, data_folder.join(META_PATH))?; + } else { + let swap = WriteableGraphFolder::new_inner_graph_folder(self, metadata)?; + let data_folder = swap.data_folder.clone(); + graph.encode_parquet(data_folder)?; + swap.finish_inner()?; + } + Ok(()) + } + pub(crate) fn write_graph_data( + &self, + graph: MaterializedGraph, + ) -> Result<(), PathValidationError> { + self.write_graph_data_inner(graph) + .map_err(|error| PathValidationError::InternalError { + graph: self.local_path.clone(), + error, + }) + } + + pub(crate) fn data_path(&self) -> GraphFolder { + self.data_folder.clone().into() + } + + pub(crate) fn meta_path(&self) -> PathBuf { + self.path.join(META_PATH) + } + + pub(crate) fn get_vectors_path(&self) -> PathBuf { + self.data_path().get_vectors_path() + } + + pub(crate) fn as_existing(&self) -> Result { + if self.data_path().is_reserved() { + Ok(ExistingGraphFolder(self.clone())) + } else { + Err(PathValidationError::GraphNotExistsError( + self.local_path.clone(), + )) + } } } diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 1d1c2913a4..e491b06211 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -166,15 +166,7 @@ impl GraphFolder { &self, graph: &impl GraphViewOps<'graph>, ) -> Result<(), GraphError> { - let node_count = graph.count_nodes(); - let edge_count = graph.count_edges(); - let properties = graph.metadata(); - let metadata = GraphMetadata { - node_count, - edge_count, - metadata: properties.as_vec(), - graph_type: graph.graph_type(), - }; + let metadata = GraphMetadata::from_graph(graph); if self.write_as_zip_format { let file = File::options() diff --git a/raphtory/src/serialise/metadata.rs b/raphtory/src/serialise/metadata.rs index 92971a61c9..d62a29fe78 100644 --- a/raphtory/src/serialise/metadata.rs +++ b/raphtory/src/serialise/metadata.rs @@ -1,4 +1,5 @@ use crate::{ + db::api::view::internal::GraphView, prelude::{GraphViewOps, PropertiesOps}, serialise::GraphFolder, }; @@ -16,6 +17,21 @@ pub struct GraphMetadata { pub graph_type: GraphType, } +impl GraphMetadata { + pub fn from_graph(graph: G) -> Self { + let node_count = graph.count_nodes(); + let edge_count = graph.count_edges(); + let metadata = graph.metadata().as_vec(); + let graph_type = graph.graph_type(); + Self { + node_count, + edge_count, + metadata, + graph_type, + } + } +} + pub fn assert_metadata_correct<'graph>(folder: &GraphFolder, graph: &impl GraphViewOps<'graph>) { let metadata = folder.read_metadata().unwrap(); assert_eq!(metadata.node_count, graph.count_nodes()); From 7805348097e643a98bdd76b78541f24f4f946ffc Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 1 Dec 2025 14:23:35 +0100 Subject: [PATCH 06/39] more validation --- raphtory-graphql/src/paths.rs | 23 +++++++++++++++++++++++ raphtory/src/errors.rs | 5 +++++ 2 files changed, 28 insertions(+) diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index 16142f0575..f52a21f7b0 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -144,6 +144,17 @@ pub(crate) fn valid_path( for component in user_facing_path.components() { extend_and_validate(&mut full_path, component, namespace, relative_path)?; } + if full_path.exists() { + if namespace { + if full_path.join(META_PATH).exists() { + return Err(InvalidPathReason::NamespaceIsGraph(user_facing_path).into()); + } + } else { + if !full_path.join(META_PATH).exists() { + return Err(InvalidPathReason::GraphIsNamespace(user_facing_path).into()); + } + } + } Ok(full_path) } @@ -204,6 +215,18 @@ pub(crate) fn create_valid_path( } } } + if cleanup_marker.is_none() { + // folder already exists, check if it is of the right type + if namespace { + if full_path.join(META_PATH).exists() { + return Err(InvalidPathReason::NamespaceIsGraph(user_facing_path).into()); + } + } else { + if !full_path.join(META_PATH).exists() { + return Err(InvalidPathReason::GraphIsNamespace(user_facing_path).into()); + } + } + } Ok(NewPath { path: full_path, cleanup: cleanup_marker, diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index 98dbbcb75f..8a75285f12 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -4,6 +4,7 @@ use crate::{ }; use arrow::{datatypes::DataType, error::ArrowError}; use itertools::Itertools; +use neo4rs::Path; use parquet::errors::ParquetError; use raphtory_api::core::entities::{ properties::prop::{PropError, PropType}, @@ -58,6 +59,10 @@ pub enum InvalidPathReason { GraphNamePrefix, #[error("The path provided does not exists as a namespace: {0}")] NamespaceDoesNotExist(String), + #[error("The path provided already exists as a namespace: {0}")] + GraphIsNamespace(PathBuf), + #[error("The path provided already exists as a graph: {0}")] + NamespaceIsGraph(PathBuf), #[error("The path provided contains non-UTF8 characters.")] NonUTFCharacters, #[error("Failed to strip prefix")] From c0da91e94b1c7fc9018c8e8114dd44549d579eaf Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 1 Dec 2025 14:37:17 +0100 Subject: [PATCH 07/39] need to clean up dirty paths during validation --- raphtory-graphql/src/paths.rs | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index f52a21f7b0..f90f74f21a 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -79,46 +79,47 @@ fn extend_and_validate( component: Component, namespace: bool, user_facing_path: &str, -) -> Result<(), InvalidPathReason> { +) -> Result<(), InternalPathValidationError> { match component { Component::Prefix(_) => { - return Err(InvalidPathReason::RootNotAllowed(user_facing_path.into())) + return Err(InvalidPathReason::RootNotAllowed(user_facing_path.into()).into()) } Component::RootDir => { - return Err(InvalidPathReason::RootNotAllowed(user_facing_path.into())) + return Err(InvalidPathReason::RootNotAllowed(user_facing_path.into()).into()) } Component::CurDir => { - return Err(InvalidPathReason::CurDirNotAllowed(user_facing_path.into())) + return Err(InvalidPathReason::CurDirNotAllowed(user_facing_path.into()).into()) } Component::ParentDir => { return Err(InvalidPathReason::ParentDirNotAllowed( user_facing_path.into(), - )) + ).into()) } Component::Normal(component) => { // check if some intermediate path is already a graph if full_path.join(META_PATH).exists() { - return Err(InvalidPathReason::ParentIsGraph(user_facing_path.into())); + return Err(InvalidPathReason::ParentIsGraph(user_facing_path.into()).into()); } full_path.push(component); //check if the path with the component is a graph if full_path.join(META_PATH).exists() { if namespace { - return Err(InvalidPathReason::ParentIsGraph(user_facing_path.into())); + return Err(InvalidPathReason::ParentIsGraph(user_facing_path.into()).into()); } else if component .to_str() .ok_or(InvalidPathReason::NonUTFCharacters)? .starts_with("_") { - return Err(InvalidPathReason::GraphNamePrefix); + return Err(InvalidPathReason::GraphNamePrefix.into()); } } //check for symlinks if full_path.is_symlink() { return Err(InvalidPathReason::SymlinkNotAllowed( user_facing_path.into(), - )); + ).into()); } + ensure_clean_folder(&full_path, false)?; } } Ok(()) @@ -128,14 +129,14 @@ pub(crate) fn valid_path( base_path: PathBuf, relative_path: &str, namespace: bool, -) -> Result { +) -> Result { let user_facing_path = PathBuf::from(relative_path); if relative_path.contains(r"//") { - return Err(InvalidPathReason::DoubleForwardSlash(user_facing_path)); + return Err(InvalidPathReason::DoubleForwardSlash(user_facing_path).into()); } if relative_path.contains(r"\") { - return Err(InvalidPathReason::BackslashError(user_facing_path)); + return Err(InvalidPathReason::BackslashError(user_facing_path).into()); } let mut full_path = base_path.clone(); From e5374aa3a37ef9f4ac5ac495cddeb79bcf5c8d49 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 8 Dec 2025 15:22:20 +0100 Subject: [PATCH 08/39] Refactor writing to disk such that it writes with the new folder structure and refactor the validation logic (compiles but does not work yet) --- raphtory-graphql/src/data.rs | 58 +-- raphtory-graphql/src/graph.rs | 9 +- raphtory-graphql/src/model/graph/graph.rs | 14 +- .../src/model/graph/meta_graph.rs | 27 +- raphtory-graphql/src/model/graph/mod.rs | 2 +- raphtory-graphql/src/model/graph/namespace.rs | 172 +++++--- .../src/model/graph/namespaced_item.rs | 2 +- raphtory-graphql/src/model/mod.rs | 38 +- raphtory-graphql/src/paths.rs | 405 ++++++++---------- raphtory/src/errors.rs | 8 +- raphtory/src/python/graph/views/graph_view.rs | 2 +- raphtory/src/serialise/graph_folder.rs | 319 ++++++++++---- raphtory/src/serialise/metadata.rs | 3 + raphtory/src/serialise/mod.rs | 2 +- raphtory/src/serialise/serialise.rs | 6 +- 15 files changed, 619 insertions(+), 448 deletions(-) diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index 56ceb03581..15c6acb93e 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -3,8 +3,9 @@ use crate::{ graph::GraphWithVectors, model::blocking_io, paths::{ - mark_dirty, read_data_path, valid_path, valid_relative_graph_path, ExistingGraphFolder, - InternalPathValidationError, PathValidationError, ValidGraphFolder, WriteableGraphFolder, + mark_dirty, valid_path, valid_relative_graph_path, ExistingGraphFolder, + InternalPathValidationError, PathValidationError, ValidGraphFolder, WithPath, + WriteableGraphFolder, }, rayon::blocking_compute, GQLError, @@ -112,24 +113,27 @@ impl DeletionError { } } +/// Get relative path as String joined with `"/"` for use with the validation methods. +/// The path is not validated here! pub(crate) fn get_relative_path( - work_dir: PathBuf, + work_dir: &Path, path: &Path, - namespace: bool, -) -> Result { - let path_buf = path.strip_prefix(work_dir.clone())?.to_path_buf(); - let components = path_buf - .components() - .into_iter() - .map(|c| { - c.as_os_str() - .to_str() - .ok_or(InvalidPathReason::NonUTFCharacters) - }) - .collect::, _>>()?; - //a safe unwrap as checking above - let path_str = components.into_iter().join("/"); - valid_path(work_dir, &path_str, namespace)?; +) -> Result { + let relative = path.strip_prefix(work_dir)?; + let mut path_str = String::new(); + let mut components = relative.components().map(|component| { + component + .as_os_str() + .to_str() + .ok_or(InternalPathValidationError::NonUTFCharacters) + }); + if let Some(first) = components.next() { + path_str.push_str(first?); + } + for component in components { + path_str.push('/'); + path_str.push_str(component?); + } Ok(path_str) } @@ -214,9 +218,7 @@ impl Data { }) .await?; - self.cache - .insert(graph.folder.local_path.clone(), graph) - .await; + self.cache.insert(graph.folder.local_path(), graph).await; Ok(()) } @@ -246,9 +248,9 @@ impl Data { graph_folder: ExistingGraphFolder, ) -> Result<(), MutationErrorInner> { blocking_io(move || { - mark_dirty(&graph_folder.path)?; - fs::remove_dir_all(&graph_folder.path)?; - fs::remove_file(graph_folder.data_folder.join(DIRTY_PATH))?; + let dirty_file = mark_dirty(graph_folder.path())?; + fs::remove_dir_all(graph_folder.path())?; + fs::remove_file(dirty_file)?; Ok::<_, MutationErrorInner>(()) }) .await?; @@ -282,7 +284,7 @@ impl Data { .vectorise( conf.cache.clone(), template.clone(), - Some(&folder.get_vectors_path()), + Some(&folder.get_vectors_path().ok()?), true, // verbose ) .await; @@ -320,7 +322,7 @@ impl Data { pub(crate) async fn vectorise_all_graphs_that_are_not(&self) -> Result<(), GraphError> { for folder in self.get_all_graph_folders() { - if !folder.data_path().get_vectors_path().exists() { + if !folder.data_path().get_vectors_path()?.exists() { self.vectorise_folder(folder).await; } } @@ -335,7 +337,7 @@ impl Data { .filter_map(move |e| { let entry = e.ok()?; let path = entry.path(); - let relative = get_relative_path(base_path.clone(), path, false).ok()?; + let relative = get_relative_path(&base_path, path).ok()?; let folder = ExistingGraphFolder::try_from(base_path.clone(), &relative).ok()?; Some(folder) }) @@ -501,7 +503,7 @@ pub(crate) mod data_tests { let paths = data .get_all_graph_folders() .into_iter() - .map(|folder| folder.0.path) + .map(|folder| folder.0.data_path().root().to_path_buf()) .collect_vec(); assert_eq!(paths.len(), 5); diff --git a/raphtory-graphql/src/graph.rs b/raphtory-graphql/src/graph.rs index e7f7b833f6..252b0b62df 100644 --- a/raphtory-graphql/src/graph.rs +++ b/raphtory-graphql/src/graph.rs @@ -90,18 +90,18 @@ impl GraphWithVectors { let graph = { let data_path = folder.data_path(); // Either decode a graph serialized using encode or load using underlying storage. - if MaterializedGraph::is_decodable(data_path.get_graph_path()) { + if MaterializedGraph::is_decodable(data_path.get_graph_path()?) { let path_for_decoded_graph = None; MaterializedGraph::decode(data_path, path_for_decoded_graph)? } else { let metadata = data_path.read_metadata()?; let graph = match metadata.graph_type { GraphType::EventGraph => { - let graph = Graph::load_from_path(data_path.get_graph_path()); + let graph = Graph::load_from_path(data_path.get_graph_path()?); MaterializedGraph::EventGraph(graph) } GraphType::PersistentGraph => { - let graph = PersistentGraph::load_from_path(data_path.get_graph_path()); + let graph = PersistentGraph::load_from_path(data_path.get_graph_path()?); MaterializedGraph::PersistentGraph(graph) } }; @@ -113,7 +113,8 @@ impl GraphWithVectors { }; let vectors = cache.and_then(|cache| { - VectorisedGraph::read_from_path(&folder.get_vectors_path(), graph.clone(), cache).ok() + VectorisedGraph::read_from_path(&folder.get_vectors_path().ok()?, graph.clone(), cache) + .ok() }); info!("Graph loaded = {}", folder.get_original_path_str()); diff --git a/raphtory-graphql/src/model/graph/graph.rs b/raphtory-graphql/src/model/graph/graph.rs index 068addc202..3c77ca2a60 100644 --- a/raphtory-graphql/src/model/graph/graph.rs +++ b/raphtory-graphql/src/model/graph/graph.rs @@ -20,7 +20,7 @@ use crate::{ GQLError, }; use async_graphql::Context; -use dynamic_graphql::{ResolvedObject, ResolvedObjectFields}; +use dynamic_graphql::{ResolvedObject, ResolvedObjectFields, Result}; use itertools::Itertools; use raphtory::{ core::{ @@ -254,18 +254,18 @@ impl GqlGraph { //////////////////////// /// Returns the timestamp for the creation of the graph. - async fn created(&self) -> Result { - self.path.created_async().await + async fn created(&self) -> Result { + Ok(self.path.created_async().await?) } /// Returns the graph's last opened timestamp according to system time. - async fn last_opened(&self) -> Result { - self.path.last_opened_async().await + async fn last_opened(&self) -> Result { + Ok(self.path.last_opened_async().await?) } /// Returns the graph's last updated timestamp. - async fn last_updated(&self) -> Result { - self.path.last_updated_async().await + async fn last_updated(&self) -> Result { + Ok(self.path.last_updated_async().await?) } /// Returns the timestamp of the earliest activity in the graph. diff --git a/raphtory-graphql/src/model/graph/meta_graph.rs b/raphtory-graphql/src/model/graph/meta_graph.rs index 72316bf2b2..21ac19ab79 100644 --- a/raphtory-graphql/src/model/graph/meta_graph.rs +++ b/raphtory-graphql/src/model/graph/meta_graph.rs @@ -1,5 +1,5 @@ use crate::{model::graph::property::GqlProperty, paths::ExistingGraphFolder}; -use dynamic_graphql::{ResolvedObject, ResolvedObjectFields}; +use dynamic_graphql::{ResolvedObject, ResolvedObjectFields, Result}; use raphtory::{errors::GraphError, serialise::metadata::GraphMetadata}; use std::{cmp::Ordering, sync::Arc}; use tokio::sync::OnceCell; @@ -39,10 +39,11 @@ impl MetaGraph { } } - async fn meta(&self) -> Result<&GraphMetadata, GraphError> { - self.meta + async fn meta(&self) -> Result<&GraphMetadata> { + Ok(self + .meta .get_or_try_init(|| self.folder.read_metadata_async()) - .await + .await?) } } @@ -60,22 +61,22 @@ impl MetaGraph { } /// Returns the timestamp for the creation of the graph. - async fn created(&self) -> Result { - self.folder.created_async().await + async fn created(&self) -> Result { + Ok(self.folder.created_async().await?) } /// Returns the graph's last opened timestamp according to system time. - async fn last_opened(&self) -> Result { - self.folder.last_opened_async().await + async fn last_opened(&self) -> Result { + Ok(self.folder.last_opened_async().await?) } /// Returns the graph's last updated timestamp. - async fn last_updated(&self) -> Result { - self.folder.last_updated_async().await + async fn last_updated(&self) -> Result { + Ok(self.folder.last_updated_async().await?) } /// Returns the number of nodes in the graph. - async fn node_count(&self) -> Result { + async fn node_count(&self) -> Result { Ok(self.meta().await?.node_count) } @@ -83,12 +84,12 @@ impl MetaGraph { /// /// Returns: /// int: - async fn edge_count(&self) -> Result { + async fn edge_count(&self) -> Result { Ok(self.meta().await?.edge_count) } /// Returns the metadata of the graph. - async fn metadata(&self) -> Result, GraphError> { + async fn metadata(&self) -> Result> { Ok(self .meta() .await? diff --git a/raphtory-graphql/src/model/graph/mod.rs b/raphtory-graphql/src/model/graph/mod.rs index 95b6802854..5e49aaba70 100644 --- a/raphtory-graphql/src/model/graph/mod.rs +++ b/raphtory-graphql/src/model/graph/mod.rs @@ -11,7 +11,7 @@ pub(crate) mod index; pub(crate) mod meta_graph; pub(crate) mod mutable_graph; pub(crate) mod namespace; -mod namespaced_item; +pub(crate) mod namespaced_item; pub(crate) mod node; mod nodes; mod path_from_node; diff --git a/raphtory-graphql/src/model/graph/namespace.rs b/raphtory-graphql/src/model/graph/namespace.rs index b6001a2651..5110a325b2 100644 --- a/raphtory-graphql/src/model/graph/namespace.rs +++ b/raphtory-graphql/src/model/graph/namespace.rs @@ -3,73 +3,140 @@ use crate::{ model::graph::{ collection::GqlCollection, meta_graph::MetaGraph, namespaced_item::NamespacedItem, }, - paths::{valid_path, ExistingGraphFolder}, + paths::{ + valid_path, valid_relative_graph_path, ExistingGraphFolder, InternalPathValidationError, + PathValidationError, ValidPath, + }, rayon::blocking_compute, }; use dynamic_graphql::{ResolvedObject, ResolvedObjectFields}; -use itertools::Itertools; +use itertools::{join, Itertools}; use raphtory::errors::InvalidPathReason; use std::path::PathBuf; use walkdir::WalkDir; #[derive(ResolvedObject, Clone, Ord, Eq, PartialEq, PartialOrd)] pub(crate) struct Namespace { - base_dir: PathBuf, - current_dir: PathBuf, + current_dir: PathBuf, // always validated + relative_path: String, // relative to the root working directory +} + +pub struct NamespaceIter { + it: walkdir::IntoIter, + root: Namespace, +} + +impl Iterator for NamespaceIter { + type Item = NamespacedItem; + + fn next(&mut self) -> Option { + loop { + match self.it.next() { + None => return None, + Some(Ok(entry)) => { + let path = entry.path(); + if path.is_dir() { + match get_relative_path(&self.root.current_dir, path) { + Ok(relative) => { + match self.root.try_new_child(&relative) { + Ok(child) => { + match &child { + NamespacedItem::Namespace(_) => {} + NamespacedItem::MetaGraph(_) => { + self.it.skip_current_dir() // graphs should not be traversed further + } + } + return Some(child); + } + Err(_) => { + self.it.skip_current_dir() // not a valid path + } + } + } + Err(_) => { + self.it.skip_current_dir() // not a valid path and shouldn't be traversed further} + } + } + } + } + _ => {} // skip errors + }; + } + } } impl Namespace { - pub fn new(base_dir: PathBuf, current_dir: PathBuf) -> Self { + pub fn root(root: PathBuf) -> Self { Self { - base_dir, - current_dir, + current_dir: root, + relative_path: "".to_owned(), } } - fn get_all_children(&self) -> impl Iterator + use<'_> { + pub fn try_new(root: PathBuf, relative_path: String) -> Result { + let current_dir = valid_path(root, relative_path.as_str())?; + Self::try_from_valid(current_dir, &relative_path) + } + + /// Create a namespace from a valid path if it exists and is a namespace + pub fn try_from_valid( + current_dir: ValidPath, + relative_path: impl Into, + ) -> Result { + if current_dir.is_namespace() { + Ok(Self { + current_dir: current_dir.into_path(), + relative_path: relative_path.into(), + }) + } else { + Err(PathValidationError::NamespaceDoesNotExist( + relative_path.into(), + )) + } + } + + pub fn new_child_namespace(&self, relative_path: &str) -> Result { + let current_dir = valid_path(self.current_dir.clone(), relative_path)?; + let relative_path = [&self.relative_path, relative_path].join("/"); + Self::try_from_valid(current_dir, relative_path) + } + + pub fn try_new_child(&self, file_name: &str) -> Result { + let current_dir = valid_path(self.current_dir.clone(), file_name)?; + let relative_path = [&self.relative_path, file_name].join("/"); + let child = if current_dir.is_namespace() { + NamespacedItem::Namespace(Self::try_from_valid(current_dir, relative_path)?) + } else { + NamespacedItem::MetaGraph(MetaGraph::new(ExistingGraphFolder::try_from_valid( + current_dir, + &relative_path, + )?)) + }; + Ok(child) + } + + /// Non-recursively list children + pub fn get_children(&self) -> impl Iterator + use<'_> { WalkDir::new(&self.current_dir) .max_depth(1) .into_iter() .flatten() .filter_map(|entry| { let path = entry.path(); - let file_name = entry.file_name().to_str()?; if path.is_dir() { - if path != self.current_dir - && valid_path(self.current_dir.clone(), file_name, true).is_ok() - { - Some(NamespacedItem::Namespace(Namespace::new( - self.base_dir.clone(), - path.to_path_buf(), - ))) - } else { - let base_path = self.base_dir.clone(); - let relative = get_relative_path(base_path.clone(), path, false).ok()?; - let folder = - ExistingGraphFolder::try_from(base_path.clone(), &relative).ok()?; - Some(NamespacedItem::MetaGraph(MetaGraph::new(folder))) - } + let file_name = entry.file_name().to_str()?; + self.try_new_child(file_name).ok() } else { None } }) } - pub(crate) fn get_all_namespaces(&self) -> Vec { - let base_path = self.base_dir.clone(); - WalkDir::new(&self.current_dir) - .into_iter() - .filter_map(|e| { - let entry = e.ok()?; - let path = entry.path(); - if path.is_dir() && get_relative_path(base_path.clone(), path, true).is_ok() { - Some(Namespace::new(self.base_dir.clone(), path.to_path_buf())) - } else { - None - } - }) - .sorted() - .collect() + /// Recursively list all children + pub fn get_all_children(&self) -> impl Iterator { + let it = WalkDir::new(&self.current_dir).into_iter(); + let root = self.clone(); + NamespaceIter { it, root } } } @@ -80,7 +147,7 @@ impl Namespace { blocking_compute(move || { GqlCollection::new( self_clone - .get_all_children() + .get_children() .into_iter() .filter_map(|g| match g { NamespacedItem::MetaGraph(g) => Some(g), @@ -92,16 +159,23 @@ impl Namespace { }) .await } - async fn path(&self) -> Result { - get_relative_path(self.base_dir.clone(), self.current_dir.as_path(), true) + async fn path(&self) -> String { + self.relative_path.clone() } async fn parent(&self) -> Option { - let parent = self.current_dir.parent()?.to_path_buf(); - if parent.starts_with(&self.base_dir) { - Some(Namespace::new(self.base_dir.clone(), parent)) - } else { + if self.relative_path.is_empty() { None + } else { + let parent = self.current_dir.parent()?.to_path_buf(); + let relative_path = self + .relative_path + .rsplit_once("/") + .map_or("", |(parent, _)| parent); + Some(Self { + current_dir: parent, + relative_path: relative_path.to_owned(), + }) } } @@ -110,7 +184,7 @@ impl Namespace { blocking_compute(move || { GqlCollection::new( self_clone - .get_all_children() + .get_children() .filter_map(|item| match item { NamespacedItem::MetaGraph(_) => None, NamespacedItem::Namespace(n) => Some(n), @@ -126,9 +200,7 @@ impl Namespace { // Namespaces will be listed before graphs. async fn items(&self) -> GqlCollection { let self_clone = self.clone(); - blocking_compute(move || { - GqlCollection::new(self_clone.get_all_children().sorted().collect()) - }) - .await + blocking_compute(move || GqlCollection::new(self_clone.get_children().sorted().collect())) + .await } } diff --git a/raphtory-graphql/src/model/graph/namespaced_item.rs b/raphtory-graphql/src/model/graph/namespaced_item.rs index 1f8e87bb13..8d315eebf7 100644 --- a/raphtory-graphql/src/model/graph/namespaced_item.rs +++ b/raphtory-graphql/src/model/graph/namespaced_item.rs @@ -5,7 +5,7 @@ use dynamic_graphql::Union; // This is useful for when fetching a collection of both for the purposes of displaying all such // items, paged. #[derive(Union, Clone, PartialOrd, PartialEq, Ord, Eq)] -pub(crate) enum NamespacedItem { +pub enum NamespacedItem { /// Namespace. Namespace(Namespace), /// Metagraph. diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index 449b040123..95be2618b5 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -5,7 +5,7 @@ use crate::{ model::{ graph::{ collection::GqlCollection, graph::GqlGraph, index::IndexSpecInput, - mutable_graph::GqlMutableGraph, namespace::Namespace, + mutable_graph::GqlMutableGraph, namespace::Namespace, namespaced_item::NamespacedItem, vectorised_graph::GqlVectorisedGraph, }, plugins::{mutation_plugin::MutationPlugin, query_plugin::QueryPlugin}, @@ -19,6 +19,7 @@ use dynamic_graphql::{ App, Enum, Mutation, MutationFields, MutationRoot, ResolvedObject, ResolvedObjectFields, Result, Upload, }; +use itertools::Itertools; use raphtory::{ db::{api::view::MaterializedGraph, graph::views::deletion_graph::PersistentGraph}, errors::{GraphError, InvalidPathReason}, @@ -124,25 +125,26 @@ impl QueryRoot { /// Returns:: List of namespaces on root async fn namespaces<'a>(ctx: &Context<'a>) -> GqlCollection { let data = ctx.data_unchecked::(); - let root = Namespace::new(data.work_dir.clone(), data.work_dir.clone()); - GqlCollection::new(root.get_all_namespaces().into()) + let root = Namespace::root(data.work_dir.clone()); + let list = blocking_compute(move || { + root.get_all_children() + .filter_map(|child| match child { + NamespacedItem::Namespace(item) => Some(item), + NamespacedItem::MetaGraph(_) => None, + }) + .sorted() + .collect() + }) + .await; + GqlCollection::new(list) } /// Returns a specific namespace at a given path /// /// Returns:: Namespace or error if no namespace found - async fn namespace<'a>( - ctx: &Context<'a>, - path: String, - ) -> Result { + async fn namespace<'a>(ctx: &Context<'a>, path: String) -> Result { let data = ctx.data_unchecked::(); - let current_dir = valid_path(data.work_dir.clone(), path.as_str(), true)?; - - if current_dir.exists() { - Ok(Namespace::new(data.work_dir.clone(), current_dir)) - } else { - Err(InvalidPathReason::NamespaceDoesNotExist(path)) - } + Ok(Namespace::try_new(data.work_dir.clone(), path)?) } /// Returns root namespace @@ -150,7 +152,7 @@ impl QueryRoot { /// Returns:: Root namespace async fn root<'a>(ctx: &Context<'a>) -> Namespace { let data = ctx.data_unchecked::(); - Namespace::new(data.work_dir.clone(), data.work_dir.clone()) + Namespace::root(data.work_dir.clone()) } /// Returns a plugin. @@ -200,11 +202,11 @@ impl Mut { ctx: &Context<'a>, path: String, graph_type: GqlGraphType, - ) -> Result { + ) -> Result { let data = ctx.data_unchecked::(); let overwrite = false; let folder = data.validate_path_for_insert(&path, overwrite)?; - let graph_path = folder.data_path().get_graph_path(); + let graph_path = folder.data_path().get_graph_path()?; let graph: MaterializedGraph = match graph_type { GqlGraphType::Persistent => PersistentGraph::new_at_path(graph_path).into(), GqlGraphType::Event => Graph::new_at_path(graph_path).into(), @@ -287,7 +289,7 @@ impl Mut { WriteableGraphFolder::try_new(data.work_dir.clone(), path)? }; let g: MaterializedGraph = - url_decode_graph(graph, Some(&folder.data_path().get_graph_path()))?; + url_decode_graph(graph, Some(&folder.data_path().get_graph_path()?))?; data.insert_graph(folder, g).await?; Ok(path.to_owned()) diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index f90f74f21a..a62f096018 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -9,7 +9,10 @@ use raphtory::{ db::api::view::{internal::InternalStorageOps, MaterializedGraph}, errors::{GraphError, InvalidPathReason}, prelude::ParquetEncoder, - serialise::{metadata::GraphMetadata, GraphFolder, META_PATH}, + serialise::{ + make_data_path, metadata::GraphMetadata, read_data_path, read_dirty_path, + read_path_pointer, GraphFolder, Metadata, RelativePath, DATA_PATH, META_PATH, + }, }; use serde::{Deserialize, Serialize}; use std::{ @@ -18,11 +21,29 @@ use std::{ fs::File, io::{ErrorKind, Read, Write}, ops::Deref, - path::{Component, Path, PathBuf}, + path::{Component, Path, PathBuf, StripPrefixError}, time::{SystemTime, UNIX_EPOCH}, }; use tokio::io::AsyncReadExt; -use tracing::{error, warn}; +use tracing::{error, metadata, warn}; + +pub struct ValidPath(PathBuf); + +impl ValidPath { + /// path exists and is a graph + pub fn is_graph(&self) -> bool { + self.0.exists() && self.0.join(META_PATH).exists() + } + + /// path exists and is a namespace + pub fn is_namespace(&self) -> bool { + self.0.exists() && !self.0.join(META_PATH).exists() + } + + pub fn into_path(self) -> PathBuf { + self.0 + } +} #[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] pub struct ExistingGraphFolder(pub(crate) ValidGraphFolder); @@ -36,27 +57,19 @@ impl Deref for ExistingGraphFolder { } impl ExistingGraphFolder { - pub(crate) fn try_from( - base_path: PathBuf, + pub fn try_from(base_path: PathBuf, relative_path: &str) -> Result { + let path = valid_path(base_path, relative_path)?; + Self::try_from_valid(path, relative_path) + } + + pub fn try_from_valid( + base_path: ValidPath, relative_path: &str, ) -> Result { - let path = valid_path(base_path, relative_path, false)?; - let graph_folder: GraphFolder = get_full_data_path(&path) - .map_err(|error| match error { - InternalPathValidationError::MissingMetadataFile => { - PathValidationError::GraphNotExistsError(relative_path.to_string()) - } - _ => PathValidationError::InternalError { - graph: relative_path.to_string(), - error, - }, - })? - .into(); - + let graph_folder: GraphFolder = base_path.into_path().into(); if graph_folder.is_reserved() { Ok(Self(ValidGraphFolder { - path, - data_folder: graph_folder.root_folder, + data_path: graph_folder, local_path: relative_path.to_string(), })) } else { @@ -69,15 +82,13 @@ impl ExistingGraphFolder { #[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] pub struct ValidGraphFolder { - pub path: PathBuf, - pub data_folder: PathBuf, - pub local_path: String, + data_path: GraphFolder, + local_path: String, } fn extend_and_validate( full_path: &mut PathBuf, component: Component, - namespace: bool, user_facing_path: &str, ) -> Result<(), InternalPathValidationError> { match component { @@ -91,9 +102,7 @@ fn extend_and_validate( return Err(InvalidPathReason::CurDirNotAllowed(user_facing_path.into()).into()) } Component::ParentDir => { - return Err(InvalidPathReason::ParentDirNotAllowed( - user_facing_path.into(), - ).into()) + return Err(InvalidPathReason::ParentDirNotAllowed(user_facing_path.into()).into()) } Component::Normal(component) => { // check if some intermediate path is already a graph @@ -101,25 +110,11 @@ fn extend_and_validate( return Err(InvalidPathReason::ParentIsGraph(user_facing_path.into()).into()); } full_path.push(component); - //check if the path with the component is a graph - if full_path.join(META_PATH).exists() { - if namespace { - return Err(InvalidPathReason::ParentIsGraph(user_facing_path.into()).into()); - } else if component - .to_str() - .ok_or(InvalidPathReason::NonUTFCharacters)? - .starts_with("_") - { - return Err(InvalidPathReason::GraphNamePrefix.into()); - } - } //check for symlinks if full_path.is_symlink() { - return Err(InvalidPathReason::SymlinkNotAllowed( - user_facing_path.into(), - ).into()); + return Err(InvalidPathReason::SymlinkNotAllowed(user_facing_path.into()).into()); } - ensure_clean_folder(&full_path, false)?; + ensure_clean_folder(&full_path)?; } } Ok(()) @@ -128,8 +123,7 @@ fn extend_and_validate( pub(crate) fn valid_path( base_path: PathBuf, relative_path: &str, - namespace: bool, -) -> Result { +) -> Result { let user_facing_path = PathBuf::from(relative_path); if relative_path.contains(r"//") { @@ -143,20 +137,10 @@ pub(crate) fn valid_path( // fail if any component is a Prefix (C://), tries to access root, // tries to access a parent dir or is a symlink which could break out of the working dir for component in user_facing_path.components() { - extend_and_validate(&mut full_path, component, namespace, relative_path)?; + extend_and_validate(&mut full_path, component, relative_path) + .with_path(relative_path.to_string())?; } - if full_path.exists() { - if namespace { - if full_path.join(META_PATH).exists() { - return Err(InvalidPathReason::NamespaceIsGraph(user_facing_path).into()); - } - } else { - if !full_path.join(META_PATH).exists() { - return Err(InvalidPathReason::GraphIsNamespace(user_facing_path).into()); - } - } - } - Ok(full_path) + Ok(ValidPath(full_path)) } #[derive(Clone, Debug)] @@ -165,6 +149,12 @@ struct NewPath { cleanup: Option, } +impl NewPath { + pub fn is_new(&self) -> bool { + self.cleanup.is_some() + } +} + impl PartialEq for NewPath { fn eq(&self, other: &Self) -> bool { self.path.eq(&other.path) @@ -180,7 +170,6 @@ impl PartialOrd for NewPath { pub(crate) fn create_valid_path( base_path: PathBuf, relative_path: &str, - namespace: bool, ) -> Result { let user_facing_path = PathBuf::from(relative_path); @@ -196,7 +185,7 @@ pub(crate) fn create_valid_path( // fail if any component is a Prefix (C://), tries to access root, // tries to access a parent dir or is a symlink which could break out of the working dir for component in user_facing_path.components() { - match extend_and_validate(&mut full_path, component, namespace, relative_path) { + match extend_and_validate(&mut full_path, component, relative_path) { Ok(_) => { if !full_path.exists() { if cleanup_marker.is_none() { @@ -216,18 +205,6 @@ pub(crate) fn create_valid_path( } } } - if cleanup_marker.is_none() { - // folder already exists, check if it is of the right type - if namespace { - if full_path.join(META_PATH).exists() { - return Err(InvalidPathReason::NamespaceIsGraph(user_facing_path).into()); - } - } else { - if !full_path.join(META_PATH).exists() { - return Err(InvalidPathReason::GraphIsNamespace(user_facing_path).into()); - } - } - } Ok(NewPath { path: full_path, cleanup: cleanup_marker, @@ -271,37 +248,36 @@ impl WriteableGraphFolder { fn new_inner( valid_path: NewPath, graph_name: &str, - prefix: &str, - meta: Option, ) -> Result { - let next_path = make_data_path(&valid_path.path, prefix)?; - let data_folder = valid_path.path.join(&next_path); + let is_new = valid_path.is_new(); + let graph_folder = GraphFolder::from(valid_path.path); + if !is_new { + if !graph_folder.is_reserved() { + return Err(InternalPathValidationError::GraphIsNamespace); + } + } + let next_path = make_data_path(graph_folder.root(), DATA_PATH)?; + let data_folder = graph_folder.root().join(&next_path); fs::create_dir(&data_folder)?; fs::write( - valid_path.path.join(DIRTY_PATH), + graph_folder.root().join(DIRTY_PATH), &serde_json::to_vec(&Metadata { path: next_path, - meta, + meta: None, })?, )?; let folder = ValidGraphFolder { - path: valid_path.path, - data_folder, + data_path: graph_folder, local_path: graph_name.to_string(), }; Ok(Self { - folder: folder, + folder, dirty_marker: valid_path.cleanup, }) } - fn new( - valid_path: NewPath, - graph_name: &str, - prefix: &str, - meta: Option, - ) -> Result { - Self::new_inner(valid_path, graph_name, prefix, meta).map_err(|error| { + fn new(valid_path: NewPath, graph_name: &str) -> Result { + Self::new_inner(valid_path, graph_name).map_err(|error| { PathValidationError::InternalError { graph: graph_name.to_string(), error, @@ -313,7 +289,7 @@ impl WriteableGraphFolder { base_path: PathBuf, relative_path: &str, ) -> Result { - let path = create_valid_path(base_path, relative_path, false).map_err(|error| { + let path = create_valid_path(base_path, relative_path).map_err(|error| { PathValidationError::InternalError { graph: relative_path.to_string(), error, @@ -324,44 +300,27 @@ impl WriteableGraphFolder { relative_path.to_string(), )); } - Self::new(path, relative_path, "data_", None) + Self::new(path, relative_path) } pub(crate) fn try_existing_or_new( base_path: PathBuf, relative_path: &str, ) -> Result { - let path = create_valid_path(base_path, relative_path, false).map_err(|error| { + let path = create_valid_path(base_path, relative_path).map_err(|error| { PathValidationError::InternalError { graph: relative_path.to_string(), error, } })?; - Self::new(path, relative_path, "data_", None) - } - - /// Used for swapping out only the graph parquet data - fn new_inner_graph_folder( - outer: &ValidGraphFolder, - metadata: GraphMetadata, - ) -> Result { - let graph_path = outer.data_folder.clone(); - Self::new_inner( - NewPath { - path: graph_path, - cleanup: None, - }, - &outer.local_path, - "graph_", - Some(metadata), - ) + Self::new(path, relative_path) } fn finish_inner(&self) -> Result<(), InternalPathValidationError> { - let old_path = get_full_data_path(&self.folder.path).ok(); + let old_path = self.folder.data_path.get_data_path().ok(); fs::rename( - self.folder.path.join(".dirty"), - self.folder.path.join(META_PATH), + self.folder.data_path.root().join(DIRTY_PATH), + self.folder.data_path.root().join(META_PATH), )?; if let Some(old_path) = old_path { if old_path.exists() { @@ -402,6 +361,14 @@ pub enum InternalPathValidationError { GraphError(#[from] GraphError), #[error("Graph path should always have a parent")] MissingParent, + #[error(transparent)] + StripPrefix(#[from] StripPrefixError), + #[error("Expected a graph but found a namespace")] + GraphIsNamespace, + #[error("Expected a namespace but found a graph")] + NamespaceIsGraph, + #[error("The path provided contains non-UTF8 characters.")] + NonUTFCharacters, } impl From for InternalPathValidationError { @@ -417,6 +384,8 @@ pub enum PathValidationError { GraphExistsError(String), #[error("Graph {0} does not exist")] GraphNotExistsError(String), + #[error("The path provided does not exists as a namespace: {0}")] + NamespaceDoesNotExist(String), #[error(transparent)] InvalidPath(#[from] InvalidPathReason), #[error("Graph {graph} is corrupted: {error}")] @@ -428,6 +397,19 @@ pub enum PathValidationError { IOError { graph: String, error: io::Error }, } +pub trait WithPath { + fn with_path(self, graph: String) -> Result; +} + +impl> WithPath for Result { + fn with_path(self, graph: String) -> Result { + self.map_err(move |error| PathValidationError::InternalError { + graph, + error: error.into(), + }) + } +} + pub(crate) fn valid_relative_graph_path( mut full_path: PathBuf, relative_path: &Path, @@ -472,66 +454,27 @@ fn is_graph(path: &Path) -> bool { path.join(META_PATH).is_file() } -#[derive(Debug, Serialize, Deserialize)] -pub struct RelativePath { - path: String, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct Metadata { - path: String, - meta: Option, -} - -pub(crate) fn read_path_pointer( - base_path: &Path, - file_name: &str, -) -> Result { - let mut file = File::open(base_path.join(file_name)).map_err(|error| match error.kind() { - ErrorKind::NotFound => InternalPathValidationError::MissingMetadataFile, - _ => InternalPathValidationError::IOError(error), - })?; - let mut value = String::new(); - file.read_to_string(&mut value)?; - let path: RelativePath = serde_json::from_str(&value)?; - Ok(path.path) -} - -pub(crate) fn read_data_path(base_path: &Path) -> Result { - read_path_pointer(base_path, META_PATH) -} - -pub(crate) fn read_dirty_path(base_path: &Path) -> Result { - read_path_pointer(base_path, DIRTY_PATH) -} - -pub(crate) fn ensure_clean_folder( - base_path: &Path, - expected_dirty: bool, -) -> Result<(), InternalPathValidationError> { - match read_dirty_path(base_path) { - Ok(path) => { - if !expected_dirty { - warn!("Found dirty path {path}, cleaning..."); - fs::remove_dir_all(base_path.join(path))?; - } - } - Err(InternalPathValidationError::MissingMetadataFile) => { - return if expected_dirty { - Err(InternalPathValidationError::MissingMetadataFile) - } else { - Ok(()) +pub(crate) fn ensure_clean_folder(base_path: &Path) -> Result<(), InternalPathValidationError> { + if base_path.is_dir() { + match read_dirty_path(base_path) { + Ok(path) => { + if let Some(path) = path { + warn!("Found dirty path {path}, cleaning..."); + fs::remove_dir_all(base_path.join(path))?; + } } - } - Err(error) => { - if expected_dirty { - return Err(error); - } else { + Err(error) => { warn!("Found dirty file with invalid path: {error}, cleaning...") } } + match fs::remove_file(base_path.join(DIRTY_PATH)) { + Ok(_) => {} + Err(err) => match err.kind() { + ErrorKind::NotFound => {} + _ => Err(err)?, + }, + }; } - fs::remove_file(base_path.join(DIRTY_PATH))?; Ok(()) } @@ -543,12 +486,12 @@ pub(crate) fn mark_dirty(path: &Path) -> Result Result Result { - let relative_path = read_data_path(base_path)?; - valid_relative_graph_path(base_path.to_path_buf(), relative_path.as_ref()) -} - -fn make_data_path(base_path: &Path, prefix: &str) -> Result { - let old_id: Option = match read_data_path(base_path) { - Ok(path) => path.strip_prefix(prefix).and_then(|id| id.parse().ok()), - Err(InternalPathValidationError::MissingMetadataFile) => None, - Err(error) => return Err(error), - }; - let mut id = match old_id { - None => 0, - Some(id) => id + 1, - }; - let mut path = format!("{prefix}{id}"); - while base_path.join(&path).exists() { - id += 1; - path = format!("{prefix}{id}"); - } - Ok(path) -} - impl ValidGraphFolder { - pub fn created(&self) -> Result { - fs::metadata(self.meta_path())?.created()?.to_millis() + fn with_internal_errors( + &self, + map: impl FnOnce() -> Result, + ) -> Result { + map().with_path(self.local_path()) + } + + pub fn path(&self) -> &Path { + &self.data_path.root() + } + pub fn local_path(&self) -> String { + self.local_path.clone() + } + pub fn created(&self) -> Result { + let path = self.meta_path()?; + self.with_internal_errors(move || Ok(path.metadata()?.created()?.to_millis()?)) } - pub fn last_opened(&self) -> Result { - fs::metadata(self.data_path().get_meta_path())? - .accessed()? - .to_millis() + pub fn last_opened(&self) -> Result { + self.with_internal_errors(|| { + Ok(fs::metadata(self.data_path.get_meta_path()?)? + .accessed()? + .to_millis()?) + }) } - pub fn last_updated(&self) -> Result { - fs::metadata(self.data_path().get_meta_path())? - .modified()? - .to_millis() + pub fn last_updated(&self) -> Result { + self.with_internal_errors(|| { + Ok(fs::metadata(self.data_path().get_meta_path()?)? + .modified()? + .to_millis()?) + }) } - pub async fn created_async(&self) -> Result { - let metadata = tokio::fs::metadata(self.meta_path()).await?; - metadata.created()?.to_millis() + pub async fn created_async(&self) -> Result { + let path = self.meta_path()?; + let metadata = tokio::fs::metadata(path) + .await + .with_path(self.local_path())?; + self.with_internal_errors(|| Ok(metadata.created()?.to_millis()?)) } - pub async fn last_opened_async(&self) -> Result { - let metadata = tokio::fs::metadata(self.data_path().get_meta_path()).await?; - metadata.accessed()?.to_millis() + pub async fn last_opened_async(&self) -> Result { + let metadata = tokio::fs::metadata(self.meta_path()?) + .await + .with_path(self.local_path())?; + self.with_internal_errors(|| Ok(metadata.accessed()?.to_millis()?)) } - pub async fn last_updated_async(&self) -> Result { - let metadata = tokio::fs::metadata(self.data_path().get_meta_path()).await?; - metadata.modified()?.to_millis() + pub async fn last_updated_async(&self) -> Result { + let metadata = tokio::fs::metadata(self.meta_path()?) + .await + .with_path(self.local_path())?; + self.with_internal_errors(|| Ok(metadata.modified()?.to_millis()?)) } - pub async fn read_metadata_async(&self) -> Result { - let folder: GraphFolder = self.data_folder.clone().into(); - blocking_compute(move || folder.read_metadata()).await + pub async fn read_metadata_async(&self) -> Result { + let folder: GraphFolder = self.data_path.clone(); + blocking_compute(move || folder.read_metadata()) + .await + .with_path(self.local_path()) } pub fn get_original_path_str(&self) -> &str { @@ -656,21 +603,18 @@ impl ValidGraphFolder { graph: MaterializedGraph, ) -> Result<(), InternalPathValidationError> { let metadata = GraphMetadata::from_graph(&graph); + let data_folder = &self.data_path; if graph.disk_storage_enabled() { - let data_folder = &self.data_folder; - let path = read_data_path(data_folder)?; + let path = data_folder.get_relative_graph_path()?; let meta_json = serde_json::to_string(&Metadata { path, meta: Some(metadata), })?; - let dirty_path = data_folder.join(DIRTY_PATH); + let dirty_path = data_folder.root().join(DIRTY_PATH); fs::write(&dirty_path, &meta_json)?; - fs::rename(&dirty_path, data_folder.join(META_PATH))?; + fs::rename(&dirty_path, data_folder.root().join(META_PATH))?; } else { - let swap = WriteableGraphFolder::new_inner_graph_folder(self, metadata)?; - let data_folder = swap.data_folder.clone(); - graph.encode_parquet(data_folder)?; - swap.finish_inner()?; + data_folder.replace_graph(graph)?; } Ok(()) } @@ -679,22 +623,19 @@ impl ValidGraphFolder { graph: MaterializedGraph, ) -> Result<(), PathValidationError> { self.write_graph_data_inner(graph) - .map_err(|error| PathValidationError::InternalError { - graph: self.local_path.clone(), - error, - }) + .with_path(self.local_path()) } - pub(crate) fn data_path(&self) -> GraphFolder { - self.data_folder.clone().into() + pub(crate) fn data_path(&self) -> &GraphFolder { + &self.data_path } - pub(crate) fn meta_path(&self) -> PathBuf { - self.path.join(META_PATH) + pub(crate) fn meta_path(&self) -> Result { + self.with_internal_errors(|| Ok(self.data_path.get_meta_path()?)) } - pub(crate) fn get_vectors_path(&self) -> PathBuf { - self.data_path().get_vectors_path() + pub(crate) fn get_vectors_path(&self) -> Result { + self.with_internal_errors(|| Ok(self.data_path().get_vectors_path()?)) } pub(crate) fn as_existing(&self) -> Result { diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index 8a75285f12..bd17c0c0da 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -57,14 +57,11 @@ pub enum InvalidPathReason { ParentIsGraph(PathBuf), #[error("Graph name cannot start with _")] GraphNamePrefix, - #[error("The path provided does not exists as a namespace: {0}")] - NamespaceDoesNotExist(String), + #[error("The path provided already exists as a namespace: {0}")] GraphIsNamespace(PathBuf), #[error("The path provided already exists as a graph: {0}")] NamespaceIsGraph(PathBuf), - #[error("The path provided contains non-UTF8 characters.")] - NonUTFCharacters, #[error("Failed to strip prefix")] StripPrefix { #[from] @@ -262,6 +259,9 @@ pub enum GraphError { source: zip::result::ZipError, }, + #[error("Not a zip archive")] + NotAZip, + #[error("Failed to load graph: {0}")] LoadFailure(String), diff --git a/raphtory/src/python/graph/views/graph_view.rs b/raphtory/src/python/graph/views/graph_view.rs index e0113c8f6c..976ccf8e99 100644 --- a/raphtory/src/python/graph/views/graph_view.rs +++ b/raphtory/src/python/graph/views/graph_view.rs @@ -478,7 +478,7 @@ impl PyGraphView { /// Materializes the graph view into a graphql compatible folder. fn materialize_to_graph_folder(&self, path: PathBuf) -> Result { let folder: GraphFolder = path.into(); - folder.reserve()?; + folder.init()?; let graph = self.graph.materialize_at(Some(&folder.get_graph_path()))?; folder.write_metadata(&graph)?; diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index e491b06211..a7134ec904 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -1,44 +1,146 @@ use crate::{ - db::api::view::MaterializedGraph, + db::api::view::{internal::GraphView, MaterializedGraph}, errors::GraphError, - prelude::{Graph, GraphViewOps, PropertiesOps}, + prelude::{Graph, GraphViewOps, ParquetEncoder, PropertiesOps, StableEncode}, serialise::{metadata::GraphMetadata, serialise::StableDecode}, }; +use serde::{Deserialize, Serialize}; use std::{ fs::{self, File, OpenOptions}, - io::{self, BufReader, ErrorKind, Read, Seek, Write}, + io::{self, BufReader, BufWriter, ErrorKind, Read, Seek, Write}, path::{Path, PathBuf}, }; use tracing::info; use walkdir::WalkDir; -use zip::{write::FileOptions, ZipArchive, ZipWriter}; +use zip::{ + write::{FileOptions, SimpleFileOptions}, + ZipArchive, ZipWriter, +}; /// Stores graph data pub const GRAPH_PATH: &str = "graph"; +pub const DATA_PATH: &str = "data"; + /// Stores graph metadata pub const META_PATH: &str = ".raph"; +/// Temporary metadata for atomic replacement +pub const DIRTY_PATH: &str = ".dirty"; + /// Directory that stores search indexes pub const INDEX_PATH: &str = "index"; /// Directory that stores vector embeddings of the graph pub const VECTORS_PATH: &str = "vectors"; +fn read_path_from_file(mut file: impl Read) -> Result { + let mut value = String::new(); + file.read_to_string(&mut value)?; + let path: RelativePath = serde_json::from_str(&value)?; + Ok(path.path) +} + +pub fn read_path_pointer(base_path: &Path, file_name: &str) -> Result, io::Error> { + let file = match File::open(base_path.join(file_name)) { + Ok(file) => file, + Err(error) => { + return match error.kind() { + ErrorKind::NotFound => Ok(None), + _ => Err(error), + } + } + }; + let path = read_path_from_file(file)?; + Ok(Some(path)) +} + +pub fn read_data_path(base_path: &Path) -> Result, io::Error> { + read_path_pointer(base_path, META_PATH) +} + +pub fn read_dirty_path(base_path: &Path) -> Result, io::Error> { + read_path_pointer(base_path, DIRTY_PATH) +} + +pub fn make_data_path(base_path: &Path, prefix: &str) -> Result { + let mut id = read_data_path(base_path)? + .and_then(|path| { + path.strip_prefix(prefix) + .and_then(|id| id.parse::().ok()) + }) + .map_or(0, |id| id + 1); + + let mut path = format!("{prefix}{id}"); + while base_path.join(&path).exists() { + id += 1; + path = format!("{prefix}{id}"); + } + Ok(path) +} + +fn get_zip_data_path(zip: &mut ZipArchive) -> Result { + let file = zip.by_name(META_PATH)?; + Ok(read_path_from_file(file)?) +} + +fn get_zip_graph_path(zip: &mut ZipArchive) -> Result { + let mut path = get_zip_data_path(zip)?; + let graph_path = get_zip_graph_path_name(zip, path.clone())?; + path.push('/'); + path.push_str(&graph_path); + Ok(graph_path) +} + +fn get_zip_graph_path_name( + zip: &mut ZipArchive, + mut data_path: String, +) -> Result { + data_path.push('/'); + data_path.push_str(META_PATH); + let graph_path = read_path_from_file(zip.by_name(&data_path)?)?; + Ok(graph_path) +} + +fn get_zip_meta_path(zip: &mut ZipArchive) -> Result { + let mut path = get_zip_graph_path(zip)?; + path.push('/'); + path.push_str(META_PATH); + Ok(path) +} + +fn file_opts() -> SimpleFileOptions { + SimpleFileOptions::default() +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct RelativePath { + pub path: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Metadata { + pub path: String, + pub meta: Option, +} + /// A container for managing graph data. +/// /// Folder structure: /// /// GraphFolder -/// ├── graph/ # Graph data -/// ├── .raph # Metadata file -/// ├── index/ # Search indexes (optional) -/// └── vectors/ # Vector embeddings (optional) +/// ├── .raph # Metadata file (json: {path: "data_{id}"}) +/// └── data_{id}/ # Data folder (incremental id for atomic replacement) +/// ├── .raph # Metadata file (json: {path: "graph_{id}", meta: {}}) +/// ├── graph_{id}/ # Graph data +/// ├── index/ # Search indexes (optional) +/// └── vectors/ # Vector embeddings (optional) /// /// If `write_as_zip_format` is true, then the folder is compressed /// and stored as a zip file. #[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] pub struct GraphFolder { - pub root_folder: PathBuf, + root_folder: PathBuf, pub(crate) write_as_zip_format: bool, } @@ -51,27 +153,38 @@ impl GraphFolder { } } + pub fn root(&self) -> &Path { + &self.root_folder + } + /// Reserve a folder, marking it as occupied by a graph. - /// Returns an error if `write_as_zip_format` is true or if the folder has data. - pub fn reserve(&self) -> Result<(), GraphError> { + /// Returns an error if the folder has data. + pub fn init(&self) -> Result<(), GraphError> { + let relative_data_path = self.get_relative_data_path()?; + let meta = serde_json::to_string(&RelativePath { + path: relative_data_path.clone(), + })?; if self.write_as_zip_format { - return Err(GraphError::IOErrorMsg( - "Cannot reserve a zip folder".to_string(), - )); - } - - self.ensure_clean_root_dir()?; - - // Mark as occupied using empty metadata & graph data. - File::create_new(self.get_meta_path())?; - fs::create_dir_all(self.get_graph_path())?; + let file = File::create_new(&self.root_folder)?; + let mut zip = ZipWriter::new(BufWriter::new(file)); + zip.start_file(META_PATH, file_opts())?; + zip.write_all(meta.as_bytes())?; + zip.add_directory(relative_data_path, file_opts())?; + zip.flush()?; + } else { + self.ensure_clean_root_dir()?; + let data_path = self.root_folder.join(META_PATH); + let mut path_file = File::create_new(&data_path)?; + path_file.write_all(meta.as_bytes())?; + fs::create_dir_all(&data_path)?; + } Ok(()) } /// Returns true if folder is occupied by a graph. pub fn is_reserved(&self) -> bool { - self.get_meta_path().exists() + self.get_meta_path().map_or(false, |path| path.exists()) } /// Clears the folder of any contents. @@ -87,20 +200,56 @@ impl GraphFolder { Ok(()) } - pub fn get_graph_path(&self) -> PathBuf { - self.root_folder.join(GRAPH_PATH) + pub fn get_relative_data_path(&self) -> Result { + let path = if self.is_zip() { + let mut zip = self.read_zip()?; + get_zip_data_path(&mut zip)? + } else { + read_data_path(&self.root_folder)?.unwrap_or_else(|| DATA_PATH.to_string() + "0") + }; + Ok(path) + } + + pub fn get_data_path(&self) -> Result { + let relative = + read_data_path(&self.root_folder)?.unwrap_or_else(|| DATA_PATH.to_string() + "0"); + Ok(self.root_folder.join(relative)) + } + + pub fn get_graph_path(&self) -> Result { + let mut path = self.get_data_path()?; + let relative = read_data_path(&path)?.unwrap_or_else(|| GRAPH_PATH.to_string() + "0"); + path.push(relative); + Ok(path) + } + + pub fn get_relative_graph_path(&self) -> Result { + if self.is_zip() { + let mut zip = self.read_zip()?; + let data_path = get_zip_data_path(&mut zip)?; + get_zip_graph_path_name(&mut zip, data_path) + } else { + let data_path = self.get_data_path()?; + Ok(read_data_path(&data_path)?.unwrap_or_else(|| GRAPH_PATH.to_string() + "0")) + } } - pub fn get_meta_path(&self) -> PathBuf { - self.root_folder.join(META_PATH) + pub fn get_meta_path(&self) -> Result { + let mut path = self.get_data_path()?; + path.push(META_PATH); + Ok(path) } - pub fn get_index_path(&self) -> PathBuf { - self.root_folder.join(INDEX_PATH) + pub fn get_index_path(&self) -> Result { + let mut path = self.get_data_path()?; + path.push(INDEX_PATH); + Ok(path) } - pub fn get_vectors_path(&self) -> PathBuf { - self.root_folder.join(VECTORS_PATH) + pub fn get_vectors_path(&self) -> Result { + let mut path = self.get_data_path()?; + path.push(VECTORS_PATH); + Ok(path) } pub fn get_base_path(&self) -> &Path { @@ -111,62 +260,64 @@ impl GraphFolder { self.root_folder.is_file() } - pub fn read_metadata(&self) -> Result { - match self.try_read_metadata() { - Ok(data) => Ok(data), - Err(e) => { - match e.kind() { - // In the case that the file is not found or invalid, try creating it then re-reading - ErrorKind::NotFound | ErrorKind::InvalidData | ErrorKind::UnexpectedEof => { - info!( - "Metadata file does not exist or is invalid. Attempting to recreate..." - ); - - // Either decode a graph serialized using encode or load using underlying storage. - let graph = if self.is_zip() - || MaterializedGraph::is_decodable(self.get_graph_path()) - { - MaterializedGraph::decode(self, None)? - } else { - // We currently do not have a way of figuring out the graph type - // from storage, so for now default to an EventGraph. - let graph = Graph::load_from_path(self.get_graph_path()); - MaterializedGraph::EventGraph(graph) - }; - - self.write_metadata(&graph)?; - - info!("Metadata file recreated successfully"); - - Ok(self.try_read_metadata()?) - } - _ => Err(e.into()), - } - } + fn read_zip(&self) -> Result, GraphError> { + if self.is_zip() { + let file = File::open(&self.root_folder)?; + let archive = ZipArchive::new(file)?; + Ok(archive) + } else { + Err(GraphError::NotAZip) } } - pub fn try_read_metadata(&self) -> Result { + pub fn replace_graph(&self, graph: impl ParquetEncoder + GraphView) -> Result<(), GraphError> { + let data_path = self.get_data_path()?; + let old_graph_path = self.get_graph_path().ok(); + let new_graph_path = make_data_path(&data_path, GRAPH_PATH)?; + let meta = Some(GraphMetadata::from_graph(&graph)); + let new_graph_folder = data_path.join(&new_graph_path); + let dirty_path = data_path.join(DIRTY_PATH); + fs::write( + &dirty_path, + &serde_json::to_vec(&Metadata { + path: new_graph_path, + meta, + })?, + )?; + graph.encode_parquet(&new_graph_folder)?; + fs::rename(&dirty_path, data_path.join(META_PATH))?; + if let Some(old_graph_path) = old_graph_path { + fs::remove_dir_all(old_graph_path)?; + } + Ok(()) + } + + pub fn read_metadata(&self) -> Result { + let mut json = String::new(); if self.is_zip() { - let file = File::open(&self.root_folder)?; - let mut archive = ZipArchive::new(file)?; - let zip_file = archive.by_name(META_PATH)?; - let reader = BufReader::new(zip_file); - let metadata = serde_json::from_reader(reader)?; - Ok(metadata) + let mut zip = self.read_zip()?; + let path = get_zip_meta_path(&mut zip)?; + let mut zip_file = zip.by_name(&path)?; + zip_file.read_to_string(&mut json)?; } else { - let file = File::open(self.get_meta_path())?; - let reader = BufReader::new(file); - let metadata = serde_json::from_reader(reader)?; - Ok(metadata) + let mut file = File::open(self.get_meta_path()?)?; + file.read_to_string(&mut json)?; } + let metadata = serde_json::from_str(&json)?; + Ok(metadata) } pub fn write_metadata<'graph>( &self, graph: &impl GraphViewOps<'graph>, ) -> Result<(), GraphError> { + let graph_path = self.get_relative_graph_path()?; + let data_path = self.get_relative_data_path()?; let metadata = GraphMetadata::from_graph(graph); + let meta = Metadata { + path: graph_path, + meta: Some(metadata), + }; if self.write_as_zip_format { let file = File::options() @@ -176,20 +327,14 @@ impl GraphFolder { let mut zip = ZipWriter::new_append(file)?; zip.start_file::<_, ()>(META_PATH, FileOptions::default())?; - Ok(serde_json::to_writer(zip, &metadata)?) + Ok(serde_json::to_writer(zip, &meta)?) } else { - let path = self.get_meta_path(); + let path = self.get_meta_path()?; let file = File::create(path.clone())?; - - Ok(serde_json::to_writer(file, &metadata)?) + Ok(serde_json::to_writer(file, &meta)?) } } - pub(crate) fn get_appendable_graph_file(&self) -> Result { - let path = self.get_graph_path(); - Ok(OpenOptions::new().append(true).open(path)?) - } - fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { if self.root_folder.exists() { let non_empty = self.root_folder.read_dir()?.next().is_some(); @@ -203,9 +348,9 @@ impl GraphFolder { Ok(()) } - fn is_disk_graph(&self) -> bool { - let path = self.get_graph_path(); - path.is_dir() + fn is_disk_graph(&self) -> Result { + let meta = self.read_metadata()?; + Ok(meta.is_diskgraph) } /// Creates a zip file from the folder. @@ -312,6 +457,7 @@ mod tests { /// Verify that the metadata is re-created if it does not exist. #[test] + #[ignore = "Need to think about how to deal with reading old format"] fn test_read_metadata_from_noninitialized_zip() { global_info_logger(); @@ -339,6 +485,7 @@ mod tests { edge_count: 0, metadata: vec![], graph_type: GraphType::EventGraph, + is_diskgraph: false } ); } @@ -373,6 +520,7 @@ mod tests { /// Verify that the metadata is re-created if it does not exist. #[test] + #[ignore = "Need to think about how to handle reading from old format"] fn test_read_metadata_from_noninitialized_folder() { global_info_logger(); @@ -399,6 +547,7 @@ mod tests { edge_count: 0, metadata: vec![], graph_type: GraphType::EventGraph, + is_diskgraph: false } ); } diff --git a/raphtory/src/serialise/metadata.rs b/raphtory/src/serialise/metadata.rs index d62a29fe78..388e8e549f 100644 --- a/raphtory/src/serialise/metadata.rs +++ b/raphtory/src/serialise/metadata.rs @@ -15,6 +15,7 @@ pub struct GraphMetadata { pub edge_count: usize, pub metadata: Vec<(ArcStr, Prop)>, pub graph_type: GraphType, + pub is_diskgraph: bool, } impl GraphMetadata { @@ -23,11 +24,13 @@ impl GraphMetadata { let edge_count = graph.count_edges(); let metadata = graph.metadata().as_vec(); let graph_type = graph.graph_type(); + let is_diskgraph = graph.disk_storage_enabled(); Self { node_count, edge_count, metadata, graph_type, + is_diskgraph, } } } diff --git a/raphtory/src/serialise/mod.rs b/raphtory/src/serialise/mod.rs index e6a139713b..3725237f7e 100644 --- a/raphtory/src/serialise/mod.rs +++ b/raphtory/src/serialise/mod.rs @@ -7,7 +7,7 @@ pub(crate) mod parquet; pub mod proto; mod serialise; -pub use graph_folder::{GraphFolder, GRAPH_PATH, INDEX_PATH, META_PATH, VECTORS_PATH}; +pub use graph_folder::*; pub use serialise::{StableDecode, StableEncode}; #[cfg(feature = "proto")] diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index 88ff739193..928197fd77 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -41,8 +41,8 @@ impl StableEncode for T { #[cfg(feature = "search")] self.persist_index_to_disk_zip(&folder)?; } else { - folder.reserve()?; - self.encode_parquet(&folder.get_graph_path())?; + folder.init()?; + self.encode_parquet(&folder.get_graph_path()?)?; #[cfg(feature = "search")] self.persist_index_to_disk(&folder)?; @@ -100,7 +100,7 @@ impl StableDecode for T { let reader = std::fs::File::open(&folder.get_base_path())?; graph = Self::decode_parquet_from_zip(reader, path_for_decoded_graph)?; } else { - graph = Self::decode_parquet(&folder.get_graph_path(), path_for_decoded_graph)?; + graph = Self::decode_parquet(&folder.get_graph_path()?, path_for_decoded_graph)?; } #[cfg(feature = "search")] From a9f5c500c5d9a16100cceda0f1ca43604fd93179 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 8 Dec 2025 17:19:34 +0100 Subject: [PATCH 09/39] fix writing to empty graph folder --- raphtory-graphql/src/data.rs | 6 ++++-- raphtory-graphql/src/paths.rs | 7 ++++--- raphtory/src/serialise/graph_folder.rs | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index 15c6acb93e..171173dcc7 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -365,8 +365,10 @@ impl Drop for Data { fn drop(&mut self) { // On drop, serialize graphs that don't have underlying storage. for (_, graph) in self.cache.iter() { - if let Err(e) = graph.folder.write_graph_data(graph.graph) { - error!("Error encoding graph to disk on drop: {e}"); + if graph.is_dirty() { + if let Err(e) = graph.folder.write_graph_data(graph.graph) { + error!("Error encoding graph to disk on drop: {e}"); + } } } } diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index a62f096018..18314fea01 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -317,14 +317,15 @@ impl WriteableGraphFolder { } fn finish_inner(&self) -> Result<(), InternalPathValidationError> { - let old_path = self.folder.data_path.get_data_path().ok(); + let old_path = read_data_path(self.folder.data_path().root())?; fs::rename( self.folder.data_path.root().join(DIRTY_PATH), self.folder.data_path.root().join(META_PATH), )?; if let Some(old_path) = old_path { - if old_path.exists() { - fs::remove_dir_all(old_path)?; + let path = self.folder.data_path.root().join(&old_path); + if path.exists() { + fs::remove_dir_all(path)?; } } if let Some(cleanup) = self.dirty_marker.as_ref() { diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index a7134ec904..0de8b758c8 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -272,7 +272,7 @@ impl GraphFolder { pub fn replace_graph(&self, graph: impl ParquetEncoder + GraphView) -> Result<(), GraphError> { let data_path = self.get_data_path()?; - let old_graph_path = self.get_graph_path().ok(); + let old_graph_path = read_data_path(&data_path)?; let new_graph_path = make_data_path(&data_path, GRAPH_PATH)?; let meta = Some(GraphMetadata::from_graph(&graph)); let new_graph_folder = data_path.join(&new_graph_path); From 260b1653d23323675294b4c83381e95f6f01c031 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Tue, 9 Dec 2025 16:52:40 +0100 Subject: [PATCH 10/39] move a lot more of the logic to GraphFolder --- db4-graph/src/lib.rs | 5 +- raphtory-graphql/src/data.rs | 70 ++- raphtory-graphql/src/graph.rs | 34 +- raphtory-graphql/src/model/graph/graph.rs | 32 +- .../src/model/graph/meta_graph.rs | 7 +- raphtory-graphql/src/model/graph/namespace.rs | 7 +- raphtory-graphql/src/model/mod.rs | 26 +- raphtory-graphql/src/paths.rs | 266 ++++++------ raphtory-storage/src/graph/graph.rs | 4 +- raphtory/src/db/api/mutation/index_ops.rs | 6 +- .../db/api/storage/graph/storage_ops/mod.rs | 3 +- raphtory/src/db/api/storage/storage.rs | 5 +- .../src/db/api/view/internal/materialize.rs | 2 +- raphtory/src/db/api/view/internal/mod.rs | 6 +- raphtory/src/errors.rs | 7 +- raphtory/src/python/graph/views/graph_view.rs | 4 +- raphtory/src/search/graph_index.rs | 39 +- raphtory/src/serialise/graph_folder.rs | 401 +++++++++++++----- raphtory/src/serialise/metadata.rs | 2 +- raphtory/src/serialise/serialise.rs | 21 +- 20 files changed, 546 insertions(+), 401 deletions(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 9250e8f485..4865d2bead 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -206,8 +206,9 @@ impl, ES = ES>> TemporalGraph { }) } - pub fn disk_storage_enabled(&self) -> bool { - self.graph_dir().is_some() && Extension::disk_storage_enabled() + pub fn disk_storage_enabled(&self) -> Option<&Path> { + self.graph_dir() + .filter(|_| Extension::disk_storage_enabled()) } pub fn extension(&self) -> &EXT { self.storage().extension() diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index 171173dcc7..e5949f5aab 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -4,8 +4,8 @@ use crate::{ model::blocking_io, paths::{ mark_dirty, valid_path, valid_relative_graph_path, ExistingGraphFolder, - InternalPathValidationError, PathValidationError, ValidGraphFolder, WithPath, - WriteableGraphFolder, + InternalPathValidationError, PathValidationError, ValidGraphFolder, ValidGraphPaths, + ValidWriteableGraphFolder, WithPath, }, rayon::blocking_compute, GQLError, @@ -18,7 +18,7 @@ use raphtory::{ db::api::view::{internal::InternalStorageOps, MaterializedGraph}, errors::{GraphError, InvalidPathReason}, prelude::StableEncode, - serialise::{GraphFolder, META_PATH}, + serialise::{GraphFolder, GraphPaths, META_PATH}, vectors::{ cache::VectorCache, template::DocumentTemplate, vectorisable::Vectorisable, vectorised_graph::VectorisedGraph, @@ -43,7 +43,7 @@ pub const DIRTY_PATH: &'static str = ".dirty"; pub struct EmbeddingConf { pub(crate) cache: VectorCache, pub(crate) global_template: Option, - pub(crate) individual_templates: HashMap, + pub(crate) individual_templates: HashMap, } #[derive(thiserror::Error, Debug)] @@ -159,7 +159,7 @@ impl Data { return; } if let Err(e) = - blocking_compute(move || graph.folder.write_graph_data(graph.graph)).await + blocking_compute(move || graph.folder.replace_graph_data(graph.graph)).await { error!("Error encoding graph to disk on eviction: {e}"); } @@ -185,11 +185,11 @@ impl Data { &self, path: &str, overwrite: bool, - ) -> Result { + ) -> Result { if overwrite { - WriteableGraphFolder::try_existing_or_new(self.work_dir.clone(), path) + ValidWriteableGraphFolder::try_existing_or_new(self.work_dir.clone(), path) } else { - WriteableGraphFolder::try_new(self.work_dir.clone(), path) + ValidWriteableGraphFolder::try_new(self.work_dir.clone(), path) } } @@ -206,7 +206,7 @@ impl Data { pub async fn insert_graph( &self, - writeable_folder: WriteableGraphFolder, + writeable_folder: ValidWriteableGraphFolder, graph: MaterializedGraph, ) -> Result<(), InsertionError> { let vectors = self.vectorise(graph.clone(), &writeable_folder).await; @@ -218,27 +218,25 @@ impl Data { }) .await?; - self.cache.insert(graph.folder.local_path(), graph).await; + self.cache + .insert(graph.folder.local_path_string(), graph) + .await; Ok(()) } /// Insert a graph serialized from a graph folder. pub async fn insert_graph_as_bytes( &self, - folder: WriteableGraphFolder, + folder: ValidWriteableGraphFolder, bytes: R, ) -> Result<(), InsertionError> { let folder_clone = folder.clone(); - blocking_io(move || { - folder_clone - .data_path() - .unzip_to_folder(bytes) - .map_err(|err| { - InsertionError::from_graph_err(folder_clone.get_original_path_str(), err) - }) - }) - .await?; - self.vectorise_folder(folder.as_existing()?).await; + blocking_io(move || folder_clone.write_graph_bytes(bytes)).await?; + if let Some(template) = self.resolve_template(folder.local_path()) { + let folder_clone = folder.clone(); + let graph = blocking_io(move || folder_clone.read_graph()).await?; + self.vectorise_with_template(graph, &folder, template).await; + } blocking_io(move || folder.finish()).await?; Ok(()) } @@ -248,8 +246,8 @@ impl Data { graph_folder: ExistingGraphFolder, ) -> Result<(), MutationErrorInner> { blocking_io(move || { - let dirty_file = mark_dirty(graph_folder.path())?; - fs::remove_dir_all(graph_folder.path())?; + let dirty_file = mark_dirty(graph_folder.root())?; + fs::remove_dir_all(graph_folder.root())?; fs::remove_file(dirty_file)?; Ok::<_, MutationErrorInner>(()) }) @@ -266,7 +264,7 @@ impl Data { Ok(()) } - fn resolve_template(&self, graph: &Path) -> Option<&DocumentTemplate> { + fn resolve_template(&self, graph: &str) -> Option<&DocumentTemplate> { let conf = self.embedding_conf.as_ref()?; conf.individual_templates .get(graph) @@ -276,7 +274,7 @@ impl Data { async fn vectorise_with_template( &self, graph: MaterializedGraph, - folder: &ValidGraphFolder, + folder: &impl ValidGraphPaths, template: &DocumentTemplate, ) -> Option> { let conf = self.embedding_conf.as_ref()?; @@ -284,14 +282,14 @@ impl Data { .vectorise( conf.cache.clone(), template.clone(), - Some(&folder.get_vectors_path().ok()?), + Some(&folder.vectors_path().ok()?), true, // verbose ) .await; match vectors { Ok(vectors) => Some(vectors), Err(error) => { - let name = folder.get_original_path_str(); + let name = folder.local_path_string(); warn!("An error occurred when trying to vectorise graph {name}: {error}"); None } @@ -301,16 +299,16 @@ impl Data { async fn vectorise( &self, graph: MaterializedGraph, - folder: &ValidGraphFolder, + folder: &ValidWriteableGraphFolder, ) -> Option> { - let template = self.resolve_template(folder.get_original_path())?; + let template = self.resolve_template(folder.local_path())?; self.vectorise_with_template(graph, folder, template).await } async fn vectorise_folder(&self, folder: ExistingGraphFolder) -> Option<()> { // it's important that we check if there is a valid template set for this graph path // before actually loading the graph, otherwise we are loading the graph for no reason - let template = self.resolve_template(folder.get_original_path())?; + let template = self.resolve_template(folder.local_path())?; let graph = self .read_graph_from_disk_inner(folder.clone()) .await @@ -322,14 +320,13 @@ impl Data { pub(crate) async fn vectorise_all_graphs_that_are_not(&self) -> Result<(), GraphError> { for folder in self.get_all_graph_folders() { - if !folder.data_path().get_vectors_path()?.exists() { + if !folder.vectors_path()?.exists() { self.vectorise_folder(folder).await; } } Ok(()) } - // TODO: return iter pub fn get_all_graph_folders(&self) -> impl Iterator { let base_path = self.work_dir.clone(); WalkDir::new(&self.work_dir) @@ -366,7 +363,7 @@ impl Drop for Data { // On drop, serialize graphs that don't have underlying storage. for (_, graph) in self.cache.iter() { if graph.is_dirty() { - if let Err(e) = graph.folder.write_graph_data(graph.graph) { + if let Err(e) = graph.folder.replace_graph_data(graph.graph) { error!("Error encoding graph to disk on drop: {e}"); } } @@ -382,6 +379,7 @@ pub(crate) mod data_tests { use raphtory::{ db::api::view::{internal::InternalStorageOps, MaterializedGraph}, prelude::*, + serialise::GraphPaths, }; use std::{collections::HashMap, fs, path::Path, time::Duration}; use tokio::time::sleep; @@ -505,7 +503,7 @@ pub(crate) mod data_tests { let paths = data .get_all_graph_folders() .into_iter() - .map(|folder| folder.0.data_path().root().to_path_buf()) + .map(|folder| folder.0.root().to_path_buf()) .collect_vec(); assert_eq!(paths.len(), 5); @@ -568,7 +566,7 @@ pub(crate) mod data_tests { let loaded_graph2 = data.get_graph("test_graph2").await.unwrap(); // TODO: This test doesn't work with disk storage right now, make sure modification dates actually update correctly! - if loaded_graph1.graph.disk_storage_enabled() { + if loaded_graph1.graph.disk_storage_enabled().is_some() { assert!( !loaded_graph1.is_dirty(), "Graph1 should not be dirty when loaded from disk" @@ -675,7 +673,7 @@ pub(crate) mod data_tests { data.cache.run_pending_tasks().await; // TODO: This test doesn't work with disk storage right now, make sure modification dates actually update correctly! - if loaded_graph1.graph.disk_storage_enabled() { + if loaded_graph1.graph.disk_storage_enabled().is_some() { // Check modification times after eviction let graph1_metadata_after = fs::metadata(&graph1_path).unwrap(); let graph2_metadata_after = fs::metadata(&graph2_path).unwrap(); diff --git a/raphtory-graphql/src/graph.rs b/raphtory-graphql/src/graph.rs index 252b0b62df..99fc0b52ae 100644 --- a/raphtory-graphql/src/graph.rs +++ b/raphtory-graphql/src/graph.rs @@ -1,4 +1,4 @@ -use crate::paths::ExistingGraphFolder; +use crate::paths::{ExistingGraphFolder, ValidGraphPaths}; use raphtory::{ core::entities::nodes::node_ref::AsNodeRef, db::{ @@ -27,6 +27,7 @@ use tracing::info; #[cfg(feature = "search")] use raphtory::prelude::IndexMutationOps; +use raphtory::serialise::GraphPaths; #[derive(Clone)] pub struct GraphWithVectors { @@ -87,37 +88,12 @@ impl GraphWithVectors { cache: Option, create_index: bool, ) -> Result { - let graph = { - let data_path = folder.data_path(); - // Either decode a graph serialized using encode or load using underlying storage. - if MaterializedGraph::is_decodable(data_path.get_graph_path()?) { - let path_for_decoded_graph = None; - MaterializedGraph::decode(data_path, path_for_decoded_graph)? - } else { - let metadata = data_path.read_metadata()?; - let graph = match metadata.graph_type { - GraphType::EventGraph => { - let graph = Graph::load_from_path(data_path.get_graph_path()?); - MaterializedGraph::EventGraph(graph) - } - GraphType::PersistentGraph => { - let graph = PersistentGraph::load_from_path(data_path.get_graph_path()?); - MaterializedGraph::PersistentGraph(graph) - } - }; - - #[cfg(feature = "search")] - graph.load_index(&data_path)?; - graph - } - }; - + let graph = folder.data_path()?.read_graph()?; let vectors = cache.and_then(|cache| { - VectorisedGraph::read_from_path(&folder.get_vectors_path().ok()?, graph.clone(), cache) - .ok() + VectorisedGraph::read_from_path(&folder.vectors_path().ok()?, graph.clone(), cache).ok() }); - info!("Graph loaded = {}", folder.get_original_path_str()); + info!("Graph loaded = {}", folder.local_path()); #[cfg(feature = "search")] if create_index { diff --git a/raphtory-graphql/src/model/graph/graph.rs b/raphtory-graphql/src/model/graph/graph.rs index 3c77ca2a60..834a0001ee 100644 --- a/raphtory-graphql/src/model/graph/graph.rs +++ b/raphtory-graphql/src/model/graph/graph.rs @@ -48,7 +48,10 @@ use std::{ sync::Arc, }; -use crate::{graph::GraphWithVectors, paths::PathValidationError}; +use crate::{ + graph::GraphWithVectors, + paths::{PathValidationError, ValidGraphPaths}, +}; #[cfg(feature = "search")] use raphtory::db::api::view::SearchableGraphOps; @@ -428,28 +431,17 @@ impl GqlGraph { } /// Returns path of graph. - async fn path(&self) -> Result { - Ok(self - .path - .get_original_path() - .to_str() - .ok_or(InvalidPathReason::PathNotParsable( - self.path.to_error_path(), - ))? - .to_owned()) + async fn path(&self) -> String { + self.path.local_path_string() } /// Returns namespace of graph. - async fn namespace(&self) -> Result { - Ok(self - .path - .get_original_path() - .parent() - .and_then(|p| p.to_str().map(|s| s.to_string())) - .ok_or(InvalidPathReason::PathNotParsable( - self.path.to_error_path(), - ))? - .to_owned()) + async fn namespace(&self) -> String { + self.path + .local_path() + .rsplit_once("/") + .map_or("", |(prefix, _)| prefix) + .to_string() } /// Returns the graph schema. diff --git a/raphtory-graphql/src/model/graph/meta_graph.rs b/raphtory-graphql/src/model/graph/meta_graph.rs index 21ac19ab79..2b01d95105 100644 --- a/raphtory-graphql/src/model/graph/meta_graph.rs +++ b/raphtory-graphql/src/model/graph/meta_graph.rs @@ -1,4 +1,7 @@ -use crate::{model::graph::property::GqlProperty, paths::ExistingGraphFolder}; +use crate::{ + model::graph::property::GqlProperty, + paths::{ExistingGraphFolder, ValidGraphPaths}, +}; use dynamic_graphql::{ResolvedObject, ResolvedObjectFields, Result}; use raphtory::{errors::GraphError, serialise::metadata::GraphMetadata}; use std::{cmp::Ordering, sync::Arc}; @@ -57,7 +60,7 @@ impl MetaGraph { /// Returns path of graph. async fn path(&self) -> String { - self.folder.get_original_path_str().to_owned() + self.folder.local_path_string() } /// Returns the timestamp for the creation of the graph. diff --git a/raphtory-graphql/src/model/graph/namespace.rs b/raphtory-graphql/src/model/graph/namespace.rs index 5110a325b2..978bcf89f1 100644 --- a/raphtory-graphql/src/model/graph/namespace.rs +++ b/raphtory-graphql/src/model/graph/namespace.rs @@ -103,7 +103,11 @@ impl Namespace { pub fn try_new_child(&self, file_name: &str) -> Result { let current_dir = valid_path(self.current_dir.clone(), file_name)?; - let relative_path = [&self.relative_path, file_name].join("/"); + let relative_path = if self.relative_path.is_empty() { + file_name.to_owned() + } else { + [&self.relative_path, file_name].join("/") + }; let child = if current_dir.is_namespace() { NamespacedItem::Namespace(Self::try_from_valid(current_dir, relative_path)?) } else { @@ -118,6 +122,7 @@ impl Namespace { /// Non-recursively list children pub fn get_children(&self) -> impl Iterator + use<'_> { WalkDir::new(&self.current_dir) + .min_depth(1) .max_depth(1) .into_iter() .flatten() diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index 95be2618b5..3c468b6f01 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -10,7 +10,7 @@ use crate::{ }, plugins::{mutation_plugin::MutationPlugin, query_plugin::QueryPlugin}, }, - paths::{valid_path, ValidGraphFolder, WriteableGraphFolder}, + paths::{valid_path, ValidGraphFolder, ValidWriteableGraphFolder}, rayon::blocking_compute, url_encode::{url_decode_graph, url_encode_graph}, }; @@ -206,7 +206,7 @@ impl Mut { let data = ctx.data_unchecked::(); let overwrite = false; let folder = data.validate_path_for_insert(&path, overwrite)?; - let graph_path = folder.data_path().get_graph_path()?; + let graph_path = folder.graph_path()?; let graph: MaterializedGraph = match graph_type { GqlGraphType::Persistent => PersistentGraph::new_at_path(graph_path).into(), GqlGraphType::Event => Graph::new_at_path(graph_path).into(), @@ -284,12 +284,11 @@ impl Mut { ) -> Result { let data = ctx.data_unchecked::(); let folder = if overwrite { - WriteableGraphFolder::try_existing_or_new(data.work_dir.clone(), path)? + ValidWriteableGraphFolder::try_existing_or_new(data.work_dir.clone(), path)? } else { - WriteableGraphFolder::try_new(data.work_dir.clone(), path)? + ValidWriteableGraphFolder::try_new(data.work_dir.clone(), path)? }; - let g: MaterializedGraph = - url_decode_graph(graph, Some(&folder.data_path().get_graph_path()?))?; + let g: MaterializedGraph = url_decode_graph(graph, Some(&folder.graph_path()?))?; data.insert_graph(folder, g).await?; Ok(path.to_owned()) @@ -307,14 +306,15 @@ impl Mut { overwrite: bool, ) -> Result { let data = ctx.data_unchecked::(); - let parent_graph = data.get_graph(parent_path).await?.graph; - let new_subgraph = - blocking_compute(move || parent_graph.subgraph(nodes).materialize()).await?; let folder = data.validate_path_for_insert(&new_path, overwrite)?; - - if overwrite { - let _ignored = data.delete_graph(&new_path).await; - } + let parent_graph = data.get_graph(parent_path).await?.graph; + let graph_path = folder.graph_path()?; + let new_subgraph = blocking_compute(move || { + parent_graph + .subgraph(nodes) + .materialize_at(Some(&graph_path)) + }) + .await?; data.insert_graph(folder, new_subgraph).await?; Ok(new_path) diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index 18314fea01..9600a1c18a 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -11,7 +11,8 @@ use raphtory::{ prelude::ParquetEncoder, serialise::{ make_data_path, metadata::GraphMetadata, read_data_path, read_dirty_path, - read_path_pointer, GraphFolder, Metadata, RelativePath, DATA_PATH, META_PATH, + read_path_pointer, GraphFolder, GraphPaths, InnerGraphFolder, Metadata, RelativePath, + WriteableGraphFolder, DATA_PATH, META_PATH, }, }; use serde::{Deserialize, Serialize}; @@ -19,7 +20,7 @@ use std::{ cmp::Ordering, fs, fs::File, - io::{ErrorKind, Read, Write}, + io::{ErrorKind, Read, Seek, Write}, ops::Deref, path::{Component, Path, PathBuf, StripPrefixError}, time::{SystemTime, UNIX_EPOCH}, @@ -27,6 +28,21 @@ use std::{ use tokio::io::AsyncReadExt; use tracing::{error, metadata, warn}; +pub trait ValidGraphPaths: GraphPaths { + fn local_path(&self) -> &str; + + fn local_path_string(&self) -> String { + self.local_path().to_owned() + } + + fn with_internal_errors( + &self, + fun: impl FnOnce() -> R, + ) -> Result { + fun().with_path(self.local_path()) + } +} + pub struct ValidPath(PathBuf); impl ValidPath { @@ -48,6 +64,22 @@ impl ValidPath { #[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] pub struct ExistingGraphFolder(pub(crate) ValidGraphFolder); +impl GraphPaths for ExistingGraphFolder { + fn root(&self) -> &Path { + self.0.root() + } + + fn data_path(&self) -> Result { + self.0.data_path() + } +} + +impl ValidGraphPaths for ExistingGraphFolder { + fn local_path(&self) -> &str { + self.0.local_path() + } +} + impl Deref for ExistingGraphFolder { type Target = ValidGraphFolder; @@ -69,7 +101,7 @@ impl ExistingGraphFolder { let graph_folder: GraphFolder = base_path.into_path().into(); if graph_folder.is_reserved() { Ok(Self(ValidGraphFolder { - data_path: graph_folder, + graph_folder: graph_folder, local_path: relative_path.to_string(), })) } else { @@ -78,11 +110,23 @@ impl ExistingGraphFolder { )) } } + + fn replace_graph_data_inner( + &self, + graph: MaterializedGraph, + ) -> Result<(), InternalPathValidationError> { + self.graph_folder.data_path()?.replace_graph(graph)?; + Ok(()) + } + pub fn replace_graph_data(&self, graph: MaterializedGraph) -> Result<(), PathValidationError> { + self.replace_graph_data_inner(graph) + .with_path(self.local_path()) + } } #[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] pub struct ValidGraphFolder { - data_path: GraphFolder, + graph_folder: GraphFolder, local_path: String, } @@ -144,7 +188,7 @@ pub(crate) fn valid_path( } #[derive(Clone, Debug)] -struct NewPath { +pub struct NewPath { path: PathBuf, cleanup: Option, } @@ -231,20 +275,29 @@ impl CleanupPath { } #[derive(Clone, Debug)] -pub(crate) struct WriteableGraphFolder { - folder: ValidGraphFolder, +pub(crate) struct ValidWriteableGraphFolder { + data_path: WriteableGraphFolder, + local_path: String, dirty_marker: Option, } -impl Deref for WriteableGraphFolder { - type Target = ValidGraphFolder; +impl GraphPaths for ValidWriteableGraphFolder { + fn root(&self) -> &Path { + self.data_path.root() + } - fn deref(&self) -> &Self::Target { - &self.folder + fn data_path(&self) -> Result { + self.data_path.data_path() + } +} + +impl ValidGraphPaths for ValidWriteableGraphFolder { + fn local_path(&self) -> &str { + &self.local_path } } -impl WriteableGraphFolder { +impl ValidWriteableGraphFolder { fn new_inner( valid_path: NewPath, graph_name: &str, @@ -256,24 +309,11 @@ impl WriteableGraphFolder { return Err(InternalPathValidationError::GraphIsNamespace); } } - let next_path = make_data_path(graph_folder.root(), DATA_PATH)?; - let data_folder = graph_folder.root().join(&next_path); - fs::create_dir(&data_folder)?; - - fs::write( - graph_folder.root().join(DIRTY_PATH), - &serde_json::to_vec(&Metadata { - path: next_path, - meta: None, - })?, - )?; - let folder = ValidGraphFolder { - data_path: graph_folder, - local_path: graph_name.to_string(), - }; + let data_path = graph_folder.init_swap()?; Ok(Self { - folder, + data_path, dirty_marker: valid_path.cleanup, + local_path: graph_name.to_string(), }) } fn new(valid_path: NewPath, graph_name: &str) -> Result { @@ -316,33 +356,43 @@ impl WriteableGraphFolder { Self::new(path, relative_path) } - fn finish_inner(&self) -> Result<(), InternalPathValidationError> { - let old_path = read_data_path(self.folder.data_path().root())?; - fs::rename( - self.folder.data_path.root().join(DIRTY_PATH), - self.folder.data_path.root().join(META_PATH), - )?; - if let Some(old_path) = old_path { - let path = self.folder.data_path.root().join(&old_path); - if path.exists() { - fs::remove_dir_all(path)?; - } - } - if let Some(cleanup) = self.dirty_marker.as_ref() { - cleanup.persist()?; - } + fn write_graph_data_inner( + &self, + graph: MaterializedGraph, + ) -> Result<(), InternalPathValidationError> { + self.data_path.data_path()?.replace_graph(graph)?; Ok(()) } + pub fn write_graph_data(&self, graph: MaterializedGraph) -> Result<(), PathValidationError> { + self.write_graph_data_inner(graph) + .with_path(self.local_path()) + } + + pub fn read_graph(&self) -> Result { + self.with_internal_errors(|| self.data_path()?.read_graph()) + } + + pub fn write_graph_bytes( + &self, + bytes: R, + ) -> Result<(), PathValidationError> { + self.data_path + .data_path() + .with_path(&self.local_path)? + .unzip_to_folder(bytes) + .with_path(&self.local_path) + } /// Swap old and new data and delete the old graph pub fn finish(self) -> Result { - match self.finish_inner() { - Ok(_) => Ok(self.folder), - Err(error) => Err(PathValidationError::InternalError { - graph: self.folder.local_path, - error, - }), + let data_path = self.data_path.finish().with_path(&self.local_path)?; + if let Some(cleanup) = self.dirty_marker.as_ref() { + cleanup.persist().with_path(&self.local_path)?; } + Ok(ValidGraphFolder { + graph_folder: data_path, + local_path: self.local_path, + }) } } @@ -398,14 +448,16 @@ pub enum PathValidationError { IOError { graph: String, error: io::Error }, } -pub trait WithPath { - fn with_path(self, graph: String) -> Result; +pub trait WithPath { + type Value; + fn with_path>(self, graph: S) -> Result; } -impl> WithPath for Result { - fn with_path(self, graph: String) -> Result { +impl> WithPath for Result { + type Value = V; + fn with_path>(self, graph: S) -> Result { self.map_err(move |error| PathValidationError::InternalError { - graph, + graph: graph.into(), error: error.into(), }) } @@ -501,6 +553,22 @@ pub(crate) fn mark_dirty(path: &Path) -> Result &Path { + self.graph_folder.root() + } + + fn data_path(&self) -> Result { + self.graph_folder.data_path() + } +} + +impl ValidGraphPaths for ValidGraphFolder { + fn local_path(&self) -> &str { + &self.local_path + } +} + impl ValidGraphFolder { fn with_internal_errors( &self, @@ -509,20 +577,18 @@ impl ValidGraphFolder { map().with_path(self.local_path()) } - pub fn path(&self) -> &Path { - &self.data_path.root() - } - pub fn local_path(&self) -> String { - self.local_path.clone() + pub fn graph_folder(&self) -> &GraphFolder { + &self.graph_folder } pub fn created(&self) -> Result { - let path = self.meta_path()?; - self.with_internal_errors(move || Ok(path.metadata()?.created()?.to_millis()?)) + self.with_internal_errors(|| { + Ok(self.root_meta_path().metadata()?.created()?.to_millis()?) + }) } pub fn last_opened(&self) -> Result { self.with_internal_errors(|| { - Ok(fs::metadata(self.data_path.get_meta_path()?)? + Ok(fs::metadata(self.graph_folder.meta_path()?)? .accessed()? .to_millis()?) }) @@ -530,49 +596,32 @@ impl ValidGraphFolder { pub fn last_updated(&self) -> Result { self.with_internal_errors(|| { - Ok(fs::metadata(self.data_path().get_meta_path()?)? - .modified()? - .to_millis()?) + Ok(fs::metadata(self.meta_path()?)?.modified()?.to_millis()?) }) } pub async fn created_async(&self) -> Result { - let path = self.meta_path()?; - let metadata = tokio::fs::metadata(path) - .await - .with_path(self.local_path())?; - self.with_internal_errors(|| Ok(metadata.created()?.to_millis()?)) + let cloned = self.clone(); + blocking_io(move || cloned.created()).await } pub async fn last_opened_async(&self) -> Result { - let metadata = tokio::fs::metadata(self.meta_path()?) - .await - .with_path(self.local_path())?; - self.with_internal_errors(|| Ok(metadata.accessed()?.to_millis()?)) + let cloned = self.clone(); + blocking_io(move || cloned.last_opened()).await } pub async fn last_updated_async(&self) -> Result { - let metadata = tokio::fs::metadata(self.meta_path()?) - .await - .with_path(self.local_path())?; - self.with_internal_errors(|| Ok(metadata.modified()?.to_millis()?)) + let cloned = self.clone(); + blocking_io(move || cloned.last_updated()).await } pub async fn read_metadata_async(&self) -> Result { - let folder: GraphFolder = self.data_path.clone(); + let folder: GraphFolder = self.graph_folder.clone(); blocking_compute(move || folder.read_metadata()) .await .with_path(self.local_path()) } - pub fn get_original_path_str(&self) -> &str { - &self.local_path - } - - pub fn get_original_path(&self) -> &Path { - &Path::new(&self.local_path) - } - /// This returns the PathBuf used to build multiple GraphError types pub fn to_error_path(&self) -> PathBuf { self.local_path.to_owned().into() @@ -598,49 +647,8 @@ impl ValidGraphFolder { }; Ok(name) } - - fn write_graph_data_inner( - &self, - graph: MaterializedGraph, - ) -> Result<(), InternalPathValidationError> { - let metadata = GraphMetadata::from_graph(&graph); - let data_folder = &self.data_path; - if graph.disk_storage_enabled() { - let path = data_folder.get_relative_graph_path()?; - let meta_json = serde_json::to_string(&Metadata { - path, - meta: Some(metadata), - })?; - let dirty_path = data_folder.root().join(DIRTY_PATH); - fs::write(&dirty_path, &meta_json)?; - fs::rename(&dirty_path, data_folder.root().join(META_PATH))?; - } else { - data_folder.replace_graph(graph)?; - } - Ok(()) - } - pub(crate) fn write_graph_data( - &self, - graph: MaterializedGraph, - ) -> Result<(), PathValidationError> { - self.write_graph_data_inner(graph) - .with_path(self.local_path()) - } - - pub(crate) fn data_path(&self) -> &GraphFolder { - &self.data_path - } - - pub(crate) fn meta_path(&self) -> Result { - self.with_internal_errors(|| Ok(self.data_path.get_meta_path()?)) - } - - pub(crate) fn get_vectors_path(&self) -> Result { - self.with_internal_errors(|| Ok(self.data_path().get_vectors_path()?)) - } - pub(crate) fn as_existing(&self) -> Result { - if self.data_path().is_reserved() { + if self.graph_folder.is_reserved() { Ok(ExistingGraphFolder(self.clone())) } else { Err(PathValidationError::GraphNotExistsError( diff --git a/raphtory-storage/src/graph/graph.rs b/raphtory-storage/src/graph/graph.rs index aa27ba07b1..4b9de7875a 100644 --- a/raphtory-storage/src/graph/graph.rs +++ b/raphtory-storage/src/graph/graph.rs @@ -13,7 +13,7 @@ use crate::{ use db4_graph::TemporalGraph; use raphtory_api::core::entities::{properties::meta::Meta, LayerIds, LayerVariants, EID, VID}; use raphtory_core::entities::{nodes::node_ref::NodeRef, properties::graph_meta::GraphMeta}; -use std::{fmt::Debug, iter, sync::Arc}; +use std::{fmt::Debug, iter, path::Path, sync::Arc}; use storage::Extension; use thiserror::Error; @@ -94,7 +94,7 @@ impl GraphStorage { } } - pub fn disk_storage_enabled(&self) -> bool { + pub fn disk_storage_enabled(&self) -> Option<&Path> { match self { GraphStorage::Mem(graph) => graph.graph.disk_storage_enabled(), GraphStorage::Unlocked(graph) => graph.disk_storage_enabled(), diff --git a/raphtory/src/db/api/mutation/index_ops.rs b/raphtory/src/db/api/mutation/index_ops.rs index 0502bd7b29..1645c8b879 100644 --- a/raphtory/src/db/api/mutation/index_ops.rs +++ b/raphtory/src/db/api/mutation/index_ops.rs @@ -2,7 +2,7 @@ use crate::{ db::api::view::{IndexSpec, IndexSpecBuilder}, errors::GraphError, prelude::AdditionOps, - serialise::GraphFolder, + serialise::{GraphFolder, GraphPaths}, }; use std::{fs::File, path::Path}; use zip::ZipArchive; @@ -154,10 +154,10 @@ impl IndexMutationOps for G { }) } - fn persist_index_to_disk(&self, path: &GraphFolder) -> Result<(), GraphError> { + fn persist_index_to_disk(&self, path: &impl GraphPaths) -> Result<(), GraphError> { self.get_storage() .map_or(Err(GraphError::IndexingNotSupported), |storage| { - storage.persist_index_to_disk(&path)?; + storage.persist_index_to_disk(path)?; Ok(()) }) } diff --git a/raphtory/src/db/api/storage/graph/storage_ops/mod.rs b/raphtory/src/db/api/storage/graph/storage_ops/mod.rs index 9593817d11..5366d60ac8 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/mod.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/mod.rs @@ -1,5 +1,6 @@ use crate::db::api::{storage::storage::Storage, view::internal::InternalStorageOps}; use raphtory_storage::graph::graph::GraphStorage; +use std::path::Path; pub mod edge_filter; pub mod list_ops; @@ -14,7 +15,7 @@ impl InternalStorageOps for GraphStorage { None } - fn disk_storage_enabled(&self) -> bool { + fn disk_storage_enabled(&self) -> Option<&Path> { self.disk_storage_enabled() } } diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index adf3d02c24..e1fdbfce1c 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -37,6 +37,7 @@ use std::{ }; use storage::{Extension, WalImpl}; +use crate::serialise::GraphPaths; #[cfg(feature = "search")] use { crate::{ @@ -239,7 +240,7 @@ impl Storage { self.index.read_recursive().is_indexed() } - pub(crate) fn persist_index_to_disk(&self, path: &GraphFolder) -> Result<(), GraphError> { + pub(crate) fn persist_index_to_disk(&self, path: &impl GraphPaths) -> Result<(), GraphError> { let guard = self.get_index().read_recursive(); if guard.is_indexed() { if guard.path().is_none() { @@ -275,7 +276,7 @@ impl InternalStorageOps for Storage { Some(self) } - fn disk_storage_enabled(&self) -> bool { + fn disk_storage_enabled(&self) -> Option<&Path> { self.graph.disk_storage_enabled() } } diff --git a/raphtory/src/db/api/view/internal/materialize.rs b/raphtory/src/db/api/view/internal/materialize.rs index 670de56613..4985856887 100644 --- a/raphtory/src/db/api/view/internal/materialize.rs +++ b/raphtory/src/db/api/view/internal/materialize.rs @@ -100,7 +100,7 @@ impl InternalStorageOps for MaterializedGraph { for_all!(self, g => g.get_storage()) } - fn disk_storage_enabled(&self) -> bool { + fn disk_storage_enabled(&self) -> Option<&Path> { for_all!(self, g => g.disk_storage_enabled()) } } diff --git a/raphtory/src/db/api/view/internal/mod.rs b/raphtory/src/db/api/view/internal/mod.rs index 827007db45..9429942407 100644 --- a/raphtory/src/db/api/view/internal/mod.rs +++ b/raphtory/src/db/api/view/internal/mod.rs @@ -12,6 +12,7 @@ use crate::{ }; use std::{ fmt::{Debug, Formatter}, + path::Path, sync::Arc, }; @@ -25,6 +26,7 @@ mod one_hop_filter; pub(crate) mod time_semantics; mod wrapped_graph; +use crate::serialise::GraphFolder; pub use edge_filter_ops::*; pub use filter_ops::*; pub use into_dynamic::{IntoDynHop, IntoDynamic}; @@ -106,7 +108,7 @@ pub trait InternalStorageOps { /// Returns `true` if the underlying storage saves data to disk, /// or `false` if the storage is in-memory only. - fn disk_storage_enabled(&self) -> bool; + fn disk_storage_enabled(&self) -> Option<&Path>; } impl InternalStorageOps for G @@ -117,7 +119,7 @@ where self.base().get_storage() } - fn disk_storage_enabled(&self) -> bool { + fn disk_storage_enabled(&self) -> Option<&Path> { self.base().disk_storage_enabled() } } diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index bd17c0c0da..d87fe2116c 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -262,6 +262,9 @@ pub enum GraphError { #[error("Not a zip archive")] NotAZip, + #[error("Graph folder is not initialised for writing")] + NoWriteInProgress, + #[error("Failed to load graph: {0}")] LoadFailure(String), @@ -430,8 +433,8 @@ pub enum GraphError { #[error("Your window and step must be of the same type: duration (string) or epoch (int)")] MismatchedIntervalTypes, - #[error("Cannot initialize cache for zipped graph. Unzip the graph to initialize the cache.")] - ZippedGraphCannotBeCached, + #[error("Cannot swap zipped graph data")] + ZippedGraphCannotBeSwapped, } impl From for GraphError { diff --git a/raphtory/src/python/graph/views/graph_view.rs b/raphtory/src/python/graph/views/graph_view.rs index 976ccf8e99..85c2ac606b 100644 --- a/raphtory/src/python/graph/views/graph_view.rs +++ b/raphtory/src/python/graph/views/graph_view.rs @@ -478,9 +478,9 @@ impl PyGraphView { /// Materializes the graph view into a graphql compatible folder. fn materialize_to_graph_folder(&self, path: PathBuf) -> Result { let folder: GraphFolder = path.into(); - folder.init()?; + folder.init_write()?; - let graph = self.graph.materialize_at(Some(&folder.get_graph_path()))?; + let graph = self.graph.materialize_at(Some(&folder.get_graph_path()?))?; folder.write_metadata(&graph)?; Ok(graph) diff --git a/raphtory/src/search/graph_index.rs b/raphtory/src/search/graph_index.rs index 6bedbeace9..3aafcb31c1 100644 --- a/raphtory/src/search/graph_index.rs +++ b/raphtory/src/search/graph_index.rs @@ -7,7 +7,7 @@ use crate::{ errors::GraphError, prelude::*, search::{edge_index::EdgeIndex, node_index::NodeIndex, searcher::Searcher}, - serialise::{GraphFolder, INDEX_PATH}, + serialise::{GraphFolder, GraphPaths, InnerGraphFolder, INDEX_PATH}, }; use parking_lot::RwLock; use raphtory_api::core::storage::dict_mapper::MaybeNew; @@ -43,7 +43,7 @@ impl Index { #[derive(Clone)] pub struct ImmutableGraphIndex { pub(crate) index: Index, - pub(crate) path: Arc, + pub(crate) path: Arc, pub index_spec: Arc, } @@ -189,7 +189,7 @@ impl GraphIndex { let temp_dir = match cached_graph_path { // Creates index in a temp dir within cache graph dir. // The intention is to avoid creating index in a tmp dir that could be on another file system. - Some(path) => TempDir::new_in(path.get_base_path())?, + Some(path) => TempDir::new_in(path.root())?, None => TempDir::new()?, }; @@ -242,31 +242,12 @@ impl GraphIndex { } } - pub(crate) fn persist_to_disk(&self, path: &GraphFolder) -> Result<(), GraphError> { + pub(crate) fn persist_to_disk(&self, path: &impl GraphPaths) -> Result<(), GraphError> { let source_path = self.path().ok_or(GraphError::CannotPersistRamIndex)?; - let path = path.get_index_path(); - let path = path.as_path(); - - let temp_path = &path.with_extension(format!("tmp-{}", Uuid::new_v4())); - - copy_dir_recursive(&source_path, temp_path)?; - - // Always overwrite the existing graph index when persisting, since the in-memory - // working index may have newer updates. The persisted index is decoupled from the - // active one, and changes remain in memory unless explicitly saved. - // This behavior mirrors how the in-memory graph works — updates are not persisted - // unless manually saved, except when using the cached view (see db/graph/views/cached_view). - // This however is reached only when write_updates, otherwise graph is not allowed to be written to - // the existing location anyway. See GraphError::NonEmptyGraphFolder. - if path.exists() { - fs::remove_dir_all(path) - .map_err(|_e| GraphError::FailedToRemoveExistingGraphIndex(path.to_path_buf()))?; + let path = path.index_path()?; + if source_path != path { + copy_dir_recursive(&source_path, &path)?; } - - fs::rename(temp_path, path).map_err(|e| { - GraphError::IOErrorMsg(format!("Failed to rename temp index folder: {}", e)) - })?; - Ok(()) } @@ -314,10 +295,10 @@ impl GraphIndex { pub fn make_mutable_if_needed(&mut self) -> Result<(), GraphError> { if let GraphIndex::Immutable(immutable) = self { - let temp_dir = TempDir::new_in(&immutable.path.get_base_path())?; + let temp_dir = TempDir::new_in(immutable.path.as_ref())?; let temp_path = temp_dir.path(); - copy_dir_recursive(&immutable.path.get_index_path(), temp_path)?; + copy_dir_recursive(&immutable.path.index_path(), temp_path)?; let node_index = NodeIndex::load_from_path(&temp_path.join("nodes"))?; let edge_index = EdgeIndex::load_from_path(&temp_path.join("edges"))?; @@ -350,7 +331,7 @@ impl GraphIndex { pub fn path(&self) -> Option { match self { - GraphIndex::Immutable(i) => Some(i.path.get_index_path()), + GraphIndex::Immutable(i) => Some(i.path.index_path()), GraphIndex::Mutable(m) => m.path.as_ref().map(|p| p.path().to_path_buf()), GraphIndex::Empty => None, } diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 0de8b758c8..48185da28f 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -1,16 +1,19 @@ use crate::{ - db::api::view::{internal::GraphView, MaterializedGraph}, + db::{ + api::view::{internal::GraphView, MaterializedGraph}, + graph::views::deletion_graph::PersistentGraph, + }, errors::GraphError, - prelude::{Graph, GraphViewOps, ParquetEncoder, PropertiesOps, StableEncode}, + prelude::{Graph, GraphViewOps, ParquetDecoder, ParquetEncoder, PropertiesOps, StableEncode}, serialise::{metadata::GraphMetadata, serialise::StableDecode}, }; +use raphtory_api::GraphType; use serde::{Deserialize, Serialize}; use std::{ - fs::{self, File, OpenOptions}, - io::{self, BufReader, BufWriter, ErrorKind, Read, Seek, Write}, + fs::{self, File}, + io::{self, ErrorKind, Read, Seek, Write}, path::{Path, PathBuf}, }; -use tracing::info; use walkdir::WalkDir; use zip::{ write::{FileOptions, SimpleFileOptions}, @@ -79,7 +82,7 @@ pub fn make_data_path(base_path: &Path, prefix: &str) -> Result) -> Result { +fn get_zip_data_path(zip: &mut ZipArchive) -> Result { let file = zip.by_name(META_PATH)?; Ok(read_path_from_file(file)?) } @@ -121,7 +124,36 @@ pub struct RelativePath { #[derive(Debug, Serialize, Deserialize)] pub struct Metadata { pub path: String, - pub meta: Option, + pub meta: GraphMetadata, +} + +pub trait GraphPaths { + fn root(&self) -> &Path; + + fn root_meta_path(&self) -> PathBuf { + self.root().join(META_PATH) + } + + fn data_path(&self) -> Result; + fn vectors_path(&self) -> Result { + Ok(self.data_path()?.vectors_path()) + } + + fn index_path(&self) -> Result { + Ok(self.data_path()?.index_path()) + } + + fn graph_path(&self) -> Result { + self.data_path()?.graph_path() + } + + fn relative_graph_path(&self) -> Result { + self.data_path()?.relative_graph_path() + } + + fn meta_path(&self) -> Result { + Ok(self.data_path()?.meta_path()) + } } /// A container for managing graph data. @@ -144,6 +176,20 @@ pub struct GraphFolder { pub(crate) write_as_zip_format: bool, } +impl GraphPaths for GraphFolder { + fn root(&self) -> &Path { + &self.root_folder + } + + fn data_path(&self) -> Result { + let relative = + read_data_path(&self.root_folder)?.unwrap_or_else(|| DATA_PATH.to_string() + "0"); + Ok(InnerGraphFolder { + path: self.root_folder.join(relative), + }) + } +} + impl GraphFolder { pub fn new_as_zip(path: impl AsRef) -> Self { let folder: GraphFolder = path.into(); @@ -153,38 +199,74 @@ impl GraphFolder { } } - pub fn root(&self) -> &Path { - &self.root_folder - } - /// Reserve a folder, marking it as occupied by a graph. /// Returns an error if the folder has data. - pub fn init(&self) -> Result<(), GraphError> { + pub fn init_write(self) -> Result { + if self.write_as_zip_format { + return Err(GraphError::ZippedGraphCannotBeSwapped); + } let relative_data_path = self.get_relative_data_path()?; let meta = serde_json::to_string(&RelativePath { path: relative_data_path.clone(), })?; - if self.write_as_zip_format { - let file = File::create_new(&self.root_folder)?; - let mut zip = ZipWriter::new(BufWriter::new(file)); - zip.start_file(META_PATH, file_opts())?; - zip.write_all(meta.as_bytes())?; - zip.add_directory(relative_data_path, file_opts())?; - zip.flush()?; - } else { - self.ensure_clean_root_dir()?; - let data_path = self.root_folder.join(META_PATH); - let mut path_file = File::create_new(&data_path)?; - path_file.write_all(meta.as_bytes())?; + self.ensure_clean_root_dir()?; + let metapath = self.root_folder.join(DIRTY_PATH); + let mut path_file = File::create_new(&metapath)?; + path_file.write_all(meta.as_bytes())?; + fs::create_dir_all(self.root_folder.join(relative_data_path))?; + Ok(WriteableGraphFolder { + path: self.root_folder, + }) + } - fs::create_dir_all(&data_path)?; + /// Prepare a graph folder for atomically swapping the data contents. + /// This returns an error if the folder is set to write as Zip. + /// + /// If a swap is already in progress (i.e., `.dirty` file exists) it is aborted and + /// the contents of the corresponding folder are deleted. + pub fn init_swap(self) -> Result { + if self.write_as_zip_format { + return Err(GraphError::ZippedGraphCannotBeSwapped); } - Ok(()) + let old_swap = match read_dirty_path(self.root()) { + Ok(path) => path, + Err(_) => { + fs::remove_file(self.root_folder.join(DIRTY_PATH))?; // dirty file is corrupted, clean it up + None + } + }; + + fs::create_dir_all(self.root())?; + + let swap_path = match old_swap { + Some(relative_path) => { + let swap_path = self.root_folder.join(relative_path); + if swap_path.exists() { + fs::remove_dir_all(&swap_path)?; + } + swap_path + } + None => { + let new_relative_data_path = make_data_path(self.root(), DATA_PATH)?; + let new_data_path = self.root_folder.join(&new_relative_data_path); + let meta = serde_json::to_string(&RelativePath { + path: new_relative_data_path, + })?; + let mut dirty_file = File::create_new(self.root_folder.join(DIRTY_PATH))?; + dirty_file.write_all(meta.as_bytes())?; + dirty_file.sync_all()?; + new_data_path + } + }; + fs::create_dir_all(swap_path)?; + Ok(WriteableGraphFolder { + path: self.root_folder, + }) } /// Returns true if folder is occupied by a graph. pub fn is_reserved(&self) -> bool { - self.get_meta_path().map_or(false, |path| path.exists()) + self.meta_path().map_or(false, |path| path.exists()) } /// Clears the folder of any contents. @@ -210,52 +292,17 @@ impl GraphFolder { Ok(path) } - pub fn get_data_path(&self) -> Result { - let relative = - read_data_path(&self.root_folder)?.unwrap_or_else(|| DATA_PATH.to_string() + "0"); - Ok(self.root_folder.join(relative)) - } - - pub fn get_graph_path(&self) -> Result { - let mut path = self.get_data_path()?; - let relative = read_data_path(&path)?.unwrap_or_else(|| GRAPH_PATH.to_string() + "0"); - path.push(relative); - Ok(path) - } - pub fn get_relative_graph_path(&self) -> Result { if self.is_zip() { let mut zip = self.read_zip()?; let data_path = get_zip_data_path(&mut zip)?; get_zip_graph_path_name(&mut zip, data_path) } else { - let data_path = self.get_data_path()?; - Ok(read_data_path(&data_path)?.unwrap_or_else(|| GRAPH_PATH.to_string() + "0")) + let data_path = self.data_path()?; + Ok(read_data_path(data_path.as_ref())?.unwrap_or_else(|| GRAPH_PATH.to_string() + "0")) } } - pub fn get_meta_path(&self) -> Result { - let mut path = self.get_data_path()?; - path.push(META_PATH); - Ok(path) - } - - pub fn get_index_path(&self) -> Result { - let mut path = self.get_data_path()?; - path.push(INDEX_PATH); - Ok(path) - } - - pub fn get_vectors_path(&self) -> Result { - let mut path = self.get_data_path()?; - path.push(VECTORS_PATH); - Ok(path) - } - - pub fn get_base_path(&self) -> &Path { - &self.root_folder - } - pub fn is_zip(&self) -> bool { self.root_folder.is_file() } @@ -270,28 +317,6 @@ impl GraphFolder { } } - pub fn replace_graph(&self, graph: impl ParquetEncoder + GraphView) -> Result<(), GraphError> { - let data_path = self.get_data_path()?; - let old_graph_path = read_data_path(&data_path)?; - let new_graph_path = make_data_path(&data_path, GRAPH_PATH)?; - let meta = Some(GraphMetadata::from_graph(&graph)); - let new_graph_folder = data_path.join(&new_graph_path); - let dirty_path = data_path.join(DIRTY_PATH); - fs::write( - &dirty_path, - &serde_json::to_vec(&Metadata { - path: new_graph_path, - meta, - })?, - )?; - graph.encode_parquet(&new_graph_folder)?; - fs::rename(&dirty_path, data_path.join(META_PATH))?; - if let Some(old_graph_path) = old_graph_path { - fs::remove_dir_all(old_graph_path)?; - } - Ok(()) - } - pub fn read_metadata(&self) -> Result { let mut json = String::new(); if self.is_zip() { @@ -300,11 +325,11 @@ impl GraphFolder { let mut zip_file = zip.by_name(&path)?; zip_file.read_to_string(&mut json)?; } else { - let mut file = File::open(self.get_meta_path()?)?; + let mut file = File::open(self.meta_path()?)?; file.read_to_string(&mut json)?; } - let metadata = serde_json::from_str(&json)?; - Ok(metadata) + let metadata: Metadata = serde_json::from_str(&json)?; + Ok(metadata.meta) } pub fn write_metadata<'graph>( @@ -316,20 +341,17 @@ impl GraphFolder { let metadata = GraphMetadata::from_graph(graph); let meta = Metadata { path: graph_path, - meta: Some(metadata), + meta: metadata, }; if self.write_as_zip_format { - let file = File::options() - .read(true) - .write(true) - .open(&self.get_base_path())?; + let file = File::options().read(true).write(true).open(&self.root())?; let mut zip = ZipWriter::new_append(file)?; zip.start_file::<_, ()>(META_PATH, FileOptions::default())?; Ok(serde_json::to_writer(zip, &meta)?) } else { - let path = self.get_meta_path()?; + let path = self.meta_path()?; let file = File::create(path.clone())?; Ok(serde_json::to_writer(file, &meta)?) } @@ -391,18 +413,153 @@ impl GraphFolder { Ok(()) } +} - /// Extracts a zip file to the folder. - pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphError> { - if self.write_as_zip_format { - return Err(GraphError::IOErrorMsg( - "Cannot unzip to a zip format folder".to_string(), - )); +#[must_use] +#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq)] +pub struct WriteableGraphFolder { + path: PathBuf, +} + +impl GraphPaths for WriteableGraphFolder { + fn root(&self) -> &Path { + &self.path + } + + fn data_path(&self) -> Result { + let path = read_dirty_path(self.root())?.ok_or(GraphError::NoWriteInProgress)?; + Ok(InnerGraphFolder { + path: self.root().join(path), + }) + } +} + +impl WriteableGraphFolder { + /// Finalise an in-progress write by atomically renaming the '.dirty' file to '.raph' + /// and cleaning up any old data if it exists. + /// + /// This operation returns an error if there is no write in progress. + pub fn finish(self) -> Result { + let old_data = read_data_path(self.root())?; + fs::rename(self.root().join(DIRTY_PATH), self.root().join(META_PATH))?; + if let Some(old_data) = old_data { + let old_data_path = self.root().join(old_data); + if old_data_path.is_dir() { + fs::remove_dir_all(old_data_path)?; + } } + Ok(GraphFolder { + root_folder: self.path, + write_as_zip_format: false, + }) + } +} + +pub struct InnerGraphFolder { + path: PathBuf, +} + +impl AsRef for InnerGraphFolder { + fn as_ref(&self) -> &Path { + &self.path + } +} + +impl InnerGraphFolder { + pub fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { + let graph_path = self.relative_graph_path()?; + let metadata = GraphMetadata::from_graph(graph); + let meta = Metadata { + path: graph_path, + meta: metadata, + }; + let path = self.meta_path(); + let file = File::create(&path)?; + Ok(serde_json::to_writer(file, &meta)?) + } + + pub fn read_metadata(&self) -> Result { + let mut json = String::new(); + let mut file = File::open(self.meta_path())?; + file.read_to_string(&mut json)?; + let metadata: Metadata = serde_json::from_str(&json)?; + Ok(metadata.meta) + } + pub fn replace_graph(&self, graph: impl ParquetEncoder + GraphView) -> Result<(), GraphError> { + let data_path = self.as_ref(); + let old_relative_graph_path = self.relative_graph_path()?; + let old_graph_path = self.path.join(&old_relative_graph_path); + let new_relative_graph_path = match graph.disk_storage_enabled() { + None => { + let new_graph_path = make_data_path(&data_path, GRAPH_PATH)?; + graph.encode_parquet(&data_path.join(&new_graph_path))?; + new_graph_path + } + Some(path) => { + if path != old_graph_path { + let new_graph_path = make_data_path(&data_path, GRAPH_PATH)?; + graph.materialize_at(Some(&data_path.join(&new_graph_path)))?; + new_graph_path + } else { + old_relative_graph_path.clone() + } + } + }; + let meta = GraphMetadata::from_graph(&graph); + let dirty_path = data_path.join(DIRTY_PATH); + fs::write( + &dirty_path, + &serde_json::to_vec(&Metadata { + path: new_relative_graph_path.clone(), + meta, + })?, + )?; + fs::rename(&dirty_path, data_path.join(META_PATH))?; + if new_relative_graph_path != old_relative_graph_path { + fs::remove_dir_all(old_graph_path)?; + } + Ok(()) + } + pub fn vectors_path(&self) -> PathBuf { + self.path.join(VECTORS_PATH) + } + + pub fn index_path(&self) -> PathBuf { + self.path.join(INDEX_PATH) + } + + pub fn meta_path(&self) -> PathBuf { + self.path.join(META_PATH) + } + + pub fn relative_graph_path(&self) -> Result { + let relative = read_data_path(&self.path)?.unwrap_or_else(|| GRAPH_PATH.to_owned() + "0"); + Ok(relative) + } + + pub fn graph_path(&self) -> Result { + Ok(self.path.join(self.relative_graph_path()?)) + } + + fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { + if self.as_ref().exists() { + let non_empty = self.as_ref().read_dir()?.next().is_some(); + if non_empty { + return Err(GraphError::NonEmptyGraphFolder(self.as_ref().to_path_buf())); + } + } else { + fs::create_dir_all(self)? + } + Ok(()) + } + + /// Extracts a zip file to the folder. + pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphError> { self.ensure_clean_root_dir()?; let mut zip = ZipArchive::new(reader)?; + let data_dir = get_zip_data_path(&mut zip)?; for i in 0..zip.len() { let mut file = zip.by_index(i)?; @@ -410,24 +567,42 @@ impl GraphFolder { Some(name) => name, None => continue, }; - - let out_path = self.root_folder.join(zip_entry_name); - - if file.is_dir() { - std::fs::create_dir_all(&out_path)?; - } else { - // Create any parent directories - if let Some(parent) = out_path.parent() { - std::fs::create_dir_all(parent)?; + if let Ok(inner_path) = zip_entry_name.strip_prefix(&data_dir) { + let out_path = self.as_ref().join(inner_path); + if file.is_dir() { + std::fs::create_dir_all(&out_path)?; + } else { + // Create any parent directories + if let Some(parent) = out_path.parent() { + std::fs::create_dir_all(parent)?; + } + + let mut out_file = std::fs::File::create(&out_path)?; + std::io::copy(&mut file, &mut out_file)?; } - - let mut out_file = std::fs::File::create(&out_path)?; - std::io::copy(&mut file, &mut out_file)?; } } Ok(()) } + + pub fn read_graph(&self) -> Result { + let meta = self.read_metadata()?; + let graph = if meta.is_diskgraph { + match meta.graph_type { + GraphType::EventGraph => { + MaterializedGraph::EventGraph(Graph::load_from_path(self.graph_path()?)) + } + GraphType::PersistentGraph => MaterializedGraph::PersistentGraph( + PersistentGraph::load_from_path(self.graph_path()?), + ), + } + } else { + MaterializedGraph::decode_parquet(self.graph_path()?, None)? + }; + // FIXME: load index + Ok(graph) + } } impl> From

for GraphFolder { diff --git a/raphtory/src/serialise/metadata.rs b/raphtory/src/serialise/metadata.rs index 388e8e549f..4f4fe63175 100644 --- a/raphtory/src/serialise/metadata.rs +++ b/raphtory/src/serialise/metadata.rs @@ -24,7 +24,7 @@ impl GraphMetadata { let edge_count = graph.count_edges(); let metadata = graph.metadata().as_vec(); let graph_type = graph.graph_type(); - let is_diskgraph = graph.disk_storage_enabled(); + let is_diskgraph = graph.disk_storage_enabled().is_some(); Self { node_count, edge_count, diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index 928197fd77..f88b478b7e 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -11,6 +11,7 @@ use tempfile; #[cfg(feature = "search")] use crate::prelude::IndexMutationOps; +use crate::serialise::GraphPaths; pub trait StableEncode: StaticGraphViewOps + AdditionOps { /// Encode the graph into bytes. @@ -35,21 +36,19 @@ impl StableEncode for T { let folder: GraphFolder = path.into(); if folder.write_as_zip_format { - let file = File::create_new(&folder.get_base_path())?; + let file = File::create_new(&folder.root())?; self.encode_parquet_to_zip(file)?; - #[cfg(feature = "search")] self.persist_index_to_disk_zip(&folder)?; + folder.write_metadata(self)?; } else { - folder.init()?; - self.encode_parquet(&folder.get_graph_path()?)?; - + let write_folder = folder.init_write()?; + self.encode_parquet(write_folder.graph_path()?)?; #[cfg(feature = "search")] - self.persist_index_to_disk(&folder)?; + self.persist_index_to_disk(&write_folder)?; + write_folder.data_path()?.write_metadata(self)?; + write_folder.finish()?; } - - folder.write_metadata(self)?; - Ok(()) } } @@ -97,10 +96,10 @@ impl StableDecode for T { let folder: GraphFolder = path.into(); if folder.is_zip() { - let reader = std::fs::File::open(&folder.get_base_path())?; + let reader = std::fs::File::open(&folder.root())?; graph = Self::decode_parquet_from_zip(reader, path_for_decoded_graph)?; } else { - graph = Self::decode_parquet(&folder.get_graph_path()?, path_for_decoded_graph)?; + graph = Self::decode_parquet(&folder.graph_path()?, path_for_decoded_graph)?; } #[cfg(feature = "search")] From f4e3cb80ef0486bc9e13241a379425608e85309a Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 10 Dec 2025 12:55:41 +0100 Subject: [PATCH 11/39] fix zip encoding/decoding --- raphtory-graphql/src/lib.rs | 2 +- raphtory-graphql/src/paths.rs | 54 ++++--- raphtory-graphql/src/url_encode.rs | 2 +- raphtory/src/errors.rs | 3 + raphtory/src/python/graph/views/graph_view.rs | 10 +- raphtory/src/serialise/graph_folder.rs | 141 ++++++++++-------- raphtory/src/serialise/parquet/mod.rs | 72 +++------ raphtory/src/serialise/serialise.rs | 99 +++++++----- 8 files changed, 202 insertions(+), 181 deletions(-) diff --git a/raphtory-graphql/src/lib.rs b/raphtory-graphql/src/lib.rs index 7aeb6975d6..44d0db8d00 100644 --- a/raphtory-graphql/src/lib.rs +++ b/raphtory-graphql/src/lib.rs @@ -1001,7 +1001,7 @@ mod graphql_test { )); let res = schema.execute(req).await; - assert_eq!(res.errors.len(), 0); + assert_eq!(res.errors, []); let res_json = res.data.into_json().unwrap(); assert_eq!(res_json, json!({"sendGraph": "test"})); diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index 9600a1c18a..7eecadcd50 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -69,8 +69,12 @@ impl GraphPaths for ExistingGraphFolder { self.0.root() } - fn data_path(&self) -> Result { - self.0.data_path() + fn relative_data_path(&self) -> Result { + self.0.relative_data_path() + } + + fn relative_graph_path(&self) -> Result { + self.0.relative_graph_path() } } @@ -101,7 +105,7 @@ impl ExistingGraphFolder { let graph_folder: GraphFolder = base_path.into_path().into(); if graph_folder.is_reserved() { Ok(Self(ValidGraphFolder { - graph_folder: graph_folder, + global_path: graph_folder, local_path: relative_path.to_string(), })) } else { @@ -115,7 +119,7 @@ impl ExistingGraphFolder { &self, graph: MaterializedGraph, ) -> Result<(), InternalPathValidationError> { - self.graph_folder.data_path()?.replace_graph(graph)?; + self.global_path.data_path()?.replace_graph(graph)?; Ok(()) } pub fn replace_graph_data(&self, graph: MaterializedGraph) -> Result<(), PathValidationError> { @@ -126,7 +130,7 @@ impl ExistingGraphFolder { #[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] pub struct ValidGraphFolder { - graph_folder: GraphFolder, + global_path: GraphFolder, local_path: String, } @@ -276,18 +280,22 @@ impl CleanupPath { #[derive(Clone, Debug)] pub(crate) struct ValidWriteableGraphFolder { - data_path: WriteableGraphFolder, + global_path: WriteableGraphFolder, local_path: String, dirty_marker: Option, } impl GraphPaths for ValidWriteableGraphFolder { fn root(&self) -> &Path { - self.data_path.root() + self.global_path.root() + } + + fn relative_data_path(&self) -> Result { + self.global_path.relative_data_path() } - fn data_path(&self) -> Result { - self.data_path.data_path() + fn relative_graph_path(&self) -> Result { + self.global_path.relative_data_path() } } @@ -311,7 +319,7 @@ impl ValidWriteableGraphFolder { } let data_path = graph_folder.init_swap()?; Ok(Self { - data_path, + global_path: data_path, dirty_marker: valid_path.cleanup, local_path: graph_name.to_string(), }) @@ -360,7 +368,7 @@ impl ValidWriteableGraphFolder { &self, graph: MaterializedGraph, ) -> Result<(), InternalPathValidationError> { - self.data_path.data_path()?.replace_graph(graph)?; + self.global_path.data_path()?.replace_graph(graph)?; Ok(()) } pub fn write_graph_data(&self, graph: MaterializedGraph) -> Result<(), PathValidationError> { @@ -376,7 +384,7 @@ impl ValidWriteableGraphFolder { &self, bytes: R, ) -> Result<(), PathValidationError> { - self.data_path + self.global_path .data_path() .with_path(&self.local_path)? .unzip_to_folder(bytes) @@ -385,12 +393,12 @@ impl ValidWriteableGraphFolder { /// Swap old and new data and delete the old graph pub fn finish(self) -> Result { - let data_path = self.data_path.finish().with_path(&self.local_path)?; + let data_path = self.global_path.finish().with_path(&self.local_path)?; if let Some(cleanup) = self.dirty_marker.as_ref() { cleanup.persist().with_path(&self.local_path)?; } Ok(ValidGraphFolder { - graph_folder: data_path, + global_path: data_path, local_path: self.local_path, }) } @@ -555,11 +563,15 @@ pub(crate) fn mark_dirty(path: &Path) -> Result &Path { - self.graph_folder.root() + self.global_path.root() + } + + fn relative_data_path(&self) -> Result { + self.global_path.relative_data_path() } - fn data_path(&self) -> Result { - self.graph_folder.data_path() + fn relative_graph_path(&self) -> Result { + self.global_path.relative_graph_path() } } @@ -578,7 +590,7 @@ impl ValidGraphFolder { } pub fn graph_folder(&self) -> &GraphFolder { - &self.graph_folder + &self.global_path } pub fn created(&self) -> Result { self.with_internal_errors(|| { @@ -588,7 +600,7 @@ impl ValidGraphFolder { pub fn last_opened(&self) -> Result { self.with_internal_errors(|| { - Ok(fs::metadata(self.graph_folder.meta_path()?)? + Ok(fs::metadata(self.global_path.meta_path()?)? .accessed()? .to_millis()?) }) @@ -616,7 +628,7 @@ impl ValidGraphFolder { } pub async fn read_metadata_async(&self) -> Result { - let folder: GraphFolder = self.graph_folder.clone(); + let folder: GraphFolder = self.global_path.clone(); blocking_compute(move || folder.read_metadata()) .await .with_path(self.local_path()) @@ -648,7 +660,7 @@ impl ValidGraphFolder { Ok(name) } pub(crate) fn as_existing(&self) -> Result { - if self.graph_folder.is_reserved() { + if self.global_path.is_reserved() { Ok(ExistingGraphFolder(self.clone())) } else { Err(PathValidationError::GraphNotExistsError( diff --git a/raphtory-graphql/src/url_encode.rs b/raphtory-graphql/src/url_encode.rs index 0cd177285a..6daa9f07d8 100644 --- a/raphtory-graphql/src/url_encode.rs +++ b/raphtory-graphql/src/url_encode.rs @@ -21,7 +21,7 @@ pub enum UrlDecodeError { pub fn url_encode_graph>(graph: G) -> Result { let g: MaterializedGraph = graph.into(); - let bytes = g.encode_to_bytes(); + let bytes = g.encode_to_bytes()?; Ok(BASE64_URL_SAFE.encode(bytes)) } diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index d87fe2116c..f39b786595 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -435,6 +435,9 @@ pub enum GraphError { #[error("Cannot swap zipped graph data")] ZippedGraphCannotBeSwapped, + + #[error("Invalid prefix, expected '{expected}', got '{actual}'")] + InvalidPrefix { expected: String, actual: String }, } impl From for GraphError { diff --git a/raphtory/src/python/graph/views/graph_view.rs b/raphtory/src/python/graph/views/graph_view.rs index 85c2ac606b..b0d49ef00a 100644 --- a/raphtory/src/python/graph/views/graph_view.rs +++ b/raphtory/src/python/graph/views/graph_view.rs @@ -39,7 +39,7 @@ use crate::{ types::repr::{Repr, StructReprBuilder}, utils::PyNodeRef, }, - serialise::GraphFolder, + serialise::{GraphFolder, GraphPaths}, }; use chrono::prelude::*; use pyo3::prelude::*; @@ -478,11 +478,11 @@ impl PyGraphView { /// Materializes the graph view into a graphql compatible folder. fn materialize_to_graph_folder(&self, path: PathBuf) -> Result { let folder: GraphFolder = path.into(); - folder.init_write()?; - - let graph = self.graph.materialize_at(Some(&folder.get_graph_path()?))?; - folder.write_metadata(&graph)?; + let write = folder.init_write()?; + let graph = self.graph.materialize_at(Some(&write.graph_path()?))?; + write.data_path()?.write_metadata(&graph)?; + write.finish()?; Ok(graph) } diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 48185da28f..4dec5840d7 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -22,8 +22,10 @@ use zip::{ /// Stores graph data pub const GRAPH_PATH: &str = "graph"; +pub const DEFAULT_GRAPH_PATH: &str = "graph0"; pub const DATA_PATH: &str = "data"; +pub const DEFAULT_DATA_PATH: &str = "data0"; /// Stores graph metadata pub const META_PATH: &str = ".raph"; @@ -82,21 +84,37 @@ pub fn make_data_path(base_path: &Path, prefix: &str) -> Result(zip: &mut ZipArchive) -> Result { +pub fn read_or_default_data_path(base_path: &Path, prefix: &str) -> Result { + match read_data_path(base_path)? { + None => Ok(prefix.to_owned() + "0"), + Some(path) => { + if path.starts_with(prefix) { + Ok(path) + } else { + Err(GraphError::InvalidPrefix { + expected: prefix.to_owned(), + actual: path, + }) + } + } + } +} + +pub fn get_zip_data_path(zip: &mut ZipArchive) -> Result { let file = zip.by_name(META_PATH)?; Ok(read_path_from_file(file)?) } -fn get_zip_graph_path(zip: &mut ZipArchive) -> Result { +pub fn get_zip_graph_path(zip: &mut ZipArchive) -> Result { let mut path = get_zip_data_path(zip)?; let graph_path = get_zip_graph_path_name(zip, path.clone())?; path.push('/'); path.push_str(&graph_path); - Ok(graph_path) + Ok(path) } -fn get_zip_graph_path_name( - zip: &mut ZipArchive, +pub fn get_zip_graph_path_name( + zip: &mut ZipArchive, mut data_path: String, ) -> Result { data_path.push('/'); @@ -105,7 +123,7 @@ fn get_zip_graph_path_name( Ok(graph_path) } -fn get_zip_meta_path(zip: &mut ZipArchive) -> Result { +pub fn get_zip_meta_path(zip: &mut ZipArchive) -> Result { let mut path = get_zip_graph_path(zip)?; path.push('/'); path.push_str(META_PATH); @@ -134,25 +152,37 @@ pub trait GraphPaths { self.root().join(META_PATH) } - fn data_path(&self) -> Result; + fn data_path(&self) -> Result { + Ok(InnerGraphFolder { + path: self.root().join(self.relative_data_path()?), + }) + } + + fn relative_data_path(&self) -> Result; fn vectors_path(&self) -> Result { - Ok(self.data_path()?.vectors_path()) + let mut path = self.data_path()?.path; + path.push(VECTORS_PATH); + Ok(path) } fn index_path(&self) -> Result { - Ok(self.data_path()?.index_path()) + let mut path = self.data_path()?.path; + path.push(INDEX_PATH); + Ok(path) } fn graph_path(&self) -> Result { - self.data_path()?.graph_path() + let mut path = self.data_path()?.path; + path.push(self.relative_graph_path()?); + Ok(path) } - fn relative_graph_path(&self) -> Result { - self.data_path()?.relative_graph_path() - } + fn relative_graph_path(&self) -> Result; fn meta_path(&self) -> Result { - Ok(self.data_path()?.meta_path()) + let mut path = self.data_path()?.path; + path.push(META_PATH); + Ok(path) } } @@ -181,12 +211,25 @@ impl GraphPaths for GraphFolder { &self.root_folder } - fn data_path(&self) -> Result { - let relative = - read_data_path(&self.root_folder)?.unwrap_or_else(|| DATA_PATH.to_string() + "0"); - Ok(InnerGraphFolder { - path: self.root_folder.join(relative), - }) + fn relative_data_path(&self) -> Result { + let path = if self.is_zip() { + let mut zip = self.read_zip()?; + get_zip_data_path(&mut zip)? + } else { + read_or_default_data_path(self.root(), DATA_PATH)? + }; + Ok(path) + } + + fn relative_graph_path(&self) -> Result { + if self.is_zip() { + let mut zip = self.read_zip()?; + let data_path = get_zip_data_path(&mut zip)?; + get_zip_graph_path_name(&mut zip, data_path) + } else { + let data_path = self.data_path()?; + read_or_default_data_path(data_path.as_ref(), GRAPH_PATH) + } } } @@ -205,7 +248,7 @@ impl GraphFolder { if self.write_as_zip_format { return Err(GraphError::ZippedGraphCannotBeSwapped); } - let relative_data_path = self.get_relative_data_path()?; + let relative_data_path = self.relative_data_path()?; let meta = serde_json::to_string(&RelativePath { path: relative_data_path.clone(), })?; @@ -282,24 +325,14 @@ impl GraphFolder { Ok(()) } - pub fn get_relative_data_path(&self) -> Result { - let path = if self.is_zip() { - let mut zip = self.read_zip()?; - get_zip_data_path(&mut zip)? - } else { - read_data_path(&self.root_folder)?.unwrap_or_else(|| DATA_PATH.to_string() + "0") - }; - Ok(path) - } - - pub fn get_relative_graph_path(&self) -> Result { + pub fn get_zip_graph_prefix(&self) -> Result { if self.is_zip() { let mut zip = self.read_zip()?; - let data_path = get_zip_data_path(&mut zip)?; - get_zip_graph_path_name(&mut zip, data_path) + Ok([get_zip_data_path(&mut zip)?, get_zip_graph_path(&mut zip)?].join("/")) } else { - let data_path = self.data_path()?; - Ok(read_data_path(data_path.as_ref())?.unwrap_or_else(|| GRAPH_PATH.to_string() + "0")) + let data_path = read_or_default_data_path(self.root(), DATA_PATH)?; + let graph_path = read_or_default_data_path(&self.root().join(&data_path), GRAPH_PATH)?; + Ok([data_path, graph_path].join("/")) } } @@ -332,31 +365,6 @@ impl GraphFolder { Ok(metadata.meta) } - pub fn write_metadata<'graph>( - &self, - graph: &impl GraphViewOps<'graph>, - ) -> Result<(), GraphError> { - let graph_path = self.get_relative_graph_path()?; - let data_path = self.get_relative_data_path()?; - let metadata = GraphMetadata::from_graph(graph); - let meta = Metadata { - path: graph_path, - meta: metadata, - }; - - if self.write_as_zip_format { - let file = File::options().read(true).write(true).open(&self.root())?; - let mut zip = ZipWriter::new_append(file)?; - - zip.start_file::<_, ()>(META_PATH, FileOptions::default())?; - Ok(serde_json::to_writer(zip, &meta)?) - } else { - let path = self.meta_path()?; - let file = File::create(path.clone())?; - Ok(serde_json::to_writer(file, &meta)?) - } - } - fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { if self.root_folder.exists() { let non_empty = self.root_folder.read_dir()?.next().is_some(); @@ -426,11 +434,14 @@ impl GraphPaths for WriteableGraphFolder { &self.path } - fn data_path(&self) -> Result { + fn relative_data_path(&self) -> Result { let path = read_dirty_path(self.root())?.ok_or(GraphError::NoWriteInProgress)?; - Ok(InnerGraphFolder { - path: self.root().join(path), - }) + Ok(path) + } + + fn relative_graph_path(&self) -> Result { + let path = read_or_default_data_path(&self.data_path()?.as_ref(), GRAPH_PATH)?; + Ok(path) } } diff --git a/raphtory/src/serialise/parquet/mod.rs b/raphtory/src/serialise/parquet/mod.rs index 93eb4740fb..f6b17da266 100644 --- a/raphtory/src/serialise/parquet/mod.rs +++ b/raphtory/src/serialise/parquet/mod.rs @@ -46,7 +46,7 @@ use std::{ sync::Arc, }; use walkdir::WalkDir; -use zip::{write::FileOptions, ZipWriter}; +use zip::{write::FileOptions, ZipArchive, ZipWriter}; mod edges; mod model; @@ -55,23 +55,20 @@ mod nodes; mod graph; pub trait ParquetEncoder { - fn encode_parquet_to_bytes(&self) -> Result, GraphError> { - // Write directly to an in-memory cursor - let mut zip_buffer = Vec::new(); - let cursor = std::io::Cursor::new(&mut zip_buffer); - - self.encode_parquet_to_zip(cursor)?; - - Ok(zip_buffer) - } - - fn encode_parquet_to_zip(&self, writer: W) -> Result<(), GraphError> { + /// Encode the graph as parquet data to the zip writer + /// (note the writer is still open for appending more data after calling this function) + /// + /// The graph data will be written at `prefix` inside the zip. + fn encode_parquet_to_zip>( + &self, + mut zip_writer: &mut ZipWriter, + prefix: P, + ) -> Result<(), GraphError> { + let prefix = prefix.as_ref(); // Encode to a tmp dir using parquet, then zip it to the writer let temp_dir = tempfile::tempdir()?; self.encode_parquet(&temp_dir)?; - let mut zip_writer = ZipWriter::new(writer); - // Walk through the directory and add files and directories to the zip. // Files and directories are stored in the archive under the GRAPH_PATH directory. for entry in WalkDir::new(temp_dir.path()) @@ -85,10 +82,7 @@ pub trait ParquetEncoder { })?; // Attach GRAPH_PATH as a prefix to the relative path - let zip_entry_name = PathBuf::from(GRAPH_PATH) - .join(relative_path) - .to_string_lossy() - .into_owned(); + let zip_entry_name = prefix.join(relative_path).to_string_lossy().into_owned(); if path.is_file() { zip_writer.start_file::<_, ()>(zip_entry_name, FileOptions::<()>::default())?; @@ -100,8 +94,6 @@ pub trait ParquetEncoder { zip_writer.add_directory::<_, ()>(zip_entry_name, FileOptions::<()>::default())?; } } - - zip_writer.finish()?; Ok(()) } @@ -109,22 +101,23 @@ pub trait ParquetEncoder { } pub trait ParquetDecoder: Sized { - fn decode_parquet_from_bytes( + fn decode_parquet_from_bytes>( bytes: &[u8], path_for_decoded_graph: Option<&Path>, + prefix: P, ) -> Result { // Read directly from an in-memory cursor - let reader = std::io::Cursor::new(bytes); - - Self::decode_parquet_from_zip(reader, path_for_decoded_graph) + let mut reader = ZipArchive::new(std::io::Cursor::new(bytes))?; + Self::decode_parquet_from_zip(&mut reader, path_for_decoded_graph, prefix) } - fn decode_parquet_from_zip( - reader: R, + fn decode_parquet_from_zip>( + zip: &mut ZipArchive, path_for_decoded_graph: Option<&Path>, + prefix: P, ) -> Result { + let prefix = prefix.as_ref(); // Unzip to a temp dir and decode parquet from there - let mut zip = zip::ZipArchive::new(reader)?; let temp_dir = tempfile::tempdir()?; for i in 0..zip.len() { @@ -134,18 +127,8 @@ pub trait ParquetDecoder: Sized { None => continue, }; - if zip_entry_name.starts_with(GRAPH_PATH) { - // Since we attach the GRAPH_PATH prefix to the zip entry name - // when encoding, we strip it away while decoding. - let relative_path = zip_entry_name - .strip_prefix(GRAPH_PATH) - .map_err(|e| { - GraphError::IOErrorMsg(format!("Failed to strip prefix from path: {}", e)) - })? - .to_path_buf(); - + if let Ok(relative_path) = zip_entry_name.strip_prefix(prefix) { let out_path = temp_dir.path().join(relative_path); - if file.is_dir() { std::fs::create_dir_all(&out_path)?; } else { @@ -153,27 +136,14 @@ pub trait ParquetDecoder: Sized { if let Some(parent) = out_path.parent() { std::fs::create_dir_all(parent)?; } - let mut out_file = std::fs::File::create(&out_path)?; std::io::copy(&mut file, &mut out_file)?; } } } - Self::decode_parquet(temp_dir.path(), path_for_decoded_graph) } - fn is_parquet_decodable(path: impl AsRef) -> bool { - // Considered to be decodable if there is at least one .parquet - WalkDir::new(path) - .into_iter() - .filter_map(Result::ok) - .any(|entry| { - entry.path().is_file() - && entry.path().extension().is_some_and(|ext| ext == "parquet") - }) - } - fn decode_parquet( path: impl AsRef, path_for_decoded_graph: Option<&Path>, diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index f88b478b7e..c81a3edb53 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -1,35 +1,61 @@ +#[cfg(feature = "search")] +use crate::prelude::IndexMutationOps; use crate::{ db::api::{mutation::AdditionOps, view::StaticGraphViewOps}, errors::GraphError, serialise::{ + get_zip_graph_path, + metadata::GraphMetadata, parquet::{ParquetDecoder, ParquetEncoder}, - GraphFolder, + GraphFolder, GraphPaths, Metadata, RelativePath, DATA_PATH, DEFAULT_DATA_PATH, + DEFAULT_GRAPH_PATH, GRAPH_PATH, META_PATH, }, }; -use std::{fs, fs::File, path::Path}; +use std::{ + fs, + fs::File, + io::{Cursor, Read, Seek, Write}, + path::Path, +}; use tempfile; - -#[cfg(feature = "search")] -use crate::prelude::IndexMutationOps; -use crate::serialise::GraphPaths; +use zip::{write::SimpleFileOptions, ZipArchive, ZipWriter}; pub trait StableEncode: StaticGraphViewOps + AdditionOps { + fn encode_to_zip(&self, writer: ZipWriter) -> Result<(), GraphError>; /// Encode the graph into bytes. - fn encode_to_bytes(&self) -> Vec; + fn encode_to_bytes(&self) -> Result, GraphError>; /// Encode the graph into the given path. fn encode(&self, path: impl Into) -> Result<(), GraphError>; } impl StableEncode for T { - fn encode_to_bytes(&self) -> Vec { - // Encode to a temp zip file and return the bytes - let tempdir = tempfile::tempdir().unwrap(); - let zip_path = tempdir.path().join("graph.zip"); - let folder = GraphFolder::new_as_zip(&zip_path); + fn encode_to_zip(&self, mut writer: ZipWriter) -> Result<(), GraphError> { + let graph_meta = GraphMetadata::from_graph(self); + writer.start_file(META_PATH, SimpleFileOptions::default())?; + writer.write(&serde_json::to_vec(&RelativePath { + path: DEFAULT_DATA_PATH.to_string(), + })?)?; + writer.start_file( + [DEFAULT_DATA_PATH, META_PATH].join("/"), + SimpleFileOptions::default(), + )?; + writer.write(&serde_json::to_vec(&Metadata { + path: DEFAULT_GRAPH_PATH.to_string(), + meta: graph_meta, + })?)?; + let graph_prefix = [DEFAULT_DATA_PATH, DEFAULT_GRAPH_PATH].join("/"); + self.encode_parquet_to_zip(&mut writer, graph_prefix)?; + // TODO: Encode Index to zip + writer.finish()?; + Ok(()) + } - self.encode(&folder).unwrap(); - fs::read(&zip_path).unwrap() + fn encode_to_bytes(&self) -> Result, GraphError> { + let mut bytes = Vec::new(); + let writer = ZipWriter::new(Cursor::new(&mut bytes)); + self.encode_to_zip(writer)?; + Ok(bytes) } fn encode(&self, path: impl Into) -> Result<(), GraphError> { @@ -37,10 +63,7 @@ impl StableEncode for T { if folder.write_as_zip_format { let file = File::create_new(&folder.root())?; - self.encode_parquet_to_zip(file)?; - #[cfg(feature = "search")] - self.persist_index_to_disk_zip(&folder)?; - folder.write_metadata(self)?; + self.encode_to_zip(ZipWriter::new(file))?; } else { let write_folder = folder.init_write()?; self.encode_parquet(write_folder.graph_path()?)?; @@ -61,15 +84,17 @@ pub trait StableDecode: StaticGraphViewOps + AdditionOps { path_for_decoded_graph: Option<&Path>, ) -> Result; + fn decode_from_zip( + reader: ZipArchive, + path_for_decoded_graph: Option<&Path>, + ) -> Result; + // Decode the graph from the given path. // `path_for_decoded_graph` gets passed to the newly created graph. fn decode( path: impl Into, path_for_decoded_graph: Option<&Path>, ) -> Result; - - /// Returns true if the graph can be decoded from the given path. - fn is_decodable(path: impl AsRef) -> bool; } impl StableDecode for T { @@ -77,13 +102,19 @@ impl StableDecode for T { bytes: &[u8], path_for_decoded_graph: Option<&Path>, ) -> Result { - // Write bytes to a temp zip file and decode - let tempdir = tempfile::tempdir()?; - let zip_path = tempdir.path().join("graph.zip"); - let folder = GraphFolder::new_as_zip(&zip_path); - std::fs::write(&zip_path, bytes)?; + let cursor = Cursor::new(bytes); + Self::decode_from_zip(ZipArchive::new(cursor)?, path_for_decoded_graph) + } + + fn decode_from_zip( + mut reader: ZipArchive, + path_for_decoded_graph: Option<&Path>, + ) -> Result { + let graph_prefix = get_zip_graph_path(&mut reader)?; + let graph = + Self::decode_parquet_from_zip(&mut reader, path_for_decoded_graph, graph_prefix)?; - let graph = Self::decode(&folder, path_for_decoded_graph)?; + //TODO: graph.load_index_from_zip(&mut reader, prefix) Ok(graph) } @@ -96,19 +127,13 @@ impl StableDecode for T { let folder: GraphFolder = path.into(); if folder.is_zip() { - let reader = std::fs::File::open(&folder.root())?; - graph = Self::decode_parquet_from_zip(reader, path_for_decoded_graph)?; + let reader = ZipArchive::new(File::open(&folder.root())?)?; + graph = Self::decode_from_zip(reader, path_for_decoded_graph)?; } else { graph = Self::decode_parquet(&folder.graph_path()?, path_for_decoded_graph)?; + #[cfg(feature = "search")] + graph.load_index(&folder)?; } - - #[cfg(feature = "search")] - graph.load_index(&folder)?; - Ok(graph) } - - fn is_decodable(path: impl AsRef) -> bool { - Self::is_parquet_decodable(path) - } } From 4330632f5386c20f0c7ff8d681e898416c9f0a09 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 10 Dec 2025 13:39:54 +0100 Subject: [PATCH 12/39] all the tests compile --- raphtory/src/db/api/storage/storage.rs | 3 +- raphtory/src/db/api/view/internal/mod.rs | 1 - raphtory/src/errors.rs | 4 +- raphtory/src/python/graph/graph.rs | 6 +- .../src/python/graph/graph_with_deletions.rs | 6 +- .../types/macros/trait_impl/serialise.rs | 11 +- raphtory/src/serialise/graph_folder.rs | 211 +++++++++--------- raphtory/tests/df_loaders.rs | 27 ++- 8 files changed, 141 insertions(+), 128 deletions(-) diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index e1fdbfce1c..340609fd5d 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -37,13 +37,12 @@ use std::{ }; use storage::{Extension, WalImpl}; -use crate::serialise::GraphPaths; #[cfg(feature = "search")] use { crate::{ db::api::view::IndexSpec, search::graph_index::{GraphIndex, MutableGraphIndex}, - serialise::GraphFolder, + serialise::{GraphFolder, GraphPaths}, }, either::Either, parking_lot::RwLock, diff --git a/raphtory/src/db/api/view/internal/mod.rs b/raphtory/src/db/api/view/internal/mod.rs index 9429942407..0ab2d7eecc 100644 --- a/raphtory/src/db/api/view/internal/mod.rs +++ b/raphtory/src/db/api/view/internal/mod.rs @@ -26,7 +26,6 @@ mod one_hop_filter; pub(crate) mod time_semantics; mod wrapped_graph; -use crate::serialise::GraphFolder; pub use edge_filter_ops::*; pub use filter_ops::*; pub use into_dynamic::{IntoDynHop, IntoDynamic}; diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index f39b786595..c8235a6a95 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -4,7 +4,6 @@ use crate::{ }; use arrow::{datatypes::DataType, error::ArrowError}; use itertools::Itertools; -use neo4rs::Path; use parquet::errors::ParquetError; use raphtory_api::core::entities::{ properties::prop::{PropError, PropType}, @@ -19,9 +18,8 @@ use raphtory_core::{ }; use raphtory_storage::mutation::MutationError; use std::{ - backtrace::Backtrace, fmt::Debug, - io, panic, + io, panic::Location, path::{PathBuf, StripPrefixError}, sync::Arc, diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index e606d393fc..212d6a1c0e 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -170,9 +170,9 @@ impl PyGraph { Graph::load_from_path(path) } - fn __reduce__(&self) -> (PyGraphEncoder, (Vec,)) { - let state = self.graph.encode_to_bytes(); - (PyGraphEncoder, (state,)) + fn __reduce__(&self) -> Result<(PyGraphEncoder, (Vec,)), GraphError> { + let state = self.graph.encode_to_bytes()?; + Ok((PyGraphEncoder, (state,))) } /// Persist graph to parquet files diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index 67cc9ad62c..e89f2529c9 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -119,9 +119,9 @@ impl PyPersistentGraph { PersistentGraph::load_from_path(path) } - fn __reduce__(&self) -> (PyGraphEncoder, (Vec,)) { - let state = self.graph.encode_to_bytes(); - (PyGraphEncoder, (state,)) + fn __reduce__(&self) -> Result<(PyGraphEncoder, (Vec,)), GraphError> { + let state = self.graph.encode_to_bytes()?; + Ok((PyGraphEncoder, (state,))) } /// Adds a new node with the given id and properties to the graph. diff --git a/raphtory/src/python/types/macros/trait_impl/serialise.rs b/raphtory/src/python/types/macros/trait_impl/serialise.rs index 3ea61824be..d9cbd2e01c 100644 --- a/raphtory/src/python/types/macros/trait_impl/serialise.rs +++ b/raphtory/src/python/types/macros/trait_impl/serialise.rs @@ -1,3 +1,5 @@ +use crate::errors::GraphError; + /// Macro for implementing all the Cache methods on a python wrapper /// /// # Arguments @@ -69,9 +71,12 @@ macro_rules! impl_serialise { /// /// Returns: /// bytes: - fn serialise<'py>(&self, py: Python<'py>) -> Bound<'py, pyo3::types::PyBytes> { - let bytes = $crate::serialise::StableEncode::encode_to_bytes(&self.$field); - pyo3::types::PyBytes::new(py, &bytes) + fn serialise<'py>( + &self, + py: Python<'py>, + ) -> Result, GraphError> { + let bytes = $crate::serialise::StableEncode::encode_to_bytes(&self.$field)?; + Ok(pyo3::types::PyBytes::new(py, &bytes)) } } }; diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 4dec5840d7..fc79666072 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -385,15 +385,11 @@ impl GraphFolder { /// Creates a zip file from the folder. pub fn zip_from_folder(&self, mut writer: W) -> Result<(), GraphError> { - let mut buffer = Vec::new(); - if self.is_zip() { let mut reader = File::open(&self.root_folder)?; - reader.read_to_end(&mut buffer)?; - writer.write_all(&buffer)?; + io::copy(&mut reader, &mut writer)?; } else { let mut zip = ZipWriter::new(writer); - for entry in WalkDir::new(&self.root_folder) .into_iter() .filter_map(Result::ok) @@ -410,7 +406,7 @@ impl GraphFolder { let mut file = File::open(path)?; std::io::copy(&mut file, &mut zip)?; - } else if path.is_dir() { + } else if path.is_dir() && !zip_entry_name.is_empty() { // Add empty directories to the zip zip.add_directory::<_, ()>(zip_entry_name, FileOptions::default())?; } @@ -418,7 +414,13 @@ impl GraphFolder { zip.finish()?; } + Ok(()) + } + pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphError> { + self.ensure_clean_root_dir()?; + let mut archive = ZipArchive::new(reader)?; + archive.extract(self.root())?; Ok(()) } } @@ -644,100 +646,99 @@ mod tests { /// Verify that the metadata is re-created if it does not exist. #[test] #[ignore = "Need to think about how to deal with reading old format"] - fn test_read_metadata_from_noninitialized_zip() { - global_info_logger(); - - let graph = Graph::new(); - graph.add_node(0, 0, NO_PROPS, None).unwrap(); - - let tmp_dir = tempfile::TempDir::new().unwrap(); - let zip_path = tmp_dir.path().join("graph.zip"); - let folder = GraphFolder::new_as_zip(&zip_path); - graph.encode(&folder).unwrap(); - - // Remove the metadata file from the zip to simulate a noninitialized zip - remove_metadata_from_zip(&zip_path); - - // Should fail because the metadata file is not present - let err = folder.try_read_metadata(); - assert!(err.is_err()); - - // Should re-create the metadata file - let result = folder.read_metadata().unwrap(); - assert_eq!( - result, - GraphMetadata { - node_count: 1, - edge_count: 0, - metadata: vec![], - graph_type: GraphType::EventGraph, - is_diskgraph: false - } - ); - } - - /// Helper function to remove the metadata file from a zip - fn remove_metadata_from_zip(zip_path: &Path) { - let mut zip_file = std::fs::File::open(&zip_path).unwrap(); - let mut zip_archive = zip::ZipArchive::new(&mut zip_file).unwrap(); - let mut temp_zip = tempfile::NamedTempFile::new().unwrap(); - - // Scope for the zip writer - { - let mut zip_writer = zip::ZipWriter::new(&mut temp_zip); - - for i in 0..zip_archive.len() { - let mut file = zip_archive.by_index(i).unwrap(); - - // Copy all files except the metadata file - if file.name() != META_PATH { - zip_writer - .start_file::<_, ()>(file.name(), FileOptions::default()) - .unwrap(); - std::io::copy(&mut file, &mut zip_writer).unwrap(); - } - } - - zip_writer.finish().unwrap(); - } - - std::fs::copy(temp_zip.path(), &zip_path).unwrap(); - } - - /// Verify that the metadata is re-created if it does not exist. - #[test] - #[ignore = "Need to think about how to handle reading from old format"] - fn test_read_metadata_from_noninitialized_folder() { - global_info_logger(); - - let graph = Graph::new(); - graph.add_node(0, 0, NO_PROPS, None).unwrap(); - - let temp_folder = tempfile::TempDir::new().unwrap(); - let folder = GraphFolder::from(temp_folder.path()); - graph.encode(&folder).unwrap(); - - // Remove the metadata file - std::fs::remove_file(folder.get_meta_path()).unwrap(); - - // Should fail because the metadata file is not present - let err = folder.try_read_metadata(); - assert!(err.is_err()); - - // Should re-create the metadata file - let result = folder.read_metadata().unwrap(); - assert_eq!( - result, - GraphMetadata { - node_count: 1, - edge_count: 0, - metadata: vec![], - graph_type: GraphType::EventGraph, - is_diskgraph: false - } - ); - } - + // fn test_read_metadata_from_noninitialized_zip() { + // global_info_logger(); + // + // let graph = Graph::new(); + // graph.add_node(0, 0, NO_PROPS, None).unwrap(); + // + // let tmp_dir = tempfile::TempDir::new().unwrap(); + // let zip_path = tmp_dir.path().join("graph.zip"); + // let folder = GraphFolder::new_as_zip(&zip_path); + // graph.encode(&folder).unwrap(); + // + // // Remove the metadata file from the zip to simulate a noninitialized zip + // remove_metadata_from_zip(&zip_path); + // + // // Should fail because the metadata file is not present + // let err = folder.try_read_metadata(); + // assert!(err.is_err()); + // + // // Should re-create the metadata file + // let result = folder.read_metadata().unwrap(); + // assert_eq!( + // result, + // GraphMetadata { + // node_count: 1, + // edge_count: 0, + // metadata: vec![], + // graph_type: GraphType::EventGraph, + // is_diskgraph: false + // } + // ); + // } + + // /// Helper function to remove the metadata file from a zip + // fn remove_metadata_from_zip(zip_path: &Path) { + // let mut zip_file = std::fs::File::open(&zip_path).unwrap(); + // let mut zip_archive = zip::ZipArchive::new(&mut zip_file).unwrap(); + // let mut temp_zip = tempfile::NamedTempFile::new().unwrap(); + // + // // Scope for the zip writer + // { + // let mut zip_writer = zip::ZipWriter::new(&mut temp_zip); + // + // for i in 0..zip_archive.len() { + // let mut file = zip_archive.by_index(i).unwrap(); + // + // // Copy all files except the metadata file + // if file.name() != META_PATH { + // zip_writer + // .start_file::<_, ()>(file.name(), FileOptions::default()) + // .unwrap(); + // std::io::copy(&mut file, &mut zip_writer).unwrap(); + // } + // } + // + // zip_writer.finish().unwrap(); + // } + // + // std::fs::copy(temp_zip.path(), &zip_path).unwrap(); + // } + + // /// Verify that the metadata is re-created if it does not exist. + // #[test] + // #[ignore = "Need to think about how to handle reading from old format"] + // fn test_read_metadata_from_noninitialized_folder() { + // global_info_logger(); + // + // let graph = Graph::new(); + // graph.add_node(0, 0, NO_PROPS, None).unwrap(); + // + // let temp_folder = tempfile::TempDir::new().unwrap(); + // let folder = GraphFolder::from(temp_folder.path()); + // graph.encode(&folder).unwrap(); + // + // // Remove the metadata file + // std::fs::remove_file(folder.get_meta_path()).unwrap(); + // + // // Should fail because the metadata file is not present + // let err = folder.try_read_metadata(); + // assert!(err.is_err()); + // + // // Should re-create the metadata file + // let result = folder.read_metadata().unwrap(); + // assert_eq!( + // result, + // GraphMetadata { + // node_count: 1, + // edge_count: 0, + // metadata: vec![], + // graph_type: GraphType::EventGraph, + // is_diskgraph: false + // } + // ); + // } #[test] fn test_zip_from_folder() { let graph = Graph::new(); @@ -750,8 +751,8 @@ mod tests { let initial_folder = GraphFolder::from(temp_folder.path().join("initial")); graph.encode(&initial_folder).unwrap(); - assert!(initial_folder.get_graph_path().exists()); - assert!(initial_folder.get_meta_path().exists()); + assert!(initial_folder.graph_path().unwrap().exists()); + assert!(initial_folder.meta_path().unwrap().exists()); // Create a zip file from the folder let output_zip_path = temp_folder.path().join("output.zip"); @@ -832,7 +833,7 @@ mod tests { let graph_folder = GraphFolder::from(&folder); graph.encode(&graph_folder).unwrap(); - assert!(graph_folder.get_graph_path().exists()); + assert!(graph_folder.graph_path().unwrap().exists()); // Zip the folder let mut zip_bytes = Vec::new(); @@ -846,8 +847,8 @@ mod tests { unzip_folder.unzip_to_folder(cursor).unwrap(); // Verify the extracted folder has the same structure - assert!(unzip_folder.get_graph_path().exists()); - assert!(unzip_folder.get_meta_path().exists()); + assert!(unzip_folder.graph_path().unwrap().exists()); + assert!(unzip_folder.meta_path().unwrap().exists()); // Verify the extracted graph is the same as the original let extracted_graph = Graph::decode(&unzip_folder, None::<&std::path::Path>).unwrap(); diff --git a/raphtory/tests/df_loaders.rs b/raphtory/tests/df_loaders.rs index 734ccec0e2..fbb00b633d 100644 --- a/raphtory/tests/df_loaders.rs +++ b/raphtory/tests/df_loaders.rs @@ -613,7 +613,8 @@ mod parquet_tests { PropUpdatesFixture, }, }; - use std::str::FromStr; + use std::{io::Cursor, str::FromStr}; + use zip::{ZipArchive, ZipWriter}; #[test] fn node_temp_props() { @@ -1125,10 +1126,13 @@ mod parquet_tests { // Test writing to a file let file = std::fs::File::create(&zip_path).unwrap(); - g.encode_parquet_to_zip(file).unwrap(); + let mut writer = ZipWriter::new(file); + g.encode_parquet_to_zip(&mut writer, "graph").unwrap(); + writer.finish().unwrap(); - let reader = std::fs::File::open(&zip_path).unwrap(); - let g2 = Graph::decode_parquet_from_zip(reader, None::<&std::path::Path>).unwrap(); + let mut reader = ZipArchive::new(std::fs::File::open(&zip_path).unwrap()).unwrap(); + let g2 = + Graph::decode_parquet_from_zip(&mut reader, None::<&std::path::Path>, "graph").unwrap(); assert_graph_equal(&g, &g2); } @@ -1151,8 +1155,12 @@ mod parquet_tests { g.add_edge(3, 1, 4, [("test prop 3", 10.0)], None).unwrap(); g.add_edge(4, 1, 3, [("test prop 4", true)], None).unwrap(); - let bytes = g.encode_parquet_to_bytes().unwrap(); - let g2 = Graph::decode_parquet_from_bytes(&bytes, None::<&std::path::Path>).unwrap(); + let mut bytes = Vec::new(); + let mut writer = ZipWriter::new(Cursor::new(&mut bytes)); + g.encode_parquet_to_zip(&mut writer, "graph").unwrap(); + writer.finish().unwrap(); + let g2 = + Graph::decode_parquet_from_bytes(&bytes, None::<&std::path::Path>, "graph").unwrap(); assert_graph_equal(&g, &g2); } @@ -1160,8 +1168,11 @@ mod parquet_tests { fn test_parquet_bytes_proptest() { proptest!(|(edges in build_graph_strat(30, 30, 10, 10, true))| { let g = Graph::from(build_graph(&edges)); - let bytes = g.encode_parquet_to_bytes().unwrap(); - let g2 = Graph::decode_parquet_from_bytes(&bytes, None::<&std::path::Path>).unwrap(); + let mut bytes = Vec::new(); + let mut writer = ZipWriter::new(Cursor::new(&mut bytes)); + g.encode_parquet_to_zip(&mut writer, "graph").unwrap(); + writer.finish().unwrap(); + let g2 = Graph::decode_parquet_from_bytes(&bytes, None::<&std::path::Path>, "graph").unwrap(); assert_graph_equal(&g, &g2); }) From a275065699bf874608cf35ed70c3243c829e2456 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 11 Dec 2025 13:02:12 +0100 Subject: [PATCH 13/39] all features compile --- Makefile | 31 +- python/python/raphtory/__init__.pyi | 955 +++++++++--------- .../python/raphtory/algorithms/__init__.pyi | 168 ++- python/python/raphtory/filter/__init__.pyi | 99 +- python/python/raphtory/graph_gen/__init__.pyi | 8 +- .../python/raphtory/graph_loader/__init__.pyi | 16 +- python/python/raphtory/graphql/__init__.pyi | 181 +++- python/python/raphtory/iterables/__init__.pyi | 261 ++--- .../python/raphtory/node_state/__init__.pyi | 293 ++++-- python/python/raphtory/vectors/__init__.pyi | 58 +- raphtory/src/db/api/mutation/index_ops.rs | 34 +- raphtory/src/db/api/storage/storage.rs | 10 +- raphtory/src/errors.rs | 35 +- raphtory/src/search/graph_index.rs | 53 +- raphtory/src/search/mod.rs | 2 +- raphtory/src/serialise/graph_folder.rs | 3 +- raphtory/tests/serialise_test.rs | 2 +- 17 files changed, 1288 insertions(+), 921 deletions(-) diff --git a/Makefile b/Makefile index 3b1385639f..176749b5f8 100644 --- a/Makefile +++ b/Makefile @@ -12,17 +12,13 @@ build-all: rust-build test-all: rust-test-all python-test -test-all-public: rust-test-all-public python-test-public - # Tidying tidy: rust-fmt build-python stubs python-fmt -tidy-public: rust-fmt build-python-public stubs python-fmt - python-tidy: stubs python-fmt test-graphql-schema -check-pr: tidy-public test-all +check-pr: tidy test-all gen-graphql-schema: raphtory schema > raphtory-graphql/schema.graphql @@ -31,7 +27,6 @@ test-graphql-schema: install-node-tools npx graphql-schema-linter --rules fields-have-descriptions,types-have-descriptions raphtory-graphql/schema.graphql # Utilities - activate-storage: ./scripts/activate_private_storage.py @@ -71,13 +66,12 @@ run-graphql: rust-test: cargo test -q -rust-test-all: activate-storage - cargo nextest run --all --features=storage +rust-check: cargo hack check --workspace --all-targets --each-feature --skip extension-module,default -rust-test-all-public: +rust-test-all: rust-check cargo nextest run --all - cargo hack check --workspace --all-targets --each-feature --skip extension-module,default,storage + ########## # Python # @@ -86,32 +80,23 @@ rust-test-all-public: install-python: cd python && maturin build && pip install ../target/wheels/*.whl -build-python-public: deactivate-storage +build-python: cd python && maturin develop -r --extras=dev -build-python: activate-storage - cd python && maturin develop -r --features=storage --extras=dev +debug-python: + cd python && maturin develop --profile=debug --extras=dev # Testing - -python-test: activate-storage - cd python && tox run && tox run -e storage - -python-test-public: +python-test: cd python && tox run python-fmt: cd python && black . -debug-python-public: deactivate-storage - cd python && maturin develop --profile=debug build-python-rtd: cd python && maturin build --profile=build-fast && pip install ../target/wheels/*.whl -debug-python: activate-storage - cd python && maturin develop --features=storage,extension-module --extras=dev - ######## # Docs # ######## diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index adac628ab2..d0fe9a6365 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1,6 +1,7 @@ """ Raphtory graph analytics library """ + from __future__ import annotations ############################################################################### @@ -26,8 +27,40 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['GraphView', 'Graph', 'PersistentGraph', 'Node', 'Nodes', 'PathFromNode', 'PathFromGraph', 'MutableNode', 'Edge', 'Edges', 'NestedEdges', 'MutableEdge', 'Properties', 'PyPropValueList', 'Metadata', 'TemporalProperties', 'PropertiesView', 'TemporalProp', 'WindowSet', 'IndexSpecBuilder', 'IndexSpec', 'version', 'graphql', 'algorithms', 'graph_loader', 'graph_gen', 'vectors', 'node_state', 'filter', 'iterables', 'nullmodels', 'plottingutils'] -class GraphView(object): +__all__ = [ + "GraphView", + "Graph", + "PersistentGraph", + "Node", + "Nodes", + "PathFromNode", + "PathFromGraph", + "MutableNode", + "Edge", + "Edges", + "NestedEdges", + "MutableEdge", + "Properties", + "PyPropValueList", + "Metadata", + "TemporalProperties", + "PropertiesView", + "TemporalProp", + "WindowSet", + "version", + "graphql", + "algorithms", + "graph_loader", + "graph_gen", + "vectors", + "node_state", + "filter", + "iterables", + "nullmodels", + "plottingutils", +] + +class GraphView(object): """Graph view is a read-only version of a graph at a certain point in time.""" def __eq__(self, value): @@ -237,7 +270,9 @@ class GraphView(object): GraphView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -307,14 +342,6 @@ class GraphView(object): list[Node]: the nodes that match the properties name and value """ - def get_index_spec(self) -> IndexSpec: - """ - Get index spec - - Returns: - IndexSpec: - """ - def has_edge(self, src: NodeInput, dst: NodeInput) -> bool: """ Returns true if the graph contains the specified edge @@ -399,14 +426,20 @@ class GraphView(object): GraphView: The layered view """ - def materialize(self) -> GraphView: + def materialize(self, path=None) -> GraphView: """ - Returns a 'materialized' clone of the graph view - i.e. a new graph with a copy of the data seen within the view instead of just a mask over the original graph + Returns a 'materialized' clone of the graph view - i.e. a new graph with a + copy of the data seen within the view instead of just a mask over the original graph. + If a path is provided, the new graph will be stored at that path + (assuming the storage feature is enabled). Returns: GraphView: Returns a graph clone """ + def materialize_to_graph_folder(self, path): + """Materializes the graph view into a graphql compatible folder.""" + @property def metadata(self) -> Metadata: """ @@ -447,7 +480,12 @@ class GraphView(object): Properties: Properties paired with their names """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -471,32 +509,6 @@ class GraphView(object): WindowSet: A `WindowSet` object. """ - def search_edges(self, filter: Any, limit: int = 25, offset: int = 0) -> list[Edge]: - """ - Searches for edges which match the given filter expression. This uses Tantivy's exact search. - - Arguments: - filter: The filter expression to search for. - limit(int): The maximum number of results to return. Defaults to 25. - offset(int): The number of results to skip. This is useful for pagination. Defaults to 0. - - Returns: - list[Edge]: A list of edges which match the filter expression. The list will be empty if no edges match the query. - """ - - def search_nodes(self, filter: Any, limit: int = 25, offset: int = 0) -> list[Node]: - """ - Searches for nodes which match the given filter expression. This uses Tantivy's exact search. - - Arguments: - filter: The filter expression to search for. - limit(int): The maximum number of results to return. Defaults to 25. - offset(int): The number of results to skip. This is useful for pagination. Defaults to 0. - - Returns: - list[Node]: A list of nodes which match the filter expression. The list will be empty if no nodes match. - """ - def shrink_end(self, end: TimeInput) -> GraphView: """ Set the end of the window to the smaller of `end` and `self.end()` @@ -593,7 +605,14 @@ class GraphView(object): GraphView: Returns the subgraph """ - def to_networkx(self, explode_edges: bool = False, include_node_properties: bool = True, include_edge_properties: bool = True, include_update_history: bool = True, include_property_history: bool = True) -> nx.MultiDiGraph: + def to_networkx( + self, + explode_edges: bool = False, + include_node_properties: bool = True, + include_edge_properties: bool = True, + include_update_history: bool = True, + include_property_history: bool = True, + ) -> nx.MultiDiGraph: """ Returns a graph with NetworkX. @@ -612,7 +631,19 @@ class GraphView(object): nx.MultiDiGraph: A Networkx MultiDiGraph. """ - def to_pyvis(self, explode_edges: bool = False, edge_color: str = '#000000', shape: str = 'dot', node_image: Optional[str] = None, edge_weight: Optional[str] = None, edge_label: Optional[str] = None, colour_nodes_by_type: bool = False, directed: bool = True, notebook: bool = False, **kwargs: Any) -> pyvis.network.Network: + def to_pyvis( + self, + explode_edges: bool = False, + edge_color: str = "#000000", + shape: str = "dot", + node_image: Optional[str] = None, + edge_weight: Optional[str] = None, + edge_label: Optional[str] = None, + colour_nodes_by_type: bool = False, + directed: bool = True, + notebook: bool = False, + **kwargs: Any, + ) -> pyvis.network.Network: """ Draw a graph with PyVis. Pyvis is a required dependency. If you intend to use this function make sure that you install Pyvis @@ -673,7 +704,14 @@ class GraphView(object): GraphView: The layered view """ - def vectorise(self, embedding: Callable[[list], list], nodes: bool | str = True, edges: bool | str = True, cache: Optional[str] = None, verbose: bool = False) -> VectorisedGraph: + def vectorise( + self, + embedding: Callable[[list], list], + nodes: bool | str = True, + edges: bool | str = True, + cache: Optional[str] = None, + verbose: bool = False, + ) -> VectorisedGraph: """ Create a VectorisedGraph from the current graph @@ -709,7 +747,7 @@ class GraphView(object): Optional[int]: """ -class Graph(GraphView): +class Graph(GraphView): """ A temporal graph with event semantics. @@ -717,13 +755,19 @@ class Graph(GraphView): num_shards (int, optional): The number of locks to use in the storage to allow for multithreaded updates. """ - def __new__(cls, num_shards: Optional[int] = None) -> Graph: + def __new__(cls, path=None) -> Graph: """Create and return a new object. See help(type) for accurate signature.""" - def __reduce__(self): - ... - - def add_edge(self, timestamp: TimeInput, src: str|int, dst: str|int, properties: Optional[PropInput] = None, layer: Optional[str] = None, secondary_index: Optional[int] = None) -> MutableEdge: + def __reduce__(self): ... + def add_edge( + self, + timestamp: TimeInput, + src: str | int, + dst: str | int, + properties: Optional[PropInput] = None, + layer: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> MutableEdge: """ Adds a new edge with the given source and destination nodes and properties to the graph. @@ -756,7 +800,14 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def add_node(self, timestamp: TimeInput, id: str|int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, secondary_index: Optional[int] = None) -> MutableNode: + def add_node( + self, + timestamp: TimeInput, + id: str | int, + properties: Optional[PropInput] = None, + node_type: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> MutableNode: """ Adds a new node with the given id and properties to the graph. @@ -774,7 +825,12 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def add_properties(self, timestamp: TimeInput, properties: PropInput, secondary_index: Optional[int] = None) -> None: + def add_properties( + self, + timestamp: TimeInput, + properties: PropInput, + secondary_index: Optional[int] = None, + ) -> None: """ Adds properties to the graph. @@ -790,68 +846,14 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def cache(self, path: str) -> None: - """ - Write Graph to cache file and initialise the cache. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. If the file already exists its contents are overwritten. - - Arguments: - path (str): The path to the cache file - - Returns: - None: - """ - - def create_index(self) -> None: - """ - Create graph index - - Returns: - None: - """ - - def create_index_in_ram(self) -> None: - """ - Creates a graph index in memory (RAM). - - This is primarily intended for use in tests and should not be used in production environments, - as the index will not be persisted to disk. - - Returns: - None: - """ - - def create_index_in_ram_with_spec(self, py_spec: IndexSpec) -> None: - """ - Creates a graph index in memory (RAM) with the provided index spec. - - This is primarily intended for use in tests and should not be used in production environments, - as the index will not be persisted to disk. - - Arguments: - py_spec: The specification for the in-memory index to be created. - - Arguments: - py_spec (IndexSpec): - The specification for the in-memory index to be created. - - Returns: - None: - """ - - def create_index_with_spec(self, py_spec: Any) -> None: - """ - Create graph index with the provided index spec. - - Arguments: - py_spec: - The specification for the in-memory index to be created. - - Returns: - None: - """ - - def create_node(self, timestamp: TimeInput, id: str|int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, secondary_index: Optional[int] = None) -> MutableNode: + def create_node( + self, + timestamp: TimeInput, + id: str | int, + properties: Optional[PropInput] = None, + node_type: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> MutableNode: """ Creates a new node with the given id and properties to the graph. It fails if the node already exists. @@ -881,7 +883,7 @@ class Graph(GraphView): Graph: """ - def edge(self, src: str|int, dst: str|int) -> MutableEdge: + def edge(self, src: str | int, dst: str | int) -> MutableEdge: """ Gets the edge with the specified source and destination nodes @@ -974,7 +976,9 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def import_edges_as(self, edges: List[Edge], new_ids: List[Tuple[int, int]], merge: bool = False) -> None: + def import_edges_as( + self, edges: List[Edge], new_ids: List[Tuple[int, int]], merge: bool = False + ) -> None: """ Import multiple edges into the graph with new ids. @@ -1009,7 +1013,9 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def import_node_as(self, node: Node, new_id: str|int, merge: bool = False) -> MutableNode: + def import_node_as( + self, node: Node, new_id: str | int, merge: bool = False + ) -> MutableNode: """ Import a single node into the graph with new id. @@ -1044,7 +1050,9 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def import_nodes_as(self, nodes: List[Node], new_ids: List[str|int], merge: bool = False) -> None: + def import_nodes_as( + self, nodes: List[Node], new_ids: List[str | int], merge: bool = False + ) -> None: """ Import multiple nodes into the graph with new ids. @@ -1075,21 +1083,17 @@ class Graph(GraphView): """ @staticmethod - def load_cached(path: str) -> Graph: - """ - Load Graph from a file and initialise it as a cache file. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. - - Arguments: - path (str): The path to the cache file - - Returns: - Graph: the loaded graph with initialised cache - """ - - def load_edge_props_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load(path): ... + def load_edge_props_from_pandas( + self, + df: DataFrame, + src: str, + dst: str, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + ) -> None: """ Load edge properties from a Pandas DataFrame. @@ -1109,7 +1113,16 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edge_props_from_parquet(self, parquet_path: str, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edge_props_from_parquet( + self, + parquet_path: str, + src: str, + dst: str, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + ) -> None: """ Load edge properties from parquet file @@ -1129,7 +1142,19 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edges_from_pandas( + self, + df: DataFrame, + time: str, + src: str, + dst: str, + secondary_index: Optional[str] = None, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + ) -> None: """ Load edges from a Pandas DataFrame into the graph. @@ -1138,6 +1163,7 @@ class Graph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. + secondary_index (str, optional): The column name for the secondary index. Defaults to None. properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. @@ -1151,7 +1177,19 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_parquet(self, parquet_path: str, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edges_from_parquet( + self, + parquet_path: str, + time: str, + src: str, + dst: str, + secondary_index: Optional[str] = None, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + ) -> None: """ Load edges from a Parquet file into the graph. @@ -1160,6 +1198,7 @@ class Graph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. + secondary_index (str, optional): The column name for the secondary index. Defaults to None. properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. @@ -1176,7 +1215,7 @@ class Graph(GraphView): @staticmethod def load_from_file(path: str) -> Graph: """ - Load Graph from a file. + Load Graph from a parquet file. Arguments: path (str): The path to the file. @@ -1185,7 +1224,15 @@ class Graph(GraphView): Graph: """ - def load_node_props_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_node_props_from_pandas( + self, + df: DataFrame, + id: str, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + ) -> None: """ Load node properties from a Pandas DataFrame. @@ -1204,7 +1251,15 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_node_props_from_parquet(self, parquet_path: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_node_props_from_parquet( + self, + parquet_path: str, + id: str, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + ) -> None: """ Load node properties from a parquet file. @@ -1223,7 +1278,18 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_pandas(self, df: DataFrame, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_nodes_from_pandas( + self, + df: DataFrame, + time: str, + id: str, + secondary_index: Optional[str] = None, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + ) -> None: """ Load nodes from a Pandas DataFrame into the graph. @@ -1231,6 +1297,7 @@ class Graph(GraphView): df (DataFrame): The Pandas DataFrame containing the nodes. time (str): The column name for the timestamps. id (str): The column name for the node IDs. + secondary_index (str, optional): The column name for the secondary index. Defaults to None. node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) properties (List[str], optional): List of node property column names. Defaults to None. @@ -1244,7 +1311,18 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_parquet(self, parquet_path: str, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_nodes_from_parquet( + self, + parquet_path: str, + time: str, + id: str, + secondary_index: Optional[str] = None, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + ) -> None: """ Load nodes from a Parquet file into the graph. @@ -1252,6 +1330,7 @@ class Graph(GraphView): parquet_path (str): Parquet file or directory of Parquet files containing the nodes time (str): The column name for the timestamps. id (str): The column name for the node IDs. + secondary_index (str, optional): The column name for the secondary index. Defaults to None. node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) properties (List[str], optional): List of node property column names. Defaults to None. @@ -1265,7 +1344,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def node(self, id: str|int) -> MutableNode: + def node(self, id: str | int) -> MutableNode: """ Gets the node with the specified id @@ -1286,7 +1365,7 @@ class Graph(GraphView): def save_to_file(self, path: str) -> None: """ - Saves the Graph to the given path. + Saves the Graph to the given path in parquet format. Arguments: path (str): The path to the file. @@ -1315,7 +1394,7 @@ class Graph(GraphView): def to_parquet(self, graph_dir: str | PathLike) -> None: """ - Persist graph to parquet files. + Persist graph to parquet files Arguments: graph_dir (str | PathLike): the folder where the graph will be persisted as parquet @@ -1338,24 +1417,22 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def write_updates(self) -> None: - """ - Persist the new updates by appending them to the cache file. - - Returns: - None: - """ - -class PersistentGraph(GraphView): +class PersistentGraph(GraphView): """A temporal graph that allows edges and nodes to be deleted.""" - def __new__(cls) -> PersistentGraph: + def __new__(cls, path=None) -> PersistentGraph: """Create and return a new object. See help(type) for accurate signature.""" - def __reduce__(self): - ... - - def add_edge(self, timestamp: int, src: str | int, dst: str | int, properties: Optional[PropInput] = None, layer: Optional[str] = None, secondary_index: Optional[int] = None) -> None: + def __reduce__(self): ... + def add_edge( + self, + timestamp: int, + src: str | int, + dst: str | int, + properties: Optional[PropInput] = None, + layer: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> None: """ Adds a new edge with the given source and destination nodes and properties to the graph. @@ -1388,7 +1465,14 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def add_node(self, timestamp: TimeInput, id: str | int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, secondary_index: Optional[int] = None) -> None: + def add_node( + self, + timestamp: TimeInput, + id: str | int, + properties: Optional[PropInput] = None, + node_type: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> None: """ Adds a new node with the given id and properties to the graph. @@ -1406,7 +1490,12 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def add_properties(self, timestamp: TimeInput, properties: dict, secondary_index: Optional[int] = None) -> None: + def add_properties( + self, + timestamp: TimeInput, + properties: dict, + secondary_index: Optional[int] = None, + ) -> None: """ Adds properties to the graph. @@ -1422,67 +1511,14 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def cache(self, path: str) -> None: - """ - Write PersistentGraph to cache file and initialise the cache. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. If the file already exists its contents are overwritten. - - Arguments: - path (str): The path to the cache file - - Returns: - None: - """ - - def create_index(self) -> None: - """ - Create graph index - - Returns: - None: - """ - - def create_index_in_ram(self) -> None: - """ - Creates a graph index in memory (RAM). - - This is primarily intended for use in tests and should not be used in production environments, - as the index will not be persisted to disk. - - Returns: - None: - """ - - def create_index_in_ram_with_spec(self, py_spec: IndexSpec) -> None: - """ - Creates a graph index in memory (RAM) with the provided index spec. - - This is primarily intended for use in tests and should not be used in production environments, - as the index will not be persisted to disk. - - Arguments: - py_spec: The specification for the in-memory index to be created. - - Arguments: - py_spec (IndexSpec): The specification for the in-memory index to be created. - - Returns: - None: - """ - - def create_index_with_spec(self, py_spec: Any) -> None: - """ - Create graph index with the provided index spec. - Arguments: - py_spec: - The specification for the in-memory index to be created. - - Returns: - None: - """ - - def create_node(self, timestamp: TimeInput, id: str | int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, secondary_index: Optional[int] = None) -> MutableNode: + def create_node( + self, + timestamp: TimeInput, + id: str | int, + properties: Optional[PropInput] = None, + node_type: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> MutableNode: """ Creates a new node with the given id and properties to the graph. It fails if the node already exists. @@ -1500,7 +1536,14 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def delete_edge(self, timestamp: int, src: str | int, dst: str | int, layer: Optional[str] = None, secondary_index: Optional[int] = None) -> MutableEdge: + def delete_edge( + self, + timestamp: int, + src: str | int, + dst: str | int, + layer: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> MutableEdge: """ Deletes an edge given the timestamp, src and dst nodes and layer (optional) @@ -1613,7 +1656,9 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def import_edges_as(self, edges: List[Edge], new_ids: list[Tuple[GID, GID]], merge: bool = False) -> None: + def import_edges_as( + self, edges: List[Edge], new_ids: list[Tuple[GID, GID]], merge: bool = False + ) -> None: """ Import multiple edges into the graph with new ids. @@ -1650,7 +1695,9 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def import_node_as(self, node: Node, new_id: str|int, merge: bool = False) -> Node: + def import_node_as( + self, node: Node, new_id: str | int, merge: bool = False + ) -> Node: """ Import a single node into the graph with new id. @@ -1687,7 +1734,9 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def import_nodes_as(self, nodes: List[Node], new_ids: List[str|int], merge: bool = False) -> None: + def import_nodes_as( + self, nodes: List[Node], new_ids: List[str | int], merge: bool = False + ) -> None: """ Import multiple nodes into the graph with new ids. @@ -1707,21 +1756,17 @@ class PersistentGraph(GraphView): """ @staticmethod - def load_cached(path: str) -> PersistentGraph: - """ - Load PersistentGraph from a file and initialise it as a cache file. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. - - Arguments: - path (str): The path to the cache file - - Returns: - PersistentGraph: the loaded graph with initialised cache - """ - - def load_edge_deletions_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load(path): ... + def load_edge_deletions_from_pandas( + self, + df: DataFrame, + time: str, + src: str, + dst: str, + secondary_index: Optional[str] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + ) -> None: """ Load edges deletions from a Pandas DataFrame into the graph. @@ -1730,6 +1775,8 @@ class PersistentGraph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. + secondary_index (str, optional): The column name for the secondary index. + NOTE: All values in this column must be unique. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) @@ -1740,15 +1787,26 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edge_deletions_from_parquet(self, parquet_path: str, time: str, src: str, dst: str, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edge_deletions_from_parquet( + self, + parquet_path: str, + time: str, + src: str, + dst: str, + secondary_index: Optional[str] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + ) -> None: """ Load edges deletions from a Parquet file into the graph. Arguments: parquet_path (str): Parquet file or directory of Parquet files path containing node information. + time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - time (str): The column name for the update timestamps. + secondary_index (str, optional): The column name for the secondary index. + NOTE: All values in this column must be unique. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) @@ -1759,7 +1817,16 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edge_props_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edge_props_from_pandas( + self, + df: DataFrame, + src: str, + dst: str, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + ) -> None: """ Load edge properties from a Pandas DataFrame. @@ -1779,7 +1846,16 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edge_props_from_parquet(self, parquet_path: str, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edge_props_from_parquet( + self, + parquet_path: str, + src: str, + dst: str, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + ) -> None: """ Load edge properties from parquet file @@ -1799,7 +1875,19 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edges_from_pandas( + self, + df: DataFrame, + time: str, + src: str, + dst: str, + secondary_index: Optional[str] = None, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + ) -> None: """ Load edges from a Pandas DataFrame into the graph. @@ -1808,6 +1896,8 @@ class PersistentGraph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. + secondary_index (str, optional): The column name for the secondary index. + NOTE: All values in this column must be unique. Defaults to None. properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. @@ -1821,7 +1911,19 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_parquet(self, parquet_path: str, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edges_from_parquet( + self, + parquet_path: str, + time: str, + src: str, + dst: str, + secondary_index: Optional[str] = None, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + ) -> None: """ Load edges from a Parquet file into the graph. @@ -1830,6 +1932,8 @@ class PersistentGraph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. + secondary_index (str, optional): The column name for the secondary index. + NOTE: All values in this column must be unique. Defaults to None. properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. @@ -1846,7 +1950,7 @@ class PersistentGraph(GraphView): @staticmethod def load_from_file(path: str) -> PersistentGraph: """ - Load PersistentGraph from a file. + Load PersistentGraph from a parquet file. Arguments: path (str): The path to the file. @@ -1855,7 +1959,15 @@ class PersistentGraph(GraphView): PersistentGraph: """ - def load_node_props_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_node_props_from_pandas( + self, + df: DataFrame, + id: str, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + ) -> None: """ Load node properties from a Pandas DataFrame. @@ -1874,7 +1986,15 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_node_props_from_parquet(self, parquet_path: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_node_props_from_parquet( + self, + parquet_path: str, + id: str, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + ) -> None: """ Load node properties from a parquet file. @@ -1893,7 +2013,18 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_pandas(self, df: DataFrame, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_nodes_from_pandas( + self, + df: DataFrame, + time: str, + id: str, + secondary_index: Optional[str] = None, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + ) -> None: """ Load nodes from a Pandas DataFrame into the graph. @@ -1901,6 +2032,8 @@ class PersistentGraph(GraphView): df (DataFrame): The Pandas DataFrame containing the nodes. time (str): The column name for the timestamps. id (str): The column name for the node IDs. + secondary_index (str, optional): The column name for the secondary index. + NOTE: All values in this column must be unique. Defaults to None. node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) properties (List[str], optional): List of node property column names. Defaults to None. @@ -1914,7 +2047,18 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_parquet(self, parquet_path: str, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_nodes_from_parquet( + self, + parquet_path: str, + time: str, + id: str, + secondary_index: Optional[str] = None, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + ) -> None: """ Load nodes from a Parquet file into the graph. @@ -1922,6 +2066,8 @@ class PersistentGraph(GraphView): parquet_path (str): Parquet file or directory of Parquet files containing the nodes time (str): The column name for the timestamps. id (str): The column name for the node IDs. + secondary_index (str, optional): The column name for the secondary index. + NOTE: All values in this column must be unique. Defaults to None. node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) properties (List[str], optional): List of node property column names. Defaults to None. @@ -1956,7 +2102,7 @@ class PersistentGraph(GraphView): def save_to_file(self, path: str) -> None: """ - Saves the PersistentGraph to the given path. + Saves the PersistentGraph to the given path in parquet format. Arguments: path (str): The path to the file. @@ -1997,15 +2143,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def write_updates(self) -> None: - """ - Persist the new updates by appending them to the cache file. - - Returns: - None: - """ - -class Node(object): +class Node(object): """A node (or node) in the graph.""" def __eq__(self, value): @@ -2180,7 +2318,9 @@ class Node(object): Node: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2261,7 +2401,7 @@ class Node(object): """ @property - def id(self) -> (str|int): + def id(self) -> str | int: """ Returns the id of the node. This is a unique identifier for the node. @@ -2425,7 +2565,12 @@ class Node(object): Properties: A list of properties. """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -2556,7 +2701,7 @@ class Node(object): Optional[int]: """ -class Nodes(object): +class Nodes(object): """A list of nodes that can be iterated over.""" def __bool__(self): @@ -2745,7 +2890,9 @@ class Nodes(object): Nodes: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2982,7 +3129,12 @@ class Nodes(object): PropertiesView: A view of the node properties. """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3080,7 +3232,9 @@ class Nodes(object): Optional[datetime]: The earliest datetime that this Nodes is valid or None if the Nodes is valid for all times. """ - def to_df(self, include_property_history: bool = False, convert_datetime: bool = False) -> DataFrame: + def to_df( + self, include_property_history: bool = False, convert_datetime: bool = False + ) -> DataFrame: """ Converts the graph's nodes into a Pandas DataFrame. @@ -3141,8 +3295,7 @@ class Nodes(object): Optional[int]: """ -class PathFromNode(object): - +class PathFromNode(object): def __bool__(self): """True if self else False""" @@ -3299,7 +3452,9 @@ class PathFromNode(object): PathFromNode: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3509,7 +3664,12 @@ class PathFromNode(object): PropertiesView: """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3651,8 +3811,7 @@ class PathFromNode(object): Optional[int]: """ -class PathFromGraph(object): - +class PathFromGraph(object): def __bool__(self): """True if self else False""" @@ -3818,7 +3977,9 @@ class PathFromGraph(object): PathFromGraph: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -4053,7 +4214,12 @@ class PathFromGraph(object): NestedPropsIterable: """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -4195,8 +4361,7 @@ class PathFromGraph(object): Optional[int]: """ -class MutableNode(Node): - +class MutableNode(Node): def __repr__(self): """Return repr(self).""" @@ -4213,7 +4378,12 @@ class MutableNode(Node): None: """ - def add_updates(self, t: TimeInput, properties: Optional[PropInput] = None, secondary_index: Optional[int] = None) -> None: + def add_updates( + self, + t: TimeInput, + properties: Optional[PropInput] = None, + secondary_index: Optional[int] = None, + ) -> None: """ Add updates to a node in the graph at a specified time. This function allows for the addition of property updates to a node within the graph. The updates are time-stamped, meaning they are applied at the specified time. @@ -4258,7 +4428,7 @@ class MutableNode(Node): None: """ -class Edge(object): +class Edge(object): """ PyEdge is a Python class that represents an edge in the graph. An edge is a directed connection between two nodes. @@ -4445,7 +4615,9 @@ class Edge(object): Edge: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -4647,7 +4819,12 @@ class Edge(object): Properties: Properties on the Edge. """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -4796,7 +4973,7 @@ class Edge(object): Optional[int]: """ -class Edges(object): +class Edges(object): """A list of edges that can be iterated over.""" def __bool__(self): @@ -4981,7 +5158,9 @@ class Edges(object): Edges: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -5188,7 +5367,12 @@ class Edges(object): PropertiesView: """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -5304,7 +5488,12 @@ class Edges(object): I64Iterable: """ - def to_df(self, include_property_history: bool = True, convert_datetime: bool = False, explode: bool = False) -> DataFrame: + def to_df( + self, + include_property_history: bool = True, + convert_datetime: bool = False, + explode: bool = False, + ) -> DataFrame: """ Converts the graph's edges into a Pandas DataFrame. @@ -5357,8 +5546,7 @@ class Edges(object): Optional[int]: """ -class NestedEdges(object): - +class NestedEdges(object): def __bool__(self): """True if self else False""" @@ -5533,7 +5721,9 @@ class NestedEdges(object): NestedEdges: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -5731,7 +5921,12 @@ class NestedEdges(object): PyNestedPropsIterable: """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -5880,8 +6075,7 @@ class NestedEdges(object): Optional[int]: """ -class MutableEdge(Edge): - +class MutableEdge(Edge): def __repr__(self): """Return repr(self).""" @@ -5899,7 +6093,13 @@ class MutableEdge(Edge): None: """ - def add_updates(self, t: TimeInput, properties: Optional[PropInput] = None, layer: Optional[str] = None, secondary_index: Optional[int] = None) -> None: + def add_updates( + self, + t: TimeInput, + properties: Optional[PropInput] = None, + layer: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> None: """ Add updates to an edge in the graph at a specified time. This function allows for the addition of property updates to an edge within the graph. The updates are time-stamped, meaning they are applied at the specified time. @@ -5946,7 +6146,7 @@ class MutableEdge(Edge): None: """ -class Properties(object): +class Properties(object): """A view of the properties of an entity""" def __contains__(self, key): @@ -6037,8 +6237,7 @@ class Properties(object): list[PropValue]: """ -class PyPropValueList(object): - +class PyPropValueList(object): def __eq__(self, value): """Return self==value.""" @@ -6074,12 +6273,8 @@ class PyPropValueList(object): PropValue: The average of each property values, or None if count is zero. """ - def collect(self): - ... - - def count(self): - ... - + def collect(self): ... + def count(self): ... def drop_none(self) -> list[PropValue]: """ Drop none. @@ -6128,7 +6323,7 @@ class PyPropValueList(object): PropValue: """ -class Metadata(object): +class Metadata(object): """A view of metadata of an entity""" def __contains__(self, key): @@ -6209,7 +6404,7 @@ class Metadata(object): list[PropValue]: """ -class TemporalProperties(object): +class TemporalProperties(object): """A view of the temporal properties of an entity""" def __contains__(self, key): @@ -6304,8 +6499,7 @@ class TemporalProperties(object): list[TemporalProp]: the list of property views """ -class PropertiesView(object): - +class PropertiesView(object): def __contains__(self, key): """Return bool(key in self).""" @@ -6388,7 +6582,7 @@ class PropertiesView(object): list[list[PropValue]]: """ -class TemporalProp(object): +class TemporalProp(object): """A view of a temporal property""" def __eq__(self, value): @@ -6549,8 +6743,7 @@ class TemporalProp(object): NumpyArray: """ -class WindowSet(object): - +class WindowSet(object): def __iter__(self): """Implement iter(self).""" @@ -6568,152 +6761,6 @@ class WindowSet(object): Iterable: The time index. """ -class IndexSpecBuilder(object): - - def __new__(cls, graph) -> IndexSpecBuilder: - """Create and return a new object. See help(type) for accurate signature.""" - - def build(self) -> IndexSpec: - """ - Return a spec - - Returns: - IndexSpec: - """ - - def with_all_edge_metadata(self) -> dict[str, Any]: - """ - Adds all edge metadata to the spec. - - Returns: - dict[str, Any]: - """ - - def with_all_edge_properties(self) -> dict[str, Any]: - """ - Adds all edge properties to the spec. - - Returns: - dict[str, Any]: - """ - - def with_all_edge_properties_and_metadata(self) -> dict[str, Any]: - """ - Adds all edge properties and metadata to the spec. - - Returns: - dict[str, Any]: - """ - - def with_all_node_metadata(self) -> dict[str, Any]: - """ - Adds all node metadata to the spec. - - Returns: - dict[str, Any]: - """ - - def with_all_node_properties(self) -> dict[str, Any]: - """ - Adds all node properties to the spec. - - Returns: - dict[str, Any]: - """ - - def with_all_node_properties_and_metadata(self) -> dict[str, Any]: - """ - Adds all node properties and metadata to the spec. - - Returns: - dict[str, Any]: - """ - - def with_edge_metadata(self, props: Any) -> dict[str, Any]: - """ - Adds specified edge metadata to the spec. - - Arguments: - props: List of metadata. - - Returns: - dict[str, Any]: - """ - - def with_edge_properties(self, props: Any) -> dict[str, Any]: - """ - Adds specified edge properties to the spec. - - Arguments: - props: List of properties. - - Returns: - dict[str, Any]: - """ - - def with_node_metadata(self, props: Any) -> dict[str, Any]: - """ - Adds specified node metadata to the spec. - - Arguments: - props: list of metadata. - - Returns: - dict[str, Any]: - """ - - def with_node_properties(self, props: Any) -> dict[str, Any]: - """ - Adds specified node properties to the spec. - - Arguments: - props: list of properties. - - Returns: - dict[str, Any]: - """ - -class IndexSpec(object): - - def __repr__(self): - """Return repr(self).""" - - @property - def edge_metadata(self) -> list[str]: - """ - Get edge metadata. - - Returns: - list[str]: - """ - - @property - def edge_properties(self) -> list[str]: - """ - Get edge properties. - - Returns: - list[str]: - """ - - @property - def node_metadata(self) -> list[str]: - """ - Get node metadata. - - Returns: - list[str]: - """ - - @property - def node_properties(self) -> list[str]: - """ - Get node properties. - - Returns: - list[str]: - """ - def version() -> str: """ Return Raphtory version. diff --git a/python/python/raphtory/algorithms/__init__.pyi b/python/python/raphtory/algorithms/__init__.pyi index c3005db67e..ae2892f399 100644 --- a/python/python/raphtory/algorithms/__init__.pyi +++ b/python/python/raphtory/algorithms/__init__.pyi @@ -1,6 +1,7 @@ """ Algorithmic functions that can be run on Raphtory graphs """ + from __future__ import annotations ############################################################################### @@ -26,8 +27,59 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['dijkstra_single_source_shortest_paths', 'global_reciprocity', 'betweenness_centrality', 'all_local_reciprocity', 'triplet_count', 'local_triangle_count', 'average_degree', 'directed_graph_density', 'degree_centrality', 'max_degree', 'min_degree', 'max_out_degree', 'max_in_degree', 'min_out_degree', 'min_in_degree', 'pagerank', 'single_source_shortest_path', 'global_clustering_coefficient', 'temporally_reachable_nodes', 'temporal_bipartite_graph_projection', 'local_clustering_coefficient', 'local_clustering_coefficient_batch', 'weakly_connected_components', 'strongly_connected_components', 'in_components', 'in_component', 'out_components', 'out_component', 'fast_rp', 'global_temporal_three_node_motif', 'global_temporal_three_node_motif_multi', 'local_temporal_three_node_motifs', 'hits', 'balance', 'label_propagation', 'k_core', 'temporal_SEIR', 'louvain', 'fruchterman_reingold', 'cohesive_fruchterman_reingold', 'max_weight_matching', 'Matching', 'Infected'] -def dijkstra_single_source_shortest_paths(graph: GraphView, source: NodeInput, targets: list[NodeInput], direction: Direction = "both", weight: str = 'weight') -> NodeStateWeightedSP: +__all__ = [ + "dijkstra_single_source_shortest_paths", + "global_reciprocity", + "betweenness_centrality", + "all_local_reciprocity", + "triplet_count", + "local_triangle_count", + "average_degree", + "directed_graph_density", + "degree_centrality", + "max_degree", + "min_degree", + "max_out_degree", + "max_in_degree", + "min_out_degree", + "min_in_degree", + "pagerank", + "single_source_shortest_path", + "global_clustering_coefficient", + "temporally_reachable_nodes", + "temporal_bipartite_graph_projection", + "local_clustering_coefficient", + "local_clustering_coefficient_batch", + "weakly_connected_components", + "strongly_connected_components", + "in_components", + "in_component", + "out_components", + "out_component", + "fast_rp", + "global_temporal_three_node_motif", + "global_temporal_three_node_motif_multi", + "local_temporal_three_node_motifs", + "hits", + "balance", + "label_propagation", + "k_core", + "temporal_SEIR", + "louvain", + "fruchterman_reingold", + "cohesive_fruchterman_reingold", + "max_weight_matching", + "Matching", + "Infected", +] + +def dijkstra_single_source_shortest_paths( + graph: GraphView, + source: NodeInput, + targets: list[NodeInput], + direction: Direction = "both", + weight: str = "weight", +) -> NodeStateWeightedSP: """ Finds the shortest paths from a single source to multiple targets in a graph. @@ -57,7 +109,9 @@ def global_reciprocity(graph: GraphView) -> float: float: reciprocity of the graph between 0 and 1. """ -def betweenness_centrality(graph: GraphView, k: Optional[int] = None, normalized: bool = True) -> NodeStateF64: +def betweenness_centrality( + graph: GraphView, k: Optional[int] = None, normalized: bool = True +) -> NodeStateF64: """ Computes the betweenness centrality for nodes in a given graph. @@ -225,7 +279,13 @@ def min_in_degree(graph: GraphView) -> int: int: value of the smallest indegree """ -def pagerank(graph: GraphView, iter_count: int = 20, max_diff: Optional[float] = None, use_l2_norm: bool = True, damping_factor: float = 0.85) -> NodeStateF64: +def pagerank( + graph: GraphView, + iter_count: int = 20, + max_diff: Optional[float] = None, + use_l2_norm: bool = True, + damping_factor: float = 0.85, +) -> NodeStateF64: """ Pagerank -- pagerank centrality value of the nodes in a graph @@ -246,7 +306,9 @@ def pagerank(graph: GraphView, iter_count: int = 20, max_diff: Optional[float] = NodeStateF64: Mapping of nodes to their pagerank value. """ -def single_source_shortest_path(graph: GraphView, source: NodeInput, cutoff: Optional[int] = None) -> NodeStateNodes: +def single_source_shortest_path( + graph: GraphView, source: NodeInput, cutoff: Optional[int] = None +) -> NodeStateNodes: """ Calculates the single source shortest paths from a given source node. @@ -277,7 +339,13 @@ def global_clustering_coefficient(graph: GraphView) -> float: [`Triplet Count`](triplet_count) """ -def temporally_reachable_nodes(graph: GraphView, max_hops: int, start_time: int, seed_nodes: list[NodeInput], stop_nodes: Optional[list[NodeInput]] = None) -> NodeStateReachability: +def temporally_reachable_nodes( + graph: GraphView, + max_hops: int, + start_time: int, + seed_nodes: list[NodeInput], + stop_nodes: Optional[list[NodeInput]] = None, +) -> NodeStateReachability: """ Temporally reachable nodes -- the nodes that are reachable by a time respecting path followed out from a set of seed nodes at a starting time. @@ -296,7 +364,9 @@ def temporally_reachable_nodes(graph: GraphView, max_hops: int, start_time: int, NodeStateReachability: Mapping of nodes to their reachability history. """ -def temporal_bipartite_graph_projection(graph: GraphView, delta: int, pivot_type: str) -> Graph: +def temporal_bipartite_graph_projection( + graph: GraphView, delta: int, pivot_type: str +) -> Graph: """ Projects a temporal bipartite graph into an undirected temporal graph over the pivot node type. Let `G` be a bipartite graph with node types `A` and `B`. Given `delta > 0`, the projection graph `G'` pivoting over type `B` nodes, will make a connection between nodes `n1` and `n2` (of type `A`) at time `(t1 + t2)/2` if they respectively have an edge at time `t1`, `t2` with the same node of type `B` in `G`, and `|t2-t1| < delta`. @@ -409,7 +479,14 @@ def out_component(node: Node) -> NodeStateUsize: NodeStateUsize: A NodeState mapping the nodes in the out-component to their distance from the starting node. """ -def fast_rp(graph: GraphView, embedding_dim: int, normalization_strength: float, iter_weights: list[float], seed: Optional[int] = None, threads: Optional[int] = None) -> NodeStateListF64: +def fast_rp( + graph: GraphView, + embedding_dim: int, + normalization_strength: float, + iter_weights: list[float], + seed: Optional[int] = None, + threads: Optional[int] = None, +) -> NodeStateListF64: """ Computes embedding vectors for each vertex of an undirected/bidirectional graph according to the Fast RP algorithm. Original Paper: https://doi.org/10.48550/arXiv.1908.11512 @@ -425,7 +502,9 @@ def fast_rp(graph: GraphView, embedding_dim: int, normalization_strength: float, NodeStateListF64: Mapping from nodes to embedding vectors. """ -def global_temporal_three_node_motif(graph: GraphView, delta: int, threads: Optional[int] = None) -> list[int]: +def global_temporal_three_node_motif( + graph: GraphView, delta: int, threads: Optional[int] = None +) -> list[int]: """ Computes the number of three edge, up-to-three node delta-temporal motifs in the graph, using the algorithm of Paranjape et al, Motifs in Temporal Networks (2017). We point the reader to this reference for more information on the algorithm and background, but provide a short summary below. @@ -474,7 +553,9 @@ def global_temporal_three_node_motif(graph: GraphView, delta: int, threads: Opti """ -def global_temporal_three_node_motif_multi(graph: GraphView, deltas: list[int], threads: Optional[int] = None) -> list[list[int]]: +def global_temporal_three_node_motif_multi( + graph: GraphView, deltas: list[int], threads: Optional[int] = None +) -> list[list[int]]: """ Computes the global counts of three-edge up-to-three node temporal motifs for a range of timescales. See `global_temporal_three_node_motif` for an interpretation of each row returned. @@ -487,7 +568,9 @@ def global_temporal_three_node_motif_multi(graph: GraphView, deltas: list[int], list[list[int]]: A list of 40d arrays, each array is the motif count for a particular value of delta, returned in the order that the deltas were given as input. """ -def local_temporal_three_node_motifs(graph: GraphView, delta: int, threads=None) -> NodeStateMotifs: +def local_temporal_three_node_motifs( + graph: GraphView, delta: int, threads=None +) -> NodeStateMotifs: """ Computes the number of each type of motif that each node participates in. See global_temporal_three_node_motifs for a summary of the motifs involved. @@ -503,7 +586,9 @@ def local_temporal_three_node_motifs(graph: GraphView, delta: int, threads=None) the motif. For two node motifs, both constituent nodes count the motif. For triangles, all three constituent nodes count the motif. """ -def hits(graph: GraphView, iter_count: int = 20, threads: Optional[int] = None) -> NodeStateHits: +def hits( + graph: GraphView, iter_count: int = 20, threads: Optional[int] = None +) -> NodeStateHits: """ HITS (Hubs and Authority) Algorithm: @@ -522,7 +607,9 @@ def hits(graph: GraphView, iter_count: int = 20, threads: Optional[int] = None) NodeStateHits: A mapping from nodes their hub and authority scores """ -def balance(graph: GraphView, name: str = "weight", direction: Direction = "both") -> NodeStateF64: +def balance( + graph: GraphView, name: str = "weight", direction: Direction = "both" +) -> NodeStateF64: """ Sums the weights of edges in the graph based on the specified direction. @@ -541,7 +628,9 @@ def balance(graph: GraphView, name: str = "weight", direction: Direction = "both """ -def label_propagation(graph: GraphView, seed: Optional[bytes] = None) -> list[set[Node]]: +def label_propagation( + graph: GraphView, seed: Optional[bytes] = None +) -> list[set[Node]]: """ Computes components using a label propagation algorithm @@ -554,7 +643,9 @@ def label_propagation(graph: GraphView, seed: Optional[bytes] = None) -> list[se """ -def k_core(graph: GraphView, k: int, iter_count: int, threads: Optional[int] = None) -> list[Node]: +def k_core( + graph: GraphView, k: int, iter_count: int, threads: Optional[int] = None +) -> list[Node]: """ Determines which nodes are in the k-core for a given value of k @@ -569,7 +660,15 @@ def k_core(graph: GraphView, k: int, iter_count: int, threads: Optional[int] = N """ -def temporal_SEIR(graph: GraphView, seeds: int | float | list[NodeInput], infection_prob: float, initial_infection: int | str | datetime, recovery_rate: float | None = None, incubation_rate: float | None = None, rng_seed: int | None = None) -> NodeStateSEIR: +def temporal_SEIR( + graph: GraphView, + seeds: int | float | list[NodeInput], + infection_prob: float, + initial_infection: int | str | datetime, + recovery_rate: float | None = None, + incubation_rate: float | None = None, + rng_seed: int | None = None, +) -> NodeStateSEIR: """ Simulate an SEIR dynamic on the network @@ -599,7 +698,12 @@ def temporal_SEIR(graph: GraphView, seeds: int | float | list[NodeInput], infect """ -def louvain(graph: GraphView, resolution: float = 1.0, weight_prop: str | None = None, tol: None | float = None) -> NodeStateUsize: +def louvain( + graph: GraphView, + resolution: float = 1.0, + weight_prop: str | None = None, + tol: None | float = None, +) -> NodeStateUsize: """ Louvain algorithm for community detection @@ -613,7 +717,14 @@ def louvain(graph: GraphView, resolution: float = 1.0, weight_prop: str | None = NodeStateUsize: Mapping of nodes to their community assignment """ -def fruchterman_reingold(graph: GraphView, iterations: int | None = 100, scale: float | None = 1.0, node_start_size: float | None = 1.0, cooloff_factor: float | None = 0.95, dt: float | None = 0.1) -> NodeLayout: +def fruchterman_reingold( + graph: GraphView, + iterations: int | None = 100, + scale: float | None = 1.0, + node_start_size: float | None = 1.0, + cooloff_factor: float | None = 0.95, + dt: float | None = 0.1, +) -> NodeLayout: """ Fruchterman Reingold layout algorithm @@ -629,7 +740,14 @@ def fruchterman_reingold(graph: GraphView, iterations: int | None = 100, scale: NodeLayout: A mapping from nodes to their [x, y] positions """ -def cohesive_fruchterman_reingold(graph: GraphView, iter_count: int = 100, scale: float = 1.0, node_start_size: float = 1.0, cooloff_factor: float = 0.95, dt: float = 0.1) -> NodeLayout: +def cohesive_fruchterman_reingold( + graph: GraphView, + iter_count: int = 100, + scale: float = 1.0, + node_start_size: float = 1.0, + cooloff_factor: float = 0.95, + dt: float = 0.1, +) -> NodeLayout: """ Cohesive version of `fruchterman_reingold` that adds virtual edges between isolated nodes Arguments: @@ -645,7 +763,12 @@ def cohesive_fruchterman_reingold(graph: GraphView, iter_count: int = 100, scale """ -def max_weight_matching(graph: GraphView, weight_prop: Optional[str] = None, max_cardinality: bool = True, verify_optimum_flag: bool = False) -> Matching: +def max_weight_matching( + graph: GraphView, + weight_prop: Optional[str] = None, + max_cardinality: bool = True, + verify_optimum_flag: bool = False, +) -> Matching: """ Compute a maximum-weighted matching in the general undirected weighted graph given by "edges". If `max_cardinality` is true, only @@ -682,7 +805,7 @@ def max_weight_matching(graph: GraphView, weight_prop: Optional[str] = None, max Matching: The matching """ -class Matching(object): +class Matching(object): """A Matching (i.e., a set of edges that do not share any nodes)""" def __bool__(self): @@ -754,8 +877,7 @@ class Matching(object): """ -class Infected(object): - +class Infected(object): def __repr__(self): """Return repr(self).""" diff --git a/python/python/raphtory/filter/__init__.pyi b/python/python/raphtory/filter/__init__.pyi index 36d732c413..5f33a18fcb 100644 --- a/python/python/raphtory/filter/__init__.pyi +++ b/python/python/raphtory/filter/__init__.pyi @@ -23,9 +23,20 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['FilterExpr', 'PropertyFilterOps', 'NodeFilterBuilder', 'Node', 'EdgeFilterOp', 'EdgeEndpoint', 'Edge', 'Property', 'Metadata', 'TemporalPropertyFilterBuilder'] -class FilterExpr(object): - +__all__ = [ + "FilterExpr", + "PropertyFilterOps", + "NodeFilterBuilder", + "Node", + "EdgeFilterOp", + "EdgeEndpoint", + "Edge", + "Property", + "Metadata", + "TemporalPropertyFilterBuilder", +] + +class FilterExpr(object): def __and__(self, value): """Return self&value.""" @@ -41,8 +52,7 @@ class FilterExpr(object): def __ror__(self, value): """Return value|self.""" -class PropertyFilterOps(object): - +class PropertyFilterOps(object): def __eq__(self, value): """Return self==value.""" @@ -64,7 +74,7 @@ class PropertyFilterOps(object): def contains(self, value) -> filter.FilterExpr: """ Returns a filter expression that checks if this object contains a specified property. - + Arguments: PropValue: @@ -72,7 +82,9 @@ class PropertyFilterOps(object): filter.FilterExpr: """ - def fuzzy_search(self, prop_value: str, levenshtein_distance: int, prefix_match: bool) -> filter.FilterExpr: + def fuzzy_search( + self, prop_value: str, levenshtein_distance: int, prefix_match: bool + ) -> filter.FilterExpr: """ Returns a filter expression that checks if the specified properties approximately match the specified string. @@ -82,7 +94,7 @@ class PropertyFilterOps(object): prop_value (str): Property to match against. levenshtein_distance (int): Maximum levenshtein distance between the specified prop_value and the result. prefix_match (bool): Enable prefix matching. - + Returns: filter.FilterExpr: """ @@ -90,7 +102,7 @@ class PropertyFilterOps(object): def is_in(self, values: list[PropValue]) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is in a specified iterable of properties. - + Arguments: values (list[PropValue]): @@ -101,7 +113,7 @@ class PropertyFilterOps(object): def is_none(self) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is none. - + Returns: filter.FilterExpr: """ @@ -109,7 +121,7 @@ class PropertyFilterOps(object): def is_not_in(self, values: list[PropValue]) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is not in a specified iterable of properties. - + Arguments: values (list[PropValue]): @@ -120,7 +132,7 @@ class PropertyFilterOps(object): def is_some(self) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is some. - + Returns: filter.FilterExpr: """ @@ -128,7 +140,7 @@ class PropertyFilterOps(object): def not_contains(self, value) -> filter.FilterExpr: """ Returns a filter expression that checks if this object does not contain a specified property. - + Arguments: PropValue: @@ -136,7 +148,7 @@ class PropertyFilterOps(object): filter.FilterExpr: """ -class NodeFilterBuilder(object): +class NodeFilterBuilder(object): """ A builder for constructing node filters @@ -172,7 +184,9 @@ class NodeFilterBuilder(object): filter.FilterExpr: """ - def fuzzy_search(self, value, levenshtein_distance: int, prefix_match: bool) -> filter.FilterExpr: + def fuzzy_search( + self, value, levenshtein_distance: int, prefix_match: bool + ) -> filter.FilterExpr: """ Returns a filter expression that checks if the specified properties approximately match the specified string. @@ -213,7 +227,7 @@ class NodeFilterBuilder(object): """ Returns a filter expression that checks if the specified iterable of strings does not contain a given value. - + Arguments: value (str): @@ -221,8 +235,7 @@ class NodeFilterBuilder(object): filter.FilterExpr: """ -class Node(object): - +class Node(object): @staticmethod def name(): """ @@ -241,8 +254,7 @@ class Node(object): NodeFilterBuilder: A filter builder for filtering by node type """ -class EdgeFilterOp(object): - +class EdgeFilterOp(object): def __eq__(self, value): """Return self==value.""" @@ -264,7 +276,7 @@ class EdgeFilterOp(object): def contains(self, value: str) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value contains the specified string. - + Arguments: value (str): @@ -272,7 +284,9 @@ class EdgeFilterOp(object): filter.FilterExpr: """ - def fuzzy_search(self, value, levenshtein_distance: int, prefix_match: bool) -> filter.FilterExpr: + def fuzzy_search( + self, value, levenshtein_distance: int, prefix_match: bool + ) -> filter.FilterExpr: """ Returns a filter expression that checks if the specified properties approximately match the specified string. @@ -282,7 +296,7 @@ class EdgeFilterOp(object): prop_value (str): Property to match against. levenshtein_distance (int): Maximum levenshtein distance between the specified prop_value and the result. prefix_match (bool): Enable prefix matching. - + Returns: filter.FilterExpr: """ @@ -290,7 +304,7 @@ class EdgeFilterOp(object): def is_in(self, values: list[str]) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is contained within the specified iterable of strings. - + Arguments: values (list[str]): @@ -301,7 +315,7 @@ class EdgeFilterOp(object): def is_not_in(self, values: list[str]) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is not contained within the provided iterable of strings. - + Arguments: values (list[str]): @@ -312,7 +326,7 @@ class EdgeFilterOp(object): def not_contains(self, value: str) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value does not contain the specified string. - + Arguments: value (str): @@ -320,22 +334,16 @@ class EdgeFilterOp(object): filter.FilterExpr: """ -class EdgeEndpoint(object): - - def name(self): - ... - -class Edge(object): +class EdgeEndpoint(object): + def name(self): ... +class Edge(object): @staticmethod - def dst(): - ... - + def dst(): ... @staticmethod - def src(): - ... + def src(): ... -class Property(PropertyFilterOps): +class Property(PropertyFilterOps): """ Construct a property filter @@ -346,10 +354,9 @@ class Property(PropertyFilterOps): def __new__(cls, name: str) -> Property: """Create and return a new object. See help(type) for accurate signature.""" - def temporal(self): - ... + def temporal(self): ... -class Metadata(PropertyFilterOps): +class Metadata(PropertyFilterOps): """ Construct a metadata filter @@ -360,10 +367,6 @@ class Metadata(PropertyFilterOps): def __new__(cls, name: str) -> Metadata: """Create and return a new object. See help(type) for accurate signature.""" -class TemporalPropertyFilterBuilder(object): - - def any(self): - ... - - def latest(self): - ... +class TemporalPropertyFilterBuilder(object): + def any(self): ... + def latest(self): ... diff --git a/python/python/raphtory/graph_gen/__init__.pyi b/python/python/raphtory/graph_gen/__init__.pyi index 3ec394b85c..3a9f849f05 100644 --- a/python/python/raphtory/graph_gen/__init__.pyi +++ b/python/python/raphtory/graph_gen/__init__.pyi @@ -1,6 +1,7 @@ """ Generate Raphtory graphs from attachment models """ + from __future__ import annotations ############################################################################### @@ -27,7 +28,8 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['random_attachment', 'ba_preferential_attachment'] +__all__ = ["random_attachment", "ba_preferential_attachment"] + def random_attachment(g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any = None): """ Generates a graph using the random attachment model @@ -46,7 +48,9 @@ def random_attachment(g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any None """ -def ba_preferential_attachment(g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any = None): +def ba_preferential_attachment( + g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any = None +): """ Generates a graph using the preferential attachment model. diff --git a/python/python/raphtory/graph_loader/__init__.pyi b/python/python/raphtory/graph_loader/__init__.pyi index 10ba033c37..e0b31f720f 100644 --- a/python/python/raphtory/graph_loader/__init__.pyi +++ b/python/python/raphtory/graph_loader/__init__.pyi @@ -1,6 +1,7 @@ """ Load and save Raphtory graphs from/to file(s) """ + from __future__ import annotations ############################################################################### @@ -27,7 +28,16 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['lotr_graph', 'lotr_graph_with_props', 'neo4j_movie_graph', 'stable_coin_graph', 'reddit_hyperlink_graph', 'reddit_hyperlink_graph_local', 'karate_club_graph'] +__all__ = [ + "lotr_graph", + "lotr_graph_with_props", + "neo4j_movie_graph", + "stable_coin_graph", + "reddit_hyperlink_graph", + "reddit_hyperlink_graph_local", + "karate_club_graph", +] + def lotr_graph() -> Graph: """ Load the Lord of the Rings dataset into a graph. @@ -56,7 +66,9 @@ def lotr_graph_with_props() -> Graph: Graph: """ -def neo4j_movie_graph(uri: str, username: str, password: str, database: str = ...) -> Graph: +def neo4j_movie_graph( + uri: str, username: str, password: str, database: str = ... +) -> Graph: """ Returns the neo4j movie graph example. diff --git a/python/python/raphtory/graphql/__init__.pyi b/python/python/raphtory/graphql/__init__.pyi index b8315a8395..7e93c483cc 100644 --- a/python/python/raphtory/graphql/__init__.pyi +++ b/python/python/raphtory/graphql/__init__.pyi @@ -23,8 +23,26 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['GraphServer', 'RunningGraphServer', 'RaphtoryClient', 'RemoteGraph', 'RemoteEdge', 'RemoteNode', 'RemoteNodeAddition', 'RemoteUpdate', 'RemoteEdgeAddition', 'RemoteIndexSpec', 'PropsInput', 'SomePropertySpec', 'AllPropertySpec', 'encode_graph', 'decode_graph', 'schema'] -class GraphServer(object): +__all__ = [ + "GraphServer", + "RunningGraphServer", + "RaphtoryClient", + "RemoteGraph", + "RemoteEdge", + "RemoteNode", + "RemoteNodeAddition", + "RemoteUpdate", + "RemoteEdgeAddition", + "RemoteIndexSpec", + "PropsInput", + "SomePropertySpec", + "AllPropertySpec", + "encode_graph", + "decode_graph", + "schema", +] + +class GraphServer(object): """ A class for defining and running a Raphtory GraphQL server @@ -43,7 +61,21 @@ class GraphServer(object): create_index: """ - def __new__(cls, work_dir: str | PathLike, cache_capacity: Optional[int] = None, cache_tti_seconds: Optional[int] = None, log_level: Optional[str] = None, tracing: Optional[bool] = None, otlp_agent_host: Optional[str] = None, otlp_agent_port: Optional[str] = None, otlp_tracing_service_name: Optional[str] = None, auth_public_key: Any = None, auth_enabled_for_reads: Any = None, config_path: Optional[str | PathLike] = None, create_index: Any = None) -> GraphServer: + def __new__( + cls, + work_dir: str | PathLike, + cache_capacity: Optional[int] = None, + cache_tti_seconds: Optional[int] = None, + log_level: Optional[str] = None, + tracing: Optional[bool] = None, + otlp_agent_host: Optional[str] = None, + otlp_agent_port: Optional[str] = None, + otlp_tracing_service_name: Optional[str] = None, + auth_public_key: Any = None, + auth_enabled_for_reads: Any = None, + config_path: Optional[str | PathLike] = None, + create_index: Any = None, + ) -> GraphServer: """Create and return a new object. See help(type) for accurate signature.""" def run(self, port: int = 1736, timeout_ms: int = 180000) -> None: @@ -58,7 +90,13 @@ class GraphServer(object): None: """ - def set_embeddings(self, cache: str, embedding: Optional[Callable] = None, nodes: bool | str = True, edges: bool | str = True) -> GraphServer: + def set_embeddings( + self, + cache: str, + embedding: Optional[Callable] = None, + nodes: bool | str = True, + edges: bool | str = True, + ) -> GraphServer: """ Setup the server to vectorise graphs with a default template. @@ -94,7 +132,9 @@ class GraphServer(object): GraphServer: The server with indexing disabled """ - def with_vectorised_graphs(self, graph_names: list[str], nodes: bool | str = True, edges: bool | str = True) -> GraphServer: + def with_vectorised_graphs( + self, graph_names: list[str], nodes: bool | str = True, edges: bool | str = True + ) -> GraphServer: """ Vectorise a subset of the graphs of the server. @@ -107,15 +147,11 @@ class GraphServer(object): GraphServer: A new server object containing the vectorised graphs. """ -class RunningGraphServer(object): +class RunningGraphServer(object): """A Raphtory server handler that also enables querying the server""" - def __enter__(self): - ... - - def __exit__(self, _exc_type, _exc_val, _exc_tb): - ... - + def __enter__(self): ... + def __exit__(self, _exc_type, _exc_val, _exc_tb): ... def get_client(self): """ Get the client for the server @@ -132,7 +168,7 @@ class RunningGraphServer(object): None: """ -class RaphtoryClient(object): +class RaphtoryClient(object): """ A client for handling GraphQL operations in the context of Raphtory. @@ -214,7 +250,9 @@ class RaphtoryClient(object): """ - def query(self, query: str, variables: Optional[dict[str, Any]] = None) -> dict[str, Any]: + def query( + self, query: str, variables: Optional[dict[str, Any]] = None + ) -> dict[str, Any]: """ Make a GraphQL query against the server. @@ -231,7 +269,7 @@ class RaphtoryClient(object): Receive graph from a path path on the server Note: - This downloads a copy of the graph. Modifications are not persistet to the server. + This downloads a copy of the graph. Modifications are not persisted to the server. Arguments: path (str): the path of the graph to be received @@ -252,7 +290,9 @@ class RaphtoryClient(object): """ - def send_graph(self, path: str, graph: Graph | PersistentGraph, overwrite: bool = False) -> dict[str, Any]: + def send_graph( + self, path: str, graph: Graph | PersistentGraph, overwrite: bool = False + ) -> dict[str, Any]: """ Send a graph to the server @@ -265,7 +305,9 @@ class RaphtoryClient(object): dict[str, Any]: The data field from the graphQL response after executing the mutation. """ - def upload_graph(self, path: str, file_path: str, overwrite: bool = False) -> dict[str, Any]: + def upload_graph( + self, path: str, file_path: str, overwrite: bool = False + ) -> dict[str, Any]: """ Upload graph file from a path file_path on the client @@ -278,9 +320,15 @@ class RaphtoryClient(object): dict[str, Any]: The data field from the graphQL response after executing the mutation. """ -class RemoteGraph(object): - - def add_edge(self, timestamp: int | str | datetime, src: str | int, dst: str | int, properties: Optional[dict] = None, layer: Optional[str] = None) -> RemoteEdge: +class RemoteGraph(object): + def add_edge( + self, + timestamp: int | str | datetime, + src: str | int, + dst: str | int, + properties: Optional[dict] = None, + layer: Optional[str] = None, + ) -> RemoteEdge: """ Adds a new edge with the given source and destination nodes and properties to the remote graph. @@ -317,7 +365,13 @@ class RemoteGraph(object): None: """ - def add_node(self, timestamp: int | str | datetime, id: str | int, properties: Optional[dict] = None, node_type: Optional[str] = None) -> RemoteNode: + def add_node( + self, + timestamp: int | str | datetime, + id: str | int, + properties: Optional[dict] = None, + node_type: Optional[str] = None, + ) -> RemoteNode: """ Adds a new node with the given id and properties to the remote graph. @@ -354,7 +408,13 @@ class RemoteGraph(object): None: """ - def create_node(self, timestamp: int | str | datetime, id: str | int, properties: Optional[dict] = None, node_type: Optional[str] = None) -> RemoteNode: + def create_node( + self, + timestamp: int | str | datetime, + id: str | int, + properties: Optional[dict] = None, + node_type: Optional[str] = None, + ) -> RemoteNode: """ Create a new node with the given id and properties to the remote graph and fail if the node already exists. @@ -368,7 +428,13 @@ class RemoteGraph(object): RemoteNode: the new remote node """ - def delete_edge(self, timestamp: int, src: str | int, dst: str | int, layer: Optional[str] = None) -> RemoteEdge: + def delete_edge( + self, + timestamp: int, + src: str | int, + dst: str | int, + layer: Optional[str] = None, + ) -> RemoteEdge: """ Deletes an edge in the remote graph, given the timestamp, src and dst nodes and layer (optional) @@ -416,7 +482,7 @@ class RemoteGraph(object): None: """ -class RemoteEdge(object): +class RemoteEdge(object): """ A remote edge reference @@ -425,7 +491,9 @@ class RemoteEdge(object): and [RemoteGraph.delete_edge][raphtory.graphql.RemoteGraph.delete_edge]. """ - def add_metadata(self, properties: dict[str, PropValue], layer: Optional[str] = None) -> None: + def add_metadata( + self, properties: dict[str, PropValue], layer: Optional[str] = None + ) -> None: """ Add metadata to the edge within the remote graph. This function is used to add metadata to an edge that does not @@ -439,7 +507,12 @@ class RemoteEdge(object): None: """ - def add_updates(self, t: int | str | datetime, properties: Optional[dict[str, PropValue]] = None, layer: Optional[str] = None) -> None: + def add_updates( + self, + t: int | str | datetime, + properties: Optional[dict[str, PropValue]] = None, + layer: Optional[str] = None, + ) -> None: """ Add updates to an edge in the remote graph at a specified time. @@ -470,7 +543,9 @@ class RemoteEdge(object): GraphError: If the operation fails. """ - def update_metadata(self, properties: dict[str, PropValue], layer: Optional[str] = None) -> None: + def update_metadata( + self, properties: dict[str, PropValue], layer: Optional[str] = None + ) -> None: """ Update metadata of an edge in the remote graph overwriting existing values. This function is used to add properties to an edge that does not @@ -484,8 +559,7 @@ class RemoteEdge(object): None: """ -class RemoteNode(object): - +class RemoteNode(object): def add_metadata(self, properties: dict[str, PropValue]) -> None: """ Add metadata to a node in the remote graph. @@ -499,7 +573,9 @@ class RemoteNode(object): None: """ - def add_updates(self, t: int | str | datetime, properties: Optional[dict[str, PropValue]] = None) -> None: + def add_updates( + self, t: int | str | datetime, properties: Optional[dict[str, PropValue]] = None + ) -> None: """ Add updates to a node in the remote graph at a specified time. This function allows for the addition of property updates to a node within the graph. The updates are time-stamped, meaning they are applied at the specified time. @@ -537,7 +613,7 @@ class RemoteNode(object): None: """ -class RemoteNodeAddition(object): +class RemoteNodeAddition(object): """ Node addition update @@ -548,10 +624,16 @@ class RemoteNodeAddition(object): updates (list[RemoteUpdate], optional): the temporal updates """ - def __new__(cls, name: GID, node_type: Optional[str] = None, metadata: Optional[PropInput] = None, updates: Optional[list[RemoteUpdate]] = None) -> RemoteNodeAddition: + def __new__( + cls, + name: GID, + node_type: Optional[str] = None, + metadata: Optional[PropInput] = None, + updates: Optional[list[RemoteUpdate]] = None, + ) -> RemoteNodeAddition: """Create and return a new object. See help(type) for accurate signature.""" -class RemoteUpdate(object): +class RemoteUpdate(object): """ A temporal update @@ -560,10 +642,12 @@ class RemoteUpdate(object): properties (PropInput, optional): the properties for the update """ - def __new__(cls, time: TimeInput, properties: Optional[PropInput] = None) -> RemoteUpdate: + def __new__( + cls, time: TimeInput, properties: Optional[PropInput] = None + ) -> RemoteUpdate: """Create and return a new object. See help(type) for accurate signature.""" -class RemoteEdgeAddition(object): +class RemoteEdgeAddition(object): """ An edge update @@ -575,10 +659,17 @@ class RemoteEdgeAddition(object): updates (list[RemoteUpdate], optional): the temporal updates for the edge """ - def __new__(cls, src: GID, dst: GID, layer: Optional[str] = None, metadata: Optional[PropInput] = None, updates: Optional[list[RemoteUpdate]] = None) -> RemoteEdgeAddition: + def __new__( + cls, + src: GID, + dst: GID, + layer: Optional[str] = None, + metadata: Optional[PropInput] = None, + updates: Optional[list[RemoteUpdate]] = None, + ) -> RemoteEdgeAddition: """Create and return a new object. See help(type) for accurate signature.""" -class RemoteIndexSpec(object): +class RemoteIndexSpec(object): """ Create a RemoteIndexSpec specifying which node and edge properties to index. @@ -590,7 +681,7 @@ class RemoteIndexSpec(object): def __new__(cls, node_props: PropsInput, edge_props: PropsInput) -> RemoteIndexSpec: """Create and return a new object. See help(type) for accurate signature.""" -class PropsInput(object): +class PropsInput(object): """ Create a PropsInput by choosing to include all/some properties explicitly. @@ -602,10 +693,14 @@ class PropsInput(object): ValueError: If neither all and some are specified. """ - def __new__(cls, all: Optional[AllPropertySpec] = None, some: Optional[SomePropertySpec] = None) -> PropsInput: + def __new__( + cls, + all: Optional[AllPropertySpec] = None, + some: Optional[SomePropertySpec] = None, + ) -> PropsInput: """Create and return a new object. See help(type) for accurate signature.""" -class SomePropertySpec(object): +class SomePropertySpec(object): """ Create a SomePropertySpec by explicitly listing metadata and/or temporal property names. @@ -614,10 +709,12 @@ class SomePropertySpec(object): properties (list[str]): Temporal property names. Defaults to []. """ - def __new__(cls, metadata: list[str] = [], properties: list[str] = []) -> SomePropertySpec: + def __new__( + cls, metadata: list[str] = [], properties: list[str] = [] + ) -> SomePropertySpec: """Create and return a new object. See help(type) for accurate signature.""" -class AllPropertySpec(object): +class AllPropertySpec(object): """ Specifies that **all** properties should be included when creating an index. Use one of the predefined variants: ALL , ALL_METADATA , or ALL_TEMPORAL . diff --git a/python/python/raphtory/iterables/__init__.pyi b/python/python/raphtory/iterables/__init__.pyi index ec2c4d6ee9..2a80bbc5cb 100644 --- a/python/python/raphtory/iterables/__init__.pyi +++ b/python/python/raphtory/iterables/__init__.pyi @@ -23,9 +23,33 @@ from os import PathLike import networkx as nx # type: ignore import pyvis # type: ignore -__all__ = ['NestedUtcDateTimeIterable', 'NestedGIDIterable', 'GIDIterable', 'StringIterable', 'OptionArcStringIterable', 'UsizeIterable', 'OptionI64Iterable', 'NestedOptionArcStringIterable', 'NestedStringIterable', 'NestedOptionI64Iterable', 'NestedI64VecIterable', 'NestedUsizeIterable', 'BoolIterable', 'ArcStringIterable', 'NestedVecUtcDateTimeIterable', 'OptionVecUtcDateTimeIterable', 'GIDGIDIterable', 'NestedGIDGIDIterable', 'NestedBoolIterable', 'U64Iterable', 'OptionUtcDateTimeIterable', 'ArcStringVecIterable', 'NestedArcStringVecIterable'] -class NestedUtcDateTimeIterable(object): - +__all__ = [ + "NestedUtcDateTimeIterable", + "NestedGIDIterable", + "GIDIterable", + "StringIterable", + "OptionArcStringIterable", + "UsizeIterable", + "OptionI64Iterable", + "NestedOptionArcStringIterable", + "NestedStringIterable", + "NestedOptionI64Iterable", + "NestedI64VecIterable", + "NestedUsizeIterable", + "BoolIterable", + "ArcStringIterable", + "NestedVecUtcDateTimeIterable", + "OptionVecUtcDateTimeIterable", + "GIDGIDIterable", + "NestedGIDGIDIterable", + "NestedBoolIterable", + "U64Iterable", + "OptionUtcDateTimeIterable", + "ArcStringVecIterable", + "NestedArcStringVecIterable", +] + +class NestedUtcDateTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -53,11 +77,9 @@ class NestedUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class NestedGIDIterable(object): + def collect(self): ... +class NestedGIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -85,17 +107,11 @@ class NestedGIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def min(self): - ... - -class GIDIterable(object): + def collect(self): ... + def max(self): ... + def min(self): ... +class GIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -123,17 +139,11 @@ class GIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def min(self): - ... - -class StringIterable(object): + def collect(self): ... + def max(self): ... + def min(self): ... +class StringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -161,11 +171,9 @@ class StringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class OptionArcStringIterable(object): + def collect(self): ... +class OptionArcStringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -193,11 +201,9 @@ class OptionArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class UsizeIterable(object): + def collect(self): ... +class UsizeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -225,23 +231,13 @@ class UsizeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def mean(self): - ... - - def min(self): - ... - - def sum(self): - ... - -class OptionI64Iterable(object): + def collect(self): ... + def max(self): ... + def mean(self): ... + def min(self): ... + def sum(self): ... +class OptionI64Iterable(object): def __eq__(self, value): """Return self==value.""" @@ -269,17 +265,11 @@ class OptionI64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def min(self): - ... - -class NestedOptionArcStringIterable(object): + def collect(self): ... + def max(self): ... + def min(self): ... +class NestedOptionArcStringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -307,11 +297,9 @@ class NestedOptionArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class NestedStringIterable(object): + def collect(self): ... +class NestedStringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -339,11 +327,9 @@ class NestedStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class NestedOptionI64Iterable(object): + def collect(self): ... +class NestedOptionI64Iterable(object): def __eq__(self, value): """Return self==value.""" @@ -371,17 +357,11 @@ class NestedOptionI64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def min(self): - ... - -class NestedI64VecIterable(object): + def collect(self): ... + def max(self): ... + def min(self): ... +class NestedI64VecIterable(object): def __eq__(self, value): """Return self==value.""" @@ -409,11 +389,9 @@ class NestedI64VecIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class NestedUsizeIterable(object): + def collect(self): ... +class NestedUsizeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -441,23 +419,13 @@ class NestedUsizeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def mean(self): - ... - - def min(self): - ... - - def sum(self): - ... - -class BoolIterable(object): + def collect(self): ... + def max(self): ... + def mean(self): ... + def min(self): ... + def sum(self): ... +class BoolIterable(object): def __eq__(self, value): """Return self==value.""" @@ -485,11 +453,9 @@ class BoolIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class ArcStringIterable(object): + def collect(self): ... +class ArcStringIterable(object): def __iter__(self): """Implement iter(self).""" @@ -499,11 +465,9 @@ class ArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class NestedVecUtcDateTimeIterable(object): + def collect(self): ... +class NestedVecUtcDateTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -531,11 +495,9 @@ class NestedVecUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class OptionVecUtcDateTimeIterable(object): + def collect(self): ... +class OptionVecUtcDateTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -563,11 +525,9 @@ class OptionVecUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class GIDGIDIterable(object): + def collect(self): ... +class GIDGIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -595,17 +555,11 @@ class GIDGIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def min(self): - ... - -class NestedGIDGIDIterable(object): + def collect(self): ... + def max(self): ... + def min(self): ... +class NestedGIDGIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -633,17 +587,11 @@ class NestedGIDGIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def min(self): - ... - -class NestedBoolIterable(object): + def collect(self): ... + def max(self): ... + def min(self): ... +class NestedBoolIterable(object): def __eq__(self, value): """Return self==value.""" @@ -671,11 +619,9 @@ class NestedBoolIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class U64Iterable(object): + def collect(self): ... +class U64Iterable(object): def __eq__(self, value): """Return self==value.""" @@ -703,23 +649,13 @@ class U64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def mean(self): - ... - - def min(self): - ... - - def sum(self): - ... - -class OptionUtcDateTimeIterable(object): + def collect(self): ... + def max(self): ... + def mean(self): ... + def min(self): ... + def sum(self): ... +class OptionUtcDateTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -747,11 +683,9 @@ class OptionUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class ArcStringVecIterable(object): + def collect(self): ... +class ArcStringVecIterable(object): def __eq__(self, value): """Return self==value.""" @@ -779,11 +713,9 @@ class ArcStringVecIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class NestedArcStringVecIterable(object): + def collect(self): ... +class NestedArcStringVecIterable(object): def __eq__(self, value): """Return self==value.""" @@ -811,5 +743,4 @@ class NestedArcStringVecIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... + def collect(self): ... diff --git a/python/python/raphtory/node_state/__init__.pyi b/python/python/raphtory/node_state/__init__.pyi index 469a550b2e..456f7240dd 100644 --- a/python/python/raphtory/node_state/__init__.pyi +++ b/python/python/raphtory/node_state/__init__.pyi @@ -23,9 +23,42 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['NodeGroups', 'DegreeView', 'NodeStateUsize', 'NodeStateU64', 'NodeStateOptionI64', 'IdView', 'NodeStateGID', 'EarliestTimeView', 'LatestTimeView', 'NameView', 'NodeStateString', 'EarliestDateTimeView', 'LatestDateTimeView', 'NodeStateOptionDateTime', 'HistoryView', 'EdgeHistoryCountView', 'NodeStateListI64', 'HistoryDateTimeView', 'NodeStateOptionListDateTime', 'NodeTypeView', 'NodeStateOptionStr', 'NodeStateListDateTime', 'NodeStateWeightedSP', 'NodeStateF64', 'NodeStateNodes', 'NodeStateReachability', 'NodeStateListF64', 'NodeStateMotifs', 'NodeStateHits', 'NodeStateSEIR', 'NodeLayout', 'NodeStateF64String'] -class NodeGroups(object): - +__all__ = [ + "NodeGroups", + "DegreeView", + "NodeStateUsize", + "NodeStateU64", + "NodeStateOptionI64", + "IdView", + "NodeStateGID", + "EarliestTimeView", + "LatestTimeView", + "NameView", + "NodeStateString", + "EarliestDateTimeView", + "LatestDateTimeView", + "NodeStateOptionDateTime", + "HistoryView", + "EdgeHistoryCountView", + "NodeStateListI64", + "HistoryDateTimeView", + "NodeStateOptionListDateTime", + "NodeTypeView", + "NodeStateOptionStr", + "NodeStateListDateTime", + "NodeStateWeightedSP", + "NodeStateF64", + "NodeStateNodes", + "NodeStateReachability", + "NodeStateListF64", + "NodeStateMotifs", + "NodeStateHits", + "NodeStateSEIR", + "NodeLayout", + "NodeStateF64String", +] + +class NodeGroups(object): def __bool__(self): """True if self else False""" @@ -68,7 +101,7 @@ class NodeGroups(object): Iterator[Tuple[Any, GraphView]]: Iterator over subgraphs with corresponding value """ -class DegreeView(object): +class DegreeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -230,7 +263,9 @@ class DegreeView(object): DegreeView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -385,7 +420,12 @@ class DegreeView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -574,8 +614,7 @@ class DegreeView(object): Optional[int]: """ -class NodeStateUsize(object): - +class NodeStateUsize(object): def __eq__(self, value): """Return self==value.""" @@ -768,8 +807,7 @@ class NodeStateUsize(object): Iterator[int]: Iterator over values """ -class NodeStateU64(object): - +class NodeStateU64(object): def __eq__(self, value): """Return self==value.""" @@ -954,8 +992,7 @@ class NodeStateU64(object): Iterator[int]: Iterator over values """ -class NodeStateOptionI64(object): - +class NodeStateOptionI64(object): def __eq__(self, value): """Return self==value.""" @@ -997,7 +1034,9 @@ class NodeStateOptionI64(object): NodeStateOptionI64: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: + def get( + self, node: NodeInput, default: Optional[Optional[int]] = None + ) -> Optional[Optional[int]]: """ Get value for node @@ -1131,7 +1170,7 @@ class NodeStateOptionI64(object): Iterator[Optional[int]]: Iterator over values """ -class IdView(object): +class IdView(object): """A lazy view over node values""" def __eq__(self, value): @@ -1317,8 +1356,7 @@ class IdView(object): Iterator[GID]: Iterator over values """ -class NodeStateGID(object): - +class NodeStateGID(object): def __eq__(self, value): """Return self==value.""" @@ -1486,7 +1524,7 @@ class NodeStateGID(object): Iterator[GID]: Iterator over values """ -class EarliestTimeView(object): +class EarliestTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -1648,7 +1686,9 @@ class EarliestTimeView(object): EarliestTimeView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -1667,7 +1707,9 @@ class EarliestTimeView(object): WindowSet: A `WindowSet` object. """ - def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: + def get( + self, node: NodeInput, default: Optional[Optional[int]] = None + ) -> Optional[Optional[int]]: """ Get value for node @@ -1795,7 +1837,12 @@ class EarliestTimeView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -1975,7 +2022,7 @@ class EarliestTimeView(object): Optional[int]: """ -class LatestTimeView(object): +class LatestTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -2137,7 +2184,9 @@ class LatestTimeView(object): LatestTimeView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2156,7 +2205,9 @@ class LatestTimeView(object): WindowSet: A `WindowSet` object. """ - def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: + def get( + self, node: NodeInput, default: Optional[Optional[int]] = None + ) -> Optional[Optional[int]]: """ Get value for node @@ -2284,7 +2335,12 @@ class LatestTimeView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -2464,7 +2520,7 @@ class LatestTimeView(object): Optional[int]: """ -class NameView(object): +class NameView(object): """A lazy view over node values""" def __eq__(self, value): @@ -2658,8 +2714,7 @@ class NameView(object): Iterator[str]: Iterator over values """ -class NodeStateString(object): - +class NodeStateString(object): def __eq__(self, value): """Return self==value.""" @@ -2835,7 +2890,7 @@ class NodeStateString(object): Iterator[str]: Iterator over values """ -class EarliestDateTimeView(object): +class EarliestDateTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -2997,7 +3052,9 @@ class EarliestDateTimeView(object): EarliestDateTimeView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3016,7 +3073,9 @@ class EarliestDateTimeView(object): WindowSet: A `WindowSet` object. """ - def get(self, node: NodeInput, default: Optional[Optional[datetime]] = None) -> Optional[Optional[datetime]]: + def get( + self, node: NodeInput, default: Optional[Optional[datetime]] = None + ) -> Optional[Optional[datetime]]: """ Get value for node @@ -3144,7 +3203,12 @@ class EarliestDateTimeView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3324,7 +3388,7 @@ class EarliestDateTimeView(object): Optional[int]: """ -class LatestDateTimeView(object): +class LatestDateTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -3486,7 +3550,9 @@ class LatestDateTimeView(object): LatestDateTimeView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3505,7 +3571,9 @@ class LatestDateTimeView(object): WindowSet: A `WindowSet` object. """ - def get(self, node: NodeInput, default: Optional[Optional[datetime]] = None) -> Optional[Optional[datetime]]: + def get( + self, node: NodeInput, default: Optional[Optional[datetime]] = None + ) -> Optional[Optional[datetime]]: """ Get value for node @@ -3633,7 +3701,12 @@ class LatestDateTimeView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3813,8 +3886,7 @@ class LatestDateTimeView(object): Optional[int]: """ -class NodeStateOptionDateTime(object): - +class NodeStateOptionDateTime(object): def __eq__(self, value): """Return self==value.""" @@ -3856,7 +3928,9 @@ class NodeStateOptionDateTime(object): NodeStateOptionDateTime: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[Optional[datetime]] = None) -> Optional[Optional[datetime]]: + def get( + self, node: NodeInput, default: Optional[Optional[datetime]] = None + ) -> Optional[Optional[datetime]]: """ Get value for node @@ -3990,7 +4064,7 @@ class NodeStateOptionDateTime(object): Iterator[Optional[datetime]]: Iterator over values """ -class HistoryView(object): +class HistoryView(object): """A lazy view over node values""" def __eq__(self, value): @@ -4152,7 +4226,9 @@ class HistoryView(object): HistoryView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -4171,7 +4247,9 @@ class HistoryView(object): WindowSet: A `WindowSet` object. """ - def get(self, node: NodeInput, default: Optional[list[int]] = None) -> Optional[list[int]]: + def get( + self, node: NodeInput, default: Optional[list[int]] = None + ) -> Optional[list[int]]: """ Get value for node @@ -4291,7 +4369,12 @@ class HistoryView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -4471,7 +4554,7 @@ class HistoryView(object): Optional[int]: """ -class EdgeHistoryCountView(object): +class EdgeHistoryCountView(object): """A lazy view over node values""" def __eq__(self, value): @@ -4633,7 +4716,9 @@ class EdgeHistoryCountView(object): EdgeHistoryCountView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -4780,7 +4865,12 @@ class EdgeHistoryCountView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -4969,8 +5059,7 @@ class EdgeHistoryCountView(object): Optional[int]: """ -class NodeStateListI64(object): - +class NodeStateListI64(object): def __eq__(self, value): """Return self==value.""" @@ -5012,7 +5101,9 @@ class NodeStateListI64(object): NodeStateListI64: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[list[int]] = None) -> Optional[list[int]]: + def get( + self, node: NodeInput, default: Optional[list[int]] = None + ) -> Optional[list[int]]: """ Get value for node @@ -5138,7 +5229,7 @@ class NodeStateListI64(object): Iterator[list[int]]: Iterator over values """ -class HistoryDateTimeView(object): +class HistoryDateTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -5300,7 +5391,9 @@ class HistoryDateTimeView(object): HistoryDateTimeView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -5319,7 +5412,9 @@ class HistoryDateTimeView(object): WindowSet: A `WindowSet` object. """ - def get(self, node: NodeInput, default: Optional[Optional[list[datetime]]] = None) -> Optional[Optional[list[datetime]]]: + def get( + self, node: NodeInput, default: Optional[Optional[list[datetime]]] = None + ) -> Optional[Optional[list[datetime]]]: """ Get value for node @@ -5439,7 +5534,12 @@ class HistoryDateTimeView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -5619,8 +5719,7 @@ class HistoryDateTimeView(object): Optional[int]: """ -class NodeStateOptionListDateTime(object): - +class NodeStateOptionListDateTime(object): def __eq__(self, value): """Return self==value.""" @@ -5662,7 +5761,9 @@ class NodeStateOptionListDateTime(object): NodeStateOptionListDateTime: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[Optional[list[datetime]]] = None) -> Optional[Optional[list[datetime]]]: + def get( + self, node: NodeInput, default: Optional[Optional[list[datetime]]] = None + ) -> Optional[Optional[list[datetime]]]: """ Get value for node @@ -5788,7 +5889,7 @@ class NodeStateOptionListDateTime(object): Iterator[Optional[list[datetime]]]: Iterator over values """ -class NodeTypeView(object): +class NodeTypeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -5848,7 +5949,9 @@ class NodeTypeView(object): NodeStateOptionStr: the computed `NodeState` """ - def get(self, node: NodeInput, default: Optional[Optional[str]] = None) -> Optional[Optional[str]]: + def get( + self, node: NodeInput, default: Optional[Optional[str]] = None + ) -> Optional[Optional[str]]: """ Get value for node @@ -5982,8 +6085,7 @@ class NodeTypeView(object): Iterator[Optional[str]]: Iterator over values """ -class NodeStateOptionStr(object): - +class NodeStateOptionStr(object): def __eq__(self, value): """Return self==value.""" @@ -6025,7 +6127,9 @@ class NodeStateOptionStr(object): NodeStateOptionStr: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[Optional[str]] = None) -> Optional[Optional[str]]: + def get( + self, node: NodeInput, default: Optional[Optional[str]] = None + ) -> Optional[Optional[str]]: """ Get value for node @@ -6159,8 +6263,7 @@ class NodeStateOptionStr(object): Iterator[Optional[str]]: Iterator over values """ -class NodeStateListDateTime(object): - +class NodeStateListDateTime(object): def __eq__(self, value): """Return self==value.""" @@ -6202,7 +6305,9 @@ class NodeStateListDateTime(object): NodeStateListDateTime: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[list[datetime]] = None) -> Optional[list[datetime]]: + def get( + self, node: NodeInput, default: Optional[list[datetime]] = None + ) -> Optional[list[datetime]]: """ Get value for node @@ -6328,8 +6433,7 @@ class NodeStateListDateTime(object): Iterator[list[datetime]]: Iterator over values """ -class NodeStateWeightedSP(object): - +class NodeStateWeightedSP(object): def __eq__(self, value): """Return self==value.""" @@ -6360,7 +6464,9 @@ class NodeStateWeightedSP(object): def __repr__(self): """Return repr(self).""" - def get(self, node: NodeInput, default: Optional[Tuple[float, Nodes]] = None) -> Optional[Tuple[float, Nodes]]: + def get( + self, node: NodeInput, default: Optional[Tuple[float, Nodes]] = None + ) -> Optional[Tuple[float, Nodes]]: """ Get value for node @@ -6415,8 +6521,7 @@ class NodeStateWeightedSP(object): Iterator[Tuple[float, Nodes]]: Iterator over values """ -class NodeStateF64(object): - +class NodeStateF64(object): def __eq__(self, value): """Return self==value.""" @@ -6601,8 +6706,7 @@ class NodeStateF64(object): Iterator[float]: Iterator over values """ -class NodeStateNodes(object): - +class NodeStateNodes(object): def __eq__(self, value): """Return self==value.""" @@ -6688,8 +6792,7 @@ class NodeStateNodes(object): Iterator[Nodes]: Iterator over values """ -class NodeStateReachability(object): - +class NodeStateReachability(object): def __eq__(self, value): """Return self==value.""" @@ -6720,7 +6823,9 @@ class NodeStateReachability(object): def __repr__(self): """Return repr(self).""" - def get(self, node: NodeInput, default: Optional[list[Tuple[int, str]]] = None) -> Optional[list[Tuple[int, str]]]: + def get( + self, node: NodeInput, default: Optional[list[Tuple[int, str]]] = None + ) -> Optional[list[Tuple[int, str]]]: """ Get value for node @@ -6775,8 +6880,7 @@ class NodeStateReachability(object): Iterator[list[Tuple[int, str]]]: Iterator over values """ -class NodeStateListF64(object): - +class NodeStateListF64(object): def __eq__(self, value): """Return self==value.""" @@ -6807,7 +6911,9 @@ class NodeStateListF64(object): def __repr__(self): """Return repr(self).""" - def get(self, node: NodeInput, default: Optional[list[float]] = None) -> Optional[list[float]]: + def get( + self, node: NodeInput, default: Optional[list[float]] = None + ) -> Optional[list[float]]: """ Get value for node @@ -6862,8 +6968,7 @@ class NodeStateListF64(object): Iterator[list[float]]: Iterator over values """ -class NodeStateMotifs(object): - +class NodeStateMotifs(object): def __eq__(self, value): """Return self==value.""" @@ -6905,7 +7010,9 @@ class NodeStateMotifs(object): NodeStateMotifs: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[list[int]] = None) -> Optional[list[int]]: + def get( + self, node: NodeInput, default: Optional[list[int]] = None + ) -> Optional[list[int]]: """ Get value for node @@ -7031,8 +7138,7 @@ class NodeStateMotifs(object): Iterator[list[int]]: Iterator over values """ -class NodeStateHits(object): - +class NodeStateHits(object): def __eq__(self, value): """Return self==value.""" @@ -7074,7 +7180,9 @@ class NodeStateHits(object): NodeStateHits: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[Tuple[float, float]] = None) -> Optional[Tuple[float, float]]: + def get( + self, node: NodeInput, default: Optional[Tuple[float, float]] = None + ) -> Optional[Tuple[float, float]]: """ Get value for node @@ -7200,8 +7308,7 @@ class NodeStateHits(object): Iterator[Tuple[float, float]]: Iterator over values """ -class NodeStateSEIR(object): - +class NodeStateSEIR(object): def __eq__(self, value): """Return self==value.""" @@ -7243,7 +7350,9 @@ class NodeStateSEIR(object): NodeStateSEIR: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[Infected] = None) -> Optional[Infected]: + def get( + self, node: NodeInput, default: Optional[Infected] = None + ) -> Optional[Infected]: """ Get value for node @@ -7369,8 +7478,7 @@ class NodeStateSEIR(object): Iterator[Infected]: Iterator over values """ -class NodeLayout(object): - +class NodeLayout(object): def __eq__(self, value): """Return self==value.""" @@ -7401,7 +7509,9 @@ class NodeLayout(object): def __repr__(self): """Return repr(self).""" - def get(self, node: NodeInput, default: Optional[list[float]] = None) -> Optional[list[float]]: + def get( + self, node: NodeInput, default: Optional[list[float]] = None + ) -> Optional[list[float]]: """ Get value for node @@ -7456,8 +7566,7 @@ class NodeLayout(object): Iterator[list[float]]: Iterator over values """ -class NodeStateF64String(object): - +class NodeStateF64String(object): def __eq__(self, value): """Return self==value.""" @@ -7488,7 +7597,9 @@ class NodeStateF64String(object): def __repr__(self): """Return repr(self).""" - def get(self, node: NodeInput, default: Optional[Tuple[float, str]] = None) -> Optional[Tuple[float, str]]: + def get( + self, node: NodeInput, default: Optional[Tuple[float, str]] = None + ) -> Optional[Tuple[float, str]]: """ Get value for node diff --git a/python/python/raphtory/vectors/__init__.pyi b/python/python/raphtory/vectors/__init__.pyi index bd615cda2f..6b9e515fac 100644 --- a/python/python/raphtory/vectors/__init__.pyi +++ b/python/python/raphtory/vectors/__init__.pyi @@ -23,10 +23,15 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['VectorisedGraph', 'Document', 'Embedding', 'VectorSelection'] -class VectorisedGraph(object): +__all__ = ["VectorisedGraph", "Document", "Embedding", "VectorSelection"] - def edges_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> VectorSelection: +class VectorisedGraph(object): + def edges_by_similarity( + self, + query: str | list, + limit: int, + window: Optional[Tuple[int | str, int | str]] = None, + ) -> VectorSelection: """ Search the top scoring edges according to `query` with no more than `limit` edges @@ -42,7 +47,12 @@ class VectorisedGraph(object): def empty_selection(self): """Return an empty selection of documents""" - def entities_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> VectorSelection: + def entities_by_similarity( + self, + query: str | list, + limit: int, + window: Optional[Tuple[int | str, int | str]] = None, + ) -> VectorSelection: """ Search the top scoring entities according to `query` with no more than `limit` entities @@ -55,7 +65,12 @@ class VectorisedGraph(object): VectorSelection: The vector selection resulting from the search """ - def nodes_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> VectorSelection: + def nodes_by_similarity( + self, + query: str | list, + limit: int, + window: Optional[Tuple[int | str, int | str]] = None, + ) -> VectorSelection: """ Search the top scoring nodes according to `query` with no more than `limit` nodes @@ -68,7 +83,7 @@ class VectorisedGraph(object): VectorSelection: The vector selection resulting from the search """ -class Document(object): +class Document(object): """ A Document @@ -109,13 +124,11 @@ class Document(object): Optional[Any]: """ -class Embedding(object): - +class Embedding(object): def __repr__(self): """Return repr(self).""" -class VectorSelection(object): - +class VectorSelection(object): def add_edges(self, edges: list) -> None: """ Add all the documents associated with the `edges` to the current selection @@ -161,7 +174,9 @@ class VectorSelection(object): list[Edge]: list of edges in the current selection """ - def expand(self, hops: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: + def expand( + self, hops: int, window: Optional[Tuple[int | str, int | str]] = None + ) -> None: """ Add all the documents `hops` hops away to the selection @@ -178,7 +193,12 @@ class VectorSelection(object): None: """ - def expand_edges_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: + def expand_edges_by_similarity( + self, + query: str | list, + limit: int, + window: Optional[Tuple[int | str, int | str]] = None, + ) -> None: """ Add the top `limit` adjacent edges with higher score for `query` to the selection @@ -193,7 +213,12 @@ class VectorSelection(object): None: """ - def expand_entities_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: + def expand_entities_by_similarity( + self, + query: str | list, + limit: int, + window: Optional[Tuple[int | str, int | str]] = None, + ) -> None: """ Add the top `limit` adjacent entities with higher score for `query` to the selection @@ -215,7 +240,12 @@ class VectorSelection(object): None: """ - def expand_nodes_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: + def expand_nodes_by_similarity( + self, + query: str | list, + limit: int, + window: Optional[Tuple[int | str, int | str]] = None, + ) -> None: """ Add the top `limit` adjacent nodes with higher score for `query` to the selection diff --git a/raphtory/src/db/api/mutation/index_ops.rs b/raphtory/src/db/api/mutation/index_ops.rs index 1645c8b879..bce81bf9c1 100644 --- a/raphtory/src/db/api/mutation/index_ops.rs +++ b/raphtory/src/db/api/mutation/index_ops.rs @@ -1,11 +1,15 @@ use crate::{ - db::api::view::{IndexSpec, IndexSpecBuilder}, + db::api::view::{internal::InternalStorageOps, IndexSpec, IndexSpecBuilder}, errors::GraphError, prelude::AdditionOps, serialise::{GraphFolder, GraphPaths}, }; -use std::{fs::File, path::Path}; -use zip::ZipArchive; +use std::{ + fs::File, + io::{Seek, Write}, + path::Path, +}; +use zip::{ZipArchive, ZipWriter}; /// Mutation operations for managing indexes. pub trait IndexMutationOps: Sized + AdditionOps { @@ -55,7 +59,7 @@ pub trait IndexMutationOps: Sized + AdditionOps { /// /// Returns: /// None: - fn persist_index_to_disk(&self, path: &GraphFolder) -> Result<(), GraphError>; + fn persist_index_to_disk(&self, path: &impl GraphPaths) -> Result<(), GraphError>; /// Persists the current index to disk as a compressed ZIP file at the specified path. /// @@ -64,7 +68,11 @@ pub trait IndexMutationOps: Sized + AdditionOps { /// /// Returns: /// None: - fn persist_index_to_disk_zip(&self, path: &GraphFolder) -> Result<(), GraphError>; + fn persist_index_to_disk_zip( + &self, + writer: &mut ZipWriter, + prefix: &str, + ) -> Result<(), GraphError>; /// Drops (removes) the current index from the database. /// @@ -138,13 +146,13 @@ impl IndexMutationOps for G { self.get_storage() .map_or(Err(GraphError::IndexingNotSupported), |storage| { if path.is_zip() { - if has_index(path.get_base_path())? { + if has_index(path.root())? { storage.load_index_if_empty(&path)?; } else { return Ok(()); // Skip if no index in zip } } else { - let index_path = path.get_index_path(); + let index_path = path.index_path()?; if index_path.exists() && index_path.read_dir()?.next().is_some() { storage.load_index_if_empty(&path)?; } @@ -162,12 +170,14 @@ impl IndexMutationOps for G { }) } - fn persist_index_to_disk_zip(&self, path: &GraphFolder) -> Result<(), GraphError> { + fn persist_index_to_disk_zip( + &self, + writer: &mut ZipWriter, + prefix: &str, + ) -> Result<(), GraphError> { self.get_storage() - .map_or(Err(GraphError::IndexingNotSupported), |storage| { - storage.persist_index_to_disk_zip(&path)?; - Ok(()) - }) + .ok_or(GraphError::IndexingNotSupported)? + .persist_index_to_disk_zip(writer, prefix) } fn drop_index(&self) -> Result<(), GraphError> { diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 340609fd5d..042fdf9861 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -32,6 +32,7 @@ use raphtory_storage::{ }; use std::{ fmt::{Display, Formatter}, + io::{Seek, Write}, path::Path, sync::Arc, }; @@ -50,6 +51,7 @@ use { raphtory_storage::{core_ops::CoreGraphOps, graph::nodes::node_storage_ops::NodeStorageOps}, std::ops::{Deref, DerefMut}, tracing::info, + zip::ZipWriter, }; #[derive(Debug, Default)] @@ -251,14 +253,18 @@ impl Storage { Ok(()) } - pub(crate) fn persist_index_to_disk_zip(&self, path: &GraphFolder) -> Result<(), GraphError> { + pub(crate) fn persist_index_to_disk_zip( + &self, + writer: &mut ZipWriter, + prefix: &str, + ) -> Result<(), GraphError> { let guard = self.get_index().read_recursive(); if guard.is_indexed() { if guard.path().is_none() { info!("{}", IN_MEMORY_INDEX_NOT_PERSISTED); return Ok(()); } - self.if_index(|index| index.persist_to_disk_zip(path))?; + self.if_index(|index| index.persist_to_disk_zip(writer, prefix))?; } Ok(()) } diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index c8235a6a95..c01b412237 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -33,6 +33,9 @@ use pyo3::PyErr; #[cfg(feature = "search")] use {tantivy, tantivy::query::QueryParserError}; +#[cfg(feature = "io")] +use zip::result::ZipError; + #[derive(thiserror::Error, Debug)] pub enum InvalidPathReason { #[error("Backslash not allowed in path: {0}")] @@ -60,11 +63,8 @@ pub enum InvalidPathReason { GraphIsNamespace(PathBuf), #[error("The path provided already exists as a graph: {0}")] NamespaceIsGraph(PathBuf), - #[error("Failed to strip prefix")] - StripPrefix { - #[from] - source: StripPrefixError, - }, + #[error("Failed to strip prefix: {source}")] + StripPrefix { source: StripPrefixError }, } #[derive(thiserror::Error, Debug)] @@ -253,8 +253,8 @@ pub enum GraphError { #[cfg(feature = "io")] #[error("zip operation failed")] ZipError { - #[from] source: zip::result::ZipError, + location: &'static Location<'static>, }, #[error("Not a zip archive")] @@ -436,6 +436,12 @@ pub enum GraphError { #[error("Invalid prefix, expected '{expected}', got '{actual}'")] InvalidPrefix { expected: String, actual: String }, + + #[error("{source} at {location}")] + StripPrefixError { + source: StripPrefixError, + location: &'static Location<'static>, + }, } impl From for GraphError { @@ -483,6 +489,23 @@ impl From for GraphError { } } +#[cfg(feature = "io")] +impl From for GraphError { + #[track_caller] + fn from(source: ZipError) -> Self { + let location = Location::caller(); + GraphError::ZipError { source, location } + } +} + +impl From for GraphError { + #[track_caller] + fn from(source: StripPrefixError) -> Self { + let location = Location::caller(); + GraphError::StripPrefixError { source, location } + } +} + #[cfg(test)] mod test { use crate::errors::GraphError; diff --git a/raphtory/src/search/graph_index.rs b/raphtory/src/search/graph_index.rs index 3aafcb31c1..e716411392 100644 --- a/raphtory/src/search/graph_index.rs +++ b/raphtory/src/search/graph_index.rs @@ -17,6 +17,7 @@ use std::{ fmt::Debug, fs, fs::File, + io::{Seek, Write}, ops::Deref, path::{Path, PathBuf}, sync::Arc, @@ -24,7 +25,10 @@ use std::{ use tempfile::TempDir; use uuid::Uuid; use walkdir::WalkDir; -use zip::{write::FileOptions, ZipArchive, ZipWriter}; +use zip::{ + write::{FileOptions, SimpleFileOptions}, + ZipArchive, ZipWriter, +}; #[derive(Clone)] pub struct Index { @@ -221,7 +225,7 @@ impl GraphIndex { pub fn load_from_path(path: &GraphFolder) -> Result { if path.is_zip() { let index_path = TempDir::new()?; - unzip_index(&path.get_base_path(), index_path.path())?; + unzip_index(&path.root(), index_path.path())?; let (index, index_spec) = load_indexes(index_path.path())?; @@ -231,12 +235,12 @@ impl GraphIndex { index_spec: Arc::new(RwLock::new(index_spec)), })) } else { - let index_path = path.get_index_path(); + let index_path = path.index_path()?; let (index, index_spec) = load_indexes(index_path.as_path())?; Ok(GraphIndex::Immutable(ImmutableGraphIndex { index, - path: Arc::new(path.clone()), + path: Arc::new(path.data_path()?), index_spec: Arc::new(index_spec), })) } @@ -251,45 +255,26 @@ impl GraphIndex { Ok(()) } - pub(crate) fn persist_to_disk_zip(&self, path: &GraphFolder) -> Result<(), GraphError> { - let file = File::options() - .read(true) - .write(true) - .open(path.get_base_path())?; - let mut zip = ZipWriter::new_append(file)?; - + pub(crate) fn persist_to_disk_zip( + &self, + writer: &mut ZipWriter, + prefix: &str, + ) -> Result<(), GraphError> { let source_path = self.path().ok_or(GraphError::CannotPersistRamIndex)?; - for entry in WalkDir::new(&source_path) .into_iter() .filter_map(Result::ok) .filter(|e| e.path().is_file()) { - let rel_path = entry - .path() - .strip_prefix(&source_path) - .map_err(|e| GraphError::IOErrorMsg(format!("Failed to strip path: {}", e)))?; - - let zip_entry_name = PathBuf::from(INDEX_PATH) - .join(rel_path) - .to_string_lossy() - .into_owned(); - zip.start_file::<_, ()>(zip_entry_name, FileOptions::default()) - .map_err(|e| { - GraphError::IOErrorMsg(format!("Failed to start zip file entry: {}", e)) - })?; + let rel_path = entry.path().strip_prefix(&source_path)?; - let mut f = File::open(entry.path()) - .map_err(|e| GraphError::IOErrorMsg(format!("Failed to open index file: {}", e)))?; + let zip_entry_name = Path::new(prefix).join(rel_path); + writer.start_file_from_path(zip_entry_name, SimpleFileOptions::default())?; - std::io::copy(&mut f, &mut zip).map_err(|e| { - GraphError::IOErrorMsg(format!("Failed to write zip content: {}", e)) - })?; - } - - zip.finish() - .map_err(|e| GraphError::IOErrorMsg(format!("Failed to finalize zip: {}", e)))?; + let mut f = File::open(entry.path())?; + std::io::copy(&mut f, writer)?; + } Ok(()) } diff --git a/raphtory/src/search/mod.rs b/raphtory/src/search/mod.rs index 69d810e807..c7c5bdd153 100644 --- a/raphtory/src/search/mod.rs +++ b/raphtory/src/search/mod.rs @@ -906,7 +906,7 @@ mod test_index { let binding = tempfile::TempDir::new().unwrap(); let path = binding.path(); let folder = GraphFolder::new_as_zip(path); - graph.encode(folder.root_folder).unwrap(); + graph.encode(folder).unwrap(); let graph = Graph::decode(path, None).unwrap(); assert_eq!(index_spec, graph.get_index_spec().unwrap()); diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index fc79666072..71235c8a1c 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -12,7 +12,7 @@ use serde::{Deserialize, Serialize}; use std::{ fs::{self, File}, io::{self, ErrorKind, Read, Seek, Write}, - path::{Path, PathBuf}, + path::{Component, Path, PathBuf}, }; use walkdir::WalkDir; use zip::{ @@ -468,6 +468,7 @@ impl WriteableGraphFolder { } } +#[derive(Clone, Debug)] pub struct InnerGraphFolder { path: PathBuf, } diff --git a/raphtory/tests/serialise_test.rs b/raphtory/tests/serialise_test.rs index 6d77b963e4..7632604d4c 100644 --- a/raphtory/tests/serialise_test.rs +++ b/raphtory/tests/serialise_test.rs @@ -438,7 +438,7 @@ mod serialise_test { fn encode_decode_prop_test() { proptest!(|(edges in build_edge_list(100, 100))| { let g = build_graph_from_edge_list(&edges); - let bytes = g.encode_to_bytes(); + let bytes = g.encode_to_bytes().unwrap(); let g2 = Graph::decode_from_bytes(&bytes, None).unwrap(); assert_graph_equal(&g, &g2); }) From 1eb301889843e9160637d445494b4c275d720b14 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 11 Dec 2025 14:29:22 +0100 Subject: [PATCH 14/39] move the python benchmarks so they don't always run --- .../tests/{test_base_install => }/test_graph_benchmarks.py | 0 python/tox.ini | 5 ++++- 2 files changed, 4 insertions(+), 1 deletion(-) rename python/tests/{test_base_install => }/test_graph_benchmarks.py (100%) diff --git a/python/tests/test_base_install/test_graph_benchmarks.py b/python/tests/test_graph_benchmarks.py similarity index 100% rename from python/tests/test_base_install/test_graph_benchmarks.py rename to python/tests/test_graph_benchmarks.py diff --git a/python/tox.ini b/python/tox.ini index 888e1380ed..1de129bd95 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -50,8 +50,11 @@ deps = matplotlib commands = pytest --nbmake --nbmake-timeout=1200 {tty:--color=yes} ../examples/python/socio-patterns/example.ipynb +[testenv:benchmark] +commands = pytest tests/test_graph_benchmarks.py + [testenv:docs] -deps = +deps = -r ../docs/requirements.txt change_dir = ../docs/user-guide commands = pytest --markdown-docs -m markdown-docs --markdown-docs-syntax=superfences From 3f2a95da0a2b7dcd48356ef0112ff8f5ed3bdb09 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 11 Dec 2025 14:30:17 +0100 Subject: [PATCH 15/39] make secondary_index the last argument so it doesn't become annoying --- raphtory/src/python/graph/graph.rs | 25 +++++------ .../src/python/graph/graph_with_deletions.rs | 42 +++++++++++-------- 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 212d6a1c0e..7ce640fc9a 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -627,12 +627,12 @@ impl PyGraph { /// df (DataFrame): The Pandas DataFrame containing the nodes. /// time (str): The column name for the timestamps. /// id (str): The column name for the node IDs. - /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. @@ -640,19 +640,20 @@ impl PyGraph { /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (df, time, id, secondary_index = None, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None) + signature = (df, time, id, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None, secondary_index = None) )] fn load_nodes_from_pandas<'py>( &self, df: &Bound<'py, PyAny>, time: &str, id: &str, - secondary_index: Option<&str>, + node_type: Option<&str>, node_type_col: Option<&str>, properties: Option>, metadata: Option>, shared_metadata: Option>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -676,12 +677,12 @@ impl PyGraph { /// parquet_path (str): Parquet file or directory of Parquet files containing the nodes /// time (str): The column name for the timestamps. /// id (str): The column name for the node IDs. - /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. @@ -689,19 +690,19 @@ impl PyGraph { /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (parquet_path, time, id, secondary_index = None, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None) + signature = (parquet_path, time, id, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None, secondary_index = None) )] fn load_nodes_from_parquet( &self, parquet_path: PathBuf, time: &str, id: &str, - secondary_index: Option<&str>, node_type: Option<&str>, node_type_col: Option<&str>, properties: Option>, metadata: Option>, shared_metadata: Option>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -727,12 +728,12 @@ impl PyGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. @@ -740,7 +741,7 @@ impl PyGraph { /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (df, time, src, dst, secondary_index = None, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) + signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, secondary_index = None) )] fn load_edges_from_pandas( &self, @@ -748,12 +749,12 @@ impl PyGraph { time: &str, src: &str, dst: &str, - secondary_index: Option<&str>, properties: Option>, metadata: Option>, shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -779,12 +780,12 @@ impl PyGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. @@ -792,7 +793,7 @@ impl PyGraph { /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (parquet_path, time, src, dst, secondary_index = None, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) + signature = (parquet_path, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, secondary_index = None) )] fn load_edges_from_parquet( &self, @@ -800,12 +801,12 @@ impl PyGraph { time: &str, src: &str, dst: &str, - secondary_index: Option<&str>, properties: Option>, metadata: Option>, shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index e89f2529c9..9e22b3a23c 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -575,31 +575,32 @@ impl PyPersistentGraph { /// df (DataFrame): The Pandas DataFrame containing the nodes. /// time (str): The column name for the timestamps. /// id (str): The column name for the node IDs. - /// secondary_index (str, optional): The column name for the secondary index. /// NOTE: All values in this column must be unique. Defaults to None. /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + /// secondary_index (str, optional): The column name for the secondary index. /// /// Returns: /// None: This function does not return a value, if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. - #[pyo3(signature = (df, time, id, secondary_index = None, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None))] + #[pyo3(signature = (df, time, id, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None, secondary_index = None))] fn load_nodes_from_pandas( &self, df: &Bound, time: &str, id: &str, - secondary_index: Option<&str>, + node_type: Option<&str>, node_type_col: Option<&str>, properties: Option>, metadata: Option>, shared_metadata: Option>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -623,31 +624,32 @@ impl PyPersistentGraph { /// parquet_path (str): Parquet file or directory of Parquet files containing the nodes /// time (str): The column name for the timestamps. /// id (str): The column name for the node IDs. - /// secondary_index (str, optional): The column name for the secondary index. /// NOTE: All values in this column must be unique. Defaults to None. /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + /// secondary_index (str, optional): The column name for the secondary index. /// /// Returns: /// None: This function does not return a value, if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. - #[pyo3(signature = (parquet_path, time, id, secondary_index = None, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None))] + #[pyo3(signature = (parquet_path, time, id, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None, secondary_index = None))] fn load_nodes_from_parquet( &self, parquet_path: PathBuf, time: &str, id: &str, - secondary_index: Option<&str>, + node_type: Option<&str>, node_type_col: Option<&str>, properties: Option>, metadata: Option>, shared_metadata: Option>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -673,32 +675,33 @@ impl PyPersistentGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// secondary_index (str, optional): The column name for the secondary index. /// NOTE: All values in this column must be unique. Defaults to None. /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// secondary_index (str, optional): The column name for the secondary index. /// /// Returns: /// None: This function does not return a value, if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. - #[pyo3(signature = (df, time, src, dst, secondary_index = None, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None))] + #[pyo3(signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, secondary_index = None))] fn load_edges_from_pandas( &self, df: &Bound, time: &str, src: &str, dst: &str, - secondary_index: Option<&str>, + properties: Option>, metadata: Option>, shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -724,32 +727,33 @@ impl PyPersistentGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// secondary_index (str, optional): The column name for the secondary index. /// NOTE: All values in this column must be unique. Defaults to None. /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// secondary_index (str, optional): The column name for the secondary index. /// /// Returns: /// None: This function does not return a value, if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. - #[pyo3(signature = (parquet_path, time, src, dst, secondary_index = None, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None))] + #[pyo3(signature = (parquet_path, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, secondary_index = None))] fn load_edges_from_parquet( &self, parquet_path: PathBuf, time: &str, src: &str, dst: &str, - secondary_index: Option<&str>, + properties: Option>, metadata: Option>, shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -776,26 +780,27 @@ impl PyPersistentGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// secondary_index (str, optional): The column name for the secondary index. /// NOTE: All values in this column must be unique. Defaults to None. /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// secondary_index (str, optional): The column name for the secondary index. /// /// Returns: /// None: This function does not return a value, if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. - #[pyo3(signature = (df, time, src, dst, secondary_index = None, layer = None, layer_col = None))] + #[pyo3(signature = (df, time, src, dst, layer = None, layer_col = None, secondary_index = None))] fn load_edge_deletions_from_pandas( &self, df: &Bound, time: &str, src: &str, dst: &str, - secondary_index: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { load_edge_deletions_from_pandas( &self.graph, @@ -816,26 +821,27 @@ impl PyPersistentGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// secondary_index (str, optional): The column name for the secondary index. /// NOTE: All values in this column must be unique. Defaults to None. /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// secondary_index (str, optional): The column name for the secondary index. /// /// Returns: /// None: This function does not return a value, if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. - #[pyo3(signature = (parquet_path, time, src, dst, secondary_index = None, layer = None, layer_col = None))] + #[pyo3(signature = (parquet_path, time, src, dst, layer = None, layer_col = None, secondary_index = None))] fn load_edge_deletions_from_parquet( &self, parquet_path: PathBuf, time: &str, src: &str, dst: &str, - secondary_index: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { load_edge_deletions_from_parquet( &self.graph, From 55cb6f6fb32843276120c48b63faaf4fd3282f16 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 11 Dec 2025 15:19:20 +0100 Subject: [PATCH 16/39] improved error messages --- .../test_graphql/edit_graph/test_archive_graph.py | 4 ++-- .../test_graphql/edit_graph/test_copy_graph.py | 8 ++++---- raphtory-graphql/src/paths.rs | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py index 858dd15f30..c926fdbbec 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py @@ -21,7 +21,7 @@ def test_archive_graph_fails_if_graph_not_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g1' not found" in str(excinfo.value) def test_archive_graph_fails_if_graph_not_found_at_namespace(): @@ -38,7 +38,7 @@ def test_archive_graph_fails_if_graph_not_found_at_namespace(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g1' not found" in str(excinfo.value) def test_archive_graph_succeeds(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py index 3d72683421..5e0617a350 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py @@ -20,7 +20,7 @@ def test_copy_graph_fails_if_graph_not_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'ben/g5' not found" in str(excinfo.value) def test_copy_graph_fails_if_graph_with_same_name_already_exists(): @@ -45,7 +45,7 @@ def test_copy_graph_fails_if_graph_with_same_name_already_exists(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'g6' already exists" in str(excinfo.value) def test_copy_graph_fails_if_graph_with_same_name_already_exists_at_same_namespace_as_graph(): @@ -70,7 +70,7 @@ def test_copy_graph_fails_if_graph_with_same_name_already_exists_at_same_namespa }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'ben/g6' already exists" in str(excinfo.value) def test_copy_graph_fails_if_graph_with_same_name_already_exists_at_diff_namespace_as_graph(): @@ -96,7 +96,7 @@ def test_copy_graph_fails_if_graph_with_same_name_already_exists_at_diff_namespa }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'shivam/g6' already exists" in str(excinfo.value) def test_copy_graph_succeeds(): diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index 7eecadcd50..e5094bbfbb 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -439,20 +439,20 @@ impl From for InternalPathValidationError { #[derive(thiserror::Error, Debug)] pub enum PathValidationError { - #[error("Graph {0} already exists")] + #[error("Graph '{0}' already exists")] GraphExistsError(String), - #[error("Graph {0} does not exist")] + #[error("Graph '{0}' does not exist")] GraphNotExistsError(String), - #[error("The path provided does not exists as a namespace: {0}")] + #[error("'{0}' does not exist as a namespace")] NamespaceDoesNotExist(String), #[error(transparent)] InvalidPath(#[from] InvalidPathReason), - #[error("Graph {graph} is corrupted: {error}")] + #[error("Graph '{graph}' is corrupted: {error}")] InternalError { graph: String, error: InternalPathValidationError, }, - #[error("Unexpected IO error for graph {graph}: {error}")] + #[error("Unexpected IO error for graph '{graph}': {error}")] IOError { graph: String, error: io::Error }, } From 42cff8e780a143ba84b2475be5ede3b2c9d16bb3 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 11 Dec 2025 15:19:46 +0100 Subject: [PATCH 17/39] need to get the graph before creating the new folder or the new path gets cleaned up again! --- raphtory-graphql/src/lib.rs | 22 ++++++++++++++++++++++ raphtory-graphql/src/model/mod.rs | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/raphtory-graphql/src/lib.rs b/raphtory-graphql/src/lib.rs index 44d0db8d00..7a62fc1c45 100644 --- a/raphtory-graphql/src/lib.rs +++ b/raphtory-graphql/src/lib.rs @@ -61,6 +61,28 @@ mod graphql_test { }; use tempfile::tempdir; + #[tokio::test] + async fn test_copy_graph() { + let graph = Graph::new(); + graph.add_node(1, "test", NO_PROPS, None).unwrap(); + let tmp_dir = tempdir().unwrap(); + let data = Data::new(tmp_dir.path(), &AppConfig::default()); + let namespace = tmp_dir.path().join("test"); + fs::create_dir(&namespace).unwrap(); + graph.encode(namespace.join("g3")).unwrap(); + let schema = App::create_schema().data(data).finish().unwrap(); + let query = r#"mutation { + copyGraph( + path: "test/g3", + newPath: "test/g4", + ) + }"#; + + let req = Request::new(query); + let res = schema.execute(req).await; + assert_eq!(res.errors, []); + } + #[tokio::test] #[cfg(feature = "search")] async fn test_search_nodes_gql() { diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index 3c468b6f01..7a629af159 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -242,8 +242,8 @@ impl Mut { // for the templates or if it needs to be vectorised at all let overwrite = overwrite.unwrap_or(false); let data = ctx.data_unchecked::(); - let folder = data.validate_path_for_insert(new_path, overwrite)?; let graph = data.get_graph(path).await?.graph; + let folder = data.validate_path_for_insert(new_path, overwrite)?; data.insert_graph(folder, graph).await?; Ok(true) From 7ea994b18bc68b61b8c7bc39b88988c37eb8a9f5 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 11 Dec 2025 15:20:14 +0100 Subject: [PATCH 18/39] don't create arbitrarily deep paths when writing graphs --- python/tests/test_base_install/test_graphdb/test_graphdb.py | 2 +- raphtory/src/serialise/graph_folder.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tests/test_base_install/test_graphdb/test_graphdb.py b/python/tests/test_base_install/test_graphdb/test_graphdb.py index 59b843676c..a0470cc96d 100644 --- a/python/tests/test_base_install/test_graphdb/test_graphdb.py +++ b/python/tests/test_base_install/test_graphdb/test_graphdb.py @@ -1203,7 +1203,7 @@ def test_save_missing_dir(): g = create_graph() tmpdirname = tempfile.TemporaryDirectory() inner_folder = "".join(random.choice(string.ascii_letters) for _ in range(10)) - graph_path = tmpdirname.name + "/" + inner_folder + "/test_graph.bin" + graph_path = tmpdirname.name + "/" + inner_folder + "/test_graph" with pytest.raises(Exception): g.save_to_file(graph_path) diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 71235c8a1c..a71c2eff89 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -372,7 +372,7 @@ impl GraphFolder { return Err(GraphError::NonEmptyGraphFolder(self.root_folder.clone())); } } else { - fs::create_dir_all(&self.root_folder)? + fs::create_dir(&self.root_folder)? } Ok(()) From 9ba4158cfd4aa9e1e1329a9618f1aa9d890f83a3 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 11 Dec 2025 17:15:33 +0100 Subject: [PATCH 19/39] better error messages --- .../edit_graph/test_archive_graph.py | 4 ++-- .../test_graphql/edit_graph/test_copy_graph.py | 2 +- .../test_graphql/edit_graph/test_delete_graph.py | 8 ++++---- .../test_graphql/edit_graph/test_get_graph.py | 4 ++-- .../test_graphql/edit_graph/test_move_graph.py | 16 ++++++++-------- .../test_graphql/edit_graph/test_new_graph.py | 2 +- .../edit_graph/test_receive_graph.py | 4 ++-- .../test_graphql/edit_graph/test_send_graph.py | 4 ++-- .../test_graphql/edit_graph/test_upload_graph.py | 4 ++-- 9 files changed, 24 insertions(+), 24 deletions(-) diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py index c926fdbbec..64abdc470e 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py @@ -21,7 +21,7 @@ def test_archive_graph_fails_if_graph_not_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph 'g1' not found" in str(excinfo.value) + assert "Graph 'g1' does not exist" in str(excinfo.value) def test_archive_graph_fails_if_graph_not_found_at_namespace(): @@ -38,7 +38,7 @@ def test_archive_graph_fails_if_graph_not_found_at_namespace(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph 'shivam/g1' not found" in str(excinfo.value) + assert "Graph 'shivam/g1' does not exist" in str(excinfo.value) def test_archive_graph_succeeds(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py index 5e0617a350..734e08cce9 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py @@ -20,7 +20,7 @@ def test_copy_graph_fails_if_graph_not_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph 'ben/g5' not found" in str(excinfo.value) + assert "Graph 'ben/g5' does not exist" in str(excinfo.value) def test_copy_graph_fails_if_graph_with_same_name_already_exists(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_delete_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_delete_graph.py index 29b7a1d2b1..768aa23b61 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_delete_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_delete_graph.py @@ -18,7 +18,7 @@ def test_delete_graph_fails_if_graph_not_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'ben/g5' does not exist" in str(excinfo.value) def test_delete_graph_succeeds_if_graph_found(): @@ -43,7 +43,7 @@ def test_delete_graph_succeeds_if_graph_found(): query = """{graph(path: "g1") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g1' does not exist" in str(excinfo.value) def test_delete_graph_using_client_api_succeeds_if_graph_found(): @@ -62,7 +62,7 @@ def test_delete_graph_using_client_api_succeeds_if_graph_found(): query = """{graph(path: "g1") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g1' does not exist" in str(excinfo.value) def test_delete_graph_succeeds_if_graph_found_at_namespace(): @@ -87,4 +87,4 @@ def test_delete_graph_succeeds_if_graph_found_at_namespace(): query = """{graph(path: "g1") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g1' does not exist" in str(excinfo.value) diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_get_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_get_graph.py index 740278d623..6f22bc0928 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_get_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_get_graph.py @@ -16,7 +16,7 @@ def test_get_graph_fails_if_graph_not_found(): query = """{ graph(path: "g1") { name, path, nodes { list { name } } } }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g1' does not exist" in str(excinfo.value) def test_get_graph_fails_if_graph_not_found_at_namespace(): @@ -29,7 +29,7 @@ def test_get_graph_fails_if_graph_not_found_at_namespace(): ) with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g1' does not exist" in str(excinfo.value) def test_get_graph_succeeds_if_graph_found(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_move_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_move_graph.py index 98eb97d4bf..f72762e3d8 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_move_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_move_graph.py @@ -20,7 +20,7 @@ def test_move_graph_fails_if_graph_not_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'ben/g5' does not exist" in str(excinfo.value) def test_move_graph_fails_if_graph_with_same_name_already_exists(): @@ -45,7 +45,7 @@ def test_move_graph_fails_if_graph_with_same_name_already_exists(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'g6' already exists" in str(excinfo.value) def test_move_graph_fails_if_graph_with_same_name_already_exists_at_same_namespace_as_graph(): @@ -70,7 +70,7 @@ def test_move_graph_fails_if_graph_with_same_name_already_exists_at_same_namespa }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'ben/g6' already exists" in str(excinfo.value) def test_move_graph_fails_if_graph_with_same_name_already_exists_at_diff_namespace_as_graph(): @@ -96,7 +96,7 @@ def test_move_graph_fails_if_graph_with_same_name_already_exists_at_diff_namespa }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'shivam/g6' already exists" in str(excinfo.value) def test_move_graph_succeeds(): @@ -124,7 +124,7 @@ def test_move_graph_succeeds(): query = """{graph(path: "shivam/g3") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g3' does not exist" in str(excinfo.value) query = """{graph(path: "g4") { nodes {list {name}} @@ -157,7 +157,7 @@ def test_move_graph_using_client_api_succeeds(): query = """{graph(path: "shivam/g3") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g3' does not exist" in str(excinfo.value) query = """{graph(path: "ben/g4") { nodes {list {name}} @@ -197,7 +197,7 @@ def test_move_graph_succeeds_at_same_namespace_as_graph(): query = """{graph(path: "shivam/g3") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g3' does not exist" in str(excinfo.value) query = """{graph(path: "shivam/g4") { nodes {list {name}} @@ -238,7 +238,7 @@ def test_move_graph_succeeds_at_diff_namespace_as_graph(): query = """{graph(path: "ben/g3") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'ben/g3' does not exist" in str(excinfo.value) query = """{graph(path: "shivam/g4") { nodes {list {name}} diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_new_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_new_graph.py index da0d3f6c9d..adba406a92 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_new_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_new_graph.py @@ -45,7 +45,7 @@ def test_new_graph_fails_if_graph_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'test/path/g1' already exists" in str(excinfo.value) def test_client_new_graph_works(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_receive_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_receive_graph.py index 8f6daa3cd8..8fff9ff978 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_receive_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_receive_graph.py @@ -16,7 +16,7 @@ def test_receive_graph_fails_if_no_graph_found(): query = """{ receiveGraph(path: "g2") }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g2' does not exist" in str(excinfo.value) def test_receive_graph_succeeds_if_graph_found(): @@ -62,7 +62,7 @@ def test_receive_graph_fails_if_no_graph_found_at_namespace(): query = """{ receiveGraph(path: "shivam/g2") }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g2' does not exist" in str(excinfo.value) def test_receive_graph_succeeds_if_graph_found_at_namespace(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_send_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_send_graph.py index d73703d88a..7b0b3b0e1d 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_send_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_send_graph.py @@ -31,7 +31,7 @@ def test_send_graph_fails_if_graph_already_exists(): client = RaphtoryClient("http://localhost:1736") with pytest.raises(Exception) as excinfo: client.send_graph(path="g", graph=g) - assert "Graph already exists by name = g" in str(excinfo.value) + assert "Graph 'g' already exists" in str(excinfo.value) def test_send_graph_succeeds_if_graph_already_exists_with_overwrite_enabled(): @@ -94,7 +94,7 @@ def test_send_graph_fails_if_graph_already_exists_at_namespace(): client = RaphtoryClient("http://localhost:1736") with pytest.raises(Exception) as excinfo: client.send_graph(path="shivam/g", graph=g) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'shivam/g' already exists" in str(excinfo.value) def test_send_graph_succeeds_if_graph_already_exists_at_namespace_with_overwrite_enabled(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_upload_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_upload_graph.py index 78e7e7ac1b..974e1320ab 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_upload_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_upload_graph.py @@ -70,7 +70,7 @@ def test_upload_graph_fails_if_graph_already_exists(): client = RaphtoryClient("http://localhost:1736") with pytest.raises(Exception) as excinfo: client.upload_graph(path="g", file_path=g_file_path) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'g' already exists" in str(excinfo.value) def test_upload_graph_succeeds_if_graph_already_exists_with_overwrite_enabled(): @@ -153,7 +153,7 @@ def test_upload_graph_fails_if_graph_already_exists_at_namespace(): client = RaphtoryClient("http://localhost:1736") with pytest.raises(Exception) as excinfo: client.upload_graph(path="shivam/g", file_path=g_file_path, overwrite=False) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'shivam/g' already exists" in str(excinfo.value) def test_upload_graph_succeeds_if_graph_already_exists_at_namespace_with_overwrite_enabled(): From b85412d932ea23b2d2113598556cc937baf9f80b Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 11 Dec 2025 17:15:50 +0100 Subject: [PATCH 20/39] overwrite is handled internally, no need to call delete --- raphtory-graphql/src/model/mod.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index 7a629af159..bc5c09b9b9 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -262,11 +262,6 @@ impl Mut { let data = ctx.data_unchecked::(); let in_file = graph.value(ctx)?.content; let folder = data.validate_path_for_insert(&path, overwrite)?; - - if overwrite { - let _ignored = data.delete_graph(&path).await; - } - data.insert_graph_as_bytes(folder, in_file).await?; Ok(path) From 756b1b670ce81ab0148be1934e16c29bdd538a08 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 11 Dec 2025 17:16:09 +0100 Subject: [PATCH 21/39] fix doc strings --- raphtory-graphql/schema.graphql | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/raphtory-graphql/schema.graphql b/raphtory-graphql/schema.graphql index 965ea9c050..9a107ad3df 100644 --- a/raphtory-graphql/schema.graphql +++ b/raphtory-graphql/schema.graphql @@ -1208,19 +1208,13 @@ type MutRoot { """ newGraph(path: String!, graphType: GraphType!): Boolean! """ - Move graph from a path path on the server to a new_path on the server. - - If namespace is not provided, it will be set to the current working directory. - This applies to both the graph namespace and new graph namespace. + Move graph from a path on the server to a new_path on the server. """ - moveGraph(path: String!, newPath: String!): Boolean! + moveGraph(path: String!, newPath: String!, overwrite: Boolean): Boolean! """ - Copy graph from a path path on the server to a new_path on the server. - - If namespace is not provided, it will be set to the current working directory. - This applies to both the graph namespace and new graph namespace. + Copy graph from a path on the server to a new_path on the server. """ - copyGraph(path: String!, newPath: String!): Boolean! + copyGraph(path: String!, newPath: String!, overwrite: Boolean): Boolean! """ Upload a graph file from a path on the client using GQL multipart uploading. From b66912294f5896a75fb13a34d8bb6859ce1ca696 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 12 Dec 2025 12:14:23 +0100 Subject: [PATCH 22/39] no more deserialize for Prop --- .../src/core/entities/properties/prop/prop_array.rs | 9 --------- .../src/core/entities/properties/prop/prop_enum.rs | 2 +- raphtory-core/src/entities/properties/graph_meta.rs | 2 +- raphtory-core/src/entities/properties/tprop.rs | 2 +- raphtory-core/src/storage/mod.rs | 4 ++-- raphtory-graphql/src/model/graph/meta_graph.rs | 9 ++------- raphtory/src/serialise/metadata.rs | 4 ---- raphtory/tests/serialise_test.rs | 7 ------- 8 files changed, 7 insertions(+), 32 deletions(-) diff --git a/raphtory-api/src/core/entities/properties/prop/prop_array.rs b/raphtory-api/src/core/entities/properties/prop/prop_array.rs index d492b11cb4..557a7fbbe7 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_array.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_array.rs @@ -207,15 +207,6 @@ impl Serialize for PropArray { } } -impl<'de> Deserialize<'de> for PropArray { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let vec: Vec = Deserialize::deserialize(deserializer)?; - Ok(PropArray::Vec(Arc::from(vec))) - } -} impl PartialEq for PropArray { fn eq(&self, other: &Self) -> bool { diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index acde96f75d..bdf52c475f 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -34,7 +34,7 @@ pub const DECIMAL_MAX: i128 = 99999999999999999999999999999999999999i128; // equ pub struct InvalidBigDecimal(BigDecimal); /// Denotes the types of properties allowed to be stored in the graph. -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, derive_more::From)] +#[derive(Debug, Serialize, PartialEq, Clone, derive_more::From)] pub enum Prop { Str(ArcStr), U8(u8), diff --git a/raphtory-core/src/entities/properties/graph_meta.rs b/raphtory-core/src/entities/properties/graph_meta.rs index 49b5b193e6..305a86a751 100644 --- a/raphtory-core/src/entities/properties/graph_meta.rs +++ b/raphtory-core/src/entities/properties/graph_meta.rs @@ -19,7 +19,7 @@ use raphtory_api::core::{ use serde::{Deserialize, Serialize}; use std::ops::{Deref, DerefMut}; -#[derive(Serialize, Deserialize, Debug, Default)] +#[derive(Serialize, Debug, Default)] pub struct GraphMeta { metadata_mapper: PropMapper, temporal_mapper: PropMapper, diff --git a/raphtory-core/src/entities/properties/tprop.rs b/raphtory-core/src/entities/properties/tprop.rs index 3ea03418fe..166a093211 100644 --- a/raphtory-core/src/entities/properties/tprop.rs +++ b/raphtory-core/src/entities/properties/tprop.rs @@ -19,7 +19,7 @@ use serde::{Deserialize, Serialize}; use std::{collections::HashMap, iter, ops::Range, sync::Arc}; use thiserror::Error; -#[derive(Debug, Default, PartialEq, Clone, Serialize, Deserialize)] +#[derive(Debug, Default, PartialEq, Clone, Serialize)] pub enum TProp { #[default] Empty, diff --git a/raphtory-core/src/storage/mod.rs b/raphtory-core/src/storage/mod.rs index 4405c3c272..71f54275c7 100644 --- a/raphtory-core/src/storage/mod.rs +++ b/raphtory-core/src/storage/mod.rs @@ -29,7 +29,7 @@ pub mod lazy_vec; pub mod locked_view; pub mod timeindex; -#[derive(Debug, Serialize, Deserialize, PartialEq, Default)] +#[derive(Debug, Serialize, PartialEq, Default)] pub struct TColumns { t_props_log: Vec, num_rows: usize, @@ -124,7 +124,7 @@ impl TColumns { } } -#[derive(Debug, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Serialize, PartialEq)] pub enum PropColumn { Empty(usize), Bool(LazyVec), diff --git a/raphtory-graphql/src/model/graph/meta_graph.rs b/raphtory-graphql/src/model/graph/meta_graph.rs index 2b01d95105..24c6a713e3 100644 --- a/raphtory-graphql/src/model/graph/meta_graph.rs +++ b/raphtory-graphql/src/model/graph/meta_graph.rs @@ -93,12 +93,7 @@ impl MetaGraph { /// Returns the metadata of the graph. async fn metadata(&self) -> Result> { - Ok(self - .meta() - .await? - .metadata - .iter() - .map(|(key, prop)| GqlProperty::new(key.to_string(), prop.clone())) - .collect()) + // Need to read this from parquet + todo!() } } diff --git a/raphtory/src/serialise/metadata.rs b/raphtory/src/serialise/metadata.rs index 4f4fe63175..aef2f31d94 100644 --- a/raphtory/src/serialise/metadata.rs +++ b/raphtory/src/serialise/metadata.rs @@ -13,7 +13,6 @@ use serde::{Deserialize, Serialize}; pub struct GraphMetadata { pub node_count: usize, pub edge_count: usize, - pub metadata: Vec<(ArcStr, Prop)>, pub graph_type: GraphType, pub is_diskgraph: bool, } @@ -22,13 +21,11 @@ impl GraphMetadata { pub fn from_graph(graph: G) -> Self { let node_count = graph.count_nodes(); let edge_count = graph.count_edges(); - let metadata = graph.metadata().as_vec(); let graph_type = graph.graph_type(); let is_diskgraph = graph.disk_storage_enabled().is_some(); Self { node_count, edge_count, - metadata, graph_type, is_diskgraph, } @@ -39,6 +36,5 @@ pub fn assert_metadata_correct<'graph>(folder: &GraphFolder, graph: &impl GraphV let metadata = folder.read_metadata().unwrap(); assert_eq!(metadata.node_count, graph.count_nodes()); assert_eq!(metadata.edge_count, graph.count_edges()); - assert_eq!(metadata.metadata, graph.properties().as_vec()); assert_eq!(metadata.graph_type, graph.graph_type()); } diff --git a/raphtory/tests/serialise_test.rs b/raphtory/tests/serialise_test.rs index 7632604d4c..813039ac9b 100644 --- a/raphtory/tests/serialise_test.rs +++ b/raphtory/tests/serialise_test.rs @@ -131,7 +131,6 @@ mod serialise_test { } #[test] - #[ignore = "TODO: #2377"] fn edge_t_props() { let tempdir = TempDir::new().unwrap(); let temp_file = tempdir.path().join("graph"); @@ -183,7 +182,6 @@ mod serialise_test { } #[test] - #[ignore = "TODO: #2377"] fn test_all_the_t_props_on_node() { let mut props = vec![]; write_props_to_vec(&mut props); @@ -211,7 +209,6 @@ mod serialise_test { } #[test] - #[ignore = "TODO: #2377"] fn test_all_the_t_props_on_edge() { let mut props = vec![]; write_props_to_vec(&mut props); @@ -239,7 +236,6 @@ mod serialise_test { } #[test] - #[ignore = "TODO: #2377"] fn test_all_the_metadata_on_edge() { let mut props = vec![]; write_props_to_vec(&mut props); @@ -267,7 +263,6 @@ mod serialise_test { } #[test] - #[ignore = "TODO: #2377"] fn test_all_the_metadata_on_node() { let mut props = vec![]; write_props_to_vec(&mut props); @@ -293,7 +288,6 @@ mod serialise_test { } #[test] - #[ignore = "TODO: #2377"] fn graph_metadata() { let mut props = vec![]; write_props_to_vec(&mut props); @@ -315,7 +309,6 @@ mod serialise_test { } #[test] - #[ignore = "TODO: #2377"] fn graph_temp_properties() { let mut props = vec![]; write_props_to_vec(&mut props); From 7ae63762d6f19a6d1d6a5a34bbefe81a8e475db5 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 12 Dec 2025 16:48:25 +0100 Subject: [PATCH 23/39] load graph metadata from the parquet file instead --- .../src/model/graph/meta_graph.rs | 13 +++++-- raphtory/src/io/arrow/prop_handler.rs | 4 +-- raphtory/src/serialise/mod.rs | 2 +- raphtory/src/serialise/parquet/mod.rs | 35 +++++++++++++++++-- 4 files changed, 45 insertions(+), 9 deletions(-) diff --git a/raphtory-graphql/src/model/graph/meta_graph.rs b/raphtory-graphql/src/model/graph/meta_graph.rs index 24c6a713e3..70b044fcec 100644 --- a/raphtory-graphql/src/model/graph/meta_graph.rs +++ b/raphtory-graphql/src/model/graph/meta_graph.rs @@ -3,7 +3,11 @@ use crate::{ paths::{ExistingGraphFolder, ValidGraphPaths}, }; use dynamic_graphql::{ResolvedObject, ResolvedObjectFields, Result}; -use raphtory::{errors::GraphError, serialise::metadata::GraphMetadata}; +use raphtory::{ + errors::GraphError, + io::parquet_loaders::load_graph_props_from_parquet, + serialise::{metadata::GraphMetadata, parquet::decode_graph_metadata, GraphPaths}, +}; use std::{cmp::Ordering, sync::Arc}; use tokio::sync::OnceCell; @@ -93,7 +97,10 @@ impl MetaGraph { /// Returns the metadata of the graph. async fn metadata(&self) -> Result> { - // Need to read this from parquet - todo!() + let res = decode_graph_metadata(&self.folder)?; + Ok(res + .into_iter() + .filter_map(|(key, value)| value.map(|prop| GqlProperty::new(key, prop))) + .collect()) } } diff --git a/raphtory/src/io/arrow/prop_handler.rs b/raphtory/src/io/arrow/prop_handler.rs index aa3849d8ff..183ad63f44 100644 --- a/raphtory/src/io/arrow/prop_handler.rs +++ b/raphtory/src/io/arrow/prop_handler.rs @@ -242,7 +242,7 @@ fn data_type_as_prop_type(dt: &DataType) -> Result { } } -trait PropCol: Send + Sync { +pub trait PropCol: Send + Sync { fn get(&self, i: usize) -> Option; fn as_array(&self) -> ArrayRef; @@ -442,7 +442,7 @@ impl PropCol for EmptyCol { } } -fn lift_property_col(arr: &dyn Array) -> Box { +pub fn lift_property_col(arr: &dyn Array) -> Box { match arr.data_type() { DataType::Boolean => Box::new(arr.as_boolean().clone()), DataType::Int32 => Box::new(arr.as_primitive::().clone()), diff --git a/raphtory/src/serialise/mod.rs b/raphtory/src/serialise/mod.rs index 3725237f7e..ec33629745 100644 --- a/raphtory/src/serialise/mod.rs +++ b/raphtory/src/serialise/mod.rs @@ -1,7 +1,7 @@ mod graph_folder; pub mod metadata; -pub(crate) mod parquet; +pub mod parquet; #[cfg(feature = "proto")] pub mod proto; diff --git a/raphtory/src/serialise/parquet/mod.rs b/raphtory/src/serialise/parquet/mod.rs index f6b17da266..20d9f4ac59 100644 --- a/raphtory/src/serialise/parquet/mod.rs +++ b/raphtory/src/serialise/parquet/mod.rs @@ -4,9 +4,13 @@ use crate::{ graph::views::deletion_graph::PersistentGraph, }, errors::GraphError, - io::parquet_loaders::{ - load_edge_deletions_from_parquet, load_edge_props_from_parquet, load_edges_from_parquet, - load_graph_props_from_parquet, load_node_props_from_parquet, load_nodes_from_parquet, + io::{ + arrow::{df_loaders::load_graph_props_from_df, prop_handler::lift_property_col}, + parquet_loaders::{ + get_parquet_file_paths, load_edge_deletions_from_parquet, load_edge_props_from_parquet, + load_edges_from_parquet, load_graph_props_from_parquet, load_node_props_from_parquet, + load_nodes_from_parquet, process_parquet_file_to_df, + }, }, prelude::*, serialise::{ @@ -17,6 +21,7 @@ use crate::{ model::get_id_type, nodes::{encode_nodes_cprop, encode_nodes_tprop}, }, + GraphPaths, }, }; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; @@ -401,6 +406,30 @@ fn decode_graph_type(path: impl AsRef) -> Result { g_type.ok_or_else(|| GraphError::LoadFailure("Graph type not found".to_string())) } +pub fn decode_graph_metadata( + path: &impl GraphPaths, +) -> Result)>, GraphError> { + let c_graph_path = path.graph_path()?.join(GRAPH_C_PATH); + let exclude = vec![TIME_COL]; + let (c_props, _) = collect_prop_columns(&c_graph_path, &exclude)?; + let c_props = c_props.iter().map(|s| s.as_str()).collect::>(); + let mut result: Vec<(String, Option)> = + c_props.iter().map(|s| (s.to_string(), None)).collect(); + + for path in get_parquet_file_paths(&c_graph_path)? { + let df_view = process_parquet_file_to_df(path.as_path(), Some(&c_props), None)?; + for chunk in df_view.chunks { + let chunk = chunk?; + for (col, res) in chunk.chunk.into_iter().zip(&mut result) { + if let Some(value) = lift_property_col(&col).get(0) { + res.1 = Some(value); + } + } + } + } + Ok(result) +} + fn decode_graph_storage( path: impl AsRef, batch_size: Option, From 5c6ff6edd9adb92a0cf2ccc608ff6105b1caba21 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 15 Dec 2025 14:30:16 +0100 Subject: [PATCH 24/39] tidy up a lot of warnings --- db4-storage/src/pages/mod.rs | 9 +- db4-storage/src/segments/node.rs | 16 ++- python/python/raphtory/__init__.pyi | 40 +++--- .../entities/properties/prop/prop_array.rs | 3 +- .../entities/properties/prop/prop_enum.rs | 2 +- raphtory-benchmark/src/common/mod.rs | 2 +- raphtory-benchmark/src/common/vectors.rs | 2 +- .../src/graph_gen/raph_social.rs | 14 +-- .../src/entities/properties/graph_meta.rs | 2 +- .../src/entities/properties/tprop.rs | 2 +- raphtory-core/src/storage/lazy_vec.rs | 7 +- raphtory-core/src/storage/mod.rs | 2 +- raphtory-graphql/src/data.rs | 23 ++-- raphtory-graphql/src/graph.rs | 6 +- raphtory-graphql/src/lib.rs | 5 +- raphtory-graphql/src/main.rs | 2 +- raphtory-graphql/src/model/graph/graph.rs | 3 +- .../src/model/graph/meta_graph.rs | 6 +- .../src/model/graph/mutable_graph.rs | 1 - raphtory-graphql/src/model/graph/namespace.rs | 8 +- raphtory-graphql/src/model/mod.rs | 9 +- raphtory-graphql/src/paths.rs | 20 +-- .../src/python/client/raphtory_client.rs | 3 +- raphtory-graphql/src/python/mod.rs | 4 +- raphtory-storage/src/mutation/addition_ops.rs | 27 ---- .../src/mutation/addition_ops_ext.rs | 30 ----- raphtory/src/db/api/storage/storage.rs | 56 --------- raphtory/src/db/api/view/graph.rs | 9 +- raphtory/src/db/graph/assertions.rs | 118 +++++++++--------- raphtory/src/io/arrow/df_loaders.rs | 8 +- .../src/python/graph/io/pandas_loaders.rs | 1 - .../types/macros/trait_impl/serialise.rs | 2 - raphtory/src/python/utils/export.rs | 5 +- raphtory/src/serialise/graph_folder.rs | 27 ++-- raphtory/src/serialise/metadata.rs | 11 +- raphtory/src/serialise/parquet/mod.rs | 3 +- raphtory/src/serialise/serialise.rs | 6 +- raphtory/src/vectors/db.rs | 21 ++-- raphtory/tests/test_deletions.rs | 2 - raphtory/tests/test_materialize.rs | 2 +- 40 files changed, 160 insertions(+), 359 deletions(-) diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 58d7623f00..c16d598e7e 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -456,7 +456,7 @@ mod test { make_nodes, }, }; - use chrono::{DateTime, NaiveDateTime, Utc}; + use chrono::DateTime; use proptest::prelude::*; use raphtory_api::core::entities::properties::prop::Prop; use raphtory_core::{entities::VID, storage::timeindex::TimeIndexOps}; @@ -738,14 +738,11 @@ mod test { ("857".to_owned(), Prop::F64(2.56)), ( "296".to_owned(), - Prop::NDTime(NaiveDateTime::from_timestamp(1334043671, 0)), + Prop::NDTime(DateTime::from_timestamp(1334043671, 0).unwrap().naive_utc()), ), ( "92".to_owned(), - Prop::DTime(DateTime::::from_utc( - NaiveDateTime::from_timestamp(994032315, 0), - Utc, - )), + Prop::DTime(DateTime::from_timestamp(994032315, 0).unwrap()), ), ], )], diff --git a/db4-storage/src/segments/node.rs b/db4-storage/src/segments/node.rs index 63b1d9f05d..12ce8e7265 100644 --- a/db4-storage/src/segments/node.rs +++ b/db4-storage/src/segments/node.rs @@ -546,15 +546,6 @@ impl>> NodeSegmentOps for NodeSegm #[cfg(test)] mod test { - use std::{ops::Deref, sync::Arc}; - - use raphtory_api::core::entities::properties::{ - meta::Meta, - prop::{Prop, PropType}, - }; - use raphtory_core::entities::{EID, ELID, VID}; - use tempfile::tempdir; - use crate::{ LocalPOS, api::nodes::NodeSegmentOps, @@ -562,6 +553,13 @@ mod test { persist::strategy::NoOpStrategy, segments::node::NodeSegmentView, }; + use raphtory_api::core::entities::properties::{ + meta::Meta, + prop::{Prop, PropType}, + }; + use raphtory_core::entities::{EID, ELID, VID}; + use std::sync::Arc; + use tempfile::tempdir; #[test] fn est_size_changes() { diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index d0fe9a6365..68643483b4 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1148,12 +1148,12 @@ class Graph(GraphView): time: str, src: str, dst: str, - secondary_index: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None, + secondary_index: Optional[str] = None, ) -> None: """ Load edges from a Pandas DataFrame into the graph. @@ -1163,12 +1163,12 @@ class Graph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - secondary_index (str, optional): The column name for the secondary index. Defaults to None. properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + secondary_index (str, optional): The column name for the secondary index. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -1183,12 +1183,12 @@ class Graph(GraphView): time: str, src: str, dst: str, - secondary_index: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None, + secondary_index: Optional[str] = None, ) -> None: """ Load edges from a Parquet file into the graph. @@ -1198,12 +1198,12 @@ class Graph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - secondary_index (str, optional): The column name for the secondary index. Defaults to None. properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + secondary_index (str, optional): The column name for the secondary index. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -1283,12 +1283,12 @@ class Graph(GraphView): df: DataFrame, time: str, id: str, - secondary_index: Optional[str] = None, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, + secondary_index: Optional[str] = None, ) -> None: """ Load nodes from a Pandas DataFrame into the graph. @@ -1297,12 +1297,12 @@ class Graph(GraphView): df (DataFrame): The Pandas DataFrame containing the nodes. time (str): The column name for the timestamps. id (str): The column name for the node IDs. - secondary_index (str, optional): The column name for the secondary index. Defaults to None. node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + secondary_index (str, optional): The column name for the secondary index. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -1316,12 +1316,12 @@ class Graph(GraphView): parquet_path: str, time: str, id: str, - secondary_index: Optional[str] = None, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, + secondary_index: Optional[str] = None, ) -> None: """ Load nodes from a Parquet file into the graph. @@ -1330,12 +1330,12 @@ class Graph(GraphView): parquet_path (str): Parquet file or directory of Parquet files containing the nodes time (str): The column name for the timestamps. id (str): The column name for the node IDs. - secondary_index (str, optional): The column name for the secondary index. Defaults to None. node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + secondary_index (str, optional): The column name for the secondary index. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -1763,9 +1763,9 @@ class PersistentGraph(GraphView): time: str, src: str, dst: str, - secondary_index: Optional[str] = None, layer: Optional[str] = None, layer_col: Optional[str] = None, + secondary_index: Optional[str] = None, ) -> None: """ Load edges deletions from a Pandas DataFrame into the graph. @@ -1775,10 +1775,10 @@ class PersistentGraph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - secondary_index (str, optional): The column name for the secondary index. NOTE: All values in this column must be unique. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + secondary_index (str, optional): The column name for the secondary index. Returns: None: This function does not return a value, if the operation is successful. @@ -1793,9 +1793,9 @@ class PersistentGraph(GraphView): time: str, src: str, dst: str, - secondary_index: Optional[str] = None, layer: Optional[str] = None, layer_col: Optional[str] = None, + secondary_index: Optional[str] = None, ) -> None: """ Load edges deletions from a Parquet file into the graph. @@ -1805,10 +1805,10 @@ class PersistentGraph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - secondary_index (str, optional): The column name for the secondary index. NOTE: All values in this column must be unique. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + secondary_index (str, optional): The column name for the secondary index. Returns: None: This function does not return a value, if the operation is successful. @@ -1881,12 +1881,12 @@ class PersistentGraph(GraphView): time: str, src: str, dst: str, - secondary_index: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None, + secondary_index: Optional[str] = None, ) -> None: """ Load edges from a Pandas DataFrame into the graph. @@ -1896,13 +1896,13 @@ class PersistentGraph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - secondary_index (str, optional): The column name for the secondary index. NOTE: All values in this column must be unique. Defaults to None. properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + secondary_index (str, optional): The column name for the secondary index. Returns: None: This function does not return a value, if the operation is successful. @@ -1917,12 +1917,12 @@ class PersistentGraph(GraphView): time: str, src: str, dst: str, - secondary_index: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None, + secondary_index: Optional[str] = None, ) -> None: """ Load edges from a Parquet file into the graph. @@ -1932,13 +1932,13 @@ class PersistentGraph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - secondary_index (str, optional): The column name for the secondary index. NOTE: All values in this column must be unique. Defaults to None. properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + secondary_index (str, optional): The column name for the secondary index. Returns: None: This function does not return a value, if the operation is successful. @@ -2018,12 +2018,12 @@ class PersistentGraph(GraphView): df: DataFrame, time: str, id: str, - secondary_index: Optional[str] = None, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, + secondary_index: Optional[str] = None, ) -> None: """ Load nodes from a Pandas DataFrame into the graph. @@ -2032,13 +2032,13 @@ class PersistentGraph(GraphView): df (DataFrame): The Pandas DataFrame containing the nodes. time (str): The column name for the timestamps. id (str): The column name for the node IDs. - secondary_index (str, optional): The column name for the secondary index. NOTE: All values in this column must be unique. Defaults to None. node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + secondary_index (str, optional): The column name for the secondary index. Returns: None: This function does not return a value, if the operation is successful. @@ -2052,12 +2052,12 @@ class PersistentGraph(GraphView): parquet_path: str, time: str, id: str, - secondary_index: Optional[str] = None, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, + secondary_index: Optional[str] = None, ) -> None: """ Load nodes from a Parquet file into the graph. @@ -2066,13 +2066,13 @@ class PersistentGraph(GraphView): parquet_path (str): Parquet file or directory of Parquet files containing the nodes time (str): The column name for the timestamps. id (str): The column name for the node IDs. - secondary_index (str, optional): The column name for the secondary index. NOTE: All values in this column must be unique. Defaults to None. node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + secondary_index (str, optional): The column name for the secondary index. Returns: None: This function does not return a value, if the operation is successful. diff --git a/raphtory-api/src/core/entities/properties/prop/prop_array.rs b/raphtory-api/src/core/entities/properties/prop/prop_array.rs index 557a7fbbe7..da58312649 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_array.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_array.rs @@ -8,7 +8,7 @@ use arrow_array::{ cast::AsArray, types::*, Array, ArrayRef, ArrowPrimitiveType, OffsetSizeTrait, PrimitiveArray, }; use arrow_schema::{DataType, Field, Fields, TimeUnit}; -use serde::{ser::SerializeSeq, Deserialize, Serialize, Serializer}; +use serde::{ser::SerializeSeq, Serialize, Serializer}; use std::{ hash::{Hash, Hasher}, sync::Arc, @@ -207,7 +207,6 @@ impl Serialize for PropArray { } } - impl PartialEq for PropArray { fn eq(&self, other: &Self) -> bool { match (self, other) { diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index bdf52c475f..33d0c5e149 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -11,7 +11,7 @@ use itertools::Itertools; use rustc_hash::{FxBuildHasher, FxHashMap}; use serde::{ ser::{SerializeMap, SerializeSeq}, - Deserialize, Serialize, + Serialize, }; use std::{ cmp::Ordering, diff --git a/raphtory-benchmark/src/common/mod.rs b/raphtory-benchmark/src/common/mod.rs index eb65cda3a6..8494770969 100644 --- a/raphtory-benchmark/src/common/mod.rs +++ b/raphtory-benchmark/src/common/mod.rs @@ -373,7 +373,7 @@ pub fn run_analysis_benchmarks( |b: &mut Bencher| { let mut rng = rand::rng(); let v: u64 = loop { - let v: u64 = rng.gen(); + let v: u64 = rng.random(); if !nodes.contains(&GID::U64(v)) { break v; } diff --git a/raphtory-benchmark/src/common/vectors.rs b/raphtory-benchmark/src/common/vectors.rs index 701ace6db2..919d201ccb 100644 --- a/raphtory-benchmark/src/common/vectors.rs +++ b/raphtory-benchmark/src/common/vectors.rs @@ -16,7 +16,7 @@ pub fn gen_embedding_for_bench(text: &str) -> Embedding { let hash = hasher.finish(); let mut rng: StdRng = SeedableRng::seed_from_u64(hash); - (0..1536).map(|_| rng.gen()).collect() + (0..1536).map(|_| rng.random()).collect() } async fn embedding_model(texts: Vec) -> EmbeddingResult> { diff --git a/raphtory-benchmark/src/graph_gen/raph_social.rs b/raphtory-benchmark/src/graph_gen/raph_social.rs index c7cef5ae35..c1a0ec6a15 100644 --- a/raphtory-benchmark/src/graph_gen/raph_social.rs +++ b/raphtory-benchmark/src/graph_gen/raph_social.rs @@ -15,7 +15,7 @@ use fake::{ }, Fake, }; -use rand::{prelude::SliceRandom, rng, seq::IndexedRandom, Rng}; +use rand::{rng, seq::IndexedRandom, Rng}; use raphtory::prelude::*; use serde::{de::DeserializeOwned, Deserialize, Serialize}; use std::{collections::HashMap, error::Error, fmt::Debug}; @@ -173,7 +173,7 @@ pub fn generate_data_write_to_csv( post_id: format!("post_{}", i), forum_id: format!("forum_{}", rng.random_range(1..=num_forums)), creation_date, // Use post's creation date - is_featured: rng.gen_bool(0.2), + is_featured: rng.random_bool(0.2), likes_count: rng.random_range(0..500), comments_count: rng.random_range(0..200), })?; @@ -200,7 +200,7 @@ pub fn generate_data_write_to_csv( comment_id: format!("comment_{}", i), post_id: format!("post_{}", rng.random_range(1..=num_posts)), creation_date, // Use comment's creation date - is_edited: rng.gen_bool(0.1), + is_edited: rng.random_bool(0.1), upvotes: rng.random_range(0..200), reply_count: rng.random_range(0..20), })?; @@ -421,7 +421,7 @@ pub fn generate_graph( ), ( "gender", - Prop::Str(ArcStr::from(if rng.gen_bool(0.5) { + Prop::Str(ArcStr::from(if rng.random_bool(0.5) { "male" } else { "female" @@ -465,7 +465,7 @@ pub fn generate_graph( forum_id.clone(), [ ("activity_score", Prop::F64(rng.random_range(0.0..100.0))), - ("is_moderator", Prop::Bool(rng.gen_bool(0.1))), + ("is_moderator", Prop::Bool(rng.random_bool(0.1))), ], None, ) @@ -516,7 +516,7 @@ pub fn generate_graph( post_id.clone(), forum_id.clone(), [ - ("is_featured", Prop::Bool(rng.gen_bool(0.2))), + ("is_featured", Prop::Bool(rng.random_bool(0.2))), ("likes_count", Prop::U64(rng.random_range(0..500))), ("comments_count", Prop::U64(rng.random_range(0..200))), ], @@ -568,7 +568,7 @@ pub fn generate_graph( comment_id.clone(), post_id.clone(), [ - ("is_edited", Prop::Bool(rng.gen_bool(0.1))), + ("is_edited", Prop::Bool(rng.random_bool(0.1))), ("upvotes", Prop::U64(rng.random_range(0..200))), ("reply_count", Prop::U64(rng.random_range(0..20))), ], diff --git a/raphtory-core/src/entities/properties/graph_meta.rs b/raphtory-core/src/entities/properties/graph_meta.rs index 305a86a751..ca53ee5bdc 100644 --- a/raphtory-core/src/entities/properties/graph_meta.rs +++ b/raphtory-core/src/entities/properties/graph_meta.rs @@ -16,7 +16,7 @@ use raphtory_api::core::{ FxDashMap, }, }; -use serde::{Deserialize, Serialize}; +use serde::Serialize; use std::ops::{Deref, DerefMut}; #[derive(Serialize, Debug, Default)] diff --git a/raphtory-core/src/entities/properties/tprop.rs b/raphtory-core/src/entities/properties/tprop.rs index 166a093211..33303a6085 100644 --- a/raphtory-core/src/entities/properties/tprop.rs +++ b/raphtory-core/src/entities/properties/tprop.rs @@ -15,7 +15,7 @@ use raphtory_api::core::{ storage::arc_str::ArcStr, }; use rustc_hash::FxHashMap; -use serde::{Deserialize, Serialize}; +use serde::Serialize; use std::{collections::HashMap, iter, ops::Range, sync::Arc}; use thiserror::Error; diff --git a/raphtory-core/src/storage/lazy_vec.rs b/raphtory-core/src/storage/lazy_vec.rs index b5f5cfe5ad..bdc9557368 100644 --- a/raphtory-core/src/storage/lazy_vec.rs +++ b/raphtory-core/src/storage/lazy_vec.rs @@ -274,8 +274,7 @@ where LazyVec::LazyVec1(A::default(), TupleCol::from(inner)) } - #[cfg(test)] - fn iter(&self) -> Box + Send + '_> { + pub fn iter(&self) -> Box + Send + '_> { match self { LazyVec::Empty => Box::new(iter::empty()), LazyVec::LazyVec1(default, tuples) => { @@ -287,8 +286,7 @@ where } } - #[cfg(test)] - fn iter_opt(&self) -> Box> + Send + '_> { + pub fn iter_opt(&self) -> Box> + Send + '_> { match self { LazyVec::Empty => Box::new(iter::empty()), LazyVec::LazyVec1(_, tuples) => Box::new(tuples.iter()), @@ -356,7 +354,6 @@ where #[cfg(test)] mod lazy_vec_tests { use super::*; - use itertools::Itertools; use proptest::{arbitrary::Arbitrary, proptest}; fn check_lazy_vec(lazy_vec: &LazyVec, v: Vec>) { diff --git a/raphtory-core/src/storage/mod.rs b/raphtory-core/src/storage/mod.rs index 71f54275c7..5a68c2fd0b 100644 --- a/raphtory-core/src/storage/mod.rs +++ b/raphtory-core/src/storage/mod.rs @@ -19,7 +19,7 @@ use raphtory_api::core::{ storage::arc_str::ArcStr, }; use rustc_hash::FxHashMap; -use serde::{Deserialize, Serialize}; +use serde::Serialize; use std::{borrow::Cow, collections::HashMap, fmt::Debug, sync::Arc}; use thiserror::Error; diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index e5949f5aab..b24fc22256 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -3,22 +3,18 @@ use crate::{ graph::GraphWithVectors, model::blocking_io, paths::{ - mark_dirty, valid_path, valid_relative_graph_path, ExistingGraphFolder, - InternalPathValidationError, PathValidationError, ValidGraphFolder, ValidGraphPaths, - ValidWriteableGraphFolder, WithPath, + mark_dirty, ExistingGraphFolder, InternalPathValidationError, PathValidationError, + ValidGraphPaths, ValidWriteableGraphFolder, }, rayon::blocking_compute, GQLError, - GQLError::Insertion, }; use futures_util::FutureExt; -use itertools::{fold, Itertools}; use moka::future::Cache; use raphtory::{ - db::api::view::{internal::InternalStorageOps, MaterializedGraph}, - errors::{GraphError, InvalidPathReason}, - prelude::StableEncode, - serialise::{GraphFolder, GraphPaths, META_PATH}, + db::api::view::MaterializedGraph, + errors::GraphError, + serialise::GraphPaths, vectors::{ cache::VectorCache, template::DocumentTemplate, vectorisable::Vectorisable, vectorised_graph::VectorisedGraph, @@ -26,14 +22,11 @@ use raphtory::{ }; use std::{ collections::HashMap, - fs, - fs::File, - io, - io::{ErrorKind, Read, Seek}, + fs, io, + io::{Read, Seek}, path::{Path, PathBuf}, sync::Arc, }; -use tempfile::{spooled_tempfile_in, tempfile_in, NamedTempFile}; use tracing::{error, warn}; use walkdir::WalkDir; @@ -47,7 +40,7 @@ pub struct EmbeddingConf { } #[derive(thiserror::Error, Debug)] -enum MutationErrorInner { +pub enum MutationErrorInner { #[error(transparent)] GraphError(#[from] GraphError), #[error(transparent)] diff --git a/raphtory-graphql/src/graph.rs b/raphtory-graphql/src/graph.rs index 99fc0b52ae..56aaaef1c1 100644 --- a/raphtory-graphql/src/graph.rs +++ b/raphtory-graphql/src/graph.rs @@ -8,14 +8,12 @@ use raphtory::{ }, Base, InheritViewOps, MaterializedGraph, }, - graph::{edge::EdgeView, node::NodeView, views::deletion_graph::PersistentGraph}, + graph::{edge::EdgeView, node::NodeView}, }, errors::{GraphError, GraphResult}, - prelude::{EdgeViewOps, Graph, StableDecode}, - serialise::GraphFolder, + prelude::EdgeViewOps, vectors::{cache::VectorCache, vectorised_graph::VectorisedGraph}, }; -use raphtory_api::GraphType; use raphtory_storage::{ core_ops::InheritCoreGraphOps, layer_ops::InheritLayerOps, mutation::InheritMutationOps, }; diff --git a/raphtory-graphql/src/lib.rs b/raphtory-graphql/src/lib.rs index 7a62fc1c45..51670f95e7 100644 --- a/raphtory-graphql/src/lib.rs +++ b/raphtory-graphql/src/lib.rs @@ -1,7 +1,7 @@ pub use crate::server::GraphServer; use crate::{data::InsertionError, paths::PathValidationError}; use raphtory::errors::GraphError; -use std::{panic::Location, sync::Arc}; +use std::sync::Arc; mod auth; pub mod data; @@ -37,12 +37,11 @@ pub enum GQLError { #[cfg(test)] mod graphql_test { use crate::{ - config::app_config::{AppConfig, AppConfigBuilder}, + config::app_config::AppConfig, data::{data_tests::save_graphs_to_work_dir, Data}, model::App, url_encode::{url_decode_graph, url_encode_graph}, }; - use arrow_array::types::UInt8Type; use async_graphql::UploadValue; use dynamic_graphql::{Request, Variables}; use raphtory::{ diff --git a/raphtory-graphql/src/main.rs b/raphtory-graphql/src/main.rs index 8957ce6e22..dc16e3dbb6 100644 --- a/raphtory-graphql/src/main.rs +++ b/raphtory-graphql/src/main.rs @@ -81,7 +81,7 @@ async fn main() -> IoResult<()> { let schema = App::create_schema().finish().unwrap(); println!("{}", schema.sdl()); } else { - let mut builder = AppConfigBuilder::new() + let builder = AppConfigBuilder::new() .with_cache_capacity(args.cache_capacity) .with_cache_tti_seconds(args.cache_tti_seconds) .with_log_level(args.log_level) diff --git a/raphtory-graphql/src/model/graph/graph.rs b/raphtory-graphql/src/model/graph/graph.rs index 834a0001ee..72eac7fb3c 100644 --- a/raphtory-graphql/src/model/graph/graph.rs +++ b/raphtory-graphql/src/model/graph/graph.rs @@ -39,13 +39,12 @@ use raphtory::{ }, }, }, - errors::{GraphError, InvalidPathReason}, + errors::GraphError, prelude::*, }; use std::{ collections::HashSet, convert::{Into, TryInto}, - sync::Arc, }; use crate::{ diff --git a/raphtory-graphql/src/model/graph/meta_graph.rs b/raphtory-graphql/src/model/graph/meta_graph.rs index 70b044fcec..48a9fc84b3 100644 --- a/raphtory-graphql/src/model/graph/meta_graph.rs +++ b/raphtory-graphql/src/model/graph/meta_graph.rs @@ -3,11 +3,7 @@ use crate::{ paths::{ExistingGraphFolder, ValidGraphPaths}, }; use dynamic_graphql::{ResolvedObject, ResolvedObjectFields, Result}; -use raphtory::{ - errors::GraphError, - io::parquet_loaders::load_graph_props_from_parquet, - serialise::{metadata::GraphMetadata, parquet::decode_graph_metadata, GraphPaths}, -}; +use raphtory::serialise::{metadata::GraphMetadata, parquet::decode_graph_metadata}; use std::{cmp::Ordering, sync::Arc}; use tokio::sync::OnceCell; diff --git a/raphtory-graphql/src/model/graph/mutable_graph.rs b/raphtory-graphql/src/model/graph/mutable_graph.rs index 1350883d19..4803866797 100644 --- a/raphtory-graphql/src/model/graph/mutable_graph.rs +++ b/raphtory-graphql/src/model/graph/mutable_graph.rs @@ -1,7 +1,6 @@ use crate::{ graph::{GraphWithVectors, UpdateEmbeddings}, model::graph::{edge::GqlEdge, graph::GqlGraph, node::GqlNode, property::Value}, - paths::ExistingGraphFolder, rayon::blocking_write, }; use dynamic_graphql::{InputObject, ResolvedObject, ResolvedObjectFields}; diff --git a/raphtory-graphql/src/model/graph/namespace.rs b/raphtory-graphql/src/model/graph/namespace.rs index 978bcf89f1..191e402d3a 100644 --- a/raphtory-graphql/src/model/graph/namespace.rs +++ b/raphtory-graphql/src/model/graph/namespace.rs @@ -3,15 +3,11 @@ use crate::{ model::graph::{ collection::GqlCollection, meta_graph::MetaGraph, namespaced_item::NamespacedItem, }, - paths::{ - valid_path, valid_relative_graph_path, ExistingGraphFolder, InternalPathValidationError, - PathValidationError, ValidPath, - }, + paths::{valid_path, ExistingGraphFolder, PathValidationError, ValidPath}, rayon::blocking_compute, }; use dynamic_graphql::{ResolvedObject, ResolvedObjectFields}; -use itertools::{join, Itertools}; -use raphtory::errors::InvalidPathReason; +use itertools::Itertools; use std::path::PathBuf; use walkdir::WalkDir; diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index bc5c09b9b9..ea6b82f9e4 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -1,7 +1,6 @@ use crate::{ auth::ContextValidation, - data::{Data, DeletionError, InsertionError}, - graph::GraphWithVectors, + data::{Data, DeletionError}, model::{ graph::{ collection::GqlCollection, graph::GqlGraph, index::IndexSpecInput, @@ -10,7 +9,7 @@ use crate::{ }, plugins::{mutation_plugin::MutationPlugin, query_plugin::QueryPlugin}, }, - paths::{valid_path, ValidGraphFolder, ValidWriteableGraphFolder}, + paths::ValidWriteableGraphFolder, rayon::blocking_compute, url_encode::{url_decode_graph, url_encode_graph}, }; @@ -22,7 +21,7 @@ use dynamic_graphql::{ use itertools::Itertools; use raphtory::{ db::{api::view::MaterializedGraph, graph::views::deletion_graph::PersistentGraph}, - errors::{GraphError, InvalidPathReason}, + errors::GraphError, prelude::*, serialise::*, version, @@ -30,9 +29,7 @@ use raphtory::{ use std::{ error::Error, fmt::{Display, Formatter}, - sync::Arc, }; -use tempfile::env::override_temp_dir; pub(crate) mod graph; pub mod plugins; diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index e5094bbfbb..fae7b0d4e9 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -1,21 +1,14 @@ -use crate::{ - data::DIRTY_PATH, - model::{blocking_io, GqlGraphError}, - rayon::blocking_compute, - GQLError, -}; +use crate::{data::DIRTY_PATH, model::blocking_io, rayon::blocking_compute}; use futures_util::io; use raphtory::{ - db::api::view::{internal::InternalStorageOps, MaterializedGraph}, + db::api::view::{MaterializedGraph}, errors::{GraphError, InvalidPathReason}, prelude::ParquetEncoder, serialise::{ - make_data_path, metadata::GraphMetadata, read_data_path, read_dirty_path, - read_path_pointer, GraphFolder, GraphPaths, InnerGraphFolder, Metadata, RelativePath, - WriteableGraphFolder, DATA_PATH, META_PATH, + metadata::GraphMetadata, read_dirty_path, GraphFolder, GraphPaths, RelativePath, + WriteableGraphFolder, META_PATH, }, }; -use serde::{Deserialize, Serialize}; use std::{ cmp::Ordering, fs, @@ -25,8 +18,7 @@ use std::{ path::{Component, Path, PathBuf, StripPrefixError}, time::{SystemTime, UNIX_EPOCH}, }; -use tokio::io::AsyncReadExt; -use tracing::{error, metadata, warn}; +use tracing::{error, warn}; pub trait ValidGraphPaths: GraphPaths { fn local_path(&self) -> &str; @@ -279,7 +271,7 @@ impl CleanupPath { } #[derive(Clone, Debug)] -pub(crate) struct ValidWriteableGraphFolder { +pub struct ValidWriteableGraphFolder { global_path: WriteableGraphFolder, local_path: String, dirty_marker: Option, diff --git a/raphtory-graphql/src/python/client/raphtory_client.rs b/raphtory-graphql/src/python/client/raphtory_client.rs index e14fd931c8..d57020e6db 100644 --- a/raphtory-graphql/src/python/client/raphtory_client.rs +++ b/raphtory-graphql/src/python/client/raphtory_client.rs @@ -1,5 +1,4 @@ use crate::{ - paths::ExistingGraphFolder, python::{ client::{remote_graph::PyRemoteGraph, PyRemoteIndexSpec}, encode_graph, @@ -17,7 +16,7 @@ use raphtory::{db::api::view::MaterializedGraph, serialise::GraphFolder}; use raphtory_api::python::error::adapt_err_value; use reqwest::{multipart, multipart::Part, Client}; use serde_json::{json, Value as JsonValue}; -use std::{collections::HashMap, future::Future, io::Cursor, path::PathBuf, sync::Arc}; +use std::{collections::HashMap, future::Future, io::Cursor, sync::Arc}; use tokio::runtime::Runtime; use tracing::debug; diff --git a/raphtory-graphql/src/python/mod.rs b/raphtory-graphql/src/python/mod.rs index f7f82643f7..a7936a8c32 100644 --- a/raphtory-graphql/src/python/mod.rs +++ b/raphtory-graphql/src/python/mod.rs @@ -1,5 +1,3 @@ -use std::path::PathBuf; - use crate::{ model::App, url_encode::{url_decode_graph, url_encode_graph, UrlDecodeError}, @@ -10,7 +8,7 @@ use pyo3::{ types::{PyDict, PyList, PyNone}, IntoPyObjectExt, }; -use raphtory::{db::api::view::MaterializedGraph, prelude::GraphViewOps}; +use raphtory::db::api::view::MaterializedGraph; use raphtory_api::python::error::adapt_err_value; use serde_json::{Map, Number, Value as JsonValue}; diff --git a/raphtory-storage/src/mutation/addition_ops.rs b/raphtory-storage/src/mutation/addition_ops.rs index 162eba66f9..428e0545ad 100644 --- a/raphtory-storage/src/mutation/addition_ops.rs +++ b/raphtory-storage/src/mutation/addition_ops.rs @@ -185,33 +185,6 @@ pub trait SessionAdditionOps: Send + Sync { dtype: PropType, is_static: bool, ) -> Result, Self::Error>; - - /// add node update - fn internal_add_node( - &self, - t: TimeIndexEntry, - v: VID, - props: &[(usize, Prop)], - ) -> Result<(), Self::Error>; - - /// add edge update - fn internal_add_edge( - &self, - t: TimeIndexEntry, - src: VID, - dst: VID, - props: &[(usize, Prop)], - layer: usize, - ) -> Result, Self::Error>; - - /// add update for an existing edge - fn internal_add_edge_update( - &self, - t: TimeIndexEntry, - edge: EID, - props: &[(usize, Prop)], - layer: usize, - ) -> Result<(), Self::Error>; } impl InternalAdditionOps for GraphStorage { diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index e770f8a537..f0cc310ebc 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -167,36 +167,6 @@ impl<'a> SessionAdditionOps for UnlockedSession<'a> { .edge_meta() .resolve_prop_id(prop, dtype, is_static)?) } - - fn internal_add_node( - &self, - t: TimeIndexEntry, - v: VID, - props: &[(usize, Prop)], - ) -> Result<(), Self::Error> { - todo!() - } - - fn internal_add_edge( - &self, - t: TimeIndexEntry, - src: VID, - dst: VID, - props: &[(usize, Prop)], - layer: usize, - ) -> Result, Self::Error> { - todo!() - } - - fn internal_add_edge_update( - &self, - t: TimeIndexEntry, - edge: EID, - props: &[(usize, Prop)], - layer: usize, - ) -> Result<(), Self::Error> { - todo!() - } } impl InternalAdditionOps for TemporalGraph { diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 042fdf9861..4c39fae9e3 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -32,7 +32,6 @@ use raphtory_storage::{ }; use std::{ fmt::{Display, Formatter}, - io::{Seek, Write}, path::Path, sync::Arc, }; @@ -410,61 +409,6 @@ impl<'a> SessionAdditionOps for StorageWriteSession<'a> { Ok(id) } - - fn internal_add_node( - &self, - t: TimeIndexEntry, - v: VID, - props: &[(usize, Prop)], - ) -> Result<(), Self::Error> { - self.session.internal_add_node(t, v, props)?; - - #[cfg(feature = "search")] - self.storage - .if_index_mut(|index| index.add_node_update(t, v, props))?; - - Ok(()) - } - - fn internal_add_edge( - &self, - t: TimeIndexEntry, - src: VID, - dst: VID, - props: &[(usize, Prop)], - layer: usize, - ) -> Result, Self::Error> { - let id = self.session.internal_add_edge(t, src, dst, props, layer)?; - #[cfg(feature = "search")] - self.storage.if_index_mut(|index| { - index.add_edge_update(&self.storage.graph, id, t, layer, props) - })?; - - Ok(id) - } - - fn internal_add_edge_update( - &self, - t: TimeIndexEntry, - edge: EID, - props: &[(usize, Prop)], - layer: usize, - ) -> Result<(), Self::Error> { - self.session - .internal_add_edge_update(t, edge, props, layer)?; - - #[cfg(feature = "search")] - self.storage.if_index_mut(|index| { - index.add_edge_update( - &self.storage.graph, - MaybeNew::Existing(edge), - t, - layer, - props, - ) - })?; - Ok(()) - } } impl InternalAdditionOps for Storage { diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 3d519ad20f..ecb2bb7c11 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -17,13 +17,8 @@ use crate::{ node::NodeView, nodes::Nodes, views::{ - cached_view::CachedView, - filter::{ - model::{AsEdgeFilter, AsNodeFilter}, - node_type_filtered_graph::NodeTypeFilteredGraph, - }, - node_subgraph::NodeSubgraph, - valid_graph::ValidGraph, + cached_view::CachedView, filter::node_type_filtered_graph::NodeTypeFilteredGraph, + node_subgraph::NodeSubgraph, valid_graph::ValidGraph, }, }, }, diff --git a/raphtory/src/db/graph/assertions.rs b/raphtory/src/db/graph/assertions.rs index 03008434f2..c4b0a21c57 100644 --- a/raphtory/src/db/graph/assertions.rs +++ b/raphtory/src/db/graph/assertions.rs @@ -96,26 +96,6 @@ impl ApplyFilter for FilterNeighbour } } -pub struct SearchNodes(F); - -impl ApplyFilter for SearchNodes { - fn apply(&self, graph: G) -> Vec { - #[cfg(feature = "search")] - { - let mut results = graph - .search_nodes(self.0.clone(), 20, 0) - .unwrap() - .into_iter() - .map(|nv| nv.name()) - .collect::>(); - results.sort(); - return results; - } - #[cfg(not(feature = "search"))] - Vec::::new() - } -} - pub struct FilterEdges(F); impl ApplyFilter for FilterEdges { @@ -132,26 +112,6 @@ impl ApplyFilter for FilterEdges } } -pub struct SearchEdges(F); - -impl ApplyFilter for SearchEdges { - fn apply(&self, graph: G) -> Vec { - #[cfg(feature = "search")] - { - let mut results = graph - .search_edges(self.0.clone(), 20, 0) - .unwrap() - .into_iter() - .map(|ev| format!("{}->{}", ev.src().name(), ev.dst().name())) - .collect::>(); - results.sort(); - return results; - } - #[cfg(not(feature = "search"))] - Vec::::new() - } -} - #[track_caller] pub fn assert_filter_nodes_results( init_graph: impl FnOnce(Graph) -> Graph, @@ -293,18 +253,6 @@ pub fn filter_nodes(graph: &Graph, filter: impl CreateNodeFilter) -> Vec results } -#[cfg(feature = "search")] -pub fn search_nodes(graph: &Graph, filter: impl AsNodeFilter) -> Vec { - let mut results = graph - .search_nodes(filter, 10, 0) - .expect("Failed to search nodes") - .into_iter() - .map(|v| v.name()) - .collect::>(); - results.sort(); - results -} - pub fn filter_edges(graph: &Graph, filter: impl CreateEdgeFilter) -> Vec { let mut results = graph .filter_edges(filter) @@ -318,13 +266,61 @@ pub fn filter_edges(graph: &Graph, filter: impl CreateEdgeFilter) -> Vec } #[cfg(feature = "search")] -pub fn search_edges(graph: &Graph, filter: impl AsEdgeFilter) -> Vec { - let mut results = graph - .search_edges(filter, 10, 0) - .expect("Failed to filter edges") - .into_iter() - .map(|e| format!("{}->{}", e.src().name(), e.dst().name())) - .collect::>(); - results.sort(); - results +mod search { + use super::*; + + pub struct SearchNodes(F); + + impl ApplyFilter for SearchNodes { + fn apply(&self, graph: G) -> Vec { + let mut results = graph + .search_nodes(self.0.clone(), 20, 0) + .unwrap() + .into_iter() + .map(|nv| nv.name()) + .collect::>(); + results.sort(); + results + } + } + + pub struct SearchEdges(F); + + impl ApplyFilter for SearchEdges { + fn apply(&self, graph: G) -> Vec { + let mut results = graph + .search_edges(self.0.clone(), 20, 0) + .unwrap() + .into_iter() + .map(|ev| format!("{}->{}", ev.src().name(), ev.dst().name())) + .collect::>(); + results.sort(); + results + } + } + + pub fn search_nodes(graph: &Graph, filter: impl AsNodeFilter) -> Vec { + let mut results = graph + .search_nodes(filter, 10, 0) + .expect("Failed to search nodes") + .into_iter() + .map(|v| v.name()) + .collect::>(); + results.sort(); + results + } + + pub fn search_edges(graph: &Graph, filter: impl AsEdgeFilter) -> Vec { + let mut results = graph + .search_edges(filter, 10, 0) + .expect("Failed to filter edges") + .into_iter() + .map(|e| format!("{}->{}", e.src().name(), e.dst().name())) + .collect::>(); + results.sort(); + results + } } + +#[cfg(feature = "search")] +pub use search::*; diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 2f8e5e0594..e9ae84761a 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -9,7 +9,6 @@ use crate::{ }, prelude::*, }; -use arrow::array::BooleanArray; use bytemuck::checked::cast_slice_mut; use db4_graph::WriteLockedGraph; use either::Either; @@ -29,11 +28,7 @@ use raphtory_core::{ entities::{graph::logical_to_physical::ResolverShardT, GidRef, VID}, storage::timeindex::AsTime, }; -use raphtory_storage::{ - core_ops::CoreGraphOps, - layer_ops::InternalLayerOps, - mutation::addition_ops::{InternalAdditionOps, SessionAdditionOps}, -}; +use raphtory_storage::mutation::addition_ops::{InternalAdditionOps, SessionAdditionOps}; use rayon::prelude::*; use std::{ borrow::{Borrow, Cow}, @@ -313,7 +308,6 @@ pub fn load_edges_from_df>(); for chunk in chunks { let df = chunk?; diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index 6a8b6abfca..a973bbd8fa 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -3,7 +3,6 @@ use crate::{ errors::GraphError, io::arrow::{dataframe::*, df_loaders::*}, prelude::{AdditionOps, PropertyAdditionOps}, - python::graph::io::*, }; use arrow::array::ArrayRef; use pyo3::{ diff --git a/raphtory/src/python/types/macros/trait_impl/serialise.rs b/raphtory/src/python/types/macros/trait_impl/serialise.rs index d9cbd2e01c..9b00777477 100644 --- a/raphtory/src/python/types/macros/trait_impl/serialise.rs +++ b/raphtory/src/python/types/macros/trait_impl/serialise.rs @@ -1,5 +1,3 @@ -use crate::errors::GraphError; - /// Macro for implementing all the Cache methods on a python wrapper /// /// # Arguments diff --git a/raphtory/src/python/utils/export.rs b/raphtory/src/python/utils/export.rs index b84ae7caa9..95ea8ea119 100644 --- a/raphtory/src/python/utils/export.rs +++ b/raphtory/src/python/utils/export.rs @@ -8,10 +8,7 @@ use raphtory_api::core::{ storage::{arc_str::ArcStr, timeindex::AsTime}, }; use rayon::{iter::IntoParallelRefIterator, prelude::*}; -use std::{ - collections::{HashMap, HashSet}, - sync::Arc, -}; +use std::collections::{HashMap, HashSet}; pub(crate) fn extract_properties

( include_property_history: bool, diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index a71c2eff89..497e43405b 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -4,21 +4,18 @@ use crate::{ graph::views::deletion_graph::PersistentGraph, }, errors::GraphError, - prelude::{Graph, GraphViewOps, ParquetDecoder, ParquetEncoder, PropertiesOps, StableEncode}, - serialise::{metadata::GraphMetadata, serialise::StableDecode}, + prelude::{Graph, GraphViewOps, ParquetDecoder, ParquetEncoder}, + serialise::metadata::GraphMetadata, }; use raphtory_api::GraphType; use serde::{Deserialize, Serialize}; use std::{ fs::{self, File}, io::{self, ErrorKind, Read, Seek, Write}, - path::{Component, Path, PathBuf}, + path::{Path, PathBuf}, }; use walkdir::WalkDir; -use zip::{ - write::{FileOptions, SimpleFileOptions}, - ZipArchive, ZipWriter, -}; +use zip::{write::FileOptions, ZipArchive, ZipWriter}; /// Stores graph data pub const GRAPH_PATH: &str = "graph"; @@ -130,10 +127,6 @@ pub fn get_zip_meta_path(zip: &mut ZipArchive) -> Result SimpleFileOptions { - SimpleFileOptions::default() -} - #[derive(Debug, Serialize, Deserialize)] pub struct RelativePath { pub path: String, @@ -378,7 +371,7 @@ impl GraphFolder { Ok(()) } - fn is_disk_graph(&self) -> Result { + pub fn is_disk_graph(&self) -> Result { let meta = self.read_metadata()?; Ok(meta.is_diskgraph) } @@ -641,12 +634,12 @@ mod tests { use crate::{ db::graph::graph::assert_graph_equal, prelude::{AdditionOps, Graph, Prop, StableEncode, NO_PROPS}, + serialise::serialise::StableDecode, }; - use raphtory_api::{core::utils::logging::global_info_logger, GraphType}; - /// Verify that the metadata is re-created if it does not exist. - #[test] - #[ignore = "Need to think about how to deal with reading old format"] + // /// Verify that the metadata is re-created if it does not exist. + // #[test] + // #[ignore = "Need to think about how to deal with reading old format"] // fn test_read_metadata_from_noninitialized_zip() { // global_info_logger(); // @@ -764,7 +757,7 @@ mod tests { // Verify the output zip contains the same graph let zip_folder = GraphFolder::new_as_zip(&output_zip_path); - let decoded_graph = Graph::decode(&zip_folder, None::<&std::path::Path>).unwrap(); + let decoded_graph = Graph::decode(&zip_folder, None::<&Path>).unwrap(); assert_graph_equal(&graph, &decoded_graph); } diff --git a/raphtory/src/serialise/metadata.rs b/raphtory/src/serialise/metadata.rs index aef2f31d94..77257003ac 100644 --- a/raphtory/src/serialise/metadata.rs +++ b/raphtory/src/serialise/metadata.rs @@ -1,12 +1,5 @@ -use crate::{ - db::api::view::internal::GraphView, - prelude::{GraphViewOps, PropertiesOps}, - serialise::GraphFolder, -}; -use raphtory_api::{ - core::{entities::properties::prop::Prop, storage::arc_str::ArcStr}, - GraphType, -}; +use crate::{db::api::view::internal::GraphView, prelude::GraphViewOps, serialise::GraphFolder}; +use raphtory_api::GraphType; use serde::{Deserialize, Serialize}; #[derive(PartialEq, Serialize, Deserialize, Debug)] diff --git a/raphtory/src/serialise/parquet/mod.rs b/raphtory/src/serialise/parquet/mod.rs index 20d9f4ac59..069806af4d 100644 --- a/raphtory/src/serialise/parquet/mod.rs +++ b/raphtory/src/serialise/parquet/mod.rs @@ -5,7 +5,7 @@ use crate::{ }, errors::GraphError, io::{ - arrow::{df_loaders::load_graph_props_from_df, prop_handler::lift_property_col}, + arrow::prop_handler::lift_property_col, parquet_loaders::{ get_parquet_file_paths, load_edge_deletions_from_parquet, load_edge_props_from_parquet, load_edges_from_parquet, load_graph_props_from_parquet, load_node_props_from_parquet, @@ -14,7 +14,6 @@ use crate::{ }, prelude::*, serialise::{ - graph_folder::GRAPH_PATH, parquet::{ edges::encode_edge_deletions, graph::{encode_graph_cprop, encode_graph_tprop}, diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index c81a3edb53..50033cd06d 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -7,17 +7,15 @@ use crate::{ get_zip_graph_path, metadata::GraphMetadata, parquet::{ParquetDecoder, ParquetEncoder}, - GraphFolder, GraphPaths, Metadata, RelativePath, DATA_PATH, DEFAULT_DATA_PATH, - DEFAULT_GRAPH_PATH, GRAPH_PATH, META_PATH, + GraphFolder, GraphPaths, Metadata, RelativePath, DEFAULT_DATA_PATH, DEFAULT_GRAPH_PATH, + META_PATH, }, }; use std::{ - fs, fs::File, io::{Cursor, Read, Seek, Write}, path::Path, }; -use tempfile; use zip::{write::SimpleFileOptions, ZipArchive, ZipWriter}; pub trait StableEncode: StaticGraphViewOps + AdditionOps { diff --git a/raphtory/src/vectors/db.rs b/raphtory/src/vectors/db.rs index a0a51b43a5..226a0e9450 100644 --- a/raphtory/src/vectors/db.rs +++ b/raphtory/src/vectors/db.rs @@ -1,15 +1,3 @@ -use std::{ - collections::HashSet, - ops::Deref, - path::{Path, PathBuf}, - sync::{Arc, OnceLock}, -}; - -use arroy::{distances::Cosine, Database as ArroyDatabase, Reader, Writer}; -use futures_util::StreamExt; -use rand::{rngs::StdRng, SeedableRng}; -use tempfile::TempDir; - use super::{ entity_ref::{EntityRef, IntoDbId}, Embedding, @@ -19,6 +7,15 @@ use crate::{ errors::{GraphError, GraphResult}, prelude::GraphViewOps, }; +use arroy::{distances::Cosine, Database as ArroyDatabase, Reader, Writer}; +use futures_util::StreamExt; +use std::{ + collections::HashSet, + ops::Deref, + path::{Path, PathBuf}, + sync::{Arc, OnceLock}, +}; +use tempfile::TempDir; const LMDB_MAX_SIZE: usize = 1024 * 1024 * 1024 * 1024; // 1TB diff --git a/raphtory/tests/test_deletions.rs b/raphtory/tests/test_deletions.rs index a0e6637456..e096574a30 100644 --- a/raphtory/tests/test_deletions.rs +++ b/raphtory/tests/test_deletions.rs @@ -254,10 +254,8 @@ fn materialize_window_multilayer() { let g = PersistentGraph::new(); g.add_edge(1, 0, 0, NO_PROPS, None).unwrap(); g.delete_edge(3, 0, 0, Some("a")).unwrap(); - let w = 0..10; let glw = g.valid_layers("a").window(w.start, w.end); - let layers = glw.edge(0, 0).unwrap().explode_layers(); let gmlw = glw.materialize().unwrap(); assert_persistent_materialize_graph_equal(&glw, &gmlw); } diff --git a/raphtory/tests/test_materialize.rs b/raphtory/tests/test_materialize.rs index 28070c3022..49ea3ab917 100644 --- a/raphtory/tests/test_materialize.rs +++ b/raphtory/tests/test_materialize.rs @@ -1,6 +1,6 @@ use proptest::{arbitrary::any, proptest}; use raphtory::{ - db::{api::view::MaterializedGraph, graph::graph::assert_graph_equal}, + db::graph::graph::assert_graph_equal, prelude::*, test_storage, test_utils::{build_edge_list, build_graph_from_edge_list}, From 391c57b9ed30d79b507f06d4d98cdd208283e615 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 15 Dec 2025 15:29:52 +0100 Subject: [PATCH 25/39] fmt --- raphtory-graphql/src/paths.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index fae7b0d4e9..be07f4ae1b 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -1,7 +1,7 @@ use crate::{data::DIRTY_PATH, model::blocking_io, rayon::blocking_compute}; use futures_util::io; use raphtory::{ - db::api::view::{MaterializedGraph}, + db::api::view::MaterializedGraph, errors::{GraphError, InvalidPathReason}, prelude::ParquetEncoder, serialise::{ From 3c259bf9bbba2506a34c4e5142fb3c47f9f15515 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Tue, 16 Dec 2025 16:18:14 +0100 Subject: [PATCH 26/39] tidy up and add more validation for relative paths --- db4-storage/src/pages/edge_page/writer.rs | 6 +- .../src/segments/graph_prop/segment.rs | 1 - raphtory-graphql/src/data.rs | 1 + raphtory-graphql/src/lib.rs | 3 + raphtory-graphql/src/main.rs | 2 +- raphtory-graphql/src/paths.rs | 189 +++++++++--------- raphtory-storage/src/core_ops.rs | 2 +- raphtory-storage/src/graph/graph.rs | 2 +- .../graph/storage_ops/time_semantics.rs | 9 +- raphtory/src/db/api/storage/storage.rs | 5 +- raphtory/src/db/api/view/graph.rs | 8 +- raphtory/src/db/graph/assertions.rs | 4 +- raphtory/src/db/graph/views/deletion_graph.rs | 3 +- raphtory/src/errors.rs | 51 +++-- raphtory/src/io/arrow/df_loaders.rs | 5 +- raphtory/src/io/parquet_loaders.rs | 6 +- raphtory/src/serialise/graph_folder.rs | 65 +++--- 17 files changed, 180 insertions(+), 182 deletions(-) diff --git a/db4-storage/src/pages/edge_page/writer.rs b/db4-storage/src/pages/edge_page/writer.rs index cde4d7fdb3..8578e5619c 100644 --- a/db4-storage/src/pages/edge_page/writer.rs +++ b/db4-storage/src/pages/edge_page/writer.rs @@ -2,12 +2,8 @@ use crate::{ LocalPOS, api::edges::EdgeSegmentOps, error::StorageError, pages::layer_counter::GraphStats, segments::edge::segment::MemEdgeSegment, }; -use arrow_array::{ArrayRef, BooleanArray}; use raphtory_api::core::entities::{VID, properties::prop::Prop}; -use raphtory_core::{ - entities::EID, - storage::timeindex::{AsTime, TimeIndexEntry}, -}; +use raphtory_core::storage::timeindex::{AsTime, TimeIndexEntry}; use std::ops::DerefMut; pub struct EdgeWriter< diff --git a/db4-storage/src/segments/graph_prop/segment.rs b/db4-storage/src/segments/graph_prop/segment.rs index 4e310cad03..a007804978 100644 --- a/db4-storage/src/segments/graph_prop/segment.rs +++ b/db4-storage/src/segments/graph_prop/segment.rs @@ -1,5 +1,4 @@ use crate::{ - LocalPOS, error::StorageError, segments::{HasRow, SegmentContainer}, }; diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index b24fc22256..ae9f9c51f0 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -379,6 +379,7 @@ pub(crate) mod data_tests { fn create_graph_folder(path: &Path) { // Use empty graph to create folder structure + fs::create_dir_all(path).unwrap(); let graph = Graph::new(); graph.encode(path).unwrap(); } diff --git a/raphtory-graphql/src/lib.rs b/raphtory-graphql/src/lib.rs index 51670f95e7..1971c1c0ae 100644 --- a/raphtory-graphql/src/lib.rs +++ b/raphtory-graphql/src/lib.rs @@ -60,6 +60,9 @@ mod graphql_test { }; use tempfile::tempdir; + #[cfg(feature = "search")] + use crate::config::app_config::AppConfigBuilder; + #[tokio::test] async fn test_copy_graph() { let graph = Graph::new(); diff --git a/raphtory-graphql/src/main.rs b/raphtory-graphql/src/main.rs index dc16e3dbb6..8957ce6e22 100644 --- a/raphtory-graphql/src/main.rs +++ b/raphtory-graphql/src/main.rs @@ -81,7 +81,7 @@ async fn main() -> IoResult<()> { let schema = App::create_schema().finish().unwrap(); println!("{}", schema.sdl()); } else { - let builder = AppConfigBuilder::new() + let mut builder = AppConfigBuilder::new() .with_cache_capacity(args.cache_capacity) .with_cache_tti_seconds(args.cache_tti_seconds) .with_log_level(args.log_level) diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index be07f4ae1b..cb656b7677 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -3,14 +3,14 @@ use futures_util::io; use raphtory::{ db::api::view::MaterializedGraph, errors::{GraphError, InvalidPathReason}, - prelude::ParquetEncoder, serialise::{ - metadata::GraphMetadata, read_dirty_path, GraphFolder, GraphPaths, RelativePath, - WriteableGraphFolder, META_PATH, + metadata::GraphMetadata, GraphFolder, GraphPaths, RelativePath, WriteableGraphFolder, + META_PATH, }, }; use std::{ cmp::Ordering, + ffi::OsStr, fs, fs::File, io::{ErrorKind, Read, Seek, Write}, @@ -126,60 +126,62 @@ pub struct ValidGraphFolder { local_path: String, } +fn valid_component(component: Component<'_>) -> Result<&OsStr, InvalidPathReason> { + match component { + Component::Prefix(_) => Err(InvalidPathReason::RootNotAllowed), + Component::RootDir => Err(InvalidPathReason::RootNotAllowed), + Component::CurDir => Err(InvalidPathReason::CurDirNotAllowed), + Component::ParentDir => Err(InvalidPathReason::ParentDirNotAllowed), + Component::Normal(component) => Ok(component), + } +} + fn extend_and_validate( full_path: &mut PathBuf, component: Component, - user_facing_path: &str, ) -> Result<(), InternalPathValidationError> { - match component { - Component::Prefix(_) => { - return Err(InvalidPathReason::RootNotAllowed(user_facing_path.into()).into()) - } - Component::RootDir => { - return Err(InvalidPathReason::RootNotAllowed(user_facing_path.into()).into()) - } - Component::CurDir => { - return Err(InvalidPathReason::CurDirNotAllowed(user_facing_path.into()).into()) - } - Component::ParentDir => { - return Err(InvalidPathReason::ParentDirNotAllowed(user_facing_path.into()).into()) - } - Component::Normal(component) => { - // check if some intermediate path is already a graph - if full_path.join(META_PATH).exists() { - return Err(InvalidPathReason::ParentIsGraph(user_facing_path.into()).into()); - } - full_path.push(component); - //check for symlinks - if full_path.is_symlink() { - return Err(InvalidPathReason::SymlinkNotAllowed(user_facing_path.into()).into()); - } - ensure_clean_folder(&full_path)?; - } + let component = valid_component(component)?; + // check if some intermediate path is already a graph + if full_path.join(META_PATH).exists() { + return Err(InvalidPathReason::ParentIsGraph.into()); } + full_path.push(component); + //check for symlinks + if full_path.is_symlink() { + return Err(InvalidPathReason::SymlinkNotAllowed.into()); + } + ensure_clean_folder(&full_path)?; Ok(()) } -pub(crate) fn valid_path( +fn valid_path_inner( base_path: PathBuf, relative_path: &str, -) -> Result { - let user_facing_path = PathBuf::from(relative_path); +) -> Result { + let mut full_path = base_path.clone(); + let user_facing_path: &Path = relative_path.as_ref(); if relative_path.contains(r"//") { - return Err(InvalidPathReason::DoubleForwardSlash(user_facing_path).into()); + Err(InvalidPathReason::DoubleForwardSlash)?; } if relative_path.contains(r"\") { - return Err(InvalidPathReason::BackslashError(user_facing_path).into()); + Err(InvalidPathReason::BackslashError)?; } - let mut full_path = base_path.clone(); // fail if any component is a Prefix (C://), tries to access root, // tries to access a parent dir or is a symlink which could break out of the working dir for component in user_facing_path.components() { - extend_and_validate(&mut full_path, component, relative_path) - .with_path(relative_path.to_string())?; + extend_and_validate(&mut full_path, component)?; } + + Ok(full_path) +} + +pub(crate) fn valid_path( + base_path: PathBuf, + relative_path: &str, +) -> Result { + let full_path = valid_path_inner(base_path, relative_path).with_path(relative_path)?; Ok(ValidPath(full_path)) } @@ -214,10 +216,10 @@ pub(crate) fn create_valid_path( let user_facing_path = PathBuf::from(relative_path); if relative_path.contains(r"//") { - return Err(InvalidPathReason::DoubleForwardSlash(user_facing_path).into()); + return Err(InvalidPathReason::DoubleForwardSlash.into()); } if relative_path.contains(r"\") { - return Err(InvalidPathReason::BackslashError(user_facing_path).into()); + return Err(InvalidPathReason::BackslashError.into()); } let mut full_path = base_path.clone(); @@ -225,7 +227,7 @@ pub(crate) fn create_valid_path( // fail if any component is a Prefix (C://), tries to access root, // tries to access a parent dir or is a symlink which could break out of the working dir for component in user_facing_path.components() { - match extend_and_validate(&mut full_path, component, relative_path) { + match extend_and_validate(&mut full_path, component) { Ok(_) => { if !full_path.exists() { if cleanup_marker.is_none() { @@ -420,6 +422,10 @@ pub enum InternalPathValidationError { NamespaceIsGraph, #[error("The path provided contains non-UTF8 characters.")] NonUTFCharacters, + #[error("Relative path from metadata is empty")] + EmptyRelativePath, + #[error("Relative path from metadata has more than one component")] + RelativePathMultipleComponents, } impl From for InternalPathValidationError { @@ -437,8 +443,6 @@ pub enum PathValidationError { GraphNotExistsError(String), #[error("'{0}' does not exist as a namespace")] NamespaceDoesNotExist(String), - #[error(transparent)] - InvalidPath(#[from] InvalidPathReason), #[error("Graph '{graph}' is corrupted: {error}")] InternalError { graph: String, @@ -463,56 +467,48 @@ impl> WithPath for Result { } } -pub(crate) fn valid_relative_graph_path( - mut full_path: PathBuf, - relative_path: &Path, -) -> Result { +fn is_graph(path: &Path) -> bool { + path.join(META_PATH).is_file() +} + +fn valid_relative_path(relative_path: &Path) -> Result<(), InternalPathValidationError> { let mut components = relative_path.components(); - if let Some(component) = components.next() { - match component { - Component::Prefix(_) => { - Err(InvalidPathReason::RootNotAllowed( - relative_path.to_path_buf(), - ))?; - } - Component::RootDir => Err(InvalidPathReason::RootNotAllowed( - relative_path.to_path_buf(), - ))?, - Component::CurDir => Err(InvalidPathReason::CurDirNotAllowed( - relative_path.to_path_buf(), - ))?, - Component::ParentDir => Err(InvalidPathReason::ParentDirNotAllowed( - relative_path.to_path_buf(), - ))?, - Component::Normal(component) => { - full_path.push(component); - //check for symlinks - if full_path.is_symlink() { - Err(InvalidPathReason::SymlinkNotAllowed( - relative_path.to_path_buf(), - ))? - } - } - } - } + valid_component( + components + .next() + .ok_or(InternalPathValidationError::EmptyRelativePath)?, + )?; if components.next().is_some() { - Err(InternalPathValidationError::NestedPath( - relative_path.to_path_buf(), - ))? + return Err(InternalPathValidationError::RelativePathMultipleComponents); } - Ok(full_path) + Ok(()) } -fn is_graph(path: &Path) -> bool { - path.join(META_PATH).is_file() +fn read_dirty_relative_path( + base_path: &Path, +) -> Result, InternalPathValidationError> { + let mut file = match File::open(base_path.join(DIRTY_PATH)) { + Ok(file) => file, + Err(error) => { + return match error.kind() { + ErrorKind::NotFound => Ok(None), + _ => Err(error.into()), + } + } + }; + let mut json_string = String::new(); + file.read_to_string(&mut json_string)?; + let path: RelativePath = serde_json::from_str(&json_string)?; + valid_relative_path(path.path.as_ref())?; + Ok(Some(base_path.join(path.path))) } pub(crate) fn ensure_clean_folder(base_path: &Path) -> Result<(), InternalPathValidationError> { if base_path.is_dir() { - match read_dirty_path(base_path) { + match read_dirty_relative_path(base_path) { Ok(path) => { if let Some(path) = path { - warn!("Found dirty path {path}, cleaning..."); + warn!("Found dirty path {}, cleaning...", path.display()); fs::remove_dir_all(base_path.join(path))?; } } @@ -633,22 +629,23 @@ impl ValidGraphFolder { pub fn get_graph_name(&self) -> Result { let path: &Path = self.local_path.as_ref(); - let last_component: Component = path - .components() - .last() - .ok_or_else(|| InvalidPathReason::PathNotParsable(self.to_error_path()))?; - let name = match last_component { - Component::Normal(value) => value - .to_str() - .map(|s| s.to_string()) - .ok_or_else(|| InvalidPathReason::PathNotParsable(self.to_error_path()))?, - Component::Prefix(_) - | Component::RootDir - | Component::CurDir - | Component::ParentDir => { - Err(InvalidPathReason::PathNotParsable(self.to_error_path()))? + let name = self.with_internal_errors(|| { + let last_component: Component = path + .components() + .last() + .ok_or(InvalidPathReason::PathNotParsable)?; + match last_component { + Component::Normal(value) => Ok(value + .to_str() + .map(|s| s.to_string()) + .ok_or(InvalidPathReason::PathNotParsable)?), + Component::Prefix(_) + | Component::RootDir + | Component::CurDir + | Component::ParentDir => Err(InvalidPathReason::PathNotParsable)?, } - }; + })?; + Ok(name) } pub(crate) fn as_existing(&self) -> Result { diff --git a/raphtory-storage/src/core_ops.rs b/raphtory-storage/src/core_ops.rs index a0b82be773..c6c1897a23 100644 --- a/raphtory-storage/src/core_ops.rs +++ b/raphtory-storage/src/core_ops.rs @@ -15,7 +15,7 @@ use raphtory_api::{ inherit::Base, iter::{BoxedIter, BoxedLIter, IntoDynBoxed}, }; -use raphtory_core::entities::{nodes::node_ref::NodeRef, properties::graph_meta::GraphMeta}; +use raphtory_core::entities::nodes::node_ref::NodeRef; use std::{iter, sync::Arc}; use storage::resolver::GIDResolverOps; diff --git a/raphtory-storage/src/graph/graph.rs b/raphtory-storage/src/graph/graph.rs index 0d883e398a..40fd0aa572 100644 --- a/raphtory-storage/src/graph/graph.rs +++ b/raphtory-storage/src/graph/graph.rs @@ -12,7 +12,7 @@ use crate::{ }; use db4_graph::TemporalGraph; use raphtory_api::core::entities::{properties::meta::Meta, LayerIds, LayerVariants, EID, VID}; -use raphtory_core::entities::{nodes::node_ref::NodeRef, properties::graph_meta::GraphMeta}; +use raphtory_core::entities::nodes::node_ref::NodeRef; use std::{fmt::Debug, iter, path::Path, sync::Arc}; use storage::{Extension, GraphPropEntry}; use thiserror::Error; diff --git a/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs b/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs index 621f0c44ab..4930179fde 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs @@ -1,11 +1,8 @@ use super::GraphStorage; use crate::{ core::{entities::LayerIds, storage::timeindex::TimeIndexOps}, - db::api::{ - storage::graph, - view::internal::{ - EdgeHistoryFilter, GraphTimeSemanticsOps, NodeHistoryFilter, TimeSemantics, - }, + db::api::view::internal::{ + EdgeHistoryFilter, GraphTimeSemanticsOps, NodeHistoryFilter, TimeSemantics, }, prelude::Prop, }; @@ -25,7 +22,7 @@ use raphtory_storage::{ }, }; use rayon::iter::ParallelIterator; -use std::ops::{Deref, Range}; +use std::ops::Range; use storage::{ api::graph_props::{GraphPropEntryOps, GraphPropRefOps}, gen_ts::ALL_LAYERS, diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 4ae2f44598..2601d140c2 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -48,7 +48,10 @@ use { parking_lot::RwLock, raphtory_core::entities::nodes::node_ref::AsNodeRef, raphtory_storage::{core_ops::CoreGraphOps, graph::nodes::node_storage_ops::NodeStorageOps}, - std::ops::{Deref, DerefMut}, + std::{ + io::{Seek, Write}, + ops::{Deref, DerefMut}, + }, tracing::info, zip::ZipWriter, }; diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index a843310451..70702d723f 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -1,5 +1,3 @@ -#[cfg(feature = "search")] -use crate::search::{fallback_filter_edges, fallback_filter_nodes}; use crate::{ core::{ entities::{nodes::node_ref::AsNodeRef, LayerIds, VID}, @@ -58,6 +56,12 @@ use std::{ sync::{atomic::Ordering, Arc}, }; +#[cfg(feature = "search")] +use crate::{ + db::graph::views::filter::model::{AsEdgeFilter, AsNodeFilter}, + search::{fallback_filter_edges, fallback_filter_nodes}, +}; + /// This trait GraphViewOps defines operations for accessing /// information about a graph. The trait has associated types /// that are used to define the type of the nodes, edges diff --git a/raphtory/src/db/graph/assertions.rs b/raphtory/src/db/graph/assertions.rs index c4b0a21c57..ef151a919b 100644 --- a/raphtory/src/db/graph/assertions.rs +++ b/raphtory/src/db/graph/assertions.rs @@ -269,7 +269,7 @@ pub fn filter_edges(graph: &Graph, filter: impl CreateEdgeFilter) -> Vec mod search { use super::*; - pub struct SearchNodes(F); + pub struct SearchNodes(pub F); impl ApplyFilter for SearchNodes { fn apply(&self, graph: G) -> Vec { @@ -284,7 +284,7 @@ mod search { } } - pub struct SearchEdges(F); + pub struct SearchEdges(pub F); impl ApplyFilter for SearchEdges { fn apply(&self, graph: G) -> Vec { diff --git a/raphtory/src/db/graph/views/deletion_graph.rs b/raphtory/src/db/graph/views/deletion_graph.rs index fab23f7658..d9949aa3c7 100644 --- a/raphtory/src/db/graph/views/deletion_graph.rs +++ b/raphtory/src/db/graph/views/deletion_graph.rs @@ -28,8 +28,7 @@ use raphtory_storage::{ }; use std::{ fmt::{Display, Formatter}, - iter, - ops::{Deref, Range}, + ops::Range, path::Path, sync::Arc, }; diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index c01b412237..44793b23a0 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -38,31 +38,28 @@ use zip::result::ZipError; #[derive(thiserror::Error, Debug)] pub enum InvalidPathReason { - #[error("Backslash not allowed in path: {0}")] - BackslashError(PathBuf), - #[error("Double forward slashes are not allowed in path: {0}")] - DoubleForwardSlash(PathBuf), - #[error("Only relative paths are allowed to be used within the working_dir: {0}")] - RootNotAllowed(PathBuf), - #[error("References to the current dir are not allowed within the path: {0}")] - CurDirNotAllowed(PathBuf), - #[error("References to the parent dir are not allowed within the path: {0}")] - ParentDirNotAllowed(PathBuf), - #[error("A component of the given path was a symlink: {0}")] - SymlinkNotAllowed(PathBuf), - #[error("The give path does not exist: {0}")] - PathDoesNotExist(PathBuf), - #[error("Could not parse Path: {0}")] - PathNotParsable(PathBuf), - #[error("The path to the graph contains a subpath to an existing graph: {0}")] - ParentIsGraph(PathBuf), + #[error("Backslash not allowed in path")] + BackslashError, + #[error("Double forward slashes are not allowed in path")] + DoubleForwardSlash, + #[error("Only relative paths are allowed to be used within the working_dir")] + RootNotAllowed, + #[error("References to the current dir are not allowed within the path")] + CurDirNotAllowed, + #[error("References to the parent dir are not allowed within the path")] + ParentDirNotAllowed, + #[error("A component of the given path was a symlink")] + SymlinkNotAllowed, + #[error("Could not parse Path")] + PathNotParsable, + #[error("The path to the graph contains a subpath to an existing graph")] + ParentIsGraph, #[error("Graph name cannot start with _")] GraphNamePrefix, - - #[error("The path provided already exists as a namespace: {0}")] - GraphIsNamespace(PathBuf), - #[error("The path provided already exists as a graph: {0}")] - NamespaceIsGraph(PathBuf), + #[error("The path provided already exists as a namespace")] + GraphIsNamespace, + #[error("The path provided already exists as a graph")] + NamespaceIsGraph, #[error("Failed to strip prefix: {source}")] StripPrefix { source: StripPrefixError }, } @@ -149,6 +146,9 @@ pub enum GraphError { source: LoadError, }, + #[error("Path {0} does not exist")] + PathDoesNotExist(PathBuf), + #[error("Storage feature not enabled")] DiskGraphNotFound, @@ -434,14 +434,13 @@ pub enum GraphError { #[error("Cannot swap zipped graph data")] ZippedGraphCannotBeSwapped, - #[error("Invalid prefix, expected '{expected}', got '{actual}'")] - InvalidPrefix { expected: String, actual: String }, - #[error("{source} at {location}")] StripPrefixError { source: StripPrefixError, location: &'static Location<'static>, }, + #[error("Path {0} is not a valid relative data path")] + InvalidRelativePath(String), } impl From for GraphError { diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index f918633c7f..c21626aa6c 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -33,10 +33,7 @@ use rayon::prelude::*; use std::{ borrow::{Borrow, Cow}, collections::HashMap, - sync::{ - atomic::{AtomicBool, AtomicUsize, Ordering}, - Arc, - }, + sync::atomic::{AtomicBool, AtomicUsize, Ordering}, }; fn build_progress_bar(des: String, num_rows: usize) -> Result { diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index e2251ba160..1e6ac51145 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -1,6 +1,6 @@ use crate::{ db::api::view::StaticGraphViewOps, - errors::{GraphError, InvalidPathReason::PathDoesNotExist}, + errors::GraphError, io::arrow::{dataframe::*, df_loaders::*}, prelude::{AdditionOps, DeletionOps, PropertyAdditionOps}, }; @@ -360,9 +360,7 @@ pub fn get_parquet_file_paths(parquet_path: &Path) -> Result, Graph } } } else { - return Err(GraphError::from(PathDoesNotExist( - parquet_path.to_path_buf(), - ))); + return Err(GraphError::PathDoesNotExist(parquet_path.to_path_buf())); } parquet_files.sort(); diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 497e43405b..592a65291c 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -7,7 +7,7 @@ use crate::{ prelude::{Graph, GraphViewOps, ParquetDecoder, ParquetEncoder}, serialise::metadata::GraphMetadata, }; -use raphtory_api::GraphType; +use raphtory_api::{core::input::input_node::parse_u64_strict, GraphType}; use serde::{Deserialize, Serialize}; use std::{ fs::{self, File}, @@ -36,37 +36,53 @@ pub const INDEX_PATH: &str = "index"; /// Directory that stores vector embeddings of the graph pub const VECTORS_PATH: &str = "vectors"; -fn read_path_from_file(mut file: impl Read) -> Result { +pub(crate) fn valid_relative_graph_path( + relative_path: &str, + prefix: &str, +) -> Result<(), GraphError> { + relative_path + .strip_prefix(prefix) // should have the prefix + .and_then(|id| parse_u64_strict(id)) // the remainder should be the id + .ok_or_else(|| GraphError::InvalidRelativePath(relative_path.to_string()))?; + Ok(()) +} + +fn read_path_from_file(mut file: impl Read, prefix: &str) -> Result { let mut value = String::new(); file.read_to_string(&mut value)?; let path: RelativePath = serde_json::from_str(&value)?; + valid_relative_graph_path(&path.path, prefix)?; Ok(path.path) } -pub fn read_path_pointer(base_path: &Path, file_name: &str) -> Result, io::Error> { +pub fn read_path_pointer( + base_path: &Path, + file_name: &str, + prefix: &str, +) -> Result, GraphError> { let file = match File::open(base_path.join(file_name)) { Ok(file) => file, Err(error) => { return match error.kind() { ErrorKind::NotFound => Ok(None), - _ => Err(error), + _ => Err(error.into()), } } }; - let path = read_path_from_file(file)?; + let path = read_path_from_file(file, prefix)?; Ok(Some(path)) } -pub fn read_data_path(base_path: &Path) -> Result, io::Error> { - read_path_pointer(base_path, META_PATH) +pub fn read_data_path(base_path: &Path, prefix: &str) -> Result, GraphError> { + read_path_pointer(base_path, META_PATH, prefix) } -pub fn read_dirty_path(base_path: &Path) -> Result, io::Error> { - read_path_pointer(base_path, DIRTY_PATH) +pub fn read_dirty_path(base_path: &Path, prefix: &str) -> Result, GraphError> { + read_path_pointer(base_path, DIRTY_PATH, prefix) } pub fn make_data_path(base_path: &Path, prefix: &str) -> Result { - let mut id = read_data_path(base_path)? + let mut id = read_data_path(base_path, prefix)? .and_then(|path| { path.strip_prefix(prefix) .and_then(|id| id.parse::().ok()) @@ -82,24 +98,12 @@ pub fn make_data_path(base_path: &Path, prefix: &str) -> Result Result { - match read_data_path(base_path)? { - None => Ok(prefix.to_owned() + "0"), - Some(path) => { - if path.starts_with(prefix) { - Ok(path) - } else { - Err(GraphError::InvalidPrefix { - expected: prefix.to_owned(), - actual: path, - }) - } - } - } + Ok(read_data_path(base_path, prefix)?.unwrap_or_else(|| prefix.to_owned() + "0")) } pub fn get_zip_data_path(zip: &mut ZipArchive) -> Result { let file = zip.by_name(META_PATH)?; - Ok(read_path_from_file(file)?) + Ok(read_path_from_file(file, DATA_PATH)?) } pub fn get_zip_graph_path(zip: &mut ZipArchive) -> Result { @@ -116,12 +120,12 @@ pub fn get_zip_graph_path_name( ) -> Result { data_path.push('/'); data_path.push_str(META_PATH); - let graph_path = read_path_from_file(zip.by_name(&data_path)?)?; + let graph_path = read_path_from_file(zip.by_name(&data_path)?, GRAPH_PATH)?; Ok(graph_path) } pub fn get_zip_meta_path(zip: &mut ZipArchive) -> Result { - let mut path = get_zip_graph_path(zip)?; + let mut path = get_zip_data_path(zip)?; path.push('/'); path.push_str(META_PATH); Ok(path) @@ -264,7 +268,7 @@ impl GraphFolder { if self.write_as_zip_format { return Err(GraphError::ZippedGraphCannotBeSwapped); } - let old_swap = match read_dirty_path(self.root()) { + let old_swap = match read_dirty_path(self.root(), DATA_PATH) { Ok(path) => path, Err(_) => { fs::remove_file(self.root_folder.join(DIRTY_PATH))?; // dirty file is corrupted, clean it up @@ -430,7 +434,7 @@ impl GraphPaths for WriteableGraphFolder { } fn relative_data_path(&self) -> Result { - let path = read_dirty_path(self.root())?.ok_or(GraphError::NoWriteInProgress)?; + let path = read_dirty_path(self.root(), DATA_PATH)?.ok_or(GraphError::NoWriteInProgress)?; Ok(path) } @@ -446,7 +450,7 @@ impl WriteableGraphFolder { /// /// This operation returns an error if there is no write in progress. pub fn finish(self) -> Result { - let old_data = read_data_path(self.root())?; + let old_data = read_data_path(self.root(), DATA_PATH)?; fs::rename(self.root().join(DIRTY_PATH), self.root().join(META_PATH))?; if let Some(old_data) = old_data { let old_data_path = self.root().join(old_data); @@ -541,7 +545,8 @@ impl InnerGraphFolder { } pub fn relative_graph_path(&self) -> Result { - let relative = read_data_path(&self.path)?.unwrap_or_else(|| GRAPH_PATH.to_owned() + "0"); + let relative = + read_data_path(&self.path, GRAPH_PATH)?.unwrap_or_else(|| GRAPH_PATH.to_owned() + "0"); Ok(relative) } From 558dd7fcf0e418ef1b56524862091c13af4dddd7 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 17 Dec 2025 08:42:40 +0100 Subject: [PATCH 27/39] need to bring back Prop deserialisation for the WAL --- .../entities/properties/prop/prop_array.rs | 16 ++++++-- .../entities/properties/prop/prop_enum.rs | 8 ++-- raphtory-api/src/python/prop.rs | 38 +++++++++++++++++-- 3 files changed, 52 insertions(+), 10 deletions(-) diff --git a/raphtory-api/src/core/entities/properties/prop/prop_array.rs b/raphtory-api/src/core/entities/properties/prop/prop_array.rs index da58312649..bd9621025a 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_array.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_array.rs @@ -8,7 +8,7 @@ use arrow_array::{ cast::AsArray, types::*, Array, ArrayRef, ArrowPrimitiveType, OffsetSizeTrait, PrimitiveArray, }; use arrow_schema::{DataType, Field, Fields, TimeUnit}; -use serde::{ser::SerializeSeq, Serialize, Serializer}; +use serde::{ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer}; use std::{ hash::{Hash, Hasher}, sync::Arc, @@ -16,7 +16,7 @@ use std::{ #[derive(Debug, Clone, derive_more::From)] pub enum PropArray { - Vec(Arc>), + Vec(Arc<[Prop]>), Array(ArrayRef), } @@ -201,12 +201,22 @@ impl Serialize for PropArray { { let mut state = serializer.serialize_seq(Some(self.len()))?; for prop in self.iter_all() { - state.serialize_element(&prop.as_ref().map(SerdeProp))?; + state.serialize_element(&prop)?; } state.end() } } +impl<'de> Deserialize<'de> for PropArray { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let data = >::deserialize(deserializer)?; + Ok(PropArray::Vec(data.into())) + } +} + impl PartialEq for PropArray { fn eq(&self, other: &Self) -> bool { match (self, other) { diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index 33d0c5e149..1da96ccb08 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -11,7 +11,7 @@ use itertools::Itertools; use rustc_hash::{FxBuildHasher, FxHashMap}; use serde::{ ser::{SerializeMap, SerializeSeq}, - Serialize, + Deserialize, Serialize, }; use std::{ cmp::Ordering, @@ -34,7 +34,7 @@ pub const DECIMAL_MAX: i128 = 99999999999999999999999999999999999999i128; // equ pub struct InvalidBigDecimal(BigDecimal); /// Denotes the types of properties allowed to be stored in the graph. -#[derive(Debug, Serialize, PartialEq, Clone, derive_more::From)] +#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, derive_more::From)] pub enum Prop { Str(ArcStr), U8(u8), @@ -462,7 +462,7 @@ impl From> for Prop { impl From> for Prop { fn from(value: Vec) -> Self { - Prop::List(Arc::new(value).into()) + Prop::List(value.into()) } } @@ -499,7 +499,7 @@ pub trait IntoPropList { impl, K: Into> IntoPropList for I { fn into_prop_list(self) -> Prop { let vec = self.into_iter().map(|v| v.into()).collect::>(); - Prop::List(Arc::new(vec).into()) + Prop::List(vec.into()) } } diff --git a/raphtory-api/src/python/prop.rs b/raphtory-api/src/python/prop.rs index cfb7d3828f..9283133c34 100644 --- a/raphtory-api/src/python/prop.rs +++ b/raphtory-api/src/python/prop.rs @@ -59,7 +59,39 @@ impl<'py> IntoPyObject<'py> for Prop { Prop::List(PropArray::Array(arr_ref)) => { PyArray::from_array_ref(arr_ref).into_pyarrow(py)? } - Prop::List(PropArray::Vec(v)) => v.deref().clone().into_pyobject(py)?.into_any(), // Fixme: optimise the clone here? + Prop::List(PropArray::Vec(v)) => v.into_pyobject(py)?.into_any(), // Fixme: optimise the clone here? + Prop::Map(v) => v.deref().clone().into_pyobject(py)?.into_any(), + Prop::Decimal(d) => { + let decl_cls = get_decimal_cls(py)?; + decl_cls.call1((d.to_string(),))? + } + }) + } +} + +impl<'a, 'py: 'a> IntoPyObject<'py> for &'a Prop { + type Target = PyAny; + type Output = Bound<'py, PyAny>; + type Error = PyErr; + + fn into_pyobject(self, py: Python<'py>) -> Result { + Ok(match self { + Prop::Str(s) => s.into_pyobject(py)?.into_any(), + Prop::Bool(bool) => bool.into_bound_py_any(py)?, + Prop::U8(u8) => u8.into_pyobject(py)?.into_any(), + Prop::U16(u16) => u16.into_pyobject(py)?.into_any(), + Prop::I64(i64) => i64.into_pyobject(py)?.into_any(), + Prop::U64(u64) => u64.into_pyobject(py)?.into_any(), + Prop::F64(f64) => f64.into_pyobject(py)?.into_any(), + Prop::DTime(dtime) => dtime.into_pyobject(py)?.into_any(), + Prop::NDTime(ndtime) => ndtime.into_pyobject(py)?.into_any(), + Prop::I32(v) => v.into_pyobject(py)?.into_any(), + Prop::U32(v) => v.into_pyobject(py)?.into_any(), + Prop::F32(v) => v.into_pyobject(py)?.into_any(), + Prop::List(PropArray::Array(arr_ref)) => { + PyArray::from_array_ref(arr_ref.clone()).into_pyarrow(py)? + } + Prop::List(PropArray::Vec(v)) => v.into_pyobject(py)?.into_any(), Prop::Map(v) => v.deref().clone().into_pyobject(py)?.into_any(), Prop::Decimal(d) => { let decl_cls = get_decimal_cls(py)?; @@ -108,8 +140,8 @@ impl<'source> FromPyObject<'source> for Prop { let (arr, _) = arrow.into_inner(); return Ok(Prop::List(PropArray::Array(arr))); } - if let Ok(list) = ob.extract() { - return Ok(Prop::List(PropArray::Vec(Arc::new(list)))); + if let Ok(list) = ob.extract::>() { + return Ok(Prop::List(PropArray::Vec(list.into()))); } if let Ok(map) = ob.extract() { return Ok(Prop::Map(Arc::new(map))); From 253afb9690fc7cee1b7ba5e19a1af0c434edbea3 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 17 Dec 2025 09:30:00 +0100 Subject: [PATCH 28/39] this reserve causes a race condition as it re-checks the count and is probably not really helpful anyway --- db4-storage/src/pages/layer_counter.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/db4-storage/src/pages/layer_counter.rs b/db4-storage/src/pages/layer_counter.rs index b3865ba0b1..24c3e9df15 100644 --- a/db4-storage/src/pages/layer_counter.rs +++ b/db4-storage/src/pages/layer_counter.rs @@ -96,8 +96,6 @@ impl GraphStats { } } else { // we need to create the layer - self.layers.reserve(layer_id + 1 - self.layers.count()); - loop { let new_layer_id = self.layers.push_with(|_| Default::default()); if new_layer_id >= layer_id { From 260675622eb7f89f83ac32ce0942542788e1600b Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 17 Dec 2025 12:28:05 +0100 Subject: [PATCH 29/39] flat serialisation of Prop for working with arrow --- db4-storage/src/properties/mod.rs | 8 +- .../entities/properties/prop/prop_array.rs | 2 +- .../entities/properties/prop/prop_enum.rs | 243 ++++++++++++++++-- .../entities/properties/prop/prop_ref_enum.rs | 6 +- raphtory/src/serialise/parquet/graph.rs | 4 +- raphtory/src/serialise/parquet/model.rs | 10 +- 6 files changed, 239 insertions(+), 34 deletions(-) diff --git a/db4-storage/src/properties/mod.rs b/db4-storage/src/properties/mod.rs index 14ae383845..f2c5e86c90 100644 --- a/db4-storage/src/properties/mod.rs +++ b/db4-storage/src/properties/mod.rs @@ -8,8 +8,8 @@ use bigdecimal::ToPrimitive; use raphtory_api::core::entities::properties::{ meta::PropMapper, prop::{ - Prop, PropType, SerdeList, SerdeMap, arrow_dtype_from_prop_type, list_array_from_props, - struct_array_from_props, + Prop, PropType, SerdeArrowList, SerdeArrowMap, arrow_dtype_from_prop_type, + list_array_from_props, struct_array_from_props, }, }; use raphtory_core::{ @@ -206,7 +206,7 @@ impl Properties { .unwrap(); let array_iter = indices .map(|i| lazy_vec.get_opt(i)) - .map(|e| e.map(|m| SerdeMap(m))); + .map(|e| e.map(|m| SerdeArrowMap(m))); let struct_array = struct_array_from_props(&dt, array_iter); @@ -221,7 +221,7 @@ impl Properties { let array_iter = indices .map(|i| lazy_vec.get_opt(i)) - .map(|opt_list| opt_list.map(SerdeList)); + .map(|opt_list| opt_list.map(SerdeArrowList)); let list_array = list_array_from_props(&dt, array_iter); diff --git a/raphtory-api/src/core/entities/properties/prop/prop_array.rs b/raphtory-api/src/core/entities/properties/prop/prop_array.rs index bd9621025a..fbd5020279 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_array.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_array.rs @@ -1,6 +1,6 @@ use crate::{ core::entities::properties::prop::{ - unify_types, ArrowRow, DirectConvert, Prop, PropType, SerdeProp, + unify_types, ArrowRow, DirectConvert, Prop, PropType, SerdeArrowProp, }, iter::{BoxedLIter, IntoDynBoxed}, }; diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index 1da96ccb08..e2a1284fda 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -11,7 +11,7 @@ use itertools::Itertools; use rustc_hash::{FxBuildHasher, FxHashMap}; use serde::{ ser::{SerializeMap, SerializeSeq}, - Deserialize, Serialize, + Deserialize, Serialize, Serializer, }; use std::{ cmp::Ordering, @@ -23,9 +23,21 @@ use std::{ }; use thiserror::Error; -use crate::core::entities::properties::prop::prop_array::*; -use arrow_array::{cast::AsArray, ArrayRef, LargeListArray, StructArray}; -use arrow_schema::{DataType, Field, FieldRef}; +use crate::{ + core::entities::properties::prop::{prop_array::*, ArrowRow}, + iter::IntoDynBoxed, +}; +use arrow_array::{ + cast::AsArray, + types::{ + Date32Type, Date64Type, Decimal128Type, DecimalType, Float32Type, Float64Type, Int32Type, + Int64Type, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, + TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, + }, + Array, ArrayRef, LargeListArray, StructArray, +}; +use arrow_schema::{DataType, Field, FieldRef, TimeUnit}; +use serde::ser::Error; pub const DECIMAL_MAX: i128 = 99999999999999999999999999999999999999i128; // equivalent to parquet decimal(38, 0) @@ -151,44 +163,52 @@ impl PartialOrd for Prop { } } -pub struct SerdeProp<'a>(pub &'a Prop); +pub struct SerdeArrowProp<'a>(pub &'a Prop); +#[derive(Clone, Copy, Debug)] +pub struct SerdeArrowList<'a>(pub &'a PropArray); + #[derive(Clone, Copy, Debug)] -pub struct SerdeList<'a>(pub &'a PropArray); +pub struct SerdeArrowArray<'a>(pub &'a ArrayRef); #[derive(Clone, Copy)] -pub struct SerdeMap<'a>(pub &'a HashMap); +pub struct SerdeArrowMap<'a>(pub &'a HashMap); #[derive(Clone, Copy, Serialize)] pub struct SerdeRow { value: Option

, } -impl<'a> Serialize for SerdeList<'a> { +impl<'a> Serialize for SerdeArrowList<'a> { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { - let mut state = serializer.serialize_seq(Some(self.0.len()))?; - for prop in self.0.iter() { - state.serialize_element(&SerdeProp(&prop))?; + match &self.0 { + PropArray::Vec(list) => { + let mut state = serializer.serialize_seq(Some(self.0.len()))?; + for prop in list.iter() { + state.serialize_element(&SerdeArrowProp(prop))?; + } + state.end() + } + PropArray::Array(array) => SerdeArrowArray(array).serialize(serializer), } - state.end() } } -impl<'a> Serialize for SerdeMap<'a> { +impl<'a> Serialize for SerdeArrowMap<'a> { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { let mut state = serializer.serialize_map(Some(self.0.len()))?; for (k, v) in self.0.iter() { - state.serialize_entry(k, &SerdeProp(v))?; + state.serialize_entry(k, &SerdeArrowProp(v))?; } state.end() } } -impl<'a> Serialize for SerdeProp<'a> { +impl<'a> Serialize for SerdeArrowProp<'a> { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, @@ -206,13 +226,198 @@ impl<'a> Serialize for SerdeProp<'a> { Prop::Bool(b) => serializer.serialize_bool(*b), Prop::DTime(dt) => serializer.serialize_i64(dt.timestamp_millis()), Prop::NDTime(dt) => serializer.serialize_i64(dt.and_utc().timestamp_millis()), - Prop::List(l) => SerdeList(l).serialize(serializer), - Prop::Map(m) => SerdeMap(m).serialize(serializer), + Prop::List(l) => SerdeArrowList(l).serialize(serializer), + Prop::Map(m) => SerdeArrowMap(m).serialize(serializer), Prop::Decimal(dec) => serializer.serialize_str(&dec.to_string()), } } } +impl<'a> Serialize for SerdeArrowArray<'a> { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let dtype = self.0.data_type(); + let len = self.0.len(); + let mut state = serializer.serialize_seq(Some(len))?; + match dtype { + DataType::Boolean => { + for v in self.0.as_boolean().values().iter() { + state.serialize_element(&v)?; + } + state.end() + } + DataType::Int32 => { + for v in self.0.as_primitive::().values().iter() { + state.serialize_element(v)?; + } + state.end() + } + DataType::Int64 => { + for v in self.0.as_primitive::().values().iter() { + state.serialize_element(v)?; + } + state.end() + } + DataType::UInt8 => { + for v in self.0.as_primitive::().values().iter() { + state.serialize_element(v)?; + } + state.end() + } + DataType::UInt16 => { + for v in self.0.as_primitive::().values().iter() { + state.serialize_element(v)?; + } + state.end() + } + DataType::UInt32 => { + for v in self.0.as_primitive::().values().iter() { + state.serialize_element(v)?; + } + state.end() + } + DataType::UInt64 => { + for v in self.0.as_primitive::().values().iter() { + state.serialize_element(v)?; + } + state.end() + } + DataType::Float32 => { + for v in self.0.as_primitive::().values().iter() { + state.serialize_element(v)?; + } + state.end() + } + DataType::Float64 => { + for v in self.0.as_primitive::().values().iter() { + state.serialize_element(v)?; + } + state.end() + } + DataType::Timestamp(unit, _) => match unit { + TimeUnit::Second => { + for v in self.0.as_primitive::().values().iter() { + state.serialize_element(v)?; + } + state.end() + } + TimeUnit::Millisecond => { + for v in self + .0 + .as_primitive::() + .values() + .iter() + { + state.serialize_element(v)?; + } + state.end() + } + TimeUnit::Microsecond => { + for v in self + .0 + .as_primitive::() + .values() + .iter() + { + state.serialize_element(v)?; + } + state.end() + } + TimeUnit::Nanosecond => { + for v in self + .0 + .as_primitive::() + .values() + .iter() + { + state.serialize_element(v)?; + } + state.end() + } + }, + DataType::Date32 => { + for v in self.0.as_primitive::().values().iter() { + state.serialize_element(v)?; + } + state.end() + } + DataType::Date64 => { + for v in self.0.as_primitive::().values().iter() { + state.serialize_element(v)?; + } + state.end() + } + DataType::Utf8 => { + for v in self.0.as_string::().iter() { + state.serialize_element( + v.ok_or_else(|| S::Error::custom("options not supported in array"))?, + )?; + } + state.end() + } + DataType::LargeUtf8 => { + for v in self.0.as_string::().iter() { + state.serialize_element( + v.ok_or_else(|| S::Error::custom("options not supported in array"))?, + )?; + } + state.end() + } + DataType::Utf8View => { + for v in self.0.as_string_view().iter() { + state.serialize_element( + v.ok_or_else(|| S::Error::custom("options not supported in array"))?, + )?; + } + state.end() + } + DataType::Decimal128(precision, scale) => { + for v in self.0.as_primitive::().iter() { + let v = v.ok_or_else(|| S::Error::custom("options not supported in array"))?; + state + .serialize_element(&Decimal128Type::format_decimal(v, *precision, *scale))? + // i128 not supported by serde_arrow! + } + state.end() + } + DataType::Struct(_) => { + let struct_array = self.0.as_struct(); + for i in 0..struct_array.len() { + state.serialize_element(&ArrowRow::new(struct_array, i))?; + } + state.end() + } + DataType::List(_) => { + let list = self.0.as_list::(); + for array in list.iter() { + state.serialize_element(&SerdeArrowArray( + &array.ok_or_else(|| S::Error::custom("options not supported in array"))?, + ))?; + } + state.end() + } + DataType::LargeList(_) => { + let list = self.0.as_list::(); + for array in list.iter() { + state.serialize_element(&SerdeArrowArray( + &array.ok_or_else(|| S::Error::custom("options not supported in array"))?, + ))?; + } + state.end() + } + DataType::Null => { + for _ in 0..self.0.len() { + state.serialize_element(&None::<()>)?; + } + state.end() + } + dtype => Err(S::Error::custom(format!("unsuported data type {dtype:?}"))), + } + } +} + pub fn validate_prop(prop: Prop) -> Result { match prop { Prop::Decimal(ref bd) => { @@ -241,9 +446,9 @@ impl Prop { Prop::Map(h_map.into()) } - pub fn as_map(&self) -> Option> { + pub fn as_map(&self) -> Option> { match self { - Prop::Map(map) => Some(SerdeMap(map)), + Prop::Map(map) => Some(SerdeArrowMap(map)), _ => None, } } diff --git a/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs index f77d94dc20..2c4ade6b64 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs @@ -1,5 +1,5 @@ use crate::core::{ - entities::properties::prop::{Prop, SerdeList, SerdeMap}, + entities::properties::prop::{Prop, SerdeArrowList, SerdeArrowMap}, storage::arc_str::ArcStr, }; use bigdecimal::BigDecimal; @@ -124,7 +124,7 @@ impl<'a> Serialize for PropMapRef<'a> { S: serde::Serializer, { match self { - PropMapRef::Mem(map) => SerdeMap(map).serialize(serializer), + PropMapRef::Mem(map) => SerdeArrowMap(map).serialize(serializer), PropMapRef::Arrow(row) => row.serialize(serializer), } } @@ -148,7 +148,7 @@ impl<'a> Serialize for PropRef<'a> { PropNum::F64(v) => serializer.serialize_f64(*v), }, PropRef::Bool(b) => serializer.serialize_bool(*b), - PropRef::List(lst) => SerdeList(lst).serialize(serializer), + PropRef::List(lst) => SerdeArrowList(lst).serialize(serializer), PropRef::Map(map_ref) => map_ref.serialize(serializer), PropRef::NDTime(dt) => serializer.serialize_i64(dt.and_utc().timestamp_millis()), PropRef::DTime(dt) => serializer.serialize_i64(dt.timestamp_millis()), diff --git a/raphtory/src/serialise/parquet/graph.rs b/raphtory/src/serialise/parquet/graph.rs index 735c6cab58..acd3230676 100644 --- a/raphtory/src/serialise/parquet/graph.rs +++ b/raphtory/src/serialise/parquet/graph.rs @@ -10,7 +10,7 @@ use arrow::datatypes::{DataType, Field}; use itertools::Itertools; use parquet::format::KeyValue; use raphtory_api::{ - core::{entities::properties::prop::SerdeProp, storage::arc_str::ArcStr}, + core::{entities::properties::prop::SerdeArrowProp, storage::arc_str::ArcStr}, GraphType, }; use raphtory_core::storage::timeindex::TimeIndexEntry; @@ -88,7 +88,7 @@ impl Serialize for Row { let mut state = serializer.serialize_map(Some(self.row.len()))?; for (k, v) in self.row.iter() { - state.serialize_entry(k, &SerdeProp(v))?; + state.serialize_entry(k, &SerdeArrowProp(v))?; } state.serialize_entry(TIME_COL, &self.t.0)?; diff --git a/raphtory/src/serialise/parquet/model.rs b/raphtory/src/serialise/parquet/model.rs index a34b661c25..2b1562bf87 100644 --- a/raphtory/src/serialise/parquet/model.rs +++ b/raphtory/src/serialise/parquet/model.rs @@ -10,7 +10,7 @@ use crate::{ }; use arrow::datatypes::DataType; use raphtory_api::core::{ - entities::{properties::prop::SerdeProp, GidType}, + entities::{properties::prop::SerdeArrowProp, GidType}, storage::{arc_str::ArcStr, timeindex::TimeIndexEntry}, }; use raphtory_storage::graph::graph::GraphStorage; @@ -59,7 +59,7 @@ impl<'a, G: StaticGraphViewOps> Serialize for ParquetTEdge<'a, G> { state.serialize_entry(LAYER_COL, &layer)?; for (name, prop) in edge.properties().temporal().iter_latest() { - state.serialize_entry(&name, &SerdeProp(&prop))?; + state.serialize_entry(&name, &SerdeArrowProp(&prop))?; } state.end() @@ -85,7 +85,7 @@ impl<'a, G: StaticGraphViewOps> Serialize for ParquetCEdge<'a, G> { state.serialize_entry(LAYER_COL, &layer)?; for (name, prop) in edge.metadata().iter_filtered() { - state.serialize_entry(&name, &SerdeProp(&prop))?; + state.serialize_entry(&name, &SerdeArrowProp(&prop))?; } state.end() @@ -136,7 +136,7 @@ impl<'a> Serialize for ParquetTNode<'a> { state.serialize_entry(TYPE_COL, &self.node.node_type())?; for (name, prop) in self.props.iter() { - state.serialize_entry(&self.cols[*name], &SerdeProp(prop))?; + state.serialize_entry(&self.cols[*name], &SerdeArrowProp(prop))?; } state.end() @@ -158,7 +158,7 @@ impl<'a> Serialize for ParquetCNode<'a> { state.serialize_entry(TYPE_COL, &self.node.node_type())?; for (name, prop) in self.node.metadata().iter_filtered() { - state.serialize_entry(&name, &SerdeProp(&prop))?; + state.serialize_entry(&name, &SerdeArrowProp(&prop))?; } state.end() From 82c8145f02f8e2b7702baca0d9436d5010b95fe4 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 17 Dec 2025 13:44:30 +0100 Subject: [PATCH 30/39] simplify serialisation for SerdeArrowArray --- .../entities/properties/prop/prop_enum.rs | 141 +++++++----------- 1 file changed, 52 insertions(+), 89 deletions(-) diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index e2a1284fda..5957232895 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -34,7 +34,7 @@ use arrow_array::{ Int64Type, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, }, - Array, ArrayRef, LargeListArray, StructArray, + Array, ArrayRef, ArrowPrimitiveType, LargeListArray, StructArray, }; use arrow_schema::{DataType, Field, FieldRef, TimeUnit}; use serde::ser::Error; @@ -243,178 +243,141 @@ impl<'a> Serialize for SerdeArrowArray<'a> { let mut state = serializer.serialize_seq(Some(len))?; match dtype { DataType::Boolean => { - for v in self.0.as_boolean().values().iter() { + for v in self.0.as_boolean().iter() { state.serialize_element(&v)?; } - state.end() } DataType::Int32 => { - for v in self.0.as_primitive::().values().iter() { - state.serialize_element(v)?; + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; } - state.end() } DataType::Int64 => { - for v in self.0.as_primitive::().values().iter() { - state.serialize_element(v)?; + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; } - state.end() } DataType::UInt8 => { - for v in self.0.as_primitive::().values().iter() { - state.serialize_element(v)?; + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; } - state.end() } DataType::UInt16 => { - for v in self.0.as_primitive::().values().iter() { - state.serialize_element(v)?; + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; } - state.end() } DataType::UInt32 => { - for v in self.0.as_primitive::().values().iter() { - state.serialize_element(v)?; + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; } - state.end() } DataType::UInt64 => { - for v in self.0.as_primitive::().values().iter() { - state.serialize_element(v)?; + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; } - state.end() } DataType::Float32 => { - for v in self.0.as_primitive::().values().iter() { - state.serialize_element(v)?; + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; } - state.end() } DataType::Float64 => { - for v in self.0.as_primitive::().values().iter() { - state.serialize_element(v)?; + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; } - state.end() } DataType::Timestamp(unit, _) => match unit { TimeUnit::Second => { - for v in self.0.as_primitive::().values().iter() { - state.serialize_element(v)?; + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; } - state.end() } TimeUnit::Millisecond => { - for v in self - .0 - .as_primitive::() - .values() - .iter() - { - state.serialize_element(v)?; + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; } - state.end() } TimeUnit::Microsecond => { - for v in self - .0 - .as_primitive::() - .values() - .iter() - { - state.serialize_element(v)?; + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; } - state.end() } TimeUnit::Nanosecond => { - for v in self - .0 - .as_primitive::() - .values() - .iter() - { - state.serialize_element(v)?; + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; } - state.end() } }, DataType::Date32 => { - for v in self.0.as_primitive::().values().iter() { - state.serialize_element(v)?; + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; } - state.end() } DataType::Date64 => { - for v in self.0.as_primitive::().values().iter() { - state.serialize_element(v)?; + for v in self.0.as_primitive::().iter() { + state.serialize_element(&v)?; } - state.end() } DataType::Utf8 => { for v in self.0.as_string::().iter() { - state.serialize_element( - v.ok_or_else(|| S::Error::custom("options not supported in array"))?, - )?; + state.serialize_element(&v)?; } - state.end() } DataType::LargeUtf8 => { for v in self.0.as_string::().iter() { - state.serialize_element( - v.ok_or_else(|| S::Error::custom("options not supported in array"))?, - )?; + state.serialize_element(&v)?; } - state.end() } DataType::Utf8View => { for v in self.0.as_string_view().iter() { - state.serialize_element( - v.ok_or_else(|| S::Error::custom("options not supported in array"))?, - )?; + state.serialize_element(&v)?; } - state.end() } DataType::Decimal128(precision, scale) => { for v in self.0.as_primitive::().iter() { - let v = v.ok_or_else(|| S::Error::custom("options not supported in array"))?; - state - .serialize_element(&Decimal128Type::format_decimal(v, *precision, *scale))? + let element = v.map(|v| Decimal128Type::format_decimal(v, *precision, *scale)); + state.serialize_element(&element)? // i128 not supported by serde_arrow! } - state.end() } DataType::Struct(_) => { let struct_array = self.0.as_struct(); - for i in 0..struct_array.len() { - state.serialize_element(&ArrowRow::new(struct_array, i))?; + match struct_array.nulls() { + None => { + for i in 0..struct_array.len() { + state.serialize_element(&ArrowRow::new(struct_array, i))?; + } + } + Some(nulls) => { + for (i, is_valid) in nulls.iter().enumerate() { + state.serialize_element( + &is_valid.then_some(ArrowRow::new(struct_array, i)), + )?; + } + } } - state.end() } DataType::List(_) => { let list = self.0.as_list::(); for array in list.iter() { - state.serialize_element(&SerdeArrowArray( - &array.ok_or_else(|| S::Error::custom("options not supported in array"))?, - ))?; + state.serialize_element(&array.as_ref().map(SerdeArrowArray))?; } - state.end() } DataType::LargeList(_) => { let list = self.0.as_list::(); for array in list.iter() { - state.serialize_element(&SerdeArrowArray( - &array.ok_or_else(|| S::Error::custom("options not supported in array"))?, - ))?; + state.serialize_element(&array.as_ref().map(SerdeArrowArray))?; } - state.end() } DataType::Null => { for _ in 0..self.0.len() { state.serialize_element(&None::<()>)?; } - state.end() } - dtype => Err(S::Error::custom(format!("unsuported data type {dtype:?}"))), + dtype => Err(Error::custom(format!("unsuported data type {dtype:?}")))?, } + state.end() } } From 3ecf1a82e17db538696c45801db6c082b5c87c70 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 19 Dec 2025 11:21:56 +0100 Subject: [PATCH 31/39] more refactoring of the graph path handling --- examples/rust/src/bin/bench/main.rs | 2 +- examples/rust/src/bin/btc/main.rs | 2 +- examples/rust/src/bin/hulongbay/main.rs | 2 +- examples/rust/src/bin/lotr/main.rs | 2 +- examples/rust/src/bin/pokec/main.rs | 2 +- python/python/raphtory/__init__.pyi | 6 +- .../entities/properties/prop/prop_array.rs | 4 +- .../entities/properties/prop/prop_enum.rs | 30 +- raphtory-benchmark/benches/index_bench.rs | 5 +- raphtory-benchmark/benches/search_bench.rs | 2 +- raphtory-benchmark/src/common/mod.rs | 3 +- raphtory-graphql/src/data.rs | 2 +- raphtory-graphql/src/graph.rs | 9 +- raphtory-graphql/src/lib.rs | 5 +- .../src/model/graph/meta_graph.rs | 2 +- raphtory-graphql/src/model/mod.rs | 16 +- raphtory-graphql/src/paths.rs | 93 +-- .../src/python/client/raphtory_client.rs | 5 +- raphtory-graphql/src/python/mod.rs | 3 +- raphtory-graphql/src/url_encode.rs | 17 +- .../storage/graph/storage_ops/disk_storage.rs | 205 ------ raphtory/src/db/api/storage/storage.rs | 39 +- raphtory/src/db/api/view/graph.rs | 602 +++++++++--------- .../src/db/api/view/internal/materialize.rs | 19 + raphtory/src/db/graph/graph.rs | 27 +- raphtory/src/db/graph/views/deletion_graph.rs | 18 +- raphtory/src/errors.rs | 9 +- raphtory/src/graph_loader/company_house.rs | 2 +- raphtory/src/graph_loader/stable_coins.rs | 2 +- raphtory/src/io/arrow/df_loaders.rs | 5 +- raphtory/src/python/graph/disk_graph.rs | 321 ---------- raphtory/src/python/graph/graph.rs | 16 +- .../src/python/graph/graph_with_deletions.rs | 12 +- raphtory/src/python/graph/views/graph_view.rs | 17 +- .../types/macros/trait_impl/serialise.rs | 14 +- raphtory/src/search/mod.rs | 66 +- raphtory/src/search/searcher.rs | 2 +- raphtory/src/serialise/graph_folder.rs | 206 +++--- raphtory/src/serialise/metadata.rs | 6 +- raphtory/src/serialise/parquet/mod.rs | 2 +- raphtory/src/serialise/serialise.rs | 84 ++- raphtory/tests/db_tests.rs | 4 +- raphtory/tests/serialise_test.rs | 38 +- 43 files changed, 749 insertions(+), 1179 deletions(-) delete mode 100644 raphtory/src/db/api/storage/graph/storage_ops/disk_storage.rs delete mode 100644 raphtory/src/python/graph/disk_graph.rs diff --git a/examples/rust/src/bin/bench/main.rs b/examples/rust/src/bin/bench/main.rs index 1ec87f0e6c..ad63c92313 100644 --- a/examples/rust/src/bin/bench/main.rs +++ b/examples/rust/src/bin/bench/main.rs @@ -39,7 +39,7 @@ fn main() { info!("Loading data"); let graph = if encoded_data_dir.exists() { let now = Instant::now(); - let g = Graph::decode(encoded_data_dir.as_path(), None) + let g = Graph::decode(encoded_data_dir.as_path()) .expect("Failed to load graph from encoded data files"); info!( diff --git a/examples/rust/src/bin/btc/main.rs b/examples/rust/src/bin/btc/main.rs index 3aebf1764c..0dc39c63a6 100644 --- a/examples/rust/src/bin/btc/main.rs +++ b/examples/rust/src/bin/btc/main.rs @@ -64,7 +64,7 @@ fn main() { let graph = if encoded_data_dir.exists() { let now = Instant::now(); - let g = Graph::decode(encoded_data_dir.as_path(), None) + let g = Graph::decode(encoded_data_dir.as_path()) .expect("Failed to load graph from encoded data files"); info!( diff --git a/examples/rust/src/bin/hulongbay/main.rs b/examples/rust/src/bin/hulongbay/main.rs index e7ca0cab60..6d64bf532a 100644 --- a/examples/rust/src/bin/hulongbay/main.rs +++ b/examples/rust/src/bin/hulongbay/main.rs @@ -65,7 +65,7 @@ pub fn loader(data_dir: &Path) -> Result> { let encoded_data_dir = data_dir.join("graphdb.bincode"); if encoded_data_dir.exists() { let now = Instant::now(); - let g = Graph::decode(encoded_data_dir.as_path(), None)?; + let g = Graph::decode(encoded_data_dir.as_path())?; info!( "Loaded graph from path {} with {} nodes, {} edges, took {} seconds", diff --git a/examples/rust/src/bin/lotr/main.rs b/examples/rust/src/bin/lotr/main.rs index 69a5386995..cda67cf6c5 100644 --- a/examples/rust/src/bin/lotr/main.rs +++ b/examples/rust/src/bin/lotr/main.rs @@ -38,7 +38,7 @@ fn main() { let graph = if encoded_data_dir.exists() { let now = Instant::now(); - let g = Graph::decode(encoded_data_dir.as_path(), None) + let g = Graph::decode(encoded_data_dir.as_path()) .expect("Failed to load graph from encoded data files"); info!( diff --git a/examples/rust/src/bin/pokec/main.rs b/examples/rust/src/bin/pokec/main.rs index beadfe8e5a..3f066ab915 100644 --- a/examples/rust/src/bin/pokec/main.rs +++ b/examples/rust/src/bin/pokec/main.rs @@ -24,7 +24,7 @@ fn main() { let data_dir = Path::new(args.get(1).expect("No data directory provided")); let g = if std::path::Path::new("/tmp/pokec").exists() { - Graph::decode("/tmp/pokec", None).unwrap() + Graph::decode("/tmp/pokec").unwrap() } else { let g = Graph::new(); CsvLoader::new(data_dir) diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 68643483b4..cf5cd38e89 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -426,18 +426,16 @@ class GraphView(object): GraphView: The layered view """ - def materialize(self, path=None) -> GraphView: + def materialize(self) -> GraphView: """ Returns a 'materialized' clone of the graph view - i.e. a new graph with a copy of the data seen within the view instead of just a mask over the original graph. - If a path is provided, the new graph will be stored at that path - (assuming the storage feature is enabled). Returns: GraphView: Returns a graph clone """ - def materialize_to_graph_folder(self, path): + def materialize_at(self, path): """Materializes the graph view into a graphql compatible folder.""" @property diff --git a/raphtory-api/src/core/entities/properties/prop/prop_array.rs b/raphtory-api/src/core/entities/properties/prop/prop_array.rs index fbd5020279..025736ffc7 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_array.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_array.rs @@ -1,7 +1,5 @@ use crate::{ - core::entities::properties::prop::{ - unify_types, ArrowRow, DirectConvert, Prop, PropType, SerdeArrowProp, - }, + core::entities::properties::prop::{unify_types, ArrowRow, DirectConvert, Prop, PropType}, iter::{BoxedLIter, IntoDynBoxed}, }; use arrow_array::{ diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index 5957232895..667c937c9b 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -1,16 +1,26 @@ use crate::core::{ entities::{ - properties::prop::{prop_ref_enum::PropRef, PropNum, PropType}, + properties::prop::{prop_array::*, prop_ref_enum::PropRef, ArrowRow, PropNum, PropType}, GidRef, }, storage::arc_str::ArcStr, }; +use arrow_array::{ + cast::AsArray, + types::{ + Date32Type, Date64Type, Decimal128Type, DecimalType, Float32Type, Float64Type, Int32Type, + Int64Type, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, + TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, + }, + Array, ArrayRef, LargeListArray, StructArray, +}; +use arrow_schema::{DataType, Field, FieldRef, TimeUnit}; use bigdecimal::{num_bigint::BigInt, BigDecimal}; use chrono::{DateTime, NaiveDateTime, Utc}; use itertools::Itertools; use rustc_hash::{FxBuildHasher, FxHashMap}; use serde::{ - ser::{SerializeMap, SerializeSeq}, + ser::{Error, SerializeMap, SerializeSeq}, Deserialize, Serialize, Serializer, }; use std::{ @@ -23,22 +33,6 @@ use std::{ }; use thiserror::Error; -use crate::{ - core::entities::properties::prop::{prop_array::*, ArrowRow}, - iter::IntoDynBoxed, -}; -use arrow_array::{ - cast::AsArray, - types::{ - Date32Type, Date64Type, Decimal128Type, DecimalType, Float32Type, Float64Type, Int32Type, - Int64Type, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, - TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, - }, - Array, ArrayRef, ArrowPrimitiveType, LargeListArray, StructArray, -}; -use arrow_schema::{DataType, Field, FieldRef, TimeUnit}; -use serde::ser::Error; - pub const DECIMAL_MAX: i128 = 99999999999999999999999999999999999999i128; // equivalent to parquet decimal(38, 0) #[derive(Error, Debug)] diff --git a/raphtory-benchmark/benches/index_bench.rs b/raphtory-benchmark/benches/index_bench.rs index 5434921574..e1534c4bc4 100644 --- a/raphtory-benchmark/benches/index_bench.rs +++ b/raphtory-benchmark/benches/index_bench.rs @@ -27,11 +27,8 @@ fn bench_graph_index_load(c: &mut Criterion) { let mut group = c.benchmark_group("graph_index_load"); group.sample_size(100); - - let path_for_decoded_graph = None; - group.bench_function(BenchmarkId::from_parameter("load_once"), |b| { - b.iter(|| Graph::decode(black_box(&path), path_for_decoded_graph.clone()).unwrap()); + b.iter(|| Graph::decode(black_box(&path)).unwrap()); }); group.finish(); diff --git a/raphtory-benchmark/benches/search_bench.rs b/raphtory-benchmark/benches/search_bench.rs index 03567243a6..426631ab6c 100644 --- a/raphtory-benchmark/benches/search_bench.rs +++ b/raphtory-benchmark/benches/search_bench.rs @@ -33,7 +33,7 @@ use std::{iter, sync::Arc, time::Instant}; static GRAPH: Lazy> = Lazy::new(|| { let data_dir = "/tmp/graphs/raph_social/rf0.1"; // TODO Fix this // let data_dir = "/tmp/graphs/raph_social/rf1.0"; - let graph = Graph::decode(data_dir, None).unwrap(); + let graph = Graph::decode(data_dir).unwrap(); println!("Nodes count = {}", graph.count_nodes()); println!("Edges count = {}", graph.count_edges()); diff --git a/raphtory-benchmark/src/common/mod.rs b/raphtory-benchmark/src/common/mod.rs index 8494770969..3d83103de3 100644 --- a/raphtory-benchmark/src/common/mod.rs +++ b/raphtory-benchmark/src/common/mod.rs @@ -587,9 +587,8 @@ pub fn run_proto_encode_benchmark(group: &mut BenchmarkGroup, graph: G pub fn run_proto_decode_benchmark(group: &mut BenchmarkGroup, graph: Graph) { let f = TempDir::new().unwrap(); graph.encode(f.path()).unwrap(); - let path_for_decoded_graph = None; bench(group, "proto_decode", None, |b| { - b.iter(|| Graph::decode(f.path(), path_for_decoded_graph).unwrap()) + b.iter(|| Graph::decode(f.path()).unwrap()) }) } diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index ae9f9c51f0..a98f625913 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -275,7 +275,7 @@ impl Data { .vectorise( conf.cache.clone(), template.clone(), - Some(&folder.vectors_path().ok()?), + Some(&folder.graph_folder().vectors_path().ok()?), true, // verbose ) .await; diff --git a/raphtory-graphql/src/graph.rs b/raphtory-graphql/src/graph.rs index 56aaaef1c1..5c039908ae 100644 --- a/raphtory-graphql/src/graph.rs +++ b/raphtory-graphql/src/graph.rs @@ -25,7 +25,7 @@ use tracing::info; #[cfg(feature = "search")] use raphtory::prelude::IndexMutationOps; -use raphtory::serialise::GraphPaths; +use raphtory::serialise::{GraphPaths, StableDecode}; #[derive(Clone)] pub struct GraphWithVectors { @@ -86,7 +86,12 @@ impl GraphWithVectors { cache: Option, create_index: bool, ) -> Result { - let graph = folder.data_path()?.read_graph()?; + let graph_folder = folder.graph_folder(); + let graph = if graph_folder.read_metadata()?.is_diskgraph { + MaterializedGraph::load_from_path(graph_folder)? + } else { + MaterializedGraph::decode(graph_folder)? + }; let vectors = cache.and_then(|cache| { VectorisedGraph::read_from_path(&folder.vectors_path().ok()?, graph.clone(), cache).ok() }); diff --git a/raphtory-graphql/src/lib.rs b/raphtory-graphql/src/lib.rs index 1971c1c0ae..f9f85c5672 100644 --- a/raphtory-graphql/src/lib.rs +++ b/raphtory-graphql/src/lib.rs @@ -40,7 +40,7 @@ mod graphql_test { config::app_config::AppConfig, data::{data_tests::save_graphs_to_work_dir, Data}, model::App, - url_encode::{url_decode_graph, url_encode_graph}, + url_encode::{url_decode_graph_at, url_encode_graph}, }; use async_graphql::UploadValue; use dynamic_graphql::{Request, Variables}; @@ -1062,8 +1062,7 @@ mod graphql_test { let res_json = res.data.into_json().unwrap(); let graph_encoded = res_json.get("receiveGraph").unwrap().as_str().unwrap(); let temp_dir = tempdir().unwrap(); - let path_for_decoded_graph = Some(temp_dir.path()); - let graph_roundtrip = url_decode_graph(graph_encoded, path_for_decoded_graph) + let graph_roundtrip = url_decode_graph_at(graph_encoded, temp_dir.path()) .unwrap() .into_dynamic(); assert_eq!(g, graph_roundtrip); diff --git a/raphtory-graphql/src/model/graph/meta_graph.rs b/raphtory-graphql/src/model/graph/meta_graph.rs index 48a9fc84b3..d9c5949883 100644 --- a/raphtory-graphql/src/model/graph/meta_graph.rs +++ b/raphtory-graphql/src/model/graph/meta_graph.rs @@ -93,7 +93,7 @@ impl MetaGraph { /// Returns the metadata of the graph. async fn metadata(&self) -> Result> { - let res = decode_graph_metadata(&self.folder)?; + let res = decode_graph_metadata(self.folder.graph_folder())?; Ok(res .into_iter() .filter_map(|(key, value)| value.map(|prop| GqlProperty::new(key, prop))) diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index ea6b82f9e4..b28330cbfe 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -9,9 +9,9 @@ use crate::{ }, plugins::{mutation_plugin::MutationPlugin, query_plugin::QueryPlugin}, }, - paths::ValidWriteableGraphFolder, + paths::{ValidGraphPaths, ValidWriteableGraphFolder}, rayon::blocking_compute, - url_encode::{url_decode_graph, url_encode_graph}, + url_encode::{url_decode_graph_at, url_encode_graph}, }; use async_graphql::Context; use dynamic_graphql::{ @@ -203,10 +203,10 @@ impl Mut { let data = ctx.data_unchecked::(); let overwrite = false; let folder = data.validate_path_for_insert(&path, overwrite)?; - let graph_path = folder.graph_path()?; + let graph_path = folder.graph_folder(); let graph: MaterializedGraph = match graph_type { - GqlGraphType::Persistent => PersistentGraph::new_at_path(graph_path).into(), - GqlGraphType::Event => Graph::new_at_path(graph_path).into(), + GqlGraphType::Persistent => PersistentGraph::new_at_path(graph_path)?.into(), + GqlGraphType::Event => Graph::new_at_path(graph_path)?.into(), }; data.insert_graph(folder, graph).await?; @@ -280,7 +280,7 @@ impl Mut { } else { ValidWriteableGraphFolder::try_new(data.work_dir.clone(), path)? }; - let g: MaterializedGraph = url_decode_graph(graph, Some(&folder.graph_path()?))?; + let g: MaterializedGraph = url_decode_graph_at(graph, folder.graph_folder())?; data.insert_graph(folder, g).await?; Ok(path.to_owned()) @@ -300,11 +300,11 @@ impl Mut { let data = ctx.data_unchecked::(); let folder = data.validate_path_for_insert(&new_path, overwrite)?; let parent_graph = data.get_graph(parent_path).await?.graph; - let graph_path = folder.graph_path()?; + let folder_clone = folder.clone(); let new_subgraph = blocking_compute(move || { parent_graph .subgraph(nodes) - .materialize_at(Some(&graph_path)) + .materialize_at(folder_clone.graph_folder()) }) .await?; diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index cb656b7677..4475ed1820 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -1,11 +1,15 @@ use crate::{data::DIRTY_PATH, model::blocking_io, rayon::blocking_compute}; use futures_util::io; use raphtory::{ - db::api::view::MaterializedGraph, + db::api::{ + storage::storage::{Extension, PersistentStrategy}, + view::{internal::InternalStorageOps, MaterializedGraph}, + }, errors::{GraphError, InvalidPathReason}, + prelude::GraphViewOps, serialise::{ - metadata::GraphMetadata, GraphFolder, GraphPaths, RelativePath, WriteableGraphFolder, - META_PATH, + metadata::GraphMetadata, GraphFolder, GraphPaths, RelativePath, StableDecode, + WriteableGraphFolder, META_PATH, }, }; use std::{ @@ -19,10 +23,13 @@ use std::{ time::{SystemTime, UNIX_EPOCH}, }; use tracing::{error, warn}; +use zip::ZipArchive; -pub trait ValidGraphPaths: GraphPaths { +pub trait ValidGraphPaths { fn local_path(&self) -> &str; + fn graph_folder(&self) -> &impl GraphPaths; + fn local_path_string(&self) -> String { self.local_path().to_owned() } @@ -56,24 +63,14 @@ impl ValidPath { #[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] pub struct ExistingGraphFolder(pub(crate) ValidGraphFolder); -impl GraphPaths for ExistingGraphFolder { - fn root(&self) -> &Path { - self.0.root() - } - - fn relative_data_path(&self) -> Result { - self.0.relative_data_path() - } - - fn relative_graph_path(&self) -> Result { - self.0.relative_graph_path() - } -} - impl ValidGraphPaths for ExistingGraphFolder { fn local_path(&self) -> &str { self.0.local_path() } + + fn graph_folder(&self) -> &impl GraphPaths { + self.0.graph_folder() + } } impl Deref for ExistingGraphFolder { @@ -279,24 +276,14 @@ pub struct ValidWriteableGraphFolder { dirty_marker: Option, } -impl GraphPaths for ValidWriteableGraphFolder { - fn root(&self) -> &Path { - self.global_path.root() - } - - fn relative_data_path(&self) -> Result { - self.global_path.relative_data_path() - } - - fn relative_graph_path(&self) -> Result { - self.global_path.relative_data_path() - } -} - impl ValidGraphPaths for ValidWriteableGraphFolder { fn local_path(&self) -> &str { &self.local_path } + + fn graph_folder(&self) -> &impl GraphPaths { + &self.global_path + } } impl ValidWriteableGraphFolder { @@ -362,7 +349,19 @@ impl ValidWriteableGraphFolder { &self, graph: MaterializedGraph, ) -> Result<(), InternalPathValidationError> { - self.global_path.data_path()?.replace_graph(graph)?; + if Extension::disk_storage_enabled() { + let graph_path = self.graph_folder().graph_path()?; + if graph + .disk_storage_enabled() + .is_some_and(|path| path == &graph_path) + { + self.global_path.write_metadata(&graph)?; + } else { + graph.materialize_at(self.graph_folder())?; + } + } else { + self.global_path.data_path()?.replace_graph(graph)?; + } Ok(()) } pub fn write_graph_data(&self, graph: MaterializedGraph) -> Result<(), PathValidationError> { @@ -371,18 +370,30 @@ impl ValidWriteableGraphFolder { } pub fn read_graph(&self) -> Result { - self.with_internal_errors(|| self.data_path()?.read_graph()) + self.with_internal_errors(|| { + if self.graph_folder().read_metadata()?.is_diskgraph { + MaterializedGraph::load_from_path(self.graph_folder()) + } else { + MaterializedGraph::decode(self.graph_folder()) + } + }) } pub fn write_graph_bytes( &self, bytes: R, ) -> Result<(), PathValidationError> { - self.global_path - .data_path() - .with_path(&self.local_path)? - .unzip_to_folder(bytes) - .with_path(&self.local_path) + self.with_internal_errors(|| { + if Extension::disk_storage_enabled() { + MaterializedGraph::decode_from_zip_at( + ZipArchive::new(bytes)?, + self.graph_folder(), + )?; + } else { + self.global_path.data_path()?.unzip_to_folder(bytes)?; + } + Ok::<(), GraphError>(()) + }) } /// Swap old and new data and delete the old graph @@ -567,6 +578,10 @@ impl ValidGraphPaths for ValidGraphFolder { fn local_path(&self) -> &str { &self.local_path } + + fn graph_folder(&self) -> &impl GraphPaths { + &self.global_path + } } impl ValidGraphFolder { diff --git a/raphtory-graphql/src/python/client/raphtory_client.rs b/raphtory-graphql/src/python/client/raphtory_client.rs index d57020e6db..786df1df90 100644 --- a/raphtory-graphql/src/python/client/raphtory_client.rs +++ b/raphtory-graphql/src/python/client/raphtory_client.rs @@ -5,7 +5,7 @@ use crate::{ server::is_online, translate_from_python, translate_map_to_python, }, - url_encode::url_decode_graph, + url_encode::{url_decode_graph, url_decode_graph_at}, }; use pyo3::{ exceptions::{PyException, PyValueError}, @@ -418,8 +418,7 @@ impl PyRaphtoryClient { let data = self.query_with_json_variables(query.clone(), variables.into())?; match data.get("receiveGraph") { Some(JsonValue::String(graph)) => { - let path_for_decoded_graph = None; - let mat_graph = url_decode_graph(graph, path_for_decoded_graph)?; + let mat_graph = url_decode_graph(graph)?; Ok(mat_graph) } _ => Err(PyException::new_err(format!( diff --git a/raphtory-graphql/src/python/mod.rs b/raphtory-graphql/src/python/mod.rs index a7936a8c32..0292834874 100644 --- a/raphtory-graphql/src/python/mod.rs +++ b/raphtory-graphql/src/python/mod.rs @@ -119,8 +119,7 @@ pub(crate) fn encode_graph(graph: MaterializedGraph) -> PyResult { /// Union[Graph, PersistentGraph]: the decoded graph #[pyfunction] pub(crate) fn decode_graph(graph: &str) -> PyResult { - let path_for_decoded_graph = None; - let result = url_decode_graph(graph, path_for_decoded_graph); + let result = url_decode_graph(graph); match result { Ok(g) => Ok(g), Err(e) => Err(PyValueError::new_err(format!("Error decoding: {:?}", e))), diff --git a/raphtory-graphql/src/url_encode.rs b/raphtory-graphql/src/url_encode.rs index 6daa9f07d8..7b64fd6eae 100644 --- a/raphtory-graphql/src/url_encode.rs +++ b/raphtory-graphql/src/url_encode.rs @@ -3,8 +3,9 @@ use raphtory::{ db::api::view::MaterializedGraph, errors::GraphError, prelude::{StableDecode, StableEncode}, + serialise::GraphPaths, }; -use std::path::Path; + #[derive(thiserror::Error, Debug)] pub enum UrlDecodeError { #[error("Bincode operation failed")] @@ -26,13 +27,17 @@ pub fn url_encode_graph>(graph: G) -> Result>( +pub fn url_decode_graph>(graph: T) -> Result { + let bytes = BASE64_URL_SAFE.decode(graph.as_ref()).unwrap(); + MaterializedGraph::decode_from_bytes(&bytes) +} + +pub fn url_decode_graph_at>( graph: T, - storage_path: Option<&Path>, + storage_path: &(impl GraphPaths + ?Sized), ) -> Result { let bytes = BASE64_URL_SAFE.decode(graph.as_ref()).unwrap(); - - MaterializedGraph::decode_from_bytes(&bytes, storage_path) + MaterializedGraph::decode_from_bytes_at(&bytes, storage_path) } #[cfg(test)] @@ -55,7 +60,7 @@ mod tests { let bytes = url_encode_graph(graph.clone()).unwrap(); let tempdir = tempfile::tempdir().unwrap(); let storage_path = tempdir.path().to_path_buf(); - let decoded_graph = url_decode_graph(bytes, Some(&storage_path)).unwrap(); + let decoded_graph = url_decode_graph_at(bytes, &storage_path).unwrap(); let g2 = decoded_graph.into_events().unwrap(); diff --git a/raphtory/src/db/api/storage/graph/storage_ops/disk_storage.rs b/raphtory/src/db/api/storage/graph/storage_ops/disk_storage.rs deleted file mode 100644 index 0a217c8bc1..0000000000 --- a/raphtory/src/db/api/storage/graph/storage_ops/disk_storage.rs +++ /dev/null @@ -1,205 +0,0 @@ -use crate::{ - db::{ - api::view::internal::GraphTimeSemanticsOps, graph::views::deletion_graph::PersistentGraph, - }, - errors::GraphError, - prelude::{Graph, GraphViewOps, NodeStateOps, NodeViewOps}, -}; -use arrow::array::ArrayRef; -use itertools::Itertools; -use pometry_storage::interop::GraphLike; -use raphtory_api::{ - core::{ - entities::{properties::tprop::TPropOps, LayerIds, EID, GID, VID}, - storage::timeindex::{TimeIndexEntry, TimeIndexOps}, - Direction, - }, - iter::IntoDynBoxed, -}; -use raphtory_core::utils::iter::GenLockedIter; -use raphtory_storage::{ - core_ops::CoreGraphOps, - disk::{graph_impl::prop_conversion::arrow_array_from_props, DiskGraphStorage}, - graph::{ - edges::edge_storage_ops::EdgeStorageOps, graph::GraphStorage, - nodes::node_storage_ops::NodeStorageOps, - }, -}; -use std::{path::Path, sync::Arc}; - -impl From for Graph { - fn from(value: DiskGraphStorage) -> Self { - Graph::from_internal_graph(GraphStorage::Disk(Arc::new(value))) - } -} - -impl From for PersistentGraph { - fn from(value: DiskGraphStorage) -> Self { - PersistentGraph::from_internal_graph(GraphStorage::Disk(Arc::new(value))) - } -} - -pub trait IntoGraph { - fn into_graph(self) -> Graph; - - fn into_persistent_graph(self) -> PersistentGraph; -} - -impl IntoGraph for DiskGraphStorage { - fn into_graph(self) -> Graph { - self.into() - } - - fn into_persistent_graph(self) -> PersistentGraph { - self.into() - } -} - -impl Graph { - pub fn persist_as_disk_graph(&self, graph_dir: impl AsRef) -> Result { - Ok(Graph::from(DiskGraphStorage::from_graph(self, graph_dir)?)) - } -} - -impl PersistentGraph { - pub fn persist_as_disk_graph( - &self, - graph_dir: impl AsRef, - ) -> Result { - Ok(PersistentGraph::from(DiskGraphStorage::from_graph( - &self.event_graph(), - graph_dir, - )?)) - } -} - -impl GraphLike for Graph { - fn external_ids(&self) -> Vec { - self.nodes().id().collect() - } - - fn node_names(&self) -> impl Iterator { - self.nodes().name().into_iter_values() - } - - fn node_type_ids(&self) -> Option> { - if self.core_graph().node_meta().node_type_meta().len() <= 1 { - None - } else { - let core_nodes = self.core_nodes(); - Some((0..core_nodes.len()).map(move |i| core_nodes.node_entry(VID(i)).node_type_id())) - } - } - - fn node_types(&self) -> Option> { - let meta = self.core_graph().node_meta().node_type_meta(); - if meta.len() <= 1 { - None - } else { - Some(meta.get_keys().into_iter().map(|s| s.to_string())) - } - } - - fn layer_names(&self) -> Vec { - self.edge_meta() - .layer_meta() - .get_keys() - .into_iter() - .map_into() - .collect() - } - - fn num_nodes(&self) -> usize { - self.unfiltered_num_nodes() - } - - fn num_edges(&self) -> usize { - self.count_edges() - } - - fn out_degree(&self, vid: VID, layer: usize) -> usize { - self.core_node(vid.0.into()) - .degree(&LayerIds::One(layer), Direction::OUT) - } - - fn in_degree(&self, vid: VID, layer: usize) -> usize { - self.core_node(vid.0.into()) - .degree(&LayerIds::One(layer), Direction::IN) - } - - fn in_edges(&self, vid: VID, layer: usize, map: impl Fn(VID, EID) -> B) -> Vec { - let node = self.core_node(vid.0.into()); - node.edges_iter(&LayerIds::One(layer), Direction::IN) - .map(|edge| map(edge.src(), edge.pid())) - .collect() - } - fn out_edges(&self, vid: VID, layer: usize) -> Vec<(VID, VID, EID)> { - let node = self.core_node(vid.0.into()); - let edges = node - .edges_iter(&LayerIds::One(layer), Direction::OUT) - .map(|edge| { - let src = edge.src(); - let dst = edge.dst(); - let eid = edge.pid(); - (src, dst, eid) - }) - .collect(); - edges - } - - fn edge_additions(&self, eid: EID, layer: usize) -> impl Iterator + '_ { - let edge = self.core_edge(eid); - GenLockedIter::from(edge, |edge| edge.additions(layer).iter().into_dyn_boxed()) - } - - fn edge_prop_keys(&self) -> Vec { - let props = self.edge_meta().temporal_prop_mapper().get_keys(); - props.into_iter().map(|s| s.to_string()).collect() - } - - fn find_name(&self, vid: VID) -> Option { - self.core_node(vid.0.into()).name().map(|s| s.to_string()) - } - - fn prop_as_arrow>( - &self, - disk_edges: &[u64], - edge_id_map: &[usize], - edge_ts: &[TimeIndexEntry], - edge_t_offsets: &[usize], - layer: usize, - prop_id: usize, - _key: S, - ) -> Option { - let prop_type = self - .edge_meta() - .temporal_prop_mapper() - .get_dtype(prop_id) - .unwrap(); - arrow_array_from_props( - disk_edges.iter().flat_map(|&disk_eid| { - let disk_eid = disk_eid as usize; - let eid = edge_id_map[disk_eid]; - let ts = &edge_ts[edge_t_offsets[disk_eid]..edge_t_offsets[disk_eid + 1]]; - let edge = self.core_edge(EID(eid)); - ts.iter() - .map(move |t| edge.temporal_prop_layer(layer, prop_id).at(t)) - }), - prop_type, - ) - } - - fn earliest_time(&self) -> i64 { - self.earliest_time_global().unwrap_or(i64::MAX) - } - - fn latest_time(&self) -> i64 { - self.latest_time_global().unwrap_or(i64::MIN) - } - - fn out_neighbours(&self, vid: VID) -> impl Iterator + '_ { - self.core_node(vid) - .into_edges_iter(&LayerIds::All, Direction::OUT) - .map(|e_ref| (e_ref.dst(), e_ref.pid())) - } -} diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 2601d140c2..f69144d128 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -35,8 +35,11 @@ use std::{ path::Path, sync::Arc, }; -use storage::{Extension, WalImpl}; +pub use storage::{ + persist::strategy::{Config, PersistentStrategy}, + Extension, WalImpl, +}; #[cfg(feature = "search")] use { crate::{ @@ -99,33 +102,35 @@ impl Storage { } } - pub(crate) fn new_at_path(path: impl AsRef) -> Self { - Self { - graph: GraphStorage::Unlocked(Arc::new( - TemporalGraph::new_with_path(path, Extension::default()).unwrap(), - )), + pub(crate) fn new_at_path(path: impl AsRef) -> Result { + Ok(Self { + graph: GraphStorage::Unlocked(Arc::new(TemporalGraph::new_with_path( + path, + Extension::default(), + )?)), #[cfg(feature = "search")] index: RwLock::new(GraphIndex::Empty), - } + }) } - pub(crate) fn new_with_path_and_ext(path: impl AsRef, ext: Extension) -> Self { - Self { - graph: GraphStorage::Unlocked(Arc::new( - TemporalGraph::new_with_path(path, ext).unwrap(), - )), + pub(crate) fn new_with_path_and_ext( + path: impl AsRef, + ext: Extension, + ) -> Result { + Ok(Self { + graph: GraphStorage::Unlocked(Arc::new(TemporalGraph::new_with_path(path, ext)?)), #[cfg(feature = "search")] index: RwLock::new(GraphIndex::Empty), - } + }) } - pub(crate) fn load_from(path: impl AsRef) -> Self { - let graph = GraphStorage::Unlocked(Arc::new(TemporalGraph::load_from_path(path).unwrap())); - Self { + pub(crate) fn load_from(path: impl AsRef) -> Result { + let graph = GraphStorage::Unlocked(Arc::new(TemporalGraph::load_from_path(path)?)); + Ok(Self { graph, #[cfg(feature = "search")] index: RwLock::new(GraphIndex::Empty), - } + }) } pub(crate) fn from_inner(graph: GraphStorage) -> Self { diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 70702d723f..e6e8ad205a 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -56,6 +56,9 @@ use std::{ sync::{atomic::Ordering, Arc}, }; +#[cfg(feature = "io")] +use crate::serialise::GraphPaths; + #[cfg(feature = "search")] use crate::{ db::graph::views::filter::model::{AsEdgeFilter, AsNodeFilter}, @@ -85,11 +88,13 @@ pub trait GraphViewOps<'graph>: BoxableGraphView + Sized + Clone + 'graph { /// /// Returns: /// MaterializedGraph: Returns a new materialized graph. - fn materialize_at(&self, path: Option<&Path>) -> Result; + #[cfg(feature = "io")] + fn materialize_at( + &self, + path: &(impl GraphPaths + ?Sized), + ) -> Result; - fn materialize(&self) -> Result { - self.materialize_at(None) - } + fn materialize(&self) -> Result; fn subgraph, V: AsNodeRef>(&self, nodes: I) -> NodeSubgraph; @@ -228,346 +233,361 @@ fn edges_inner<'graph, G: GraphView + 'graph>(g: &G, locked: bool) -> Edges<'gra } } -impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { - fn edges(&self) -> Edges<'graph, Self, Self> { - edges_inner(self, true) - } +fn materialize_impl( + graph: &impl GraphView, + path: Option<&Path>, +) -> Result { + let storage = graph.core_graph().lock(); + let mut node_meta = Meta::new_for_nodes(); + let mut edge_meta = Meta::new_for_edges(); + let mut graph_props_meta = Meta::new_for_graph_props(); + + node_meta.set_metadata_mapper(graph.node_meta().metadata_mapper().deep_clone()); + node_meta.set_temporal_prop_mapper(graph.node_meta().temporal_prop_mapper().deep_clone()); + edge_meta.set_metadata_mapper(graph.edge_meta().metadata_mapper().deep_clone()); + edge_meta.set_temporal_prop_mapper(graph.edge_meta().temporal_prop_mapper().deep_clone()); + graph_props_meta.set_metadata_mapper(graph.graph_props_meta().metadata_mapper().deep_clone()); + graph_props_meta + .set_temporal_prop_mapper(graph.graph_props_meta().temporal_prop_mapper().deep_clone()); + + let layer_meta = edge_meta.layer_meta(); + + // NOTE: layers must be set in layer_meta before the TemporalGraph is initialized to + // make sure empty layers are created. + let layer_map: Vec<_> = match graph.layer_ids() { + LayerIds::None => { + // no layers to map + vec![] + } + LayerIds::All => { + let layers = storage.edge_meta().layer_meta().keys(); + let mut layer_map = vec![0; storage.edge_meta().layer_meta().num_all_fields()]; - fn edges_unlocked(&self) -> Edges<'graph, Self, Self> { - edges_inner(self, false) - } + for (id, name) in storage.edge_meta().layer_meta().ids().zip(layers.iter()) { + let new_id = layer_meta.get_or_create_id(name).inner(); + layer_map[id] = new_id; + } - fn nodes(&self) -> Nodes<'graph, Self, Self> { - let graph = self.clone(); - Nodes::new(graph) - } + layer_map + } + LayerIds::One(l_id) => { + let mut layer_map = vec![0; storage.edge_meta().layer_meta().num_all_fields()]; + let layer_name = storage.edge_meta().get_layer_name_by_id(*l_id); + let new_id = layer_meta.get_or_create_id(&layer_name).inner(); - fn materialize_at(&self, path: Option<&Path>) -> Result { - let storage = self.core_graph().lock(); - - let mut node_meta = Meta::new_for_nodes(); - let mut edge_meta = Meta::new_for_edges(); - let mut graph_props_meta = Meta::new_for_graph_props(); - - node_meta.set_metadata_mapper(self.node_meta().metadata_mapper().deep_clone()); - node_meta.set_temporal_prop_mapper(self.node_meta().temporal_prop_mapper().deep_clone()); - edge_meta.set_metadata_mapper(self.edge_meta().metadata_mapper().deep_clone()); - edge_meta.set_temporal_prop_mapper(self.edge_meta().temporal_prop_mapper().deep_clone()); - graph_props_meta - .set_metadata_mapper(self.graph_props_meta().metadata_mapper().deep_clone()); - graph_props_meta - .set_temporal_prop_mapper(self.graph_props_meta().temporal_prop_mapper().deep_clone()); - - let layer_meta = edge_meta.layer_meta(); - - // NOTE: layers must be set in layer_meta before the TemporalGraph is initialized to - // make sure empty layers are created. - let layer_map: Vec<_> = match self.layer_ids() { - LayerIds::None => { - // no layers to map - vec![] + layer_map[*l_id] = new_id; + layer_map + } + LayerIds::Multiple(ids) => { + let mut layer_map = vec![0; storage.edge_meta().layer_meta().num_all_fields()]; + let layers = storage.edge_meta().layer_meta().all_keys(); + + for id in ids { + let layer_name = &layers[id]; + let new_id = layer_meta.get_or_create_id(layer_name).inner(); + layer_map[id] = new_id; } - LayerIds::All => { - let layers = storage.edge_meta().layer_meta().keys(); - let mut layer_map = vec![0; storage.edge_meta().layer_meta().num_all_fields()]; - for (id, name) in storage.edge_meta().layer_meta().ids().zip(layers.iter()) { - let new_id = layer_meta.get_or_create_id(name).inner(); - layer_map[id] = new_id; - } + layer_map + } + }; - layer_map - } - LayerIds::One(l_id) => { - let mut layer_map = vec![0; storage.edge_meta().layer_meta().num_all_fields()]; - let layer_name = storage.edge_meta().get_layer_name_by_id(*l_id); - let new_id = layer_meta.get_or_create_id(&layer_name).inner(); + node_meta.set_layer_mapper(layer_meta.clone()); - layer_map[*l_id] = new_id; - layer_map - } - LayerIds::Multiple(ids) => { - let mut layer_map = vec![0; storage.edge_meta().layer_meta().num_all_fields()]; - let layers = storage.edge_meta().layer_meta().all_keys(); - - for id in ids { - let layer_name = &layers[id]; - let new_id = layer_meta.get_or_create_id(layer_name).inner(); - layer_map[id] = new_id; - } + let temporal_graph = TemporalGraph::new_with_meta( + path.map(|p| p.into()), + node_meta, + edge_meta, + graph_props_meta, + storage.extension().clone(), + )?; - layer_map - } - }; + if let Some(earliest) = graph.earliest_time() { + temporal_graph.update_time(TimeIndexEntry::start(earliest)); + }; - node_meta.set_layer_mapper(layer_meta.clone()); + if let Some(latest) = graph.latest_time() { + temporal_graph.update_time(TimeIndexEntry::end(latest)); + }; - let temporal_graph = TemporalGraph::new_with_meta( - path.map(|p| p.into()), - node_meta, - edge_meta, - graph_props_meta, - storage.extension().clone(), - ) - .unwrap(); + // Set event counter to be the same as old graph to avoid any possibility for duplicate event ids + temporal_graph + .storage() + .set_event_id(storage.read_event_id()); - if let Some(earliest) = self.earliest_time() { - temporal_graph.update_time(TimeIndexEntry::start(earliest)); - }; + let graph_storage = GraphStorage::from(temporal_graph); - if let Some(latest) = self.latest_time() { - temporal_graph.update_time(TimeIndexEntry::end(latest)); - }; + { + // scope for the write lock + let mut new_storage = graph_storage.write_lock()?; + new_storage.resize_chunks_to_num_nodes(graph.count_nodes()); + for layer_id in &layer_map { + new_storage.nodes.ensure_layer(*layer_id); + } - // Set event counter to be the same as old graph to avoid any possibility for duplicate event ids - temporal_graph - .storage() - .set_event_id(storage.read_event_id()); + let mut node_map = vec![VID::default(); storage.unfiltered_num_nodes()]; + let node_map_shared = atomic_usize_from_mut_slice(bytemuck::cast_slice_mut(&mut node_map)); + + new_storage.nodes.par_iter_mut().try_for_each(|shard| { + for (index, node) in graph.nodes().iter().enumerate() { + let new_id = VID(index); + let gid = node.id(); + node_map_shared[node.node.index()].store(new_id.index(), Ordering::Relaxed); + if let Some(node_pos) = shard.resolve_pos(new_id) { + let mut writer = shard.writer(); + if let Some(node_type) = node.node_type() { + let new_type_id = graph_storage + .node_meta() + .node_type_meta() + .get_or_create_id(&node_type) + .inner(); + writer.store_node_id_and_node_type( + node_pos, + 0, + gid.as_ref(), + new_type_id, + 0, + ); + } else { + writer.store_node_id(node_pos, 0, gid.as_ref(), 0); + } + graph_storage + .write_session()? + .set_node(gid.as_ref(), new_id)?; - let graph_storage = GraphStorage::from(temporal_graph); + for (t, row) in node.rows() { + writer.add_props(t, node_pos, 0, row, 0); + } - { - // scope for the write lock - let mut new_storage = graph_storage.write_lock()?; - new_storage.resize_chunks_to_num_nodes(self.count_nodes()); - for layer_id in &layer_map { - new_storage.nodes.ensure_layer(*layer_id); + writer.update_c_props( + node_pos, + 0, + node.metadata_ids() + .filter_map(|id| node.get_metadata(id).map(|prop| (id, prop))), + 0, + ); + } } + Ok::<(), MutationError>(()) + })?; - let mut node_map = vec![VID::default(); storage.unfiltered_num_nodes()]; - let node_map_shared = - atomic_usize_from_mut_slice(bytemuck::cast_slice_mut(&mut node_map)); + new_storage.resize_chunks_to_num_edges(graph.count_edges()); - new_storage.nodes.par_iter_mut().try_for_each(|shard| { - for (index, node) in self.nodes().iter().enumerate() { - let new_id = VID(index); - let gid = node.id(); - node_map_shared[node.node.index()].store(new_id.index(), Ordering::Relaxed); - if let Some(node_pos) = shard.resolve_pos(new_id) { - let mut writer = shard.writer(); - if let Some(node_type) = node.node_type() { - let new_type_id = graph_storage - .node_meta() - .node_type_meta() - .get_or_create_id(&node_type) - .inner(); - writer.store_node_id_and_node_type( - node_pos, - 0, - gid.as_ref(), - new_type_id, - 0, - ); - } else { - writer.store_node_id(node_pos, 0, gid.as_ref(), 0); - } - graph_storage - .write_session()? - .set_node(gid.as_ref(), new_id)?; + for layer_id in &layer_map { + new_storage.edges.ensure_layer(*layer_id); + } - for (t, row) in node.rows() { - writer.add_props(t, node_pos, 0, row, 0); + new_storage.edges.par_iter_mut().try_for_each(|shard| { + for (eid, edge) in graph.edges().iter().enumerate() { + let src = node_map[edge.edge.src().index()]; + let dst = node_map[edge.edge.dst().index()]; + let eid = EID(eid); + if let Some(edge_pos) = shard.resolve_pos(eid) { + let mut writer = shard.writer(); + // make the edge for the first time + writer.add_static_edge(Some(edge_pos), src, dst, 0, false); + + for edge in edge.explode_layers() { + let layer = layer_map[edge.edge.layer().unwrap()]; + for edge in edge.explode() { + let t = edge.edge.time().unwrap(); + writer.add_edge(t, edge_pos, src, dst, [], layer, 0); + } + //TODO: move this in edge.row() + for (t, t_props) in edge + .properties() + .temporal() + .values() + .map(|tp| { + let prop_id = tp.id(); + tp.iter_indexed() + .map(|(t, prop)| (t, prop_id, prop)) + .collect::>() + }) + .kmerge_by(|(t, _, _), (t2, _, _)| t <= t2) + .chunk_by(|(t, _, _)| *t) + .into_iter() + { + let props = t_props + .map(|(_, prop_id, prop)| (prop_id, prop)) + .collect::>(); + writer.add_edge(t, edge_pos, src, dst, props, layer, 0); } - writer.update_c_props( + edge_pos, + src, + dst, + layer, + edge.metadata_ids().filter_map(move |prop_id| { + edge.get_metadata(prop_id).map(|prop| (prop_id, prop)) + }), + ); + } + + let time_semantics = graph.edge_time_semantics(); + let edge_entry = graph.core_edge(edge.edge.pid()); + for (t, layer) in time_semantics.edge_deletion_history( + edge_entry.as_ref(), + graph, + graph.layer_ids(), + ) { + let layer = layer_map[layer]; + writer.delete_edge(t, edge_pos, src, dst, layer, 0); + } + } + } + Ok::<(), MutationError>(()) + })?; + + new_storage.nodes.par_iter_mut().try_for_each(|shard| { + for (eid, edge) in graph.edges().iter().enumerate() { + let eid = EID(eid); + let src_id = node_map[edge.edge.src().index()]; + let dst_id = node_map[edge.edge.dst().index()]; + let maybe_src_pos = shard.resolve_pos(src_id); + let maybe_dst_pos = shard.resolve_pos(dst_id); + + if let Some(node_pos) = maybe_src_pos { + let mut writer = shard.writer(); + writer.add_static_outbound_edge(node_pos, dst_id, eid, 0); + } + + if let Some(node_pos) = maybe_dst_pos { + let mut writer = shard.writer(); + writer.add_static_inbound_edge(node_pos, src_id, eid, 0); + } + + for e in edge.explode_layers() { + let layer = layer_map[e.edge.layer().unwrap()]; + if let Some(node_pos) = maybe_src_pos { + let mut writer = shard.writer(); + writer.add_outbound_edge::( + None, node_pos, + dst_id, + eid.with_layer(layer), 0, - node.metadata_ids() - .filter_map(|id| node.get_metadata(id).map(|prop| (id, prop))), + ); + } + if let Some(node_pos) = maybe_dst_pos { + let mut writer = shard.writer(); + writer.add_inbound_edge::( + None, + node_pos, + src_id, + eid.with_layer(layer), 0, ); } } - Ok::<(), MutationError>(()) - })?; - new_storage.resize_chunks_to_num_edges(self.count_edges()); - - for layer_id in &layer_map { - new_storage.edges.ensure_layer(*layer_id); - } + for e in edge.explode() { + if let Some(node_pos) = maybe_src_pos { + let mut writer = shard.writer(); - new_storage.edges.par_iter_mut().try_for_each(|shard| { - for (eid, edge) in self.edges().iter().enumerate() { - let src = node_map[edge.edge.src().index()]; - let dst = node_map[edge.edge.dst().index()]; - let eid = EID(eid); - if let Some(edge_pos) = shard.resolve_pos(eid) { + let t = e.time_and_index().expect("exploded edge should have time"); + let l = layer_map[e.edge.layer().unwrap()]; + writer.update_timestamp(t, node_pos, eid.with_layer(l), 0); + } + if let Some(node_pos) = maybe_dst_pos { let mut writer = shard.writer(); - // make the edge for the first time - writer.add_static_edge(Some(edge_pos), src, dst, 0, false); - - for edge in edge.explode_layers() { - let layer = layer_map[edge.edge.layer().unwrap()]; - for edge in edge.explode() { - let t = edge.edge.time().unwrap(); - writer.add_edge(t, edge_pos, src, dst, [], layer, 0); - } - //TODO: move this in edge.row() - for (t, t_props) in edge - .properties() - .temporal() - .values() - .map(|tp| { - let prop_id = tp.id(); - tp.iter_indexed() - .map(|(t, prop)| (t, prop_id, prop)) - .collect::>() - }) - .kmerge_by(|(t, _, _), (t2, _, _)| t <= t2) - .chunk_by(|(t, _, _)| *t) - .into_iter() - { - let props = t_props - .map(|(_, prop_id, prop)| (prop_id, prop)) - .collect::>(); - writer.add_edge(t, edge_pos, src, dst, props, layer, 0); - } - writer.update_c_props( - edge_pos, - src, - dst, - layer, - edge.metadata_ids().filter_map(move |prop_id| { - edge.get_metadata(prop_id).map(|prop| (prop_id, prop)) - }), - ); - } - let time_semantics = self.edge_time_semantics(); - let edge_entry = self.core_edge(edge.edge.pid()); - for (t, layer) in time_semantics.edge_deletion_history( - edge_entry.as_ref(), - self, - self.layer_ids(), - ) { - let layer = layer_map[layer]; - writer.delete_edge(t, edge_pos, src, dst, layer, 0); - } + let t = e.time_and_index().expect("exploded edge should have time"); + let l = layer_map[e.edge.layer().unwrap()]; + writer.update_timestamp(t, node_pos, eid.with_layer(l), 0); } } - Ok::<(), MutationError>(()) - })?; - - new_storage.nodes.par_iter_mut().try_for_each(|shard| { - for (eid, edge) in self.edges().iter().enumerate() { - let eid = EID(eid); - let src_id = node_map[edge.edge.src().index()]; - let dst_id = node_map[edge.edge.dst().index()]; - let maybe_src_pos = shard.resolve_pos(src_id); - let maybe_dst_pos = shard.resolve_pos(dst_id); + let edge_time_semantics = graph.edge_time_semantics(); + let edge_entry = graph.core_edge(edge.edge.pid()); + for (t, layer) in edge_time_semantics.edge_deletion_history( + edge_entry.as_ref(), + graph, + graph.layer_ids(), + ) { + let layer = layer_map[layer]; if let Some(node_pos) = maybe_src_pos { let mut writer = shard.writer(); - writer.add_static_outbound_edge(node_pos, dst_id, eid, 0); + writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer), 0); } - if let Some(node_pos) = maybe_dst_pos { let mut writer = shard.writer(); - writer.add_static_inbound_edge(node_pos, src_id, eid, 0); + writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer), 0); } + } + } - for e in edge.explode_layers() { - let layer = layer_map[e.edge.layer().unwrap()]; - if let Some(node_pos) = maybe_src_pos { - let mut writer = shard.writer(); - writer.add_outbound_edge::( - None, - node_pos, - dst_id, - eid.with_layer(layer), - 0, - ); - } - if let Some(node_pos) = maybe_dst_pos { - let mut writer = shard.writer(); - writer.add_inbound_edge::( - None, - node_pos, - src_id, - eid.with_layer(layer), - 0, - ); - } - } - - for e in edge.explode() { - if let Some(node_pos) = maybe_src_pos { - let mut writer = shard.writer(); - - let t = e.time_and_index().expect("exploded edge should have time"); - let l = layer_map[e.edge.layer().unwrap()]; - writer.update_timestamp(t, node_pos, eid.with_layer(l), 0); - } - if let Some(node_pos) = maybe_dst_pos { - let mut writer = shard.writer(); + Ok::<(), MutationError>(()) + })?; - let t = e.time_and_index().expect("exploded edge should have time"); - let l = layer_map[e.edge.layer().unwrap()]; - writer.update_timestamp(t, node_pos, eid.with_layer(l), 0); - } - } + // Copy over graph properties + if let Some(graph_writer) = new_storage.graph_props.writer() { + // Copy temporal properties + for (prop_name, temporal_prop) in graph.properties().temporal().iter() { + let prop_id = graph_storage + .graph_props_meta() + .temporal_prop_mapper() + .get_or_create_id(&prop_name) + .inner(); - let edge_time_semantics = self.edge_time_semantics(); - let edge_entry = self.core_edge(edge.edge.pid()); - for (t, layer) in edge_time_semantics.edge_deletion_history( - edge_entry.as_ref(), - self, - self.layer_ids(), - ) { - let layer = layer_map[layer]; - if let Some(node_pos) = maybe_src_pos { - let mut writer = shard.writer(); - writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer), 0); - } - if let Some(node_pos) = maybe_dst_pos { - let mut writer = shard.writer(); - writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer), 0); - } - } + for (t, prop_value) in temporal_prop.iter_indexed() { + let lsn = 0; + graph_writer.add_properties(t, [(prop_id, prop_value)], lsn); } + } - Ok::<(), MutationError>(()) - })?; - - // Copy over graph properties - if let Some(graph_writer) = new_storage.graph_props.writer() { - // Copy temporal properties - for (prop_name, temporal_prop) in self.properties().temporal().iter() { + // Copy metadata (constant properties) + let metadata_props: Vec<_> = graph + .metadata() + .iter_filtered() + .map(|(prop_name, prop_value)| { let prop_id = graph_storage .graph_props_meta() - .temporal_prop_mapper() + .metadata_mapper() .get_or_create_id(&prop_name) .inner(); + (prop_id, prop_value) + }) + .collect(); - for (t, prop_value) in temporal_prop.iter_indexed() { - let lsn = 0; - graph_writer.add_properties(t, [(prop_id, prop_value)], lsn); - } - } - - // Copy metadata (constant properties) - let metadata_props: Vec<_> = self - .metadata() - .iter_filtered() - .map(|(prop_name, prop_value)| { - let prop_id = graph_storage - .graph_props_meta() - .metadata_mapper() - .get_or_create_id(&prop_name) - .inner(); - (prop_id, prop_value) - }) - .collect(); - - if !metadata_props.is_empty() { - let lsn = 0; - graph_writer.update_metadata(metadata_props, lsn); - } + if !metadata_props.is_empty() { + let lsn = 0; + graph_writer.update_metadata(metadata_props, lsn); } } + } + + Ok(graph.new_base_graph(graph_storage)) +} - Ok(self.new_base_graph(graph_storage)) +impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { + fn edges(&self) -> Edges<'graph, Self, Self> { + edges_inner(self, true) + } + + fn edges_unlocked(&self) -> Edges<'graph, Self, Self> { + edges_inner(self, false) + } + + fn nodes(&self) -> Nodes<'graph, Self, Self> { + let graph = self.clone(); + Nodes::new(graph) + } + + fn materialize(&self) -> Result { + materialize_impl(self, None) + } + + #[cfg(feature = "io")] + fn materialize_at( + &self, + path: &(impl GraphPaths + ?Sized), + ) -> Result { + path.init()?; + let graph_path = path.graph_path()?; + let graph = materialize_impl(self, Some(graph_path.as_ref()))?; + path.write_metadata(&graph)?; + Ok(graph) } fn subgraph, V: AsNodeRef>(&self, nodes: I) -> NodeSubgraph { diff --git a/raphtory/src/db/api/view/internal/materialize.rs b/raphtory/src/db/api/view/internal/materialize.rs index 4985856887..6ee4893eba 100644 --- a/raphtory/src/db/api/view/internal/materialize.rs +++ b/raphtory/src/db/api/view/internal/materialize.rs @@ -7,12 +7,16 @@ use crate::{ api::view::internal::*, graph::{graph::Graph, views::deletion_graph::PersistentGraph}, }, + errors::GraphError, prelude::*, }; use raphtory_api::{iter::BoxedLIter, GraphType}; use raphtory_storage::{graph::graph::GraphStorage, mutation::InheritMutationOps}; use std::ops::Range; +#[cfg(feature = "io")] +use crate::serialise::GraphPaths; + #[derive(Clone)] pub enum MaterializedGraph { EventGraph(Graph), @@ -93,6 +97,21 @@ impl MaterializedGraph { MaterializedGraph::PersistentGraph(g) => Some(g), } } + + #[cfg(feature = "io")] + pub fn load_from_path(path: &(impl GraphPaths + ?Sized)) -> Result { + let meta = path.read_metadata()?; + if meta.is_diskgraph { + match meta.graph_type { + GraphType::EventGraph => Ok(Self::EventGraph(Graph::load_from_path(path)?)), + GraphType::PersistentGraph => Ok(Self::PersistentGraph( + PersistentGraph::load_from_path(path)?, + )), + } + } else { + Err(GraphError::NotADiskGraph) + } + } } impl InternalStorageOps for MaterializedGraph { diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index dc6156ebfb..c0d89facb0 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -30,6 +30,7 @@ use crate::{ }, graph::{edges::Edges, node::NodeView, nodes::Nodes}, }, + errors::GraphError, prelude::*, }; use raphtory_api::inherit::Base; @@ -43,10 +44,12 @@ use std::{ fmt::{Display, Formatter}, hint::black_box, ops::Deref, - path::Path, sync::Arc, }; +#[cfg(feature = "io")] +use crate::serialise::GraphPaths; + #[repr(transparent)] #[derive(Debug, Clone, Default)] pub struct Graph { @@ -578,10 +581,14 @@ impl Graph { /// use raphtory::prelude::Graph; /// let g = Graph::new_at_path("/path/to/storage"); /// ``` - pub fn new_at_path(path: impl AsRef) -> Self { - Self { - inner: Arc::new(Storage::new_at_path(path)), - } + #[cfg(feature = "io")] + pub fn new_at_path(path: &(impl GraphPaths + ?Sized)) -> Result { + path.init()?; + let graph = Self { + inner: Arc::new(Storage::new_at_path(path.graph_path()?)?), + }; + path.write_metadata(&graph)?; + Ok(graph) } /// Load a graph from a specific path @@ -594,10 +601,12 @@ impl Graph { /// use raphtory::prelude::Graph; /// let g = Graph::load_from_path("/path/to/storage"); /// - pub fn load_from_path(path: impl AsRef) -> Self { - Self { - inner: Arc::new(Storage::load_from(path)), - } + #[cfg(feature = "io")] + pub fn load_from_path(path: &(impl GraphPaths + ?Sized)) -> Result { + //TODO: add support for loading indexes and vectors + Ok(Self { + inner: Arc::new(Storage::load_from(path.graph_path()?)?), + }) } pub(crate) fn from_storage(inner: Arc) -> Self { diff --git a/raphtory/src/db/graph/views/deletion_graph.rs b/raphtory/src/db/graph/views/deletion_graph.rs index d9949aa3c7..9b6a70ca9d 100644 --- a/raphtory/src/db/graph/views/deletion_graph.rs +++ b/raphtory/src/db/graph/views/deletion_graph.rs @@ -10,6 +10,7 @@ use crate::{ }, graph::graph::graph_equal, }, + errors::GraphError, prelude::*, }; use raphtory_api::{ @@ -29,11 +30,13 @@ use raphtory_storage::{ use std::{ fmt::{Display, Formatter}, ops::Range, - path::Path, sync::Arc, }; use storage::api::graph_props::{GraphPropEntryOps, GraphPropRefOps}; +#[cfg(feature = "io")] +use crate::serialise::GraphPaths; + /// A graph view where an edge remains active from the time it is added until it is explicitly marked as deleted. /// /// Note that the graph will give you access to all edges that were added at any point in time, even those that are marked as deleted. @@ -109,8 +112,12 @@ impl PersistentGraph { /// use raphtory::prelude::PersistentGraph; /// let g = Graph::new_at_path("/path/to/storage"); /// ``` - pub fn new_at_path(path: impl AsRef) -> Self { - Self(Arc::new(Storage::new_at_path(path))) + #[cfg(feature = "io")] + pub fn new_at_path(path: &(impl GraphPaths + ?Sized)) -> Result { + path.init()?; + let graph = Self(Arc::new(Storage::new_at_path(path.graph_path()?)?)); + path.write_metadata(&graph)?; + Ok(graph) } /// Load a graph from a specific path @@ -123,8 +130,9 @@ impl PersistentGraph { /// use raphtory::prelude::Graph; /// let g = Graph::load_from_path("/path/to/storage"); /// - pub fn load_from_path(path: impl AsRef) -> Self { - Self(Arc::new(Storage::load_from(path))) + #[cfg(feature = "io")] + pub fn load_from_path(path: &(impl GraphPaths + ?Sized)) -> Result { + Ok(Self(Arc::new(Storage::load_from(path.graph_path()?)?))) } pub fn from_storage(storage: Arc) -> Self { diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index 44793b23a0..301d7764b4 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -33,6 +33,7 @@ use pyo3::PyErr; #[cfg(feature = "search")] use {tantivy, tantivy::query::QueryParserError}; +use storage::error::StorageError; #[cfg(feature = "io")] use zip::result::ZipError; @@ -150,7 +151,7 @@ pub enum GraphError { PathDoesNotExist(PathBuf), #[error("Storage feature not enabled")] - DiskGraphNotFound, + DiskGraphNotEnabled, #[error("Missing graph index. You need to create an index first.")] IndexNotCreated, @@ -260,6 +261,9 @@ pub enum GraphError { #[error("Not a zip archive")] NotAZip, + #[error("Not a disk graph")] + NotADiskGraph, + #[error("Graph folder is not initialised for writing")] NoWriteInProgress, @@ -441,6 +445,9 @@ pub enum GraphError { }, #[error("Path {0} is not a valid relative data path")] InvalidRelativePath(String), + + #[error(transparent)] + StorageError(#[from] StorageError), } impl From for GraphError { diff --git a/raphtory/src/graph_loader/company_house.rs b/raphtory/src/graph_loader/company_house.rs index 18ef83483b..e123b56bae 100644 --- a/raphtory/src/graph_loader/company_house.rs +++ b/raphtory/src/graph_loader/company_house.rs @@ -34,7 +34,7 @@ pub fn company_house_graph(path: Option) -> Graph { fn restore_from_bincode(encoded_data_dir: &Path) -> Option { if encoded_data_dir.exists() { let now = Instant::now(); - let g = Graph::decode(encoded_data_dir, None) + let g = Graph::decode(encoded_data_dir) .map_err(|err| { error!( "Restoring from bincode failed with error: {}! Reloading file!", diff --git a/raphtory/src/graph_loader/stable_coins.rs b/raphtory/src/graph_loader/stable_coins.rs index ea5efee7f5..b92b8265de 100644 --- a/raphtory/src/graph_loader/stable_coins.rs +++ b/raphtory/src/graph_loader/stable_coins.rs @@ -48,7 +48,7 @@ pub fn stable_coin_graph(path: Option, subset: bool) -> Graph { fn restore_from_file(encoded_data_file: &PathBuf) -> Option { if encoded_data_file.exists() { let now = Instant::now(); - let g = Graph::decode(encoded_data_file.as_path(), None) + let g = Graph::decode(encoded_data_file.as_path()) .map_err(|err| { error!( "Restoring from bincode failed with error: {}! Reloading file!", diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index c21626aa6c..b3fdc23192 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -13,7 +13,6 @@ use bytemuck::checked::cast_slice_mut; use db4_graph::WriteLockedGraph; use either::Either; use itertools::izip; -use kdam::{Bar, BarBuilder, BarExt}; use raphtory_api::{ atomic_extra::atomic_usize_from_mut_slice, core::{ @@ -36,6 +35,10 @@ use std::{ sync::atomic::{AtomicBool, AtomicUsize, Ordering}, }; +#[cfg(feature = "python")] +use kdam::{Bar, BarBuilder, BarExt}; + +#[cfg(feature = "python")] fn build_progress_bar(des: String, num_rows: usize) -> Result { BarBuilder::default() .desc(des) diff --git a/raphtory/src/python/graph/disk_graph.rs b/raphtory/src/python/graph/disk_graph.rs deleted file mode 100644 index b80f8aa2bb..0000000000 --- a/raphtory/src/python/graph/disk_graph.rs +++ /dev/null @@ -1,321 +0,0 @@ -//! A columnar temporal graph. -//! -use super::io::pandas_loaders::*; -use crate::{ - db::{ - api::storage::graph::storage_ops::disk_storage::IntoGraph, - graph::views::deletion_graph::PersistentGraph, - }, - errors::GraphError, - io::parquet_loaders::read_struct_arrays, - prelude::Graph, - python::{graph::graph::PyGraph, types::repr::StructReprBuilder}, -}; -use arrow::{array::StructArray, datatypes::Field}; -use itertools::Itertools; -use pometry_storage::{ - graph::{load_node_metadata, TemporalGraph}, - RAError, -}; -use pyo3::{exceptions::PyRuntimeError, prelude::*, pybacked::PyBackedStr, types::PyDict}; -use raphtory_storage::disk::{DiskGraphStorage, ParquetLayerCols}; -use std::{ - ops::Deref, - path::{Path, PathBuf}, - str::FromStr, -}; - -#[derive(Clone)] -#[pyclass(name = "DiskGraphStorage", frozen, module = "raphtory")] -pub struct PyDiskGraph(pub DiskGraphStorage); - -impl AsRef for PyDiskGraph -where - DiskGraphStorage: AsRef, -{ - fn as_ref(&self) -> &G { - self.0.as_ref() - } -} - -impl From for PyDiskGraph { - fn from(value: DiskGraphStorage) -> Self { - Self(value) - } -} - -impl From for DiskGraphStorage { - fn from(value: PyDiskGraph) -> Self { - value.0 - } -} - -struct PyParquetLayerCols { - parquet_dir: PyBackedStr, - layer: PyBackedStr, - src_col: PyBackedStr, - dst_col: PyBackedStr, - time_col: PyBackedStr, - exclude_edge_props: Vec, -} - -impl PyParquetLayerCols { - pub fn as_deref(&self) -> ParquetLayerCols<'_> { - ParquetLayerCols { - parquet_dir: self.parquet_dir.deref(), - layer: self.layer.deref(), - src_col: self.src_col.deref(), - dst_col: self.dst_col.deref(), - time_col: self.time_col.deref(), - exclude_edge_props: self.exclude_edge_props.iter().map(|s| s.deref()).collect(), - } - } -} - -impl<'a> FromPyObject<'a> for PyParquetLayerCols { - fn extract_bound(obj: &Bound<'a, PyAny>) -> PyResult { - let dict = obj.downcast::()?; - Ok(PyParquetLayerCols { - parquet_dir: dict - .get_item("parquet_dir")? - .ok_or(PyRuntimeError::new_err("parquet_dir is required"))? - .extract::()?, - layer: dict - .get_item("layer")? - .ok_or(PyRuntimeError::new_err("layer is required"))? - .extract::()?, - src_col: dict - .get_item("src_col")? - .ok_or(PyRuntimeError::new_err("src_col is required"))? - .extract::()?, - dst_col: dict - .get_item("dst_col")? - .ok_or(PyRuntimeError::new_err("dst_col is required"))? - .extract::()?, - time_col: dict - .get_item("time_col")? - .ok_or(PyRuntimeError::new_err("time_col is required"))? - .extract::()?, - exclude_edge_props: match dict.get_item("exclude_edge_props")? { - None => Ok(vec![]), - Some(item) => item - .try_iter()? - .map(|v| v.and_then(|v| v.extract::())) - .collect::>>(), - }?, - }) - } -} - -#[pymethods] -impl PyGraph { - /// save graph in disk_graph format and memory map the result - /// - /// Arguments: - /// graph_dir (str | PathLike): folder where the graph will be saved - /// - /// Returns: - /// DiskGraphStorage: the persisted graph storage - pub fn persist_as_disk_graph(&self, graph_dir: PathBuf) -> Result { - Ok(PyDiskGraph(DiskGraphStorage::from_graph( - &self.graph, - &graph_dir, - )?)) - } -} - -#[pymethods] -impl PyDiskGraph { - pub fn graph_dir(&self) -> &Path { - self.0.graph_dir() - } - - pub fn to_events(&self) -> Graph { - self.0.clone().into_graph() - } - - pub fn to_persistent(&self) -> PersistentGraph { - self.0.clone().into_persistent_graph() - } - - #[staticmethod] - #[pyo3(signature = (graph_dir, edge_df, time_col, src_col, dst_col))] - pub fn load_from_pandas( - graph_dir: PathBuf, - edge_df: &Bound, - time_col: &str, - src_col: &str, - dst_col: &str, - ) -> Result { - let cols_to_check = vec![src_col, dst_col, time_col]; - - let df_columns: Vec = edge_df.getattr("columns")?.extract()?; - let df_columns: Vec<&str> = df_columns.iter().map(|x| x.as_str()).collect(); - - let df_view = process_pandas_py_df(edge_df, df_columns)?; - df_view.check_cols_exist(&cols_to_check)?; - let src_index = df_view.get_index(src_col)?; - let dst_index = df_view.get_index(dst_col)?; - let time_index = df_view.get_index(time_col)?; - - let mut chunks_iter = df_view.chunks.peekable(); - let chunk_size = if let Some(result) = chunks_iter.peek() { - match result { - Ok(df) => df.chunk.len(), - Err(e) => { - return Err(GraphError::LoadFailure(format!( - "Failed to load graph {e:?}" - ))) - } - } - } else { - return Err(GraphError::LoadFailure("No chunks available".to_string())); - }; - - let edge_lists = chunks_iter - .map_ok(|df| { - let fields = df - .chunk - .iter() - .zip(df_view.names.iter()) - .map(|(arr, col_name)| { - Field::new(col_name, arr.data_type().clone(), arr.null_count() > 0) - }) - .collect_vec(); - let s_array = StructArray::new(fields.into(), df.chunk, None); - s_array - }) - .collect::, GraphError>>()?; - - let graph = DiskGraphStorage::load_from_edge_lists( - &edge_lists, - chunk_size, - chunk_size, - graph_dir, - time_index, - src_index, - dst_index, - )?; - - Ok(PyDiskGraph(graph)) - } - - #[staticmethod] - fn load_from_dir(graph_dir: PathBuf) -> Result { - DiskGraphStorage::load_from_dir(&graph_dir) - .map_err(|err| { - GraphError::LoadFailure(format!( - "Failed to load graph {err:?} from dir {}", - graph_dir.display() - )) - }) - .map(PyDiskGraph) - } - - #[staticmethod] - #[pyo3( - signature = (graph_dir, layer_parquet_cols, node_properties=None, chunk_size=10_000_000, t_props_chunk_size=10_000_000, num_threads=4, node_type_col=None, node_id_col=None, num_rows=None) - )] - fn load_from_parquets( - graph_dir: PathBuf, - layer_parquet_cols: Vec, - node_properties: Option, - chunk_size: usize, - t_props_chunk_size: usize, - num_threads: usize, - node_type_col: Option<&str>, - node_id_col: Option<&str>, - num_rows: Option, - ) -> Result { - let layer_cols = layer_parquet_cols - .iter() - .map(|layer| layer.as_deref()) - .collect(); - DiskGraphStorage::load_from_parquets( - graph_dir, - layer_cols, - node_properties, - chunk_size, - t_props_chunk_size, - num_threads, - node_type_col, - node_id_col, - num_rows, - ) - .map_err(|err| { - GraphError::LoadFailure(format!("Failed to load graph from parquet files: {err:?}")) - }) - .map(PyDiskGraph) - } - - #[pyo3(signature = (location, col_names=None, chunk_size=None))] - pub fn load_node_metadata( - &self, - location: PathBuf, - col_names: Option>, - chunk_size: Option, - ) -> Result { - let col_names = convert_py_prop_args(col_names.as_deref()); - let chunks = read_struct_arrays(&location, col_names.as_deref())?; - let _ = load_node_metadata(chunk_size.unwrap_or(200_000), self.graph_dir(), chunks)?; - Self::load_from_dir(self.graph_dir().to_path_buf()) - } - - #[pyo3(signature=(location, col_name, chunk_size=None))] - pub fn load_node_types( - &self, - location: PathBuf, - col_name: &str, - chunk_size: Option, - ) -> Result { - let mut cloned = self.clone(); - let chunks = read_struct_arrays(&location, Some(&[col_name]))?.map(|chunk| match chunk { - Ok(chunk) => { - let (_, cols, _) = chunk.into_parts(); - cols.into_iter().next().ok_or(RAError::EmptyChunk) - } - Err(err) => Err(err), - }); - cloned - .0 - .load_node_types_from_arrays(chunks, chunk_size.unwrap_or(1_000_000))?; - Ok(cloned) - } - - #[pyo3(signature = (location, chunk_size=20_000_000))] - pub fn append_node_temporal_properties( - &self, - location: &str, - chunk_size: usize, - ) -> Result { - let path = PathBuf::from_str(location).unwrap(); - let chunks = read_struct_arrays(&path, None)?; - let mut graph = TemporalGraph::new(self.0.inner().graph_dir())?; - graph.load_temporal_node_props_from_chunks(chunks, chunk_size, false)?; - Self::load_from_dir(self.graph_dir().to_path_buf()) - } - - /// Merge this graph with another `DiskGraph`. Note that both graphs should have nodes that are - /// sorted by their global ids or the resulting graph will be nonsense! - fn merge_by_sorted_gids( - &self, - other: &Self, - graph_dir: PathBuf, - ) -> Result { - Ok(PyDiskGraph( - self.0.merge_by_sorted_gids(&other.0, graph_dir)?, - )) - } - - fn __repr__(&self) -> String { - StructReprBuilder::new("DiskGraph") - .add_field("number_of_nodes", self.0.inner.num_nodes()) - .add_field( - "number_of_temporal_edges", - self.0.inner.count_temporal_edges(), - ) - .add_field("earliest_time", self.0.inner.earliest()) - .add_field("latest_time", self.0.inner.latest()) - .finish() - } -} diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 7ce640fc9a..4040963970 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -139,9 +139,7 @@ impl PyGraphEncoder { } fn __call__(&self, bytes: Vec) -> Result { - let path_for_decoded_graph: Option<&std::path::Path> = None; - - MaterializedGraph::decode_from_bytes(&bytes, path_for_decoded_graph) + MaterializedGraph::decode_from_bytes(&bytes) } fn __setstate__(&self) {} fn __getstate__(&self) {} @@ -152,22 +150,22 @@ impl PyGraphEncoder { impl PyGraph { #[new] #[pyo3(signature = (path = None))] - pub fn py_new(path: Option) -> (Self, PyGraphView) { + pub fn py_new(path: Option) -> Result<(Self, PyGraphView), GraphError> { let graph = match path { None => Graph::new(), - Some(path) => Graph::new_at_path(path), + Some(path) => Graph::new_at_path(&path)?, }; - ( + Ok(( Self { graph: graph.clone(), }, PyGraphView::from(graph), - ) + )) } #[staticmethod] - pub fn load(path: PathBuf) -> Graph { - Graph::load_from_path(path) + pub fn load(path: PathBuf) -> Result { + Graph::load_from_path(&path) } fn __reduce__(&self) -> Result<(PyGraphEncoder, (Vec,)), GraphError> { diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index 9e22b3a23c..966aba49a6 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -101,22 +101,22 @@ impl PyPersistentGraph { impl PyPersistentGraph { #[new] #[pyo3(signature = (path = None))] - pub fn py_new(path: Option) -> (Self, PyGraphView) { + pub fn py_new(path: Option) -> Result<(Self, PyGraphView), GraphError> { let graph = match path { - Some(path) => PersistentGraph::new_at_path(path), + Some(path) => PersistentGraph::new_at_path(&path)?, None => PersistentGraph::new(), }; - ( + Ok(( Self { graph: graph.clone(), }, PyGraphView::from(graph), - ) + )) } #[staticmethod] - pub fn load(path: PathBuf) -> PersistentGraph { - PersistentGraph::load_from_path(path) + pub fn load(path: PathBuf) -> Result { + PersistentGraph::load_from_path(&path) } fn __reduce__(&self) -> Result<(PyGraphEncoder, (Vec,)), GraphError> { diff --git a/raphtory/src/python/graph/views/graph_view.rs b/raphtory/src/python/graph/views/graph_view.rs index b0d49ef00a..81cdef16cd 100644 --- a/raphtory/src/python/graph/views/graph_view.rs +++ b/raphtory/src/python/graph/views/graph_view.rs @@ -465,25 +465,16 @@ impl PyGraphView { /// Returns a 'materialized' clone of the graph view - i.e. a new graph with a /// copy of the data seen within the view instead of just a mask over the original graph. - /// If a path is provided, the new graph will be stored at that path - /// (assuming the storage feature is enabled). /// /// Returns: /// GraphView: Returns a graph clone - #[pyo3(signature = (path = None))] - fn materialize(&self, path: Option) -> Result { - self.graph.materialize_at(path.as_deref()) + fn materialize(&self) -> Result { + self.graph.materialize() } /// Materializes the graph view into a graphql compatible folder. - fn materialize_to_graph_folder(&self, path: PathBuf) -> Result { - let folder: GraphFolder = path.into(); - let write = folder.init_write()?; - - let graph = self.graph.materialize_at(Some(&write.graph_path()?))?; - write.data_path()?.write_metadata(&graph)?; - write.finish()?; - Ok(graph) + fn materialize_at(&self, path: PathBuf) -> Result { + self.graph.materialize_at(&path) } /// Displays the graph diff --git a/raphtory/src/python/types/macros/trait_impl/serialise.rs b/raphtory/src/python/types/macros/trait_impl/serialise.rs index 9b00777477..bc0bfab9d3 100644 --- a/raphtory/src/python/types/macros/trait_impl/serialise.rs +++ b/raphtory/src/python/types/macros/trait_impl/serialise.rs @@ -18,12 +18,7 @@ macro_rules! impl_serialise { #[doc = concat!(" ", $name, ":")] #[staticmethod] fn load_from_file(path: PathBuf) -> Result<$base_type, GraphError> { - let path_for_decoded_graph = None; - - <$base_type as $crate::serialise::StableDecode>::decode( - path, - path_for_decoded_graph, - ) + <$base_type as $crate::serialise::StableDecode>::decode(&path) } #[doc = concat!(" Saves the ", $name, " to the given path in parquet format.")] @@ -57,12 +52,7 @@ macro_rules! impl_serialise { #[doc = concat!(" ", $name, ":")] #[staticmethod] fn deserialise(bytes: &[u8]) -> Result<$base_type, GraphError> { - let path_for_decoded_graph = None; - - <$base_type as $crate::serialise::StableDecode>::decode_from_bytes( - bytes, - path_for_decoded_graph, - ) + <$base_type as $crate::serialise::StableDecode>::decode_from_bytes(bytes) } #[doc = concat!(" Serialise ", $name, " to bytes.")] diff --git a/raphtory/src/search/mod.rs b/raphtory/src/search/mod.rs index c7c5bdd153..735a79e657 100644 --- a/raphtory/src/search/mod.rs +++ b/raphtory/src/search/mod.rs @@ -168,7 +168,7 @@ mod test_index { mod test_index_io { use crate::{ db::{ - api::view::{internal::InternalStorageOps, ResolvedIndexSpec, StaticGraphViewOps}, + api::view::{internal::InternalStorageOps, ResolvedIndexSpec}, graph::views::filter::model::{AsNodeFilter, NodeFilter, NodeFilterBuilderOps}, }, errors::GraphError, @@ -221,11 +221,11 @@ mod test_index { let filter = NodeFilter::name().eq("Alice"); assert_search_results(&graph, &filter, vec!["Alice"]); - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let is_indexed = graph.get_storage().unwrap().is_indexed(); assert!(!is_indexed); } @@ -241,12 +241,12 @@ mod test_index { assert_search_results(&graph, &filter, vec!["Alice"]); // Persisted both graph and index - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); // Loaded index that was persisted - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let is_indexed = graph.get_storage().unwrap().is_indexed(); assert!(is_indexed); @@ -257,7 +257,7 @@ mod test_index { fn test_encoding_graph_twice_to_same_storage_path_fails() { let graph = init_graph(); graph.create_index().unwrap(); - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); let result = graph.encode(path); @@ -282,7 +282,7 @@ mod test_index { assert_search_results(&graph, &filter1, vec!["Alice"]); // Persisted both graph and index - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); @@ -299,7 +299,7 @@ mod test_index { assert_search_results(&graph, &filter2, vec!["Tommy"]); // Loaded index that was persisted - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let is_indexed = graph.get_storage().unwrap().is_indexed(); assert!(is_indexed); assert_search_results(&graph, &filter1, vec!["Alice"]); @@ -319,13 +319,13 @@ mod test_index { assert_search_results(&graph, &filter2, vec!["Tommy"]); // Should persist the updated graph and index - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); // Should load the updated graph and index let storage_path = path.parent().unwrap().to_path_buf(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let is_indexed = graph.get_storage().unwrap().is_indexed(); assert!(is_indexed); assert_search_results(&graph, &filter1, vec!["Alice"]); @@ -336,13 +336,13 @@ mod test_index { fn test_zip_encode_decode_index() { let graph = init_graph(); graph.create_index().unwrap(); - let tmp_dir = tempfile::TempDir::new().unwrap(); + let tmp_dir = TempDir::new().unwrap(); let zip_path = tmp_dir.path().join("graph.zip"); let folder = GraphFolder::new_as_zip(zip_path); graph.encode(&folder).unwrap(); let storage_path = tmp_dir.path().to_path_buf(); - let graph = Graph::decode(folder, None).unwrap(); + let graph = Graph::decode(&folder).unwrap(); let node = graph.node("Alice").unwrap(); let node_type = node.node_type(); assert_eq!(node_type, Some(ArcStr::from("fire_nation"))); @@ -355,7 +355,7 @@ mod test_index { fn test_encoding_graph_twice_to_same_storage_path_fails_zip() { let graph = init_graph(); graph.create_index().unwrap(); - let tmp_dir = tempfile::TempDir::new().unwrap(); + let tmp_dir = TempDir::new().unwrap(); let zip_path = tmp_dir.path().join("graph.zip"); let folder = GraphFolder::new_as_zip(&zip_path); graph.encode(&folder).unwrap(); @@ -381,19 +381,19 @@ mod test_index { let graph = init_graph(); graph.create_index().unwrap(); - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); // This gives us immutable index - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); // This tests that we are able to persist the immutable index - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let filter1 = NodeFilter::name().eq("Alice"); assert_search_results(&graph, &filter1, vec!["Alice"]); } @@ -403,12 +403,12 @@ mod test_index { let graph = init_graph(); graph.create_index().unwrap(); - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); // This gives us immutable index - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); // This converts immutable index to mutable index graph @@ -416,11 +416,11 @@ mod test_index { .unwrap(); // This tests that we are able to persist the mutable index - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let filter = NodeFilter::name().eq("Ozai"); assert_search_results(&graph, &filter, vec!["Ozai"]); } @@ -429,12 +429,12 @@ mod test_index { fn test_loading_zip_index_creates_mutable_index() { let graph = init_graph(); graph.create_index().unwrap(); - let tmp_dir = tempfile::TempDir::new().unwrap(); + let tmp_dir = TempDir::new().unwrap(); let zip_path = tmp_dir.path().join("graph.zip"); let folder = GraphFolder::new_as_zip(&zip_path); graph.encode(&folder).unwrap(); - let graph = Graph::decode(folder, None).unwrap(); + let graph = Graph::decode(&folder).unwrap(); let immutable = graph .get_storage() .unwrap() @@ -448,11 +448,11 @@ mod test_index { fn test_loading_index_creates_immutable_index() { let graph = init_graph(); graph.create_index().unwrap(); - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let immutable = graph .get_storage() .unwrap() @@ -472,11 +472,11 @@ mod test_index { let filter = NodeFilter::name().eq("Alice"); assert_search_results(&graph, &filter, vec!["Alice"]); - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let is_indexed = graph.get_storage().unwrap().is_indexed(); assert!(!is_indexed); @@ -543,7 +543,7 @@ mod test_index { let tmp_dir = TempDir::new().unwrap(); let path = tmp_dir.path().to_path_buf(); graph.encode(&path).unwrap(); - let graph = Graph::decode(&path, None).unwrap(); + let graph = Graph::decode(&path).unwrap(); let spec = graph.get_index_spec().unwrap().props(&graph); assert_eq!( @@ -833,8 +833,8 @@ mod test_index { let tmp_graph_dir = tempfile::tempdir().unwrap(); let path = tmp_graph_dir.path().to_path_buf(); - graph.encode(path.clone()).unwrap(); - let graph = Graph::decode(path.clone(), None).unwrap(); + graph.encode(&path).unwrap(); + let graph = Graph::decode(&path).unwrap(); assert_eq!(index_spec, graph.get_index_spec().unwrap()); let results = search_nodes(&graph, PropertyFilter::metadata("y").eq(false)); @@ -854,7 +854,7 @@ mod test_index { let tmp_graph_dir = tempfile::tempdir().unwrap(); let path = tmp_graph_dir.path().to_path_buf(); graph.encode(path.clone()).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(&path).unwrap(); assert_eq!(index_spec, graph.get_index_spec().unwrap()); let results = search_nodes(&graph, PropertyFilter::metadata("y").eq(false)); @@ -883,7 +883,7 @@ mod test_index { let path = tmp_graph_dir.path().to_path_buf(); graph.encode(path.clone()).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(&path).unwrap(); let index_spec2 = graph.get_index_spec().unwrap(); assert_eq!(index_spec, index_spec2); @@ -908,7 +908,7 @@ mod test_index { let folder = GraphFolder::new_as_zip(path); graph.encode(folder).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); assert_eq!(index_spec, graph.get_index_spec().unwrap()); } diff --git a/raphtory/src/search/searcher.rs b/raphtory/src/search/searcher.rs index 20005ce97b..67f38f6ea5 100644 --- a/raphtory/src/search/searcher.rs +++ b/raphtory/src/search/searcher.rs @@ -265,7 +265,7 @@ mod search_tests { fn load_jira_graph() -> Result<(), GraphError> { global_info_logger(); - let graph = Graph::decode("/tmp/graphs/jira", None).expect("failed to load graph"); + let graph = Graph::decode("/tmp/graphs/jira").expect("failed to load graph"); assert!(graph.count_nodes() > 0); let now = SystemTime::now(); diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 592a65291c..be157152f5 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -1,13 +1,8 @@ use crate::{ - db::{ - api::view::{internal::GraphView, MaterializedGraph}, - graph::views::deletion_graph::PersistentGraph, - }, - errors::GraphError, - prelude::{Graph, GraphViewOps, ParquetDecoder, ParquetEncoder}, + db::api::view::internal::GraphView, errors::GraphError, prelude::ParquetEncoder, serialise::metadata::GraphMetadata, }; -use raphtory_api::{core::input::input_node::parse_u64_strict, GraphType}; +use raphtory_api::core::input::input_node::parse_u64_strict; use serde::{Deserialize, Serialize}; use std::{ fs::{self, File}, @@ -155,7 +150,6 @@ pub trait GraphPaths { }) } - fn relative_data_path(&self) -> Result; fn vectors_path(&self) -> Result { let mut path = self.data_path()?.path; path.push(VECTORS_PATH); @@ -174,13 +168,103 @@ pub trait GraphPaths { Ok(path) } - fn relative_graph_path(&self) -> Result; - fn meta_path(&self) -> Result { let mut path = self.data_path()?.path; path.push(META_PATH); Ok(path) } + + fn is_zip(&self) -> bool { + self.root().is_file() + } + + fn read_zip(&self) -> Result, GraphError> { + if self.is_zip() { + let file = File::open(self.root())?; + let archive = ZipArchive::new(file)?; + Ok(archive) + } else { + Err(GraphError::NotAZip) + } + } + + fn relative_data_path(&self) -> Result { + let path = if self.is_zip() { + let mut zip = self.read_zip()?; + get_zip_data_path(&mut zip)? + } else { + read_or_default_data_path(self.root(), DATA_PATH)? + }; + Ok(path) + } + + fn relative_graph_path(&self) -> Result { + if self.is_zip() { + let mut zip = self.read_zip()?; + let data_path = get_zip_data_path(&mut zip)?; + get_zip_graph_path_name(&mut zip, data_path) + } else { + let data_path = self.data_path()?; + read_or_default_data_path(data_path.as_ref(), GRAPH_PATH) + } + } + + fn read_metadata(&self) -> Result { + let mut json = String::new(); + if self.is_zip() { + let mut zip = self.read_zip()?; + let path = get_zip_meta_path(&mut zip)?; + let mut zip_file = zip.by_name(&path)?; + zip_file.read_to_string(&mut json)?; + } else { + let mut file = File::open(self.meta_path()?)?; + file.read_to_string(&mut json)?; + } + let metadata: Metadata = serde_json::from_str(&json)?; + Ok(metadata.meta) + } + + fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { + let graph_path = self.relative_graph_path()?; + let metadata = GraphMetadata::from_graph(graph); + let meta = Metadata { + path: graph_path, + meta: metadata, + }; + let path = self.meta_path()?; + let file = File::create(&path)?; + Ok(serde_json::to_writer(file, &meta)?) + } + + /// Returns true if folder is occupied by a graph. + fn is_reserved(&self) -> bool { + self.is_zip() || self.meta_path().map_or(false, |path| path.exists()) + } + + /// Initialise the data folder and metadata pointer + fn init(&self) -> Result<(), GraphError> { + if self.root().is_dir() { + let non_empty = self.root().read_dir()?.next().is_some(); + if non_empty { + return Err(GraphError::NonEmptyGraphFolder(self.root().into())); + } + } else { + fs::create_dir(self.root())? + } + let meta_path = self.relative_data_path()?; + fs::create_dir(self.root().join(&meta_path))?; + fs::write( + self.root_meta_path(), + serde_json::to_string(&RelativePath { path: meta_path })?, + )?; + Ok(()) + } +} + +impl + ?Sized> GraphPaths for P { + fn root(&self) -> &Path { + self.as_ref() + } } /// A container for managing graph data. @@ -207,27 +291,6 @@ impl GraphPaths for GraphFolder { fn root(&self) -> &Path { &self.root_folder } - - fn relative_data_path(&self) -> Result { - let path = if self.is_zip() { - let mut zip = self.read_zip()?; - get_zip_data_path(&mut zip)? - } else { - read_or_default_data_path(self.root(), DATA_PATH)? - }; - Ok(path) - } - - fn relative_graph_path(&self) -> Result { - if self.is_zip() { - let mut zip = self.read_zip()?; - let data_path = get_zip_data_path(&mut zip)?; - get_zip_graph_path_name(&mut zip, data_path) - } else { - let data_path = self.data_path()?; - read_or_default_data_path(data_path.as_ref(), GRAPH_PATH) - } - } } impl GraphFolder { @@ -304,11 +367,6 @@ impl GraphFolder { }) } - /// Returns true if folder is occupied by a graph. - pub fn is_reserved(&self) -> bool { - self.meta_path().map_or(false, |path| path.exists()) - } - /// Clears the folder of any contents. pub fn clear(&self) -> Result<(), GraphError> { if self.is_zip() { @@ -333,35 +391,6 @@ impl GraphFolder { } } - pub fn is_zip(&self) -> bool { - self.root_folder.is_file() - } - - fn read_zip(&self) -> Result, GraphError> { - if self.is_zip() { - let file = File::open(&self.root_folder)?; - let archive = ZipArchive::new(file)?; - Ok(archive) - } else { - Err(GraphError::NotAZip) - } - } - - pub fn read_metadata(&self) -> Result { - let mut json = String::new(); - if self.is_zip() { - let mut zip = self.read_zip()?; - let path = get_zip_meta_path(&mut zip)?; - let mut zip_file = zip.by_name(&path)?; - zip_file.read_to_string(&mut json)?; - } else { - let mut file = File::open(self.meta_path()?)?; - file.read_to_string(&mut json)?; - } - let metadata: Metadata = serde_json::from_str(&json)?; - Ok(metadata.meta) - } - fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { if self.root_folder.exists() { let non_empty = self.root_folder.read_dir()?.next().is_some(); @@ -442,6 +471,10 @@ impl GraphPaths for WriteableGraphFolder { let path = read_or_default_data_path(&self.data_path()?.as_ref(), GRAPH_PATH)?; Ok(path) } + + fn init(&self) -> Result<(), GraphError> { + Ok(()) + } } impl WriteableGraphFolder { @@ -501,23 +534,10 @@ impl InnerGraphFolder { let data_path = self.as_ref(); let old_relative_graph_path = self.relative_graph_path()?; let old_graph_path = self.path.join(&old_relative_graph_path); - let new_relative_graph_path = match graph.disk_storage_enabled() { - None => { - let new_graph_path = make_data_path(&data_path, GRAPH_PATH)?; - graph.encode_parquet(&data_path.join(&new_graph_path))?; - new_graph_path - } - Some(path) => { - if path != old_graph_path { - let new_graph_path = make_data_path(&data_path, GRAPH_PATH)?; - graph.materialize_at(Some(&data_path.join(&new_graph_path)))?; - new_graph_path - } else { - old_relative_graph_path.clone() - } - } - }; let meta = GraphMetadata::from_graph(&graph); + let new_relative_graph_path = make_data_path(&data_path, GRAPH_PATH)?; + graph.encode_parquet(&data_path.join(&new_relative_graph_path))?; + let dirty_path = data_path.join(DIRTY_PATH); fs::write( &dirty_path, @@ -597,24 +617,6 @@ impl InnerGraphFolder { Ok(()) } - - pub fn read_graph(&self) -> Result { - let meta = self.read_metadata()?; - let graph = if meta.is_diskgraph { - match meta.graph_type { - GraphType::EventGraph => { - MaterializedGraph::EventGraph(Graph::load_from_path(self.graph_path()?)) - } - GraphType::PersistentGraph => MaterializedGraph::PersistentGraph( - PersistentGraph::load_from_path(self.graph_path()?), - ), - } - } else { - MaterializedGraph::decode_parquet(self.graph_path()?, None)? - }; - // FIXME: load index - Ok(graph) - } } impl> From

for GraphFolder { @@ -762,7 +764,7 @@ mod tests { // Verify the output zip contains the same graph let zip_folder = GraphFolder::new_as_zip(&output_zip_path); - let decoded_graph = Graph::decode(&zip_folder, None::<&Path>).unwrap(); + let decoded_graph = Graph::decode(&zip_folder).unwrap(); assert_graph_equal(&graph, &decoded_graph); } @@ -796,7 +798,7 @@ mod tests { // Verify the output zip contains the same graph let zip_folder = GraphFolder::new_as_zip(&output_zip_path); - let decoded_graph = Graph::decode(&zip_folder, None::<&std::path::Path>).unwrap(); + let decoded_graph = Graph::decode(&zip_folder).unwrap(); assert_graph_equal(&graph, &decoded_graph); } @@ -850,7 +852,7 @@ mod tests { assert!(unzip_folder.meta_path().unwrap().exists()); // Verify the extracted graph is the same as the original - let extracted_graph = Graph::decode(&unzip_folder, None::<&std::path::Path>).unwrap(); + let extracted_graph = Graph::decode(&unzip_folder).unwrap(); assert_graph_equal(&graph, &extracted_graph); } } diff --git a/raphtory/src/serialise/metadata.rs b/raphtory/src/serialise/metadata.rs index 77257003ac..faedfbaccc 100644 --- a/raphtory/src/serialise/metadata.rs +++ b/raphtory/src/serialise/metadata.rs @@ -1,4 +1,8 @@ -use crate::{db::api::view::internal::GraphView, prelude::GraphViewOps, serialise::GraphFolder}; +use crate::{ + db::api::view::internal::GraphView, + prelude::GraphViewOps, + serialise::{GraphFolder, GraphPaths}, +}; use raphtory_api::GraphType; use serde::{Deserialize, Serialize}; diff --git a/raphtory/src/serialise/parquet/mod.rs b/raphtory/src/serialise/parquet/mod.rs index 069806af4d..9889ec575f 100644 --- a/raphtory/src/serialise/parquet/mod.rs +++ b/raphtory/src/serialise/parquet/mod.rs @@ -435,7 +435,7 @@ fn decode_graph_storage( path_for_decoded_graph: Option<&Path>, ) -> Result, GraphError> { let graph = if let Some(storage_path) = path_for_decoded_graph { - Arc::new(Storage::new_at_path(storage_path)) + Arc::new(Storage::new_at_path(storage_path)?) } else { Arc::new(Storage::default()) }; diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index 50033cd06d..41555f2223 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -14,7 +14,6 @@ use crate::{ use std::{ fs::File, io::{Cursor, Read, Seek, Write}, - path::Path, }; use zip::{write::SimpleFileOptions, ZipArchive, ZipWriter}; @@ -77,60 +76,93 @@ impl StableEncode for T { pub trait StableDecode: StaticGraphViewOps + AdditionOps { // Decode the graph from the given bytes array. // `path_for_decoded_graph` gets passed to the newly created graph. - fn decode_from_bytes( + fn decode_from_bytes(bytes: &[u8]) -> Result; + + fn decode_from_bytes_at( bytes: &[u8], - path_for_decoded_graph: Option<&Path>, + target: &(impl GraphPaths + ?Sized), ) -> Result; - fn decode_from_zip( + fn decode_from_zip(reader: ZipArchive) -> Result; + + fn decode_from_zip_at( reader: ZipArchive, - path_for_decoded_graph: Option<&Path>, + target: &(impl GraphPaths + ?Sized), ) -> Result; // Decode the graph from the given path. // `path_for_decoded_graph` gets passed to the newly created graph. - fn decode( - path: impl Into, - path_for_decoded_graph: Option<&Path>, + fn decode(path: &(impl GraphPaths + ?Sized)) -> Result; + + fn decode_at( + path: &(impl GraphPaths + ?Sized), + target: &(impl GraphPaths + ?Sized), ) -> Result; } impl StableDecode for T { - fn decode_from_bytes( + fn decode_from_bytes(bytes: &[u8]) -> Result { + let cursor = Cursor::new(bytes); + Self::decode_from_zip(ZipArchive::new(cursor)?) + } + + fn decode_from_bytes_at( bytes: &[u8], - path_for_decoded_graph: Option<&Path>, + target: &(impl GraphPaths + ?Sized), ) -> Result { let cursor = Cursor::new(bytes); - Self::decode_from_zip(ZipArchive::new(cursor)?, path_for_decoded_graph) + Self::decode_from_zip_at(ZipArchive::new(cursor)?, target) } - fn decode_from_zip( + fn decode_from_zip(mut reader: ZipArchive) -> Result { + let graph_prefix = get_zip_graph_path(&mut reader)?; + let graph = Self::decode_parquet_from_zip(&mut reader, None, graph_prefix)?; + + //TODO: graph.load_index_from_zip(&mut reader, prefix) + + Ok(graph) + } + + fn decode_from_zip_at( mut reader: ZipArchive, - path_for_decoded_graph: Option<&Path>, + target: &(impl GraphPaths + ?Sized), ) -> Result { let graph_prefix = get_zip_graph_path(&mut reader)?; - let graph = - Self::decode_parquet_from_zip(&mut reader, path_for_decoded_graph, graph_prefix)?; + let graph = Self::decode_parquet_from_zip( + &mut reader, + Some(target.graph_path()?.as_path()), + graph_prefix, + )?; //TODO: graph.load_index_from_zip(&mut reader, prefix) Ok(graph) } - fn decode( - path: impl Into, - path_for_decoded_graph: Option<&Path>, - ) -> Result { + fn decode(path: &(impl GraphPaths + ?Sized)) -> Result { let graph; - let folder: GraphFolder = path.into(); + if path.is_zip() { + let reader = path.read_zip()?; + graph = Self::decode_from_zip(reader)?; + } else { + graph = Self::decode_parquet(&path.graph_path()?, None)?; + // TODO: Fix index loading: + // #[cfg(feature = "search")] + // graph.load_index(&path)?; + } + Ok(graph) + } - if folder.is_zip() { - let reader = ZipArchive::new(File::open(&folder.root())?)?; - graph = Self::decode_from_zip(reader, path_for_decoded_graph)?; + fn decode_at( + path: &(impl GraphPaths + ?Sized), + target: &(impl GraphPaths + ?Sized), + ) -> Result { + let graph; + if path.is_zip() { + let reader = path.read_zip()?; + graph = Self::decode_from_zip_at(reader, target)?; } else { - graph = Self::decode_parquet(&folder.graph_path()?, path_for_decoded_graph)?; - #[cfg(feature = "search")] - graph.load_index(&folder)?; + graph = Self::decode_parquet(path.graph_path()?, Some(target.graph_path()?.as_path()))?; } Ok(graph) } diff --git a/raphtory/tests/db_tests.rs b/raphtory/tests/db_tests.rs index e9f6182dc5..5dfe5f1a38 100644 --- a/raphtory/tests/db_tests.rs +++ b/raphtory/tests/db_tests.rs @@ -709,7 +709,7 @@ fn graph_save_to_load_from_file() { g.encode(&graph_path).unwrap(); // Load from files - let g2 = Graph::decode(&graph_path, None).unwrap(); + let g2 = Graph::decode(&graph_path).unwrap(); assert_eq!(g, g2); } @@ -2947,7 +2947,7 @@ fn save_load_serial() { let dir = tempfile::tempdir().unwrap(); let file_path = dir.path().join("abcd11"); g.encode(&file_path).unwrap(); - let gg = Graph::decode(file_path, None).unwrap(); + let gg = Graph::decode(&file_path).unwrap(); assert_graph_equal(&g, &gg); } diff --git a/raphtory/tests/serialise_test.rs b/raphtory/tests/serialise_test.rs index 813039ac9b..7339f1b1f2 100644 --- a/raphtory/tests/serialise_test.rs +++ b/raphtory/tests/serialise_test.rs @@ -32,7 +32,7 @@ mod serialise_test { let g1 = Graph::new(); g1.add_node(1, "Alice", NO_PROPS, None).unwrap(); g1.encode(&temp_file).unwrap(); - let g2 = Graph::decode(&temp_file, None).unwrap(); + let g2 = Graph::decode(&temp_file).unwrap(); assert_graph_equal(&g1, &g2); } @@ -45,7 +45,7 @@ mod serialise_test { g1.add_node(2, "Bob", [("age", Prop::U32(47))], None) .unwrap(); g1.encode(&temp_file).unwrap(); - let g2 = Graph::decode(&temp_file, None).unwrap(); + let g2 = Graph::decode(&temp_file).unwrap(); assert_graph_equal(&g1, &g2); } @@ -61,7 +61,7 @@ mod serialise_test { let temp_file = TempDir::new().unwrap(); g.encode(&temp_file).unwrap(); - let g2 = MaterializedGraph::decode(&temp_file, None).unwrap(); + let g2 = MaterializedGraph::decode(&temp_file).unwrap(); assert_eq!(g2.nodes().name().collect_vec(), ["ben", "hamza", "haaroon"]); let node_names: Vec<_> = g2.nodes().iter().map(|n| n.name()).collect(); assert_eq!(node_names, ["ben", "hamza", "haaroon"]); @@ -77,7 +77,7 @@ mod serialise_test { let temp_file = TempDir::new().unwrap(); g3.encode(&temp_file).unwrap(); - let g4 = MaterializedGraph::decode(&temp_file, None).unwrap(); + let g4 = MaterializedGraph::decode(&temp_file).unwrap(); assert_eq!(g4.nodes().name().collect_vec(), ["ben", "hamza", "haaroon"]); let node_names: Vec<_> = g4.nodes().iter().map(|n| n.name()).collect(); assert_eq!(node_names, ["ben", "hamza", "haaroon"]); @@ -97,7 +97,7 @@ mod serialise_test { .expect("Failed to update metadata"); g1.encode(&temp_file).unwrap(); - let g2 = Graph::decode(&temp_file, None).unwrap(); + let g2 = Graph::decode(&temp_file).unwrap(); assert_graph_equal(&g1, &g2); } @@ -110,7 +110,7 @@ mod serialise_test { g1.add_node(2, "Bob", NO_PROPS, None).unwrap(); g1.add_edge(3, "Alice", "Bob", NO_PROPS, None).unwrap(); g1.encode(&temp_file).unwrap(); - let g2 = Graph::decode(&temp_file, None).unwrap(); + let g2 = Graph::decode(&temp_file).unwrap(); assert_graph_equal(&g1, &g2); } @@ -122,7 +122,7 @@ mod serialise_test { g1.add_edge(3, "Alice", "Bob", NO_PROPS, None).unwrap(); g1.delete_edge(19, "Alice", "Bob", None).unwrap(); g1.encode(&temp_file).unwrap(); - let g2 = PersistentGraph::decode(&temp_file, None).unwrap(); + let g2 = PersistentGraph::decode(&temp_file).unwrap(); assert_graph_equal(&g1, &g2); let edge = g2.edge("Alice", "Bob").expect("Failed to get edge"); @@ -150,7 +150,7 @@ mod serialise_test { .unwrap(); g1.encode(&temp_file).unwrap(); - let g2 = Graph::decode(&temp_file, None).unwrap(); + let g2 = Graph::decode(&temp_file).unwrap(); assert_graph_equal(&g1, &g2); } @@ -163,7 +163,7 @@ mod serialise_test { e1.update_metadata([("friends", true)], None) .expect("Failed to update metadata"); g1.encode(&temp_file).unwrap(); - let g2 = Graph::decode(&temp_file, None).unwrap(); + let g2 = Graph::decode(&temp_file).unwrap(); assert_graph_equal(&g1, &g2); } @@ -177,7 +177,7 @@ mod serialise_test { g1.add_edge(7, "Bob", "Charlie", [("friends", false)], Some("two")) .unwrap(); g1.encode(&temp_file).unwrap(); - let g2 = Graph::decode(&temp_file, None).unwrap(); + let g2 = Graph::decode(&temp_file).unwrap(); assert_graph_equal(&g1, &g2); } @@ -191,7 +191,7 @@ mod serialise_test { let g1 = Graph::new(); g1.add_node(1, "Alice", props.clone(), None).unwrap(); g1.encode(&temp_file).unwrap(); - let g2 = Graph::decode(&temp_file, None).unwrap(); + let g2 = Graph::decode(&temp_file).unwrap(); assert_graph_equal(&g1, &g2); let node = g2.node("Alice").expect("Failed to get node"); @@ -218,7 +218,7 @@ mod serialise_test { let g1 = Graph::new(); g1.add_edge(1, "Alice", "Bob", props.clone(), None).unwrap(); g1.encode(&temp_file).unwrap(); - let g2 = Graph::decode(&temp_file, None).unwrap(); + let g2 = Graph::decode(&temp_file).unwrap(); assert_graph_equal(&g1, &g2); let edge = g2.edge("Alice", "Bob").expect("Failed to get edge"); @@ -247,7 +247,7 @@ mod serialise_test { e.update_metadata(props.clone(), Some("a")) .expect("Failed to update metadata"); g1.encode(&temp_file).unwrap(); - let g2 = Graph::decode(&temp_file, None).unwrap(); + let g2 = Graph::decode(&temp_file).unwrap(); assert_graph_equal(&g1, &g2); let edge = g2 @@ -274,7 +274,7 @@ mod serialise_test { n.update_metadata(props.clone()) .expect("Failed to update metadata"); g1.encode(&temp_file).unwrap(); - let g2 = Graph::decode(&temp_file, None).unwrap(); + let g2 = Graph::decode(&temp_file).unwrap(); assert_graph_equal(&g1, &g2); let node = g2.node("Alice").expect("Failed to get node"); @@ -299,7 +299,7 @@ mod serialise_test { let tempdir = TempDir::new().unwrap(); let temp_file = tempdir.path().join("graph"); g1.encode(&temp_file).unwrap(); - let g2 = Graph::decode(&temp_file, None).unwrap(); + let g2 = Graph::decode(&temp_file).unwrap(); assert_graph_equal(&g1, &g2); props.into_iter().for_each(|(name, prop)| { @@ -322,7 +322,7 @@ mod serialise_test { let tempdir = TempDir::new().unwrap(); let temp_file = tempdir.path().join("graph"); g1.encode(&temp_file).unwrap(); - let g2 = Graph::decode(&temp_file, None).unwrap(); + let g2 = Graph::decode(&temp_file).unwrap(); assert_graph_equal(&g1, &g2); props @@ -378,7 +378,7 @@ mod serialise_test { g.add_edge(7, "Bob", "Charlie", [("friends", false)], Some("two")) .unwrap(); - let g2 = Graph::decode(&temp_cache_file, None).unwrap(); + let g2 = Graph::decode(&temp_cache_file).unwrap(); assert_graph_equal(&g, &g2); assert_metadata_correct(&folder, &g); @@ -421,7 +421,7 @@ mod serialise_test { g.add_edge(7, "Bob", "Charlie", [("friends", false)], Some("two")) .unwrap(); - let g2 = PersistentGraph::decode(&temp_cache_file, None).unwrap(); + let g2 = PersistentGraph::decode(&temp_cache_file).unwrap(); assert_graph_equal(&g, &g2); assert_metadata_correct(&folder, &g); @@ -432,7 +432,7 @@ mod serialise_test { proptest!(|(edges in build_edge_list(100, 100))| { let g = build_graph_from_edge_list(&edges); let bytes = g.encode_to_bytes().unwrap(); - let g2 = Graph::decode_from_bytes(&bytes, None).unwrap(); + let g2 = Graph::decode_from_bytes(&bytes).unwrap(); assert_graph_equal(&g, &g2); }) } From d73c2da130fc2a445589e391c6020794f6eccae1 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 19 Dec 2025 12:50:03 +0100 Subject: [PATCH 32/39] avoid writing metadata without writing graph --- raphtory-graphql/src/model/mod.rs | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index b28330cbfe..e0780df305 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -20,7 +20,13 @@ use dynamic_graphql::{ }; use itertools::Itertools; use raphtory::{ - db::{api::view::MaterializedGraph, graph::views::deletion_graph::PersistentGraph}, + db::{ + api::{ + storage::storage::{Extension, PersistentStrategy}, + view::MaterializedGraph, + }, + graph::views::deletion_graph::PersistentGraph, + }, errors::GraphError, prelude::*, serialise::*, @@ -302,9 +308,12 @@ impl Mut { let parent_graph = data.get_graph(parent_path).await?.graph; let folder_clone = folder.clone(); let new_subgraph = blocking_compute(move || { - parent_graph - .subgraph(nodes) - .materialize_at(folder_clone.graph_folder()) + let subgraph = parent_graph.subgraph(nodes); + if Extension::disk_storage_enabled() { + subgraph.materialize_at(folder_clone.graph_folder()) + } else { + subgraph.materialize() + } }) .await?; From 5256ac0aada7194c6131531cbbca0517ff9608dc Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 19 Dec 2025 12:54:09 +0100 Subject: [PATCH 33/39] is_reserved should not be true for files --- raphtory/src/serialise/graph_folder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index be157152f5..d6fd08a534 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -238,7 +238,7 @@ pub trait GraphPaths { /// Returns true if folder is occupied by a graph. fn is_reserved(&self) -> bool { - self.is_zip() || self.meta_path().map_or(false, |path| path.exists()) + self.meta_path().map_or(false, |path| path.exists()) } /// Initialise the data folder and metadata pointer From 3d716be9614592868bcfb859466082502a728dcc Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 19 Dec 2025 12:54:28 +0100 Subject: [PATCH 34/39] materialize_at only works with persistent storage --- raphtory/src/db/api/view/graph.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index e6e8ad205a..bae70fdfe3 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "io")] +use crate::serialise::GraphPaths; use crate::{ core::{ entities::{nodes::node_ref::AsNodeRef, LayerIds, VID}, @@ -55,9 +57,7 @@ use std::{ path::Path, sync::{atomic::Ordering, Arc}, }; - -#[cfg(feature = "io")] -use crate::serialise::GraphPaths; +use storage::{persist::strategy::PersistentStrategy, Extension}; #[cfg(feature = "search")] use crate::{ @@ -583,11 +583,15 @@ impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { &self, path: &(impl GraphPaths + ?Sized), ) -> Result { - path.init()?; - let graph_path = path.graph_path()?; - let graph = materialize_impl(self, Some(graph_path.as_ref()))?; - path.write_metadata(&graph)?; - Ok(graph) + if Extension::disk_storage_enabled() { + path.init()?; + let graph_path = path.graph_path()?; + let graph = materialize_impl(self, Some(graph_path.as_ref()))?; + path.write_metadata(&graph)?; + Ok(graph) + } else { + Err(GraphError::DiskGraphNotEnabled) + } } fn subgraph, V: AsNodeRef>(&self, nodes: I) -> NodeSubgraph { From 6da9ba799d0c9308b31af5ab42e42c45d02a9930 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 19 Dec 2025 14:11:47 +0100 Subject: [PATCH 35/39] need to write the metadata in decode_at variants --- raphtory-graphql/src/lib.rs | 2 +- raphtory/src/serialise/serialise.rs | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/raphtory-graphql/src/lib.rs b/raphtory-graphql/src/lib.rs index f9f85c5672..db5a3bf972 100644 --- a/raphtory-graphql/src/lib.rs +++ b/raphtory-graphql/src/lib.rs @@ -996,7 +996,7 @@ mod graphql_test { let req = Request::new(list_nodes); let res = schema.execute(req).await; - assert_eq!(res.errors.len(), 0); + assert_eq!(res.errors, []); let res_json = res.data.into_json().unwrap(); assert_eq!( res_json, diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index 41555f2223..3b958e1433 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -127,6 +127,7 @@ impl StableDecode for T { mut reader: ZipArchive, target: &(impl GraphPaths + ?Sized), ) -> Result { + target.init()?; let graph_prefix = get_zip_graph_path(&mut reader)?; let graph = Self::decode_parquet_from_zip( &mut reader, @@ -135,7 +136,7 @@ impl StableDecode for T { )?; //TODO: graph.load_index_from_zip(&mut reader, prefix) - + target.write_metadata(&graph)?; Ok(graph) } @@ -157,6 +158,7 @@ impl StableDecode for T { path: &(impl GraphPaths + ?Sized), target: &(impl GraphPaths + ?Sized), ) -> Result { + target.init()?; let graph; if path.is_zip() { let reader = path.read_zip()?; @@ -164,6 +166,7 @@ impl StableDecode for T { } else { graph = Self::decode_parquet(path.graph_path()?, Some(target.graph_path()?.as_path()))?; } + target.write_metadata(&graph)?; Ok(graph) } } From 2ebbad9e8d13b4a151bf87b176824e064bb6f857 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 19 Dec 2025 14:12:22 +0100 Subject: [PATCH 36/39] load metadata correctly when storage is enabled and use cached graph when available --- raphtory-graphql/src/data.rs | 4 +++ .../src/model/graph/meta_graph.rs | 36 +++++++++++++++---- raphtory/src/python/graph/views/graph_view.rs | 1 - 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index a98f625913..480ab08f07 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -192,6 +192,10 @@ impl Data { .await } + pub async fn get_cached_graph(&self, path: &str) -> Option { + self.cache.get(path).await + } + pub fn has_graph(&self, path: &str) -> bool { self.cache.contains_key(path) || ExistingGraphFolder::try_from(self.work_dir.clone(), path).is_ok() diff --git a/raphtory-graphql/src/model/graph/meta_graph.rs b/raphtory-graphql/src/model/graph/meta_graph.rs index d9c5949883..57590581e6 100644 --- a/raphtory-graphql/src/model/graph/meta_graph.rs +++ b/raphtory-graphql/src/model/graph/meta_graph.rs @@ -1,9 +1,16 @@ use crate::{ + data::Data, + graph::GraphWithVectors, model::graph::property::GqlProperty, paths::{ExistingGraphFolder, ValidGraphPaths}, }; +use async_graphql::Context; use dynamic_graphql::{ResolvedObject, ResolvedObjectFields, Result}; -use raphtory::serialise::{metadata::GraphMetadata, parquet::decode_graph_metadata}; +use raphtory::{ + db::api::storage::storage::{Extension, PersistentStrategy}, + prelude::{GraphViewOps, PropertiesOps}, + serialise::{metadata::GraphMetadata, parquet::decode_graph_metadata}, +}; use std::{cmp::Ordering, sync::Arc}; use tokio::sync::OnceCell; @@ -92,11 +99,26 @@ impl MetaGraph { } /// Returns the metadata of the graph. - async fn metadata(&self) -> Result> { - let res = decode_graph_metadata(self.folder.graph_folder())?; - Ok(res - .into_iter() - .filter_map(|(key, value)| value.map(|prop| GqlProperty::new(key, prop))) - .collect()) + async fn metadata(&self, ctx: &Context<'_>) -> Result> { + let data: &Data = ctx.data_unchecked(); + let maybe_cached = if Extension::disk_storage_enabled() { + let graph = data.get_graph(self.folder.local_path()).await?; + Some(graph) + } else { + data.get_cached_graph(self.folder.local_path()).await + }; + let res = match maybe_cached { + None => decode_graph_metadata(self.folder.graph_folder())? + .into_iter() + .filter_map(|(key, value)| value.map(|prop| GqlProperty::new(key, prop))) + .collect(), + Some(graph) => graph + .graph + .metadata() + .iter() + .filter_map(|(key, value)| value.map(|prop| GqlProperty::new(key.into(), prop))) + .collect(), + }; + Ok(res) } } diff --git a/raphtory/src/python/graph/views/graph_view.rs b/raphtory/src/python/graph/views/graph_view.rs index 81cdef16cd..bdf7b48fa4 100644 --- a/raphtory/src/python/graph/views/graph_view.rs +++ b/raphtory/src/python/graph/views/graph_view.rs @@ -39,7 +39,6 @@ use crate::{ types::repr::{Repr, StructReprBuilder}, utils::PyNodeRef, }, - serialise::{GraphFolder, GraphPaths}, }; use chrono::prelude::*; use pyo3::prelude::*; From 8daaed8e8ff5554830d4be3ddd1fa9b00616c413 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 19 Dec 2025 16:48:45 +0100 Subject: [PATCH 37/39] fix send_graph --- raphtory-graphql/src/model/mod.rs | 1 - raphtory-graphql/src/paths.rs | 1 + raphtory-graphql/src/url_encode.rs | 11 +++++++++-- raphtory/src/serialise/serialise.rs | 8 +++++++- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index e0780df305..b057c9ae1b 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -287,7 +287,6 @@ impl Mut { ValidWriteableGraphFolder::try_new(data.work_dir.clone(), path)? }; let g: MaterializedGraph = url_decode_graph_at(graph, folder.graph_folder())?; - data.insert_graph(folder, g).await?; Ok(path.to_owned()) } diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index 4475ed1820..90358c75e7 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -440,6 +440,7 @@ pub enum InternalPathValidationError { } impl From for InternalPathValidationError { + #[track_caller] fn from(value: io::Error) -> Self { error!("Unexpected IO failure: {}", value); InternalPathValidationError::IOError(value) diff --git a/raphtory-graphql/src/url_encode.rs b/raphtory-graphql/src/url_encode.rs index 7b64fd6eae..98f4c09cb5 100644 --- a/raphtory-graphql/src/url_encode.rs +++ b/raphtory-graphql/src/url_encode.rs @@ -1,6 +1,9 @@ use base64::{prelude::BASE64_URL_SAFE, DecodeError, Engine}; use raphtory::{ - db::api::view::MaterializedGraph, + db::api::{ + storage::storage::{Extension, PersistentStrategy}, + view::MaterializedGraph, + }, errors::GraphError, prelude::{StableDecode, StableEncode}, serialise::GraphPaths, @@ -37,7 +40,11 @@ pub fn url_decode_graph_at>( storage_path: &(impl GraphPaths + ?Sized), ) -> Result { let bytes = BASE64_URL_SAFE.decode(graph.as_ref()).unwrap(); - MaterializedGraph::decode_from_bytes_at(&bytes, storage_path) + if Extension::disk_storage_enabled() { + MaterializedGraph::decode_from_bytes_at(&bytes, storage_path) + } else { + MaterializedGraph::decode_from_bytes(&bytes) + } } #[cfg(test)] diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index 3b958e1433..ec9c949320 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -1,7 +1,9 @@ #[cfg(feature = "search")] use crate::prelude::IndexMutationOps; use crate::{ - db::api::{mutation::AdditionOps, view::StaticGraphViewOps}, + db::api::{ + mutation::AdditionOps, storage::storage::PersistentStrategy, view::StaticGraphViewOps, + }, errors::GraphError, serialise::{ get_zip_graph_path, @@ -15,6 +17,7 @@ use std::{ fs::File, io::{Cursor, Read, Seek, Write}, }; +use storage::Extension; use zip::{write::SimpleFileOptions, ZipArchive, ZipWriter}; pub trait StableEncode: StaticGraphViewOps + AdditionOps { @@ -127,6 +130,9 @@ impl StableDecode for T { mut reader: ZipArchive, target: &(impl GraphPaths + ?Sized), ) -> Result { + if Extension::disk_storage_enabled() { + return Err(GraphError::DiskGraphNotEnabled); + } target.init()?; let graph_prefix = get_zip_graph_path(&mut reader)?; let graph = Self::decode_parquet_from_zip( From bdc611d934de76e75a1907fee00424794f2389ac Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 19 Dec 2025 17:06:05 +0100 Subject: [PATCH 38/39] fix new_graph in graphql --- raphtory-graphql/src/model/mod.rs | 13 ++++++++++--- raphtory/src/db/graph/graph.rs | 9 ++++++--- raphtory/src/db/graph/views/deletion_graph.rs | 14 ++++++++++---- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index b057c9ae1b..baee486526 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -210,9 +210,16 @@ impl Mut { let overwrite = false; let folder = data.validate_path_for_insert(&path, overwrite)?; let graph_path = folder.graph_folder(); - let graph: MaterializedGraph = match graph_type { - GqlGraphType::Persistent => PersistentGraph::new_at_path(graph_path)?.into(), - GqlGraphType::Event => Graph::new_at_path(graph_path)?.into(), + let graph: MaterializedGraph = if Extension::disk_storage_enabled() { + match graph_type { + GqlGraphType::Persistent => PersistentGraph::new_at_path(graph_path)?.into(), + GqlGraphType::Event => Graph::new_at_path(graph_path)?.into(), + } + } else { + match graph_type { + GqlGraphType::Persistent => PersistentGraph::new().into(), + GqlGraphType::Event => Graph::new().into(), + } }; data.insert_graph(folder, graph).await?; diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index c0d89facb0..a8bd2c2473 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -16,6 +16,8 @@ //! ``` //! use super::views::deletion_graph::PersistentGraph; +#[cfg(feature = "io")] +use crate::serialise::GraphPaths; use crate::{ db::{ api::{ @@ -46,9 +48,7 @@ use std::{ ops::Deref, sync::Arc, }; - -#[cfg(feature = "io")] -use crate::serialise::GraphPaths; +use storage::{persist::strategy::PersistentStrategy, Extension}; #[repr(transparent)] #[derive(Debug, Clone, Default)] @@ -583,6 +583,9 @@ impl Graph { /// ``` #[cfg(feature = "io")] pub fn new_at_path(path: &(impl GraphPaths + ?Sized)) -> Result { + if !Extension::disk_storage_enabled() { + return Err(GraphError::DiskGraphNotEnabled); + } path.init()?; let graph = Self { inner: Arc::new(Storage::new_at_path(path.graph_path()?)?), diff --git a/raphtory/src/db/graph/views/deletion_graph.rs b/raphtory/src/db/graph/views/deletion_graph.rs index 9b6a70ca9d..91a45774a7 100644 --- a/raphtory/src/db/graph/views/deletion_graph.rs +++ b/raphtory/src/db/graph/views/deletion_graph.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "io")] +use crate::serialise::GraphPaths; use crate::{ core::{ entities::LayerIds, @@ -32,10 +34,11 @@ use std::{ ops::Range, sync::Arc, }; -use storage::api::graph_props::{GraphPropEntryOps, GraphPropRefOps}; - -#[cfg(feature = "io")] -use crate::serialise::GraphPaths; +use storage::{ + api::graph_props::{GraphPropEntryOps, GraphPropRefOps}, + persist::strategy::PersistentStrategy, + Extension, +}; /// A graph view where an edge remains active from the time it is added until it is explicitly marked as deleted. /// @@ -114,6 +117,9 @@ impl PersistentGraph { /// ``` #[cfg(feature = "io")] pub fn new_at_path(path: &(impl GraphPaths + ?Sized)) -> Result { + if !Extension::disk_storage_enabled() { + return Err(GraphError::DiskGraphNotEnabled); + } path.init()?; let graph = Self(Arc::new(Storage::new_at_path(path.graph_path()?)?)); path.write_metadata(&graph)?; From 6bfbced06bbfae1e5868215b65a78711695ec8be Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 19 Dec 2025 17:38:49 +0100 Subject: [PATCH 39/39] improve error messages --- .../test_graphql/edit_graph/test_graphql.py | 23 ++++++++-------- raphtory-graphql/src/paths.rs | 26 ++++++++++++------- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_graphql.py b/python/tests/test_base_install/test_graphql/edit_graph/test_graphql.py index 2af6156ae8..da015845d3 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_graphql.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_graphql.py @@ -47,8 +47,8 @@ def test_wrong_url(): with pytest.raises(Exception) as excinfo: client = RaphtoryClient("http://broken_url.com") assert ( - str(excinfo.value) - == "Could not connect to the given server - no response --error sending request for url (http://broken_url.com/)" + str(excinfo.value) + == "Could not connect to the given server - no response --error sending request for url (http://broken_url.com/)" ) @@ -156,39 +156,40 @@ def assert_graph_fetch(path): path = "../shivam/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "References to the parent dir are not allowed within the path:" in str( + assert "Invalid path '../shivam/g': References to the parent dir are not allowed within the path" in str( excinfo.value ) path = "./shivam/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "References to the current dir are not allowed within the path" in str( + assert "Invalid path './shivam/g': References to the current dir are not allowed within the path" in str( excinfo.value ) path = "shivam/../../../../investigation/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "References to the parent dir are not allowed within the path:" in str( + assert "Invalid path 'shivam/../../../../investigation/g': References to the parent dir are not allowed within the path" in str( excinfo.value ) path = "//shivam/investigation/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "Double forward slashes are not allowed in path" in str(excinfo.value) + assert "Invalid path '//shivam/investigation/g': Double forward slashes are not allowed in path" in str( + excinfo.value) path = "shivam/investigation//2024-12-12/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "Double forward slashes are not allowed in path" in str(excinfo.value) + assert "Invalid path 'shivam/investigation//2024-12-12/g': Double forward slashes are not allowed in path" in str( + excinfo.value) path = r"shivam/investigation\2024-12-12" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "Backslash not allowed in path" in str(excinfo.value) - + assert r"Backslash not allowed in path" in str(excinfo.value) # Test if we can escape through a symlink tmp_dir2 = tempfile.mkdtemp() nested_dir = os.path.join(tmp_work_dir, "shivam", "graphs") @@ -199,7 +200,8 @@ def assert_graph_fetch(path): path = "shivam/graphs/not_a_symlink_i_promise/escaped" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "A component of the given path was a symlink" in str(excinfo.value) + assert "Invalid path 'shivam/graphs/not_a_symlink_i_promise/escaped': A component of the given path was a symlink" in str( + excinfo.value) def test_graph_windows_and_layers_query(): @@ -642,7 +644,6 @@ def test_edge_id(): } } - # def test_disk_graph_name(): # import pandas as pd # from raphtory import DiskGraphStorage diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index 90358c75e7..4f494c70f1 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -336,12 +336,7 @@ impl ValidWriteableGraphFolder { base_path: PathBuf, relative_path: &str, ) -> Result { - let path = create_valid_path(base_path, relative_path).map_err(|error| { - PathValidationError::InternalError { - graph: relative_path.to_string(), - error, - } - })?; + let path = create_valid_path(base_path, relative_path).with_path(relative_path)?; Self::new(path, relative_path) } @@ -411,7 +406,7 @@ impl ValidWriteableGraphFolder { #[derive(thiserror::Error, Debug)] pub enum InternalPathValidationError { - #[error("Path from metadata is invalid: {0}")] + #[error(transparent)] InvalidPath(#[from] InvalidPathReason), #[error(transparent)] IOError(io::Error), @@ -455,6 +450,11 @@ pub enum PathValidationError { GraphNotExistsError(String), #[error("'{0}' does not exist as a namespace")] NamespaceDoesNotExist(String), + #[error("Invalid path '{graph}': {reason}")] + InvalidPath { + graph: String, + reason: InvalidPathReason, + }, #[error("Graph '{graph}' is corrupted: {error}")] InternalError { graph: String, @@ -472,9 +472,15 @@ pub trait WithPath { impl> WithPath for Result { type Value = V; fn with_path>(self, graph: S) -> Result { - self.map_err(move |error| PathValidationError::InternalError { - graph: graph.into(), - error: error.into(), + self.map_err(move |error| { + let error = error.into(); + let graph = graph.into(); + match error { + InternalPathValidationError::InvalidPath(reason) => { + PathValidationError::InvalidPath { graph, reason } + } + _ => PathValidationError::InternalError { graph, error }, + } }) } }