diff --git a/Makefile b/Makefile index 3b1385639f..176749b5f8 100644 --- a/Makefile +++ b/Makefile @@ -12,17 +12,13 @@ build-all: rust-build test-all: rust-test-all python-test -test-all-public: rust-test-all-public python-test-public - # Tidying tidy: rust-fmt build-python stubs python-fmt -tidy-public: rust-fmt build-python-public stubs python-fmt - python-tidy: stubs python-fmt test-graphql-schema -check-pr: tidy-public test-all +check-pr: tidy test-all gen-graphql-schema: raphtory schema > raphtory-graphql/schema.graphql @@ -31,7 +27,6 @@ test-graphql-schema: install-node-tools npx graphql-schema-linter --rules fields-have-descriptions,types-have-descriptions raphtory-graphql/schema.graphql # Utilities - activate-storage: ./scripts/activate_private_storage.py @@ -71,13 +66,12 @@ run-graphql: rust-test: cargo test -q -rust-test-all: activate-storage - cargo nextest run --all --features=storage +rust-check: cargo hack check --workspace --all-targets --each-feature --skip extension-module,default -rust-test-all-public: +rust-test-all: rust-check cargo nextest run --all - cargo hack check --workspace --all-targets --each-feature --skip extension-module,default,storage + ########## # Python # @@ -86,32 +80,23 @@ rust-test-all-public: install-python: cd python && maturin build && pip install ../target/wheels/*.whl -build-python-public: deactivate-storage +build-python: cd python && maturin develop -r --extras=dev -build-python: activate-storage - cd python && maturin develop -r --features=storage --extras=dev +debug-python: + cd python && maturin develop --profile=debug --extras=dev # Testing - -python-test: activate-storage - cd python && tox run && tox run -e storage - -python-test-public: +python-test: cd python && tox run python-fmt: cd python && black . 
-debug-python-public: deactivate-storage - cd python && maturin develop --profile=debug build-python-rtd: cd python && maturin build --profile=build-fast && pip install ../target/wheels/*.whl -debug-python: activate-storage - cd python && maturin develop --features=storage,extension-module --extras=dev - ######## # Docs # ######## diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 7b7e8b7fa2..be5258c31a 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -212,8 +212,9 @@ impl, ES = ES, GS = GS>> Temporal }) } - pub fn disk_storage_enabled(&self) -> bool { - self.graph_dir().is_some() && Extension::disk_storage_enabled() + pub fn disk_storage_enabled(&self) -> Option<&Path> { + self.graph_dir() + .filter(|_| Extension::disk_storage_enabled()) } pub fn extension(&self) -> &EXT { diff --git a/db4-storage/src/pages/edge_page/writer.rs b/db4-storage/src/pages/edge_page/writer.rs index cde4d7fdb3..8578e5619c 100644 --- a/db4-storage/src/pages/edge_page/writer.rs +++ b/db4-storage/src/pages/edge_page/writer.rs @@ -2,12 +2,8 @@ use crate::{ LocalPOS, api::edges::EdgeSegmentOps, error::StorageError, pages::layer_counter::GraphStats, segments::edge::segment::MemEdgeSegment, }; -use arrow_array::{ArrayRef, BooleanArray}; use raphtory_api::core::entities::{VID, properties::prop::Prop}; -use raphtory_core::{ - entities::EID, - storage::timeindex::{AsTime, TimeIndexEntry}, -}; +use raphtory_core::storage::timeindex::{AsTime, TimeIndexEntry}; use std::ops::DerefMut; pub struct EdgeWriter< diff --git a/db4-storage/src/pages/layer_counter.rs b/db4-storage/src/pages/layer_counter.rs index b3865ba0b1..24c3e9df15 100644 --- a/db4-storage/src/pages/layer_counter.rs +++ b/db4-storage/src/pages/layer_counter.rs @@ -96,8 +96,6 @@ impl GraphStats { } } else { // we need to create the layer - self.layers.reserve(layer_id + 1 - self.layers.count()); - loop { let new_layer_id = self.layers.push_with(|_| Default::default()); if new_layer_id >= layer_id { diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 6be52d59e5..f6e803f3a7 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -484,7 +484,7 @@ mod test { make_nodes, }, }; - use chrono::{DateTime, NaiveDateTime, Utc}; + use chrono::DateTime; use proptest::prelude::*; use raphtory_api::core::entities::properties::prop::Prop; use raphtory_core::{entities::VID, storage::timeindex::TimeIndexOps}; @@ -766,14 +766,11 @@ mod test { ("857".to_owned(), Prop::F64(2.56)), ( "296".to_owned(), - Prop::NDTime(NaiveDateTime::from_timestamp(1334043671, 0)), + Prop::NDTime(DateTime::from_timestamp(1334043671, 0).unwrap().naive_utc()), ), ( "92".to_owned(), - Prop::DTime(DateTime::::from_utc( - NaiveDateTime::from_timestamp(994032315, 0), - Utc, - )), + Prop::DTime(DateTime::from_timestamp(994032315, 0).unwrap()), ), ], )], diff --git a/db4-storage/src/properties/mod.rs b/db4-storage/src/properties/mod.rs index 14ae383845..f2c5e86c90 100644 --- a/db4-storage/src/properties/mod.rs +++ b/db4-storage/src/properties/mod.rs @@ -8,8 +8,8 @@ use bigdecimal::ToPrimitive; use raphtory_api::core::entities::properties::{ meta::PropMapper, prop::{ - Prop, PropType, SerdeList, SerdeMap, arrow_dtype_from_prop_type, list_array_from_props, - struct_array_from_props, + Prop, PropType, SerdeArrowList, SerdeArrowMap, arrow_dtype_from_prop_type, + list_array_from_props, struct_array_from_props, }, }; use raphtory_core::{ @@ -206,7 +206,7 @@ impl Properties { .unwrap(); let array_iter = indices .map(|i| lazy_vec.get_opt(i)) - 
.map(|e| e.map(|m| SerdeMap(m))); + .map(|e| e.map(|m| SerdeArrowMap(m))); let struct_array = struct_array_from_props(&dt, array_iter); @@ -221,7 +221,7 @@ impl Properties { let array_iter = indices .map(|i| lazy_vec.get_opt(i)) - .map(|opt_list| opt_list.map(SerdeList)); + .map(|opt_list| opt_list.map(SerdeArrowList)); let list_array = list_array_from_props(&dt, array_iter); diff --git a/db4-storage/src/segments/graph_prop/segment.rs b/db4-storage/src/segments/graph_prop/segment.rs index 4e310cad03..a007804978 100644 --- a/db4-storage/src/segments/graph_prop/segment.rs +++ b/db4-storage/src/segments/graph_prop/segment.rs @@ -1,5 +1,4 @@ use crate::{ - LocalPOS, error::StorageError, segments::{HasRow, SegmentContainer}, }; diff --git a/db4-storage/src/segments/node/segment.rs b/db4-storage/src/segments/node/segment.rs index bc7c5bcdd6..2ac2e5b31d 100644 --- a/db4-storage/src/segments/node/segment.rs +++ b/db4-storage/src/segments/node/segment.rs @@ -550,21 +550,19 @@ impl>> NodeSegmentOps for NodeSegm #[cfg(test)] mod test { - use std::sync::Arc; - - use raphtory_api::core::entities::properties::{ - meta::Meta, - prop::{Prop, PropType}, - }; - use raphtory_core::entities::{EID, ELID, VID}; - use tempfile::tempdir; - use crate::{ LocalPOS, NodeSegmentView, api::nodes::NodeSegmentOps, pages::{layer_counter::GraphStats, node_page::writer::NodeWriter}, persist::strategy::NoOpStrategy, }; + use raphtory_api::core::entities::properties::{ + meta::Meta, + prop::{Prop, PropType}, + }; + use raphtory_core::entities::{EID, ELID, VID}; + use std::sync::Arc; + use tempfile::tempdir; #[test] fn est_size_changes() { diff --git a/examples/rust/src/bin/bench/main.rs b/examples/rust/src/bin/bench/main.rs index 1ec87f0e6c..ad63c92313 100644 --- a/examples/rust/src/bin/bench/main.rs +++ b/examples/rust/src/bin/bench/main.rs @@ -39,7 +39,7 @@ fn main() { info!("Loading data"); let graph = if encoded_data_dir.exists() { let now = Instant::now(); - let g = Graph::decode(encoded_data_dir.as_path(), None) + let g = Graph::decode(encoded_data_dir.as_path()) .expect("Failed to load graph from encoded data files"); info!( diff --git a/examples/rust/src/bin/btc/main.rs b/examples/rust/src/bin/btc/main.rs index 3aebf1764c..0dc39c63a6 100644 --- a/examples/rust/src/bin/btc/main.rs +++ b/examples/rust/src/bin/btc/main.rs @@ -64,7 +64,7 @@ fn main() { let graph = if encoded_data_dir.exists() { let now = Instant::now(); - let g = Graph::decode(encoded_data_dir.as_path(), None) + let g = Graph::decode(encoded_data_dir.as_path()) .expect("Failed to load graph from encoded data files"); info!( diff --git a/examples/rust/src/bin/hulongbay/main.rs b/examples/rust/src/bin/hulongbay/main.rs index e7ca0cab60..6d64bf532a 100644 --- a/examples/rust/src/bin/hulongbay/main.rs +++ b/examples/rust/src/bin/hulongbay/main.rs @@ -65,7 +65,7 @@ pub fn loader(data_dir: &Path) -> Result> { let encoded_data_dir = data_dir.join("graphdb.bincode"); if encoded_data_dir.exists() { let now = Instant::now(); - let g = Graph::decode(encoded_data_dir.as_path(), None)?; + let g = Graph::decode(encoded_data_dir.as_path())?; info!( "Loaded graph from path {} with {} nodes, {} edges, took {} seconds", diff --git a/examples/rust/src/bin/lotr/main.rs b/examples/rust/src/bin/lotr/main.rs index 69a5386995..cda67cf6c5 100644 --- a/examples/rust/src/bin/lotr/main.rs +++ b/examples/rust/src/bin/lotr/main.rs @@ -38,7 +38,7 @@ fn main() { let graph = if encoded_data_dir.exists() { let now = Instant::now(); - let g = Graph::decode(encoded_data_dir.as_path(), None) 
+ let g = Graph::decode(encoded_data_dir.as_path()) .expect("Failed to load graph from encoded data files"); info!( diff --git a/examples/rust/src/bin/pokec/main.rs b/examples/rust/src/bin/pokec/main.rs index beadfe8e5a..3f066ab915 100644 --- a/examples/rust/src/bin/pokec/main.rs +++ b/examples/rust/src/bin/pokec/main.rs @@ -24,7 +24,7 @@ fn main() { let data_dir = Path::new(args.get(1).expect("No data directory provided")); let g = if std::path::Path::new("/tmp/pokec").exists() { - Graph::decode("/tmp/pokec", None).unwrap() + Graph::decode("/tmp/pokec").unwrap() } else { let g = Graph::new(); CsvLoader::new(data_dir) diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index adac628ab2..cf5cd38e89 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1,6 +1,7 @@ """ Raphtory graph analytics library """ + from __future__ import annotations ############################################################################### @@ -26,8 +27,40 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['GraphView', 'Graph', 'PersistentGraph', 'Node', 'Nodes', 'PathFromNode', 'PathFromGraph', 'MutableNode', 'Edge', 'Edges', 'NestedEdges', 'MutableEdge', 'Properties', 'PyPropValueList', 'Metadata', 'TemporalProperties', 'PropertiesView', 'TemporalProp', 'WindowSet', 'IndexSpecBuilder', 'IndexSpec', 'version', 'graphql', 'algorithms', 'graph_loader', 'graph_gen', 'vectors', 'node_state', 'filter', 'iterables', 'nullmodels', 'plottingutils'] -class GraphView(object): +__all__ = [ + "GraphView", + "Graph", + "PersistentGraph", + "Node", + "Nodes", + "PathFromNode", + "PathFromGraph", + "MutableNode", + "Edge", + "Edges", + "NestedEdges", + "MutableEdge", + "Properties", + "PyPropValueList", + "Metadata", + "TemporalProperties", + "PropertiesView", + "TemporalProp", + "WindowSet", + "version", + "graphql", + "algorithms", + "graph_loader", + "graph_gen", + "vectors", + "node_state", + "filter", + "iterables", + "nullmodels", + "plottingutils", +] + +class GraphView(object): """Graph view is a read-only version of a graph at a certain point in time.""" def __eq__(self, value): @@ -237,7 +270,9 @@ class GraphView(object): GraphView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -307,14 +342,6 @@ class GraphView(object): list[Node]: the nodes that match the properties name and value """ - def get_index_spec(self) -> IndexSpec: - """ - Get index spec - - Returns: - IndexSpec: - """ - def has_edge(self, src: NodeInput, dst: NodeInput) -> bool: """ Returns true if the graph contains the specified edge @@ -401,12 +428,16 @@ class GraphView(object): def materialize(self) -> GraphView: """ - Returns a 'materialized' clone of the graph view - i.e. a new graph with a copy of the data seen within the view instead of just a mask over the original graph + Returns a 'materialized' clone of the graph view - i.e. a new graph with a + copy of the data seen within the view instead of just a mask over the original graph. 
Returns: GraphView: Returns a graph clone """ + def materialize_at(self, path): + """Materializes the graph view into a graphql compatible folder.""" + @property def metadata(self) -> Metadata: """ @@ -447,7 +478,12 @@ class GraphView(object): Properties: Properties paired with their names """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -471,32 +507,6 @@ class GraphView(object): WindowSet: A `WindowSet` object. """ - def search_edges(self, filter: Any, limit: int = 25, offset: int = 0) -> list[Edge]: - """ - Searches for edges which match the given filter expression. This uses Tantivy's exact search. - - Arguments: - filter: The filter expression to search for. - limit(int): The maximum number of results to return. Defaults to 25. - offset(int): The number of results to skip. This is useful for pagination. Defaults to 0. - - Returns: - list[Edge]: A list of edges which match the filter expression. The list will be empty if no edges match the query. - """ - - def search_nodes(self, filter: Any, limit: int = 25, offset: int = 0) -> list[Node]: - """ - Searches for nodes which match the given filter expression. This uses Tantivy's exact search. - - Arguments: - filter: The filter expression to search for. - limit(int): The maximum number of results to return. Defaults to 25. - offset(int): The number of results to skip. This is useful for pagination. Defaults to 0. - - Returns: - list[Node]: A list of nodes which match the filter expression. The list will be empty if no nodes match. - """ - def shrink_end(self, end: TimeInput) -> GraphView: """ Set the end of the window to the smaller of `end` and `self.end()` @@ -593,7 +603,14 @@ class GraphView(object): GraphView: Returns the subgraph """ - def to_networkx(self, explode_edges: bool = False, include_node_properties: bool = True, include_edge_properties: bool = True, include_update_history: bool = True, include_property_history: bool = True) -> nx.MultiDiGraph: + def to_networkx( + self, + explode_edges: bool = False, + include_node_properties: bool = True, + include_edge_properties: bool = True, + include_update_history: bool = True, + include_property_history: bool = True, + ) -> nx.MultiDiGraph: """ Returns a graph with NetworkX. @@ -612,7 +629,19 @@ class GraphView(object): nx.MultiDiGraph: A Networkx MultiDiGraph. """ - def to_pyvis(self, explode_edges: bool = False, edge_color: str = '#000000', shape: str = 'dot', node_image: Optional[str] = None, edge_weight: Optional[str] = None, edge_label: Optional[str] = None, colour_nodes_by_type: bool = False, directed: bool = True, notebook: bool = False, **kwargs: Any) -> pyvis.network.Network: + def to_pyvis( + self, + explode_edges: bool = False, + edge_color: str = "#000000", + shape: str = "dot", + node_image: Optional[str] = None, + edge_weight: Optional[str] = None, + edge_label: Optional[str] = None, + colour_nodes_by_type: bool = False, + directed: bool = True, + notebook: bool = False, + **kwargs: Any, + ) -> pyvis.network.Network: """ Draw a graph with PyVis. Pyvis is a required dependency. 
If you intend to use this function make sure that you install Pyvis @@ -673,7 +702,14 @@ class GraphView(object): GraphView: The layered view """ - def vectorise(self, embedding: Callable[[list], list], nodes: bool | str = True, edges: bool | str = True, cache: Optional[str] = None, verbose: bool = False) -> VectorisedGraph: + def vectorise( + self, + embedding: Callable[[list], list], + nodes: bool | str = True, + edges: bool | str = True, + cache: Optional[str] = None, + verbose: bool = False, + ) -> VectorisedGraph: """ Create a VectorisedGraph from the current graph @@ -709,7 +745,7 @@ class GraphView(object): Optional[int]: """ -class Graph(GraphView): +class Graph(GraphView): """ A temporal graph with event semantics. @@ -717,13 +753,19 @@ class Graph(GraphView): num_shards (int, optional): The number of locks to use in the storage to allow for multithreaded updates. """ - def __new__(cls, num_shards: Optional[int] = None) -> Graph: + def __new__(cls, path=None) -> Graph: """Create and return a new object. See help(type) for accurate signature.""" - def __reduce__(self): - ... - - def add_edge(self, timestamp: TimeInput, src: str|int, dst: str|int, properties: Optional[PropInput] = None, layer: Optional[str] = None, secondary_index: Optional[int] = None) -> MutableEdge: + def __reduce__(self): ... + def add_edge( + self, + timestamp: TimeInput, + src: str | int, + dst: str | int, + properties: Optional[PropInput] = None, + layer: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> MutableEdge: """ Adds a new edge with the given source and destination nodes and properties to the graph. @@ -756,7 +798,14 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def add_node(self, timestamp: TimeInput, id: str|int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, secondary_index: Optional[int] = None) -> MutableNode: + def add_node( + self, + timestamp: TimeInput, + id: str | int, + properties: Optional[PropInput] = None, + node_type: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> MutableNode: """ Adds a new node with the given id and properties to the graph. @@ -774,7 +823,12 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def add_properties(self, timestamp: TimeInput, properties: PropInput, secondary_index: Optional[int] = None) -> None: + def add_properties( + self, + timestamp: TimeInput, + properties: PropInput, + secondary_index: Optional[int] = None, + ) -> None: """ Adds properties to the graph. @@ -790,68 +844,14 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def cache(self, path: str) -> None: - """ - Write Graph to cache file and initialise the cache. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. If the file already exists its contents are overwritten. - - Arguments: - path (str): The path to the cache file - - Returns: - None: - """ - - def create_index(self) -> None: - """ - Create graph index - - Returns: - None: - """ - - def create_index_in_ram(self) -> None: - """ - Creates a graph index in memory (RAM). - - This is primarily intended for use in tests and should not be used in production environments, - as the index will not be persisted to disk. - - Returns: - None: - """ - - def create_index_in_ram_with_spec(self, py_spec: IndexSpec) -> None: - """ - Creates a graph index in memory (RAM) with the provided index spec. 
- - This is primarily intended for use in tests and should not be used in production environments, - as the index will not be persisted to disk. - - Arguments: - py_spec: The specification for the in-memory index to be created. - - Arguments: - py_spec (IndexSpec): - The specification for the in-memory index to be created. - - Returns: - None: - """ - - def create_index_with_spec(self, py_spec: Any) -> None: - """ - Create graph index with the provided index spec. - - Arguments: - py_spec: - The specification for the in-memory index to be created. - - Returns: - None: - """ - - def create_node(self, timestamp: TimeInput, id: str|int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, secondary_index: Optional[int] = None) -> MutableNode: + def create_node( + self, + timestamp: TimeInput, + id: str | int, + properties: Optional[PropInput] = None, + node_type: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> MutableNode: """ Creates a new node with the given id and properties to the graph. It fails if the node already exists. @@ -881,7 +881,7 @@ class Graph(GraphView): Graph: """ - def edge(self, src: str|int, dst: str|int) -> MutableEdge: + def edge(self, src: str | int, dst: str | int) -> MutableEdge: """ Gets the edge with the specified source and destination nodes @@ -974,7 +974,9 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def import_edges_as(self, edges: List[Edge], new_ids: List[Tuple[int, int]], merge: bool = False) -> None: + def import_edges_as( + self, edges: List[Edge], new_ids: List[Tuple[int, int]], merge: bool = False + ) -> None: """ Import multiple edges into the graph with new ids. @@ -1009,7 +1011,9 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def import_node_as(self, node: Node, new_id: str|int, merge: bool = False) -> MutableNode: + def import_node_as( + self, node: Node, new_id: str | int, merge: bool = False + ) -> MutableNode: """ Import a single node into the graph with new id. @@ -1044,7 +1048,9 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def import_nodes_as(self, nodes: List[Node], new_ids: List[str|int], merge: bool = False) -> None: + def import_nodes_as( + self, nodes: List[Node], new_ids: List[str | int], merge: bool = False + ) -> None: """ Import multiple nodes into the graph with new ids. @@ -1075,21 +1081,17 @@ class Graph(GraphView): """ @staticmethod - def load_cached(path: str) -> Graph: - """ - Load Graph from a file and initialise it as a cache file. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. - - Arguments: - path (str): The path to the cache file - - Returns: - Graph: the loaded graph with initialised cache - """ - - def load_edge_props_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load(path): ... + def load_edge_props_from_pandas( + self, + df: DataFrame, + src: str, + dst: str, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + ) -> None: """ Load edge properties from a Pandas DataFrame. @@ -1109,7 +1111,16 @@ class Graph(GraphView): GraphError: If the operation fails. 
""" - def load_edge_props_from_parquet(self, parquet_path: str, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edge_props_from_parquet( + self, + parquet_path: str, + src: str, + dst: str, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + ) -> None: """ Load edge properties from parquet file @@ -1129,7 +1140,19 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edges_from_pandas( + self, + df: DataFrame, + time: str, + src: str, + dst: str, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + secondary_index: Optional[str] = None, + ) -> None: """ Load edges from a Pandas DataFrame into the graph. @@ -1143,6 +1166,7 @@ class Graph(GraphView): shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + secondary_index (str, optional): The column name for the secondary index. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -1151,7 +1175,19 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_parquet(self, parquet_path: str, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edges_from_parquet( + self, + parquet_path: str, + time: str, + src: str, + dst: str, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + secondary_index: Optional[str] = None, + ) -> None: """ Load edges from a Parquet file into the graph. @@ -1165,6 +1201,7 @@ class Graph(GraphView): shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + secondary_index (str, optional): The column name for the secondary index. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -1176,7 +1213,7 @@ class Graph(GraphView): @staticmethod def load_from_file(path: str) -> Graph: """ - Load Graph from a file. + Load Graph from a parquet file. Arguments: path (str): The path to the file. 
@@ -1185,7 +1222,15 @@ class Graph(GraphView): Graph: """ - def load_node_props_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_node_props_from_pandas( + self, + df: DataFrame, + id: str, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + ) -> None: """ Load node properties from a Pandas DataFrame. @@ -1204,7 +1249,15 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_node_props_from_parquet(self, parquet_path: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_node_props_from_parquet( + self, + parquet_path: str, + id: str, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + ) -> None: """ Load node properties from a parquet file. @@ -1223,7 +1276,18 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_pandas(self, df: DataFrame, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_nodes_from_pandas( + self, + df: DataFrame, + time: str, + id: str, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + secondary_index: Optional[str] = None, + ) -> None: """ Load nodes from a Pandas DataFrame into the graph. @@ -1236,6 +1300,7 @@ class Graph(GraphView): properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + secondary_index (str, optional): The column name for the secondary index. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -1244,7 +1309,18 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_parquet(self, parquet_path: str, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_nodes_from_parquet( + self, + parquet_path: str, + time: str, + id: str, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + secondary_index: Optional[str] = None, + ) -> None: """ Load nodes from a Parquet file into the graph. @@ -1257,6 +1333,7 @@ class Graph(GraphView): properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. 
Defaults to None. + secondary_index (str, optional): The column name for the secondary index. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -1265,7 +1342,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def node(self, id: str|int) -> MutableNode: + def node(self, id: str | int) -> MutableNode: """ Gets the node with the specified id @@ -1286,7 +1363,7 @@ class Graph(GraphView): def save_to_file(self, path: str) -> None: """ - Saves the Graph to the given path. + Saves the Graph to the given path in parquet format. Arguments: path (str): The path to the file. @@ -1315,7 +1392,7 @@ class Graph(GraphView): def to_parquet(self, graph_dir: str | PathLike) -> None: """ - Persist graph to parquet files. + Persist graph to parquet files Arguments: graph_dir (str | PathLike): the folder where the graph will be persisted as parquet @@ -1338,24 +1415,22 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def write_updates(self) -> None: - """ - Persist the new updates by appending them to the cache file. - - Returns: - None: - """ - -class PersistentGraph(GraphView): +class PersistentGraph(GraphView): """A temporal graph that allows edges and nodes to be deleted.""" - def __new__(cls) -> PersistentGraph: + def __new__(cls, path=None) -> PersistentGraph: """Create and return a new object. See help(type) for accurate signature.""" - def __reduce__(self): - ... - - def add_edge(self, timestamp: int, src: str | int, dst: str | int, properties: Optional[PropInput] = None, layer: Optional[str] = None, secondary_index: Optional[int] = None) -> None: + def __reduce__(self): ... + def add_edge( + self, + timestamp: int, + src: str | int, + dst: str | int, + properties: Optional[PropInput] = None, + layer: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> None: """ Adds a new edge with the given source and destination nodes and properties to the graph. @@ -1388,7 +1463,14 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def add_node(self, timestamp: TimeInput, id: str | int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, secondary_index: Optional[int] = None) -> None: + def add_node( + self, + timestamp: TimeInput, + id: str | int, + properties: Optional[PropInput] = None, + node_type: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> None: """ Adds a new node with the given id and properties to the graph. @@ -1406,7 +1488,12 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def add_properties(self, timestamp: TimeInput, properties: dict, secondary_index: Optional[int] = None) -> None: + def add_properties( + self, + timestamp: TimeInput, + properties: dict, + secondary_index: Optional[int] = None, + ) -> None: """ Adds properties to the graph. @@ -1422,67 +1509,14 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def cache(self, path: str) -> None: - """ - Write PersistentGraph to cache file and initialise the cache. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. If the file already exists its contents are overwritten. - - Arguments: - path (str): The path to the cache file - - Returns: - None: - """ - - def create_index(self) -> None: - """ - Create graph index - - Returns: - None: - """ - - def create_index_in_ram(self) -> None: - """ - Creates a graph index in memory (RAM). 
- - This is primarily intended for use in tests and should not be used in production environments, - as the index will not be persisted to disk. - - Returns: - None: - """ - - def create_index_in_ram_with_spec(self, py_spec: IndexSpec) -> None: - """ - Creates a graph index in memory (RAM) with the provided index spec. - - This is primarily intended for use in tests and should not be used in production environments, - as the index will not be persisted to disk. - - Arguments: - py_spec: The specification for the in-memory index to be created. - - Arguments: - py_spec (IndexSpec): The specification for the in-memory index to be created. - - Returns: - None: - """ - - def create_index_with_spec(self, py_spec: Any) -> None: - """ - Create graph index with the provided index spec. - Arguments: - py_spec: - The specification for the in-memory index to be created. - - Returns: - None: - """ - - def create_node(self, timestamp: TimeInput, id: str | int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, secondary_index: Optional[int] = None) -> MutableNode: + def create_node( + self, + timestamp: TimeInput, + id: str | int, + properties: Optional[PropInput] = None, + node_type: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> MutableNode: """ Creates a new node with the given id and properties to the graph. It fails if the node already exists. @@ -1500,7 +1534,14 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def delete_edge(self, timestamp: int, src: str | int, dst: str | int, layer: Optional[str] = None, secondary_index: Optional[int] = None) -> MutableEdge: + def delete_edge( + self, + timestamp: int, + src: str | int, + dst: str | int, + layer: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> MutableEdge: """ Deletes an edge given the timestamp, src and dst nodes and layer (optional) @@ -1613,7 +1654,9 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def import_edges_as(self, edges: List[Edge], new_ids: list[Tuple[GID, GID]], merge: bool = False) -> None: + def import_edges_as( + self, edges: List[Edge], new_ids: list[Tuple[GID, GID]], merge: bool = False + ) -> None: """ Import multiple edges into the graph with new ids. @@ -1650,7 +1693,9 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def import_node_as(self, node: Node, new_id: str|int, merge: bool = False) -> Node: + def import_node_as( + self, node: Node, new_id: str | int, merge: bool = False + ) -> Node: """ Import a single node into the graph with new id. @@ -1687,7 +1732,9 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def import_nodes_as(self, nodes: List[Node], new_ids: List[str|int], merge: bool = False) -> None: + def import_nodes_as( + self, nodes: List[Node], new_ids: List[str | int], merge: bool = False + ) -> None: """ Import multiple nodes into the graph with new ids. @@ -1707,21 +1754,17 @@ class PersistentGraph(GraphView): """ @staticmethod - def load_cached(path: str) -> PersistentGraph: - """ - Load PersistentGraph from a file and initialise it as a cache file. - - Future updates are tracked. Use `write_updates` to persist them to the - cache file. 
- - Arguments: - path (str): The path to the cache file - - Returns: - PersistentGraph: the loaded graph with initialised cache - """ - - def load_edge_deletions_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load(path): ... + def load_edge_deletions_from_pandas( + self, + df: DataFrame, + time: str, + src: str, + dst: str, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + secondary_index: Optional[str] = None, + ) -> None: """ Load edges deletions from a Pandas DataFrame into the graph. @@ -1730,8 +1773,10 @@ class PersistentGraph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. + NOTE: All values in this column must be unique. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + secondary_index (str, optional): The column name for the secondary index. Returns: None: This function does not return a value, if the operation is successful. @@ -1740,17 +1785,28 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edge_deletions_from_parquet(self, parquet_path: str, time: str, src: str, dst: str, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edge_deletions_from_parquet( + self, + parquet_path: str, + time: str, + src: str, + dst: str, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + secondary_index: Optional[str] = None, + ) -> None: """ Load edges deletions from a Parquet file into the graph. Arguments: parquet_path (str): Parquet file or directory of Parquet files path containing node information. + time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - time (str): The column name for the update timestamps. + NOTE: All values in this column must be unique. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + secondary_index (str, optional): The column name for the secondary index. Returns: None: This function does not return a value, if the operation is successful. @@ -1759,7 +1815,16 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edge_props_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edge_props_from_pandas( + self, + df: DataFrame, + src: str, + dst: str, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + ) -> None: """ Load edge properties from a Pandas DataFrame. @@ -1779,7 +1844,16 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. 
""" - def load_edge_props_from_parquet(self, parquet_path: str, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edge_props_from_parquet( + self, + parquet_path: str, + src: str, + dst: str, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + ) -> None: """ Load edge properties from parquet file @@ -1799,7 +1873,19 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edges_from_pandas( + self, + df: DataFrame, + time: str, + src: str, + dst: str, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + secondary_index: Optional[str] = None, + ) -> None: """ Load edges from a Pandas DataFrame into the graph. @@ -1808,11 +1894,13 @@ class PersistentGraph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. + NOTE: All values in this column must be unique. Defaults to None. properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + secondary_index (str, optional): The column name for the secondary index. Returns: None: This function does not return a value, if the operation is successful. @@ -1821,7 +1909,19 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_parquet(self, parquet_path: str, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edges_from_parquet( + self, + parquet_path: str, + time: str, + src: str, + dst: str, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + layer: Optional[str] = None, + layer_col: Optional[str] = None, + secondary_index: Optional[str] = None, + ) -> None: """ Load edges from a Parquet file into the graph. @@ -1830,11 +1930,13 @@ class PersistentGraph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. + NOTE: All values in this column must be unique. Defaults to None. properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. 
shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + secondary_index (str, optional): The column name for the secondary index. Returns: None: This function does not return a value, if the operation is successful. @@ -1846,7 +1948,7 @@ class PersistentGraph(GraphView): @staticmethod def load_from_file(path: str) -> PersistentGraph: """ - Load PersistentGraph from a file. + Load PersistentGraph from a parquet file. Arguments: path (str): The path to the file. @@ -1855,7 +1957,15 @@ class PersistentGraph(GraphView): PersistentGraph: """ - def load_node_props_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_node_props_from_pandas( + self, + df: DataFrame, + id: str, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + ) -> None: """ Load node properties from a Pandas DataFrame. @@ -1874,7 +1984,15 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_node_props_from_parquet(self, parquet_path: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_node_props_from_parquet( + self, + parquet_path: str, + id: str, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + ) -> None: """ Load node properties from a parquet file. @@ -1893,7 +2011,18 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_pandas(self, df: DataFrame, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_nodes_from_pandas( + self, + df: DataFrame, + time: str, + id: str, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + secondary_index: Optional[str] = None, + ) -> None: """ Load nodes from a Pandas DataFrame into the graph. @@ -1901,11 +2030,13 @@ class PersistentGraph(GraphView): df (DataFrame): The Pandas DataFrame containing the nodes. time (str): The column name for the timestamps. id (str): The column name for the node IDs. + NOTE: All values in this column must be unique. Defaults to None. node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. 
shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + secondary_index (str, optional): The column name for the secondary index. Returns: None: This function does not return a value, if the operation is successful. @@ -1914,7 +2045,18 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_parquet(self, parquet_path: str, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_nodes_from_parquet( + self, + parquet_path: str, + time: str, + id: str, + node_type: Optional[str] = None, + node_type_col: Optional[str] = None, + properties: Optional[List[str]] = None, + metadata: Optional[List[str]] = None, + shared_metadata: Optional[PropInput] = None, + secondary_index: Optional[str] = None, + ) -> None: """ Load nodes from a Parquet file into the graph. @@ -1922,11 +2064,13 @@ class PersistentGraph(GraphView): parquet_path (str): Parquet file or directory of Parquet files containing the nodes time (str): The column name for the timestamps. id (str): The column name for the node IDs. + NOTE: All values in this column must be unique. Defaults to None. node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + secondary_index (str, optional): The column name for the secondary index. Returns: None: This function does not return a value, if the operation is successful. @@ -1956,7 +2100,7 @@ class PersistentGraph(GraphView): def save_to_file(self, path: str) -> None: """ - Saves the PersistentGraph to the given path. + Saves the PersistentGraph to the given path in parquet format. Arguments: path (str): The path to the file. @@ -1997,15 +2141,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def write_updates(self) -> None: - """ - Persist the new updates by appending them to the cache file. - - Returns: - None: - """ - -class Node(object): +class Node(object): """A node (or node) in the graph.""" def __eq__(self, value): @@ -2180,7 +2316,9 @@ class Node(object): Node: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2261,7 +2399,7 @@ class Node(object): """ @property - def id(self) -> (str|int): + def id(self) -> str | int: """ Returns the id of the node. This is a unique identifier for the node. @@ -2425,7 +2563,12 @@ class Node(object): Properties: A list of properties. 
""" - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -2556,7 +2699,7 @@ class Node(object): Optional[int]: """ -class Nodes(object): +class Nodes(object): """A list of nodes that can be iterated over.""" def __bool__(self): @@ -2745,7 +2888,9 @@ class Nodes(object): Nodes: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2982,7 +3127,12 @@ class Nodes(object): PropertiesView: A view of the node properties. """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3080,7 +3230,9 @@ class Nodes(object): Optional[datetime]: The earliest datetime that this Nodes is valid or None if the Nodes is valid for all times. """ - def to_df(self, include_property_history: bool = False, convert_datetime: bool = False) -> DataFrame: + def to_df( + self, include_property_history: bool = False, convert_datetime: bool = False + ) -> DataFrame: """ Converts the graph's nodes into a Pandas DataFrame. @@ -3141,8 +3293,7 @@ class Nodes(object): Optional[int]: """ -class PathFromNode(object): - +class PathFromNode(object): def __bool__(self): """True if self else False""" @@ -3299,7 +3450,9 @@ class PathFromNode(object): PathFromNode: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3509,7 +3662,12 @@ class PathFromNode(object): PropertiesView: """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3651,8 +3809,7 @@ class PathFromNode(object): Optional[int]: """ -class PathFromGraph(object): - +class PathFromGraph(object): def __bool__(self): """True if self else False""" @@ -3818,7 +3975,9 @@ class PathFromGraph(object): PathFromGraph: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. 
@@ -4053,7 +4212,12 @@ class PathFromGraph(object): NestedPropsIterable: """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -4195,8 +4359,7 @@ class PathFromGraph(object): Optional[int]: """ -class MutableNode(Node): - +class MutableNode(Node): def __repr__(self): """Return repr(self).""" @@ -4213,7 +4376,12 @@ class MutableNode(Node): None: """ - def add_updates(self, t: TimeInput, properties: Optional[PropInput] = None, secondary_index: Optional[int] = None) -> None: + def add_updates( + self, + t: TimeInput, + properties: Optional[PropInput] = None, + secondary_index: Optional[int] = None, + ) -> None: """ Add updates to a node in the graph at a specified time. This function allows for the addition of property updates to a node within the graph. The updates are time-stamped, meaning they are applied at the specified time. @@ -4258,7 +4426,7 @@ class MutableNode(Node): None: """ -class Edge(object): +class Edge(object): """ PyEdge is a Python class that represents an edge in the graph. An edge is a directed connection between two nodes. @@ -4445,7 +4613,9 @@ class Edge(object): Edge: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -4647,7 +4817,12 @@ class Edge(object): Properties: Properties on the Edge. """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -4796,7 +4971,7 @@ class Edge(object): Optional[int]: """ -class Edges(object): +class Edges(object): """A list of edges that can be iterated over.""" def __bool__(self): @@ -4981,7 +5156,9 @@ class Edges(object): Edges: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -5188,7 +5365,12 @@ class Edges(object): PropertiesView: """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. 
If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -5304,7 +5486,12 @@ class Edges(object): I64Iterable: """ - def to_df(self, include_property_history: bool = True, convert_datetime: bool = False, explode: bool = False) -> DataFrame: + def to_df( + self, + include_property_history: bool = True, + convert_datetime: bool = False, + explode: bool = False, + ) -> DataFrame: """ Converts the graph's edges into a Pandas DataFrame. @@ -5357,8 +5544,7 @@ class Edges(object): Optional[int]: """ -class NestedEdges(object): - +class NestedEdges(object): def __bool__(self): """True if self else False""" @@ -5533,7 +5719,9 @@ class NestedEdges(object): NestedEdges: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -5731,7 +5919,12 @@ class NestedEdges(object): PyNestedPropsIterable: """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -5880,8 +6073,7 @@ class NestedEdges(object): Optional[int]: """ -class MutableEdge(Edge): - +class MutableEdge(Edge): def __repr__(self): """Return repr(self).""" @@ -5899,7 +6091,13 @@ class MutableEdge(Edge): None: """ - def add_updates(self, t: TimeInput, properties: Optional[PropInput] = None, layer: Optional[str] = None, secondary_index: Optional[int] = None) -> None: + def add_updates( + self, + t: TimeInput, + properties: Optional[PropInput] = None, + layer: Optional[str] = None, + secondary_index: Optional[int] = None, + ) -> None: """ Add updates to an edge in the graph at a specified time. This function allows for the addition of property updates to an edge within the graph. The updates are time-stamped, meaning they are applied at the specified time. @@ -5946,7 +6144,7 @@ class MutableEdge(Edge): None: """ -class Properties(object): +class Properties(object): """A view of the properties of an entity""" def __contains__(self, key): @@ -6037,8 +6235,7 @@ class Properties(object): list[PropValue]: """ -class PyPropValueList(object): - +class PyPropValueList(object): def __eq__(self, value): """Return self==value.""" @@ -6074,12 +6271,8 @@ class PyPropValueList(object): PropValue: The average of each property values, or None if count is zero. """ - def collect(self): - ... - - def count(self): - ... - + def collect(self): ... + def count(self): ... def drop_none(self) -> list[PropValue]: """ Drop none. 
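`MutableEdge.add_updates` (reformatted above) threads the same `secondary_index` through per-update calls. A sketch of ordering two updates at the same timestamp and inspecting the history via `Edges.to_df`, assuming an illustrative property name `status`:

```python
from raphtory import Graph

g = Graph()
e = g.add_edge(0, "a", "b")

# two updates at t=0; secondary_index disambiguates their order
e.add_updates(0, properties={"status": "open"}, secondary_index=1)
e.add_updates(0, properties={"status": "closed"}, secondary_index=2)

df = g.edges.to_df(include_property_history=True)
print(df)
```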
@@ -6128,7 +6321,7 @@ class PyPropValueList(object): PropValue: """ -class Metadata(object): +class Metadata(object): """A view of metadata of an entity""" def __contains__(self, key): @@ -6209,7 +6402,7 @@ class Metadata(object): list[PropValue]: """ -class TemporalProperties(object): +class TemporalProperties(object): """A view of the temporal properties of an entity""" def __contains__(self, key): @@ -6304,8 +6497,7 @@ class TemporalProperties(object): list[TemporalProp]: the list of property views """ -class PropertiesView(object): - +class PropertiesView(object): def __contains__(self, key): """Return bool(key in self).""" @@ -6388,7 +6580,7 @@ class PropertiesView(object): list[list[PropValue]]: """ -class TemporalProp(object): +class TemporalProp(object): """A view of a temporal property""" def __eq__(self, value): @@ -6549,8 +6741,7 @@ class TemporalProp(object): NumpyArray: """ -class WindowSet(object): - +class WindowSet(object): def __iter__(self): """Implement iter(self).""" @@ -6568,152 +6759,6 @@ class WindowSet(object): Iterable: The time index. """ -class IndexSpecBuilder(object): - - def __new__(cls, graph) -> IndexSpecBuilder: - """Create and return a new object. See help(type) for accurate signature.""" - - def build(self) -> IndexSpec: - """ - Return a spec - - Returns: - IndexSpec: - """ - - def with_all_edge_metadata(self) -> dict[str, Any]: - """ - Adds all edge metadata to the spec. - - Returns: - dict[str, Any]: - """ - - def with_all_edge_properties(self) -> dict[str, Any]: - """ - Adds all edge properties to the spec. - - Returns: - dict[str, Any]: - """ - - def with_all_edge_properties_and_metadata(self) -> dict[str, Any]: - """ - Adds all edge properties and metadata to the spec. - - Returns: - dict[str, Any]: - """ - - def with_all_node_metadata(self) -> dict[str, Any]: - """ - Adds all node metadata to the spec. - - Returns: - dict[str, Any]: - """ - - def with_all_node_properties(self) -> dict[str, Any]: - """ - Adds all node properties to the spec. - - Returns: - dict[str, Any]: - """ - - def with_all_node_properties_and_metadata(self) -> dict[str, Any]: - """ - Adds all node properties and metadata to the spec. - - Returns: - dict[str, Any]: - """ - - def with_edge_metadata(self, props: Any) -> dict[str, Any]: - """ - Adds specified edge metadata to the spec. - - Arguments: - props: List of metadata. - - Returns: - dict[str, Any]: - """ - - def with_edge_properties(self, props: Any) -> dict[str, Any]: - """ - Adds specified edge properties to the spec. - - Arguments: - props: List of properties. - - Returns: - dict[str, Any]: - """ - - def with_node_metadata(self, props: Any) -> dict[str, Any]: - """ - Adds specified node metadata to the spec. - - Arguments: - props: list of metadata. - - Returns: - dict[str, Any]: - """ - - def with_node_properties(self, props: Any) -> dict[str, Any]: - """ - Adds specified node properties to the spec. - - Arguments: - props: list of properties. - - Returns: - dict[str, Any]: - """ - -class IndexSpec(object): - - def __repr__(self): - """Return repr(self).""" - - @property - def edge_metadata(self) -> list[str]: - """ - Get edge metadata. - - Returns: - list[str]: - """ - - @property - def edge_properties(self) -> list[str]: - """ - Get edge properties. - - Returns: - list[str]: - """ - - @property - def node_metadata(self) -> list[str]: - """ - Get node metadata. - - Returns: - list[str]: - """ - - @property - def node_properties(self) -> list[str]: - """ - Get node properties. 
- - Returns: - list[str]: - """ - def version() -> str: """ Return Raphtory version. diff --git a/python/python/raphtory/algorithms/__init__.pyi b/python/python/raphtory/algorithms/__init__.pyi index c3005db67e..ae2892f399 100644 --- a/python/python/raphtory/algorithms/__init__.pyi +++ b/python/python/raphtory/algorithms/__init__.pyi @@ -1,6 +1,7 @@ """ Algorithmic functions that can be run on Raphtory graphs """ + from __future__ import annotations ############################################################################### @@ -26,8 +27,59 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['dijkstra_single_source_shortest_paths', 'global_reciprocity', 'betweenness_centrality', 'all_local_reciprocity', 'triplet_count', 'local_triangle_count', 'average_degree', 'directed_graph_density', 'degree_centrality', 'max_degree', 'min_degree', 'max_out_degree', 'max_in_degree', 'min_out_degree', 'min_in_degree', 'pagerank', 'single_source_shortest_path', 'global_clustering_coefficient', 'temporally_reachable_nodes', 'temporal_bipartite_graph_projection', 'local_clustering_coefficient', 'local_clustering_coefficient_batch', 'weakly_connected_components', 'strongly_connected_components', 'in_components', 'in_component', 'out_components', 'out_component', 'fast_rp', 'global_temporal_three_node_motif', 'global_temporal_three_node_motif_multi', 'local_temporal_three_node_motifs', 'hits', 'balance', 'label_propagation', 'k_core', 'temporal_SEIR', 'louvain', 'fruchterman_reingold', 'cohesive_fruchterman_reingold', 'max_weight_matching', 'Matching', 'Infected'] -def dijkstra_single_source_shortest_paths(graph: GraphView, source: NodeInput, targets: list[NodeInput], direction: Direction = "both", weight: str = 'weight') -> NodeStateWeightedSP: +__all__ = [ + "dijkstra_single_source_shortest_paths", + "global_reciprocity", + "betweenness_centrality", + "all_local_reciprocity", + "triplet_count", + "local_triangle_count", + "average_degree", + "directed_graph_density", + "degree_centrality", + "max_degree", + "min_degree", + "max_out_degree", + "max_in_degree", + "min_out_degree", + "min_in_degree", + "pagerank", + "single_source_shortest_path", + "global_clustering_coefficient", + "temporally_reachable_nodes", + "temporal_bipartite_graph_projection", + "local_clustering_coefficient", + "local_clustering_coefficient_batch", + "weakly_connected_components", + "strongly_connected_components", + "in_components", + "in_component", + "out_components", + "out_component", + "fast_rp", + "global_temporal_three_node_motif", + "global_temporal_three_node_motif_multi", + "local_temporal_three_node_motifs", + "hits", + "balance", + "label_propagation", + "k_core", + "temporal_SEIR", + "louvain", + "fruchterman_reingold", + "cohesive_fruchterman_reingold", + "max_weight_matching", + "Matching", + "Infected", +] + +def dijkstra_single_source_shortest_paths( + graph: GraphView, + source: NodeInput, + targets: list[NodeInput], + direction: Direction = "both", + weight: str = "weight", +) -> NodeStateWeightedSP: """ Finds the shortest paths from a single source to multiple targets in a graph. @@ -57,7 +109,9 @@ def global_reciprocity(graph: GraphView) -> float: float: reciprocity of the graph between 0 and 1. 
""" -def betweenness_centrality(graph: GraphView, k: Optional[int] = None, normalized: bool = True) -> NodeStateF64: +def betweenness_centrality( + graph: GraphView, k: Optional[int] = None, normalized: bool = True +) -> NodeStateF64: """ Computes the betweenness centrality for nodes in a given graph. @@ -225,7 +279,13 @@ def min_in_degree(graph: GraphView) -> int: int: value of the smallest indegree """ -def pagerank(graph: GraphView, iter_count: int = 20, max_diff: Optional[float] = None, use_l2_norm: bool = True, damping_factor: float = 0.85) -> NodeStateF64: +def pagerank( + graph: GraphView, + iter_count: int = 20, + max_diff: Optional[float] = None, + use_l2_norm: bool = True, + damping_factor: float = 0.85, +) -> NodeStateF64: """ Pagerank -- pagerank centrality value of the nodes in a graph @@ -246,7 +306,9 @@ def pagerank(graph: GraphView, iter_count: int = 20, max_diff: Optional[float] = NodeStateF64: Mapping of nodes to their pagerank value. """ -def single_source_shortest_path(graph: GraphView, source: NodeInput, cutoff: Optional[int] = None) -> NodeStateNodes: +def single_source_shortest_path( + graph: GraphView, source: NodeInput, cutoff: Optional[int] = None +) -> NodeStateNodes: """ Calculates the single source shortest paths from a given source node. @@ -277,7 +339,13 @@ def global_clustering_coefficient(graph: GraphView) -> float: [`Triplet Count`](triplet_count) """ -def temporally_reachable_nodes(graph: GraphView, max_hops: int, start_time: int, seed_nodes: list[NodeInput], stop_nodes: Optional[list[NodeInput]] = None) -> NodeStateReachability: +def temporally_reachable_nodes( + graph: GraphView, + max_hops: int, + start_time: int, + seed_nodes: list[NodeInput], + stop_nodes: Optional[list[NodeInput]] = None, +) -> NodeStateReachability: """ Temporally reachable nodes -- the nodes that are reachable by a time respecting path followed out from a set of seed nodes at a starting time. @@ -296,7 +364,9 @@ def temporally_reachable_nodes(graph: GraphView, max_hops: int, start_time: int, NodeStateReachability: Mapping of nodes to their reachability history. """ -def temporal_bipartite_graph_projection(graph: GraphView, delta: int, pivot_type: str) -> Graph: +def temporal_bipartite_graph_projection( + graph: GraphView, delta: int, pivot_type: str +) -> Graph: """ Projects a temporal bipartite graph into an undirected temporal graph over the pivot node type. Let `G` be a bipartite graph with node types `A` and `B`. Given `delta > 0`, the projection graph `G'` pivoting over type `B` nodes, will make a connection between nodes `n1` and `n2` (of type `A`) at time `(t1 + t2)/2` if they respectively have an edge at time `t1`, `t2` with the same node of type `B` in `G`, and `|t2-t1| < delta`. @@ -409,7 +479,14 @@ def out_component(node: Node) -> NodeStateUsize: NodeStateUsize: A NodeState mapping the nodes in the out-component to their distance from the starting node. """ -def fast_rp(graph: GraphView, embedding_dim: int, normalization_strength: float, iter_weights: list[float], seed: Optional[int] = None, threads: Optional[int] = None) -> NodeStateListF64: +def fast_rp( + graph: GraphView, + embedding_dim: int, + normalization_strength: float, + iter_weights: list[float], + seed: Optional[int] = None, + threads: Optional[int] = None, +) -> NodeStateListF64: """ Computes embedding vectors for each vertex of an undirected/bidirectional graph according to the Fast RP algorithm. 
Original Paper: https://doi.org/10.48550/arXiv.1908.11512 @@ -425,7 +502,9 @@ def fast_rp(graph: GraphView, embedding_dim: int, normalization_strength: float, NodeStateListF64: Mapping from nodes to embedding vectors. """ -def global_temporal_three_node_motif(graph: GraphView, delta: int, threads: Optional[int] = None) -> list[int]: +def global_temporal_three_node_motif( + graph: GraphView, delta: int, threads: Optional[int] = None +) -> list[int]: """ Computes the number of three edge, up-to-three node delta-temporal motifs in the graph, using the algorithm of Paranjape et al, Motifs in Temporal Networks (2017). We point the reader to this reference for more information on the algorithm and background, but provide a short summary below. @@ -474,7 +553,9 @@ def global_temporal_three_node_motif(graph: GraphView, delta: int, threads: Opti """ -def global_temporal_three_node_motif_multi(graph: GraphView, deltas: list[int], threads: Optional[int] = None) -> list[list[int]]: +def global_temporal_three_node_motif_multi( + graph: GraphView, deltas: list[int], threads: Optional[int] = None +) -> list[list[int]]: """ Computes the global counts of three-edge up-to-three node temporal motifs for a range of timescales. See `global_temporal_three_node_motif` for an interpretation of each row returned. @@ -487,7 +568,9 @@ def global_temporal_three_node_motif_multi(graph: GraphView, deltas: list[int], list[list[int]]: A list of 40d arrays, each array is the motif count for a particular value of delta, returned in the order that the deltas were given as input. """ -def local_temporal_three_node_motifs(graph: GraphView, delta: int, threads=None) -> NodeStateMotifs: +def local_temporal_three_node_motifs( + graph: GraphView, delta: int, threads=None +) -> NodeStateMotifs: """ Computes the number of each type of motif that each node participates in. See global_temporal_three_node_motifs for a summary of the motifs involved. @@ -503,7 +586,9 @@ def local_temporal_three_node_motifs(graph: GraphView, delta: int, threads=None) the motif. For two node motifs, both constituent nodes count the motif. For triangles, all three constituent nodes count the motif. """ -def hits(graph: GraphView, iter_count: int = 20, threads: Optional[int] = None) -> NodeStateHits: +def hits( + graph: GraphView, iter_count: int = 20, threads: Optional[int] = None +) -> NodeStateHits: """ HITS (Hubs and Authority) Algorithm: @@ -522,7 +607,9 @@ def hits(graph: GraphView, iter_count: int = 20, threads: Optional[int] = None) NodeStateHits: A mapping from nodes to their hub and authority scores """ -def balance(graph: GraphView, name: str = "weight", direction: Direction = "both") -> NodeStateF64: +def balance( + graph: GraphView, name: str = "weight", direction: Direction = "both" +) -> NodeStateF64: """ Sums the weights of edges in the graph based on the specified direction.
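To make the reformatted `hits` and `balance` signatures concrete, a minimal sketch (toy data; `balance` sums the named weight property per node, `hits` returns hub/authority scores per node):

from raphtory import Graph
from raphtory.algorithms import balance, hits

g = Graph()
g.add_edge(1, "a", "b", properties={"weight": 3.0})
g.add_edge(2, "b", "c", properties={"weight": -1.0})

# Net "weight" per node, counting incoming and outgoing edges alike.
print(balance(g, name="weight", direction="both"))
# Hub and authority scores after 20 HITS iterations.
print(hits(g, iter_count=20))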
@@ -541,7 +628,9 @@ def balance(graph: GraphView, name: str = "weight", direction: Direction = "both """ -def label_propagation(graph: GraphView, seed: Optional[bytes] = None) -> list[set[Node]]: +def label_propagation( + graph: GraphView, seed: Optional[bytes] = None +) -> list[set[Node]]: """ Computes components using a label propagation algorithm @@ -554,7 +643,9 @@ def label_propagation(graph: GraphView, seed: Optional[bytes] = None) -> list[se """ -def k_core(graph: GraphView, k: int, iter_count: int, threads: Optional[int] = None) -> list[Node]: +def k_core( + graph: GraphView, k: int, iter_count: int, threads: Optional[int] = None +) -> list[Node]: """ Determines which nodes are in the k-core for a given value of k @@ -569,7 +660,15 @@ def k_core(graph: GraphView, k: int, iter_count: int, threads: Optional[int] = N """ -def temporal_SEIR(graph: GraphView, seeds: int | float | list[NodeInput], infection_prob: float, initial_infection: int | str | datetime, recovery_rate: float | None = None, incubation_rate: float | None = None, rng_seed: int | None = None) -> NodeStateSEIR: +def temporal_SEIR( + graph: GraphView, + seeds: int | float | list[NodeInput], + infection_prob: float, + initial_infection: int | str | datetime, + recovery_rate: float | None = None, + incubation_rate: float | None = None, + rng_seed: int | None = None, +) -> NodeStateSEIR: """ Simulate an SEIR dynamic on the network @@ -599,7 +698,12 @@ def temporal_SEIR(graph: GraphView, seeds: int | float | list[NodeInput], infect """ -def louvain(graph: GraphView, resolution: float = 1.0, weight_prop: str | None = None, tol: None | float = None) -> NodeStateUsize: +def louvain( + graph: GraphView, + resolution: float = 1.0, + weight_prop: str | None = None, + tol: None | float = None, +) -> NodeStateUsize: """ Louvain algorithm for community detection @@ -613,7 +717,14 @@ def louvain(graph: GraphView, resolution: float = 1.0, weight_prop: str | None = NodeStateUsize: Mapping of nodes to their community assignment """ -def fruchterman_reingold(graph: GraphView, iterations: int | None = 100, scale: float | None = 1.0, node_start_size: float | None = 1.0, cooloff_factor: float | None = 0.95, dt: float | None = 0.1) -> NodeLayout: +def fruchterman_reingold( + graph: GraphView, + iterations: int | None = 100, + scale: float | None = 1.0, + node_start_size: float | None = 1.0, + cooloff_factor: float | None = 0.95, + dt: float | None = 0.1, +) -> NodeLayout: """ Fruchterman Reingold layout algorithm @@ -629,7 +740,14 @@ def fruchterman_reingold(graph: GraphView, iterations: int | None = 100, scale: NodeLayout: A mapping from nodes to their [x, y] positions """ -def cohesive_fruchterman_reingold(graph: GraphView, iter_count: int = 100, scale: float = 1.0, node_start_size: float = 1.0, cooloff_factor: float = 0.95, dt: float = 0.1) -> NodeLayout: +def cohesive_fruchterman_reingold( + graph: GraphView, + iter_count: int = 100, + scale: float = 1.0, + node_start_size: float = 1.0, + cooloff_factor: float = 0.95, + dt: float = 0.1, +) -> NodeLayout: """ Cohesive version of `fruchterman_reingold` that adds virtual edges between isolated nodes Arguments: @@ -645,7 +763,12 @@ def cohesive_fruchterman_reingold(graph: GraphView, iter_count: int = 100, scale """ -def max_weight_matching(graph: GraphView, weight_prop: Optional[str] = None, max_cardinality: bool = True, verify_optimum_flag: bool = False) -> Matching: +def max_weight_matching( + graph: GraphView, + weight_prop: Optional[str] = None, + max_cardinality: bool = True, + 
verify_optimum_flag: bool = False, +) -> Matching: """ Compute a maximum-weighted matching in the general undirected weighted graph given by "edges". If `max_cardinality` is true, only @@ -682,7 +805,7 @@ def max_weight_matching(graph: GraphView, weight_prop: Optional[str] = None, max Matching: The matching """ -class Matching(object): +class Matching(object): """A Matching (i.e., a set of edges that do not share any nodes)""" def __bool__(self): @@ -754,8 +877,7 @@ class Matching(object): """ -class Infected(object): - +class Infected(object): def __repr__(self): """Return repr(self).""" diff --git a/python/python/raphtory/filter/__init__.pyi b/python/python/raphtory/filter/__init__.pyi index 36d732c413..5f33a18fcb 100644 --- a/python/python/raphtory/filter/__init__.pyi +++ b/python/python/raphtory/filter/__init__.pyi @@ -23,9 +23,20 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['FilterExpr', 'PropertyFilterOps', 'NodeFilterBuilder', 'Node', 'EdgeFilterOp', 'EdgeEndpoint', 'Edge', 'Property', 'Metadata', 'TemporalPropertyFilterBuilder'] -class FilterExpr(object): - +__all__ = [ + "FilterExpr", + "PropertyFilterOps", + "NodeFilterBuilder", + "Node", + "EdgeFilterOp", + "EdgeEndpoint", + "Edge", + "Property", + "Metadata", + "TemporalPropertyFilterBuilder", +] + +class FilterExpr(object): def __and__(self, value): """Return self&value.""" @@ -41,8 +52,7 @@ class FilterExpr(object): def __ror__(self, value): """Return value|self.""" -class PropertyFilterOps(object): - +class PropertyFilterOps(object): def __eq__(self, value): """Return self==value.""" @@ -64,7 +74,7 @@ class PropertyFilterOps(object): def contains(self, value) -> filter.FilterExpr: """ Returns a filter expression that checks if this object contains a specified property. - + Arguments: PropValue: @@ -72,7 +82,9 @@ class PropertyFilterOps(object): filter.FilterExpr: """ - def fuzzy_search(self, prop_value: str, levenshtein_distance: int, prefix_match: bool) -> filter.FilterExpr: + def fuzzy_search( + self, prop_value: str, levenshtein_distance: int, prefix_match: bool + ) -> filter.FilterExpr: """ Returns a filter expression that checks if the specified properties approximately match the specified string. @@ -82,7 +94,7 @@ class PropertyFilterOps(object): prop_value (str): Property to match against. levenshtein_distance (int): Maximum levenshtein distance between the specified prop_value and the result. prefix_match (bool): Enable prefix matching. - + Returns: filter.FilterExpr: """ @@ -90,7 +102,7 @@ class PropertyFilterOps(object): def is_in(self, values: list[PropValue]) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is in a specified iterable of properties. - + Arguments: values (list[PropValue]): @@ -101,7 +113,7 @@ class PropertyFilterOps(object): def is_none(self) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is none. - + Returns: filter.FilterExpr: """ @@ -109,7 +121,7 @@ class PropertyFilterOps(object): def is_not_in(self, values: list[PropValue]) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is not in a specified iterable of properties. - + Arguments: values (list[PropValue]): @@ -120,7 +132,7 @@ class PropertyFilterOps(object): def is_some(self) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is some. 
- + Returns: filter.FilterExpr: """ @@ -128,7 +140,7 @@ class PropertyFilterOps(object): def not_contains(self, value) -> filter.FilterExpr: """ Returns a filter expression that checks if this object does not contain a specified property. - + Arguments: PropValue: @@ -136,7 +148,7 @@ class PropertyFilterOps(object): filter.FilterExpr: """ -class NodeFilterBuilder(object): +class NodeFilterBuilder(object): """ A builder for constructing node filters @@ -172,7 +184,9 @@ class NodeFilterBuilder(object): filter.FilterExpr: """ - def fuzzy_search(self, value, levenshtein_distance: int, prefix_match: bool) -> filter.FilterExpr: + def fuzzy_search( + self, value, levenshtein_distance: int, prefix_match: bool + ) -> filter.FilterExpr: """ Returns a filter expression that checks if the specified properties approximately match the specified string. @@ -213,7 +227,7 @@ class NodeFilterBuilder(object): """ Returns a filter expression that checks if the specified iterable of strings does not contain a given value. - + Arguments: value (str): @@ -221,8 +235,7 @@ class NodeFilterBuilder(object): filter.FilterExpr: """ -class Node(object): - +class Node(object): @staticmethod def name(): """ @@ -241,8 +254,7 @@ class Node(object): NodeFilterBuilder: A filter builder for filtering by node type """ -class EdgeFilterOp(object): - +class EdgeFilterOp(object): def __eq__(self, value): """Return self==value.""" @@ -264,7 +276,7 @@ class EdgeFilterOp(object): def contains(self, value: str) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value contains the specified string. - + Arguments: value (str): @@ -272,7 +284,9 @@ class EdgeFilterOp(object): filter.FilterExpr: """ - def fuzzy_search(self, value, levenshtein_distance: int, prefix_match: bool) -> filter.FilterExpr: + def fuzzy_search( + self, value, levenshtein_distance: int, prefix_match: bool + ) -> filter.FilterExpr: """ Returns a filter expression that checks if the specified properties approximately match the specified string. @@ -282,7 +296,7 @@ class EdgeFilterOp(object): prop_value (str): Property to match against. levenshtein_distance (int): Maximum levenshtein distance between the specified prop_value and the result. prefix_match (bool): Enable prefix matching. - + Returns: filter.FilterExpr: """ @@ -290,7 +304,7 @@ class EdgeFilterOp(object): def is_in(self, values: list[str]) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is contained within the specified iterable of strings. - + Arguments: values (list[str]): @@ -301,7 +315,7 @@ class EdgeFilterOp(object): def is_not_in(self, values: list[str]) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is not contained within the provided iterable of strings. - + Arguments: values (list[str]): @@ -312,7 +326,7 @@ class EdgeFilterOp(object): def not_contains(self, value: str) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value does not contain the specified string. - + Arguments: value (str): @@ -320,22 +334,16 @@ class EdgeFilterOp(object): filter.FilterExpr: """ -class EdgeEndpoint(object): - - def name(self): - ... - -class Edge(object): +class EdgeEndpoint(object): + def name(self): ... +class Edge(object): @staticmethod - def dst(): - ... - + def dst(): ... @staticmethod - def src(): - ... + def src(): ... 
-class Property(PropertyFilterOps): +class Property(PropertyFilterOps): """ Construct a property filter @@ -346,10 +354,9 @@ class Property(PropertyFilterOps): def __new__(cls, name: str) -> Property: """Create and return a new object. See help(type) for accurate signature.""" - def temporal(self): - ... + def temporal(self): ... -class Metadata(PropertyFilterOps): +class Metadata(PropertyFilterOps): """ Construct a metadata filter @@ -360,10 +367,6 @@ class Metadata(PropertyFilterOps): def __new__(cls, name: str) -> Metadata: """Create and return a new object. See help(type) for accurate signature.""" -class TemporalPropertyFilterBuilder(object): - - def any(self): - ... - - def latest(self): - ... +class TemporalPropertyFilterBuilder(object): + def any(self): ... + def latest(self): ... diff --git a/python/python/raphtory/graph_gen/__init__.pyi b/python/python/raphtory/graph_gen/__init__.pyi index 3ec394b85c..3a9f849f05 100644 --- a/python/python/raphtory/graph_gen/__init__.pyi +++ b/python/python/raphtory/graph_gen/__init__.pyi @@ -1,6 +1,7 @@ """ Generate Raphtory graphs from attachment models """ + from __future__ import annotations ############################################################################### @@ -27,7 +28,8 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['random_attachment', 'ba_preferential_attachment'] +__all__ = ["random_attachment", "ba_preferential_attachment"] + def random_attachment(g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any = None): """ Generates a graph using the random attachment model @@ -46,7 +48,9 @@ def random_attachment(g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any None """ -def ba_preferential_attachment(g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any = None): +def ba_preferential_attachment( + g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any = None +): """ Generates a graph using the preferential attachment model. diff --git a/python/python/raphtory/graph_loader/__init__.pyi b/python/python/raphtory/graph_loader/__init__.pyi index 10ba033c37..e0b31f720f 100644 --- a/python/python/raphtory/graph_loader/__init__.pyi +++ b/python/python/raphtory/graph_loader/__init__.pyi @@ -1,6 +1,7 @@ """ Load and save Raphtory graphs from/to file(s) """ + from __future__ import annotations ############################################################################### @@ -27,7 +28,16 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['lotr_graph', 'lotr_graph_with_props', 'neo4j_movie_graph', 'stable_coin_graph', 'reddit_hyperlink_graph', 'reddit_hyperlink_graph_local', 'karate_club_graph'] +__all__ = [ + "lotr_graph", + "lotr_graph_with_props", + "neo4j_movie_graph", + "stable_coin_graph", + "reddit_hyperlink_graph", + "reddit_hyperlink_graph_local", + "karate_club_graph", +] + def lotr_graph() -> Graph: """ Load the Lord of the Rings dataset into a graph. @@ -56,7 +66,9 @@ def lotr_graph_with_props() -> Graph: Graph: """ -def neo4j_movie_graph(uri: str, username: str, password: str, database: str = ...) -> Graph: +def neo4j_movie_graph( + uri: str, username: str, password: str, database: str = ... +) -> Graph: """ Returns the neo4j movie graph example. 
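A quick sketch of the loaders stubbed above; `lotr_graph` fetches the bundled Lord of the Rings dataset, so the first call assumes network access:

from raphtory.graph_loader import lotr_graph

g = lotr_graph()
# Standard GraphView counters; earliest_time/latest_time etc. work the same way.
print(g.count_nodes(), g.count_edges())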
diff --git a/python/python/raphtory/graphql/__init__.pyi b/python/python/raphtory/graphql/__init__.pyi index b8315a8395..7e93c483cc 100644 --- a/python/python/raphtory/graphql/__init__.pyi +++ b/python/python/raphtory/graphql/__init__.pyi @@ -23,8 +23,26 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['GraphServer', 'RunningGraphServer', 'RaphtoryClient', 'RemoteGraph', 'RemoteEdge', 'RemoteNode', 'RemoteNodeAddition', 'RemoteUpdate', 'RemoteEdgeAddition', 'RemoteIndexSpec', 'PropsInput', 'SomePropertySpec', 'AllPropertySpec', 'encode_graph', 'decode_graph', 'schema'] -class GraphServer(object): +__all__ = [ + "GraphServer", + "RunningGraphServer", + "RaphtoryClient", + "RemoteGraph", + "RemoteEdge", + "RemoteNode", + "RemoteNodeAddition", + "RemoteUpdate", + "RemoteEdgeAddition", + "RemoteIndexSpec", + "PropsInput", + "SomePropertySpec", + "AllPropertySpec", + "encode_graph", + "decode_graph", + "schema", +] + +class GraphServer(object): """ A class for defining and running a Raphtory GraphQL server @@ -43,7 +61,21 @@ class GraphServer(object): create_index: """ - def __new__(cls, work_dir: str | PathLike, cache_capacity: Optional[int] = None, cache_tti_seconds: Optional[int] = None, log_level: Optional[str] = None, tracing: Optional[bool] = None, otlp_agent_host: Optional[str] = None, otlp_agent_port: Optional[str] = None, otlp_tracing_service_name: Optional[str] = None, auth_public_key: Any = None, auth_enabled_for_reads: Any = None, config_path: Optional[str | PathLike] = None, create_index: Any = None) -> GraphServer: + def __new__( + cls, + work_dir: str | PathLike, + cache_capacity: Optional[int] = None, + cache_tti_seconds: Optional[int] = None, + log_level: Optional[str] = None, + tracing: Optional[bool] = None, + otlp_agent_host: Optional[str] = None, + otlp_agent_port: Optional[str] = None, + otlp_tracing_service_name: Optional[str] = None, + auth_public_key: Any = None, + auth_enabled_for_reads: Any = None, + config_path: Optional[str | PathLike] = None, + create_index: Any = None, + ) -> GraphServer: """Create and return a new object. See help(type) for accurate signature.""" def run(self, port: int = 1736, timeout_ms: int = 180000) -> None: """ @@ -58,7 +90,13 @@ class GraphServer(object): None: """ - def set_embeddings(self, cache: str, embedding: Optional[Callable] = None, nodes: bool | str = True, edges: bool | str = True) -> GraphServer: + def set_embeddings( + self, + cache: str, + embedding: Optional[Callable] = None, + nodes: bool | str = True, + edges: bool | str = True, + ) -> GraphServer: """ Set up the server to vectorise graphs with a default template. @@ -94,7 +132,9 @@ class GraphServer(object): GraphServer: The server with indexing disabled """ - def with_vectorised_graphs(self, graph_names: list[str], nodes: bool | str = True, edges: bool | str = True) -> GraphServer: + def with_vectorised_graphs( + self, graph_names: list[str], nodes: bool | str = True, edges: bool | str = True + ) -> GraphServer: """ Vectorise a subset of the graphs of the server. @@ -107,15 +147,11 @@ class GraphServer(object): GraphServer: A new server object containing the vectorised graphs. """ -class RunningGraphServer(object): +class RunningGraphServer(object): """A Raphtory server handler that also enables querying the server""" - def __enter__(self): - ... - - def __exit__(self, _exc_type, _exc_val, _exc_tb): - ... - + def __enter__(self): ... + def __exit__(self, _exc_type, _exc_val, _exc_tb): ...
def get_client(self): """ Get the client for the server @@ -132,7 +168,7 @@ class RunningGraphServer(object): None: """ -class RaphtoryClient(object): +class RaphtoryClient(object): """ A client for handling GraphQL operations in the context of Raphtory. @@ -214,7 +250,9 @@ class RaphtoryClient(object): """ - def query(self, query: str, variables: Optional[dict[str, Any]] = None) -> dict[str, Any]: + def query( + self, query: str, variables: Optional[dict[str, Any]] = None + ) -> dict[str, Any]: """ Make a GraphQL query against the server. @@ -231,7 +269,7 @@ class RaphtoryClient(object): Receive graph from a path path on the server Note: - This downloads a copy of the graph. Modifications are not persistet to the server. + This downloads a copy of the graph. Modifications are not persisted to the server. Arguments: path (str): the path of the graph to be received @@ -252,7 +290,9 @@ class RaphtoryClient(object): """ - def send_graph(self, path: str, graph: Graph | PersistentGraph, overwrite: bool = False) -> dict[str, Any]: + def send_graph( + self, path: str, graph: Graph | PersistentGraph, overwrite: bool = False + ) -> dict[str, Any]: """ Send a graph to the server @@ -265,7 +305,9 @@ class RaphtoryClient(object): dict[str, Any]: The data field from the graphQL response after executing the mutation. """ - def upload_graph(self, path: str, file_path: str, overwrite: bool = False) -> dict[str, Any]: + def upload_graph( + self, path: str, file_path: str, overwrite: bool = False + ) -> dict[str, Any]: """ Upload graph file from a path file_path on the client @@ -278,9 +320,15 @@ class RaphtoryClient(object): dict[str, Any]: The data field from the graphQL response after executing the mutation. """ -class RemoteGraph(object): - - def add_edge(self, timestamp: int | str | datetime, src: str | int, dst: str | int, properties: Optional[dict] = None, layer: Optional[str] = None) -> RemoteEdge: +class RemoteGraph(object): + def add_edge( + self, + timestamp: int | str | datetime, + src: str | int, + dst: str | int, + properties: Optional[dict] = None, + layer: Optional[str] = None, + ) -> RemoteEdge: """ Adds a new edge with the given source and destination nodes and properties to the remote graph. @@ -317,7 +365,13 @@ class RemoteGraph(object): None: """ - def add_node(self, timestamp: int | str | datetime, id: str | int, properties: Optional[dict] = None, node_type: Optional[str] = None) -> RemoteNode: + def add_node( + self, + timestamp: int | str | datetime, + id: str | int, + properties: Optional[dict] = None, + node_type: Optional[str] = None, + ) -> RemoteNode: """ Adds a new node with the given id and properties to the remote graph. @@ -354,7 +408,13 @@ class RemoteGraph(object): None: """ - def create_node(self, timestamp: int | str | datetime, id: str | int, properties: Optional[dict] = None, node_type: Optional[str] = None) -> RemoteNode: + def create_node( + self, + timestamp: int | str | datetime, + id: str | int, + properties: Optional[dict] = None, + node_type: Optional[str] = None, + ) -> RemoteNode: """ Create a new node with the given id and properties to the remote graph and fail if the node already exists. 
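Tying the `RaphtoryClient` methods above together, a hedged end-to-end sketch (the server address and graph path are placeholders, and the exact GraphQL query shape depends on the server schema):

from raphtory import Graph
from raphtory.graphql import RaphtoryClient

# Assumes a GraphServer is already running at this address.
client = RaphtoryClient("http://localhost:1736")

g = Graph()
g.add_edge(1, "a", "b")

# Upload a local graph under a server-side path.
client.send_graph("examples/demo", g, overwrite=True)

# Query it back over GraphQL; the data field of the response is returned.
result = client.query('{ graph(path: "examples/demo") { nodes { list { name } } } }')
print(result)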
@@ -368,7 +428,13 @@ class RemoteGraph(object): RemoteNode: the new remote node """ - def delete_edge(self, timestamp: int, src: str | int, dst: str | int, layer: Optional[str] = None) -> RemoteEdge: + def delete_edge( + self, + timestamp: int, + src: str | int, + dst: str | int, + layer: Optional[str] = None, + ) -> RemoteEdge: """ Deletes an edge in the remote graph, given the timestamp, src and dst nodes and layer (optional) @@ -416,7 +482,7 @@ class RemoteGraph(object): None: """ -class RemoteEdge(object): +class RemoteEdge(object): """ A remote edge reference @@ -425,7 +491,9 @@ class RemoteEdge(object): and [RemoteGraph.delete_edge][raphtory.graphql.RemoteGraph.delete_edge]. """ - def add_metadata(self, properties: dict[str, PropValue], layer: Optional[str] = None) -> None: + def add_metadata( + self, properties: dict[str, PropValue], layer: Optional[str] = None + ) -> None: """ Add metadata to the edge within the remote graph. This function is used to add metadata to an edge that does not @@ -439,7 +507,12 @@ class RemoteEdge(object): None: """ - def add_updates(self, t: int | str | datetime, properties: Optional[dict[str, PropValue]] = None, layer: Optional[str] = None) -> None: + def add_updates( + self, + t: int | str | datetime, + properties: Optional[dict[str, PropValue]] = None, + layer: Optional[str] = None, + ) -> None: """ Add updates to an edge in the remote graph at a specified time. @@ -470,7 +543,9 @@ class RemoteEdge(object): GraphError: If the operation fails. """ - def update_metadata(self, properties: dict[str, PropValue], layer: Optional[str] = None) -> None: + def update_metadata( + self, properties: dict[str, PropValue], layer: Optional[str] = None + ) -> None: """ Update metadata of an edge in the remote graph overwriting existing values. This function is used to add properties to an edge that does not @@ -484,8 +559,7 @@ class RemoteEdge(object): None: """ -class RemoteNode(object): - +class RemoteNode(object): def add_metadata(self, properties: dict[str, PropValue]) -> None: """ Add metadata to a node in the remote graph. @@ -499,7 +573,9 @@ class RemoteNode(object): None: """ - def add_updates(self, t: int | str | datetime, properties: Optional[dict[str, PropValue]] = None) -> None: + def add_updates( + self, t: int | str | datetime, properties: Optional[dict[str, PropValue]] = None + ) -> None: """ Add updates to a node in the remote graph at a specified time. This function allows for the addition of property updates to a node within the graph. The updates are time-stamped, meaning they are applied at the specified time. @@ -537,7 +613,7 @@ class RemoteNode(object): None: """ -class RemoteNodeAddition(object): +class RemoteNodeAddition(object): """ Node addition update @@ -548,10 +624,16 @@ class RemoteNodeAddition(object): updates (list[RemoteUpdate], optional): the temporal updates """ - def __new__(cls, name: GID, node_type: Optional[str] = None, metadata: Optional[PropInput] = None, updates: Optional[list[RemoteUpdate]] = None) -> RemoteNodeAddition: + def __new__( + cls, + name: GID, + node_type: Optional[str] = None, + metadata: Optional[PropInput] = None, + updates: Optional[list[RemoteUpdate]] = None, + ) -> RemoteNodeAddition: """Create and return a new object. 
See help(type) for accurate signature.""" -class RemoteUpdate(object): +class RemoteUpdate(object): """ A temporal update @@ -560,10 +642,12 @@ class RemoteUpdate(object): properties (PropInput, optional): the properties for the update """ - def __new__(cls, time: TimeInput, properties: Optional[PropInput] = None) -> RemoteUpdate: + def __new__( + cls, time: TimeInput, properties: Optional[PropInput] = None + ) -> RemoteUpdate: """Create and return a new object. See help(type) for accurate signature.""" -class RemoteEdgeAddition(object): +class RemoteEdgeAddition(object): """ An edge update @@ -575,10 +659,17 @@ class RemoteEdgeAddition(object): updates (list[RemoteUpdate], optional): the temporal updates for the edge """ - def __new__(cls, src: GID, dst: GID, layer: Optional[str] = None, metadata: Optional[PropInput] = None, updates: Optional[list[RemoteUpdate]] = None) -> RemoteEdgeAddition: + def __new__( + cls, + src: GID, + dst: GID, + layer: Optional[str] = None, + metadata: Optional[PropInput] = None, + updates: Optional[list[RemoteUpdate]] = None, + ) -> RemoteEdgeAddition: """Create and return a new object. See help(type) for accurate signature.""" -class RemoteIndexSpec(object): +class RemoteIndexSpec(object): """ Create a RemoteIndexSpec specifying which node and edge properties to index. @@ -590,7 +681,7 @@ class RemoteIndexSpec(object): def __new__(cls, node_props: PropsInput, edge_props: PropsInput) -> RemoteIndexSpec: """Create and return a new object. See help(type) for accurate signature.""" -class PropsInput(object): +class PropsInput(object): """ Create a PropsInput by choosing to include all/some properties explicitly. @@ -602,10 +693,14 @@ class PropsInput(object): ValueError: If neither all nor some is specified. """ - def __new__(cls, all: Optional[AllPropertySpec] = None, some: Optional[SomePropertySpec] = None) -> PropsInput: + def __new__( + cls, + all: Optional[AllPropertySpec] = None, + some: Optional[SomePropertySpec] = None, + ) -> PropsInput: """Create and return a new object. See help(type) for accurate signature.""" -class SomePropertySpec(object): +class SomePropertySpec(object): """ Create a SomePropertySpec by explicitly listing metadata and/or temporal property names. @@ -614,10 +709,12 @@ class SomePropertySpec(object): properties (list[str]): Temporal property names. Defaults to []. """ - def __new__(cls, metadata: list[str] = [], properties: list[str] = []) -> SomePropertySpec: + def __new__( + cls, metadata: list[str] = [], properties: list[str] = [] + ) -> SomePropertySpec: """Create and return a new object. See help(type) for accurate signature.""" -class AllPropertySpec(object): +class AllPropertySpec(object): """ Specifies that **all** properties should be included when creating an index. Use one of the predefined variants: ALL, ALL_METADATA, or ALL_TEMPORAL.
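The batch-update and index-spec builders stubbed above compose directly; a construction-only sketch (node names and property keys are hypothetical):

from raphtory.graphql import (
    PropsInput,
    RemoteIndexSpec,
    RemoteNodeAddition,
    RemoteUpdate,
    SomePropertySpec,
)

# One node with two timestamped property updates.
update = RemoteNodeAddition(
    name="a",
    node_type="person",
    updates=[
        RemoteUpdate(time=1, properties={"status": "new"}),
        RemoteUpdate(time=5, properties={"status": "active"}),
    ],
)

# Index only the listed temporal node property; index nothing on edges.
spec = RemoteIndexSpec(
    node_props=PropsInput(some=SomePropertySpec(properties=["status"])),
    edge_props=PropsInput(some=SomePropertySpec()),
)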
diff --git a/python/python/raphtory/iterables/__init__.pyi b/python/python/raphtory/iterables/__init__.pyi index ec2c4d6ee9..2a80bbc5cb 100644 --- a/python/python/raphtory/iterables/__init__.pyi +++ b/python/python/raphtory/iterables/__init__.pyi @@ -23,9 +23,33 @@ from os import PathLike import networkx as nx # type: ignore import pyvis # type: ignore -__all__ = ['NestedUtcDateTimeIterable', 'NestedGIDIterable', 'GIDIterable', 'StringIterable', 'OptionArcStringIterable', 'UsizeIterable', 'OptionI64Iterable', 'NestedOptionArcStringIterable', 'NestedStringIterable', 'NestedOptionI64Iterable', 'NestedI64VecIterable', 'NestedUsizeIterable', 'BoolIterable', 'ArcStringIterable', 'NestedVecUtcDateTimeIterable', 'OptionVecUtcDateTimeIterable', 'GIDGIDIterable', 'NestedGIDGIDIterable', 'NestedBoolIterable', 'U64Iterable', 'OptionUtcDateTimeIterable', 'ArcStringVecIterable', 'NestedArcStringVecIterable'] -class NestedUtcDateTimeIterable(object): - +__all__ = [ + "NestedUtcDateTimeIterable", + "NestedGIDIterable", + "GIDIterable", + "StringIterable", + "OptionArcStringIterable", + "UsizeIterable", + "OptionI64Iterable", + "NestedOptionArcStringIterable", + "NestedStringIterable", + "NestedOptionI64Iterable", + "NestedI64VecIterable", + "NestedUsizeIterable", + "BoolIterable", + "ArcStringIterable", + "NestedVecUtcDateTimeIterable", + "OptionVecUtcDateTimeIterable", + "GIDGIDIterable", + "NestedGIDGIDIterable", + "NestedBoolIterable", + "U64Iterable", + "OptionUtcDateTimeIterable", + "ArcStringVecIterable", + "NestedArcStringVecIterable", +] + +class NestedUtcDateTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -53,11 +77,9 @@ class NestedUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class NestedGIDIterable(object): + def collect(self): ... +class NestedGIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -85,17 +107,11 @@ class NestedGIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def min(self): - ... - -class GIDIterable(object): + def collect(self): ... + def max(self): ... + def min(self): ... +class GIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -123,17 +139,11 @@ class GIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def min(self): - ... - -class StringIterable(object): + def collect(self): ... + def max(self): ... + def min(self): ... +class StringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -161,11 +171,9 @@ class StringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class OptionArcStringIterable(object): + def collect(self): ... +class OptionArcStringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -193,11 +201,9 @@ class OptionArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class UsizeIterable(object): + def collect(self): ... +class UsizeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -225,23 +231,13 @@ class UsizeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def mean(self): - ... - - def min(self): - ... - - def sum(self): - ... - -class OptionI64Iterable(object): + def collect(self): ... + def max(self): ... + def mean(self): ... + def min(self): ... 
+ def sum(self): ... +class OptionI64Iterable(object): def __eq__(self, value): """Return self==value.""" @@ -269,17 +265,11 @@ class OptionI64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def min(self): - ... - -class NestedOptionArcStringIterable(object): + def collect(self): ... + def max(self): ... + def min(self): ... +class NestedOptionArcStringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -307,11 +297,9 @@ class NestedOptionArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class NestedStringIterable(object): + def collect(self): ... +class NestedStringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -339,11 +327,9 @@ class NestedStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class NestedOptionI64Iterable(object): + def collect(self): ... +class NestedOptionI64Iterable(object): def __eq__(self, value): """Return self==value.""" @@ -371,17 +357,11 @@ class NestedOptionI64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def min(self): - ... - -class NestedI64VecIterable(object): + def collect(self): ... + def max(self): ... + def min(self): ... +class NestedI64VecIterable(object): def __eq__(self, value): """Return self==value.""" @@ -409,11 +389,9 @@ class NestedI64VecIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class NestedUsizeIterable(object): + def collect(self): ... +class NestedUsizeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -441,23 +419,13 @@ class NestedUsizeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def mean(self): - ... - - def min(self): - ... - - def sum(self): - ... - -class BoolIterable(object): + def collect(self): ... + def max(self): ... + def mean(self): ... + def min(self): ... + def sum(self): ... +class BoolIterable(object): def __eq__(self, value): """Return self==value.""" @@ -485,11 +453,9 @@ class BoolIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class ArcStringIterable(object): + def collect(self): ... +class ArcStringIterable(object): def __iter__(self): """Implement iter(self).""" @@ -499,11 +465,9 @@ class ArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class NestedVecUtcDateTimeIterable(object): + def collect(self): ... +class NestedVecUtcDateTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -531,11 +495,9 @@ class NestedVecUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class OptionVecUtcDateTimeIterable(object): + def collect(self): ... +class OptionVecUtcDateTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -563,11 +525,9 @@ class OptionVecUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class GIDGIDIterable(object): + def collect(self): ... +class GIDGIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -595,17 +555,11 @@ class GIDGIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def min(self): - ... 
- -class NestedGIDGIDIterable(object): + def collect(self): ... + def max(self): ... + def min(self): ... +class NestedGIDGIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -633,17 +587,11 @@ class NestedGIDGIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def min(self): - ... - -class NestedBoolIterable(object): + def collect(self): ... + def max(self): ... + def min(self): ... +class NestedBoolIterable(object): def __eq__(self, value): """Return self==value.""" @@ -671,11 +619,9 @@ class NestedBoolIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class U64Iterable(object): + def collect(self): ... +class U64Iterable(object): def __eq__(self, value): """Return self==value.""" @@ -703,23 +649,13 @@ class U64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - - def max(self): - ... - - def mean(self): - ... - - def min(self): - ... - - def sum(self): - ... - -class OptionUtcDateTimeIterable(object): + def collect(self): ... + def max(self): ... + def mean(self): ... + def min(self): ... + def sum(self): ... +class OptionUtcDateTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -747,11 +683,9 @@ class OptionUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class ArcStringVecIterable(object): + def collect(self): ... +class ArcStringVecIterable(object): def __eq__(self, value): """Return self==value.""" @@ -779,11 +713,9 @@ class ArcStringVecIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... - -class NestedArcStringVecIterable(object): + def collect(self): ... +class NestedArcStringVecIterable(object): def __eq__(self, value): """Return self==value.""" @@ -811,5 +743,4 @@ class NestedArcStringVecIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): - ... + def collect(self): ... 
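All of the iterable stubs above follow one pattern: `collect()` materialises the lazy values into plain Python objects, and the numeric variants add `min`/`max`/`mean`/`sum`. A sketch, assuming chained node accessors such as `g.nodes.neighbours.name` are what yield these types (the concrete iterable class is version-dependent):

from raphtory import Graph

g = Graph()
g.add_edge(1, "a", "b")
g.add_edge(2, "b", "c")

# A nested lazy iterable: one list of neighbour names per node.
print(g.nodes.neighbours.name.collect())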
diff --git a/python/python/raphtory/node_state/__init__.pyi b/python/python/raphtory/node_state/__init__.pyi index 469a550b2e..456f7240dd 100644 --- a/python/python/raphtory/node_state/__init__.pyi +++ b/python/python/raphtory/node_state/__init__.pyi @@ -23,9 +23,42 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['NodeGroups', 'DegreeView', 'NodeStateUsize', 'NodeStateU64', 'NodeStateOptionI64', 'IdView', 'NodeStateGID', 'EarliestTimeView', 'LatestTimeView', 'NameView', 'NodeStateString', 'EarliestDateTimeView', 'LatestDateTimeView', 'NodeStateOptionDateTime', 'HistoryView', 'EdgeHistoryCountView', 'NodeStateListI64', 'HistoryDateTimeView', 'NodeStateOptionListDateTime', 'NodeTypeView', 'NodeStateOptionStr', 'NodeStateListDateTime', 'NodeStateWeightedSP', 'NodeStateF64', 'NodeStateNodes', 'NodeStateReachability', 'NodeStateListF64', 'NodeStateMotifs', 'NodeStateHits', 'NodeStateSEIR', 'NodeLayout', 'NodeStateF64String'] -class NodeGroups(object): - +__all__ = [ + "NodeGroups", + "DegreeView", + "NodeStateUsize", + "NodeStateU64", + "NodeStateOptionI64", + "IdView", + "NodeStateGID", + "EarliestTimeView", + "LatestTimeView", + "NameView", + "NodeStateString", + "EarliestDateTimeView", + "LatestDateTimeView", + "NodeStateOptionDateTime", + "HistoryView", + "EdgeHistoryCountView", + "NodeStateListI64", + "HistoryDateTimeView", + "NodeStateOptionListDateTime", + "NodeTypeView", + "NodeStateOptionStr", + "NodeStateListDateTime", + "NodeStateWeightedSP", + "NodeStateF64", + "NodeStateNodes", + "NodeStateReachability", + "NodeStateListF64", + "NodeStateMotifs", + "NodeStateHits", + "NodeStateSEIR", + "NodeLayout", + "NodeStateF64String", +] + +class NodeGroups(object): def __bool__(self): """True if self else False""" @@ -68,7 +101,7 @@ class NodeGroups(object): Iterator[Tuple[Any, GraphView]]: Iterator over subgraphs with corresponding value """ -class DegreeView(object): +class DegreeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -230,7 +263,9 @@ class DegreeView(object): DegreeView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -385,7 +420,12 @@ class DegreeView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. 
If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -574,8 +614,7 @@ class DegreeView(object): Optional[int]: """ -class NodeStateUsize(object): - +class NodeStateUsize(object): def __eq__(self, value): """Return self==value.""" @@ -768,8 +807,7 @@ class NodeStateUsize(object): Iterator[int]: Iterator over values """ -class NodeStateU64(object): - +class NodeStateU64(object): def __eq__(self, value): """Return self==value.""" @@ -954,8 +992,7 @@ class NodeStateU64(object): Iterator[int]: Iterator over values """ -class NodeStateOptionI64(object): - +class NodeStateOptionI64(object): def __eq__(self, value): """Return self==value.""" @@ -997,7 +1034,9 @@ class NodeStateOptionI64(object): NodeStateOptionI64: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: + def get( + self, node: NodeInput, default: Optional[Optional[int]] = None + ) -> Optional[Optional[int]]: """ Get value for node @@ -1131,7 +1170,7 @@ class NodeStateOptionI64(object): Iterator[Optional[int]]: Iterator over values """ -class IdView(object): +class IdView(object): """A lazy view over node values""" def __eq__(self, value): @@ -1317,8 +1356,7 @@ class IdView(object): Iterator[GID]: Iterator over values """ -class NodeStateGID(object): - +class NodeStateGID(object): def __eq__(self, value): """Return self==value.""" @@ -1486,7 +1524,7 @@ class NodeStateGID(object): Iterator[GID]: Iterator over values """ -class EarliestTimeView(object): +class EarliestTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -1648,7 +1686,9 @@ class EarliestTimeView(object): EarliestTimeView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -1667,7 +1707,9 @@ class EarliestTimeView(object): WindowSet: A `WindowSet` object. """ - def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: + def get( + self, node: NodeInput, default: Optional[Optional[int]] = None + ) -> Optional[Optional[int]]: """ Get value for node @@ -1795,7 +1837,12 @@ class EarliestTimeView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -1975,7 +2022,7 @@ class EarliestTimeView(object): Optional[int]: """ -class LatestTimeView(object): +class LatestTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -2137,7 +2184,9 @@ class LatestTimeView(object): LatestTimeView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2156,7 +2205,9 @@ class LatestTimeView(object): WindowSet: A `WindowSet` object. 
""" - def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: + def get( + self, node: NodeInput, default: Optional[Optional[int]] = None + ) -> Optional[Optional[int]]: """ Get value for node @@ -2284,7 +2335,12 @@ class LatestTimeView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -2464,7 +2520,7 @@ class LatestTimeView(object): Optional[int]: """ -class NameView(object): +class NameView(object): """A lazy view over node values""" def __eq__(self, value): @@ -2658,8 +2714,7 @@ class NameView(object): Iterator[str]: Iterator over values """ -class NodeStateString(object): - +class NodeStateString(object): def __eq__(self, value): """Return self==value.""" @@ -2835,7 +2890,7 @@ class NodeStateString(object): Iterator[str]: Iterator over values """ -class EarliestDateTimeView(object): +class EarliestDateTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -2997,7 +3052,9 @@ class EarliestDateTimeView(object): EarliestDateTimeView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3016,7 +3073,9 @@ class EarliestDateTimeView(object): WindowSet: A `WindowSet` object. """ - def get(self, node: NodeInput, default: Optional[Optional[datetime]] = None) -> Optional[Optional[datetime]]: + def get( + self, node: NodeInput, default: Optional[Optional[datetime]] = None + ) -> Optional[Optional[datetime]]: """ Get value for node @@ -3144,7 +3203,12 @@ class EarliestDateTimeView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3324,7 +3388,7 @@ class EarliestDateTimeView(object): Optional[int]: """ -class LatestDateTimeView(object): +class LatestDateTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -3486,7 +3550,9 @@ class LatestDateTimeView(object): LatestDateTimeView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3505,7 +3571,9 @@ class LatestDateTimeView(object): WindowSet: A `WindowSet` object. 
""" - def get(self, node: NodeInput, default: Optional[Optional[datetime]] = None) -> Optional[Optional[datetime]]: + def get( + self, node: NodeInput, default: Optional[Optional[datetime]] = None + ) -> Optional[Optional[datetime]]: """ Get value for node @@ -3633,7 +3701,12 @@ class LatestDateTimeView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3813,8 +3886,7 @@ class LatestDateTimeView(object): Optional[int]: """ -class NodeStateOptionDateTime(object): - +class NodeStateOptionDateTime(object): def __eq__(self, value): """Return self==value.""" @@ -3856,7 +3928,9 @@ class NodeStateOptionDateTime(object): NodeStateOptionDateTime: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[Optional[datetime]] = None) -> Optional[Optional[datetime]]: + def get( + self, node: NodeInput, default: Optional[Optional[datetime]] = None + ) -> Optional[Optional[datetime]]: """ Get value for node @@ -3990,7 +4064,7 @@ class NodeStateOptionDateTime(object): Iterator[Optional[datetime]]: Iterator over values """ -class HistoryView(object): +class HistoryView(object): """A lazy view over node values""" def __eq__(self, value): @@ -4152,7 +4226,9 @@ class HistoryView(object): HistoryView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -4171,7 +4247,9 @@ class HistoryView(object): WindowSet: A `WindowSet` object. """ - def get(self, node: NodeInput, default: Optional[list[int]] = None) -> Optional[list[int]]: + def get( + self, node: NodeInput, default: Optional[list[int]] = None + ) -> Optional[list[int]]: """ Get value for node @@ -4291,7 +4369,12 @@ class HistoryView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -4471,7 +4554,7 @@ class HistoryView(object): Optional[int]: """ -class EdgeHistoryCountView(object): +class EdgeHistoryCountView(object): """A lazy view over node values""" def __eq__(self, value): @@ -4633,7 +4716,9 @@ class EdgeHistoryCountView(object): EdgeHistoryCountView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. 
@@ -4780,7 +4865,12 @@ class EdgeHistoryCountView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -4969,8 +5059,7 @@ class EdgeHistoryCountView(object): Optional[int]: """ -class NodeStateListI64(object): - +class NodeStateListI64(object): def __eq__(self, value): """Return self==value.""" @@ -5012,7 +5101,9 @@ class NodeStateListI64(object): NodeStateListI64: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[list[int]] = None) -> Optional[list[int]]: + def get( + self, node: NodeInput, default: Optional[list[int]] = None + ) -> Optional[list[int]]: """ Get value for node @@ -5138,7 +5229,7 @@ class NodeStateListI64(object): Iterator[list[int]]: Iterator over values """ -class HistoryDateTimeView(object): +class HistoryDateTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -5300,7 +5391,9 @@ class HistoryDateTimeView(object): HistoryDateTimeView: The layered view """ - def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: + def expanding( + self, step: int | str, alignment_unit: str | None = None + ) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -5319,7 +5412,9 @@ class HistoryDateTimeView(object): WindowSet: A `WindowSet` object. """ - def get(self, node: NodeInput, default: Optional[Optional[list[datetime]]] = None) -> Optional[Optional[list[datetime]]]: + def get( + self, node: NodeInput, default: Optional[Optional[list[datetime]]] = None + ) -> Optional[Optional[list[datetime]]]: """ Get value for node @@ -5439,7 +5534,12 @@ class HistoryDateTimeView(object): Nodes: The nodes """ - def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: + def rolling( + self, + window: int | str, + step: int | str | None = None, + alignment_unit: str | None = None, + ) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. 
If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -5619,8 +5719,7 @@ class HistoryDateTimeView(object): Optional[int]: """ -class NodeStateOptionListDateTime(object): - +class NodeStateOptionListDateTime(object): def __eq__(self, value): """Return self==value.""" @@ -5662,7 +5761,9 @@ class NodeStateOptionListDateTime(object): NodeStateOptionListDateTime: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[Optional[list[datetime]]] = None) -> Optional[Optional[list[datetime]]]: + def get( + self, node: NodeInput, default: Optional[Optional[list[datetime]]] = None + ) -> Optional[Optional[list[datetime]]]: """ Get value for node @@ -5788,7 +5889,7 @@ class NodeStateOptionListDateTime(object): Iterator[Optional[list[datetime]]]: Iterator over values """ -class NodeTypeView(object): +class NodeTypeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -5848,7 +5949,9 @@ class NodeTypeView(object): NodeStateOptionStr: the computed `NodeState` """ - def get(self, node: NodeInput, default: Optional[Optional[str]] = None) -> Optional[Optional[str]]: + def get( + self, node: NodeInput, default: Optional[Optional[str]] = None + ) -> Optional[Optional[str]]: """ Get value for node @@ -5982,8 +6085,7 @@ class NodeTypeView(object): Iterator[Optional[str]]: Iterator over values """ -class NodeStateOptionStr(object): - +class NodeStateOptionStr(object): def __eq__(self, value): """Return self==value.""" @@ -6025,7 +6127,9 @@ class NodeStateOptionStr(object): NodeStateOptionStr: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[Optional[str]] = None) -> Optional[Optional[str]]: + def get( + self, node: NodeInput, default: Optional[Optional[str]] = None + ) -> Optional[Optional[str]]: """ Get value for node @@ -6159,8 +6263,7 @@ class NodeStateOptionStr(object): Iterator[Optional[str]]: Iterator over values """ -class NodeStateListDateTime(object): - +class NodeStateListDateTime(object): def __eq__(self, value): """Return self==value.""" @@ -6202,7 +6305,9 @@ class NodeStateListDateTime(object): NodeStateListDateTime: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[list[datetime]] = None) -> Optional[list[datetime]]: + def get( + self, node: NodeInput, default: Optional[list[datetime]] = None + ) -> Optional[list[datetime]]: """ Get value for node @@ -6328,8 +6433,7 @@ class NodeStateListDateTime(object): Iterator[list[datetime]]: Iterator over values """ -class NodeStateWeightedSP(object): - +class NodeStateWeightedSP(object): def __eq__(self, value): """Return self==value.""" @@ -6360,7 +6464,9 @@ class NodeStateWeightedSP(object): def __repr__(self): """Return repr(self).""" - def get(self, node: NodeInput, default: Optional[Tuple[float, Nodes]] = None) -> Optional[Tuple[float, Nodes]]: + def get( + self, node: NodeInput, default: Optional[Tuple[float, Nodes]] = None + ) -> Optional[Tuple[float, Nodes]]: """ Get value for node @@ -6415,8 +6521,7 @@ class NodeStateWeightedSP(object): Iterator[Tuple[float, Nodes]]: Iterator over values """ -class NodeStateF64(object): - +class NodeStateF64(object): def __eq__(self, value): """Return self==value.""" @@ -6601,8 +6706,7 @@ class NodeStateF64(object): Iterator[float]: Iterator over values """ -class NodeStateNodes(object): - +class NodeStateNodes(object): def __eq__(self, value): """Return self==value.""" @@ -6688,8 +6792,7 @@ class 
NodeStateNodes(object): Iterator[Nodes]: Iterator over values """ -class NodeStateReachability(object): - +class NodeStateReachability(object): def __eq__(self, value): """Return self==value.""" @@ -6720,7 +6823,9 @@ class NodeStateReachability(object): def __repr__(self): """Return repr(self).""" - def get(self, node: NodeInput, default: Optional[list[Tuple[int, str]]] = None) -> Optional[list[Tuple[int, str]]]: + def get( + self, node: NodeInput, default: Optional[list[Tuple[int, str]]] = None + ) -> Optional[list[Tuple[int, str]]]: """ Get value for node @@ -6775,8 +6880,7 @@ class NodeStateReachability(object): Iterator[list[Tuple[int, str]]]: Iterator over values """ -class NodeStateListF64(object): - +class NodeStateListF64(object): def __eq__(self, value): """Return self==value.""" @@ -6807,7 +6911,9 @@ class NodeStateListF64(object): def __repr__(self): """Return repr(self).""" - def get(self, node: NodeInput, default: Optional[list[float]] = None) -> Optional[list[float]]: + def get( + self, node: NodeInput, default: Optional[list[float]] = None + ) -> Optional[list[float]]: """ Get value for node @@ -6862,8 +6968,7 @@ class NodeStateListF64(object): Iterator[list[float]]: Iterator over values """ -class NodeStateMotifs(object): - +class NodeStateMotifs(object): def __eq__(self, value): """Return self==value.""" @@ -6905,7 +7010,9 @@ class NodeStateMotifs(object): NodeStateMotifs: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[list[int]] = None) -> Optional[list[int]]: + def get( + self, node: NodeInput, default: Optional[list[int]] = None + ) -> Optional[list[int]]: """ Get value for node @@ -7031,8 +7138,7 @@ class NodeStateMotifs(object): Iterator[list[int]]: Iterator over values """ -class NodeStateHits(object): - +class NodeStateHits(object): def __eq__(self, value): """Return self==value.""" @@ -7074,7 +7180,9 @@ class NodeStateHits(object): NodeStateHits: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[Tuple[float, float]] = None) -> Optional[Tuple[float, float]]: + def get( + self, node: NodeInput, default: Optional[Tuple[float, float]] = None + ) -> Optional[Tuple[float, float]]: """ Get value for node @@ -7200,8 +7308,7 @@ class NodeStateHits(object): Iterator[Tuple[float, float]]: Iterator over values """ -class NodeStateSEIR(object): - +class NodeStateSEIR(object): def __eq__(self, value): """Return self==value.""" @@ -7243,7 +7350,9 @@ class NodeStateSEIR(object): NodeStateSEIR: The k smallest values as a node state """ - def get(self, node: NodeInput, default: Optional[Infected] = None) -> Optional[Infected]: + def get( + self, node: NodeInput, default: Optional[Infected] = None + ) -> Optional[Infected]: """ Get value for node @@ -7369,8 +7478,7 @@ class NodeStateSEIR(object): Iterator[Infected]: Iterator over values """ -class NodeLayout(object): - +class NodeLayout(object): def __eq__(self, value): """Return self==value.""" @@ -7401,7 +7509,9 @@ class NodeLayout(object): def __repr__(self): """Return repr(self).""" - def get(self, node: NodeInput, default: Optional[list[float]] = None) -> Optional[list[float]]: + def get( + self, node: NodeInput, default: Optional[list[float]] = None + ) -> Optional[list[float]]: """ Get value for node @@ -7456,8 +7566,7 @@ class NodeLayout(object): Iterator[list[float]]: Iterator over values """ -class NodeStateF64String(object): - +class NodeStateF64String(object): def __eq__(self, value): """Return self==value.""" @@ -7488,7 +7597,9 @@ 
class NodeStateF64String(object): def __repr__(self): """Return repr(self).""" - def get(self, node: NodeInput, default: Optional[Tuple[float, str]] = None) -> Optional[Tuple[float, str]]: + def get( + self, node: NodeInput, default: Optional[Tuple[float, str]] = None + ) -> Optional[Tuple[float, str]]: """ Get value for node diff --git a/python/python/raphtory/vectors/__init__.pyi b/python/python/raphtory/vectors/__init__.pyi index bd615cda2f..6b9e515fac 100644 --- a/python/python/raphtory/vectors/__init__.pyi +++ b/python/python/raphtory/vectors/__init__.pyi @@ -23,10 +23,15 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['VectorisedGraph', 'Document', 'Embedding', 'VectorSelection'] -class VectorisedGraph(object): +__all__ = ["VectorisedGraph", "Document", "Embedding", "VectorSelection"] - def edges_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> VectorSelection: +class VectorisedGraph(object): + def edges_by_similarity( + self, + query: str | list, + limit: int, + window: Optional[Tuple[int | str, int | str]] = None, + ) -> VectorSelection: """ Search the top scoring edges according to `query` with no more than `limit` edges @@ -42,7 +47,12 @@ class VectorisedGraph(object): def empty_selection(self): """Return an empty selection of documents""" - def entities_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> VectorSelection: + def entities_by_similarity( + self, + query: str | list, + limit: int, + window: Optional[Tuple[int | str, int | str]] = None, + ) -> VectorSelection: """ Search the top scoring entities according to `query` with no more than `limit` entities @@ -55,7 +65,12 @@ class VectorisedGraph(object): VectorSelection: The vector selection resulting from the search """ - def nodes_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> VectorSelection: + def nodes_by_similarity( + self, + query: str | list, + limit: int, + window: Optional[Tuple[int | str, int | str]] = None, + ) -> VectorSelection: """ Search the top scoring nodes according to `query` with no more than `limit` nodes @@ -68,7 +83,7 @@ class VectorisedGraph(object): VectorSelection: The vector selection resulting from the search """ -class Document(object): +class Document(object): """ A Document @@ -109,13 +124,11 @@ class Document(object): Optional[Any]: """ -class Embedding(object): - +class Embedding(object): def __repr__(self): """Return repr(self).""" -class VectorSelection(object): - +class VectorSelection(object): def add_edges(self, edges: list) -> None: """ Add all the documents associated with the `edges` to the current selection @@ -161,7 +174,9 @@ class VectorSelection(object): list[Edge]: list of edges in the current selection """ - def expand(self, hops: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: + def expand( + self, hops: int, window: Optional[Tuple[int | str, int | str]] = None + ) -> None: """ Add all the documents `hops` hops away to the selection @@ -178,7 +193,12 @@ class VectorSelection(object): None: """ - def expand_edges_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: + def expand_edges_by_similarity( + self, + query: str | list, + limit: int, + window: Optional[Tuple[int | str, int | str]] = None, + ) -> None: """ Add the top `limit` adjacent edges with higher 
score for `query` to the selection @@ -193,7 +213,12 @@ class VectorSelection(object): None: """ - def expand_entities_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: + def expand_entities_by_similarity( + self, + query: str | list, + limit: int, + window: Optional[Tuple[int | str, int | str]] = None, + ) -> None: """ Add the top `limit` adjacent entities with higher score for `query` to the selection @@ -215,7 +240,12 @@ class VectorSelection(object): None: """ - def expand_nodes_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: + def expand_nodes_by_similarity( + self, + query: str | list, + limit: int, + window: Optional[Tuple[int | str, int | str]] = None, + ) -> None: """ Add the top `limit` adjacent nodes with higher score for `query` to the selection diff --git a/python/tests/test_base_install/test_graphdb/test_graphdb.py b/python/tests/test_base_install/test_graphdb/test_graphdb.py index 59b843676c..a0470cc96d 100644 --- a/python/tests/test_base_install/test_graphdb/test_graphdb.py +++ b/python/tests/test_base_install/test_graphdb/test_graphdb.py @@ -1203,7 +1203,7 @@ def test_save_missing_dir(): g = create_graph() tmpdirname = tempfile.TemporaryDirectory() inner_folder = "".join(random.choice(string.ascii_letters) for _ in range(10)) - graph_path = tmpdirname.name + "/" + inner_folder + "/test_graph.bin" + graph_path = tmpdirname.name + "/" + inner_folder + "/test_graph" with pytest.raises(Exception): g.save_to_file(graph_path) diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py index 858dd15f30..64abdc470e 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_archive_graph.py @@ -21,7 +21,7 @@ def test_archive_graph_fails_if_graph_not_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g1' does not exist" in str(excinfo.value) def test_archive_graph_fails_if_graph_not_found_at_namespace(): @@ -38,7 +38,7 @@ def test_archive_graph_fails_if_graph_not_found_at_namespace(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g1' does not exist" in str(excinfo.value) def test_archive_graph_succeeds(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py index 3d72683421..734e08cce9 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_copy_graph.py @@ -20,7 +20,7 @@ def test_copy_graph_fails_if_graph_not_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'ben/g5' does not exist" in str(excinfo.value) def test_copy_graph_fails_if_graph_with_same_name_already_exists(): @@ -45,7 +45,7 @@ def test_copy_graph_fails_if_graph_with_same_name_already_exists(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'g6' already exists" in str(excinfo.value) def 
test_copy_graph_fails_if_graph_with_same_name_already_exists_at_same_namespace_as_graph(): @@ -70,7 +70,7 @@ def test_copy_graph_fails_if_graph_with_same_name_already_exists_at_same_namespa }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'ben/g6' already exists" in str(excinfo.value) def test_copy_graph_fails_if_graph_with_same_name_already_exists_at_diff_namespace_as_graph(): @@ -96,7 +96,7 @@ def test_copy_graph_fails_if_graph_with_same_name_already_exists_at_diff_namespa }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'shivam/g6' already exists" in str(excinfo.value) def test_copy_graph_succeeds(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_delete_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_delete_graph.py index 29b7a1d2b1..768aa23b61 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_delete_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_delete_graph.py @@ -18,7 +18,7 @@ def test_delete_graph_fails_if_graph_not_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'ben/g5' does not exist" in str(excinfo.value) def test_delete_graph_succeeds_if_graph_found(): @@ -43,7 +43,7 @@ def test_delete_graph_succeeds_if_graph_found(): query = """{graph(path: "g1") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g1' does not exist" in str(excinfo.value) def test_delete_graph_using_client_api_succeeds_if_graph_found(): @@ -62,7 +62,7 @@ def test_delete_graph_using_client_api_succeeds_if_graph_found(): query = """{graph(path: "g1") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g1' does not exist" in str(excinfo.value) def test_delete_graph_succeeds_if_graph_found_at_namespace(): @@ -87,4 +87,4 @@ def test_delete_graph_succeeds_if_graph_found_at_namespace(): query = """{graph(path: "g1") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g1' does not exist" in str(excinfo.value) diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_get_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_get_graph.py index 740278d623..6f22bc0928 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_get_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_get_graph.py @@ -16,7 +16,7 @@ def test_get_graph_fails_if_graph_not_found(): query = """{ graph(path: "g1") { name, path, nodes { list { name } } } }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g1' does not exist" in str(excinfo.value) def test_get_graph_fails_if_graph_not_found_at_namespace(): @@ -29,7 +29,7 @@ def test_get_graph_fails_if_graph_not_found_at_namespace(): ) with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g1' does not exist" in str(excinfo.value) def test_get_graph_succeeds_if_graph_found(): diff --git 
a/python/tests/test_base_install/test_graphql/edit_graph/test_graphql.py b/python/tests/test_base_install/test_graphql/edit_graph/test_graphql.py index 2af6156ae8..da015845d3 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_graphql.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_graphql.py @@ -47,8 +47,8 @@ def test_wrong_url(): with pytest.raises(Exception) as excinfo: client = RaphtoryClient("http://broken_url.com") assert ( - str(excinfo.value) - == "Could not connect to the given server - no response --error sending request for url (http://broken_url.com/)" + str(excinfo.value) + == "Could not connect to the given server - no response --error sending request for url (http://broken_url.com/)" ) @@ -156,39 +156,40 @@ def assert_graph_fetch(path): path = "../shivam/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "References to the parent dir are not allowed within the path:" in str( + assert "Invalid path '../shivam/g': References to the parent dir are not allowed within the path" in str( excinfo.value ) path = "./shivam/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "References to the current dir are not allowed within the path" in str( + assert "Invalid path './shivam/g': References to the current dir are not allowed within the path" in str( excinfo.value ) path = "shivam/../../../../investigation/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "References to the parent dir are not allowed within the path:" in str( + assert "Invalid path 'shivam/../../../../investigation/g': References to the parent dir are not allowed within the path" in str( excinfo.value ) path = "//shivam/investigation/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "Double forward slashes are not allowed in path" in str(excinfo.value) + assert "Invalid path '//shivam/investigation/g': Double forward slashes are not allowed in path" in str( + excinfo.value) path = "shivam/investigation//2024-12-12/g" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "Double forward slashes are not allowed in path" in str(excinfo.value) + assert "Invalid path 'shivam/investigation//2024-12-12/g': Double forward slashes are not allowed in path" in str( + excinfo.value) path = r"shivam/investigation\2024-12-12" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "Backslash not allowed in path" in str(excinfo.value) - + assert r"Backslash not allowed in path" in str(excinfo.value) # Test if we can escape through a symlink tmp_dir2 = tempfile.mkdtemp() nested_dir = os.path.join(tmp_work_dir, "shivam", "graphs") @@ -199,7 +200,8 @@ def assert_graph_fetch(path): path = "shivam/graphs/not_a_symlink_i_promise/escaped" with pytest.raises(Exception) as excinfo: client.send_graph(path=path, graph=g, overwrite=True) - assert "A component of the given path was a symlink" in str(excinfo.value) + assert "Invalid path 'shivam/graphs/not_a_symlink_i_promise/escaped': A component of the given path was a symlink" in str( + excinfo.value) def test_graph_windows_and_layers_query(): @@ -642,7 +644,6 @@ def test_edge_id(): } } - # def test_disk_graph_name(): # import pandas as pd # from raphtory import DiskGraphStorage diff --git 
a/python/tests/test_base_install/test_graphql/edit_graph/test_move_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_move_graph.py index 98eb97d4bf..f72762e3d8 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_move_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_move_graph.py @@ -20,7 +20,7 @@ def test_move_graph_fails_if_graph_not_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'ben/g5' does not exist" in str(excinfo.value) def test_move_graph_fails_if_graph_with_same_name_already_exists(): @@ -45,7 +45,7 @@ def test_move_graph_fails_if_graph_with_same_name_already_exists(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'g6' already exists" in str(excinfo.value) def test_move_graph_fails_if_graph_with_same_name_already_exists_at_same_namespace_as_graph(): @@ -70,7 +70,7 @@ def test_move_graph_fails_if_graph_with_same_name_already_exists_at_same_namespa }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'ben/g6' already exists" in str(excinfo.value) def test_move_graph_fails_if_graph_with_same_name_already_exists_at_diff_namespace_as_graph(): @@ -96,7 +96,7 @@ def test_move_graph_fails_if_graph_with_same_name_already_exists_at_diff_namespa }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'shivam/g6' already exists" in str(excinfo.value) def test_move_graph_succeeds(): @@ -124,7 +124,7 @@ def test_move_graph_succeeds(): query = """{graph(path: "shivam/g3") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g3' does not exist" in str(excinfo.value) query = """{graph(path: "g4") { nodes {list {name}} @@ -157,7 +157,7 @@ def test_move_graph_using_client_api_succeeds(): query = """{graph(path: "shivam/g3") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g3' does not exist" in str(excinfo.value) query = """{graph(path: "ben/g4") { nodes {list {name}} @@ -197,7 +197,7 @@ def test_move_graph_succeeds_at_same_namespace_as_graph(): query = """{graph(path: "shivam/g3") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g3' does not exist" in str(excinfo.value) query = """{graph(path: "shivam/g4") { nodes {list {name}} @@ -238,7 +238,7 @@ def test_move_graph_succeeds_at_diff_namespace_as_graph(): query = """{graph(path: "ben/g3") {nodes {list {name}}}}""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'ben/g3' does not exist" in str(excinfo.value) query = """{graph(path: "shivam/g4") { nodes {list {name}} diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_new_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_new_graph.py index da0d3f6c9d..adba406a92 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_new_graph.py +++ 
b/python/tests/test_base_install/test_graphql/edit_graph/test_new_graph.py @@ -45,7 +45,7 @@ def test_new_graph_fails_if_graph_found(): }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'test/path/g1' already exists" in str(excinfo.value) def test_client_new_graph_works(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_receive_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_receive_graph.py index 8f6daa3cd8..8fff9ff978 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_receive_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_receive_graph.py @@ -16,7 +16,7 @@ def test_receive_graph_fails_if_no_graph_found(): query = """{ receiveGraph(path: "g2") }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'g2' does not exist" in str(excinfo.value) def test_receive_graph_succeeds_if_graph_found(): @@ -62,7 +62,7 @@ def test_receive_graph_fails_if_no_graph_found_at_namespace(): query = """{ receiveGraph(path: "shivam/g2") }""" with pytest.raises(Exception) as excinfo: client.query(query) - assert "Graph not found" in str(excinfo.value) + assert "Graph 'shivam/g2' does not exist" in str(excinfo.value) def test_receive_graph_succeeds_if_graph_found_at_namespace(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_send_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_send_graph.py index d73703d88a..7b0b3b0e1d 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_send_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_send_graph.py @@ -31,7 +31,7 @@ def test_send_graph_fails_if_graph_already_exists(): client = RaphtoryClient("http://localhost:1736") with pytest.raises(Exception) as excinfo: client.send_graph(path="g", graph=g) - assert "Graph already exists by name = g" in str(excinfo.value) + assert "Graph 'g' already exists" in str(excinfo.value) def test_send_graph_succeeds_if_graph_already_exists_with_overwrite_enabled(): @@ -94,7 +94,7 @@ def test_send_graph_fails_if_graph_already_exists_at_namespace(): client = RaphtoryClient("http://localhost:1736") with pytest.raises(Exception) as excinfo: client.send_graph(path="shivam/g", graph=g) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'shivam/g' already exists" in str(excinfo.value) def test_send_graph_succeeds_if_graph_already_exists_at_namespace_with_overwrite_enabled(): diff --git a/python/tests/test_base_install/test_graphql/edit_graph/test_upload_graph.py b/python/tests/test_base_install/test_graphql/edit_graph/test_upload_graph.py index 78e7e7ac1b..974e1320ab 100644 --- a/python/tests/test_base_install/test_graphql/edit_graph/test_upload_graph.py +++ b/python/tests/test_base_install/test_graphql/edit_graph/test_upload_graph.py @@ -70,7 +70,7 @@ def test_upload_graph_fails_if_graph_already_exists(): client = RaphtoryClient("http://localhost:1736") with pytest.raises(Exception) as excinfo: client.upload_graph(path="g", file_path=g_file_path) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'g' already exists" in str(excinfo.value) def test_upload_graph_succeeds_if_graph_already_exists_with_overwrite_enabled(): @@ -153,7 +153,7 @@ def test_upload_graph_fails_if_graph_already_exists_at_namespace(): client = 
RaphtoryClient("http://localhost:1736") with pytest.raises(Exception) as excinfo: client.upload_graph(path="shivam/g", file_path=g_file_path, overwrite=False) - assert "Graph already exists by name" in str(excinfo.value) + assert "Graph 'shivam/g' already exists" in str(excinfo.value) def test_upload_graph_succeeds_if_graph_already_exists_at_namespace_with_overwrite_enabled(): diff --git a/python/tests/test_base_install/test_graph_benchmarks.py b/python/tests/test_graph_benchmarks.py similarity index 100% rename from python/tests/test_base_install/test_graph_benchmarks.py rename to python/tests/test_graph_benchmarks.py diff --git a/python/tox.ini b/python/tox.ini index 888e1380ed..1de129bd95 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -50,8 +50,11 @@ deps = matplotlib commands = pytest --nbmake --nbmake-timeout=1200 {tty:--color=yes} ../examples/python/socio-patterns/example.ipynb +[testenv:benchmark] +commands = pytest tests/test_graph_benchmarks.py + [testenv:docs] -deps = +deps = -r ../docs/requirements.txt change_dir = ../docs/user-guide commands = pytest --markdown-docs -m markdown-docs --markdown-docs-syntax=superfences diff --git a/raphtory-api/src/core/entities/properties/prop/prop_array.rs b/raphtory-api/src/core/entities/properties/prop/prop_array.rs index d492b11cb4..025736ffc7 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_array.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_array.rs @@ -1,14 +1,12 @@ use crate::{ - core::entities::properties::prop::{ - unify_types, ArrowRow, DirectConvert, Prop, PropType, SerdeProp, - }, + core::entities::properties::prop::{unify_types, ArrowRow, DirectConvert, Prop, PropType}, iter::{BoxedLIter, IntoDynBoxed}, }; use arrow_array::{ cast::AsArray, types::*, Array, ArrayRef, ArrowPrimitiveType, OffsetSizeTrait, PrimitiveArray, }; use arrow_schema::{DataType, Field, Fields, TimeUnit}; -use serde::{ser::SerializeSeq, Deserialize, Serialize, Serializer}; +use serde::{ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer}; use std::{ hash::{Hash, Hasher}, sync::Arc, @@ -16,7 +14,7 @@ use std::{ #[derive(Debug, Clone, derive_more::From)] pub enum PropArray { - Vec(Arc>), + Vec(Arc<[Prop]>), Array(ArrayRef), } @@ -201,7 +199,7 @@ impl Serialize for PropArray { { let mut state = serializer.serialize_seq(Some(self.len()))?; for prop in self.iter_all() { - state.serialize_element(&prop.as_ref().map(SerdeProp))?; + state.serialize_element(&prop)?; } state.end() } @@ -210,10 +208,10 @@ impl Serialize for PropArray { impl<'de> Deserialize<'de> for PropArray { fn deserialize(deserializer: D) -> Result where - D: serde::Deserializer<'de>, + D: Deserializer<'de>, { - let vec: Vec = Deserialize::deserialize(deserializer)?; - Ok(PropArray::Vec(Arc::from(vec))) + let data = >::deserialize(deserializer)?; + Ok(PropArray::Vec(data.into())) } } diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index acde96f75d..667c937c9b 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -1,17 +1,27 @@ use crate::core::{ entities::{ - properties::prop::{prop_ref_enum::PropRef, PropNum, PropType}, + properties::prop::{prop_array::*, prop_ref_enum::PropRef, ArrowRow, PropNum, PropType}, GidRef, }, storage::arc_str::ArcStr, }; +use arrow_array::{ + cast::AsArray, + types::{ + Date32Type, Date64Type, Decimal128Type, DecimalType, Float32Type, Float64Type, 
Int32Type, + Int64Type, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType, + TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, + }, + Array, ArrayRef, LargeListArray, StructArray, +}; +use arrow_schema::{DataType, Field, FieldRef, TimeUnit}; use bigdecimal::{num_bigint::BigInt, BigDecimal}; use chrono::{DateTime, NaiveDateTime, Utc}; use itertools::Itertools; use rustc_hash::{FxBuildHasher, FxHashMap}; use serde::{ - ser::{SerializeMap, SerializeSeq}, - Deserialize, Serialize, + ser::{Error, SerializeMap, SerializeSeq}, + Deserialize, Serialize, Serializer, }; use std::{ cmp::Ordering, @@ -23,10 +33,6 @@ use std::{ }; use thiserror::Error; -use crate::core::entities::properties::prop::prop_array::*; -use arrow_array::{cast::AsArray, ArrayRef, LargeListArray, StructArray}; -use arrow_schema::{DataType, Field, FieldRef}; - pub const DECIMAL_MAX: i128 = 99999999999999999999999999999999999999i128; // equivalent to parquet decimal(38, 0) #[derive(Error, Debug)] @@ -151,44 +157,52 @@ impl PartialOrd for Prop { } } -pub struct SerdeProp<'a>(pub &'a Prop); +pub struct SerdeArrowProp<'a>(pub &'a Prop); +#[derive(Clone, Copy, Debug)] +pub struct SerdeArrowList<'a>(pub &'a PropArray); + #[derive(Clone, Copy, Debug)] -pub struct SerdeList<'a>(pub &'a PropArray); +pub struct SerdeArrowArray<'a>(pub &'a ArrayRef); #[derive(Clone, Copy)] -pub struct SerdeMap<'a>(pub &'a HashMap); +pub struct SerdeArrowMap<'a>(pub &'a HashMap); #[derive(Clone, Copy, Serialize)] pub struct SerdeRow { value: Option

, } -impl<'a> Serialize for SerdeList<'a> { +impl<'a> Serialize for SerdeArrowList<'a> { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: serde::Serializer, { - let mut state = serializer.serialize_seq(Some(self.0.len()))?; - for prop in self.0.iter() { - state.serialize_element(&SerdeProp(&prop))?; + match &self.0 { + PropArray::Vec(list) => { + let mut state = serializer.serialize_seq(Some(self.0.len()))?; + for prop in list.iter() { + state.serialize_element(&SerdeArrowProp(prop))?; + } + state.end() + } + PropArray::Array(array) => SerdeArrowArray(array).serialize(serializer), } - state.end() } } -impl<'a> Serialize for SerdeMap<'a> { +impl<'a> Serialize for SerdeArrowMap<'a> { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: serde::Serializer, { let mut state = serializer.serialize_map(Some(self.0.len()))?; for (k, v) in self.0.iter() { - state.serialize_entry(k, &SerdeProp(v))?; + state.serialize_entry(k, &SerdeArrowProp(v))?; } state.end() } } -impl<'a> Serialize for SerdeProp<'a> { +impl<'a> Serialize for SerdeArrowProp<'a> { fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: serde::Serializer, @@ -206,13 +220,161 @@ impl<'a> Serialize for SerdeProp<'a> { Prop::Bool(b) => serializer.serialize_bool(*b), Prop::DTime(dt) => serializer.serialize_i64(dt.timestamp_millis()), Prop::NDTime(dt) => serializer.serialize_i64(dt.and_utc().timestamp_millis()), - Prop::List(l) => SerdeList(l).serialize(serializer), - Prop::Map(m) => SerdeMap(m).serialize(serializer), + Prop::List(l) => SerdeArrowList(l).serialize(serializer), + Prop::Map(m) => SerdeArrowMap(m).serialize(serializer), Prop::Decimal(dec) => serializer.serialize_str(&dec.to_string()), } } } +impl<'a> Serialize for SerdeArrowArray<'a> { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let dtype = self.0.data_type(); + let len = self.0.len(); + let mut state = serializer.serialize_seq(Some(len))?; + match dtype { + DataType::Boolean => { + for v in self.0.as_boolean().iter() { + state.serialize_element(&v)?; + } + } + DataType::Int32 => { + for v in self.0.as_primitive::<Int32Type>().iter() { + state.serialize_element(&v)?; + } + } + DataType::Int64 => { + for v in self.0.as_primitive::<Int64Type>().iter() { + state.serialize_element(&v)?; + } + } + DataType::UInt8 => { + for v in self.0.as_primitive::<UInt8Type>().iter() { + state.serialize_element(&v)?; + } + } + DataType::UInt16 => { + for v in self.0.as_primitive::<UInt16Type>().iter() { + state.serialize_element(&v)?; + } + } + DataType::UInt32 => { + for v in self.0.as_primitive::<UInt32Type>().iter() { + state.serialize_element(&v)?; + } + } + DataType::UInt64 => { + for v in self.0.as_primitive::<UInt64Type>().iter() { + state.serialize_element(&v)?; + } + } + DataType::Float32 => { + for v in self.0.as_primitive::<Float32Type>().iter() { + state.serialize_element(&v)?; + } + } + DataType::Float64 => { + for v in self.0.as_primitive::<Float64Type>().iter() { + state.serialize_element(&v)?; + } + } + DataType::Timestamp(unit, _) => match unit { + TimeUnit::Second => { + for v in self.0.as_primitive::<TimestampSecondType>().iter() { + state.serialize_element(&v)?; + } + } + TimeUnit::Millisecond => { + for v in self.0.as_primitive::<TimestampMillisecondType>().iter() { + state.serialize_element(&v)?; + } + } + TimeUnit::Microsecond => { + for v in self.0.as_primitive::<TimestampMicrosecondType>().iter() { + state.serialize_element(&v)?; + } + } + TimeUnit::Nanosecond => { + for v in self.0.as_primitive::<TimestampNanosecondType>().iter() { + state.serialize_element(&v)?; + } + } + }, + DataType::Date32 => { + for v in self.0.as_primitive::<Date32Type>().iter() { + state.serialize_element(&v)?; + } + } + DataType::Date64 => { + for v in self.0.as_primitive::<Date64Type>().iter() { + state.serialize_element(&v)?; + } + } + DataType::Utf8 => { + for v in self.0.as_string::<i32>().iter() { + state.serialize_element(&v)?; + } + } + DataType::LargeUtf8 => { + for v in self.0.as_string::<i64>().iter() { + state.serialize_element(&v)?; + } + } + DataType::Utf8View => { + for v in self.0.as_string_view().iter() { + state.serialize_element(&v)?; + } + } + DataType::Decimal128(precision, scale) => { + for v in self.0.as_primitive::<Decimal128Type>().iter() { + let element = v.map(|v| Decimal128Type::format_decimal(v, *precision, *scale)); + state.serialize_element(&element)? + // i128 not supported by serde_arrow! + } + } + DataType::Struct(_) => { + let struct_array = self.0.as_struct(); + match struct_array.nulls() { + None => { + for i in 0..struct_array.len() { + state.serialize_element(&ArrowRow::new(struct_array, i))?; + } + } + Some(nulls) => { + for (i, is_valid) in nulls.iter().enumerate() { + state.serialize_element( + &is_valid.then_some(ArrowRow::new(struct_array, i)), + )?; + } + } + } + } + DataType::List(_) => { + let list = self.0.as_list::<i32>(); + for array in list.iter() { + state.serialize_element(&array.as_ref().map(SerdeArrowArray))?; + } + } + DataType::LargeList(_) => { + let list = self.0.as_list::<i64>(); + for array in list.iter() { + state.serialize_element(&array.as_ref().map(SerdeArrowArray))?; + } + } + DataType::Null => { + for _ in 0..self.0.len() { + state.serialize_element(&None::<()>)?; + } + } + dtype => Err(Error::custom(format!("unsupported data type {dtype:?}")))?, + } + state.end() + } +} + pub fn validate_prop(prop: Prop) -> Result { match prop { Prop::Decimal(ref bd) => { @@ -241,9 +403,9 @@ impl Prop { Prop::Map(h_map.into()) } - pub fn as_map(&self) -> Option<SerdeMap> { + pub fn as_map(&self) -> Option<SerdeArrowMap> { match self { - Prop::Map(map) => Some(SerdeMap(map)), + Prop::Map(map) => Some(SerdeArrowMap(map)), _ => None, } } @@ -462,7 +624,7 @@ impl From> for Prop { impl From<Vec<Prop>> for Prop { fn from(value: Vec<Prop>) -> Self { - Prop::List(Arc::new(value).into()) + Prop::List(value.into()) } } @@ -499,7 +661,7 @@ pub trait IntoPropList { impl<I: IntoIterator<Item = K>, K: Into<Prop>> IntoPropList for I { fn into_prop_list(self) -> Prop { let vec = self.into_iter().map(|v| v.into()).collect::<Vec<_>>(); - Prop::List(Arc::new(vec).into()) + Prop::List(vec.into()) } } diff --git a/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs index f77d94dc20..2c4ade6b64 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs @@ -1,5 +1,5 @@ use crate::core::{ - entities::properties::prop::{Prop, SerdeList, SerdeMap}, + entities::properties::prop::{Prop, SerdeArrowList, SerdeArrowMap}, storage::arc_str::ArcStr, }; use bigdecimal::BigDecimal; @@ -124,7 +124,7 @@ impl<'a> Serialize for PropMapRef<'a> { S: serde::Serializer, { match self { - PropMapRef::Mem(map) => SerdeMap(map).serialize(serializer), + PropMapRef::Mem(map) => SerdeArrowMap(map).serialize(serializer), PropMapRef::Arrow(row) => row.serialize(serializer), } } @@ -148,7 +148,7 @@ impl<'a> Serialize for PropRef<'a> { PropNum::F64(v) => serializer.serialize_f64(*v), }, PropRef::Bool(b) => serializer.serialize_bool(*b), - PropRef::List(lst) => SerdeList(lst).serialize(serializer), + PropRef::List(lst) => SerdeArrowList(lst).serialize(serializer), PropRef::Map(map_ref) => map_ref.serialize(serializer), PropRef::NDTime(dt) => serializer.serialize_i64(dt.and_utc().timestamp_millis()),
PropRef::DTime(dt) => serializer.serialize_i64(dt.timestamp_millis()), diff --git a/raphtory-api/src/python/prop.rs b/raphtory-api/src/python/prop.rs index cfb7d3828f..9283133c34 100644 --- a/raphtory-api/src/python/prop.rs +++ b/raphtory-api/src/python/prop.rs @@ -59,7 +59,39 @@ impl<'py> IntoPyObject<'py> for Prop { Prop::List(PropArray::Array(arr_ref)) => { PyArray::from_array_ref(arr_ref).into_pyarrow(py)? } - Prop::List(PropArray::Vec(v)) => v.deref().clone().into_pyobject(py)?.into_any(), // Fixme: optimise the clone here? + Prop::List(PropArray::Vec(v)) => v.into_pyobject(py)?.into_any(), // Fixme: optimise the clone here? + Prop::Map(v) => v.deref().clone().into_pyobject(py)?.into_any(), + Prop::Decimal(d) => { + let decl_cls = get_decimal_cls(py)?; + decl_cls.call1((d.to_string(),))? + } + }) + } +} + +impl<'a, 'py: 'a> IntoPyObject<'py> for &'a Prop { + type Target = PyAny; + type Output = Bound<'py, PyAny>; + type Error = PyErr; + + fn into_pyobject(self, py: Python<'py>) -> Result { + Ok(match self { + Prop::Str(s) => s.into_pyobject(py)?.into_any(), + Prop::Bool(bool) => bool.into_bound_py_any(py)?, + Prop::U8(u8) => u8.into_pyobject(py)?.into_any(), + Prop::U16(u16) => u16.into_pyobject(py)?.into_any(), + Prop::I64(i64) => i64.into_pyobject(py)?.into_any(), + Prop::U64(u64) => u64.into_pyobject(py)?.into_any(), + Prop::F64(f64) => f64.into_pyobject(py)?.into_any(), + Prop::DTime(dtime) => dtime.into_pyobject(py)?.into_any(), + Prop::NDTime(ndtime) => ndtime.into_pyobject(py)?.into_any(), + Prop::I32(v) => v.into_pyobject(py)?.into_any(), + Prop::U32(v) => v.into_pyobject(py)?.into_any(), + Prop::F32(v) => v.into_pyobject(py)?.into_any(), + Prop::List(PropArray::Array(arr_ref)) => { + PyArray::from_array_ref(arr_ref.clone()).into_pyarrow(py)? 
+ } + Prop::List(PropArray::Vec(v)) => v.into_pyobject(py)?.into_any(), Prop::Map(v) => v.deref().clone().into_pyobject(py)?.into_any(), Prop::Decimal(d) => { let decl_cls = get_decimal_cls(py)?; @@ -108,8 +140,8 @@ impl<'source> FromPyObject<'source> for Prop { let (arr, _) = arrow.into_inner(); return Ok(Prop::List(PropArray::Array(arr))); } - if let Ok(list) = ob.extract() { - return Ok(Prop::List(PropArray::Vec(Arc::new(list)))); + if let Ok(list) = ob.extract::>() { + return Ok(Prop::List(PropArray::Vec(list.into()))); } if let Ok(map) = ob.extract() { return Ok(Prop::Map(Arc::new(map))); diff --git a/raphtory-benchmark/benches/index_bench.rs b/raphtory-benchmark/benches/index_bench.rs index 5434921574..e1534c4bc4 100644 --- a/raphtory-benchmark/benches/index_bench.rs +++ b/raphtory-benchmark/benches/index_bench.rs @@ -27,11 +27,8 @@ fn bench_graph_index_load(c: &mut Criterion) { let mut group = c.benchmark_group("graph_index_load"); group.sample_size(100); - - let path_for_decoded_graph = None; - group.bench_function(BenchmarkId::from_parameter("load_once"), |b| { - b.iter(|| Graph::decode(black_box(&path), path_for_decoded_graph.clone()).unwrap()); + b.iter(|| Graph::decode(black_box(&path)).unwrap()); }); group.finish(); diff --git a/raphtory-benchmark/benches/search_bench.rs b/raphtory-benchmark/benches/search_bench.rs index 03567243a6..426631ab6c 100644 --- a/raphtory-benchmark/benches/search_bench.rs +++ b/raphtory-benchmark/benches/search_bench.rs @@ -33,7 +33,7 @@ use std::{iter, sync::Arc, time::Instant}; static GRAPH: Lazy> = Lazy::new(|| { let data_dir = "/tmp/graphs/raph_social/rf0.1"; // TODO Fix this // let data_dir = "/tmp/graphs/raph_social/rf1.0"; - let graph = Graph::decode(data_dir, None).unwrap(); + let graph = Graph::decode(data_dir).unwrap(); println!("Nodes count = {}", graph.count_nodes()); println!("Edges count = {}", graph.count_edges()); diff --git a/raphtory-benchmark/src/common/mod.rs b/raphtory-benchmark/src/common/mod.rs index eb65cda3a6..3d83103de3 100644 --- a/raphtory-benchmark/src/common/mod.rs +++ b/raphtory-benchmark/src/common/mod.rs @@ -373,7 +373,7 @@ pub fn run_analysis_benchmarks( |b: &mut Bencher| { let mut rng = rand::rng(); let v: u64 = loop { - let v: u64 = rng.gen(); + let v: u64 = rng.random(); if !nodes.contains(&GID::U64(v)) { break v; } @@ -587,9 +587,8 @@ pub fn run_proto_encode_benchmark(group: &mut BenchmarkGroup, graph: G pub fn run_proto_decode_benchmark(group: &mut BenchmarkGroup, graph: Graph) { let f = TempDir::new().unwrap(); graph.encode(f.path()).unwrap(); - let path_for_decoded_graph = None; bench(group, "proto_decode", None, |b| { - b.iter(|| Graph::decode(f.path(), path_for_decoded_graph).unwrap()) + b.iter(|| Graph::decode(f.path()).unwrap()) }) } diff --git a/raphtory-benchmark/src/common/vectors.rs b/raphtory-benchmark/src/common/vectors.rs index 701ace6db2..919d201ccb 100644 --- a/raphtory-benchmark/src/common/vectors.rs +++ b/raphtory-benchmark/src/common/vectors.rs @@ -16,7 +16,7 @@ pub fn gen_embedding_for_bench(text: &str) -> Embedding { let hash = hasher.finish(); let mut rng: StdRng = SeedableRng::seed_from_u64(hash); - (0..1536).map(|_| rng.gen()).collect() + (0..1536).map(|_| rng.random()).collect() } async fn embedding_model(texts: Vec) -> EmbeddingResult> { diff --git a/raphtory-benchmark/src/graph_gen/raph_social.rs b/raphtory-benchmark/src/graph_gen/raph_social.rs index c7cef5ae35..c1a0ec6a15 100644 --- a/raphtory-benchmark/src/graph_gen/raph_social.rs +++ b/raphtory-benchmark/src/graph_gen/raph_social.rs 
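The `python/prop.rs` hunk earlier in this patch leaves list properties with two distinct paths: a plain Python list now extracts into `PropArray::Vec(Arc<[Prop]>)`, while a pyarrow array passes through untouched as `PropArray::Array`. A hedged sketch of the Python-side behaviour, assuming the standard `add_node`/`properties` API; the property names and values are placeholders:

    import pyarrow as pa
    from raphtory import Graph

    g = Graph()
    n = g.add_node(1, "a", properties={"scores": [1, 2, 3]})  # Vec path
    g.add_node(2, "a", properties={"embedding": pa.array([0.1, 0.2])})  # Arrow path

    print(n.properties.get("scores"))     # comes back as a Python list
    print(n.properties.get("embedding"))  # comes back via pyarrow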
@@ -15,7 +15,7 @@ use fake::{ }, Fake, }; -use rand::{prelude::SliceRandom, rng, seq::IndexedRandom, Rng}; +use rand::{rng, seq::IndexedRandom, Rng}; use raphtory::prelude::*; use serde::{de::DeserializeOwned, Deserialize, Serialize}; use std::{collections::HashMap, error::Error, fmt::Debug}; @@ -173,7 +173,7 @@ pub fn generate_data_write_to_csv( post_id: format!("post_{}", i), forum_id: format!("forum_{}", rng.random_range(1..=num_forums)), creation_date, // Use post's creation date - is_featured: rng.gen_bool(0.2), + is_featured: rng.random_bool(0.2), likes_count: rng.random_range(0..500), comments_count: rng.random_range(0..200), })?; @@ -200,7 +200,7 @@ pub fn generate_data_write_to_csv( comment_id: format!("comment_{}", i), post_id: format!("post_{}", rng.random_range(1..=num_posts)), creation_date, // Use comment's creation date - is_edited: rng.gen_bool(0.1), + is_edited: rng.random_bool(0.1), upvotes: rng.random_range(0..200), reply_count: rng.random_range(0..20), })?; @@ -421,7 +421,7 @@ pub fn generate_graph( ), ( "gender", - Prop::Str(ArcStr::from(if rng.gen_bool(0.5) { + Prop::Str(ArcStr::from(if rng.random_bool(0.5) { "male" } else { "female" @@ -465,7 +465,7 @@ pub fn generate_graph( forum_id.clone(), [ ("activity_score", Prop::F64(rng.random_range(0.0..100.0))), - ("is_moderator", Prop::Bool(rng.gen_bool(0.1))), + ("is_moderator", Prop::Bool(rng.random_bool(0.1))), ], None, ) @@ -516,7 +516,7 @@ pub fn generate_graph( post_id.clone(), forum_id.clone(), [ - ("is_featured", Prop::Bool(rng.gen_bool(0.2))), + ("is_featured", Prop::Bool(rng.random_bool(0.2))), ("likes_count", Prop::U64(rng.random_range(0..500))), ("comments_count", Prop::U64(rng.random_range(0..200))), ], @@ -568,7 +568,7 @@ pub fn generate_graph( comment_id.clone(), post_id.clone(), [ - ("is_edited", Prop::Bool(rng.gen_bool(0.1))), + ("is_edited", Prop::Bool(rng.random_bool(0.1))), ("upvotes", Prop::U64(rng.random_range(0..200))), ("reply_count", Prop::U64(rng.random_range(0..20))), ], diff --git a/raphtory-core/src/entities/properties/graph_meta.rs b/raphtory-core/src/entities/properties/graph_meta.rs index 49b5b193e6..ca53ee5bdc 100644 --- a/raphtory-core/src/entities/properties/graph_meta.rs +++ b/raphtory-core/src/entities/properties/graph_meta.rs @@ -16,10 +16,10 @@ use raphtory_api::core::{ FxDashMap, }, }; -use serde::{Deserialize, Serialize}; +use serde::Serialize; use std::ops::{Deref, DerefMut}; -#[derive(Serialize, Deserialize, Debug, Default)] +#[derive(Serialize, Debug, Default)] pub struct GraphMeta { metadata_mapper: PropMapper, temporal_mapper: PropMapper, diff --git a/raphtory-core/src/entities/properties/tprop.rs b/raphtory-core/src/entities/properties/tprop.rs index 3ea03418fe..33303a6085 100644 --- a/raphtory-core/src/entities/properties/tprop.rs +++ b/raphtory-core/src/entities/properties/tprop.rs @@ -15,11 +15,11 @@ use raphtory_api::core::{ storage::arc_str::ArcStr, }; use rustc_hash::FxHashMap; -use serde::{Deserialize, Serialize}; +use serde::Serialize; use std::{collections::HashMap, iter, ops::Range, sync::Arc}; use thiserror::Error; -#[derive(Debug, Default, PartialEq, Clone, Serialize, Deserialize)] +#[derive(Debug, Default, PartialEq, Clone, Serialize)] pub enum TProp { #[default] Empty, diff --git a/raphtory-core/src/storage/lazy_vec.rs b/raphtory-core/src/storage/lazy_vec.rs index b5f5cfe5ad..bdc9557368 100644 --- a/raphtory-core/src/storage/lazy_vec.rs +++ b/raphtory-core/src/storage/lazy_vec.rs @@ -274,8 +274,7 @@ where LazyVec::LazyVec1(A::default(), TupleCol::from(inner)) } - 
#[cfg(test)] - fn iter(&self) -> Box + Send + '_> { + pub fn iter(&self) -> Box + Send + '_> { match self { LazyVec::Empty => Box::new(iter::empty()), LazyVec::LazyVec1(default, tuples) => { @@ -287,8 +286,7 @@ where } } - #[cfg(test)] - fn iter_opt(&self) -> Box> + Send + '_> { + pub fn iter_opt(&self) -> Box> + Send + '_> { match self { LazyVec::Empty => Box::new(iter::empty()), LazyVec::LazyVec1(_, tuples) => Box::new(tuples.iter()), @@ -356,7 +354,6 @@ where #[cfg(test)] mod lazy_vec_tests { use super::*; - use itertools::Itertools; use proptest::{arbitrary::Arbitrary, proptest}; fn check_lazy_vec(lazy_vec: &LazyVec, v: Vec>) { diff --git a/raphtory-core/src/storage/mod.rs b/raphtory-core/src/storage/mod.rs index 8807dcf115..254f9b3d18 100644 --- a/raphtory-core/src/storage/mod.rs +++ b/raphtory-core/src/storage/mod.rs @@ -19,7 +19,7 @@ use raphtory_api::core::{ storage::arc_str::ArcStr, }; use rustc_hash::FxHashMap; -use serde::{Deserialize, Serialize}; +use serde::Serialize; use std::{borrow::Cow, collections::HashMap, fmt::Debug, sync::Arc}; use thiserror::Error; @@ -29,7 +29,7 @@ pub mod lazy_vec; pub mod locked_view; pub mod timeindex; -#[derive(Debug, Serialize, Deserialize, PartialEq, Default)] +#[derive(Debug, Serialize, PartialEq, Default)] pub struct TColumns { t_props_log: Vec, num_rows: usize, @@ -128,7 +128,7 @@ impl TColumns { } } -#[derive(Debug, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Serialize, PartialEq)] pub enum PropColumn { Empty(usize), Bool(LazyVec), diff --git a/raphtory-graphql/schema.graphql b/raphtory-graphql/schema.graphql index 965ea9c050..9a107ad3df 100644 --- a/raphtory-graphql/schema.graphql +++ b/raphtory-graphql/schema.graphql @@ -1208,19 +1208,13 @@ type MutRoot { """ newGraph(path: String!, graphType: GraphType!): Boolean! """ - Move graph from a path path on the server to a new_path on the server. - - If namespace is not provided, it will be set to the current working directory. - This applies to both the graph namespace and new graph namespace. + Move graph from a path on the server to a new_path on the server. """ - moveGraph(path: String!, newPath: String!): Boolean! + moveGraph(path: String!, newPath: String!, overwrite: Boolean): Boolean! """ - Copy graph from a path path on the server to a new_path on the server. - - If namespace is not provided, it will be set to the current working directory. - This applies to both the graph namespace and new graph namespace. + Copy graph from a path on the server to a new_path on the server. """ - copyGraph(path: String!, newPath: String!): Boolean! + copyGraph(path: String!, newPath: String!, overwrite: Boolean): Boolean! """ Upload a graph file from a path on the client using GQL multipart uploading. 
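The schema hunk above adds an optional `overwrite` flag to `moveGraph` and `copyGraph`, and the `data.rs` rework that follows routes it through `validate_path_for_insert`, surfacing the quoted-path errors ("Graph 'g6' already exists", "Graph 'g1' does not exist") asserted throughout the tests earlier. A minimal sketch through the GraphQL client used in those tests; the server URL and graph paths are placeholders, and the graphs are assumed to already exist on the server:

    import pytest
    from raphtory.graphql import RaphtoryClient

    client = RaphtoryClient("http://localhost:1736")

    # Copying onto an existing target without overwrite fails with the new message.
    with pytest.raises(Exception) as excinfo:
        client.query("""mutation { copyGraph(path: "shivam/g3", newPath: "ben/g6") }""")
    assert "already exists" in str(excinfo.value)

    # The new optional flag lets the same copy go through.
    client.query("""mutation { copyGraph(path: "shivam/g3", newPath: "ben/g6", overwrite: true) }""")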
diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index 8205a1abc5..480ab08f07 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -2,14 +2,19 @@ use crate::{ config::app_config::AppConfig, graph::GraphWithVectors, model::blocking_io, - paths::{valid_path, ExistingGraphFolder, ValidGraphFolder}, + paths::{ + mark_dirty, ExistingGraphFolder, InternalPathValidationError, PathValidationError, + ValidGraphPaths, ValidWriteableGraphFolder, + }, + rayon::blocking_compute, + GQLError, }; -use itertools::Itertools; +use futures_util::FutureExt; use moka::future::Cache; use raphtory::{ - db::api::view::{internal::InternalStorageOps, MaterializedGraph}, - errors::{GraphError, InvalidPathReason}, - prelude::StableEncode, + db::api::view::MaterializedGraph, + errors::GraphError, + serialise::GraphPaths, vectors::{ cache::VectorCache, template::DocumentTemplate, vectorisable::Vectorisable, vectorised_graph::VectorisedGraph, @@ -17,46 +22,117 @@ use raphtory::{ }; use std::{ collections::HashMap, + fs, io, io::{Read, Seek}, path::{Path, PathBuf}, sync::Arc, }; -use tokio::fs; -use tracing::warn; +use tracing::{error, warn}; use walkdir::WalkDir; +pub const DIRTY_PATH: &'static str = ".dirty"; + #[derive(Clone)] pub struct EmbeddingConf { pub(crate) cache: VectorCache, pub(crate) global_template: Option, - pub(crate) individual_templates: HashMap, + pub(crate) individual_templates: HashMap, +} + +#[derive(thiserror::Error, Debug)] +pub enum MutationErrorInner { + #[error(transparent)] + GraphError(#[from] GraphError), + #[error(transparent)] + IO(#[from] io::Error), + #[error(transparent)] + InvalidInternal(#[from] InternalPathValidationError), +} + +#[derive(thiserror::Error, Debug)] +pub enum InsertionError { + #[error("Failed to insert graph {graph}: {error}")] + Insertion { + graph: String, + error: MutationErrorInner, + }, + #[error(transparent)] + PathValidation(#[from] PathValidationError), + #[error("Failed to insert graph {graph}: {error}")] + GraphError { graph: String, error: GraphError }, +} + +impl InsertionError { + pub fn from_inner(graph: &str, error: MutationErrorInner) -> Self { + InsertionError::Insertion { + graph: graph.to_string(), + error, + } + } + + pub fn from_graph_err(graph: &str, error: GraphError) -> Self { + InsertionError::GraphError { + graph: graph.to_string(), + error, + } + } +} + +#[derive(thiserror::Error, Debug)] +pub enum DeletionError { + #[error("Failed to delete graph {graph}: {error}")] + Insertion { + graph: String, + error: MutationErrorInner, + }, + #[error(transparent)] + PathValidation(#[from] PathValidationError), +} + +#[derive(thiserror::Error, Debug)] +pub enum MoveError { + #[error("Failed to move graph: {0}")] + Insertion(#[from] InsertionError), + #[error("Failed to move graph: {0}")] + Deletion(#[from] DeletionError), +} + +impl DeletionError { + fn from_inner(graph: &str, error: MutationErrorInner) -> Self { + DeletionError::Insertion { + graph: graph.to_string(), + error, + } + } } +/// Get relative path as String joined with `"/"` for use with the validation methods. +/// The path is not validated here! 
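The new error types introduced above share one shape: a common inner enum absorbs low-level failures via `#[from]`, and the outer error attaches the graph name for context. A self-contained sketch of that pattern; `Inner` and `DeleteError` are hypothetical stand-ins for `MutationErrorInner`/`DeletionError`:

```rust
use thiserror::Error;

#[derive(Error, Debug)]
enum Inner {
    #[error(transparent)]
    Io(#[from] std::io::Error),
}

#[derive(Error, Debug)]
#[error("Failed to delete graph {graph}: {error}")]
struct DeleteError {
    graph: String,
    error: Inner,
}

fn delete(graph: &str) -> Result<(), DeleteError> {
    std::fs::remove_dir_all(graph).map_err(|e| DeleteError {
        graph: graph.to_string(),
        error: e.into(), // io::Error -> Inner via #[from]
    })
}

fn main() {
    if let Err(e) = delete("no_such_graph") {
        eprintln!("{e}");
    }
}
```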
pub(crate) fn get_relative_path( - work_dir: PathBuf, + work_dir: &Path, path: &Path, - namespace: bool, -) -> Result { - let path_buf = path.strip_prefix(work_dir.clone())?.to_path_buf(); - let components = path_buf - .components() - .into_iter() - .map(|c| { - c.as_os_str() - .to_str() - .ok_or(InvalidPathReason::NonUTFCharacters) - }) - .collect::, _>>()?; - //a safe unwrap as checking above - let path_str = components.into_iter().join("/"); - valid_path(work_dir, &path_str, namespace)?; +) -> Result { + let relative = path.strip_prefix(work_dir)?; + let mut path_str = String::new(); + let mut components = relative.components().map(|component| { + component + .as_os_str() + .to_str() + .ok_or(InternalPathValidationError::NonUTFCharacters) + }); + if let Some(first) = components.next() { + path_str.push_str(first?); + } + for component in components { + path_str.push('/'); + path_str.push_str(component?); + } Ok(path_str) } -#[derive(Clone)] pub struct Data { pub(crate) work_dir: PathBuf, - cache: Cache, + cache: Cache, pub(crate) create_index: bool, pub(crate) embedding_conf: Option, } @@ -65,23 +141,23 @@ impl Data { pub fn new(work_dir: &Path, configs: &AppConfig) -> Self { let cache_configs = &configs.cache; - let cache = Cache::::builder() + let cache = Cache::::builder() .max_capacity(cache_configs.capacity) .time_to_idle(std::time::Duration::from_secs(cache_configs.tti_seconds)) - .eviction_listener(|_, graph, cause| { + .async_eviction_listener(|_, graph, cause| { // The eviction listener gets called any time a graph is removed from the cache, // not just when it is evicted. Only serialize on evictions. - if !cause.was_evicted() { - return; - } - - // On eviction, serialize graphs that don't have underlying storage. - // FIXME: don't have currently a way to know which embedding updates are pending - if !graph.graph.disk_storage_enabled() && graph.is_dirty() { - if let Err(e) = Self::encode_graph_to_disk(graph.clone()) { - warn!("Error encoding graph to disk on eviction: {e}"); + async move { + if !cause.was_evicted() { + return; + } + if let Err(e) = + blocking_compute(move || graph.folder.replace_graph_data(graph.graph)).await + { + error!("Error encoding graph to disk on eviction: {e}"); } } + .boxed() }) .build(); @@ -98,101 +174,94 @@ impl Data { } } - pub async fn get_graph( + pub fn validate_path_for_insert( &self, path: &str, - ) -> Result<(GraphWithVectors, ExistingGraphFolder), Arc> { - let graph_folder = ExistingGraphFolder::try_from(self.work_dir.clone(), path)?; - let graph_folder_clone = graph_folder.clone(); + overwrite: bool, + ) -> Result { + if overwrite { + ValidWriteableGraphFolder::try_existing_or_new(self.work_dir.clone(), path) + } else { + ValidWriteableGraphFolder::try_new(self.work_dir.clone(), path) + } + } + + pub async fn get_graph(&self, path: &str) -> Result> { self.cache - .try_get_with(path.into(), self.read_graph_from_folder(graph_folder_clone)) + .try_get_with(path.into(), self.read_graph_from_disk(path)) .await - .map(|graph| (graph, graph_folder)) } - pub async fn has_graph(&self, path: &str) -> bool { - ExistingGraphFolder::try_from(self.work_dir.clone(), path).is_ok() + pub async fn get_cached_graph(&self, path: &str) -> Option { + self.cache.get(path).await } - pub fn validate_path_for_insert( - &self, - path: &str, - overwrite: bool, - ) -> Result { - let folder = ValidGraphFolder::try_from(self.work_dir.clone(), path)?; - - match ExistingGraphFolder::try_from(self.work_dir.clone(), path) { - Ok(_) => { - if overwrite { - Ok(folder) - } 
else { - Err(GraphError::GraphNameAlreadyExists(folder.to_error_path())) - } - } - Err(_) => Ok(folder), - } + pub fn has_graph(&self, path: &str) -> bool { + self.cache.contains_key(path) + || ExistingGraphFolder::try_from(self.work_dir.clone(), path).is_ok() } pub async fn insert_graph( &self, - folder: ValidGraphFolder, + writeable_folder: ValidWriteableGraphFolder, graph: MaterializedGraph, - ) -> Result<(), GraphError> { - let vectors = self.vectorise(graph.clone(), &folder).await; - let graph = GraphWithVectors::new(graph, vectors, folder.clone().into()); - - let graph_clone = graph.clone(); - let folder_clone = folder.clone(); - - blocking_io(move || { - // Graphs with underlying storage already write data to disk. - // They just need to write metadata, primarily to infer the graph type. - // Graphs without storage are encoded to the folder. - if graph_clone.disk_storage_enabled() { - folder_clone.write_metadata(&graph_clone)?; - } else { - Self::encode_graph_to_disk(graph_clone)?; - } - - Ok::<(), GraphError>(()) + ) -> Result<(), InsertionError> { + let vectors = self.vectorise(graph.clone(), &writeable_folder).await; + let graph = blocking_compute(move || { + writeable_folder.write_graph_data(graph.clone())?; + let folder = writeable_folder.finish()?; + let graph = GraphWithVectors::new(graph, vectors, folder.as_existing()?); + Ok::<_, InsertionError>(graph) }) .await?; - let path = folder.get_original_path_str(); - self.cache.insert(path.into(), graph).await; - + self.cache + .insert(graph.folder.local_path_string(), graph) + .await; Ok(()) } - /// Insert a graph into the cache without writing to disk. - pub async fn insert_graph_into_cache(&self, path: &str, graph: GraphWithVectors) { - let path = path.into(); - self.cache.insert(path, graph).await; - } - /// Insert a graph serialized from a graph folder. 
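The `async_eviction_listener` wired up in `Data::new` above must return a boxed future, which is why `FutureExt` joins the imports. A minimal sketch of that listener shape, assuming moka 0.12 with the `future` feature plus futures-util and tokio:

```rust
use futures_util::FutureExt;
use moka::future::Cache;

#[tokio::main]
async fn main() {
    let cache: Cache<String, String> = Cache::builder()
        .max_capacity(1)
        .async_eviction_listener(|key, _value, cause| {
            async move {
                // Runs on every removal; persist only on true evictions,
                // mirroring the `was_evicted` guard in the diff.
                if cause.was_evicted() {
                    println!("evicted {key}");
                }
            }
            .boxed()
        })
        .build();
    cache.insert("a".into(), "1".into()).await;
    cache.insert("b".into(), "2".into()).await;
    cache.run_pending_tasks().await; // give the listener a chance to fire
}
```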
- pub async fn insert_graph_as_bytes( + pub async fn insert_graph_as_bytes( &self, - folder: ValidGraphFolder, + folder: ValidWriteableGraphFolder, bytes: R, - ) -> Result<(), GraphError> { - let path = folder.get_original_path_str(); - folder.unzip_to_folder(bytes)?; - - let existing_folder = ExistingGraphFolder::try_from(self.work_dir.clone(), path)?; - self.vectorise_folder(&existing_folder).await; + ) -> Result<(), InsertionError> { + let folder_clone = folder.clone(); + blocking_io(move || folder_clone.write_graph_bytes(bytes)).await?; + if let Some(template) = self.resolve_template(folder.local_path()) { + let folder_clone = folder.clone(); + let graph = blocking_io(move || folder_clone.read_graph()).await?; + self.vectorise_with_template(graph, &folder, template).await; + } + blocking_io(move || folder.finish()).await?; + Ok(()) + } + async fn delete_graph_inner( + &self, + graph_folder: ExistingGraphFolder, + ) -> Result<(), MutationErrorInner> { + blocking_io(move || { + let dirty_file = mark_dirty(graph_folder.root())?; + fs::remove_dir_all(graph_folder.root())?; + fs::remove_file(dirty_file)?; + Ok::<_, MutationErrorInner>(()) + }) + .await?; Ok(()) } - pub async fn delete_graph(&self, path: &str) -> Result<(), GraphError> { + pub async fn delete_graph(&self, path: &str) -> Result<(), DeletionError> { let graph_folder = ExistingGraphFolder::try_from(self.work_dir.clone(), path)?; - fs::remove_dir_all(graph_folder.get_base_path()).await?; - self.cache.remove(&PathBuf::from(path)).await; + self.delete_graph_inner(graph_folder) + .await + .map_err(|err| DeletionError::from_inner(path, err))?; + self.cache.remove(path).await; Ok(()) } - fn resolve_template(&self, graph: &Path) -> Option<&DocumentTemplate> { + fn resolve_template(&self, graph: &str) -> Option<&DocumentTemplate> { let conf = self.embedding_conf.as_ref()?; conf.individual_templates .get(graph) @@ -202,7 +271,7 @@ impl Data { async fn vectorise_with_template( &self, graph: MaterializedGraph, - folder: &ValidGraphFolder, + folder: &impl ValidGraphPaths, template: &DocumentTemplate, ) -> Option> { let conf = self.embedding_conf.as_ref()?; @@ -210,14 +279,14 @@ impl Data { .vectorise( conf.cache.clone(), template.clone(), - Some(&folder.get_vectors_path()), + Some(&folder.graph_folder().vectors_path().ok()?), true, // verbose ) .await; match vectors { Ok(vectors) => Some(vectors), Err(error) => { - let name = folder.get_original_path_str(); + let name = folder.local_path_string(); warn!("An error occurred when trying to vectorise graph {name}: {error}"); None } @@ -227,85 +296,62 @@ impl Data { async fn vectorise( &self, graph: MaterializedGraph, - folder: &ValidGraphFolder, + folder: &ValidWriteableGraphFolder, ) -> Option> { - let template = self.resolve_template(folder.get_original_path())?; + let template = self.resolve_template(folder.local_path())?; self.vectorise_with_template(graph, folder, template).await } - async fn vectorise_folder(&self, folder: &ExistingGraphFolder) -> Option<()> { + async fn vectorise_folder(&self, folder: ExistingGraphFolder) -> Option<()> { // it's important that we check if there is a valid template set for this graph path // before actually loading the graph, otherwise we are loading the graph for no reason - let template = self.resolve_template(folder.get_original_path())?; + let template = self.resolve_template(folder.local_path())?; let graph = self - .read_graph_from_folder(folder.clone()) + .read_graph_from_disk_inner(folder.clone()) .await .ok()? 
.graph; - self.vectorise_with_template(graph, folder, template).await; + self.vectorise_with_template(graph, &folder, template).await; Some(()) } pub(crate) async fn vectorise_all_graphs_that_are_not(&self) -> Result<(), GraphError> { for folder in self.get_all_graph_folders() { - if !folder.get_vectors_path().exists() { - self.vectorise_folder(&folder).await; + if !folder.vectors_path()?.exists() { + self.vectorise_folder(folder).await; } } Ok(()) } - // TODO: return iter - pub fn get_all_graph_folders(&self) -> Vec { + pub fn get_all_graph_folders(&self) -> impl Iterator { let base_path = self.work_dir.clone(); WalkDir::new(&self.work_dir) .into_iter() - .filter_map(|e| { + .filter_map(move |e| { let entry = e.ok()?; let path = entry.path(); - let relative = get_relative_path(base_path.clone(), path, false).ok()?; + let relative = get_relative_path(&base_path, path).ok()?; let folder = ExistingGraphFolder::try_from(base_path.clone(), &relative).ok()?; Some(folder) }) - .collect() } - async fn read_graph_from_folder( + async fn read_graph_from_disk_inner( &self, folder: ExistingGraphFolder, - ) -> Result { + ) -> Result { let cache = self.embedding_conf.as_ref().map(|conf| conf.cache.clone()); let create_index = self.create_index; - blocking_io(move || GraphWithVectors::read_from_folder(&folder, cache, create_index)).await + Ok( + blocking_io(move || GraphWithVectors::read_from_folder(&folder, cache, create_index)) + .await?, + ) } - /// Serializes a graph to disk, overwriting any existing data in its folder. - fn encode_graph_to_disk(graph: GraphWithVectors) -> Result<(), GraphError> { - let folder_path = graph.folder.get_base_path(); - - // Create a backup of the existing folder - if folder_path.exists() { - let bak_path = folder_path.with_extension("bak"); - - // Remove any old backups - if bak_path.exists() { - std::fs::remove_dir_all(&bak_path)?; - } - - std::fs::rename(&folder_path, &bak_path)?; - } - - // Serialize the graph to the original folder path - graph.graph.encode(&folder_path)?; - - // Delete the backup on success - let bak_path = folder_path.with_extension("bak"); - - if bak_path.exists() { - std::fs::remove_dir_all(&bak_path)?; - } - - Ok(()) + async fn read_graph_from_disk(&self, path: &str) -> Result { + let folder = ExistingGraphFolder::try_from(self.work_dir.clone(), path)?; + self.read_graph_from_disk_inner(folder).await } } @@ -313,9 +359,9 @@ impl Drop for Data { fn drop(&mut self) { // On drop, serialize graphs that don't have underlying storage. 
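`delete_graph_inner` above orders its filesystem work for crash safety: write a `.dirty` marker, remove the folder, then clear the marker only once removal succeeded, so an interrupted delete leaves evidence behind. A self-contained sketch of that ordering; this `mark_dirty` is a simplified stand-in for the helper in `paths`:

```rust
use std::{
    fs, io,
    path::{Path, PathBuf},
};

// Simplified stand-in: the real helper lives in `paths` and its marker
// layout may differ.
fn mark_dirty(root: &Path) -> io::Result<PathBuf> {
    let marker = root.with_extension("dirty");
    fs::write(&marker, b"")?;
    Ok(marker)
}

fn delete_graph_dir(root: &Path) -> io::Result<()> {
    let marker = mark_dirty(root)?; // 1. record intent
    fs::remove_dir_all(root)?; //      2. remove the data
    fs::remove_file(marker) //         3. clear the marker only on success
}

fn main() -> io::Result<()> {
    let root = std::env::temp_dir().join("demo_graph");
    fs::create_dir_all(&root)?;
    delete_graph_dir(&root)
}
```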
for (_, graph) in self.cache.iter() { - if !graph.graph.disk_storage_enabled() && graph.is_dirty() { - if let Err(e) = Self::encode_graph_to_disk(graph.clone()) { - warn!("Error encoding graph to disk on drop: {e}"); + if graph.is_dirty() { + if let Err(e) = graph.folder.replace_graph_data(graph.graph) { + error!("Error encoding graph to disk on drop: {e}"); } } } @@ -324,34 +370,31 @@ impl Drop for Data { #[cfg(test)] pub(crate) mod data_tests { - use super::ValidGraphFolder; - use crate::{ - config::app_config::{AppConfig, AppConfigBuilder}, - data::Data, - }; + use super::InsertionError; + use crate::{config::app_config::AppConfigBuilder, data::Data}; use itertools::Itertools; use raphtory::{ db::api::view::{internal::InternalStorageOps, MaterializedGraph}, - errors::GraphError, prelude::*, + serialise::GraphPaths, }; use std::{collections::HashMap, fs, path::Path, time::Duration}; use tokio::time::sleep; fn create_graph_folder(path: &Path) { // Use empty graph to create folder structure + fs::create_dir_all(path).unwrap(); let graph = Graph::new(); graph.encode(path).unwrap(); } - pub(crate) fn save_graphs_to_work_dir( - work_dir: &Path, + pub(crate) async fn save_graphs_to_work_dir( + data: &Data, graphs: &HashMap, - ) -> Result<(), GraphError> { + ) -> Result<(), InsertionError> { for (name, graph) in graphs.into_iter() { - let data = Data::new(work_dir, &AppConfig::default()); - let folder = ValidGraphFolder::try_from(data.work_dir.clone(), name)?; - graph.encode(folder)?; + let folder = data.validate_path_for_insert(name, true)?; + data.insert_graph(folder, graph.clone()).await?; } Ok(()) } @@ -374,11 +417,10 @@ pub(crate) mod data_tests { let mut graphs = HashMap::new(); graphs.insert("test_g".to_string(), graph); - - save_graphs_to_work_dir(tmp_work_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_work_dir.path(), &Default::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); + for graph in graphs.keys() { assert!(data.get_graph(graph).await.is_ok(), "could not get {graph}") } @@ -406,21 +448,21 @@ pub(crate) mod data_tests { let data = Data::new(tmp_work_dir.path(), &configs); - assert!(!data.cache.contains_key(Path::new("test_g"))); - assert!(!data.cache.contains_key(Path::new("test_g2"))); + assert!(!data.cache.contains_key("test_g")); + assert!(!data.cache.contains_key("test_g2")); // Test size based eviction data.get_graph("test_g2").await.unwrap(); - assert!(data.cache.contains_key(Path::new("test_g2"))); - assert!(!data.cache.contains_key(Path::new("test_g"))); + assert!(data.cache.contains_key("test_g2")); + assert!(!data.cache.contains_key("test_g")); data.get_graph("test_g").await.unwrap(); // wait for any eviction data.cache.run_pending_tasks().await; assert_eq!(data.cache.iter().count(), 1); sleep(Duration::from_secs(3)).await; - assert!(!data.cache.contains_key(Path::new("test_g"))); - assert!(!data.cache.contains_key(Path::new("test_g2"))); + assert!(!data.cache.contains_key("test_g")); + assert!(!data.cache.contains_key("test_g2")); } #[tokio::test] @@ -459,7 +501,7 @@ pub(crate) mod data_tests { let paths = data .get_all_graph_folders() .into_iter() - .map(|folder| folder.get_base_path().to_path_buf()) + .map(|folder| folder.0.root().to_path_buf()) .collect_vec(); assert_eq!(paths.len(), 5); @@ -518,11 +560,11 @@ pub(crate) mod data_tests { let data = Data::new(tmp_work_dir.path(), &configs); - let (loaded_graph1, _) = data.get_graph("test_graph1").await.unwrap(); - let (loaded_graph2, _) = data.get_graph("test_graph2").await.unwrap(); + 
let loaded_graph1 = data.get_graph("test_graph1").await.unwrap(); + let loaded_graph2 = data.get_graph("test_graph2").await.unwrap(); // TODO: This test doesn't work with disk storage right now, make sure modification dates actually update correctly! - if loaded_graph1.graph.disk_storage_enabled() { + if loaded_graph1.graph.disk_storage_enabled().is_some() { assert!( !loaded_graph1.is_dirty(), "Graph1 should not be dirty when loaded from disk" @@ -603,7 +645,7 @@ pub(crate) mod data_tests { let data = Data::new(tmp_work_dir.path(), &configs); // Load first graph - let (loaded_graph1, _) = data.get_graph("test_graph1").await.unwrap(); + let loaded_graph1 = data.get_graph("test_graph1").await.unwrap(); assert!( !loaded_graph1.is_dirty(), "Graph1 should not be dirty when loaded from disk" @@ -618,7 +660,7 @@ pub(crate) mod data_tests { // Load second graph println!("Loading second graph"); - let (loaded_graph2, _) = data.get_graph("test_graph2").await.unwrap(); + let loaded_graph2 = data.get_graph("test_graph2").await.unwrap(); assert!( !loaded_graph2.is_dirty(), "Graph2 should not be dirty when loaded from disk" @@ -629,7 +671,7 @@ pub(crate) mod data_tests { data.cache.run_pending_tasks().await; // TODO: This test doesn't work with disk storage right now, make sure modification dates actually update correctly! - if loaded_graph1.graph.disk_storage_enabled() { + if loaded_graph1.graph.disk_storage_enabled().is_some() { // Check modification times after eviction let graph1_metadata_after = fs::metadata(&graph1_path).unwrap(); let graph2_metadata_after = fs::metadata(&graph2_path).unwrap(); diff --git a/raphtory-graphql/src/graph.rs b/raphtory-graphql/src/graph.rs index 13835e4f0d..5c039908ae 100644 --- a/raphtory-graphql/src/graph.rs +++ b/raphtory-graphql/src/graph.rs @@ -1,9 +1,4 @@ -use std::sync::{ - atomic::{AtomicBool, Ordering}, - Arc, -}; - -use crate::paths::ExistingGraphFolder; +use crate::paths::{ExistingGraphFolder, ValidGraphPaths}; use raphtory::{ core::entities::nodes::node_ref::AsNodeRef, db::{ @@ -13,27 +8,30 @@ use raphtory::{ }, Base, InheritViewOps, MaterializedGraph, }, - graph::{edge::EdgeView, node::NodeView, views::deletion_graph::PersistentGraph}, + graph::{edge::EdgeView, node::NodeView}, }, errors::{GraphError, GraphResult}, - prelude::{EdgeViewOps, Graph, StableDecode}, - serialise::GraphFolder, + prelude::EdgeViewOps, vectors::{cache::VectorCache, vectorised_graph::VectorisedGraph}, }; -use raphtory_api::GraphType; use raphtory_storage::{ core_ops::InheritCoreGraphOps, layer_ops::InheritLayerOps, mutation::InheritMutationOps, }; +use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, +}; use tracing::info; #[cfg(feature = "search")] use raphtory::prelude::IndexMutationOps; +use raphtory::serialise::{GraphPaths, StableDecode}; #[derive(Clone)] pub struct GraphWithVectors { pub graph: MaterializedGraph, pub vectors: Option>, - pub(crate) folder: GraphFolder, + pub(crate) folder: ExistingGraphFolder, pub(crate) is_dirty: Arc, } @@ -41,12 +39,12 @@ impl GraphWithVectors { pub(crate) fn new( graph: MaterializedGraph, vectors: Option>, - folder: GraphFolder, + folder: ExistingGraphFolder, ) -> Self { Self { graph, vectors, - folder: folder, + folder, is_dirty: Arc::new(AtomicBool::new(false)), } } @@ -88,36 +86,17 @@ impl GraphWithVectors { cache: Option, create_index: bool, ) -> Result { - let graph = { - // Either decode a graph serialized using encode or load using underlying storage. 
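The tests above switch from a boolean check to `disk_storage_enabled().is_some()`: the probe now yields the backing directory when disk storage is active rather than a bare flag. A minimal sketch of that Option-returning shape, with a hypothetical `Storage` type:

```rust
use std::path::{Path, PathBuf};

struct Storage {
    graph_dir: Option<PathBuf>,
}

impl Storage {
    // Some(dir) when the graph is disk-backed, None for in-memory graphs.
    fn disk_storage_enabled(&self) -> Option<&Path> {
        self.graph_dir.as_deref()
    }
}

fn main() {
    let s = Storage {
        graph_dir: Some(PathBuf::from("/tmp/g")),
    };
    match s.disk_storage_enabled() {
        Some(dir) => println!("disk-backed at {}", dir.display()),
        None => println!("in-memory"),
    }
}
```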
- if MaterializedGraph::is_decodable(folder.get_graph_path()) { - let path_for_decoded_graph = None; - MaterializedGraph::decode(folder.clone(), path_for_decoded_graph)? - } else { - let metadata = folder.read_metadata()?; - let graph = match metadata.graph_type { - GraphType::EventGraph => { - let graph = Graph::load_from_path(folder.get_graph_path()); - MaterializedGraph::EventGraph(graph) - } - GraphType::PersistentGraph => { - let graph = PersistentGraph::load_from_path(folder.get_graph_path()); - MaterializedGraph::PersistentGraph(graph) - } - }; - - #[cfg(feature = "search")] - graph.load_index(&folder)?; - - graph - } + let graph_folder = folder.graph_folder(); + let graph = if graph_folder.read_metadata()?.is_diskgraph { + MaterializedGraph::load_from_path(graph_folder)? + } else { + MaterializedGraph::decode(graph_folder)? }; - let vectors = cache.and_then(|cache| { - VectorisedGraph::read_from_path(&folder.get_vectors_path(), graph.clone(), cache).ok() + VectorisedGraph::read_from_path(&folder.vectors_path().ok()?, graph.clone(), cache).ok() }); - info!("Graph loaded = {}", folder.get_original_path_str()); + info!("Graph loaded = {}", folder.local_path()); #[cfg(feature = "search")] if create_index { diff --git a/raphtory-graphql/src/lib.rs b/raphtory-graphql/src/lib.rs index be5eab06c4..db5a3bf972 100644 --- a/raphtory-graphql/src/lib.rs +++ b/raphtory-graphql/src/lib.rs @@ -1,4 +1,8 @@ pub use crate::server::GraphServer; +use crate::{data::InsertionError, paths::PathValidationError}; +use raphtory::errors::GraphError; +use std::sync::Arc; + mod auth; pub mod data; mod embeddings; @@ -15,15 +19,29 @@ pub mod config; pub mod python; pub mod rayon; +#[derive(thiserror::Error, Debug)] +pub enum GQLError { + #[error(transparent)] + GraphError(#[from] GraphError), + #[error(transparent)] + Validation(#[from] PathValidationError), + #[error("Insertion failed for Graph {graph}: {error}")] + Insertion { + graph: String, + error: InsertionError, + }, + #[error(transparent)] + Arc(#[from] Arc), +} + #[cfg(test)] mod graphql_test { use crate::{ - config::app_config::{AppConfig, AppConfigBuilder}, + config::app_config::AppConfig, data::{data_tests::save_graphs_to_work_dir, Data}, model::App, - url_encode::{url_decode_graph, url_encode_graph}, + url_encode::{url_decode_graph_at, url_encode_graph}, }; - use arrow_array::types::UInt8Type; use async_graphql::UploadValue; use dynamic_graphql::{Request, Variables}; use raphtory::{ @@ -42,6 +60,31 @@ mod graphql_test { }; use tempfile::tempdir; + #[cfg(feature = "search")] + use crate::config::app_config::AppConfigBuilder; + + #[tokio::test] + async fn test_copy_graph() { + let graph = Graph::new(); + graph.add_node(1, "test", NO_PROPS, None).unwrap(); + let tmp_dir = tempdir().unwrap(); + let data = Data::new(tmp_dir.path(), &AppConfig::default()); + let namespace = tmp_dir.path().join("test"); + fs::create_dir(&namespace).unwrap(); + graph.encode(namespace.join("g3")).unwrap(); + let schema = App::create_schema().data(data).finish().unwrap(); + let query = r#"mutation { + copyGraph( + path: "test/g3", + newPath: "test/g4", + ) + }"#; + + let req = Request::new(query); + let res = schema.execute(req).await; + assert_eq!(res.errors, []); + } + #[tokio::test] #[cfg(feature = "search")] async fn test_search_nodes_gql() { @@ -99,10 +142,9 @@ mod graphql_test { let graphs = HashMap::from([("master".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let config = 
AppConfigBuilder::new().with_create_index(true).build(); let data = Data::new(tmp_dir.path(), &config); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); let schema = App::create_schema().data(data).finish().unwrap(); @@ -199,9 +241,8 @@ mod graphql_test { let graph: MaterializedGraph = graph.into(); let graphs = HashMap::from([("lotr".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); let schema = App::create_schema().data(data).finish().unwrap(); @@ -310,9 +351,9 @@ mod graphql_test { let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); + let schema = App::create_schema().data(data).finish().unwrap(); let prop_has_key_filter = r#" { @@ -408,9 +449,9 @@ mod graphql_test { let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); + let schema = App::create_schema().data(data).finish().unwrap(); let prop_has_key_filter = r#" { @@ -449,6 +490,7 @@ mod graphql_test { #[tokio::test] async fn test_unique_temporal_properties() { + // TODO: this doesn't test anything? let g = Graph::new(); g.add_metadata([("name", "graph")]).unwrap(); g.add_properties(1, [("state", "abc")]).unwrap(); @@ -473,7 +515,8 @@ mod graphql_test { let graph: MaterializedGraph = g.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); + let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); let expected = json!({ "graph": { @@ -624,9 +667,9 @@ mod graphql_test { let g = g.into(); let graphs = HashMap::from([("graph".to_string(), g)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); + let schema = App::create_schema().data(data).finish().unwrap(); let prop_has_key_filter = r#" @@ -868,9 +911,9 @@ mod graphql_test { let graph = graph.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); + let schema = App::create_schema().data(data).finish().unwrap(); let prop_has_key_filter = r#" { @@ -953,7 +996,7 @@ mod graphql_test { let req = Request::new(list_nodes); let res = schema.execute(req).await; - assert_eq!(res.errors.len(), 0); + assert_eq!(res.errors, []); let res_json = res.data.into_json().unwrap(); assert_eq!( res_json, @@ -982,7 +1025,7 @@ mod graphql_test { )); let res = schema.execute(req).await; - assert_eq!(res.errors.len(), 0); + assert_eq!(res.errors, []); let res_json = res.data.into_json().unwrap(); assert_eq!(res_json, json!({"sendGraph": "test"})); @@ -1019,8 +1062,7 @@ mod graphql_test { let res_json = 
res.data.into_json().unwrap(); let graph_encoded = res_json.get("receiveGraph").unwrap().as_str().unwrap(); let temp_dir = tempdir().unwrap(); - let path_for_decoded_graph = Some(temp_dir.path()); - let graph_roundtrip = url_decode_graph(graph_encoded, path_for_decoded_graph) + let graph_roundtrip = url_decode_graph_at(graph_encoded, temp_dir.path()) .unwrap() .into_dynamic(); assert_eq!(g, graph_roundtrip); @@ -1047,9 +1089,9 @@ mod graphql_test { let graph = graph.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); + let schema = App::create_schema().data(data).finish().unwrap(); let req = r#" @@ -1188,9 +1230,8 @@ mod graphql_test { ("graph6".to_string(), graph6.into()), ]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); let schema = App::create_schema().data(data).finish().unwrap(); let req = r#" @@ -1409,9 +1450,8 @@ mod graphql_test { let graph = graph.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); - let data = Data::new(tmp_dir.path(), &AppConfig::default()); + save_graphs_to_work_dir(&data, &graphs).await.unwrap(); let schema = App::create_schema().data(data).finish().unwrap(); let req = r#" diff --git a/raphtory-graphql/src/model/graph/graph.rs b/raphtory-graphql/src/model/graph/graph.rs index 8b6222a614..72eac7fb3c 100644 --- a/raphtory-graphql/src/model/graph/graph.rs +++ b/raphtory-graphql/src/model/graph/graph.rs @@ -17,9 +17,10 @@ use crate::{ }, paths::ExistingGraphFolder, rayon::blocking_compute, + GQLError, }; use async_graphql::Context; -use dynamic_graphql::{ResolvedObject, ResolvedObjectFields}; +use dynamic_graphql::{ResolvedObject, ResolvedObjectFields, Result}; use itertools::Itertools; use raphtory::{ core::{ @@ -38,15 +39,18 @@ use raphtory::{ }, }, }, - errors::{GraphError, InvalidPathReason}, + errors::GraphError, prelude::*, }; use std::{ collections::HashSet, convert::{Into, TryInto}, - sync::Arc, }; +use crate::{ + graph::GraphWithVectors, + paths::{PathValidationError, ValidGraphPaths}, +}; #[cfg(feature = "search")] use raphtory::db::api::view::SearchableGraphOps; @@ -57,6 +61,12 @@ pub(crate) struct GqlGraph { graph: DynamicGraph, } +impl From for GqlGraph { + fn from(value: GraphWithVectors) -> Self { + GqlGraph::new(value.folder, value.graph) + } +} + impl GqlGraph { pub fn new(path: ExistingGraphFolder, graph: G) -> Self { Self { @@ -246,18 +256,18 @@ impl GqlGraph { //////////////////////// /// Returns the timestamp for the creation of the graph. - async fn created(&self) -> Result { - self.path.created_async().await + async fn created(&self) -> Result { + Ok(self.path.created_async().await?) } /// Returns the graph's last opened timestamp according to system time. - async fn last_opened(&self) -> Result { - self.path.last_opened_async().await + async fn last_opened(&self) -> Result { + Ok(self.path.last_opened_async().await?) } /// Returns the graph's last updated timestamp. - async fn last_updated(&self) -> Result { - self.path.last_updated_async().await + async fn last_updated(&self) -> Result { + Ok(self.path.last_updated_async().await?) 
} /// Returns the timestamp of the earliest activity in the graph. @@ -415,33 +425,22 @@ impl GqlGraph { //if someone write non-utf characters as a filename /// Returns the graph name. - async fn name(&self) -> Result { + async fn name(&self) -> Result { self.path.get_graph_name() } /// Returns path of graph. - async fn path(&self) -> Result { - Ok(self - .path - .get_original_path() - .to_str() - .ok_or(InvalidPathReason::PathNotParsable( - self.path.to_error_path(), - ))? - .to_owned()) + async fn path(&self) -> String { + self.path.local_path_string() } /// Returns namespace of graph. - async fn namespace(&self) -> Result { - Ok(self - .path - .get_original_path() - .parent() - .and_then(|p| p.to_str().map(|s| s.to_string())) - .ok_or(InvalidPathReason::PathNotParsable( - self.path.to_error_path(), - ))? - .to_owned()) + async fn namespace(&self) -> String { + self.path + .local_path() + .rsplit_once("/") + .map_or("", |(prefix, _)| prefix) + .to_string() } /// Returns the graph schema. @@ -486,13 +485,9 @@ impl GqlGraph { } /// Export all nodes and edges from this graph view to another existing graph - async fn export_to<'a>( - &self, - ctx: &Context<'a>, - path: String, - ) -> Result> { + async fn export_to<'a>(&self, ctx: &Context<'a>, path: String) -> Result { let data = ctx.data_unchecked::(); - let other_g = data.get_graph(path.as_ref()).await?.0; + let other_g = data.get_graph(path.as_ref()).await?.graph; let g = self.graph.clone(); blocking_compute(move || { other_g.import_nodes(g.nodes(), true)?; diff --git a/raphtory-graphql/src/model/graph/meta_graph.rs b/raphtory-graphql/src/model/graph/meta_graph.rs index 72316bf2b2..57590581e6 100644 --- a/raphtory-graphql/src/model/graph/meta_graph.rs +++ b/raphtory-graphql/src/model/graph/meta_graph.rs @@ -1,6 +1,16 @@ -use crate::{model::graph::property::GqlProperty, paths::ExistingGraphFolder}; -use dynamic_graphql::{ResolvedObject, ResolvedObjectFields}; -use raphtory::{errors::GraphError, serialise::metadata::GraphMetadata}; +use crate::{ + data::Data, + graph::GraphWithVectors, + model::graph::property::GqlProperty, + paths::{ExistingGraphFolder, ValidGraphPaths}, +}; +use async_graphql::Context; +use dynamic_graphql::{ResolvedObject, ResolvedObjectFields, Result}; +use raphtory::{ + db::api::storage::storage::{Extension, PersistentStrategy}, + prelude::{GraphViewOps, PropertiesOps}, + serialise::{metadata::GraphMetadata, parquet::decode_graph_metadata}, +}; use std::{cmp::Ordering, sync::Arc}; use tokio::sync::OnceCell; @@ -39,10 +49,11 @@ impl MetaGraph { } } - async fn meta(&self) -> Result<&GraphMetadata, GraphError> { - self.meta + async fn meta(&self) -> Result<&GraphMetadata> { + Ok(self + .meta .get_or_try_init(|| self.folder.read_metadata_async()) - .await + .await?) } } @@ -56,26 +67,26 @@ impl MetaGraph { /// Returns path of graph. async fn path(&self) -> String { - self.folder.get_original_path_str().to_owned() + self.folder.local_path_string() } /// Returns the timestamp for the creation of the graph. - async fn created(&self) -> Result { - self.folder.created_async().await + async fn created(&self) -> Result { + Ok(self.folder.created_async().await?) } /// Returns the graph's last opened timestamp according to system time. - async fn last_opened(&self) -> Result { - self.folder.last_opened_async().await + async fn last_opened(&self) -> Result { + Ok(self.folder.last_opened_async().await?) } /// Returns the graph's last updated timestamp. 
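The rewritten `path`/`namespace` resolvers above work purely on the normalized relative-path string: components are joined with `/`, so the namespace is simply everything before the last separator. A tiny self-contained sketch:

```rust
// Everything before the last '/' is the namespace; top-level graphs get "".
fn namespace_of(local_path: &str) -> &str {
    local_path.rsplit_once('/').map_or("", |(prefix, _)| prefix)
}

fn main() {
    assert_eq!(namespace_of("test/ns/g1"), "test/ns");
    assert_eq!(namespace_of("g1"), "");
}
```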
- async fn last_updated(&self) -> Result { - self.folder.last_updated_async().await + async fn last_updated(&self) -> Result { + Ok(self.folder.last_updated_async().await?) } /// Returns the number of nodes in the graph. - async fn node_count(&self) -> Result { + async fn node_count(&self) -> Result { Ok(self.meta().await?.node_count) } @@ -83,18 +94,31 @@ impl MetaGraph { /// /// Returns: /// int: - async fn edge_count(&self) -> Result { + async fn edge_count(&self) -> Result { Ok(self.meta().await?.edge_count) } /// Returns the metadata of the graph. - async fn metadata(&self) -> Result, GraphError> { - Ok(self - .meta() - .await? - .metadata - .iter() - .map(|(key, prop)| GqlProperty::new(key.to_string(), prop.clone())) - .collect()) + async fn metadata(&self, ctx: &Context<'_>) -> Result> { + let data: &Data = ctx.data_unchecked(); + let maybe_cached = if Extension::disk_storage_enabled() { + let graph = data.get_graph(self.folder.local_path()).await?; + Some(graph) + } else { + data.get_cached_graph(self.folder.local_path()).await + }; + let res = match maybe_cached { + None => decode_graph_metadata(self.folder.graph_folder())? + .into_iter() + .filter_map(|(key, value)| value.map(|prop| GqlProperty::new(key, prop))) + .collect(), + Some(graph) => graph + .graph + .metadata() + .iter() + .filter_map(|(key, value)| value.map(|prop| GqlProperty::new(key.into(), prop))) + .collect(), + }; + Ok(res) } } diff --git a/raphtory-graphql/src/model/graph/mod.rs b/raphtory-graphql/src/model/graph/mod.rs index 95b6802854..5e49aaba70 100644 --- a/raphtory-graphql/src/model/graph/mod.rs +++ b/raphtory-graphql/src/model/graph/mod.rs @@ -11,7 +11,7 @@ pub(crate) mod index; pub(crate) mod meta_graph; pub(crate) mod mutable_graph; pub(crate) mod namespace; -mod namespaced_item; +pub(crate) mod namespaced_item; pub(crate) mod node; mod nodes; mod path_from_node; diff --git a/raphtory-graphql/src/model/graph/mutable_graph.rs b/raphtory-graphql/src/model/graph/mutable_graph.rs index e4b77c0abe..4803866797 100644 --- a/raphtory-graphql/src/model/graph/mutable_graph.rs +++ b/raphtory-graphql/src/model/graph/mutable_graph.rs @@ -1,8 +1,6 @@ use crate::{ - data::Data, graph::{GraphWithVectors, UpdateEmbeddings}, model::graph::{edge::GqlEdge, graph::GqlGraph, node::GqlNode, property::Value}, - paths::ExistingGraphFolder, rayon::blocking_write, }; use dynamic_graphql::{InputObject, ResolvedObject, ResolvedObjectFields}; @@ -114,18 +112,12 @@ pub struct EdgeAddition { #[derive(ResolvedObject, Clone)] #[graphql(name = "MutableGraph")] pub struct GqlMutableGraph { - path: ExistingGraphFolder, graph: GraphWithVectors, - data: Data, } -impl GqlMutableGraph { - pub(crate) fn new(path: ExistingGraphFolder, graph: GraphWithVectors, data: Data) -> Self { - Self { - path: path.into(), - graph, - data, - } +impl From for GqlMutableGraph { + fn from(graph: GraphWithVectors) -> Self { + Self { graph } } } @@ -147,14 +139,12 @@ fn as_properties( impl GqlMutableGraph { /// Get the non-mutable graph. async fn graph(&self) -> GqlGraph { - GqlGraph::new(self.path.clone(), self.graph.graph.clone()) + GqlGraph::new(self.graph.folder.clone(), self.graph.graph.clone()) } /// Get mutable existing node. async fn node(&self, name: String) -> Option { - self.graph - .node(name) - .map(|n| GqlMutableNode::new(n, self.path.clone(), self.data.clone())) + self.graph.node(name).map(|n| GqlMutableNode::new(n)) } /// Add a new node or add updates to an existing node. 
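`MetaGraph::meta` above memoizes metadata through `tokio::sync::OnceCell`: `get_or_try_init` runs the async loader at most once and then hands out the cached reference. A self-contained sketch of that primitive:

```rust
use tokio::sync::OnceCell;

async fn load() -> Result<u64, std::io::Error> {
    println!("loading once");
    Ok(42) // stand-in for reading graph metadata from disk
}

#[tokio::main]
async fn main() -> Result<(), std::io::Error> {
    let cell: OnceCell<u64> = OnceCell::new();
    let first = cell.get_or_try_init(load).await?;
    assert_eq!(*first, 42);
    // The second call reuses the cached value without re-running `load`.
    let second = cell.get_or_try_init(load).await?;
    assert_eq!(*second, 42);
    Ok(())
}
```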
@@ -179,11 +169,7 @@ impl GqlMutableGraph { self.post_mutation_ops().await; let _ = node.update_embeddings().await; - Ok(GqlMutableNode::new( - node, - self.path.clone(), - self.data.clone(), - )) + Ok(GqlMutableNode::new(node)) } /// Create a new node or fail if it already exists. @@ -208,11 +194,7 @@ impl GqlMutableGraph { self.post_mutation_ops().await; let _ = node.update_embeddings().await; - Ok(GqlMutableNode::new( - node, - self.path.clone(), - self.data.clone(), - )) + Ok(GqlMutableNode::new(node)) } /// Add a batch of nodes @@ -261,9 +243,7 @@ impl GqlMutableGraph { /// Get a mutable existing edge. async fn edge(&self, src: String, dst: String) -> Option { - self.graph - .edge(src, dst) - .map(|e| GqlMutableEdge::new(e, self.path.clone(), self.data.clone())) + self.graph.edge(src, dst).map(|e| GqlMutableEdge::new(e)) } /// Add a new edge or add updates to an existing edge. @@ -289,11 +269,7 @@ impl GqlMutableGraph { self.post_mutation_ops().await; let _ = edge.update_embeddings().await; - Ok(GqlMutableEdge::new( - edge, - self.path.clone(), - self.data.clone(), - )) + Ok(GqlMutableEdge::new(edge)) } /// Add a batch of edges @@ -358,11 +334,7 @@ impl GqlMutableGraph { self.post_mutation_ops().await; let _ = edge.update_embeddings().await; - Ok(GqlMutableEdge::new( - edge, - self.path.clone(), - self.data.clone(), - )) + Ok(GqlMutableEdge::new(edge)) } /// Add temporal properties to graph. @@ -439,13 +411,6 @@ impl GqlMutableGraph { /// Post mutation operations. async fn post_mutation_ops(&self) { self.graph.set_dirty(true); - - // Reinsert the graph into the cache to reset eviction priority. - // This prevents data loss if the graph is evicted after the mutation but before - // the `set_dirty` call. - self.data - .insert_graph_into_cache(self.path.get_original_path_str(), self.graph.clone()) - .await; } } @@ -453,17 +418,11 @@ impl GqlMutableGraph { #[graphql(name = "MutableNode")] pub struct GqlMutableNode { node: NodeView<'static, GraphWithVectors>, - path: ExistingGraphFolder, - data: Data, } impl GqlMutableNode { - pub fn new( - node: NodeView<'static, GraphWithVectors>, - path: ExistingGraphFolder, - data: Data, - ) -> Self { - Self { node, path, data } + pub fn new(node: NodeView<'static, GraphWithVectors>) -> Self { + Self { node } } } @@ -550,13 +509,6 @@ impl GqlMutableNode { /// Post mutation operations. async fn post_mutation_ops(&self) { self.node.graph.set_dirty(true); - - // Reinsert the graph into the cache to reset eviction priority. - // This prevents data loss if the graph is evicted after the mutation but before - // the `set_dirty` call. - self.data - .insert_graph_into_cache(self.path.get_original_path_str(), self.node.graph.clone()) - .await; } } @@ -564,13 +516,11 @@ impl GqlMutableNode { #[graphql(name = "MutableEdge")] pub struct GqlMutableEdge { edge: EdgeView, - path: ExistingGraphFolder, - data: Data, } impl GqlMutableEdge { - pub fn new(edge: EdgeView, path: ExistingGraphFolder, data: Data) -> Self { - Self { edge, path, data } + pub fn new(edge: EdgeView) -> Self { + Self { edge } } } @@ -588,12 +538,12 @@ impl GqlMutableEdge { /// Get the mutable source node of the edge. async fn src(&self) -> GqlMutableNode { - GqlMutableNode::new(self.edge.src(), self.path.clone(), self.data.clone()) + GqlMutableNode::new(self.edge.src()) } /// Get the mutable destination node of the edge. 
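`post_mutation_ops` now only flips the shared dirty flag; persistence is left to the eviction listener and `Drop` flush earlier in this diff, so the cache re-insertion dance disappears. A minimal sketch of the flag, mirroring the `Arc<AtomicBool>` field on `GraphWithVectors` (the memory ordering used upstream may differ):

```rust
use std::sync::{
    atomic::{AtomicBool, Ordering},
    Arc,
};

#[derive(Clone)]
struct Tracked {
    // Shared across clones so any handle can flag pending writes.
    is_dirty: Arc<AtomicBool>,
}

impl Tracked {
    fn set_dirty(&self, dirty: bool) {
        self.is_dirty.store(dirty, Ordering::Relaxed);
    }
    fn is_dirty(&self) -> bool {
        self.is_dirty.load(Ordering::Relaxed)
    }
}

fn main() {
    let g = Tracked {
        is_dirty: Arc::new(AtomicBool::new(false)),
    };
    let handle = g.clone();
    handle.set_dirty(true); // a mutation path flags the graph...
    assert!(g.is_dirty()); // ...and the flush path observes it
}
```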
async fn dst(&self) -> GqlMutableNode { - GqlMutableNode::new(self.edge.dst(), self.path.clone(), self.data.clone()) + GqlMutableNode::new(self.edge.dst()) } /// Mark the edge as deleted at time time. @@ -694,13 +644,6 @@ impl GqlMutableEdge { /// Post mutation operations. async fn post_mutation_ops(&self) { self.edge.graph.set_dirty(true); - - // Reinsert the graph into the cache to reset eviction priority. - // This prevents data loss if the graph is evicted after the mutation but before - // the `set_dirty` call. - self.data - .insert_graph_into_cache(self.path.get_original_path_str(), self.edge.graph.clone()) - .await; } } @@ -740,7 +683,7 @@ mod tests { graph.into() } - async fn create_mutable_graph() -> (GqlMutableGraph, tempfile::TempDir) { + async fn create_mutable_graph() -> (GqlMutableGraph, Data, tempfile::TempDir) { let graph = create_test_graph(); let tmp_dir = tempdir().unwrap(); @@ -758,18 +701,17 @@ mod tests { let folder = data .validate_path_for_insert("test_graph", overwrite) .unwrap(); + data.insert_graph(folder.clone(), graph).await.unwrap(); - data.insert_graph(folder, graph).await.unwrap(); - - let (graph_with_vectors, path) = data.get_graph("test_graph").await.unwrap(); - let mutable_graph = GqlMutableGraph::new(path, graph_with_vectors, data.clone()); + let graph_with_vectors = data.get_graph("test_graph").await.unwrap(); + let mutable_graph = GqlMutableGraph::from(graph_with_vectors); - (mutable_graph, tmp_dir) + (mutable_graph, data, tmp_dir) } + #[tokio::test] - #[ignore = "TODO"] async fn test_add_nodes_empty_list() { - let (mutable_graph, _tmp_dir) = create_mutable_graph().await; + let (mutable_graph, _data, _tmp_dir) = create_mutable_graph().await; let nodes = vec![]; let result = mutable_graph.add_nodes(nodes).await; @@ -781,7 +723,7 @@ mod tests { #[tokio::test] #[ignore = "TODO: #2384"] async fn test_add_nodes_simple() { - let (mutable_graph, _tmp_dir) = create_mutable_graph().await; + let (mutable_graph, _data, _tmp_dir) = create_mutable_graph().await; let nodes = vec![ NodeAddition { @@ -810,23 +752,23 @@ mod tests { assert!(result.unwrap()); // TODO: #2380 (embeddings aren't working right now) - // let query = "node1".to_string(); - // let embedding = &fake_embedding(vec![query]).await.unwrap().remove(0); - // let limit = 5; - // let result = mutable_graph - // .graph - // .vectors - // .unwrap() - // .nodes_by_similarity(embedding, limit, None); - // - // assert!(result.is_ok()); - // assert!(result.unwrap().get_documents().unwrap().len() == 2); + let query = "node1".to_string(); + let embedding = &fake_embedding(vec![query]).await.unwrap().remove(0); + let limit = 5; + let result = mutable_graph + .graph + .vectors + .unwrap() + .nodes_by_similarity(embedding, limit, None); + + assert!(result.is_ok()); + assert!(result.unwrap().get_documents().unwrap().len() == 2); } #[tokio::test] #[ignore = "TODO: #2384"] async fn test_add_nodes_with_properties() { - let (mutable_graph, _tmp_dir) = create_mutable_graph().await; + let (mutable_graph, _data, _tmp_dir) = create_mutable_graph().await; let nodes = vec![ NodeAddition { @@ -898,7 +840,7 @@ mod tests { #[tokio::test] #[ignore = "TODO: #2384"] async fn test_add_edges_simple() { - let (mutable_graph, _tmp_dir) = create_mutable_graph().await; + let (mutable_graph, _data, _tmp_dir) = create_mutable_graph().await; // First add some nodes. 
let nodes = vec![ diff --git a/raphtory-graphql/src/model/graph/namespace.rs b/raphtory-graphql/src/model/graph/namespace.rs index b6001a2651..191e402d3a 100644 --- a/raphtory-graphql/src/model/graph/namespace.rs +++ b/raphtory-graphql/src/model/graph/namespace.rs @@ -3,73 +3,141 @@ use crate::{ model::graph::{ collection::GqlCollection, meta_graph::MetaGraph, namespaced_item::NamespacedItem, }, - paths::{valid_path, ExistingGraphFolder}, + paths::{valid_path, ExistingGraphFolder, PathValidationError, ValidPath}, rayon::blocking_compute, }; use dynamic_graphql::{ResolvedObject, ResolvedObjectFields}; use itertools::Itertools; -use raphtory::errors::InvalidPathReason; use std::path::PathBuf; use walkdir::WalkDir; #[derive(ResolvedObject, Clone, Ord, Eq, PartialEq, PartialOrd)] pub(crate) struct Namespace { - base_dir: PathBuf, - current_dir: PathBuf, + current_dir: PathBuf, // always validated + relative_path: String, // relative to the root working directory +} + +pub struct NamespaceIter { + it: walkdir::IntoIter, + root: Namespace, +} + +impl Iterator for NamespaceIter { + type Item = NamespacedItem; + + fn next(&mut self) -> Option { + loop { + match self.it.next() { + None => return None, + Some(Ok(entry)) => { + let path = entry.path(); + if path.is_dir() { + match get_relative_path(&self.root.current_dir, path) { + Ok(relative) => { + match self.root.try_new_child(&relative) { + Ok(child) => { + match &child { + NamespacedItem::Namespace(_) => {} + NamespacedItem::MetaGraph(_) => { + self.it.skip_current_dir() // graphs should not be traversed further + } + } + return Some(child); + } + Err(_) => { + self.it.skip_current_dir() // not a valid path + } + } + } + Err(_) => { + self.it.skip_current_dir() // not a valid path and shouldn't be traversed further} + } + } + } + } + _ => {} // skip errors + }; + } + } } impl Namespace { - pub fn new(base_dir: PathBuf, current_dir: PathBuf) -> Self { + pub fn root(root: PathBuf) -> Self { Self { - base_dir, - current_dir, + current_dir: root, + relative_path: "".to_owned(), } } - fn get_all_children(&self) -> impl Iterator + use<'_> { + pub fn try_new(root: PathBuf, relative_path: String) -> Result { + let current_dir = valid_path(root, relative_path.as_str())?; + Self::try_from_valid(current_dir, &relative_path) + } + + /// Create a namespace from a valid path if it exists and is a namespace + pub fn try_from_valid( + current_dir: ValidPath, + relative_path: impl Into, + ) -> Result { + if current_dir.is_namespace() { + Ok(Self { + current_dir: current_dir.into_path(), + relative_path: relative_path.into(), + }) + } else { + Err(PathValidationError::NamespaceDoesNotExist( + relative_path.into(), + )) + } + } + + pub fn new_child_namespace(&self, relative_path: &str) -> Result { + let current_dir = valid_path(self.current_dir.clone(), relative_path)?; + let relative_path = [&self.relative_path, relative_path].join("/"); + Self::try_from_valid(current_dir, relative_path) + } + + pub fn try_new_child(&self, file_name: &str) -> Result { + let current_dir = valid_path(self.current_dir.clone(), file_name)?; + let relative_path = if self.relative_path.is_empty() { + file_name.to_owned() + } else { + [&self.relative_path, file_name].join("/") + }; + let child = if current_dir.is_namespace() { + NamespacedItem::Namespace(Self::try_from_valid(current_dir, relative_path)?) 
+ } else { + NamespacedItem::MetaGraph(MetaGraph::new(ExistingGraphFolder::try_from_valid( + current_dir, + &relative_path, + )?)) + }; + Ok(child) + } + + /// Non-recursively list children + pub fn get_children(&self) -> impl Iterator + use<'_> { WalkDir::new(&self.current_dir) + .min_depth(1) .max_depth(1) .into_iter() .flatten() .filter_map(|entry| { let path = entry.path(); - let file_name = entry.file_name().to_str()?; if path.is_dir() { - if path != self.current_dir - && valid_path(self.current_dir.clone(), file_name, true).is_ok() - { - Some(NamespacedItem::Namespace(Namespace::new( - self.base_dir.clone(), - path.to_path_buf(), - ))) - } else { - let base_path = self.base_dir.clone(); - let relative = get_relative_path(base_path.clone(), path, false).ok()?; - let folder = - ExistingGraphFolder::try_from(base_path.clone(), &relative).ok()?; - Some(NamespacedItem::MetaGraph(MetaGraph::new(folder))) - } + let file_name = entry.file_name().to_str()?; + self.try_new_child(file_name).ok() } else { None } }) } - pub(crate) fn get_all_namespaces(&self) -> Vec { - let base_path = self.base_dir.clone(); - WalkDir::new(&self.current_dir) - .into_iter() - .filter_map(|e| { - let entry = e.ok()?; - let path = entry.path(); - if path.is_dir() && get_relative_path(base_path.clone(), path, true).is_ok() { - Some(Namespace::new(self.base_dir.clone(), path.to_path_buf())) - } else { - None - } - }) - .sorted() - .collect() + /// Recursively list all children + pub fn get_all_children(&self) -> impl Iterator { + let it = WalkDir::new(&self.current_dir).into_iter(); + let root = self.clone(); + NamespaceIter { it, root } } } @@ -80,7 +148,7 @@ impl Namespace { blocking_compute(move || { GqlCollection::new( self_clone - .get_all_children() + .get_children() .into_iter() .filter_map(|g| match g { NamespacedItem::MetaGraph(g) => Some(g), @@ -92,16 +160,23 @@ impl Namespace { }) .await } - async fn path(&self) -> Result { - get_relative_path(self.base_dir.clone(), self.current_dir.as_path(), true) + async fn path(&self) -> String { + self.relative_path.clone() } async fn parent(&self) -> Option { - let parent = self.current_dir.parent()?.to_path_buf(); - if parent.starts_with(&self.base_dir) { - Some(Namespace::new(self.base_dir.clone(), parent)) - } else { + if self.relative_path.is_empty() { None + } else { + let parent = self.current_dir.parent()?.to_path_buf(); + let relative_path = self + .relative_path + .rsplit_once("/") + .map_or("", |(parent, _)| parent); + Some(Self { + current_dir: parent, + relative_path: relative_path.to_owned(), + }) } } @@ -110,7 +185,7 @@ impl Namespace { blocking_compute(move || { GqlCollection::new( self_clone - .get_all_children() + .get_children() .filter_map(|item| match item { NamespacedItem::MetaGraph(_) => None, NamespacedItem::Namespace(n) => Some(n), @@ -126,9 +201,7 @@ impl Namespace { // Namespaces will be listed before graphs. 
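`NamespaceIter` above drives `walkdir` by hand so it can prune: once an entry is classified as a graph (or as invalid), `skip_current_dir` stops the walk from descending into it. A self-contained sketch of that pruning pattern:

```rust
use walkdir::WalkDir;

fn main() {
    let mut it = WalkDir::new(".").into_iter();
    while let Some(entry) = it.next() {
        let Ok(entry) = entry else { continue }; // skip unreadable entries
        if entry.file_type().is_dir() && entry.file_name() == ".git" {
            // Treat this directory as a leaf: nothing below it is yielded.
            it.skip_current_dir();
            continue;
        }
        println!("{}", entry.path().display());
    }
}
```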
async fn items(&self) -> GqlCollection { let self_clone = self.clone(); - blocking_compute(move || { - GqlCollection::new(self_clone.get_all_children().sorted().collect()) - }) - .await + blocking_compute(move || GqlCollection::new(self_clone.get_children().sorted().collect())) + .await } } diff --git a/raphtory-graphql/src/model/graph/namespaced_item.rs b/raphtory-graphql/src/model/graph/namespaced_item.rs index 1f8e87bb13..8d315eebf7 100644 --- a/raphtory-graphql/src/model/graph/namespaced_item.rs +++ b/raphtory-graphql/src/model/graph/namespaced_item.rs @@ -5,7 +5,7 @@ use dynamic_graphql::Union; // This is useful for when fetching a collection of both for the purposes of displaying all such // items, paged. #[derive(Union, Clone, PartialOrd, PartialEq, Ord, Eq)] -pub(crate) enum NamespacedItem { +pub enum NamespacedItem { /// Namespace. Namespace(Namespace), /// Metagraph. diff --git a/raphtory-graphql/src/model/mod.rs b/raphtory-graphql/src/model/mod.rs index 85b724927c..baee486526 100644 --- a/raphtory-graphql/src/model/mod.rs +++ b/raphtory-graphql/src/model/mod.rs @@ -1,29 +1,33 @@ use crate::{ auth::ContextValidation, - data::Data, + data::{Data, DeletionError}, model::{ graph::{ collection::GqlCollection, graph::GqlGraph, index::IndexSpecInput, - mutable_graph::GqlMutableGraph, namespace::Namespace, + mutable_graph::GqlMutableGraph, namespace::Namespace, namespaced_item::NamespacedItem, vectorised_graph::GqlVectorisedGraph, }, plugins::{mutation_plugin::MutationPlugin, query_plugin::QueryPlugin}, }, - paths::valid_path, + paths::{ValidGraphPaths, ValidWriteableGraphFolder}, rayon::blocking_compute, - url_encode::{url_decode_graph, url_encode_graph}, + url_encode::{url_decode_graph_at, url_encode_graph}, }; use async_graphql::Context; use dynamic_graphql::{ App, Enum, Mutation, MutationFields, MutationRoot, ResolvedObject, ResolvedObjectFields, Result, Upload, }; +use itertools::Itertools; use raphtory::{ db::{ - api::view::{internal::InternalStorageOps, MaterializedGraph}, + api::{ + storage::storage::{Extension, PersistentStrategy}, + view::MaterializedGraph, + }, graph::views::deletion_graph::PersistentGraph, }, - errors::{GraphError, InvalidPathReason}, + errors::GraphError, prelude::*, serialise::*, version, @@ -31,8 +35,6 @@ use raphtory::{ use std::{ error::Error, fmt::{Display, Formatter}, - path::PathBuf, - sync::Arc, }; pub(crate) mod graph; @@ -97,10 +99,7 @@ impl QueryRoot { /// Returns a graph async fn graph<'a>(ctx: &Context<'a>, path: &str) -> Result { let data = ctx.data_unchecked::(); - Ok(data - .get_graph(path) - .await - .map(|(g, folder)| GqlGraph::new(folder, g.graph))?) 
+ Ok(data.get_graph(path).await?.into()) } /// Update graph query, has side effects to update graph state @@ -110,10 +109,7 @@ impl QueryRoot { ctx.require_write_access()?; let data = ctx.data_unchecked::(); - let graph = data - .get_graph(path.as_ref()) - .await - .map(|(g, folder)| GqlMutableGraph::new(folder, g, data.clone()))?; + let graph = data.get_graph(path.as_ref()).await?.into(); Ok(graph) } @@ -123,7 +119,7 @@ impl QueryRoot { /// Returns:: GqlVectorisedGraph async fn vectorised_graph<'a>(ctx: &Context<'a>, path: &str) -> Option { let data = ctx.data_unchecked::(); - let g = data.get_graph(path).await.ok()?.0.vectors?; + let g = data.get_graph(path).await.ok()?.vectors?; Some(g.into()) } @@ -132,25 +128,26 @@ impl QueryRoot { /// Returns:: List of namespaces on root async fn namespaces<'a>(ctx: &Context<'a>) -> GqlCollection { let data = ctx.data_unchecked::(); - let root = Namespace::new(data.work_dir.clone(), data.work_dir.clone()); - GqlCollection::new(root.get_all_namespaces().into()) + let root = Namespace::root(data.work_dir.clone()); + let list = blocking_compute(move || { + root.get_all_children() + .filter_map(|child| match child { + NamespacedItem::Namespace(item) => Some(item), + NamespacedItem::MetaGraph(_) => None, + }) + .sorted() + .collect() + }) + .await; + GqlCollection::new(list) } /// Returns a specific namespace at a given path /// /// Returns:: Namespace or error if no namespace found - async fn namespace<'a>( - ctx: &Context<'a>, - path: String, - ) -> Result { + async fn namespace<'a>(ctx: &Context<'a>, path: String) -> Result { let data = ctx.data_unchecked::(); - let current_dir = valid_path(data.work_dir.clone(), path.as_str(), true)?; - - if current_dir.exists() { - Ok(Namespace::new(data.work_dir.clone(), current_dir)) - } else { - Err(InvalidPathReason::NamespaceDoesNotExist(path)) - } + Ok(Namespace::try_new(data.work_dir.clone(), path)?) } /// Returns root namespace @@ -158,7 +155,7 @@ impl QueryRoot { /// Returns:: Root namespace async fn root<'a>(ctx: &Context<'a>) -> Namespace { let data = ctx.data_unchecked::(); - Namespace::new(data.work_dir.clone(), data.work_dir.clone()) + Namespace::root(data.work_dir.clone()) } /// Returns a plugin. @@ -169,10 +166,10 @@ impl QueryRoot { /// Encodes graph and returns as string /// /// Returns:: Base64 url safe encoded string - async fn receive_graph<'a>(ctx: &Context<'a>, path: String) -> Result> { + async fn receive_graph<'a>(ctx: &Context<'a>, path: String) -> Result { let path = path.as_ref(); let data = ctx.data_unchecked::(); - let g = data.get_graph(path).await?.0.graph.clone(); + let g = data.get_graph(path).await?.graph.clone(); let res = url_encode_graph(g)?; Ok(res) } @@ -197,7 +194,7 @@ impl Mut { /// Delete graph from a path on the server. // If namespace is not provided, it will be set to the current working directory. 
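The `namespaces` resolver above wraps the directory walk in `blocking_compute` so the filesystem scan never stalls the async executor. A sketch of the same offloading idea using plain `tokio::task::spawn_blocking` (the in-tree helper lives in a `rayon` module and may schedule differently):

```rust
use tokio::task::spawn_blocking;

#[tokio::main]
async fn main() {
    // Run the synchronous, potentially slow scan off the async runtime.
    let names = spawn_blocking(|| {
        let mut v: Vec<String> = std::fs::read_dir(".")
            .into_iter() // Result -> zero-or-one ReadDir
            .flatten() //    iterate the directory entries
            .flatten() //    drop unreadable entries
            .map(|e| e.file_name().to_string_lossy().into_owned())
            .collect();
        v.sort();
        v
    })
    .await
    .expect("blocking task panicked");
    println!("{names:?}");
}
```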
-    async fn delete_graph<'a>(ctx: &Context<'a>, path: String) -> Result<bool> {
+    async fn delete_graph<'a>(ctx: &Context<'a>, path: String) -> Result<bool, DeletionError> {
         let data = ctx.data_unchecked::<Data>();
         data.delete_graph(&path).await?;
         Ok(true)
@@ -212,10 +209,17 @@
         let data = ctx.data_unchecked::<Data>();
         let overwrite = false;
         let folder = data.validate_path_for_insert(&path, overwrite)?;
-        let path = folder.get_graph_path();
-        let graph: MaterializedGraph = match graph_type {
-            GqlGraphType::Persistent => PersistentGraph::new_at_path(path).into(),
-            GqlGraphType::Event => Graph::new_at_path(path).into(),
+        let graph_path = folder.graph_folder();
+        let graph: MaterializedGraph = if Extension::disk_storage_enabled() {
+            match graph_type {
+                GqlGraphType::Persistent => PersistentGraph::new_at_path(graph_path)?.into(),
+                GqlGraphType::Event => Graph::new_at_path(graph_path)?.into(),
+            }
+        } else {
+            match graph_type {
+                GqlGraphType::Persistent => PersistentGraph::new().into(),
+                GqlGraphType::Event => Graph::new().into(),
+            }
         };
 
         data.insert_graph(folder, graph).await?;
@@ -223,29 +227,33 @@
         Ok(true)
     }
 
-    /// Move graph from a path path on the server to a new_path on the server.
-    ///
-    /// If namespace is not provided, it will be set to the current working directory.
-    /// This applies to both the graph namespace and new graph namespace.
-    async fn move_graph<'a>(ctx: &Context<'a>, path: &str, new_path: &str) -> Result<bool> {
-        Self::copy_graph(ctx, path, new_path).await?;
+    /// Move graph from a path on the server to a new_path on the server.
+    async fn move_graph<'a>(
+        ctx: &Context<'a>,
+        path: &str,
+        new_path: &str,
+        overwrite: Option<bool>,
+    ) -> Result<bool> {
+        Self::copy_graph(ctx, path, new_path, overwrite).await?;
         let data = ctx.data_unchecked::<Data>();
         data.delete_graph(path).await?;
         Ok(true)
     }
 
-    /// Copy graph from a path path on the server to a new_path on the server.
-    ///
-    /// If namespace is not provided, it will be set to the current working directory.
-    /// This applies to both the graph namespace and new graph namespace.
-    async fn copy_graph<'a>(ctx: &Context<'a>, path: &str, new_path: &str) -> Result<bool> {
+    /// Copy graph from a path on the server to a new_path on the server.
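`move_graph` above is deliberately copy-then-delete rather than a rename, so a failed copy never loses the source, and the new `overwrite` parameter defaults to `false` when omitted. A toy, std-only model of that contract (the map-backed `Store` is a stand-in for the server's `Data`, which is async and persistent):

```rust
use std::collections::HashMap;

// Stand-in for the server-side graph store (assumption: simplified, synchronous).
type Store = HashMap<String, Vec<u8>>;

fn copy_entry(store: &mut Store, from: &str, to: &str, overwrite: bool) -> Result<(), String> {
    if !overwrite && store.contains_key(to) {
        return Err(format!("Graph '{to}' already exists"));
    }
    let bytes = store
        .get(from)
        .cloned()
        .ok_or_else(|| format!("Graph '{from}' does not exist"))?;
    store.insert(to.to_string(), bytes);
    Ok(())
}

// Move = copy first, then delete the source only after the copy succeeded.
fn move_entry(store: &mut Store, from: &str, to: &str, overwrite: Option<bool>) -> Result<(), String> {
    copy_entry(store, from, to, overwrite.unwrap_or(false))?;
    store.remove(from);
    Ok(())
}
```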
+ async fn copy_graph<'a>( + ctx: &Context<'a>, + path: &str, + new_path: &str, + overwrite: Option, + ) -> Result { // doing this in a more efficient way is not trivial, this at least is correct // there are questions like, maybe the new vectorised graph have different rules // for the templates or if it needs to be vectorised at all + let overwrite = overwrite.unwrap_or(false); let data = ctx.data_unchecked::(); - let overwrite = false; + let graph = data.get_graph(path).await?.graph; let folder = data.validate_path_for_insert(new_path, overwrite)?; - let graph = data.get_graph(path).await?.0.graph; data.insert_graph(folder, graph).await?; Ok(true) @@ -264,11 +272,6 @@ impl Mut { let data = ctx.data_unchecked::(); let in_file = graph.value(ctx)?.content; let folder = data.validate_path_for_insert(&path, overwrite)?; - - if overwrite { - let _ignored = data.delete_graph(&path).await; - } - data.insert_graph_as_bytes(folder, in_file).await?; Ok(path) @@ -285,14 +288,12 @@ impl Mut { overwrite: bool, ) -> Result { let data = ctx.data_unchecked::(); - let folder = data.validate_path_for_insert(path, overwrite)?; - let path_for_decoded_graph = Some(folder.get_graph_path()); - let g: MaterializedGraph = url_decode_graph(graph, path_for_decoded_graph.as_deref())?; - - if overwrite { - let _ignored = data.delete_graph(path).await; - } - + let folder = if overwrite { + ValidWriteableGraphFolder::try_existing_or_new(data.work_dir.clone(), path)? + } else { + ValidWriteableGraphFolder::try_new(data.work_dir.clone(), path)? + }; + let g: MaterializedGraph = url_decode_graph_at(graph, folder.graph_folder())?; data.insert_graph(folder, g).await?; Ok(path.to_owned()) } @@ -309,14 +310,18 @@ impl Mut { overwrite: bool, ) -> Result { let data = ctx.data_unchecked::(); - let parent_graph = data.get_graph(parent_path).await?.0.graph; - let new_subgraph = - blocking_compute(move || parent_graph.subgraph(nodes).materialize()).await?; let folder = data.validate_path_for_insert(&new_path, overwrite)?; - - if overwrite { - let _ignored = data.delete_graph(&new_path).await; - } + let parent_graph = data.get_graph(parent_path).await?.graph; + let folder_clone = folder.clone(); + let new_subgraph = blocking_compute(move || { + let subgraph = parent_graph.subgraph(nodes); + if Extension::disk_storage_enabled() { + subgraph.materialize_at(folder_clone.graph_folder()) + } else { + subgraph.materialize() + } + }) + .await?; data.insert_graph(folder, new_subgraph).await?; Ok(new_path) @@ -332,7 +337,7 @@ impl Mut { #[cfg(feature = "search")] { let data = ctx.data_unchecked::(); - let graph = data.get_graph(path).await?.0.graph; + let graph = data.get_graph(path).await?.graph; match index_spec { Some(index_spec) => { let index_spec = index_spec.to_index_spec(graph.clone())?; diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index aa21703882..4f494c70f1 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -1,198 +1,683 @@ -use crate::rayon::blocking_compute; +use crate::{data::DIRTY_PATH, model::blocking_io, rayon::blocking_compute}; +use futures_util::io; use raphtory::{ + db::api::{ + storage::storage::{Extension, PersistentStrategy}, + view::{internal::InternalStorageOps, MaterializedGraph}, + }, errors::{GraphError, InvalidPathReason}, - serialise::{metadata::GraphMetadata, GraphFolder, META_PATH}, + prelude::GraphViewOps, + serialise::{ + metadata::GraphMetadata, GraphFolder, GraphPaths, RelativePath, StableDecode, + WriteableGraphFolder, META_PATH, + }, }; use 
std::{ + cmp::Ordering, + ffi::OsStr, fs, + fs::File, + io::{ErrorKind, Read, Seek, Write}, ops::Deref, - path::{Component, Path, PathBuf}, + path::{Component, Path, PathBuf, StripPrefixError}, time::{SystemTime, UNIX_EPOCH}, }; +use tracing::{error, warn}; +use zip::ZipArchive; -#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] -pub struct ExistingGraphFolder { - folder: ValidGraphFolder, +pub trait ValidGraphPaths { + fn local_path(&self) -> &str; + + fn graph_folder(&self) -> &impl GraphPaths; + + fn local_path_string(&self) -> String { + self.local_path().to_owned() + } + + fn with_internal_errors( + &self, + fun: impl FnOnce() -> R, + ) -> Result { + fun().with_path(self.local_path()) + } } -impl Deref for ExistingGraphFolder { - type Target = ValidGraphFolder; +pub struct ValidPath(PathBuf); - fn deref(&self) -> &Self::Target { - &self.folder +impl ValidPath { + /// path exists and is a graph + pub fn is_graph(&self) -> bool { + self.0.exists() && self.0.join(META_PATH).exists() + } + + /// path exists and is a namespace + pub fn is_namespace(&self) -> bool { + self.0.exists() && !self.0.join(META_PATH).exists() + } + + pub fn into_path(self) -> PathBuf { + self.0 } } -impl From for GraphFolder { - fn from(value: ValidGraphFolder) -> Self { - value.folder +#[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] +pub struct ExistingGraphFolder(pub(crate) ValidGraphFolder); + +impl ValidGraphPaths for ExistingGraphFolder { + fn local_path(&self) -> &str { + self.0.local_path() + } + + fn graph_folder(&self) -> &impl GraphPaths { + self.0.graph_folder() } } -impl From for GraphFolder { - fn from(value: ExistingGraphFolder) -> Self { - value.folder.folder +impl Deref for ExistingGraphFolder { + type Target = ValidGraphFolder; + + fn deref(&self) -> &Self::Target { + &self.0 } } + impl ExistingGraphFolder { - pub(crate) fn try_from(base_path: PathBuf, relative_path: &str) -> Result { - let graph_folder = ValidGraphFolder::try_from(base_path, relative_path)?; + pub fn try_from(base_path: PathBuf, relative_path: &str) -> Result { + let path = valid_path(base_path, relative_path)?; + Self::try_from_valid(path, relative_path) + } + pub fn try_from_valid( + base_path: ValidPath, + relative_path: &str, + ) -> Result { + let graph_folder: GraphFolder = base_path.into_path().into(); if graph_folder.is_reserved() { - Ok(Self { - folder: graph_folder, - }) + Ok(Self(ValidGraphFolder { + global_path: graph_folder, + local_path: relative_path.to_string(), + })) } else { - Err(GraphError::GraphNotFound(graph_folder.to_error_path())) + Err(PathValidationError::GraphNotExistsError( + relative_path.to_string(), + )) } } - pub(crate) fn get_graph_name(&self) -> Result { - let path = &self.get_base_path(); - let last_component: Component = path.components().last().ok_or_else(|| { - GraphError::from(InvalidPathReason::PathNotParsable(self.to_error_path())) - })?; - match last_component { - Component::Normal(value) => { - value - .to_str() - .map(|s| s.to_string()) - .ok_or(GraphError::from(InvalidPathReason::PathNotParsable( - self.to_error_path(), - ))) - } - Component::Prefix(_) - | Component::RootDir - | Component::CurDir - | Component::ParentDir => Err(GraphError::from(InvalidPathReason::PathNotParsable( - self.to_error_path(), - ))), - } + fn replace_graph_data_inner( + &self, + graph: MaterializedGraph, + ) -> Result<(), InternalPathValidationError> { + self.global_path.data_path()?.replace_graph(graph)?; + Ok(()) + } + pub fn replace_graph_data(&self, graph: MaterializedGraph) -> Result<(), 
PathValidationError> { + self.replace_graph_data_inner(graph) + .with_path(self.local_path()) } } #[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] pub struct ValidGraphFolder { - pub folder: GraphFolder, - original_path: String, + global_path: GraphFolder, + local_path: String, +} + +fn valid_component(component: Component<'_>) -> Result<&OsStr, InvalidPathReason> { + match component { + Component::Prefix(_) => Err(InvalidPathReason::RootNotAllowed), + Component::RootDir => Err(InvalidPathReason::RootNotAllowed), + Component::CurDir => Err(InvalidPathReason::CurDirNotAllowed), + Component::ParentDir => Err(InvalidPathReason::ParentDirNotAllowed), + Component::Normal(component) => Ok(component), + } } -impl From for ValidGraphFolder { - fn from(value: ExistingGraphFolder) -> Self { - value.folder +fn extend_and_validate( + full_path: &mut PathBuf, + component: Component, +) -> Result<(), InternalPathValidationError> { + let component = valid_component(component)?; + // check if some intermediate path is already a graph + if full_path.join(META_PATH).exists() { + return Err(InvalidPathReason::ParentIsGraph.into()); } + full_path.push(component); + //check for symlinks + if full_path.is_symlink() { + return Err(InvalidPathReason::SymlinkNotAllowed.into()); + } + ensure_clean_folder(&full_path)?; + Ok(()) } -impl Deref for ValidGraphFolder { - type Target = GraphFolder; +fn valid_path_inner( + base_path: PathBuf, + relative_path: &str, +) -> Result { + let mut full_path = base_path.clone(); + let user_facing_path: &Path = relative_path.as_ref(); - fn deref(&self) -> &Self::Target { - &self.folder + if relative_path.contains(r"//") { + Err(InvalidPathReason::DoubleForwardSlash)?; + } + if relative_path.contains(r"\") { + Err(InvalidPathReason::BackslashError)?; + } + + // fail if any component is a Prefix (C://), tries to access root, + // tries to access a parent dir or is a symlink which could break out of the working dir + for component in user_facing_path.components() { + extend_and_validate(&mut full_path, component)?; } + + Ok(full_path) } pub(crate) fn valid_path( base_path: PathBuf, relative_path: &str, - namespace: bool, -) -> Result { +) -> Result { + let full_path = valid_path_inner(base_path, relative_path).with_path(relative_path)?; + Ok(ValidPath(full_path)) +} + +#[derive(Clone, Debug)] +pub struct NewPath { + path: PathBuf, + cleanup: Option, +} + +impl NewPath { + pub fn is_new(&self) -> bool { + self.cleanup.is_some() + } +} + +impl PartialEq for NewPath { + fn eq(&self, other: &Self) -> bool { + self.path.eq(&other.path) + } +} + +impl PartialOrd for NewPath { + fn partial_cmp(&self, other: &Self) -> Option { + self.path.partial_cmp(&other.path) + } +} + +pub(crate) fn create_valid_path( + base_path: PathBuf, + relative_path: &str, +) -> Result { let user_facing_path = PathBuf::from(relative_path); if relative_path.contains(r"//") { - return Err(InvalidPathReason::DoubleForwardSlash(user_facing_path)); + return Err(InvalidPathReason::DoubleForwardSlash.into()); } if relative_path.contains(r"\") { - return Err(InvalidPathReason::BackslashError(user_facing_path)); + return Err(InvalidPathReason::BackslashError.into()); } let mut full_path = base_path.clone(); + let mut cleanup_marker = None; // fail if any component is a Prefix (C://), tries to access root, // tries to access a parent dir or is a symlink which could break out of the working dir for component in user_facing_path.components() { - match component { - Component::Prefix(_) => { - return 
Err(InvalidPathReason::RootNotAllowed(user_facing_path)) - } - Component::RootDir => return Err(InvalidPathReason::RootNotAllowed(user_facing_path)), - Component::CurDir => return Err(InvalidPathReason::CurDirNotAllowed(user_facing_path)), - Component::ParentDir => { - return Err(InvalidPathReason::ParentDirNotAllowed(user_facing_path)) - } - Component::Normal(component) => { - // check if some intermediate path is already a graph - if full_path.join(META_PATH).exists() { - return Err(InvalidPathReason::ParentIsGraph(user_facing_path)); - } - full_path.push(component); - //check if the path with the component is a graph - if namespace && full_path.join(META_PATH).exists() { - return Err(InvalidPathReason::ParentIsGraph(user_facing_path)); + match extend_and_validate(&mut full_path, component) { + Ok(_) => { + if !full_path.exists() { + if cleanup_marker.is_none() { + cleanup_marker = Some(CleanupPath { + path: full_path.clone(), + dirty_marker: mark_dirty(&full_path)?, + }); + fs::create_dir(&full_path)?; + } } - //check for symlinks - if full_path.is_symlink() { - return Err(InvalidPathReason::SymlinkNotAllowed(user_facing_path)); + } + Err(error) => { + if let Some(created_path) = cleanup_marker { + created_path.cleanup()?; } + return Err(error.into()); } } } - Ok(full_path) + Ok(NewPath { + path: full_path, + cleanup: cleanup_marker, + }) } -impl ValidGraphFolder { - pub(crate) fn try_from( +#[derive(Debug, Clone)] +struct CleanupPath { + path: PathBuf, + dirty_marker: PathBuf, +} + +impl CleanupPath { + fn persist(&self) -> Result<(), InternalPathValidationError> { + fs::remove_file(&self.dirty_marker)?; + Ok(()) + } + + fn cleanup(&self) -> Result<(), InternalPathValidationError> { + fs::remove_dir_all(&self.path)?; + fs::remove_file(&self.dirty_marker)?; + Ok(()) + } +} + +#[derive(Clone, Debug)] +pub struct ValidWriteableGraphFolder { + global_path: WriteableGraphFolder, + local_path: String, + dirty_marker: Option, +} + +impl ValidGraphPaths for ValidWriteableGraphFolder { + fn local_path(&self) -> &str { + &self.local_path + } + + fn graph_folder(&self) -> &impl GraphPaths { + &self.global_path + } +} + +impl ValidWriteableGraphFolder { + fn new_inner( + valid_path: NewPath, + graph_name: &str, + ) -> Result { + let is_new = valid_path.is_new(); + let graph_folder = GraphFolder::from(valid_path.path); + if !is_new { + if !graph_folder.is_reserved() { + return Err(InternalPathValidationError::GraphIsNamespace); + } + } + let data_path = graph_folder.init_swap()?; + Ok(Self { + global_path: data_path, + dirty_marker: valid_path.cleanup, + local_path: graph_name.to_string(), + }) + } + fn new(valid_path: NewPath, graph_name: &str) -> Result { + Self::new_inner(valid_path, graph_name).map_err(|error| { + PathValidationError::InternalError { + graph: graph_name.to_string(), + error, + } + }) + } + + pub(crate) fn try_new( base_path: PathBuf, relative_path: &str, - ) -> Result { - let full_path = valid_path(base_path, relative_path, false)?; - Ok(Self { - original_path: relative_path.to_owned(), - folder: GraphFolder::from(full_path), + ) -> Result { + let path = create_valid_path(base_path, relative_path).map_err(|error| { + PathValidationError::InternalError { + graph: relative_path.to_string(), + error, + } + })?; + if !path.cleanup.is_some() { + return Err(PathValidationError::GraphExistsError( + relative_path.to_string(), + )); + } + Self::new(path, relative_path) + } + + pub(crate) fn try_existing_or_new( + base_path: PathBuf, + relative_path: &str, + ) -> Result { + let path = 
create_valid_path(base_path, relative_path).with_path(relative_path)?; + Self::new(path, relative_path) + } + + fn write_graph_data_inner( + &self, + graph: MaterializedGraph, + ) -> Result<(), InternalPathValidationError> { + if Extension::disk_storage_enabled() { + let graph_path = self.graph_folder().graph_path()?; + if graph + .disk_storage_enabled() + .is_some_and(|path| path == &graph_path) + { + self.global_path.write_metadata(&graph)?; + } else { + graph.materialize_at(self.graph_folder())?; + } + } else { + self.global_path.data_path()?.replace_graph(graph)?; + } + Ok(()) + } + pub fn write_graph_data(&self, graph: MaterializedGraph) -> Result<(), PathValidationError> { + self.write_graph_data_inner(graph) + .with_path(self.local_path()) + } + + pub fn read_graph(&self) -> Result { + self.with_internal_errors(|| { + if self.graph_folder().read_metadata()?.is_diskgraph { + MaterializedGraph::load_from_path(self.graph_folder()) + } else { + MaterializedGraph::decode(self.graph_folder()) + } + }) + } + + pub fn write_graph_bytes( + &self, + bytes: R, + ) -> Result<(), PathValidationError> { + self.with_internal_errors(|| { + if Extension::disk_storage_enabled() { + MaterializedGraph::decode_from_zip_at( + ZipArchive::new(bytes)?, + self.graph_folder(), + )?; + } else { + self.global_path.data_path()?.unzip_to_folder(bytes)?; + } + Ok::<(), GraphError>(()) + }) + } + + /// Swap old and new data and delete the old graph + pub fn finish(self) -> Result { + let data_path = self.global_path.finish().with_path(&self.local_path)?; + if let Some(cleanup) = self.dirty_marker.as_ref() { + cleanup.persist().with_path(&self.local_path)?; + } + Ok(ValidGraphFolder { + global_path: data_path, + local_path: self.local_path, + }) + } +} + +#[derive(thiserror::Error, Debug)] +pub enum InternalPathValidationError { + #[error(transparent)] + InvalidPath(#[from] InvalidPathReason), + #[error(transparent)] + IOError(io::Error), + #[error("Graph path should not be nested: {0}")] + NestedPath(PathBuf), + #[error("Graph metadata file does not exist")] + MissingMetadataFile, + #[error("Reading path from metadata failed: {0}")] + InvalidMetadata(#[from] serde_json::Error), + #[error(transparent)] + GraphError(#[from] GraphError), + #[error("Graph path should always have a parent")] + MissingParent, + #[error(transparent)] + StripPrefix(#[from] StripPrefixError), + #[error("Expected a graph but found a namespace")] + GraphIsNamespace, + #[error("Expected a namespace but found a graph")] + NamespaceIsGraph, + #[error("The path provided contains non-UTF8 characters.")] + NonUTFCharacters, + #[error("Relative path from metadata is empty")] + EmptyRelativePath, + #[error("Relative path from metadata has more than one component")] + RelativePathMultipleComponents, +} + +impl From for InternalPathValidationError { + #[track_caller] + fn from(value: io::Error) -> Self { + error!("Unexpected IO failure: {}", value); + InternalPathValidationError::IOError(value) + } +} + +#[derive(thiserror::Error, Debug)] +pub enum PathValidationError { + #[error("Graph '{0}' already exists")] + GraphExistsError(String), + #[error("Graph '{0}' does not exist")] + GraphNotExistsError(String), + #[error("'{0}' does not exist as a namespace")] + NamespaceDoesNotExist(String), + #[error("Invalid path '{graph}': {reason}")] + InvalidPath { + graph: String, + reason: InvalidPathReason, + }, + #[error("Graph '{graph}' is corrupted: {error}")] + InternalError { + graph: String, + error: InternalPathValidationError, + }, + #[error("Unexpected 
IO error for graph '{graph}': {error}")] + IOError { graph: String, error: io::Error }, +} + +pub trait WithPath { + type Value; + fn with_path>(self, graph: S) -> Result; +} + +impl> WithPath for Result { + type Value = V; + fn with_path>(self, graph: S) -> Result { + self.map_err(move |error| { + let error = error.into(); + let graph = graph.into(); + match error { + InternalPathValidationError::InvalidPath(reason) => { + PathValidationError::InvalidPath { graph, reason } + } + _ => PathValidationError::InternalError { graph, error }, + } }) } +} + +fn is_graph(path: &Path) -> bool { + path.join(META_PATH).is_file() +} + +fn valid_relative_path(relative_path: &Path) -> Result<(), InternalPathValidationError> { + let mut components = relative_path.components(); + valid_component( + components + .next() + .ok_or(InternalPathValidationError::EmptyRelativePath)?, + )?; + if components.next().is_some() { + return Err(InternalPathValidationError::RelativePathMultipleComponents); + } + Ok(()) +} + +fn read_dirty_relative_path( + base_path: &Path, +) -> Result, InternalPathValidationError> { + let mut file = match File::open(base_path.join(DIRTY_PATH)) { + Ok(file) => file, + Err(error) => { + return match error.kind() { + ErrorKind::NotFound => Ok(None), + _ => Err(error.into()), + } + } + }; + let mut json_string = String::new(); + file.read_to_string(&mut json_string)?; + let path: RelativePath = serde_json::from_str(&json_string)?; + valid_relative_path(path.path.as_ref())?; + Ok(Some(base_path.join(path.path))) +} + +pub(crate) fn ensure_clean_folder(base_path: &Path) -> Result<(), InternalPathValidationError> { + if base_path.is_dir() { + match read_dirty_relative_path(base_path) { + Ok(path) => { + if let Some(path) = path { + warn!("Found dirty path {}, cleaning...", path.display()); + fs::remove_dir_all(base_path.join(path))?; + } + } + Err(error) => { + warn!("Found dirty file with invalid path: {error}, cleaning...") + } + } + match fs::remove_file(base_path.join(DIRTY_PATH)) { + Ok(_) => {} + Err(err) => match err.kind() { + ErrorKind::NotFound => {} + _ => Err(err)?, + }, + }; + } + Ok(()) +} + +/// Mark path as dirty +/// - ensure parent is clean +/// - create dirty file and fsync it +pub(crate) fn mark_dirty(path: &Path) -> Result { + let cleanup_path = path + .file_name() + .ok_or(InternalPathValidationError::MissingParent)? + .to_str() + .ok_or(InternalPathValidationError::NonUTFCharacters)? + .to_string(); + let parent = path + .parent() + .ok_or(InternalPathValidationError::MissingParent)?; + ensure_clean_folder(parent)?; + let dirty_file_path = parent.join(DIRTY_PATH); + let mut dirty_file = File::create_new(&dirty_file_path)?; + dirty_file.write_all(&serde_json::to_vec(&RelativePath { path: cleanup_path })?)?; + // make sure the dirty path is properly recorded before we proceed! 
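The marker written here is what makes partially created namespaces and graphs recoverable: the parent's dirty file names the entry about to be created and is fsynced before the directory appears, and `ensure_clean_folder` later removes whatever a leftover marker names. A condensed, std-only sketch of that ordering (the `.dirty` file name is illustrative, standing in for the `DIRTY_PATH` constant, and the payload is a bare path rather than the JSON `RelativePath` used above):

```rust
use std::{fs, fs::File, io::Write, path::Path};

// Record what is about to be created and make that record durable *before*
// the directory exists; the caller clears the marker only once the new entry
// is complete.
fn mark_and_create(parent: &Path, name: &str) -> std::io::Result<()> {
    let marker = parent.join(".dirty");
    let mut f = File::create_new(&marker)?; // refuses to clobber an old marker
    f.write_all(name.as_bytes())?;
    f.sync_all()?; // the marker must hit disk before the dir appears
    fs::create_dir(parent.join(name))
}

// On success the marker is simply dropped.
fn persist(parent: &Path) -> std::io::Result<()> {
    fs::remove_file(parent.join(".dirty"))
}

// Recovery pass: anything a leftover marker names was never completed.
fn ensure_clean(parent: &Path) -> std::io::Result<()> {
    match fs::read_to_string(parent.join(".dirty")) {
        Ok(name) => {
            let _ = fs::remove_dir_all(parent.join(name.trim()));
            fs::remove_file(parent.join(".dirty"))
        }
        Err(_) => Ok(()), // no marker, nothing to recover
    }
}
```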
+ dirty_file.sync_all()?; + Ok(dirty_file_path) +} + +impl GraphPaths for ValidGraphFolder { + fn root(&self) -> &Path { + self.global_path.root() + } + + fn relative_data_path(&self) -> Result { + self.global_path.relative_data_path() + } + + fn relative_graph_path(&self) -> Result { + self.global_path.relative_graph_path() + } +} - pub fn created(&self) -> Result { - fs::metadata(self.get_graph_path())?.created()?.to_millis() +impl ValidGraphPaths for ValidGraphFolder { + fn local_path(&self) -> &str { + &self.local_path } - pub fn last_opened(&self) -> Result { - fs::metadata(self.get_graph_path())?.accessed()?.to_millis() + fn graph_folder(&self) -> &impl GraphPaths { + &self.global_path } +} - pub fn last_updated(&self) -> Result { - fs::metadata(self.get_graph_path())?.modified()?.to_millis() +impl ValidGraphFolder { + fn with_internal_errors( + &self, + map: impl FnOnce() -> Result, + ) -> Result { + map().with_path(self.local_path()) } - pub async fn created_async(&self) -> Result { - let metadata = tokio::fs::metadata(self.get_graph_path()).await?; - metadata.created()?.to_millis() + pub fn graph_folder(&self) -> &GraphFolder { + &self.global_path + } + pub fn created(&self) -> Result { + self.with_internal_errors(|| { + Ok(self.root_meta_path().metadata()?.created()?.to_millis()?) + }) + } + + pub fn last_opened(&self) -> Result { + self.with_internal_errors(|| { + Ok(fs::metadata(self.global_path.meta_path()?)? + .accessed()? + .to_millis()?) + }) } - pub async fn last_opened_async(&self) -> Result { - let metadata = tokio::fs::metadata(self.get_graph_path()).await?; - metadata.accessed()?.to_millis() + pub fn last_updated(&self) -> Result { + self.with_internal_errors(|| { + Ok(fs::metadata(self.meta_path()?)?.modified()?.to_millis()?) + }) } - pub async fn last_updated_async(&self) -> Result { - let metadata = tokio::fs::metadata(self.get_graph_path()).await?; - metadata.modified()?.to_millis() + pub async fn created_async(&self) -> Result { + let cloned = self.clone(); + blocking_io(move || cloned.created()).await } - pub async fn read_metadata_async(&self) -> Result { - let folder = self.folder.clone(); - blocking_compute(move || folder.read_metadata()).await + pub async fn last_opened_async(&self) -> Result { + let cloned = self.clone(); + blocking_io(move || cloned.last_opened()).await } - pub fn get_original_path_str(&self) -> &str { - &self.original_path + pub async fn last_updated_async(&self) -> Result { + let cloned = self.clone(); + blocking_io(move || cloned.last_updated()).await } - pub fn get_original_path(&self) -> &Path { - &Path::new(&self.original_path) + pub async fn read_metadata_async(&self) -> Result { + let folder: GraphFolder = self.global_path.clone(); + blocking_compute(move || folder.read_metadata()) + .await + .with_path(self.local_path()) } /// This returns the PathBuf used to build multiple GraphError types pub fn to_error_path(&self) -> PathBuf { - self.original_path.to_owned().into() + self.local_path.to_owned().into() + } + + pub fn get_graph_name(&self) -> Result { + let path: &Path = self.local_path.as_ref(); + let name = self.with_internal_errors(|| { + let last_component: Component = path + .components() + .last() + .ok_or(InvalidPathReason::PathNotParsable)?; + match last_component { + Component::Normal(value) => Ok(value + .to_str() + .map(|s| s.to_string()) + .ok_or(InvalidPathReason::PathNotParsable)?), + Component::Prefix(_) + | Component::RootDir + | Component::CurDir + | Component::ParentDir => 
Err(InvalidPathReason::PathNotParsable)?, + } + })?; + + Ok(name) + } + pub(crate) fn as_existing(&self) -> Result { + if self.global_path.is_reserved() { + Ok(ExistingGraphFolder(self.clone())) + } else { + Err(PathValidationError::GraphNotExistsError( + self.local_path.clone(), + )) + } } } diff --git a/raphtory-graphql/src/python/client/raphtory_client.rs b/raphtory-graphql/src/python/client/raphtory_client.rs index e14fd931c8..786df1df90 100644 --- a/raphtory-graphql/src/python/client/raphtory_client.rs +++ b/raphtory-graphql/src/python/client/raphtory_client.rs @@ -1,12 +1,11 @@ use crate::{ - paths::ExistingGraphFolder, python::{ client::{remote_graph::PyRemoteGraph, PyRemoteIndexSpec}, encode_graph, server::is_online, translate_from_python, translate_map_to_python, }, - url_encode::url_decode_graph, + url_encode::{url_decode_graph, url_decode_graph_at}, }; use pyo3::{ exceptions::{PyException, PyValueError}, @@ -17,7 +16,7 @@ use raphtory::{db::api::view::MaterializedGraph, serialise::GraphFolder}; use raphtory_api::python::error::adapt_err_value; use reqwest::{multipart, multipart::Part, Client}; use serde_json::{json, Value as JsonValue}; -use std::{collections::HashMap, future::Future, io::Cursor, path::PathBuf, sync::Arc}; +use std::{collections::HashMap, future::Future, io::Cursor, sync::Arc}; use tokio::runtime::Runtime; use tracing::debug; @@ -419,8 +418,7 @@ impl PyRaphtoryClient { let data = self.query_with_json_variables(query.clone(), variables.into())?; match data.get("receiveGraph") { Some(JsonValue::String(graph)) => { - let path_for_decoded_graph = None; - let mat_graph = url_decode_graph(graph, path_for_decoded_graph)?; + let mat_graph = url_decode_graph(graph)?; Ok(mat_graph) } _ => Err(PyException::new_err(format!( diff --git a/raphtory-graphql/src/python/mod.rs b/raphtory-graphql/src/python/mod.rs index f7f82643f7..0292834874 100644 --- a/raphtory-graphql/src/python/mod.rs +++ b/raphtory-graphql/src/python/mod.rs @@ -1,5 +1,3 @@ -use std::path::PathBuf; - use crate::{ model::App, url_encode::{url_decode_graph, url_encode_graph, UrlDecodeError}, @@ -10,7 +8,7 @@ use pyo3::{ types::{PyDict, PyList, PyNone}, IntoPyObjectExt, }; -use raphtory::{db::api::view::MaterializedGraph, prelude::GraphViewOps}; +use raphtory::db::api::view::MaterializedGraph; use raphtory_api::python::error::adapt_err_value; use serde_json::{Map, Number, Value as JsonValue}; @@ -121,8 +119,7 @@ pub(crate) fn encode_graph(graph: MaterializedGraph) -> PyResult { /// Union[Graph, PersistentGraph]: the decoded graph #[pyfunction] pub(crate) fn decode_graph(graph: &str) -> PyResult { - let path_for_decoded_graph = None; - let result = url_decode_graph(graph, path_for_decoded_graph); + let result = url_decode_graph(graph); match result { Ok(g) => Ok(g), Err(e) => Err(PyValueError::new_err(format!("Error decoding: {:?}", e))), diff --git a/raphtory-graphql/src/url_encode.rs b/raphtory-graphql/src/url_encode.rs index 0cd177285a..98f4c09cb5 100644 --- a/raphtory-graphql/src/url_encode.rs +++ b/raphtory-graphql/src/url_encode.rs @@ -1,10 +1,14 @@ use base64::{prelude::BASE64_URL_SAFE, DecodeError, Engine}; use raphtory::{ - db::api::view::MaterializedGraph, + db::api::{ + storage::storage::{Extension, PersistentStrategy}, + view::MaterializedGraph, + }, errors::GraphError, prelude::{StableDecode, StableEncode}, + serialise::GraphPaths, }; -use std::path::Path; + #[derive(thiserror::Error, Debug)] pub enum UrlDecodeError { #[error("Bincode operation failed")] @@ -21,18 +25,26 @@ pub enum UrlDecodeError { 
 pub fn url_encode_graph<G: Into<MaterializedGraph>>(graph: G) -> Result<String, GraphError> {
     let g: MaterializedGraph = graph.into();
-    let bytes = g.encode_to_bytes();
+    let bytes = g.encode_to_bytes()?;
     Ok(BASE64_URL_SAFE.encode(bytes))
 }
 
-pub fn url_decode_graph<T: AsRef<[u8]>>(
+pub fn url_decode_graph<T: AsRef<[u8]>>(graph: T) -> Result<MaterializedGraph, UrlDecodeError> {
+    let bytes = BASE64_URL_SAFE.decode(graph.as_ref()).unwrap();
+    MaterializedGraph::decode_from_bytes(&bytes)
+}
+
+pub fn url_decode_graph_at<T: AsRef<[u8]>>(
     graph: T,
-    storage_path: Option<&Path>,
+    storage_path: &(impl GraphPaths + ?Sized),
 ) -> Result<MaterializedGraph, UrlDecodeError> {
     let bytes = BASE64_URL_SAFE.decode(graph.as_ref()).unwrap();
-
-    MaterializedGraph::decode_from_bytes(&bytes, storage_path)
+    if Extension::disk_storage_enabled() {
+        MaterializedGraph::decode_from_bytes_at(&bytes, storage_path)
+    } else {
+        MaterializedGraph::decode_from_bytes(&bytes)
+    }
 }
 
 #[cfg(test)]
@@ -55,7 +67,7 @@ mod tests {
         let bytes = url_encode_graph(graph.clone()).unwrap();
         let tempdir = tempfile::tempdir().unwrap();
         let storage_path = tempdir.path().to_path_buf();
-        let decoded_graph = url_decode_graph(bytes, Some(&storage_path)).unwrap();
+        let decoded_graph = url_decode_graph_at(bytes, &storage_path).unwrap();
 
         let g2 = decoded_graph.into_events().unwrap();
diff --git a/raphtory-storage/src/core_ops.rs b/raphtory-storage/src/core_ops.rs
index a0b82be773..c6c1897a23 100644
--- a/raphtory-storage/src/core_ops.rs
+++ b/raphtory-storage/src/core_ops.rs
@@ -15,7 +15,7 @@ use raphtory_api::{
     inherit::Base,
     iter::{BoxedIter, BoxedLIter, IntoDynBoxed},
 };
-use raphtory_core::entities::{nodes::node_ref::NodeRef, properties::graph_meta::GraphMeta};
+use raphtory_core::entities::nodes::node_ref::NodeRef;
 use std::{iter, sync::Arc};
 use storage::resolver::GIDResolverOps;
 
diff --git a/raphtory-storage/src/graph/graph.rs b/raphtory-storage/src/graph/graph.rs
index 38176450d2..40fd0aa572 100644
--- a/raphtory-storage/src/graph/graph.rs
+++ b/raphtory-storage/src/graph/graph.rs
@@ -12,8 +12,8 @@ use crate::{
 };
 use db4_graph::TemporalGraph;
 use raphtory_api::core::entities::{properties::meta::Meta, LayerIds, LayerVariants, EID, VID};
-use raphtory_core::entities::{nodes::node_ref::NodeRef, properties::graph_meta::GraphMeta};
-use std::{fmt::Debug, iter, sync::Arc};
+use raphtory_core::entities::nodes::node_ref::NodeRef;
+use std::{fmt::Debug, iter, path::Path, sync::Arc};
 use storage::{Extension, GraphPropEntry};
 use thiserror::Error;
 
@@ -94,7 +94,7 @@ impl GraphStorage {
         }
     }
 
-    pub fn disk_storage_enabled(&self) -> bool {
+    pub fn disk_storage_enabled(&self) -> Option<&Path> {
         match self {
             GraphStorage::Mem(graph) => graph.graph.disk_storage_enabled(),
             GraphStorage::Unlocked(graph) => graph.disk_storage_enabled(),
diff --git a/raphtory-storage/src/mutation/addition_ops.rs b/raphtory-storage/src/mutation/addition_ops.rs
index e918be32d9..6c399b8598 100644
--- a/raphtory-storage/src/mutation/addition_ops.rs
+++ b/raphtory-storage/src/mutation/addition_ops.rs
@@ -178,33 +178,6 @@ pub trait SessionAdditionOps: Send + Sync {
         dtype: PropType,
         is_static: bool,
     ) -> Result<MaybeNew<usize>, Self::Error>;
-
-    /// add node update
-    fn internal_add_node(
-        &self,
-        t: TimeIndexEntry,
-        v: VID,
-        props: &[(usize, Prop)],
-    ) -> Result<(), Self::Error>;
-
-    /// add edge update
-    fn internal_add_edge(
-        &self,
-        t: TimeIndexEntry,
-        src: VID,
-        dst: VID,
-        props: &[(usize, Prop)],
-        layer: usize,
-    ) -> Result<MaybeNew<EID>, Self::Error>;
-
-    /// add update for an existing edge
-    fn internal_add_edge_update(
-        &self,
-        t: TimeIndexEntry,
-        edge: EID,
-        props: &[(usize, Prop)],
-        layer: usize,
-    ) -> Result<(), Self::Error>;
 }
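One line of context for the `disk_storage_enabled` change threaded through the hunks above: returning `Option<&Path>` instead of `bool` encodes the capability as the storage path itself, so a caller can no longer observe "enabled" without also holding the directory it needs, and `Option::filter` expresses the feature gate in one step. A minimal sketch of the pattern:

```rust
use std::path::Path;

// "Enabled" is represented by the path being available, not by a bare flag.
fn disk_storage_enabled<'a>(graph_dir: Option<&'a Path>, extension_on: bool) -> Option<&'a Path> {
    graph_dir.filter(|_| extension_on)
}

fn main() {
    let dir = Some(Path::new("/tmp/graph"));
    // Callers must destructure, so "enabled, but where?" is unrepresentable.
    if let Some(path) = disk_storage_enabled(dir, true) {
        println!("disk storage at {}", path.display());
    }
}
```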
impl InternalAdditionOps for GraphStorage { diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index 70cba75036..ed9aadf9e7 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -169,36 +169,6 @@ impl<'a> SessionAdditionOps for UnlockedSession<'a> { .edge_meta() .resolve_prop_id(prop, dtype, is_static)?) } - - fn internal_add_node( - &self, - t: TimeIndexEntry, - v: VID, - props: &[(usize, Prop)], - ) -> Result<(), Self::Error> { - todo!() - } - - fn internal_add_edge( - &self, - t: TimeIndexEntry, - src: VID, - dst: VID, - props: &[(usize, Prop)], - layer: usize, - ) -> Result, Self::Error> { - todo!() - } - - fn internal_add_edge_update( - &self, - t: TimeIndexEntry, - edge: EID, - props: &[(usize, Prop)], - layer: usize, - ) -> Result<(), Self::Error> { - todo!() - } } impl InternalAdditionOps for TemporalGraph { diff --git a/raphtory/src/db/api/mutation/index_ops.rs b/raphtory/src/db/api/mutation/index_ops.rs index 0502bd7b29..bce81bf9c1 100644 --- a/raphtory/src/db/api/mutation/index_ops.rs +++ b/raphtory/src/db/api/mutation/index_ops.rs @@ -1,11 +1,15 @@ use crate::{ - db::api::view::{IndexSpec, IndexSpecBuilder}, + db::api::view::{internal::InternalStorageOps, IndexSpec, IndexSpecBuilder}, errors::GraphError, prelude::AdditionOps, - serialise::GraphFolder, + serialise::{GraphFolder, GraphPaths}, }; -use std::{fs::File, path::Path}; -use zip::ZipArchive; +use std::{ + fs::File, + io::{Seek, Write}, + path::Path, +}; +use zip::{ZipArchive, ZipWriter}; /// Mutation operations for managing indexes. pub trait IndexMutationOps: Sized + AdditionOps { @@ -55,7 +59,7 @@ pub trait IndexMutationOps: Sized + AdditionOps { /// /// Returns: /// None: - fn persist_index_to_disk(&self, path: &GraphFolder) -> Result<(), GraphError>; + fn persist_index_to_disk(&self, path: &impl GraphPaths) -> Result<(), GraphError>; /// Persists the current index to disk as a compressed ZIP file at the specified path. /// @@ -64,7 +68,11 @@ pub trait IndexMutationOps: Sized + AdditionOps { /// /// Returns: /// None: - fn persist_index_to_disk_zip(&self, path: &GraphFolder) -> Result<(), GraphError>; + fn persist_index_to_disk_zip( + &self, + writer: &mut ZipWriter, + prefix: &str, + ) -> Result<(), GraphError>; /// Drops (removes) the current index from the database. /// @@ -138,13 +146,13 @@ impl IndexMutationOps for G { self.get_storage() .map_or(Err(GraphError::IndexingNotSupported), |storage| { if path.is_zip() { - if has_index(path.get_base_path())? { + if has_index(path.root())? 
{ storage.load_index_if_empty(&path)?; } else { return Ok(()); // Skip if no index in zip } } else { - let index_path = path.get_index_path(); + let index_path = path.index_path()?; if index_path.exists() && index_path.read_dir()?.next().is_some() { storage.load_index_if_empty(&path)?; } @@ -154,20 +162,22 @@ impl IndexMutationOps for G { }) } - fn persist_index_to_disk(&self, path: &GraphFolder) -> Result<(), GraphError> { + fn persist_index_to_disk(&self, path: &impl GraphPaths) -> Result<(), GraphError> { self.get_storage() .map_or(Err(GraphError::IndexingNotSupported), |storage| { - storage.persist_index_to_disk(&path)?; + storage.persist_index_to_disk(path)?; Ok(()) }) } - fn persist_index_to_disk_zip(&self, path: &GraphFolder) -> Result<(), GraphError> { + fn persist_index_to_disk_zip( + &self, + writer: &mut ZipWriter, + prefix: &str, + ) -> Result<(), GraphError> { self.get_storage() - .map_or(Err(GraphError::IndexingNotSupported), |storage| { - storage.persist_index_to_disk_zip(&path)?; - Ok(()) - }) + .ok_or(GraphError::IndexingNotSupported)? + .persist_index_to_disk_zip(writer, prefix) } fn drop_index(&self) -> Result<(), GraphError> { diff --git a/raphtory/src/db/api/storage/graph/storage_ops/disk_storage.rs b/raphtory/src/db/api/storage/graph/storage_ops/disk_storage.rs deleted file mode 100644 index 0a217c8bc1..0000000000 --- a/raphtory/src/db/api/storage/graph/storage_ops/disk_storage.rs +++ /dev/null @@ -1,205 +0,0 @@ -use crate::{ - db::{ - api::view::internal::GraphTimeSemanticsOps, graph::views::deletion_graph::PersistentGraph, - }, - errors::GraphError, - prelude::{Graph, GraphViewOps, NodeStateOps, NodeViewOps}, -}; -use arrow::array::ArrayRef; -use itertools::Itertools; -use pometry_storage::interop::GraphLike; -use raphtory_api::{ - core::{ - entities::{properties::tprop::TPropOps, LayerIds, EID, GID, VID}, - storage::timeindex::{TimeIndexEntry, TimeIndexOps}, - Direction, - }, - iter::IntoDynBoxed, -}; -use raphtory_core::utils::iter::GenLockedIter; -use raphtory_storage::{ - core_ops::CoreGraphOps, - disk::{graph_impl::prop_conversion::arrow_array_from_props, DiskGraphStorage}, - graph::{ - edges::edge_storage_ops::EdgeStorageOps, graph::GraphStorage, - nodes::node_storage_ops::NodeStorageOps, - }, -}; -use std::{path::Path, sync::Arc}; - -impl From for Graph { - fn from(value: DiskGraphStorage) -> Self { - Graph::from_internal_graph(GraphStorage::Disk(Arc::new(value))) - } -} - -impl From for PersistentGraph { - fn from(value: DiskGraphStorage) -> Self { - PersistentGraph::from_internal_graph(GraphStorage::Disk(Arc::new(value))) - } -} - -pub trait IntoGraph { - fn into_graph(self) -> Graph; - - fn into_persistent_graph(self) -> PersistentGraph; -} - -impl IntoGraph for DiskGraphStorage { - fn into_graph(self) -> Graph { - self.into() - } - - fn into_persistent_graph(self) -> PersistentGraph { - self.into() - } -} - -impl Graph { - pub fn persist_as_disk_graph(&self, graph_dir: impl AsRef) -> Result { - Ok(Graph::from(DiskGraphStorage::from_graph(self, graph_dir)?)) - } -} - -impl PersistentGraph { - pub fn persist_as_disk_graph( - &self, - graph_dir: impl AsRef, - ) -> Result { - Ok(PersistentGraph::from(DiskGraphStorage::from_graph( - &self.event_graph(), - graph_dir, - )?)) - } -} - -impl GraphLike for Graph { - fn external_ids(&self) -> Vec { - self.nodes().id().collect() - } - - fn node_names(&self) -> impl Iterator { - self.nodes().name().into_iter_values() - } - - fn node_type_ids(&self) -> Option> { - if self.core_graph().node_meta().node_type_meta().len() 
<= 1 { - None - } else { - let core_nodes = self.core_nodes(); - Some((0..core_nodes.len()).map(move |i| core_nodes.node_entry(VID(i)).node_type_id())) - } - } - - fn node_types(&self) -> Option> { - let meta = self.core_graph().node_meta().node_type_meta(); - if meta.len() <= 1 { - None - } else { - Some(meta.get_keys().into_iter().map(|s| s.to_string())) - } - } - - fn layer_names(&self) -> Vec { - self.edge_meta() - .layer_meta() - .get_keys() - .into_iter() - .map_into() - .collect() - } - - fn num_nodes(&self) -> usize { - self.unfiltered_num_nodes() - } - - fn num_edges(&self) -> usize { - self.count_edges() - } - - fn out_degree(&self, vid: VID, layer: usize) -> usize { - self.core_node(vid.0.into()) - .degree(&LayerIds::One(layer), Direction::OUT) - } - - fn in_degree(&self, vid: VID, layer: usize) -> usize { - self.core_node(vid.0.into()) - .degree(&LayerIds::One(layer), Direction::IN) - } - - fn in_edges(&self, vid: VID, layer: usize, map: impl Fn(VID, EID) -> B) -> Vec { - let node = self.core_node(vid.0.into()); - node.edges_iter(&LayerIds::One(layer), Direction::IN) - .map(|edge| map(edge.src(), edge.pid())) - .collect() - } - fn out_edges(&self, vid: VID, layer: usize) -> Vec<(VID, VID, EID)> { - let node = self.core_node(vid.0.into()); - let edges = node - .edges_iter(&LayerIds::One(layer), Direction::OUT) - .map(|edge| { - let src = edge.src(); - let dst = edge.dst(); - let eid = edge.pid(); - (src, dst, eid) - }) - .collect(); - edges - } - - fn edge_additions(&self, eid: EID, layer: usize) -> impl Iterator + '_ { - let edge = self.core_edge(eid); - GenLockedIter::from(edge, |edge| edge.additions(layer).iter().into_dyn_boxed()) - } - - fn edge_prop_keys(&self) -> Vec { - let props = self.edge_meta().temporal_prop_mapper().get_keys(); - props.into_iter().map(|s| s.to_string()).collect() - } - - fn find_name(&self, vid: VID) -> Option { - self.core_node(vid.0.into()).name().map(|s| s.to_string()) - } - - fn prop_as_arrow>( - &self, - disk_edges: &[u64], - edge_id_map: &[usize], - edge_ts: &[TimeIndexEntry], - edge_t_offsets: &[usize], - layer: usize, - prop_id: usize, - _key: S, - ) -> Option { - let prop_type = self - .edge_meta() - .temporal_prop_mapper() - .get_dtype(prop_id) - .unwrap(); - arrow_array_from_props( - disk_edges.iter().flat_map(|&disk_eid| { - let disk_eid = disk_eid as usize; - let eid = edge_id_map[disk_eid]; - let ts = &edge_ts[edge_t_offsets[disk_eid]..edge_t_offsets[disk_eid + 1]]; - let edge = self.core_edge(EID(eid)); - ts.iter() - .map(move |t| edge.temporal_prop_layer(layer, prop_id).at(t)) - }), - prop_type, - ) - } - - fn earliest_time(&self) -> i64 { - self.earliest_time_global().unwrap_or(i64::MAX) - } - - fn latest_time(&self) -> i64 { - self.latest_time_global().unwrap_or(i64::MIN) - } - - fn out_neighbours(&self, vid: VID) -> impl Iterator + '_ { - self.core_node(vid) - .into_edges_iter(&LayerIds::All, Direction::OUT) - .map(|e_ref| (e_ref.dst(), e_ref.pid())) - } -} diff --git a/raphtory/src/db/api/storage/graph/storage_ops/mod.rs b/raphtory/src/db/api/storage/graph/storage_ops/mod.rs index 9593817d11..5366d60ac8 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/mod.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/mod.rs @@ -1,5 +1,6 @@ use crate::db::api::{storage::storage::Storage, view::internal::InternalStorageOps}; use raphtory_storage::graph::graph::GraphStorage; +use std::path::Path; pub mod edge_filter; pub mod list_ops; @@ -14,7 +15,7 @@ impl InternalStorageOps for GraphStorage { None } - fn 
disk_storage_enabled(&self) -> bool { + fn disk_storage_enabled(&self) -> Option<&Path> { self.disk_storage_enabled() } } diff --git a/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs b/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs index 621f0c44ab..4930179fde 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs @@ -1,11 +1,8 @@ use super::GraphStorage; use crate::{ core::{entities::LayerIds, storage::timeindex::TimeIndexOps}, - db::api::{ - storage::graph, - view::internal::{ - EdgeHistoryFilter, GraphTimeSemanticsOps, NodeHistoryFilter, TimeSemantics, - }, + db::api::view::internal::{ + EdgeHistoryFilter, GraphTimeSemanticsOps, NodeHistoryFilter, TimeSemantics, }, prelude::Prop, }; @@ -25,7 +22,7 @@ use raphtory_storage::{ }, }; use rayon::iter::ParallelIterator; -use std::ops::{Deref, Range}; +use std::ops::Range; use storage::{ api::graph_props::{GraphPropEntryOps, GraphPropRefOps}, gen_ts::ALL_LAYERS, diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 84de4c5e81..f69144d128 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -35,21 +35,28 @@ use std::{ path::Path, sync::Arc, }; -use storage::{Extension, WalImpl}; +pub use storage::{ + persist::strategy::{Config, PersistentStrategy}, + Extension, WalImpl, +}; #[cfg(feature = "search")] use { crate::{ db::api::view::IndexSpec, search::graph_index::{GraphIndex, MutableGraphIndex}, - serialise::GraphFolder, + serialise::{GraphFolder, GraphPaths}, }, either::Either, parking_lot::RwLock, raphtory_core::entities::nodes::node_ref::AsNodeRef, raphtory_storage::{core_ops::CoreGraphOps, graph::nodes::node_storage_ops::NodeStorageOps}, - std::ops::{Deref, DerefMut}, + std::{ + io::{Seek, Write}, + ops::{Deref, DerefMut}, + }, tracing::info, + zip::ZipWriter, }; #[derive(Debug, Default)] @@ -95,33 +102,35 @@ impl Storage { } } - pub(crate) fn new_at_path(path: impl AsRef) -> Self { - Self { - graph: GraphStorage::Unlocked(Arc::new( - TemporalGraph::new_with_path(path, Extension::default()).unwrap(), - )), + pub(crate) fn new_at_path(path: impl AsRef) -> Result { + Ok(Self { + graph: GraphStorage::Unlocked(Arc::new(TemporalGraph::new_with_path( + path, + Extension::default(), + )?)), #[cfg(feature = "search")] index: RwLock::new(GraphIndex::Empty), - } + }) } - pub(crate) fn new_with_path_and_ext(path: impl AsRef, ext: Extension) -> Self { - Self { - graph: GraphStorage::Unlocked(Arc::new( - TemporalGraph::new_with_path(path, ext).unwrap(), - )), + pub(crate) fn new_with_path_and_ext( + path: impl AsRef, + ext: Extension, + ) -> Result { + Ok(Self { + graph: GraphStorage::Unlocked(Arc::new(TemporalGraph::new_with_path(path, ext)?)), #[cfg(feature = "search")] index: RwLock::new(GraphIndex::Empty), - } + }) } - pub(crate) fn load_from(path: impl AsRef) -> Self { - let graph = GraphStorage::Unlocked(Arc::new(TemporalGraph::load_from_path(path).unwrap())); - Self { + pub(crate) fn load_from(path: impl AsRef) -> Result { + let graph = GraphStorage::Unlocked(Arc::new(TemporalGraph::load_from_path(path)?)); + Ok(Self { graph, #[cfg(feature = "search")] index: RwLock::new(GraphIndex::Empty), - } + }) } pub(crate) fn from_inner(graph: GraphStorage) -> Self { @@ -239,7 +248,7 @@ impl Storage { self.index.read_recursive().is_indexed() } - pub(crate) fn persist_index_to_disk(&self, path: &GraphFolder) -> Result<(), GraphError> { + 
pub(crate) fn persist_index_to_disk(&self, path: &impl GraphPaths) -> Result<(), GraphError> { let guard = self.get_index().read_recursive(); if guard.is_indexed() { if guard.path().is_none() { @@ -251,14 +260,18 @@ impl Storage { Ok(()) } - pub(crate) fn persist_index_to_disk_zip(&self, path: &GraphFolder) -> Result<(), GraphError> { + pub(crate) fn persist_index_to_disk_zip( + &self, + writer: &mut ZipWriter, + prefix: &str, + ) -> Result<(), GraphError> { let guard = self.get_index().read_recursive(); if guard.is_indexed() { if guard.path().is_none() { info!("{}", IN_MEMORY_INDEX_NOT_PERSISTED); return Ok(()); } - self.if_index(|index| index.persist_to_disk_zip(path))?; + self.if_index(|index| index.persist_to_disk_zip(writer, prefix))?; } Ok(()) } @@ -275,7 +288,7 @@ impl InternalStorageOps for Storage { Some(self) } - fn disk_storage_enabled(&self) -> bool { + fn disk_storage_enabled(&self) -> Option<&Path> { self.graph.disk_storage_enabled() } } @@ -404,61 +417,6 @@ impl<'a> SessionAdditionOps for StorageWriteSession<'a> { Ok(id) } - - fn internal_add_node( - &self, - t: TimeIndexEntry, - v: VID, - props: &[(usize, Prop)], - ) -> Result<(), Self::Error> { - self.session.internal_add_node(t, v, props)?; - - #[cfg(feature = "search")] - self.storage - .if_index_mut(|index| index.add_node_update(t, v, props))?; - - Ok(()) - } - - fn internal_add_edge( - &self, - t: TimeIndexEntry, - src: VID, - dst: VID, - props: &[(usize, Prop)], - layer: usize, - ) -> Result, Self::Error> { - let id = self.session.internal_add_edge(t, src, dst, props, layer)?; - #[cfg(feature = "search")] - self.storage.if_index_mut(|index| { - index.add_edge_update(&self.storage.graph, id, t, layer, props) - })?; - - Ok(id) - } - - fn internal_add_edge_update( - &self, - t: TimeIndexEntry, - edge: EID, - props: &[(usize, Prop)], - layer: usize, - ) -> Result<(), Self::Error> { - self.session - .internal_add_edge_update(t, edge, props, layer)?; - - #[cfg(feature = "search")] - self.storage.if_index_mut(|index| { - index.add_edge_update( - &self.storage.graph, - MaybeNew::Existing(edge), - t, - layer, - props, - ) - })?; - Ok(()) - } } impl InternalAdditionOps for Storage { diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index f8fc5d3387..bae70fdfe3 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -1,5 +1,5 @@ -#[cfg(feature = "search")] -use crate::search::{fallback_filter_edges, fallback_filter_nodes}; +#[cfg(feature = "io")] +use crate::serialise::GraphPaths; use crate::{ core::{ entities::{nodes::node_ref::AsNodeRef, LayerIds, VID}, @@ -17,13 +17,8 @@ use crate::{ node::NodeView, nodes::Nodes, views::{ - cached_view::CachedView, - filter::{ - model::{AsEdgeFilter, AsNodeFilter}, - node_type_filtered_graph::NodeTypeFilteredGraph, - }, - node_subgraph::NodeSubgraph, - valid_graph::ValidGraph, + cached_view::CachedView, filter::node_type_filtered_graph::NodeTypeFilteredGraph, + node_subgraph::NodeSubgraph, valid_graph::ValidGraph, }, }, }, @@ -62,6 +57,13 @@ use std::{ path::Path, sync::{atomic::Ordering, Arc}, }; +use storage::{persist::strategy::PersistentStrategy, Extension}; + +#[cfg(feature = "search")] +use crate::{ + db::graph::views::filter::model::{AsEdgeFilter, AsNodeFilter}, + search::{fallback_filter_edges, fallback_filter_nodes}, +}; /// This trait GraphViewOps defines operations for accessing /// information about a graph. 
The trait has associated types @@ -86,11 +88,13 @@ pub trait GraphViewOps<'graph>: BoxableGraphView + Sized + Clone + 'graph { /// /// Returns: /// MaterializedGraph: Returns a new materialized graph. - fn materialize_at(&self, path: Option<&Path>) -> Result; + #[cfg(feature = "io")] + fn materialize_at( + &self, + path: &(impl GraphPaths + ?Sized), + ) -> Result; - fn materialize(&self) -> Result { - self.materialize_at(None) - } + fn materialize(&self) -> Result; fn subgraph, V: AsNodeRef>(&self, nodes: I) -> NodeSubgraph; @@ -229,346 +233,365 @@ fn edges_inner<'graph, G: GraphView + 'graph>(g: &G, locked: bool) -> Edges<'gra } } -impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { - fn edges(&self) -> Edges<'graph, Self, Self> { - edges_inner(self, true) - } +fn materialize_impl( + graph: &impl GraphView, + path: Option<&Path>, +) -> Result { + let storage = graph.core_graph().lock(); + let mut node_meta = Meta::new_for_nodes(); + let mut edge_meta = Meta::new_for_edges(); + let mut graph_props_meta = Meta::new_for_graph_props(); + + node_meta.set_metadata_mapper(graph.node_meta().metadata_mapper().deep_clone()); + node_meta.set_temporal_prop_mapper(graph.node_meta().temporal_prop_mapper().deep_clone()); + edge_meta.set_metadata_mapper(graph.edge_meta().metadata_mapper().deep_clone()); + edge_meta.set_temporal_prop_mapper(graph.edge_meta().temporal_prop_mapper().deep_clone()); + graph_props_meta.set_metadata_mapper(graph.graph_props_meta().metadata_mapper().deep_clone()); + graph_props_meta + .set_temporal_prop_mapper(graph.graph_props_meta().temporal_prop_mapper().deep_clone()); + + let layer_meta = edge_meta.layer_meta(); + + // NOTE: layers must be set in layer_meta before the TemporalGraph is initialized to + // make sure empty layers are created. 
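The `layer_map` built just below is an old-id to new-id translation: every layer id visible in the source view is re-interned in the new graph's layer meta up front, which is also why the NOTE above insists this happens before the `TemporalGraph` is created. A self-contained sketch of the remapping idea, with a plain `HashMap` standing in for the real layer `Meta`:

```rust
use std::collections::HashMap;

/// Re-intern each source layer name in the target and record old -> new ids;
/// index the returned Vec with the source layer id.
fn build_layer_map(source_layers: &[&str], target: &mut HashMap<String, usize>) -> Vec<usize> {
    source_layers
        .iter()
        .map(|name| {
            let next_id = target.len();
            *target.entry((*name).to_string()).or_insert(next_id)
        })
        .collect()
}

fn main() {
    let mut target = HashMap::new();
    target.insert("_default".to_string(), 0); // the target may already have layers
    let map = build_layer_map(&["_default", "follows", "likes"], &mut target);
    assert_eq!(map, vec![0, 1, 2]);
}
```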
+ let layer_map: Vec<_> = match graph.layer_ids() { + LayerIds::None => { + // no layers to map + vec![] + } + LayerIds::All => { + let layers = storage.edge_meta().layer_meta().keys(); + let mut layer_map = vec![0; storage.edge_meta().layer_meta().num_all_fields()]; - fn edges_unlocked(&self) -> Edges<'graph, Self, Self> { - edges_inner(self, false) - } + for (id, name) in storage.edge_meta().layer_meta().ids().zip(layers.iter()) { + let new_id = layer_meta.get_or_create_id(name).inner(); + layer_map[id] = new_id; + } - fn nodes(&self) -> Nodes<'graph, Self, Self> { - let graph = self.clone(); - Nodes::new(graph) - } + layer_map + } + LayerIds::One(l_id) => { + let mut layer_map = vec![0; storage.edge_meta().layer_meta().num_all_fields()]; + let layer_name = storage.edge_meta().get_layer_name_by_id(*l_id); + let new_id = layer_meta.get_or_create_id(&layer_name).inner(); - fn materialize_at(&self, path: Option<&Path>) -> Result { - let storage = self.core_graph().lock(); - - let mut node_meta = Meta::new_for_nodes(); - let mut edge_meta = Meta::new_for_edges(); - let mut graph_props_meta = Meta::new_for_graph_props(); - - node_meta.set_metadata_mapper(self.node_meta().metadata_mapper().deep_clone()); - node_meta.set_temporal_prop_mapper(self.node_meta().temporal_prop_mapper().deep_clone()); - edge_meta.set_metadata_mapper(self.edge_meta().metadata_mapper().deep_clone()); - edge_meta.set_temporal_prop_mapper(self.edge_meta().temporal_prop_mapper().deep_clone()); - graph_props_meta - .set_metadata_mapper(self.graph_props_meta().metadata_mapper().deep_clone()); - graph_props_meta - .set_temporal_prop_mapper(self.graph_props_meta().temporal_prop_mapper().deep_clone()); - - let layer_meta = edge_meta.layer_meta(); - - // NOTE: layers must be set in layer_meta before the TemporalGraph is initialized to - // make sure empty layers are created. 
- let layer_map: Vec<_> = match self.layer_ids() { - LayerIds::None => { - // no layers to map - vec![] + layer_map[*l_id] = new_id; + layer_map + } + LayerIds::Multiple(ids) => { + let mut layer_map = vec![0; storage.edge_meta().layer_meta().num_all_fields()]; + let layers = storage.edge_meta().layer_meta().all_keys(); + + for id in ids { + let layer_name = &layers[id]; + let new_id = layer_meta.get_or_create_id(layer_name).inner(); + layer_map[id] = new_id; } - LayerIds::All => { - let layers = storage.edge_meta().layer_meta().keys(); - let mut layer_map = vec![0; storage.edge_meta().layer_meta().num_all_fields()]; - for (id, name) in storage.edge_meta().layer_meta().ids().zip(layers.iter()) { - let new_id = layer_meta.get_or_create_id(name).inner(); - layer_map[id] = new_id; - } + layer_map + } + }; - layer_map - } - LayerIds::One(l_id) => { - let mut layer_map = vec![0; storage.edge_meta().layer_meta().num_all_fields()]; - let layer_name = storage.edge_meta().get_layer_name_by_id(*l_id); - let new_id = layer_meta.get_or_create_id(&layer_name).inner(); + node_meta.set_layer_mapper(layer_meta.clone()); - layer_map[*l_id] = new_id; - layer_map - } - LayerIds::Multiple(ids) => { - let mut layer_map = vec![0; storage.edge_meta().layer_meta().num_all_fields()]; - let layers = storage.edge_meta().layer_meta().all_keys(); - - for id in ids { - let layer_name = &layers[id]; - let new_id = layer_meta.get_or_create_id(layer_name).inner(); - layer_map[id] = new_id; - } + let temporal_graph = TemporalGraph::new_with_meta( + path.map(|p| p.into()), + node_meta, + edge_meta, + graph_props_meta, + storage.extension().clone(), + )?; - layer_map - } - }; + if let Some(earliest) = graph.earliest_time() { + temporal_graph.update_time(TimeIndexEntry::start(earliest)); + }; - node_meta.set_layer_mapper(layer_meta.clone()); + if let Some(latest) = graph.latest_time() { + temporal_graph.update_time(TimeIndexEntry::end(latest)); + }; - let temporal_graph = TemporalGraph::new_with_meta( - path.map(|p| p.into()), - node_meta, - edge_meta, - graph_props_meta, - storage.extension().clone(), - ) - .unwrap(); + // Set event counter to be the same as old graph to avoid any possibility for duplicate event ids + temporal_graph + .storage() + .set_event_id(storage.read_event_id()); - if let Some(earliest) = self.earliest_time() { - temporal_graph.update_time(TimeIndexEntry::start(earliest)); - }; + let graph_storage = GraphStorage::from(temporal_graph); - if let Some(latest) = self.latest_time() { - temporal_graph.update_time(TimeIndexEntry::end(latest)); - }; + { + // scope for the write lock + let mut new_storage = graph_storage.write_lock()?; + new_storage.resize_chunks_to_num_nodes(graph.count_nodes()); + for layer_id in &layer_map { + new_storage.nodes.ensure_layer(*layer_id); + } - // Set event counter to be the same as old graph to avoid any possibility for duplicate event ids - temporal_graph - .storage() - .set_event_id(storage.read_event_id()); + let mut node_map = vec![VID::default(); storage.unfiltered_num_nodes()]; + let node_map_shared = atomic_usize_from_mut_slice(bytemuck::cast_slice_mut(&mut node_map)); + + new_storage.nodes.par_iter_mut().try_for_each(|shard| { + for (index, node) in graph.nodes().iter().enumerate() { + let new_id = VID(index); + let gid = node.id(); + node_map_shared[node.node.index()].store(new_id.index(), Ordering::Relaxed); + if let Some(node_pos) = shard.resolve_pos(new_id) { + let mut writer = shard.writer(); + if let Some(node_type) = node.node_type() { + let new_type_id = 
graph_storage + .node_meta() + .node_type_meta() + .get_or_create_id(&node_type) + .inner(); + writer.store_node_id_and_node_type( + node_pos, + 0, + gid.as_ref(), + new_type_id, + 0, + ); + } else { + writer.store_node_id(node_pos, 0, gid.as_ref(), 0); + } + graph_storage + .write_session()? + .set_node(gid.as_ref(), new_id)?; - let graph_storage = GraphStorage::from(temporal_graph); + for (t, row) in node.rows() { + writer.add_props(t, node_pos, 0, row, 0); + } - { - // scope for the write lock - let mut new_storage = graph_storage.write_lock()?; - new_storage.resize_chunks_to_num_nodes(self.count_nodes()); - for layer_id in &layer_map { - new_storage.nodes.ensure_layer(*layer_id); + writer.update_c_props( + node_pos, + 0, + node.metadata_ids() + .filter_map(|id| node.get_metadata(id).map(|prop| (id, prop))), + 0, + ); + } } + Ok::<(), MutationError>(()) + })?; - let mut node_map = vec![VID::default(); storage.unfiltered_num_nodes()]; - let node_map_shared = - atomic_usize_from_mut_slice(bytemuck::cast_slice_mut(&mut node_map)); + new_storage.resize_chunks_to_num_edges(graph.count_edges()); - new_storage.nodes.par_iter_mut().try_for_each(|shard| { - for (index, node) in self.nodes().iter().enumerate() { - let new_id = VID(index); - let gid = node.id(); - node_map_shared[node.node.index()].store(new_id.index(), Ordering::Relaxed); - if let Some(node_pos) = shard.resolve_pos(new_id) { - let mut writer = shard.writer(); - if let Some(node_type) = node.node_type() { - let new_type_id = graph_storage - .node_meta() - .node_type_meta() - .get_or_create_id(&node_type) - .inner(); - writer.store_node_id_and_node_type( - node_pos, - 0, - gid.as_ref(), - new_type_id, - 0, - ); - } else { - writer.store_node_id(node_pos, 0, gid.as_ref(), 0); - } - graph_storage - .write_session()? 
- .set_node(gid.as_ref(), new_id)?; + for layer_id in &layer_map { + new_storage.edges.ensure_layer(*layer_id); + } - for (t, row) in node.rows() { - writer.add_props(t, node_pos, 0, row, 0); + new_storage.edges.par_iter_mut().try_for_each(|shard| { + for (eid, edge) in graph.edges().iter().enumerate() { + let src = node_map[edge.edge.src().index()]; + let dst = node_map[edge.edge.dst().index()]; + let eid = EID(eid); + if let Some(edge_pos) = shard.resolve_pos(eid) { + let mut writer = shard.writer(); + // make the edge for the first time + writer.add_static_edge(Some(edge_pos), src, dst, 0, false); + + for edge in edge.explode_layers() { + let layer = layer_map[edge.edge.layer().unwrap()]; + for edge in edge.explode() { + let t = edge.edge.time().unwrap(); + writer.add_edge(t, edge_pos, src, dst, [], layer, 0); + } + //TODO: move this in edge.row() + for (t, t_props) in edge + .properties() + .temporal() + .values() + .map(|tp| { + let prop_id = tp.id(); + tp.iter_indexed() + .map(|(t, prop)| (t, prop_id, prop)) + .collect::>() + }) + .kmerge_by(|(t, _, _), (t2, _, _)| t <= t2) + .chunk_by(|(t, _, _)| *t) + .into_iter() + { + let props = t_props + .map(|(_, prop_id, prop)| (prop_id, prop)) + .collect::>(); + writer.add_edge(t, edge_pos, src, dst, props, layer, 0); } - writer.update_c_props( + edge_pos, + src, + dst, + layer, + edge.metadata_ids().filter_map(move |prop_id| { + edge.get_metadata(prop_id).map(|prop| (prop_id, prop)) + }), + ); + } + + let time_semantics = graph.edge_time_semantics(); + let edge_entry = graph.core_edge(edge.edge.pid()); + for (t, layer) in time_semantics.edge_deletion_history( + edge_entry.as_ref(), + graph, + graph.layer_ids(), + ) { + let layer = layer_map[layer]; + writer.delete_edge(t, edge_pos, src, dst, layer, 0); + } + } + } + Ok::<(), MutationError>(()) + })?; + + new_storage.nodes.par_iter_mut().try_for_each(|shard| { + for (eid, edge) in graph.edges().iter().enumerate() { + let eid = EID(eid); + let src_id = node_map[edge.edge.src().index()]; + let dst_id = node_map[edge.edge.dst().index()]; + let maybe_src_pos = shard.resolve_pos(src_id); + let maybe_dst_pos = shard.resolve_pos(dst_id); + + if let Some(node_pos) = maybe_src_pos { + let mut writer = shard.writer(); + writer.add_static_outbound_edge(node_pos, dst_id, eid, 0); + } + + if let Some(node_pos) = maybe_dst_pos { + let mut writer = shard.writer(); + writer.add_static_inbound_edge(node_pos, src_id, eid, 0); + } + + for e in edge.explode_layers() { + let layer = layer_map[e.edge.layer().unwrap()]; + if let Some(node_pos) = maybe_src_pos { + let mut writer = shard.writer(); + writer.add_outbound_edge::( + None, node_pos, + dst_id, + eid.with_layer(layer), 0, - node.metadata_ids() - .filter_map(|id| node.get_metadata(id).map(|prop| (id, prop))), + ); + } + if let Some(node_pos) = maybe_dst_pos { + let mut writer = shard.writer(); + writer.add_inbound_edge::( + None, + node_pos, + src_id, + eid.with_layer(layer), 0, ); } } - Ok::<(), MutationError>(()) - })?; - new_storage.resize_chunks_to_num_edges(self.count_edges()); - - for layer_id in &layer_map { - new_storage.edges.ensure_layer(*layer_id); - } + for e in edge.explode() { + if let Some(node_pos) = maybe_src_pos { + let mut writer = shard.writer(); - new_storage.edges.par_iter_mut().try_for_each(|shard| { - for (eid, edge) in self.edges().iter().enumerate() { - let src = node_map[edge.edge.src().index()]; - let dst = node_map[edge.edge.dst().index()]; - let eid = EID(eid); - if let Some(edge_pos) = shard.resolve_pos(eid) { + let t = 
e.time_and_index().expect("exploded edge should have time"); + let l = layer_map[e.edge.layer().unwrap()]; + writer.update_timestamp(t, node_pos, eid.with_layer(l), 0); + } + if let Some(node_pos) = maybe_dst_pos { let mut writer = shard.writer(); - // make the edge for the first time - writer.add_static_edge(Some(edge_pos), src, dst, 0, false); - - for edge in edge.explode_layers() { - let layer = layer_map[edge.edge.layer().unwrap()]; - for edge in edge.explode() { - let t = edge.edge.time().unwrap(); - writer.add_edge(t, edge_pos, src, dst, [], layer, 0); - } - //TODO: move this in edge.row() - for (t, t_props) in edge - .properties() - .temporal() - .values() - .map(|tp| { - let prop_id = tp.id(); - tp.iter_indexed() - .map(|(t, prop)| (t, prop_id, prop)) - .collect::>() - }) - .kmerge_by(|(t, _, _), (t2, _, _)| t <= t2) - .chunk_by(|(t, _, _)| *t) - .into_iter() - { - let props = t_props - .map(|(_, prop_id, prop)| (prop_id, prop)) - .collect::>(); - writer.add_edge(t, edge_pos, src, dst, props, layer, 0); - } - writer.update_c_props( - edge_pos, - src, - dst, - layer, - edge.metadata_ids().filter_map(move |prop_id| { - edge.get_metadata(prop_id).map(|prop| (prop_id, prop)) - }), - ); - } - let time_semantics = self.edge_time_semantics(); - let edge_entry = self.core_edge(edge.edge.pid()); - for (t, layer) in time_semantics.edge_deletion_history( - edge_entry.as_ref(), - self, - self.layer_ids(), - ) { - let layer = layer_map[layer]; - writer.delete_edge(t, edge_pos, src, dst, layer, 0); - } + let t = e.time_and_index().expect("exploded edge should have time"); + let l = layer_map[e.edge.layer().unwrap()]; + writer.update_timestamp(t, node_pos, eid.with_layer(l), 0); } } - Ok::<(), MutationError>(()) - })?; - - new_storage.nodes.par_iter_mut().try_for_each(|shard| { - for (eid, edge) in self.edges().iter().enumerate() { - let eid = EID(eid); - let src_id = node_map[edge.edge.src().index()]; - let dst_id = node_map[edge.edge.dst().index()]; - let maybe_src_pos = shard.resolve_pos(src_id); - let maybe_dst_pos = shard.resolve_pos(dst_id); + let edge_time_semantics = graph.edge_time_semantics(); + let edge_entry = graph.core_edge(edge.edge.pid()); + for (t, layer) in edge_time_semantics.edge_deletion_history( + edge_entry.as_ref(), + graph, + graph.layer_ids(), + ) { + let layer = layer_map[layer]; if let Some(node_pos) = maybe_src_pos { let mut writer = shard.writer(); - writer.add_static_outbound_edge(node_pos, dst_id, eid, 0); + writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer), 0); } - if let Some(node_pos) = maybe_dst_pos { let mut writer = shard.writer(); - writer.add_static_inbound_edge(node_pos, src_id, eid, 0); + writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer), 0); } + } + } - for e in edge.explode_layers() { - let layer = layer_map[e.edge.layer().unwrap()]; - if let Some(node_pos) = maybe_src_pos { - let mut writer = shard.writer(); - writer.add_outbound_edge::( - None, - node_pos, - dst_id, - eid.with_layer(layer), - 0, - ); - } - if let Some(node_pos) = maybe_dst_pos { - let mut writer = shard.writer(); - writer.add_inbound_edge::( - None, - node_pos, - src_id, - eid.with_layer(layer), - 0, - ); - } - } - - for e in edge.explode() { - if let Some(node_pos) = maybe_src_pos { - let mut writer = shard.writer(); - - let t = e.time_and_index().expect("exploded edge should have time"); - let l = layer_map[e.edge.layer().unwrap()]; - writer.update_timestamp(t, node_pos, eid.with_layer(l), 0); - } - if let Some(node_pos) = maybe_dst_pos { - let 
mut writer = shard.writer(); + Ok::<(), MutationError>(()) + })?; - let t = e.time_and_index().expect("exploded edge should have time"); - let l = layer_map[e.edge.layer().unwrap()]; - writer.update_timestamp(t, node_pos, eid.with_layer(l), 0); - } - } + // Copy over graph properties + if let Some(graph_writer) = new_storage.graph_props.writer() { + // Copy temporal properties + for (prop_name, temporal_prop) in graph.properties().temporal().iter() { + let prop_id = graph_storage + .graph_props_meta() + .temporal_prop_mapper() + .get_or_create_id(&prop_name) + .inner(); - let edge_time_semantics = self.edge_time_semantics(); - let edge_entry = self.core_edge(edge.edge.pid()); - for (t, layer) in edge_time_semantics.edge_deletion_history( - edge_entry.as_ref(), - self, - self.layer_ids(), - ) { - let layer = layer_map[layer]; - if let Some(node_pos) = maybe_src_pos { - let mut writer = shard.writer(); - writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer), 0); - } - if let Some(node_pos) = maybe_dst_pos { - let mut writer = shard.writer(); - writer.update_timestamp(t, node_pos, eid.with_layer_deletion(layer), 0); - } - } + for (t, prop_value) in temporal_prop.iter_indexed() { + let lsn = 0; + graph_writer.add_properties(t, [(prop_id, prop_value)], lsn); } + } - Ok::<(), MutationError>(()) - })?; - - // Copy over graph properties - if let Some(graph_writer) = new_storage.graph_props.writer() { - // Copy temporal properties - for (prop_name, temporal_prop) in self.properties().temporal().iter() { + // Copy metadata (constant properties) + let metadata_props: Vec<_> = graph + .metadata() + .iter_filtered() + .map(|(prop_name, prop_value)| { let prop_id = graph_storage .graph_props_meta() - .temporal_prop_mapper() + .metadata_mapper() .get_or_create_id(&prop_name) .inner(); + (prop_id, prop_value) + }) + .collect(); - for (t, prop_value) in temporal_prop.iter_indexed() { - let lsn = 0; - graph_writer.add_properties(t, [(prop_id, prop_value)], lsn); - } - } - - // Copy metadata (constant properties) - let metadata_props: Vec<_> = self - .metadata() - .iter_filtered() - .map(|(prop_name, prop_value)| { - let prop_id = graph_storage - .graph_props_meta() - .metadata_mapper() - .get_or_create_id(&prop_name) - .inner(); - (prop_id, prop_value) - }) - .collect(); - - if !metadata_props.is_empty() { - let lsn = 0; - graph_writer.update_metadata(metadata_props, lsn); - } + if !metadata_props.is_empty() { + let lsn = 0; + graph_writer.update_metadata(metadata_props, lsn); } } + } + + Ok(graph.new_base_graph(graph_storage)) +} + +impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { + fn edges(&self) -> Edges<'graph, Self, Self> { + edges_inner(self, true) + } + + fn edges_unlocked(&self) -> Edges<'graph, Self, Self> { + edges_inner(self, false) + } + + fn nodes(&self) -> Nodes<'graph, Self, Self> { + let graph = self.clone(); + Nodes::new(graph) + } + + fn materialize(&self) -> Result { + materialize_impl(self, None) + } - Ok(self.new_base_graph(graph_storage)) + #[cfg(feature = "io")] + fn materialize_at( + &self, + path: &(impl GraphPaths + ?Sized), + ) -> Result { + if Extension::disk_storage_enabled() { + path.init()?; + let graph_path = path.graph_path()?; + let graph = materialize_impl(self, Some(graph_path.as_ref()))?; + path.write_metadata(&graph)?; + Ok(graph) + } else { + Err(GraphError::DiskGraphNotEnabled) + } } fn subgraph, V: AsNodeRef>(&self, nodes: I) -> NodeSubgraph { diff --git a/raphtory/src/db/api/view/internal/materialize.rs 
b/raphtory/src/db/api/view/internal/materialize.rs
index 670de56613..6ee4893eba 100644
--- a/raphtory/src/db/api/view/internal/materialize.rs
+++ b/raphtory/src/db/api/view/internal/materialize.rs
@@ -7,12 +7,16 @@ use crate::{
         api::view::internal::*,
         graph::{graph::Graph, views::deletion_graph::PersistentGraph},
     },
+    errors::GraphError,
     prelude::*,
 };
 use raphtory_api::{iter::BoxedLIter, GraphType};
 use raphtory_storage::{graph::graph::GraphStorage, mutation::InheritMutationOps};
 use std::ops::Range;
 
+#[cfg(feature = "io")]
+use crate::serialise::GraphPaths;
+
 #[derive(Clone)]
 pub enum MaterializedGraph {
     EventGraph(Graph),
@@ -93,6 +97,21 @@ impl MaterializedGraph {
             MaterializedGraph::PersistentGraph(g) => Some(g),
         }
     }
+
+    #[cfg(feature = "io")]
+    pub fn load_from_path(path: &(impl GraphPaths + ?Sized)) -> Result<Self, GraphError> {
+        let meta = path.read_metadata()?;
+        if meta.is_diskgraph {
+            match meta.graph_type {
+                GraphType::EventGraph => Ok(Self::EventGraph(Graph::load_from_path(path)?)),
+                GraphType::PersistentGraph => Ok(Self::PersistentGraph(
+                    PersistentGraph::load_from_path(path)?,
+                )),
+            }
+        } else {
+            Err(GraphError::NotADiskGraph)
+        }
+    }
 }
 
 impl InternalStorageOps for MaterializedGraph {
@@ -100,7 +119,7 @@ impl InternalStorageOps for MaterializedGraph {
         for_all!(self, g => g.get_storage())
     }
 
-    fn disk_storage_enabled(&self) -> bool {
+    fn disk_storage_enabled(&self) -> Option<&Path> {
         for_all!(self, g => g.disk_storage_enabled())
     }
 }
diff --git a/raphtory/src/db/api/view/internal/mod.rs b/raphtory/src/db/api/view/internal/mod.rs
index 827007db45..0ab2d7eecc 100644
--- a/raphtory/src/db/api/view/internal/mod.rs
+++ b/raphtory/src/db/api/view/internal/mod.rs
@@ -12,6 +12,7 @@ use crate::{
 };
 use std::{
     fmt::{Debug, Formatter},
+    path::Path,
     sync::Arc,
 };
 
@@ -106,7 +107,16 @@ pub trait InternalStorageOps {
-    /// Returns `true` if the underlying storage saves data to disk,
-    /// or `false` if the storage is in-memory only.
+    /// Returns the on-disk storage path if the underlying storage persists
+    /// data to disk, or `None` if the storage is in-memory only.
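+    ///
+    /// A rough usage sketch (illustrative only, not a doctest; nothing
+    /// beyond this method is assumed):
+    ///
+    /// ```ignore
+    /// if let Some(dir) = graph.disk_storage_enabled() {
+    ///     println!("graph is persisted at {}", dir.display());
+    /// }
+    /// ```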
- fn disk_storage_enabled(&self) -> bool; + fn disk_storage_enabled(&self) -> Option<&Path>; } impl InternalStorageOps for G @@ -117,7 +118,7 @@ where self.base().get_storage() } - fn disk_storage_enabled(&self) -> bool { + fn disk_storage_enabled(&self) -> Option<&Path> { self.base().disk_storage_enabled() } } diff --git a/raphtory/src/db/graph/assertions.rs b/raphtory/src/db/graph/assertions.rs index 03008434f2..ef151a919b 100644 --- a/raphtory/src/db/graph/assertions.rs +++ b/raphtory/src/db/graph/assertions.rs @@ -96,26 +96,6 @@ impl ApplyFilter for FilterNeighbour } } -pub struct SearchNodes(F); - -impl ApplyFilter for SearchNodes { - fn apply(&self, graph: G) -> Vec { - #[cfg(feature = "search")] - { - let mut results = graph - .search_nodes(self.0.clone(), 20, 0) - .unwrap() - .into_iter() - .map(|nv| nv.name()) - .collect::>(); - results.sort(); - return results; - } - #[cfg(not(feature = "search"))] - Vec::::new() - } -} - pub struct FilterEdges(F); impl ApplyFilter for FilterEdges { @@ -132,26 +112,6 @@ impl ApplyFilter for FilterEdges } } -pub struct SearchEdges(F); - -impl ApplyFilter for SearchEdges { - fn apply(&self, graph: G) -> Vec { - #[cfg(feature = "search")] - { - let mut results = graph - .search_edges(self.0.clone(), 20, 0) - .unwrap() - .into_iter() - .map(|ev| format!("{}->{}", ev.src().name(), ev.dst().name())) - .collect::>(); - results.sort(); - return results; - } - #[cfg(not(feature = "search"))] - Vec::::new() - } -} - #[track_caller] pub fn assert_filter_nodes_results( init_graph: impl FnOnce(Graph) -> Graph, @@ -293,18 +253,6 @@ pub fn filter_nodes(graph: &Graph, filter: impl CreateNodeFilter) -> Vec results } -#[cfg(feature = "search")] -pub fn search_nodes(graph: &Graph, filter: impl AsNodeFilter) -> Vec { - let mut results = graph - .search_nodes(filter, 10, 0) - .expect("Failed to search nodes") - .into_iter() - .map(|v| v.name()) - .collect::>(); - results.sort(); - results -} - pub fn filter_edges(graph: &Graph, filter: impl CreateEdgeFilter) -> Vec { let mut results = graph .filter_edges(filter) @@ -318,13 +266,61 @@ pub fn filter_edges(graph: &Graph, filter: impl CreateEdgeFilter) -> Vec } #[cfg(feature = "search")] -pub fn search_edges(graph: &Graph, filter: impl AsEdgeFilter) -> Vec { - let mut results = graph - .search_edges(filter, 10, 0) - .expect("Failed to filter edges") - .into_iter() - .map(|e| format!("{}->{}", e.src().name(), e.dst().name())) - .collect::>(); - results.sort(); - results +mod search { + use super::*; + + pub struct SearchNodes(pub F); + + impl ApplyFilter for SearchNodes { + fn apply(&self, graph: G) -> Vec { + let mut results = graph + .search_nodes(self.0.clone(), 20, 0) + .unwrap() + .into_iter() + .map(|nv| nv.name()) + .collect::>(); + results.sort(); + results + } + } + + pub struct SearchEdges(pub F); + + impl ApplyFilter for SearchEdges { + fn apply(&self, graph: G) -> Vec { + let mut results = graph + .search_edges(self.0.clone(), 20, 0) + .unwrap() + .into_iter() + .map(|ev| format!("{}->{}", ev.src().name(), ev.dst().name())) + .collect::>(); + results.sort(); + results + } + } + + pub fn search_nodes(graph: &Graph, filter: impl AsNodeFilter) -> Vec { + let mut results = graph + .search_nodes(filter, 10, 0) + .expect("Failed to search nodes") + .into_iter() + .map(|v| v.name()) + .collect::>(); + results.sort(); + results + } + + pub fn search_edges(graph: &Graph, filter: impl AsEdgeFilter) -> Vec { + let mut results = graph + .search_edges(filter, 10, 0) + .expect("Failed to filter edges") + .into_iter() + 
.map(|e| format!("{}->{}", e.src().name(), e.dst().name())) + .collect::>(); + results.sort(); + results + } } + +#[cfg(feature = "search")] +pub use search::*; diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index dc6156ebfb..a8bd2c2473 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -16,6 +16,8 @@ //! ``` //! use super::views::deletion_graph::PersistentGraph; +#[cfg(feature = "io")] +use crate::serialise::GraphPaths; use crate::{ db::{ api::{ @@ -30,6 +32,7 @@ use crate::{ }, graph::{edges::Edges, node::NodeView, nodes::Nodes}, }, + errors::GraphError, prelude::*, }; use raphtory_api::inherit::Base; @@ -43,9 +46,9 @@ use std::{ fmt::{Display, Formatter}, hint::black_box, ops::Deref, - path::Path, sync::Arc, }; +use storage::{persist::strategy::PersistentStrategy, Extension}; #[repr(transparent)] #[derive(Debug, Clone, Default)] @@ -578,10 +581,17 @@ impl Graph { /// use raphtory::prelude::Graph; /// let g = Graph::new_at_path("/path/to/storage"); /// ``` - pub fn new_at_path(path: impl AsRef) -> Self { - Self { - inner: Arc::new(Storage::new_at_path(path)), + #[cfg(feature = "io")] + pub fn new_at_path(path: &(impl GraphPaths + ?Sized)) -> Result { + if !Extension::disk_storage_enabled() { + return Err(GraphError::DiskGraphNotEnabled); } + path.init()?; + let graph = Self { + inner: Arc::new(Storage::new_at_path(path.graph_path()?)?), + }; + path.write_metadata(&graph)?; + Ok(graph) } /// Load a graph from a specific path @@ -594,10 +604,12 @@ impl Graph { /// use raphtory::prelude::Graph; /// let g = Graph::load_from_path("/path/to/storage"); /// - pub fn load_from_path(path: impl AsRef) -> Self { - Self { - inner: Arc::new(Storage::load_from(path)), - } + #[cfg(feature = "io")] + pub fn load_from_path(path: &(impl GraphPaths + ?Sized)) -> Result { + //TODO: add support for loading indexes and vectors + Ok(Self { + inner: Arc::new(Storage::load_from(path.graph_path()?)?), + }) } pub(crate) fn from_storage(inner: Arc) -> Self { diff --git a/raphtory/src/db/graph/views/deletion_graph.rs b/raphtory/src/db/graph/views/deletion_graph.rs index fab23f7658..91a45774a7 100644 --- a/raphtory/src/db/graph/views/deletion_graph.rs +++ b/raphtory/src/db/graph/views/deletion_graph.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "io")] +use crate::serialise::GraphPaths; use crate::{ core::{ entities::LayerIds, @@ -10,6 +12,7 @@ use crate::{ }, graph::graph::graph_equal, }, + errors::GraphError, prelude::*, }; use raphtory_api::{ @@ -28,12 +31,14 @@ use raphtory_storage::{ }; use std::{ fmt::{Display, Formatter}, - iter, - ops::{Deref, Range}, - path::Path, + ops::Range, sync::Arc, }; -use storage::api::graph_props::{GraphPropEntryOps, GraphPropRefOps}; +use storage::{ + api::graph_props::{GraphPropEntryOps, GraphPropRefOps}, + persist::strategy::PersistentStrategy, + Extension, +}; /// A graph view where an edge remains active from the time it is added until it is explicitly marked as deleted. 
///
@@ -110,8 +115,15 @@ impl PersistentGraph {
     /// use raphtory::prelude::PersistentGraph;
-    /// let g = Graph::new_at_path("/path/to/storage");
+    /// let g = PersistentGraph::new_at_path("/path/to/storage").unwrap();
     /// ```
-    pub fn new_at_path(path: impl AsRef<Path>) -> Self {
-        Self(Arc::new(Storage::new_at_path(path)))
+    #[cfg(feature = "io")]
+    pub fn new_at_path(path: &(impl GraphPaths + ?Sized)) -> Result<Self, GraphError> {
+        if !Extension::disk_storage_enabled() {
+            return Err(GraphError::DiskGraphNotEnabled);
+        }
+        path.init()?;
+        let graph = Self(Arc::new(Storage::new_at_path(path.graph_path()?)?));
+        path.write_metadata(&graph)?;
+        Ok(graph)
     }
 
     /// Load a graph from a specific path
@@ -124,8 +136,9 @@ impl PersistentGraph {
-    /// use raphtory::prelude::Graph;
-    /// let g = Graph::load_from_path("/path/to/storage");
+    /// use raphtory::prelude::PersistentGraph;
+    /// let g = PersistentGraph::load_from_path("/path/to/storage").unwrap();
     ///
-    pub fn load_from_path(path: impl AsRef<Path>) -> Self {
-        Self(Arc::new(Storage::load_from(path)))
+    #[cfg(feature = "io")]
+    pub fn load_from_path(path: &(impl GraphPaths + ?Sized)) -> Result<Self, GraphError> {
+        Ok(Self(Arc::new(Storage::load_from(path.graph_path()?)?)))
     }
 
     pub fn from_storage(storage: Arc<Storage>) -> Self {
diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs
index f7d790b29b..301d7764b4 100644
--- a/raphtory/src/errors.rs
+++ b/raphtory/src/errors.rs
@@ -18,9 +18,8 @@ use raphtory_core::{
 };
 use raphtory_storage::mutation::MutationError;
 use std::{
-    backtrace::Backtrace,
     fmt::Debug,
-    io, panic,
+    io, panic::Location,
     path::{PathBuf, StripPrefixError},
     sync::Arc,
 };
@@ -34,35 +33,36 @@ use pyo3::PyErr;
 #[cfg(feature = "search")]
 use {tantivy, tantivy::query::QueryParserError};
 
+use storage::error::StorageError;
+#[cfg(feature = "io")]
+use zip::result::ZipError;
+
 #[derive(thiserror::Error, Debug)]
 pub enum InvalidPathReason {
-    #[error("Backslash not allowed in path: {0}")]
-    BackslashError(PathBuf),
-    #[error("Double forward slashes are not allowed in path: {0}")]
-    DoubleForwardSlash(PathBuf),
-    #[error("Only relative paths are allowed to be used within the working_dir: {0}")]
-    RootNotAllowed(PathBuf),
-    #[error("References to the current dir are not allowed within the path: {0}")]
-    CurDirNotAllowed(PathBuf),
-    #[error("References to the parent dir are not allowed within the path: {0}")]
-    ParentDirNotAllowed(PathBuf),
-    #[error("A component of the given path was a symlink: {0}")]
-    SymlinkNotAllowed(PathBuf),
-    #[error("The give path does not exist: {0}")]
-    PathDoesNotExist(PathBuf),
-    #[error("Could not parse Path: {0}")]
-    PathNotParsable(PathBuf),
-    #[error("The path to the graph contains a subpath to an existing graph: {0}")]
-    ParentIsGraph(PathBuf),
-    #[error("The path provided does not exists as a namespace: {0}")]
-    NamespaceDoesNotExist(String),
-    #[error("The path provided contains non-UTF8 characters.")]
-    NonUTFCharacters,
-    #[error("Failed to strip prefix")]
-    StripPrefix {
-        #[from]
-        source: StripPrefixError,
-    },
+    #[error("Backslash not allowed in path")]
+    BackslashError,
+    #[error("Double forward slashes are not allowed in path")]
+    DoubleForwardSlash,
+    #[error("Only relative paths are allowed to be used within the working_dir")]
+    RootNotAllowed,
+    #[error("References to the current dir are not allowed within the path")]
+    CurDirNotAllowed,
+    #[error("References to the parent dir are not allowed within the path")]
+    ParentDirNotAllowed,
+    #[error("A component of the given path was a symlink")]
+    SymlinkNotAllowed,
+    #[error("Could not parse Path")]
+    PathNotParsable,
+    #[error("The path to the graph contains a subpath to an existing graph")]
+    ParentIsGraph,
+    #[error("Graph name cannot start with _")]
+    GraphNamePrefix,
+    #[error("The
path provided already exists as a namespace")] + GraphIsNamespace, + #[error("The path provided already exists as a graph")] + NamespaceIsGraph, + #[error("Failed to strip prefix: {source}")] + StripPrefix { source: StripPrefixError }, } #[derive(thiserror::Error, Debug)] @@ -147,8 +147,11 @@ pub enum GraphError { source: LoadError, }, + #[error("Path {0} does not exist")] + PathDoesNotExist(PathBuf), + #[error("Storage feature not enabled")] - DiskGraphNotFound, + DiskGraphNotEnabled, #[error("Missing graph index. You need to create an index first.")] IndexNotCreated, @@ -251,10 +254,19 @@ pub enum GraphError { #[cfg(feature = "io")] #[error("zip operation failed")] ZipError { - #[from] source: zip::result::ZipError, + location: &'static Location<'static>, }, + #[error("Not a zip archive")] + NotAZip, + + #[error("Not a disk graph")] + NotADiskGraph, + + #[error("Graph folder is not initialised for writing")] + NoWriteInProgress, + #[error("Failed to load graph: {0}")] LoadFailure(String), @@ -423,8 +435,19 @@ pub enum GraphError { #[error("Your window and step must be of the same type: duration (string) or epoch (int)")] MismatchedIntervalTypes, - #[error("Cannot initialize cache for zipped graph. Unzip the graph to initialize the cache.")] - ZippedGraphCannotBeCached, + #[error("Cannot swap zipped graph data")] + ZippedGraphCannotBeSwapped, + + #[error("{source} at {location}")] + StripPrefixError { + source: StripPrefixError, + location: &'static Location<'static>, + }, + #[error("Path {0} is not a valid relative data path")] + InvalidRelativePath(String), + + #[error(transparent)] + StorageError(#[from] StorageError), } impl From for GraphError { @@ -472,6 +495,23 @@ impl From for GraphError { } } +#[cfg(feature = "io")] +impl From for GraphError { + #[track_caller] + fn from(source: ZipError) -> Self { + let location = Location::caller(); + GraphError::ZipError { source, location } + } +} + +impl From for GraphError { + #[track_caller] + fn from(source: StripPrefixError) -> Self { + let location = Location::caller(); + GraphError::StripPrefixError { source, location } + } +} + #[cfg(test)] mod test { use crate::errors::GraphError; diff --git a/raphtory/src/graph_loader/company_house.rs b/raphtory/src/graph_loader/company_house.rs index 18ef83483b..e123b56bae 100644 --- a/raphtory/src/graph_loader/company_house.rs +++ b/raphtory/src/graph_loader/company_house.rs @@ -34,7 +34,7 @@ pub fn company_house_graph(path: Option) -> Graph { fn restore_from_bincode(encoded_data_dir: &Path) -> Option { if encoded_data_dir.exists() { let now = Instant::now(); - let g = Graph::decode(encoded_data_dir, None) + let g = Graph::decode(encoded_data_dir) .map_err(|err| { error!( "Restoring from bincode failed with error: {}! Reloading file!", diff --git a/raphtory/src/graph_loader/stable_coins.rs b/raphtory/src/graph_loader/stable_coins.rs index ea5efee7f5..b92b8265de 100644 --- a/raphtory/src/graph_loader/stable_coins.rs +++ b/raphtory/src/graph_loader/stable_coins.rs @@ -48,7 +48,7 @@ pub fn stable_coin_graph(path: Option, subset: bool) -> Graph { fn restore_from_file(encoded_data_file: &PathBuf) -> Option { if encoded_data_file.exists() { let now = Instant::now(); - let g = Graph::decode(encoded_data_file.as_path(), None) + let g = Graph::decode(encoded_data_file.as_path()) .map_err(|err| { error!( "Restoring from bincode failed with error: {}! 
Reloading file!", diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 7ddeb6cae1..b3fdc23192 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -9,12 +9,10 @@ use crate::{ }, prelude::*, }; -use arrow::array::BooleanArray; use bytemuck::checked::cast_slice_mut; use db4_graph::WriteLockedGraph; use either::Either; use itertools::izip; -use kdam::{Bar, BarBuilder, BarExt}; use raphtory_api::{ atomic_extra::atomic_usize_from_mut_slice, core::{ @@ -29,21 +27,18 @@ use raphtory_core::{ entities::{graph::logical_to_physical::ResolverShardT, GidRef, VID}, storage::timeindex::AsTime, }; -use raphtory_storage::{ - core_ops::CoreGraphOps, - layer_ops::InternalLayerOps, - mutation::addition_ops::{InternalAdditionOps, SessionAdditionOps}, -}; +use raphtory_storage::mutation::addition_ops::{InternalAdditionOps, SessionAdditionOps}; use rayon::prelude::*; use std::{ borrow::{Borrow, Cow}, collections::HashMap, - sync::{ - atomic::{AtomicBool, AtomicUsize, Ordering}, - Arc, - }, + sync::atomic::{AtomicBool, AtomicUsize, Ordering}, }; +#[cfg(feature = "python")] +use kdam::{Bar, BarBuilder, BarExt}; + +#[cfg(feature = "python")] fn build_progress_bar(des: String, num_rows: usize) -> Result { BarBuilder::default() .desc(des) diff --git a/raphtory/src/io/arrow/prop_handler.rs b/raphtory/src/io/arrow/prop_handler.rs index aa3849d8ff..183ad63f44 100644 --- a/raphtory/src/io/arrow/prop_handler.rs +++ b/raphtory/src/io/arrow/prop_handler.rs @@ -242,7 +242,7 @@ fn data_type_as_prop_type(dt: &DataType) -> Result { } } -trait PropCol: Send + Sync { +pub trait PropCol: Send + Sync { fn get(&self, i: usize) -> Option; fn as_array(&self) -> ArrayRef; @@ -442,7 +442,7 @@ impl PropCol for EmptyCol { } } -fn lift_property_col(arr: &dyn Array) -> Box { +pub fn lift_property_col(arr: &dyn Array) -> Box { match arr.data_type() { DataType::Boolean => Box::new(arr.as_boolean().clone()), DataType::Int32 => Box::new(arr.as_primitive::().clone()), diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index e2251ba160..1e6ac51145 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -1,6 +1,6 @@ use crate::{ db::api::view::StaticGraphViewOps, - errors::{GraphError, InvalidPathReason::PathDoesNotExist}, + errors::GraphError, io::arrow::{dataframe::*, df_loaders::*}, prelude::{AdditionOps, DeletionOps, PropertyAdditionOps}, }; @@ -360,9 +360,7 @@ pub fn get_parquet_file_paths(parquet_path: &Path) -> Result, Graph } } } else { - return Err(GraphError::from(PathDoesNotExist( - parquet_path.to_path_buf(), - ))); + return Err(GraphError::PathDoesNotExist(parquet_path.to_path_buf())); } parquet_files.sort(); diff --git a/raphtory/src/python/graph/disk_graph.rs b/raphtory/src/python/graph/disk_graph.rs deleted file mode 100644 index b80f8aa2bb..0000000000 --- a/raphtory/src/python/graph/disk_graph.rs +++ /dev/null @@ -1,321 +0,0 @@ -//! A columnar temporal graph. -//! 
-use super::io::pandas_loaders::*; -use crate::{ - db::{ - api::storage::graph::storage_ops::disk_storage::IntoGraph, - graph::views::deletion_graph::PersistentGraph, - }, - errors::GraphError, - io::parquet_loaders::read_struct_arrays, - prelude::Graph, - python::{graph::graph::PyGraph, types::repr::StructReprBuilder}, -}; -use arrow::{array::StructArray, datatypes::Field}; -use itertools::Itertools; -use pometry_storage::{ - graph::{load_node_metadata, TemporalGraph}, - RAError, -}; -use pyo3::{exceptions::PyRuntimeError, prelude::*, pybacked::PyBackedStr, types::PyDict}; -use raphtory_storage::disk::{DiskGraphStorage, ParquetLayerCols}; -use std::{ - ops::Deref, - path::{Path, PathBuf}, - str::FromStr, -}; - -#[derive(Clone)] -#[pyclass(name = "DiskGraphStorage", frozen, module = "raphtory")] -pub struct PyDiskGraph(pub DiskGraphStorage); - -impl AsRef for PyDiskGraph -where - DiskGraphStorage: AsRef, -{ - fn as_ref(&self) -> &G { - self.0.as_ref() - } -} - -impl From for PyDiskGraph { - fn from(value: DiskGraphStorage) -> Self { - Self(value) - } -} - -impl From for DiskGraphStorage { - fn from(value: PyDiskGraph) -> Self { - value.0 - } -} - -struct PyParquetLayerCols { - parquet_dir: PyBackedStr, - layer: PyBackedStr, - src_col: PyBackedStr, - dst_col: PyBackedStr, - time_col: PyBackedStr, - exclude_edge_props: Vec, -} - -impl PyParquetLayerCols { - pub fn as_deref(&self) -> ParquetLayerCols<'_> { - ParquetLayerCols { - parquet_dir: self.parquet_dir.deref(), - layer: self.layer.deref(), - src_col: self.src_col.deref(), - dst_col: self.dst_col.deref(), - time_col: self.time_col.deref(), - exclude_edge_props: self.exclude_edge_props.iter().map(|s| s.deref()).collect(), - } - } -} - -impl<'a> FromPyObject<'a> for PyParquetLayerCols { - fn extract_bound(obj: &Bound<'a, PyAny>) -> PyResult { - let dict = obj.downcast::()?; - Ok(PyParquetLayerCols { - parquet_dir: dict - .get_item("parquet_dir")? - .ok_or(PyRuntimeError::new_err("parquet_dir is required"))? - .extract::()?, - layer: dict - .get_item("layer")? - .ok_or(PyRuntimeError::new_err("layer is required"))? - .extract::()?, - src_col: dict - .get_item("src_col")? - .ok_or(PyRuntimeError::new_err("src_col is required"))? - .extract::()?, - dst_col: dict - .get_item("dst_col")? - .ok_or(PyRuntimeError::new_err("dst_col is required"))? - .extract::()?, - time_col: dict - .get_item("time_col")? - .ok_or(PyRuntimeError::new_err("time_col is required"))? - .extract::()?, - exclude_edge_props: match dict.get_item("exclude_edge_props")? { - None => Ok(vec![]), - Some(item) => item - .try_iter()? 
- .map(|v| v.and_then(|v| v.extract::())) - .collect::>>(), - }?, - }) - } -} - -#[pymethods] -impl PyGraph { - /// save graph in disk_graph format and memory map the result - /// - /// Arguments: - /// graph_dir (str | PathLike): folder where the graph will be saved - /// - /// Returns: - /// DiskGraphStorage: the persisted graph storage - pub fn persist_as_disk_graph(&self, graph_dir: PathBuf) -> Result { - Ok(PyDiskGraph(DiskGraphStorage::from_graph( - &self.graph, - &graph_dir, - )?)) - } -} - -#[pymethods] -impl PyDiskGraph { - pub fn graph_dir(&self) -> &Path { - self.0.graph_dir() - } - - pub fn to_events(&self) -> Graph { - self.0.clone().into_graph() - } - - pub fn to_persistent(&self) -> PersistentGraph { - self.0.clone().into_persistent_graph() - } - - #[staticmethod] - #[pyo3(signature = (graph_dir, edge_df, time_col, src_col, dst_col))] - pub fn load_from_pandas( - graph_dir: PathBuf, - edge_df: &Bound, - time_col: &str, - src_col: &str, - dst_col: &str, - ) -> Result { - let cols_to_check = vec![src_col, dst_col, time_col]; - - let df_columns: Vec = edge_df.getattr("columns")?.extract()?; - let df_columns: Vec<&str> = df_columns.iter().map(|x| x.as_str()).collect(); - - let df_view = process_pandas_py_df(edge_df, df_columns)?; - df_view.check_cols_exist(&cols_to_check)?; - let src_index = df_view.get_index(src_col)?; - let dst_index = df_view.get_index(dst_col)?; - let time_index = df_view.get_index(time_col)?; - - let mut chunks_iter = df_view.chunks.peekable(); - let chunk_size = if let Some(result) = chunks_iter.peek() { - match result { - Ok(df) => df.chunk.len(), - Err(e) => { - return Err(GraphError::LoadFailure(format!( - "Failed to load graph {e:?}" - ))) - } - } - } else { - return Err(GraphError::LoadFailure("No chunks available".to_string())); - }; - - let edge_lists = chunks_iter - .map_ok(|df| { - let fields = df - .chunk - .iter() - .zip(df_view.names.iter()) - .map(|(arr, col_name)| { - Field::new(col_name, arr.data_type().clone(), arr.null_count() > 0) - }) - .collect_vec(); - let s_array = StructArray::new(fields.into(), df.chunk, None); - s_array - }) - .collect::, GraphError>>()?; - - let graph = DiskGraphStorage::load_from_edge_lists( - &edge_lists, - chunk_size, - chunk_size, - graph_dir, - time_index, - src_index, - dst_index, - )?; - - Ok(PyDiskGraph(graph)) - } - - #[staticmethod] - fn load_from_dir(graph_dir: PathBuf) -> Result { - DiskGraphStorage::load_from_dir(&graph_dir) - .map_err(|err| { - GraphError::LoadFailure(format!( - "Failed to load graph {err:?} from dir {}", - graph_dir.display() - )) - }) - .map(PyDiskGraph) - } - - #[staticmethod] - #[pyo3( - signature = (graph_dir, layer_parquet_cols, node_properties=None, chunk_size=10_000_000, t_props_chunk_size=10_000_000, num_threads=4, node_type_col=None, node_id_col=None, num_rows=None) - )] - fn load_from_parquets( - graph_dir: PathBuf, - layer_parquet_cols: Vec, - node_properties: Option, - chunk_size: usize, - t_props_chunk_size: usize, - num_threads: usize, - node_type_col: Option<&str>, - node_id_col: Option<&str>, - num_rows: Option, - ) -> Result { - let layer_cols = layer_parquet_cols - .iter() - .map(|layer| layer.as_deref()) - .collect(); - DiskGraphStorage::load_from_parquets( - graph_dir, - layer_cols, - node_properties, - chunk_size, - t_props_chunk_size, - num_threads, - node_type_col, - node_id_col, - num_rows, - ) - .map_err(|err| { - GraphError::LoadFailure(format!("Failed to load graph from parquet files: {err:?}")) - }) - .map(PyDiskGraph) - } - - #[pyo3(signature = 
(location, col_names=None, chunk_size=None))] - pub fn load_node_metadata( - &self, - location: PathBuf, - col_names: Option>, - chunk_size: Option, - ) -> Result { - let col_names = convert_py_prop_args(col_names.as_deref()); - let chunks = read_struct_arrays(&location, col_names.as_deref())?; - let _ = load_node_metadata(chunk_size.unwrap_or(200_000), self.graph_dir(), chunks)?; - Self::load_from_dir(self.graph_dir().to_path_buf()) - } - - #[pyo3(signature=(location, col_name, chunk_size=None))] - pub fn load_node_types( - &self, - location: PathBuf, - col_name: &str, - chunk_size: Option, - ) -> Result { - let mut cloned = self.clone(); - let chunks = read_struct_arrays(&location, Some(&[col_name]))?.map(|chunk| match chunk { - Ok(chunk) => { - let (_, cols, _) = chunk.into_parts(); - cols.into_iter().next().ok_or(RAError::EmptyChunk) - } - Err(err) => Err(err), - }); - cloned - .0 - .load_node_types_from_arrays(chunks, chunk_size.unwrap_or(1_000_000))?; - Ok(cloned) - } - - #[pyo3(signature = (location, chunk_size=20_000_000))] - pub fn append_node_temporal_properties( - &self, - location: &str, - chunk_size: usize, - ) -> Result { - let path = PathBuf::from_str(location).unwrap(); - let chunks = read_struct_arrays(&path, None)?; - let mut graph = TemporalGraph::new(self.0.inner().graph_dir())?; - graph.load_temporal_node_props_from_chunks(chunks, chunk_size, false)?; - Self::load_from_dir(self.graph_dir().to_path_buf()) - } - - /// Merge this graph with another `DiskGraph`. Note that both graphs should have nodes that are - /// sorted by their global ids or the resulting graph will be nonsense! - fn merge_by_sorted_gids( - &self, - other: &Self, - graph_dir: PathBuf, - ) -> Result { - Ok(PyDiskGraph( - self.0.merge_by_sorted_gids(&other.0, graph_dir)?, - )) - } - - fn __repr__(&self) -> String { - StructReprBuilder::new("DiskGraph") - .add_field("number_of_nodes", self.0.inner.num_nodes()) - .add_field( - "number_of_temporal_edges", - self.0.inner.count_temporal_edges(), - ) - .add_field("earliest_time", self.0.inner.earliest()) - .add_field("latest_time", self.0.inner.latest()) - .finish() - } -} diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index e606d393fc..4040963970 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -139,9 +139,7 @@ impl PyGraphEncoder { } fn __call__(&self, bytes: Vec) -> Result { - let path_for_decoded_graph: Option<&std::path::Path> = None; - - MaterializedGraph::decode_from_bytes(&bytes, path_for_decoded_graph) + MaterializedGraph::decode_from_bytes(&bytes) } fn __setstate__(&self) {} fn __getstate__(&self) {} @@ -152,27 +150,27 @@ impl PyGraphEncoder { impl PyGraph { #[new] #[pyo3(signature = (path = None))] - pub fn py_new(path: Option) -> (Self, PyGraphView) { + pub fn py_new(path: Option) -> Result<(Self, PyGraphView), GraphError> { let graph = match path { None => Graph::new(), - Some(path) => Graph::new_at_path(path), + Some(path) => Graph::new_at_path(&path)?, }; - ( + Ok(( Self { graph: graph.clone(), }, PyGraphView::from(graph), - ) + )) } #[staticmethod] - pub fn load(path: PathBuf) -> Graph { - Graph::load_from_path(path) + pub fn load(path: PathBuf) -> Result { + Graph::load_from_path(&path) } - fn __reduce__(&self) -> (PyGraphEncoder, (Vec,)) { - let state = self.graph.encode_to_bytes(); - (PyGraphEncoder, (state,)) + fn __reduce__(&self) -> Result<(PyGraphEncoder, (Vec,)), GraphError> { + let state = self.graph.encode_to_bytes()?; + Ok((PyGraphEncoder, (state,))) } 
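+
+    // `__reduce__` drives Python pickling: the bytes from `encode_to_bytes`
+    // are replayed through `PyGraphEncoder.__call__` (which calls
+    // `MaterializedGraph::decode_from_bytes`) on unpickle, so encoding
+    // failures surface as Python exceptions.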
/// Persist graph to parquet files @@ -627,12 +625,12 @@ impl PyGraph { /// df (DataFrame): The Pandas DataFrame containing the nodes. /// time (str): The column name for the timestamps. /// id (str): The column name for the node IDs. - /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. @@ -640,19 +638,20 @@ impl PyGraph { /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (df, time, id, secondary_index = None, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None) + signature = (df, time, id, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None, secondary_index = None) )] fn load_nodes_from_pandas<'py>( &self, df: &Bound<'py, PyAny>, time: &str, id: &str, - secondary_index: Option<&str>, + node_type: Option<&str>, node_type_col: Option<&str>, properties: Option>, metadata: Option>, shared_metadata: Option>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -676,12 +675,12 @@ impl PyGraph { /// parquet_path (str): Parquet file or directory of Parquet files containing the nodes /// time (str): The column name for the timestamps. /// id (str): The column name for the node IDs. - /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. @@ -689,19 +688,19 @@ impl PyGraph { /// Raises: /// GraphError: If the operation fails. 
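+    ///
+    /// Note: `secondary_index` is the last optional parameter; passing it by
+    /// keyword avoids ambiguity with the other optional arguments.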
#[pyo3( - signature = (parquet_path, time, id, secondary_index = None, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None) + signature = (parquet_path, time, id, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None, secondary_index = None) )] fn load_nodes_from_parquet( &self, parquet_path: PathBuf, time: &str, id: &str, - secondary_index: Option<&str>, node_type: Option<&str>, node_type_col: Option<&str>, properties: Option>, metadata: Option>, shared_metadata: Option>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -727,12 +726,12 @@ impl PyGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. @@ -740,7 +739,7 @@ impl PyGraph { /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (df, time, src, dst, secondary_index = None, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) + signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, secondary_index = None) )] fn load_edges_from_pandas( &self, @@ -748,12 +747,12 @@ impl PyGraph { time: &str, src: &str, dst: &str, - secondary_index: Option<&str>, properties: Option>, metadata: Option>, shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -779,12 +778,12 @@ impl PyGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. 
(cannot be used in combination with layer_col) /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// secondary_index (str, optional): The column name for the secondary index. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. @@ -792,7 +791,7 @@ impl PyGraph { /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (parquet_path, time, src, dst, secondary_index = None, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) + signature = (parquet_path, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, secondary_index = None) )] fn load_edges_from_parquet( &self, @@ -800,12 +799,12 @@ impl PyGraph { time: &str, src: &str, dst: &str, - secondary_index: Option<&str>, properties: Option>, metadata: Option>, shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index 67cc9ad62c..966aba49a6 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -101,27 +101,27 @@ impl PyPersistentGraph { impl PyPersistentGraph { #[new] #[pyo3(signature = (path = None))] - pub fn py_new(path: Option) -> (Self, PyGraphView) { + pub fn py_new(path: Option) -> Result<(Self, PyGraphView), GraphError> { let graph = match path { - Some(path) => PersistentGraph::new_at_path(path), + Some(path) => PersistentGraph::new_at_path(&path)?, None => PersistentGraph::new(), }; - ( + Ok(( Self { graph: graph.clone(), }, PyGraphView::from(graph), - ) + )) } #[staticmethod] - pub fn load(path: PathBuf) -> PersistentGraph { - PersistentGraph::load_from_path(path) + pub fn load(path: PathBuf) -> Result { + PersistentGraph::load_from_path(&path) } - fn __reduce__(&self) -> (PyGraphEncoder, (Vec,)) { - let state = self.graph.encode_to_bytes(); - (PyGraphEncoder, (state,)) + fn __reduce__(&self) -> Result<(PyGraphEncoder, (Vec,)), GraphError> { + let state = self.graph.encode_to_bytes()?; + Ok((PyGraphEncoder, (state,))) } /// Adds a new node with the given id and properties to the graph. @@ -575,31 +575,32 @@ impl PyPersistentGraph { /// df (DataFrame): The Pandas DataFrame containing the nodes. /// time (str): The column name for the timestamps. /// id (str): The column name for the node IDs. - /// secondary_index (str, optional): The column name for the secondary index. /// NOTE: All values in this column must be unique. Defaults to None. /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. 
+ /// secondary_index (str, optional): The column name for the secondary index. /// /// Returns: /// None: This function does not return a value, if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. - #[pyo3(signature = (df, time, id, secondary_index = None, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None))] + #[pyo3(signature = (df, time, id, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None, secondary_index = None))] fn load_nodes_from_pandas( &self, df: &Bound, time: &str, id: &str, - secondary_index: Option<&str>, + node_type: Option<&str>, node_type_col: Option<&str>, properties: Option>, metadata: Option>, shared_metadata: Option>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -623,31 +624,32 @@ impl PyPersistentGraph { /// parquet_path (str): Parquet file or directory of Parquet files containing the nodes /// time (str): The column name for the timestamps. /// id (str): The column name for the node IDs. - /// secondary_index (str, optional): The column name for the secondary index. /// NOTE: All values in this column must be unique. Defaults to None. /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + /// secondary_index (str, optional): The column name for the secondary index. /// /// Returns: /// None: This function does not return a value, if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. - #[pyo3(signature = (parquet_path, time, id, secondary_index = None, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None))] + #[pyo3(signature = (parquet_path, time, id, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None, secondary_index = None))] fn load_nodes_from_parquet( &self, parquet_path: PathBuf, time: &str, id: &str, - secondary_index: Option<&str>, + node_type: Option<&str>, node_type_col: Option<&str>, properties: Option>, metadata: Option>, shared_metadata: Option>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -673,32 +675,33 @@ impl PyPersistentGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// secondary_index (str, optional): The column name for the secondary index. /// NOTE: All values in this column must be unique. Defaults to None. /// properties (List[str], optional): List of edge property column names. Defaults to None. 
/// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// secondary_index (str, optional): The column name for the secondary index. /// /// Returns: /// None: This function does not return a value, if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. - #[pyo3(signature = (df, time, src, dst, secondary_index = None, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None))] + #[pyo3(signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, secondary_index = None))] fn load_edges_from_pandas( &self, df: &Bound, time: &str, src: &str, dst: &str, - secondary_index: Option<&str>, + properties: Option>, metadata: Option>, shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -724,32 +727,33 @@ impl PyPersistentGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// secondary_index (str, optional): The column name for the secondary index. /// NOTE: All values in this column must be unique. Defaults to None. /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// secondary_index (str, optional): The column name for the secondary index. /// /// Returns: /// None: This function does not return a value, if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. 
- #[pyo3(signature = (parquet_path, time, src, dst, secondary_index = None, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None))] + #[pyo3(signature = (parquet_path, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, secondary_index = None))] fn load_edges_from_parquet( &self, parquet_path: PathBuf, time: &str, src: &str, dst: &str, - secondary_index: Option<&str>, + properties: Option>, metadata: Option>, shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -776,26 +780,27 @@ impl PyPersistentGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// secondary_index (str, optional): The column name for the secondary index. /// NOTE: All values in this column must be unique. Defaults to None. /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// secondary_index (str, optional): The column name for the secondary index. /// /// Returns: /// None: This function does not return a value, if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. - #[pyo3(signature = (df, time, src, dst, secondary_index = None, layer = None, layer_col = None))] + #[pyo3(signature = (df, time, src, dst, layer = None, layer_col = None, secondary_index = None))] fn load_edge_deletions_from_pandas( &self, df: &Bound, time: &str, src: &str, dst: &str, - secondary_index: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { load_edge_deletions_from_pandas( &self.graph, @@ -816,26 +821,27 @@ impl PyPersistentGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// secondary_index (str, optional): The column name for the secondary index. /// NOTE: All values in this column must be unique. Defaults to None. /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// secondary_index (str, optional): The column name for the secondary index. /// /// Returns: /// None: This function does not return a value, if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. 
- #[pyo3(signature = (parquet_path, time, src, dst, secondary_index = None, layer = None, layer_col = None))] + #[pyo3(signature = (parquet_path, time, src, dst, layer = None, layer_col = None, secondary_index = None))] fn load_edge_deletions_from_parquet( &self, parquet_path: PathBuf, time: &str, src: &str, dst: &str, - secondary_index: Option<&str>, + layer: Option<&str>, layer_col: Option<&str>, + secondary_index: Option<&str>, ) -> Result<(), GraphError> { load_edge_deletions_from_parquet( &self.graph, diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index 6a8b6abfca..a973bbd8fa 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -3,7 +3,6 @@ use crate::{ errors::GraphError, io::arrow::{dataframe::*, df_loaders::*}, prelude::{AdditionOps, PropertyAdditionOps}, - python::graph::io::*, }; use arrow::array::ArrayRef; use pyo3::{ diff --git a/raphtory/src/python/graph/views/graph_view.rs b/raphtory/src/python/graph/views/graph_view.rs index e0113c8f6c..bdf7b48fa4 100644 --- a/raphtory/src/python/graph/views/graph_view.rs +++ b/raphtory/src/python/graph/views/graph_view.rs @@ -39,7 +39,6 @@ use crate::{ types::repr::{Repr, StructReprBuilder}, utils::PyNodeRef, }, - serialise::GraphFolder, }; use chrono::prelude::*; use pyo3::prelude::*; @@ -465,25 +464,16 @@ impl PyGraphView { /// Returns a 'materialized' clone of the graph view - i.e. a new graph with a /// copy of the data seen within the view instead of just a mask over the original graph. - /// If a path is provided, the new graph will be stored at that path - /// (assuming the storage feature is enabled). /// /// Returns: /// GraphView: Returns a graph clone - #[pyo3(signature = (path = None))] - fn materialize(&self, path: Option) -> Result { - self.graph.materialize_at(path.as_deref()) + fn materialize(&self) -> Result { + self.graph.materialize() } /// Materializes the graph view into a graphql compatible folder. - fn materialize_to_graph_folder(&self, path: PathBuf) -> Result { - let folder: GraphFolder = path.into(); - folder.reserve()?; - - let graph = self.graph.materialize_at(Some(&folder.get_graph_path()))?; - folder.write_metadata(&graph)?; - - Ok(graph) + fn materialize_at(&self, path: PathBuf) -> Result { + self.graph.materialize_at(&path) } /// Displays the graph diff --git a/raphtory/src/python/types/macros/trait_impl/serialise.rs b/raphtory/src/python/types/macros/trait_impl/serialise.rs index 3ea61824be..bc0bfab9d3 100644 --- a/raphtory/src/python/types/macros/trait_impl/serialise.rs +++ b/raphtory/src/python/types/macros/trait_impl/serialise.rs @@ -18,12 +18,7 @@ macro_rules! impl_serialise { #[doc = concat!(" ", $name, ":")] #[staticmethod] fn load_from_file(path: PathBuf) -> Result<$base_type, GraphError> { - let path_for_decoded_graph = None; - - <$base_type as $crate::serialise::StableDecode>::decode( - path, - path_for_decoded_graph, - ) + <$base_type as $crate::serialise::StableDecode>::decode(&path) } #[doc = concat!(" Saves the ", $name, " to the given path in parquet format.")] @@ -57,21 +52,19 @@ macro_rules! 
impl_serialise { #[doc = concat!(" ", $name, ":")] #[staticmethod] fn deserialise(bytes: &[u8]) -> Result<$base_type, GraphError> { - let path_for_decoded_graph = None; - - <$base_type as $crate::serialise::StableDecode>::decode_from_bytes( - bytes, - path_for_decoded_graph, - ) + <$base_type as $crate::serialise::StableDecode>::decode_from_bytes(bytes) } #[doc = concat!(" Serialise ", $name, " to bytes.")] /// /// Returns: /// bytes: - fn serialise<'py>(&self, py: Python<'py>) -> Bound<'py, pyo3::types::PyBytes> { - let bytes = $crate::serialise::StableEncode::encode_to_bytes(&self.$field); - pyo3::types::PyBytes::new(py, &bytes) + fn serialise<'py>( + &self, + py: Python<'py>, + ) -> Result, GraphError> { + let bytes = $crate::serialise::StableEncode::encode_to_bytes(&self.$field)?; + Ok(pyo3::types::PyBytes::new(py, &bytes)) } } }; diff --git a/raphtory/src/python/utils/export.rs b/raphtory/src/python/utils/export.rs index b84ae7caa9..95ea8ea119 100644 --- a/raphtory/src/python/utils/export.rs +++ b/raphtory/src/python/utils/export.rs @@ -8,10 +8,7 @@ use raphtory_api::core::{ storage::{arc_str::ArcStr, timeindex::AsTime}, }; use rayon::{iter::IntoParallelRefIterator, prelude::*}; -use std::{ - collections::{HashMap, HashSet}, - sync::Arc, -}; +use std::collections::{HashMap, HashSet}; pub(crate) fn extract_properties
( include_property_history: bool, diff --git a/raphtory/src/search/graph_index.rs b/raphtory/src/search/graph_index.rs index 6bedbeace9..e716411392 100644 --- a/raphtory/src/search/graph_index.rs +++ b/raphtory/src/search/graph_index.rs @@ -7,7 +7,7 @@ use crate::{ errors::GraphError, prelude::*, search::{edge_index::EdgeIndex, node_index::NodeIndex, searcher::Searcher}, - serialise::{GraphFolder, INDEX_PATH}, + serialise::{GraphFolder, GraphPaths, InnerGraphFolder, INDEX_PATH}, }; use parking_lot::RwLock; use raphtory_api::core::storage::dict_mapper::MaybeNew; @@ -17,6 +17,7 @@ use std::{ fmt::Debug, fs, fs::File, + io::{Seek, Write}, ops::Deref, path::{Path, PathBuf}, sync::Arc, @@ -24,7 +25,10 @@ use std::{ use tempfile::TempDir; use uuid::Uuid; use walkdir::WalkDir; -use zip::{write::FileOptions, ZipArchive, ZipWriter}; +use zip::{ + write::{FileOptions, SimpleFileOptions}, + ZipArchive, ZipWriter, +}; #[derive(Clone)] pub struct Index { @@ -43,7 +47,7 @@ impl Index { #[derive(Clone)] pub struct ImmutableGraphIndex { pub(crate) index: Index, - pub(crate) path: Arc, + pub(crate) path: Arc, pub index_spec: Arc, } @@ -189,7 +193,7 @@ impl GraphIndex { let temp_dir = match cached_graph_path { // Creates index in a temp dir within cache graph dir. // The intention is to avoid creating index in a tmp dir that could be on another file system. - Some(path) => TempDir::new_in(path.get_base_path())?, + Some(path) => TempDir::new_in(path.root())?, None => TempDir::new()?, }; @@ -221,7 +225,7 @@ impl GraphIndex { pub fn load_from_path(path: &GraphFolder) -> Result { if path.is_zip() { let index_path = TempDir::new()?; - unzip_index(&path.get_base_path(), index_path.path())?; + unzip_index(&path.root(), index_path.path())?; let (index, index_spec) = load_indexes(index_path.path())?; @@ -231,93 +235,55 @@ impl GraphIndex { index_spec: Arc::new(RwLock::new(index_spec)), })) } else { - let index_path = path.get_index_path(); + let index_path = path.index_path()?; let (index, index_spec) = load_indexes(index_path.as_path())?; Ok(GraphIndex::Immutable(ImmutableGraphIndex { index, - path: Arc::new(path.clone()), + path: Arc::new(path.data_path()?), index_spec: Arc::new(index_spec), })) } } - pub(crate) fn persist_to_disk(&self, path: &GraphFolder) -> Result<(), GraphError> { + pub(crate) fn persist_to_disk(&self, path: &impl GraphPaths) -> Result<(), GraphError> { let source_path = self.path().ok_or(GraphError::CannotPersistRamIndex)?; - let path = path.get_index_path(); - let path = path.as_path(); - - let temp_path = &path.with_extension(format!("tmp-{}", Uuid::new_v4())); - - copy_dir_recursive(&source_path, temp_path)?; - - // Always overwrite the existing graph index when persisting, since the in-memory - // working index may have newer updates. The persisted index is decoupled from the - // active one, and changes remain in memory unless explicitly saved. - // This behavior mirrors how the in-memory graph works — updates are not persisted - // unless manually saved, except when using the cached view (see db/graph/views/cached_view). - // This however is reached only when write_updates, otherwise graph is not allowed to be written to - // the existing location anyway. See GraphError::NonEmptyGraphFolder. 
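// The temp-dir-and-rename dance removed above is no longer needed because atomic
// replacement now happens at the data-folder level (via the `.dirty` pointer introduced
// in graph_folder.rs below); the index is simply copied into the fresh data folder.
// A minimal sketch of the new flow, assuming `paths` implements `GraphPaths` and
// `copy_dir_recursive` copies a directory tree (both defined elsewhere in this crate):
//
//     let dest = paths.index_path()?;                // e.g. <root>/data1/index
//     if source_path != dest {
//         copy_dir_recursive(&source_path, &dest)?;  // no-op when already in place
//     }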
- if path.exists() { - fs::remove_dir_all(path) - .map_err(|_e| GraphError::FailedToRemoveExistingGraphIndex(path.to_path_buf()))?; + let path = path.index_path()?; + if source_path != path { + copy_dir_recursive(&source_path, &path)?; } - - fs::rename(temp_path, path).map_err(|e| { - GraphError::IOErrorMsg(format!("Failed to rename temp index folder: {}", e)) - })?; - Ok(()) } - pub(crate) fn persist_to_disk_zip(&self, path: &GraphFolder) -> Result<(), GraphError> { - let file = File::options() - .read(true) - .write(true) - .open(path.get_base_path())?; - let mut zip = ZipWriter::new_append(file)?; - + pub(crate) fn persist_to_disk_zip( + &self, + writer: &mut ZipWriter, + prefix: &str, + ) -> Result<(), GraphError> { let source_path = self.path().ok_or(GraphError::CannotPersistRamIndex)?; - for entry in WalkDir::new(&source_path) .into_iter() .filter_map(Result::ok) .filter(|e| e.path().is_file()) { - let rel_path = entry - .path() - .strip_prefix(&source_path) - .map_err(|e| GraphError::IOErrorMsg(format!("Failed to strip path: {}", e)))?; - - let zip_entry_name = PathBuf::from(INDEX_PATH) - .join(rel_path) - .to_string_lossy() - .into_owned(); - zip.start_file::<_, ()>(zip_entry_name, FileOptions::default()) - .map_err(|e| { - GraphError::IOErrorMsg(format!("Failed to start zip file entry: {}", e)) - })?; - - let mut f = File::open(entry.path()) - .map_err(|e| GraphError::IOErrorMsg(format!("Failed to open index file: {}", e)))?; + let rel_path = entry.path().strip_prefix(&source_path)?; - std::io::copy(&mut f, &mut zip).map_err(|e| { - GraphError::IOErrorMsg(format!("Failed to write zip content: {}", e)) - })?; - } + let zip_entry_name = Path::new(prefix).join(rel_path); + writer.start_file_from_path(zip_entry_name, SimpleFileOptions::default())?; - zip.finish() - .map_err(|e| GraphError::IOErrorMsg(format!("Failed to finalize zip: {}", e)))?; + let mut f = File::open(entry.path())?; + std::io::copy(&mut f, writer)?; + } Ok(()) } pub fn make_mutable_if_needed(&mut self) -> Result<(), GraphError> { if let GraphIndex::Immutable(immutable) = self { - let temp_dir = TempDir::new_in(&immutable.path.get_base_path())?; + let temp_dir = TempDir::new_in(immutable.path.as_ref())?; let temp_path = temp_dir.path(); - copy_dir_recursive(&immutable.path.get_index_path(), temp_path)?; + copy_dir_recursive(&immutable.path.index_path(), temp_path)?; let node_index = NodeIndex::load_from_path(&temp_path.join("nodes"))?; let edge_index = EdgeIndex::load_from_path(&temp_path.join("edges"))?; @@ -350,7 +316,7 @@ impl GraphIndex { pub fn path(&self) -> Option { match self { - GraphIndex::Immutable(i) => Some(i.path.get_index_path()), + GraphIndex::Immutable(i) => Some(i.path.index_path()), GraphIndex::Mutable(m) => m.path.as_ref().map(|p| p.path().to_path_buf()), GraphIndex::Empty => None, } diff --git a/raphtory/src/search/mod.rs b/raphtory/src/search/mod.rs index 69d810e807..735a79e657 100644 --- a/raphtory/src/search/mod.rs +++ b/raphtory/src/search/mod.rs @@ -168,7 +168,7 @@ mod test_index { mod test_index_io { use crate::{ db::{ - api::view::{internal::InternalStorageOps, ResolvedIndexSpec, StaticGraphViewOps}, + api::view::{internal::InternalStorageOps, ResolvedIndexSpec}, graph::views::filter::model::{AsNodeFilter, NodeFilter, NodeFilterBuilderOps}, }, errors::GraphError, @@ -221,11 +221,11 @@ mod test_index { let filter = NodeFilter::name().eq("Alice"); assert_search_results(&graph, &filter, vec!["Alice"]); - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); 
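// The decode API drops its second argument throughout these tests: `Graph::decode(path)`
// replaces `Graph::decode(path, None)`, since the storage location of a decoded graph is
// now derived from the folder layout itself. A round-trip sketch under that assumption:
//
//     let dir = tempfile::TempDir::new()?;
//     graph.encode(dir.path())?;                 // writes <dir>/.raph and <dir>/data0/...
//     let loaded = Graph::decode(dir.path())?;   // no separate storage-path argument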
let path = binding.path(); graph.encode(path).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let is_indexed = graph.get_storage().unwrap().is_indexed(); assert!(!is_indexed); } @@ -241,12 +241,12 @@ mod test_index { assert_search_results(&graph, &filter, vec!["Alice"]); // Persisted both graph and index - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); // Loaded index that was persisted - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let is_indexed = graph.get_storage().unwrap().is_indexed(); assert!(is_indexed); @@ -257,7 +257,7 @@ mod test_index { fn test_encoding_graph_twice_to_same_storage_path_fails() { let graph = init_graph(); graph.create_index().unwrap(); - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); let result = graph.encode(path); @@ -282,7 +282,7 @@ mod test_index { assert_search_results(&graph, &filter1, vec!["Alice"]); // Persisted both graph and index - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); @@ -299,7 +299,7 @@ mod test_index { assert_search_results(&graph, &filter2, vec!["Tommy"]); // Loaded index that was persisted - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let is_indexed = graph.get_storage().unwrap().is_indexed(); assert!(is_indexed); assert_search_results(&graph, &filter1, vec!["Alice"]); @@ -319,13 +319,13 @@ mod test_index { assert_search_results(&graph, &filter2, vec!["Tommy"]); // Should persist the updated graph and index - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); // Should load the updated graph and index let storage_path = path.parent().unwrap().to_path_buf(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let is_indexed = graph.get_storage().unwrap().is_indexed(); assert!(is_indexed); assert_search_results(&graph, &filter1, vec!["Alice"]); @@ -336,13 +336,13 @@ mod test_index { fn test_zip_encode_decode_index() { let graph = init_graph(); graph.create_index().unwrap(); - let tmp_dir = tempfile::TempDir::new().unwrap(); + let tmp_dir = TempDir::new().unwrap(); let zip_path = tmp_dir.path().join("graph.zip"); let folder = GraphFolder::new_as_zip(zip_path); graph.encode(&folder).unwrap(); let storage_path = tmp_dir.path().to_path_buf(); - let graph = Graph::decode(folder, None).unwrap(); + let graph = Graph::decode(&folder).unwrap(); let node = graph.node("Alice").unwrap(); let node_type = node.node_type(); assert_eq!(node_type, Some(ArcStr::from("fire_nation"))); @@ -355,7 +355,7 @@ mod test_index { fn test_encoding_graph_twice_to_same_storage_path_fails_zip() { let graph = init_graph(); graph.create_index().unwrap(); - let tmp_dir = tempfile::TempDir::new().unwrap(); + let tmp_dir = TempDir::new().unwrap(); let zip_path = tmp_dir.path().join("graph.zip"); let folder = GraphFolder::new_as_zip(&zip_path); graph.encode(&folder).unwrap(); @@ -381,19 +381,19 @@ mod test_index { let graph = init_graph(); graph.create_index().unwrap(); - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); 
graph.encode(path).unwrap(); // This gives us immutable index - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); // This tests that we are able to persist the immutable index - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let filter1 = NodeFilter::name().eq("Alice"); assert_search_results(&graph, &filter1, vec!["Alice"]); } @@ -403,12 +403,12 @@ mod test_index { let graph = init_graph(); graph.create_index().unwrap(); - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); // This gives us immutable index - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); // This converts immutable index to mutable index graph @@ -416,11 +416,11 @@ mod test_index { .unwrap(); // This tests that we are able to persist the mutable index - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let filter = NodeFilter::name().eq("Ozai"); assert_search_results(&graph, &filter, vec!["Ozai"]); } @@ -429,12 +429,12 @@ mod test_index { fn test_loading_zip_index_creates_mutable_index() { let graph = init_graph(); graph.create_index().unwrap(); - let tmp_dir = tempfile::TempDir::new().unwrap(); + let tmp_dir = TempDir::new().unwrap(); let zip_path = tmp_dir.path().join("graph.zip"); let folder = GraphFolder::new_as_zip(&zip_path); graph.encode(&folder).unwrap(); - let graph = Graph::decode(folder, None).unwrap(); + let graph = Graph::decode(&folder).unwrap(); let immutable = graph .get_storage() .unwrap() @@ -448,11 +448,11 @@ mod test_index { fn test_loading_index_creates_immutable_index() { let graph = init_graph(); graph.create_index().unwrap(); - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let immutable = graph .get_storage() .unwrap() @@ -472,11 +472,11 @@ mod test_index { let filter = NodeFilter::name().eq("Alice"); assert_search_results(&graph, &filter, vec!["Alice"]); - let binding = tempfile::TempDir::new().unwrap(); + let binding = TempDir::new().unwrap(); let path = binding.path(); graph.encode(path).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); let is_indexed = graph.get_storage().unwrap().is_indexed(); assert!(!is_indexed); @@ -543,7 +543,7 @@ mod test_index { let tmp_dir = TempDir::new().unwrap(); let path = tmp_dir.path().to_path_buf(); graph.encode(&path).unwrap(); - let graph = Graph::decode(&path, None).unwrap(); + let graph = Graph::decode(&path).unwrap(); let spec = graph.get_index_spec().unwrap().props(&graph); assert_eq!( @@ -833,8 +833,8 @@ mod test_index { let tmp_graph_dir = tempfile::tempdir().unwrap(); let path = tmp_graph_dir.path().to_path_buf(); - graph.encode(path.clone()).unwrap(); - let graph = Graph::decode(path.clone(), None).unwrap(); + graph.encode(&path).unwrap(); + let graph = Graph::decode(&path).unwrap(); assert_eq!(index_spec, graph.get_index_spec().unwrap()); let results = 
search_nodes(&graph, PropertyFilter::metadata("y").eq(false)); @@ -854,7 +854,7 @@ mod test_index { let tmp_graph_dir = tempfile::tempdir().unwrap(); let path = tmp_graph_dir.path().to_path_buf(); graph.encode(path.clone()).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(&path).unwrap(); assert_eq!(index_spec, graph.get_index_spec().unwrap()); let results = search_nodes(&graph, PropertyFilter::metadata("y").eq(false)); @@ -883,7 +883,7 @@ mod test_index { let path = tmp_graph_dir.path().to_path_buf(); graph.encode(path.clone()).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(&path).unwrap(); let index_spec2 = graph.get_index_spec().unwrap(); assert_eq!(index_spec, index_spec2); @@ -906,9 +906,9 @@ mod test_index { let binding = tempfile::TempDir::new().unwrap(); let path = binding.path(); let folder = GraphFolder::new_as_zip(path); - graph.encode(folder.root_folder).unwrap(); + graph.encode(folder).unwrap(); - let graph = Graph::decode(path, None).unwrap(); + let graph = Graph::decode(path).unwrap(); assert_eq!(index_spec, graph.get_index_spec().unwrap()); } diff --git a/raphtory/src/search/searcher.rs b/raphtory/src/search/searcher.rs index 20005ce97b..67f38f6ea5 100644 --- a/raphtory/src/search/searcher.rs +++ b/raphtory/src/search/searcher.rs @@ -265,7 +265,7 @@ mod search_tests { fn load_jira_graph() -> Result<(), GraphError> { global_info_logger(); - let graph = Graph::decode("/tmp/graphs/jira", None).expect("failed to load graph"); + let graph = Graph::decode("/tmp/graphs/jira").expect("failed to load graph"); assert!(graph.count_nodes() > 0); let now = SystemTime::now(); diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index 1d1c2913a4..d6fd08a534 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -1,47 +1,298 @@ use crate::{ - db::api::view::MaterializedGraph, - errors::GraphError, - prelude::{Graph, GraphViewOps, PropertiesOps}, - serialise::{metadata::GraphMetadata, serialise::StableDecode}, + db::api::view::internal::GraphView, errors::GraphError, prelude::ParquetEncoder, + serialise::metadata::GraphMetadata, }; +use raphtory_api::core::input::input_node::parse_u64_strict; +use serde::{Deserialize, Serialize}; use std::{ - fs::{self, File, OpenOptions}, - io::{self, BufReader, ErrorKind, Read, Seek, Write}, + fs::{self, File}, + io::{self, ErrorKind, Read, Seek, Write}, path::{Path, PathBuf}, }; -use tracing::info; use walkdir::WalkDir; use zip::{write::FileOptions, ZipArchive, ZipWriter}; /// Stores graph data pub const GRAPH_PATH: &str = "graph"; +pub const DEFAULT_GRAPH_PATH: &str = "graph0"; + +pub const DATA_PATH: &str = "data"; +pub const DEFAULT_DATA_PATH: &str = "data0"; /// Stores graph metadata pub const META_PATH: &str = ".raph"; +/// Temporary metadata for atomic replacement +pub const DIRTY_PATH: &str = ".dirty"; + /// Directory that stores search indexes pub const INDEX_PATH: &str = "index"; /// Directory that stores vector embeddings of the graph pub const VECTORS_PATH: &str = "vectors"; +pub(crate) fn valid_relative_graph_path( + relative_path: &str, + prefix: &str, +) -> Result<(), GraphError> { + relative_path + .strip_prefix(prefix) // should have the prefix + .and_then(|id| parse_u64_strict(id)) // the remainder should be the id + .ok_or_else(|| GraphError::InvalidRelativePath(relative_path.to_string()))?; + Ok(()) +} + +fn read_path_from_file(mut file: impl Read, prefix: &str) -> Result { 
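    // The pointer files are small JSON documents of the form {"path": "data0"}. The
    // `prefix` is "data" or "graph" and the remainder must parse as a bare integer, so
    // "data0" or "graph12" validate while traversal strings are rejected. A sketch of
    // the rule, assuming `parse_u64_strict` rejects anything but plain digits:
    //
    //     assert!(valid_relative_graph_path("data0", DATA_PATH).is_ok());
    //     assert!(valid_relative_graph_path("graph12", GRAPH_PATH).is_ok());
    //     assert!(valid_relative_graph_path("../evil", DATA_PATH).is_err());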
+ let mut value = String::new(); + file.read_to_string(&mut value)?; + let path: RelativePath = serde_json::from_str(&value)?; + valid_relative_graph_path(&path.path, prefix)?; + Ok(path.path) +} + +pub fn read_path_pointer( + base_path: &Path, + file_name: &str, + prefix: &str, +) -> Result, GraphError> { + let file = match File::open(base_path.join(file_name)) { + Ok(file) => file, + Err(error) => { + return match error.kind() { + ErrorKind::NotFound => Ok(None), + _ => Err(error.into()), + } + } + }; + let path = read_path_from_file(file, prefix)?; + Ok(Some(path)) +} + +pub fn read_data_path(base_path: &Path, prefix: &str) -> Result, GraphError> { + read_path_pointer(base_path, META_PATH, prefix) +} + +pub fn read_dirty_path(base_path: &Path, prefix: &str) -> Result, GraphError> { + read_path_pointer(base_path, DIRTY_PATH, prefix) +} + +pub fn make_data_path(base_path: &Path, prefix: &str) -> Result { + let mut id = read_data_path(base_path, prefix)? + .and_then(|path| { + path.strip_prefix(prefix) + .and_then(|id| id.parse::().ok()) + }) + .map_or(0, |id| id + 1); + + let mut path = format!("{prefix}{id}"); + while base_path.join(&path).exists() { + id += 1; + path = format!("{prefix}{id}"); + } + Ok(path) +} + +pub fn read_or_default_data_path(base_path: &Path, prefix: &str) -> Result { + Ok(read_data_path(base_path, prefix)?.unwrap_or_else(|| prefix.to_owned() + "0")) +} + +pub fn get_zip_data_path(zip: &mut ZipArchive) -> Result { + let file = zip.by_name(META_PATH)?; + Ok(read_path_from_file(file, DATA_PATH)?) +} + +pub fn get_zip_graph_path(zip: &mut ZipArchive) -> Result { + let mut path = get_zip_data_path(zip)?; + let graph_path = get_zip_graph_path_name(zip, path.clone())?; + path.push('/'); + path.push_str(&graph_path); + Ok(path) +} + +pub fn get_zip_graph_path_name( + zip: &mut ZipArchive, + mut data_path: String, +) -> Result { + data_path.push('/'); + data_path.push_str(META_PATH); + let graph_path = read_path_from_file(zip.by_name(&data_path)?, GRAPH_PATH)?; + Ok(graph_path) +} + +pub fn get_zip_meta_path(zip: &mut ZipArchive) -> Result { + let mut path = get_zip_data_path(zip)?; + path.push('/'); + path.push_str(META_PATH); + Ok(path) +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct RelativePath { + pub path: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Metadata { + pub path: String, + pub meta: GraphMetadata, +} + +pub trait GraphPaths { + fn root(&self) -> &Path; + + fn root_meta_path(&self) -> PathBuf { + self.root().join(META_PATH) + } + + fn data_path(&self) -> Result { + Ok(InnerGraphFolder { + path: self.root().join(self.relative_data_path()?), + }) + } + + fn vectors_path(&self) -> Result { + let mut path = self.data_path()?.path; + path.push(VECTORS_PATH); + Ok(path) + } + + fn index_path(&self) -> Result { + let mut path = self.data_path()?.path; + path.push(INDEX_PATH); + Ok(path) + } + + fn graph_path(&self) -> Result { + let mut path = self.data_path()?.path; + path.push(self.relative_graph_path()?); + Ok(path) + } + + fn meta_path(&self) -> Result { + let mut path = self.data_path()?.path; + path.push(META_PATH); + Ok(path) + } + + fn is_zip(&self) -> bool { + self.root().is_file() + } + + fn read_zip(&self) -> Result, GraphError> { + if self.is_zip() { + let file = File::open(self.root())?; + let archive = ZipArchive::new(file)?; + Ok(archive) + } else { + Err(GraphError::NotAZip) + } + } + + fn relative_data_path(&self) -> Result { + let path = if self.is_zip() { + let mut zip = self.read_zip()?; + get_zip_data_path(&mut 
zip)? + } else { + read_or_default_data_path(self.root(), DATA_PATH)? + }; + Ok(path) + } + + fn relative_graph_path(&self) -> Result { + if self.is_zip() { + let mut zip = self.read_zip()?; + let data_path = get_zip_data_path(&mut zip)?; + get_zip_graph_path_name(&mut zip, data_path) + } else { + let data_path = self.data_path()?; + read_or_default_data_path(data_path.as_ref(), GRAPH_PATH) + } + } + + fn read_metadata(&self) -> Result { + let mut json = String::new(); + if self.is_zip() { + let mut zip = self.read_zip()?; + let path = get_zip_meta_path(&mut zip)?; + let mut zip_file = zip.by_name(&path)?; + zip_file.read_to_string(&mut json)?; + } else { + let mut file = File::open(self.meta_path()?)?; + file.read_to_string(&mut json)?; + } + let metadata: Metadata = serde_json::from_str(&json)?; + Ok(metadata.meta) + } + + fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { + let graph_path = self.relative_graph_path()?; + let metadata = GraphMetadata::from_graph(graph); + let meta = Metadata { + path: graph_path, + meta: metadata, + }; + let path = self.meta_path()?; + let file = File::create(&path)?; + Ok(serde_json::to_writer(file, &meta)?) + } + + /// Returns true if folder is occupied by a graph. + fn is_reserved(&self) -> bool { + self.meta_path().map_or(false, |path| path.exists()) + } + + /// Initialise the data folder and metadata pointer + fn init(&self) -> Result<(), GraphError> { + if self.root().is_dir() { + let non_empty = self.root().read_dir()?.next().is_some(); + if non_empty { + return Err(GraphError::NonEmptyGraphFolder(self.root().into())); + } + } else { + fs::create_dir(self.root())? + } + let meta_path = self.relative_data_path()?; + fs::create_dir(self.root().join(&meta_path))?; + fs::write( + self.root_meta_path(), + serde_json::to_string(&RelativePath { path: meta_path })?, + )?; + Ok(()) + } +} + +impl + ?Sized> GraphPaths for P { + fn root(&self) -> &Path { + self.as_ref() + } +} + /// A container for managing graph data. +/// /// Folder structure: /// /// GraphFolder -/// ├── graph/ # Graph data -/// ├── .raph # Metadata file -/// ├── index/ # Search indexes (optional) -/// └── vectors/ # Vector embeddings (optional) +/// ├── .raph # Metadata file (json: {path: "data_{id}"}) +/// └── data_{id}/ # Data folder (incremental id for atomic replacement) +/// ├── .raph # Metadata file (json: {path: "graph_{id}", meta: {}}) +/// ├── graph_{id}/ # Graph data +/// ├── index/ # Search indexes (optional) +/// └── vectors/ # Vector embeddings (optional) /// /// If `write_as_zip_format` is true, then the folder is compressed /// and stored as a zip file. #[derive(Clone, Debug, PartialOrd, PartialEq, Ord, Eq)] pub struct GraphFolder { - pub root_folder: PathBuf, + root_folder: PathBuf, pub(crate) write_as_zip_format: bool, } +impl GraphPaths for GraphFolder { + fn root(&self) -> &Path { + &self.root_folder + } +} + impl GraphFolder { pub fn new_as_zip(path: impl AsRef) -> Self { let folder: GraphFolder = path.into(); @@ -52,26 +303,68 @@ impl GraphFolder { } /// Reserve a folder, marking it as occupied by a graph. - /// Returns an error if `write_as_zip_format` is true or if the folder has data. - pub fn reserve(&self) -> Result<(), GraphError> { + /// Returns an error if the folder has data. 
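// Taken together with `init_swap` and `finish` below, the write protocol is: stage the
// new data folder while `.dirty` points at it, then atomically rename `.dirty` to
// `.raph` and delete the previous data folder. A sketch of the happy path, with
// `graph` and `root` as hypothetical locals:
//
//     let writer = GraphFolder::from(root).init_swap()?;  // .dirty -> fresh "data{n}"
//     graph.encode_parquet(&writer.graph_path()?)?;       // write under the new folder
//     let folder = writer.finish()?;                      // commit: .dirty -> .raph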
+ pub fn init_write(self) -> Result { if self.write_as_zip_format { - return Err(GraphError::IOErrorMsg( - "Cannot reserve a zip folder".to_string(), - )); + return Err(GraphError::ZippedGraphCannotBeSwapped); } - + let relative_data_path = self.relative_data_path()?; + let meta = serde_json::to_string(&RelativePath { + path: relative_data_path.clone(), + })?; self.ensure_clean_root_dir()?; + let metapath = self.root_folder.join(DIRTY_PATH); + let mut path_file = File::create_new(&metapath)?; + path_file.write_all(meta.as_bytes())?; + fs::create_dir_all(self.root_folder.join(relative_data_path))?; + Ok(WriteableGraphFolder { + path: self.root_folder, + }) + } - // Mark as occupied using empty metadata & graph data. - File::create_new(self.get_meta_path())?; - fs::create_dir_all(self.get_graph_path())?; + /// Prepare a graph folder for atomically swapping the data contents. + /// This returns an error if the folder is set to write as Zip. + /// + /// If a swap is already in progress (i.e., `.dirty` file exists) it is aborted and + /// the contents of the corresponding folder are deleted. + pub fn init_swap(self) -> Result { + if self.write_as_zip_format { + return Err(GraphError::ZippedGraphCannotBeSwapped); + } + let old_swap = match read_dirty_path(self.root(), DATA_PATH) { + Ok(path) => path, + Err(_) => { + fs::remove_file(self.root_folder.join(DIRTY_PATH))?; // dirty file is corrupted, clean it up + None + } + }; - Ok(()) - } + fs::create_dir_all(self.root())?; - /// Returns true if folder is occupied by a graph. - pub fn is_reserved(&self) -> bool { - self.get_meta_path().exists() + let swap_path = match old_swap { + Some(relative_path) => { + let swap_path = self.root_folder.join(relative_path); + if swap_path.exists() { + fs::remove_dir_all(&swap_path)?; + } + swap_path + } + None => { + let new_relative_data_path = make_data_path(self.root(), DATA_PATH)?; + let new_data_path = self.root_folder.join(&new_relative_data_path); + let meta = serde_json::to_string(&RelativePath { + path: new_relative_data_path, + })?; + let mut dirty_file = File::create_new(self.root_folder.join(DIRTY_PATH))?; + dirty_file.write_all(meta.as_bytes())?; + dirty_file.sync_all()?; + new_data_path + } + }; + fs::create_dir_all(swap_path)?; + Ok(WriteableGraphFolder { + path: self.root_folder, + }) } /// Clears the folder of any contents. @@ -87,117 +380,17 @@ impl GraphFolder { Ok(()) } - pub fn get_graph_path(&self) -> PathBuf { - self.root_folder.join(GRAPH_PATH) - } - - pub fn get_meta_path(&self) -> PathBuf { - self.root_folder.join(META_PATH) - } - - pub fn get_index_path(&self) -> PathBuf { - self.root_folder.join(INDEX_PATH) - } - - pub fn get_vectors_path(&self) -> PathBuf { - self.root_folder.join(VECTORS_PATH) - } - - pub fn get_base_path(&self) -> &Path { - &self.root_folder - } - - pub fn is_zip(&self) -> bool { - self.root_folder.is_file() - } - - pub fn read_metadata(&self) -> Result { - match self.try_read_metadata() { - Ok(data) => Ok(data), - Err(e) => { - match e.kind() { - // In the case that the file is not found or invalid, try creating it then re-reading - ErrorKind::NotFound | ErrorKind::InvalidData | ErrorKind::UnexpectedEof => { - info!( - "Metadata file does not exist or is invalid. Attempting to recreate..." - ); - - // Either decode a graph serialized using encode or load using underlying storage. - let graph = if self.is_zip() - || MaterializedGraph::is_decodable(self.get_graph_path()) - { - MaterializedGraph::decode(self, None)? 
- } else { - // We currently do not have a way of figuring out the graph type - // from storage, so for now default to an EventGraph. - let graph = Graph::load_from_path(self.get_graph_path()); - MaterializedGraph::EventGraph(graph) - }; - - self.write_metadata(&graph)?; - - info!("Metadata file recreated successfully"); - - Ok(self.try_read_metadata()?) - } - _ => Err(e.into()), - } - } - } - } - - pub fn try_read_metadata(&self) -> Result { + pub fn get_zip_graph_prefix(&self) -> Result { if self.is_zip() { - let file = File::open(&self.root_folder)?; - let mut archive = ZipArchive::new(file)?; - let zip_file = archive.by_name(META_PATH)?; - let reader = BufReader::new(zip_file); - let metadata = serde_json::from_reader(reader)?; - Ok(metadata) + let mut zip = self.read_zip()?; + Ok([get_zip_data_path(&mut zip)?, get_zip_graph_path(&mut zip)?].join("/")) } else { - let file = File::open(self.get_meta_path())?; - let reader = BufReader::new(file); - let metadata = serde_json::from_reader(reader)?; - Ok(metadata) + let data_path = read_or_default_data_path(self.root(), DATA_PATH)?; + let graph_path = read_or_default_data_path(&self.root().join(&data_path), GRAPH_PATH)?; + Ok([data_path, graph_path].join("/")) } } - pub fn write_metadata<'graph>( - &self, - graph: &impl GraphViewOps<'graph>, - ) -> Result<(), GraphError> { - let node_count = graph.count_nodes(); - let edge_count = graph.count_edges(); - let properties = graph.metadata(); - let metadata = GraphMetadata { - node_count, - edge_count, - metadata: properties.as_vec(), - graph_type: graph.graph_type(), - }; - - if self.write_as_zip_format { - let file = File::options() - .read(true) - .write(true) - .open(&self.get_base_path())?; - let mut zip = ZipWriter::new_append(file)?; - - zip.start_file::<_, ()>(META_PATH, FileOptions::default())?; - Ok(serde_json::to_writer(zip, &metadata)?) - } else { - let path = self.get_meta_path(); - let file = File::create(path.clone())?; - - Ok(serde_json::to_writer(file, &metadata)?) - } - } - - pub(crate) fn get_appendable_graph_file(&self) -> Result { - let path = self.get_graph_path(); - Ok(OpenOptions::new().append(true).open(path)?) - } - fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { if self.root_folder.exists() { let non_empty = self.root_folder.read_dir()?.next().is_some(); @@ -205,28 +398,24 @@ impl GraphFolder { return Err(GraphError::NonEmptyGraphFolder(self.root_folder.clone())); } } else { - fs::create_dir_all(&self.root_folder)? + fs::create_dir(&self.root_folder)? } Ok(()) } - fn is_disk_graph(&self) -> bool { - let path = self.get_graph_path(); - path.is_dir() + pub fn is_disk_graph(&self) -> Result { + let meta = self.read_metadata()?; + Ok(meta.is_diskgraph) } /// Creates a zip file from the folder. 
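// Zip archives mirror the on-disk layout, so graph entries live under a combined prefix
// and `get_zip_graph_prefix` above recomputes it from the pointer files. An illustrative
// expectation for a freshly initialised directory folder (default ids assumed):
//
//     let prefix = folder.get_zip_graph_prefix()?;
//     assert_eq!(prefix, "data0/graph0");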
pub fn zip_from_folder(&self, mut writer: W) -> Result<(), GraphError> { - let mut buffer = Vec::new(); - if self.is_zip() { let mut reader = File::open(&self.root_folder)?; - reader.read_to_end(&mut buffer)?; - writer.write_all(&buffer)?; + io::copy(&mut reader, &mut writer)?; } else { let mut zip = ZipWriter::new(writer); - for entry in WalkDir::new(&self.root_folder) .into_iter() .filter_map(Result::ok) @@ -243,7 +432,7 @@ impl GraphFolder { let mut file = File::open(path)?; std::io::copy(&mut file, &mut zip)?; - } else if path.is_dir() { + } else if path.is_dir() && !zip_entry_name.is_empty() { // Add empty directories to the zip zip.add_directory::<_, ()>(zip_entry_name, FileOptions::default())?; } @@ -251,21 +440,158 @@ impl GraphFolder { zip.finish()?; } - Ok(()) } - /// Extracts a zip file to the folder. pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphError> { - if self.write_as_zip_format { - return Err(GraphError::IOErrorMsg( - "Cannot unzip to a zip format folder".to_string(), - )); + self.ensure_clean_root_dir()?; + let mut archive = ZipArchive::new(reader)?; + archive.extract(self.root())?; + Ok(()) + } +} + +#[must_use] +#[derive(Debug, Clone, PartialOrd, PartialEq, Ord, Eq)] +pub struct WriteableGraphFolder { + path: PathBuf, +} + +impl GraphPaths for WriteableGraphFolder { + fn root(&self) -> &Path { + &self.path + } + + fn relative_data_path(&self) -> Result { + let path = read_dirty_path(self.root(), DATA_PATH)?.ok_or(GraphError::NoWriteInProgress)?; + Ok(path) + } + + fn relative_graph_path(&self) -> Result { + let path = read_or_default_data_path(&self.data_path()?.as_ref(), GRAPH_PATH)?; + Ok(path) + } + + fn init(&self) -> Result<(), GraphError> { + Ok(()) + } +} + +impl WriteableGraphFolder { + /// Finalise an in-progress write by atomically renaming the '.dirty' file to '.raph' + /// and cleaning up any old data if it exists. + /// + /// This operation returns an error if there is no write in progress. + pub fn finish(self) -> Result { + let old_data = read_data_path(self.root(), DATA_PATH)?; + fs::rename(self.root().join(DIRTY_PATH), self.root().join(META_PATH))?; + if let Some(old_data) = old_data { + let old_data_path = self.root().join(old_data); + if old_data_path.is_dir() { + fs::remove_dir_all(old_data_path)?; + } } + Ok(GraphFolder { + root_folder: self.path, + write_as_zip_format: false, + }) + } +} + +#[derive(Clone, Debug)] +pub struct InnerGraphFolder { + path: PathBuf, +} + +impl AsRef for InnerGraphFolder { + fn as_ref(&self) -> &Path { + &self.path + } +} + +impl InnerGraphFolder { + pub fn write_metadata(&self, graph: impl GraphView) -> Result<(), GraphError> { + let graph_path = self.relative_graph_path()?; + let metadata = GraphMetadata::from_graph(graph); + let meta = Metadata { + path: graph_path, + meta: metadata, + }; + let path = self.meta_path(); + let file = File::create(&path)?; + Ok(serde_json::to_writer(file, &meta)?) 
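        // The inner `.raph` written here carries both the graph pointer and the summary
        // metadata, e.g. (field values illustrative only):
        //
        //     {"path":"graph0","meta":{"node_count":2,"edge_count":1,
        //      "graph_type":"EventGraph","is_diskgraph":false}}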
+ } + pub fn read_metadata(&self) -> Result { + let mut json = String::new(); + let mut file = File::open(self.meta_path())?; + file.read_to_string(&mut json)?; + let metadata: Metadata = serde_json::from_str(&json)?; + Ok(metadata.meta) + } + + pub fn replace_graph(&self, graph: impl ParquetEncoder + GraphView) -> Result<(), GraphError> { + let data_path = self.as_ref(); + let old_relative_graph_path = self.relative_graph_path()?; + let old_graph_path = self.path.join(&old_relative_graph_path); + let meta = GraphMetadata::from_graph(&graph); + let new_relative_graph_path = make_data_path(&data_path, GRAPH_PATH)?; + graph.encode_parquet(&data_path.join(&new_relative_graph_path))?; + + let dirty_path = data_path.join(DIRTY_PATH); + fs::write( + &dirty_path, + &serde_json::to_vec(&Metadata { + path: new_relative_graph_path.clone(), + meta, + })?, + )?; + fs::rename(&dirty_path, data_path.join(META_PATH))?; + if new_relative_graph_path != old_relative_graph_path { + fs::remove_dir_all(old_graph_path)?; + } + Ok(()) + } + pub fn vectors_path(&self) -> PathBuf { + self.path.join(VECTORS_PATH) + } + + pub fn index_path(&self) -> PathBuf { + self.path.join(INDEX_PATH) + } + + pub fn meta_path(&self) -> PathBuf { + self.path.join(META_PATH) + } + + pub fn relative_graph_path(&self) -> Result { + let relative = + read_data_path(&self.path, GRAPH_PATH)?.unwrap_or_else(|| GRAPH_PATH.to_owned() + "0"); + Ok(relative) + } + + pub fn graph_path(&self) -> Result { + Ok(self.path.join(self.relative_graph_path()?)) + } + + fn ensure_clean_root_dir(&self) -> Result<(), GraphError> { + if self.as_ref().exists() { + let non_empty = self.as_ref().read_dir()?.next().is_some(); + if non_empty { + return Err(GraphError::NonEmptyGraphFolder(self.as_ref().to_path_buf())); + } + } else { + fs::create_dir_all(self)? + } + Ok(()) + } + + /// Extracts a zip file to the folder. + pub fn unzip_to_folder(&self, reader: R) -> Result<(), GraphError> { self.ensure_clean_root_dir()?; let mut zip = ZipArchive::new(reader)?; + let data_dir = get_zip_data_path(&mut zip)?; for i in 0..zip.len() { let mut file = zip.by_index(i)?; @@ -273,19 +599,19 @@ impl GraphFolder { Some(name) => name, None => continue, }; + if let Ok(inner_path) = zip_entry_name.strip_prefix(&data_dir) { + let out_path = self.as_ref().join(inner_path); + if file.is_dir() { + std::fs::create_dir_all(&out_path)?; + } else { + // Create any parent directories + if let Some(parent) = out_path.parent() { + std::fs::create_dir_all(parent)?; + } - let out_path = self.root_folder.join(zip_entry_name); - - if file.is_dir() { - std::fs::create_dir_all(&out_path)?; - } else { - // Create any parent directories - if let Some(parent) = out_path.parent() { - std::fs::create_dir_all(parent)?; + let mut out_file = std::fs::File::create(&out_path)?; + std::io::copy(&mut file, &mut out_file)?; } - - let mut out_file = std::fs::File::create(&out_path)?; - std::io::copy(&mut file, &mut out_file)?; } } @@ -315,102 +641,105 @@ mod tests { use crate::{ db::graph::graph::assert_graph_equal, prelude::{AdditionOps, Graph, Prop, StableEncode, NO_PROPS}, + serialise::serialise::StableDecode, }; - use raphtory_api::{core::utils::logging::global_info_logger, GraphType}; - - /// Verify that the metadata is re-created if it does not exist. 
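// `replace_graph` above applies the same two-phase commit one level down: encode the
// new graph into a fresh "graph{n+1}" directory, stage the pointer in `.dirty`, rename
// it over `.raph`, then delete the old graph directory. Only the rename is the commit
// point, so a crash at any step leaves a readable previous state. Sketch, with
// `inner` and `updated_graph` as hypothetical locals:
//
//     inner.replace_graph(updated_graph)?;  // old graph dir removed on success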
- #[test] - fn test_read_metadata_from_noninitialized_zip() { - global_info_logger(); - - let graph = Graph::new(); - graph.add_node(0, 0, NO_PROPS, None).unwrap(); - - let tmp_dir = tempfile::TempDir::new().unwrap(); - let zip_path = tmp_dir.path().join("graph.zip"); - let folder = GraphFolder::new_as_zip(&zip_path); - graph.encode(&folder).unwrap(); - - // Remove the metadata file from the zip to simulate a noninitialized zip - remove_metadata_from_zip(&zip_path); - - // Should fail because the metadata file is not present - let err = folder.try_read_metadata(); - assert!(err.is_err()); - - // Should re-create the metadata file - let result = folder.read_metadata().unwrap(); - assert_eq!( - result, - GraphMetadata { - node_count: 1, - edge_count: 0, - metadata: vec![], - graph_type: GraphType::EventGraph, - } - ); - } - - /// Helper function to remove the metadata file from a zip - fn remove_metadata_from_zip(zip_path: &Path) { - let mut zip_file = std::fs::File::open(&zip_path).unwrap(); - let mut zip_archive = zip::ZipArchive::new(&mut zip_file).unwrap(); - let mut temp_zip = tempfile::NamedTempFile::new().unwrap(); - - // Scope for the zip writer - { - let mut zip_writer = zip::ZipWriter::new(&mut temp_zip); - - for i in 0..zip_archive.len() { - let mut file = zip_archive.by_index(i).unwrap(); - - // Copy all files except the metadata file - if file.name() != META_PATH { - zip_writer - .start_file::<_, ()>(file.name(), FileOptions::default()) - .unwrap(); - std::io::copy(&mut file, &mut zip_writer).unwrap(); - } - } - - zip_writer.finish().unwrap(); - } - - std::fs::copy(temp_zip.path(), &zip_path).unwrap(); - } - - /// Verify that the metadata is re-created if it does not exist. - #[test] - fn test_read_metadata_from_noninitialized_folder() { - global_info_logger(); - - let graph = Graph::new(); - graph.add_node(0, 0, NO_PROPS, None).unwrap(); - - let temp_folder = tempfile::TempDir::new().unwrap(); - let folder = GraphFolder::from(temp_folder.path()); - graph.encode(&folder).unwrap(); - - // Remove the metadata file - std::fs::remove_file(folder.get_meta_path()).unwrap(); - - // Should fail because the metadata file is not present - let err = folder.try_read_metadata(); - assert!(err.is_err()); - - // Should re-create the metadata file - let result = folder.read_metadata().unwrap(); - assert_eq!( - result, - GraphMetadata { - node_count: 1, - edge_count: 0, - metadata: vec![], - graph_type: GraphType::EventGraph, - } - ); - } + // /// Verify that the metadata is re-created if it does not exist. 
+ // #[test] + // #[ignore = "Need to think about how to deal with reading old format"] + // fn test_read_metadata_from_noninitialized_zip() { + // global_info_logger(); + // + // let graph = Graph::new(); + // graph.add_node(0, 0, NO_PROPS, None).unwrap(); + // + // let tmp_dir = tempfile::TempDir::new().unwrap(); + // let zip_path = tmp_dir.path().join("graph.zip"); + // let folder = GraphFolder::new_as_zip(&zip_path); + // graph.encode(&folder).unwrap(); + // + // // Remove the metadata file from the zip to simulate a noninitialized zip + // remove_metadata_from_zip(&zip_path); + // + // // Should fail because the metadata file is not present + // let err = folder.try_read_metadata(); + // assert!(err.is_err()); + // + // // Should re-create the metadata file + // let result = folder.read_metadata().unwrap(); + // assert_eq!( + // result, + // GraphMetadata { + // node_count: 1, + // edge_count: 0, + // metadata: vec![], + // graph_type: GraphType::EventGraph, + // is_diskgraph: false + // } + // ); + // } + + // /// Helper function to remove the metadata file from a zip + // fn remove_metadata_from_zip(zip_path: &Path) { + // let mut zip_file = std::fs::File::open(&zip_path).unwrap(); + // let mut zip_archive = zip::ZipArchive::new(&mut zip_file).unwrap(); + // let mut temp_zip = tempfile::NamedTempFile::new().unwrap(); + // + // // Scope for the zip writer + // { + // let mut zip_writer = zip::ZipWriter::new(&mut temp_zip); + // + // for i in 0..zip_archive.len() { + // let mut file = zip_archive.by_index(i).unwrap(); + // + // // Copy all files except the metadata file + // if file.name() != META_PATH { + // zip_writer + // .start_file::<_, ()>(file.name(), FileOptions::default()) + // .unwrap(); + // std::io::copy(&mut file, &mut zip_writer).unwrap(); + // } + // } + // + // zip_writer.finish().unwrap(); + // } + // + // std::fs::copy(temp_zip.path(), &zip_path).unwrap(); + // } + + // /// Verify that the metadata is re-created if it does not exist. 
+ // #[test] + // #[ignore = "Need to think about how to handle reading from old format"] + // fn test_read_metadata_from_noninitialized_folder() { + // global_info_logger(); + // + // let graph = Graph::new(); + // graph.add_node(0, 0, NO_PROPS, None).unwrap(); + // + // let temp_folder = tempfile::TempDir::new().unwrap(); + // let folder = GraphFolder::from(temp_folder.path()); + // graph.encode(&folder).unwrap(); + // + // // Remove the metadata file + // std::fs::remove_file(folder.get_meta_path()).unwrap(); + // + // // Should fail because the metadata file is not present + // let err = folder.try_read_metadata(); + // assert!(err.is_err()); + // + // // Should re-create the metadata file + // let result = folder.read_metadata().unwrap(); + // assert_eq!( + // result, + // GraphMetadata { + // node_count: 1, + // edge_count: 0, + // metadata: vec![], + // graph_type: GraphType::EventGraph, + // is_diskgraph: false + // } + // ); + // } #[test] fn test_zip_from_folder() { let graph = Graph::new(); @@ -423,8 +752,8 @@ mod tests { let initial_folder = GraphFolder::from(temp_folder.path().join("initial")); graph.encode(&initial_folder).unwrap(); - assert!(initial_folder.get_graph_path().exists()); - assert!(initial_folder.get_meta_path().exists()); + assert!(initial_folder.graph_path().unwrap().exists()); + assert!(initial_folder.meta_path().unwrap().exists()); // Create a zip file from the folder let output_zip_path = temp_folder.path().join("output.zip"); @@ -435,7 +764,7 @@ mod tests { // Verify the output zip contains the same graph let zip_folder = GraphFolder::new_as_zip(&output_zip_path); - let decoded_graph = Graph::decode(&zip_folder, None::<&std::path::Path>).unwrap(); + let decoded_graph = Graph::decode(&zip_folder).unwrap(); assert_graph_equal(&graph, &decoded_graph); } @@ -469,7 +798,7 @@ mod tests { // Verify the output zip contains the same graph let zip_folder = GraphFolder::new_as_zip(&output_zip_path); - let decoded_graph = Graph::decode(&zip_folder, None::<&std::path::Path>).unwrap(); + let decoded_graph = Graph::decode(&zip_folder).unwrap(); assert_graph_equal(&graph, &decoded_graph); } @@ -505,7 +834,7 @@ mod tests { let graph_folder = GraphFolder::from(&folder); graph.encode(&graph_folder).unwrap(); - assert!(graph_folder.get_graph_path().exists()); + assert!(graph_folder.graph_path().unwrap().exists()); // Zip the folder let mut zip_bytes = Vec::new(); @@ -519,11 +848,11 @@ mod tests { unzip_folder.unzip_to_folder(cursor).unwrap(); // Verify the extracted folder has the same structure - assert!(unzip_folder.get_graph_path().exists()); - assert!(unzip_folder.get_meta_path().exists()); + assert!(unzip_folder.graph_path().unwrap().exists()); + assert!(unzip_folder.meta_path().unwrap().exists()); // Verify the extracted graph is the same as the original - let extracted_graph = Graph::decode(&unzip_folder, None::<&std::path::Path>).unwrap(); + let extracted_graph = Graph::decode(&unzip_folder).unwrap(); assert_graph_equal(&graph, &extracted_graph); } } diff --git a/raphtory/src/serialise/metadata.rs b/raphtory/src/serialise/metadata.rs index 92971a61c9..faedfbaccc 100644 --- a/raphtory/src/serialise/metadata.rs +++ b/raphtory/src/serialise/metadata.rs @@ -1,25 +1,37 @@ use crate::{ - prelude::{GraphViewOps, PropertiesOps}, - serialise::GraphFolder, -}; -use raphtory_api::{ - core::{entities::properties::prop::Prop, storage::arc_str::ArcStr}, - GraphType, + db::api::view::internal::GraphView, + prelude::GraphViewOps, + serialise::{GraphFolder, GraphPaths}, }; +use 
raphtory_api::GraphType; use serde::{Deserialize, Serialize}; #[derive(PartialEq, Serialize, Deserialize, Debug)] pub struct GraphMetadata { pub node_count: usize, pub edge_count: usize, - pub metadata: Vec<(ArcStr, Prop)>, pub graph_type: GraphType, + pub is_diskgraph: bool, +} + +impl GraphMetadata { + pub fn from_graph(graph: G) -> Self { + let node_count = graph.count_nodes(); + let edge_count = graph.count_edges(); + let graph_type = graph.graph_type(); + let is_diskgraph = graph.disk_storage_enabled().is_some(); + Self { + node_count, + edge_count, + graph_type, + is_diskgraph, + } + } } pub fn assert_metadata_correct<'graph>(folder: &GraphFolder, graph: &impl GraphViewOps<'graph>) { let metadata = folder.read_metadata().unwrap(); assert_eq!(metadata.node_count, graph.count_nodes()); assert_eq!(metadata.edge_count, graph.count_edges()); - assert_eq!(metadata.metadata, graph.properties().as_vec()); assert_eq!(metadata.graph_type, graph.graph_type()); } diff --git a/raphtory/src/serialise/mod.rs b/raphtory/src/serialise/mod.rs index e6a139713b..ec33629745 100644 --- a/raphtory/src/serialise/mod.rs +++ b/raphtory/src/serialise/mod.rs @@ -1,13 +1,13 @@ mod graph_folder; pub mod metadata; -pub(crate) mod parquet; +pub mod parquet; #[cfg(feature = "proto")] pub mod proto; mod serialise; -pub use graph_folder::{GraphFolder, GRAPH_PATH, INDEX_PATH, META_PATH, VECTORS_PATH}; +pub use graph_folder::*; pub use serialise::{StableDecode, StableEncode}; #[cfg(feature = "proto")] diff --git a/raphtory/src/serialise/parquet/graph.rs b/raphtory/src/serialise/parquet/graph.rs index 735c6cab58..acd3230676 100644 --- a/raphtory/src/serialise/parquet/graph.rs +++ b/raphtory/src/serialise/parquet/graph.rs @@ -10,7 +10,7 @@ use arrow::datatypes::{DataType, Field}; use itertools::Itertools; use parquet::format::KeyValue; use raphtory_api::{ - core::{entities::properties::prop::SerdeProp, storage::arc_str::ArcStr}, + core::{entities::properties::prop::SerdeArrowProp, storage::arc_str::ArcStr}, GraphType, }; use raphtory_core::storage::timeindex::TimeIndexEntry; @@ -88,7 +88,7 @@ impl Serialize for Row { let mut state = serializer.serialize_map(Some(self.row.len()))?; for (k, v) in self.row.iter() { - state.serialize_entry(k, &SerdeProp(v))?; + state.serialize_entry(k, &SerdeArrowProp(v))?; } state.serialize_entry(TIME_COL, &self.t.0)?; diff --git a/raphtory/src/serialise/parquet/mod.rs b/raphtory/src/serialise/parquet/mod.rs index 93eb4740fb..9889ec575f 100644 --- a/raphtory/src/serialise/parquet/mod.rs +++ b/raphtory/src/serialise/parquet/mod.rs @@ -4,19 +4,23 @@ use crate::{ graph::views::deletion_graph::PersistentGraph, }, errors::GraphError, - io::parquet_loaders::{ - load_edge_deletions_from_parquet, load_edge_props_from_parquet, load_edges_from_parquet, - load_graph_props_from_parquet, load_node_props_from_parquet, load_nodes_from_parquet, + io::{ + arrow::prop_handler::lift_property_col, + parquet_loaders::{ + get_parquet_file_paths, load_edge_deletions_from_parquet, load_edge_props_from_parquet, + load_edges_from_parquet, load_graph_props_from_parquet, load_node_props_from_parquet, + load_nodes_from_parquet, process_parquet_file_to_df, + }, }, prelude::*, serialise::{ - graph_folder::GRAPH_PATH, parquet::{ edges::encode_edge_deletions, graph::{encode_graph_cprop, encode_graph_tprop}, model::get_id_type, nodes::{encode_nodes_cprop, encode_nodes_tprop}, }, + GraphPaths, }, }; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; @@ -46,7 +50,7 @@ use std::{ sync::Arc, }; use walkdir::WalkDir; 
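// `GraphMetadata::from_graph` derives `is_diskgraph` from whether disk storage is
// currently active (`disk_storage_enabled().is_some()`), so the flag costs nothing
// extra to record. Sketch, assuming an in-memory graph with default storage:
//
//     let meta = GraphMetadata::from_graph(&graph);
//     assert!(!meta.is_diskgraph);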
-use zip::{write::FileOptions, ZipWriter}; +use zip::{write::FileOptions, ZipArchive, ZipWriter}; mod edges; mod model; @@ -55,23 +59,20 @@ mod nodes; mod graph; pub trait ParquetEncoder { - fn encode_parquet_to_bytes(&self) -> Result, GraphError> { - // Write directly to an in-memory cursor - let mut zip_buffer = Vec::new(); - let cursor = std::io::Cursor::new(&mut zip_buffer); - - self.encode_parquet_to_zip(cursor)?; - - Ok(zip_buffer) - } - - fn encode_parquet_to_zip(&self, writer: W) -> Result<(), GraphError> { + /// Encode the graph as parquet data to the zip writer + /// (note the writer is still open for appending more data after calling this function) + /// + /// The graph data will be written at `prefix` inside the zip. + fn encode_parquet_to_zip>( + &self, + mut zip_writer: &mut ZipWriter, + prefix: P, + ) -> Result<(), GraphError> { + let prefix = prefix.as_ref(); // Encode to a tmp dir using parquet, then zip it to the writer let temp_dir = tempfile::tempdir()?; self.encode_parquet(&temp_dir)?; - let mut zip_writer = ZipWriter::new(writer); - // Walk through the directory and add files and directories to the zip. // Files and directories are stored in the archive under the GRAPH_PATH directory. for entry in WalkDir::new(temp_dir.path()) @@ -85,10 +86,7 @@ pub trait ParquetEncoder { })?; // Attach GRAPH_PATH as a prefix to the relative path - let zip_entry_name = PathBuf::from(GRAPH_PATH) - .join(relative_path) - .to_string_lossy() - .into_owned(); + let zip_entry_name = prefix.join(relative_path).to_string_lossy().into_owned(); if path.is_file() { zip_writer.start_file::<_, ()>(zip_entry_name, FileOptions::<()>::default())?; @@ -100,8 +98,6 @@ pub trait ParquetEncoder { zip_writer.add_directory::<_, ()>(zip_entry_name, FileOptions::<()>::default())?; } } - - zip_writer.finish()?; Ok(()) } @@ -109,22 +105,23 @@ pub trait ParquetEncoder { } pub trait ParquetDecoder: Sized { - fn decode_parquet_from_bytes( + fn decode_parquet_from_bytes>( bytes: &[u8], path_for_decoded_graph: Option<&Path>, + prefix: P, ) -> Result { // Read directly from an in-memory cursor - let reader = std::io::Cursor::new(bytes); - - Self::decode_parquet_from_zip(reader, path_for_decoded_graph) + let mut reader = ZipArchive::new(std::io::Cursor::new(bytes))?; + Self::decode_parquet_from_zip(&mut reader, path_for_decoded_graph, prefix) } - fn decode_parquet_from_zip( - reader: R, + fn decode_parquet_from_zip>( + zip: &mut ZipArchive, path_for_decoded_graph: Option<&Path>, + prefix: P, ) -> Result { + let prefix = prefix.as_ref(); // Unzip to a temp dir and decode parquet from there - let mut zip = zip::ZipArchive::new(reader)?; let temp_dir = tempfile::tempdir()?; for i in 0..zip.len() { @@ -134,18 +131,8 @@ pub trait ParquetDecoder: Sized { None => continue, }; - if zip_entry_name.starts_with(GRAPH_PATH) { - // Since we attach the GRAPH_PATH prefix to the zip entry name - // when encoding, we strip it away while decoding. - let relative_path = zip_entry_name - .strip_prefix(GRAPH_PATH) - .map_err(|e| { - GraphError::IOErrorMsg(format!("Failed to strip prefix from path: {}", e)) - })? 
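// With the writer threaded through, one archive can now be assembled from several
// components without reopening the file. A composition sketch, assuming a `graph`,
// an `index`, and a `zip_path` in scope (hypothetical names):
//
//     let file = File::create(&zip_path)?;
//     let mut zip = ZipWriter::new(file);
//     graph.encode_parquet_to_zip(&mut zip, "data0/graph0")?;
//     index.persist_to_disk_zip(&mut zip, "data0/index")?;
//     zip.finish()?;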
@@ -109,22 +105,23 @@
 }
 
 pub trait ParquetDecoder: Sized {
-    fn decode_parquet_from_bytes(
+    fn decode_parquet_from_bytes<P: AsRef<Path>>(
         bytes: &[u8],
         path_for_decoded_graph: Option<&Path>,
+        prefix: P,
     ) -> Result<Self, GraphError> {
         // Read directly from an in-memory cursor
-        let reader = std::io::Cursor::new(bytes);
-
-        Self::decode_parquet_from_zip(reader, path_for_decoded_graph)
+        let mut reader = ZipArchive::new(std::io::Cursor::new(bytes))?;
+        Self::decode_parquet_from_zip(&mut reader, path_for_decoded_graph, prefix)
     }
 
-    fn decode_parquet_from_zip<R: Read + Seek>(
-        reader: R,
+    fn decode_parquet_from_zip<R: Read + Seek, P: AsRef<Path>>(
+        zip: &mut ZipArchive<R>,
         path_for_decoded_graph: Option<&Path>,
+        prefix: P,
     ) -> Result<Self, GraphError> {
+        let prefix = prefix.as_ref();
         // Unzip to a temp dir and decode parquet from there
-        let mut zip = zip::ZipArchive::new(reader)?;
         let temp_dir = tempfile::tempdir()?;
 
         for i in 0..zip.len() {
@@ -134,18 +131,8 @@ pub trait ParquetDecoder: Sized {
                 None => continue,
             };
 
-            if zip_entry_name.starts_with(GRAPH_PATH) {
-                // Since we attach the GRAPH_PATH prefix to the zip entry name
-                // when encoding, we strip it away while decoding.
-                let relative_path = zip_entry_name
-                    .strip_prefix(GRAPH_PATH)
-                    .map_err(|e| {
-                        GraphError::IOErrorMsg(format!("Failed to strip prefix from path: {}", e))
-                    })?
-                    .to_path_buf();
-
+            if let Ok(relative_path) = zip_entry_name.strip_prefix(prefix) {
                 let out_path = temp_dir.path().join(relative_path);
-
                 if file.is_dir() {
                     std::fs::create_dir_all(&out_path)?;
                 } else {
@@ -153,27 +140,14 @@
                     if let Some(parent) = out_path.parent() {
                         std::fs::create_dir_all(parent)?;
                     }
-
                     let mut out_file = std::fs::File::create(&out_path)?;
                     std::io::copy(&mut file, &mut out_file)?;
                 }
             }
         }
-
         Self::decode_parquet(temp_dir.path(), path_for_decoded_graph)
     }
 
-    fn is_parquet_decodable(path: impl AsRef<Path>) -> bool {
-        // Considered to be decodable if there is at least one .parquet
-        WalkDir::new(path)
-            .into_iter()
-            .filter_map(Result::ok)
-            .any(|entry| {
-                entry.path().is_file()
-                    && entry.path().extension().is_some_and(|ext| ext == "parquet")
-            })
-    }
-
     fn decode_parquet(
         path: impl AsRef<Path>,
         path_for_decoded_graph: Option<&Path>,
@@ -431,13 +405,37 @@ fn decode_graph_type(path: impl AsRef<Path>) -> Result<GraphType, GraphError> {
     g_type.ok_or_else(|| GraphError::LoadFailure("Graph type not found".to_string()))
 }
 
+pub fn decode_graph_metadata(
+    path: &impl GraphPaths,
+) -> Result<Vec<(String, Option<Prop>)>, GraphError> {
+    let c_graph_path = path.graph_path()?.join(GRAPH_C_PATH);
+    let exclude = vec![TIME_COL];
+    let (c_props, _) = collect_prop_columns(&c_graph_path, &exclude)?;
+    let c_props = c_props.iter().map(|s| s.as_str()).collect::<Vec<_>>();
+    let mut result: Vec<(String, Option<Prop>)> =
+        c_props.iter().map(|s| (s.to_string(), None)).collect();
+
+    for path in get_parquet_file_paths(&c_graph_path)? {
+        let df_view = process_parquet_file_to_df(path.as_path(), Some(&c_props), None)?;
+        for chunk in df_view.chunks {
+            let chunk = chunk?;
+            for (col, res) in chunk.chunk.into_iter().zip(&mut result) {
+                if let Some(value) = lift_property_col(&col).get(0) {
+                    res.1 = Some(value);
+                }
+            }
+        }
+    }
+    Ok(result)
+}
+
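Sketch of reading stored graph metadata without decoding the graph, assuming `decode_graph_metadata` is reachable through the now-public `parquet` module and `GraphPaths` through the glob re-export above (`print_metadata` is a made-up helper):

    // Illustrative only: inspect the latest value of each constant graph
    // property straight from the parquet files.
    use raphtory::{
        errors::GraphError,
        serialise::{parquet::decode_graph_metadata, GraphPaths},
    };

    fn print_metadata(stored: &impl GraphPaths) -> Result<(), GraphError> {
        for (name, latest) in decode_graph_metadata(stored)? {
            match latest {
                Some(prop) => println!("{name} = {prop:?}"),
                None => println!("{name}: no value recorded"),
            }
        }
        Ok(())
    }
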
 fn decode_graph_storage(
     path: impl AsRef<Path>,
     batch_size: Option<usize>,
     path_for_decoded_graph: Option<&Path>,
 ) -> Result<GraphStorage, GraphError> {
     let graph = if let Some(storage_path) = path_for_decoded_graph {
-        Arc::new(Storage::new_at_path(storage_path))
+        Arc::new(Storage::new_at_path(storage_path)?)
     } else {
         Arc::new(Storage::default())
     };
diff --git a/raphtory/src/serialise/parquet/model.rs b/raphtory/src/serialise/parquet/model.rs
index a34b661c25..2b1562bf87 100644
--- a/raphtory/src/serialise/parquet/model.rs
+++ b/raphtory/src/serialise/parquet/model.rs
@@ -10,7 +10,7 @@ use crate::{
 };
 use arrow::datatypes::DataType;
 use raphtory_api::core::{
-    entities::{properties::prop::SerdeProp, GidType},
+    entities::{properties::prop::SerdeArrowProp, GidType},
     storage::{arc_str::ArcStr, timeindex::TimeIndexEntry},
 };
 use raphtory_storage::graph::graph::GraphStorage;
@@ -59,7 +59,7 @@ impl<'a, G: StaticGraphViewOps> Serialize for ParquetTEdge<'a, G> {
         state.serialize_entry(LAYER_COL, &layer)?;
 
         for (name, prop) in edge.properties().temporal().iter_latest() {
-            state.serialize_entry(&name, &SerdeProp(&prop))?;
+            state.serialize_entry(&name, &SerdeArrowProp(&prop))?;
         }
 
         state.end()
@@ -85,7 +85,7 @@ impl<'a, G: StaticGraphViewOps> Serialize for ParquetCEdge<'a, G> {
         state.serialize_entry(LAYER_COL, &layer)?;
 
         for (name, prop) in edge.metadata().iter_filtered() {
-            state.serialize_entry(&name, &SerdeProp(&prop))?;
+            state.serialize_entry(&name, &SerdeArrowProp(&prop))?;
         }
 
         state.end()
@@ -136,7 +136,7 @@ impl<'a> Serialize for ParquetTNode<'a> {
         state.serialize_entry(TYPE_COL, &self.node.node_type())?;
 
         for (name, prop) in self.props.iter() {
-            state.serialize_entry(&self.cols[*name], &SerdeProp(prop))?;
+            state.serialize_entry(&self.cols[*name], &SerdeArrowProp(prop))?;
         }
 
         state.end()
@@ -158,7 +158,7 @@ impl<'a> Serialize for ParquetCNode<'a> {
         state.serialize_entry(TYPE_COL, &self.node.node_type())?;
 
         for (name, prop) in self.node.metadata().iter_filtered() {
-            state.serialize_entry(&name, &SerdeProp(&prop))?;
+            state.serialize_entry(&name, &SerdeArrowProp(&prop))?;
         }
 
         state.end()
diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs
index 88ff739193..ec9c949320 100644
--- a/raphtory/src/serialise/serialise.rs
+++ b/raphtory/src/serialise/serialise.rs
@@ -1,55 +1,77 @@
+#[cfg(feature = "search")]
+use crate::prelude::IndexMutationOps;
 use crate::{
-    db::api::{mutation::AdditionOps, view::StaticGraphViewOps},
+    db::api::{
+        mutation::AdditionOps, storage::storage::PersistentStrategy, view::StaticGraphViewOps,
+    },
     errors::GraphError,
     serialise::{
+        get_zip_graph_path,
+        metadata::GraphMetadata,
         parquet::{ParquetDecoder, ParquetEncoder},
-        GraphFolder,
+        GraphFolder, GraphPaths, Metadata, RelativePath, DEFAULT_DATA_PATH, DEFAULT_GRAPH_PATH,
+        META_PATH,
     },
 };
-use std::{fs, fs::File, path::Path};
-use tempfile;
-
-#[cfg(feature = "search")]
-use crate::prelude::IndexMutationOps;
+use std::{
+    fs::File,
+    io::{Cursor, Read, Seek, Write},
+};
+use storage::Extension;
+use zip::{write::SimpleFileOptions, ZipArchive, ZipWriter};
 
 pub trait StableEncode: StaticGraphViewOps + AdditionOps {
+    fn encode_to_zip<W: Write + Seek>(&self, writer: ZipWriter<W>) -> Result<(), GraphError>;
+
     /// Encode the graph into bytes.
-    fn encode_to_bytes(&self) -> Vec<u8>;
+    fn encode_to_bytes(&self) -> Result<Vec<u8>, GraphError>;
 
     /// Encode the graph into the given path.
     fn encode(&self, path: impl Into<GraphFolder>) -> Result<(), GraphError>;
 }
 
 impl<T: StaticGraphViewOps + AdditionOps> StableEncode for T {
-    fn encode_to_bytes(&self) -> Vec<u8> {
-        // Encode to a temp zip file and return the bytes
-        let tempdir = tempfile::tempdir().unwrap();
-        let zip_path = tempdir.path().join("graph.zip");
-        let folder = GraphFolder::new_as_zip(&zip_path);
-
-        self.encode(&folder).unwrap();
-        fs::read(&zip_path).unwrap()
+    fn encode_to_zip<W: Write + Seek>(&self, mut writer: ZipWriter<W>) -> Result<(), GraphError> {
+        let graph_meta = GraphMetadata::from_graph(self);
+        writer.start_file(META_PATH, SimpleFileOptions::default())?;
+        writer.write(&serde_json::to_vec(&RelativePath {
+            path: DEFAULT_DATA_PATH.to_string(),
+        })?)?;
+        writer.start_file(
+            [DEFAULT_DATA_PATH, META_PATH].join("/"),
+            SimpleFileOptions::default(),
+        )?;
+        writer.write(&serde_json::to_vec(&Metadata {
+            path: DEFAULT_GRAPH_PATH.to_string(),
+            meta: graph_meta,
+        })?)?;
+        let graph_prefix = [DEFAULT_DATA_PATH, DEFAULT_GRAPH_PATH].join("/");
+        self.encode_parquet_to_zip(&mut writer, graph_prefix)?;
+        // TODO: Encode Index to zip
+        writer.finish()?;
+        Ok(())
+    }
+
+    fn encode_to_bytes(&self) -> Result<Vec<u8>, GraphError> {
+        let mut bytes = Vec::new();
+        let writer = ZipWriter::new(Cursor::new(&mut bytes));
+        self.encode_to_zip(writer)?;
+        Ok(bytes)
     }
 
     fn encode(&self, path: impl Into<GraphFolder>) -> Result<(), GraphError> {
         let folder: GraphFolder = path.into();
         if folder.write_as_zip_format {
-            let file = File::create_new(&folder.get_base_path())?;
-            self.encode_parquet_to_zip(file)?;
-
-            #[cfg(feature = "search")]
-            self.persist_index_to_disk_zip(&folder)?;
+            let file = File::create_new(&folder.root())?;
+            self.encode_to_zip(ZipWriter::new(file))?;
         } else {
-            folder.reserve()?;
-            self.encode_parquet(&folder.get_graph_path())?;
-
+            let write_folder = folder.init_write()?;
+            self.encode_parquet(write_folder.graph_path()?)?;
             #[cfg(feature = "search")]
-            self.persist_index_to_disk(&folder)?;
+            self.persist_index_to_disk(&write_folder)?;
+            write_folder.data_path()?.write_metadata(self)?;
+            write_folder.finish()?;
         }
-
-        folder.write_metadata(self)?;
-
         Ok(())
     }
 }
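The archive written by `encode_to_zip` holds META_PATH (a `RelativePath` pointing at DEFAULT_DATA_PATH), then DEFAULT_DATA_PATH/META_PATH (a `Metadata` record naming DEFAULT_GRAPH_PATH), and finally the parquet graph under DEFAULT_DATA_PATH/DEFAULT_GRAPH_PATH. A round-trip sketch using the now-fallible byte API (`roundtrip` is illustrative):

    // Illustrative only: encode to bytes and decode them back.
    use raphtory::{
        errors::GraphError,
        prelude::*,
        serialise::{StableDecode, StableEncode},
    };

    fn roundtrip(g: &Graph) -> Result<Graph, GraphError> {
        let bytes = g.encode_to_bytes()?; // previously returned a bare Vec<u8>
        Graph::decode_from_bytes(&bytes)
    }
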
@@ -57,59 +79,100 @@ impl<T: StaticGraphViewOps + AdditionOps> StableEncode for T {
 pub trait StableDecode: StaticGraphViewOps + AdditionOps {
     // Decode the graph from the given bytes array.
     // `path_for_decoded_graph` gets passed to the newly created graph.
-    fn decode_from_bytes(
+    fn decode_from_bytes(bytes: &[u8]) -> Result<Self, GraphError>;
+
+    fn decode_from_bytes_at(
         bytes: &[u8],
-        path_for_decoded_graph: Option<&Path>,
+        target: &(impl GraphPaths + ?Sized),
+    ) -> Result<Self, GraphError>;
+
+    fn decode_from_zip<R: Read + Seek>(reader: ZipArchive<R>) -> Result<Self, GraphError>;
+
+    fn decode_from_zip_at<R: Read + Seek>(
+        reader: ZipArchive<R>,
+        target: &(impl GraphPaths + ?Sized),
     ) -> Result<Self, GraphError>;
 
     // Decode the graph from the given path.
     // `path_for_decoded_graph` gets passed to the newly created graph.
-    fn decode(
-        path: impl Into<GraphFolder>,
-        path_for_decoded_graph: Option<&Path>,
-    ) -> Result<Self, GraphError>;
+    fn decode(path: &(impl GraphPaths + ?Sized)) -> Result<Self, GraphError>;
 
-    /// Returns true if the graph can be decoded from the given path.
-    fn is_decodable(path: impl AsRef<Path>) -> bool;
+    fn decode_at(
+        path: &(impl GraphPaths + ?Sized),
+        target: &(impl GraphPaths + ?Sized),
+    ) -> Result<Self, GraphError>;
 }
 
 impl<T: StaticGraphViewOps + AdditionOps> StableDecode for T {
-    fn decode_from_bytes(
+    fn decode_from_bytes(bytes: &[u8]) -> Result<Self, GraphError> {
+        let cursor = Cursor::new(bytes);
+        Self::decode_from_zip(ZipArchive::new(cursor)?)
+    }
+
+    fn decode_from_bytes_at(
         bytes: &[u8],
-        path_for_decoded_graph: Option<&Path>,
+        target: &(impl GraphPaths + ?Sized),
     ) -> Result<Self, GraphError> {
-        // Write bytes to a temp zip file and decode
-        let tempdir = tempfile::tempdir()?;
-        let zip_path = tempdir.path().join("graph.zip");
-        let folder = GraphFolder::new_as_zip(&zip_path);
-        std::fs::write(&zip_path, bytes)?;
+        let cursor = Cursor::new(bytes);
+        Self::decode_from_zip_at(ZipArchive::new(cursor)?, target)
+    }
 
-        let graph = Self::decode(&folder, path_for_decoded_graph)?;
+    fn decode_from_zip<R: Read + Seek>(mut reader: ZipArchive<R>) -> Result<Self, GraphError> {
+        let graph_prefix = get_zip_graph_path(&mut reader)?;
+        let graph = Self::decode_parquet_from_zip(&mut reader, None, graph_prefix)?;
+
+        //TODO: graph.load_index_from_zip(&mut reader, prefix)
         Ok(graph)
     }
 
-    fn decode(
-        path: impl Into<GraphFolder>,
-        path_for_decoded_graph: Option<&Path>,
+    fn decode_from_zip_at<R: Read + Seek>(
+        mut reader: ZipArchive<R>,
+        target: &(impl GraphPaths + ?Sized),
     ) -> Result<Self, GraphError> {
-        let graph;
-        let folder: GraphFolder = path.into();
+        if Extension::disk_storage_enabled() {
+            return Err(GraphError::DiskGraphNotEnabled);
+        }
+        target.init()?;
+        let graph_prefix = get_zip_graph_path(&mut reader)?;
+        let graph = Self::decode_parquet_from_zip(
+            &mut reader,
+            Some(target.graph_path()?.as_path()),
+            graph_prefix,
+        )?;
+
+        //TODO: graph.load_index_from_zip(&mut reader, prefix)
+        target.write_metadata(&graph)?;
+        Ok(graph)
+    }
 
-        if folder.is_zip() {
-            let reader = std::fs::File::open(&folder.get_base_path())?;
-            graph = Self::decode_parquet_from_zip(reader, path_for_decoded_graph)?;
+    fn decode(path: &(impl GraphPaths + ?Sized)) -> Result<Self, GraphError> {
+        let graph;
+        if path.is_zip() {
+            let reader = path.read_zip()?;
+            graph = Self::decode_from_zip(reader)?;
         } else {
-            graph = Self::decode_parquet(&folder.get_graph_path(), path_for_decoded_graph)?;
+            graph = Self::decode_parquet(&path.graph_path()?, None)?;
+            // TODO: Fix index loading:
+            // #[cfg(feature = "search")]
+            // graph.load_index(&path)?;
         }
-
-        #[cfg(feature = "search")]
-        graph.load_index(&folder)?;
-
         Ok(graph)
     }
 
-    fn is_decodable(path: impl AsRef<Path>) -> bool {
-        Self::is_parquet_decodable(path)
+    fn decode_at(
+        path: &(impl GraphPaths + ?Sized),
+        target: &(impl GraphPaths + ?Sized),
+    ) -> Result<Self, GraphError> {
+        target.init()?;
+        let graph;
+        if path.is_zip() {
+            let reader = path.read_zip()?;
+            graph = Self::decode_from_zip_at(reader, target)?;
+        } else {
+            graph = Self::decode_parquet(path.graph_path()?, Some(target.graph_path()?.as_path()))?;
+        }
+        target.write_metadata(&graph)?;
+        Ok(graph)
     }
 }
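Sketch of the new `decode_at`, which decodes from one location and materialises the result, metadata included, at a target; the updated tests pass plain paths to `decode`, so `GraphPaths` is presumably implemented for path types (`copy_stored_graph` is made up):

    // Illustrative only: decode a stored graph and persist the copy at `dst`.
    use raphtory::{errors::GraphError, prelude::Graph, serialise::StableDecode};
    use std::path::Path;

    fn copy_stored_graph(src: &Path, dst: &Path) -> Result<Graph, GraphError> {
        Graph::decode_at(src, dst)
    }
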
diff --git a/raphtory/src/vectors/db.rs b/raphtory/src/vectors/db.rs
index 2e464f4ef6..2d6e72de36 100644
--- a/raphtory/src/vectors/db.rs
+++ b/raphtory/src/vectors/db.rs
@@ -1,15 +1,3 @@
-use std::{
-    collections::HashSet,
-    ops::Deref,
-    path::{Path, PathBuf},
-    sync::{Arc, OnceLock},
-};
-
-use arroy::{distances::Cosine, Database as ArroyDatabase, Reader, Writer};
-use futures_util::StreamExt;
-use rand::{rngs::StdRng, SeedableRng};
-use tempfile::TempDir;
-
 use super::{
     entity_ref::{EntityRef, IntoDbId},
     Embedding,
@@ -19,6 +7,15 @@ use crate::{
     errors::{GraphError, GraphResult},
     prelude::GraphViewOps,
 };
+use arroy::{distances::Cosine, Database as ArroyDatabase, Reader, Writer};
+use futures_util::StreamExt;
+use std::{
+    collections::HashSet,
+    ops::Deref,
+    path::{Path, PathBuf},
+    sync::{Arc, OnceLock},
+};
+use tempfile::TempDir;
 
 const LMDB_MAX_SIZE: usize = 1024 * 1024 * 1024 * 1024; // 1TB
 
diff --git a/raphtory/tests/db_tests.rs b/raphtory/tests/db_tests.rs
index e9f6182dc5..5dfe5f1a38 100644
--- a/raphtory/tests/db_tests.rs
+++ b/raphtory/tests/db_tests.rs
@@ -709,7 +709,7 @@ fn graph_save_to_load_from_file() {
     g.encode(&graph_path).unwrap();
 
     // Load from files
-    let g2 = Graph::decode(&graph_path, None).unwrap();
+    let g2 = Graph::decode(&graph_path).unwrap();
 
     assert_eq!(g, g2);
 }
@@ -2947,7 +2947,7 @@ fn save_load_serial() {
     let dir = tempfile::tempdir().unwrap();
     let file_path = dir.path().join("abcd11");
     g.encode(&file_path).unwrap();
-    let gg = Graph::decode(file_path, None).unwrap();
+    let gg = Graph::decode(&file_path).unwrap();
     assert_graph_equal(&g, &gg);
 }
 
diff --git a/raphtory/tests/df_loaders.rs b/raphtory/tests/df_loaders.rs
index 5066aa004d..69f01772a5 100644
--- a/raphtory/tests/df_loaders.rs
+++ b/raphtory/tests/df_loaders.rs
@@ -615,7 +615,8 @@ mod parquet_tests {
             PropUpdatesFixture,
         },
     };
-    use std::str::FromStr;
+    use std::{io::Cursor, str::FromStr};
+    use zip::{ZipArchive, ZipWriter};
 
     #[test]
     fn node_temp_props() {
@@ -1127,10 +1128,13 @@ mod parquet_tests {
 
         // Test writing to a file
         let file = std::fs::File::create(&zip_path).unwrap();
-        g.encode_parquet_to_zip(file).unwrap();
+        let mut writer = ZipWriter::new(file);
+        g.encode_parquet_to_zip(&mut writer, "graph").unwrap();
+        writer.finish().unwrap();
 
-        let reader = std::fs::File::open(&zip_path).unwrap();
-        let g2 = Graph::decode_parquet_from_zip(reader, None::<&std::path::Path>).unwrap();
+        let mut reader = ZipArchive::new(std::fs::File::open(&zip_path).unwrap()).unwrap();
+        let g2 =
+            Graph::decode_parquet_from_zip(&mut reader, None::<&std::path::Path>, "graph").unwrap();
 
         assert_graph_equal(&g, &g2);
     }
@@ -1153,8 +1157,12 @@
         g.add_edge(3, 1, 4, [("test prop 3", 10.0)], None).unwrap();
         g.add_edge(4, 1, 3, [("test prop 4", true)], None).unwrap();
 
-        let bytes = g.encode_parquet_to_bytes().unwrap();
-        let g2 = Graph::decode_parquet_from_bytes(&bytes, None::<&std::path::Path>).unwrap();
+        let mut bytes = Vec::new();
+        let mut writer = ZipWriter::new(Cursor::new(&mut bytes));
+        g.encode_parquet_to_zip(&mut writer, "graph").unwrap();
+        writer.finish().unwrap();
+        let g2 =
+            Graph::decode_parquet_from_bytes(&bytes, None::<&std::path::Path>, "graph").unwrap();
 
         assert_graph_equal(&g, &g2);
     }
@@ -1162,8 +1170,11 @@
     fn test_parquet_bytes_proptest() {
         proptest!(|(edges in build_graph_strat(30, 30, 10, 10, true))| {
             let g = Graph::from(build_graph(&edges));
-            let bytes = g.encode_parquet_to_bytes().unwrap();
-            let g2 = Graph::decode_parquet_from_bytes(&bytes, None::<&std::path::Path>).unwrap();
+            let mut bytes = Vec::new();
+            let mut writer = ZipWriter::new(Cursor::new(&mut bytes));
+            g.encode_parquet_to_zip(&mut writer, "graph").unwrap();
+            writer.finish().unwrap();
+            let g2 = Graph::decode_parquet_from_bytes(&bytes, None::<&std::path::Path>, "graph").unwrap();
 
             assert_graph_equal(&g, &g2);
         })
diff --git a/raphtory/tests/serialise_test.rs b/raphtory/tests/serialise_test.rs
index 6d77b963e4..7339f1b1f2 100644
--- a/raphtory/tests/serialise_test.rs
+++ b/raphtory/tests/serialise_test.rs
@@ -32,7 +32,7 @@ mod serialise_test {
         let g1 = Graph::new();
         g1.add_node(1, "Alice", NO_PROPS, None).unwrap();
         g1.encode(&temp_file).unwrap();
-        let g2 = Graph::decode(&temp_file, None).unwrap();
+        let g2 = Graph::decode(&temp_file).unwrap();
         assert_graph_equal(&g1, &g2);
     }
 
@@ -45,7 +45,7 @@ mod serialise_test {
         g1.add_node(2, "Bob", [("age", Prop::U32(47))], None)
             .unwrap();
         g1.encode(&temp_file).unwrap();
-        let g2 = Graph::decode(&temp_file, None).unwrap();
+        let g2 = Graph::decode(&temp_file).unwrap();
         assert_graph_equal(&g1, &g2);
     }
 
@@ -61,7 +61,7 @@
         let temp_file = TempDir::new().unwrap();
         g.encode(&temp_file).unwrap();
 
-        let g2 = MaterializedGraph::decode(&temp_file, None).unwrap();
+        let g2 = MaterializedGraph::decode(&temp_file).unwrap();
         assert_eq!(g2.nodes().name().collect_vec(), ["ben", "hamza", "haaroon"]);
         let node_names: Vec<_> = g2.nodes().iter().map(|n| n.name()).collect();
         assert_eq!(node_names, ["ben", "hamza", "haaroon"]);
@@ -77,7 +77,7 @@
         let temp_file = TempDir::new().unwrap();
         g3.encode(&temp_file).unwrap();
 
-        let g4 = MaterializedGraph::decode(&temp_file, None).unwrap();
+        let g4 = MaterializedGraph::decode(&temp_file).unwrap();
         assert_eq!(g4.nodes().name().collect_vec(), ["ben", "hamza", "haaroon"]);
         let node_names: Vec<_> = g4.nodes().iter().map(|n| n.name()).collect();
         assert_eq!(node_names, ["ben", "hamza", "haaroon"]);
@@ -97,7 +97,7 @@
             .expect("Failed to update metadata");
 
         g1.encode(&temp_file).unwrap();
-        let g2 = Graph::decode(&temp_file, None).unwrap();
+        let g2 = Graph::decode(&temp_file).unwrap();
         assert_graph_equal(&g1, &g2);
     }
 
@@ -110,7 +110,7 @@
         g1.add_node(2, "Bob", NO_PROPS, None).unwrap();
         g1.add_edge(3, "Alice", "Bob", NO_PROPS, None).unwrap();
         g1.encode(&temp_file).unwrap();
-        let g2 = Graph::decode(&temp_file, None).unwrap();
+        let g2 = Graph::decode(&temp_file).unwrap();
         assert_graph_equal(&g1, &g2);
     }
 
@@ -122,7 +122,7 @@
         g1.add_edge(3, "Alice", "Bob", NO_PROPS, None).unwrap();
         g1.delete_edge(19, "Alice", "Bob", None).unwrap();
         g1.encode(&temp_file).unwrap();
-        let g2 = PersistentGraph::decode(&temp_file, None).unwrap();
+        let g2 = PersistentGraph::decode(&temp_file).unwrap();
         assert_graph_equal(&g1, &g2);
 
         let edge = g2.edge("Alice", "Bob").expect("Failed to get edge");
@@ -131,7 +131,6 @@
     }
 
     #[test]
-    #[ignore = "TODO: #2377"]
     fn edge_t_props() {
         let tempdir = TempDir::new().unwrap();
         let temp_file = tempdir.path().join("graph");
@@ -151,7 +150,7 @@
             .unwrap();
 
         g1.encode(&temp_file).unwrap();
-        let g2 = Graph::decode(&temp_file, None).unwrap();
+        let g2 = Graph::decode(&temp_file).unwrap();
         assert_graph_equal(&g1, &g2);
     }
 
@@ -164,7 +163,7 @@
         e1.update_metadata([("friends", true)], None)
             .expect("Failed to update metadata");
         g1.encode(&temp_file).unwrap();
-        let g2 = Graph::decode(&temp_file, None).unwrap();
+        let g2 = Graph::decode(&temp_file).unwrap();
         assert_graph_equal(&g1, &g2);
     }
 
@@ -178,12 +177,11 @@
         g1.add_edge(7, "Bob", "Charlie", [("friends", false)], Some("two"))
             .unwrap();
         g1.encode(&temp_file).unwrap();
-        let g2 = Graph::decode(&temp_file, None).unwrap();
+        let g2 = Graph::decode(&temp_file).unwrap();
         assert_graph_equal(&g1, &g2);
     }
 
     #[test]
-    #[ignore = "TODO: #2377"]
     fn test_all_the_t_props_on_node() {
         let mut props = vec![];
         write_props_to_vec(&mut props);
@@ -193,7 +191,7 @@
         let g1 = Graph::new();
         g1.add_node(1, "Alice", props.clone(), None).unwrap();
         g1.encode(&temp_file).unwrap();
-        let g2 = Graph::decode(&temp_file, None).unwrap();
+        let g2 = Graph::decode(&temp_file).unwrap();
         assert_graph_equal(&g1, &g2);
 
         let node = g2.node("Alice").expect("Failed to get node");
@@ -211,7 +209,6 @@
     }
 
     #[test]
-    #[ignore = "TODO: #2377"]
     fn test_all_the_t_props_on_edge() {
         let mut props = vec![];
         write_props_to_vec(&mut props);
@@ -221,7 +218,7 @@
         let g1 = Graph::new();
         g1.add_edge(1, "Alice", "Bob", props.clone(), None).unwrap();
         g1.encode(&temp_file).unwrap();
-        let g2 = Graph::decode(&temp_file, None).unwrap();
+        let g2 = Graph::decode(&temp_file).unwrap();
         assert_graph_equal(&g1, &g2);
 
         let edge = g2.edge("Alice", "Bob").expect("Failed to get edge");
@@ -239,7 +236,6 @@
     }
 
     #[test]
-    #[ignore = "TODO: #2377"]
     fn test_all_the_metadata_on_edge() {
         let mut props = vec![];
         write_props_to_vec(&mut props);
@@ -251,7 +247,7 @@
         e.update_metadata(props.clone(), Some("a"))
             .expect("Failed to update metadata");
         g1.encode(&temp_file).unwrap();
-        let g2 = Graph::decode(&temp_file, None).unwrap();
+        let g2 = Graph::decode(&temp_file).unwrap();
         assert_graph_equal(&g1, &g2);
 
         let edge = g2
@@ -267,7 +263,6 @@
     }
 
     #[test]
-    #[ignore = "TODO: #2377"]
     fn test_all_the_metadata_on_node() {
         let mut props = vec![];
         write_props_to_vec(&mut props);
@@ -279,7 +274,7 @@
         n.update_metadata(props.clone())
             .expect("Failed to update metadata");
         g1.encode(&temp_file).unwrap();
-        let g2 = Graph::decode(&temp_file, None).unwrap();
+        let g2 = Graph::decode(&temp_file).unwrap();
         assert_graph_equal(&g1, &g2);
 
         let node = g2.node("Alice").expect("Failed to get node");
@@ -293,7 +288,6 @@
     }
 
     #[test]
-    #[ignore = "TODO: #2377"]
     fn graph_metadata() {
         let mut props = vec![];
         write_props_to_vec(&mut props);
@@ -305,7 +299,7 @@
         let tempdir = TempDir::new().unwrap();
         let temp_file = tempdir.path().join("graph");
         g1.encode(&temp_file).unwrap();
-        let g2 = Graph::decode(&temp_file, None).unwrap();
+        let g2 = Graph::decode(&temp_file).unwrap();
         assert_graph_equal(&g1, &g2);
 
         props.into_iter().for_each(|(name, prop)| {
@@ -315,7 +309,6 @@
     }
 
     #[test]
-    #[ignore = "TODO: #2377"]
     fn graph_temp_properties() {
         let mut props = vec![];
         write_props_to_vec(&mut props);
@@ -329,7 +322,7 @@
         let tempdir = TempDir::new().unwrap();
         let temp_file = tempdir.path().join("graph");
         g1.encode(&temp_file).unwrap();
-        let g2 = Graph::decode(&temp_file, None).unwrap();
+        let g2 = Graph::decode(&temp_file).unwrap();
         assert_graph_equal(&g1, &g2);
 
         props
@@ -385,7 +378,7 @@
         g.add_edge(7, "Bob", "Charlie", [("friends", false)], Some("two"))
             .unwrap();
 
-        let g2 = Graph::decode(&temp_cache_file, None).unwrap();
+        let g2 = Graph::decode(&temp_cache_file).unwrap();
         assert_graph_equal(&g, &g2);
 
         assert_metadata_correct(&folder, &g);
@@ -428,7 +421,7 @@
         g.add_edge(7, "Bob", "Charlie", [("friends", false)], Some("two"))
             .unwrap();
 
-        let g2 = PersistentGraph::decode(&temp_cache_file, None).unwrap();
+        let g2 = PersistentGraph::decode(&temp_cache_file).unwrap();
         assert_graph_equal(&g, &g2);
 
         assert_metadata_correct(&folder, &g);
@@ -438,8 +431,8 @@
     fn encode_decode_prop_test() {
         proptest!(|(edges in build_edge_list(100, 100))| {
             let g = build_graph_from_edge_list(&edges);
-            let bytes = g.encode_to_bytes();
-            let g2 = Graph::decode_from_bytes(&bytes, None).unwrap();
+            let bytes = g.encode_to_bytes().unwrap();
+            let g2 = Graph::decode_from_bytes(&bytes).unwrap();
             assert_graph_equal(&g, &g2);
         })
     }
diff --git a/raphtory/tests/test_deletions.rs b/raphtory/tests/test_deletions.rs
index a0e6637456..e096574a30 100644
--- a/raphtory/tests/test_deletions.rs
+++ b/raphtory/tests/test_deletions.rs
@@ -254,10 +254,8 @@ fn materialize_window_multilayer() {
     let g = PersistentGraph::new();
     g.add_edge(1, 0, 0, NO_PROPS, None).unwrap();
     g.delete_edge(3, 0, 0, Some("a")).unwrap();
-
     let w = 0..10;
     let glw = g.valid_layers("a").window(w.start, w.end);
-    let layers = glw.edge(0, 0).unwrap().explode_layers();
     let gmlw = glw.materialize().unwrap();
     assert_persistent_materialize_graph_equal(&glw, &gmlw);
 }
diff --git a/raphtory/tests/test_materialize.rs b/raphtory/tests/test_materialize.rs
index a33285a2ba..23410f7c91 100644
--- a/raphtory/tests/test_materialize.rs
+++ b/raphtory/tests/test_materialize.rs
@@ -1,6 +1,6 @@
 use proptest::{arbitrary::any, proptest};
 use raphtory::{
-    db::{api::view::MaterializedGraph, graph::graph::assert_graph_equal},
+    db::graph::graph::assert_graph_equal,
     prelude::*,
     test_storage,
     test_utils::{build_edge_list, build_graph_from_edge_list},