From a13da1e1f5cb05211f546a6ffcf9e1c3a5674a89 Mon Sep 17 00:00:00 2001
From: arienandalibi
Date: Tue, 11 Nov 2025 03:27:24 -0500
Subject: [PATCH 01/55] Added tests for loading edges from Polars and from
 FireDucks. Added a load_edges_from_polars that internally calls to_pandas()
 on the Polars DataFrame. FireDucks DataFrames already load correctly through
 load_edges_from_pandas.

---
 python/python/raphtory/__init__.pyi           |  3 ++
 .../test_loaders/test_load_from_fireducks.py  | 53 +++++++++++++++++++
 .../test_loaders/test_load_from_polars.py     | 53 +++++++++++++++++++
 raphtory/src/python/graph/graph.rs            | 37 +++++++++++++
 4 files changed, 146 insertions(+)
 create mode 100644 python/tests/test_base_install/test_loaders/test_load_from_fireducks.py
 create mode 100644 python/tests/test_base_install/test_loaders/test_load_from_polars.py

diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi
index adac628ab2..9a91bb2bd0 100644
--- a/python/python/raphtory/__init__.pyi
+++ b/python/python/raphtory/__init__.pyi
@@ -1173,6 +1173,9 @@ class Graph(GraphView):
             GraphError: If the operation fails.
         """
 
+    def load_edges_from_polars(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None):
+        ...
+
     @staticmethod
     def load_from_file(path: str) -> Graph:
         """
diff --git a/python/tests/test_base_install/test_loaders/test_load_from_fireducks.py b/python/tests/test_base_install/test_loaders/test_load_from_fireducks.py
new file mode 100644
index 0000000000..e04524cbad
--- /dev/null
+++ b/python/tests/test_base_install/test_loaders/test_load_from_fireducks.py
@@ -0,0 +1,53 @@
+import pytest
+from raphtory import Graph
+import fireducks
+import fireducks.pandas as fpd
+import pandas
+
+def _collect_edges(g: Graph):
+    return sorted((e.history()[0], e.src.id, e.dst.id, e["value"]) for e in g.edges)
+
+def test_load_edges_from_fireducks_df():
+    # FireDucks DataFrame (pandas-compatible API)
+    df = fpd.DataFrame(
+        {
+            "time": [1, 2, 3],
+            "src": [1, 2, 3],
+            "dst": [2, 3, 4],
+            "value": [10.0, 20.0, 30.0],
+        }
+    )
+
+    g: Graph = Graph()
+    g.load_edges_from_pandas(df=df, time="time", src="src", dst="dst", properties=["value"])
+    assert [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)] == _collect_edges(g)
+
+def test_fireducks_matches_pandas_for_same_edges():
+    df_fireducks = fpd.DataFrame(
+        {
+            "time": [1, 2, 3],
+            "src": [1, 2, 3],
+            "dst": [2, 3, 4],
+            "value": [10.0, 20.0, 30.0],
+        }
+    )
+    df_pandas = pandas.DataFrame(
+        {
+            "time": [1, 2, 3],
+            "src": [1, 2, 3],
+            "dst": [2, 3, 4],
+            "value": [10.0, 20.0, 30.0],
+        }
+    )
+
+    g_fireducks: Graph = Graph()
+    g_fireducks.load_edges_from_pandas(df=df_fireducks, time="time", src="src", dst="dst", properties=["value"])
+
+    g_pandas = Graph()
+    g_pandas.load_edges_from_pandas(df=df_pandas, time="time", src="src", dst="dst", properties=["value"])
+
+    expected = [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)]
+
+    assert _collect_edges(g_fireducks) == _collect_edges(g_pandas)
+    assert _collect_edges(g_fireducks) == expected
+    assert _collect_edges(g_pandas) == expected
\ No newline at end of file
diff --git a/python/tests/test_base_install/test_loaders/test_load_from_polars.py b/python/tests/test_base_install/test_loaders/test_load_from_polars.py
new file mode 100644
index 0000000000..e696040179
--- /dev/null
+++ b/python/tests/test_base_install/test_loaders/test_load_from_polars.py
@@ -0,0 +1,53 @@
+import polars as pl
+from raphtory import Graph
+import pytest
+
+def _collect_edges(g: Graph):
+    return sorted((e.history()[0], e.src.id, e.dst.id, e["value"]) for e in g.edges)
+
+def test_load_edges_from_polars_df_error():
+    df = pl.DataFrame(
+        {
+            "time": [1, 2, 3],
+            "src": [1, 2, 3],
+            "dst": [2, 3, 4],
+            "value": [10.0, 20.0, 30.0],
+        }
+    )
+
+    g = Graph()
+    with pytest.raises(Exception) as e:
+        # Current loader expects a pandas DataFrame; this will fail in pyarrow.Table.from_pandas
+        g.load_edges_from_pandas(df=df, time="time", src="src", dst="dst", properties=["value"])
+
+    print(f"\nCaptured error: {str(e.value)}")
+
+def test_load_edges_from_polars_df_via_to_pandas():
+    df = pl.DataFrame(
+        {
+            "time": [1, 2, 3],
+            "src": [1, 2, 3],
+            "dst": [2, 3, 4],
+            "value": [10.0, 20.0, 30.0],
+        }
+    )
+
+    g = Graph()
+    g.load_edges_from_pandas(df=df.to_pandas(), time="time", src="src", dst="dst", properties=["value"])
+    expected = [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)]
+    assert _collect_edges(g) == expected
+
+def test_load_edges_from_polars_df():
+    df = pl.DataFrame(
+        {
+            "time": [1, 2, 3],
+            "src": [1, 2, 3],
+            "dst": [2, 3, 4],
+            "value": [10.0, 20.0, 30.0],
+        }
+    )
+
+    g = Graph()
+    g.load_edges_from_polars(df=df, time="time", src="src", dst="dst", properties=["value"])
+    expected = [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)]
+    assert _collect_edges(g) == expected
\ No newline at end of file
diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs
index b02e7cbc25..6abef57dee 100644
--- a/raphtory/src/python/graph/graph.rs
+++ b/raphtory/src/python/graph/graph.rs
@@ -763,6 +763,43 @@ impl PyGraph {
         )
     }
 
+    #[pyo3(signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None))]
+    fn load_edges_from_polars(
+        &self,
+        df: &Bound<PyAny>,
+        time: &str,
+        src: &str,
+        dst: &str,
+        properties: Option<Vec<String>>,
+        metadata: Option<Vec<String>>,
+        shared_metadata: Option<HashMap<String, Prop>>,
+        layer: Option<&str>,
+        layer_col: Option<&str>,
+    ) -> Result<(), GraphError> {
+        let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default();
+        let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default();
+
+        // Convert Polars DataFrame -> pandas.DataFrame
+        let pandas_df = df.call_method0("to_pandas").map_err(|e| {
+            GraphError::LoadFailure(format!(
+                "Failed converting Polars DataFrame to pandas via to_pandas(): {e}"
+            ))
+        })?;
+
+        load_edges_from_pandas(
+            &self.graph,
+            &pandas_df,
+            time,
+            src,
+            dst,
+            &properties,
+            &metadata,
+            shared_metadata.as_ref(),
+            layer,
+            layer_col,
+        )
+    }
+
     /// Load edges from a Parquet file into the graph.
     ///
     /// Arguments:

From 6afe50e4312fdcdc6f6c941ccbe97dbcf641fbfa Mon Sep 17 00:00:00 2001
From: arienandalibi
Date: Sun, 16 Nov 2025 18:17:10 -0500
Subject: [PATCH 02/55] Adding loading of data (only edges for now) from Arrow
 directly

---
 python/python/raphtory/__init__.pyi           |  3 +
 raphtory/src/python/graph/graph.rs            | 45 ++++++++-
 raphtory/src/python/graph/io/arrow_loaders.rs | 92 +++++++++++++++++++
 raphtory/src/python/graph/io/mod.rs           |  1 +
 .../src/python/graph/io/pandas_loaders.rs     |  2 +-
 5 files changed, 139 insertions(+), 4 deletions(-)
 create mode 100644 raphtory/src/python/graph/io/arrow_loaders.rs

diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi
index 9a91bb2bd0..a8b093d350 100644
--- a/python/python/raphtory/__init__.pyi
+++ b/python/python/raphtory/__init__.pyi
@@ -1129,6 +1129,9 @@ class Graph(GraphView):
             GraphError: If the operation fails.
""" + def load_edges_from_arrow(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None): + ... + def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges from a Pandas DataFrame into the graph. diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 6abef57dee..538bc1d3f7 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -33,6 +33,7 @@ use std::{ fmt::{Debug, Formatter}, path::PathBuf, }; +use crate::python::graph::io::arrow_loaders::load_edges_from_arrow; /// A temporal graph with event semantics. /// @@ -779,10 +780,17 @@ impl PyGraph { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - // Convert Polars DataFrame -> pandas.DataFrame - let pandas_df = df.call_method0("to_pandas").map_err(|e| { + // Convert Polars DataFrame to pandas.DataFrame + let kwargs = PyDict::new(df.py()); + kwargs + .set_item("use_pyarrow_extension_array", true) + .map_err(|e| { + GraphError::LoadFailure(format!("Failed setting kwargs for to_pandas(): {e}")) + })?; + + let pandas_df = df.call_method("to_pandas", (), Some(&kwargs)).map_err(|e| { GraphError::LoadFailure(format!( - "Failed converting Polars DataFrame to pandas via to_pandas(): {e}" + "Failed converting Polars DataFrame to pandas via to_pandas(use_pyarrow_extension_array=True): {e}" )) })?; @@ -800,6 +808,37 @@ impl PyGraph { ) } + #[pyo3( + signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) + )] + fn load_edges_from_arrow( + &self, + df: &Bound, + time: &str, + src: &str, + dst: &str, + properties: Option>, + metadata: Option>, + shared_metadata: Option>, + layer: Option<&str>, + layer_col: Option<&str>, + ) -> Result<(), GraphError> { + let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); + let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); + load_edges_from_arrow( + &self.graph, + df, + time, + src, + dst, + &properties, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + ) + } + /// Load edges from a Parquet file into the graph. 
/// /// Arguments: diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs new file mode 100644 index 0000000000..71e81e467e --- /dev/null +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -0,0 +1,92 @@ +use crate::{ + db::api::view::StaticGraphViewOps, + errors::GraphError, + io::arrow::dataframe::{DFChunk, DFView}, + prelude::{AdditionOps, PropertyAdditionOps}, + python::graph::io::pandas_loaders::{array_to_rust, is_jupyter}, + serialise::incremental::InternalCache, +}; +use pyo3::{prelude::*, types::PyDict}; +use raphtory_api::core::entities::properties::prop::Prop; +use std::collections::HashMap; +use crate::io::arrow::df_loaders::load_edges_from_df; + +pub(crate) fn load_edges_from_arrow< + 'py, + G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, +>( + graph: &G, + df: &Bound<'py, PyAny>, + time: &str, + src: &str, + dst: &str, + properties: &[&str], + metadata: &[&str], + shared_metadata: Option<&HashMap>, + layer: Option<&str>, + layer_col: Option<&str>, +) -> Result<(), GraphError> { + let mut cols_to_check = vec![src, dst, time]; + cols_to_check.extend_from_slice(properties); + cols_to_check.extend_from_slice(metadata); + if let Some(layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); + } + + let df_view = process_arrow_py_df(df, cols_to_check.clone())?; + df_view.check_cols_exist(&cols_to_check)?; + load_edges_from_df(df_view, time, src, dst, properties, metadata, shared_metadata, layer, layer_col, graph) +} + +pub(crate) fn process_arrow_py_df<'a>( + df: &Bound<'a, PyAny>, + col_names: Vec<&str>, +) -> PyResult> + 'a>> { + let py = df.py(); + is_jupyter(py); + + // We assume df is an Arrow object (e.g. pyarrow Table or RecordBatchReader) + // that implements a to_batches(max_chunksize=...) method + let kwargs = PyDict::new(py); + kwargs.set_item("max_chunksize", 1_000_000)?; + + // Get a list of RecordBatch-like Python objects + let rb = df + .call_method("to_batches", (), Some(&kwargs))? + .extract::>>()?; + + // Derive the column names from the first batch's schema, then filter + let names: Vec = if let Some(batch0) = rb.first() { + let schema = batch0.getattr("schema")?; + schema.getattr("names")?.extract::>()? 
+    } else {
+        vec![]
+    }
+    .into_iter()
+    .filter(|x| col_names.contains(&x.as_str()))
+    .collect();
+
+    let names_len = names.len();
+
+    let chunks = rb.into_iter().map(move |rb| {
+        let columns = rb.getattr("columns")?.extract::<Vec<Bound<PyAny>>>()?;
+        let chunk = (0..names_len)
+            .map(|i| {
+                // `rb.column(i)` -> pyarrow.Array
+                let array = &columns[i];
+                let arr = array_to_rust(array).map_err(GraphError::from)?;
+                Ok::<_, GraphError>(arr)
+            })
+            .collect::<Result<Vec<_>, GraphError>>()?;
+
+        Ok(DFChunk { chunk })
+    });
+
+    let num_rows: usize = df.call_method0("__len__")?.extract()?;
+
+    Ok(DFView {
+        names,
+        chunks,
+        num_rows,
+    })
+}
diff --git a/raphtory/src/python/graph/io/mod.rs b/raphtory/src/python/graph/io/mod.rs
index 8252b4b5ab..b3b8faa385 100644
--- a/raphtory/src/python/graph/io/mod.rs
+++ b/raphtory/src/python/graph/io/mod.rs
@@ -1,5 +1,6 @@
 use pyo3::{create_exception, exceptions::PyException};
 
+pub mod arrow_loaders;
 pub mod pandas_loaders;
 
 create_exception!(exceptions, ArrowErrorException, PyException);
diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs
index 5f7bef7427..aa311b16dc 100644
--- a/raphtory/src/python/graph/io/pandas_loaders.rs
+++ b/raphtory/src/python/graph/io/pandas_loaders.rs
@@ -253,7 +253,7 @@ pub fn array_to_rust(obj: &Bound<PyAny>) -> PyResult<ArrayRef> {
     Ok(array)
 }
 
-fn is_jupyter(py: Python) {
+pub(crate) fn is_jupyter(py: Python) {
     let code = c_str!(
         r#"
 try:

From c1672522b405a62d4a0cdda614caf42b52a0e9f8 Mon Sep 17 00:00:00 2001
From: arienandalibi
Date: Mon, 17 Nov 2025 00:32:09 -0500
Subject: [PATCH 03/55] Adding loading of data (only edges for now) from Arrow
 with streaming in Rust instead of obtaining each column of each batch from
 Python individually.

---
 python/python/raphtory/__init__.pyi           |   3 +
 raphtory/src/python/graph/graph.rs            |  43 +++++-
 raphtory/src/python/graph/io/arrow_loaders.rs | 125 +++++++++++++++++-
 3 files changed, 164 insertions(+), 7 deletions(-)

diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi
index a8b093d350..d9cc72e39b 100644
--- a/python/python/raphtory/__init__.pyi
+++ b/python/python/raphtory/__init__.pyi
@@ -1132,6 +1132,9 @@ class Graph(GraphView):
     def load_edges_from_arrow(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None):
         ...
 
+    def load_edges_from_arrow_streaming(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None):
+        ...
+
     def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None:
         """
         Load edges from a Pandas DataFrame into the graph.
diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs
index 538bc1d3f7..438b4e9e56 100644
--- a/raphtory/src/python/graph/graph.rs
+++ b/raphtory/src/python/graph/graph.rs
@@ -14,8 +14,15 @@ use crate::{
     prelude::*,
     python::{
         graph::{
-            edge::PyEdge, graph_with_deletions::PyPersistentGraph, index::PyIndexSpec,
-            io::pandas_loaders::*, node::PyNode, views::graph_view::PyGraphView,
+            edge::PyEdge,
+            graph_with_deletions::PyPersistentGraph,
+            index::PyIndexSpec,
+            io::{
+                arrow_loaders::{load_edges_from_arrow, load_edges_from_arrow_streaming},
+                pandas_loaders::*,
+            },
+            node::PyNode,
+            views::graph_view::PyGraphView,
         },
         types::iterable::FromIterable,
         utils::{PyNodeRef, PyTime},
@@ -33,7 +40,6 @@ use std::{
     fmt::{Debug, Formatter},
     path::PathBuf,
 };
-use crate::python::graph::io::arrow_loaders::load_edges_from_arrow;
 
 /// A temporal graph with event semantics.
 ///
@@ -839,6 +845,37 @@ impl PyGraph {
         )
     }
 
+    #[pyo3(
+        signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None)
+    )]
+    fn load_edges_from_arrow_streaming(
+        &self,
+        df: &Bound<PyAny>,
+        time: &str,
+        src: &str,
+        dst: &str,
+        properties: Option<Vec<String>>,
+        metadata: Option<Vec<String>>,
+        shared_metadata: Option<HashMap<String, Prop>>,
+        layer: Option<&str>,
+        layer_col: Option<&str>,
+    ) -> Result<(), GraphError> {
+        let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default();
+        let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default();
+        load_edges_from_arrow_streaming(
+            &self.graph,
+            df,
+            time,
+            src,
+            dst,
+            &properties,
+            &metadata,
+            shared_metadata.as_ref(),
+            layer,
+            layer_col,
+        )
+    }
+
     /// Load edges from a Parquet file into the graph.
     ///
     /// Arguments:
diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs
index 71e81e467e..0dacae0d0d 100644
--- a/raphtory/src/python/graph/io/arrow_loaders.rs
+++ b/raphtory/src/python/graph/io/arrow_loaders.rs
@@ -1,15 +1,24 @@
 use crate::{
     db::api::view::StaticGraphViewOps,
     errors::GraphError,
-    io::arrow::dataframe::{DFChunk, DFView},
+    io::arrow::{
+        dataframe::{DFChunk, DFView},
+        df_loaders::load_edges_from_df,
+    },
     prelude::{AdditionOps, PropertyAdditionOps},
     python::graph::io::pandas_loaders::{array_to_rust, is_jupyter},
     serialise::incremental::InternalCache,
 };
-use pyo3::{prelude::*, types::PyDict};
+use arrow::array::{
+    ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream},
+    RecordBatchReader,
+};
+use pyo3::{
+    prelude::*,
+    types::{PyCapsule, PyDict},
+};
 use raphtory_api::core::entities::properties::prop::Prop;
 use std::collections::HashMap;
-use crate::io::arrow::df_loaders::load_edges_from_df;
 
 pub(crate) fn load_edges_from_arrow<
     'py,
     G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache,
 >(
     graph: &G,
     df: &Bound<'py, PyAny>,
     time: &str,
     src: &str,
     dst: &str,
     properties: &[&str],
     metadata: &[&str],
     shared_metadata: Option<&HashMap<String, Prop>>,
     layer: Option<&str>,
     layer_col: Option<&str>,
 ) -> Result<(), GraphError> {
     let mut cols_to_check = vec![src, dst, time];
     cols_to_check.extend_from_slice(properties);
     cols_to_check.extend_from_slice(metadata);
     if let Some(layer_col) = layer_col {
         cols_to_check.push(layer_col.as_ref());
     }
 
     let df_view = process_arrow_py_df(df, cols_to_check.clone())?;
     df_view.check_cols_exist(&cols_to_check)?;
-    load_edges_from_df(df_view, time, src, dst, properties, metadata, shared_metadata, layer, layer_col, graph)
+    load_edges_from_df(
+        df_view,
+        time,
+        src,
+        dst,
+        properties,
+        metadata,
+        shared_metadata,
+        layer,
+        layer_col,
+        graph,
+    )
+}
+
+pub(crate) fn load_edges_from_arrow_streaming<
+    'py,
+    G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache,
+>(
+    graph: &G,
+    df: &Bound<'py, PyAny>,
+    time: &str,
+    src: &str,
+    dst: &str,
+    properties: &[&str],
+    metadata: &[&str],
+    shared_metadata: Option<&HashMap<String, Prop>>,
+    layer: Option<&str>,
+    layer_col: Option<&str>,
+) -> Result<(), GraphError> {
+    let mut
cols_to_check = vec![src, dst, time]; + cols_to_check.extend_from_slice(properties); + cols_to_check.extend_from_slice(metadata); + if let Some(layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); + } + + let df_view = process_arrow_py_df_streaming(df, cols_to_check.clone())?; + df_view.check_cols_exist(&cols_to_check)?; + load_edges_from_df( + df_view, + time, + src, + dst, + properties, + metadata, + shared_metadata, + layer, + layer_col, + graph, + ) +} + +pub(crate) fn process_arrow_py_df_streaming<'a>( + df: &Bound<'a, PyAny>, + col_names: Vec<&str>, +) -> PyResult> + 'a>> { + let py = df.py(); + is_jupyter(py); + + // Expect an object that can use the Arrow C Stream interface + if !df.hasattr("__arrow_c_stream__")? { + return Err(GraphError::LoadFailure( + "arrow object must implement __arrow_c_stream__", + )); + } + + let stream_capsule_any = df.call_method0("__arrow_c_stream__")?; + let stream_capsule = stream_capsule_any.downcast::()?; + + // We need to use the pointer to build an ArrowArrayStreamReader + if !stream_capsule.is_valid() { + return Err(GraphError::LoadFailure("Stream capsule is not valid")); + } + let stream_ptr = stream_capsule.pointer() as *mut FFI_ArrowArrayStream; + let reader = unsafe { ArrowArrayStreamReader::from_raw(stream_ptr) }.map_err(|e| { + GraphError::LoadFailure(format!( + "Arrow stream error while creating the reader: {}", + e.to_string() + )) + })?; + + // Get column names and indices once only + let schema = reader.schema(); + let mut names: Vec = Vec::with_capacity(col_names.len()); + let mut indices: Vec = Vec::with_capacity(col_names.len()); + + for (idx, field) in schema.fields().iter().enumerate() { + if col_names.contains(&field.name().as_str()) { + names.push(field.name().clone()); + indices.push(idx); + } + } + + let chunks = reader.into_iter().map(move |batch_res| { + let batch = batch_res.map_err(|e| { + GraphError::LoadFailure(format!( + "Arrow stream error while reading a batch: {}", + e.to_string() + )) + })?; + let chunk_arrays = indices + .iter() + .map(|&idx| batch.column(idx).clone()) + .collect::>(); + Ok(DFChunk::new(chunk_arrays)) + }); + + let num_rows: usize = df.call_method0("__len__")?.extract()?; + Ok(DFView::new(names, chunks, num_rows)) } pub(crate) fn process_arrow_py_df<'a>( From dc5635d69219c9d3b34533e8b22aa0a49f78328f Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Tue, 18 Nov 2025 01:34:01 -0500 Subject: [PATCH 04/55] Added loading of edges from DuckDB, either normally or using streaming. --- python/python/raphtory/__init__.pyi | 6 ++ raphtory/src/python/graph/graph.rs | 69 +++++++++++++++++++ raphtory/src/python/graph/io/arrow_loaders.rs | 24 +++---- 3 files changed, 86 insertions(+), 13 deletions(-) diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index d9cc72e39b..ed4492bad0 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1135,6 +1135,12 @@ class Graph(GraphView): def load_edges_from_arrow_streaming(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None): ... + def load_edges_from_duckdb(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None): + ... + + def load_edges_from_duckdb_streaming(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None): + ... 
+ def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges from a Pandas DataFrame into the graph. diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 438b4e9e56..69ad012118 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -876,6 +876,75 @@ impl PyGraph { ) } + #[pyo3( + signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) + )] + fn load_edges_from_duckdb( + &self, + df: &Bound, + time: &str, + src: &str, + dst: &str, + properties: Option>, + metadata: Option>, + shared_metadata: Option>, + layer: Option<&str>, + layer_col: Option<&str>, + ) -> Result<(), GraphError> { + let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); + let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); + + // Call fetch_arrow_table() so we can use arrow ingestion pathway + let arrow_df = df.call_method0("fetch_arrow_table").map_err(|e| { + GraphError::LoadFailure("Failed calling fetch_arrow_table() on the DuckDB instance".to_string()) + })?; + + load_edges_from_arrow( + &self.graph, + &arrow_df, + time, + src, + dst, + &properties, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + ) + } + + #[pyo3( + signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) + )] + fn load_edges_from_duckdb_streaming( + &self, + df: &Bound, + time: &str, + src: &str, + dst: &str, + properties: Option>, + metadata: Option>, + shared_metadata: Option>, + layer: Option<&str>, + layer_col: Option<&str>, + ) -> Result<(), GraphError> { + let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); + let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); + + load_edges_from_arrow_streaming( + &self.graph, + df, + time, + src, + dst, + &properties, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + ) + } + /// Load edges from a Parquet file into the graph. /// /// Arguments: diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index 0dacae0d0d..1e180753df 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -9,16 +9,14 @@ use crate::{ python::graph::io::pandas_loaders::{array_to_rust, is_jupyter}, serialise::incremental::InternalCache, }; -use arrow::array::{ - ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream}, - RecordBatchReader, -}; +use arrow::array::{ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream}, RecordBatch, RecordBatchReader}; use pyo3::{ prelude::*, types::{PyCapsule, PyDict}, }; use raphtory_api::core::entities::properties::prop::Prop; use std::collections::HashMap; +use arrow::datatypes::SchemaRef; pub(crate) fn load_edges_from_arrow< 'py, @@ -105,20 +103,20 @@ pub(crate) fn process_arrow_py_df_streaming<'a>( // Expect an object that can use the Arrow C Stream interface if !df.hasattr("__arrow_c_stream__")? 
{ - return Err(GraphError::LoadFailure( - "arrow object must implement __arrow_c_stream__", - )); + return Err(PyErr::from(GraphError::LoadFailure( + "arrow object must implement __arrow_c_stream__".to_string(), + ))); } - let stream_capsule_any = df.call_method0("__arrow_c_stream__")?; - let stream_capsule = stream_capsule_any.downcast::()?; + let stream_capsule_any: Bound<'a, PyAny> = df.call_method0("__arrow_c_stream__")?; + let stream_capsule: &Bound<'a, PyCapsule> = stream_capsule_any.downcast::()?; // We need to use the pointer to build an ArrowArrayStreamReader if !stream_capsule.is_valid() { - return Err(GraphError::LoadFailure("Stream capsule is not valid")); + return Err(PyErr::from(GraphError::LoadFailure("Stream capsule is not valid".to_string()))); } let stream_ptr = stream_capsule.pointer() as *mut FFI_ArrowArrayStream; - let reader = unsafe { ArrowArrayStreamReader::from_raw(stream_ptr) }.map_err(|e| { + let reader: ArrowArrayStreamReader = unsafe { ArrowArrayStreamReader::from_raw(stream_ptr) }.map_err(|e| { GraphError::LoadFailure(format!( "Arrow stream error while creating the reader: {}", e.to_string() @@ -126,7 +124,7 @@ pub(crate) fn process_arrow_py_df_streaming<'a>( })?; // Get column names and indices once only - let schema = reader.schema(); + let schema: SchemaRef = reader.schema(); let mut names: Vec = Vec::with_capacity(col_names.len()); let mut indices: Vec = Vec::with_capacity(col_names.len()); @@ -137,7 +135,7 @@ pub(crate) fn process_arrow_py_df_streaming<'a>( } } - let chunks = reader.into_iter().map(move |batch_res| { + let chunks = reader.into_iter().map(move |batch_res: Result| { let batch = batch_res.map_err(|e| { GraphError::LoadFailure(format!( "Arrow stream error while reading a batch: {}", From 7e11387af107d31582caef4057c21c8ba49128c0 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Tue, 18 Nov 2025 16:20:40 -0500 Subject: [PATCH 05/55] Added loading edges from fireducks.pandas dataframes. General cleaning up. Committing benchmarks and tests that check graph equality when using different ingestion pathways. 
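A minimal usage sketch of the FireDucks pathway added in this commit (the
column names and literal values are illustrative; assumes FireDucks is
installed):

    import fireducks.pandas as fpd
    from raphtory import Graph

    df = fpd.DataFrame({"time": [1, 2], "src": [1, 2], "dst": [2, 3]})
    g = Graph()
    # load_edges_from_fireducks converts to pandas via to_pandas() under the hood
    g.load_edges_from_fireducks(df=df, time="time", src="src", dst="dst")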
---
 dataset_tests/flatten_btc_datasets.py         |  38 ++++
 dataset_tests/ingestion_benchmarks.py         | 209 ++++++++++++++++++
 .../ingestion_equivalence_assertions.py       |  66 ++++++
 python/python/raphtory/__init__.pyi           |   3 +
 raphtory/src/python/graph/graph.rs            |  41 +++-
 raphtory/src/python/graph/io/arrow_loaders.rs |  54 +++--
 6 files changed, 388 insertions(+), 23 deletions(-)
 create mode 100644 dataset_tests/flatten_btc_datasets.py
 create mode 100644 dataset_tests/ingestion_benchmarks.py
 create mode 100644 dataset_tests/ingestion_equivalence_assertions.py

diff --git a/dataset_tests/flatten_btc_datasets.py b/dataset_tests/flatten_btc_datasets.py
new file mode 100644
index 0000000000..0254c0722f
--- /dev/null
+++ b/dataset_tests/flatten_btc_datasets.py
@@ -0,0 +1,38 @@
+from pathlib import Path
+import pandas as pd
+
+FLATTENED_FILE = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/flattened_data.parquet"
+DATASET_DIR = "/Users/arien/Downloads"
+
+def flatten_dataframes_append():
+    dfs = []
+    flattened_file = Path(FLATTENED_FILE)
+    dataset_dir = Path(DATASET_DIR)
+
+    if flattened_file.exists():
+        dfs.append(pd.read_parquet(flattened_file))
+
+    def get_addr(v):
+        if v is not None:
+            return v[0]["address"]
+    files = list(dataset_dir.glob("*.snappy.parquet"))
+    num_files = len(files)
+    for i in range(num_files):
+        fp = files[i]
+        print(f"Processing file {i}/{num_files}: {fp}")
+        df = pd.read_parquet(fp)
+        df = pd.DataFrame({
+            "block_timestamp": df["block_timestamp"],
+            "inputs_address": df["inputs"].apply(get_addr),
+            "outputs_address": df["outputs"].apply(get_addr),
+        })
+        df = df.dropna(subset=["block_timestamp", "inputs_address", "outputs_address"])
+        dfs.append(df)
+
+    out = pd.concat(dfs, ignore_index=True)
+    print(f"Total: {len(out)} rows")
+    out.to_parquet(FLATTENED_FILE, index=False, compression="snappy")
+
+
+if __name__ == "__main__":
+    flatten_dataframes_append()
\ No newline at end of file
diff --git a/dataset_tests/ingestion_benchmarks.py b/dataset_tests/ingestion_benchmarks.py
new file mode 100644
index 0000000000..7b117b8625
--- /dev/null
+++ b/dataset_tests/ingestion_benchmarks.py
@@ -0,0 +1,209 @@
+import gc
+import time
+
+import pandas as pd
+import polars as pl
+import duckdb
+import fireducks.pandas as fpd
+
+from raphtory import Graph
+
+FLATTENED_FILE = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/flattened_data.parquet"
+
+def bench_pandas(df: pd.DataFrame) -> float:
+    g = Graph()
+    start = time.perf_counter()
+    g.load_edges_from_pandas(df=df, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    total = time.perf_counter() - start
+    print(f"[pandas] ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}")
+    del g
+    gc.collect()
+    return total
+
+def bench_fire_ducks_pandas(df: fpd.frame.DataFrame) -> float:
+    assert "fireducks.pandas.frame.DataFrame" in str(type(df))
+    g = Graph()
+    start = time.perf_counter()
+    g.load_edges_from_fireducks(df=df, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    total = time.perf_counter() - start
+    print(f"[fireducks] ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}")
+    del g
+    gc.collect()
+    return total
+
+def bench_polars_to_pandas(df: pl.DataFrame, use_pyarrow: bool) -> float:
+    g = Graph()
+    start = time.perf_counter()
+    df_pd_from_pl = df.to_pandas(use_pyarrow_extension_array=use_pyarrow)
+    mid = time.perf_counter()
+    g.load_edges_from_pandas(df=df_pd_from_pl, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    end = time.perf_counter()
+    convert_time = mid - start
+    ingestion_time = end - mid
+    total_time = end - start
+    print(
+        f"[polars->pandas] convert use_pyarrow_extension_array={use_pyarrow} {convert_time:.3f}s, ingest {ingestion_time:.3f}s, "
+        f"total {total_time:.3f}s;"
+    )
+    del g, df_pd_from_pl
+    gc.collect()
+    return total_time
+
+def bench_polars_native(df: pl.DataFrame) -> float:
+    g = Graph()
+    start = time.perf_counter()
+    g.load_edges_from_polars(df=df, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    total = time.perf_counter() - start
+    print(
+        f"[polars native] ingestion took {total:.3f}s"
+    )
+    del g
+    gc.collect()
+    return total
+
+def bench_polars_to_arrow(df: pl.DataFrame) -> float:
+    g = Graph()
+    start = time.perf_counter()
+    df_arrow_from_pl = df.to_arrow()
+    mid = time.perf_counter()
+    g.load_edges_from_arrow(df=df_arrow_from_pl, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    end = time.perf_counter()
+    convert_time = mid - start
+    ingestion_time = end - mid
+    total_time = end - start
+    print(
+        f"[polars->arrow] convert {convert_time:.3f}s, ingest {ingestion_time:.3f}s, "
+        f"total {total_time:.3f}s;"
+    )
+    del g, df_arrow_from_pl
+    gc.collect()
+    return total_time
+
+def bench_polars_to_arrow_streaming(df: pl.DataFrame) -> float:
+    g = Graph()
+    start = time.perf_counter()
+    df_arrow_from_pl = df.to_arrow()
+    mid = time.perf_counter()
+    g.load_edges_from_arrow_streaming(df=df_arrow_from_pl, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    end = time.perf_counter()
+    convert_time = mid - start
+    ingestion_time = end - mid
+    total_time = end - start
+    print(
+        f"[polars->arrow] with streaming convert {convert_time:.3f}s, ingest {ingestion_time:.3f}s, "
+        f"total {total_time:.3f}s;"
+    )
+    del g, df_arrow_from_pl
+    gc.collect()
+    return total_time
+
+def bench_duckdb(df: pl.DataFrame) -> float:
+    g = Graph()
+    df_arrow_from_pl = df.to_arrow()
+    start = time.perf_counter()
+    duckdb_df = duckdb.sql("SELECT * FROM df_arrow_from_pl")
+    mid = time.perf_counter()
+    # internally calls fetch_arrow_table() on duckdb_df
+    g.load_edges_from_duckdb(df=duckdb_df, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    end = time.perf_counter()
+    load_time = mid - start
+    ingestion_time = end - mid
+    total_time = end - start
+    print(
+        f"[polars->duckdb] load {load_time:.3f}s, ingest {ingestion_time:.3f}s, "
+        f"total {total_time:.3f}s;"
+    )
+    del g, df_arrow_from_pl, duckdb_df
+    gc.collect()
+    return total_time
+
+def bench_duckdb_streaming(df: pl.DataFrame) -> float:
+    g = Graph()
+    df_arrow_from_pl = df.to_arrow()
+    start = time.perf_counter()
+    duckdb_df = duckdb.sql("SELECT * FROM df_arrow_from_pl")
+    mid = time.perf_counter()
+    # uses the __arrow_c_stream__() interface internally
+    g.load_edges_from_duckdb_streaming(df=duckdb_df, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    end = time.perf_counter()
+    load_time = mid - start
+    ingestion_time = end - mid
+    total_time = end - start
+    print(
+        f"[polars->duckdb] streaming load {load_time:.3f}s, ingest {ingestion_time:.3f}s, "
+        f"total {total_time:.3f}s;"
+    )
+    del g, df_arrow_from_pl, duckdb_df
+    gc.collect()
+    return total_time
+
+
+def ingestion_speed_btc_dataset():
+    df_pd: pd.DataFrame = pd.read_parquet(FLATTENED_FILE)
+    df_fireducks: fpd.frame.DataFrame = fpd.read_parquet(FLATTENED_FILE)
+    df_pl: pl.DataFrame = pl.read_parquet(FLATTENED_FILE)
+
+    pandas_ingestion_times = []
+    fireducks_ingestion_times = []
+    pl_native_total_times = []
+    pl_to_arrow_total_times = []
+    pl_to_arrow_streaming_total_times = []
+    duckdb_ingestion_times = []
+    duckdb_streaming_ingestion_times = []
+
+    for _ in range(5):
+        # 1) Pandas ingestion
+        pandas_time = bench_pandas(df_pd)
+        pandas_ingestion_times.append(pandas_time)
+        gc.collect()
+
+        # 2) Fireducks Pandas ingestion
+        fpd_time = bench_fire_ducks_pandas(df_fireducks)
+        fireducks_ingestion_times.append(fpd_time)
+        gc.collect()
+
+        # 3) to_pandas() called within rust
+        polars_native_time = bench_polars_native(df=df_pl)
+        pl_native_total_times.append(polars_native_time)
+        gc.collect()
+
+        # 4) Arrow ingestion
+        arrow_time = bench_polars_to_arrow(df_pl)
+        pl_to_arrow_total_times.append(arrow_time)
+        gc.collect()
+
+        # 5) Arrow ingestion streaming
+        arrow_streaming_time = bench_polars_to_arrow_streaming(df_pl)
+        pl_to_arrow_streaming_total_times.append(arrow_streaming_time)
+        gc.collect()
+
+        # 6) DuckDB ingestion
+        duckdb_time = bench_duckdb(df_pl)
+        duckdb_ingestion_times.append(duckdb_time)
+        gc.collect()
+
+        # 7) DuckDB streaming ingestion
+        duckdb_streaming_time = bench_duckdb_streaming(df_pl)
+        duckdb_streaming_ingestion_times.append(duckdb_streaming_time)
+        gc.collect()
+
+
+    formatted_pandas = [f"{num:.3f}s" for num in pandas_ingestion_times]
+    formatted_fireducks = [f"{num:.3f}s" for num in fireducks_ingestion_times]
+    formatted_pl_native = [f"{num:.3f}s" for num in pl_native_total_times]
+    formatted_pl_to_arrow = [f"{num:.3f}s" for num in pl_to_arrow_total_times]
+    formatted_pl_to_arrow_streaming = [f"{num:.3f}s" for num in pl_to_arrow_streaming_total_times]
+    formatted_duckdb_time = [f"{num:.3f}s" for num in duckdb_ingestion_times]
+    formatted_duckdb_streaming_time = [f"{num:.3f}s" for num in duckdb_streaming_ingestion_times]
+
+    print(f"Pandas:\t\t\t\t\t{formatted_pandas}")
+    print(f"Fireducks:\t\t\t\t{formatted_fireducks}")
+    print(f"Polars native:\t\t\t{formatted_pl_native}")
+    print(f"Load from arrow:\t\t{formatted_pl_to_arrow}")
+    print(f"Arrow with streaming:\t{formatted_pl_to_arrow_streaming}")
+    print(f"Load from duckdb:\t\t{formatted_duckdb_time}")
+    print(f"Duckdb with streaming:\t{formatted_duckdb_streaming_time}")
+
+
+if __name__ == "__main__":
+    ingestion_speed_btc_dataset()
diff --git a/dataset_tests/ingestion_equivalence_assertions.py b/dataset_tests/ingestion_equivalence_assertions.py
new file mode 100644
index 0000000000..2e16a2426a
--- /dev/null
+++ b/dataset_tests/ingestion_equivalence_assertions.py
@@ -0,0 +1,66 @@
+import gc
+
+import duckdb
+
+from raphtory import Graph
+import pandas as pd
+import polars as pl
+import fireducks.pandas as fpd
+
+FLATTENED_FILE = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/flattened_data.parquet"
+
+if __name__ == "__main__":
+    df_pd = pd.read_parquet(FLATTENED_FILE)
+    g_pandas = Graph()
+    g_pandas.load_edges_from_pandas(
+        df=df_pd, time="block_timestamp", src="inputs_address", dst="outputs_address"
+    )
+
+    df_fireducks: fpd.frame.DataFrame = fpd.read_parquet(FLATTENED_FILE)
+    g_fireducks = Graph()
+    g_fireducks.load_edges_from_fireducks(df=df_fireducks, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    print("Checking equality...")
+    assert g_pandas == g_fireducks
+    print("g_pandas == g_fireducks")
+    del df_fireducks, g_fireducks
+    gc.collect()
+
+    df_pl = pl.read_parquet(FLATTENED_FILE)
+    g_polars = Graph()
+    g_polars.load_edges_from_polars(
+        df=df_pl, time="block_timestamp", src="inputs_address", dst="outputs_address"
+    )
+
+    print("Checking equality...")
+    assert g_pandas == g_polars
+    print("g_pandas == g_polars")
+
+    df_pl_arrow = df_pl.to_arrow()
+    g_polars_arrow = Graph()
+    g_polars_arrow.load_edges_from_arrow(df=df_pl_arrow, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    print("Checking equality...")
+    assert g_pandas == g_polars_arrow
+    print("g_pandas == g_polars_arrow")
+    del g_polars_arrow
+    gc.collect()
+
+    g_polars_arrow_streaming = Graph()
+    g_polars_arrow_streaming.load_edges_from_arrow_streaming(df=df_pl_arrow, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    print("Checking equality...")
+    assert g_pandas == g_polars_arrow_streaming
+    print("g_pandas == g_polars_arrow_streaming")
+    del g_polars_arrow_streaming
+    gc.collect()
+
+    g_duckdb = Graph()
+    duckdb_results = duckdb.sql("SELECT * FROM df_pl_arrow")
+    g_duckdb.load_edges_from_duckdb(df=duckdb_results, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    print("Checking equality...")
+    assert g_pandas == g_duckdb
+    print("g_pandas == g_duckdb")
+
+    g_duckdb_streaming = Graph()
+    g_duckdb_streaming.load_edges_from_duckdb_streaming(df=duckdb_results, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    print("Checking equality...")
+    assert g_pandas == g_duckdb_streaming
+    print("g_pandas == g_duckdb_streaming")
\ No newline at end of file
diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi
index ed4492bad0..4a54dec171 100644
--- a/python/python/raphtory/__init__.pyi
+++ b/python/python/raphtory/__init__.pyi
@@ -1141,6 +1141,9 @@ class Graph(GraphView):
     def load_edges_from_duckdb_streaming(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None):
         ...
 
+    def load_edges_from_fireducks(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None):
+        ...
+
     def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None:
         """
         Load edges from a Pandas DataFrame into the graph.
diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 69ad012118..27d93eb250 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -770,6 +770,43 @@ impl PyGraph { ) } + #[pyo3(signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None))] + fn load_edges_from_fireducks( + &self, + df: &Bound, + time: &str, + src: &str, + dst: &str, + properties: Option>, + metadata: Option>, + shared_metadata: Option>, + layer: Option<&str>, + layer_col: Option<&str>, + ) -> Result<(), GraphError> { + let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); + let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); + + // Convert Fireducks DataFrame to pandas.DataFrame + let pandas_df = df.call_method0("to_pandas").map_err(|e| { + GraphError::LoadFailure(format!( + "Failed converting Fireducks DataFrame to pandas via to_pandas: {e}" + )) + })?; + + load_edges_from_pandas( + &self.graph, + &pandas_df, + time, + src, + dst, + &properties, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + ) + } + #[pyo3(signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None))] fn load_edges_from_polars( &self, @@ -896,7 +933,9 @@ impl PyGraph { // Call fetch_arrow_table() so we can use arrow ingestion pathway let arrow_df = df.call_method0("fetch_arrow_table").map_err(|e| { - GraphError::LoadFailure("Failed calling fetch_arrow_table() on the DuckDB instance".to_string()) + GraphError::LoadFailure( + "Failed calling fetch_arrow_table() on the DuckDB instance".to_string(), + ) })?; load_edges_from_arrow( diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index 1e180753df..bfc535c708 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -9,14 +9,19 @@ use crate::{ python::graph::io::pandas_loaders::{array_to_rust, is_jupyter}, serialise::incremental::InternalCache, }; -use arrow::array::{ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream}, RecordBatch, RecordBatchReader}; +use arrow::{ + array::{ + ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream}, + RecordBatch, RecordBatchReader, + }, + datatypes::SchemaRef, +}; use pyo3::{ prelude::*, types::{PyCapsule, PyDict}, }; use raphtory_api::core::entities::properties::prop::Prop; use std::collections::HashMap; -use arrow::datatypes::SchemaRef; pub(crate) fn load_edges_from_arrow< 'py, @@ -113,15 +118,18 @@ pub(crate) fn process_arrow_py_df_streaming<'a>( // We need to use the pointer to build an ArrowArrayStreamReader if !stream_capsule.is_valid() { - return Err(PyErr::from(GraphError::LoadFailure("Stream capsule is not valid".to_string()))); + return Err(PyErr::from(GraphError::LoadFailure( + "Stream capsule is not valid".to_string(), + ))); } let stream_ptr = stream_capsule.pointer() as *mut FFI_ArrowArrayStream; - let reader: ArrowArrayStreamReader = unsafe { ArrowArrayStreamReader::from_raw(stream_ptr) }.map_err(|e| { - GraphError::LoadFailure(format!( - "Arrow stream error while creating the reader: {}", - e.to_string() - )) - })?; + let reader: ArrowArrayStreamReader = unsafe { ArrowArrayStreamReader::from_raw(stream_ptr) } + .map_err(|e| { + GraphError::LoadFailure(format!( + "Arrow stream error while creating the reader: {}", + e.to_string() + )) + })?; // Get column names and 
indices once only let schema: SchemaRef = reader.schema(); @@ -135,19 +143,21 @@ pub(crate) fn process_arrow_py_df_streaming<'a>( } } - let chunks = reader.into_iter().map(move |batch_res: Result| { - let batch = batch_res.map_err(|e| { - GraphError::LoadFailure(format!( - "Arrow stream error while reading a batch: {}", - e.to_string() - )) - })?; - let chunk_arrays = indices - .iter() - .map(|&idx| batch.column(idx).clone()) - .collect::>(); - Ok(DFChunk::new(chunk_arrays)) - }); + let chunks = reader + .into_iter() + .map(move |batch_res: Result| { + let batch = batch_res.map_err(|e| { + GraphError::LoadFailure(format!( + "Arrow stream error while reading a batch: {}", + e.to_string() + )) + })?; + let chunk_arrays = indices + .iter() + .map(|&idx| batch.column(idx).clone()) + .collect::>(); + Ok(DFChunk::new(chunk_arrays)) + }); let num_rows: usize = df.call_method0("__len__")?.extract()?; Ok(DFView::new(names, chunks, num_rows)) From 2b58f185bd73f90b6770843f608cd5ecb151ca59 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Wed, 19 Nov 2025 11:47:17 -0500 Subject: [PATCH 06/55] Adding flag to stream/not stream data in load_* functions. Will get rid of them and always stream. Added benchmark for loading from fireducks. --- dataset_tests/ingestion_benchmarks.py | 23 +- python/python/raphtory/__init__.pyi | 14 +- raphtory/src/python/graph/graph.rs | 212 ++++++++---------- raphtory/src/python/graph/io/arrow_loaders.rs | 85 +++---- 4 files changed, 139 insertions(+), 195 deletions(-) diff --git a/dataset_tests/ingestion_benchmarks.py b/dataset_tests/ingestion_benchmarks.py index 7b117b8625..bf40f9fd3e 100644 --- a/dataset_tests/ingestion_benchmarks.py +++ b/dataset_tests/ingestion_benchmarks.py @@ -31,23 +31,16 @@ def bench_fire_ducks_pandas(df: fpd.frame.DataFrame) -> float: gc.collect() return total -def bench_polars_to_pandas(df: pl.DataFrame, use_pyarrow: bool) -> float: +def bench_fire_ducks_pandas_streaming(df: fpd.frame.DataFrame) -> float: + assert "fireducks.pandas.frame.DataFrame" in str(type(df)) g = Graph() start = time.perf_counter() - df_pd_from_pl = df.to_pandas(use_pyarrow_extension_array=use_pyarrow) - mid = time.perf_counter() - g.load_edges_from_pandas(df=df_pd_from_pl, time="block_timestamp", src="inputs_address", dst="outputs_address") - end = time.perf_counter() - convert_time = mid - start - ingestion_time = end - mid - total_time = end - start - print( - f"[polars->pandas] convert use_pyarrow_extension_array={use_pyarrow} {convert_time:.3f}s, ingest {ingestion_time:.3f}s, " - f"total {total_time:.3f}s;" - ) - del g, df_pd_from_pl + g.load_edges_from_fireducks(df=df, time="block_timestamp", src="inputs_address", dst="outputs_address") + total = time.perf_counter() - start + print(f"[fireducks] streaming ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}") + del g gc.collect() - return total_time + return total def bench_polars_native(df: pl.DataFrame) -> float: g = Graph() @@ -206,4 +199,4 @@ def ingestion_speed_btc_dataset(): if __name__ == "__main__": - ingestion_speed_btc_dataset() + ingestion_speed_btc_dataset() \ No newline at end of file diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 4a54dec171..0faa55fa94 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1129,19 +1129,13 @@ class Graph(GraphView): GraphError: If the operation fails. 
""" - def load_edges_from_arrow(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None): + def load_edges_from_arrow(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None, stream_data=False): ... - def load_edges_from_arrow_streaming(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None): + def load_edges_from_duckdb(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None, stream_data=False): ... - def load_edges_from_duckdb(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None): - ... - - def load_edges_from_duckdb_streaming(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None): - ... - - def load_edges_from_fireducks(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None): + def load_edges_from_fireducks(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None, stream_data=False): ... def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: @@ -1188,7 +1182,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_polars(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None): + def load_edges_from_polars(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None, stream_data=False): ... 
@staticmethod diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 27d93eb250..31aa8bc451 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -17,10 +17,7 @@ use crate::{ edge::PyEdge, graph_with_deletions::PyPersistentGraph, index::PyIndexSpec, - io::{ - arrow_loaders::{load_edges_from_arrow, load_edges_from_arrow_streaming}, - pandas_loaders::*, - }, + io::{arrow_loaders::load_edges_from_arrow, pandas_loaders::*}, node::PyNode, views::graph_view::PyGraphView, }, @@ -770,7 +767,7 @@ impl PyGraph { ) } - #[pyo3(signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None))] + #[pyo3(signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, stream_data = false))] fn load_edges_from_fireducks( &self, df: &Bound, @@ -782,32 +779,49 @@ impl PyGraph { shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + stream_data: bool, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - // Convert Fireducks DataFrame to pandas.DataFrame - let pandas_df = df.call_method0("to_pandas").map_err(|e| { - GraphError::LoadFailure(format!( - "Failed converting Fireducks DataFrame to pandas via to_pandas: {e}" - )) - })?; + if stream_data { + load_edges_from_arrow( + &self.graph, + &df, + time, + src, + dst, + &properties, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + true, + ) + } else { + // Convert Fireducks DataFrame to pandas.DataFrame + let pandas_df = df.call_method0("to_pandas").map_err(|e| { + GraphError::LoadFailure(format!( + "Failed converting Fireducks DataFrame to pandas via to_pandas: {e}" + )) + })?; - load_edges_from_pandas( - &self.graph, - &pandas_df, - time, - src, - dst, - &properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - ) + load_edges_from_pandas( + &self.graph, + &pandas_df, + time, + src, + dst, + &properties, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + ) + } } - #[pyo3(signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None))] + #[pyo3(signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, stream_data = false))] fn load_edges_from_polars( &self, df: &Bound, @@ -819,40 +833,57 @@ impl PyGraph { shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + stream_data: bool, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - // Convert Polars DataFrame to pandas.DataFrame - let kwargs = PyDict::new(df.py()); - kwargs - .set_item("use_pyarrow_extension_array", true) - .map_err(|e| { - GraphError::LoadFailure(format!("Failed setting kwargs for to_pandas(): {e}")) + if stream_data { + load_edges_from_arrow( + &self.graph, + &df, + time, + src, + dst, + &properties, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + true, + ) + } else { + // Convert Polars DataFrame to pandas.DataFrame + let kwargs = PyDict::new(df.py()); + kwargs + .set_item("use_pyarrow_extension_array", true) + .map_err(|e| { + GraphError::LoadFailure(format!("Failed setting kwargs for to_pandas(): 
{e}")) + })?; + + let pandas_df = df.call_method("to_pandas", (), Some(&kwargs)).map_err(|e| { + GraphError::LoadFailure(format!( + "Failed converting Polars DataFrame to pandas via to_pandas(use_pyarrow_extension_array=True): {e}" + )) })?; - let pandas_df = df.call_method("to_pandas", (), Some(&kwargs)).map_err(|e| { - GraphError::LoadFailure(format!( - "Failed converting Polars DataFrame to pandas via to_pandas(use_pyarrow_extension_array=True): {e}" - )) - })?; - - load_edges_from_pandas( - &self.graph, - &pandas_df, - time, - src, - dst, - &properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - ) + load_edges_from_pandas( + &self.graph, + &pandas_df, + time, + src, + dst, + &properties, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + ) + } } #[pyo3( - signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) + signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, stream_data = false) )] fn load_edges_from_arrow( &self, @@ -865,6 +896,7 @@ impl PyGraph { shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + stream_data: bool, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -879,42 +911,12 @@ impl PyGraph { shared_metadata.as_ref(), layer, layer_col, + stream_data, ) } #[pyo3( - signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) - )] - fn load_edges_from_arrow_streaming( - &self, - df: &Bound, - time: &str, - src: &str, - dst: &str, - properties: Option>, - metadata: Option>, - shared_metadata: Option>, - layer: Option<&str>, - layer_col: Option<&str>, - ) -> Result<(), GraphError> { - let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edges_from_arrow_streaming( - &self.graph, - df, - time, - src, - dst, - &properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - ) - } - - #[pyo3( - signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) + signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, stream_data = false) )] fn load_edges_from_duckdb( &self, @@ -927,50 +929,23 @@ impl PyGraph { shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + stream_data: bool, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); // Call fetch_arrow_table() so we can use arrow ingestion pathway - let arrow_df = df.call_method0("fetch_arrow_table").map_err(|e| { - GraphError::LoadFailure( - "Failed calling fetch_arrow_table() on the DuckDB instance".to_string(), - ) - })?; + let df = if stream_data { + df + } else { + &df.call_method0("fetch_arrow_table").map_err(|e| { + GraphError::LoadFailure( + "Failed calling fetch_arrow_table() on the DuckDB instance".to_string(), + ) + })? 
+ }; load_edges_from_arrow( - &self.graph, - &arrow_df, - time, - src, - dst, - &properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - ) - } - - #[pyo3( - signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) - )] - fn load_edges_from_duckdb_streaming( - &self, - df: &Bound, - time: &str, - src: &str, - dst: &str, - properties: Option>, - metadata: Option>, - shared_metadata: Option>, - layer: Option<&str>, - layer_col: Option<&str>, - ) -> Result<(), GraphError> { - let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - - load_edges_from_arrow_streaming( &self.graph, df, time, @@ -981,6 +956,7 @@ impl PyGraph { shared_metadata.as_ref(), layer, layer_col, + stream_data, ) } diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index bfc535c708..08fcad1055 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -37,6 +37,7 @@ pub(crate) fn load_edges_from_arrow< shared_metadata: Option<&HashMap>, layer: Option<&str>, layer_col: Option<&str>, + stream_data: bool, ) -> Result<(), GraphError> { let mut cols_to_check = vec![src, dst, time]; cols_to_check.extend_from_slice(properties); @@ -45,61 +46,41 @@ pub(crate) fn load_edges_from_arrow< cols_to_check.push(layer_col.as_ref()); } - let df_view = process_arrow_py_df(df, cols_to_check.clone())?; - df_view.check_cols_exist(&cols_to_check)?; - load_edges_from_df( - df_view, - time, - src, - dst, - properties, - metadata, - shared_metadata, - layer, - layer_col, - graph, - ) -} - -pub(crate) fn load_edges_from_arrow_streaming< - 'py, - G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, ->( - graph: &G, - df: &Bound<'py, PyAny>, - time: &str, - src: &str, - dst: &str, - properties: &[&str], - metadata: &[&str], - shared_metadata: Option<&HashMap>, - layer: Option<&str>, - layer_col: Option<&str>, -) -> Result<(), GraphError> { - let mut cols_to_check = vec![src, dst, time]; - cols_to_check.extend_from_slice(properties); - cols_to_check.extend_from_slice(metadata); - if let Some(layer_col) = layer_col { - cols_to_check.push(layer_col.as_ref()); + if stream_data { + let df_view = process_arrow_c_stream_df(df, cols_to_check.clone())?; + df_view.check_cols_exist(&cols_to_check)?; + load_edges_from_df( + df_view, + time, + src, + dst, + properties, + metadata, + shared_metadata, + layer, + layer_col, + graph, + ) + } else { + let df_view = process_arrow_py_df(df, cols_to_check.clone())?; + df_view.check_cols_exist(&cols_to_check)?; + load_edges_from_df( + df_view, + time, + src, + dst, + properties, + metadata, + shared_metadata, + layer, + layer_col, + graph, + ) } - - let df_view = process_arrow_py_df_streaming(df, cols_to_check.clone())?; - df_view.check_cols_exist(&cols_to_check)?; - load_edges_from_df( - df_view, - time, - src, - dst, - properties, - metadata, - shared_metadata, - layer, - layer_col, - graph, - ) } -pub(crate) fn process_arrow_py_df_streaming<'a>( +/// Can handle any object that provides the \_\_arrow_c_stream__() interface and \_\_len__() function +pub(crate) fn process_arrow_c_stream_df<'a>( df: &Bound<'a, PyAny>, col_names: Vec<&str>, ) -> PyResult> + 'a>> { From eedc1a8f8dd4b4e6003159c4bc44b98c6c155dce Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 21 Nov 2025 04:21:22 -0500 Subject: 
Subject: [PATCH 07/55] Added functions for load_nodes, load_node_props,
 load_edges, load_edge_props that all use the __arrow_c_stream__() interface.
 If a data source is passed with no __len__ function, we calculate the len
 ourselves. Updated ingestion benchmarks to also test pandas_streaming,
 fireducks_streaming, polars_streaming

---
 dataset_tests/ingestion_benchmarks.py         | 175 ++++++++-------
 python/python/raphtory/__init__.pyi           | 106 +++++++++
 raphtory/src/python/graph/graph.rs            | 209 +++++++++++++++++-
 raphtory/src/python/graph/io/arrow_loaders.rs | 145 +++++++++++-
 4 files changed, 551 insertions(+), 84 deletions(-)

diff --git a/dataset_tests/ingestion_benchmarks.py b/dataset_tests/ingestion_benchmarks.py
index bf40f9fd3e..727eb44480 100644
--- a/dataset_tests/ingestion_benchmarks.py
+++ b/dataset_tests/ingestion_benchmarks.py
@@ -15,7 +15,17 @@ def bench_pandas(df: pd.DataFrame) -> float:
     start = time.perf_counter()
     g.load_edges_from_pandas(df=df, time="block_timestamp", src="inputs_address", dst="outputs_address")
     total = time.perf_counter() - start
-    print(f"[pandas] ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}")
+    print(f"[pandas] ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}")
     del g
     gc.collect()
     return total
+
+def bench_pandas_streaming(df: pd.DataFrame) -> float:
+    g = Graph()
+    start = time.perf_counter()
+    g.load_edges(data_source=df, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    total = time.perf_counter() - start
+    print(f"[pandas streaming] ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}")
+    del g
+    gc.collect()
+    return total
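+
+# Illustrative sketch (not part of the benchmark run): load_edges accepts any
+# object that exports __arrow_c_stream__(), so a plain in-memory pyarrow Table
+# can be ingested the same way as the dataframes benchmarked here:
+#
+#   import pyarrow as pa
+#   tbl = pa.table({"block_timestamp": [1, 2], "inputs_address": ["a", "b"], "outputs_address": ["b", "c"]})
+#   g = Graph()
+#   g.load_edges(data_source=tbl, time="block_timestamp", src="inputs_address", dst="outputs_address")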
 
 def bench_fire_ducks_pandas(df: fpd.frame.DataFrame) -> float:
@@ -26,7 +36,7 @@ def bench_fire_ducks_pandas(df: fpd.frame.DataFrame) -> float:
     start = time.perf_counter()
     g.load_edges_from_fireducks(df=df, time="block_timestamp", src="inputs_address", dst="outputs_address")
     total = time.perf_counter() - start
-    print(f"[fireducks] ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}")
+    print(f"[fireducks] ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}")
     del g
     gc.collect()
     return total
@@ -35,100 +45,92 @@ def bench_fire_ducks_pandas_streaming(df: fpd.frame.DataFrame) -> float:
     assert "fireducks.pandas.frame.DataFrame" in str(type(df))
     g = Graph()
     start = time.perf_counter()
-    g.load_edges_from_fireducks(df=df, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    g.load_edges(data_source=df, time="block_timestamp", src="inputs_address", dst="outputs_address")
     total = time.perf_counter() - start
-    print(f"[fireducks] streaming ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}")
+    print(f"[fireducks streaming] ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}")
     del g
     gc.collect()
     return total
 
-def bench_polars_native(df: pl.DataFrame) -> float:
+def bench_polars(df: pl.DataFrame) -> float:
     g = Graph()
     start = time.perf_counter()
     g.load_edges_from_polars(df=df, time="block_timestamp", src="inputs_address", dst="outputs_address")
     total = time.perf_counter() - start
     print(
-        f"[polars native] ingestion took {total:.3f}s"
+        f"[polars] ingestion took {total:.3f}s"
     )
     del g
     gc.collect()
     return total
 
-def bench_polars_to_arrow(df: pl.DataFrame) -> float:
+def bench_polars_streaming(df: pl.DataFrame) -> float:
     g = Graph()
     start = time.perf_counter()
+    g.load_edges(data_source=df, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    total = time.perf_counter() - start
+    print(
+        f"[polars streaming] ingestion took {total:.3f}s"
+    )
+    del g
+    gc.collect()
+    return total
+
+def bench_arrow(df: pl.DataFrame) -> float:
+    g = Graph()
     df_arrow_from_pl = df.to_arrow()
-    mid = time.perf_counter()
+    start = time.perf_counter()
     g.load_edges_from_arrow(df=df_arrow_from_pl, time="block_timestamp", src="inputs_address", dst="outputs_address")
-    end = time.perf_counter()
-    convert_time = mid - start
-    ingestion_time = end - mid
-    total_time = end - start
+    total = time.perf_counter() - start
     print(
-        f"[polars->arrow] convert {convert_time:.3f}s, ingest {ingestion_time:.3f}s, "
-        f"total {total_time:.3f}s;"
+        f"[arrow] ingestion took {total:.3f}s"
     )
     del g, df_arrow_from_pl
     gc.collect()
-    return total_time
+    return total
 
-def bench_polars_to_arrow_streaming(df: pl.DataFrame) -> float:
+def bench_arrow_streaming(df: pl.DataFrame) -> float:
     g = Graph()
-    start = time.perf_counter()
     df_arrow_from_pl = df.to_arrow()
-    mid = time.perf_counter()
-    g.load_edges_from_arrow_streaming(df=df_arrow_from_pl, time="block_timestamp", src="inputs_address", dst="outputs_address")
-    end = time.perf_counter()
-    convert_time = mid - start
-    ingestion_time = end - mid
-    total_time = end - start
+    start = time.perf_counter()
+    g.load_edges(data_source=df_arrow_from_pl, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    total = time.perf_counter() - start
     print(
-        f"[polars->arrow] with streaming convert {convert_time:.3f}s, ingest {ingestion_time:.3f}s, "
-        f"total {total_time:.3f}s;"
+        f"[arrow streaming] ingestion took {total:.3f}s"
    )
     del g, df_arrow_from_pl
     gc.collect()
-    return total_time
+    return total
 
 def bench_duckdb(df: pl.DataFrame) -> float:
     g = Graph()
     df_arrow_from_pl = df.to_arrow()
-    start = time.perf_counter()
     duckdb_df = duckdb.sql("SELECT * FROM df_arrow_from_pl")
-    mid = time.perf_counter()
+    start = time.perf_counter()
     # internally calls fetch_arrow_table() on duckdb_df
     g.load_edges_from_duckdb(df=duckdb_df, time="block_timestamp", src="inputs_address", dst="outputs_address")
-    end = time.perf_counter()
-    load_time = mid - start
-    ingestion_time = end - mid
-    total_time = end - start
+    total = time.perf_counter() - start
     print(
-        f"[polars->duckdb] load {load_time:.3f}s, ingest {ingestion_time:.3f}s, "
-        f"total {total_time:.3f}s;"
+        f"[duckdb] ingestion took {total:.3f}s"
     )
     del g, df_arrow_from_pl, duckdb_df
     gc.collect()
-    return total_time
+    return total
 
 def bench_duckdb_streaming(df: pl.DataFrame) -> float:
     g = Graph()
     df_arrow_from_pl = df.to_arrow()
-    start = time.perf_counter()
     duckdb_df = duckdb.sql("SELECT * FROM df_arrow_from_pl")
-    mid = time.perf_counter()
+    start = time.perf_counter()
     # uses the __arrow_c_stream__() interface internally
-    g.load_edges_from_duckdb_streaming(df=duckdb_df, time="block_timestamp", src="inputs_address", dst="outputs_address")
-    end = time.perf_counter()
-    load_time = mid - start
-    ingestion_time = end - mid
-    total_time = end - start
+    g.load_edges(data_source=duckdb_df, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    total = time.perf_counter() - start
     print(
-        f"[polars->duckdb] streaming load {load_time:.3f}s, ingest {ingestion_time:.3f}s, "
-        f"total {total_time:.3f}s;"
+        f"[duckdb streaming] ingestion took {total:.3f}s"
     )
     del g, df_arrow_from_pl, duckdb_df
     gc.collect()
-    return total_time
+    return total
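+
+# Illustrative sketch: a DuckDBPyRelation does not necessarily expose __len__,
+# which is fine here; per this patch the streaming loader falls back to
+# counting rows per record batch while it ingests:
+#
+#   rel = duckdb.sql("SELECT * FROM df_arrow_from_pl")  # lazily evaluated relation
+#   g = Graph()
+#   g.load_edges(data_source=rel, time="block_timestamp", src="inputs_address", dst="outputs_address")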
 
 
 def ingestion_speed_btc_dataset():
@@ -137,65 +139,88 @@ def ingestion_speed_btc_dataset():
     df_pl: pl.DataFrame = pl.read_parquet(FLATTENED_FILE)
 
     pandas_ingestion_times = []
+    pandas_streaming_ingestion_times = []
     fireducks_ingestion_times = []
-    pl_native_total_times = []
-    pl_to_arrow_total_times = []
-    pl_to_arrow_streaming_total_times = []
+    fireducks_streaming_ingestion_times = []
+    polars_ingestion_times = []
+    polars_streaming_ingestion_times = []
+    arrow_ingestion_times = []
+    arrow_streaming_ingestion_times = []
     duckdb_ingestion_times = []
     duckdb_streaming_ingestion_times = []
 
     for _ in range(5):
-        # 1) Pandas ingestion
+        # 1.1) Pandas ingestion
         pandas_time = bench_pandas(df_pd)
         pandas_ingestion_times.append(pandas_time)
         gc.collect()
 
-        # 2) Fireducks Pandas ingestion
+        # 1.2) Pandas ingestion streaming
+        pandas_streaming_time = bench_pandas_streaming(df_pd)
+        pandas_streaming_ingestion_times.append(pandas_streaming_time)
+        gc.collect()
+
+        # 2.1) Fireducks Pandas ingestion
         fpd_time = bench_fire_ducks_pandas(df_fireducks)
         fireducks_ingestion_times.append(fpd_time)
         gc.collect()
 
-        # 3) to_pandas() called within rust
-        polars_native_time = bench_polars_native(df=df_pl)
-        pl_native_total_times.append(polars_native_time)
+        # 2.2) Fireducks Pandas ingestion streaming
+        fpd_streaming_time = bench_fire_ducks_pandas_streaming(df_fireducks)
+        fireducks_streaming_ingestion_times.append(fpd_streaming_time)
         gc.collect()
 
-        # 4) Arrow ingestion
-        arrow_time = bench_polars_to_arrow(df_pl)
-        pl_to_arrow_total_times.append(arrow_time)
+        # 3.1) Polars ingestion (to_pandas() called internally)
+        polars_time = bench_polars(df=df_pl)
+        polars_ingestion_times.append(polars_time)
         gc.collect()
 
-        # 5) Arrow ingestion streaming
-        arrow_streaming_time = bench_polars_to_arrow_streaming(df_pl)
-        pl_to_arrow_streaming_total_times.append(arrow_streaming_time)
+        # 3.2) Polars ingestion streaming (no internal to_pandas() call)
+        polars_streaming_time = bench_polars_streaming(df=df_pl)
+        polars_streaming_ingestion_times.append(polars_streaming_time)
         gc.collect()
 
-        # 6) DuckDB ingestion
+        # 4.1) Arrow ingestion
+        arrow_time = bench_arrow(df_pl)
+        arrow_ingestion_times.append(arrow_time)
+        gc.collect()
+
+        # 4.2) Arrow ingestion streaming
+        arrow_streaming_time = bench_arrow_streaming(df_pl)
+        arrow_streaming_ingestion_times.append(arrow_streaming_time)
+        gc.collect()
+
+        # 5.1) DuckDB ingestion (fetch_arrow_table() called internally)
         duckdb_time = bench_duckdb(df_pl)
         duckdb_ingestion_times.append(duckdb_time)
         gc.collect()
 
-        # 7) DuckDB streaming ingestion
+        # 5.2) DuckDB streaming ingestion (no internal fetch_arrow_table() call)
         duckdb_streaming_time = bench_duckdb_streaming(df_pl)
         duckdb_streaming_ingestion_times.append(duckdb_streaming_time)
         gc.collect()
 
     formatted_pandas = [f"{num:.3f}s" for num in pandas_ingestion_times]
+    formatted_pandas_streaming = [f"{num:.3f}s" for num in pandas_streaming_ingestion_times]
     formatted_fireducks = [f"{num:.3f}s" for num in fireducks_ingestion_times]
-    formatted_pl_native = [f"{num:.3f}s" for num in pl_native_total_times]
-    formatted_pl_to_arrow = [f"{num:.3f}s" for num in pl_to_arrow_total_times]
-    formatted_pl_to_arrow_streaming = [f"{num:.3f}s" for num in pl_to_arrow_streaming_total_times]
-    formatted_duckdb_time = [f"{num:.3f}s" for num in duckdb_ingestion_times]
-    formatted_duckdb_streaming_time = [f"{num:.3f}s" for num in duckdb_streaming_ingestion_times]
-
-    print(f"Pandas:\t\t\t\t\t{formatted_pandas}")
print(f"Fireducks:\t\t\t\t{formatted_fireducks}") - print(f"Polars native:\t\t\t{formatted_pl_native}") - print(f"Load from arrow:\t\t{formatted_pl_to_arrow}") - print(f"Arrow with streaming:\t{formatted_pl_to_arrow_streaming}") - print(f"Load from duckdb:\t\t{formatted_duckdb_time}") - print(f"Duckdb with streaming:\t{formatted_duckdb_streaming_time}") + formatted_fireducks_streaming = [f"{num:.3f}s" for num in fireducks_streaming_ingestion_times] + formatted_polars = [f"{num:.3f}s" for num in polars_ingestion_times] + formatted_polars_streaming = [f"{num:.3f}s" for num in polars_streaming_ingestion_times] + formatted_arrow = [f"{num:.3f}s" for num in arrow_ingestion_times] + formatted_arrow_streaming = [f"{num:.3f}s" for num in arrow_streaming_ingestion_times] + formatted_duckdb = [f"{num:.3f}s" for num in duckdb_ingestion_times] + formatted_duckdb_streaming = [f"{num:.3f}s" for num in duckdb_streaming_ingestion_times] + + print(f"Pandas: {formatted_pandas}") + print(f"Pandas streaming: {formatted_pandas_streaming}") + print(f"Fireducks: {formatted_fireducks}") + print(f"Fireducks streaming: {formatted_fireducks_streaming}") + print(f"Polars: {formatted_polars}") + print(f"Polars streaming: {formatted_polars_streaming}") + print(f"Arrow: {formatted_arrow}") + print(f"Arrow streaming: {formatted_arrow_streaming}") + print(f"DuckDB: {formatted_duckdb}") + print(f"DuckDB streaming: {formatted_duckdb_streaming}") if __name__ == "__main__": diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 0faa55fa94..af6defe1d3 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1089,6 +1089,32 @@ class Graph(GraphView): Graph: the loaded graph with initialised cache """ + def load_edge_props(self, data_source: Any, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + """ + Load edge properties into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + This includes, but is not limited to: + * Pandas dataframes + * FireDucks(.pandas) dataframes + * Polars dataframes + * Arrow tables + * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + + Arguments: + data_source (Any): The data source containing edge information. + src (str): The column name for the source node. + dst (str): The column name for the destination node. + metadata (List[str], optional): List of edge metadata column names. Defaults to None. + shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. + layer (str, optional): The edge layer name. Defaults to None. + layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_edge_props_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge properties from a Pandas DataFrame. @@ -1129,6 +1155,34 @@ class Graph(GraphView): GraphError: If the operation fails. 
""" + def load_edges(self, data_source: Any, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + """ + Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + This includes, but is not limited to: + * Pandas dataframes + * FireDucks(.pandas) dataframes + * Polars dataframes + * Arrow tables + * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + + Arguments: + data_source (Any): The data source containing the edges. + time (str): The column name for the update timestamps. + src (str): The column name for the source node ids. + dst (str): The column name for the destination node ids. + properties (List[str], optional): List of edge property column names. Defaults to None. + metadata (List[str], optional): List of edge metadata column names. Defaults to None. + shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. + layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) + layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_edges_from_arrow(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None, stream_data=False): ... @@ -1197,6 +1251,31 @@ class Graph(GraphView): Graph: """ + def load_node_props(self, data_source: Any, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + """ + Load node properties into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + This includes, but is not limited to: + * Pandas dataframes + * FireDucks(.pandas) dataframes + * Polars dataframes + * Arrow tables + * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + + Arguments: + data_source (Any): The data source containing node information. + id(str): The column name for the node IDs. + node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + metadata (List[str], optional): List of node metadata column names. Defaults to None. + shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_node_props_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load node properties from a Pandas DataFrame. @@ -1235,6 +1314,33 @@ class Graph(GraphView): GraphError: If the operation fails. 
""" + def load_nodes(self, data_source: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + """ + Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + This includes, but is not limited to: + * Pandas dataframes + * FireDucks(.pandas) dataframes + * Polars dataframes + * Arrow tables + * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + + Arguments: + data_source (Any): The data source containing the nodes. + time (str): The column name for the timestamps. + id (str): The column name for the node IDs. + node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + properties (List[str], optional): List of node property column names. Defaults to None. + metadata (List[str], optional): List of node metadata column names. Defaults to None. + shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_nodes_from_pandas(self, df: DataFrame, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load nodes from a Pandas DataFrame into the graph. diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 31aa8bc451..7d290b756a 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -17,7 +17,13 @@ use crate::{ edge::PyEdge, graph_with_deletions::PyPersistentGraph, index::PyIndexSpec, - io::{arrow_loaders::load_edges_from_arrow, pandas_loaders::*}, + io::{ + arrow_loaders::{ + load_edge_props_from_arrow_c_stream, load_edges_from_arrow, + load_node_props_from_arrow_c_stream, load_nodes_from_arrow_c_stream, + }, + pandas_loaders::*, + }, node::PyNode, views::graph_view::PyGraphView, }, @@ -625,6 +631,58 @@ impl PyGraph { PyGraph::py_from_db_graph(self.graph.event_graph()) } + /// Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + /// This includes, but is not limited to: + /// * Pandas dataframes + /// * FireDucks(.pandas) dataframes + /// * Polars dataframes + /// * Arrow tables + /// * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// + /// Arguments: + /// data_source (Any): The data source containing the nodes. + /// time (str): The column name for the timestamps. + /// id (str): The column name for the node IDs. + /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) + /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + /// properties (List[str], optional): List of node property column names. Defaults to None. 
+        """
+
     def load_node_props_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None:
         """
         Load node properties from a Pandas DataFrame.
@@ -1235,6 +1314,33 @@ class Graph(GraphView):
             GraphError: If the operation fails.
         """
 
+    def load_nodes(self, data_source: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None:
+        """
+        Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method).
+        This includes, but is not limited to:
+        * Pandas dataframes
+        * FireDucks(.pandas) dataframes
+        * Polars dataframes
+        * Arrow tables
+        * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query)
+
+        Arguments:
+            data_source (Any): The data source containing the nodes.
+            time (str): The column name for the timestamps.
+            id (str): The column name for the node IDs.
+            node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col)
+            node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type)
+            properties (List[str], optional): List of node property column names. Defaults to None.
+            metadata (List[str], optional): List of node metadata column names. Defaults to None.
+            shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None.
+
+        Returns:
+            None: This function does not return a value, if the operation is successful.
+
+        Raises:
+            GraphError: If the operation fails.
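+
+        Example:
+            A minimal sketch, assuming a polars DataFrame with illustrative
+            column names:
+
+                import polars as pl
+                df = pl.DataFrame({"time": [1, 2], "id": [1, 2]})
+                g = Graph()
+                g.load_nodes(data_source=df, time="time", id="id")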
+        """
+
     def load_nodes_from_pandas(self, df: DataFrame, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None:
         """
         Load nodes from a Pandas DataFrame into the graph.
diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs
index 31aa8bc451..7d290b756a 100644
--- a/raphtory/src/python/graph/graph.rs
+++ b/raphtory/src/python/graph/graph.rs
@@ -17,7 +17,13 @@ use crate::{
         edge::PyEdge,
         graph_with_deletions::PyPersistentGraph,
         index::PyIndexSpec,
-        io::{arrow_loaders::load_edges_from_arrow, pandas_loaders::*},
+        io::{
+            arrow_loaders::{
+                load_edge_props_from_arrow_c_stream, load_edges_from_arrow,
+                load_node_props_from_arrow_c_stream, load_nodes_from_arrow_c_stream,
+            },
+            pandas_loaders::*,
+        },
         node::PyNode,
         views::graph_view::PyGraphView,
     },
@@ -625,6 +631,58 @@ impl PyGraph {
         PyGraph::py_from_db_graph(self.graph.event_graph())
     }
 
+    /// Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method).
+    /// This includes, but is not limited to:
+    /// * Pandas dataframes
+    /// * FireDucks(.pandas) dataframes
+    /// * Polars dataframes
+    /// * Arrow tables
+    /// * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query)
+    ///
+    /// Arguments:
+    ///     data_source (Any): The data source containing the nodes.
+    ///     time (str): The column name for the timestamps.
+    ///     id (str): The column name for the node IDs.
+    ///     node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col)
+    ///     node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type)
+    ///     properties (List[str], optional): List of node property column names. Defaults to None.
+    ///     metadata (List[str], optional): List of node metadata column names. Defaults to None.
+    ///     shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None.
+    ///
+    /// Returns:
+    ///     None: This function does not return a value, if the operation is successful.
+    ///
+    /// Raises:
+    ///     GraphError: If the operation fails.
+    #[pyo3(
+        signature = (data_source, time, id, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None)
+    )]
+    fn load_nodes<'py>(
+        &self,
+        data_source: &Bound<'py, PyAny>,
+        time: &str,
+        id: &str,
+        node_type: Option<&str>,
+        node_type_col: Option<&str>,
+        properties: Option>,
+        metadata: Option>,
+        shared_metadata: Option>,
+    ) -> Result<(), GraphError> {
+        let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default();
+        let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default();
+        load_nodes_from_arrow_c_stream(
+            &self.graph,
+            data_source,
+            time,
+            id,
+            node_type,
+            node_type_col,
+            &properties,
+            &metadata,
+            shared_metadata.as_ref(),
+        )
+    }
+
     /// Load nodes from a Pandas DataFrame into the graph.
     ///
     /// Arguments:
@@ -718,6 +776,62 @@ impl PyGraph {
         )
     }
 
+    /// Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method).
+    /// This includes, but is not limited to:
+    /// * Pandas dataframes
+    /// * FireDucks(.pandas) dataframes
+    /// * Polars dataframes
+    /// * Arrow tables
+    /// * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query)
+    ///
+    /// Arguments:
+    ///     data_source (Any): The data source containing the edges.
+    ///     time (str): The column name for the update timestamps.
+    ///     src (str): The column name for the source node ids.
+    ///     dst (str): The column name for the destination node ids.
+    ///     properties (List[str], optional): List of edge property column names. Defaults to None.
+    ///     metadata (List[str], optional): List of edge metadata column names. Defaults to None.
+    ///     shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None.
+    ///     layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col)
+    ///     layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer)
+    ///
+    /// Returns:
+    ///     None: This function does not return a value, if the operation is successful.
+    ///
+    /// Raises:
+    ///     GraphError: If the operation fails.
+    #[pyo3(
+        signature = (data_source, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None)
+    )]
+    fn load_edges(
+        &self,
+        data_source: &Bound,
+        time: &str,
+        src: &str,
+        dst: &str,
+        properties: Option>,
+        metadata: Option>,
+        shared_metadata: Option>,
+        layer: Option<&str>,
+        layer_col: Option<&str>,
+    ) -> Result<(), GraphError> {
+        let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default();
+        let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default();
+        load_edges_from_arrow(
+            &self.graph,
+            data_source,
+            time,
+            src,
+            dst,
+            &properties,
+            &metadata,
+            shared_metadata.as_ref(),
+            layer,
+            layer_col,
+            true,
+        )
+    }
+
     /// Load edges from a Pandas DataFrame into the graph.
     ///
     /// Arguments:
@@ -1010,6 +1124,51 @@ impl PyGraph {
         )
     }
 
+    /// Load node properties into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method).
+    /// This includes, but is not limited to:
+    /// * Pandas dataframes
+    /// * FireDucks(.pandas) dataframes
+    /// * Polars dataframes
+    /// * Arrow tables
+    /// * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query)
+    ///
+    /// Arguments:
+    ///     data_source (Any): The data source containing node information.
+    ///     id(str): The column name for the node IDs.
+    ///     node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col)
+    ///     node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type)
+    ///     metadata (List[str], optional): List of node metadata column names. Defaults to None.
+    ///     shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None.
+    ///
+    /// Returns:
+    ///     None: This function does not return a value, if the operation is successful.
+    ///
+    /// Raises:
+    ///     GraphError: If the operation fails.
+    #[pyo3(
+        signature = (data_source, id, node_type = None, node_type_col = None, metadata = None, shared_metadata = None)
+    )]
+    fn load_node_props(
+        &self,
+        data_source: &Bound,
+        id: &str,
+        node_type: Option<&str>,
+        node_type_col: Option<&str>,
+        metadata: Option>,
+        shared_metadata: Option>,
+    ) -> Result<(), GraphError> {
+        let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default();
+        load_node_props_from_arrow_c_stream(
+            &self.graph,
+            data_source,
+            id,
+            node_type,
+            node_type_col,
+            &metadata,
+            shared_metadata.as_ref(),
+        )
+    }
+
     /// Load node properties from a Pandas DataFrame.
     ///
     /// Arguments:
@@ -1089,6 +1248,54 @@ impl PyGraph {
         )
     }
 
+    /// Load edge properties into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method).
+    /// This includes, but is not limited to:
+    /// * Pandas dataframes
+    /// * FireDucks(.pandas) dataframes
+    /// * Polars dataframes
+    /// * Arrow tables
+    /// * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query)
+    ///
+    /// Arguments:
+    ///     data_source (Any): The data source containing edge information.
+    ///     src (str): The column name for the source node.
+    ///     dst (str): The column name for the destination node.
+    ///     metadata (List[str], optional): List of edge metadata column names. Defaults to None.
+    ///     shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None.
+    ///     layer (str, optional): The edge layer name. Defaults to None.
+    ///     layer_col (str, optional): The edge layer col name in dataframe. Defaults to None.
+    ///
+    /// Returns:
+    ///     None: This function does not return a value, if the operation is successful.
+    ///
+    /// Raises:
+    ///     GraphError: If the operation fails.
+    #[pyo3(
+        signature = (data_source, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None)
+    )]
+    fn load_edge_props(
+        &self,
+        data_source: &Bound,
+        src: &str,
+        dst: &str,
+        metadata: Option>,
+        shared_metadata: Option>,
+        layer: Option<&str>,
+        layer_col: Option<&str>,
+    ) -> Result<(), GraphError> {
+        let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default();
+        load_edge_props_from_arrow_c_stream(
+            &self.graph,
+            data_source,
+            src,
+            dst,
+            &metadata,
+            shared_metadata.as_ref(),
+            layer,
+            layer_col,
+        )
+    }
+
     /// Load edge properties from a Pandas DataFrame.
     ///
     /// Arguments:
diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs
index 08fcad1055..e484312b9c 100644
--- a/raphtory/src/python/graph/io/arrow_loaders.rs
+++ b/raphtory/src/python/graph/io/arrow_loaders.rs
@@ -3,7 +3,10 @@ use crate::{
     errors::GraphError,
     io::arrow::{
         dataframe::{DFChunk, DFView},
-        df_loaders::load_edges_from_df,
+        df_loaders::{
+            load_edges_from_df, load_edges_props_from_df, load_node_props_from_df,
+            load_nodes_from_df,
+        },
     },
     prelude::{AdditionOps, PropertyAdditionOps},
     python::graph::io::pandas_loaders::{array_to_rust, is_jupyter},
@@ -16,6 +19,7 @@ use arrow::{
     },
     datatypes::SchemaRef,
 };
+use itertools::Either;
 use pyo3::{
     prelude::*,
     types::{PyCapsule, PyDict},
@@ -23,6 +27,42 @@ use pyo3::{
 use raphtory_api::core::entities::properties::prop::Prop;
 use std::collections::HashMap;
 
+pub(crate) fn load_nodes_from_arrow_c_stream<
+    'py,
+    G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache,
+>(
+    graph: &G,
+    df: &Bound<'py, PyAny>,
+    time: &str,
+    id: &str,
+    node_type: Option<&str>,
+    node_type_col: Option<&str>,
+    properties: &[&str],
+    metadata: &[&str],
+    shared_metadata: Option<&HashMap>,
+) -> Result<(), GraphError> {
+    let mut cols_to_check = vec![id, time];
+    cols_to_check.extend_from_slice(properties);
+    cols_to_check.extend_from_slice(metadata);
+    if let Some(ref node_type_col) = node_type_col {
+        cols_to_check.push(node_type_col.as_ref());
+    }
+
+    let df_view = process_arrow_c_stream_df(df, cols_to_check.clone())?;
+    df_view.check_cols_exist(&cols_to_check)?;
+    load_nodes_from_df(
+        df_view,
+        time,
+        id,
+        properties,
+        metadata,
+        shared_metadata,
+        node_type,
+        node_type_col,
+        graph,
+    )
+}
+
 pub(crate) fn load_edges_from_arrow<
     'py,
     G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache,
@@ -79,6 +119,68 @@ pub(crate) fn load_edges_from_arrow<
     }
 }
 
+pub(crate) fn load_node_props_from_arrow_c_stream<
+    'py,
+    G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache,
+>(
+    graph: &G,
+    df: &Bound<'py, PyAny>,
+    id: &str,
+    node_type: Option<&str>,
+    node_type_col: Option<&str>,
+    metadata: &[&str],
+    shared_metadata: Option<&HashMap>,
+) -> Result<(), GraphError> {
+    let mut cols_to_check = vec![id];
+    cols_to_check.extend_from_slice(metadata);
+    if let Some(ref node_type_col) = node_type_col {
+        cols_to_check.push(node_type_col.as_ref());
+    }
+    let df_view = process_arrow_c_stream_df(df, cols_to_check.clone())?;
+    df_view.check_cols_exist(&cols_to_check)?;
+    load_node_props_from_df(
+        df_view,
+        id,
+        node_type,
+        node_type_col,
+        metadata,
+        shared_metadata,
+        graph,
+    )
+}
+
+pub(crate) fn load_edge_props_from_arrow_c_stream<
+    'py,
+    G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache,
+>(
+    graph: &G,
+    df: &Bound<'py, PyAny>,
+    src: &str,
+    dst: &str,
+    metadata: &[&str],
+    shared_metadata: Option<&HashMap>,
+    layer: Option<&str>,
+    layer_col: Option<&str>,
+) -> Result<(), GraphError> {
+    let mut cols_to_check = vec![src, dst];
+    if let Some(ref layer_col) = layer_col {
+        cols_to_check.push(layer_col.as_ref());
+    }
+    cols_to_check.extend_from_slice(metadata);
+    let df_view = process_arrow_c_stream_df(df, cols_to_check.clone())?;
+    df_view.check_cols_exist(&cols_to_check)?;
+    load_edges_props_from_df(
+        df_view,
+        src,
+        dst,
+        metadata,
+        shared_metadata,
+        layer,
+        layer_col,
+        graph,
+    )
+}
+
 /// Can handle any object that provides the __arrow_c_stream__() interface and __len__() function
 pub(crate) fn process_arrow_c_stream_df<'a>(
     df: &Bound<'a, PyAny>,
     col_names: Vec<&str>,
 ) -> PyResult> + 'a>> {
@@ -123,25 +225,52 @@ pub(crate) fn process_arrow_c_stream_df<'a>(
             indices.push(idx);
         }
     }
+    let len_from_python: Option = if df.hasattr("__len__")? {
+        Some(df.call_method0("__len__")?.extract()?)
+    } else {
+        None
+    };
+
+    if let Some(num_rows) = len_from_python {
+        let chunks = reader
+            .into_iter()
+            .map(move |batch_res: Result| {
+                let batch = batch_res.map_err(|e| {
+                    GraphError::LoadFailure(format!(
+                        "Arrow stream error while reading a batch: {}",
+                        e.to_string()
+                    ))
+                })?;
+                let chunk_arrays = indices
+                    .iter()
+                    .map(|&idx| batch.column(idx).clone())
+                    .collect::>();
+                Ok(DFChunk::new(chunk_arrays))
+            });
+        Ok(DFView::new(names, Either::Left(chunks), num_rows))
+    } else {
+        // if the python data source has no __len__ method, collect the iterator so we can calculate the num_rows() of each batch
+        let mut num_rows = 0usize;
+        let mut df_chunks = Vec::new();
 
-    let chunks = reader
-        .into_iter()
-        .map(move |batch_res: Result| {
+        for batch_res in reader {
             let batch = batch_res.map_err(|e| {
                 GraphError::LoadFailure(format!(
                     "Arrow stream error while reading a batch: {}",
                     e.to_string()
                 ))
             })?;
+            num_rows += batch.num_rows();
             let chunk_arrays = indices
                 .iter()
                 .map(|&idx| batch.column(idx).clone())
                 .collect::>();
-            Ok(DFChunk::new(chunk_arrays))
-        });
+            df_chunks.push(Ok(DFChunk::new(chunk_arrays)));
+        }
 
-    let num_rows: usize = df.call_method0("__len__")?.extract()?;
-    Ok(DFView::new(names, chunks, num_rows))
+        let chunks = Either::Right(df_chunks.into_iter());
+        Ok(DFView::new(names, chunks, num_rows))
+    }
 }
 
 pub(crate) fn process_arrow_py_df<'a>(

From a1137db5d5eca6eac7a0f0fb0a1e856e430b0ca1 Mon Sep 17 00:00:00 2001
From: arienandalibi
Date: Sat, 22 Nov 2025 17:08:13 -0500
Subject: [PATCH 08/55] Cleaned up benchmark print statements

---
 dataset_tests/ingestion_benchmarks.py | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

diff --git a/dataset_tests/ingestion_benchmarks.py b/dataset_tests/ingestion_benchmarks.py
index 727eb44480..6a876ca33b 100644
--- a/dataset_tests/ingestion_benchmarks.py
+++ b/dataset_tests/ingestion_benchmarks.py
@@ -57,9 +57,7 @@ def bench_polars(df: pl.DataFrame) -> float:
     start = time.perf_counter()
     g.load_edges_from_polars(df=df, time="block_timestamp", src="inputs_address", dst="outputs_address")
     total = time.perf_counter() - start
-    print(
-        f"[polars] ingestion took {total:.3f}s"
-    )
+    print(f"[polars] ingestion took {total:.3f}s")
     del g
     gc.collect()
     return total
@@ -69,9 +67,7 @@ def bench_polars_streaming(df: pl.DataFrame) -> float:
     start = time.perf_counter()
     g.load_edges(data_source=df, time="block_timestamp", src="inputs_address", dst="outputs_address")
     total = time.perf_counter() - start
-    print(
-        f"[polars streaming] ingestion took {total:.3f}s"
-    )
+    print(f"[polars streaming] ingestion took {total:.3f}s")
     del g
     gc.collect()
     return total
@@ -82,9 +78,7 @@ def bench_arrow(df: pl.DataFrame) -> float:
     start = time.perf_counter()
     g.load_edges_from_arrow(df=df_arrow_from_pl, time="block_timestamp", src="inputs_address", dst="outputs_address")
     total = time.perf_counter() - start
-    print(
-        f"[arrow] ingestion took {total:.3f}s"
-    )
+    print(f"[arrow] ingestion took {total:.3f}s")
     del g, df_arrow_from_pl
     gc.collect()
     return total
@@ -95,9 +89,7 @@ def bench_arrow_streaming(df: pl.DataFrame) -> float:
     start = time.perf_counter()
     g.load_edges(data_source=df_arrow_from_pl, time="block_timestamp", src="inputs_address", dst="outputs_address")
     total = time.perf_counter() - start
-    print(
-        f"[arrow streaming] ingestion took {total:.3f}s"
-    )
+    print(f"[arrow streaming] ingestion took {total:.3f}s")
     del g, df_arrow_from_pl
     gc.collect()
     return total
@@ -110,9 +102,7 @@ def bench_duckdb(df: pl.DataFrame) -> float:
     duckdb_df = duckdb.sql("SELECT * FROM df_arrow_from_pl")
     start = time.perf_counter()
     # internally calls fetch_arrow_table() on duckdb_df
     g.load_edges_from_duckdb(df=duckdb_df, time="block_timestamp", src="inputs_address", dst="outputs_address")
     total = time.perf_counter() - start
-    print(
-        f"[duckdb] ingestion took {total:.3f}s"
-    )
+    print(f"[duckdb] ingestion took {total:.3f}s")
     del g, df_arrow_from_pl, duckdb_df
     gc.collect()
     return total
@@ -125,9 +115,7 @@ def bench_duckdb_streaming(df: pl.DataFrame) -> float:
     duckdb_df = duckdb.sql("SELECT * FROM df_arrow_from_pl")
     start = time.perf_counter()
     # uses the __arrow_c_stream__() interface internally
     g.load_edges(data_source=duckdb_df, time="block_timestamp", src="inputs_address", dst="outputs_address")
     total = time.perf_counter() - start
-    print(
-        f"[duckdb streaming] ingestion took {total:.3f}s"
-    )
+    print(f"[duckdb streaming] ingestion took {total:.3f}s")
     del g, df_arrow_from_pl, duckdb_df
     gc.collect()
     return total

From 52365af608edc68a4100aa2ab0682c57905a0757 Mon Sep 17 00:00:00 2001
From: arienandalibi
Date: Sat, 22 Nov 2025 17:45:21 -0500
Subject: [PATCH 09/55] Ran make stubs

---
 python/python/raphtory/__init__.pyi           | 682 ++++++------
 .../python/raphtory/algorithms/__init__.pyi   | 168 +----
 python/python/raphtory/filter/__init__.pyi    |  99 ++-
 python/python/raphtory/graph_gen/__init__.pyi |   8 +-
 .../python/raphtory/graph_loader/__init__.pyi |  16 +-
 python/python/raphtory/graphql/__init__.pyi   | 179 ++---
 python/python/raphtory/iterables/__init__.pyi | 261 ++++---
 .../python/raphtory/node_state/__init__.pyi   | 293 +++----
 python/python/raphtory/vectors/__init__.pyi   |  58 +-
 9 files changed, 607 insertions(+), 1157 deletions(-)

diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi
index fd5fd418f9..af6defe1d3 100644
--- a/python/python/raphtory/__init__.pyi
+++ b/python/python/raphtory/__init__.pyi
@@ -1,7 +1,6 @@
 """
 Raphtory graph analytics library
 """
-
 from __future__ import annotations
 
 ###############################################################################
@@ -27,42 +26,8 @@ import networkx as nx  # type: ignore
 import pyvis  # type: ignore
 from raphtory.iterables import *
 
-__all__ = [
-    "GraphView",
-    "Graph",
-    "PersistentGraph",
-    "Node",
-    "Nodes",
-    "PathFromNode",
-    "PathFromGraph",
-    "MutableNode",
-    "Edge",
-    "Edges",
-    "NestedEdges",
-    "MutableEdge",
-    "Properties",
-    "PyPropValueList",
-    "Metadata",
-    "TemporalProperties",
-    "PropertiesView",
-    "TemporalProp",
-    "WindowSet",
-    "IndexSpecBuilder",
-    "IndexSpec",
-    "version",
-    "graphql",
-    "algorithms",
-    "graph_loader",
-    "graph_gen",
-    "vectors",
-    "node_state",
-    "filter",
-    "iterables",
-    "nullmodels",
-    "plottingutils",
-]
-
-class GraphView(object):
+__all__ = ['GraphView', 'Graph', 'PersistentGraph', 'Node', 'Nodes', 'PathFromNode',
'PathFromGraph', 'MutableNode', 'Edge', 'Edges', 'NestedEdges', 'MutableEdge', 'Properties', 'PyPropValueList', 'Metadata', 'TemporalProperties', 'PropertiesView', 'TemporalProp', 'WindowSet', 'IndexSpecBuilder', 'IndexSpec', 'version', 'graphql', 'algorithms', 'graph_loader', 'graph_gen', 'vectors', 'node_state', 'filter', 'iterables', 'nullmodels', 'plottingutils'] +class GraphView(object): """Graph view is a read-only version of a graph at a certain point in time.""" def __eq__(self, value): @@ -272,9 +237,7 @@ class GraphView(object): GraphView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -484,12 +447,7 @@ class GraphView(object): Properties: Properties paired with their names """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -635,14 +593,7 @@ class GraphView(object): GraphView: Returns the subgraph """ - def to_networkx( - self, - explode_edges: bool = False, - include_node_properties: bool = True, - include_edge_properties: bool = True, - include_update_history: bool = True, - include_property_history: bool = True, - ) -> nx.MultiDiGraph: + def to_networkx(self, explode_edges: bool = False, include_node_properties: bool = True, include_edge_properties: bool = True, include_update_history: bool = True, include_property_history: bool = True) -> nx.MultiDiGraph: """ Returns a graph with NetworkX. @@ -661,19 +612,7 @@ class GraphView(object): nx.MultiDiGraph: A Networkx MultiDiGraph. """ - def to_pyvis( - self, - explode_edges: bool = False, - edge_color: str = "#000000", - shape: str = "dot", - node_image: Optional[str] = None, - edge_weight: Optional[str] = None, - edge_label: Optional[str] = None, - colour_nodes_by_type: bool = False, - directed: bool = True, - notebook: bool = False, - **kwargs: Any, - ) -> pyvis.network.Network: + def to_pyvis(self, explode_edges: bool = False, edge_color: str = '#000000', shape: str = 'dot', node_image: Optional[str] = None, edge_weight: Optional[str] = None, edge_label: Optional[str] = None, colour_nodes_by_type: bool = False, directed: bool = True, notebook: bool = False, **kwargs: Any) -> pyvis.network.Network: """ Draw a graph with PyVis. Pyvis is a required dependency. If you intend to use this function make sure that you install Pyvis @@ -734,14 +673,7 @@ class GraphView(object): GraphView: The layered view """ - def vectorise( - self, - embedding: Callable[[list], list], - nodes: bool | str = True, - edges: bool | str = True, - cache: Optional[str] = None, - verbose: bool = False, - ) -> VectorisedGraph: + def vectorise(self, embedding: Callable[[list], list], nodes: bool | str = True, edges: bool | str = True, cache: Optional[str] = None, verbose: bool = False) -> VectorisedGraph: """ Create a VectorisedGraph from the current graph @@ -777,7 +709,7 @@ class GraphView(object): Optional[int]: """ -class Graph(GraphView): +class Graph(GraphView): """ A temporal graph with event semantics. 
@@ -788,16 +720,10 @@ class Graph(GraphView): def __new__(cls, num_shards: Optional[int] = None) -> Graph: """Create and return a new object. See help(type) for accurate signature.""" - def __reduce__(self): ... - def add_edge( - self, - timestamp: TimeInput, - src: str | int, - dst: str | int, - properties: Optional[PropInput] = None, - layer: Optional[str] = None, - secondary_index: Optional[int] = None, - ) -> MutableEdge: + def __reduce__(self): + ... + + def add_edge(self, timestamp: TimeInput, src: str|int, dst: str|int, properties: Optional[PropInput] = None, layer: Optional[str] = None, secondary_index: Optional[int] = None) -> MutableEdge: """ Adds a new edge with the given source and destination nodes and properties to the graph. @@ -830,14 +756,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def add_node( - self, - timestamp: TimeInput, - id: str | int, - properties: Optional[PropInput] = None, - node_type: Optional[str] = None, - secondary_index: Optional[int] = None, - ) -> MutableNode: + def add_node(self, timestamp: TimeInput, id: str|int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, secondary_index: Optional[int] = None) -> MutableNode: """ Adds a new node with the given id and properties to the graph. @@ -855,12 +774,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def add_properties( - self, - timestamp: TimeInput, - properties: PropInput, - secondary_index: Optional[int] = None, - ) -> None: + def add_properties(self, timestamp: TimeInput, properties: PropInput, secondary_index: Optional[int] = None) -> None: """ Adds properties to the graph. @@ -937,14 +851,7 @@ class Graph(GraphView): None: """ - def create_node( - self, - timestamp: TimeInput, - id: str | int, - properties: Optional[PropInput] = None, - node_type: Optional[str] = None, - secondary_index: Optional[int] = None, - ) -> MutableNode: + def create_node(self, timestamp: TimeInput, id: str|int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, secondary_index: Optional[int] = None) -> MutableNode: """ Creates a new node with the given id and properties to the graph. It fails if the node already exists. @@ -974,7 +881,7 @@ class Graph(GraphView): Graph: """ - def edge(self, src: str | int, dst: str | int) -> MutableEdge: + def edge(self, src: str|int, dst: str|int) -> MutableEdge: """ Gets the edge with the specified source and destination nodes @@ -1067,9 +974,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def import_edges_as( - self, edges: List[Edge], new_ids: List[Tuple[int, int]], merge: bool = False - ) -> None: + def import_edges_as(self, edges: List[Edge], new_ids: List[Tuple[int, int]], merge: bool = False) -> None: """ Import multiple edges into the graph with new ids. @@ -1104,9 +1009,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def import_node_as( - self, node: Node, new_id: str | int, merge: bool = False - ) -> MutableNode: + def import_node_as(self, node: Node, new_id: str|int, merge: bool = False) -> MutableNode: """ Import a single node into the graph with new id. @@ -1141,9 +1044,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def import_nodes_as( - self, nodes: List[Node], new_ids: List[str | int], merge: bool = False - ) -> None: + def import_nodes_as(self, nodes: List[Node], new_ids: List[str|int], merge: bool = False) -> None: """ Import multiple nodes into the graph with new ids. 
@@ -1188,16 +1089,33 @@ class Graph(GraphView): Graph: the loaded graph with initialised cache """ - def load_edge_props_from_pandas( - self, - df: DataFrame, - src: str, - dst: str, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edge_props(self, data_source: Any, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + """ + Load edge properties into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + This includes, but is not limited to: + * Pandas dataframes + * FireDucks(.pandas) dataframes + * Polars dataframes + * Arrow tables + * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + + Arguments: + data_source (Any): The data source containing edge information. + src (str): The column name for the source node. + dst (str): The column name for the destination node. + metadata (List[str], optional): List of edge metadata column names. Defaults to None. + shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. + layer (str, optional): The edge layer name. Defaults to None. + layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + + def load_edge_props_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge properties from a Pandas DataFrame. @@ -1217,16 +1135,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edge_props_from_parquet( - self, - parquet_path: str, - src: str, - dst: str, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edge_props_from_parquet(self, parquet_path: str, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge properties from parquet file @@ -1246,18 +1155,44 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_pandas( - self, - df: DataFrame, - time: str, - src: str, - dst: str, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edges(self, data_source: Any, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + """ + Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + This includes, but is not limited to: + * Pandas dataframes + * FireDucks(.pandas) dataframes + * Polars dataframes + * Arrow tables + * DuckDB (eg. 
DuckDBPyRelation obtained from running an SQL query) + + Arguments: + data_source (Any): The data source containing the edges. + time (str): The column name for the update timestamps. + src (str): The column name for the source node ids. + dst (str): The column name for the destination node ids. + properties (List[str], optional): List of edge property column names. Defaults to None. + metadata (List[str], optional): List of edge metadata column names. Defaults to None. + shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. + layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) + layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + + def load_edges_from_arrow(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None, stream_data=False): + ... + + def load_edges_from_duckdb(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None, stream_data=False): + ... + + def load_edges_from_fireducks(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None, stream_data=False): + ... + + def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges from a Pandas DataFrame into the graph. @@ -1279,18 +1214,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_parquet( - self, - parquet_path: str, - time: str, - src: str, - dst: str, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edges_from_parquet(self, parquet_path: str, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges from a Parquet file into the graph. @@ -1312,6 +1236,9 @@ class Graph(GraphView): GraphError: If the operation fails. """ + def load_edges_from_polars(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None, stream_data=False): + ... 
+ @staticmethod def load_from_file(path: str) -> Graph: """ @@ -1324,15 +1251,32 @@ class Graph(GraphView): Graph: """ - def load_node_props_from_pandas( - self, - df: DataFrame, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_node_props(self, data_source: Any, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + """ + Load node properties into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + This includes, but is not limited to: + * Pandas dataframes + * FireDucks(.pandas) dataframes + * Polars dataframes + * Arrow tables + * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + + Arguments: + data_source (Any): The data source containing node information. + id(str): The column name for the node IDs. + node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + metadata (List[str], optional): List of node metadata column names. Defaults to None. + shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + + def load_node_props_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load node properties from a Pandas DataFrame. @@ -1351,15 +1295,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_node_props_from_parquet( - self, - parquet_path: str, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_node_props_from_parquet(self, parquet_path: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load node properties from a parquet file. @@ -1378,17 +1314,34 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_pandas( - self, - df: DataFrame, - time: str, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_nodes(self, data_source: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + """ + Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). 
+ This includes, but is not limited to: + * Pandas dataframes + * FireDucks(.pandas) dataframes + * Polars dataframes + * Arrow tables + * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + + Arguments: + data_source (Any): The data source containing the nodes. + time (str): The column name for the timestamps. + id (str): The column name for the node IDs. + node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) + node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + properties (List[str], optional): List of node property column names. Defaults to None. + metadata (List[str], optional): List of node metadata column names. Defaults to None. + shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + + Returns: + None: This function does not return a value, if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + + def load_nodes_from_pandas(self, df: DataFrame, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load nodes from a Pandas DataFrame into the graph. @@ -1409,17 +1362,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_parquet( - self, - parquet_path: str, - time: str, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_nodes_from_parquet(self, parquet_path: str, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load nodes from a Parquet file into the graph. @@ -1440,7 +1383,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def node(self, id: str | int) -> MutableNode: + def node(self, id: str|int) -> MutableNode: """ Gets the node with the specified id @@ -1521,22 +1464,16 @@ class Graph(GraphView): None: """ -class PersistentGraph(GraphView): +class PersistentGraph(GraphView): """A temporal graph that allows edges and nodes to be deleted.""" def __new__(cls) -> PersistentGraph: """Create and return a new object. See help(type) for accurate signature.""" - def __reduce__(self): ... - def add_edge( - self, - timestamp: int, - src: str | int, - dst: str | int, - properties: Optional[PropInput] = None, - layer: Optional[str] = None, - secondary_index: Optional[int] = None, - ) -> None: + def __reduce__(self): + ... + + def add_edge(self, timestamp: int, src: str | int, dst: str | int, properties: Optional[PropInput] = None, layer: Optional[str] = None, secondary_index: Optional[int] = None) -> None: """ Adds a new edge with the given source and destination nodes and properties to the graph. @@ -1569,14 +1506,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. 
""" - def add_node( - self, - timestamp: TimeInput, - id: str | int, - properties: Optional[PropInput] = None, - node_type: Optional[str] = None, - secondary_index: Optional[int] = None, - ) -> None: + def add_node(self, timestamp: TimeInput, id: str | int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, secondary_index: Optional[int] = None) -> None: """ Adds a new node with the given id and properties to the graph. @@ -1594,12 +1524,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def add_properties( - self, - timestamp: TimeInput, - properties: dict, - secondary_index: Optional[int] = None, - ) -> None: + def add_properties(self, timestamp: TimeInput, properties: dict, secondary_index: Optional[int] = None) -> None: """ Adds properties to the graph. @@ -1675,14 +1600,7 @@ class PersistentGraph(GraphView): None: """ - def create_node( - self, - timestamp: TimeInput, - id: str | int, - properties: Optional[PropInput] = None, - node_type: Optional[str] = None, - secondary_index: Optional[int] = None, - ) -> MutableNode: + def create_node(self, timestamp: TimeInput, id: str | int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, secondary_index: Optional[int] = None) -> MutableNode: """ Creates a new node with the given id and properties to the graph. It fails if the node already exists. @@ -1700,14 +1618,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def delete_edge( - self, - timestamp: int, - src: str | int, - dst: str | int, - layer: Optional[str] = None, - secondary_index: Optional[int] = None, - ) -> MutableEdge: + def delete_edge(self, timestamp: int, src: str | int, dst: str | int, layer: Optional[str] = None, secondary_index: Optional[int] = None) -> MutableEdge: """ Deletes an edge given the timestamp, src and dst nodes and layer (optional) @@ -1820,9 +1731,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def import_edges_as( - self, edges: List[Edge], new_ids: list[Tuple[GID, GID]], merge: bool = False - ) -> None: + def import_edges_as(self, edges: List[Edge], new_ids: list[Tuple[GID, GID]], merge: bool = False) -> None: """ Import multiple edges into the graph with new ids. @@ -1859,9 +1768,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def import_node_as( - self, node: Node, new_id: str | int, merge: bool = False - ) -> Node: + def import_node_as(self, node: Node, new_id: str|int, merge: bool = False) -> Node: """ Import a single node into the graph with new id. @@ -1898,9 +1805,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def import_nodes_as( - self, nodes: List[Node], new_ids: List[str | int], merge: bool = False - ) -> None: + def import_nodes_as(self, nodes: List[Node], new_ids: List[str|int], merge: bool = False) -> None: """ Import multiple nodes into the graph with new ids. @@ -1934,15 +1839,7 @@ class PersistentGraph(GraphView): PersistentGraph: the loaded graph with initialised cache """ - def load_edge_deletions_from_pandas( - self, - df: DataFrame, - time: str, - src: str, - dst: str, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edge_deletions_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges deletions from a Pandas DataFrame into the graph. 
@@ -1961,15 +1858,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edge_deletions_from_parquet( - self, - parquet_path: str, - time: str, - src: str, - dst: str, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edge_deletions_from_parquet(self, parquet_path: str, time: str, src: str, dst: str, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges deletions from a Parquet file into the graph. @@ -1988,16 +1877,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edge_props_from_pandas( - self, - df: DataFrame, - src: str, - dst: str, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edge_props_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge properties from a Pandas DataFrame. @@ -2017,16 +1897,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edge_props_from_parquet( - self, - parquet_path: str, - src: str, - dst: str, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edge_props_from_parquet(self, parquet_path: str, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge properties from parquet file @@ -2046,18 +1917,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_pandas( - self, - df: DataFrame, - time: str, - src: str, - dst: str, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges from a Pandas DataFrame into the graph. @@ -2079,18 +1939,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_parquet( - self, - parquet_path: str, - time: str, - src: str, - dst: str, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edges_from_parquet(self, parquet_path: str, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges from a Parquet file into the graph. 
@@ -2124,15 +1973,7 @@ class PersistentGraph(GraphView): PersistentGraph: """ - def load_node_props_from_pandas( - self, - df: DataFrame, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_node_props_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load node properties from a Pandas DataFrame. @@ -2151,15 +1992,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_node_props_from_parquet( - self, - parquet_path: str, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_node_props_from_parquet(self, parquet_path: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load node properties from a parquet file. @@ -2178,17 +2011,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_pandas( - self, - df: DataFrame, - time: str, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_nodes_from_pandas(self, df: DataFrame, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load nodes from a Pandas DataFrame into the graph. @@ -2209,17 +2032,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_parquet( - self, - parquet_path: str, - time: str, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_nodes_from_parquet(self, parquet_path: str, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load nodes from a Parquet file into the graph. @@ -2310,7 +2123,7 @@ class PersistentGraph(GraphView): None: """ -class Node(object): +class Node(object): """A node (or node) in the graph.""" def __eq__(self, value): @@ -2485,9 +2298,7 @@ class Node(object): Node: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2568,7 +2379,7 @@ class Node(object): """ @property - def id(self) -> str | int: + def id(self) -> (str|int): """ Returns the id of the node. This is a unique identifier for the node. @@ -2732,12 +2543,7 @@ class Node(object): Properties: A list of properties. 
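A small sketch of reading a node back out, using only members shown in the Node stub above; `g` is assumed to be a populated Graph and the id is illustrative:

    v = g.node(1)          # Graph.node() gets the node with the specified id
    print(v.id)            # str | int, per the id property above
    props = v.properties   # Properties view over the node's properties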
""" - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -2868,7 +2674,7 @@ class Node(object): Optional[int]: """ -class Nodes(object): +class Nodes(object): """A list of nodes that can be iterated over.""" def __bool__(self): @@ -3057,9 +2863,7 @@ class Nodes(object): Nodes: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3296,12 +3100,7 @@ class Nodes(object): PropertiesView: A view of the node properties. """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3399,9 +3198,7 @@ class Nodes(object): Optional[datetime]: The earliest datetime that this Nodes is valid or None if the Nodes is valid for all times. """ - def to_df( - self, include_property_history: bool = False, convert_datetime: bool = False - ) -> DataFrame: + def to_df(self, include_property_history: bool = False, convert_datetime: bool = False) -> DataFrame: """ Converts the graph's nodes into a Pandas DataFrame. @@ -3462,7 +3259,8 @@ class Nodes(object): Optional[int]: """ -class PathFromNode(object): +class PathFromNode(object): + def __bool__(self): """True if self else False""" @@ -3619,9 +3417,7 @@ class PathFromNode(object): PathFromNode: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3831,12 +3627,7 @@ class PathFromNode(object): PropertiesView: """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3978,7 +3769,8 @@ class PathFromNode(object): Optional[int]: """ -class PathFromGraph(object): +class PathFromGraph(object): + def __bool__(self): """True if self else False""" @@ -4144,9 +3936,7 @@ class PathFromGraph(object): PathFromGraph: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. 
@@ -4381,12 +4171,7 @@ class PathFromGraph(object): NestedPropsIterable: """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -4528,7 +4313,8 @@ class PathFromGraph(object): Optional[int]: """ -class MutableNode(Node): +class MutableNode(Node): + def __repr__(self): """Return repr(self).""" @@ -4545,12 +4331,7 @@ class MutableNode(Node): None: """ - def add_updates( - self, - t: TimeInput, - properties: Optional[PropInput] = None, - secondary_index: Optional[int] = None, - ) -> None: + def add_updates(self, t: TimeInput, properties: Optional[PropInput] = None, secondary_index: Optional[int] = None) -> None: """ Add updates to a node in the graph at a specified time. This function allows for the addition of property updates to a node within the graph. The updates are time-stamped, meaning they are applied at the specified time. @@ -4595,7 +4376,7 @@ class MutableNode(Node): None: """ -class Edge(object): +class Edge(object): """ PyEdge is a Python class that represents an edge in the graph. An edge is a directed connection between two nodes. @@ -4782,9 +4563,7 @@ class Edge(object): Edge: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -4986,12 +4765,7 @@ class Edge(object): Properties: Properties on the Edge. """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -5140,7 +4914,7 @@ class Edge(object): Optional[int]: """ -class Edges(object): +class Edges(object): """A list of edges that can be iterated over.""" def __bool__(self): @@ -5325,9 +5099,7 @@ class Edges(object): Edges: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -5534,12 +5306,7 @@ class Edges(object): PropertiesView: """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. 
If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -5655,12 +5422,7 @@ class Edges(object): I64Iterable: """ - def to_df( - self, - include_property_history: bool = True, - convert_datetime: bool = False, - explode: bool = False, - ) -> DataFrame: + def to_df(self, include_property_history: bool = True, convert_datetime: bool = False, explode: bool = False) -> DataFrame: """ Converts the graph's edges into a Pandas DataFrame. @@ -5713,7 +5475,8 @@ class Edges(object): Optional[int]: """ -class NestedEdges(object): +class NestedEdges(object): + def __bool__(self): """True if self else False""" @@ -5888,9 +5651,7 @@ class NestedEdges(object): NestedEdges: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -6088,12 +5849,7 @@ class NestedEdges(object): PyNestedPropsIterable: """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -6242,7 +5998,8 @@ class NestedEdges(object): Optional[int]: """ -class MutableEdge(Edge): +class MutableEdge(Edge): + def __repr__(self): """Return repr(self).""" @@ -6260,13 +6017,7 @@ class MutableEdge(Edge): None: """ - def add_updates( - self, - t: TimeInput, - properties: Optional[PropInput] = None, - layer: Optional[str] = None, - secondary_index: Optional[int] = None, - ) -> None: + def add_updates(self, t: TimeInput, properties: Optional[PropInput] = None, layer: Optional[str] = None, secondary_index: Optional[int] = None) -> None: """ Add updates to an edge in the graph at a specified time. This function allows for the addition of property updates to an edge within the graph. The updates are time-stamped, meaning they are applied at the specified time. @@ -6313,7 +6064,7 @@ class MutableEdge(Edge): None: """ -class Properties(object): +class Properties(object): """A view of the properties of an entity""" def __contains__(self, key): @@ -6404,7 +6155,8 @@ class Properties(object): list[PropValue]: """ -class PyPropValueList(object): +class PyPropValueList(object): + def __eq__(self, value): """Return self==value.""" @@ -6440,8 +6192,12 @@ class PyPropValueList(object): PropValue: The average of each property values, or None if count is zero. """ - def collect(self): ... - def count(self): ... + def collect(self): + ... + + def count(self): + ... + def drop_none(self) -> list[PropValue]: """ Drop none. 
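A sketch of the tabular export declared above; `g` is any Graph, and explode=True is the variant that yields one row per temporal update rather than one row per edge:

    df = g.edges.to_df(include_property_history=True, explode=True)
    print(df.head())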
@@ -6490,7 +6246,7 @@ class PyPropValueList(object): PropValue: """ -class Metadata(object): +class Metadata(object): """A view of metadata of an entity""" def __contains__(self, key): @@ -6571,7 +6327,7 @@ class Metadata(object): list[PropValue]: """ -class TemporalProperties(object): +class TemporalProperties(object): """A view of the temporal properties of an entity""" def __contains__(self, key): @@ -6666,7 +6422,8 @@ class TemporalProperties(object): list[TemporalProp]: the list of property views """ -class PropertiesView(object): +class PropertiesView(object): + def __contains__(self, key): """Return bool(key in self).""" @@ -6749,7 +6506,7 @@ class PropertiesView(object): list[list[PropValue]]: """ -class TemporalProp(object): +class TemporalProp(object): """A view of a temporal property""" def __eq__(self, value): @@ -6910,7 +6667,8 @@ class TemporalProp(object): NumpyArray: """ -class WindowSet(object): +class WindowSet(object): + def __iter__(self): """Implement iter(self).""" @@ -6928,7 +6686,8 @@ class WindowSet(object): Iterable: The time index. """ -class IndexSpecBuilder(object): +class IndexSpecBuilder(object): + def __new__(cls, graph) -> IndexSpecBuilder: """Create and return a new object. See help(type) for accurate signature.""" @@ -7032,7 +6791,8 @@ class IndexSpecBuilder(object): dict[str, Any]: """ -class IndexSpec(object): +class IndexSpec(object): + def __repr__(self): """Return repr(self).""" diff --git a/python/python/raphtory/algorithms/__init__.pyi b/python/python/raphtory/algorithms/__init__.pyi index ae2892f399..c3005db67e 100644 --- a/python/python/raphtory/algorithms/__init__.pyi +++ b/python/python/raphtory/algorithms/__init__.pyi @@ -1,7 +1,6 @@ """ Algorithmic functions that can be run on Raphtory graphs """ - from __future__ import annotations ############################################################################### @@ -27,59 +26,8 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = [ - "dijkstra_single_source_shortest_paths", - "global_reciprocity", - "betweenness_centrality", - "all_local_reciprocity", - "triplet_count", - "local_triangle_count", - "average_degree", - "directed_graph_density", - "degree_centrality", - "max_degree", - "min_degree", - "max_out_degree", - "max_in_degree", - "min_out_degree", - "min_in_degree", - "pagerank", - "single_source_shortest_path", - "global_clustering_coefficient", - "temporally_reachable_nodes", - "temporal_bipartite_graph_projection", - "local_clustering_coefficient", - "local_clustering_coefficient_batch", - "weakly_connected_components", - "strongly_connected_components", - "in_components", - "in_component", - "out_components", - "out_component", - "fast_rp", - "global_temporal_three_node_motif", - "global_temporal_three_node_motif_multi", - "local_temporal_three_node_motifs", - "hits", - "balance", - "label_propagation", - "k_core", - "temporal_SEIR", - "louvain", - "fruchterman_reingold", - "cohesive_fruchterman_reingold", - "max_weight_matching", - "Matching", - "Infected", -] - -def dijkstra_single_source_shortest_paths( - graph: GraphView, - source: NodeInput, - targets: list[NodeInput], - direction: Direction = "both", - weight: str = "weight", -) -> NodeStateWeightedSP: +__all__ = ['dijkstra_single_source_shortest_paths', 'global_reciprocity', 'betweenness_centrality', 'all_local_reciprocity', 'triplet_count', 'local_triangle_count', 'average_degree', 'directed_graph_density', 'degree_centrality', 'max_degree', 'min_degree', 
'max_out_degree', 'max_in_degree', 'min_out_degree', 'min_in_degree', 'pagerank', 'single_source_shortest_path', 'global_clustering_coefficient', 'temporally_reachable_nodes', 'temporal_bipartite_graph_projection', 'local_clustering_coefficient', 'local_clustering_coefficient_batch', 'weakly_connected_components', 'strongly_connected_components', 'in_components', 'in_component', 'out_components', 'out_component', 'fast_rp', 'global_temporal_three_node_motif', 'global_temporal_three_node_motif_multi', 'local_temporal_three_node_motifs', 'hits', 'balance', 'label_propagation', 'k_core', 'temporal_SEIR', 'louvain', 'fruchterman_reingold', 'cohesive_fruchterman_reingold', 'max_weight_matching', 'Matching', 'Infected'] +def dijkstra_single_source_shortest_paths(graph: GraphView, source: NodeInput, targets: list[NodeInput], direction: Direction = "both", weight: str = 'weight') -> NodeStateWeightedSP: """ Finds the shortest paths from a single source to multiple targets in a graph. @@ -109,9 +57,7 @@ def global_reciprocity(graph: GraphView) -> float: float: reciprocity of the graph between 0 and 1. """ -def betweenness_centrality( - graph: GraphView, k: Optional[int] = None, normalized: bool = True -) -> NodeStateF64: +def betweenness_centrality(graph: GraphView, k: Optional[int] = None, normalized: bool = True) -> NodeStateF64: """ Computes the betweenness centrality for nodes in a given graph. @@ -279,13 +225,7 @@ def min_in_degree(graph: GraphView) -> int: int: value of the smallest indegree """ -def pagerank( - graph: GraphView, - iter_count: int = 20, - max_diff: Optional[float] = None, - use_l2_norm: bool = True, - damping_factor: float = 0.85, -) -> NodeStateF64: +def pagerank(graph: GraphView, iter_count: int = 20, max_diff: Optional[float] = None, use_l2_norm: bool = True, damping_factor: float = 0.85) -> NodeStateF64: """ Pagerank -- pagerank centrality value of the nodes in a graph @@ -306,9 +246,7 @@ def pagerank( NodeStateF64: Mapping of nodes to their pagerank value. """ -def single_source_shortest_path( - graph: GraphView, source: NodeInput, cutoff: Optional[int] = None -) -> NodeStateNodes: +def single_source_shortest_path(graph: GraphView, source: NodeInput, cutoff: Optional[int] = None) -> NodeStateNodes: """ Calculates the single source shortest paths from a given source node. @@ -339,13 +277,7 @@ def global_clustering_coefficient(graph: GraphView) -> float: [`Triplet Count`](triplet_count) """ -def temporally_reachable_nodes( - graph: GraphView, - max_hops: int, - start_time: int, - seed_nodes: list[NodeInput], - stop_nodes: Optional[list[NodeInput]] = None, -) -> NodeStateReachability: +def temporally_reachable_nodes(graph: GraphView, max_hops: int, start_time: int, seed_nodes: list[NodeInput], stop_nodes: Optional[list[NodeInput]] = None) -> NodeStateReachability: """ Temporally reachable nodes -- the nodes that are reachable by a time respecting path followed out from a set of seed nodes at a starting time. @@ -364,9 +296,7 @@ def temporally_reachable_nodes( NodeStateReachability: Mapping of nodes to their reachability history. """ -def temporal_bipartite_graph_projection( - graph: GraphView, delta: int, pivot_type: str -) -> Graph: +def temporal_bipartite_graph_projection(graph: GraphView, delta: int, pivot_type: str) -> Graph: """ Projects a temporal bipartite graph into an undirected temporal graph over the pivot node type. Let `G` be a bipartite graph with node types `A` and `B`. 
Given `delta > 0`, the projection graph `G'` pivoting over type `B` nodes, will make a connection between nodes `n1` and `n2` (of type `A`) at time `(t1 + t2)/2` if they respectively have an edge at time `t1`, `t2` with the same node of type `B` in `G`, and `|t2-t1| < delta`. @@ -479,14 +409,7 @@ def out_component(node: Node) -> NodeStateUsize: NodeStateUsize: A NodeState mapping the nodes in the out-component to their distance from the starting node. """ -def fast_rp( - graph: GraphView, - embedding_dim: int, - normalization_strength: float, - iter_weights: list[float], - seed: Optional[int] = None, - threads: Optional[int] = None, -) -> NodeStateListF64: +def fast_rp(graph: GraphView, embedding_dim: int, normalization_strength: float, iter_weights: list[float], seed: Optional[int] = None, threads: Optional[int] = None) -> NodeStateListF64: """ Computes embedding vectors for each vertex of an undirected/bidirectional graph according to the Fast RP algorithm. Original Paper: https://doi.org/10.48550/arXiv.1908.11512 @@ -502,9 +425,7 @@ def fast_rp( NodeStateListF64: Mapping from nodes to embedding vectors. """ -def global_temporal_three_node_motif( - graph: GraphView, delta: int, threads: Optional[int] = None -) -> list[int]: +def global_temporal_three_node_motif(graph: GraphView, delta: int, threads: Optional[int] = None) -> list[int]: """ Computes the number of three edge, up-to-three node delta-temporal motifs in the graph, using the algorithm of Paranjape et al, Motifs in Temporal Networks (2017). We point the reader to this reference for more information on the algorithm and background, but provide a short summary below. @@ -553,9 +474,7 @@ def global_temporal_three_node_motif( """ -def global_temporal_three_node_motif_multi( - graph: GraphView, deltas: list[int], threads: Optional[int] = None -) -> list[list[int]]: +def global_temporal_three_node_motif_multi(graph: GraphView, deltas: list[int], threads: Optional[int] = None) -> list[list[int]]: """ Computes the global counts of three-edge up-to-three node temporal motifs for a range of timescales. See `global_temporal_three_node_motif` for an interpretation of each row returned. @@ -568,9 +487,7 @@ def global_temporal_three_node_motif_multi( list[list[int]]: A list of 40d arrays, each array is the motif count for a particular value of delta, returned in the order that the deltas were given as input. """ -def local_temporal_three_node_motifs( - graph: GraphView, delta: int, threads=None -) -> NodeStateMotifs: +def local_temporal_three_node_motifs(graph: GraphView, delta: int, threads=None) -> NodeStateMotifs: """ Computes the number of each type of motif that each node participates in. See global_temporal_three_node_motifs for a summary of the motifs involved. @@ -586,9 +503,7 @@ def local_temporal_three_node_motifs( the motif. For two node motifs, both constituent nodes count the motif. For triangles, all three constituent nodes count the motif. 
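A hedged sketch of the motif counters documented above; delta is expressed in the same units as the graph's timestamps, so delta=3600 simply assumes second-resolution times:

    from raphtory.algorithms import (
        global_temporal_three_node_motif,
        local_temporal_three_node_motifs,
    )

    counts = global_temporal_three_node_motif(g, delta=3600)    # 40 motif-class counts
    per_node = local_temporal_three_node_motifs(g, delta=3600)  # per-node motif counts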
""" -def hits( - graph: GraphView, iter_count: int = 20, threads: Optional[int] = None -) -> NodeStateHits: +def hits(graph: GraphView, iter_count: int = 20, threads: Optional[int] = None) -> NodeStateHits: """ HITS (Hubs and Authority) Algorithm: @@ -607,9 +522,7 @@ def hits( NodeStateHits: A mapping from nodes their hub and authority scores """ -def balance( - graph: GraphView, name: str = "weight", direction: Direction = "both" -) -> NodeStateF64: +def balance(graph: GraphView, name: str = "weight", direction: Direction = "both") -> NodeStateF64: """ Sums the weights of edges in the graph based on the specified direction. @@ -628,9 +541,7 @@ def balance( """ -def label_propagation( - graph: GraphView, seed: Optional[bytes] = None -) -> list[set[Node]]: +def label_propagation(graph: GraphView, seed: Optional[bytes] = None) -> list[set[Node]]: """ Computes components using a label propagation algorithm @@ -643,9 +554,7 @@ def label_propagation( """ -def k_core( - graph: GraphView, k: int, iter_count: int, threads: Optional[int] = None -) -> list[Node]: +def k_core(graph: GraphView, k: int, iter_count: int, threads: Optional[int] = None) -> list[Node]: """ Determines which nodes are in the k-core for a given value of k @@ -660,15 +569,7 @@ def k_core( """ -def temporal_SEIR( - graph: GraphView, - seeds: int | float | list[NodeInput], - infection_prob: float, - initial_infection: int | str | datetime, - recovery_rate: float | None = None, - incubation_rate: float | None = None, - rng_seed: int | None = None, -) -> NodeStateSEIR: +def temporal_SEIR(graph: GraphView, seeds: int | float | list[NodeInput], infection_prob: float, initial_infection: int | str | datetime, recovery_rate: float | None = None, incubation_rate: float | None = None, rng_seed: int | None = None) -> NodeStateSEIR: """ Simulate an SEIR dynamic on the network @@ -698,12 +599,7 @@ def temporal_SEIR( """ -def louvain( - graph: GraphView, - resolution: float = 1.0, - weight_prop: str | None = None, - tol: None | float = None, -) -> NodeStateUsize: +def louvain(graph: GraphView, resolution: float = 1.0, weight_prop: str | None = None, tol: None | float = None) -> NodeStateUsize: """ Louvain algorithm for community detection @@ -717,14 +613,7 @@ def louvain( NodeStateUsize: Mapping of nodes to their community assignment """ -def fruchterman_reingold( - graph: GraphView, - iterations: int | None = 100, - scale: float | None = 1.0, - node_start_size: float | None = 1.0, - cooloff_factor: float | None = 0.95, - dt: float | None = 0.1, -) -> NodeLayout: +def fruchterman_reingold(graph: GraphView, iterations: int | None = 100, scale: float | None = 1.0, node_start_size: float | None = 1.0, cooloff_factor: float | None = 0.95, dt: float | None = 0.1) -> NodeLayout: """ Fruchterman Reingold layout algorithm @@ -740,14 +629,7 @@ def fruchterman_reingold( NodeLayout: A mapping from nodes to their [x, y] positions """ -def cohesive_fruchterman_reingold( - graph: GraphView, - iter_count: int = 100, - scale: float = 1.0, - node_start_size: float = 1.0, - cooloff_factor: float = 0.95, - dt: float = 0.1, -) -> NodeLayout: +def cohesive_fruchterman_reingold(graph: GraphView, iter_count: int = 100, scale: float = 1.0, node_start_size: float = 1.0, cooloff_factor: float = 0.95, dt: float = 0.1) -> NodeLayout: """ Cohesive version of `fruchterman_reingold` that adds virtual edges between isolated nodes Arguments: @@ -763,12 +645,7 @@ def cohesive_fruchterman_reingold( """ -def max_weight_matching( - graph: GraphView, - weight_prop: Optional[str] 
= None, - max_cardinality: bool = True, - verify_optimum_flag: bool = False, -) -> Matching: +def max_weight_matching(graph: GraphView, weight_prop: Optional[str] = None, max_cardinality: bool = True, verify_optimum_flag: bool = False) -> Matching: """ Compute a maximum-weighted matching in the general undirected weighted graph given by "edges". If `max_cardinality` is true, only @@ -805,7 +682,7 @@ def max_weight_matching( Matching: The matching """ -class Matching(object): +class Matching(object): """A Matching (i.e., a set of edges that do not share any nodes)""" def __bool__(self): @@ -877,7 +754,8 @@ class Matching(object): """ -class Infected(object): +class Infected(object): + def __repr__(self): """Return repr(self).""" diff --git a/python/python/raphtory/filter/__init__.pyi b/python/python/raphtory/filter/__init__.pyi index 5f33a18fcb..36d732c413 100644 --- a/python/python/raphtory/filter/__init__.pyi +++ b/python/python/raphtory/filter/__init__.pyi @@ -23,20 +23,9 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = [ - "FilterExpr", - "PropertyFilterOps", - "NodeFilterBuilder", - "Node", - "EdgeFilterOp", - "EdgeEndpoint", - "Edge", - "Property", - "Metadata", - "TemporalPropertyFilterBuilder", -] - -class FilterExpr(object): +__all__ = ['FilterExpr', 'PropertyFilterOps', 'NodeFilterBuilder', 'Node', 'EdgeFilterOp', 'EdgeEndpoint', 'Edge', 'Property', 'Metadata', 'TemporalPropertyFilterBuilder'] +class FilterExpr(object): + def __and__(self, value): """Return self&value.""" @@ -52,7 +41,8 @@ class FilterExpr(object): def __ror__(self, value): """Return value|self.""" -class PropertyFilterOps(object): +class PropertyFilterOps(object): + def __eq__(self, value): """Return self==value.""" @@ -74,7 +64,7 @@ class PropertyFilterOps(object): def contains(self, value) -> filter.FilterExpr: """ Returns a filter expression that checks if this object contains a specified property. - + Arguments: PropValue: @@ -82,9 +72,7 @@ class PropertyFilterOps(object): filter.FilterExpr: """ - def fuzzy_search( - self, prop_value: str, levenshtein_distance: int, prefix_match: bool - ) -> filter.FilterExpr: + def fuzzy_search(self, prop_value: str, levenshtein_distance: int, prefix_match: bool) -> filter.FilterExpr: """ Returns a filter expression that checks if the specified properties approximately match the specified string. @@ -94,7 +82,7 @@ class PropertyFilterOps(object): prop_value (str): Property to match against. levenshtein_distance (int): Maximum levenshtein distance between the specified prop_value and the result. prefix_match (bool): Enable prefix matching. - + Returns: filter.FilterExpr: """ @@ -102,7 +90,7 @@ class PropertyFilterOps(object): def is_in(self, values: list[PropValue]) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is in a specified iterable of properties. - + Arguments: values (list[PropValue]): @@ -113,7 +101,7 @@ class PropertyFilterOps(object): def is_none(self) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is none. - + Returns: filter.FilterExpr: """ @@ -121,7 +109,7 @@ class PropertyFilterOps(object): def is_not_in(self, values: list[PropValue]) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is not in a specified iterable of properties. 
- + Arguments: values (list[PropValue]): @@ -132,7 +120,7 @@ class PropertyFilterOps(object): def is_some(self) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is some. - + Returns: filter.FilterExpr: """ @@ -140,7 +128,7 @@ class PropertyFilterOps(object): def not_contains(self, value) -> filter.FilterExpr: """ Returns a filter expression that checks if this object does not contain a specified property. - + Arguments: PropValue: @@ -148,7 +136,7 @@ class PropertyFilterOps(object): filter.FilterExpr: """ -class NodeFilterBuilder(object): +class NodeFilterBuilder(object): """ A builder for constructing node filters @@ -184,9 +172,7 @@ class NodeFilterBuilder(object): filter.FilterExpr: """ - def fuzzy_search( - self, value, levenshtein_distance: int, prefix_match: bool - ) -> filter.FilterExpr: + def fuzzy_search(self, value, levenshtein_distance: int, prefix_match: bool) -> filter.FilterExpr: """ Returns a filter expression that checks if the specified properties approximately match the specified string. @@ -227,7 +213,7 @@ class NodeFilterBuilder(object): """ Returns a filter expression that checks if the specified iterable of strings does not contain a given value. - + Arguments: value (str): @@ -235,7 +221,8 @@ class NodeFilterBuilder(object): filter.FilterExpr: """ -class Node(object): +class Node(object): + @staticmethod def name(): """ @@ -254,7 +241,8 @@ class Node(object): NodeFilterBuilder: A filter builder for filtering by node type """ -class EdgeFilterOp(object): +class EdgeFilterOp(object): + def __eq__(self, value): """Return self==value.""" @@ -276,7 +264,7 @@ class EdgeFilterOp(object): def contains(self, value: str) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value contains the specified string. - + Arguments: value (str): @@ -284,9 +272,7 @@ class EdgeFilterOp(object): filter.FilterExpr: """ - def fuzzy_search( - self, value, levenshtein_distance: int, prefix_match: bool - ) -> filter.FilterExpr: + def fuzzy_search(self, value, levenshtein_distance: int, prefix_match: bool) -> filter.FilterExpr: """ Returns a filter expression that checks if the specified properties approximately match the specified string. @@ -296,7 +282,7 @@ class EdgeFilterOp(object): prop_value (str): Property to match against. levenshtein_distance (int): Maximum levenshtein distance between the specified prop_value and the result. prefix_match (bool): Enable prefix matching. - + Returns: filter.FilterExpr: """ @@ -304,7 +290,7 @@ class EdgeFilterOp(object): def is_in(self, values: list[str]) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is contained within the specified iterable of strings. - + Arguments: values (list[str]): @@ -315,7 +301,7 @@ class EdgeFilterOp(object): def is_not_in(self, values: list[str]) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value is not contained within the provided iterable of strings. - + Arguments: values (list[str]): @@ -326,7 +312,7 @@ class EdgeFilterOp(object): def not_contains(self, value: str) -> filter.FilterExpr: """ Returns a filter expression that checks if a given value does not contain the specified string. - + Arguments: value (str): @@ -334,16 +320,22 @@ class EdgeFilterOp(object): filter.FilterExpr: """ -class EdgeEndpoint(object): - def name(self): ... +class EdgeEndpoint(object): + + def name(self): + ... + +class Edge(object): -class Edge(object): @staticmethod - def dst(): ... + def dst(): + ... 
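The filter builders above compose into FilterExpr values with & and | (see the dunder methods on FilterExpr); a minimal sketch using only members shown in this stub, with an illustrative property name and values:

    from raphtory import filter

    expr = filter.Property("kind") == "person"
    expr = expr & filter.Node.name().is_in(["alice", "bob"])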
+ @staticmethod - def src(): ... + def src(): + ... -class Property(PropertyFilterOps): +class Property(PropertyFilterOps): """ Construct a property filter @@ -354,9 +346,10 @@ class Property(PropertyFilterOps): def __new__(cls, name: str) -> Property: """Create and return a new object. See help(type) for accurate signature.""" - def temporal(self): ... + def temporal(self): + ... -class Metadata(PropertyFilterOps): +class Metadata(PropertyFilterOps): """ Construct a metadata filter @@ -367,6 +360,10 @@ class Metadata(PropertyFilterOps): def __new__(cls, name: str) -> Metadata: """Create and return a new object. See help(type) for accurate signature.""" -class TemporalPropertyFilterBuilder(object): - def any(self): ... - def latest(self): ... +class TemporalPropertyFilterBuilder(object): + + def any(self): + ... + + def latest(self): + ... diff --git a/python/python/raphtory/graph_gen/__init__.pyi b/python/python/raphtory/graph_gen/__init__.pyi index 3a9f849f05..3ec394b85c 100644 --- a/python/python/raphtory/graph_gen/__init__.pyi +++ b/python/python/raphtory/graph_gen/__init__.pyi @@ -1,7 +1,6 @@ """ Generate Raphtory graphs from attachment models """ - from __future__ import annotations ############################################################################### @@ -28,8 +27,7 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ["random_attachment", "ba_preferential_attachment"] - +__all__ = ['random_attachment', 'ba_preferential_attachment'] def random_attachment(g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any = None): """ Generates a graph using the random attachment model @@ -48,9 +46,7 @@ def random_attachment(g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any None """ -def ba_preferential_attachment( - g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any = None -): +def ba_preferential_attachment(g: Any, nodes_to_add: Any, edges_per_step: Any, seed: Any = None): """ Generates a graph using the preferential attachment model. diff --git a/python/python/raphtory/graph_loader/__init__.pyi b/python/python/raphtory/graph_loader/__init__.pyi index e0b31f720f..10ba033c37 100644 --- a/python/python/raphtory/graph_loader/__init__.pyi +++ b/python/python/raphtory/graph_loader/__init__.pyi @@ -1,7 +1,6 @@ """ Load and save Raphtory graphs from/to file(s) """ - from __future__ import annotations ############################################################################### @@ -28,16 +27,7 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = [ - "lotr_graph", - "lotr_graph_with_props", - "neo4j_movie_graph", - "stable_coin_graph", - "reddit_hyperlink_graph", - "reddit_hyperlink_graph_local", - "karate_club_graph", -] - +__all__ = ['lotr_graph', 'lotr_graph_with_props', 'neo4j_movie_graph', 'stable_coin_graph', 'reddit_hyperlink_graph', 'reddit_hyperlink_graph_local', 'karate_club_graph'] def lotr_graph() -> Graph: """ Load the Lord of the Rings dataset into a graph. @@ -66,9 +56,7 @@ def lotr_graph_with_props() -> Graph: Graph: """ -def neo4j_movie_graph( - uri: str, username: str, password: str, database: str = ... -) -> Graph: +def neo4j_movie_graph(uri: str, username: str, password: str, database: str = ...) -> Graph: """ Returns the neo4j movie graph example. 
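A sketch of the attachment-model generators declared above; the sizes and seed are illustrative:

    from raphtory import Graph
    from raphtory.graph_gen import ba_preferential_attachment, random_attachment

    g = Graph()
    random_attachment(g, nodes_to_add=100, edges_per_step=2, seed=42)
    ba_preferential_attachment(g, nodes_to_add=50, edges_per_step=3, seed=42)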
diff --git a/python/python/raphtory/graphql/__init__.pyi b/python/python/raphtory/graphql/__init__.pyi index 4cd4d5c51d..b8315a8395 100644 --- a/python/python/raphtory/graphql/__init__.pyi +++ b/python/python/raphtory/graphql/__init__.pyi @@ -23,26 +23,8 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = [ - "GraphServer", - "RunningGraphServer", - "RaphtoryClient", - "RemoteGraph", - "RemoteEdge", - "RemoteNode", - "RemoteNodeAddition", - "RemoteUpdate", - "RemoteEdgeAddition", - "RemoteIndexSpec", - "PropsInput", - "SomePropertySpec", - "AllPropertySpec", - "encode_graph", - "decode_graph", - "schema", -] - -class GraphServer(object): +__all__ = ['GraphServer', 'RunningGraphServer', 'RaphtoryClient', 'RemoteGraph', 'RemoteEdge', 'RemoteNode', 'RemoteNodeAddition', 'RemoteUpdate', 'RemoteEdgeAddition', 'RemoteIndexSpec', 'PropsInput', 'SomePropertySpec', 'AllPropertySpec', 'encode_graph', 'decode_graph', 'schema'] +class GraphServer(object): """ A class for defining and running a Raphtory GraphQL server @@ -61,21 +43,7 @@ class GraphServer(object): create_index: """ - def __new__( - cls, - work_dir: str | PathLike, - cache_capacity: Optional[int] = None, - cache_tti_seconds: Optional[int] = None, - log_level: Optional[str] = None, - tracing: Optional[bool] = None, - otlp_agent_host: Optional[str] = None, - otlp_agent_port: Optional[str] = None, - otlp_tracing_service_name: Optional[str] = None, - auth_public_key: Any = None, - auth_enabled_for_reads: Any = None, - config_path: Optional[str | PathLike] = None, - create_index: Any = None, - ) -> GraphServer: + def __new__(cls, work_dir: str | PathLike, cache_capacity: Optional[int] = None, cache_tti_seconds: Optional[int] = None, log_level: Optional[str] = None, tracing: Optional[bool] = None, otlp_agent_host: Optional[str] = None, otlp_agent_port: Optional[str] = None, otlp_tracing_service_name: Optional[str] = None, auth_public_key: Any = None, auth_enabled_for_reads: Any = None, config_path: Optional[str | PathLike] = None, create_index: Any = None) -> GraphServer: """Create and return a new object. See help(type) for accurate signature.""" def run(self, port: int = 1736, timeout_ms: int = 180000) -> None: @@ -90,13 +58,7 @@ class GraphServer(object): None: """ - def set_embeddings( - self, - cache: str, - embedding: Optional[Callable] = None, - nodes: bool | str = True, - edges: bool | str = True, - ) -> GraphServer: + def set_embeddings(self, cache: str, embedding: Optional[Callable] = None, nodes: bool | str = True, edges: bool | str = True) -> GraphServer: """ Setup the server to vectorise graphs with a default template. @@ -132,9 +94,7 @@ class GraphServer(object): GraphServer: The server with indexing disabled """ - def with_vectorised_graphs( - self, graph_names: list[str], nodes: bool | str = True, edges: bool | str = True - ) -> GraphServer: + def with_vectorised_graphs(self, graph_names: list[str], nodes: bool | str = True, edges: bool | str = True) -> GraphServer: """ Vectorise a subset of the graphs of the server. @@ -147,11 +107,15 @@ class GraphServer(object): GraphServer: A new server object containing the vectorised graphs. """ -class RunningGraphServer(object): +class RunningGraphServer(object): """A Raphtory server handler that also enables querying the server""" - def __enter__(self): ... - def __exit__(self, _exc_type, _exc_val, _exc_tb): ... + def __enter__(self): + ... + + def __exit__(self, _exc_type, _exc_val, _exc_tb): + ... 
+ def get_client(self): """ Get the client for the server @@ -168,7 +132,7 @@ class RunningGraphServer(object): None: """ -class RaphtoryClient(object): +class RaphtoryClient(object): """ A client for handling GraphQL operations in the context of Raphtory. @@ -250,9 +214,7 @@ class RaphtoryClient(object): """ - def query( - self, query: str, variables: Optional[dict[str, Any]] = None - ) -> dict[str, Any]: + def query(self, query: str, variables: Optional[dict[str, Any]] = None) -> dict[str, Any]: """ Make a GraphQL query against the server. @@ -290,9 +252,7 @@ class RaphtoryClient(object): """ - def send_graph( - self, path: str, graph: Graph | PersistentGraph, overwrite: bool = False - ) -> dict[str, Any]: + def send_graph(self, path: str, graph: Graph | PersistentGraph, overwrite: bool = False) -> dict[str, Any]: """ Send a graph to the server @@ -305,9 +265,7 @@ class RaphtoryClient(object): dict[str, Any]: The data field from the graphQL response after executing the mutation. """ - def upload_graph( - self, path: str, file_path: str, overwrite: bool = False - ) -> dict[str, Any]: + def upload_graph(self, path: str, file_path: str, overwrite: bool = False) -> dict[str, Any]: """ Upload graph file from a path file_path on the client @@ -320,15 +278,9 @@ class RaphtoryClient(object): dict[str, Any]: The data field from the graphQL response after executing the mutation. """ -class RemoteGraph(object): - def add_edge( - self, - timestamp: int | str | datetime, - src: str | int, - dst: str | int, - properties: Optional[dict] = None, - layer: Optional[str] = None, - ) -> RemoteEdge: +class RemoteGraph(object): + + def add_edge(self, timestamp: int | str | datetime, src: str | int, dst: str | int, properties: Optional[dict] = None, layer: Optional[str] = None) -> RemoteEdge: """ Adds a new edge with the given source and destination nodes and properties to the remote graph. @@ -365,13 +317,7 @@ class RemoteGraph(object): None: """ - def add_node( - self, - timestamp: int | str | datetime, - id: str | int, - properties: Optional[dict] = None, - node_type: Optional[str] = None, - ) -> RemoteNode: + def add_node(self, timestamp: int | str | datetime, id: str | int, properties: Optional[dict] = None, node_type: Optional[str] = None) -> RemoteNode: """ Adds a new node with the given id and properties to the remote graph. @@ -408,13 +354,7 @@ class RemoteGraph(object): None: """ - def create_node( - self, - timestamp: int | str | datetime, - id: str | int, - properties: Optional[dict] = None, - node_type: Optional[str] = None, - ) -> RemoteNode: + def create_node(self, timestamp: int | str | datetime, id: str | int, properties: Optional[dict] = None, node_type: Optional[str] = None) -> RemoteNode: """ Create a new node with the given id and properties to the remote graph and fail if the node already exists. @@ -428,13 +368,7 @@ class RemoteGraph(object): RemoteNode: the new remote node """ - def delete_edge( - self, - timestamp: int, - src: str | int, - dst: str | int, - layer: Optional[str] = None, - ) -> RemoteEdge: + def delete_edge(self, timestamp: int, src: str | int, dst: str | int, layer: Optional[str] = None) -> RemoteEdge: """ Deletes an edge in the remote graph, given the timestamp, src and dst nodes and layer (optional) @@ -482,7 +416,7 @@ class RemoteGraph(object): None: """ -class RemoteEdge(object): +class RemoteEdge(object): """ A remote edge reference @@ -491,9 +425,7 @@ class RemoteEdge(object): and [RemoteGraph.delete_edge][raphtory.graphql.RemoteGraph.delete_edge]. 
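A hedged end-to-end sketch of the GraphQL pieces above; the URL argument to RaphtoryClient and the query text are assumptions not shown in this stub:

    from raphtory.graphql import GraphServer, RaphtoryClient

    server = GraphServer(work_dir="graphs")  # work_dir is the only required argument
    # server.run(port=1736)                  # run() blocks until the server stops

    client = RaphtoryClient("http://localhost:1736")      # constructor argument assumed
    result = client.query("{ graphs { list { name } } }")  # query shape assumed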
""" - def add_metadata( - self, properties: dict[str, PropValue], layer: Optional[str] = None - ) -> None: + def add_metadata(self, properties: dict[str, PropValue], layer: Optional[str] = None) -> None: """ Add metadata to the edge within the remote graph. This function is used to add metadata to an edge that does not @@ -507,12 +439,7 @@ class RemoteEdge(object): None: """ - def add_updates( - self, - t: int | str | datetime, - properties: Optional[dict[str, PropValue]] = None, - layer: Optional[str] = None, - ) -> None: + def add_updates(self, t: int | str | datetime, properties: Optional[dict[str, PropValue]] = None, layer: Optional[str] = None) -> None: """ Add updates to an edge in the remote graph at a specified time. @@ -543,9 +470,7 @@ class RemoteEdge(object): GraphError: If the operation fails. """ - def update_metadata( - self, properties: dict[str, PropValue], layer: Optional[str] = None - ) -> None: + def update_metadata(self, properties: dict[str, PropValue], layer: Optional[str] = None) -> None: """ Update metadata of an edge in the remote graph overwriting existing values. This function is used to add properties to an edge that does not @@ -559,7 +484,8 @@ class RemoteEdge(object): None: """ -class RemoteNode(object): +class RemoteNode(object): + def add_metadata(self, properties: dict[str, PropValue]) -> None: """ Add metadata to a node in the remote graph. @@ -573,9 +499,7 @@ class RemoteNode(object): None: """ - def add_updates( - self, t: int | str | datetime, properties: Optional[dict[str, PropValue]] = None - ) -> None: + def add_updates(self, t: int | str | datetime, properties: Optional[dict[str, PropValue]] = None) -> None: """ Add updates to a node in the remote graph at a specified time. This function allows for the addition of property updates to a node within the graph. The updates are time-stamped, meaning they are applied at the specified time. @@ -613,7 +537,7 @@ class RemoteNode(object): None: """ -class RemoteNodeAddition(object): +class RemoteNodeAddition(object): """ Node addition update @@ -624,16 +548,10 @@ class RemoteNodeAddition(object): updates (list[RemoteUpdate], optional): the temporal updates """ - def __new__( - cls, - name: GID, - node_type: Optional[str] = None, - metadata: Optional[PropInput] = None, - updates: Optional[list[RemoteUpdate]] = None, - ) -> RemoteNodeAddition: + def __new__(cls, name: GID, node_type: Optional[str] = None, metadata: Optional[PropInput] = None, updates: Optional[list[RemoteUpdate]] = None) -> RemoteNodeAddition: """Create and return a new object. See help(type) for accurate signature.""" -class RemoteUpdate(object): +class RemoteUpdate(object): """ A temporal update @@ -642,12 +560,10 @@ class RemoteUpdate(object): properties (PropInput, optional): the properties for the update """ - def __new__( - cls, time: TimeInput, properties: Optional[PropInput] = None - ) -> RemoteUpdate: + def __new__(cls, time: TimeInput, properties: Optional[PropInput] = None) -> RemoteUpdate: """Create and return a new object. 
See help(type) for accurate signature.""" -class RemoteEdgeAddition(object): +class RemoteEdgeAddition(object): """ An edge update @@ -659,17 +575,10 @@ class RemoteEdgeAddition(object): updates (list[RemoteUpdate], optional): the temporal updates for the edge """ - def __new__( - cls, - src: GID, - dst: GID, - layer: Optional[str] = None, - metadata: Optional[PropInput] = None, - updates: Optional[list[RemoteUpdate]] = None, - ) -> RemoteEdgeAddition: + def __new__(cls, src: GID, dst: GID, layer: Optional[str] = None, metadata: Optional[PropInput] = None, updates: Optional[list[RemoteUpdate]] = None) -> RemoteEdgeAddition: """Create and return a new object. See help(type) for accurate signature.""" -class RemoteIndexSpec(object): +class RemoteIndexSpec(object): """ Create a RemoteIndexSpec specifying which node and edge properties to index. @@ -681,7 +590,7 @@ class RemoteIndexSpec(object): def __new__(cls, node_props: PropsInput, edge_props: PropsInput) -> RemoteIndexSpec: """Create and return a new object. See help(type) for accurate signature.""" -class PropsInput(object): +class PropsInput(object): """ Create a PropsInput by choosing to include all/some properties explicitly. @@ -693,14 +602,10 @@ class PropsInput(object): ValueError: If neither all and some are specified. """ - def __new__( - cls, - all: Optional[AllPropertySpec] = None, - some: Optional[SomePropertySpec] = None, - ) -> PropsInput: + def __new__(cls, all: Optional[AllPropertySpec] = None, some: Optional[SomePropertySpec] = None) -> PropsInput: """Create and return a new object. See help(type) for accurate signature.""" -class SomePropertySpec(object): +class SomePropertySpec(object): """ Create a SomePropertySpec by explicitly listing metadata and/or temporal property names. @@ -709,12 +614,10 @@ class SomePropertySpec(object): properties (list[str]): Temporal property names. Defaults to []. """ - def __new__( - cls, metadata: list[str] = [], properties: list[str] = [] - ) -> SomePropertySpec: + def __new__(cls, metadata: list[str] = [], properties: list[str] = []) -> SomePropertySpec: """Create and return a new object. See help(type) for accurate signature.""" -class AllPropertySpec(object): +class AllPropertySpec(object): """ Specifies that **all** properties should be included when creating an index. Use one of the predefined variants: ALL , ALL_METADATA , or ALL_TEMPORAL . 
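A sketch of composing the index inputs above; the property names are illustrative, and AllPropertySpec.ALL is one of the predefined variants named in its docstring:

    from raphtory.graphql import (
        AllPropertySpec, PropsInput, RemoteIndexSpec, SomePropertySpec,
    )

    # Index every node property, but only the listed edge properties.
    spec = RemoteIndexSpec(
        node_props=PropsInput(all=AllPropertySpec.ALL),
        edge_props=PropsInput(some=SomePropertySpec(metadata=["kind"], properties=["weight"])),
    )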
diff --git a/python/python/raphtory/iterables/__init__.pyi b/python/python/raphtory/iterables/__init__.pyi index 2a80bbc5cb..ec2c4d6ee9 100644 --- a/python/python/raphtory/iterables/__init__.pyi +++ b/python/python/raphtory/iterables/__init__.pyi @@ -23,33 +23,9 @@ from os import PathLike import networkx as nx # type: ignore import pyvis # type: ignore -__all__ = [ - "NestedUtcDateTimeIterable", - "NestedGIDIterable", - "GIDIterable", - "StringIterable", - "OptionArcStringIterable", - "UsizeIterable", - "OptionI64Iterable", - "NestedOptionArcStringIterable", - "NestedStringIterable", - "NestedOptionI64Iterable", - "NestedI64VecIterable", - "NestedUsizeIterable", - "BoolIterable", - "ArcStringIterable", - "NestedVecUtcDateTimeIterable", - "OptionVecUtcDateTimeIterable", - "GIDGIDIterable", - "NestedGIDGIDIterable", - "NestedBoolIterable", - "U64Iterable", - "OptionUtcDateTimeIterable", - "ArcStringVecIterable", - "NestedArcStringVecIterable", -] - -class NestedUtcDateTimeIterable(object): +__all__ = ['NestedUtcDateTimeIterable', 'NestedGIDIterable', 'GIDIterable', 'StringIterable', 'OptionArcStringIterable', 'UsizeIterable', 'OptionI64Iterable', 'NestedOptionArcStringIterable', 'NestedStringIterable', 'NestedOptionI64Iterable', 'NestedI64VecIterable', 'NestedUsizeIterable', 'BoolIterable', 'ArcStringIterable', 'NestedVecUtcDateTimeIterable', 'OptionVecUtcDateTimeIterable', 'GIDGIDIterable', 'NestedGIDGIDIterable', 'NestedBoolIterable', 'U64Iterable', 'OptionUtcDateTimeIterable', 'ArcStringVecIterable', 'NestedArcStringVecIterable'] +class NestedUtcDateTimeIterable(object): + def __eq__(self, value): """Return self==value.""" @@ -77,9 +53,11 @@ class NestedUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class NestedGIDIterable(object): -class NestedGIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -107,11 +85,17 @@ class NestedGIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self): + ... + + def max(self): + ... + + def min(self): + ... + +class GIDIterable(object): -class GIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -139,11 +123,17 @@ class GIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self): + ... + + def max(self): + ... + + def min(self): + ... + +class StringIterable(object): -class StringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -171,9 +161,11 @@ class StringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class OptionArcStringIterable(object): -class OptionArcStringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -201,9 +193,11 @@ class OptionArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class UsizeIterable(object): -class UsizeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -231,13 +225,23 @@ class UsizeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def mean(self): ... - def min(self): ... - def sum(self): ... + def collect(self): + ... + + def max(self): + ... + + def mean(self): + ... + + def min(self): + ... + + def sum(self): + ... 
+ +class OptionI64Iterable(object): -class OptionI64Iterable(object): def __eq__(self, value): """Return self==value.""" @@ -265,11 +269,17 @@ class OptionI64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self): + ... + + def max(self): + ... + + def min(self): + ... + +class NestedOptionArcStringIterable(object): -class NestedOptionArcStringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -297,9 +307,11 @@ class NestedOptionArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class NestedStringIterable(object): -class NestedStringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -327,9 +339,11 @@ class NestedStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class NestedOptionI64Iterable(object): -class NestedOptionI64Iterable(object): def __eq__(self, value): """Return self==value.""" @@ -357,11 +371,17 @@ class NestedOptionI64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self): + ... + + def max(self): + ... + + def min(self): + ... + +class NestedI64VecIterable(object): -class NestedI64VecIterable(object): def __eq__(self, value): """Return self==value.""" @@ -389,9 +409,11 @@ class NestedI64VecIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class NestedUsizeIterable(object): -class NestedUsizeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -419,13 +441,23 @@ class NestedUsizeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def mean(self): ... - def min(self): ... - def sum(self): ... + def collect(self): + ... + + def max(self): + ... + + def mean(self): + ... + + def min(self): + ... + + def sum(self): + ... + +class BoolIterable(object): -class BoolIterable(object): def __eq__(self, value): """Return self==value.""" @@ -453,9 +485,11 @@ class BoolIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class ArcStringIterable(object): -class ArcStringIterable(object): def __iter__(self): """Implement iter(self).""" @@ -465,9 +499,11 @@ class ArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class NestedVecUtcDateTimeIterable(object): -class NestedVecUtcDateTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -495,9 +531,11 @@ class NestedVecUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class OptionVecUtcDateTimeIterable(object): -class OptionVecUtcDateTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -525,9 +563,11 @@ class OptionVecUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class GIDGIDIterable(object): -class GIDGIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -555,11 +595,17 @@ class GIDGIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self): + ... + + def max(self): + ... 
+ + def min(self): + ... + +class NestedGIDGIDIterable(object): -class NestedGIDGIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -587,11 +633,17 @@ class NestedGIDGIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self): + ... + + def max(self): + ... + + def min(self): + ... + +class NestedBoolIterable(object): -class NestedBoolIterable(object): def __eq__(self, value): """Return self==value.""" @@ -619,9 +671,11 @@ class NestedBoolIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class U64Iterable(object): -class U64Iterable(object): def __eq__(self, value): """Return self==value.""" @@ -649,13 +703,23 @@ class U64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def mean(self): ... - def min(self): ... - def sum(self): ... + def collect(self): + ... + + def max(self): + ... + + def mean(self): + ... + + def min(self): + ... + + def sum(self): + ... + +class OptionUtcDateTimeIterable(object): -class OptionUtcDateTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -683,9 +747,11 @@ class OptionUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class ArcStringVecIterable(object): -class ArcStringVecIterable(object): def __eq__(self, value): """Return self==value.""" @@ -713,9 +779,11 @@ class ArcStringVecIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class NestedArcStringVecIterable(object): -class NestedArcStringVecIterable(object): def __eq__(self, value): """Return self==value.""" @@ -743,4 +811,5 @@ class NestedArcStringVecIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... 
diff --git a/python/python/raphtory/node_state/__init__.pyi b/python/python/raphtory/node_state/__init__.pyi index 456f7240dd..469a550b2e 100644 --- a/python/python/raphtory/node_state/__init__.pyi +++ b/python/python/raphtory/node_state/__init__.pyi @@ -23,42 +23,9 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = [ - "NodeGroups", - "DegreeView", - "NodeStateUsize", - "NodeStateU64", - "NodeStateOptionI64", - "IdView", - "NodeStateGID", - "EarliestTimeView", - "LatestTimeView", - "NameView", - "NodeStateString", - "EarliestDateTimeView", - "LatestDateTimeView", - "NodeStateOptionDateTime", - "HistoryView", - "EdgeHistoryCountView", - "NodeStateListI64", - "HistoryDateTimeView", - "NodeStateOptionListDateTime", - "NodeTypeView", - "NodeStateOptionStr", - "NodeStateListDateTime", - "NodeStateWeightedSP", - "NodeStateF64", - "NodeStateNodes", - "NodeStateReachability", - "NodeStateListF64", - "NodeStateMotifs", - "NodeStateHits", - "NodeStateSEIR", - "NodeLayout", - "NodeStateF64String", -] - -class NodeGroups(object): +__all__ = ['NodeGroups', 'DegreeView', 'NodeStateUsize', 'NodeStateU64', 'NodeStateOptionI64', 'IdView', 'NodeStateGID', 'EarliestTimeView', 'LatestTimeView', 'NameView', 'NodeStateString', 'EarliestDateTimeView', 'LatestDateTimeView', 'NodeStateOptionDateTime', 'HistoryView', 'EdgeHistoryCountView', 'NodeStateListI64', 'HistoryDateTimeView', 'NodeStateOptionListDateTime', 'NodeTypeView', 'NodeStateOptionStr', 'NodeStateListDateTime', 'NodeStateWeightedSP', 'NodeStateF64', 'NodeStateNodes', 'NodeStateReachability', 'NodeStateListF64', 'NodeStateMotifs', 'NodeStateHits', 'NodeStateSEIR', 'NodeLayout', 'NodeStateF64String'] +class NodeGroups(object): + def __bool__(self): """True if self else False""" @@ -101,7 +68,7 @@ class NodeGroups(object): Iterator[Tuple[Any, GraphView]]: Iterator over subgraphs with corresponding value """ -class DegreeView(object): +class DegreeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -263,9 +230,7 @@ class DegreeView(object): DegreeView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -420,12 +385,7 @@ class DegreeView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. 
If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -614,7 +574,8 @@ class DegreeView(object): Optional[int]: """ -class NodeStateUsize(object): +class NodeStateUsize(object): + def __eq__(self, value): """Return self==value.""" @@ -807,7 +768,8 @@ class NodeStateUsize(object): Iterator[int]: Iterator over values """ -class NodeStateU64(object): +class NodeStateU64(object): + def __eq__(self, value): """Return self==value.""" @@ -992,7 +954,8 @@ class NodeStateU64(object): Iterator[int]: Iterator over values """ -class NodeStateOptionI64(object): +class NodeStateOptionI64(object): + def __eq__(self, value): """Return self==value.""" @@ -1034,9 +997,7 @@ class NodeStateOptionI64(object): NodeStateOptionI64: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[Optional[int]] = None - ) -> Optional[Optional[int]]: + def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: """ Get value for node @@ -1170,7 +1131,7 @@ class NodeStateOptionI64(object): Iterator[Optional[int]]: Iterator over values """ -class IdView(object): +class IdView(object): """A lazy view over node values""" def __eq__(self, value): @@ -1356,7 +1317,8 @@ class IdView(object): Iterator[GID]: Iterator over values """ -class NodeStateGID(object): +class NodeStateGID(object): + def __eq__(self, value): """Return self==value.""" @@ -1524,7 +1486,7 @@ class NodeStateGID(object): Iterator[GID]: Iterator over values """ -class EarliestTimeView(object): +class EarliestTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -1686,9 +1648,7 @@ class EarliestTimeView(object): EarliestTimeView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -1707,9 +1667,7 @@ class EarliestTimeView(object): WindowSet: A `WindowSet` object. """ - def get( - self, node: NodeInput, default: Optional[Optional[int]] = None - ) -> Optional[Optional[int]]: + def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: """ Get value for node @@ -1837,12 +1795,7 @@ class EarliestTimeView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -2022,7 +1975,7 @@ class EarliestTimeView(object): Optional[int]: """ -class LatestTimeView(object): +class LatestTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -2184,9 +2137,7 @@ class LatestTimeView(object): LatestTimeView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2205,9 +2156,7 @@ class LatestTimeView(object): WindowSet: A `WindowSet` object. 
""" - def get( - self, node: NodeInput, default: Optional[Optional[int]] = None - ) -> Optional[Optional[int]]: + def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: """ Get value for node @@ -2335,12 +2284,7 @@ class LatestTimeView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -2520,7 +2464,7 @@ class LatestTimeView(object): Optional[int]: """ -class NameView(object): +class NameView(object): """A lazy view over node values""" def __eq__(self, value): @@ -2714,7 +2658,8 @@ class NameView(object): Iterator[str]: Iterator over values """ -class NodeStateString(object): +class NodeStateString(object): + def __eq__(self, value): """Return self==value.""" @@ -2890,7 +2835,7 @@ class NodeStateString(object): Iterator[str]: Iterator over values """ -class EarliestDateTimeView(object): +class EarliestDateTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -3052,9 +2997,7 @@ class EarliestDateTimeView(object): EarliestDateTimeView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3073,9 +3016,7 @@ class EarliestDateTimeView(object): WindowSet: A `WindowSet` object. """ - def get( - self, node: NodeInput, default: Optional[Optional[datetime]] = None - ) -> Optional[Optional[datetime]]: + def get(self, node: NodeInput, default: Optional[Optional[datetime]] = None) -> Optional[Optional[datetime]]: """ Get value for node @@ -3203,12 +3144,7 @@ class EarliestDateTimeView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3388,7 +3324,7 @@ class EarliestDateTimeView(object): Optional[int]: """ -class LatestDateTimeView(object): +class LatestDateTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -3550,9 +3486,7 @@ class LatestDateTimeView(object): LatestDateTimeView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3571,9 +3505,7 @@ class LatestDateTimeView(object): WindowSet: A `WindowSet` object. 
""" - def get( - self, node: NodeInput, default: Optional[Optional[datetime]] = None - ) -> Optional[Optional[datetime]]: + def get(self, node: NodeInput, default: Optional[Optional[datetime]] = None) -> Optional[Optional[datetime]]: """ Get value for node @@ -3701,12 +3633,7 @@ class LatestDateTimeView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3886,7 +3813,8 @@ class LatestDateTimeView(object): Optional[int]: """ -class NodeStateOptionDateTime(object): +class NodeStateOptionDateTime(object): + def __eq__(self, value): """Return self==value.""" @@ -3928,9 +3856,7 @@ class NodeStateOptionDateTime(object): NodeStateOptionDateTime: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[Optional[datetime]] = None - ) -> Optional[Optional[datetime]]: + def get(self, node: NodeInput, default: Optional[Optional[datetime]] = None) -> Optional[Optional[datetime]]: """ Get value for node @@ -4064,7 +3990,7 @@ class NodeStateOptionDateTime(object): Iterator[Optional[datetime]]: Iterator over values """ -class HistoryView(object): +class HistoryView(object): """A lazy view over node values""" def __eq__(self, value): @@ -4226,9 +4152,7 @@ class HistoryView(object): HistoryView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -4247,9 +4171,7 @@ class HistoryView(object): WindowSet: A `WindowSet` object. """ - def get( - self, node: NodeInput, default: Optional[list[int]] = None - ) -> Optional[list[int]]: + def get(self, node: NodeInput, default: Optional[list[int]] = None) -> Optional[list[int]]: """ Get value for node @@ -4369,12 +4291,7 @@ class HistoryView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -4554,7 +4471,7 @@ class HistoryView(object): Optional[int]: """ -class EdgeHistoryCountView(object): +class EdgeHistoryCountView(object): """A lazy view over node values""" def __eq__(self, value): @@ -4716,9 +4633,7 @@ class EdgeHistoryCountView(object): EdgeHistoryCountView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. 
@@ -4865,12 +4780,7 @@ class EdgeHistoryCountView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -5059,7 +4969,8 @@ class EdgeHistoryCountView(object): Optional[int]: """ -class NodeStateListI64(object): +class NodeStateListI64(object): + def __eq__(self, value): """Return self==value.""" @@ -5101,9 +5012,7 @@ class NodeStateListI64(object): NodeStateListI64: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[list[int]] = None - ) -> Optional[list[int]]: + def get(self, node: NodeInput, default: Optional[list[int]] = None) -> Optional[list[int]]: """ Get value for node @@ -5229,7 +5138,7 @@ class NodeStateListI64(object): Iterator[list[int]]: Iterator over values """ -class HistoryDateTimeView(object): +class HistoryDateTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -5391,9 +5300,7 @@ class HistoryDateTimeView(object): HistoryDateTimeView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -5412,9 +5319,7 @@ class HistoryDateTimeView(object): WindowSet: A `WindowSet` object. """ - def get( - self, node: NodeInput, default: Optional[Optional[list[datetime]]] = None - ) -> Optional[Optional[list[datetime]]]: + def get(self, node: NodeInput, default: Optional[Optional[list[datetime]]] = None) -> Optional[Optional[list[datetime]]]: """ Get value for node @@ -5534,12 +5439,7 @@ class HistoryDateTimeView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. 
If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -5719,7 +5619,8 @@ class HistoryDateTimeView(object): Optional[int]: """ -class NodeStateOptionListDateTime(object): +class NodeStateOptionListDateTime(object): + def __eq__(self, value): """Return self==value.""" @@ -5761,9 +5662,7 @@ class NodeStateOptionListDateTime(object): NodeStateOptionListDateTime: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[Optional[list[datetime]]] = None - ) -> Optional[Optional[list[datetime]]]: + def get(self, node: NodeInput, default: Optional[Optional[list[datetime]]] = None) -> Optional[Optional[list[datetime]]]: """ Get value for node @@ -5889,7 +5788,7 @@ class NodeStateOptionListDateTime(object): Iterator[Optional[list[datetime]]]: Iterator over values """ -class NodeTypeView(object): +class NodeTypeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -5949,9 +5848,7 @@ class NodeTypeView(object): NodeStateOptionStr: the computed `NodeState` """ - def get( - self, node: NodeInput, default: Optional[Optional[str]] = None - ) -> Optional[Optional[str]]: + def get(self, node: NodeInput, default: Optional[Optional[str]] = None) -> Optional[Optional[str]]: """ Get value for node @@ -6085,7 +5982,8 @@ class NodeTypeView(object): Iterator[Optional[str]]: Iterator over values """ -class NodeStateOptionStr(object): +class NodeStateOptionStr(object): + def __eq__(self, value): """Return self==value.""" @@ -6127,9 +6025,7 @@ class NodeStateOptionStr(object): NodeStateOptionStr: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[Optional[str]] = None - ) -> Optional[Optional[str]]: + def get(self, node: NodeInput, default: Optional[Optional[str]] = None) -> Optional[Optional[str]]: """ Get value for node @@ -6263,7 +6159,8 @@ class NodeStateOptionStr(object): Iterator[Optional[str]]: Iterator over values """ -class NodeStateListDateTime(object): +class NodeStateListDateTime(object): + def __eq__(self, value): """Return self==value.""" @@ -6305,9 +6202,7 @@ class NodeStateListDateTime(object): NodeStateListDateTime: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[list[datetime]] = None - ) -> Optional[list[datetime]]: + def get(self, node: NodeInput, default: Optional[list[datetime]] = None) -> Optional[list[datetime]]: """ Get value for node @@ -6433,7 +6328,8 @@ class NodeStateListDateTime(object): Iterator[list[datetime]]: Iterator over values """ -class NodeStateWeightedSP(object): +class NodeStateWeightedSP(object): + def __eq__(self, value): """Return self==value.""" @@ -6464,9 +6360,7 @@ class NodeStateWeightedSP(object): def __repr__(self): """Return repr(self).""" - def get( - self, node: NodeInput, default: Optional[Tuple[float, Nodes]] = None - ) -> Optional[Tuple[float, Nodes]]: + def get(self, node: NodeInput, default: Optional[Tuple[float, Nodes]] = None) -> Optional[Tuple[float, Nodes]]: """ Get value for node @@ -6521,7 +6415,8 @@ class NodeStateWeightedSP(object): Iterator[Tuple[float, Nodes]]: Iterator over values """ -class NodeStateF64(object): +class NodeStateF64(object): + def __eq__(self, value): """Return self==value.""" @@ -6706,7 +6601,8 @@ class NodeStateF64(object): Iterator[float]: Iterator over values """ -class NodeStateNodes(object): +class NodeStateNodes(object): + def __eq__(self, value): """Return self==value.""" @@ -6792,7 +6688,8 @@ class 
NodeStateNodes(object): Iterator[Nodes]: Iterator over values """ -class NodeStateReachability(object): +class NodeStateReachability(object): + def __eq__(self, value): """Return self==value.""" @@ -6823,9 +6720,7 @@ class NodeStateReachability(object): def __repr__(self): """Return repr(self).""" - def get( - self, node: NodeInput, default: Optional[list[Tuple[int, str]]] = None - ) -> Optional[list[Tuple[int, str]]]: + def get(self, node: NodeInput, default: Optional[list[Tuple[int, str]]] = None) -> Optional[list[Tuple[int, str]]]: """ Get value for node @@ -6880,7 +6775,8 @@ class NodeStateReachability(object): Iterator[list[Tuple[int, str]]]: Iterator over values """ -class NodeStateListF64(object): +class NodeStateListF64(object): + def __eq__(self, value): """Return self==value.""" @@ -6911,9 +6807,7 @@ class NodeStateListF64(object): def __repr__(self): """Return repr(self).""" - def get( - self, node: NodeInput, default: Optional[list[float]] = None - ) -> Optional[list[float]]: + def get(self, node: NodeInput, default: Optional[list[float]] = None) -> Optional[list[float]]: """ Get value for node @@ -6968,7 +6862,8 @@ class NodeStateListF64(object): Iterator[list[float]]: Iterator over values """ -class NodeStateMotifs(object): +class NodeStateMotifs(object): + def __eq__(self, value): """Return self==value.""" @@ -7010,9 +6905,7 @@ class NodeStateMotifs(object): NodeStateMotifs: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[list[int]] = None - ) -> Optional[list[int]]: + def get(self, node: NodeInput, default: Optional[list[int]] = None) -> Optional[list[int]]: """ Get value for node @@ -7138,7 +7031,8 @@ class NodeStateMotifs(object): Iterator[list[int]]: Iterator over values """ -class NodeStateHits(object): +class NodeStateHits(object): + def __eq__(self, value): """Return self==value.""" @@ -7180,9 +7074,7 @@ class NodeStateHits(object): NodeStateHits: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[Tuple[float, float]] = None - ) -> Optional[Tuple[float, float]]: + def get(self, node: NodeInput, default: Optional[Tuple[float, float]] = None) -> Optional[Tuple[float, float]]: """ Get value for node @@ -7308,7 +7200,8 @@ class NodeStateHits(object): Iterator[Tuple[float, float]]: Iterator over values """ -class NodeStateSEIR(object): +class NodeStateSEIR(object): + def __eq__(self, value): """Return self==value.""" @@ -7350,9 +7243,7 @@ class NodeStateSEIR(object): NodeStateSEIR: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[Infected] = None - ) -> Optional[Infected]: + def get(self, node: NodeInput, default: Optional[Infected] = None) -> Optional[Infected]: """ Get value for node @@ -7478,7 +7369,8 @@ class NodeStateSEIR(object): Iterator[Infected]: Iterator over values """ -class NodeLayout(object): +class NodeLayout(object): + def __eq__(self, value): """Return self==value.""" @@ -7509,9 +7401,7 @@ class NodeLayout(object): def __repr__(self): """Return repr(self).""" - def get( - self, node: NodeInput, default: Optional[list[float]] = None - ) -> Optional[list[float]]: + def get(self, node: NodeInput, default: Optional[list[float]] = None) -> Optional[list[float]]: """ Get value for node @@ -7566,7 +7456,8 @@ class NodeLayout(object): Iterator[list[float]]: Iterator over values """ -class NodeStateF64String(object): +class NodeStateF64String(object): + def __eq__(self, value): """Return self==value.""" @@ -7597,9 +7488,7 @@ 
class NodeStateF64String(object): def __repr__(self): """Return repr(self).""" - def get( - self, node: NodeInput, default: Optional[Tuple[float, str]] = None - ) -> Optional[Tuple[float, str]]: + def get(self, node: NodeInput, default: Optional[Tuple[float, str]] = None) -> Optional[Tuple[float, str]]: """ Get value for node diff --git a/python/python/raphtory/vectors/__init__.pyi b/python/python/raphtory/vectors/__init__.pyi index 6b9e515fac..bd615cda2f 100644 --- a/python/python/raphtory/vectors/__init__.pyi +++ b/python/python/raphtory/vectors/__init__.pyi @@ -23,15 +23,10 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ["VectorisedGraph", "Document", "Embedding", "VectorSelection"] +__all__ = ['VectorisedGraph', 'Document', 'Embedding', 'VectorSelection'] +class VectorisedGraph(object): -class VectorisedGraph(object): - def edges_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> VectorSelection: + def edges_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> VectorSelection: """ Search the top scoring edges according to `query` with no more than `limit` edges @@ -47,12 +42,7 @@ class VectorisedGraph(object): def empty_selection(self): """Return an empty selection of documents""" - def entities_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> VectorSelection: + def entities_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> VectorSelection: """ Search the top scoring entities according to `query` with no more than `limit` entities @@ -65,12 +55,7 @@ class VectorisedGraph(object): VectorSelection: The vector selection resulting from the search """ - def nodes_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> VectorSelection: + def nodes_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> VectorSelection: """ Search the top scoring nodes according to `query` with no more than `limit` nodes @@ -83,7 +68,7 @@ class VectorisedGraph(object): VectorSelection: The vector selection resulting from the search """ -class Document(object): +class Document(object): """ A Document @@ -124,11 +109,13 @@ class Document(object): Optional[Any]: """ -class Embedding(object): +class Embedding(object): + def __repr__(self): """Return repr(self).""" -class VectorSelection(object): +class VectorSelection(object): + def add_edges(self, edges: list) -> None: """ Add all the documents associated with the `edges` to the current selection @@ -174,9 +161,7 @@ class VectorSelection(object): list[Edge]: list of edges in the current selection """ - def expand( - self, hops: int, window: Optional[Tuple[int | str, int | str]] = None - ) -> None: + def expand(self, hops: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: """ Add all the documents `hops` hops away to the selection @@ -193,12 +178,7 @@ class VectorSelection(object): None: """ - def expand_edges_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> None: + def expand_edges_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: """ Add the top `limit` adjacent edges with higher 
score for `query` to the selection @@ -213,12 +193,7 @@ class VectorSelection(object): None: """ - def expand_entities_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> None: + def expand_entities_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: """ Add the top `limit` adjacent entities with higher score for `query` to the selection @@ -240,12 +215,7 @@ class VectorSelection(object): None: """ - def expand_nodes_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> None: + def expand_nodes_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: """ Add the top `limit` adjacent nodes with higher score for `query` to the selection From 55bb5537e58139617f6a99d3a82422c0c5e2a031 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Mon, 24 Nov 2025 01:51:40 -0500 Subject: [PATCH 10/55] Removed num_rows from DFView. No longer calculating/storing the total number of rows. --- raphtory/src/io/arrow/dataframe.rs | 11 +--- raphtory/src/io/arrow/df_loaders.rs | 62 ++++++------------- raphtory/src/io/parquet_loaders.rs | 12 ++-- raphtory/src/python/graph/io/arrow_loaders.rs | 52 +++------------- .../src/python/graph/io/pandas_loaders.rs | 2 - 5 files changed, 34 insertions(+), 105 deletions(-) diff --git a/raphtory/src/io/arrow/dataframe.rs b/raphtory/src/io/arrow/dataframe.rs index 04f06d78b8..b0bfe6d1d4 100644 --- a/raphtory/src/io/arrow/dataframe.rs +++ b/raphtory/src/io/arrow/dataframe.rs @@ -5,7 +5,7 @@ use crate::{ use arrow::{ array::{cast::AsArray, Array, ArrayRef, PrimitiveArray}, compute::cast, - datatypes::{DataType, Date64Type, Int64Type, TimeUnit, TimestampMillisecondType, UInt64Type}, + datatypes::{DataType, Date64Type, Int64Type, TimeUnit, TimestampMillisecondType}, }; use itertools::Itertools; use raphtory_core::utils::time::TryIntoTime; @@ -15,14 +15,12 @@ use std::fmt::{Debug, Formatter}; pub struct DFView { pub names: Vec, pub chunks: I, - pub num_rows: usize, } impl Debug for DFView { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("DFView") .field("names", &self.names) - .field("num_rows", &self.num_rows) .finish() } } @@ -50,15 +48,10 @@ where .ok_or_else(|| GraphError::ColumnDoesNotExist(name.to_string())) } - pub fn is_empty(&self) -> bool { - self.num_rows == 0 - } - - pub fn new(names: Vec, chunks: I, num_rows: usize) -> Self { + pub fn new(names: Vec, chunks: I) -> Self { Self { names, chunks, - num_rows, } } } diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index d83987c49b..bc5993999e 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -25,11 +25,10 @@ use rayon::prelude::*; use std::{collections::HashMap, sync::atomic::Ordering}; #[cfg(feature = "python")] -fn build_progress_bar(des: String, num_rows: usize) -> Result { +fn build_progress_bar(des: String) -> Result { BarBuilder::default() .desc(des) .animation(kdam::Animation::FillUp) - .total(num_rows) .unit_scale(true) .build() .map_err(|_| GraphError::TqdmError) @@ -61,9 +60,6 @@ pub(crate) fn load_nodes_from_df< node_type_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { - if df_view.is_empty() { - return Ok(()); - } let properties_indices = properties .iter() .map(|name| df_view.get_index(name)) @@ -87,7 +83,7 @@ pub(crate) fn load_nodes_from_df< 
})?; #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading nodes".to_string(), df_view.num_rows)?; + let mut pb = build_progress_bar("Loading nodes".to_string())?; let mut node_col_resolved = vec![]; let mut node_type_col_resolved = vec![]; @@ -100,11 +96,11 @@ pub(crate) fn load_nodes_from_df< .collect::>() }); - let mut start_id = graph - .reserve_event_ids(df_view.num_rows) - .map_err(into_graph_err)?; for chunk in df_view.chunks { let df = chunk?; + let start_id = graph + .reserve_event_ids(df.len()) + .map_err(into_graph_err)?; let prop_cols = combine_properties_arrow(properties, &properties_indices, &df, |key, dtype| { graph @@ -209,7 +205,6 @@ pub(crate) fn load_nodes_from_df< #[cfg(feature = "python")] let _ = pb.update(df.len()); - start_id += df.len(); } Ok(()) } @@ -228,9 +223,6 @@ pub fn load_edges_from_df< layer_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { - if df_view.is_empty() { - return Ok(()); - } let properties_indices = properties .iter() .map(|name| df_view.get_index(name)) @@ -255,12 +247,9 @@ pub fn load_edges_from_df< })?; #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading edges".to_string(), df_view.num_rows)?; + let mut pb = build_progress_bar("Loading edges".to_string())?; #[cfg(feature = "python")] let _ = pb.update(0); - let mut start_idx = graph - .reserve_event_ids(df_view.num_rows) - .map_err(into_graph_err)?; let mut src_col_resolved = vec![]; let mut dst_col_resolved = vec![]; @@ -276,6 +265,9 @@ pub fn load_edges_from_df< for chunk in df_view.chunks { let df = chunk?; + let start_idx = graph + .reserve_event_ids(df.len()) + .map_err(into_graph_err)?; let prop_cols = combine_properties_arrow(properties, &properties_indices, &df, |key, dtype| { graph @@ -474,7 +466,6 @@ pub fn load_edges_from_df< } } - start_idx += df.len(); #[cfg(feature = "python")] let _ = pb.update(df.len()); } @@ -492,22 +483,19 @@ pub(crate) fn load_edge_deletions_from_df< layer_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { - if df_view.is_empty() { - return Ok(()); - } let src_index = df_view.get_index(src)?; let dst_index = df_view.get_index(dst)?; let time_index = df_view.get_index(time)?; let layer_index = layer_col.map(|layer_col| df_view.get_index(layer_col.as_ref())); let layer_index = layer_index.transpose()?; #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading edge deletions".to_string(), df_view.num_rows)?; - let mut start_idx = graph - .reserve_event_ids(df_view.num_rows) - .map_err(into_graph_err)?; + let mut pb = build_progress_bar("Loading edge deletions".to_string())?; for chunk in df_view.chunks { let df = chunk?; + let start_idx = graph + .reserve_event_ids(df.len()) + .map_err(into_graph_err)?; let layer = lift_layer_col(layer, layer_index, &df)?; let src_col = df.node_col(src_index)?; let dst_col = df.node_col(dst_index)?; @@ -526,7 +514,6 @@ pub(crate) fn load_edge_deletions_from_df< })?; #[cfg(feature = "python")] let _ = pb.update(df.len()); - start_idx += df.len(); } Ok(()) @@ -544,9 +531,6 @@ pub(crate) fn load_node_props_from_df< shared_metadata: Option<&HashMap>, graph: &G, ) -> Result<(), GraphError> { - if df_view.is_empty() { - return Ok(()); - } let metadata_indices = metadata .iter() .map(|name| df_view.get_index(name)) @@ -565,7 +549,7 @@ pub(crate) fn load_node_props_from_df< })?; #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading node properties".to_string(), df_view.num_rows)?; + let mut pb = build_progress_bar("Loading node properties".to_string())?; 
let mut node_col_resolved = vec![]; let mut node_type_col_resolved = vec![]; @@ -667,9 +651,6 @@ pub(crate) fn load_edges_props_from_df< layer_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { - if df_view.is_empty() { - return Ok(()); - } let metadata_indices = metadata .iter() .map(|name| df_view.get_index(name)) @@ -689,7 +670,7 @@ pub(crate) fn load_edges_props_from_df< })?; #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading edge properties".to_string(), df_view.num_rows)?; + let mut pb = build_progress_bar("Loading edge properties".to_string())?; #[cfg(feature = "python")] let _ = pb.update(0); @@ -831,9 +812,6 @@ pub(crate) fn load_graph_props_from_df< metadata: Option<&[&str]>, graph: &G, ) -> Result<(), GraphError> { - if df_view.is_empty() { - return Ok(()); - } let properties = properties.unwrap_or(&[]); let metadata = metadata.unwrap_or(&[]); @@ -849,14 +827,13 @@ pub(crate) fn load_graph_props_from_df< let time_index = df_view.get_index(time)?; #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading graph properties".to_string(), df_view.num_rows)?; - - let mut start_id = graph - .reserve_event_ids(df_view.num_rows) - .map_err(into_graph_err)?; + let mut pb = build_progress_bar("Loading graph properties".to_string())?; for chunk in df_view.chunks { let df = chunk?; + let start_id = graph + .reserve_event_ids(df.len()) + .map_err(into_graph_err)?; let prop_cols = combine_properties_arrow(properties, &properties_indices, &df, |key, dtype| { graph @@ -896,7 +873,6 @@ pub(crate) fn load_graph_props_from_df< })?; #[cfg(feature = "python")] let _ = pb.update(df.len()); - start_id += df.len(); } Ok(()) } diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index f1be90c151..4b2c805bb4 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -85,16 +85,14 @@ pub fn load_edges_from_parquet< let all_files = get_parquet_file_paths(parquet_path)? 
.into_iter() .map(|file| { - let (names, _, num_rows) = + let (names, _, _) = read_parquet_file(file, (!cols_to_check.is_empty()).then_some(&cols_to_check))?; - Ok::<_, GraphError>((names, num_rows)) + Ok::<_, GraphError>(names) }) .collect::, _>>()?; - let mut count_rows = 0; let mut all_names = Vec::new(); - for (names, num_rows) in all_files { - count_rows += num_rows; + for names in all_files { if all_names.is_empty() { all_names = names; } else if all_names != names { @@ -116,7 +114,6 @@ pub fn load_edges_from_parquet< let df_view = DFView { names: all_names, chunks: all_df_view, - num_rows: count_rows, }; load_edges_from_df( @@ -266,7 +263,7 @@ pub(crate) fn process_parquet_file_to_df( col_names: Option<&[&str]>, batch_size: Option, ) -> Result>>, GraphError> { - let (names, chunks, num_rows) = read_parquet_file(parquet_file_path, col_names)?; + let (names, chunks, _) = read_parquet_file(parquet_file_path, col_names)?; let names: Vec = names .into_iter() @@ -289,7 +286,6 @@ pub(crate) fn process_parquet_file_to_df( Ok(DFView { names, chunks, - num_rows, }) } diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index e484312b9c..fa506d5422 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -19,7 +19,6 @@ use arrow::{ }, datatypes::SchemaRef, }; -use itertools::Either; use pyo3::{ prelude::*, types::{PyCapsule, PyDict}, @@ -181,7 +180,7 @@ pub(crate) fn load_edge_props_from_arrow_c_stream< ) } -/// Can handle any object that provides the \_\_arrow_c_stream__() interface and \_\_len__() function +/// Can handle any object that provides the \_\_arrow_c_stream__() interface pub(crate) fn process_arrow_c_stream_df<'a>( df: &Bound<'a, PyAny>, col_names: Vec<&str>, @@ -189,10 +188,9 @@ pub(crate) fn process_arrow_c_stream_df<'a>( let py = df.py(); is_jupyter(py); - // Expect an object that can use the Arrow C Stream interface if !df.hasattr("__arrow_c_stream__")? { return Err(PyErr::from(GraphError::LoadFailure( - "arrow object must implement __arrow_c_stream__".to_string(), + "Object must implement __arrow_c_stream__".to_string(), ))); } @@ -225,52 +223,23 @@ pub(crate) fn process_arrow_c_stream_df<'a>( indices.push(idx); } } - let len_from_python: Option = if df.hasattr("__len__")? { - Some(df.call_method0("__len__")?.extract()?) 
- } else { - None - }; - - if let Some(num_rows) = len_from_python { - let chunks = reader - .into_iter() - .map(move |batch_res: Result| { - let batch = batch_res.map_err(|e| { - GraphError::LoadFailure(format!( - "Arrow stream error while reading a batch: {}", - e.to_string() - )) - })?; - let chunk_arrays = indices - .iter() - .map(|&idx| batch.column(idx).clone()) - .collect::>(); - Ok(DFChunk::new(chunk_arrays)) - }); - Ok(DFView::new(names, Either::Left(chunks), num_rows)) - } else { - // if the python data source has no __len__ method, collect the iterator so we can calculate the num_rows() of each batch - let mut num_rows = 0usize; - let mut df_chunks = Vec::new(); - for batch_res in reader { + let chunks = reader + .into_iter() + .map(move |batch_res: Result| { let batch = batch_res.map_err(|e| { GraphError::LoadFailure(format!( "Arrow stream error while reading a batch: {}", e.to_string() )) })?; - num_rows += batch.num_rows(); let chunk_arrays = indices .iter() .map(|&idx| batch.column(idx).clone()) .collect::>(); - df_chunks.push(Ok(DFChunk::new(chunk_arrays))); - } - - let chunks = Either::Right(df_chunks.into_iter()); - Ok(DFView::new(names, chunks, num_rows)) - } + Ok(DFChunk::new(chunk_arrays)) + }); + Ok(DFView::new(names, chunks)) } pub(crate) fn process_arrow_py_df<'a>( @@ -307,7 +276,7 @@ pub(crate) fn process_arrow_py_df<'a>( let columns = rb.getattr("columns")?.extract::>>()?; let chunk = (0..names_len) .map(|i| { - // `rb.column(i)` -> pyarrow.Array + // rb.column(i) -> pyarrow.Array let array = &columns[i]; let arr = array_to_rust(array).map_err(GraphError::from)?; Ok::<_, GraphError>(arr) @@ -317,11 +286,8 @@ pub(crate) fn process_arrow_py_df<'a>( Ok(DFChunk { chunk }) }); - let num_rows: usize = df.call_method0("__len__")?.extract()?; - Ok(DFView { names, chunks, - num_rows, }) } diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index aa311b16dc..6a831bf8d7 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -239,12 +239,10 @@ pub(crate) fn process_pandas_py_df<'a>( Ok(DFChunk { chunk }) }); - let num_rows: usize = dropped_df.call_method0("__len__")?.extract()?; Ok(DFView { names, chunks, - num_rows, }) } From c171dff74399553c566149845c7434bdf8d5e91c Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Mon, 24 Nov 2025 03:13:38 -0500 Subject: [PATCH 11/55] Cleaned up load_*_from_df functions. load_edge_props/load_node_props renamed to load_edge_metadata/load_node_metadata. 
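
This makes the loader naming consistent: load_edges -> load_edges_from_df,
load_nodes -> load_nodes_from_df, load_edge_props -> load_edge_metadata_from_df,
and load_node_props -> load_node_metadata_from_df (the pandas/parquet variants
follow the same load_*_metadata_* pattern). The per-format entry points
(load_edges_from_polars, load_edges_from_arrow, load_edges_from_duckdb,
load_edges_from_fireducks) are dropped in favour of the single *_from_df
functions, which accept any object implementing __arrow_c_stream__().

A minimal usage sketch (the dataframe contents and column names below are
illustrative only, not part of this patch):

    import polars as pl
    from raphtory import Graph

    # Any Arrow-stream-exportable source works here: pandas, FireDucks,
    # Polars, pyarrow tables, DuckDB relations, ...
    df = pl.DataFrame(
        {
            "time": [1, 2],
            "src": ["a", "b"],
            "dst": ["b", "c"],
            "weight": [0.5, 1.5],
        }
    )

    g = Graph()
    g.load_edges_from_df(
        data=df, time="time", src="src", dst="dst", properties=["weight"]
    )
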
--- dataset_tests/ingestion_benchmarks.py | 103 ++---- python/python/raphtory/__init__.pyi | 86 ++--- raphtory/src/io/arrow/dataframe.rs | 5 +- raphtory/src/io/arrow/df_loaders.rs | 16 +- raphtory/src/io/parquet_loaders.rs | 7 +- raphtory/src/python/graph/graph.rs | 305 +++--------------- .../src/python/graph/graph_with_deletions.rs | 4 +- raphtory/src/python/graph/io/arrow_loaders.rs | 108 ++----- .../src/python/graph/io/pandas_loaders.rs | 7 +- raphtory/src/serialise/parquet/mod.rs | 4 +- 10 files changed, 127 insertions(+), 518 deletions(-) diff --git a/dataset_tests/ingestion_benchmarks.py b/dataset_tests/ingestion_benchmarks.py index 6a876ca33b..f05e6d8289 100644 --- a/dataset_tests/ingestion_benchmarks.py +++ b/dataset_tests/ingestion_benchmarks.py @@ -23,45 +23,24 @@ def bench_pandas(df: pd.DataFrame) -> float: def bench_pandas_streaming(df: pd.DataFrame) -> float: g = Graph() start = time.perf_counter() - g.load_edges(data_source=df, time="block_timestamp", src="inputs_address", dst="outputs_address") + g.load_edges_from_df(data=df, time="block_timestamp", src="inputs_address", dst="outputs_address") total = time.perf_counter() - start print(f"[pandas streaming] ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}") del g gc.collect() return total -def bench_fire_ducks_pandas(df: fpd.frame.DataFrame) -> float: - assert "fireducks.pandas.frame.DataFrame" in str(type(df)) - g = Graph() - start = time.perf_counter() - g.load_edges_from_fireducks(df=df, time="block_timestamp", src="inputs_address", dst="outputs_address") - total = time.perf_counter() - start - print(f"[fireducks] ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}") - del g - gc.collect() - return total - def bench_fire_ducks_pandas_streaming(df: fpd.frame.DataFrame) -> float: assert "fireducks.pandas.frame.DataFrame" in str(type(df)) g = Graph() start = time.perf_counter() - g.load_edges(data_source=df, time="block_timestamp", src="inputs_address", dst="outputs_address") + g.load_edges_from_df(data=df, time="block_timestamp", src="inputs_address", dst="outputs_address") total = time.perf_counter() - start print(f"[fireducks streaming] ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}") del g gc.collect() return total -def bench_polars(df: pl.DataFrame) -> float: - g = Graph() - start = time.perf_counter() - g.load_edges_from_polars(df=df, time="block_timestamp", src="inputs_address", dst="outputs_address") - total = time.perf_counter() - start - print(f"[polars] ingestion took {total:.3f}s") - del g - gc.collect() - return total - def bench_polars_streaming(df: pl.DataFrame) -> float: g = Graph() start = time.perf_counter() @@ -72,17 +51,6 @@ def bench_polars_streaming(df: pl.DataFrame) -> float: gc.collect() return total -def bench_arrow(df: pl.DataFrame) -> float: - g = Graph() - df_arrow_from_pl = df.to_arrow() - start = time.perf_counter() - g.load_edges_from_arrow(df=df_arrow_from_pl, time="block_timestamp", src="inputs_address", dst="outputs_address") - total = time.perf_counter() - start - print(f"[arrow] ingestion took {total:.3f}s") - del g, df_arrow_from_pl - gc.collect() - return total - def bench_arrow_streaming(df: pl.DataFrame) -> float: g = Graph() df_arrow_from_pl = df.to_arrow() @@ -94,26 +62,13 @@ def bench_arrow_streaming(df: pl.DataFrame) -> float: gc.collect() return total -def bench_duckdb(df: pl.DataFrame) -> float: - 
g = Graph() - df_arrow_from_pl = df.to_arrow() - duckdb_df = duckdb.sql("SELECT * FROM df_arrow_from_pl") - start = time.perf_counter() - # internally calls fetch_arrow_table() on duckdb_df - g.load_edges_from_duckdb(df=duckdb_df, time="block_timestamp", src="inputs_address", dst="outputs_address") - total = time.perf_counter() - start - print(f"[duckdb] ingestion took {total:.3f}s") - del g, df_arrow_from_pl, duckdb_df - gc.collect() - return total - def bench_duckdb_streaming(df: pl.DataFrame) -> float: g = Graph() df_arrow_from_pl = df.to_arrow() duckdb_df = duckdb.sql("SELECT * FROM df_arrow_from_pl") start = time.perf_counter() # uses the __arrow_c_stream__() interface internally - g.load_edges(data_source=duckdb_df, time="block_timestamp", src="inputs_address", dst="outputs_address") + g.load_edges_from_df(data=duckdb_df, time="block_timestamp", src="inputs_address", dst="outputs_address") total = time.perf_counter() - start print(f"[duckdb streaming] ingestion took {total:.3f}s") del g, df_arrow_from_pl, duckdb_df @@ -128,13 +83,13 @@ def ingestion_speed_btc_dataset(): pandas_ingestion_times = [] pandas_streaming_ingestion_times = [] - fireducks_ingestion_times = [] + # fireducks_ingestion_times = [] fireducks_streaming_ingestion_times = [] - polars_ingestion_times = [] + # polars_ingestion_times = [] polars_streaming_ingestion_times = [] - arrow_ingestion_times = [] + # arrow_ingestion_times = [] arrow_streaming_ingestion_times = [] - duckdb_ingestion_times = [] + # duckdb_ingestion_times = [] duckdb_streaming_ingestion_times = [] for _ in range(5): @@ -148,66 +103,46 @@ def ingestion_speed_btc_dataset(): pandas_streaming_ingestion_times.append(pandas_streaming_time) gc.collect() - # 2.1) Fireducks Pandas ingestion - fpd_time = bench_fire_ducks_pandas(df_fireducks) - fireducks_ingestion_times.append(fpd_time) - gc.collect() - - # 2.2) Fireducks Pandas ingestion streaming + # 2) Fireducks Pandas ingestion streaming fpd_streaming_time = bench_fire_ducks_pandas_streaming(df_fireducks) fireducks_streaming_ingestion_times.append(fpd_streaming_time) gc.collect() - # 3.1) Polars ingestion (to_pandas() called internally) - polars_time = bench_polars(df=df_pl) - polars_ingestion_times.append(polars_time) - gc.collect() - - # 3.2) Polars ingestion streaming (no internal to_pandas() call) + # 3) Polars ingestion streaming (no internal to_pandas() call) polars_streaming_time = bench_polars_streaming(df=df_pl) polars_streaming_ingestion_times.append(polars_streaming_time) gc.collect() - # 4.1) Arrow ingestion - arrow_time = bench_arrow(df_pl) - arrow_ingestion_times.append(arrow_time) - gc.collect() - - # 4.2) Arrow ingestion streaming + # 4) Arrow ingestion streaming arrow_streaming_time = bench_arrow_streaming(df_pl) arrow_streaming_ingestion_times.append(arrow_streaming_time) gc.collect() - # 5.1) DuckDB ingestion (fetch_arrow_table() called internally) - duckdb_time = bench_duckdb(df_pl) - duckdb_ingestion_times.append(duckdb_time) - gc.collect() - - # 5.2) DuckDB streaming ingestion (no internal fetch_arrow_table() call) + # 5) DuckDB streaming ingestion (no internal fetch_arrow_table() call) duckdb_streaming_time = bench_duckdb_streaming(df_pl) duckdb_streaming_ingestion_times.append(duckdb_streaming_time) gc.collect() formatted_pandas = [f"{num:.3f}s" for num in pandas_ingestion_times] formatted_pandas_streaming = [f"{num:.3f}s" for num in pandas_streaming_ingestion_times] - formatted_fireducks = [f"{num:.3f}s" for num in fireducks_ingestion_times] + # formatted_fireducks = 
[f"{num:.3f}s" for num in fireducks_ingestion_times] formatted_fireducks_streaming = [f"{num:.3f}s" for num in fireducks_streaming_ingestion_times] - formatted_polars = [f"{num:.3f}s" for num in polars_ingestion_times] + # formatted_polars = [f"{num:.3f}s" for num in polars_ingestion_times] formatted_polars_streaming = [f"{num:.3f}s" for num in polars_streaming_ingestion_times] - formatted_arrow = [f"{num:.3f}s" for num in arrow_ingestion_times] + # formatted_arrow = [f"{num:.3f}s" for num in arrow_ingestion_times] formatted_arrow_streaming = [f"{num:.3f}s" for num in arrow_streaming_ingestion_times] - formatted_duckdb = [f"{num:.3f}s" for num in duckdb_ingestion_times] + # formatted_duckdb = [f"{num:.3f}s" for num in duckdb_ingestion_times] formatted_duckdb_streaming = [f"{num:.3f}s" for num in duckdb_streaming_ingestion_times] print(f"Pandas: {formatted_pandas}") print(f"Pandas streaming: {formatted_pandas_streaming}") - print(f"Fireducks: {formatted_fireducks}") + # print(f"Fireducks: {formatted_fireducks}") print(f"Fireducks streaming: {formatted_fireducks_streaming}") - print(f"Polars: {formatted_polars}") + # print(f"Polars: {formatted_polars}") print(f"Polars streaming: {formatted_polars_streaming}") - print(f"Arrow: {formatted_arrow}") + # print(f"Arrow: {formatted_arrow}") print(f"Arrow streaming: {formatted_arrow_streaming}") - print(f"DuckDB: {formatted_duckdb}") + # print(f"DuckDB: {formatted_duckdb}") print(f"DuckDB streaming: {formatted_duckdb_streaming}") diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index af6defe1d3..e689b5f3a7 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1089,33 +1089,29 @@ class Graph(GraphView): Graph: the loaded graph with initialised cache """ - def load_edge_props(self, data_source: Any, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edge_metadata_from_df(self, data: Any, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge properties into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - This includes, but is not limited to: - * Pandas dataframes - * FireDucks(.pandas) dataframes - * Polars dataframes - * Arrow tables - * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) Arguments: - data_source (Any): The data source containing edge information. + data (Any): The data source containing edge information. src (str): The column name for the source node. dst (str): The column name for the destination node. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. layer (str, optional): The edge layer name. Defaults to None. - layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. + layer_col (str, optional): The edge layer column name in a dataframe. Defaults to None. 
Returns: - None: This function does not return a value, if the operation is successful. + None: This function does not return a value if the operation is successful. Raises: GraphError: If the operation fails. """ - def load_edge_props_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edge_metadata_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge properties from a Pandas DataFrame. @@ -1135,7 +1131,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edge_props_from_parquet(self, parquet_path: str, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edge_metadata_from_parquet(self, parquet_path: str, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge properties from parquet file @@ -1155,43 +1151,30 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edges(self, data_source: Any, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edges_from_df(self, data: Any, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - This includes, but is not limited to: - * Pandas dataframes - * FireDucks(.pandas) dataframes - * Polars dataframes - * Arrow tables - * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) Arguments: - data_source (Any): The data source containing the edges. + data (Any): The data source containing the edges. time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) - layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + layer (str, optional): A value to use as the layer for all edges. Defaults to None. Cannot be used in combination with layer_col. + layer_col (str, optional): The edge layer column name in a dataframe. 
Defaults to None. Cannot be used in combination with layer. Returns: - None: This function does not return a value, if the operation is successful. + None: This function does not return a value if the operation is successful. Raises: GraphError: If the operation fails. """ - def load_edges_from_arrow(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None, stream_data=False): - ... - - def load_edges_from_duckdb(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None, stream_data=False): - ... - - def load_edges_from_fireducks(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None, stream_data=False): - ... - def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges from a Pandas DataFrame into the graph. @@ -1236,9 +1219,6 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_polars(self, df, time, src, dst, properties=None, metadata=None, shared_metadata=None, layer=None, layer_col=None, stream_data=False): - ... - @staticmethod def load_from_file(path: str) -> Graph: """ @@ -1251,26 +1231,22 @@ class Graph(GraphView): Graph: """ - def load_node_props(self, data_source: Any, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_node_metadata_from_df(self, data: Any, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load node properties into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - This includes, but is not limited to: - * Pandas dataframes - * FireDucks(.pandas) dataframes - * Polars dataframes - * Arrow tables - * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) Arguments: - data_source (Any): The data source containing node information. + data (Any): The data source containing node information. id(str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. + node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. Returns: - None: This function does not return a value, if the operation is successful. 
+ None: This function does not return a value if the operation is successful. Raises: GraphError: If the operation fails. @@ -1314,28 +1290,24 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_nodes(self, data_source: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_nodes_from_df(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - This includes, but is not limited to: - * Pandas dataframes - * FireDucks(.pandas) dataframes - * Polars dataframes - * Arrow tables - * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) Arguments: - data_source (Any): The data source containing the nodes. + data (Any): The data source containing the nodes. time (str): The column name for the timestamps. id (str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. + node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. Returns: - None: This function does not return a value, if the operation is successful. + None: This function does not return a value if the operation is successful. Raises: GraphError: If the operation fails. 
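
(Usage sketch of the renamed generic loaders, for reference only — toy data, not part of this diff. Any source exposing __arrow_c_stream__(), such as a polars DataFrame, can be passed as `data`:)

import polars as pl
from raphtory import Graph

edges = pl.DataFrame(
    {
        "time": [1, 2, 3],
        "src": [1, 2, 3],
        "dst": [2, 3, 4],
        "value": [10.0, 20.0, 30.0],
    }
)

g = Graph()
# polars implements the Arrow PyCapsule interface, so no to_pandas() copy is needed
g.load_edges_from_df(data=edges, time="time", src="src", dst="dst", properties=["value"])
assert len(g.edges) == 3
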
diff --git a/raphtory/src/io/arrow/dataframe.rs b/raphtory/src/io/arrow/dataframe.rs index b0bfe6d1d4..d379ef5c96 100644 --- a/raphtory/src/io/arrow/dataframe.rs +++ b/raphtory/src/io/arrow/dataframe.rs @@ -49,10 +49,7 @@ where } pub fn new(names: Vec, chunks: I) -> Self { - Self { - names, - chunks, - } + Self { names, chunks } } } diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index bc5993999e..83a7a73af9 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -98,9 +98,7 @@ pub(crate) fn load_nodes_from_df< for chunk in df_view.chunks { let df = chunk?; - let start_id = graph - .reserve_event_ids(df.len()) - .map_err(into_graph_err)?; + let start_id = graph.reserve_event_ids(df.len()).map_err(into_graph_err)?; let prop_cols = combine_properties_arrow(properties, &properties_indices, &df, |key, dtype| { graph @@ -265,9 +263,7 @@ pub fn load_edges_from_df< for chunk in df_view.chunks { let df = chunk?; - let start_idx = graph - .reserve_event_ids(df.len()) - .map_err(into_graph_err)?; + let start_idx = graph.reserve_event_ids(df.len()).map_err(into_graph_err)?; let prop_cols = combine_properties_arrow(properties, &properties_indices, &df, |key, dtype| { graph @@ -493,9 +489,7 @@ pub(crate) fn load_edge_deletions_from_df< for chunk in df_view.chunks { let df = chunk?; - let start_idx = graph - .reserve_event_ids(df.len()) - .map_err(into_graph_err)?; + let start_idx = graph.reserve_event_ids(df.len()).map_err(into_graph_err)?; let layer = lift_layer_col(layer, layer_index, &df)?; let src_col = df.node_col(src_index)?; let dst_col = df.node_col(dst_index)?; @@ -831,9 +825,7 @@ pub(crate) fn load_graph_props_from_df< for chunk in df_view.chunks { let df = chunk?; - let start_id = graph - .reserve_event_ids(df.len()) - .map_err(into_graph_err)?; + let start_id = graph.reserve_event_ids(df.len()).map_err(into_graph_err)?; let prop_cols = combine_properties_arrow(properties, &properties_indices, &df, |key, dtype| { graph diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 4b2c805bb4..029c9adc26 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -171,7 +171,7 @@ pub fn load_node_props_from_parquet< Ok(()) } -pub fn load_edge_props_from_parquet< +pub fn load_edge_metadata_from_parquet< G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( graph: &G, @@ -283,10 +283,7 @@ pub(crate) fn process_parquet_file_to_df( .map_err(|e| GraphError::LoadFailure(format!("Failed to process Parquet file: {e:?}"))) }); - Ok(DFView { - names, - chunks, - }) + Ok(DFView { names, chunks }) } pub fn read_parquet_file( diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 7d290b756a..e58188c51e 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -19,8 +19,8 @@ use crate::{ index::PyIndexSpec, io::{ arrow_loaders::{ - load_edge_props_from_arrow_c_stream, load_edges_from_arrow, - load_node_props_from_arrow_c_stream, load_nodes_from_arrow_c_stream, + load_edge_metadata_from_arrow_c_stream, load_edges_from_arrow_c_stream, + load_node_metadata_from_arrow_c_stream, load_nodes_from_arrow_c_stream, }, pandas_loaders::*, }, @@ -632,34 +632,30 @@ impl PyGraph { } /// Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). 
- /// This includes, but is not limited to: - /// * Pandas dataframes - /// * FireDucks(.pandas) dataframes - /// * Polars dataframes - /// * Arrow tables - /// * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) /// /// Arguments: - /// data_source (Any): The data source containing the nodes. + /// data (Any): The data source containing the nodes. /// time (str): The column name for the timestamps. /// id (str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. + /// node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. /// /// Returns: - /// None: This function does not return a value, if the operation is successful. + /// None: This function does not return a value if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (data_source, time, id, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None) + signature = (data, time, id, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None) )] - fn load_nodes<'py>( + fn load_nodes_from_df<'py>( &self, - data_source: &Bound<'py, PyAny>, + data: &Bound<'py, PyAny>, time: &str, id: &str, node_type: Option<&str>, @@ -672,7 +668,7 @@ impl PyGraph { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); load_nodes_from_arrow_c_stream( &self.graph, - data_source, + data, time, id, node_type, @@ -777,35 +773,31 @@ impl PyGraph { } /// Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - /// This includes, but is not limited to: - /// * Pandas dataframes - /// * FireDucks(.pandas) dataframes - /// * Polars dataframes - /// * Arrow tables - /// * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) /// /// Arguments: - /// data_source (Any): The data source containing the edges. + /// data (Any): The data source containing the edges. /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. 
/// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) - /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. Cannot be used in combination with layer_col. + /// layer_col (str, optional): The edge layer column name in a dataframe. Defaults to None. Cannot be used in combination with layer. /// /// Returns: - /// None: This function does not return a value, if the operation is successful. + /// None: This function does not return a value if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (data_source, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) + signature = (data, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) )] - fn load_edges( + fn load_edges_from_df( &self, - data_source: &Bound, + data: &Bound, time: &str, src: &str, dst: &str, @@ -817,9 +809,9 @@ impl PyGraph { ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edges_from_arrow( + load_edges_from_arrow_c_stream( &self.graph, - data_source, + data, time, src, dst, @@ -881,199 +873,6 @@ impl PyGraph { ) } - #[pyo3(signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, stream_data = false))] - fn load_edges_from_fireducks( - &self, - df: &Bound, - time: &str, - src: &str, - dst: &str, - properties: Option>, - metadata: Option>, - shared_metadata: Option>, - layer: Option<&str>, - layer_col: Option<&str>, - stream_data: bool, - ) -> Result<(), GraphError> { - let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - - if stream_data { - load_edges_from_arrow( - &self.graph, - &df, - time, - src, - dst, - &properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - true, - ) - } else { - // Convert Fireducks DataFrame to pandas.DataFrame - let pandas_df = df.call_method0("to_pandas").map_err(|e| { - GraphError::LoadFailure(format!( - "Failed converting Fireducks DataFrame to pandas via to_pandas: {e}" - )) - })?; - - load_edges_from_pandas( - &self.graph, - &pandas_df, - time, - src, - dst, - &properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - ) - } - } - - #[pyo3(signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, stream_data = false))] - fn load_edges_from_polars( - &self, - df: &Bound, - time: &str, - src: &str, - dst: &str, - properties: Option>, - metadata: Option>, - shared_metadata: Option>, - layer: Option<&str>, - layer_col: Option<&str>, - stream_data: bool, - ) -> Result<(), GraphError> { - let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - - if stream_data { - load_edges_from_arrow( - &self.graph, - &df, - time, - src, - dst, - 
&properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - true, - ) - } else { - // Convert Polars DataFrame to pandas.DataFrame - let kwargs = PyDict::new(df.py()); - kwargs - .set_item("use_pyarrow_extension_array", true) - .map_err(|e| { - GraphError::LoadFailure(format!("Failed setting kwargs for to_pandas(): {e}")) - })?; - - let pandas_df = df.call_method("to_pandas", (), Some(&kwargs)).map_err(|e| { - GraphError::LoadFailure(format!( - "Failed converting Polars DataFrame to pandas via to_pandas(use_pyarrow_extension_array=True): {e}" - )) - })?; - - load_edges_from_pandas( - &self.graph, - &pandas_df, - time, - src, - dst, - &properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - ) - } - } - - #[pyo3( - signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, stream_data = false) - )] - fn load_edges_from_arrow( - &self, - df: &Bound, - time: &str, - src: &str, - dst: &str, - properties: Option>, - metadata: Option>, - shared_metadata: Option>, - layer: Option<&str>, - layer_col: Option<&str>, - stream_data: bool, - ) -> Result<(), GraphError> { - let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edges_from_arrow( - &self.graph, - df, - time, - src, - dst, - &properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - stream_data, - ) - } - - #[pyo3( - signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, stream_data = false) - )] - fn load_edges_from_duckdb( - &self, - df: &Bound, - time: &str, - src: &str, - dst: &str, - properties: Option>, - metadata: Option>, - shared_metadata: Option>, - layer: Option<&str>, - layer_col: Option<&str>, - stream_data: bool, - ) -> Result<(), GraphError> { - let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - - // Call fetch_arrow_table() so we can use arrow ingestion pathway - let df = if stream_data { - df - } else { - &df.call_method0("fetch_arrow_table").map_err(|e| { - GraphError::LoadFailure( - "Failed calling fetch_arrow_table() on the DuckDB instance".to_string(), - ) - })? - }; - - load_edges_from_arrow( - &self.graph, - df, - time, - src, - dst, - &properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - stream_data, - ) - } - /// Load edges from a Parquet file into the graph. /// /// Arguments: @@ -1125,32 +924,28 @@ impl PyGraph { } /// Load node properties into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - /// This includes, but is not limited to: - /// * Pandas dataframes - /// * FireDucks(.pandas) dataframes - /// * Polars dataframes - /// * Arrow tables - /// * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) /// /// Arguments: - /// data_source (Any): The data source containing node information. + /// data (Any): The data source containing node information. /// id(str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. 
Defaults to None. (cannot be used in combination with node_type_col) - /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. + /// node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. /// /// Returns: - /// None: This function does not return a value, if the operation is successful. + /// None: This function does not return a value if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (data_source, id, node_type = None, node_type_col = None, metadata = None, shared_metadata = None) + signature = (data, id, node_type = None, node_type_col = None, metadata = None, shared_metadata = None) )] - fn load_node_props( + fn load_node_metadata_from_df( &self, - data_source: &Bound, + data: &Bound, id: &str, node_type: Option<&str>, node_type_col: Option<&str>, @@ -1158,9 +953,9 @@ impl PyGraph { shared_metadata: Option>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_node_props_from_arrow_c_stream( + load_node_metadata_from_arrow_c_stream( &self.graph, - data_source, + data, id, node_type, node_type_col, @@ -1249,33 +1044,29 @@ impl PyGraph { } /// Load edge properties into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - /// This includes, but is not limited to: - /// * Pandas dataframes - /// * FireDucks(.pandas) dataframes - /// * Polars dataframes - /// * Arrow tables - /// * DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) /// /// Arguments: - /// data_source (Any): The data source containing edge information. + /// data (Any): The data source containing edge information. /// src (str): The column name for the source node. /// dst (str): The column name for the destination node. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. /// layer (str, optional): The edge layer name. Defaults to None. - /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. + /// layer_col (str, optional): The edge layer column name in a dataframe. Defaults to None. /// /// Returns: - /// None: This function does not return a value, if the operation is successful. + /// None: This function does not return a value if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. 
#[pyo3( - signature = (data_source, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None) + signature = (data, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None) )] - fn load_edge_props( + fn load_edge_metadata_from_df( &self, - data_source: &Bound, + data: &Bound, src: &str, dst: &str, metadata: Option>, @@ -1284,9 +1075,9 @@ impl PyGraph { layer_col: Option<&str>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edge_props_from_arrow_c_stream( + load_edge_metadata_from_arrow_c_stream( &self.graph, - data_source, + data, src, dst, &metadata, @@ -1315,7 +1106,7 @@ impl PyGraph { #[pyo3( signature = (df, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None) )] - fn load_edge_props_from_pandas( + fn load_edge_metadata_from_pandas( &self, df: &Bound, src: &str, @@ -1326,7 +1117,7 @@ impl PyGraph { layer_col: Option<&str>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edge_props_from_pandas( + load_edge_metadata_from_pandas( &self.graph, df, src, @@ -1357,7 +1148,7 @@ impl PyGraph { #[pyo3( signature = (parquet_path, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None) )] - fn load_edge_props_from_parquet( + fn load_edge_metadata_from_parquet( &self, parquet_path: PathBuf, src: &str, @@ -1368,7 +1159,7 @@ impl PyGraph { layer_col: Option<&str>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edge_props_from_parquet( + load_edge_metadata_from_parquet( &self.graph, parquet_path.as_path(), src, diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index 0b00c0f9f5..9fdf5fc22f 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -914,7 +914,7 @@ impl PyPersistentGraph { layer_col: Option<&str>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edge_props_from_pandas( + load_edge_metadata_from_pandas( &self.graph, df, src, @@ -954,7 +954,7 @@ impl PyPersistentGraph { layer_col: Option<&str>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edge_props_from_parquet( + load_edge_metadata_from_parquet( &self.graph, parquet_path.as_path(), src, diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index fa506d5422..06d11411a9 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -19,10 +19,7 @@ use arrow::{ }, datatypes::SchemaRef, }; -use pyo3::{ - prelude::*, - types::{PyCapsule, PyDict}, -}; +use pyo3::{prelude::*, types::PyCapsule}; use raphtory_api::core::entities::properties::prop::Prop; use std::collections::HashMap; @@ -46,7 +43,6 @@ pub(crate) fn load_nodes_from_arrow_c_stream< if let Some(ref node_type_col) = node_type_col { cols_to_check.push(node_type_col.as_ref()); } - let df_view = process_arrow_c_stream_df(df, cols_to_check.clone())?; df_view.check_cols_exist(&cols_to_check)?; load_nodes_from_df( @@ -62,7 +58,7 @@ pub(crate) fn load_nodes_from_arrow_c_stream< ) } -pub(crate) fn load_edges_from_arrow< +pub(crate) fn load_edges_from_arrow_c_stream< 'py, G: StaticGraphViewOps + PropertyAdditionOps + 
AdditionOps + InternalCache, >( @@ -84,41 +80,23 @@ pub(crate) fn load_edges_from_arrow< if let Some(layer_col) = layer_col { cols_to_check.push(layer_col.as_ref()); } - - if stream_data { - let df_view = process_arrow_c_stream_df(df, cols_to_check.clone())?; - df_view.check_cols_exist(&cols_to_check)?; - load_edges_from_df( - df_view, - time, - src, - dst, - properties, - metadata, - shared_metadata, - layer, - layer_col, - graph, - ) - } else { - let df_view = process_arrow_py_df(df, cols_to_check.clone())?; - df_view.check_cols_exist(&cols_to_check)?; - load_edges_from_df( - df_view, - time, - src, - dst, - properties, - metadata, - shared_metadata, - layer, - layer_col, - graph, - ) - } + let df_view = process_arrow_c_stream_df(df, cols_to_check.clone())?; + df_view.check_cols_exist(&cols_to_check)?; + load_edges_from_df( + df_view, + time, + src, + dst, + properties, + metadata, + shared_metadata, + layer, + layer_col, + graph, + ) } -pub(crate) fn load_node_props_from_arrow_c_stream< +pub(crate) fn load_node_metadata_from_arrow_c_stream< 'py, G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( @@ -148,7 +126,7 @@ pub(crate) fn load_node_props_from_arrow_c_stream< ) } -pub(crate) fn load_edge_props_from_arrow_c_stream< +pub(crate) fn load_edge_metadata_from_arrow_c_stream< 'py, G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( @@ -241,53 +219,3 @@ pub(crate) fn process_arrow_c_stream_df<'a>( }); Ok(DFView::new(names, chunks)) } - -pub(crate) fn process_arrow_py_df<'a>( - df: &Bound<'a, PyAny>, - col_names: Vec<&str>, -) -> PyResult> + 'a>> { - let py = df.py(); - is_jupyter(py); - - // We assume df is an Arrow object (e.g. pyarrow Table or RecordBatchReader) - // that implements a to_batches(max_chunksize=...) method - let kwargs = PyDict::new(py); - kwargs.set_item("max_chunksize", 1_000_000)?; - - // Get a list of RecordBatch-like Python objects - let rb = df - .call_method("to_batches", (), Some(&kwargs))? - .extract::>>()?; - - // Derive the column names from the first batch's schema, then filter - let names: Vec = if let Some(batch0) = rb.first() { - let schema = batch0.getattr("schema")?; - schema.getattr("names")?.extract::>()? 
- } else { - vec![] - } - .into_iter() - .filter(|x| col_names.contains(&x.as_str())) - .collect(); - - let names_len = names.len(); - - let chunks = rb.into_iter().map(move |rb| { - let columns = rb.getattr("columns")?.extract::>>()?; - let chunk = (0..names_len) - .map(|i| { - // rb.column(i) -> pyarrow.Array - let array = &columns[i]; - let arr = array_to_rust(array).map_err(GraphError::from)?; - Ok::<_, GraphError>(arr) - }) - .collect::, GraphError>>()?; - - Ok(DFChunk { chunk }) - }); - - Ok(DFView { - names, - chunks, - }) -} diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index 6a831bf8d7..ed441d64f7 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -125,7 +125,7 @@ pub(crate) fn load_node_props_from_pandas< ) } -pub(crate) fn load_edge_props_from_pandas< +pub(crate) fn load_edge_metadata_from_pandas< 'py, G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( @@ -240,10 +240,7 @@ pub(crate) fn process_pandas_py_df<'a>( Ok(DFChunk { chunk }) }); - Ok(DFView { - names, - chunks, - }) + Ok(DFView { names, chunks }) } pub fn array_to_rust(obj: &Bound) -> PyResult { diff --git a/raphtory/src/serialise/parquet/mod.rs b/raphtory/src/serialise/parquet/mod.rs index 83966bd597..23f90ec61e 100644 --- a/raphtory/src/serialise/parquet/mod.rs +++ b/raphtory/src/serialise/parquet/mod.rs @@ -5,7 +5,7 @@ use crate::{ }, errors::GraphError, io::parquet_loaders::{ - load_edge_deletions_from_parquet, load_edge_props_from_parquet, load_edges_from_parquet, + load_edge_deletions_from_parquet, load_edge_metadata_from_parquet, load_edges_from_parquet, load_graph_props_from_parquet, load_node_props_from_parquet, load_nodes_from_parquet, }, prelude::*, @@ -371,7 +371,7 @@ fn decode_graph_storage( .map(|s| s.as_str()) .collect::>(); - load_edge_props_from_parquet( + load_edge_metadata_from_parquet( &g, &c_edge_path, SRC_COL, From 6bf05b8ffce989b9e98d29695ea1fb243a3b73ca Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Mon, 24 Nov 2025 19:08:27 -0500 Subject: [PATCH 12/55] Re-added total number of rows in DFView, but as an Option. We use it if the data source provides __len__(), and if not, the loading/progress bar for loading nodes and edges doesn't show progression, only iterations per second. 
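
For illustration, a minimal sketch of a source without __len__ (toy data; assumes a pyarrow version whose RecordBatchReader exposes __arrow_c_stream__()):

import pyarrow as pa
from raphtory import Graph

table = pa.table({"time": [1, 2], "src": [1, 2], "dst": [2, 3]})
# RecordBatchReader implements __arrow_c_stream__() but not __len__(),
# so ingestion still works while the bar only reports iterations per second.
reader = pa.RecordBatchReader.from_batches(table.schema, table.to_batches())

g = Graph()
g.load_edges_from_df(data=reader, time="time", src="src", dst="dst")

The bench_duckdb_reader benchmark below exercises the same path with a DuckDB result.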
---
 dataset_tests/ingestion_benchmarks.py         | 58 +++++++++++--------
 raphtory/src/io/arrow/dataframe.rs            | 21 ++++++-
 raphtory/src/io/arrow/df_loaders.rs           | 54 ++++++++++++-----
 raphtory/src/io/parquet_loaders.rs            | 17 ++++--
 raphtory/src/python/graph/graph.rs            |  1 -
 raphtory/src/python/graph/io/arrow_loaders.rs | 35 ++++++-----
 .../src/python/graph/io/pandas_loaders.rs     |  7 ++-
 7 files changed, 133 insertions(+), 60 deletions(-)

diff --git a/dataset_tests/ingestion_benchmarks.py b/dataset_tests/ingestion_benchmarks.py
index f05e6d8289..382b4307f8 100644
--- a/dataset_tests/ingestion_benchmarks.py
+++ b/dataset_tests/ingestion_benchmarks.py
@@ -5,6 +5,7 @@
 import polars as pl
 import duckdb
 import fireducks.pandas as fpd
+from pyarrow import RecordBatchReader
 
 from raphtory import Graph
 
@@ -44,7 +45,7 @@ def bench_fire_ducks_pandas_streaming(df: fpd.frame.DataFrame) -> float:
 def bench_polars_streaming(df: pl.DataFrame) -> float:
     g = Graph()
     start = time.perf_counter()
-    g.load_edges(data_source=df, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    g.load_edges_from_df(data=df, time="block_timestamp", src="inputs_address", dst="outputs_address")
     total = time.perf_counter() - start
     print(f"[polars streaming] ingestion took {total:.3f}s")
     del g
@@ -55,7 +56,7 @@ def bench_arrow_streaming(df: pl.DataFrame) -> float:
     g = Graph()
     df_arrow_from_pl = df.to_arrow()
     start = time.perf_counter()
-    g.load_edges(data_source=df_arrow_from_pl, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    g.load_edges_from_df(data=df_arrow_from_pl, time="block_timestamp", src="inputs_address", dst="outputs_address")
     total = time.perf_counter() - start
     print(f"[arrow streaming] ingestion took {total:.3f}s")
     del g, df_arrow_from_pl
@@ -75,6 +76,20 @@ def bench_duckdb_streaming(df: pl.DataFrame) -> float:
     gc.collect()
     return total
 
+def bench_duckdb_reader(df: pl.DataFrame) -> float:
+    g = Graph()
+    df_arrow_from_pl = df.to_arrow()
+    # RecordBatchReader doesn't implement __len__(), should still work but the loading bar doesn't display progress
+    duckdb_df: RecordBatchReader = duckdb.sql("SELECT * FROM df_arrow_from_pl").arrow()
+    start = time.perf_counter()
+    # uses the __arrow_c_stream__() interface internally
+    g.load_edges_from_df(data=duckdb_df, time="block_timestamp", src="inputs_address", dst="outputs_address")
+    total = time.perf_counter() - start
+    print(f"[duckdb reader] ingestion took {total:.3f}s")
+    del g, df_arrow_from_pl, duckdb_df
+    gc.collect()
+    return total
+
 
 def ingestion_speed_btc_dataset():
     df_pd: pd.DataFrame = pd.read_parquet(FLATTENED_FILE)
@@ -83,14 +98,11 @@
 
     pandas_ingestion_times = []
     pandas_streaming_ingestion_times = []
-    # fireducks_ingestion_times = []
     fireducks_streaming_ingestion_times = []
-    # polars_ingestion_times = []
     polars_streaming_ingestion_times = []
-    # arrow_ingestion_times = []
     arrow_streaming_ingestion_times = []
-    # duckdb_ingestion_times = []
     duckdb_streaming_ingestion_times = []
+    duckdb_reader_ingestion_times = []
 
     for _ in range(5):
         # 1.1) Pandas ingestion
@@ -108,7 +120,7 @@
         fireducks_streaming_ingestion_times.append(fpd_streaming_time)
         gc.collect()
 
-        # 3) Polars ingestion streaming (no internal to_pandas() call)
+        # 3) Polars ingestion streaming
         polars_streaming_time = bench_polars_streaming(df=df_pl)
         polars_streaming_ingestion_times.append(polars_streaming_time)
         gc.collect()
@@ -118,32 +130,32 @@
arrow_streaming_ingestion_times.append(arrow_streaming_time) gc.collect() - # 5) DuckDB streaming ingestion (no internal fetch_arrow_table() call) + # 5) DuckDB streaming ingestion duckdb_streaming_time = bench_duckdb_streaming(df_pl) duckdb_streaming_ingestion_times.append(duckdb_streaming_time) gc.collect() + # 6) DuckDB RecordBatchReader ingestion + # RecordBatchReader doesn't implement __len__(), should still work but the loading bar doesn't display progress + duckdb_reader_time = bench_duckdb_reader(df_pl) + duckdb_reader_ingestion_times.append(duckdb_reader_time) + gc.collect() + formatted_pandas = [f"{num:.3f}s" for num in pandas_ingestion_times] formatted_pandas_streaming = [f"{num:.3f}s" for num in pandas_streaming_ingestion_times] - # formatted_fireducks = [f"{num:.3f}s" for num in fireducks_ingestion_times] formatted_fireducks_streaming = [f"{num:.3f}s" for num in fireducks_streaming_ingestion_times] - # formatted_polars = [f"{num:.3f}s" for num in polars_ingestion_times] formatted_polars_streaming = [f"{num:.3f}s" for num in polars_streaming_ingestion_times] - # formatted_arrow = [f"{num:.3f}s" for num in arrow_ingestion_times] formatted_arrow_streaming = [f"{num:.3f}s" for num in arrow_streaming_ingestion_times] - # formatted_duckdb = [f"{num:.3f}s" for num in duckdb_ingestion_times] formatted_duckdb_streaming = [f"{num:.3f}s" for num in duckdb_streaming_ingestion_times] - - print(f"Pandas: {formatted_pandas}") - print(f"Pandas streaming: {formatted_pandas_streaming}") - # print(f"Fireducks: {formatted_fireducks}") - print(f"Fireducks streaming: {formatted_fireducks_streaming}") - # print(f"Polars: {formatted_polars}") - print(f"Polars streaming: {formatted_polars_streaming}") - # print(f"Arrow: {formatted_arrow}") - print(f"Arrow streaming: {formatted_arrow_streaming}") - # print(f"DuckDB: {formatted_duckdb}") - print(f"DuckDB streaming: {formatted_duckdb_streaming}") + formatted_duckdb_reader = [f"{num:.3f}s" for num in duckdb_reader_ingestion_times] + + print(f"Pandas: {formatted_pandas}") + print(f"Pandas streaming: {formatted_pandas_streaming}") + print(f"Fireducks streaming: {formatted_fireducks_streaming}") + print(f"Polars streaming: {formatted_polars_streaming}") + print(f"Arrow streaming: {formatted_arrow_streaming}") + print(f"DuckDB streaming: {formatted_duckdb_streaming}") + print(f"DuckDB RecordBatchReader: {formatted_duckdb_reader}") if __name__ == "__main__": diff --git a/raphtory/src/io/arrow/dataframe.rs b/raphtory/src/io/arrow/dataframe.rs index d379ef5c96..2143142a77 100644 --- a/raphtory/src/io/arrow/dataframe.rs +++ b/raphtory/src/io/arrow/dataframe.rs @@ -15,12 +15,20 @@ use std::fmt::{Debug, Formatter}; pub struct DFView { pub names: Vec, pub chunks: I, + pub num_rows: Option, } impl Debug for DFView { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_struct("DFView") .field("names", &self.names) + .field( + "num_rows", + &self + .num_rows + .map(|x| x.to_string()) + .unwrap_or("Unknown".to_string()), + ) .finish() } } @@ -48,8 +56,17 @@ where .ok_or_else(|| GraphError::ColumnDoesNotExist(name.to_string())) } - pub fn new(names: Vec, chunks: I) -> Self { - Self { names, chunks } + /// Returns Some(_) only if we know the total number of rows. 
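+    /// `None` means the data source did not report its length (e.g. the Python object has no `__len__`), so emptiness cannot be determined up front.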
+ pub fn is_empty(&self) -> Option { + self.num_rows.map(|x| x == 0) + } + + pub fn new(names: Vec, chunks: I, num_rows: Option) -> Self { + Self { + names, + chunks, + num_rows, + } } } diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 83a7a73af9..f12e96b9c1 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -25,13 +25,23 @@ use rayon::prelude::*; use std::{collections::HashMap, sync::atomic::Ordering}; #[cfg(feature = "python")] -fn build_progress_bar(des: String) -> Result { - BarBuilder::default() - .desc(des) - .animation(kdam::Animation::FillUp) - .unit_scale(true) - .build() - .map_err(|_| GraphError::TqdmError) +fn build_progress_bar(des: String, num_rows: Option) -> Result { + if let Some(num_rows) = num_rows { + BarBuilder::default() + .desc(des) + .animation(kdam::Animation::FillUp) + .total(num_rows) + .unit_scale(true) + .build() + .map_err(|_| GraphError::TqdmError) + } else { + BarBuilder::default() + .desc(des) + .animation(kdam::Animation::FillUp) + .unit_scale(true) + .build() + .map_err(|_| GraphError::TqdmError) + } } fn process_shared_properties( @@ -60,6 +70,9 @@ pub(crate) fn load_nodes_from_df< node_type_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { + if matches!(df_view.is_empty(), Some(true)) { + return Ok(()); + } let properties_indices = properties .iter() .map(|name| df_view.get_index(name)) @@ -83,7 +96,7 @@ pub(crate) fn load_nodes_from_df< })?; #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading nodes".to_string())?; + let mut pb = build_progress_bar("Loading nodes".to_string(), df_view.num_rows)?; let mut node_col_resolved = vec![]; let mut node_type_col_resolved = vec![]; @@ -221,6 +234,9 @@ pub fn load_edges_from_df< layer_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { + if matches!(df_view.is_empty(), Some(true)) { + return Ok(()); + } let properties_indices = properties .iter() .map(|name| df_view.get_index(name)) @@ -245,7 +261,7 @@ pub fn load_edges_from_df< })?; #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading edges".to_string())?; + let mut pb = build_progress_bar("Loading edges".to_string(), df_view.num_rows)?; #[cfg(feature = "python")] let _ = pb.update(0); @@ -479,13 +495,16 @@ pub(crate) fn load_edge_deletions_from_df< layer_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { + if matches!(df_view.is_empty(), Some(true)) { + return Ok(()); + } let src_index = df_view.get_index(src)?; let dst_index = df_view.get_index(dst)?; let time_index = df_view.get_index(time)?; let layer_index = layer_col.map(|layer_col| df_view.get_index(layer_col.as_ref())); let layer_index = layer_index.transpose()?; #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading edge deletions".to_string())?; + let mut pb = build_progress_bar("Loading edge deletions".to_string(), df_view.num_rows)?; for chunk in df_view.chunks { let df = chunk?; @@ -525,6 +544,9 @@ pub(crate) fn load_node_props_from_df< shared_metadata: Option<&HashMap>, graph: &G, ) -> Result<(), GraphError> { + if matches!(df_view.is_empty(), Some(true)) { + return Ok(()); + } let metadata_indices = metadata .iter() .map(|name| df_view.get_index(name)) @@ -543,7 +565,7 @@ pub(crate) fn load_node_props_from_df< })?; #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading node properties".to_string())?; + let mut pb = build_progress_bar("Loading node properties".to_string(), df_view.num_rows)?; let mut node_col_resolved = 
vec![]; let mut node_type_col_resolved = vec![]; @@ -645,6 +667,9 @@ pub(crate) fn load_edges_props_from_df< layer_col: Option<&str>, graph: &G, ) -> Result<(), GraphError> { + if matches!(df_view.is_empty(), Some(true)) { + return Ok(()); + } let metadata_indices = metadata .iter() .map(|name| df_view.get_index(name)) @@ -664,7 +689,7 @@ pub(crate) fn load_edges_props_from_df< })?; #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading edge properties".to_string())?; + let mut pb = build_progress_bar("Loading edge properties".to_string(), df_view.num_rows)?; #[cfg(feature = "python")] let _ = pb.update(0); @@ -806,6 +831,9 @@ pub(crate) fn load_graph_props_from_df< metadata: Option<&[&str]>, graph: &G, ) -> Result<(), GraphError> { + if matches!(df_view.is_empty(), Some(true)) { + return Ok(()); + } let properties = properties.unwrap_or(&[]); let metadata = metadata.unwrap_or(&[]); @@ -821,7 +849,7 @@ pub(crate) fn load_graph_props_from_df< let time_index = df_view.get_index(time)?; #[cfg(feature = "python")] - let mut pb = build_progress_bar("Loading graph properties".to_string())?; + let mut pb = build_progress_bar("Loading graph properties".to_string(), df_view.num_rows)?; for chunk in df_view.chunks { let df = chunk?; diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 029c9adc26..1c1a0b9ac6 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -85,14 +85,16 @@ pub fn load_edges_from_parquet< let all_files = get_parquet_file_paths(parquet_path)? .into_iter() .map(|file| { - let (names, _, _) = + let (names, _, num_rows) = read_parquet_file(file, (!cols_to_check.is_empty()).then_some(&cols_to_check))?; - Ok::<_, GraphError>(names) + Ok::<_, GraphError>((names, num_rows)) }) .collect::, _>>()?; + let mut count_rows = 0; let mut all_names = Vec::new(); - for names in all_files { + for (names, num_rows) in all_files { + count_rows += num_rows; if all_names.is_empty() { all_names = names; } else if all_names != names { @@ -114,6 +116,7 @@ pub fn load_edges_from_parquet< let df_view = DFView { names: all_names, chunks: all_df_view, + num_rows: Some(count_rows), }; load_edges_from_df( @@ -263,7 +266,7 @@ pub(crate) fn process_parquet_file_to_df( col_names: Option<&[&str]>, batch_size: Option, ) -> Result>>, GraphError> { - let (names, chunks, _) = read_parquet_file(parquet_file_path, col_names)?; + let (names, chunks, num_rows) = read_parquet_file(parquet_file_path, col_names)?; let names: Vec = names .into_iter() @@ -283,7 +286,11 @@ pub(crate) fn process_parquet_file_to_df( .map_err(|e| GraphError::LoadFailure(format!("Failed to process Parquet file: {e:?}"))) }); - Ok(DFView { names, chunks }) + Ok(DFView { + names, + chunks, + num_rows: Some(num_rows), + }) } pub fn read_parquet_file( diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index e58188c51e..a82b566efb 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -820,7 +820,6 @@ impl PyGraph { shared_metadata.as_ref(), layer, layer_col, - true, ) } diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index 06d11411a9..3dfc2ca74f 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -9,7 +9,7 @@ use crate::{ }, }, prelude::{AdditionOps, PropertyAdditionOps}, - python::graph::io::pandas_loaders::{array_to_rust, is_jupyter}, + 
python::graph::io::pandas_loaders::is_jupyter, serialise::incremental::InternalCache, }; use arrow::{ @@ -28,7 +28,7 @@ pub(crate) fn load_nodes_from_arrow_c_stream< G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( graph: &G, - df: &Bound<'py, PyAny>, + data: &Bound<'py, PyAny>, time: &str, id: &str, node_type: Option<&str>, @@ -43,7 +43,7 @@ pub(crate) fn load_nodes_from_arrow_c_stream< if let Some(ref node_type_col) = node_type_col { cols_to_check.push(node_type_col.as_ref()); } - let df_view = process_arrow_c_stream_df(df, cols_to_check.clone())?; + let df_view = process_arrow_c_stream_df(data, cols_to_check.clone())?; df_view.check_cols_exist(&cols_to_check)?; load_nodes_from_df( df_view, @@ -63,7 +63,7 @@ pub(crate) fn load_edges_from_arrow_c_stream< G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( graph: &G, - df: &Bound<'py, PyAny>, + data: &Bound<'py, PyAny>, time: &str, src: &str, dst: &str, @@ -72,7 +72,6 @@ pub(crate) fn load_edges_from_arrow_c_stream< shared_metadata: Option<&HashMap>, layer: Option<&str>, layer_col: Option<&str>, - stream_data: bool, ) -> Result<(), GraphError> { let mut cols_to_check = vec![src, dst, time]; cols_to_check.extend_from_slice(properties); @@ -80,7 +79,7 @@ pub(crate) fn load_edges_from_arrow_c_stream< if let Some(layer_col) = layer_col { cols_to_check.push(layer_col.as_ref()); } - let df_view = process_arrow_c_stream_df(df, cols_to_check.clone())?; + let df_view = process_arrow_c_stream_df(data, cols_to_check.clone())?; df_view.check_cols_exist(&cols_to_check)?; load_edges_from_df( df_view, @@ -101,7 +100,7 @@ pub(crate) fn load_node_metadata_from_arrow_c_stream< G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( graph: &G, - df: &Bound<'py, PyAny>, + data: &Bound<'py, PyAny>, id: &str, node_type: Option<&str>, node_type_col: Option<&str>, @@ -113,7 +112,7 @@ pub(crate) fn load_node_metadata_from_arrow_c_stream< if let Some(ref node_type_col) = node_type_col { cols_to_check.push(node_type_col.as_ref()); } - let df_view = process_arrow_c_stream_df(df, cols_to_check.clone())?; + let df_view = process_arrow_c_stream_df(data, cols_to_check.clone())?; df_view.check_cols_exist(&cols_to_check)?; load_node_props_from_df( df_view, @@ -131,7 +130,7 @@ pub(crate) fn load_edge_metadata_from_arrow_c_stream< G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( graph: &G, - df: &Bound<'py, PyAny>, + data: &Bound<'py, PyAny>, src: &str, dst: &str, metadata: &[&str], @@ -144,7 +143,7 @@ pub(crate) fn load_edge_metadata_from_arrow_c_stream< cols_to_check.push(layer_col.as_ref()); } cols_to_check.extend_from_slice(metadata); - let df_view = process_arrow_c_stream_df(df, cols_to_check.clone())?; + let df_view = process_arrow_c_stream_df(data, cols_to_check.clone())?; df_view.check_cols_exist(&cols_to_check)?; load_edges_props_from_df( df_view, @@ -160,19 +159,19 @@ pub(crate) fn load_edge_metadata_from_arrow_c_stream< /// Can handle any object that provides the \_\_arrow_c_stream__() interface pub(crate) fn process_arrow_c_stream_df<'a>( - df: &Bound<'a, PyAny>, + data: &Bound<'a, PyAny>, col_names: Vec<&str>, ) -> PyResult> + 'a>> { - let py = df.py(); + let py = data.py(); is_jupyter(py); - if !df.hasattr("__arrow_c_stream__")? { + if !data.hasattr("__arrow_c_stream__")? 
{ return Err(PyErr::from(GraphError::LoadFailure( "Object must implement __arrow_c_stream__".to_string(), ))); } - let stream_capsule_any: Bound<'a, PyAny> = df.call_method0("__arrow_c_stream__")?; + let stream_capsule_any: Bound<'a, PyAny> = data.call_method0("__arrow_c_stream__")?; let stream_capsule: &Bound<'a, PyCapsule> = stream_capsule_any.downcast::()?; // We need to use the pointer to build an ArrowArrayStreamReader @@ -202,6 +201,12 @@ pub(crate) fn process_arrow_c_stream_df<'a>( } } + let len_from_python: Option = if data.hasattr("__len__")? { + Some(data.call_method0("__len__")?.extract()?) + } else { + None + }; + let chunks = reader .into_iter() .map(move |batch_res: Result| { @@ -217,5 +222,5 @@ pub(crate) fn process_arrow_c_stream_df<'a>( .collect::>(); Ok(DFChunk::new(chunk_arrays)) }); - Ok(DFView::new(names, chunks)) + Ok(DFView::new(names, chunks, len_from_python)) } diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index ed441d64f7..e753645174 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -239,8 +239,13 @@ pub(crate) fn process_pandas_py_df<'a>( Ok(DFChunk { chunk }) }); + let num_rows: usize = dropped_df.call_method0("__len__")?.extract()?; - Ok(DFView { names, chunks }) + Ok(DFView { + names, + chunks, + num_rows: Some(num_rows), + }) } pub fn array_to_rust(obj: &Bound) -> PyResult { From 228b0f525076f55b0e247f17ca7c7f9307e8dfd4 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Mon, 24 Nov 2025 22:06:21 -0500 Subject: [PATCH 13/55] Added splitting of large chunks into smaller chunks so that the progress bar for loading updates properly when using the __arrow_c_stream__ interface. --- dataset_tests/ingestion_benchmarks.py | 4 +- raphtory/src/python/graph/io/arrow_loaders.rs | 45 +++++++++++++++---- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/dataset_tests/ingestion_benchmarks.py b/dataset_tests/ingestion_benchmarks.py index 382b4307f8..3dc44fe27e 100644 --- a/dataset_tests/ingestion_benchmarks.py +++ b/dataset_tests/ingestion_benchmarks.py @@ -26,7 +26,7 @@ def bench_pandas_streaming(df: pd.DataFrame) -> float: start = time.perf_counter() g.load_edges_from_df(data=df, time="block_timestamp", src="inputs_address", dst="outputs_address") total = time.perf_counter() - start - print(f"[pandas streaming] ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}") + print(f"[pandas streaming] ingestion took {total:.3f}s") del g gc.collect() return total @@ -37,7 +37,7 @@ def bench_fire_ducks_pandas_streaming(df: fpd.frame.DataFrame) -> float: start = time.perf_counter() g.load_edges_from_df(data=df, time="block_timestamp", src="inputs_address", dst="outputs_address") total = time.perf_counter() - start - print(f"[fireducks streaming] ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}") + print(f"[fireducks streaming] ingestion took {total:.3f}s") del g gc.collect() return total diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index 3dfc2ca74f..d6ea26acb9 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -21,7 +21,9 @@ use arrow::{ }; use pyo3::{prelude::*, types::PyCapsule}; use raphtory_api::core::entities::properties::prop::Prop; -use std::collections::HashMap; +use std::{cmp::min, 
collections::HashMap}; + +const CHUNK_SIZE: usize = 1_000_000; // split large chunks so progress bar updates reasonably pub(crate) fn load_nodes_from_arrow_c_stream< 'py, @@ -209,18 +211,43 @@ pub(crate) fn process_arrow_c_stream_df<'a>( let chunks = reader .into_iter() - .map(move |batch_res: Result| { - let batch = batch_res.map_err(|e| { + .flat_map(move |batch_res: Result| { + let batch = match batch_res.map_err(|e| { GraphError::LoadFailure(format!( "Arrow stream error while reading a batch: {}", e.to_string() )) - })?; - let chunk_arrays = indices - .iter() - .map(|&idx| batch.column(idx).clone()) - .collect::>(); - Ok(DFChunk::new(chunk_arrays)) + }) { + Ok(batch) => batch, + Err(e) => return vec![Err(e)], + }; + let num_rows = batch.num_rows(); + + // many times, all the data will be passed as a single RecordBatch, meaning the progress bar + // will not update properly (only updates at the end of each batch). Splitting into smaller batches + // means the progress bar will update reasonably (every CHUNK_SIZE rows) + if num_rows > CHUNK_SIZE { + let num_chunks = (num_rows + CHUNK_SIZE - 1) / CHUNK_SIZE; + let mut result = Vec::with_capacity(num_chunks); + for i in 0..num_chunks { + let offset = i * CHUNK_SIZE; + let length = min(CHUNK_SIZE, num_rows - offset); + let sliced_batch = batch.slice(offset, length); + let chunk_arrays = indices + .iter() + .map(|&idx| sliced_batch.column(idx).clone()) + .collect::>(); + result.push(Ok(DFChunk::new(chunk_arrays))); + } + result + } else { + let chunk_arrays = indices + .iter() + .map(|&idx| batch.column(idx).clone()) + .collect::>(); + vec![Ok(DFChunk::new(chunk_arrays))] + } }); + Ok(DFView::new(names, chunks, len_from_python)) } From 4f428bd1b434694355276fe916884d778483f2c9 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Tue, 25 Nov 2025 03:23:53 -0500 Subject: [PATCH 14/55] Renamed props to metadata for remaining functions --- python/python/raphtory/__init__.pyi | 4 ++-- raphtory/src/io/parquet_loaders.rs | 2 +- raphtory/src/python/graph/graph.rs | 8 ++++---- raphtory/src/python/graph/graph_with_deletions.rs | 4 ++-- raphtory/src/python/graph/io/pandas_loaders.rs | 2 +- raphtory/src/serialise/parquet/mod.rs | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index e689b5f3a7..3a272910de 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1252,7 +1252,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_node_props_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_node_metadata_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load node properties from a Pandas DataFrame. @@ -1271,7 +1271,7 @@ class Graph(GraphView): GraphError: If the operation fails. 
""" - def load_node_props_from_parquet(self, parquet_path: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_node_metadata_from_parquet(self, parquet_path: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load node properties from a parquet file. diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 1c1a0b9ac6..0e58b8981b 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -136,7 +136,7 @@ pub fn load_edges_from_parquet< Ok(()) } -pub fn load_node_props_from_parquet< +pub fn load_node_metadata_from_parquet< G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( graph: &G, diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index a82b566efb..134fb70596 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -981,7 +981,7 @@ impl PyGraph { #[pyo3( signature = (df, id, node_type = None, node_type_col = None, metadata = None, shared_metadata = None) )] - fn load_node_props_from_pandas( + fn load_node_metadata_from_pandas( &self, df: &Bound, id: &str, @@ -991,7 +991,7 @@ impl PyGraph { shared_metadata: Option>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_node_props_from_pandas( + load_node_metadata_from_pandas( &self.graph, df, id, @@ -1020,7 +1020,7 @@ impl PyGraph { #[pyo3( signature = (parquet_path, id, node_type = None, node_type_col = None, metadata = None, shared_metadata= None) )] - fn load_node_props_from_parquet( + fn load_node_metadata_from_parquet( &self, parquet_path: PathBuf, id: &str, @@ -1030,7 +1030,7 @@ impl PyGraph { shared_metadata: Option>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_node_props_from_parquet( + load_node_metadata_from_parquet( &self.graph, parquet_path.as_path(), id, diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index 9fdf5fc22f..253293548b 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -837,7 +837,7 @@ impl PyPersistentGraph { shared_metadata: Option>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_node_props_from_pandas( + load_node_metadata_from_pandas( &self.graph, df, id, @@ -874,7 +874,7 @@ impl PyPersistentGraph { shared_metadata: Option>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_node_props_from_parquet( + load_node_metadata_from_parquet( &self.graph, parquet_path.as_path(), id, diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index e753645174..d89c39b29c 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -95,7 +95,7 @@ pub(crate) fn load_edges_from_pandas< ) } -pub(crate) fn load_node_props_from_pandas< +pub(crate) fn load_node_metadata_from_pandas< 'py, G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( diff --git a/raphtory/src/serialise/parquet/mod.rs 
b/raphtory/src/serialise/parquet/mod.rs index 23f90ec61e..af24aef732 100644 --- a/raphtory/src/serialise/parquet/mod.rs +++ b/raphtory/src/serialise/parquet/mod.rs @@ -6,7 +6,7 @@ use crate::{ errors::GraphError, io::parquet_loaders::{ load_edge_deletions_from_parquet, load_edge_metadata_from_parquet, load_edges_from_parquet, - load_graph_props_from_parquet, load_node_props_from_parquet, load_nodes_from_parquet, + load_graph_props_from_parquet, load_node_metadata_from_parquet, load_nodes_from_parquet, }, prelude::*, serialise::parquet::{ @@ -313,7 +313,7 @@ fn decode_graph_storage( .map(|s| s.as_str()) .collect::>(); - load_node_props_from_parquet( + load_node_metadata_from_parquet( &g, &c_node_path, NODE_ID, From 39b89ec1632792a274f23cc4d2eebb91b59d4b3e Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Tue, 25 Nov 2025 04:04:55 -0500 Subject: [PATCH 15/55] Added tests to check equality between graphs created using different ingestion pathways --- .../test_ingestion_equivalence_df.py | 333 ++++++++++++++++++ 1 file changed, 333 insertions(+) create mode 100644 python/tests/test_base_install/test_loaders/test_ingestion_equivalence_df.py diff --git a/python/tests/test_base_install/test_loaders/test_ingestion_equivalence_df.py b/python/tests/test_base_install/test_loaders/test_ingestion_equivalence_df.py new file mode 100644 index 0000000000..90898957ec --- /dev/null +++ b/python/tests/test_base_install/test_loaders/test_ingestion_equivalence_df.py @@ -0,0 +1,333 @@ +import os.path +import pytest +from pathlib import Path +import pandas as pd +import polars as pl +import pyarrow as pa +import duckdb +import fireducks.pandas as fpd +from raphtory import Graph + +base_dir = Path(__file__).parent.parent.parent +EDGES_FILE = os.path.join(base_dir, "data/network_traffic_edges.csv") +NODES_FILE = os.path.join(base_dir, "data/network_traffic_nodes.csv") + +@pytest.fixture(scope="module") +def dataframes(): + # Load Data using Pandas + df_edges_pd = pd.read_csv(EDGES_FILE) + df_nodes_pd = pd.read_csv(NODES_FILE) + + data = { + "pandas": {"edges": df_edges_pd, "nodes": df_nodes_pd}, + "polars": {"edges": pl.from_pandas(df_edges_pd), "nodes": pl.from_pandas(df_nodes_pd)}, + "arrow": {"edges": pa.Table.from_pandas(df_edges_pd), "nodes": pa.Table.from_pandas(df_nodes_pd)}, + "duckdb": { + "edges": duckdb.from_df(df_edges_pd), + "nodes": duckdb.from_df(df_nodes_pd) + }, + "fireducks": {"edges": fpd.read_csv(EDGES_FILE), "nodes": fpd.read_csv(NODES_FILE)} + } + + return data + +def test_edge_ingestion_equivalence(dataframes): + # reference graph + g_pd = Graph() + g_pd.load_edges_from_pandas( + df=dataframes["pandas"]["edges"], + time="timestamp", + src="source", + dst="destination", + properties=["data_size_MB", "transaction_type"], + metadata=["is_encrypted"] + ) + + # Pandas streaming + g_pd_stream = Graph() + g_pd_stream.load_edges_from_df( + data=dataframes["pandas"]["edges"], + time="timestamp", + src="source", + dst="destination", + properties=["data_size_MB", "transaction_type"], + metadata=["is_encrypted"] + ) + assert g_pd == g_pd_stream, "Pandas streaming edge ingestion failed equivalence check" + + # Polars + g_pl = Graph() + g_pl.load_edges_from_df( + data=dataframes["polars"]["edges"], + time="timestamp", + src="source", + dst="destination", + properties=["data_size_MB", "transaction_type"], + metadata=["is_encrypted"] + ) + assert g_pd == g_pl, "Polars edge ingestion failed equivalence check" + + # Arrow + g_arrow = Graph() + g_arrow.load_edges_from_df( + data=dataframes["arrow"]["edges"], 
+ time="timestamp", + src="source", + dst="destination", + properties=["data_size_MB", "transaction_type"], + metadata=["is_encrypted"] + ) + assert g_pd == g_arrow, "Arrow edge ingestion failed equivalence check" + + # DuckDB + g_duckdb = Graph() + g_duckdb.load_edges_from_df( + data=dataframes["duckdb"]["edges"], + time="timestamp", + src="source", + dst="destination", + properties=["data_size_MB", "transaction_type"], + metadata=["is_encrypted"] + ) + assert g_pd == g_duckdb, "DuckDB edge ingestion failed equivalence check" + + # FireDucks + g_fd = Graph() + g_fd.load_edges_from_df( + data=dataframes["fireducks"]["edges"], + time="timestamp", + src="source", + dst="destination", + properties=["data_size_MB", "transaction_type"], + metadata=["is_encrypted"] + ) + assert g_pd == g_fd, "FireDucks edge ingestion failed equivalence check" + + +def test_node_ingestion_equivalence(dataframes): + # reference graph + g_pd = Graph() + g_pd.load_nodes_from_pandas( + df=dataframes["pandas"]["nodes"], + time="timestamp", + id="server_id", + properties=["OS_version", "uptime_days"], + metadata=["primary_function", "server_name", "hardware_type"] + ) + + # Pandas streaming + g_pd_stream = Graph() + g_pd_stream.load_nodes_from_df( + data=dataframes["pandas"]["nodes"], + time="timestamp", + id="server_id", + properties=["OS_version", "uptime_days"], + metadata=["primary_function", "server_name", "hardware_type"] + ) + assert g_pd == g_pd_stream, "Pandas streaming node ingestion failed equivalence check" + + # Polars + g_pl = Graph() + g_pl.load_nodes_from_df( + data=dataframes["polars"]["nodes"], + time="timestamp", + id="server_id", + properties=["OS_version", "uptime_days"], + metadata=["primary_function", "server_name", "hardware_type"] + ) + assert g_pd == g_pl, "Polars node ingestion failed equivalence check" + + # Arrow + g_arrow = Graph() + g_arrow.load_nodes_from_df( + data=dataframes["arrow"]["nodes"], + time="timestamp", + id="server_id", + properties=["OS_version", "uptime_days"], + metadata=["primary_function", "server_name", "hardware_type"] + ) + assert g_pd == g_arrow, "Arrow node ingestion failed equivalence check" + + # DuckDB + g_duckdb = Graph() + g_duckdb.load_nodes_from_df( + data=dataframes["duckdb"]["nodes"], + time="timestamp", + id="server_id", + properties=["OS_version", "uptime_days"], + metadata=["primary_function", "server_name", "hardware_type"] + ) + assert g_pd == g_duckdb, "DuckDB node ingestion failed equivalence check" + + # FireDucks + g_fd = Graph() + g_fd.load_nodes_from_df( + data=dataframes["fireducks"]["nodes"], + time="timestamp", + id="server_id", + properties=["OS_version", "uptime_days"], + metadata=["primary_function", "server_name", "hardware_type"] + ) + assert g_pd == g_fd, "FireDucks node ingestion failed equivalence check" + +def test_metadata_update_equivalence(dataframes): + # reference graph + g_pd = Graph() + g_pd.load_edges_from_pandas( + df=dataframes["pandas"]["edges"], + time="timestamp", + src="source", + dst="destination", + ) + g_pd.load_nodes_from_pandas( + df=dataframes["pandas"]["nodes"], + time="timestamp", + id="server_id", + ) + # update metadata + g_pd.load_node_metadata_from_pandas( + df=dataframes["pandas"]["nodes"], + id="server_id", + metadata=["primary_function", "server_name", "hardware_type"] + ) + g_pd.load_edge_metadata_from_pandas( + df=dataframes["pandas"]["edges"], + src="source", + dst="destination", + metadata=["is_encrypted"] + ) + + # Pandas streaming + g_pd_stream = Graph() + g_pd_stream.load_edges_from_df( + 
data=dataframes["pandas"]["edges"], + time="timestamp", + src="source", + dst="destination", + ) + g_pd_stream.load_nodes_from_df( + data=dataframes["pandas"]["nodes"], + time="timestamp", + id="server_id", + ) + # update metadata + g_pd_stream.load_node_metadata_from_df( + data=dataframes["pandas"]["nodes"], + id="server_id", + metadata=["primary_function", "server_name", "hardware_type"] + ) + g_pd_stream.load_edge_metadata_from_df( + data=dataframes["pandas"]["edges"], + src="source", + dst="destination", + metadata=["is_encrypted"] + ) + assert g_pd == g_pd_stream, "Pandas streaming metadata ingestion failed equivalence check" + + # Polars + g_pl = Graph() + g_pl.load_edges_from_df( + data=dataframes["polars"]["edges"], + time="timestamp", + src="source", + dst="destination", + ) + g_pl.load_nodes_from_df( + data=dataframes["polars"]["nodes"], + time="timestamp", + id="server_id", + ) + # update metadata + g_pl.load_node_metadata_from_df( + data=dataframes["polars"]["nodes"], + id="server_id", + metadata=["primary_function", "server_name", "hardware_type"] + ) + g_pl.load_edge_metadata_from_df( + data=dataframes["polars"]["edges"], + src="source", + dst="destination", + metadata=["is_encrypted"] + ) + assert g_pd == g_pl, "Polars metadata ingestion failed equivalence check" + + # Arrow + g_arrow = Graph() + g_arrow.load_edges_from_df( + data=dataframes["arrow"]["edges"], + time="timestamp", + src="source", + dst="destination", + ) + g_arrow.load_nodes_from_df( + data=dataframes["arrow"]["nodes"], + time="timestamp", + id="server_id", + ) + # update metadata + g_arrow.load_node_metadata_from_df( + data=dataframes["arrow"]["nodes"], + id="server_id", + metadata=["primary_function", "server_name", "hardware_type"] + ) + g_arrow.load_edge_metadata_from_df( + data=dataframes["arrow"]["edges"], + src="source", + dst="destination", + metadata=["is_encrypted"] + ) + assert g_pd == g_arrow, "Arrow metadata ingestion failed equivalence check" + + # DuckDB + g_duckdb = Graph() + g_duckdb.load_edges_from_df( + data=dataframes["duckdb"]["edges"], + time="timestamp", + src="source", + dst="destination", + ) + g_duckdb.load_nodes_from_df( + data=dataframes["duckdb"]["nodes"], + time="timestamp", + id="server_id", + ) + # update metadata + g_duckdb.load_node_metadata_from_df( + data=dataframes["duckdb"]["nodes"], + id="server_id", + metadata=["primary_function", "server_name", "hardware_type"] + ) + g_duckdb.load_edge_metadata_from_df( + data=dataframes["duckdb"]["edges"], + src="source", + dst="destination", + metadata=["is_encrypted"] + ) + assert g_pd == g_duckdb, "DuckDB metadata ingestion failed equivalence check" + + # FireDucks + g_fd = Graph() + g_fd.load_edges_from_df( + data=dataframes["fireducks"]["edges"], + time="timestamp", + src="source", + dst="destination", + ) + g_fd.load_nodes_from_df( + data=dataframes["fireducks"]["nodes"], + time="timestamp", + id="server_id", + ) + # update metadata + g_fd.load_node_metadata_from_df( + data=dataframes["fireducks"]["nodes"], + id="server_id", + metadata=["primary_function", "server_name", "hardware_type"] + ) + g_fd.load_edge_metadata_from_df( + data=dataframes["fireducks"]["edges"], + src="source", + dst="destination", + metadata=["is_encrypted"] + ) + assert g_pd == g_fd, "FireDucks metadata ingestion failed equivalence check" \ No newline at end of file From 7690df555ca85f7fae2cbfc6e085e0dbb9afe57c Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Wed, 26 Nov 2025 03:23:24 -0500 Subject: [PATCH 16/55] Changed load_*_metadata_* back to 
load_*_props_*
---
 python/python/raphtory/__init__.pyi            |  8 ++++----
 raphtory/src/io/parquet_loaders.rs             |  4 ++--
 raphtory/src/python/graph/graph.rs             | 16 ++++++++--------
 .../src/python/graph/graph_with_deletions.rs   |  8 ++++----
 raphtory/src/python/graph/io/pandas_loaders.rs |  4 ++--
 raphtory/src/serialise/parquet/mod.rs          |  8 ++++----
 6 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi
index 3a272910de..36c8777e38 100644
--- a/python/python/raphtory/__init__.pyi
+++ b/python/python/raphtory/__init__.pyi
@@ -1111,7 +1111,7 @@ class Graph(GraphView):
             GraphError: If the operation fails.
         """

-    def load_edge_metadata_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None:
+    def load_edge_props_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None:
         """
         Load edge properties from a Pandas DataFrame.

@@ -1131,7 +1131,7 @@ class Graph(GraphView):
             GraphError: If the operation fails.
         """

-    def load_edge_metadata_from_parquet(self, parquet_path: str, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None:
+    def load_edge_props_from_parquet(self, parquet_path: str, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None:
         """
         Load edge properties from a parquet file.

@@ -1252,7 +1252,7 @@ class Graph(GraphView):
             GraphError: If the operation fails.
         """

-    def load_node_metadata_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None:
+    def load_node_props_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None:
         """
         Load node properties from a Pandas DataFrame.

@@ -1271,7 +1271,7 @@ class Graph(GraphView):
             GraphError: If the operation fails.
         """

-    def load_node_metadata_from_parquet(self, parquet_path: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None:
+    def load_node_props_from_parquet(self, parquet_path: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None:
         """
         Load node properties from a parquet file.
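
For reference, after this rename the Python-facing metadata loaders are invoked as in the sketch below. This is a minimal, illustrative example rather than part of the patch: it assumes a Graph that already contains the relevant nodes and edges, the parquet paths are hypothetical, and the column names are borrowed from the network-traffic test fixtures used earlier in this series. shared_metadata, layer and layer_col are left at their defaults, following the signatures in the stub above.

    from raphtory import Graph

    g = Graph()
    # ... ingest nodes and edges first, e.g. with load_edges_from_parquet ...

    # Attach metadata columns to nodes already present in the graph.
    g.load_node_props_from_parquet(
        parquet_path="network_traffic_nodes.parquet",  # hypothetical path
        id="server_id",
        metadata=["primary_function", "server_name", "hardware_type"],
    )

    # Attach metadata columns to edges already present in the graph.
    g.load_edge_props_from_parquet(
        parquet_path="network_traffic_edges.parquet",  # hypothetical path
        src="source",
        dst="destination",
        metadata=["is_encrypted"],
    )
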
diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 0e58b8981b..1ade06f350 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -136,7 +136,7 @@ pub fn load_edges_from_parquet< Ok(()) } -pub fn load_node_metadata_from_parquet< +pub fn load_node_props_from_parquet< G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( graph: &G, @@ -174,7 +174,7 @@ pub fn load_node_metadata_from_parquet< Ok(()) } -pub fn load_edge_metadata_from_parquet< +pub fn load_edge_props_from_parquet< G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( graph: &G, diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 134fb70596..ec846a6100 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -981,7 +981,7 @@ impl PyGraph { #[pyo3( signature = (df, id, node_type = None, node_type_col = None, metadata = None, shared_metadata = None) )] - fn load_node_metadata_from_pandas( + fn load_node_props_from_pandas( &self, df: &Bound, id: &str, @@ -991,7 +991,7 @@ impl PyGraph { shared_metadata: Option>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_node_metadata_from_pandas( + load_node_props_from_pandas( &self.graph, df, id, @@ -1020,7 +1020,7 @@ impl PyGraph { #[pyo3( signature = (parquet_path, id, node_type = None, node_type_col = None, metadata = None, shared_metadata= None) )] - fn load_node_metadata_from_parquet( + fn load_node_props_from_parquet( &self, parquet_path: PathBuf, id: &str, @@ -1030,7 +1030,7 @@ impl PyGraph { shared_metadata: Option>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_node_metadata_from_parquet( + load_node_props_from_parquet( &self.graph, parquet_path.as_path(), id, @@ -1105,7 +1105,7 @@ impl PyGraph { #[pyo3( signature = (df, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None) )] - fn load_edge_metadata_from_pandas( + fn load_edge_props_from_pandas( &self, df: &Bound, src: &str, @@ -1116,7 +1116,7 @@ impl PyGraph { layer_col: Option<&str>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edge_metadata_from_pandas( + load_edge_props_from_pandas( &self.graph, df, src, @@ -1147,7 +1147,7 @@ impl PyGraph { #[pyo3( signature = (parquet_path, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None) )] - fn load_edge_metadata_from_parquet( + fn load_edge_props_from_parquet( &self, parquet_path: PathBuf, src: &str, @@ -1158,7 +1158,7 @@ impl PyGraph { layer_col: Option<&str>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edge_metadata_from_parquet( + load_edge_props_from_parquet( &self.graph, parquet_path.as_path(), src, diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index 253293548b..0b00c0f9f5 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -837,7 +837,7 @@ impl PyPersistentGraph { shared_metadata: Option>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_node_metadata_from_pandas( + load_node_props_from_pandas( &self.graph, df, id, @@ -874,7 +874,7 @@ impl PyPersistentGraph 
{ shared_metadata: Option>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_node_metadata_from_parquet( + load_node_props_from_parquet( &self.graph, parquet_path.as_path(), id, @@ -914,7 +914,7 @@ impl PyPersistentGraph { layer_col: Option<&str>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edge_metadata_from_pandas( + load_edge_props_from_pandas( &self.graph, df, src, @@ -954,7 +954,7 @@ impl PyPersistentGraph { layer_col: Option<&str>, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edge_metadata_from_parquet( + load_edge_props_from_parquet( &self.graph, parquet_path.as_path(), src, diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index d89c39b29c..1bac0e00db 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -95,7 +95,7 @@ pub(crate) fn load_edges_from_pandas< ) } -pub(crate) fn load_node_metadata_from_pandas< +pub(crate) fn load_node_props_from_pandas< 'py, G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( @@ -125,7 +125,7 @@ pub(crate) fn load_node_metadata_from_pandas< ) } -pub(crate) fn load_edge_metadata_from_pandas< +pub(crate) fn load_edge_props_from_pandas< 'py, G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( diff --git a/raphtory/src/serialise/parquet/mod.rs b/raphtory/src/serialise/parquet/mod.rs index af24aef732..83966bd597 100644 --- a/raphtory/src/serialise/parquet/mod.rs +++ b/raphtory/src/serialise/parquet/mod.rs @@ -5,8 +5,8 @@ use crate::{ }, errors::GraphError, io::parquet_loaders::{ - load_edge_deletions_from_parquet, load_edge_metadata_from_parquet, load_edges_from_parquet, - load_graph_props_from_parquet, load_node_metadata_from_parquet, load_nodes_from_parquet, + load_edge_deletions_from_parquet, load_edge_props_from_parquet, load_edges_from_parquet, + load_graph_props_from_parquet, load_node_props_from_parquet, load_nodes_from_parquet, }, prelude::*, serialise::parquet::{ @@ -313,7 +313,7 @@ fn decode_graph_storage( .map(|s| s.as_str()) .collect::>(); - load_node_metadata_from_parquet( + load_node_props_from_parquet( &g, &c_node_path, NODE_ID, @@ -371,7 +371,7 @@ fn decode_graph_storage( .map(|s| s.as_str()) .collect::>(); - load_edge_metadata_from_parquet( + load_edge_props_from_parquet( &g, &c_edge_path, SRC_COL, From 941c7c1ef060f9cd8757a3b3eda7d72ec2621ed6 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Wed, 26 Nov 2025 03:55:15 -0500 Subject: [PATCH 17/55] Fixed tests and updated workflow dependencies --- .github/workflows/test_python_workflow.yml | 2 +- raphtory/src/io/arrow/mod.rs | 4 ++-- raphtory/tests/df_loaders.rs | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_python_workflow.yml b/.github/workflows/test_python_workflow.yml index 8be35c686a..db1fc4ba4b 100644 --- a/.github/workflows/test_python_workflow.yml +++ b/.github/workflows/test_python_workflow.yml @@ -63,7 +63,7 @@ jobs: python -m pip install -e examples/netflow python -m pip install black echo "Installing linting dependencies from cache..." 
-          python -m pip install maturin mypy networkx pyvis pandas-stubs
+          python -m pip install maturin mypy networkx pyvis pandas-stubs polars fireducks
       - name: Setup Node.js
         uses: actions/setup-node@v4
         with:
diff --git a/raphtory/src/io/arrow/mod.rs b/raphtory/src/io/arrow/mod.rs
index d40853bb3d..6815547a7a 100644
--- a/raphtory/src/io/arrow/mod.rs
+++ b/raphtory/src/io/arrow/mod.rs
@@ -45,7 +45,7 @@ mod test {
                 }),
             ]
             .into_iter(),
-            num_rows: 3,
+            num_rows: Some(3),
         };
         let graph = Graph::new();
         let layer_name: Option<&str> = None;
@@ -142,7 +142,7 @@ mod test {
                 }),
             ]
             .into_iter(),
-            num_rows: 2,
+            num_rows: Some(2),
         };
         let graph = Graph::new();
diff --git a/raphtory/tests/df_loaders.rs b/raphtory/tests/df_loaders.rs
index 3b3da4037c..35ef4fee9e 100644
--- a/raphtory/tests/df_loaders.rs
+++ b/raphtory/tests/df_loaders.rs
@@ -228,7 +228,7 @@ mod io_tests {
                 "int_prop".to_owned(),
             ],
             chunks: chunks.into_iter(),
-            num_rows: edges.len(),
+            num_rows: Some(edges.len()),
         }
     }
@@ -271,7 +271,7 @@ mod io_tests {
                 "int_prop".to_owned(),
             ],
             chunks: chunks.into_iter(),
-            num_rows: edges.len(),
+            num_rows: Some(edges.len()),
         }
     }

From 7d259135b2d5560ec74f097ef064c2c671839b4c Mon Sep 17 00:00:00 2001
From: arienandalibi
Date: Wed, 26 Nov 2025 11:33:41 -0500
Subject: [PATCH 18/55] Added try-except blocks for fireducks import in tests
---
 .github/workflows/test_python_workflow.yml    |  2 +-
 python/pyproject.toml                         |  3 +-
 .../test_loaders/test_load_from_fireducks.py  | 53 ----------
 .../test_ingestion_equivalence_df.py          | 96 ++++++++++---------
 python/tests/test_load_from_fireducks.py      | 55 +++++++++++
 .../test_load_from_polars.py                  |  0
 6 files changed, 110 insertions(+), 99 deletions(-)
 delete mode 100644 python/tests/test_base_install/test_loaders/test_load_from_fireducks.py
 rename python/tests/{test_base_install/test_loaders => }/test_ingestion_equivalence_df.py (83%)
 create mode 100644 python/tests/test_load_from_fireducks.py
 rename python/tests/{test_base_install/test_loaders => }/test_load_from_polars.py (100%)
diff --git a/.github/workflows/test_python_workflow.yml b/.github/workflows/test_python_workflow.yml
index db1fc4ba4b..8be35c686a 100644
--- a/.github/workflows/test_python_workflow.yml
+++ b/.github/workflows/test_python_workflow.yml
@@ -63,7 +63,7 @@ jobs:
           python -m pip install -e examples/netflow
           python -m pip install black
           echo "Installing linting dependencies from cache..."
- python -m pip install maturin mypy networkx pyvis pandas-stubs polars fireducks + python -m pip install maturin mypy networkx pyvis pandas-stubs - name: Setup Node.js uses: actions/setup-node@v4 with: diff --git a/python/pyproject.toml b/python/pyproject.toml index fdb61bdc9b..661b133e99 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "pandas >= 2.0.3", "pyarrow >=18", "numpy >= 1.26.0", + "ipywidgets", ] @@ -33,7 +34,7 @@ networkx = ["networkx >= 2.6.3"] export = ["raphtory[pyvis,networkx]"] all = ["raphtory[export,plot]"] dev = ["docstring_parser >= 0.16", "pandas-stubs", "maturin>=1.8.3", "tox>=4.25"] -test = ["raphtory[all]", "requests >= 2.31.0", "pyjwt[crypto] >= 2.10.1", "pytest >= 8", "pytest-benchmark >= 5.1.0"] +test = ["raphtory[all]", "requests >= 2.31.0", "pyjwt[crypto] >= 2.10.1", "pytest >= 8", "pytest-benchmark >= 5.1.0", "polars >= 1.35.2", "fireducks;python_version<'3.14'", "duckdb >= 1.4.2"] tox = ["nbmake"] [tool.maturin] diff --git a/python/tests/test_base_install/test_loaders/test_load_from_fireducks.py b/python/tests/test_base_install/test_loaders/test_load_from_fireducks.py deleted file mode 100644 index e04524cbad..0000000000 --- a/python/tests/test_base_install/test_loaders/test_load_from_fireducks.py +++ /dev/null @@ -1,53 +0,0 @@ -import pytest -from raphtory import Graph -import fireducks -import fireducks.pandas as fpd -import pandas - -def _collect_edges(g: Graph): - return sorted((e.history()[0], e.src.id, e.dst.id, e["value"]) for e in g.edges) - -def test_load_edges_from_fireducks_df(): - # FireDucks DataFrame (pandas-compatible API) - df = fpd.DataFrame( - { - "time": [1, 2, 3], - "src": [1, 2, 3], - "dst": [2, 3, 4], - "value": [10.0, 20.0, 30.0], - } - ) - - g: Graph = Graph() - g.load_edges_from_pandas(df=df, time="time", src="src", dst="dst", properties=["value"]) - assert [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)] == _collect_edges(g) - -def test_fireducks_matches_pandas_for_same_edges(): - df_fireducks = fpd.DataFrame( - { - "time": [1, 2, 3], - "src": [1, 2, 3], - "dst": [2, 3, 4], - "value": [10.0, 20.0, 30.0], - } - ) - df_pandas = pandas.DataFrame( - { - "time": [1, 2, 3], - "src": [1, 2, 3], - "dst": [2, 3, 4], - "value": [10.0, 20.0, 30.0], - } - ) - - g_fireducks: Graph = Graph() - g_fireducks.load_edges_from_pandas(df=df_fireducks, time="time", src="src", dst="dst", properties=["value"]) - - g_pandas = Graph() - g_pandas.load_edges_from_pandas(df=df_pandas, time="time", src="src", dst="dst", properties=["value"]) - - expected = [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)] - - assert _collect_edges(g_fireducks) == _collect_edges(g_pandas) - assert _collect_edges(g_fireducks) == expected - assert _collect_edges(g_pandas) == expected \ No newline at end of file diff --git a/python/tests/test_base_install/test_loaders/test_ingestion_equivalence_df.py b/python/tests/test_ingestion_equivalence_df.py similarity index 83% rename from python/tests/test_base_install/test_loaders/test_ingestion_equivalence_df.py rename to python/tests/test_ingestion_equivalence_df.py index 90898957ec..88a4cdf03c 100644 --- a/python/tests/test_base_install/test_loaders/test_ingestion_equivalence_df.py +++ b/python/tests/test_ingestion_equivalence_df.py @@ -5,7 +5,10 @@ import polars as pl import pyarrow as pa import duckdb -import fireducks.pandas as fpd +try: + import fireducks.pandas as fpd +except ModuleNotFoundError: + fpd = None from raphtory import Graph base_dir = 
Path(__file__).parent.parent.parent @@ -26,8 +29,9 @@ def dataframes(): "edges": duckdb.from_df(df_edges_pd), "nodes": duckdb.from_df(df_nodes_pd) }, - "fireducks": {"edges": fpd.read_csv(EDGES_FILE), "nodes": fpd.read_csv(NODES_FILE)} } + if fpd: + data["fireducks"] = {"edges": fpd.read_csv(EDGES_FILE), "nodes": fpd.read_csv(NODES_FILE)} return data @@ -91,17 +95,18 @@ def test_edge_ingestion_equivalence(dataframes): ) assert g_pd == g_duckdb, "DuckDB edge ingestion failed equivalence check" - # FireDucks - g_fd = Graph() - g_fd.load_edges_from_df( - data=dataframes["fireducks"]["edges"], - time="timestamp", - src="source", - dst="destination", - properties=["data_size_MB", "transaction_type"], - metadata=["is_encrypted"] - ) - assert g_pd == g_fd, "FireDucks edge ingestion failed equivalence check" + if fpd: + # FireDucks + g_fd = Graph() + g_fd.load_edges_from_df( + data=dataframes["fireducks"]["edges"], + time="timestamp", + src="source", + dst="destination", + properties=["data_size_MB", "transaction_type"], + metadata=["is_encrypted"] + ) + assert g_pd == g_fd, "FireDucks edge ingestion failed equivalence check" def test_node_ingestion_equivalence(dataframes): @@ -159,16 +164,18 @@ def test_node_ingestion_equivalence(dataframes): ) assert g_pd == g_duckdb, "DuckDB node ingestion failed equivalence check" - # FireDucks - g_fd = Graph() - g_fd.load_nodes_from_df( + if fpd: + # FireDucks + print("Testing fireducks...") + g_fd = Graph() + g_fd.load_nodes_from_df( data=dataframes["fireducks"]["nodes"], time="timestamp", id="server_id", properties=["OS_version", "uptime_days"], metadata=["primary_function", "server_name", "hardware_type"] - ) - assert g_pd == g_fd, "FireDucks node ingestion failed equivalence check" + ) + assert g_pd == g_fd, "FireDucks node ingestion failed equivalence check" def test_metadata_update_equivalence(dataframes): # reference graph @@ -305,29 +312,30 @@ def test_metadata_update_equivalence(dataframes): ) assert g_pd == g_duckdb, "DuckDB metadata ingestion failed equivalence check" - # FireDucks - g_fd = Graph() - g_fd.load_edges_from_df( - data=dataframes["fireducks"]["edges"], - time="timestamp", - src="source", - dst="destination", - ) - g_fd.load_nodes_from_df( - data=dataframes["fireducks"]["nodes"], - time="timestamp", - id="server_id", - ) - # update metadata - g_fd.load_node_metadata_from_df( - data=dataframes["fireducks"]["nodes"], - id="server_id", - metadata=["primary_function", "server_name", "hardware_type"] - ) - g_fd.load_edge_metadata_from_df( - data=dataframes["fireducks"]["edges"], - src="source", - dst="destination", - metadata=["is_encrypted"] - ) - assert g_pd == g_fd, "FireDucks metadata ingestion failed equivalence check" \ No newline at end of file + if fpd: + # FireDucks + g_fd = Graph() + g_fd.load_edges_from_df( + data=dataframes["fireducks"]["edges"], + time="timestamp", + src="source", + dst="destination", + ) + g_fd.load_nodes_from_df( + data=dataframes["fireducks"]["nodes"], + time="timestamp", + id="server_id", + ) + # update metadata + g_fd.load_node_metadata_from_df( + data=dataframes["fireducks"]["nodes"], + id="server_id", + metadata=["primary_function", "server_name", "hardware_type"] + ) + g_fd.load_edge_metadata_from_df( + data=dataframes["fireducks"]["edges"], + src="source", + dst="destination", + metadata=["is_encrypted"] + ) + assert g_pd == g_fd, "FireDucks metadata ingestion failed equivalence check" \ No newline at end of file diff --git a/python/tests/test_load_from_fireducks.py 
b/python/tests/test_load_from_fireducks.py new file mode 100644 index 0000000000..b013a353c3 --- /dev/null +++ b/python/tests/test_load_from_fireducks.py @@ -0,0 +1,55 @@ +try: + import fireducks.pandas as fpd +except ModuleNotFoundError: + fpd = None + +if fpd: + import pandas + from raphtory import Graph + def _collect_edges(g: Graph): + return sorted((e.history()[0], e.src.id, e.dst.id, e["value"]) for e in g.edges) + + def test_load_edges_from_fireducks_df(): + # FireDucks DataFrame (pandas-compatible API) + df = fpd.DataFrame( + { + "time": [1, 2, 3], + "src": [1, 2, 3], + "dst": [2, 3, 4], + "value": [10.0, 20.0, 30.0], + } + ) + + g: Graph = Graph() + g.load_edges_from_pandas(df=df, time="time", src="src", dst="dst", properties=["value"]) + assert [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)] == _collect_edges(g) + + def test_fireducks_matches_pandas_for_same_edges(): + df_fireducks = fpd.DataFrame( + { + "time": [1, 2, 3], + "src": [1, 2, 3], + "dst": [2, 3, 4], + "value": [10.0, 20.0, 30.0], + } + ) + df_pandas = pandas.DataFrame( + { + "time": [1, 2, 3], + "src": [1, 2, 3], + "dst": [2, 3, 4], + "value": [10.0, 20.0, 30.0], + } + ) + + g_fireducks: Graph = Graph() + g_fireducks.load_edges_from_pandas(df=df_fireducks, time="time", src="src", dst="dst", properties=["value"]) + + g_pandas = Graph() + g_pandas.load_edges_from_pandas(df=df_pandas, time="time", src="src", dst="dst", properties=["value"]) + + expected = [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)] + + assert _collect_edges(g_fireducks) == _collect_edges(g_pandas) + assert _collect_edges(g_fireducks) == expected + assert _collect_edges(g_pandas) == expected \ No newline at end of file diff --git a/python/tests/test_base_install/test_loaders/test_load_from_polars.py b/python/tests/test_load_from_polars.py similarity index 100% rename from python/tests/test_base_install/test_loaders/test_load_from_polars.py rename to python/tests/test_load_from_polars.py From f72b010310a62afb8c39de23befaac1c394d5b8e Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Wed, 26 Nov 2025 12:18:14 -0500 Subject: [PATCH 19/55] Fixed tests and notebooks --- python/tests/notebook.ipynb | 407 +----------------- .../test_base_install/base_notebook.ipynb | 330 ++++++++++++++ python/tests/test_ingestion_equivalence_df.py | 6 +- 3 files changed, 338 insertions(+), 405 deletions(-) create mode 100644 python/tests/test_base_install/base_notebook.ipynb diff --git a/python/tests/notebook.ipynb b/python/tests/notebook.ipynb index e24d76744c..3a9ce58e08 100644 --- a/python/tests/notebook.ipynb +++ b/python/tests/notebook.ipynb @@ -2,210 +2,16 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "import tempfile" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Basic functionality on a graph\n", - "\n", - "After importing a Raphtory graph we can create a blank one to work with:\n", - "\n", - "* Graphs in Raphtory are directed by default\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Graph(number_of_nodes=0, number_of_edges=0, number_of_temporal_edges=0, earliest_time=None, latest_time=None)" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from raphtory import Graph\n", - "\n", - "g = 
Graph()\n", - "g" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "NestedArcStringVecIterable([[[_default], [layer1], [layer2]], [[_default]], [[layer1]], [[layer2]]])" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "g.add_edge(0, \"1\", \"2\")\n", - "g.add_edge(0, \"1\", \"3\", layer=\"layer1\")\n", - "g.add_edge(0, \"1\", \"4\", layer=\"layer2\")\n", - "\n", - "g.nodes.edges.layer_names" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Once we have a new graph we can add nodes and edges to it via `add_node()` and `add_edge()`. For these:\n", - "* The ids of nodes and the source/destination of an edge can be either strings or integers\n", - "* All additions into the graph must happen at a specific time - this means updates are also additions\n", - "* If you add an edge between nodes which do no exist in the graph yet, these will be automatically created\n", - "* Properties can be added onto nodes and edges - this is a dict of any value, but the keys must be strings\n", - "* We have a special type of `static property` which exists outside of the timeline and is always accessible. \n", - "* Additions can be completed out of order, making it very easy to merge datasets together\n", - "\n", - "\n", - "We can then check the state of the graph:\n", - "* To see if a node or edge exists you can use `has_node()` and `has_edge()`\n", - "* To get the earliest and latest times at which updates have been applied to the graph you can use `earliest_time()` and `latest_time()` - if no updates have been applied these will return `None`\n", - "* To get the total number of nodes and edges of a graph you can use `num_edges()` and `num_nodes()`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "True True False\n", - "True False\n", - "702 142\n", - "True True False\n", - "True False\n", - "703 144\n", - "Node(name=Ben, earliest_time=5, latest_time=8)\n", - "Edge(source=Haaroon, target=Hamza, earliest_time=7, latest_time=7, properties={property3: test, property1: 1, property2: 9.8, First-Met: {ArcStr(\"toad\"): Str(ArcStr(\"01/01/1990\"))}})\n", - "Graph(number_of_nodes=146, number_of_edges=705, number_of_temporal_edges=2653, earliest_time=0, latest_time=32674)\n", - "True\n" - ] - } - ], - "source": [ - "# Basic Addition of Nodes and Edges\n", - "g.add_node(timestamp=1, id=\"10\")\n", - "g.add_edge(timestamp=2, src=\"1\", dst=\"2\")\n", - "\n", - "# checking node 10, 1 and 5 exist\n", - "print(g.has_node(\"10\"), g.has_node(\"1\"), g.has_node(\"5\"))\n", - "# checking edge 1,2 exists and 2,1 doesn't as Raphtory is directed\n", - "print(g.has_edge(\"1\", \"2\"), g.has_edge(\"2\", \"1\"))\n", - "# Check the total number of edges and nodes\n", - "print(g.count_edges(), g.count_nodes())\n", - "\n", - "# Adding nodes and edges with String IDs\n", - "g.add_node(timestamp=5, id=\"Ben\")\n", - "g.add_edge(timestamp=8, src=\"Hamza\", dst=\"Ben\", layer=\"toad\")\n", - "\n", - "# Performing the same checks as before, but with strings\n", - "print(g.has_node(id=\"Ben\"), g.has_node(id=\"Hamza\"), g.has_node(id=\"Dave\"))\n", - "print(g.has_edge(src=\"Hamza\", dst=\"Ben\"), g.has_edge(src=\"Ben\", dst=\"Hamza\"))\n", - "print(g.count_edges(), g.count_nodes())\n", - "\n", - "g.add_edge(0, \"1\", \"3\", layer=\"toad\")\n", - "# Add an edge with Temporal Properties which can change over time\n", - "e = g.add_edge(\n", - " timestamp=7,\n", - " src=\"Haaroon\",\n", - " dst=\"Hamza\",\n", - " properties={\"property1\": 1, \"property2\": 9.8, \"property3\": \"test\"},\n", - " layer=\"toad\",\n", - ")\n", - "# Add a static property which is immutable\n", - "e.add_metadata(metadata={\"First-Met\": \"01/01/1990\"})\n", - "\n", - "# Add an node with Temporal Properties which can change over time\n", - "v = g.add_node(\n", - " timestamp=5,\n", - " id=\"Hamza\",\n", - " properties={\"property1\": 5, \"property2\": 12.5, \"property3\": \"test2\"},\n", - ")\n", - "# Add a static property which is immutable\n", - "v.add_metadata(metadata={\"Date-of-Birth\": \"01/01/1990\"})\n", - "print(g.node(\"Ben\").__repr__())\n", - "print(g.edge(\"Haaroon\", \"Hamza\").__repr__())\n", - "print(g.__repr__())\n", - "g_path = tempfile.mkdtemp()\n", - "g.save_to_file(g_path)\n", - "loaded_graph = Graph.load_from_file(g_path)\n", - "print(loaded_graph.has_node(\"Hamza\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[['_default'], ['layer1'], ['layer2']]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(g.edges.layer_names)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "g.nodes.edges.start" - ] - }, - { - "cell_type": "code", - "execution_count": 7, + "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 7, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -221,209 +27,11 @@ "\n", "g.to_networkx()" ] - }, - { - "cell_type": "code", - "execution_count": 8, - 
"metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/shivamkapoor/opt/miniconda3/envs/pyraphtory/lib/python3.10/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/Users/shivamkapoor/opt/miniconda3/envs/pyraphtory/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.\n", - " with pd.option_context('mode.use_inf_as_na', True):\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAj8AAAG1CAYAAAAWb5UUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA2h0lEQVR4nO3de3hU5bn+8XsOORESTQIkbKwC0YBIOASSQn8l0KCIG+0Wcbu3AgqiUBBQFLFQKgoXiICAhSoHURSk26p4qu4ialuLG2MCUlEOAeWoJCEkIRxCwsys3x+YkSEBQjLJTPJ+P9clZN61Zq3nWWsa7q71zozNsixLAAAAhrAHugAAAID6RPgBAABGIfwAAACjEH4AAIBRCD8AAMAohB8AAGAUwg8AADAK4QcAABiF8AMAAIziDHQBwciyLHk8Zn3wtd1uM67ns9E//dM//ZusMRwDu90mm81WrXUJP1XweCwVFp4IdBn1xum0KyYmUiUlJ+VyeQJdTr2jf/qnf/o3tX+p8RyD2NhIORzVCz/c9gIAAEYh/AAAAKMQfgAAgFEIPwAAwCiEHwAAYBTCDwAAMArhBwAAGIXwAwAAjEL4AQAARiH8AAAAoxB+AACAUQg/AADAKIQfAABgFMIPAAAwijPQBZjG6fTNmy6XJ0CVAABgJsJPPXI67fpk8/c6dOSEJKllXKQyUloRgAAAqEeEn3p26MgJ7TtUEugyAAAwFnN+AACAUQg/AADAKIQfAABgFMIPAAAwCuEHAAAYhfADAACMQvgBAABGIfwAAACjEH4AAIBRCD8AAMAohB8AAGAUwg8AADAK4QcAABiF8AMAAIxC+AEAAEYh/AAAAKMQfgAAgFEIPwAAwCiEHwAAYBTCDwAAMArhBwAAGIXwAwAAjEL4AQAARiH8AAAAoxB+AACAUQg/AADAKIQfAABgFMIPAAAwCuEHAAAYhfADAACMQvgBAABGIfwAAACjEH4AAIBRCD8AAMAohB8AAGCUgIef4uJiPf7440pPT1dKSoruvPNOZWdne5cPHz5c7dq18/lv6NCh3uVlZWV68skn1bNnT3Xt2lWPPPKICgsLA9EKAABoAJyBLuDhhx/W4cOHNX/+fMXFxWnVqlUaMWKE3nrrLbVt21Y7d+7UE088oeuvv977nJCQEO/PTzzxhLKzs7Vo0SKFhoZq2rRpGj9+vFavXh2IdgAAQJALaPjZt2+fPvvsM61Zs0bdunWTJP3+97/XP//5T7333nsaMmSIjhw5os6dO6t58+aVnp+Xl6e3335bS5YsUffu3SVJ8+fPV//+/fXll1+qa9eu9doPAAAIfgG97RUTE6Nly5YpOTnZO2az2WSz2VRSUqKdO3fKZrOpTZs2VT5/06ZNkqQePXp4x9q0aaP4+HhlZWXVbfEAAKBBCuiVn+joaPXu3dtnbN26ddq3b5+mTJminJwcRUVFafr06frss8/UpEkT9e/fX2PGjFFoaKjy8vIUExOjsLAwn220aNFCubm5tarN6fR/LnQ47LLpTLiTJJtscjgCPu3KW0Mw1BII9E//Z/9tGvo3u3/JzGMQ8Dk/Z9u8ebMmT56sfv36qU+fPpoyZYrKysrUqVMnDR8+XNu3b9ecOXP0ww8/aM6cOSotLVVoaGil7YSFhamsrKzGddjtNsXERNamlfNyOO1yOh3en6OjI+pkPzURTLUEAv3Tv8no3+z+JbOOQdCEn48++kgTJ05USkqK5s2bJ0maPn26HnvsMV122WWSpKSkJIWEhGjChAmaNGmSwsPDVV5eXmlbZWVlioio+Un0eCyVlJys8fPPx+Gwy+3yyOVyS5LcLo9KSkrldnv8vq9LrSs6OiIoagkE+qd/+qd/U/uXGs8xiI6OqPbVq6AIP6tXr9bMmTPVv39/Pf30096rOU6n0xt8KlxzzTWSpNzcXCUkJKi4uFjl5eU+V4Dy8/MVHx9fq5pcrrp5AViyZFmW92e321Nn+7pUwVRLINA//dM//ZvMpGMQ8Bt8a9as0YwZMzR48GDNnz/fJ8QMHTpUkydP9ll/69atCgkJUevWrdWtWzd5PB7vxGdJ2rNnj/Ly8pSamlpvPQAAgIYjoFd+9uzZo1mzZumGG27QqFGjVFBQ4F0WHh6uG2+8UbNmzVKnTp30y1/+Ulu3btWcOXM0YsQINW3aVE2bNtWAAQM0depUzZo1SxEREZo2bZrS0tLUpUuXwDUGAACCVkDDz7p163T69GmtX79e69ev91k2cOBAzZ49WzabTatWrdKsWbPUvHlzDRs2TCNHjvSuN2PGDM2aNUtjx46VJKWnp2vq1Kn12gcAAGg4bFbFBBR4ud0eFRae8Pt2nU67Xl2fo32HSiRJV7WM1uAbkgJ+j9XptCsmJlJFRScCXksg0D/90z/9m9q/1HiOQWxsZLUnPAd8zg8AAEB9IvwAAACjEH4AAIBRCD8AAMAohB8AAGAUwg8AADAK4QcAABiF8AMAAIxC+AEAAEYh/AAAAKMQfgAAgFEIPwAAwCiEHwAAYBTCDwAAMArhBwAAGIXwAwAAjEL4AQAARiH8AAAAoxB+AACAUQg/AADAKIQfAABgFMIPAAAwCuEHAAAYhfADAACMQvgBAABGIfwAAACjEH4AAIBRCD8AAMAohB8AAGAUwg8AADAK4QcAABiF8A
MAAIxC+AEAAEYh/AAAAKMQfgAAgFEIPwAAwCiEHwAAYBTCDwAAMArhBwAAGIXwAwAAjEL4AQAARiH8AAAAoxB+AACAUQg/AADAKIQfAABgFMIPAAAwCuEHAAAYhfADAACMQvgBAABGIfwAAACjBDz8FBcX6/HHH1d6erpSUlJ05513Kjs727t848aNuu2229S5c2f1799f77//vs/zy8rK9OSTT6pnz57q2rWrHnnkERUWFtZ3GwAAoIEIePh5+OGH9eWXX2r+/Pl68803de2112rEiBH67rvv9O2332rUqFHq1auX1q5dq//8z//UpEmTtHHjRu/zn3jiCW3YsEGLFi3Syy+/rO+++07jx48PYEcAACCYOQO583379umzzz7TmjVr1K1bN0nS73//e/3zn//Ue++9pyNHjqhdu3aaMGGCJCkxMVHbtm3TCy+8oJ49eyovL09vv/22lixZou7du0uS5s+fr/79++vLL79U165dA9YbAAAITgG98hMTE6Nly5YpOTnZO2az2WSz2VRSUqLs7Gz17NnT5zk9evTQpk2bZFmWNm3a5B2r0KZNG8XHxysrK6t+mgAAAA1KQK/8REdHq3fv3j5j69at0759+zRlyhS99dZbSkhI8FneokULlZaWqqioSHl5eYqJiVFYWFildXJzc2tVm9Pp/1zocNhl05lwJ0k22eRwBPzOo7eGYKglEOif/s/+2zT0b3b/kpnHIKDh51ybN2/W5MmT1a9fP/Xp00enTp1SaGiozzoVj8vLy1VaWlppuSSFhYWprKysxnXY7TbFxETW+PkX4nDa5XQ6vD9HR0fUyX5qIphqCQT6p3+T0b/Z/UtmHYOgCT8fffSRJk6cqJSUFM2bN0/SmRBTXl7us17F44iICIWHh1daLp15B1hERM1PosdjqaTkZI2ffz4Oh11ul0cul1uS5HZ5VFJSKrfb4/d9XWpd0dERQVFLINA//dM//Zvav9R4jkF0dES1r14FRfhZvXq1Zs6cqf79++vpp5/2Xs1p2bKl8vPzfdbNz89XkyZNFBUVpYSEBBUXF6u8vNznClB+fr7i4+NrVZPLVTcvAEuWLMvy/ux2e+psX5cqmGoJBPqnf/qnf5OZdAwCfoNvzZo1mjFjhgYPHqz58+f7hJju3bvriy++8Fn/888/V0pKiux2u7p16yaPx+Od+CxJe/bsUV5enlJTU+utBwAA0HAENPzs2bNHs2bN0g033KBRo0apoKBAhw8f1uHDh3Xs2DENHTpUX331lebNm6dvv/1WL774ov7617/qvvvukyTFx8drwIABmjp1qjIzM/XVV1/p4YcfVlpamrp06RLI1gAAQJAK6G2vdevW6fTp01q/fr3Wr1/vs2zgwIGaPXu2nnvuOc2dO1cvv/yyrrjiCs2dO9fn7e8zZszQrFmzNHbsWElSenq6pk6dWq99AACAhsNmVUxAgZfb7VFh4Qm/b9fptOvV9Tnad6hEknRVy2gNviEp4PdYnU67YmIiVVR0IuC1BAL90z/907+p/UuN5xjExkZWe8JzwOf8AAAA1CfCDwAAMArhBwAAGIXwAwAAjBIUH3JoKru96u/2asgTzgAACHaEnwBqEdNE67MP6lDBce9Yy7hIZaS0IgABAFBHCD8BlltwwvvWdwAAUPeY8wMAAIxC+AEAAEYh/AAAAKMQfgAAgFEIPwAAwCiEHwAAYBTCDwAAMArhBwAAGIXwAwAAjEL4AQAARiH8AAAAoxB+AACAUQg/AADAKIQfAABgFMIPAAAwCuEHAAAYhfADAACMQvgBAABGIfwAAACjEH4AAIBRCD8AAMAohB8AAGAUwg8AADAK4QcAABiF8AMAAIxC+AEAAEYh/AAAAKMQfgAAgFEIPwAAwCh1En5yc3PrYrMAAAC1VqPwc+211+qrr76qcll2drZuuummWhUFAABQV5zVXfHFF1/UyZMnJUmWZen111/Xp59+Wmm9L7/8UqGhof6rEAAAwI+qHX7Kysq0ePFiSZLNZtPrr79eaR273a6oqCiNHj3afxUCAAD4UbXDz+jRo72hpn379vrzn/+sTp061VlhAAAAdaHa4edsO3bs8HcdAAAA9aJG4UeSPvvsM/3tb39TaWmpPB6PzzKbzaZZs2bVujgAAAB/q1H4efHFFzVnzhyFhYUpNjZWNpvNZ/m5jwEAAIJFjcLP6tWrdcstt2jmzJm8swsAADQoNfqcn4KCAt1+++0EHwAA0ODUKPx06NBBu3bt8nctAAAAda5Gt72mTJmihx56SE2aNFHnzp0VERFRaZ1/+7d/q3VxAAAA/laj8HPnnXfK4/FoypQp553cvH379loVBgAAUBdqFH5mzJjBO7oAAECDVKPwc9ttt/m7DknS0qVLtWHDBq1atco7NnXq1EpfpdGqVSt98sknkiSPx6PFixfr9ddf17Fjx5SamqrHH39cP/vZz+qkRgAA0LDVKPxkZWVddJ3U1NRL2uarr76qhQsXqnv37j7jO3fu1G9+8xsNGTLEO+ZwOLw/P/fcc1qzZo1mz56thIQEzZ07V/fdd5/ee+893o0GAAAqqVH4GTp0qGw2myzL8o6dexusunN+8vLyNG3aNGVmZqp169Y+yyzL0u7duzVy5Eg1b9680nPLy8v14osvauLEierTp48kacGCBerVq5c+/PBD3XzzzZfWGAAAaPRqFH5eeeWVSmMnT55Udna23nnnHS1atKja2/rmm28UEhKid999V3/84x/1/fffe5ft379fJ0+eVNu2bat87o4dO3TixAn17NnTOxYdHa0OHTooKyuL8AMAACqpUfhJS0urcrxPnz5q0qSJnn/+eS1durRa28rIyFBGRkaVy3JyciRJq1at0qeffiq73a709HRNmDBBUVFRys3NlSS1bNnS53ktWrTwLqspp7NGH4F0QQ6HXTbZvFfJbD/+cfZVM5tscjj8v++L1XX236ahf/o/+2/T0L/Z/UtmHoMaf7Hp+XTv3l3Lly/3y7ZycnJkt9vVokULLVmyRPv379ecOXO0a9cuvfzyyyotLZWkSnN7wsLCdPTo0Rrv1263KSYmsla1n4/DaZfTeWbOkt1hl8Px0+OK5dHRlT83qT4Ear/Bgv7p32T0b3b/klnHwO/h55NPPlFkpH+Cw+jRo3XXXXcpJiZGkpSUlKTmzZvrjjvu0NatWxUeHi7pzNyfip8lqaysrMoPXqwuj8dSScnJ2hVfBYfDLrfLI5fLfWY/bo/c7p8eS5Lb5VFJSancbo/f93+huqKjI+p9v8GC/umf/unf1P6lxnMMoqMjqn31qkbh5+6776405vF4lJubq++//173339/TTZbid1u9wafCtdcc40kKTc313u7Kz8/X1deeaV3nfz8fLVr165W+3a56uYFYMnyThS3fvzj7InjlqwfA1H9vwADtd9gQf/0T//0bzKTjkGNbvBZllXpP7vdrqSkJE2fPl0PPfSQX4qbNGmShg0b5jO2detWSdLVV1+t9u3bq2nTpsrMzPQuLykp0bZt2y75rfYAAMAMNbryc/aHENalG2+8U
WPGjNHixYv161//Wnv27NH06dN18803KzExUZI0ZMgQzZs3T7GxsWrVqpXmzp2rhIQE9evXr15qBAAADUut5vx8+umn+uKLL1RSUqLY2Fh169ZNvXr18ldt6tu3rxYuXKhly5Zp+fLlioqK0i233OJzZWn8+PFyuVyaOnWqTp06pdTUVK1YsUIhISF+qwMAADQeNuvsCSfVVF5erjFjxmjDhg1yOByKiYlRUVGRPB6PevTooaVLlzboT1d2uz0qLDzh9+06nXa9uj5H+w6VSJJ+3rGl8o6c1N5DP70z7aqW0Rp8Q1K93nd1Ou2KiYlUUdEJY+73no3+6Z/+6d/U/qXGcwxiYyOrPeG5RnN+Fi1apE2bNmnOnDn66quvtGHDBv3rX//SU089pS1btuj555+vyWYBAADqXI3Cz1/+8heNHTtWv/71r73fs+V0OnXrrbdq7Nixeu+99/xaJAAAgL/UKPwUFhaqQ4cOVS7r0KGD8vLyalUUAABAXalR+Lnyyiu1adOmKpdlZWVV+roJAACAYFGjd3v993//t2bPnq3w8HANGDBAzZo1U0FBgf7yl79o+fLlGjt2rL/rBAAA8IsahZ8777xT27Zt07x58/TMM894xy3L0sCBAzVy5Ei/FQgAAOBPNQo/5eXlmjlzpu6991598cUXOnr0qGw2m66//nrvhw8CAAAEo0ua87Nz504NGjRIL730kiQpMTFRd955p+666y49++yzevjhh7Vnz546KRQAAMAfqh1+Dh48qLvvvlsFBQVq06aNz7KQkBBNmjRJxcXFuuuuu3i3FwAACFrVDj/Lli3T5Zdfrrfeekv9+/f3WRYREaFhw4bpjTfeUFhYmJYuXer3QgEAAPyh2uFn48aNuu+++xQbG3vedZo3b657771Xn332mV+KAwAA8Ldqh5/8/Hy1bt36ouslJSUpNze3NjUBAADUmWqHn9jYWOXn5190vaKiIl122WW1KgoAAKCuVDv8pKamau3atRdd7+233z7vV18AAAAEWrXDz9ChQ5WZmanZs2errKys0vLy8nLNmTNHn376qQYPHuzXIgEAAPyl2h9ymJycrMmTJ2vWrFl655131LNnT11xxRVyu9364YcflJmZqaKiIj344IPq1atXXdYMAABQY5f0Cc+DBw9W+/bttWLFCn388cfeK0CRkZH65S9/qXvvvVedO3euk0IBAAD84ZK/3qJbt27q1q2bJKmwsFBOp1PR0dF+LwwAAKAu1Oi7vSpc6DN/AAAAgtElfbcXAABAQ0f4AQAARiH8AAAAoxB+AACAUQg/AADAKIQfAABgFMIPAAAwCuEHAAAYhfADAACMQvgBAABGIfwAAACjEH4AAIBRCD8AAMAohB8AAGAUwg8AADAK4QcAABiF8AMAAIxC+AEAAEYh/AAAAKMQfgAAgFEIPwAAwCiEHwAAYBTCDwAAMArhBwAAGIXwAwAAjEL4AQAARiH8AAAAozgDXQB82e02ORy+mdTl8gSoGgAAGh/CT5BpEdNE67MP6lDBcUlSy7hIZaS0IgABAOAnhJ8glFtwQvsOlQS6DAAAGiXm/AAAAKMEVfhZunSphg4d6jO2fft2DRkyRF26dFFGRoZeeeUVn+Uej0d/+MMf1KtXL3Xp0kX333+/Dhw4UJ9lAwCABiRows+rr76qhQsX+owVFRVp+PDhuvLKK/Xmm2/qgQce0Lx58/Tmm29613nuuee0Zs0azZgxQ//zP/8jj8ej++67T+Xl5fXcAQAAaAgCPucnLy9P06ZNU2Zmplq3bu2z7M9//rNCQkI0ffp0OZ1OJSYmat++fVq2bJkGDRqk8vJyvfjii5o4caL69OkjSVqwYIF69eqlDz/8UDfffHP9NwQAAIJawK/8fPPNNwoJCdG7776rzp07+yzLzs5WWlqanM6fMlqPHj20d+9eFRQUaMeOHTpx4oR69uzpXR4dHa0OHTooKyur3noAAAANR8Cv/GRkZCgjI6PKZbm5uUpKSvIZa9GihSTp0KFDys3NlSS1bNmy0joVy2rK6fR/LnQ47LLJJpvNJkmy/fhHxeOqxmyq/Lk/dVHX2X+bhv7p/+y/TUP/ZvcvmXkMAh5+LuTUqVMKDQ31GQsLC5MklZWVqbS0VJKqXOfo0aM13q/dblNMTGSNn38hDqddTqfjzH4cdjkcPz2uaszhtCs6OqJOajlXfe0nWNE//ZuM/s3uXzLrGAR1+AkPD680cbmsrEyS1KRJE4WHh0uSysvLvT9XrBMRUfOT6PFYKik5WePnn4/DYZfb5ZHL5T6zH7dHbvdPj6sac7s8Kikpldtddx9y6HCcCVh1vZ9gRf/0T//0b2r/UuM5BtHREdW+ehXU4SchIUH5+fk+YxWP4+Pj5XK5vGNXXnmlzzrt2rWr1b7r6hOVLVmyLOvHn8/8UfG4qjFL1o9hqO5fkPW1n2BF//RP//RvMpOOQVDf4EtNTdWmTZvkdv90ZeTzzz9XmzZtFBcXp/bt26tp06bKzMz0Li8pKdG2bduUmpoaiJIBAECQC+rwM2jQIB0/fly/+93vtHv3bq1du1YrV67UqFGjJJ2Z6zNkyBDNmzdPH3/8sXbs2KEJEyYoISFB/fr1C3D1AAAgGAX1ba+4uDi98MILmjlzpgYOHKjmzZtr0qRJGjhwoHed8ePHy+VyaerUqTp16pRSU1O1YsUKhYSEBLByAAAQrIIq/MyePbvSWKdOnfTaa6+d9zkOh0OPPvqoHn300bosLWDs9qrf6m7KfVkAAPwtqMIPKmsR00Trsw/qUMFx71jLuEhlpLQiAAEAUAOEnwYgt+CE9h0qCXQZAAA0CkE94RkAAMDfCD8AAMAohB8AAGAUwg8AADAK4QcAABiF8AMAAIxC+AEAAEYh/AAAAKMQfgAAgFEIPwAAwCiEHwAAYBTCDwAAMArhBwAAGIXwAwAAjEL4AQAARiH8AAAAoxB+AACAUQg/AADAKIQfAABgFMIPAAAwCuEHAAAYhfADAACMQvgBAABGIfwAAACjEH4AAIBRCD8AAMAohB8AAGAUwg8AADAK4QcAABiF8AMAAIxC+AEAAEYh/AAAAKMQfgAAgFEIPwAAwCiEHwAAYBTCDwAAMArhBwAAGIXwAwAAjEL4AQAARiH8AAAAoxB+AACAUQg/AADAKIQfAABgFMIPAAAwCuEHAAAYhfADAACMQvgBAABGcQa6APiP0+mbZV0uT4AqAQAgeDWI8JOXl6f09PRK40899ZRuu+02bd++XTNnztTXX3+t2NhYDRs2THfffXcAKg0cp9OuTzZ/r0NHTkiSWsZFKiOlFQEIAIBzNIjws2PHDoWFhemjjz6SzWbzjkdFRamoqEjDhw9XRkaGnnzySW3ZskVPPvmkIiMjNWjQoABWXf8OHTmhfYdKAl0GAABBrUGEn5ycHLVu3VotWrSotOzll19WSEiIpk+fLqfTqcTERO3bt0/Lli0zLvwAAICLaxATnnfu3KnExMQql2VnZystLU1O5085rkePHtq7d68KCgrqq0QAANBANJgrPzExMRo8eLD27Nmj
q666SqNHj1Z6erpyc3OVlJTks37FFaJDhw6pWbNmNdrnuZOH/cHhsMsmm/fWne3HP86+lXfuWNXr2ORw+NZXeduV17lQXWf/bRr6p/+z/zYN/Zvdv2TmMQj68ONyufTdd9/p6quv1m9/+1s1bdpU77//vkaOHKmXXnpJp06dUmhoqM9zwsLCJEllZWU12qfdblNMTGSta6+Kw2mX0+k4sx+HXQ7HT4+rGqtqHYfTrujoiAtu+3zrXMilrt/Y0D/9m4z+ze5fMusYBH34cTqdyszMlMPhUHh4uCSpY8eO2rVrl1asWKHw8HCVl5f7PKci9DRp0qRG+/R4LJWUnKxd4VVwOOxyuzxyudxn9uP2yO3+6XFVY1Wt43Z5VFJSKrfbc95tV7XOheqKjo6o9vqNDf3TP/3Tv6n9S43nGERHR1T76lXQhx9JioysfBXmmmuu0YYNG5SQkKD8/HyfZRWP4+Pja7zPunqLuCVLlmX9+POZPyoeVzVW9TrWj4HIt0bfbVe9zoVc6vqNDf3TP/3Tv8lMOgZBf4Nv165dSklJUWZmps/4119/rauvvlqpqanatGmT3O6frox8/vnnatOmjeLi4uq7XAAAEOSCPvwkJiaqbdu2mj59urKzs/Xtt9/qqaee0pYtWzR69GgNGjRIx48f1+9+9zvt3r1ba9eu1cqVKzVq1KhAlw4AAIJQ0N/2stvtWrJkiZ555hk99NBDKikpUYcOHfTSSy953+X1wgsvaObMmRo4cKCaN2+uSZMmaeDAgQGuHAAABKOgDz+S1KxZMz311FPnXd6pUye99tpr9VgRAABoqIL+thcAAIA/EX4AAIBRCD8AAMAohB8AAGAUwg8AADAK4QcAABiF8AMAAIxC+AEAAEYh/AAAAKMQfgAAgFEaxNdbwH+cTt+863J5AlQJAACBQfgxiNNp1yebv9ehIyckSS3jIpWR0irAVQEAUL8IP4Y5dOSE9h0qCXQZAAAEDOGnkbLbbXI4fG9xnfsYAAATEX4aqRYxTbQ++6AOFRz3jl3XNk422QJYFQAAgUf4acRyC3xvcSXERQawGgAAggPhpwHilhYAADVH+GmAuKUFAEDNEX4aKG5pAQBQM9wrAQAARiH8AAAAoxB+AACAUQg/AADAKIQfAABgFMIPAAAwCuEHAAAYhfADAACMwocc4qKczsoZ2eXyBKASAABqj/CDSs4OOw6HXeuzDujQkRPesZZxkcpIaUUAAgA0SIQf+HA67fpk8/fesHNd2zjlHjnp81UaAAA0ZIQfVHLoyE/fG8Z3hgEAGhsmPAMAAKMQfgAAgFEIPwAAwCiEHwAAYBTCDwAAMArhBwAAGIW3uuOS2e02ORy+uZkPPAQANBSEH1yyFjFNtD77oA4VHJfEJz4DABoWwg9qJLfgBJ/6DABokJjzAwAAjEL4AQAARuG2l8HOnbh87iTmQDn7W+UlJlMDAPyL8GOwionLuQUn5HDa5XZ51KFtrGyyBaymc79VnsnUAAB/I/wYLrfghPbllsjpdMjlcis+ronftn3uFRypeldxzv5W+UvZdk32BQAwD+EHdeLcKziS/67inLvt69rGqbCkzPvWe3/uCwDQ+BB+UGtVfeihw2Gv1hWcmjp72wlxkco7cpK33gMAqoXwg1o790MPpTNXY6ozd+jc21fBMukaANB4EX7gF+d+6GFCXGSldap6d9n6rAM+t8aqG5oAAKgpwg/qzblXiK5rG6fcc25XVRWa/KWmE7Cruy1/bbs+OZ12byB1OOx1WrM/jz8A1EajCD8ej0eLFy/W66+/rmPHjik1NVWPP/64fvaznwW6NJzj7CtEdRl0znWhCdi13VZDnXBd0UfukZNyOO1qfnmEMrr+W53UXJcT4AHgUjWK8PPcc89pzZo1mj17thISEjR37lzdd999eu+99xQaGhro8hAAVd1i8+cE7ItNuK5qEngw/iN/6MgJ7c89JqfTIXcd13fu8W8oxwhA49Pgw095eblefPFFTZw4UX369JEkLViwQL169dKHH36om2++ObAFolbO906yi6nqFlt9ziU6d/+tmjVV3+5XyO2+9H/cqwoEjeFTsM89Rv68EhSMt9gawzkDGosGH3527NihEydOqGfPnt6x6OhodejQQVlZWYSfBq427yS72C226n69R03/kTp3/1X1cfbtsurePjv3FtL5glVd/eNa3WBx9nrnO7bnTpSvzv6qs69zJ9JXN1jVNDRdrMbanLNL7f98CFu+Ko5Zbee8BWPQDjbBeIxslmVZAa2glj788EONGzdO//rXvxQeHu4df/DBB3Xq1CktXbr0krdpWZY8Hv8fFptNKi1zy/XjL7zQEIc8Hsv7uKqx6qxT0+ed/dhmkyyr9tupyxrrYh2bJLdlySbJkuR02GV5LLl/PP8Ou02hIY4fl3rPpErLXPVSo9NhV0SY86L7t0nems9f97l+2o7NJjnsVe2r8nPKT7ursS/f9ZzOM/+wXKj/8/V69nZquq+qt/3T8+12mzweq5q9XfiYVPc1U71zVrP+z34NX7yPn/q/cJ+NyVnHzCbZJNlt1TnXF9jOj6r3mgk2dfkaON8xssvf6cNut8lmq94V/gZ/5ae0tFSSKs3tCQsL09GjR2u0TZvNJoejbm6RREbwOTYNk+/rITIipIHs/8Kv46q3c+HnhIed79eG7YLrhYU4Lrjd6m6ndvs6f292u63avZ2rOjXW9JzVpP/qbvtsdrvtgssbm5qe67raTjCoq9dA9V+f9afB/0tccbWnvLzcZ7ysrEwRERGBKAkAAASxBh9+WrZsKUnKz8/3Gc/Pz1d8fHwgSgIAAEGswYef9u3bq2nTpsrMzPSOlZSUaNu2bUpNTQ1gZQAAIBgF3424SxQaGqohQ4Zo3rx5io2NVatWrTR37lwlJCSoX79+gS4PAAAEmQYffiRp/Pjxcrlcmjp1qk6dOqXU1FStWLFCISH1PSkVAAAEuwb/VncAAIBL0eDn/AAAAFwKwg8AADAK4QcAABiF8AMAAIxC+AEAAEYh/AAAAKMQfgAAgFEIPwYpLi7W448/rvT0dKWkpOjOO+9Udna2d/nw4cPVrl07n/+GDh0awIr9Ky8vr1J/7dq109q1ayVJ27dv15AhQ9SlSxdlZGTolVdeCXDF/pWZmVll/+3atVPfvn0lSc8//3yVyxu6pUuXVnotX+x8ezwe/eEPf1CvXr3UpUsX3X///Tpw4EB9lu03VfX/ySefaNCgQeratasyMjL09NNP69SpU97lmzZtqvK1cPZXCTUUVfU/derUSr1lZGR4lzfm8z906NDz/i54++23JUlut1udOnWqtHzRokUB6sLPLBhj+PDh1s0332xlZWVZ3333nfXkk09anTp1sr799lvLsiyrZ8+e1po1a6z8/Hzvf0VFRYEt2o/+/ve/W8nJyVZeXp5Pj6WlpVZhYaH185//3Jo8ebK1e/du64033rCSk5OtN954I9Bl+01ZWZlP3/n5+daHH35otWvXztvngw8+aD366KOV1mvIVq9ebbVv394aMmSId6w653vRokX
[... remainder of base64 PNG data elided: degree-distribution histogram produced by sns.histplot(df.degree) ...]",
-      "text/plain": [
-       ""
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "from raphtory import graph_gen\n",
-    "\n",
-    "g = Graph()\n",
-    "graph_gen.ba_preferential_attachment(g, nodes_to_add=1000, edges_per_step=10)\n",
-    "view = g.window(0, 1000)\n",
-    "\n",
-    "ids = []\n",
-    "degrees = []\n",
-    "for v in view.nodes:\n",
-    "    ids.append(v.id)\n",
-    "    degrees.append(v.degree())\n",
-    "\n",
-    "df = pd.DataFrame.from_dict({\"id\": ids, \"degree\": degrees})\n",
-    "\n",
-    "sns.set()\n",
-    "sns.histplot(df.degree)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "from raphtory import Graph\n",
-    "from raphtory import algorithms\n",
-    "from raphtory import graph_loader\n",
-    "\n",
-    "g = graph_loader.lotr_graph()\n",
-    "views_l1 = g.rolling(1000)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/shivamkapoor/opt/miniconda3/envs/pyraphtory/lib/python3.10/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n",
-      "  if pd.api.types.is_categorical_dtype(vector):\n",
-      "/Users/shivamkapoor/opt/miniconda3/envs/pyraphtory/lib/python3.10/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n",
-      "  if pd.api.types.is_categorical_dtype(vector):\n",
-      "/Users/shivamkapoor/opt/miniconda3/envs/pyraphtory/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.\n",
-      "  with pd.option_context('mode.use_inf_as_na', True):\n",
-      "/Users/shivamkapoor/opt/miniconda3/envs/pyraphtory/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.\n",
-      "  with pd.option_context('mode.use_inf_as_na', True):\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       ""
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    },
-    {
-     "data": {
-      "image/png": "[... base64 PNG data elided: line plot of average interactions over time ...]",
-      "text/plain": [
-       ""
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "views = g.expanding(100)\n", - "\n", - "timestamps = []\n", - "node_count = []\n", - "edge_count = []\n", - "degree = []\n", - "\n", - "for view in views:\n", - " timestamps.append(view.latest_time)\n", - " # node_count.append(view.num_nodes())\n", - " # edge_count.append(view.num_edges())\n", - " degree.append(view.count_edges() / max(1, view.count_nodes()))\n", - "\n", - "sns.set_context()\n", - "ax = plt.gca()\n", - "plt.xticks(rotation=45)\n", - "ax.set_xlabel(\"Time\")\n", - "ax.set_ylabel(\"Average Interactions\")\n", - "sns.lineplot(x=timestamps, y=degree, ax=ax)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/shivamkapoor/opt/miniconda3/envs/pyraphtory/lib/python3.10/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/Users/shivamkapoor/opt/miniconda3/envs/pyraphtory/lib/python3.10/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/Users/shivamkapoor/opt/miniconda3/envs/pyraphtory/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.\n", - " with pd.option_context('mode.use_inf_as_na', True):\n", - "/Users/shivamkapoor/opt/miniconda3/envs/pyraphtory/lib/python3.10/site-packages/seaborn/_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. 
Convert inf values to NaN before operating instead.\n", - " with pd.option_context('mode.use_inf_as_na', True):\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjYAAAHPCAYAAABAw5B5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABUWUlEQVR4nO3deXgT1f4G8Hcmabq3tKW07FvZEVABQa8goLiwXAFFvOCCG7sXQRDZUUGQTX6iF7AgKmWVRREVBQS9CEgR5CKL7HtbutC9SZM5vz/SBEIqQpp0ksn7eR4f6SSdfvslpG/PnDNHEkIIEBEREWmArHYBRERERO7CYENERESawWBDREREmsFgQ0RERJrBYENERESawWBDREREmsFgQ0RERJrBYENERESawWBDREREmqFXuwA1CCGgKO6/4bIsSx45ry9jT5yxJ87YE2fsiTP2xJk/9USWJUiS9LfP88tgoygCmZn5bj2nXi8jKioUOTkFMJsVt57bV7EnztgTZ+yJM/bEGXvizN96Eh0dCp3u74MNL0URERGRZjDYEBERkWaoHmxSU1PRoEEDp//WrVsHADhy5Aj69euHFi1aoGPHjvjss89UrpiIiIi8lepzbI4ePYrAwEBs2bLFYVJQeHg4srKy0L9/f3Ts2BFTpkzBgQMHMGXKFISGhqJXr14qVk1ERETeSPVg8+eff6JWrVqoVKmS02OffvopAgIC8NZbb0Gv16Nu3bo4e/YsFi1axGBDRERETlS/FHXs2DHUrVu31MeSk5PRunVr6PXX8lebNm1w5swZpKenl1eJRERE5CO8YsQmKioKffv2xenTp1GzZk0MGjQI7dq1Q0pKCurXr+/wfNvIzuXLl1GxYkWXv65e795Mp9PJDv8n9qQ07Ikz9sQZe+KMPXHGnpRO1WBjNptx6tQpJCQkYMyYMQgLC8OmTZvwyiuv4JNPPkFRUREMBoPD5wQGBgIAjEajy19XliVERYWWqfa/EhER7JHz+jL2xBl74ow9ccaeOGNPnLEnjlQNNnq9Hnv27IFOp0NQUBAAoGnTpjh+/DgWL16MoKAgmEwmh8+xBZqQkBCXv66iCOTkFLheeCl0OhkREcHIySmExaL9GyXdCvbEGXvijD1xxp44Y0+c+VtPIiKCb2l0SvVLUaGhziMn9erVw3//+1/Ex8cjLS3N4THbx3FxcWX6up66S6PFovjFHSBvB3vijD1xxp44Y0+csSfO2BNHql6YO378OO666y7s2bPH4fihQ4eQkJCAVq1aYd++fbBYLPbHdu/ejdq1ayMmJqa8yyUiIiIvp2qwqVu3LurUqYO33noLycnJOHnyJN59910cOHAAgwYNQq9evZCXl4dx48bhxIkTWLduHZYuXYoBAwaoWTYRERF5KVUvRcmyjAULFmD27NkYPnw4cnJy0LhxY3zyySf21VCJiYmYOnUqevTogdjYWIwePRo9evRQs2wiIiLyUqrPsalYsSLefffdv3y8WbNmWLVqVTlWRERE5N9OXszG3qNpUBQBWZYgX7czgCIEFCEgS9eOK0IAAGRJQmyFIDxwZ1WH3QTKk+rBhoiIiLzL0u+O4uKVfJc/v3GtaMRFu756uSwYbIiIiMhBQZEZANC6USVUCAuELF83YqMICCEgSZL9uKKUjNjIEmIrBKNSlHr31mGwISIiIge2S0uPtamJGnHhKldze3gfZiIiInIglGtzZnwNgw0RERE5KMk1kGQGGyIiIvJx9jkzvpdrOMeGiIiIrC6n5+PHAxdhMlvv+C/7YLJhsCEiIiIAwOffH8PRc1cBABKAIIPvxQTfq5iIiIg8IiPHCABo1bAS7qofi8hQg8oV3T4GGyIiIgIA5BWaAADd7quFarFhKlfjGk4eJiIiIhSbFRQarXNrKoQFqlyN6xhsiIiICDn51tEaWZYQGuS7F3QYbIiIiAhX863za8KCA1TbwNIdfDeSERERkcv+dyoDR85k2T9Ou1oIAAgPCVCrJLdgsCEiIvIzQgh8tP4QjMUWp8cqRgapUJH7MNgQERH5GYsi7KHmwburQa+3zkwRQqDDXdXULK3MGGyIiIj8jNmi2P/8xAN1YQjQqViNe3HyMBERkZ8xW4T9zzqd704ULg2DDRERkZ+xlGxyKQGQfXgFVGkYbIiIiPyMpeRSlE4n+fTS7tIw2BAREfmZs6m5AACdTnsxQHvfEREREd3Uxp1nAABms3LzJ/ogBhsiIiI/Y5s8/GibmipX4n4MNkRERH6m2Gy9h03T2tEqV+J+DDZERER+xlRyCSpQQ/evsWGwISIi8jOmkrsOB+i1FwO09x0RERHRTdlGbAwB2osB3FKBiIhIo/YdS8PJizkAAEVYJwzLkoRiW7DRa+9SFIMNERGRBhUazfjPhj/sgeZGep2EIAODDREREfmAgiIzFCEgSxI6t64OpWQbBVm23mm4XrVITW1+acNgQ0REpEGmkiXdwYE69O6QoHI15Ud7s4aIiIgIpmLbBGHtjcrcDIMNERGRBhlLlnQbNLik+2b867slIiLyE7ZLUf42YsM5NkRERB4khMCO3y8hLbPQ4fiNq5VkSUKxRYEQApIkIeDvdt6WgMDAABiNxVAUAYsi7J+rkyWkZVm/nhbvVXMzDDZEREQedDE9H599d0y1rx8ebFDta6uBwYaIiMiD8guLAQChQXrc37yK/bht+bWNLEswmxX7Em39LcyNCQoKQFGRdcRGUYT9c21LugP0Mu5tGu/G78b7MdgQERF5kLkkwESFB7p12bVeLyMqKhRZWfkwl9xJmDh5mIiIyKMsFmvo0P3dnBlyC3aZiIjIg8wW64iNXiepXIl/YLAhIiLyIHPJiI1e5o/c8sAuExEReYgQAj/sPQ+AIzblhcGGiIjIQ05fzsXJSzkAgNDgAJWr8Q8MNkRERB6SV2iy/7lnuzoqVuI/GGyIiIg8pLhkGXZCtUhUigpRuRr/wGBDRETkIaaSYONvG1GqiZ0mIiLyENuIzd/u+0Ruw04TERF5iD3Y+NkO22pisCEiIvIQk9kCgJeiyhM7TURE5CH2ERsGm3LDThMREXkIg035Y6eJiIg8xFRsWxXFOTblhcGGiIjIQ4otHLEpb+w0ERGRhxQXc/JweWOniYiIPMTEOTblTq92AURERFpTUGTG1t8u4GxqLgAGm/LEYENERORm//3fZaz/6ZT947Bgg4rV+BcGGyIiIjez7epdMy4cbZrEoVndGJUr8h9eNTZ2+vRp3HnnnVi3bp392JEjR9CvX
z+0aNECHTt2xGeffaZihURERH/Ptsy7ca0oPNy6Bi9FlSOv6XRxcTFef/11FBQU2I9lZWWhf//+qFGjBtauXYshQ4Zg1qxZWLt2rYqVEhER3RxvzKcer7kU9cEHHyAsLMzh2OrVqxEQEIC33noLer0edevWxdmzZ7Fo0SL06tVLpUqJiIhuzrZHFINN+fOKju/duxerVq3C9OnTHY4nJyejdevW0Ouv5a82bdrgzJkzSE9PL+8yiYiIboltxIZ3HC5/qo/Y5OTkYPTo0Rg/fjwqV67s8FhKSgrq16/vcKxSpUoAgMuXL6NixYouf129m1O0Tic7/J/Yk9KwJ87YE2fsiTNf64nZIgAAQYE6t/+8sfG1npQX1YPN5MmTceedd6Jbt25OjxUVFcFgcFwiFxgYCAAwGo0uf01ZlhAVFery599MRESwR87ry9gTZ+yJM/bEGXvizFd6Ikr+XyEy2GM/b2x8pSflRdVgs2HDBiQnJ2Pjxo2lPh4UFASTyeRwzBZoQkJCXP66iiKQk1Pw90+8DTqdjIiIYOTkFMJSsjeIv2NPnLEnztgTZ+yJM1/rSUFhMQCg2GhGVla+R76Gr/WkrCIigm9pdErVYLN27VpkZGTggQcecDg+adIkfPPNN4iPj0daWprDY7aP4+LiyvS1zWbPvAgsFsVj5/ZV7Ikz9sQZe+KMPXHmKz0xluwRJcuSx+v1lZ6UF1WDzaxZs1BUVORwrHPnznj11VfRvXt3fPnll1i5ciUsFgt0OusErN27d6N27dqIieHNjoiIyDtdmzzM+S/lTdWOx8XFoWbNmg7/AUBMTAzi4uLQq1cv5OXlYdy4cThx4gTWrVuHpUuXYsCAAWqWTUREdFNc7q0er+54TEwMEhMTcfr0afTo0QPz58/H6NGj0aNHD7VLIyIi+ktc7q0e1VdF3ejYsWMOHzdr1gyrVq1SqRoiIvIXaVkF+PngZXsouZEsSQCAYosCIYTT45IkQSdbn5NfZAbAERs1eF2wISIiUsMX208i+dgVt51PAhAaHOC289GtYbAhIiICkFNgvb1Is7oxiI92vqWIXDIaYzYrUEoZsZElyf4cWZZQMy4ckaEGp+eRZzHYEBER4dqO3O2aV8Fd9WNVroZcxYt/REREAEy2Cb8B/NHoy/i3R0REBMBUclO9wACuZPJlDDZERETgEm2t4BwbIiLyW0II7Pj9EtIyC+1LtHkpyrcx2BARkd+6eCUfn3137f5pXKLt+xhsiIjIbxUYraM0oUF63N+8CmrFhyMihEu0fRmDDRER+S2LYr0fTWRYIHp3SFC5GnIHXkgkIiK/pZQEG9t2CeT7GGyIiMhv2UZsbHs8ke9jsCEiIr9lH7FhsNEMzrEhIiK/tPN/l7H3SBoAjthoCYMNERH5new8IxZvOmL/OCSIPw61gn+TRETkd3ILigEAAXoZnVtVR9sm8SpXRO7CYENERH6nqGRfqLDgAPRqX1flasidOHmYiIj8TpGpZPsEPX8Mag3/RomIyO8YTdzJW6sYbIiIyO8U2oKNgcFGazjHhoiINOdsSg52/ZFa6h2FFSFwLjUXAIONFjHYEBGR5izedAQXruT/7fMiQ7nhpdYw2BARkeZk5hgBAPc3q4zQ4ACHx2x3Gw7QSehwV7Vyr408i8GGiIg0pdhsQYHRuurpyQ4JCLsh2JC2cfIwERFpSlaudbRGr5MRyjsK+x0GGyIi0hRbsIkIDYBUyuRh0jYGGyIi0pTMkmBTITRQ5UpIDRyjIyIin2S2KNi67wKy80wOx09cygYARIUz2PgjBhsiIvJJh05nYtW2E3/5eFx0SDlWQ96CwYaIiHxSboF1pCa2QhDublDJftxiURAYoEPn1jXUKo1UxGBDREQ+qdisAABqxIWjd4cElashb8HJw0RE5JNMxdZgwx266Xp8NRARkU8yma0bWRq4Qzddh8GGiIh80rURGwYbuoZzbIiIqNwoisDmX88hM7cIQvz182w31hMlT5IkCYGBehiNZvuxExesy7oNAfwdna5hsCEionLz+8l0rNl+0q3n5F5QdD0GGyIiKjenLuUAACrHhKBRzai/fJ5cMmKjlIzOyLKEwMAAGI3F9t25ZUlCeKgB9zer7OGqyZcw2BARUbk5n5YHAGjTOB7d7qt1y5+n18uIigpFVlY+zCXLvIlKwwuTRERUbi5n5AMAasaHq1wJaRWDDRERlQuzRUFGtnWDyqoVQ1WuhrSKwYaIiMpFamYBFCFgCJARHcENKskzOMeGiPxSenYhfvr9EgqNFvvy4dJIkoQAnfV3wGKLAiGE01Lk65974/HrPx+wToZVFAFZvmFyrCTBogj7+UOCA1BkNMNicZ5PYjvn9Z97Y30BOtnp4xu/nu3Ptq8LwF6bJEnQldT4V88t7Xu+/uMbH0vPLgIAxFUItj9G5G4MNkTklzbuPIOfD15Wuwy/VIPza8iDGGyIyC8VGM0AgIY1KqDKTeZ7yJIEfcleRGazAkUIp6XI1z/3xuPXfz5gHRG5/hzXj3TYH5MlBAcbYCwqhrmUERvbOe3LnktGVq6vT6+XnT62fX3b59j+bPu6ttplSbL+Zxux+YvnlvY9X/9xaY8F6GU82LL6X/abqKwYbIjIL1ks1h+2bZrEo13zKipX44hLm4lcx8nDROSXLIrjCAMRaQODDRH5JUWxjoTodAw2RFrCYENEfsk2YmNb+UNE2sBgQ0R+icGGSJsYbIjIL1luWFFERNrAYENEfunaiA3fBom0hP+iicgv2ZZ781IUkbYw2BCRX7LdNI7BhkhbGGyIyC/Z9mDicm8ibWGwISK/xMnDRNrELRWISJN+PngJ51Lz/nIH7tyCYgCAnpOHiTSFwYaINCc1qwCffHP0lp4boOeIDZGWMNgQkebk5ltHY4ID9WjbJM7hset3nK5SMRRVKoaVe31E5DmqB5uMjAxMnz4dP//8M4xGI1q1aoU33ngDdevWBQAcOXIEU6dOxaFDhxAdHY3nn38ezz77rMpVE5E3MxZbAAAxEUHo17mBytUQUXly68XlK1eu4I8//oDFYrnlzxkyZAjOnj2LRYsW4YsvvkBQUBCef/55FBYWIisrC/3790eNGjWwdu1aDBkyBLNmzcLatWvdWTYRaUyRyfoeFGTQqVwJEZU3l0ds8vLyMHXqVDRt2hR9+/bFt99+i1GjRsFisaBWrVpYsmQJKleufNNzZGdno2rVqhgwYADq168PABg8eDD++c9/4vjx49i1axcCAgLw1ltvQa/Xo27duvYQ1KtXL1dLJyKNM5WM2AQGcGIwkb9x+V/97NmzsXnzZkRGRgIAZs2ahYYNG2L+/PnQ6/WYNWvW354jMjISs2fPtoeazMxMLF26FPHx8UhISEBycjJat24Nvf5a/mrTpg3OnDmD9PR0V0snIo0rsgUbg+pX24monLn8r37r1q0YM2YMunbtikOHDuHixYsYPXo0OnXqBLPZjEmTJt3W+SZMmIDVq1fDYDDgP//5D0JCQpCSkmIPPTaVKlUCAFy+fBkVK1Z0tXzo9e79TU6nkx3+T+xJ
adgTZ57oSbHZevO9IIPO7f/WywNfJ87YE2fsSelcDjZXr15FnTp1AAA7duyAXq/HfffdB8A6EmM0Gm/rfM899xyeeuopJCUlYciQIVi+fDmKiopgMBgcnhcYGAgAt33+68myhKioUJc//2YiIoI9cl5fxp44Y0+cubMn5pJb11SICPLYv/XywNeJM/bEGXviyOVgU7VqVRw7dgwtW7bEli1b0KJFC4SFWZdN7tixA9WqVbut8yUkJAAApk6dit9//x3Lli1DUFAQTCaTw/NsgSYkJMTV0qEoAjk5BS5/fml0OhkREcHIySm036rd37EnztgTZ57oyZ9nMwEAsZFByMrKd8s5yxNfJ87YE2f+1pOIiOBbGp1yOdj06dMH06dPR1JSEk6dOoU5c+YAAIYOHYqtW7di/Pjxf3uOzMxM7Nq1Cw8//LB9Ho0sy0hISEBaWhri4+ORlpbm8Dm2j+Pi4pzOdzvMZs+8CCwWxWPn9lXsiTP2xJm7eiKEwOnLOQCAarGhPt1nvk6csSfO2BNHLl+Ye+655/Duu++iVatWmDNnDh577DEAQEBAACZPnoy+ffv+7TnS09MxYsQI7Nq1y36suLgYhw8fRt26ddGqVSvs27fPYfn47t27Ubt2bcTExLhaOhFpWFauEbkFxZAlCdVjefM9In9TpiUDXbt2RdeuXR2OzZ0795Y/v379+mjXrh3eeecdvPPOO4iMjMTChQuRk5OD559/HoGBgUhMTMS4cePw0ksv4eDBg1i6dCmmTJlSlrKJSMPOpuQCAKpUDIEhgPexIfI3ZQo2p0+fxo4dO1BQUABFcRwGkyQJQ4YM+dtzzJkzB7Nnz8Zrr72G3NxctGzZEklJSahSpQoAIDExEVOnTkWPHj0QGxuL0aNHo0ePHmUpm4g07GyqNdjUjA9XuRIiUoMkbtz69hZ9+eWXGDNmjNPOufYTSxKOHDlSpuI8xWJRkJnp3gmFer2MqKhQZGXl81pnCfbEGXviTK+Xse94Bv48mwmhWN9PlJL3Fdu+TgBQbFFK3albJ0v25ypC4ODJDKRkFqDvQ/XR6e7bW8TgLfg6ccaeOPO3nkRHh3p28vBHH32Ee++9F++88w7i4+MhSdwhl4huX1pWAeat2u/289auHOH2cxKR93M52Fy6dAmTJ0/+220TiIhuJr/IDMB6M70H7qwKwHpLBsB6zykbs1mxj+TYyJJkf44sS1AUASEE4qJDULsyL0UR+SOXg03t2rVx+fJld9ZCRH7IYrGGlfAQA3p3SFC5GiLydS4v9x45ciQ++ugj7Nmzp0x3ASYi/2YuubGYXsfL2URUdi6P2EydOhUZGRl4/vnnS31ckiQcPnzY1dMTkZ+wlFx20skMNkRUdi4Hm+7du7uzDiLyU5aSW0VwIz8icgeXg83QoUPdWQcR+SnbHBteiiIidyjTDfpMJhPWrl2LX3/9FTk5OYiKikLLli3x+OOPIygoyF01EpGG2ebY6GSO2BBR2bkcbHJycvDss8/i6NGjqFKlCmJjY3H69Gl8/fXXSEpKwvLlyxEezuWWRHRznGNDRO7k8q9Is2fPRkpKCpYtW4Zt27Zh1apV2LZtG5YtW4aMjAzMmzfPnXUSkUbZLkXpeCmKiNzA5WCzdetWDB8+HC1btnQ43rJlS7z66qv4/vvvy1wcEWmfWbEt9+alKCIqO5ffSfLz81G9evVSH6tevTquXr3q6qmJyI/YR2x4KYqI3MDlYFOnTh38+OOPpT72448/ombNmi4XRUT+w8zl3kTkRi5PHn7xxRcxcuRIWCwWdOnSBRUrVkR6ejq+/vprrF69GpMmTXJnnUTkRQqNZmzddwF5hcUArHs2lbb7dmkkSULAdSHmbGouAEDPERsicgOXg81jjz2GM2fOYMGCBVi5ciUAQAgBg8GAwYMH46mnnnJbkUTkXX45lIJ1P51y6zlDgwPcej4i8k9luo/N4MGD0a9fP+zfvx85OTmIjIxE8+bNERkZ6a76iMgL5RaYAADVK4WhUc0oyLJU6u7bpZElCXr9tREbWZYQERaENo1iPVYvEfmPMgUbAIiIiED79u3dUQsR+Yhis3VeTKOaUejTqV6ZzqXXy4iKCkVWVj7MJeclInLVbQWbRo0aYdWqVWjWrBkaNmwISfrra+LcBJNIu0zF1gBiCNCpXAkRkaPbCjZDhgxBXFyc/c83CzZEpF1GswUAYNBzJRMReZfbCjbXb3w5bNiwmz43JSXFtYqIyOvZLkVxxIaIvI3Lc2yuvyx1o+TkZLz88svYv39/mYoj8mU5BSZs23cBRSbr6IYsSbAIgYAAHYqLLdCVskTaNgr6d8duPH79EmrbBF655HFFCCiKgCxLsCii1PME6GR7LTd+XJpTl7IBcMSGiLzPbQWbJUuWoKCgAID1zXTNmjX46aefnJ63f/9+GAwG91RI5KO+//U8vtl9Vu0yPCo8hEu0ici73FawMRqNmD9/PgDrb3lr1qxxeo4sywgPD8egQYPcUyGRj7qUng8AaFI7GlUrhkKWJSiKQIBBj2KTudQl0tePstzs2I3Hr19CrZTsli2X3PBOUYT9ObY/33gevV6213Ljx6WRJQnREUFoVreia80hIvKQ2wo2gwYNsgeWhg0bYtWqVWjevLlHCiPydWlXCwEAD7eujqa1YwBwaTMRkae5fIH86NGjqF27tsOlqIsXLyIpKQl5eXluKY7IVylC4EpJsKkUFaJyNURE/sPlYHPq1Cl06dIFkydPth87d+4c3n33XfTs2ROXLl1yR31EPulqrhHFZgU6WUJMRKDa5RAR+Q2Xg817772HuLg4rFixwn6sbdu22LFjBypUqID33nvPLQUS+aK0LOtoTcXIIOhkrhwiIiovLi/3/u233zBz5kz7DftsYmJiMHDgQIwdO7bMxRH5AouiYPOv55GTb7JP6L2Qbr0cW7FCsJqlERH5HZeDjSRJKCwsLPUxs9mM4uJil4si8iVHz17FF9tPlvpYxYigcq6GiMi/uRxsWrVqhQ8//BCtW7dGdHS0/fjVq1exYMECtG7d2i0FEnm73ELrTtcVwgxo0yQegHWJdYBeRoc7q6pZGhGR33E52IwcORK9e/dGp06d0KJFC0RHRyMrKwsHDhyAwWDA7Nmz3Vknkdey3Vk4LjoEvTskqFwNEZF/c3lWY+3atfH111+jT58+KCgowKFDh5CTk4PevXtjw4YNqF27tjvrJPJaJhM3hCQi8hYuj9gAQFxcHN544w131ULkk4zFJRtC6rkhJBGR2soUbFJTU7Fv3z6YTCb7MUVRUFhYiOTkZMydO7fMBRJ5O1OxdcQmIIAjNkREanM52Hz33Xd4/fXXYTabnXYZBoA6deq4p0IiL3fwVAYAIDCAIzZERGpz+VfMBQsWoEmTJli3bh169uyJf/7zn9i0aRNGjRoFnU7H+9iQXxBC4Hya9Z41QQYGGyIitbk8YnP69GnMnj0bjRs3xj333IMlS5agbt26qFu3LtLT07FgwQLcd9997qyVyOuYrtvIstNd1VSshIiIgDKM2MiyjMjISABAzZo
1cerUKSiK9U2+Xbt2OHHihHsqJPJiBUVmAIAkATGRvBkfEZHaXA42derUwW+//Wb/s8lkwtGjRwEAOTk5DhOKibSqwGgNNkEGnX1+GRERqcflS1F9+vTBpEmTUFBQgNdeew1t2rTBm2++iSeeeALLli1DkyZN3FknkVcqLAk2nDhMROQdXB6xefLJJzFu3Dj7yMzbb78No9GIqVOnwmw2Y9y4cW4rkshb2S5FBRnKdOcEIiJyE5ffjXft2oVevXohKMg6r6B69er49ttvkZWV5bB3FJHW/O9UBv44nQmzRUFqVgEAICSQwYaIyBu4PGIzbNgwfP/99w7HJEliqCFNU4TAR+sP4fu957Htt4v443QWACAiNEDlyoiICCjDiE1ERIR9tIbIXxhNFhhL7jTc4c4qkCQJOllGp5Zc6k1E5A1cDjYDBgzAO++8g9OnT6Nhw4YICQlxek6rVq3KVByRt7Ht5C1LEvp1bsCVUEREXsblYDNp0iQAsO8Hdf0bvG1rhSNHjpSxPCLvUmSyThYODuTybiIib+RysPnss8/cWQeRTyg0WkdsuAqKiMg7ufzu3Lp1a3fWQeQTbCM2QYG8bw0RkTcq06+dmZmZWLx4MX755RdcuXIFiYmJ2LJlCxo2bIgHH3zQXTUSeQUhBL779RwAIIg35CMi8kouL/c+f/48unfvjtWrVyMuLg4ZGRmwWCw4ffo0Xn31VWzfvt2NZRKp7+KVfBw6lQkACAnipSgiIm/k8rvzjBkzEBMTg88//xwhISFo2rQpAGD27NkwGo1YsGABHnjgAXfVSaQ6275QANC7Y4KKlRAR0V9xecRm165dGDx4MCIiIpxWhzz11FM4fvx4mYsj8iZCCABAfHQIqlYMU7kaIiIqjcvBBgD0+tIHfEwmE5fCkuaU5BrwpU1E5L1cDjYtW7bEwoULUVBQYD8mSRIURcGKFStw1113uaVAIm9hG7GRmWyIiLyWy3NsRo4ciaeffhqdO3fGPffcA0mSsHjxYpw8eRJnz57F8uXL3VknkeqUkv8z1xAReS+XR2zq16+PL774Avfccw/27NkDnU6HX375BTVq1MDKlSvRqFEjd9ZJpDrbiA0vsxIRea8yrVmtXbs2Zs+eXepjKSkpiI+PL8vpibwK59gQEXk/l0dsGjVqhIMHD5b6WHJyMh599FGXiyLyRhyxISLyfrc1YrNkyRL7ZGEhBNasWYOffvrJ6Xn79++HwWC4pXNevXoVc+bMwfbt25GXl4cGDRpg5MiRaNmyJQDrsvKZM2fi5MmTqFy5MoYNG4YuXbrcTtlEbqGUjNjIzDVERF7rtoKN0WjE/PnzAVh/a12zZo3Tc2RZRnh4OAYNGnRL5xwxYgSuXLmCOXPm2G/49+KLL2L9+vUQQmDAgAHo378/Zs6cie3bt2P06NGIjo5G27Ztb6d0ojLjiA0Rkfe7rWAzaNAge2Bp2LAhVq9ejWbNmrn8xc+ePYudO3di+fLluPvuuwEAEyZMwM8//4yNGzciIyMDDRo0wGuvvQYAqFu3Lg4fPozExEQGGyp3nGNDROT9XJ5jc/To0TKFGgCIiorCokWLcMcdd9iPSZIESZKQk5OD5ORkpwDTpk0b7Nu3z/7bM1F54YgNEZH3K9OqqJ07d+LHH39EYWEhFEVxeEySJEybNu2mnx8REYH27ds7HNu8eTPOnj2LsWPHYv369U4rqypVqoTCwkJkZWUhOjra5dr1+jLddNmJTic7/J+015Pdh1MBADpJcvn1o7WeuAN74ow9ccaeOGNPSudysFmyZAnee+89BAYGIjo62um3WFd+q/3tt9/w5ptvonPnznjggQdQVFTkNAnZ9rHJZHK1dMiyhKioUJc//2YiIoI9cl5fpoWeZGQXYt+xKwCAiPDAMr9+tNATd2NPnLEnztgTZ+yJI5eDzbJly9CtWzdMnTr1lldA3cyWLVvw+uuv46677sKsWbMAAIGBgU4BxvZxcLDrf5GKIpCTU/D3T7wNOp2MiIhg5OQUwmJR/v4T/ICWenL83FX7n3u2q4OsrHyXzqOlnrgLe+KMPXHGnjjzt55ERATf0uiUy8EmPT0dTzzxhFtCzbJlyzB16lQ88sgjmDFjhv2clStXRlpamsNz09LSEBISgvDw8DJ9TbPZMy8Ci0Xx2Ll9lRZ6kpFdCACoXy0ScRWCy/z9aKEn7saeOGNPnLEnztgTRy5fmGvcuDGOHz9e5gKWL1+Ot99+G3379sWcOXMcglLLli3x66+/Ojx/9+7duOuuuyDLvKZI5edqrhEAUCE8UOVKiIjoZlwesRk7diyGDx+OkJAQNG/evNRLQ1WqVLnpOU6fPo1p06bhoYcewoABA5Cenm5/LCgoCM888wx69OiBWbNmoUePHtixYwe+++47JCYmulo2kUuy8kqCTRiDDRGRN3M52Dz99NNQFAVjx479y4nCR44cuek5Nm/ejOLiYvzwww/44YcfHB7r0aMHpk+fjo8++ggzZ87Ep59+imrVqmHmzJm8hw2Vu6ySEZsojtgQEXk1l4PN22+/Xeb7eQwcOBADBw686XPatWuHdu3alenrEN2qnAITtiZfgLHYYj9mUQSOX8gGwGBDROTtXA42PXv2dGcdRF5h274L2PjLmb98PCqs7JPliYjIc24r2DRs2PCWR2kkScLhw4ddKopILQVGMwCgduVw1KtWAYD19gCKEKhUIRgJJceIiMg73VawGTJkCG8nT5qmlGzh3bR2DHq0q6NyNUREdLtuK9gMGzbMU3UQeQVbsNHJDPBERL6IN4Mhuo6lJNjIDDZERD6JwYboOhyxISLybWXa3ZvIVwghsHXfBaRkWvcI08kydLKEYosCIQQkSUKATsapyzkAOGJDROSrGGzIL5xPy8PyLbe+BUhIIP9pEBH5Ir57k1/ILSwGAIQE6dGmcRz0OhmyLMFsVqAIAVmSoNfLUBSBsOAAtGpUSeWKiYjIFQw25BdMJXcSjgoLRL/ODVSuhoiIPIWTh8kvFJsVAECAni95IiIt47s8+QWjyTpiYwjgS56ISMv4Lk9+wbapZYBep3IlRETkSZxjQ5p2/MJV7D2ShtMly7gNvBRFRKRpDDakaR9vPIz07CL7x+Eh3J2biEjLGGxI02y7dd/TuBKiw4PwYMvqKldERESexGBDmma2WFdDPX5/HcRFhahcDREReRonHJCmmS3WvZ8CdHypExH5A77bk2YpirBvamkI4GooIiJ/wGBDmmW7KR/AERsiIn/Bd3vSrGLLtWCj13O3biIif8BgQ5pl2x9KliToZL7UiYj8Ad/tSbMKiqxLvYMCOb+GiMhfMNiQZuUXFQMAgg28qwERkb9gsCHNyiu0jtiEBDHYEBH5CwYb0qy8QhMAICSQwYaIyF8w2JBm5RZYL0WFBgeoXAkREZUXBhvSrGvBhiM2RET+gsGGNKugZPJwWBBHbIiI/AWDDWmW7QZ9ej1f5kRE/oLv+KRZlpJ9ovS8OR8Rkd/gOz5plsU2YqPjdgpERP6CwYY0yzZio+MGmEREfoPv+KRZ9ktRHLEhIvIbDD
akWRaLLdjwZU5E5C/4jk+aZVE4x4aIyN8w2JBmXbsUxZc5EZG/4Ds+aZZ98rDMERsiIn/BYEOaZZtjo+N9bIiI/Abf8Umzri335ogNEZG/YLAhzVJ4KYqIyO9w22PSjMycIuw4cAnFZgWKEMgtMAFgsCEi8icMNqQZ3+4+h62/XXA6HsrdvYmI/AaDDWlGflExAKBhjQqoERcOAYH4qBBUjQ1VuTIiIiovDDakGeaSTS/vblAJne6upnI1RESkBk4eJs0wlyzvDtDzZU1E5K/4E4A0w1yyhQInCxMR+S8GG9IMs9m2NxRf1kRE/opzbMgnJR9Nw6lLOSi2KBDCegkqJbMAADe9JCLyZww25HMKioqx4Ms/oJQEmhtxeTcRkf9isCGfk5VngiIEDAEy2jWrYg84siQhNioY9atXULdAIiJSDYMN+Zy8kjsKR4UH4V8P1Ve5GiIi8iacZUk+J7fAeiO+8BBeciIiIkcMNuRzcgtLgk0wgw0RETlisCGfY9vckiM2RER0I86xIZ9QbFbw3Z6zyM434eTFHABAeIhB5aqIiMjbMNiQT9h3LA3rfz7tcCw6PFClaoiIyFsx2JBPSMmy3nwvLjoEzepEIyLUgDZN4lWuioiIvI1XzbFZuHAhnnnmGYdjR44cQb9+/dCiRQt07NgRn332mUrVkZqycowAgDvqROPpB+ujS9taCA5kLiciIkdeE2ySkpLw/vvvOxzLyspC//79UaNGDaxduxZDhgzBrFmzsHbtWnWKJNVczbMGm5iIIJUrISIib6b6r7ypqamYNGkS9uzZg1q1ajk8tnr1agQEBOCtt96CXq9H3bp1cfbsWSxatAi9evVSp2BSRXa+dSUU59UQEdHNqD5i88cffyAgIABfffUVmjdv7vBYcnIyWrduDb3+Wv5q06YNzpw5g/T09PIulVSUUxJsKjDYEBHRTag+YtOxY0d07Nix1MdSUlJQv77jLfMrVaoEALh8+TIqVqzo8tfV692b6XQ62eH/ZO3FHyfT8dP+C7DtV2m2KFAU6wcSrv09KIqALEuQpGs7cwshYFEEFEXY7zYcExnk9r+78sTXiTP2xBl74ow9ccaelE71YHMzRUVFMBgc71USGGj9jd1oNLp8XlmWEBUVWqba/kpERLBHzuurZszZgaxc1/+urqfXSahVPRqBATq3nE9NfJ04Y0+csSfO2BNn7Ikjrw42QUFBMJlMDsdsgSYkJMTl8yqKQE5OQZlqu5FOJyMiIhg5OYWwWBS3nttXGYst9lDzaJsakCQJZrNi340bAPQlv2koQkCWJMjytREbpWS0xvb8xrWiUZBXBPf+zZUvvk6csSfO2BNn7Ikzf+tJRETwLY1OeXWwiY+PR1pamsMx28dxcXFlOrfZ7JkXgcWieOzcvuZCWh4A69YHTz6Q4JZzaqW3fJ04Y0+csSfO2BNn7Ikjr74w16pVK+zbtw8Wi8V+bPfu3ahduzZiYmJUrIxuxaX0fABAbAUOkxIRUfnw6mDTq1cv5OXlYdy4cThx4gTWrVuHpUuXYsCAAWqXRrfgfMmITeUY1y8bEhER3Q6vDjYxMTFITEzE6dOn0aNHD8yfPx+jR49Gjx491C6NboFtxKZabJjKlRARkb/wqjk206dPdzrWrFkzrFq1SoVqqCx2/5GCgyczAADV4xhsiIiofHj1iA35przCYnz89WH7x1UrMtgQEVH5YLAht8svKrbfkO+1p+9ETCT3dyIiovLBYENuZyq2LjsMCw5Ax5Y1VK6GiIj8CYMNuZ2x2Lo8P8CHtz4gIiLfxJ885HYmBhsiIlIJf/KQ29kuRRkYbIiIqJx51XJv8g6/n0jHodOZECUzgCVJQoBOtu/ZJJfswF1sUZyeAwApmdbdnAwa2KySiIh8C4MNOVAUgY82HEKxG/YdCQ8JcENFREREt47BhhxYFGEPNe2aV4ZeJ0OWJOj1MhSlZMSmZAfu63fqtj3HRgLQ4e5q5Vs8ERH5PQYbcmC7tAQAT3Wsh+BA118ies6xISKicsafPORAXPdn21waIiIiX8FgQw6uH7EBcw0REfkYBhtycH2ukRlsiIjIx3COjZ85fCYTv/15BYB1ibZOlqyTgiXrpSez5dpqKImXooiIyMcw2PiZjzYcQkGR+W+fZ9DLnGNDREQ+h8HGjxQUFdtDTYc7q0InS5BlCYoQkOA4QtO4VpR9WTcREZGvYLDxIxk5RgBASKAezzzcQOVqiIiI3I+Th/1IRnYRAKBCmEHlSoiIiDyDwcaPpGVZ93CKjghSuRIiIiLPYLDxI2lXCwEAMZEMNkREpE0MNn7kp98vAQAqVQhWuRIiIiLPYLDxE5k5RTBbrHffqxobqnI1REREnsFg4ydyC4rtf25aO0bFSoiIiDyHwcZPFJms96+JiQjk/WmIiEizGGz8hLHYulWCIUCnciVERESew2DjJ4zFFgBAIIMNERFpGIONnyg0Wi9FccSGiIi0jFsq+IADx9Px5/mrDseKLQqEsK5ysu3xdP3HATprZlVKjp1LzQUABDHYEBGRhjHYeLlis4KPNhyC2aK45XwR3E6BiIg0jMHGyxWazPZQ80jrGkDJgiazWbGPxsglIzbXf6zXl4zYKMJ+Lp0sodPd1cqrdCIionLHYOPlTCbrpN8AvYzeHRNUroaIiMi7cfKwlzOaraM1XM1ERET09xhsvJypZJm2IYB/VURERH+HPy29nNHE+88QERHdKs6x8ZD0q4X46eAlmIqtk3yvn+ArS5L9Y9sxwDrpVxECiiIgyxIsikD61UIAgF7HDEpERPR3GGw85KtfzuC/By+77XwGvYxiswUBeo7cEBER/RUGGw8pLLLe6bdRzShUiw11uImeJEkOG1HalmTLsgRFEfZRHfufZQltGscx1BAREf0NBhsPsZSElXsax6Fd8yoqV0NEROQfOHHDQ8yKdZm27rqRGSIiIvIsBhsPsV1eYrAhIiIqPww2HmKxXJs3Q0REROWDwcYDCorMOFayG7dOZouJiIjKC3/qesD2Axftfw4J4vxsIiKi8sJg4wGZOUUAgACdjAbVK6hbDBERkR9hsPGA/JJ72Dx8T3XOsSEiIipHDDYeUFBUDAAICwpQuRIiIiL/wmDjAQUlIzbhIQw2RERE5YnBxgMKjdZgE8IRGyIionLFYOMBhSYLACCUwYaIiKhcMdh4QJHJOmITzKXeRERE5YrBxs2EECiyj9gw2BAREZUnBhs3KzJZIKy7KSAkkMGGiIioPDHYuJntHjY6WUKAnu0lIiIqT/zJ62a2e9gEGXSQJN6cj4iIqDwx2LiZ7R42QQZehiIiIipv/OnrJhfT8/HFjlM4ezkbABAcqFO5IiIiIv/DYOMmX/x4AvuOXbF/HBkWqGI1RERE/onBxk2e7JCAanHhyC8wQZYkPNCiitolERER+R2fCDaKomD+/PlYs2YNcnNz0apVK0ycOBHVq1dXuzS7KhVD8dI/70BWVj7MZkXtcoiIiPyST0we/uijj7B8+XK8/fbbWLlyJRRFwUsvvQSTyaR2aURERORFvD7YmEwmL
FmyBK+++ioeeOABNGzYEHPnzkVKSgq+//57tcsjIiIiL+L1webo0aPIz89H27Zt7cciIiLQuHFj7N27V8XKiIiIyNt4/RyblJQUAEDlypUdjleqVMn+mCv0br4rsE4nO/yf2JPSsCfO2BNn7Ikz9sQZe1I6rw82hYWFAACDweBwPDAwENnZ2S6dU5YlREWFlrm20kREBHvkvL6MPXHGnjhjT5yxJ87YE2fsiSOvDzZBQUEArHNtbH8GAKPRiOBg1/4yFUUgJ6fALfXZ6HQyIiKCkZNTCIuFq6IA9qQ07Ikz9sQZe+KMPXHmbz2JiAi+pdEprw82tktQaWlpqFGjhv14WloaGjRo4PJ5PbUk22JRuNz7BuyJM/bEGXvijD1xxp44Y08cef2FuYYNGyIsLAx79uyxH8vJycHhw4fRqlUrFSsjIiIib+P1IzYGgwH9+vXDrFmzEB0djapVq2LmzJmIj49H586d1S6PiIiIvIjXBxsAePXVV2E2mzF+/HgUFRWhVatWWLx4MQICAtQujYiIiLyITwQbnU6HUaNGYdSoUWqXQkRERF7M6+fYEBEREd0qBhsiIiLSDEkIIdQuorwJIaAo7v+2dTrZL+4lcDvYE2fsiTP2xBl74ow9ceZPPZFlCZIk/e3z/DLYEBERkTbxUhQRERFpBoMNERERaQaDDREREWkGgw0RERFpBoMNERERaQaDDREREWkGgw0RERFpBoMNERERaQaDDREREWkGgw0RERFpBoMNERERaQaDDREREWkGgw0RERFpBoMNERERaQaDDRGRDxJCqF2C12FPSudvfdGrXYCv+uWXX1BQUABFUXDvvfciLCxM7ZJ8ghACkiTZP1YUBbLMfF0a9qZ07IvV9f+OyIo9IQCQhL9FOTeYMWMGvvrqK1SoUAFnz55F8+bN0bVrVzz99NNql+bVVq5ciT/++ANmsxkJCQl48cUX1S7J62RnZ6O4uBgVK1a0H7sxDPoj9uWadevW4fTp00hPT0eXLl3QvHlzhIeHq12WqtiT0iUlJeHPP//EhQsX0K1bN7Ro0QK1atVSuyyPY7C5Tdu3b8fkyZPxwQcfoHbt2igoKMBbb72Fixcv4t5778WoUaPULtErzZ07F6tWrcJjjz2Gixcv4uTJk4iIiMDs2bNRu3ZttcvzCvPnz8e2bdtw5coVVKlSBU8//TTat2+PqKgovx6lYF+umTVrFtauXYs777wThYWF+PXXX9GzZ0/07NkTd955p9rlqYI9KZ3tPffBBx9Efn4+fvnlF9x1113o1asXHnzwQbXL8yxBt2XFihXi8ccfF0aj0X4sIyNDTJ06VXTr1k3MnTtXveK81IULF8Sjjz4qtm/fLoQQQlEUcejQIdGzZ0/RqVMncfDgQZUrVN+SJUtEmzZtxNq1a8X27dvF0KFDRdeuXcWYMWNESkqKEEIIi8WicpXlj3255vDhw6Jz587iwIED9mNffvmlePTRR8WAAQPEL7/8omJ16mBPSnfmzBnRvXt3sXv3bvux7du3ixdeeEH07NlTfP311ypW53n+86tOGYmSga2AgACYTCbk5OQAAMxmM6KjozFkyBC0bt0aP//8M7766is1S/U6hYWFyMrKQrVq1QBYr4M3adIEH3/8MSpVqoTXX38dKSkpAKzzJ/yJEAImkwm//vorXn75ZfTs2RPt27fHBx98gG7duuHYsWOYOnUqUlNTIcuy30wCZF+cSZKEwsJC6PXXpkZ2794db7zxBq5cuYKkpCQcOnRIxQrLH3tSOp1OhytXrsBoNNqPtW/fHsOGDUN8fDw+//xz7NixQ8UKPYvB5hbZruW3atUK58+fx+effw4A0Ov1MJvNiIyMxKBBgxAWFsZgU8L2w6ZGjRoIDg7Gxo0b7Y8pioLo6GjMmzcPQUFBGD58OAD41WUFwPq6MhgMKCwsRGpqKgDAYrEAAF555RX07NkTFy9exIIFC5CTk+M3c0rYF2dmsxlGoxFZWVkAAJPJBMD6A2vo0KE4duwYNm7cCIvF4hdBD2BPSiOEgKIoCAkJweXLlwEAxcXFAIAWLVrgxRdfhMFgwIYNG5Cenq5mqR7jXz9F3KBGjRoYO3YsFi5ciBUrVgC4Fm5iYmLw5ptvYteuXfjjjz9UrlR9th82Op0OjzzyCHbu3IktW7YAgP237NjYWEyYMAGZmZn4/vvv1SxXFUIICCFQqVIl7N27F3l5edDpdPY36H79+qFDhw7YvXs3Dhw4AMA/RrXYF2dNmzbFP/7xD4wePRqpqakwGAz2H1gdOnTAwIEDsWzZMvz5559+EfQA9uR6tuAmSRJq1KiBTp06YcaMGTh27BgCAgLsfbnrrrvw/PPPY9u2bThx4oSaJXsMg40LevTogZdffhlTpkxBUlISADgMhVavXh0RERFqlae6FStW4O2338aAAQPwzTffIDs7G/3794dOp8OyZcuwc+dOANeCT8OGDaEoCs6fP69m2eUqIyMD2dnZyM3NhSRJGDVqFFJTUzFx4kQAgMFgsP8QHzp0KCpWrIjVq1cD0PaoFvtyzYYNGzBnzhzMnDkTmzZtAgCMHDkS1apVw6BBg5Cammq/NA4AvXr1QrVq1fDbb7+pWbZHsSelW7lyJSZOnIg333wTixYtAgCMGDECd999N5577jmcP3/eIdx07NgRderUwe7du9Us22O09U5QTgIDAzFw4EAMGDAA77zzDt577z38+eefSE1NxXfffQcACAkJUblKdcyZMwfz5s1DQUEBdDodJk+ejPHjxyM1NRWzZ8/GlStXsGjRInz77bf2zwkLC0P16tX9pmfz58/H0KFD0bVrV/z73//G6tWrERsbi0mTJuHHH3/EmDFjAFh/iNtGIVq3bo38/Hw1y/Y49uWa2bNnY/r06bh48SJ++eUXzJs3D6+88gqio6MxfPhwyLKMV155BSkpKTAYDACsc9mCg4MRGRmpcvWewZ6Ubu7cuXj//fcREBCAzMxMrF692n6pduTIkahXrx6efPJJHDp0CAEBAQCsl/ACAwNRqVIllav3EBUmLGuG0WgUX331lWjbtq1o166dePDBB0X79u3FH3/8oXZpqjh58qTo2rWr2Lt3r/3YDz/8IJ599lnRo0cP8dtvv4kLFy6Ifv36iccff1xMmjRJbNy4UUyePFm0atVKnD17VsXqy8fHH38s2rRpIzZt2iSWLVsmpk6dKho0aCDee+89kZKSItauXStatGghhg0bJrKysoTZbBZCCPHGG2+I4cOHC7PZLBRFUfm7cD/25Zrjx4+Lhx56yL6ix2g0is2bN4sOHTqIp556SqSnp4s9e/aIJ598UrRs2VKsX79ebNq0ScyaNUvcd9994vz58yp/B+7HnpTur1ac9ujRQ3Tu3Fn8/vvv4vjx42LAgAGiWbNm4qOPPhKffvqpePfdd8U999wjzpw5o/J34Bm883AZGAwGdOvWDa1bt8a5c+dgNptRp04dxMXFqV2aKnQ6HdLT0+3DnQDw4IMPokKFCkhMTMT06dMxZcoUzJs3D19++SU2bNiA/fv3IywsDJ999hlq
1KihYvWeJ4TAwYMH0b9/fzz22GMAgKKiIjRu3Bjjx4+H0WjE4MGDER0djfHjx+OZZ55BbGwsQkND8csvv2DFihXQ6XQqfxfux744ysnJQV5eHurUqQPA+j7TqVMnVK5cGaNGjcLQoUORlJSExMREvP/++/jggw+g1+sRHh6ORYsW2Vcfagl7Urq/WnGamJiIwYMHY8yYMViyZAkWLFiA+fPnY8eOHcjNzUXFihXxySefoGbNmip/Bx6idrIibVAURZw6dUp07NhRrF27VgghhMlksj++Z88e0bdvXzFixAhRUFBgP56Xl+fwsZYVFhaKRx55RMyZM8fpse+//140adJEzJs3TwghRG5urpg7d66YMGGCePfdd8WJEyfKu9xyw75Y2e7Hk5qaKh544AGxcuVKp+ccOHBAdOjQQQwbNsx+7NKlSyI7O1tkZ2eXW63lhT0pnW100mg0ig4dOjjcP83Ws7S0NNG1a1fx1FNP2R+7evWqKCoqEnl5eeVab3ljsCG3mjRpkmjZsqU4deqUEEI43Mjw22+/Fc2aNRPJyclqlaeK6y+RzJkzR3Tp0kUcPXrU6XmrV68WjRo1Eps2bfrLz9cS9qV0ubm5YtiwYeL5558X//vf/xweMxqNYv369aJr165i//79Qgj/uEEhe1I6s9ksZsyYIZ544gnxww8/2I/b/m3s3btXPPTQQ+Lbb78VQvhPXzh5mFy2evVqTJo0CRMnTsQnn3wCABg9ejSaNGmCZ555xmn55SOPPIIaNWpg165dapZdrnJzc+332ACA+++/H3q9HmvWrMGlS5fsx4UQePTRR/HYY49h9+7dMJvN9vu2aBH7cs2GDRswb948TJw4EXv27EFYWBiGDx+O48ePY8GCBTh58qT9uQaDAffffz9SUlLsx7W2GgxgT/6KqytOL168CEC7fbmRf3yX5HZz587FnDlzIITApUuX8Nlnn6FPnz7IyMjA6NGjUa1aNfTq1Qt//vmnfSa+yWRCSEiIdmfi32D+/Pl44YUX8Pjjj6Nfv3745ptvcPfdd6Nv3774/vvvsXz5cvsbjiRJCAsLQ1hYGE6fPg29Xm+fN6K1+2+wL9fMnDkTM2bMwOHDh3Hq1Cm88MILmDRpEsLCwpCYmIiff/4Zc+bMwf79++2fEx4ejnr16ml2k0f2pHTuWHEq/OQmhZw8TLft3Llz2Lx5M9577z20a9cOFosFhw4dwvjx4zF48GDMmDEDU6ZMwXvvvYc+ffpg8ODBCAkJwfnz53Hu3Dm0adNG7W/B4xYvXoxly5Zh5MiRiIqKwurVq/Hhhx9i3759ePPNN1FUVITFixcjNzcXffv2Rf369QFY765brVo1FBcX2wOhlrAv1xw6dAhbtmzBwoUL0axZMwDAmjVrkJiYiNTUVEyYMAGrVq3C4MGDMWfOHNx3331o1qwZtm/fjlOnTqFx48Yqfwfux56U7tSpU/jxxx8xf/58tGzZEgCwZcsWfP7555g4cSImTJiARYsWYcyYMVi0aBH27NmDli1bYt++ffjjjz8wZcoUANr4ZeBWcHdvum0nT57EM888g+XLl6NWrVr242lpaRg4cCDMZjMWL16M2NhYzJkzB7t27UJeXh4qVqyIsWPHolGjRuoV72FCCBiNRvz73//Gvffei+eee87+2IcffojNmzejUaNGmDp1KjZu3IiVK1fiypUraNiwISwWC5KTk7F8+XI0aNBAxe/C/dgXZ4cPH8agQYOwcOFCNGzY0H58y5Yt+PDDD1G1alW89dZbyM3NxWeffYaffvoJsiwjNDQUU6dO1eS/I/akdGfPnkWfPn0wZ84ctG3b1n48OTkZiYmJyMrKwpQpU1CpUiX7ilPAOmIzYcIEh176BfWm95CvWbt2rTh8+LAoKCgQ7dq1Ex9++KH9MduktMuXL4uHH35Y9OvXz/5YZmamKCgoELm5ueVes1r69esnZsyYIYQQ9nuuCCHEJ598Irp37y6mT58uhBDiyJEjIikpSQwfPlzMnDlTHD9+XJV6Pck2kdFisbAv1zlw4IBo3bq12LNnjxDCcaL95s2bRceOHe39MBqNIj8/X6Smpmr63xF74owrTm8fgw3dkrfffls0bdpUnDt3TlgsFjF16lTx1FNPiR9//NH+HNsPsF27dolOnTrZZ+n7y0x8Iaw9sFgs4rXXXhO9e/e2v7Fc/wY9d+5c8fDDD4tdu3apVWa5OnnypBDC+joYMWIE+3KdIUOGiPvvv19kZGQIIRz7kZSUJJo0aSL+/PNPtcorF3PnzhWLFy+2f/zvf//b73tSGq44vXWcPEx/a9q0adi4cSPWrFmD6tWrQ5ZlPPnkkzCbzUhKSrKvcrJdv23cuLHfzcTPzMxEbm4u8vLyIMsyXn/9dZw5cwZvvfUWAMc9joYPH47IyEisWrVKzZLLxTvvvIOBAweyLwC++uorzJs3D/PmzcPmzZsBWFcRVqxYEQMHDkRmZiYMBgOMRiMA4F//+hfi4+MdJslqzdSpU7Fs2TK0a9fOfmzAgAGIi4vz254AXHFaVtr/iUNlMmPGDGzYsAFffPGF/TqtEAL16tXD+PHjce7cOaxYscJhJn5ERITf7f00bNgwdOvWDa+99ho2bNiAKlWqYOLEidi0aRMmTZoEwHGPozZt2iAnJ0fNsj1u2rRp+Oqrr/B///d/CAsLg8ViQeXKlTFx4kRs3LgRkydPBuAffZk9ezamTZuGkydPYuvWrZg1axaGDh2K+Ph4/Pvf/0ZxcTFefvllZGRkIDAwEACQn5+PkJAQza70sb0+Pv/8cyQkJNhX7DRo0AADBw6E0Wj0u54AXHHqDlwVRX/JYrHgwIEDqFy5MqpXrw4AKC4uxv/93//hxIkTiI+PR7169ZCWloaVK1di3759uPvuu7F3714cOXIE77zzjsrfged9/PHHSEpKwtixY5GRkYGzZ89izJgxOHfuHPr06YNx48Zh2rRpKCgowMSJExEaGgoAuHz5MipUqACLxQJZljW3WmH69On48ssvkZSUhHr16gGAfZn2o48+ipycHEyfPh15eXmYNGmSpvvy559/4rvvvsP777+PNm3aoLCwED/++COmT5+OF198EfPmzcOoUaMwa9YsdO3aFRMmTIBer8ehQ4eQmZmJO+64Q+1vwe2WLl2KZcuWYfXq1fYJv5IkITs7GxaLBZ06dUJQUBDmzZuHLl26YOLEiZrvCQCcP3+eK07dgMGG/pJOp8Obb76JcePGYe7cuXjttdcwYMAAFBQUoHHjxjh37hwKCgoAAPfeey++/vpr7NmzB6Ghofjss8/sYUirbMHvpZdeQrdu3QBY925p1KgRpkyZgsLCQvvuwxMnTsRzzz2HmJgYhISE4L///a/m9jiy2blzJ5YvX44JEybYQ42iKPj555+RlZWFuLg4dOnSBbGxsZg8eTKeffZZVKxYUbN9yc7ORmFhob0XwcHB6Ny5M6pUqYKRI0fitddew6efforExETMnj0bs2bNgl6vR1hYmGb3Obpw4QKqVq2K4OBgANYRh0mTJuH48eO4cuUKGjVqhLF
jx+L999/Hhx9+6Bc9AYCCggLk5OTY983T6XRo3rw5Fi9ejIEDB2LMmDFYvHgxFi9ejDlz5mDz5s32FadLlizR/H57t4rLvemmioqK8Pnnn2Pbtm2oUaMGFEXB2LFjERUVBZPJhCVLlmDr1q147733ULNmTeTn50On02n+MpSiKCgqKsLjjz+Of/7znxgyZIjD419//TXeeOMNDBs2DAMHDkRWVhYWL16MrKwsBAcH4+mnn0bdunVVqt6zzpw5gzfffBMJCQmYNGkSZFnGCy+8gIyMDFy9ehUZGRn45z//iVGjRkGWZftyVa32JSUlBf/6178waNAgPPnkkw6P7du3D8OHD8c999yDWbNmAbD+0A8NDYUsy4iMjFSj5HLxr3/9CwCwfPlyDB06FAUFBXjwwQdhMBiwZMkS6PV6rF+/HjqdDufPn0dYWJjme1JUVISHH34YvXv3tr+nKIoCWZaRkpKC559/HrGxsfj8888BAFlZWQgKCoLFYkFYWJiapXsXVacuk09ISUkRAwYMEA0bNhQzZ860r/wRQoiMjAzRpEkT8dVXX6lcpTqmT58uunfvXupmjMuWLRONGjVy2MNFCP9YJbZ3717RtGlTkZiYKD744AMxaNAgcfr0aZGeni527twpGjduLGbPni2EcFwOrhXff/+9WLp0qfjPf/4jdu7cKQYOHCiGDh0qDh065PA8o9Eo1qxZI7p27SoOHjwohNBWH65n68kHH3wgTp06Jc6fPy86duwonnjiCTF+/HiRmppqf+758+fFP/7xD/Hxxx8LIbTbEyGE+Pnnn8WmTZvEunXrRH5+vpg6daro3bs3V5yWAScP09+Ki4vDa6+9hjp16qB79+6QJAmyLENYbxeARo0aIT4+Xu0yy8WXX36JxMRE+8etWrWCTqfD6tWrkZqaaj8uhED37t3RuXNn7Nq1CxaLxb7HkRbmjdzoxr60bNkSY8aMwdy5c/HTTz/hpZdeQq1atRATE4N7770Xo0aNwpYtW5CZmWmfOKyVvsyaNQtTpkzBTz/9hKVLl2LRokWIjY1FcnIylixZgjNnztifa9vn6NKlSzh16hQAba4ivL4nSUlJGD58OHbt2oVhw4bhyJEjSE1NRVRUlP35lSpVQpUqVZCXlwdAmz0BrIszxo4diyVLluDNN9/EBx98gBdeeAEWi4UrTsuAXaFb0qBBA6xbtw7169fH5cuXkZ2djby8PCxbtgxXrlzR/HwaW4jbs2cPPvnkE6xZswYA0LFjR3Ts2BE//PADVqxYgZSUFADWN6Lw8HCEhobi5MmT0Ol0mtrjyObGvqxdu9b+WJcuXdCxY0ekpKSgatWq9ucD1rkDQUFBiIiI0FRfNm3ahG+//RaJiYlYvHgxtm3bhry8PBiNRkyfPh2bN2/GvHnzcPDgQfvnVKhQAfXq1dPspYTSehIYGIgffvgBLVu2xDPPPIMRI0Y4bJVhMBgQERFhv+wkNDhjYv369fjmm2+waNEiLF26FNOmTcO6desQHx+PCRMm4Ny5c1i2bBm+++47++f424pTV3HyMN2ywMBAZGRk4IknnoCiKKhcuTKuXr2Kjz76SPMjNoqiQKfTITg4GIWFhfj8889RVFSEZ555xj4/YOPGjfY9jurUqQPA+sO6WrVqMJvN0Ou198/txr58+umnKCoqQt++fVGhQgW8+OKLiI2NRVxcHEwmEwwGAwDr6o9KlSqhuLhYU305deoU6tWrhwYNGqC4uBghISF45ZVXMGLECIwbNw4ff/wxxowZg+zsbNx333244447sHXrVpw9e1azt72/sSfBwcF4+eWXMWLECADA66+/Dp1OhxMnTuDEiROoX78+1q9fj4MHD2L8+PEAtBF6b3T8+HHcfffd9r/3yMhIhIaGYvLkyYiIiEDbtm1x7NgxJCUlITk52e9WnJaFdt5RqFzExMTYf+OsVKkS7rzzTvtv41pmG1U4c+YM7rjjDsTGxmL16tUAgGeeeQajR49GZGQktm3bhldeeQVNmzZFUVER9u7dixUrVmjqh/f1SuvLypUrAQB9+/ZF8+bNAQBHjhzBnDlzULt2beTm5to38LOtivF1QghIkoQrV64gIyMDkiTZRyAiIyNhNptx6dIltG3bFh9++CFWr16NZcuWISAgAMHBwViyZInm/h3drCcREREwm832narz8/Mxbdo07NmzB9WqVYPBYMDSpUtRs2ZNlb8L97ONPl28eNEe2IQQWLhwIQAgLy8PycnJiIiIQGxsLJo2bYovv/zSr1aclpU2323Jo1q2bGnfYdZfCCGQlZWF/Px8DB48GHXq1MHcuXMdws2AAQPQsmVL/O9//8Nvv/2GWrVq4fXXX0dCQoLK1XvOX/Vl5cqVkCTJvvLlxIkT0Ol02L9/PxISErB8+XL78mctsP2Aeuihh3DgwAGcP3/e/sMnMjISOp0OJpMJQgg0bdoUTZs2RW5uLiwWC3Q6nSZvOPd3PZFl2X5H4dDQUEyfPh0pKSkIDg5GdHQ0YmJiVKvdk2x9eeWVV/Dbb78BsIacf/zjH+jXrx+io6ORl5eH6dOn48SJE+jVqxdeeuklv1lx6g5c7k10i8xmM1avXo177rkHdevWxYkTJ7BgwQIcO3YMTz31FPr166d2iaq4WV/69OmDvn37AoB964Trf3PXopSUFMTExNi/x+TkZLz00ktYs2YNEhISIEkSPv30UxgMBjz99NMqV1s+bqUnSUlJCAgIQO/evVWuVh1FRUX2pds6nQ4XL15Ep06dsHDhQrRv317t8nwKJw8T3SK9Xo/evXujbt26UBQFCQkJGDhwIBo0aIBVq1ZhxYoVapeoipv1ZeXKlVi+fDkA64RQg8Gg6VADAPHx8Q7fY2pqKsxmM8LDwyFJEubNm4cZM2b41ajnrfRk2rRpaNGihXpFqsQ2thAUFATAenlXCAGz2Yz69eujcuXKapbnkxhsiG6Dba6MbTjZ9kO8cePG+M9//oMvvvhCzfJUc7O+LFiwwG/7Ali3IdHpdAgLC8OHH36IJUuWYPXq1Zq6FHe7SuvJqlWrUL9+fbVLK3e2fzOXLl3Cr7/+iszMTOTl5WHDhg0oLCx0WAZPt4ZzbIhccP0qjYSEBPTv3x+BgYG45557VKxKfezLNbbJs4GBgYiIiMD48eOxZcsWrFy5Ek2bNlW7PFWwJ3/typUreOmllxAeHo5KlSohOzsb8+fPR2xsrNql+RzOsSFyk+uXM9M1/t6XI0eOoEePHggMDMTKlSvtmz76M/akdAcPHsTx48cRGRmJJk2a8DKUixhsiIg8qKioCDNnzsS//vUvze2D5Sr2hDyJwYaIyMOKi4s1P2n6drEn5CkMNkRERKQZXBVFREREmsFgQ0RERJrBYENERESawWBDREREmsFgQ0RERJrBOw8TkVcYM2YM1q9ff9PnVK1aFRcvXsTWrVtRrVq1cqqMiHwJl3sTkVc4d+4cMjMz7R9/9NFHOHz4MObPn28/ZruLcePGjf36bsZE9Nc4YkNEXqFGjRqoUaOG/ePo6GgYDAa/3PGZiFzHOTZE5DPWrV
uHBg0a4MKFCwCsl69efPFFrFq1Cg8++CCaNWuGPn364PTp0/jxxx/RrVs3NG/eHE8++SSOHDnicK7k5GT069cPzZs3R+vWrfHGG284jBgRkW/iiA0R+bT9+/cjLS0NY8aMgdFoxOTJk/HKK69AkiS8+uqrCA4OxqRJk/D6669j06ZNAIC9e/eif//+aNOmDd5//31kZ2dj3rx5ePbZZ/HFF18gKChI5e+KiFzFYENEPi0/Px/vv/++fTPFX3/9FStXrsTSpUvRtm1bAMDZs2cxY8YM5OTkICIiArNnz0bt2rWxcOFC6HQ6AEDz5s3RpUsXrF27Fn379lXt+yGisuGlKCLyaZGRkQ47RFesWBGANajYVKhQAQCQk5ODwsJC/P7772jfvj2EEDCbzTCbzahevTrq1q2LnTt3lmv9ROReHLEhIp8WFhZW6vGQkJBSj+fk5EBRFHz88cf4+OOPnR4PDAx0a31EVL4YbIjIr4SGhkKSJDz//PPo0qWL0+PBwcEqVEVE7sJgQ0R+JSwsDI0bN8apU6dwxx132I8XFRXh1VdfRfv27ZGQkKBihURUFpxjQ0R+Z8SIEfjvf/+LkSNHYseOHdi2bRteeukl7Nq1C02aNFG7PCIqAwYbIvI7//jHP7B48WKkpKTg1VdfxejRo6HT6fDJJ5/whoBEPo5bKhAREZFmcMSGiIiINIPBhoiIiDSDwYaIiIg0g8GGiIiINIPBhoiIiDSDwYaIiIg0g8GGiIiINIPBhoiIiDSDwYaIiIg0g8GGiIiINIPBhoiIiDSDwYaIiIg04/8BeIbEfIFWUrIAAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "views = g.expanding(step=10)\n", - "\n", - "timestamps = []\n", - "degree = []\n", - "\n", - "for view in views:\n", - " timestamps.append(view.latest_time)\n", - " gandalf = view.node(\"Gandalf\")\n", - " if gandalf is not None:\n", - " degree.append(gandalf.degree())\n", - " else:\n", - " degree.append(0)\n", - "\n", - "\n", - "sns.set_context()\n", - "ax = plt.gca()\n", - "plt.xticks(rotation=45)\n", - "ax.set_xlabel(\"Time\")\n", - "ax.set_ylabel(\"Interactions\")\n", - "sns.lineplot(x=timestamps, y=degree, ax=ax)" - ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "raphtory", "language": "python", "name": "python3" }, @@ -437,12 +45,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" - }, - "vscode": { - "interpreter": { - "hash": "cb87ca7661adc8f1194e8349af5289fc4d6184622935eb2bec01493e8d44e9d2" - } + "version": "3.12.11" } }, "nbformat": 4, diff --git a/python/tests/test_base_install/base_notebook.ipynb b/python/tests/test_base_install/base_notebook.ipynb new file mode 100644 index 0000000000..bc469fab68 --- /dev/null +++ b/python/tests/test_base_install/base_notebook.ipynb @@ -0,0 +1,330 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import tempfile\n", + "import datetime" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Basic functionality on a graph\n", + "\n", + "After importing a Raphtory graph we can create a blank one to work with:\n", + "\n", + "* Graphs in Raphtory are directed by default\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Graph(number_of_nodes=0, number_of_edges=0, number_of_temporal_edges=0, earliest_time=None, latest_time=None)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from raphtory import Graph\n", + "\n", + "g = Graph()\n", + "g" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "NestedArcStringVecIterable([[[_default], [layer1], [layer2]], [[_default]], [[layer1]], [[layer2]]])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "g.add_edge(0, \"1\", \"2\")\n", + "g.add_edge(0, \"1\", \"3\", layer=\"layer1\")\n", + "g.add_edge(0, \"1\", \"4\", layer=\"layer2\")\n", + "\n", + "g.nodes.edges.layer_names" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once we have a new graph we can add nodes and edges to it via `add_node()` and `add_edge()`. For these:\n", + "* The ids of nodes and the source/destination of an edge can be either strings or integers\n", + "* All additions into the graph must happen at a specific time - this means updates are also additions\n", + "* If you add an edge between nodes which do no exist in the graph yet, these will be automatically created\n", + "* Properties can be added onto nodes and edges - this is a dict of any value, but the keys must be strings\n", + "* We have a special type of `static property` which exists outside of the timeline and is always accessible. 
\n", + "* Additions can be completed out of order, making it very easy to merge datasets together\n", + "\n", + "\n", + "We can then check the state of the graph:\n", + "* To see if a node or edge exists you can use `has_node()` and `has_edge()`\n", + "* To get the earliest and latest times at which updates have been applied to the graph you can use `earliest_time()` and `latest_time()` - if no updates have been applied these will return `None`\n", + "* To get the total number of nodes and edges of a graph you can use `num_edges()` and `num_nodes()`." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True True False\n", + "True False\n", + "3 5\n", + "True True False\n", + "True False\n", + "4 7\n", + "Node(name=Ben, earliest_time=5, latest_time=8, properties=Properties({property1: None, property2: None, property3: None}))\n", + "Edge(source=Haaroon, target=Hamza, earliest_time=7, latest_time=7, properties={property3: test, property1: 1, property2: 9.8}, layer(s)=[toad])\n", + "Graph(number_of_nodes=8, number_of_edges=5, number_of_temporal_edges=7, earliest_time=0, latest_time=8)\n", + "True\n" + ] + } + ], + "source": [ + "# Basic Addition of Nodes and Edges\n", + "g.add_node(timestamp=1, id=\"10\")\n", + "g.add_edge(timestamp=2, src=\"1\", dst=\"2\")\n", + "\n", + "# checking node 10, 1 and 5 exist\n", + "print(g.has_node(\"10\"), g.has_node(\"1\"), g.has_node(\"5\"))\n", + "# checking edge 1,2 exists and 2,1 doesn't as Raphtory is directed\n", + "print(g.has_edge(\"1\", \"2\"), g.has_edge(\"2\", \"1\"))\n", + "# Check the total number of edges and nodes\n", + "print(g.count_edges(), g.count_nodes())\n", + "\n", + "# Adding nodes and edges with String IDs\n", + "g.add_node(timestamp=5, id=\"Ben\")\n", + "g.add_edge(timestamp=8, src=\"Hamza\", dst=\"Ben\", layer=\"toad\")\n", + "\n", + "# Performing the same checks as before, but with strings\n", + "print(g.has_node(id=\"Ben\"), g.has_node(id=\"Hamza\"), g.has_node(id=\"Dave\"))\n", + "print(g.has_edge(src=\"Hamza\", dst=\"Ben\"), g.has_edge(src=\"Ben\", dst=\"Hamza\"))\n", + "print(g.count_edges(), g.count_nodes())\n", + "\n", + "g.add_edge(0, \"1\", \"3\", layer=\"toad\")\n", + "# Add an edge with Temporal Properties which can change over time\n", + "e = g.add_edge(\n", + " timestamp=7,\n", + " src=\"Haaroon\",\n", + " dst=\"Hamza\",\n", + " properties={\"property1\": 1, \"property2\": 9.8, \"property3\": \"test\"},\n", + " layer=\"toad\",\n", + ")\n", + "# Add a static property which is immutable\n", + "e.add_metadata(metadata={\"First-Met\": \"01/01/1990\"})\n", + "\n", + "# Add an node with Temporal Properties which can change over time\n", + "v = g.add_node(\n", + " timestamp=5,\n", + " id=\"Hamza\",\n", + " properties={\"property1\": 5, \"property2\": 12.5, \"property3\": \"test2\"},\n", + ")\n", + "# Add a static property which is immutable\n", + "v.add_metadata(metadata={\"Date-of-Birth\": \"01/01/1990\"})\n", + "print(g.node(\"Ben\").__repr__())\n", + "print(g.edge(\"Haaroon\", \"Hamza\").__repr__())\n", + "print(g.__repr__())\n", + "g_path = tempfile.mkdtemp()\n", + "g.save_to_file(g_path)\n", + "loaded_graph = Graph.load_from_file(g_path)\n", + "print(loaded_graph.has_node(\"Hamza\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[['_default'], ['layer1', 'toad'], ['layer2'], ['toad'], ['toad']]" + ] + }, + "execution_count": 5, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(g.edges.layer_names)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "g.nodes.edges.start" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from raphtory import graph_gen\n", + "\n", + "g = Graph()\n", + "graph_gen.ba_preferential_attachment(g, nodes_to_add=1000, edges_per_step=10)\n", + "view = g.window(0, 1000)\n", + "\n", + "ids = []\n", + "degrees = []\n", + "for v in view.nodes:\n", + " ids.append(v.id)\n", + " degrees.append(v.degree())\n", + "\n", + "df = pd.DataFrame.from_dict({\"id\": ids, \"degree\": degrees})" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from raphtory import Graph\n", + "from raphtory import algorithms\n", + "from raphtory import graph_loader\n", + "\n", + "g = graph_loader.lotr_graph()\n", + "views_l1 = g.rolling(1000)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "views = g.expanding(100)\n", + "\n", + "timestamps = []\n", + "node_count = []\n", + "edge_count = []\n", + "degree = []\n", + "\n", + "for view in views:\n", + " timestamps.append(view.latest_time)\n", + " # node_count.append(view.num_nodes())\n", + " # edge_count.append(view.num_edges())\n", + " degree.append(view.count_edges() / max(1, view.count_nodes()))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "views = g.expanding(step=10)\n", + "\n", + "timestamps = []\n", + "degree = []\n", + "\n", + "for view in views:\n", + " timestamps.append(view.latest_time)\n", + " gandalf = view.node(\"Gandalf\")\n", + " if gandalf is not None:\n", + " degree.append(gandalf.degree())\n", + " else:\n", + " degree.append(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d0f713d1727343d09df10592827d484e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(HTML(value=''), IntProgress(value=0, max=3), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = pd.DataFrame(\n", + " {\n", + " \"time\": [0, 1, 2],\n", + " \"src\": [1, 2, 3],\n", + " \"dst\": [2, 3, 4],\n", + " }\n", + ")\n", + "g = Graph()\n", + "g.load_edges_from_df(df, \"time\", \"src\", \"dst\")\n", + "assert g.has_edge(1, 2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "raphtory", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python/tests/test_ingestion_equivalence_df.py b/python/tests/test_ingestion_equivalence_df.py index 88a4cdf03c..5f99417099 100644 --- a/python/tests/test_ingestion_equivalence_df.py +++ b/python/tests/test_ingestion_equivalence_df.py @@ -11,7 +11,7 @@ fpd = None from raphtory import Graph -base_dir = Path(__file__).parent.parent.parent +base_dir = Path(__file__).parent EDGES_FILE = os.path.join(base_dir, "data/network_traffic_edges.csv") NODES_FILE = 
os.path.join(base_dir, "data/network_traffic_nodes.csv") @@ -192,12 +192,12 @@ def test_metadata_update_equivalence(dataframes): id="server_id", ) # update metadata - g_pd.load_node_metadata_from_pandas( + g_pd.load_node_props_from_pandas( df=dataframes["pandas"]["nodes"], id="server_id", metadata=["primary_function", "server_name", "hardware_type"] ) - g_pd.load_edge_metadata_from_pandas( + g_pd.load_edge_props_from_pandas( df=dataframes["pandas"]["edges"], src="source", dst="destination", From b38c52a42e59a991c64f4cd81caf90c29a5e71bd Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Wed, 26 Nov 2025 12:38:15 -0500 Subject: [PATCH 20/55] Fixed invalid function call in test --- python/tests/test_load_from_polars.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/test_load_from_polars.py b/python/tests/test_load_from_polars.py index e696040179..104e3a91b3 100644 --- a/python/tests/test_load_from_polars.py +++ b/python/tests/test_load_from_polars.py @@ -48,6 +48,6 @@ def test_load_edges_from_polars_df(): ) g = Graph() - g.load_edges_from_polars(df=df, time="time", src="src", dst="dst", properties=["value"]) + g.load_edges_from_df(data=df, time="time", src="src", dst="dst", properties=["value"]) expected = [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)] assert _collect_edges(g) == expected \ No newline at end of file From 62c8ea21a12f39f45fa39529ebbdf75fc89a9ab6 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Wed, 26 Nov 2025 21:35:12 -0500 Subject: [PATCH 21/55] Fixed fireducks package not available on Windows (for now anyway) --- python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 661b133e99..27e1952a0a 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -34,7 +34,7 @@ networkx = ["networkx >= 2.6.3"] export = ["raphtory[pyvis,networkx]"] all = ["raphtory[export,plot]"] dev = ["docstring_parser >= 0.16", "pandas-stubs", "maturin>=1.8.3", "tox>=4.25"] -test = ["raphtory[all]", "requests >= 2.31.0", "pyjwt[crypto] >= 2.10.1", "pytest >= 8", "pytest-benchmark >= 5.1.0", "polars >= 1.35.2", "fireducks;python_version<'3.14'", "duckdb >= 1.4.2"] +test = ["raphtory[all]", "requests >= 2.31.0", "pyjwt[crypto] >= 2.10.1", "pytest >= 8", "pytest-benchmark >= 5.1.0", "polars >= 1.35.2", "fireducks; sys_platform != 'win32' and python_version < '3.14'", "duckdb >= 1.4.2"] tox = ["nbmake"] [tool.maturin] From a7bc881a4f422dac64f2a133611d773a18a9e36d Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Thu, 27 Nov 2025 03:23:03 -0500 Subject: [PATCH 22/55] Added load_*_from_df functions to PyPersistentGraph, including load_edge_deletions_from_df --- raphtory/src/python/graph/graph.rs | 4 +- .../src/python/graph/graph_with_deletions.rs | 226 +++++++++++++++++- raphtory/src/python/graph/io/arrow_loaders.rs | 34 ++- 3 files changed, 259 insertions(+), 5 deletions(-) diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index ec846a6100..2cd41e2018 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -922,7 +922,7 @@ impl PyGraph { ) } - /// Load node properties into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + /// Load node metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). 
/// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) /// @@ -1042,7 +1042,7 @@ impl PyGraph { ) } - /// Load edge properties into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + /// Load edge metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) /// diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index 0b00c0f9f5..8129409e95 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -18,7 +18,17 @@ use crate::{ io::parquet_loaders::*, prelude::{DeletionOps, GraphViewOps, ImportOps, IndexMutationOps}, python::{ - graph::{edge::PyEdge, index::PyIndexSpec, node::PyNode, views::graph_view::PyGraphView}, + graph::{ + edge::PyEdge, + index::PyIndexSpec, + io::arrow_loaders::{ + load_edge_deletions_from_arrow_c_stream, load_edge_metadata_from_arrow_c_stream, + load_edges_from_arrow_c_stream, load_node_metadata_from_arrow_c_stream, + load_nodes_from_arrow_c_stream, + }, + node::PyNode, + views::graph_view::PyGraphView, + }, utils::{PyNodeRef, PyTime}, }, serialise::StableEncode, @@ -562,6 +572,54 @@ impl PyPersistentGraph { PyPersistentGraph::py_from_db_graph(self.graph.persistent_graph()) } + /// Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// + /// Arguments: + /// data (Any): The data source containing the nodes. + /// time (str): The column name for the timestamps. + /// id (str): The column name for the node IDs. + /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. + /// node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. + /// properties (List[str], optional): List of node property column names. Defaults to None. + /// metadata (List[str], optional): List of node metadata column names. Defaults to None. + /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + /// + /// Returns: + /// None: This function does not return a value if the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. 
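+    ///
+    /// A minimal usage sketch (illustrative only, not part of this patch; assumes a
+    /// polars DataFrame, but any object exposing __arrow_c_stream__() works the same way):
+    ///
+    /// ```python
+    /// import polars as pl
+    /// from raphtory import PersistentGraph
+    ///
+    /// df = pl.DataFrame({"time": [1, 2], "id": ["a", "b"], "score": [0.5, 0.7]})
+    /// g = PersistentGraph()
+    /// g.load_nodes_from_df(data=df, time="time", id="id", properties=["score"])
+    /// ```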
+ #[pyo3( + signature = (data, time, id, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None) + )] + fn load_nodes_from_df<'py>( + &self, + data: &Bound<'py, PyAny>, + time: &str, + id: &str, + node_type: Option<&str>, + node_type_col: Option<&str>, + properties: Option>, + metadata: Option>, + shared_metadata: Option>, + ) -> Result<(), GraphError> { + let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); + let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); + load_nodes_from_arrow_c_stream( + &self.graph, + data, + time, + id, + node_type, + node_type_col, + &properties, + &metadata, + shared_metadata.as_ref(), + ) + } + /// Load nodes from a Pandas DataFrame into the graph. /// /// Arguments: @@ -651,6 +709,57 @@ impl PyPersistentGraph { ) } + /// Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// + /// Arguments: + /// data (Any): The data source containing the edges. + /// time (str): The column name for the update timestamps. + /// src (str): The column name for the source node ids. + /// dst (str): The column name for the destination node ids. + /// properties (List[str], optional): List of edge property column names. Defaults to None. + /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. + /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. + /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. Cannot be used in combination with layer_col. + /// layer_col (str, optional): The edge layer column name in a dataframe. Defaults to None. Cannot be used in combination with layer. + /// + /// Returns: + /// None: This function does not return a value if the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3( + signature = (data, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) + )] + fn load_edges_from_df( + &self, + data: &Bound, + time: &str, + src: &str, + dst: &str, + properties: Option>, + metadata: Option>, + shared_metadata: Option>, + layer: Option<&str>, + layer_col: Option<&str>, + ) -> Result<(), GraphError> { + let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); + let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); + load_edges_from_arrow_c_stream( + &self.graph, + data, + time, + src, + dst, + &properties, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + ) + } + /// Load edges from a Pandas DataFrame into the graph. /// /// Arguments: @@ -746,6 +855,36 @@ impl PyPersistentGraph { ) } + /// Load edge deletions into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// + /// Arguments: + /// data (Any): The data source containing the edges. 
+ /// time (str): The column name for the update timestamps. + /// src (str): The column name for the source node ids. + /// dst (str): The column name for the destination node ids. + /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. Cannot be used in combination with layer_col. + /// layer_col (str, optional): The edge layer col name in the data source. Defaults to None. Cannot be used in combination with layer. + /// + /// Returns: + /// None: This function does not return a value, if the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3(signature = (data, time, src, dst, layer = None, layer_col = None))] + fn load_edge_deletions_from_df( + &self, + data: &Bound, + time: &str, + src: &str, + dst: &str, + layer: Option<&str>, + layer_col: Option<&str>, + ) -> Result<(), GraphError> { + load_edge_deletions_from_arrow_c_stream(&self.graph, data, time, src, dst, layer, layer_col) + } + /// Load edges deletions from a Pandas DataFrame into the graph. /// /// Arguments: @@ -811,6 +950,47 @@ impl PyPersistentGraph { ) } + /// Load node metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// + /// Arguments: + /// data (Any): The data source containing node information. + /// id(str): The column name for the node IDs. + /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. + /// node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. + /// metadata (List[str], optional): List of node metadata column names. Defaults to None. + /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + /// + /// Returns: + /// None: This function does not return a value if the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3( + signature = (data, id, node_type = None, node_type_col = None, metadata = None, shared_metadata = None) + )] + fn load_node_metadata_from_df( + &self, + data: &Bound, + id: &str, + node_type: Option<&str>, + node_type_col: Option<&str>, + metadata: Option>, + shared_metadata: Option>, + ) -> Result<(), GraphError> { + let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); + load_node_metadata_from_arrow_c_stream( + &self.graph, + data, + id, + node_type, + node_type_col, + &metadata, + shared_metadata.as_ref(), + ) + } + /// Load node properties from a Pandas DataFrame. /// /// Arguments: @@ -886,6 +1066,50 @@ impl PyPersistentGraph { ) } + /// Load edge metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// + /// Arguments: + /// data (Any): The data source containing edge information. + /// src (str): The column name for the source node. + /// dst (str): The column name for the destination node. 
+ /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. + /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. + /// layer (str, optional): The edge layer name. Defaults to None. + /// layer_col (str, optional): The edge layer column name in a dataframe. Defaults to None. + /// + /// Returns: + /// None: This function does not return a value if the operation is successful. + /// + /// Raises: + /// GraphError: If the operation fails. + #[pyo3( + signature = (data, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None) + )] + fn load_edge_metadata_from_df( + &self, + data: &Bound, + src: &str, + dst: &str, + metadata: Option>, + shared_metadata: Option>, + layer: Option<&str>, + layer_col: Option<&str>, + ) -> Result<(), GraphError> { + let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); + load_edge_metadata_from_arrow_c_stream( + &self.graph, + data, + src, + dst, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + ) + } + /// Load edge properties from a Pandas DataFrame. /// /// Arguments: diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index d6ea26acb9..e40526e96e 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -4,8 +4,8 @@ use crate::{ io::arrow::{ dataframe::{DFChunk, DFView}, df_loaders::{ - load_edges_from_df, load_edges_props_from_df, load_node_props_from_df, - load_nodes_from_df, + load_edge_deletions_from_df, load_edges_from_df, load_edges_props_from_df, + load_node_props_from_df, load_nodes_from_df, }, }, prelude::{AdditionOps, PropertyAdditionOps}, @@ -159,6 +159,36 @@ pub(crate) fn load_edge_metadata_from_arrow_c_stream< ) } +pub fn load_edge_deletions_from_arrow_c_stream< + 'py, + G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps, +>( + graph: &G, + data: &Bound<'py, PyAny>, + time: &str, + src: &str, + dst: &str, + layer: Option<&str>, + layer_col: Option<&str>, +) -> Result<(), GraphError> { + let mut cols_to_check = vec![src, dst, time]; + if let Some(ref layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); + } + + let df_view = process_arrow_c_stream_df(data, cols_to_check.clone())?; + df_view.check_cols_exist(&cols_to_check)?; + load_edge_deletions_from_df( + df_view, + time, + src, + dst, + layer, + layer_col, + graph.core_graph(), + ) +} + /// Can handle any object that provides the \_\_arrow_c_stream__() interface pub(crate) fn process_arrow_c_stream_df<'a>( data: &Bound<'a, PyAny>, From cc164a6a1ef6999083dfe33cd7ac99b09b973f39 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 28 Nov 2025 04:20:39 -0500 Subject: [PATCH 23/55] Cleaned up load_from_df tests and parametrized them to run for both event graphs/persistent graphs. 
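
The parametrization follows the standard pytest pattern (sketch below; the test
name is hypothetical, the real tests live in python/tests/test_load_from_df.py
and python/tests/test_ingestion_equivalence_df.py):

    import pytest
    from raphtory import Graph, PersistentGraph

    @pytest.mark.parametrize("graph_type", [Graph, PersistentGraph])
    def test_load_for_both_graph_flavours(graph_type):
        g = graph_type()  # same load_*_from_df API on both graph types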
--- python/python/raphtory/__init__.pyi | 115 +++++++++++++++++- python/python/raphtory/vectors/__init__.pyi | 71 +++-------- python/tests/test_ingestion_equivalence_df.py | 47 +++---- python/tests/test_load_from_df.py | 81 ++++++++++++ python/tests/test_load_from_fireducks.py | 55 --------- python/tests/test_load_from_polars.py | 53 -------- 6 files changed, 235 insertions(+), 187 deletions(-) create mode 100644 python/tests/test_load_from_df.py delete mode 100644 python/tests/test_load_from_fireducks.py delete mode 100644 python/tests/test_load_from_polars.py diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 9f2cbb3ab5..23e5903f28 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1091,7 +1091,7 @@ class Graph(GraphView): def load_edge_metadata_from_df(self, data: Any, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ - Load edge properties into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + Load edge metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) @@ -1233,7 +1233,7 @@ class Graph(GraphView): def load_node_metadata_from_df(self, data: Any, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ - Load node properties into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + Load node metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) @@ -1811,6 +1811,27 @@ class PersistentGraph(GraphView): PersistentGraph: the loaded graph with initialised cache """ + def load_edge_deletions_from_df(self, data: Any, time: str, src: str, dst: str, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + """ + Load edge deletions into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + + Arguments: + data (Any): The data source containing the edges. + time (str): The column name for the update timestamps. + src (str): The column name for the source node ids. + dst (str): The column name for the destination node ids. + layer (str, optional): A value to use as the layer for all edges. Defaults to None. Cannot be used in combination with layer_col. + layer_col (str, optional): The edge layer col name in the data source. Defaults to None. Cannot be used in combination with layer. + + Returns: + None: This function does not return a value, if the operation is successful. 
+ + Raises: + GraphError: If the operation fails. + """ + def load_edge_deletions_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges deletions from a Pandas DataFrame into the graph. @@ -1849,6 +1870,28 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ + def load_edge_metadata_from_df(self, data: Any, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + """ + Load edge metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + + Arguments: + data (Any): The data source containing edge information. + src (str): The column name for the source node. + dst (str): The column name for the destination node. + metadata (List[str], optional): List of edge metadata column names. Defaults to None. + shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. + layer (str, optional): The edge layer name. Defaults to None. + layer_col (str, optional): The edge layer column name in a dataframe. Defaults to None. + + Returns: + None: This function does not return a value if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_edge_props_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge properties from a Pandas DataFrame. @@ -1889,6 +1932,30 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ + def load_edges_from_df(self, data: Any, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + """ + Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + + Arguments: + data (Any): The data source containing the edges. + time (str): The column name for the update timestamps. + src (str): The column name for the source node ids. + dst (str): The column name for the destination node ids. + properties (List[str], optional): List of edge property column names. Defaults to None. + metadata (List[str], optional): List of edge metadata column names. Defaults to None. + shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. + layer (str, optional): A value to use as the layer for all edges. Defaults to None. Cannot be used in combination with layer_col. + layer_col (str, optional): The edge layer column name in a dataframe. Defaults to None. Cannot be used in combination with layer. + + Returns: + None: This function does not return a value if the operation is successful. 
+ + Raises: + GraphError: If the operation fails. + """ + def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges from a Pandas DataFrame into the graph. @@ -1945,6 +2012,27 @@ class PersistentGraph(GraphView): PersistentGraph: """ + def load_node_metadata_from_df(self, data: Any, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + """ + Load node metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + + Arguments: + data (Any): The data source containing node information. + id(str): The column name for the node IDs. + node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. + node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. + metadata (List[str], optional): List of node metadata column names. Defaults to None. + shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + + Returns: + None: This function does not return a value if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_node_props_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load node properties from a Pandas DataFrame. @@ -1983,6 +2071,29 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ + def load_nodes_from_df(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + """ + Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). + This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + + Arguments: + data (Any): The data source containing the nodes. + time (str): The column name for the timestamps. + id (str): The column name for the node IDs. + node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. + node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. + properties (List[str], optional): List of node property column names. Defaults to None. + metadata (List[str], optional): List of node metadata column names. Defaults to None. + shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. 
+ + Returns: + None: This function does not return a value if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ + def load_nodes_from_pandas(self, df: DataFrame, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load nodes from a Pandas DataFrame into the graph. diff --git a/python/python/raphtory/vectors/__init__.pyi b/python/python/raphtory/vectors/__init__.pyi index b0be7dd4a1..e2d8f648b1 100644 --- a/python/python/raphtory/vectors/__init__.pyi +++ b/python/python/raphtory/vectors/__init__.pyi @@ -27,15 +27,9 @@ __all__ = ['VectorisedGraph', 'Document', 'Embedding', 'VectorSelection'] class VectorisedGraph(object): """VectorisedGraph object that contains embedded documents that correspond to graph entities.""" -class VectorisedGraph(object): - def edges_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> VectorSelection: + def edges_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> VectorSelection: """ - Search the top similarity scoring edges according to matching a specified `query` with no more than `limit` edges in the result. + Perform a similarity search between each edge's associated document and a specified `query`. Returns a number of edges up to a specified `limit` ranked in descending order of similarity score. Args: query (str | list): The text or the embedding to score against. @@ -49,14 +43,9 @@ class VectorisedGraph(object): def empty_selection(self): """Return an empty selection of entities.""" - def entities_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> VectorSelection: + def entities_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> VectorSelection: """ - Search the top similarity scoring entities according to matching a specified `query` with no more than `limit` entities in the result. + Perform a similarity search between each entity's associated document and a specified `query`. Returns a number of entities up to a specified `limit` ranked in descending order of similarity score. Args: query (str | list): The text or the embedding to score against. @@ -67,14 +56,9 @@ class VectorisedGraph(object): VectorSelection: The vector selection resulting from the search. """ - def nodes_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> VectorSelection: + def nodes_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> VectorSelection: """ - Search the top similarity scoring nodes according to matching a specified `query` with no more than `limit` nodes in the result. + Perform a similarity search between each node's associated document and a specified `query`. Returns a number of nodes up to a specified `limit` ranked in descending order of similarity score. Args: query (str | list): The text or the embedding to score against. @@ -85,16 +69,8 @@ class VectorisedGraph(object): VectorSelection: The vector selection resulting from the search. """ -class Document(object): - """ - A document corresponding to a graph entity. Used to generate embeddings. 
- - Args: - content (str): The document content. - life (int | Tuple[int, int], optional): The optional lifespan of the document. A single value - corresponds to an event, a tuple corresponds to a - window. - """ +class Document(object): + """A document corresponding to a graph entity. Used to generate embeddings.""" def __repr__(self): """Return repr(self).""" @@ -126,11 +102,13 @@ class Document(object): Optional[Any]: """ -class Embedding(object): +class Embedding(object): + def __repr__(self): """Return repr(self).""" -class VectorSelection(object): +class VectorSelection(object): + def add_edges(self, edges: list) -> None: """ Add all the documents associated with the specified `edges` to the current selection. @@ -176,9 +154,7 @@ class VectorSelection(object): list[Edge]: List of edges in the current selection. """ - def expand( - self, hops: int, window: Optional[Tuple[int | str, int | str]] = None - ) -> None: + def expand(self, hops: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: """ Add all the documents a specified number of `hops` away from the selection. @@ -195,12 +171,7 @@ class VectorSelection(object): None: """ - def expand_edges_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> None: + def expand_edges_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: """ Add the top `limit` adjacent edges with higher score for `query` to the selection @@ -215,12 +186,7 @@ class VectorSelection(object): None: """ - def expand_entities_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> None: + def expand_entities_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: """ Add the top `limit` adjacent entities with higher score for `query` to the selection @@ -243,12 +209,7 @@ class VectorSelection(object): None: """ - def expand_nodes_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> None: + def expand_nodes_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: """ Add the top `limit` adjacent nodes with higher score for `query` to the selection diff --git a/python/tests/test_ingestion_equivalence_df.py b/python/tests/test_ingestion_equivalence_df.py index 5f99417099..82e8b6ffeb 100644 --- a/python/tests/test_ingestion_equivalence_df.py +++ b/python/tests/test_ingestion_equivalence_df.py @@ -9,7 +9,7 @@ import fireducks.pandas as fpd except ModuleNotFoundError: fpd = None -from raphtory import Graph +from raphtory import Graph, PersistentGraph base_dir = Path(__file__).parent EDGES_FILE = os.path.join(base_dir, "data/network_traffic_edges.csv") @@ -35,9 +35,10 @@ def dataframes(): return data -def test_edge_ingestion_equivalence(dataframes): +@pytest.mark.parametrize("graph_type", [Graph, PersistentGraph]) +def test_edge_ingestion_equivalence(dataframes, graph_type): # reference graph - g_pd = Graph() + g_pd = graph_type() g_pd.load_edges_from_pandas( df=dataframes["pandas"]["edges"], time="timestamp", @@ -48,7 +49,7 @@ def test_edge_ingestion_equivalence(dataframes): ) # Pandas streaming - g_pd_stream = Graph() + g_pd_stream = graph_type() g_pd_stream.load_edges_from_df( data=dataframes["pandas"]["edges"], time="timestamp", @@ -60,7 +61,7 @@ def 
test_edge_ingestion_equivalence(dataframes): assert g_pd == g_pd_stream, "Pandas streaming edge ingestion failed equivalence check" # Polars - g_pl = Graph() + g_pl = graph_type() g_pl.load_edges_from_df( data=dataframes["polars"]["edges"], time="timestamp", @@ -72,7 +73,7 @@ def test_edge_ingestion_equivalence(dataframes): assert g_pd == g_pl, "Polars edge ingestion failed equivalence check" # Arrow - g_arrow = Graph() + g_arrow = graph_type() g_arrow.load_edges_from_df( data=dataframes["arrow"]["edges"], time="timestamp", @@ -84,7 +85,7 @@ def test_edge_ingestion_equivalence(dataframes): assert g_pd == g_arrow, "Arrow edge ingestion failed equivalence check" # DuckDB - g_duckdb = Graph() + g_duckdb = graph_type() g_duckdb.load_edges_from_df( data=dataframes["duckdb"]["edges"], time="timestamp", @@ -97,7 +98,7 @@ def test_edge_ingestion_equivalence(dataframes): if fpd: # FireDucks - g_fd = Graph() + g_fd = graph_type() g_fd.load_edges_from_df( data=dataframes["fireducks"]["edges"], time="timestamp", @@ -109,9 +110,10 @@ def test_edge_ingestion_equivalence(dataframes): assert g_pd == g_fd, "FireDucks edge ingestion failed equivalence check" -def test_node_ingestion_equivalence(dataframes): +@pytest.mark.parametrize("graph_type", [Graph, PersistentGraph]) +def test_node_ingestion_equivalence(dataframes, graph_type): # reference graph - g_pd = Graph() + g_pd = graph_type() g_pd.load_nodes_from_pandas( df=dataframes["pandas"]["nodes"], time="timestamp", @@ -121,7 +123,7 @@ def test_node_ingestion_equivalence(dataframes): ) # Pandas streaming - g_pd_stream = Graph() + g_pd_stream = graph_type() g_pd_stream.load_nodes_from_df( data=dataframes["pandas"]["nodes"], time="timestamp", @@ -132,7 +134,7 @@ def test_node_ingestion_equivalence(dataframes): assert g_pd == g_pd_stream, "Pandas streaming node ingestion failed equivalence check" # Polars - g_pl = Graph() + g_pl = graph_type() g_pl.load_nodes_from_df( data=dataframes["polars"]["nodes"], time="timestamp", @@ -143,7 +145,7 @@ def test_node_ingestion_equivalence(dataframes): assert g_pd == g_pl, "Polars node ingestion failed equivalence check" # Arrow - g_arrow = Graph() + g_arrow = graph_type() g_arrow.load_nodes_from_df( data=dataframes["arrow"]["nodes"], time="timestamp", @@ -154,7 +156,7 @@ def test_node_ingestion_equivalence(dataframes): assert g_pd == g_arrow, "Arrow node ingestion failed equivalence check" # DuckDB - g_duckdb = Graph() + g_duckdb = graph_type() g_duckdb.load_nodes_from_df( data=dataframes["duckdb"]["nodes"], time="timestamp", @@ -167,7 +169,7 @@ def test_node_ingestion_equivalence(dataframes): if fpd: # FireDucks print("Testing fireducks...") - g_fd = Graph() + g_fd = graph_type() g_fd.load_nodes_from_df( data=dataframes["fireducks"]["nodes"], time="timestamp", @@ -177,9 +179,10 @@ def test_node_ingestion_equivalence(dataframes): ) assert g_pd == g_fd, "FireDucks node ingestion failed equivalence check" -def test_metadata_update_equivalence(dataframes): +@pytest.mark.parametrize("graph_type", [Graph, PersistentGraph]) +def test_metadata_update_equivalence(dataframes, graph_type): # reference graph - g_pd = Graph() + g_pd = graph_type() g_pd.load_edges_from_pandas( df=dataframes["pandas"]["edges"], time="timestamp", @@ -205,7 +208,7 @@ def test_metadata_update_equivalence(dataframes): ) # Pandas streaming - g_pd_stream = Graph() + g_pd_stream = graph_type() g_pd_stream.load_edges_from_df( data=dataframes["pandas"]["edges"], time="timestamp", @@ -232,7 +235,7 @@ def test_metadata_update_equivalence(dataframes): assert g_pd 
== g_pd_stream, "Pandas streaming metadata ingestion failed equivalence check" # Polars - g_pl = Graph() + g_pl = graph_type() g_pl.load_edges_from_df( data=dataframes["polars"]["edges"], time="timestamp", @@ -259,7 +262,7 @@ def test_metadata_update_equivalence(dataframes): assert g_pd == g_pl, "Polars metadata ingestion failed equivalence check" # Arrow - g_arrow = Graph() + g_arrow = graph_type() g_arrow.load_edges_from_df( data=dataframes["arrow"]["edges"], time="timestamp", @@ -286,7 +289,7 @@ def test_metadata_update_equivalence(dataframes): assert g_pd == g_arrow, "Arrow metadata ingestion failed equivalence check" # DuckDB - g_duckdb = Graph() + g_duckdb = graph_type() g_duckdb.load_edges_from_df( data=dataframes["duckdb"]["edges"], time="timestamp", @@ -314,7 +317,7 @@ def test_metadata_update_equivalence(dataframes): if fpd: # FireDucks - g_fd = Graph() + g_fd = graph_type() g_fd.load_edges_from_df( data=dataframes["fireducks"]["edges"], time="timestamp", diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py new file mode 100644 index 0000000000..a1514aae39 --- /dev/null +++ b/python/tests/test_load_from_df.py @@ -0,0 +1,81 @@ +import polars as pl +from raphtory import Graph, PersistentGraph +import pytest +try: + import fireducks.pandas as fpd +except ModuleNotFoundError: + fpd = None + +@pytest.mark.parametrize("graph_type", [Graph, PersistentGraph]) +def test_load_edges_from_polars_df(graph_type): + df = pl.DataFrame( + { + "time": [1, 2, 3], + "src": [1, 2, 3], + "dst": [2, 3, 4], + "value": [10.0, 20.0, 30.0], + } + ) + + g_to_pandas = graph_type() + g_to_pandas.load_edges_from_pandas(df=df.to_pandas(), time="time", src="src", dst="dst", properties=["value"]) + + g_from_df = graph_type() + g_from_df.load_edges_from_df(data=df, time="time", src="src", dst="dst", properties=["value"]) + + expected = [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)] + assert _collect_edges(g_to_pandas) == _collect_edges(g_from_df) + assert _collect_edges(g_to_pandas) == expected + assert _collect_edges(g_from_df) == expected + +if fpd: + import pandas + def _collect_edges(g: Graph): + return sorted((e.history()[0], e.src.id, e.dst.id, e["value"]) for e in g.edges) + + @pytest.mark.parametrize("graph_type", [Graph, PersistentGraph]) + def test_load_edges_from_fireducks_df(graph_type): + # FireDucks DataFrame (pandas-compatible API) + df = fpd.DataFrame( + { + "time": [1, 2, 3], + "src": [1, 2, 3], + "dst": [2, 3, 4], + "value": [10.0, 20.0, 30.0], + } + ) + + g = graph_type() + g.load_edges_from_df(data=df, time="time", src="src", dst="dst", properties=["value"]) + assert [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)] == _collect_edges(g) + + @pytest.mark.parametrize("graph_type", [Graph, PersistentGraph]) + def test_fireducks_matches_pandas_for_same_edges(graph_type): + df_fireducks = fpd.DataFrame( + { + "time": [1, 2, 3], + "src": [1, 2, 3], + "dst": [2, 3, 4], + "value": [10.0, 20.0, 30.0], + } + ) + df_pandas = pandas.DataFrame( + { + "time": [1, 2, 3], + "src": [1, 2, 3], + "dst": [2, 3, 4], + "value": [10.0, 20.0, 30.0], + } + ) + + g_fireducks = graph_type() + g_fireducks.load_edges_from_df(data=df_fireducks, time="time", src="src", dst="dst", properties=["value"]) + + g_pandas = graph_type() + g_pandas.load_edges_from_pandas(df=df_pandas, time="time", src="src", dst="dst", properties=["value"]) + + expected = [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)] + + assert _collect_edges(g_fireducks) == _collect_edges(g_pandas) + assert 
_collect_edges(g_fireducks) == expected + assert _collect_edges(g_pandas) == expected \ No newline at end of file diff --git a/python/tests/test_load_from_fireducks.py b/python/tests/test_load_from_fireducks.py deleted file mode 100644 index b013a353c3..0000000000 --- a/python/tests/test_load_from_fireducks.py +++ /dev/null @@ -1,55 +0,0 @@ -try: - import fireducks.pandas as fpd -except ModuleNotFoundError: - fpd = None - -if fpd: - import pandas - from raphtory import Graph - def _collect_edges(g: Graph): - return sorted((e.history()[0], e.src.id, e.dst.id, e["value"]) for e in g.edges) - - def test_load_edges_from_fireducks_df(): - # FireDucks DataFrame (pandas-compatible API) - df = fpd.DataFrame( - { - "time": [1, 2, 3], - "src": [1, 2, 3], - "dst": [2, 3, 4], - "value": [10.0, 20.0, 30.0], - } - ) - - g: Graph = Graph() - g.load_edges_from_pandas(df=df, time="time", src="src", dst="dst", properties=["value"]) - assert [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)] == _collect_edges(g) - - def test_fireducks_matches_pandas_for_same_edges(): - df_fireducks = fpd.DataFrame( - { - "time": [1, 2, 3], - "src": [1, 2, 3], - "dst": [2, 3, 4], - "value": [10.0, 20.0, 30.0], - } - ) - df_pandas = pandas.DataFrame( - { - "time": [1, 2, 3], - "src": [1, 2, 3], - "dst": [2, 3, 4], - "value": [10.0, 20.0, 30.0], - } - ) - - g_fireducks: Graph = Graph() - g_fireducks.load_edges_from_pandas(df=df_fireducks, time="time", src="src", dst="dst", properties=["value"]) - - g_pandas = Graph() - g_pandas.load_edges_from_pandas(df=df_pandas, time="time", src="src", dst="dst", properties=["value"]) - - expected = [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)] - - assert _collect_edges(g_fireducks) == _collect_edges(g_pandas) - assert _collect_edges(g_fireducks) == expected - assert _collect_edges(g_pandas) == expected \ No newline at end of file diff --git a/python/tests/test_load_from_polars.py b/python/tests/test_load_from_polars.py deleted file mode 100644 index 104e3a91b3..0000000000 --- a/python/tests/test_load_from_polars.py +++ /dev/null @@ -1,53 +0,0 @@ -import polars as pl -from raphtory import Graph -import pytest - -def _collect_edges(g: Graph): - return sorted((e.history()[0], e.src.id, e.dst.id, e["value"]) for e in g.edges) - -def test_load_edges_from_polars_df_error(): - df = pl.DataFrame( - { - "time": [1, 2, 3], - "src": [1, 2, 3], - "dst": [2, 3, 4], - "value": [10.0, 20.0, 30.0], - } - ) - - g = Graph() - with pytest.raises(Exception) as e: - # Current loader expects a pandas DataFrame; this will fail in pyarrow.Table.from_pandas - g.load_edges_from_pandas(df=df, time="time", src="src", dst="dst", properties=["value"]) - - print(f"\nCaptured error: {str(e.value)}") - -def test_load_edges_from_polars_df_via_to_pandas(): - df = pl.DataFrame( - { - "time": [1, 2, 3], - "src": [1, 2, 3], - "dst": [2, 3, 4], - "value": [10.0, 20.0, 30.0], - } - ) - - g = Graph() - g.load_edges_from_pandas(df=df.to_pandas(), time="time", src="src", dst="dst", properties=["value"]) - expected = [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)] - assert _collect_edges(g) == expected - -def test_load_edges_from_polars_df(): - df = pl.DataFrame( - { - "time": [1, 2, 3], - "src": [1, 2, 3], - "dst": [2, 3, 4], - "value": [10.0, 20.0, 30.0], - } - ) - - g = Graph() - g.load_edges_from_df(data=df, time="time", src="src", dst="dst", properties=["value"]) - expected = [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)] - assert _collect_edges(g) == expected \ No newline at end of file From 
8e4e66b05646812dd8123e333db3eedf5638d57e Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 28 Nov 2025 10:39:00 -0500 Subject: [PATCH 24/55] Fixed bug in tests --- python/tests/test_load_from_df.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py index a1514aae39..fed8651058 100644 --- a/python/tests/test_load_from_df.py +++ b/python/tests/test_load_from_df.py @@ -6,6 +6,9 @@ except ModuleNotFoundError: fpd = None +def _collect_edges(g: Graph): + return sorted((e.history()[0], e.src.id, e.dst.id, e["value"]) for e in g.edges) + @pytest.mark.parametrize("graph_type", [Graph, PersistentGraph]) def test_load_edges_from_polars_df(graph_type): df = pl.DataFrame( @@ -30,8 +33,6 @@ def test_load_edges_from_polars_df(graph_type): if fpd: import pandas - def _collect_edges(g: Graph): - return sorted((e.history()[0], e.src.id, e.dst.id, e["value"]) for e in g.edges) @pytest.mark.parametrize("graph_type", [Graph, PersistentGraph]) def test_load_edges_from_fireducks_df(graph_type): From 446064f2be0da342a5bc6a733e6cb123a5cd7e6b Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Tue, 2 Dec 2025 15:58:42 -0500 Subject: [PATCH 25/55] Removed btc dataset benchmarks --- dataset_tests/flatten_btc_datasets.py | 38 ---- dataset_tests/ingestion_benchmarks.py | 162 ------------------ .../ingestion_equivalence_assertions.py | 66 ------- 3 files changed, 266 deletions(-) delete mode 100644 dataset_tests/flatten_btc_datasets.py delete mode 100644 dataset_tests/ingestion_benchmarks.py delete mode 100644 dataset_tests/ingestion_equivalence_assertions.py diff --git a/dataset_tests/flatten_btc_datasets.py b/dataset_tests/flatten_btc_datasets.py deleted file mode 100644 index 0254c0722f..0000000000 --- a/dataset_tests/flatten_btc_datasets.py +++ /dev/null @@ -1,38 +0,0 @@ -from pathlib import Path -import pandas as pd - -FLATTENED_FILE = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/flattened_data.parquet" -DATASET_DIR = "/Users/arien/Downloads" - -def flatten_dataframes_append(): - dfs = [] - flattened_file = Path(FLATTENED_FILE) - dataset_dir = Path(DATASET_DIR) - - if flattened_file.exists(): - dfs.append(pd.read_parquet(flattened_file)) - - def get_addr(v): - if v is not None: - return v[0]["address"] - files = list(dataset_dir.glob("*.snappy.parquet")) - num_files = len(files) - for i in range(num_files): - fp = files[i] - print(f"Processing file {i}/{num_files}: {fp}") - df = pd.read_parquet(fp) - df = pd.DataFrame({ - "block_timestamp": df["block_timestamp"], - "inputs_address": df["inputs"].apply(get_addr), - "outputs_address": df["outputs"].apply(get_addr), - }) - df = df.dropna(subset=["block_timestamp", "inputs_address", "outputs_address"]) - dfs.append(df) - - out = pd.concat(dfs, ignore_index=True) - print(f"Total: {len(out)} rows") - out.to_parquet(FLATTENED_FILE, index=False, compression="snappy") - - -if __name__ == "__main__": - flatten_dataframes_append() \ No newline at end of file diff --git a/dataset_tests/ingestion_benchmarks.py b/dataset_tests/ingestion_benchmarks.py deleted file mode 100644 index 3dc44fe27e..0000000000 --- a/dataset_tests/ingestion_benchmarks.py +++ /dev/null @@ -1,162 +0,0 @@ -import gc -import time - -import pandas as pd -import polars as pl -import duckdb -import fireducks.pandas as fpd -from pyarrow import RecordBatchReader - -from raphtory import Graph - -FLATTENED_FILE = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/flattened_data.parquet" - -def 
bench_pandas(df: pd.DataFrame) -> float: - g = Graph() - start = time.perf_counter() - g.load_edges_from_pandas(df=df, time="block_timestamp", src="inputs_address", dst="outputs_address") - total = time.perf_counter() - start - print(f"[pandas] ingestion took {total:.3f}s for {len(df)} rows, edges: {len(g.edges)}, exploded edges: {len(g.edges.explode())}") - del g - gc.collect() - return total - -def bench_pandas_streaming(df: pd.DataFrame) -> float: - g = Graph() - start = time.perf_counter() - g.load_edges_from_df(data=df, time="block_timestamp", src="inputs_address", dst="outputs_address") - total = time.perf_counter() - start - print(f"[pandas streaming] ingestion took {total:.3f}s") - del g - gc.collect() - return total - -def bench_fire_ducks_pandas_streaming(df: fpd.frame.DataFrame) -> float: - assert "fireducks.pandas.frame.DataFrame" in str(type(df)) - g = Graph() - start = time.perf_counter() - g.load_edges_from_df(data=df, time="block_timestamp", src="inputs_address", dst="outputs_address") - total = time.perf_counter() - start - print(f"[fireducks streaming] ingestion took {total:.3f}s") - del g - gc.collect() - return total - -def bench_polars_streaming(df: pl.DataFrame) -> float: - g = Graph() - start = time.perf_counter() - g.load_edges_from_df(data=df, time="block_timestamp", src="inputs_address", dst="outputs_address") - total = time.perf_counter() - start - print(f"[polars streaming] ingestion took {total:.3f}s") - del g - gc.collect() - return total - -def bench_arrow_streaming(df: pl.DataFrame) -> float: - g = Graph() - df_arrow_from_pl = df.to_arrow() - start = time.perf_counter() - g.load_edges_from_df(data=df_arrow_from_pl, time="block_timestamp", src="inputs_address", dst="outputs_address") - total = time.perf_counter() - start - print(f"[arrow streaming] ingestion took {total:.3f}s") - del g, df_arrow_from_pl - gc.collect() - return total - -def bench_duckdb_streaming(df: pl.DataFrame) -> float: - g = Graph() - df_arrow_from_pl = df.to_arrow() - duckdb_df = duckdb.sql("SELECT * FROM df_arrow_from_pl") - start = time.perf_counter() - # uses the __arrow_c_stream__() interface internally - g.load_edges_from_df(data=duckdb_df, time="block_timestamp", src="inputs_address", dst="outputs_address") - total = time.perf_counter() - start - print(f"[duckdb streaming] ingestion took {total:.3f}s") - del g, df_arrow_from_pl, duckdb_df - gc.collect() - return total - -def bench_duckdb_reader(df: pl.DataFrame) -> float: - g = Graph() - df_arrow_from_pl = df.to_arrow() - # RecordBatchReader doesn't implement __len__(), should still work but the loading bar doesn't display progress - duckdb_df: RecordBatchReader = duckdb.sql("SELECT * FROM df_arrow_from_pl").arrow() - start = time.perf_counter() - # uses the __arrow_c_stream__() interface internally - g.load_edges_from_df(data=duckdb_df, time="block_timestamp", src="inputs_address", dst="outputs_address") - total = time.perf_counter() - start - print(f"[duckdb streaming] ingestion took {total:.3f}s") - del g, df_arrow_from_pl, duckdb_df - gc.collect() - return total - - -def ingestion_speed_btc_dataset(): - df_pd: pd.DataFrame = pd.read_parquet(FLATTENED_FILE) - df_fireducks: fpd.frame.DataFrame = fpd.read_parquet(FLATTENED_FILE) - df_pl: pl.DataFrame = pl.read_parquet(FLATTENED_FILE) - - pandas_ingestion_times = [] - pandas_streaming_ingestion_times = [] - fireducks_streaming_ingestion_times = [] - polars_streaming_ingestion_times = [] - arrow_streaming_ingestion_times = [] - duckdb_streaming_ingestion_times = [] - 
duckdb_reader_ingestion_times = [] - - for _ in range(5): - # 1.1) Pandas ingestion - pandas_time = bench_pandas(df_pd) - pandas_ingestion_times.append(pandas_time) - gc.collect() - - # 1.2) Pandas ingestion streaming - pandas_streaming_time = bench_pandas_streaming(df_pd) - pandas_streaming_ingestion_times.append(pandas_streaming_time) - gc.collect() - - # 2) Fireducks Pandas ingestion streaming - fpd_streaming_time = bench_fire_ducks_pandas_streaming(df_fireducks) - fireducks_streaming_ingestion_times.append(fpd_streaming_time) - gc.collect() - - # 3) Polars ingestion streaming - polars_streaming_time = bench_polars_streaming(df=df_pl) - polars_streaming_ingestion_times.append(polars_streaming_time) - gc.collect() - - # 4) Arrow ingestion streaming - arrow_streaming_time = bench_arrow_streaming(df_pl) - arrow_streaming_ingestion_times.append(arrow_streaming_time) - gc.collect() - - # 5) DuckDB streaming ingestion - duckdb_streaming_time = bench_duckdb_streaming(df_pl) - duckdb_streaming_ingestion_times.append(duckdb_streaming_time) - gc.collect() - - # 6) DuckDB RecordBatchReader ingestion - # RecordBatchReader doesn't implement __len__(), should still work but the loading bar doesn't display progress - duckdb_reader_time = bench_duckdb_reader(df_pl) - duckdb_reader_ingestion_times.append(duckdb_reader_time) - gc.collect() - - formatted_pandas = [f"{num:.3f}s" for num in pandas_ingestion_times] - formatted_pandas_streaming = [f"{num:.3f}s" for num in pandas_streaming_ingestion_times] - formatted_fireducks_streaming = [f"{num:.3f}s" for num in fireducks_streaming_ingestion_times] - formatted_polars_streaming = [f"{num:.3f}s" for num in polars_streaming_ingestion_times] - formatted_arrow_streaming = [f"{num:.3f}s" for num in arrow_streaming_ingestion_times] - formatted_duckdb_streaming = [f"{num:.3f}s" for num in duckdb_streaming_ingestion_times] - formatted_duckdb_reader = [f"{num:.3f}s" for num in duckdb_reader_ingestion_times] - - print(f"Pandas: {formatted_pandas}") - print(f"Pandas streaming: {formatted_pandas_streaming}") - print(f"Fireducks streaming: {formatted_fireducks_streaming}") - print(f"Polars streaming: {formatted_polars_streaming}") - print(f"Arrow streaming: {formatted_arrow_streaming}") - print(f"DuckDB streaming: {formatted_duckdb_streaming}") - print(f"DuckDB RecordBatchReader: {formatted_duckdb_reader}") - - -if __name__ == "__main__": - ingestion_speed_btc_dataset() \ No newline at end of file diff --git a/dataset_tests/ingestion_equivalence_assertions.py b/dataset_tests/ingestion_equivalence_assertions.py deleted file mode 100644 index 2e16a2426a..0000000000 --- a/dataset_tests/ingestion_equivalence_assertions.py +++ /dev/null @@ -1,66 +0,0 @@ -import gc - -import duckdb - -from raphtory import Graph -import pandas as pd -import polars as pl -import fireducks.pandas as fpd - -FLATTENED_FILE = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/flattened_data.parquet" - -if __name__ == "__main__": - df_pd = pd.read_parquet(FLATTENED_FILE) - g_pandas = Graph() - g_pandas.load_edges_from_pandas( - df=df_pd, time="block_timestamp", src="inputs_address", dst="outputs_address" - ) - - df_fireducks: fpd.frame.DataFrame = fpd.read_parquet(FLATTENED_FILE) - g_fireducks = Graph() - g_fireducks.load_edges_from_fireducks(df=df_fireducks, time="block_timestamp", src="inputs_address", dst="outputs_address") - print("Checking equality...") - assert g_pandas == g_fireducks - print("g_pandas == g_fireducks") - del df_fireducks, g_fireducks - gc.collect() - - df_pl = 
pl.read_parquet(FLATTENED_FILE) - g_polars = Graph() - g_polars.load_edges_from_polars( - df=df_pl, time="block_timestamp", src="inputs_address", dst="outputs_address" - ) - - print("Checking equality...") - assert g_pandas == g_polars - print("g_pandas == g_polars") - - df_pl_arrow = df_pl.to_arrow() - g_polars_arrow = Graph() - g_polars_arrow.load_edges_from_arrow(df=df_pl_arrow, time="block_timestamp", src="inputs_address", dst="outputs_address") - print("Checking equality...") - assert g_pandas == g_polars_arrow - print("g_pandas == g_polars_arrow") - del g_polars_arrow - gc.collect() - - g_polars_arrow_streaming = Graph() - g_polars_arrow_streaming.load_edges_from_arrow_streaming(df=df_pl_arrow, time="block_timestamp", src="inputs_address", dst="outputs_address") - print("Checking equality...") - assert g_pandas == g_polars_arrow_streaming - print("g_pandas == g_polars_arrow_streaming") - del g_polars_arrow_streaming - gc.collect() - - g_duckdb = Graph() - duckdb_results = duckdb.sql("SELECT * FROM df_pl_arrow") - g_duckdb.load_edges_from_duckdb(df=duckdb_results, time="block_timestamp", src="inputs_address", dst="outputs_address") - print("Checking equality...") - assert g_pandas == g_duckdb - print("g_pandas == g_duckdb") - - g_duckdb_streaming = Graph() - g_duckdb_streaming.load_edges_from_duckdb_streaming(df=duckdb_results, time="block_timestamp", src="inputs_address", dst="outputs_address") - print("Checking equality...") - assert g_pandas == g_duckdb_streaming - print("g_pandas == g_duckdb_streaming") \ No newline at end of file From 0ba3e5a7dba9291e4eb8fae1a01cfd247e752a0a Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Tue, 2 Dec 2025 17:17:11 -0500 Subject: [PATCH 26/55] Merge cleanup and fixing python docs errors --- python/python/raphtory/__init__.pyi | 84 ++++---- python/python/raphtory/graphql/__init__.pyi | 180 ++++-------------- python/python/raphtory/vectors/__init__.pyi | 59 ++---- raphtory/src/python/graph/graph.rs | 36 ++-- .../src/python/graph/graph_with_deletions.rs | 48 ++--- 5 files changed, 139 insertions(+), 268 deletions(-) diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 23e5903f28..26c99411bb 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1165,8 +1165,8 @@ class Graph(GraphView): properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - layer (str, optional): A value to use as the layer for all edges. Defaults to None. Cannot be used in combination with layer_col. - layer_col (str, optional): The edge layer column name in a dataframe. Defaults to None. Cannot be used in combination with layer. + layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + layer_col (str, optional): The edge layer column name in a dataframe. Cannot be used in combination with layer. Defaults to None. Returns: None: This function does not return a value if the operation is successful. @@ -1187,8 +1187,8 @@ class Graph(GraphView): properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. 
shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) - layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -1209,8 +1209,8 @@ class Graph(GraphView): properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) - layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -1240,8 +1240,8 @@ class Graph(GraphView): Arguments: data (Any): The data source containing node information. id(str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. - node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. + node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -1259,8 +1259,8 @@ class Graph(GraphView): Arguments: df (DataFrame): The Pandas DataFrame containing node information. id(str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. 
Defaults to None. @@ -1278,8 +1278,8 @@ class Graph(GraphView): Arguments: parquet_path (str): Parquet file or directory of Parquet files path containing node information. id(str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -1300,8 +1300,8 @@ class Graph(GraphView): data (Any): The data source containing the nodes. time (str): The column name for the timestamps. id (str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. - node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. + node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -1321,8 +1321,8 @@ class Graph(GraphView): df (DataFrame): The Pandas DataFrame containing the nodes. time (str): The column name for the timestamps. id (str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -1342,8 +1342,8 @@ class Graph(GraphView): parquet_path (str): Parquet file or directory of Parquet files containing the nodes time (str): The column name for the timestamps. id (str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - node_type_col (str, optional): The node type col name in dataframe. Defaults to None. 
(cannot be used in combination with node_type) + node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -1822,8 +1822,8 @@ class PersistentGraph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - layer (str, optional): A value to use as the layer for all edges. Defaults to None. Cannot be used in combination with layer_col. - layer_col (str, optional): The edge layer col name in the data source. Defaults to None. Cannot be used in combination with layer. + layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + layer_col (str, optional): The edge layer col name in the data source. Cannot be used in combination with layer. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -1841,8 +1841,8 @@ class PersistentGraph(GraphView): time (str): The column name for the update timestamps. src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. - layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) - layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -1860,8 +1860,8 @@ class PersistentGraph(GraphView): src (str): The column name for the source node ids. dst (str): The column name for the destination node ids. time (str): The column name for the update timestamps. - layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) - layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -1946,8 +1946,8 @@ class PersistentGraph(GraphView): properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - layer (str, optional): A value to use as the layer for all edges. 
Defaults to None. Cannot be used in combination with layer_col. - layer_col (str, optional): The edge layer column name in a dataframe. Defaults to None. Cannot be used in combination with layer. + layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + layer_col (str, optional): The edge layer column name in a dataframe. Cannot be used in combination with layer. Defaults to None. Returns: None: This function does not return a value if the operation is successful. @@ -1968,8 +1968,8 @@ class PersistentGraph(GraphView): properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) - layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -1990,8 +1990,8 @@ class PersistentGraph(GraphView): properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) - layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -2021,8 +2021,8 @@ class PersistentGraph(GraphView): Arguments: data (Any): The data source containing node information. id(str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. - node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. + node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -2040,8 +2040,8 @@ class PersistentGraph(GraphView): Arguments: df (DataFrame): The Pandas DataFrame containing node information. 
id(str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -2059,8 +2059,8 @@ class PersistentGraph(GraphView): Arguments: parquet_path (str): Parquet file or directory of Parquet files path containing node information. id(str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -2081,8 +2081,8 @@ class PersistentGraph(GraphView): data (Any): The data source containing the nodes. time (str): The column name for the timestamps. id (str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. - node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. + node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -2102,8 +2102,8 @@ class PersistentGraph(GraphView): df (DataFrame): The Pandas DataFrame containing the nodes. time (str): The column name for the timestamps. id (str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. 
+ node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -2123,8 +2123,8 @@ class PersistentGraph(GraphView): parquet_path (str): Parquet file or directory of Parquet files containing the nodes time (str): The column name for the timestamps. id (str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. diff --git a/python/python/raphtory/graphql/__init__.pyi b/python/python/raphtory/graphql/__init__.pyi index 84808a34db..474481b6f0 100644 --- a/python/python/raphtory/graphql/__init__.pyi +++ b/python/python/raphtory/graphql/__init__.pyi @@ -23,26 +23,8 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = [ - "GraphServer", - "RunningGraphServer", - "RaphtoryClient", - "RemoteGraph", - "RemoteEdge", - "RemoteNode", - "RemoteNodeAddition", - "RemoteUpdate", - "RemoteEdgeAddition", - "RemoteIndexSpec", - "PropsInput", - "SomePropertySpec", - "AllPropertySpec", - "encode_graph", - "decode_graph", - "schema", -] - -class GraphServer(object): +__all__ = ['GraphServer', 'RunningGraphServer', 'RaphtoryClient', 'RemoteGraph', 'RemoteEdge', 'RemoteNode', 'RemoteNodeAddition', 'RemoteUpdate', 'RemoteEdgeAddition', 'RemoteIndexSpec', 'PropsInput', 'SomePropertySpec', 'AllPropertySpec', 'encode_graph', 'decode_graph', 'schema'] +class GraphServer(object): """ A class for defining and running a Raphtory GraphQL server @@ -61,22 +43,7 @@ class GraphServer(object): create_index: """ - def __new__( - cls, - work_dir: str | PathLike, - cache_capacity: Optional[int] = None, - cache_tti_seconds: Optional[int] = None, - log_level: Optional[str] = None, - tracing: Optional[bool] = None, - tracing_level=None, - otlp_agent_host: Optional[str] = None, - otlp_agent_port: Optional[str] = None, - otlp_tracing_service_name: Optional[str] = None, - auth_public_key: Any = None, - auth_enabled_for_reads: Any = None, - config_path: Optional[str | PathLike] = None, - create_index: Any = None, - ) -> GraphServer: + def __new__(cls, work_dir: str | PathLike, cache_capacity: Optional[int] = None, cache_tti_seconds: Optional[int] = None, log_level: Optional[str] = None, tracing: Optional[bool] = None, tracing_level=None, otlp_agent_host: Optional[str] = None, otlp_agent_port: Optional[str] = None, otlp_tracing_service_name: Optional[str] = None, auth_public_key: Any = 
None, auth_enabled_for_reads: Any = None, config_path: Optional[str | PathLike] = None, create_index: Any = None) -> GraphServer: """Create and return a new object. See help(type) for accurate signature.""" def run(self, port: int = 1736, timeout_ms: int = 180000) -> None: @@ -91,13 +58,7 @@ class GraphServer(object): None: """ - def set_embeddings( - self, - cache: str, - embedding: Optional[Callable] = None, - nodes: bool | str = True, - edges: bool | str = True, - ) -> GraphServer: + def set_embeddings(self, cache: str, embedding: Optional[Callable] = None, nodes: bool | str = True, edges: bool | str = True) -> GraphServer: """ Setup the server to vectorise graphs with a default template. @@ -133,9 +94,7 @@ class GraphServer(object): GraphServer: The server with indexing disabled """ - def with_vectorised_graphs( - self, graph_names: list[str], nodes: bool | str = True, edges: bool | str = True - ) -> GraphServer: + def with_vectorised_graphs(self, graph_names: list[str], nodes: bool | str = True, edges: bool | str = True) -> GraphServer: """ Vectorise a subset of the graphs of the server. @@ -148,11 +107,15 @@ class GraphServer(object): GraphServer: A new server object containing the vectorised graphs. """ -class RunningGraphServer(object): +class RunningGraphServer(object): """A Raphtory server handler that also enables querying the server""" - def __enter__(self): ... - def __exit__(self, _exc_type, _exc_val, _exc_tb): ... + def __enter__(self): + ... + + def __exit__(self, _exc_type, _exc_val, _exc_tb): + ... + def get_client(self): """ Get the client for the server @@ -169,7 +132,7 @@ class RunningGraphServer(object): None: """ -class RaphtoryClient(object): +class RaphtoryClient(object): """ A client for handling GraphQL operations in the context of Raphtory. @@ -251,9 +214,7 @@ class RaphtoryClient(object): """ - def query( - self, query: str, variables: Optional[dict[str, Any]] = None - ) -> dict[str, Any]: + def query(self, query: str, variables: Optional[dict[str, Any]] = None) -> dict[str, Any]: """ Make a GraphQL query against the server. @@ -291,9 +252,7 @@ class RaphtoryClient(object): """ - def send_graph( - self, path: str, graph: Graph | PersistentGraph, overwrite: bool = False - ) -> dict[str, Any]: + def send_graph(self, path: str, graph: Graph | PersistentGraph, overwrite: bool = False) -> dict[str, Any]: """ Send a graph to the server @@ -306,9 +265,7 @@ class RaphtoryClient(object): dict[str, Any]: The data field from the graphQL response after executing the mutation. """ - def upload_graph( - self, path: str, file_path: str, overwrite: bool = False - ) -> dict[str, Any]: + def upload_graph(self, path: str, file_path: str, overwrite: bool = False) -> dict[str, Any]: """ Upload graph file from a path file_path on the client @@ -321,15 +278,9 @@ class RaphtoryClient(object): dict[str, Any]: The data field from the graphQL response after executing the mutation. """ -class RemoteGraph(object): - def add_edge( - self, - timestamp: int | str | datetime, - src: str | int, - dst: str | int, - properties: Optional[dict] = None, - layer: Optional[str] = None, - ) -> RemoteEdge: +class RemoteGraph(object): + + def add_edge(self, timestamp: int | str | datetime, src: str | int, dst: str | int, properties: Optional[dict] = None, layer: Optional[str] = None) -> RemoteEdge: """ Adds a new edge with the given source and destination nodes and properties to the remote graph. 
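# Illustrative sketch of the remote mutation API stubbed above. The server URL
# and the `RaphtoryClient.remote_graph` call are assumptions from the wider
# client API rather than from this diff; `add_node` and `add_edge` follow the
# signatures shown here.
from raphtory.graphql import RaphtoryClient

client = RaphtoryClient("http://localhost:1736")          # assumed local server URL
rg = client.remote_graph("graphs/example")                # hypothetical server-side path
rg.add_node(timestamp=1, id="alice", node_type="person")  # create or update a node
rg.add_edge(timestamp=2, src="alice", dst="bob",
            properties={"weight": 1.0}, layer="follows")  # timestamped edge update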
@@ -366,13 +317,7 @@ class RemoteGraph(object): None: """ - def add_node( - self, - timestamp: int | str | datetime, - id: str | int, - properties: Optional[dict] = None, - node_type: Optional[str] = None, - ) -> RemoteNode: + def add_node(self, timestamp: int | str | datetime, id: str | int, properties: Optional[dict] = None, node_type: Optional[str] = None) -> RemoteNode: """ Adds a new node with the given id and properties to the remote graph. @@ -409,13 +354,7 @@ class RemoteGraph(object): None: """ - def create_node( - self, - timestamp: int | str | datetime, - id: str | int, - properties: Optional[dict] = None, - node_type: Optional[str] = None, - ) -> RemoteNode: + def create_node(self, timestamp: int | str | datetime, id: str | int, properties: Optional[dict] = None, node_type: Optional[str] = None) -> RemoteNode: """ Create a new node with the given id and properties to the remote graph and fail if the node already exists. @@ -429,13 +368,7 @@ class RemoteGraph(object): RemoteNode: the new remote node """ - def delete_edge( - self, - timestamp: int, - src: str | int, - dst: str | int, - layer: Optional[str] = None, - ) -> RemoteEdge: + def delete_edge(self, timestamp: int, src: str | int, dst: str | int, layer: Optional[str] = None) -> RemoteEdge: """ Deletes an edge in the remote graph, given the timestamp, src and dst nodes and layer (optional) @@ -483,7 +416,7 @@ class RemoteGraph(object): None: """ -class RemoteEdge(object): +class RemoteEdge(object): """ A remote edge reference @@ -492,9 +425,7 @@ class RemoteEdge(object): and [RemoteGraph.delete_edge][raphtory.graphql.RemoteGraph.delete_edge]. """ - def add_metadata( - self, properties: dict[str, PropValue], layer: Optional[str] = None - ) -> None: + def add_metadata(self, properties: dict[str, PropValue], layer: Optional[str] = None) -> None: """ Add metadata to the edge within the remote graph. This function is used to add metadata to an edge that does not @@ -508,12 +439,7 @@ class RemoteEdge(object): None: """ - def add_updates( - self, - t: int | str | datetime, - properties: Optional[dict[str, PropValue]] = None, - layer: Optional[str] = None, - ) -> None: + def add_updates(self, t: int | str | datetime, properties: Optional[dict[str, PropValue]] = None, layer: Optional[str] = None) -> None: """ Add updates to an edge in the remote graph at a specified time. @@ -544,9 +470,7 @@ class RemoteEdge(object): GraphError: If the operation fails. """ - def update_metadata( - self, properties: dict[str, PropValue], layer: Optional[str] = None - ) -> None: + def update_metadata(self, properties: dict[str, PropValue], layer: Optional[str] = None) -> None: """ Update metadata of an edge in the remote graph overwriting existing values. This function is used to add properties to an edge that does not @@ -560,7 +484,8 @@ class RemoteEdge(object): None: """ -class RemoteNode(object): +class RemoteNode(object): + def add_metadata(self, properties: dict[str, PropValue]) -> None: """ Add metadata to a node in the remote graph. @@ -574,9 +499,7 @@ class RemoteNode(object): None: """ - def add_updates( - self, t: int | str | datetime, properties: Optional[dict[str, PropValue]] = None - ) -> None: + def add_updates(self, t: int | str | datetime, properties: Optional[dict[str, PropValue]] = None) -> None: """ Add updates to a node in the remote graph at a specified time. This function allows for the addition of property updates to a node within the graph. The updates are time-stamped, meaning they are applied at the specified time. 
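# Illustrative sketch of the per-entity update methods stubbed above, reusing
# the assumed client/graph handle from the previous sketch; only the
# `add_updates`/`add_metadata` calls are taken from the signatures in this diff.
from raphtory.graphql import RaphtoryClient

rg = RaphtoryClient("http://localhost:1736").remote_graph("graphs/example")  # assumed API
node = rg.add_node(timestamp=1, id="alice")
node.add_updates(t=5, properties={"status": "active"})   # timestamped property update
node.add_metadata({"source": "crm"})                     # static metadata, no timestamp
edge = rg.add_edge(timestamp=2, src="alice", dst="bob")
edge.add_updates(t=6, properties={"weight": 2.0}, layer="follows")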
@@ -614,7 +537,7 @@ class RemoteNode(object): None: """ -class RemoteNodeAddition(object): +class RemoteNodeAddition(object): """ Node addition update @@ -625,16 +548,10 @@ class RemoteNodeAddition(object): updates (list[RemoteUpdate], optional): the temporal updates """ - def __new__( - cls, - name: GID, - node_type: Optional[str] = None, - metadata: Optional[PropInput] = None, - updates: Optional[list[RemoteUpdate]] = None, - ) -> RemoteNodeAddition: + def __new__(cls, name: GID, node_type: Optional[str] = None, metadata: Optional[PropInput] = None, updates: Optional[list[RemoteUpdate]] = None) -> RemoteNodeAddition: """Create and return a new object. See help(type) for accurate signature.""" -class RemoteUpdate(object): +class RemoteUpdate(object): """ A temporal update @@ -643,12 +560,10 @@ class RemoteUpdate(object): properties (PropInput, optional): the properties for the update """ - def __new__( - cls, time: TimeInput, properties: Optional[PropInput] = None - ) -> RemoteUpdate: + def __new__(cls, time: TimeInput, properties: Optional[PropInput] = None) -> RemoteUpdate: """Create and return a new object. See help(type) for accurate signature.""" -class RemoteEdgeAddition(object): +class RemoteEdgeAddition(object): """ An edge update @@ -660,17 +575,10 @@ class RemoteEdgeAddition(object): updates (list[RemoteUpdate], optional): the temporal updates for the edge """ - def __new__( - cls, - src: GID, - dst: GID, - layer: Optional[str] = None, - metadata: Optional[PropInput] = None, - updates: Optional[list[RemoteUpdate]] = None, - ) -> RemoteEdgeAddition: + def __new__(cls, src: GID, dst: GID, layer: Optional[str] = None, metadata: Optional[PropInput] = None, updates: Optional[list[RemoteUpdate]] = None) -> RemoteEdgeAddition: """Create and return a new object. See help(type) for accurate signature.""" -class RemoteIndexSpec(object): +class RemoteIndexSpec(object): """ Create a RemoteIndexSpec specifying which node and edge properties to index. @@ -682,7 +590,7 @@ class RemoteIndexSpec(object): def __new__(cls, node_props: PropsInput, edge_props: PropsInput) -> RemoteIndexSpec: """Create and return a new object. See help(type) for accurate signature.""" -class PropsInput(object): +class PropsInput(object): """ Create a PropsInput by choosing to include all/some properties explicitly. @@ -694,14 +602,10 @@ class PropsInput(object): ValueError: If neither all and some are specified. """ - def __new__( - cls, - all: Optional[AllPropertySpec] = None, - some: Optional[SomePropertySpec] = None, - ) -> PropsInput: + def __new__(cls, all: Optional[AllPropertySpec] = None, some: Optional[SomePropertySpec] = None) -> PropsInput: """Create and return a new object. See help(type) for accurate signature.""" -class SomePropertySpec(object): +class SomePropertySpec(object): """ Create a SomePropertySpec by explicitly listing metadata and/or temporal property names. @@ -710,12 +614,10 @@ class SomePropertySpec(object): properties (list[str]): Temporal property names. Defaults to []. """ - def __new__( - cls, metadata: list[str] = [], properties: list[str] = [] - ) -> SomePropertySpec: + def __new__(cls, metadata: list[str] = [], properties: list[str] = []) -> SomePropertySpec: """Create and return a new object. See help(type) for accurate signature.""" -class AllPropertySpec(object): +class AllPropertySpec(object): """ Specifies that **all** properties should be included when creating an index. Use one of the predefined variants: ALL , ALL_METADATA , or ALL_TEMPORAL . 
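# Illustrative sketch composing the index-spec types stubbed above: index every
# node property but only a named subset of edge properties. The property name
# "weight" is made up for the example, and where the spec is sent (e.g. an
# index-creation call on the client) is outside this diff.
from raphtory.graphql import AllPropertySpec, PropsInput, RemoteIndexSpec, SomePropertySpec

spec = RemoteIndexSpec(
    node_props=PropsInput(all=AllPropertySpec.ALL),                       # all node properties
    edge_props=PropsInput(some=SomePropertySpec(properties=["weight"])),  # only "weight"
)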
diff --git a/python/python/raphtory/vectors/__init__.pyi b/python/python/raphtory/vectors/__init__.pyi index 75e6b0e6d7..e2d8f648b1 100644 --- a/python/python/raphtory/vectors/__init__.pyi +++ b/python/python/raphtory/vectors/__init__.pyi @@ -23,17 +23,11 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ["VectorisedGraph", "Document", "Embedding", "VectorSelection"] - -class VectorisedGraph(object): +__all__ = ['VectorisedGraph', 'Document', 'Embedding', 'VectorSelection'] +class VectorisedGraph(object): """VectorisedGraph object that contains embedded documents that correspond to graph entities.""" - def edges_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> VectorSelection: + def edges_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> VectorSelection: """ Perform a similarity search between each edge's associated document and a specified `query`. Returns a number of edges up to a specified `limit` ranked in descending order of similarity score. @@ -49,12 +43,7 @@ class VectorisedGraph(object): def empty_selection(self): """Return an empty selection of entities.""" - def entities_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> VectorSelection: + def entities_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> VectorSelection: """ Perform a similarity search between each entity's associated document and a specified `query`. Returns a number of entities up to a specified `limit` ranked in descending order of similarity score. @@ -67,12 +56,7 @@ class VectorisedGraph(object): VectorSelection: The vector selection resulting from the search. """ - def nodes_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> VectorSelection: + def nodes_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> VectorSelection: """ Perform a similarity search between each node's associated document and a specified `query`. Returns a number of nodes up to a specified `limit` ranked in descending order of similarity score. @@ -85,7 +69,7 @@ class VectorisedGraph(object): VectorSelection: The vector selection resulting from the search. """ -class Document(object): +class Document(object): """A document corresponding to a graph entity. Used to generate embeddings.""" def __repr__(self): @@ -118,11 +102,13 @@ class Document(object): Optional[Any]: """ -class Embedding(object): +class Embedding(object): + def __repr__(self): """Return repr(self).""" -class VectorSelection(object): +class VectorSelection(object): + def add_edges(self, edges: list) -> None: """ Add all the documents associated with the specified `edges` to the current selection. @@ -168,9 +154,7 @@ class VectorSelection(object): list[Edge]: List of edges in the current selection. """ - def expand( - self, hops: int, window: Optional[Tuple[int | str, int | str]] = None - ) -> None: + def expand(self, hops: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: """ Add all the documents a specified number of `hops` away from the selection. 
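# Illustrative sketch of the similarity-search flow stubbed above. `vg` is
# assumed to be a VectorisedGraph produced elsewhere (for example by a server
# configured with set_embeddings); the calls follow the signatures in this stub.
# vg: VectorisedGraph  (obtained elsewhere, not constructed in this sketch)
selection = vg.edges_by_similarity(query="large transfers", limit=10)
selection.expand(hops=1)                  # pull in documents one hop away
for edge in selection.edges():            # edges gathered in the current selection
    print(edge.src.id, edge.dst.id)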
@@ -187,12 +171,7 @@ class VectorSelection(object): None: """ - def expand_edges_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> None: + def expand_edges_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: """ Add the top `limit` adjacent edges with higher score for `query` to the selection @@ -207,12 +186,7 @@ class VectorSelection(object): None: """ - def expand_entities_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> None: + def expand_entities_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: """ Add the top `limit` adjacent entities with higher score for `query` to the selection @@ -235,12 +209,7 @@ class VectorSelection(object): None: """ - def expand_nodes_by_similarity( - self, - query: str | list, - limit: int, - window: Optional[Tuple[int | str, int | str]] = None, - ) -> None: + def expand_nodes_by_similarity(self, query: str | list, limit: int, window: Optional[Tuple[int | str, int | str]] = None) -> None: """ Add the top `limit` adjacent nodes with higher score for `query` to the selection diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 2cd41e2018..141c8af28c 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -639,8 +639,8 @@ impl PyGraph { /// data (Any): The data source containing the nodes. /// time (str): The column name for the timestamps. /// id (str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. - /// node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. + /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + /// node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -685,8 +685,8 @@ impl PyGraph { /// df (DataFrame): The Pandas DataFrame containing the nodes. /// time (str): The column name for the timestamps. /// id (str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. 
/// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -731,8 +731,8 @@ impl PyGraph { /// parquet_path (str): Parquet file or directory of Parquet files containing the nodes /// time (str): The column name for the timestamps. /// id (str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -784,8 +784,8 @@ impl PyGraph { /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. Cannot be used in combination with layer_col. - /// layer_col (str, optional): The edge layer column name in a dataframe. Defaults to None. Cannot be used in combination with layer. + /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + /// layer_col (str, optional): The edge layer column name in a dataframe. Cannot be used in combination with layer. Defaults to None. /// /// Returns: /// None: This function does not return a value if the operation is successful. @@ -833,8 +833,8 @@ impl PyGraph { /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) - /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + /// layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. @@ -882,8 +882,8 @@ impl PyGraph { /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. 
Defaults to None. - /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) - /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + /// layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. @@ -929,8 +929,8 @@ impl PyGraph { /// Arguments: /// data (Any): The data source containing node information. /// id(str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. - /// node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. + /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + /// node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. /// @@ -968,8 +968,8 @@ impl PyGraph { /// Arguments: /// df (DataFrame): The Pandas DataFrame containing node information. /// id(str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. /// @@ -1007,8 +1007,8 @@ impl PyGraph { /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files path containing node information. /// id(str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. 
/// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. /// diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index 8129409e95..be1e957530 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -580,8 +580,8 @@ impl PyPersistentGraph { /// data (Any): The data source containing the nodes. /// time (str): The column name for the timestamps. /// id (str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. - /// node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. + /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + /// node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -626,8 +626,8 @@ impl PyPersistentGraph { /// df (DataFrame): The Pandas DataFrame containing the nodes. /// time (str): The column name for the timestamps. /// id (str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -670,8 +670,8 @@ impl PyPersistentGraph { /// parquet_path (str): Parquet file or directory of Parquet files containing the nodes /// time (str): The column name for the timestamps. /// id (str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. /// properties (List[str], optional): List of node property column names. Defaults to None. 
/// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. @@ -721,8 +721,8 @@ impl PyPersistentGraph { /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. Cannot be used in combination with layer_col. - /// layer_col (str, optional): The edge layer column name in a dataframe. Defaults to None. Cannot be used in combination with layer. + /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + /// layer_col (str, optional): The edge layer column name in a dataframe. Cannot be used in combination with layer. Defaults to None. /// /// Returns: /// None: This function does not return a value if the operation is successful. @@ -770,8 +770,8 @@ impl PyPersistentGraph { /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) - /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + /// layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. @@ -817,8 +817,8 @@ impl PyPersistentGraph { /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) - /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + /// layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. @@ -864,8 +864,8 @@ impl PyPersistentGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. 
Cannot be used in combination with layer_col. - /// layer_col (str, optional): The edge layer col name in the data source. Defaults to None. Cannot be used in combination with layer. + /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + /// layer_col (str, optional): The edge layer col name in the data source. Cannot be used in combination with layer. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. @@ -892,8 +892,8 @@ impl PyPersistentGraph { /// time (str): The column name for the update timestamps. /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. - /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) - /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + /// layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. @@ -920,8 +920,8 @@ impl PyPersistentGraph { /// src (str): The column name for the source node ids. /// dst (str): The column name for the destination node ids. /// time (str): The column name for the update timestamps. - /// layer (str, optional): A value to use as the layer for all edges. Defaults to None. (cannot be used in combination with layer_col) - /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. (cannot be used in combination with layer) + /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. + /// layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. @@ -957,8 +957,8 @@ impl PyPersistentGraph { /// Arguments: /// data (Any): The data source containing node information. /// id(str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. Cannot be used in combination with node_type_col. - /// node_type_col (str, optional): The node type column name in a dataframe. Defaults to None. Cannot be used in combination with node_type. + /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + /// node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. /// @@ -996,8 +996,8 @@ impl PyPersistentGraph { /// Arguments: /// df (DataFrame): The Pandas DataFrame containing node information. /// id(str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. 
(cannot be used in combination with node_type_col) - /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. /// @@ -1033,8 +1033,8 @@ impl PyPersistentGraph { /// Arguments: /// parquet_path (str): Parquet file or directory of Parquet files path containing node information. /// id(str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Defaults to None. (cannot be used in combination with node_type_col) - /// node_type_col (str, optional): The node type col name in dataframe. Defaults to None. (cannot be used in combination with node_type) + /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. /// From 4944687c6879056d1056a35af364b204558bd7ba Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Wed, 3 Dec 2025 23:34:29 -0500 Subject: [PATCH 27/55] Adding load_nodes function in python that can take any input from the following: object with __arrow_c_stream__(), parquet file/directory, csv file/directory. Added arrow-csv as a dependency to load from csv files. Added csv loading. --- Cargo.lock | 1 + Cargo.toml | 1 + python/python/raphtory/__init__.pyi | 3 + python/tests/test_load_from_df.py | 54 +++++ raphtory/Cargo.toml | 2 + raphtory/src/python/graph/graph.rs | 95 +++++++- raphtory/src/python/graph/io/arrow_loaders.rs | 225 +++++++++++++++--- 7 files changed, 351 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aeb86c0756..4db9d029f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4913,6 +4913,7 @@ version = "0.16.3" dependencies = [ "ahash", "arrow", + "arrow-csv", "arrow-json", "arroy", "async-openai", diff --git a/Cargo.toml b/Cargo.toml index 428064e7a1..bdf44ad457 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -162,6 +162,7 @@ parquet = { version = "56.2.0" } arrow-json = { version = "56.2.0" } arrow-buffer = { version = "56.2.0" } arrow-schema = { version = "56.2.0" } +arrow-csv = { version = "56.2.0" } arrow-array = { version = "56.2.0", features = ["chrono-tz"] } arrow-cast = { version = "56.2.0" } arrow-ipc = { version = "56.2.0" } diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 26c99411bb..27fc20fc22 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1290,6 +1290,9 @@ class Graph(GraphView): GraphError: If the operation fails. """ + def load_nodes(self, data, time, id, node_type=None, node_type_col=None, properties=None, metadata=None, shared_metadata=None): + ... 
+ def load_nodes_from_df(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py index fed8651058..295019b504 100644 --- a/python/tests/test_load_from_df.py +++ b/python/tests/test_load_from_df.py @@ -1,4 +1,7 @@ +from pathlib import Path + import polars as pl +import pandas as pd from raphtory import Graph, PersistentGraph import pytest try: @@ -31,6 +34,57 @@ def test_load_edges_from_polars_df(graph_type): assert _collect_edges(g_to_pandas) == expected assert _collect_edges(g_from_df) == expected +def test_different_data_sources(): + g = Graph() + file_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/subset/flattened_data_subset.parquet" + num_nodes_ingested = [] + + # test path string for file + g.load_nodes(data=file_path_str, time="block_timestamp", id="inputs_address") + num_nodes_ingested.append(len(g.nodes)) + del g + + # test Path object for file + file_path_obj = Path(file_path_str) + g = Graph() + g.load_nodes(data=file_path_obj, time="block_timestamp", id="inputs_address") + num_nodes_ingested.append(len(g.nodes)) + del g, file_path_obj + + # test path string for directory + dir_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/subset" + g = Graph() + g.load_nodes(data=dir_path_str, time="block_timestamp", id="inputs_address") + num_nodes_ingested.append(len(g.nodes)) + del g, dir_path_str + + # test Path object for directory + dir_path_obj = Path("/Users/arien/RustroverProjects/Raphtory/dataset_tests/subset") + g = Graph() + g.load_nodes(data=dir_path_obj, time="block_timestamp", id="inputs_address") + num_nodes_ingested.append(len(g.nodes)) + del g, dir_path_obj + + # test pandas + df_pd = pd.read_parquet(file_path_str) + g = Graph() + g.load_nodes(data=df_pd, time="block_timestamp", id="inputs_address") + num_nodes_ingested.append(len(g.nodes)) + del g, df_pd + + # test polars + df_pl = pl.read_parquet(file_path_str) + g = Graph() + g.load_nodes(data=df_pl, time="block_timestamp", id="inputs_address") + num_nodes_ingested.append(len(g.nodes)) + del g, df_pl + + # sanity check, make sure we ingested the same number of nodes each time + print(f"Number of tests ran: {len(num_nodes_ingested)}") + for i in range(len(num_nodes_ingested)-1): + assert num_nodes_ingested[0] == num_nodes_ingested[i+1] + + if fpd: import pandas diff --git a/raphtory/Cargo.toml b/raphtory/Cargo.toml index 66a566b191..d24dbe6fc8 100644 --- a/raphtory/Cargo.toml +++ b/raphtory/Cargo.toml @@ -69,6 +69,7 @@ prost-types = { workspace = true, optional = true } # arrow otional dependencies parquet = { workspace = true, optional = true } arrow-json = { workspace = true, optional = true } +arrow-csv = { workspace = true, optional = true} #arrow-array = { workspace = true, features = ["chrono-tz"], optional = true } #arrow-buffer = { workspace = true, optional = true } #arrow-cast = { workspace = true, optional = true } @@ -175,6 +176,7 @@ arrow = [ "raphtory-core/arrow", "dep:parquet", "dep:arrow-json", + "dep:arrow-csv", "dep:arrow", ] diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 141c8af28c..4da1d69825 100644 --- 
a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -9,7 +9,7 @@ use crate::{ api::view::internal::{DynamicGraph, IntoDynamic, MaterializedGraph}, graph::{edge::EdgeView, node::NodeView, views::node_subgraph::NodeSubgraph}, }, - errors::GraphError, + errors::{GraphError, GraphError::PythonError}, io::parquet_loaders::*, prelude::*, python::{ @@ -21,6 +21,7 @@ use crate::{ arrow_loaders::{ load_edge_metadata_from_arrow_c_stream, load_edges_from_arrow_c_stream, load_node_metadata_from_arrow_c_stream, load_nodes_from_arrow_c_stream, + load_nodes_from_csv_path, }, pandas_loaders::*, }, @@ -35,12 +36,14 @@ use crate::{ InternalStableDecode, StableEncode, }, }; -use pyo3::{prelude::*, pybacked::PyBackedStr, types::PyDict}; +use pyo3::{exceptions::PyValueError, prelude::*, pybacked::PyBackedStr, types::PyDict}; use raphtory_api::core::{entities::GID, storage::arc_str::ArcStr}; use raphtory_storage::core_ops::CoreGraphOps; use std::{ collections::HashMap, + ffi::OsStr, fmt::{Debug, Formatter}, + fs, path::PathBuf, }; @@ -631,6 +634,94 @@ impl PyGraph { PyGraph::py_from_db_graph(self.graph.event_graph()) } + #[pyo3( + signature = (data, time, id, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None) + )] + fn load_nodes<'py>( + &self, + data: &Bound<'py, PyAny>, + time: &str, + id: &str, + node_type: Option<&str>, + node_type_col: Option<&str>, + properties: Option>, + metadata: Option>, + shared_metadata: Option>, + ) -> Result<(), GraphError> { + let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); + let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); + if data.hasattr("__arrow_c_stream__")? { + load_nodes_from_arrow_c_stream( + &self.graph, + data, + time, + id, + node_type, + node_type_col, + &properties, + &metadata, + shared_metadata.as_ref(), + ) + } else if let Ok(path) = data.extract::() { + // handles Strings too + let is_parquet = if path.is_dir() { + fs::read_dir(&path)?.any(|entry| { + entry.map_or(false, |e| { + e.path().extension().and_then(OsStr::to_str) == Some("parquet") + }) + }) + } else { + path.extension().and_then(OsStr::to_str) == Some("parquet") + }; + + let is_csv = if path.is_dir() { + fs::read_dir(&path)?.any(|entry| { + entry.map_or(false, |e| { + let p = e.path(); + let s = p.to_string_lossy(); + s.ends_with(".csv") || s.ends_with(".csv.gz") || s.ends_with(".csv.bz2") + }) + }) + } else { + let path_str = path.to_string_lossy(); + path_str.ends_with(".csv") + || path_str.ends_with(".csv.gz") + || path_str.ends_with(".csv.bz2") + }; + + if is_parquet { + load_nodes_from_parquet( + &self.graph, + path.as_path(), + time, + id, + node_type, + node_type_col, + &properties, + &metadata, + shared_metadata.as_ref(), + None, + ) + } else if is_csv { + load_nodes_from_csv_path( + &self.graph, + path, + time, + id, + node_type, + node_type_col, + &properties, + &metadata, + shared_metadata.as_ref(), + ) + } else { + Err(PythonError(PyValueError::new_err("Argument 'data' contains invalid path. Paths must either point to a Parquet/CSV file, or a directory containing Parquet/CSV files (but not both)"))) + } + } else { + Err(PythonError(PyValueError::new_err("Argument 'data' invalid. 
Valid data sources are: a single Parquet or CSV file, a directory containing Parquet or CSV files (but not both), and objects that implement an __arrow_c_stream__ method."))) + } + } + /// Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index e40526e96e..dccb56ab08 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -19,9 +19,19 @@ use arrow::{ }, datatypes::SchemaRef, }; +use arrow_csv::{reader::Format, ReaderBuilder}; +use bzip2::read::BzDecoder; +use flate2::read::GzDecoder; use pyo3::{prelude::*, types::PyCapsule}; use raphtory_api::core::entities::properties::prop::Prop; -use std::{cmp::min, collections::HashMap}; +use std::{ + cmp::min, + collections::HashMap, + fs, + fs::File, + path::{Path, PathBuf}, + sync::Arc, +}; const CHUNK_SIZE: usize = 1_000_000; // split large chunks so progress bar updates reasonably @@ -159,7 +169,7 @@ pub(crate) fn load_edge_metadata_from_arrow_c_stream< ) } -pub fn load_edge_deletions_from_arrow_c_stream< +pub(crate) fn load_edge_deletions_from_arrow_c_stream< 'py, G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps, >( @@ -251,33 +261,192 @@ pub(crate) fn process_arrow_c_stream_df<'a>( Ok(batch) => batch, Err(e) => return vec![Err(e)], }; - let num_rows = batch.num_rows(); - - // many times, all the data will be passed as a single RecordBatch, meaning the progress bar - // will not update properly (only updates at the end of each batch). Splitting into smaller batches - // means the progress bar will update reasonably (every CHUNK_SIZE rows) - if num_rows > CHUNK_SIZE { - let num_chunks = (num_rows + CHUNK_SIZE - 1) / CHUNK_SIZE; - let mut result = Vec::with_capacity(num_chunks); - for i in 0..num_chunks { - let offset = i * CHUNK_SIZE; - let length = min(CHUNK_SIZE, num_rows - offset); - let sliced_batch = batch.slice(offset, length); - let chunk_arrays = indices - .iter() - .map(|&idx| sliced_batch.column(idx).clone()) - .collect::>(); - result.push(Ok(DFChunk::new(chunk_arrays))); - } - result - } else { - let chunk_arrays = indices - .iter() - .map(|&idx| batch.column(idx).clone()) - .collect::>(); - vec![Ok(DFChunk::new(chunk_arrays))] - } + + split_into_chunks(&batch, &indices) }); Ok(DFView::new(names, chunks, len_from_python)) } + +/// Splits a RecordBatch into chunks of CHUNK_SIZE owned by DFChunk objects +fn split_into_chunks(batch: &RecordBatch, indices: &[usize]) -> Vec> { + // many times, all the data will be passed as a single RecordBatch, meaning the progress bar + // will not update properly (only updates at the end of each batch). 
Splitting into smaller batches + // means the progress bar will update reasonably (every CHUNK_SIZE rows) + let num_rows = batch.num_rows(); + if num_rows > CHUNK_SIZE { + let num_chunks = (num_rows + CHUNK_SIZE - 1) / CHUNK_SIZE; + let mut result = Vec::with_capacity(num_chunks); + for i in 0..num_chunks { + let offset = i * CHUNK_SIZE; + let length = min(CHUNK_SIZE, num_rows - offset); + let sliced_batch = batch.slice(offset, length); + let chunk_arrays = indices + .iter() + .map(|&idx| sliced_batch.column(idx).clone()) + .collect::>(); + result.push(Ok(DFChunk::new(chunk_arrays))); + } + result + } else { + let chunk_arrays = indices + .iter() + .map(|&idx| batch.column(idx).clone()) + .collect::>(); + vec![Ok(DFChunk::new(chunk_arrays))] + } +} + +fn get_reader(filename: &str, file: File) -> Box { + if filename.ends_with(".csv.gz") { + Box::new(GzDecoder::new(file)) + } else if filename.ends_with(".csv.bz2") { + Box::new(BzDecoder::new(file)) + } else { + // no need for a BufReader because ReaderBuilder::build internally wraps into BufReader + Box::new(file) + } +} + +// Load from CSV files using arrow-csv +pub(crate) fn load_nodes_from_csv_path< + 'py, + G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, +>( + graph: &G, + path: PathBuf, + time: &str, + id: &str, + node_type: Option<&str>, + node_type_col: Option<&str>, + properties: &[&str], + metadata: &[&str], + shared_metadata: Option<&HashMap>, +) -> Result<(), GraphError> { + let mut cols_to_check = vec![id, time]; + cols_to_check.extend_from_slice(properties); + cols_to_check.extend_from_slice(metadata); + if let Some(ref node_type_col) = node_type_col { + cols_to_check.push(node_type_col.as_ref()); + } + + // get the CSV file paths + let mut csv_paths = Vec::new(); + if path.is_dir() { + for entry in fs::read_dir(&path)? 
{ + let entry = entry?; + let p = entry.path(); + let s = p.to_string_lossy(); + if s.ends_with(".csv") || s.ends_with(".csv.gz") || s.ends_with(".csv.bz2") { + csv_paths.push(p); + } + } + } else { + csv_paths.push(path.clone()); + } + + if csv_paths.is_empty() { + return Err(GraphError::LoadFailure(format!( + "No CSV files found at path '{}'", + path.display() + ))); + } + + let df_view = process_csv_paths_df(&csv_paths, cols_to_check.clone())?; + df_view.check_cols_exist(&cols_to_check)?; + load_nodes_from_df( + df_view, + time, + id, + properties, + metadata, + shared_metadata, + node_type, + node_type_col, + graph, + ) +} + +fn build_csv_reader( + path: &Path, + schema: SchemaRef, +) -> Result>, GraphError> { + let file = File::open(path)?; + let path_str = path.to_string_lossy(); + + // Support bz2 and gz compression + let reader = get_reader(path_str.as_ref(), file); + + ReaderBuilder::new(schema) + .with_header(true) + .build(reader) + .map_err(|e| { + GraphError::LoadFailure(format!( + "Arrow CSV error while reading '{}': {e}", + path.display() + )) + }) +} + +fn read_csv_chunks_from_file( + path: &Path, + schema: SchemaRef, + indices: &[usize], +) -> Result>, GraphError> { + let mut csv_reader = build_csv_reader(path, schema)?; + let mut chunks = Vec::new(); + + for batch_res in &mut csv_reader { + let batch = batch_res.map_err(|e| { + GraphError::LoadFailure(format!( + "Arrow CSV error while reading a batch from '{}': {e}", + path.display() + )) + })?; + chunks.extend(split_into_chunks(&batch, indices)) + } + Ok(chunks) +} + +fn process_csv_paths_df<'a>( + paths: &'a [PathBuf], + col_names: Vec<&str>, +) -> Result> + 'a>, GraphError> { + if paths.is_empty() { + return Err(GraphError::LoadFailure( + "No CSV files found at the provided path".to_string(), + )); + } + + // infer the schema + // TODO: Add support for user provided schema + let mut schema_reader = get_reader(paths[0].to_string_lossy().as_ref(), File::open(&paths[0])?); + let (schema, _) = Format::default() + .with_header(true) + .infer_schema(&mut schema_reader, Some(100)) + .map_err(|e| { + GraphError::LoadFailure(format!( + "Arrow CSV error while inferring schema from '{}': {e}", + paths[0].display() + )) + })?; + let schema_ref: SchemaRef = Arc::new(schema); + + // get column names and indices + let mut names: Vec = Vec::new(); + let mut indices: Vec = Vec::new(); + for (idx, field) in schema_ref.fields().iter().enumerate() { + if col_names.contains(&field.name().as_str()) { + names.push(field.name().clone()); + indices.push(idx); + } + } + + let chunks = paths.iter().cloned().flat_map(move |path| { + read_csv_chunks_from_file(path.as_path(), schema_ref.clone(), &indices) + .unwrap_or_else(|e| vec![Err(e)]) + }); + + // we don't know the total number of rows until we read all files + Ok(DFView::new(names, chunks, None)) +} From a6400fa3bae62afdd5f38a746cf0c9caf614292c Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Thu, 4 Dec 2025 20:29:38 -0500 Subject: [PATCH 28/55] Fixed CSV reader to calculate column indices for each file separately. 
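
Why this matters, as a hedged sketch (hypothetical file names and data, not part of this patch): two CSV files in the same directory that hold the same columns in a different order should load identically, because column indices are now resolved against each file's own inferred schema instead of the schema of the first file.

    import tempfile
    from pathlib import Path

    from raphtory import Graph

    with tempfile.TemporaryDirectory() as d:
        # Same columns, different order: indices must be computed per file.
        Path(d, "a.csv").write_text("time,id,value\n1,1,10.0\n")
        Path(d, "b.csv").write_text("id,time,value\n2,2,20.0\n")

        g = Graph()
        g.load_nodes(data=d, time="time", id="id", properties=["value"])
        assert len(g.nodes) == 2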
--- raphtory/src/python/graph/io/arrow_loaders.rs | 87 ++++++++++++------- 1 file changed, 54 insertions(+), 33 deletions(-) diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index dccb56ab08..e0d3337c28 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -297,7 +297,8 @@ fn split_into_chunks(batch: &RecordBatch, indices: &[usize]) -> Vec Box { +fn get_csv_reader(filename: &str, file: File) -> Box { + // Support bz2 and gz compression if filename.ends_with(".csv.gz") { Box::new(GzDecoder::new(file)) } else if filename.ends_with(".csv.bz2") { @@ -369,15 +370,28 @@ pub(crate) fn load_nodes_from_csv_path< fn build_csv_reader( path: &Path, - schema: SchemaRef, ) -> Result>, GraphError> { let file = File::open(path)?; let path_str = path.to_string_lossy(); - // Support bz2 and gz compression - let reader = get_reader(path_str.as_ref(), file); + // infer schema + let reader = get_csv_reader(path_str.as_ref(), file); + let (schema, _) = Format::default() + .with_header(true) + .infer_schema(reader, Some(100)) + .map_err(|e| { + GraphError::LoadFailure(format!( + "Arrow CSV error while inferring schema from '{}': {e}", + path.display() + )) + })?; + let schema_ref: SchemaRef = Arc::new(schema); + + // we need another reader because the first one gets consumed + let file = File::open(path)?; + let reader = get_csv_reader(path_str.as_ref(), file); - ReaderBuilder::new(schema) + ReaderBuilder::new(schema_ref) .with_header(true) .build(reader) .map_err(|e| { @@ -390,10 +404,9 @@ fn build_csv_reader( fn read_csv_chunks_from_file( path: &Path, - schema: SchemaRef, indices: &[usize], ) -> Result>, GraphError> { - let mut csv_reader = build_csv_reader(path, schema)?; + let mut csv_reader = build_csv_reader(path)?; let mut chunks = Vec::new(); for batch_res in &mut csv_reader { @@ -410,7 +423,7 @@ fn read_csv_chunks_from_file( fn process_csv_paths_df<'a>( paths: &'a [PathBuf], - col_names: Vec<&str>, + col_names: Vec<&'a str>, ) -> Result> + 'a>, GraphError> { if paths.is_empty() { return Err(GraphError::LoadFailure( @@ -418,33 +431,41 @@ fn process_csv_paths_df<'a>( )); } - // infer the schema // TODO: Add support for user provided schema - let mut schema_reader = get_reader(paths[0].to_string_lossy().as_ref(), File::open(&paths[0])?); - let (schema, _) = Format::default() - .with_header(true) - .infer_schema(&mut schema_reader, Some(100)) - .map_err(|e| { - GraphError::LoadFailure(format!( - "Arrow CSV error while inferring schema from '{}': {e}", - paths[0].display() - )) - })?; - let schema_ref: SchemaRef = Arc::new(schema); - - // get column names and indices - let mut names: Vec = Vec::new(); - let mut indices: Vec = Vec::new(); - for (idx, field) in schema_ref.fields().iter().enumerate() { - if col_names.contains(&field.name().as_str()) { - names.push(field.name().clone()); - indices.push(idx); + let names = col_names.iter().map(|&name| name.to_string()).collect(); + let chunks = paths.iter().flat_map(move |path| { + let csv_reader = match build_csv_reader(path.as_path()) { + Ok(r) => r, + Err(e) => return vec![Err(e)], + }; + let mut indices = Vec::with_capacity(col_names.len()); + for required_col in &col_names { + if let Some((idx, _)) = csv_reader + .schema() + .fields() + .iter() + .enumerate() + .find(|(_, f)| f.name() == required_col) + { + indices.push(idx); + } else { + return vec![Err(GraphError::LoadFailure(format!( + "Column '{required_col}' not found in file {}", + 
path.display() + )))]; + } } - } - - let chunks = paths.iter().cloned().flat_map(move |path| { - read_csv_chunks_from_file(path.as_path(), schema_ref.clone(), &indices) - .unwrap_or_else(|e| vec![Err(e)]) + let mut results = Vec::new(); + for batch_res in csv_reader { + match batch_res { + Ok(batch) => results.extend(split_into_chunks(&batch, &indices)), + Err(e) => results.push(Err(GraphError::LoadFailure(format!( + "Arrow CSV error while reading a batch from '{}': {e}", + path.display() + )))), + } + } + results }); // we don't know the total number of rows until we read all files From 378c03ca5764b31abe674739e42845f2b0944ac9 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Thu, 4 Dec 2025 21:59:29 -0500 Subject: [PATCH 29/55] Changed unsafe ArrowArrayStreamReader pointer cast to stream arrow data from python. Replaced it with PyRecordBatchReader::from_arrow_pycapsule for safety and future changes. --- raphtory/src/python/graph/io/arrow_loaders.rs | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index e40526e96e..d9551e4af8 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -13,13 +13,11 @@ use crate::{ serialise::incremental::InternalCache, }; use arrow::{ - array::{ - ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream}, - RecordBatch, RecordBatchReader, - }, + array::{RecordBatch, RecordBatchReader}, datatypes::SchemaRef, }; use pyo3::{prelude::*, types::PyCapsule}; +use pyo3_arrow::PyRecordBatchReader; use raphtory_api::core::entities::properties::prop::Prop; use std::{cmp::min, collections::HashMap}; @@ -212,13 +210,19 @@ pub(crate) fn process_arrow_c_stream_df<'a>( "Stream capsule is not valid".to_string(), ))); } - let stream_ptr = stream_capsule.pointer() as *mut FFI_ArrowArrayStream; - let reader: ArrowArrayStreamReader = unsafe { ArrowArrayStreamReader::from_raw(stream_ptr) } + let reader = PyRecordBatchReader::from_arrow_pycapsule(stream_capsule) + .map_err(|e| { + PyErr::from(GraphError::LoadFailure(format!( + "Arrow stream error while creating the reader: {}", + e + ))) + })? 
+ .into_reader() .map_err(|e| { - GraphError::LoadFailure(format!( + PyErr::from(GraphError::LoadFailure(format!( "Arrow stream error while creating the reader: {}", - e.to_string() - )) + e + ))) })?; // Get column names and indices once only From 11a52cf6fb5f2c31694d450f89834cd92177eafc Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Thu, 4 Dec 2025 22:20:59 -0500 Subject: [PATCH 30/55] Added test for loading data from CSV --- python/tests/test_load_from_df.py | 64 +++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 16 deletions(-) diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py index 295019b504..c3c493d43e 100644 --- a/python/tests/test_load_from_df.py +++ b/python/tests/test_load_from_df.py @@ -1,3 +1,4 @@ +import os from pathlib import Path import polars as pl @@ -35,45 +36,76 @@ def test_load_edges_from_polars_df(graph_type): assert _collect_edges(g_from_df) == expected def test_different_data_sources(): - g = Graph() - file_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/subset/flattened_data_subset.parquet" num_nodes_ingested = [] - # test path string for file - g.load_nodes(data=file_path_str, time="block_timestamp", id="inputs_address") + ######### PARQUET ######### + parquet_dir_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/parquet_subset" + parquet_file_path_str = parquet_dir_path_str + "/flattened_data_subset.parquet" + # test path string for parquet file + g = Graph() + g.load_nodes(data=parquet_file_path_str, time="block_timestamp", id="inputs_address") num_nodes_ingested.append(len(g.nodes)) del g - # test Path object for file - file_path_obj = Path(file_path_str) + # test Path object for parquet file + file_path_obj = Path(parquet_file_path_str) g = Graph() g.load_nodes(data=file_path_obj, time="block_timestamp", id="inputs_address") num_nodes_ingested.append(len(g.nodes)) - del g, file_path_obj + del g - # test path string for directory - dir_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/subset" + # test path string for parquet directory g = Graph() - g.load_nodes(data=dir_path_str, time="block_timestamp", id="inputs_address") + g.load_nodes(data=parquet_dir_path_str, time="block_timestamp", id="inputs_address") num_nodes_ingested.append(len(g.nodes)) - del g, dir_path_str + del g - # test Path object for directory - dir_path_obj = Path("/Users/arien/RustroverProjects/Raphtory/dataset_tests/subset") + # test Path object for parquet directory + dir_path_obj = Path(parquet_dir_path_str) g = Graph() g.load_nodes(data=dir_path_obj, time="block_timestamp", id="inputs_address") num_nodes_ingested.append(len(g.nodes)) - del g, dir_path_obj + del g + + ######### CSV ######### + csv_dir_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/csv_subset" + csv_file_path_str = csv_dir_path_str + "/flattened_data_subset.csv" + # test path string for CSV file + g = Graph() + g.load_nodes(data=csv_file_path_str, time="block_timestamp", id="inputs_address") + num_nodes_ingested.append(len(g.nodes)) + del g + + # test Path object for CSV file + file_path_obj = Path(csv_file_path_str) + g = Graph() + g.load_nodes(data=file_path_obj, time="block_timestamp", id="inputs_address") + num_nodes_ingested.append(len(g.nodes)) + del g + + # test path string for CSV directory + g = Graph() + g.load_nodes(data=csv_dir_path_str, time="block_timestamp", id="inputs_address") + num_nodes_ingested.append(len(g.nodes)) + del g + + # test Path object for CSV directory + dir_path_obj = 
Path(csv_dir_path_str) + g = Graph() + g.load_nodes(data=dir_path_obj, time="block_timestamp", id="inputs_address") + num_nodes_ingested.append(len(g.nodes)) + del g + ######### arrow_c_stream ######### # test pandas - df_pd = pd.read_parquet(file_path_str) + df_pd = pd.read_parquet(parquet_file_path_str) g = Graph() g.load_nodes(data=df_pd, time="block_timestamp", id="inputs_address") num_nodes_ingested.append(len(g.nodes)) del g, df_pd # test polars - df_pl = pl.read_parquet(file_path_str) + df_pl = pl.read_parquet(parquet_file_path_str) g = Graph() g.load_nodes(data=df_pl, time="block_timestamp", id="inputs_address") num_nodes_ingested.append(len(g.nodes)) From ff655e142006ebfeeb223446d7910f49866bc526 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Thu, 4 Dec 2025 23:09:09 -0500 Subject: [PATCH 31/55] Changed CSV reading to avoid loading whole CSV files into memory in arrow format at once. Now stream 1 mil rows at a time. --- raphtory/src/python/graph/io/arrow_loaders.rs | 57 ++++++++----------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index e0d3337c28..a0b4ad5062 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -29,6 +29,7 @@ use std::{ collections::HashMap, fs, fs::File, + iter, path::{Path, PathBuf}, sync::Arc, }; @@ -393,6 +394,7 @@ fn build_csv_reader( ReaderBuilder::new(schema_ref) .with_header(true) + .with_batch_size(CHUNK_SIZE) .build(reader) .map_err(|e| { GraphError::LoadFailure(format!( @@ -402,25 +404,6 @@ fn build_csv_reader( }) } -fn read_csv_chunks_from_file( - path: &Path, - indices: &[usize], -) -> Result>, GraphError> { - let mut csv_reader = build_csv_reader(path)?; - let mut chunks = Vec::new(); - - for batch_res in &mut csv_reader { - let batch = batch_res.map_err(|e| { - GraphError::LoadFailure(format!( - "Arrow CSV error while reading a batch from '{}': {e}", - path.display() - )) - })?; - chunks.extend(split_into_chunks(&batch, indices)) - } - Ok(chunks) -} - fn process_csv_paths_df<'a>( paths: &'a [PathBuf], col_names: Vec<&'a str>, @@ -434,9 +417,11 @@ fn process_csv_paths_df<'a>( // TODO: Add support for user provided schema let names = col_names.iter().map(|&name| name.to_string()).collect(); let chunks = paths.iter().flat_map(move |path| { + // BoxedLIter couldn't be used because it has Send + Sync bound + type ChunkIter<'b> = Box> + 'b>; let csv_reader = match build_csv_reader(path.as_path()) { Ok(r) => r, - Err(e) => return vec![Err(e)], + Err(e) => return Box::new(iter::once(Err(e))) as ChunkIter<'a>, }; let mut indices = Vec::with_capacity(col_names.len()); for required_col in &col_names { @@ -449,23 +434,29 @@ fn process_csv_paths_df<'a>( { indices.push(idx); } else { - return vec![Err(GraphError::LoadFailure(format!( + return Box::new(iter::once(Err(GraphError::LoadFailure(format!( "Column '{required_col}' not found in file {}", path.display() - )))]; - } - } - let mut results = Vec::new(); - for batch_res in csv_reader { - match batch_res { - Ok(batch) => results.extend(split_into_chunks(&batch, &indices)), - Err(e) => results.push(Err(GraphError::LoadFailure(format!( - "Arrow CSV error while reading a batch from '{}': {e}", - path.display() - )))), + ))))) as ChunkIter<'a>; } } - results + Box::new( + csv_reader + .into_iter() + .map(move |batch_res| match batch_res { + Ok(batch) => { + let arrays = indices + .iter() + .map(|&idx| batch.column(idx).clone()) + 
.collect::>(); + Ok(DFChunk::new(arrays)) + } + Err(e) => Err(GraphError::LoadFailure(format!( + "Arrow CSV error while reading a batch from '{}': {e}", + path.display() + ))), + }), + ) as ChunkIter<'a> }); // we don't know the total number of rows until we read all files From 4643c16a5b628295e16d4c218654df7d5316304c Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Thu, 4 Dec 2025 23:47:22 -0500 Subject: [PATCH 32/55] Added support for mixed directories containing both CSV and parquet files. --- raphtory/src/python/graph/graph.rs | 18 +++++++++++------- raphtory/src/python/graph/io/arrow_loaders.rs | 4 ++-- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 4da1d69825..39d8cdaab1 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -689,6 +689,7 @@ impl PyGraph { || path_str.ends_with(".csv.bz2") }; + // support directories with mixed parquet and CSV files if is_parquet { load_nodes_from_parquet( &self.graph, @@ -701,11 +702,12 @@ impl PyGraph { &metadata, shared_metadata.as_ref(), None, - ) - } else if is_csv { + )?; + } + if is_csv { load_nodes_from_csv_path( &self.graph, - path, + &path, time, id, node_type, @@ -713,12 +715,14 @@ impl PyGraph { &properties, &metadata, shared_metadata.as_ref(), - ) - } else { - Err(PythonError(PyValueError::new_err("Argument 'data' contains invalid path. Paths must either point to a Parquet/CSV file, or a directory containing Parquet/CSV files (but not both)"))) + )?; + } + if !is_parquet && !is_csv { + return Err(PythonError(PyValueError::new_err("Argument 'data' contains invalid path. Paths must either point to a Parquet/CSV file, or a directory containing Parquet/CSV files"))); } + Ok(()) } else { - Err(PythonError(PyValueError::new_err("Argument 'data' invalid. Valid data sources are: a single Parquet or CSV file, a directory containing Parquet or CSV files (but not both), and objects that implement an __arrow_c_stream__ method."))) + Err(PythonError(PyValueError::new_err("Argument 'data' invalid. Valid data sources are: a single Parquet or CSV file, a directory containing Parquet or CSV files, and objects that implement an __arrow_c_stream__ method."))) } } diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index a0b4ad5062..975f38b06c 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -316,7 +316,7 @@ pub(crate) fn load_nodes_from_csv_path< G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( graph: &G, - path: PathBuf, + path: &PathBuf, time: &str, id: &str, node_type: Option<&str>, @@ -335,7 +335,7 @@ pub(crate) fn load_nodes_from_csv_path< // get the CSV file paths let mut csv_paths = Vec::new(); if path.is_dir() { - for entry in fs::read_dir(&path)? { + for entry in fs::read_dir(path)? 
{ let entry = entry?; let p = entry.path(); let s = p.to_string_lossy(); From 3d68a99f5c7f5a857d5f55c029c48302d4ec4155 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Mon, 8 Dec 2025 03:18:50 -0500 Subject: [PATCH 33/55] Added schema argument to load_nodes function --- raphtory/src/io/arrow/df_loaders.rs | 48 +++++++---- raphtory/src/io/arrow/prop_handler.rs | 13 ++- raphtory/src/io/parquet_loaders.rs | 15 +++- raphtory/src/python/graph/graph.rs | 82 +++++++++++++++++-- .../src/python/graph/graph_with_deletions.rs | 4 + raphtory/src/python/graph/io/arrow_loaders.rs | 11 ++- .../src/python/graph/io/pandas_loaders.rs | 4 + 7 files changed, 153 insertions(+), 24 deletions(-) diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index f12e96b9c1..94f96e59c8 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -69,6 +69,7 @@ pub(crate) fn load_nodes_from_df< node_type: Option<&str>, node_type_col: Option<&str>, graph: &G, + schema: Option<&HashMap>, ) -> Result<(), GraphError> { if matches!(df_view.is_empty(), Some(true)) { return Ok(()); @@ -112,14 +113,19 @@ pub(crate) fn load_nodes_from_df< for chunk in df_view.chunks { let df = chunk?; let start_id = graph.reserve_event_ids(df.len()).map_err(into_graph_err)?; - let prop_cols = - combine_properties_arrow(properties, &properties_indices, &df, |key, dtype| { + let prop_cols = combine_properties_arrow( + properties, + &properties_indices, + &df, + schema, + |key, dtype| { graph .resolve_node_property(key, dtype, false) .map_err(into_graph_err) - })?; + }, + )?; let metadata_cols = - combine_properties_arrow(metadata, &metadata_indices, &df, |key, dtype| { + combine_properties_arrow(metadata, &metadata_indices, &df, schema, |key, dtype| { graph .resolve_node_property(key, dtype, true) .map_err(into_graph_err) @@ -233,6 +239,7 @@ pub fn load_edges_from_df< layer: Option<&str>, layer_col: Option<&str>, graph: &G, + schema: Option<&HashMap>, ) -> Result<(), GraphError> { if matches!(df_view.is_empty(), Some(true)) { return Ok(()); @@ -280,14 +287,19 @@ pub fn load_edges_from_df< for chunk in df_view.chunks { let df = chunk?; let start_idx = graph.reserve_event_ids(df.len()).map_err(into_graph_err)?; - let prop_cols = - combine_properties_arrow(properties, &properties_indices, &df, |key, dtype| { + let prop_cols = combine_properties_arrow( + properties, + &properties_indices, + &df, + schema, + |key, dtype| { graph .resolve_edge_property(key, dtype, false) .map_err(into_graph_err) - })?; + }, + )?; let metadata_cols = - combine_properties_arrow(metadata, &metadata_indices, &df, |key, dtype| { + combine_properties_arrow(metadata, &metadata_indices, &df, schema, |key, dtype| { graph .resolve_edge_property(key, dtype, true) .map_err(into_graph_err) @@ -543,6 +555,7 @@ pub(crate) fn load_node_props_from_df< metadata: &[&str], shared_metadata: Option<&HashMap>, graph: &G, + schema: Option<&HashMap>, ) -> Result<(), GraphError> { if matches!(df_view.is_empty(), Some(true)) { return Ok(()); @@ -581,7 +594,7 @@ pub(crate) fn load_node_props_from_df< for chunk in df_view.chunks { let df = chunk?; let metadata_cols = - combine_properties_arrow(metadata, &metadata_indices, &df, |key, dtype| { + combine_properties_arrow(metadata, &metadata_indices, &df, schema, |key, dtype| { graph .resolve_node_property(key, dtype, true) .map_err(into_graph_err) @@ -666,6 +679,7 @@ pub(crate) fn load_edges_props_from_df< layer: Option<&str>, layer_col: Option<&str>, graph: &G, + schema: 
Option<&HashMap>, ) -> Result<(), GraphError> { if matches!(df_view.is_empty(), Some(true)) { return Ok(()); @@ -710,7 +724,7 @@ pub(crate) fn load_edges_props_from_df< for chunk in df_view.chunks { let df = chunk?; let metadata_cols = - combine_properties_arrow(metadata, &metadata_indices, &df, |key, dtype| { + combine_properties_arrow(metadata, &metadata_indices, &df, schema, |key, dtype| { graph .resolve_edge_property(key, dtype, true) .map_err(into_graph_err) @@ -830,6 +844,7 @@ pub(crate) fn load_graph_props_from_df< properties: Option<&[&str]>, metadata: Option<&[&str]>, graph: &G, + schema: Option<&HashMap>, ) -> Result<(), GraphError> { if matches!(df_view.is_empty(), Some(true)) { return Ok(()); @@ -854,14 +869,19 @@ pub(crate) fn load_graph_props_from_df< for chunk in df_view.chunks { let df = chunk?; let start_id = graph.reserve_event_ids(df.len()).map_err(into_graph_err)?; - let prop_cols = - combine_properties_arrow(properties, &properties_indices, &df, |key, dtype| { + let prop_cols = combine_properties_arrow( + properties, + &properties_indices, + &df, + schema, + |key, dtype| { graph .resolve_graph_property(key, dtype, false) .map_err(into_graph_err) - })?; + }, + )?; let metadata_cols = - combine_properties_arrow(metadata, &metadata_indices, &df, |key, dtype| { + combine_properties_arrow(metadata, &metadata_indices, &df, schema, |key, dtype| { graph .resolve_graph_property(key, dtype, true) .map_err(into_graph_err) diff --git a/raphtory/src/io/arrow/prop_handler.rs b/raphtory/src/io/arrow/prop_handler.rs index 7e7c2a6667..6dedfa1a0b 100644 --- a/raphtory/src/io/arrow/prop_handler.rs +++ b/raphtory/src/io/arrow/prop_handler.rs @@ -25,6 +25,7 @@ use raphtory_api::core::{ }; use rayon::prelude::*; use rustc_hash::FxHashMap; +use std::collections::HashMap; pub struct PropCols { prop_ids: Vec, @@ -55,6 +56,7 @@ pub fn combine_properties_arrow( props: &[impl AsRef], indices: &[usize], df: &DFChunk, + schema: Option<&HashMap>, prop_id_resolver: impl Fn(&str, PropType) -> Result, E>, ) -> Result where @@ -62,7 +64,16 @@ where { let dtypes = indices .iter() - .map(|idx| data_type_as_prop_type(df.chunk[*idx].data_type())) + .enumerate() + .map(|(i, idx)| { + let col_name = props[i].as_ref(); + if let Some(schema_map) = schema { + if let Some(prop_type) = schema_map.get(col_name) { + return Ok(prop_type.clone()); + } + } + data_type_as_prop_type(df.chunk[*idx].data_type()) + }) .collect::, _>>()?; let cols = indices .iter() diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 1ade06f350..fa3293fcda 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -51,6 +51,7 @@ pub fn load_nodes_from_parquet< node_type, node_type_col, graph, + None, // TODO: Add schema ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } @@ -130,6 +131,7 @@ pub fn load_edges_from_parquet< layer, layer_col, graph, + None, // TODO: Add schema ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; @@ -167,6 +169,7 @@ pub fn load_node_props_from_parquet< metadata_properties, shared_metadata, graph, + None, // TODO: Add schema ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } @@ -206,6 +209,7 @@ pub fn load_edge_props_from_parquet< layer, layer_col, graph, + None, // TODO: Add schema ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } @@ -254,8 +258,15 @@ pub fn load_graph_props_from_parquet( + schema: Option>, +) -> 
PyResult<Option<HashMap<String, PropType>>> {
+    let Some(pairs) = schema else {
+        return Ok(None);
+    };
+
+    // TODO: Move this to FromPyObject impl?
+    let parse_type_fn = |s: &str| match s.to_ascii_lowercase().as_str() {
+        "i64" | "int64" | "int" => Ok(PropType::I64),
+        "i32" | "int32" => Ok(PropType::I32),
+        "u64" | "uint64" => Ok(PropType::U64),
+        "u32" | "uint32" => Ok(PropType::U32),
+        "u16" | "uint16" => Ok(PropType::U16),
+        "u8" | "uint8" => Ok(PropType::U8),
+        "f64" | "float64" | "float" | "double" => Ok(PropType::F64),
+        "f32" | "float32" => Ok(PropType::F32),
+        "bool" | "boolean" => Ok(PropType::Bool),
+        "str" | "string" | "utf8" => Ok(PropType::Str),
+        "ndtime" | "naivedatetime" | "datetime" => Ok(PropType::NDTime),
+        "dtime" | "datetimetz" => Ok(PropType::DTime),
+        other => Err(PyTypeError::new_err(format!(
+            "Unknown type name '{other}' in schema"
+        ))),
+    };
+    let mut out = HashMap::with_capacity(pairs.len());
+    for (name, type_str) in pairs {
+        let col_name = name.to_string();
+        let prop_type = parse_type_fn(type_str.as_ref())?;
+        out.insert(col_name, prop_type);
+    }
+    Ok(Some(out))
+}
+
 /// A temporal graph.
 #[pymethods]
 impl PyGraph {
@@ -634,8 +677,29 @@ impl PyGraph {
         PyGraph::py_from_db_graph(self.graph.event_graph())
     }
 
+    /// Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method),
+    /// a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files.
+    /// The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes,
+    /// Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query)
+    ///
+    /// Arguments:
+    ///     data (Any): The data source containing the nodes.
+    ///     time (str): The column name for the timestamps.
+    ///     id (str): The column name for the node IDs.
+    ///     node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None.
+    ///     node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None.
+    ///     properties (List[str], optional): List of node property column names. Defaults to None.
+    ///     metadata (List[str], optional): List of node metadata column names. Defaults to None.
+    ///     shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None.
+    ///     schema (optional): Column type overrides as (column_name, type_name) pairs, used instead of the inferred property types. Defaults to None.
+    ///
+    /// Returns:
+    ///     None: This function does not return a value if the operation is successful.
+    ///
+    /// Raises:
+    ///     GraphError: If the operation fails.
     #[pyo3(
-        signature = (data, time, id, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None)
+        signature = (data, time, id, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None, schema = None)
     )]
     fn load_nodes<'py>(
         &self,
         data: &Bound<'py, PyAny>,
         time: &str,
         id: &str,
         node_type: Option<&str>,
         node_type_col: Option<&str>,
         properties: Option<Vec<PyBackedStr>>,
         metadata: Option<Vec<PyBackedStr>>,
         shared_metadata: Option<HashMap<String, Prop>>,
+        schema: Option<Vec<(PyBackedStr, PyBackedStr)>>,
     ) -> Result<(), GraphError> {
         let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default();
         let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default();
+        let column_schema = parse_column_schema(schema).map_err(GraphError::PythonError)?;
         if data.hasattr("__arrow_c_stream__")?
{ load_nodes_from_arrow_c_stream( &self.graph, @@ -661,6 +726,7 @@ impl PyGraph { &properties, &metadata, shared_metadata.as_ref(), + column_schema.as_ref(), ) } else if let Ok(path) = data.extract::() { // handles Strings too @@ -718,11 +784,11 @@ impl PyGraph { )?; } if !is_parquet && !is_csv { - return Err(PythonError(PyValueError::new_err("Argument 'data' contains invalid path. Paths must either point to a Parquet/CSV file, or a directory containing Parquet/CSV files"))); + return Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' contains invalid path. Paths must either point to a Parquet/CSV file, or a directory containing Parquet/CSV files"))); } Ok(()) } else { - Err(PythonError(PyValueError::new_err("Argument 'data' invalid. Valid data sources are: a single Parquet or CSV file, a directory containing Parquet or CSV files, and objects that implement an __arrow_c_stream__ method."))) + Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' invalid. Valid data sources are: a single Parquet or CSV file, a directory containing Parquet or CSV files, and objects that implement an __arrow_c_stream__ method."))) } } @@ -771,6 +837,7 @@ impl PyGraph { &properties, &metadata, shared_metadata.as_ref(), + None, // TODO: Add schema ) } @@ -915,6 +982,7 @@ impl PyGraph { shared_metadata.as_ref(), layer, layer_col, + None, // TODO: Add schema ) } @@ -1055,6 +1123,7 @@ impl PyGraph { node_type_col, &metadata, shared_metadata.as_ref(), + None, // TODO: Add schema ) } @@ -1178,6 +1247,7 @@ impl PyGraph { shared_metadata.as_ref(), layer, layer_col, + None, // TODO: Add schema ) } diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index be1e957530..7534091589 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -617,6 +617,7 @@ impl PyPersistentGraph { &properties, &metadata, shared_metadata.as_ref(), + None, // TODO: Add schema ) } @@ -757,6 +758,7 @@ impl PyPersistentGraph { shared_metadata.as_ref(), layer, layer_col, + None, // TODO: Add schema ) } @@ -988,6 +990,7 @@ impl PyPersistentGraph { node_type_col, &metadata, shared_metadata.as_ref(), + None, // TODO: Add schema ) } @@ -1107,6 +1110,7 @@ impl PyPersistentGraph { shared_metadata.as_ref(), layer, layer_col, + None, // TODO: Add schema ) } diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index 975f38b06c..436780b93c 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -23,7 +23,7 @@ use arrow_csv::{reader::Format, ReaderBuilder}; use bzip2::read::BzDecoder; use flate2::read::GzDecoder; use pyo3::{prelude::*, types::PyCapsule}; -use raphtory_api::core::entities::properties::prop::Prop; +use raphtory_api::core::entities::properties::prop::{Prop, PropType}; use std::{ cmp::min, collections::HashMap, @@ -49,6 +49,7 @@ pub(crate) fn load_nodes_from_arrow_c_stream< properties: &[&str], metadata: &[&str], shared_metadata: Option<&HashMap>, + schema: Option<&HashMap>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id, time]; cols_to_check.extend_from_slice(properties); @@ -68,6 +69,7 @@ pub(crate) fn load_nodes_from_arrow_c_stream< node_type, node_type_col, graph, + schema, ) } @@ -85,6 +87,7 @@ pub(crate) fn load_edges_from_arrow_c_stream< shared_metadata: Option<&HashMap>, layer: Option<&str>, layer_col: Option<&str>, + schema: Option<&HashMap>, ) -> Result<(), 
GraphError> { let mut cols_to_check = vec![src, dst, time]; cols_to_check.extend_from_slice(properties); @@ -105,6 +108,7 @@ pub(crate) fn load_edges_from_arrow_c_stream< layer, layer_col, graph, + schema, ) } @@ -119,6 +123,7 @@ pub(crate) fn load_node_metadata_from_arrow_c_stream< node_type_col: Option<&str>, metadata: &[&str], shared_metadata: Option<&HashMap>, + schema: Option<&HashMap>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id]; cols_to_check.extend_from_slice(metadata); @@ -135,6 +140,7 @@ pub(crate) fn load_node_metadata_from_arrow_c_stream< metadata, shared_metadata, graph, + schema, ) } @@ -150,6 +156,7 @@ pub(crate) fn load_edge_metadata_from_arrow_c_stream< shared_metadata: Option<&HashMap>, layer: Option<&str>, layer_col: Option<&str>, + schema: Option<&HashMap>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![src, dst]; if let Some(ref layer_col) = layer_col { @@ -167,6 +174,7 @@ pub(crate) fn load_edge_metadata_from_arrow_c_stream< layer, layer_col, graph, + schema, ) } @@ -366,6 +374,7 @@ pub(crate) fn load_nodes_from_csv_path< node_type, node_type_col, graph, + None, ) } diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index 1bac0e00db..fdab6b3c20 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -54,6 +54,7 @@ pub(crate) fn load_nodes_from_pandas< node_type, node_type_col, graph, + None, // TODO: Add schema ) } @@ -92,6 +93,7 @@ pub(crate) fn load_edges_from_pandas< layer, layer_col, graph, + None, // TODO: Add schema ) } @@ -122,6 +124,7 @@ pub(crate) fn load_node_props_from_pandas< metadata, shared_metadata, graph, + None, // TODO: Add schema ) } @@ -154,6 +157,7 @@ pub(crate) fn load_edge_props_from_pandas< layer, layer_col, graph, + None, // TODO: Add schema ) } From 0a0858e6b6ec7690f3797dff955f6a2e789e5fdc Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Mon, 8 Dec 2025 23:15:18 -0500 Subject: [PATCH 34/55] Fixed load_nodes docs. Added PropType in Python. Added get_dtype_of() function on PyProperties. Added test for schema casting. 
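A minimal usage sketch of what this patch adds, assuming a pandas DataFrame as the data source (the column names and the printed output are illustrative):

    import pandas as pd
    from raphtory import Graph, PropType

    df = pd.DataFrame(
        {
            "time": pd.Series([1, 2], dtype="int64"),
            "id": pd.Series([10, 20], dtype="int64"),
            "val_i32": pd.Series([1, 2], dtype="int32"),
        }
    )

    g = Graph()
    # Each schema entry is a (column_name, column_type) pair; the type may be
    # a PropType variant or a string alias such as "i64".
    g.load_nodes(
        data=df,
        time="time",
        id="id",
        properties=["val_i32"],
        schema=[("val_i32", PropType.i64)],
    )
    # get_dtype_of() reports the type the property was ingested with.
    print(g.node(10).properties.get_dtype_of("val_i32"))  # expected: PropType.I64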
--- python/python/raphtory/__init__.pyi | 45 ++++++- python/tests/test_load_from_df.py | 34 +++++- raphtory-api/src/python/mod.rs | 2 +- raphtory-api/src/python/prop.rs | 111 +++++++++++++++++- raphtory/src/python/graph/graph.rs | 39 +----- raphtory/src/python/graph/properties/props.rs | 13 ++ raphtory/src/python/packages/base_modules.rs | 2 + 7 files changed, 204 insertions(+), 42 deletions(-) diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 27fc20fc22..4002011035 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -26,7 +26,7 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = ['GraphView', 'Graph', 'PersistentGraph', 'Node', 'Nodes', 'PathFromNode', 'PathFromGraph', 'MutableNode', 'Edge', 'Edges', 'NestedEdges', 'MutableEdge', 'Properties', 'PyPropValueList', 'Metadata', 'TemporalProperties', 'PropertiesView', 'TemporalProp', 'WindowSet', 'IndexSpecBuilder', 'IndexSpec', 'version', 'graphql', 'algorithms', 'graph_loader', 'graph_gen', 'vectors', 'node_state', 'filter', 'iterables', 'nullmodels', 'plottingutils'] +__all__ = ['GraphView', 'Graph', 'PersistentGraph', 'Node', 'Nodes', 'PathFromNode', 'PathFromGraph', 'MutableNode', 'Edge', 'Edges', 'NestedEdges', 'MutableEdge', 'Properties', 'PyPropValueList', 'PropType', 'Metadata', 'TemporalProperties', 'PropertiesView', 'TemporalProp', 'WindowSet', 'IndexSpecBuilder', 'IndexSpec', 'version', 'graphql', 'algorithms', 'graph_loader', 'graph_gen', 'vectors', 'node_state', 'filter', 'iterables', 'nullmodels', 'plottingutils'] class GraphView(object): """Graph view is a read-only version of a graph at a certain point in time.""" @@ -1290,8 +1290,30 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_nodes(self, data, time, id, node_type=None, node_type_col=None, properties=None, metadata=None, shared_metadata=None): - ... + def load_nodes(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, schema: Optional[list[tuple[str, PropType | str]]] = None) -> None: + """ + Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + + Arguments: + data (Any): The data source containing the nodes. + time (str): The column name for the timestamps. + id (str): The column name for the node IDs. + node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. + properties (List[str], optional): List of node property column names. Defaults to None. + metadata (List[str], optional): List of node metadata column names. Defaults to None. + shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. 
+ schema (list[tuple[str, PropType | str]], optional): A list of (column_name, column_type) tuples to cast column types to. Defaults to None. + + Returns: + None: This function does not return a value if the operation is successful. + + Raises: + GraphError: If the operation fails. + """ def load_nodes_from_df(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ @@ -6208,6 +6230,18 @@ class Properties(object): PropValue: """ + def get_dtype_of(self, key: str) -> PropType: + """ + Get the PropType of a property. Specifically, returns the PropType of the latest value for this property if it exists. + If not, it returns the PropType for the static property matching this name. + + Arguments: + key (str): the name of the property. + + Returns: + PropType: + """ + def items(self) -> list[Tuple[str, PropValue]]: """ Get a list of key-value pairs @@ -6332,6 +6366,11 @@ class PyPropValueList(object): PropValue: """ +class PropType(object): + + def __repr__(self): + """Return repr(self).""" + class Metadata(object): """A view of metadata of an entity""" diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py index c3c493d43e..757269277e 100644 --- a/python/tests/test_load_from_df.py +++ b/python/tests/test_load_from_df.py @@ -3,7 +3,7 @@ import polars as pl import pandas as pd -from raphtory import Graph, PersistentGraph +from raphtory import Graph, PersistentGraph, PropType import pytest try: import fireducks.pandas as fpd @@ -116,6 +116,38 @@ def test_different_data_sources(): for i in range(len(num_nodes_ingested)-1): assert num_nodes_ingested[0] == num_nodes_ingested[i+1] +def test_schema_casting(): + # time/id as regular ints (I64), value column as explicit int32 + df = pd.DataFrame( + { + "time": pd.Series([1, 2, 3], dtype="int64"), + "id": pd.Series([10, 20, 30], dtype="int64"), + "val_i32": pd.Series([1, 2, 3], dtype="int32"), + } + ) + g = Graph() + g.load_nodes( + data=df, + time="time", + id="id", + properties=["val_i32"], + # Request that this column be treated as I64 + schema=[("val_i32", PropType.i64)], + ) + n_prop = g.node(10).properties + print(f"\ndtype of Property 'val_i32' with cast: {n_prop.get_dtype_of("val_i32")}") + del g + g = Graph() + g.load_nodes( + data=df, + time="time", + id="id", + properties=["val_i32"], + # No casting + ) + n_prop = g.node(10).properties + print(f"dtype of Property 'val_i32' without cast: {n_prop.get_dtype_of("val_i32")}") + if fpd: import pandas diff --git a/raphtory-api/src/python/mod.rs b/raphtory-api/src/python/mod.rs index 70fdc1d1f1..ea296f4723 100644 --- a/raphtory-api/src/python/mod.rs +++ b/raphtory-api/src/python/mod.rs @@ -2,4 +2,4 @@ mod arcstr; mod direction; pub mod error; mod gid; -mod prop; +pub mod prop; diff --git a/raphtory-api/src/python/prop.rs b/raphtory-api/src/python/prop.rs index a6875b2876..def51349a1 100644 --- a/raphtory-api/src/python/prop.rs +++ b/raphtory-api/src/python/prop.rs @@ -1,4 +1,4 @@ -use crate::core::entities::properties::prop::Prop; +use crate::core::entities::properties::prop::{Prop, PropType}; use bigdecimal::BigDecimal; use pyo3::{ exceptions::PyTypeError, @@ -8,6 +8,7 @@ use pyo3::{ Bound, FromPyObject, IntoPyObject, IntoPyObjectExt, Py, PyAny, PyErr, PyResult, Python, }; use std::{ops::Deref, str::FromStr, sync::Arc}; +use pyo3::pybacked::PyBackedStr; #[cfg(feature = "arrow")] mod 
array_ext { @@ -126,3 +127,111 @@ impl<'source> FromPyObject<'source> for Prop { ))) } } + +#[pyclass(name = "PropType", frozen, module = "raphtory")] +pub struct PyPropType(pub PropType); + +#[pymethods] +impl PyPropType { + #[classattr] + pub fn u8() -> PropType { + PropType::U8 + } + + #[classattr] + pub fn u16() -> PropType { + PropType::U16 + } + + #[classattr] + pub fn u32() -> PropType { + PropType::U32 + } + + #[classattr] + pub fn u64() -> PropType { + PropType::U64 + } + + #[classattr] + pub fn i32() -> PropType { + PropType::I32 + } + + #[classattr] + pub fn i64() -> PropType { + PropType::I64 + } + + #[classattr] + pub fn f32() -> PropType { + PropType::F32 + } + + #[classattr] + pub fn f64() -> PropType { + PropType::F64 + } + + #[classattr] + pub fn str() -> PropType { + PropType::Str + } + + #[classattr] + pub fn bool() -> PropType { + PropType::Bool + } + + #[classattr] + pub fn naive_datetime() -> PropType { + PropType::NDTime + } + + #[classattr] + pub fn datetime() -> PropType { + PropType::DTime + } + + fn __repr__(&self) -> String { + format!("PropType.{}", self.0) + } +} + +impl<'py> IntoPyObject<'py> for PropType { + type Target = PyPropType; + type Output = Bound<'py, Self::Target>; + type Error = >::Error; + + fn into_pyobject(self, py: Python<'py>) -> Result { + PyPropType(self).into_pyobject(py) + } +} + +impl<'source> FromPyObject<'source> for PropType { + fn extract_bound(ob: &Bound<'source, PyAny>) -> PyResult { + if let Ok(prop_type) = ob.downcast::() { + Ok(prop_type.get().0.clone()) + } else if let Ok(prop_type_str) = ob.extract::() { + match prop_type_str.deref().to_ascii_lowercase().as_str() { + "i64" | "int64" | "int" => Ok(PropType::I64), + "i32" | "int32" => Ok(PropType::I32), + "u64" | "uint64" => Ok(PropType::U64), + "u32" | "uint32" => Ok(PropType::U32), + "u16" | "uint16" => Ok(PropType::U16), + "u8" | "uint8" => Ok(PropType::U8), + "f64" | "float64" | "float" | "double" => Ok(PropType::F64), + "f32" | "float32" => Ok(PropType::F32), + "bool" | "boolean" => Ok(PropType::Bool), + "str" | "string" | "utf8" => Ok(PropType::Str), + "ndtime" | "naivedatetime" | "datetime" => Ok(PropType::NDTime), + "dtime" | "datetimetz" => Ok(PropType::DTime), + other => Err(PyTypeError::new_err(format!( + "Unknown type name '{other:?}'" + ))), + } + } else { + Err(PyTypeError::new_err("PropType must be a string or an instance of itself.")) + } + } +} \ No newline at end of file diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index de121762ee..afe08ca3eb 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -164,40 +164,6 @@ impl PyGraphEncoder { fn __getstate__(&self) {} } -fn parse_column_schema<'py>( - schema: Option>, -) -> PyResult>> { - let Some(pairs) = schema else { - return Ok(None); - }; - - // TODO: Move this to FromPyObject impl?
- let parse_type_fn = |s: &str| match s.to_ascii_lowercase().as_str() { - "i64" | "int64" | "int" => Ok(PropType::I64), - "i32" | "int32" => Ok(PropType::I32), - "u64" | "uint64" => Ok(PropType::U64), - "u32" | "uint32" => Ok(PropType::U32), - "u16" | "uint16" => Ok(PropType::U16), - "u8" | "uint8" => Ok(PropType::U8), - "f64" | "float64" | "float" | "double" => Ok(PropType::F64), - "f32" | "float32" => Ok(PropType::F32), - "bool" | "boolean" => Ok(PropType::Bool), - "str" | "string" | "utf8" => Ok(PropType::Str), - "ndtime" | "naivedatetime" | "datetime" => Ok(PropType::NDTime), - "dtime" | "datetimetz" => Ok(PropType::DTime), - other => Err(PyTypeError::new_err(format!( - "Unknown type name '{other:?}' in schema" - ))), - }; - let mut out = HashMap::with_capacity(pairs.len()); - for (name, type_str) in pairs { - let col_name = name.to_string(); - let prop_type = parse_type_fn(type_str.as_ref())?; - out.insert(col_name, prop_type); - } - Ok(Some(out)) -} - /// A temporal graph. #[pymethods] impl PyGraph { @@ -691,6 +657,7 @@ impl PyGraph { /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + /// schema (list[tuple[str, PropType | str]], optional): A list of (column_name, column_type) tuples to cast column types to. Defaults to None. /// /// Returns: /// None: This function does not return a value if the operation is successful. @@ -710,11 +677,11 @@ impl PyGraph { properties: Option>, metadata: Option>, shared_metadata: Option>, - schema: Option>, + schema: Option>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - let column_schema = parse_column_schema(schema).map_err(|e| GraphError::PythonError(e))?; + let column_schema = schema.map(|s| s.into_iter().collect::>()); if data.hasattr("__arrow_c_stream__")? { load_nodes_from_arrow_c_stream( &self.graph, diff --git a/raphtory/src/python/graph/properties/props.rs b/raphtory/src/python/graph/properties/props.rs index e250a08572..76f71403f4 100644 --- a/raphtory/src/python/graph/properties/props.rs +++ b/raphtory/src/python/graph/properties/props.rs @@ -26,6 +26,7 @@ use pyo3::{ }; use raphtory_api::core::{entities::properties::prop::Prop, storage::arc_str::ArcStr}; use std::{collections::HashMap, ops::Deref, sync::Arc}; +use raphtory_api::core::entities::properties::prop::PropType; #[derive(Clone, Debug)] pub struct PyPropsComp(HashMap); @@ -98,6 +99,18 @@ impl PyProperties { self.props.get(key) } + /// Get the PropType of a property. Specifically, returns the PropType of the latest value for this property if it exists. + /// If not, it returns the PropType for the static property matching this name. + /// + /// Arguments: + /// key (str): the name of the property. + /// + /// Returns: + /// PropType: + pub fn get_dtype_of(&self, key: &str) -> Option { + self.props.get(key).map(|p| p.dtype()) + } + /// Check if property `key` exists.
/// /// Returns: diff --git a/raphtory/src/python/packages/base_modules.rs b/raphtory/src/python/packages/base_modules.rs index 4a1d507a97..8e82975b38 100644 --- a/raphtory/src/python/packages/base_modules.rs +++ b/raphtory/src/python/packages/base_modules.rs @@ -39,6 +39,7 @@ use crate::{ }, }; use pyo3::prelude::*; +use raphtory_api::python::prop::PyPropType; pub fn add_raphtory_classes(m: &Bound) -> PyResult<()> { //Graph classes @@ -59,6 +60,7 @@ pub fn add_raphtory_classes(m: &Bound) -> PyResult<()> { PyMutableEdge, PyProperties, PyPropValueList, + PyPropType, PyMetadata, PyTemporalProperties, PropertiesView, From d2bc2473d2b0a770561cc8db466b977d65bcb9bc Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Tue, 9 Dec 2025 03:47:58 -0500 Subject: [PATCH 35/55] Fixed casting of columns, can use PropType variants in python to specify what type to cast columns to. --- raphtory-api/src/python/prop.rs | 8 +- raphtory/src/io/arrow/df_loaders.rs | 48 +++------- raphtory/src/io/arrow/prop_handler.rs | 13 +-- raphtory/src/io/parquet_loaders.rs | 15 +-- raphtory/src/python/graph/graph.rs | 5 +- raphtory/src/python/graph/io/arrow_loaders.rs | 92 ++++++++++++++----- .../src/python/graph/io/pandas_loaders.rs | 4 - raphtory/src/python/graph/properties/props.rs | 6 +- 8 files changed, 99 insertions(+), 92 deletions(-) diff --git a/raphtory-api/src/python/prop.rs b/raphtory-api/src/python/prop.rs index def51349a1..2a0ca5ceed 100644 --- a/raphtory-api/src/python/prop.rs +++ b/raphtory-api/src/python/prop.rs @@ -3,12 +3,12 @@ use bigdecimal::BigDecimal; use pyo3::{ exceptions::PyTypeError, prelude::*, + pybacked::PyBackedStr, sync::GILOnceCell, types::{PyBool, PyType}, Bound, FromPyObject, IntoPyObject, IntoPyObjectExt, Py, PyAny, PyErr, PyResult, Python, }; use std::{ops::Deref, str::FromStr, sync::Arc}; -use pyo3::pybacked::PyBackedStr; #[cfg(feature = "arrow")] mod array_ext { @@ -231,7 +231,9 @@ impl<'source> FromPyObject<'source> for PropType { ))), } } else { - Err(PyTypeError::new_err("PropType must be a string or an instance of itself.")) + Err(PyTypeError::new_err( + "PropType must be a string or an instance of itself.", + )) } } -} \ No newline at end of file +} diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 94f96e59c8..f12e96b9c1 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -69,7 +69,6 @@ pub(crate) fn load_nodes_from_df< node_type: Option<&str>, node_type_col: Option<&str>, graph: &G, - schema: Option<&HashMap>, ) -> Result<(), GraphError> { if matches!(df_view.is_empty(), Some(true)) { return Ok(()); @@ -113,19 +112,14 @@ pub(crate) fn load_nodes_from_df< for chunk in df_view.chunks { let df = chunk?; let start_id = graph.reserve_event_ids(df.len()).map_err(into_graph_err)?; - let prop_cols = combine_properties_arrow( - properties, - &properties_indices, - &df, - schema, - |key, dtype| { + let prop_cols = + combine_properties_arrow(properties, &properties_indices, &df, |key, dtype| { graph .resolve_node_property(key, dtype, false) .map_err(into_graph_err) - }, - )?; + })?; let metadata_cols = - combine_properties_arrow(metadata, &metadata_indices, &df, schema, |key, dtype| { + combine_properties_arrow(metadata, &metadata_indices, &df, |key, dtype| { graph .resolve_node_property(key, dtype, true) .map_err(into_graph_err) @@ -239,7 +233,6 @@ pub fn load_edges_from_df< layer: Option<&str>, layer_col: Option<&str>, graph: &G, - schema: Option<&HashMap>, ) -> Result<(), GraphError> { if 
matches!(df_view.is_empty(), Some(true)) { return Ok(()); @@ -287,19 +280,14 @@ pub fn load_edges_from_df< for chunk in df_view.chunks { let df = chunk?; let start_idx = graph.reserve_event_ids(df.len()).map_err(into_graph_err)?; - let prop_cols = combine_properties_arrow( - properties, - &properties_indices, - &df, - schema, - |key, dtype| { + let prop_cols = + combine_properties_arrow(properties, &properties_indices, &df, |key, dtype| { graph .resolve_edge_property(key, dtype, false) .map_err(into_graph_err) - }, - )?; + })?; let metadata_cols = - combine_properties_arrow(metadata, &metadata_indices, &df, schema, |key, dtype| { + combine_properties_arrow(metadata, &metadata_indices, &df, |key, dtype| { graph .resolve_edge_property(key, dtype, true) .map_err(into_graph_err) @@ -555,7 +543,6 @@ pub(crate) fn load_node_props_from_df< metadata: &[&str], shared_metadata: Option<&HashMap>, graph: &G, - schema: Option<&HashMap>, ) -> Result<(), GraphError> { if matches!(df_view.is_empty(), Some(true)) { return Ok(()); @@ -594,7 +581,7 @@ pub(crate) fn load_node_props_from_df< for chunk in df_view.chunks { let df = chunk?; let metadata_cols = - combine_properties_arrow(metadata, &metadata_indices, &df, schema, |key, dtype| { + combine_properties_arrow(metadata, &metadata_indices, &df, |key, dtype| { graph .resolve_node_property(key, dtype, true) .map_err(into_graph_err) @@ -679,7 +666,6 @@ pub(crate) fn load_edges_props_from_df< layer: Option<&str>, layer_col: Option<&str>, graph: &G, - schema: Option<&HashMap>, ) -> Result<(), GraphError> { if matches!(df_view.is_empty(), Some(true)) { return Ok(()); @@ -724,7 +710,7 @@ pub(crate) fn load_edges_props_from_df< for chunk in df_view.chunks { let df = chunk?; let metadata_cols = - combine_properties_arrow(metadata, &metadata_indices, &df, schema, |key, dtype| { + combine_properties_arrow(metadata, &metadata_indices, &df, |key, dtype| { graph .resolve_edge_property(key, dtype, true) .map_err(into_graph_err) @@ -844,7 +830,6 @@ pub(crate) fn load_graph_props_from_df< properties: Option<&[&str]>, metadata: Option<&[&str]>, graph: &G, - schema: Option<&HashMap>, ) -> Result<(), GraphError> { if matches!(df_view.is_empty(), Some(true)) { return Ok(()); @@ -869,19 +854,14 @@ pub(crate) fn load_graph_props_from_df< for chunk in df_view.chunks { let df = chunk?; let start_id = graph.reserve_event_ids(df.len()).map_err(into_graph_err)?; - let prop_cols = combine_properties_arrow( - properties, - &properties_indices, - &df, - schema, - |key, dtype| { + let prop_cols = + combine_properties_arrow(properties, &properties_indices, &df, |key, dtype| { graph .resolve_graph_property(key, dtype, false) .map_err(into_graph_err) - }, - )?; + })?; let metadata_cols = - combine_properties_arrow(metadata, &metadata_indices, &df, schema, |key, dtype| { + combine_properties_arrow(metadata, &metadata_indices, &df, |key, dtype| { graph .resolve_graph_property(key, dtype, true) .map_err(into_graph_err) diff --git a/raphtory/src/io/arrow/prop_handler.rs b/raphtory/src/io/arrow/prop_handler.rs index 6dedfa1a0b..7e7c2a6667 100644 --- a/raphtory/src/io/arrow/prop_handler.rs +++ b/raphtory/src/io/arrow/prop_handler.rs @@ -25,7 +25,6 @@ use raphtory_api::core::{ }; use rayon::prelude::*; use rustc_hash::FxHashMap; -use std::collections::HashMap; pub struct PropCols { prop_ids: Vec, @@ -56,7 +55,6 @@ pub fn combine_properties_arrow( props: &[impl AsRef], indices: &[usize], df: &DFChunk, - schema: Option<&HashMap>, prop_id_resolver: impl Fn(&str, PropType) -> Result, E>, ) -> Result 
where @@ -64,16 +62,7 @@ where { let dtypes = indices .iter() - .enumerate() - .map(|(i, idx)| { - let col_name = props[i].as_ref(); - if let Some(schema_map) = schema { - if let Some(prop_type) = schema_map.get(col_name) { - return Ok(prop_type.clone()); - } - } - data_type_as_prop_type(df.chunk[*idx].data_type()) - }) + .map(|idx| data_type_as_prop_type(df.chunk[*idx].data_type())) .collect::, _>>()?; let cols = indices .iter() diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index fa3293fcda..1ade06f350 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -51,7 +51,6 @@ pub fn load_nodes_from_parquet< node_type, node_type_col, graph, - None, // TODO: Add schema ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } @@ -131,7 +130,6 @@ pub fn load_edges_from_parquet< layer, layer_col, graph, - None, // TODO: Add schema ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; @@ -169,7 +167,6 @@ pub fn load_node_props_from_parquet< metadata_properties, shared_metadata, graph, - None, // TODO: Add schema ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } @@ -209,7 +206,6 @@ pub fn load_edge_props_from_parquet< layer, layer_col, graph, - None, // TODO: Add schema ) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; } @@ -258,15 +254,8 @@ pub fn load_graph_props_from_parquet() { // handles Strings too diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index d28da7e962..56b3406496 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -13,21 +13,23 @@ use crate::{ serialise::incremental::InternalCache, }; use arrow::{ - array::{RecordBatch, RecordBatchReader}, - datatypes::SchemaRef, + array::{Array, RecordBatch, RecordBatchReader}, + compute::cast, + datatypes::{Field, Schema, SchemaRef}, }; use arrow_csv::{reader::Format, ReaderBuilder}; use bzip2::read::BzDecoder; use flate2::read::GzDecoder; use pyo3::{prelude::*, types::PyCapsule}; use pyo3_arrow::PyRecordBatchReader; -use raphtory_api::core::entities::properties::prop::{Prop, PropType}; +use raphtory_api::core::entities::properties::prop::{arrow_dtype_from_prop_type, Prop, PropType}; use std::{ cmp::min, collections::HashMap, fs, fs::File, iter, + ops::Deref, path::{Path, PathBuf}, sync::Arc, }; @@ -47,7 +49,7 @@ pub(crate) fn load_nodes_from_arrow_c_stream< properties: &[&str], metadata: &[&str], shared_metadata: Option<&HashMap>, - schema: Option<&HashMap>, + schema: Option>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id, time]; cols_to_check.extend_from_slice(properties); @@ -55,7 +57,7 @@ pub(crate) fn load_nodes_from_arrow_c_stream< if let Some(ref node_type_col) = node_type_col { cols_to_check.push(node_type_col.as_ref()); } - let df_view = process_arrow_c_stream_df(data, cols_to_check.clone())?; + let df_view = process_arrow_c_stream_df(data, cols_to_check.clone(), schema)?; df_view.check_cols_exist(&cols_to_check)?; load_nodes_from_df( df_view, @@ -67,7 +69,6 @@ pub(crate) fn load_nodes_from_arrow_c_stream< node_type, node_type_col, graph, - schema, ) } @@ -85,7 +86,7 @@ pub(crate) fn load_edges_from_arrow_c_stream< shared_metadata: Option<&HashMap>, layer: Option<&str>, layer_col: Option<&str>, - schema: Option<&HashMap>, + schema: Option>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![src, dst, time]; 
cols_to_check.extend_from_slice(properties); @@ -93,7 +94,7 @@ pub(crate) fn load_edges_from_arrow_c_stream< if let Some(layer_col) = layer_col { cols_to_check.push(layer_col.as_ref()); } - let df_view = process_arrow_c_stream_df(data, cols_to_check.clone())?; + let df_view = process_arrow_c_stream_df(data, cols_to_check.clone(), schema)?; df_view.check_cols_exist(&cols_to_check)?; load_edges_from_df( df_view, @@ -106,7 +107,6 @@ pub(crate) fn load_edges_from_arrow_c_stream< layer, layer_col, graph, - schema, ) } @@ -121,14 +121,14 @@ pub(crate) fn load_node_metadata_from_arrow_c_stream< node_type_col: Option<&str>, metadata: &[&str], shared_metadata: Option<&HashMap>, - schema: Option<&HashMap>, + schema: Option>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id]; cols_to_check.extend_from_slice(metadata); if let Some(ref node_type_col) = node_type_col { cols_to_check.push(node_type_col.as_ref()); } - let df_view = process_arrow_c_stream_df(data, cols_to_check.clone())?; + let df_view = process_arrow_c_stream_df(data, cols_to_check.clone(), schema)?; df_view.check_cols_exist(&cols_to_check)?; load_node_props_from_df( df_view, @@ -138,7 +138,6 @@ pub(crate) fn load_node_metadata_from_arrow_c_stream< metadata, shared_metadata, graph, - schema, ) } @@ -154,14 +153,14 @@ pub(crate) fn load_edge_metadata_from_arrow_c_stream< shared_metadata: Option<&HashMap>, layer: Option<&str>, layer_col: Option<&str>, - schema: Option<&HashMap>, + schema: Option>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![src, dst]; if let Some(ref layer_col) = layer_col { cols_to_check.push(layer_col.as_ref()); } cols_to_check.extend_from_slice(metadata); - let df_view = process_arrow_c_stream_df(data, cols_to_check.clone())?; + let df_view = process_arrow_c_stream_df(data, cols_to_check.clone(), schema)?; df_view.check_cols_exist(&cols_to_check)?; load_edges_props_from_df( df_view, @@ -172,7 +171,6 @@ pub(crate) fn load_edge_metadata_from_arrow_c_stream< layer, layer_col, graph, - schema, ) } @@ -193,7 +191,7 @@ pub(crate) fn load_edge_deletions_from_arrow_c_stream< cols_to_check.push(layer_col.as_ref()); } - let df_view = process_arrow_c_stream_df(data, cols_to_check.clone())?; + let df_view = process_arrow_c_stream_df(data, cols_to_check.clone(), None)?; df_view.check_cols_exist(&cols_to_check)?; load_edge_deletions_from_df( df_view, @@ -210,6 +208,7 @@ pub(crate) fn load_edge_deletions_from_arrow_c_stream< pub(crate) fn process_arrow_c_stream_df<'a>( data: &Bound<'a, PyAny>, col_names: Vec<&str>, + schema: Option>, ) -> PyResult> + 'a>> { let py = data.py(); is_jupyter(py); @@ -245,11 +244,10 @@ pub(crate) fn process_arrow_c_stream_df<'a>( })?; // Get column names and indices once only - let schema: SchemaRef = reader.schema(); let mut names: Vec = Vec::with_capacity(col_names.len()); let mut indices: Vec = Vec::with_capacity(col_names.len()); - for (idx, field) in schema.fields().iter().enumerate() { + for (idx, field) in reader.schema().fields().iter().enumerate() { if col_names.contains(&field.name().as_str()) { names.push(field.name().clone()); indices.push(idx); @@ -265,7 +263,7 @@ pub(crate) fn process_arrow_c_stream_df<'a>( let chunks = reader .into_iter() .flat_map(move |batch_res: Result| { - let batch = match batch_res.map_err(|e| { + let batch: RecordBatch = match batch_res.map_err(|e| { GraphError::LoadFailure(format!( "Arrow stream error while reading a batch: {}", e.to_string() @@ -274,13 +272,66 @@ pub(crate) fn process_arrow_c_stream_df<'a>( Ok(batch) => batch, Err(e) => 
return vec![Err(e)], }; + let casted_batch = if let Some(schema) = &schema { + match cast_columns(&batch, schema) { + Ok(casted_batch) => casted_batch, + Err(e) => return vec![Err(e)], + } + } else { + batch + }; - split_into_chunks(&batch, &indices) + split_into_chunks(&casted_batch, &indices) }); Ok(DFView::new(names, chunks, len_from_python)) } +fn cast_columns( + batch: &RecordBatch, + schema: &HashMap, +) -> Result { + let old_schema_ref = batch.schema(); + let old_fields = old_schema_ref.fields(); + + let mut new_columns = Vec::with_capacity(batch.num_columns()); + let mut new_fields: Vec = Vec::with_capacity(batch.num_columns()); + + for (i, field) in old_fields.iter().enumerate() { + let col = batch.column(i); + + if let Some(target_prop_type) = schema.get(field.name()) { + let target_dtype = arrow_dtype_from_prop_type(target_prop_type); + + if col.data_type() != &target_dtype { + let casted = cast(col.as_ref(), &target_dtype).map_err(|e| { + GraphError::LoadFailure(format!( + "Failed to cast column '{}' from {:?} to {:?}: {e}", + field.name(), + col.data_type(), + target_dtype + )) + })?; + new_columns.push(casted); + let new_field = Field::new(field.name(), target_dtype, field.is_nullable()) + .with_metadata(field.metadata().clone()); + new_fields.push(new_field); + } else { + // type was already correct + new_columns.push(col.clone()); + new_fields.push(field.deref().clone()); + } + } else { + // schema doesn't say anything about this column + new_columns.push(col.clone()); + new_fields.push(field.deref().clone()); + } + } + let new_schema = Arc::new(Schema::new(new_fields)); + RecordBatch::try_new(new_schema, new_columns) + .map_err(|e| GraphError::LoadFailure(format!("Failed while casting columns: {e}"))) +} + /// Splits a RecordBatch into chunks of CHUNK_SIZE owned by DFChunk objects fn split_into_chunks(batch: &RecordBatch, indices: &[usize]) -> Vec> { // many times, all the data will be passed as a single RecordBatch, meaning the progress bar @@ -378,7 +429,6 @@ pub(crate) fn load_nodes_from_csv_path< node_type, node_type_col, graph, - None, ) } diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs index fdab6b3c20..1bac0e00db 100644 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ b/raphtory/src/python/graph/io/pandas_loaders.rs @@ -54,7 +54,6 @@ pub(crate) fn load_nodes_from_pandas< node_type, node_type_col, graph, - None, // TODO: Add schema ) } @@ -93,7 +92,6 @@ pub(crate) fn load_edges_from_pandas< layer, layer_col, graph, - None, // TODO: Add schema ) } @@ -124,7 +122,6 @@ pub(crate) fn load_node_props_from_pandas< metadata, shared_metadata, graph, - None, // TODO: Add schema ) } @@ -157,7 +154,6 @@ pub(crate) fn load_edge_props_from_pandas< layer, layer_col, graph, - None, // TODO: Add schema ) } diff --git a/raphtory/src/python/graph/properties/props.rs b/raphtory/src/python/graph/properties/props.rs index 76f71403f4..3772f966d1 100644 --- a/raphtory/src/python/graph/properties/props.rs +++ b/raphtory/src/python/graph/properties/props.rs @@ -24,9 +24,11 @@ use pyo3::{ exceptions::{PyKeyError, PyTypeError}, prelude::*, }; -use raphtory_api::core::{entities::properties::prop::Prop, storage::arc_str::ArcStr}; +use raphtory_api::core::{ + entities::properties::prop::{Prop, PropType}, + storage::arc_str::ArcStr, +}; use std::{collections::HashMap, ops::Deref, sync::Arc}; -use raphtory_api::core::entities::properties::prop::PropType; #[derive(Clone, Debug)] pub struct PyPropsComp(HashMap); From 
deb8cd4c5c03a3bbf4a866967d592fa7a2df3a4a Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Tue, 9 Dec 2025 04:23:00 -0500 Subject: [PATCH 36/55] Added casting using pyarrow types as input in the schema --- python/python/raphtory/__init__.pyi | 4 +-- python/tests/test_load_from_df.py | 21 ++++++++++-- raphtory-api/src/python/prop.rs | 50 +++++++++++++++++++++++++++++ raphtory/src/python/graph/graph.rs | 10 ++---- 4 files changed, 73 insertions(+), 12 deletions(-) diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 4002011035..1772e9d4ff 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1290,7 +1290,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_nodes(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, schema: Optional[list[tuple[str, PropType | str]]] = None) -> None: + def load_nodes(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, schema: Any = None) -> None: """ Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. @@ -1306,7 +1306,7 @@ class Graph(GraphView): properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - schema (list[tuple[str, PropType | str]], optional): A list of (column_name, column_type) tuples to cast column types to. Defaults to None. + schema (list[tuple[str, DataType | PropType | str]], optional): A list of (column_name, column_type) tuples to cast column types to. Defaults to None. Returns: None: This function does not return a value if the operation is successful. 
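For illustration, a sketch of the schema input this patch enables, assuming pyarrow is installed (column names are illustrative); a pyarrow DataType can now be used in place of a PropType variant or string alias:

    import pyarrow as pa
    from raphtory import Graph

    table = pa.Table.from_pydict(
        {
            "time": pa.array([1, 2], type=pa.int64()),
            "id": pa.array([10, 20], type=pa.int64()),
            "score": pa.array([1, 2], type=pa.int32()),
        }
    )

    g = Graph()
    # A pyarrow DataType is accepted as the type half of each (column, type)
    # pair and is converted to the matching PropType internally.
    g.load_nodes(
        data=table,
        time="time",
        id="id",
        properties=["score"],
        schema=[("score", pa.int64())],
    )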
diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py index 757269277e..cc09508c8e 100644 --- a/python/tests/test_load_from_df.py +++ b/python/tests/test_load_from_df.py @@ -3,6 +3,7 @@ import polars as pl import pandas as pd +import pyarrow as pa from raphtory import Graph, PersistentGraph, PropType import pytest try: @@ -125,6 +126,18 @@ def test_schema_casting(): "val_i32": pd.Series([1, 2, 3], dtype="int32"), } ) + g = Graph() + g.load_nodes( + data=df, + time="time", + id="id", + properties=["val_i32"], + # No casting + ) + n_prop = g.node(10).properties + print(f"\ndtype of Property 'val_i32' without cast: {n_prop.get_dtype_of("val_i32")}") + del g + g = Graph() g.load_nodes( data=df, @@ -135,18 +148,20 @@ def test_schema_casting(): schema=[("val_i32", PropType.i64)], ) n_prop = g.node(10).properties - print(f"\ndtype of Property 'val_i32' with cast: {n_prop.get_dtype_of("val_i32")}") + print(f"dtype of Property 'val_i32' with PropType cast: {n_prop.get_dtype_of("val_i32")}") del g + g = Graph() g.load_nodes( data=df, time="time", id="id", properties=["val_i32"], - # No casting + # Request that this column be treated as I64 + schema=[("val_i32", pa.int64())], ) n_prop = g.node(10).properties - print(f"dtype of Property 'val_i32' without cast: {n_prop.get_dtype_of("val_i32")}") + print(f"dtype of Property 'val_i32' with pyarrow cast: {n_prop.get_dtype_of("val_i32")}") if fpd: diff --git a/raphtory-api/src/python/prop.rs b/raphtory-api/src/python/prop.rs index 2a0ca5ceed..fc9b5fc710 100644 --- a/raphtory-api/src/python/prop.rs +++ b/raphtory-api/src/python/prop.rs @@ -1,4 +1,5 @@ use crate::core::entities::properties::prop::{Prop, PropType}; +use arrow_schema::DataType; use bigdecimal::BigDecimal; use pyo3::{ exceptions::PyTypeError, @@ -8,6 +9,7 @@ use pyo3::{ types::{PyBool, PyType}, Bound, FromPyObject, IntoPyObject, IntoPyObjectExt, Py, PyAny, PyErr, PyResult, Python, }; +use pyo3_arrow::PyDataType; use std::{ops::Deref, str::FromStr, sync::Arc}; #[cfg(feature = "arrow")] @@ -230,6 +232,8 @@ impl<'source> FromPyObject<'source> for PropType { "Unknown type name '{other:?}'" ))), } + } else if let Ok(py_datatype) = ob.extract::() { + data_type_as_prop_type(&py_datatype.into_inner()) } else { Err(PyTypeError::new_err( "PropType must be a string or an instance of itself.", @@ -237,3 +241,49 @@ impl<'source> FromPyObject<'source> for PropType { } } } + +// TODO: Get rid of this and use the one in prop_handler.rs instead +fn data_type_as_prop_type(dt: &DataType) -> Result { + match dt { + DataType::Boolean => Ok(PropType::Bool), + DataType::Int32 => Ok(PropType::I32), + DataType::Int64 => Ok(PropType::I64), + DataType::UInt8 => Ok(PropType::U8), + DataType::UInt16 => Ok(PropType::U16), + DataType::UInt32 => Ok(PropType::U32), + DataType::UInt64 => Ok(PropType::U64), + DataType::Float32 => Ok(PropType::F32), + DataType::Float64 => Ok(PropType::F64), + DataType::Utf8 => Ok(PropType::Str), + DataType::LargeUtf8 => Ok(PropType::Str), + DataType::Utf8View => Ok(PropType::Str), + DataType::Struct(fields) => Ok(PropType::map(fields.iter().filter_map(|f| { + data_type_as_prop_type(f.data_type()) + .ok() + .map(move |pt| (f.name(), pt)) + }))), + DataType::List(v) => Ok(PropType::List(Box::new(data_type_as_prop_type( + v.data_type(), + )?))), + DataType::FixedSizeList(v, _) => Ok(PropType::List(Box::new(data_type_as_prop_type( + v.data_type(), + )?))), + DataType::LargeList(v) => Ok(PropType::List(Box::new(data_type_as_prop_type( + v.data_type(), + )?))), + 
DataType::Timestamp(_, v) => match v { + None => Ok(PropType::NDTime), + Some(_) => Ok(PropType::DTime), + }, + DataType::Date32 => Ok(PropType::NDTime), + DataType::Date64 => Ok(PropType::NDTime), + DataType::Decimal128(precision, scale) if *precision <= 38 => Ok(PropType::Decimal { + scale: *scale as i64, + }), + DataType::Null => Ok(PropType::Empty), + _ => Err(PyTypeError::new_err(format!( + "Unsupported Arrow DataType {:?}", + dt + ))), + } +} diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index ca0a4cad89..6bc33abd0c 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -36,12 +36,7 @@ use crate::{ InternalStableDecode, StableEncode, }, }; -use pyo3::{ - exceptions::{PyValueError}, - prelude::*, - pybacked::PyBackedStr, - types::PyDict, -}; +use pyo3::{exceptions::PyValueError, prelude::*, pybacked::PyBackedStr, types::PyDict}; use raphtory_api::core::{ entities::{properties::prop::PropType, GID}, storage::arc_str::ArcStr, @@ -642,6 +637,7 @@ impl PyGraph { PyGraph::py_from_db_graph(self.graph.event_graph()) } + // TODO: Fix DataType in schema argument below /// Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), /// a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. /// The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, @@ -656,7 +652,7 @@ impl PyGraph { /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - /// schema (list[tuple[str, PropType | str]], optional): A list of (column_name, column_type) tuples to cast column types to. Defaults to None. + /// schema (list[tuple[str, DataType | PropType | str]], optional): A list of (column_name, column_type) tuples to cast column types to. Defaults to None. /// /// Returns: /// None: This function does not return a value if the operation is successful. From 694427393fe226010b6b6b46a3df81a2d89e8958 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Wed, 10 Dec 2025 04:36:41 -0500 Subject: [PATCH 37/55] Added casting of nested datatypes in the data source. Added test for nested type using pyarrow Table. Cast whole RecordBatch at once now using StructArray. --- python/python/raphtory/__init__.pyi | 60 +++++++++++++++++ python/tests/test_load_from_df.py | 52 +++++++++++++-- raphtory-api/src/python/prop.rs | 41 ++++++++---- raphtory/src/python/graph/io/arrow_loaders.rs | 65 +++++++++---------- 4 files changed, 168 insertions(+), 50 deletions(-) diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 1772e9d4ff..63b72b6c15 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -6371,6 +6371,66 @@ class PropType(object): def __repr__(self): """Return repr(self).""" + @staticmethod + def array(p): + ... + + @staticmethod + def bool(): + ... + + @staticmethod + def datetime(): + ... + + @staticmethod + def f32(): + ... + + @staticmethod + def f64(): + ... + + @staticmethod + def i32(): + ... + + @staticmethod + def i64(): + ... + + @staticmethod + def list(p): + ... + + @staticmethod + def map(hash_map): + ... 
+ + @staticmethod + def naive_datetime(): + ... + + @staticmethod + def str(): + ... + + @staticmethod + def u16(): + ... + + @staticmethod + def u32(): + ... + + @staticmethod + def u64(): + ... + + @staticmethod + def u8(): + ... + class Metadata(object): """A view of metadata of an entity""" diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py index cc09508c8e..f60c9b3cf3 100644 --- a/python/tests/test_load_from_df.py +++ b/python/tests/test_load_from_df.py @@ -127,43 +127,87 @@ def test_schema_casting(): } ) g = Graph() + # No casting g.load_nodes( data=df, time="time", id="id", properties=["val_i32"], - # No casting ) n_prop = g.node(10).properties print(f"\ndtype of Property 'val_i32' without cast: {n_prop.get_dtype_of("val_i32")}") del g + # Cast the val_i32 column to I64 using PropType.i64() g = Graph() g.load_nodes( data=df, time="time", id="id", properties=["val_i32"], - # Request that this column be treated as I64 - schema=[("val_i32", PropType.i64)], + schema=[("val_i32", PropType.i64())], ) n_prop = g.node(10).properties print(f"dtype of Property 'val_i32' with PropType cast: {n_prop.get_dtype_of("val_i32")}") del g + # Cast the val_i32 column to I64 using PyArrow int64 DataType g = Graph() g.load_nodes( data=df, time="time", id="id", properties=["val_i32"], - # Request that this column be treated as I64 schema=[("val_i32", pa.int64())], ) n_prop = g.node(10).properties print(f"dtype of Property 'val_i32' with pyarrow cast: {n_prop.get_dtype_of("val_i32")}") +def test_nested_schema_casting(): + table = pa.Table.from_pydict( + { + "time": pa.array([1, 2, 3], type=pa.int64()), + "id": pa.array([10, 20, 30], type=pa.int64()), + "val_list_i32": pa.array( + [[1, 2], [3, 4], [5, 6]], + type=pa.list_(pa.int32()), + ), + } + ) + + # No casting + g = Graph() + g.load_nodes(data=table, time="time", id="id", properties=["val_list_i32"]) + n_prop = g.node(10).properties + print(f"\ndtype of property 'val_list_i32' without cast: {n_prop.get_dtype_of("val_list_i32")}") + del g + + # Cast the val_list_i32 column to I64 using PropType.list(PropType.i64()) + g = Graph() + g.load_nodes( + data=table, + time="time", + id="id", + properties=["val_list_i32"], + schema=[("val_list_i32", PropType.list(PropType.i64()))], + ) + n_prop = g.node(10).properties + print(f"dtype of Property 'val_list_i32' with PropType cast: {n_prop.get_dtype_of("val_list_i32")}") + del g + + # Cast the val_list_i32 column to I64 using PyArrow list DataType + g = Graph() + g.load_nodes( + data=table, + time="time", + id="id", + properties=["val_list_i32"], + schema=[("val_list_i32", pa.list_(pa.int64()))], + ) + n_prop = g.node(10).properties + print(f"dtype of Property 'val_list_i32' with pyarrow cast: {n_prop.get_dtype_of("val_list_i32")}") + if fpd: import pandas diff --git a/raphtory-api/src/python/prop.rs b/raphtory-api/src/python/prop.rs index fc9b5fc710..f59235e2cb 100644 --- a/raphtory-api/src/python/prop.rs +++ b/raphtory-api/src/python/prop.rs @@ -10,7 +10,7 @@ use pyo3::{ Bound, FromPyObject, IntoPyObject, IntoPyObjectExt, Py, PyAny, PyErr, PyResult, Python, }; use pyo3_arrow::PyDataType; -use std::{ops::Deref, str::FromStr, sync::Arc}; +use std::{collections::HashMap, ops::Deref, str::FromStr, sync::Arc}; #[cfg(feature = "arrow")] mod array_ext { @@ -135,66 +135,81 @@ pub struct PyPropType(pub PropType); #[pymethods] impl PyPropType { - #[classattr] + #[staticmethod] pub fn u8() -> PropType { PropType::U8 } - #[classattr] + #[staticmethod] pub fn u16() -> PropType { PropType::U16 } 
- #[classattr] + #[staticmethod] pub fn u32() -> PropType { PropType::U32 } - #[classattr] + #[staticmethod] pub fn u64() -> PropType { PropType::U64 } - #[classattr] + #[staticmethod] pub fn i32() -> PropType { PropType::I32 } - #[classattr] + #[staticmethod] pub fn i64() -> PropType { PropType::I64 } - #[classattr] + #[staticmethod] pub fn f32() -> PropType { PropType::F32 } - #[classattr] + #[staticmethod] pub fn f64() -> PropType { PropType::F64 } - #[classattr] + #[staticmethod] pub fn str() -> PropType { PropType::Str } - #[classattr] + #[staticmethod] pub fn bool() -> PropType { PropType::Bool } - #[classattr] + #[staticmethod] pub fn naive_datetime() -> PropType { PropType::NDTime } - #[classattr] + #[staticmethod] pub fn datetime() -> PropType { PropType::DTime } + #[staticmethod] + pub fn list(p: PropType) -> PropType { + PropType::List(Box::new(p)) + } + + #[staticmethod] + pub fn map(hash_map: HashMap) -> PropType { + PropType::Map(Arc::new(hash_map)) + } + + #[staticmethod] + pub fn array(p: PropType) -> PropType { + PropType::Array(Box::new(p)) + } + fn __repr__(&self) -> String { format!("PropType.{}", self.0) } diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index 56b3406496..5597bfbbed 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -13,9 +13,9 @@ use crate::{ serialise::incremental::InternalCache, }; use arrow::{ - array::{Array, RecordBatch, RecordBatchReader}, + array::{Array, RecordBatch, RecordBatchReader, StructArray}, compute::cast, - datatypes::{Field, Schema, SchemaRef}, + datatypes::{DataType, Field, Fields, Schema, SchemaRef}, }; use arrow_csv::{reader::Format, ReaderBuilder}; use bzip2::read::BzDecoder; @@ -273,7 +273,7 @@ pub(crate) fn process_arrow_c_stream_df<'a>( Err(e) => return vec![Err(e)], }; let casted_batch = if let Some(schema) = &schema { - match cast_columns(&batch, schema) { + match cast_columns(batch, schema) { Ok(casted_batch) => casted_batch, Err(e) => return vec![Err(e)], } @@ -288,48 +288,47 @@ pub(crate) fn process_arrow_c_stream_df<'a>( } fn cast_columns( - batch: &RecordBatch, + batch: RecordBatch, schema: &HashMap, ) -> Result { let old_schema_ref = batch.schema(); let old_fields = old_schema_ref.fields(); - let mut new_columns = Vec::with_capacity(batch.num_columns()); - let mut new_fields: Vec = Vec::with_capacity(batch.num_columns()); - - for (i, field) in old_fields.iter().enumerate() { - let col = batch.column(i); + let mut target_fields: Vec = Vec::with_capacity(old_fields.len()); + for field in old_fields.iter() { if let Some(target_prop_type) = schema.get(field.name()) { let target_dtype = arrow_dtype_from_prop_type(target_prop_type); - - if col.data_type() != &target_dtype { - let casted = cast(col.as_ref(), &target_dtype).map_err(|e| { - GraphError::LoadFailure(format!( - "Failed to cast column '{}' from {:?} to {:?}: {e}", - field.name(), - col.data_type(), - target_dtype - )) - })?; - new_columns.push(casted); - let new_field = Field::new(field.name(), target_dtype, field.is_nullable()) - .with_metadata(field.metadata().clone()); - new_fields.push(new_field); - } else { - // type was already correct - new_columns.push(col.clone()); - new_fields.push(field.deref().clone()); - } + target_fields.push( + Field::new(field.name(), target_dtype, field.is_nullable()) + .with_metadata(field.metadata().clone()), + ); } else { // schema doesn't say anything about this column - new_columns.push(col.clone()); - 
new_fields.push(field.deref().clone()); + target_fields.push(field.as_ref().clone()); } } - let new_schema = Arc::new(Schema::new(new_fields)); - RecordBatch::try_new(new_schema, new_columns) - .map_err(|e| GraphError::LoadFailure(format!("Failed while casting columns: {e}"))) + let struct_array = StructArray::from(batch); + let target_struct_type = DataType::Struct(Fields::from(target_fields)); + + // cast whole RecordBatch at once + let casted = cast(&struct_array, &target_struct_type).map_err(|e| { + GraphError::LoadFailure(format!( + "Failed to cast RecordBatch to target schema {:?}: {e}", + target_struct_type + )) + })?; + + let casted_struct = casted + .as_any() + .downcast_ref::() + .ok_or_else(|| { + GraphError::LoadFailure( + "Internal error: casting RecordBatch did not return StructArray".to_string(), + ) + })?; + + Ok(RecordBatch::from(casted_struct)) } /// Splits a RecordBatch into chunks of CHUNK_SIZE owned by DFChunk objects From 817d2d0f80132aedbdece8a35a280dcdbe2b8226 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Wed, 10 Dec 2025 21:49:39 -0500 Subject: [PATCH 38/55] Added dep:arrow-schema to "python" feature in raphtory-api so that DataTypes can be extracted from Python without feature gating behind arrow (larger dependency). Refactored data_type_as_prop_type to be in raphtory-api as long as any of "arrow", "storage", or "python" features is enabled, since they all have dep:arrow-schema. --- raphtory-api/Cargo.toml | 2 +- .../entities/properties/prop/prop_type.rs | 50 +++++++++++++++++++ raphtory-api/src/python/prop.rs | 50 +------------------ raphtory/src/errors.rs | 19 ++++++- raphtory/src/io/arrow/prop_handler.rs | 48 ++---------------- 5 files changed, 74 insertions(+), 95 deletions(-) diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index e1c4b738c7..9c3fafaf45 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -49,7 +49,7 @@ proptest.workspace = true default = [] # Enables generating the pyo3 python bindings python = [ - "dep:pyo3", "dep:pyo3-arrow", "dep:display-error-chain" + "dep:pyo3", "dep:pyo3-arrow", "dep:display-error-chain", "dep:arrow-schema" ] storage = [ diff --git a/raphtory-api/src/core/entities/properties/prop/prop_type.rs b/raphtory-api/src/core/entities/properties/prop/prop_type.rs index 8a72245bf7..9e029f9db2 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_type.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_type.rs @@ -1,3 +1,5 @@ +#[cfg(any(feature = "arrow", feature = "storage", feature = "python"))] +use arrow_schema::DataType; use serde::{Deserialize, Serialize}; use std::{ collections::HashMap, @@ -135,6 +137,54 @@ impl PropType { } } +#[cfg(any(feature = "arrow", feature = "storage", feature = "python"))] +pub fn data_type_as_prop_type(dt: &DataType) -> Result { + match dt { + DataType::Boolean => Ok(PropType::Bool), + DataType::Int32 => Ok(PropType::I32), + DataType::Int64 => Ok(PropType::I64), + DataType::UInt8 => Ok(PropType::U8), + DataType::UInt16 => Ok(PropType::U16), + DataType::UInt32 => Ok(PropType::U32), + DataType::UInt64 => Ok(PropType::U64), + DataType::Float32 => Ok(PropType::F32), + DataType::Float64 => Ok(PropType::F64), + DataType::Utf8 => Ok(PropType::Str), + DataType::LargeUtf8 => Ok(PropType::Str), + DataType::Utf8View => Ok(PropType::Str), + DataType::Struct(fields) => Ok(PropType::map(fields.iter().filter_map(|f| { + data_type_as_prop_type(f.data_type()) + .ok() + .map(move |pt| (f.name(), pt)) + }))), + DataType::List(v) => 
Ok(PropType::List(Box::new(data_type_as_prop_type( + v.data_type(), + )?))), + DataType::FixedSizeList(v, _) => Ok(PropType::List(Box::new(data_type_as_prop_type( + v.data_type(), + )?))), + DataType::LargeList(v) => Ok(PropType::List(Box::new(data_type_as_prop_type( + v.data_type(), + )?))), + DataType::Timestamp(_, v) => match v { + None => Ok(PropType::NDTime), + Some(_) => Ok(PropType::DTime), + }, + DataType::Date32 => Ok(PropType::NDTime), + DataType::Date64 => Ok(PropType::NDTime), + DataType::Decimal128(precision, scale) if *precision <= 38 => Ok(PropType::Decimal { + scale: *scale as i64, + }), + DataType::Null => Ok(PropType::Empty), + _ => Err(InvalidPropertyTypeErr(dt.clone())), + } +} + +#[cfg(any(feature = "arrow", feature = "storage", feature = "python"))] +#[derive(thiserror::Error, Debug)] +#[error("{0:?} not supported as property type")] +pub struct InvalidPropertyTypeErr(pub DataType); + #[cfg(any(feature = "arrow", feature = "storage"))] mod arrow { use crate::core::entities::properties::prop::PropType; diff --git a/raphtory-api/src/python/prop.rs b/raphtory-api/src/python/prop.rs index f59235e2cb..e9edf745ad 100644 --- a/raphtory-api/src/python/prop.rs +++ b/raphtory-api/src/python/prop.rs @@ -1,5 +1,4 @@ -use crate::core::entities::properties::prop::{Prop, PropType}; -use arrow_schema::DataType; +use crate::core::entities::properties::prop::{data_type_as_prop_type, Prop, PropType}; use bigdecimal::BigDecimal; use pyo3::{ exceptions::PyTypeError, @@ -249,6 +248,7 @@ impl<'source> FromPyObject<'source> for PropType { } } else if let Ok(py_datatype) = ob.extract::() { data_type_as_prop_type(&py_datatype.into_inner()) + .map_err(|e| PyTypeError::new_err(format!("Unsupported Arrow DataType {:?}", e.0))) } else { Err(PyTypeError::new_err( "PropType must be a string or an instance of itself.", @@ -256,49 +256,3 @@ impl<'source> FromPyObject<'source> for PropType { } } } - -// TODO: Get rid of this and use the one in prop_handler.rs instead -fn data_type_as_prop_type(dt: &DataType) -> Result { - match dt { - DataType::Boolean => Ok(PropType::Bool), - DataType::Int32 => Ok(PropType::I32), - DataType::Int64 => Ok(PropType::I64), - DataType::UInt8 => Ok(PropType::U8), - DataType::UInt16 => Ok(PropType::U16), - DataType::UInt32 => Ok(PropType::U32), - DataType::UInt64 => Ok(PropType::U64), - DataType::Float32 => Ok(PropType::F32), - DataType::Float64 => Ok(PropType::F64), - DataType::Utf8 => Ok(PropType::Str), - DataType::LargeUtf8 => Ok(PropType::Str), - DataType::Utf8View => Ok(PropType::Str), - DataType::Struct(fields) => Ok(PropType::map(fields.iter().filter_map(|f| { - data_type_as_prop_type(f.data_type()) - .ok() - .map(move |pt| (f.name(), pt)) - }))), - DataType::List(v) => Ok(PropType::List(Box::new(data_type_as_prop_type( - v.data_type(), - )?))), - DataType::FixedSizeList(v, _) => Ok(PropType::List(Box::new(data_type_as_prop_type( - v.data_type(), - )?))), - DataType::LargeList(v) => Ok(PropType::List(Box::new(data_type_as_prop_type( - v.data_type(), - )?))), - DataType::Timestamp(_, v) => match v { - None => Ok(PropType::NDTime), - Some(_) => Ok(PropType::DTime), - }, - DataType::Date32 => Ok(PropType::NDTime), - DataType::Date64 => Ok(PropType::NDTime), - DataType::Decimal128(precision, scale) if *precision <= 38 => Ok(PropType::Decimal { - scale: *scale as i64, - }), - DataType::Null => Ok(PropType::Empty), - _ => Err(PyTypeError::new_err(format!( - "Unsupported Arrow DataType {:?}", - dt - ))), - } -} diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs 
index 7855d354fa..dbb414dea1 100644
--- a/raphtory/src/errors.rs
+++ b/raphtory/src/errors.rs
@@ -29,7 +29,10 @@ use pometry_storage::RAError;
 use {
     arrow::{datatypes::DataType, error::ArrowError},
     parquet::errors::ParquetError,
-    raphtory_api::core::entities::{properties::prop::DeserialisationError, GidType, VID},
+    raphtory_api::core::entities::{
+        properties::prop::{DeserialisationError, InvalidPropertyTypeErr},
+        GidType, VID,
+    },
 };
 
 #[cfg(feature = "python")]
@@ -483,3 +486,17 @@ impl From<GraphError> for io::Error {
         io::Error::other(error)
     }
 }
+
+#[cfg(feature = "arrow")]
+impl From<InvalidPropertyTypeErr> for LoadError {
+    fn from(value: InvalidPropertyTypeErr) -> Self {
+        LoadError::InvalidPropertyType(value.0)
+    }
+}
+
+#[cfg(feature = "arrow")]
+impl From<InvalidPropertyTypeErr> for GraphError {
+    fn from(value: InvalidPropertyTypeErr) -> Self {
+        GraphError::from(LoadError::from(value))
+    }
+}
diff --git a/raphtory/src/io/arrow/prop_handler.rs b/raphtory/src/io/arrow/prop_handler.rs
index 7e7c2a6667..c67b1ba752 100644
--- a/raphtory/src/io/arrow/prop_handler.rs
+++ b/raphtory/src/io/arrow/prop_handler.rs
@@ -20,7 +20,7 @@ use arrow::{
 use bigdecimal::BigDecimal;
 use chrono::{DateTime, Utc};
 use raphtory_api::core::{
-    entities::properties::prop::{IntoPropList, PropType},
+    entities::properties::prop::{data_type_as_prop_type, IntoPropList, PropType},
     storage::{arc_str::ArcStr, dict_mapper::MaybeNew},
 };
 use rayon::prelude::*;
@@ -62,8 +62,8 @@ where
 {
     let dtypes = indices
         .iter()
-        .map(|idx| data_type_as_prop_type(df.chunk[*idx].data_type()))
-        .collect::<Result<Vec<_>, _>>()?;
+        .map(|idx| data_type_as_prop_type(df.chunk[*idx].data_type()).map_err(Into::into))
+        .collect::<Result<Vec<_>, GraphError>>()?;
     let cols = indices
         .iter()
         .map(|idx| lift_property_col(&df.chunk[*idx]))
@@ -241,48 +241,6 @@ fn arr_as_prop(arr: ArrayRef) -> Prop {
     }
 }
 
-fn data_type_as_prop_type(dt: &DataType) -> Result<PropType, GraphError> {
-    match dt {
-        DataType::Boolean => Ok(PropType::Bool),
-        DataType::Int32 => Ok(PropType::I32),
-        DataType::Int64 => Ok(PropType::I64),
-        DataType::UInt8 => Ok(PropType::U8),
-        DataType::UInt16 => Ok(PropType::U16),
-        DataType::UInt32 => Ok(PropType::U32),
-        DataType::UInt64 => Ok(PropType::U64),
-        DataType::Float32 => Ok(PropType::F32),
-        DataType::Float64 => Ok(PropType::F64),
-        DataType::Utf8 => Ok(PropType::Str),
-        DataType::LargeUtf8 => Ok(PropType::Str),
-        DataType::Utf8View => Ok(PropType::Str),
-        DataType::Struct(fields) => Ok(PropType::map(fields.iter().filter_map(|f| {
-            data_type_as_prop_type(f.data_type())
-                .ok()
-                .map(move |pt| (f.name(), pt))
-        }))),
-        DataType::List(v) => Ok(PropType::List(Box::new(data_type_as_prop_type(
-            v.data_type(),
-        )?))),
-        DataType::FixedSizeList(v, _) => Ok(PropType::List(Box::new(data_type_as_prop_type(
-            v.data_type(),
-        )?))),
-        DataType::LargeList(v) => Ok(PropType::List(Box::new(data_type_as_prop_type(
-            v.data_type(),
-        )?))),
-        DataType::Timestamp(_, v) => match v {
-            None => Ok(PropType::NDTime),
-            Some(_) => Ok(PropType::DTime),
-        },
-        DataType::Date32 => Ok(PropType::NDTime),
-        DataType::Date64 => Ok(PropType::NDTime),
-        DataType::Decimal128(precision, scale) if *precision <= 38 => Ok(PropType::Decimal {
-            scale: *scale as i64,
-        }),
-        DataType::Null => Ok(PropType::Empty),
-        _ => Err(LoadError::InvalidPropertyType(dt.clone()).into()),
-    }
-}
-
 trait PropCol: Send + Sync {
     fn get(&self, i: usize) -> Option<Prop>;
 }

From 62886a3125980c55bf91cea9af7da22fbd65024d Mon Sep 17 00:00:00 2001
From: arienandalibi
Date: Thu, 11 Dec 2025 05:07:59 -0500
Subject: [PATCH 39/55] Added support for dicts as input for the schema.
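For example, a schema can now be passed either way (a sketch mirroring the tests
added in this patch; g is a Graph and df a pandas DataFrame with an int32 column):

    g.load_nodes(
        data=df, time="time", id="id",
        properties=["val_i32"],
        # dict form; a list of (column_name, column_type) tuples is still accepted
        schema={"val_i32": PropType.i64()},
    )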
Added equality comparison for PropType. Fixed previous tests and added tests for dict schema input, pyarrow types, nested (StructArray) properties, nested schemas, mixed and matched PropType and pyarrow types, both in property and in schema,... --- python/python/raphtory/__init__.pyi | 18 +++ python/tests/test_load_from_df.py | 195 +++++++++++++++++++++++++--- raphtory-api/src/python/prop.rs | 4 + raphtory/src/python/graph/graph.rs | 12 +- 4 files changed, 210 insertions(+), 19 deletions(-) diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 63b72b6c15..ab0842f80f 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -6368,6 +6368,24 @@ class PyPropValueList(object): class PropType(object): + def __eq__(self, value): + """Return self==value.""" + + def __ge__(self, value): + """Return self>=value.""" + + def __gt__(self, value): + """Return self>value.""" + + def __le__(self, value): + """Return self<=value.""" + + def __lt__(self, value): + """Return self DataType g = Graph() @@ -205,8 +205,169 @@ def test_nested_schema_casting(): properties=["val_list_i32"], schema=[("val_list_i32", pa.list_(pa.int64()))], ) - n_prop = g.node(10).properties - print(f"dtype of Property 'val_list_i32' with pyarrow cast: {n_prop.get_dtype_of("val_list_i32")}") + n_prop_dtype = g.node(10).properties.get_dtype_of("val_list_i32") + assert n_prop_dtype == PropType.list(PropType.i64()) + +def test_schema_casting_dict(): + # time/id as regular ints (I64), value column as explicit int32 + df = pd.DataFrame( + { + "time": pd.Series([1, 2, 3], dtype="int64"), + "id": pd.Series([10, 20, 30], dtype="int64"), + "val_i32": pd.Series([1, 2, 3], dtype="int32"), + } + ) + + # schema casting as list + g_list = Graph() + g_list.load_nodes( + data=df, + time="time", + id="id", + properties=["val_i32"], + schema=[("val_i32", PropType.i64())], + ) + dtype_list = [g_list.node(10).properties.get_dtype_of("val_i32")] + del g_list + + # schema casting as dict using PropType + g_dict_proptype = Graph() + g_dict_proptype.load_nodes( + data=df, + time="time", + id="id", + properties=["val_i32"], + schema={"val_i32": PropType.i64()}, + ) + dtype_list.append(g_dict_proptype.node(10).properties.get_dtype_of("val_i32")) + del g_dict_proptype + + # schema casting as dict using pyarrow DataType + g_dict_pa = Graph() + g_dict_pa.load_nodes( + data=df, + time="time", + id="id", + properties=["val_i32"], + schema={"val_i32": pa.int64()}, + ) + dtype_list.append(g_dict_pa.node(10).properties.get_dtype_of("val_i32")) + del g_dict_pa + + for dtype in dtype_list: + assert dtype == PropType.i64() + +def test_nested_schema_casting(): + # types to make sure the table is built properly and test the types + struct_type_i32 = pa.struct( + [ + pa.field("a", pa.int32()), + pa.field("b", pa.int32()), + ] + ) + struct_type_i64 = pa.struct( + [ + pa.field("a", pa.int64()), + pa.field("b", pa.int64()), + ] + ) + + table = pa.Table.from_pydict( + { + "time": pa.array([1, 2, 3], type=pa.int64()), + "id": pa.array([10, 20, 30], type=pa.int64()), + "val_struct": pa.array( + [ + {"a": 1, "b": 10}, + {"a": 2, "b": 20}, + {"a": 3, "b": 30}, + ], + type=struct_type_i32, + ), + } + ) + + # no casting + g = Graph() + g.load_nodes( + data=table, + time="time", + id="id", + properties=["val_struct"], + ) + d_type_no_cast = g.node(10).properties.get_dtype_of("val_struct") + del g + + assert d_type_no_cast == struct_type_i32 + assert d_type_no_cast == PropType.map({"a": PropType.i32(), "b": 
PropType.i32()}) + # also check PropType.map of pyarrow types, mix and match + assert d_type_no_cast == PropType.map({"a": pa.int32(), "b": pa.int32()}) + + # schema is a PropType.map(...) inside a dict + g = Graph() + g.load_nodes( + data=table, + time="time", + id="id", + properties=["val_struct"], + schema={ + "val_struct": PropType.map( + { + "a": PropType.i64(), + "b": PropType.i64(), + } + ) + }, + ) + dtype_proptype = g.node(10).properties.get_dtype_of("val_struct") + del g + + assert dtype_proptype == struct_type_i64 + assert dtype_proptype == PropType.map({"a": PropType.i64(), "b": PropType.i64()}) + # also check PropType.map of pyarrow types, mix and match + assert dtype_proptype == PropType.map({"a": pa.int64(), "b": pa.int64()}) + + # schema is a PropType.map(...) with mixed pyarrow and PropType types + g = Graph() + g.load_nodes( + data=table, + time="time", + id="id", + properties=["val_struct"], + schema={ + "val_struct": PropType.map( + { + "a": pa.int64(), + "b": pa.int64(), + } + ) + }, + ) + dtype_mixed = g.node(10).properties.get_dtype_of("val_struct") + del g + + assert dtype_mixed == struct_type_i64 + assert dtype_mixed == PropType.map({"a": PropType.i64(), "b": PropType.i64()}) + # also check PropType.map of pyarrow types, mix and match + assert dtype_mixed == PropType.map({"a": pa.int64(), "b": pa.int64()}) + + # schema is defined using pyarrow + g = Graph() + g.load_nodes( + data=table, + time="time", + id="id", + properties=["val_struct"], + schema={"val_struct": struct_type_i64}, + ) + dtype_pyarrow = g.node(10).properties.get_dtype_of("val_struct") + del g + + assert dtype_pyarrow == dtype_proptype + assert dtype_pyarrow == struct_type_i64 + assert dtype_pyarrow == PropType.map({"a": PropType.i64(), "b": PropType.i64()}) + # also check PropType.map of pyarrow types, mix and match + assert dtype_pyarrow == PropType.map({"a": pa.int64(), "b": pa.int64()}) if fpd: import pandas diff --git a/raphtory-api/src/python/prop.rs b/raphtory-api/src/python/prop.rs index e9edf745ad..24f5ac676a 100644 --- a/raphtory-api/src/python/prop.rs +++ b/raphtory-api/src/python/prop.rs @@ -212,6 +212,10 @@ impl PyPropType { fn __repr__(&self) -> String { format!("PropType.{}", self.0) } + + fn __eq__(&self, other: PropType) -> bool { + self.0 == other + } } impl<'py> IntoPyObject<'py> for PropType { diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 6bc33abd0c..9ba6f2b7ae 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -672,11 +672,19 @@ impl PyGraph { properties: Option>, metadata: Option>, shared_metadata: Option>, - schema: Option>, + schema: Option>, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - let column_schema = schema.map(|s| s.into_iter().collect::>()); + let column_schema = schema.map(|s| { + if let Ok(list) = s.extract::>() { + Ok(list.into_iter().collect::>()) + } else if let Ok(map) = s.extract::>() { + Ok(map) + } else { + Err(GraphError::from(PyValueError::new_err("Argument 'schema' must either be a list of (column_name, column_type) tuples or a dict mapping {'column_name' : column_type}"))) + } + }).transpose()?; if data.hasattr("__arrow_c_stream__")? 
{ load_nodes_from_arrow_c_stream( &self.graph, From e3013ca34dad783dfc9525a037f3f1d3ad15146e Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 12 Dec 2025 04:37:23 -0500 Subject: [PATCH 40/55] Added CSV options for when loading CSV. Errors if CSV options were passed but no CSV files were detected. --- python/python/raphtory/__init__.pyi | 2 +- python/tests/test_load_from_df.py | 1 - raphtory/src/io/arrow/prop_handler.rs | 6 +- raphtory/src/python/graph/graph.rs | 14 +- raphtory/src/python/graph/io/arrow_loaders.rs | 176 ++++++++++++++---- 5 files changed, 157 insertions(+), 42 deletions(-) diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index ab0842f80f..07f290b21c 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1290,7 +1290,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_nodes(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, schema: Any = None) -> None: + def load_nodes(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, schema: Any = None, csv_options=None) -> None: """ Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py index 23f29e9b7e..96093f3182 100644 --- a/python/tests/test_load_from_df.py +++ b/python/tests/test_load_from_df.py @@ -1,4 +1,3 @@ -import os from pathlib import Path import polars as pl diff --git a/raphtory/src/io/arrow/prop_handler.rs b/raphtory/src/io/arrow/prop_handler.rs index c67b1ba752..9537fff3c2 100644 --- a/raphtory/src/io/arrow/prop_handler.rs +++ b/raphtory/src/io/arrow/prop_handler.rs @@ -1,8 +1,4 @@ -use crate::{ - errors::{GraphError, LoadError}, - io::arrow::dataframe::DFChunk, - prelude::Prop, -}; +use crate::{errors::GraphError, io::arrow::dataframe::DFChunk, prelude::Prop}; use arrow::{ array::{ Array, ArrayRef, ArrowPrimitiveType, AsArray, BooleanArray, Decimal128Array, diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 9ba6f2b7ae..ce877d2dcc 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -21,7 +21,7 @@ use crate::{ arrow_loaders::{ load_edge_metadata_from_arrow_c_stream, load_edges_from_arrow_c_stream, load_node_metadata_from_arrow_c_stream, load_nodes_from_arrow_c_stream, - load_nodes_from_csv_path, + load_nodes_from_csv_path, CsvReadOptions, }, pandas_loaders::*, }, @@ -660,7 +660,7 @@ impl PyGraph { /// Raises: /// GraphError: If the operation fails. 
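+    /// Example (a sketch, assuming a CSV file on disk whose columns match the arguments):
+    ///     g.load_nodes("nodes.csv", time="time", id="id", csv_options={"delimiter": "|", "has_header": True})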
#[pyo3( - signature = (data, time, id, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None, schema = None) + signature = (data, time, id, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None, schema = None, csv_options = None) )] fn load_nodes<'py>( &self, @@ -673,6 +673,7 @@ impl PyGraph { metadata: Option>, shared_metadata: Option>, schema: Option>, + csv_options: Option, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); @@ -725,6 +726,14 @@ impl PyGraph { || path_str.ends_with(".csv.bz2") }; + // before loading anything, fail if CSV options were passed but no CSV files were detected + if !is_csv && csv_options.is_some() { + return Err(GraphError::from(PyValueError::new_err(format!( + "CSV options were passed but no CSV files were detected at {}.", + path.display() + )))); + } + // support directories with mixed parquet and CSV files if is_parquet { load_nodes_from_parquet( @@ -751,6 +760,7 @@ impl PyGraph { &properties, &metadata, shared_metadata.as_ref(), + csv_options.as_ref(), )?; } if !is_parquet && !is_csv { diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index 5597bfbbed..3b19007cfb 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -15,12 +15,16 @@ use crate::{ use arrow::{ array::{Array, RecordBatch, RecordBatchReader, StructArray}, compute::cast, - datatypes::{DataType, Field, Fields, Schema, SchemaRef}, + datatypes::{DataType, Field, Fields, SchemaRef}, }; use arrow_csv::{reader::Format, ReaderBuilder}; use bzip2::read::BzDecoder; use flate2::read::GzDecoder; -use pyo3::{prelude::*, types::PyCapsule}; +use pyo3::{ + exceptions::PyValueError, + prelude::*, + types::{PyCapsule, PyDict}, +}; use pyo3_arrow::PyRecordBatchReader; use raphtory_api::core::entities::properties::prop::{arrow_dtype_from_prop_type, Prop, PropType}; use std::{ @@ -29,7 +33,6 @@ use std::{ fs, fs::File, iter, - ops::Deref, path::{Path, PathBuf}, sync::Arc, }; @@ -360,15 +363,57 @@ fn split_into_chunks(batch: &RecordBatch, indices: &[usize]) -> Vec Box { - // Support bz2 and gz compression - if filename.ends_with(".csv.gz") { - Box::new(GzDecoder::new(file)) - } else if filename.ends_with(".csv.bz2") { - Box::new(BzDecoder::new(file)) - } else { - // no need for a BufReader because ReaderBuilder::build internally wraps into BufReader - Box::new(file) +/// CSV options we support, passed as Python dict +pub(crate) struct CsvReadOptions { + delimiter: Option, + comment: Option, + escape: Option, + quote: Option, + terminator: Option, + allow_truncated_rows: Option, + has_header: Option, +} + +impl<'a> FromPyObject<'a> for CsvReadOptions { + fn extract_bound(ob: &Bound<'a, PyAny>) -> PyResult { + let dict = ob.downcast::().map_err(|e| { + PyValueError::new_err(format!("CSV options should be passed as a dict: {e}")) + })?; + let get_char = |option: &str| match dict.get_item(option)? 
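+        // byte-valued CSV options accept either a one-character string or an int in 0..=255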
{ + None => Ok(None), + Some(val) => { + if let Ok(s) = val.extract::() { + if s.len() != 1 { + return Err(PyValueError::new_err(format!( + "CSV option '{option}' must be a single character string or int 0-255", + ))); + } + Ok(Some(s.as_bytes()[0])) + } else if let Ok(b) = val.extract::() { + Ok(Some(b)) + } else { + return Err(PyValueError::new_err(format!( + "CSV option '{option}' must be a single character string or int 0-255", + ))); + } + } + }; + let get_bool = |option: &str| { + dict.get_item(option)? + .map(|val| val.extract::()) + .transpose() + .map_err(|_| PyValueError::new_err(format!("CSV option '{option}' must be a bool"))) + }; + + Ok(CsvReadOptions { + delimiter: get_char("delimiter")?, + comment: get_char("comment")?, + escape: get_char("escape")?, + quote: get_char("quote")?, + terminator: get_char("terminator")?, + allow_truncated_rows: get_bool("allow_truncated_rows")?, + has_header: get_bool("has_header")?, + }) } } @@ -386,6 +431,7 @@ pub(crate) fn load_nodes_from_csv_path< properties: &[&str], metadata: &[&str], shared_metadata: Option<&HashMap>, + csv_options: Option<&CsvReadOptions>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id, time]; cols_to_check.extend_from_slice(properties); @@ -416,7 +462,7 @@ pub(crate) fn load_nodes_from_csv_path< ))); } - let df_view = process_csv_paths_df(&csv_paths, cols_to_check.clone())?; + let df_view = process_csv_paths_df(&csv_paths, cols_to_check.clone(), csv_options)?; df_view.check_cols_exist(&cols_to_check)?; load_nodes_from_df( df_view, @@ -431,44 +477,108 @@ pub(crate) fn load_nodes_from_csv_path< ) } +fn get_csv_reader(filename: &str, file: File) -> Box { + // Support bz2 and gz compression + if filename.ends_with(".csv.gz") { + Box::new(GzDecoder::new(file)) + } else if filename.ends_with(".csv.bz2") { + Box::new(BzDecoder::new(file)) + } else { + // no need for a BufReader because ReaderBuilder::build internally wraps into BufReader + Box::new(file) + } +} + fn build_csv_reader( path: &Path, + csv_options: Option<&CsvReadOptions>, ) -> Result>, GraphError> { let file = File::open(path)?; let path_str = path.to_string_lossy(); + let mut format = Format::default(); + + let has_header = csv_options.and_then(|o| o.has_header).unwrap_or(true); + format = format.with_header(has_header); + + if let Some(delim) = csv_options.and_then(|o| o.delimiter) { + format = format.with_delimiter(delim); + } + + if let Some(comment) = csv_options.and_then(|o| o.comment) { + format = format.with_comment(comment); + } + + if let Some(escape) = csv_options.and_then(|o| o.escape) { + format = format.with_escape(escape); + } + + if let Some(quote) = csv_options.and_then(|o| o.quote) { + format = format.with_quote(quote); + } + + if let Some(terminator) = csv_options.and_then(|o| o.terminator) { + format = format.with_terminator(terminator); + } + + if let Some(allow_truncated_rows) = csv_options.and_then(|o| o.allow_truncated_rows) { + format = format.with_truncated_rows(allow_truncated_rows); + } + // infer schema let reader = get_csv_reader(path_str.as_ref(), file); - let (schema, _) = Format::default() - .with_header(true) - .infer_schema(reader, Some(100)) - .map_err(|e| { - GraphError::LoadFailure(format!( - "Arrow CSV error while inferring schema from '{}': {e}", - path.display() - )) - })?; + let (schema, _) = format.infer_schema(reader, Some(100)).map_err(|e| { + GraphError::LoadFailure(format!( + "Arrow CSV error while inferring schema from '{}': {e}", + path.display() + )) + })?; let schema_ref: SchemaRef = 
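+    // the schema above was inferred from at most 100 sampled rows of the file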
Arc::new(schema); // we need another reader because the first one gets consumed let file = File::open(path)?; let reader = get_csv_reader(path_str.as_ref(), file); - ReaderBuilder::new(schema_ref) - .with_header(true) - .with_batch_size(CHUNK_SIZE) - .build(reader) - .map_err(|e| { - GraphError::LoadFailure(format!( - "Arrow CSV error while reading '{}': {e}", - path.display() - )) - }) + let mut reader_builder = ReaderBuilder::new(schema_ref) + .with_header(has_header) + .with_batch_size(CHUNK_SIZE); + + if let Some(delimiter) = csv_options.and_then(|o| o.delimiter) { + reader_builder = reader_builder.with_delimiter(delimiter); + } + + if let Some(comment) = csv_options.and_then(|o| o.comment) { + reader_builder = reader_builder.with_comment(comment); + } + + if let Some(escape) = csv_options.and_then(|o| o.escape) { + reader_builder = reader_builder.with_escape(escape); + } + + if let Some(quote) = csv_options.and_then(|o| o.quote) { + reader_builder = reader_builder.with_quote(quote); + } + + if let Some(terminator) = csv_options.and_then(|o| o.terminator) { + reader_builder = reader_builder.with_terminator(terminator); + } + + if let Some(allow_truncated_rows) = csv_options.and_then(|o| o.allow_truncated_rows) { + reader_builder = reader_builder.with_truncated_rows(allow_truncated_rows); + } + + reader_builder.build(reader).map_err(|e| { + GraphError::LoadFailure(format!( + "Arrow CSV error while reading '{}': {e}", + path.display() + )) + }) } fn process_csv_paths_df<'a>( paths: &'a [PathBuf], col_names: Vec<&'a str>, + csv_options: Option<&'a CsvReadOptions>, ) -> Result> + 'a>, GraphError> { if paths.is_empty() { return Err(GraphError::LoadFailure( @@ -481,7 +591,7 @@ fn process_csv_paths_df<'a>( let chunks = paths.iter().flat_map(move |path| { // BoxedLIter couldn't be used because it has Send + Sync bound type ChunkIter<'b> = Box> + 'b>; - let csv_reader = match build_csv_reader(path.as_path()) { + let csv_reader = match build_csv_reader(path.as_path(), csv_options) { Ok(r) => r, Err(e) => return Box::new(iter::once(Err(e))) as ChunkIter<'a>, }; From 55e13abff48246cf6582bd8f9f8b5c961b6b5da9 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Mon, 15 Dec 2025 19:39:16 -0500 Subject: [PATCH 41/55] Added schema support for Parquet and CSV files --- raphtory/src/io/parquet_loaders.rs | 75 +++++++++++++++---- raphtory/src/python/graph/graph.rs | 12 ++- .../src/python/graph/graph_with_deletions.rs | 5 ++ raphtory/src/python/graph/io/arrow_loaders.rs | 19 +++-- raphtory/src/serialise/parquet/mod.rs | 25 ++++++- 5 files changed, 112 insertions(+), 24 deletions(-) diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 1ade06f350..5f23598078 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -3,17 +3,18 @@ use crate::{ errors::{GraphError, InvalidPathReason::PathDoesNotExist}, io::arrow::{dataframe::*, df_loaders::*}, prelude::{AdditionOps, DeletionOps, PropertyAdditionOps}, + python::graph::io::arrow_loaders::cast_columns, serialise::incremental::InternalCache, }; use parquet::arrow::{arrow_reader::ParquetRecordBatchReaderBuilder, ProjectionMask}; -use raphtory_api::core::entities::properties::prop::Prop; +use raphtory_api::core::entities::properties::prop::{Prop, PropType}; use std::{ collections::HashMap, fs, fs::File, path::{Path, PathBuf}, + sync::Arc, }; - #[cfg(feature = "storage")] use {arrow::array::StructArray, pometry_storage::RAError}; @@ -30,6 +31,7 @@ pub fn load_nodes_from_parquet< metadata: &[&str], 
shared_metadata: Option<&HashMap>, batch_size: Option, + schema: Option>>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id, time]; cols_to_check.extend_from_slice(properties); @@ -39,7 +41,12 @@ pub fn load_nodes_from_parquet< } for path in get_parquet_file_paths(parquet_path)? { - let df_view = process_parquet_file_to_df(path.as_path(), Some(&cols_to_check), batch_size)?; + let df_view = process_parquet_file_to_df( + path.as_path(), + Some(&cols_to_check), + batch_size, + schema.clone(), + )?; df_view.check_cols_exist(&cols_to_check)?; load_nodes_from_df( df_view, @@ -72,6 +79,7 @@ pub fn load_edges_from_parquet< layer: Option<&str>, layer_col: Option<&str>, batch_size: Option, + schema: Option>>, ) -> Result<(), GraphError> { let parquet_path = parquet_path.as_ref(); let mut cols_to_check = vec![src, dst, time]; @@ -107,9 +115,13 @@ pub fn load_edges_from_parquet< let all_df_view = get_parquet_file_paths(parquet_path)? .into_iter() .flat_map(|file| { - let df_view = - process_parquet_file_to_df(file.as_path(), Some(&cols_to_check), batch_size) - .expect("Failed to process Parquet file"); + let df_view = process_parquet_file_to_df( + file.as_path(), + Some(&cols_to_check), + batch_size, + schema.clone(), + ) + .expect("Failed to process Parquet file"); df_view.chunks }); @@ -147,6 +159,7 @@ pub fn load_node_props_from_parquet< metadata_properties: &[&str], shared_metadata: Option<&HashMap>, batch_size: Option, + schema: Option>>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id]; cols_to_check.extend_from_slice(metadata_properties); @@ -156,7 +169,12 @@ pub fn load_node_props_from_parquet< } for path in get_parquet_file_paths(parquet_path)? { - let df_view = process_parquet_file_to_df(path.as_path(), Some(&cols_to_check), batch_size)?; + let df_view = process_parquet_file_to_df( + path.as_path(), + Some(&cols_to_check), + batch_size, + schema.clone(), + )?; df_view.check_cols_exist(&cols_to_check)?; load_node_props_from_df( @@ -186,6 +204,7 @@ pub fn load_edge_props_from_parquet< layer: Option<&str>, layer_col: Option<&str>, batch_size: Option, + schema: Option>>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![src, dst]; if let Some(ref layer_col) = layer_col { @@ -195,7 +214,12 @@ pub fn load_edge_props_from_parquet< cols_to_check.extend_from_slice(metadata); for path in get_parquet_file_paths(parquet_path)? { - let df_view = process_parquet_file_to_df(path.as_path(), Some(&cols_to_check), batch_size)?; + let df_view = process_parquet_file_to_df( + path.as_path(), + Some(&cols_to_check), + batch_size, + schema.clone(), + )?; df_view.check_cols_exist(&cols_to_check)?; load_edges_props_from_df( df_view, @@ -224,6 +248,7 @@ pub fn load_edge_deletions_from_parquet< layer: Option<&str>, layer_col: Option<&str>, batch_size: Option, + schema: Option>>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![src, dst, time]; if let Some(ref layer_col) = layer_col { @@ -231,7 +256,12 @@ pub fn load_edge_deletions_from_parquet< } for path in get_parquet_file_paths(parquet_path)? 
{ - let df_view = process_parquet_file_to_df(path.as_path(), Some(&cols_to_check), batch_size)?; + let df_view = process_parquet_file_to_df( + path.as_path(), + Some(&cols_to_check), + batch_size, + schema.clone(), + )?; df_view.check_cols_exist(&cols_to_check)?; load_edge_deletions_from_df(df_view, time, src, dst, layer, layer_col, graph) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; @@ -246,13 +276,19 @@ pub fn load_graph_props_from_parquet, + schema: Option>>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![time]; cols_to_check.extend_from_slice(properties); cols_to_check.extend_from_slice(metadata); for path in get_parquet_file_paths(parquet_path)? { - let df_view = process_parquet_file_to_df(path.as_path(), Some(&cols_to_check), batch_size)?; + let df_view = process_parquet_file_to_df( + path.as_path(), + Some(&cols_to_check), + batch_size, + schema.clone(), + )?; df_view.check_cols_exist(&cols_to_check)?; load_graph_props_from_df(df_view, time, Some(properties), Some(metadata), graph) .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; @@ -265,6 +301,7 @@ pub(crate) fn process_parquet_file_to_df( parquet_file_path: &Path, col_names: Option<&[&str]>, batch_size: Option, + schema: Option>>, ) -> Result>>, GraphError> { let (names, chunks, num_rows) = read_parquet_file(parquet_file_path, col_names)?; @@ -278,12 +315,20 @@ pub(crate) fn process_parquet_file_to_df( Some(batch_size) => chunks.with_batch_size(batch_size), }; - let chunks = chunks.build()?.into_iter().map(move |result| { - result - .map(|r| DFChunk { - chunk: r.columns().to_vec(), + let chunks = chunks.build()?.into_iter().map(move |result| match result { + Ok(r) => { + let casted_batch = if let Some(schema) = schema.as_deref() { + cast_columns(r, schema)? + } else { + r + }; + Ok(DFChunk { + chunk: casted_batch.columns().to_vec(), }) - .map_err(|e| GraphError::LoadFailure(format!("Failed to process Parquet file: {e:?}"))) + } + Err(e) => Err(GraphError::LoadFailure(format!( + "Failed to process Parquet file: {e:?}" + ))), }); Ok(DFView { diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index ce877d2dcc..5e4d76006a 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -48,6 +48,7 @@ use std::{ fmt::{Debug, Formatter}, fs, path::PathBuf, + sync::Arc, }; /// A temporal graph with event semantics. 
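A usage sketch of how the schema argument now reaches the Parquet path as well
(hypothetical file name; column names mirror the earlier tests):

    g = Graph()
    g.load_nodes(
        data="nodes.parquet", time="time", id="id",
        properties=["val_i32"],
        schema={"val_i32": PropType.i64()},  # casts an int32 parquet column to i64
    )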
@@ -726,7 +727,7 @@ impl PyGraph { || path_str.ends_with(".csv.bz2") }; - // before loading anything, fail if CSV options were passed but no CSV files were detected + // fail before loading anything at all to avoid loading partial data if !is_csv && csv_options.is_some() { return Err(GraphError::from(PyValueError::new_err(format!( "CSV options were passed but no CSV files were detected at {}.", @@ -734,6 +735,9 @@ impl PyGraph { )))); } + // wrap in Arc to avoid cloning the entire schema for Parquet, CSV, and inner loops in CSV path + let arced_schema = column_schema.map(Arc::new); + // support directories with mixed parquet and CSV files if is_parquet { load_nodes_from_parquet( @@ -747,6 +751,7 @@ impl PyGraph { &metadata, shared_metadata.as_ref(), None, + arced_schema.clone(), )?; } if is_csv { @@ -761,6 +766,7 @@ impl PyGraph { &metadata, shared_metadata.as_ref(), csv_options.as_ref(), + arced_schema, )?; } if !is_parquet && !is_csv { @@ -911,6 +917,7 @@ impl PyGraph { &metadata, shared_metadata.as_ref(), None, + None, ) } @@ -1062,6 +1069,7 @@ impl PyGraph { layer, layer_col, None, + None, ) } @@ -1183,6 +1191,7 @@ impl PyGraph { &metadata, shared_metadata.as_ref(), None, + None, ) } @@ -1313,6 +1322,7 @@ impl PyGraph { layer, layer_col, None, + None, ) } diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index 7534091589..921e5b459c 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -707,6 +707,7 @@ impl PyPersistentGraph { &metadata, shared_metadata.as_ref(), None, + None, ) } @@ -854,6 +855,7 @@ impl PyPersistentGraph { layer, layer_col, None, + None, ) } @@ -949,6 +951,7 @@ impl PyPersistentGraph { layer, layer_col, None, + None, ) } @@ -1066,6 +1069,7 @@ impl PyPersistentGraph { &metadata, shared_metadata.as_ref(), None, + None, ) } @@ -1192,6 +1196,7 @@ impl PyPersistentGraph { layer, layer_col, None, + None, ) } diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index 3b19007cfb..0ebb77f47e 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -290,7 +290,7 @@ pub(crate) fn process_arrow_c_stream_df<'a>( Ok(DFView::new(names, chunks, len_from_python)) } -fn cast_columns( +pub(crate) fn cast_columns( batch: RecordBatch, schema: &HashMap, ) -> Result { @@ -432,6 +432,7 @@ pub(crate) fn load_nodes_from_csv_path< metadata: &[&str], shared_metadata: Option<&HashMap>, csv_options: Option<&CsvReadOptions>, + schema: Option>>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![id, time]; cols_to_check.extend_from_slice(properties); @@ -462,7 +463,7 @@ pub(crate) fn load_nodes_from_csv_path< ))); } - let df_view = process_csv_paths_df(&csv_paths, cols_to_check.clone(), csv_options)?; + let df_view = process_csv_paths_df(&csv_paths, cols_to_check.clone(), csv_options, schema)?; df_view.check_cols_exist(&cols_to_check)?; load_nodes_from_df( df_view, @@ -579,18 +580,19 @@ fn process_csv_paths_df<'a>( paths: &'a [PathBuf], col_names: Vec<&'a str>, csv_options: Option<&'a CsvReadOptions>, + schema: Option>>, ) -> Result> + 'a>, GraphError> { if paths.is_empty() { return Err(GraphError::LoadFailure( "No CSV files found at the provided path".to_string(), )); } + // BoxedLIter couldn't be used because it has Send + Sync bound + type ChunkIter<'b> = Box> + 'b>; - // TODO: Add support for user provided schema let names = 
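+        // the requested column names double as the DFView's schema; callers validate
+        // them against the data via check_cols_exist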
col_names.iter().map(|&name| name.to_string()).collect(); let chunks = paths.iter().flat_map(move |path| { - // BoxedLIter couldn't be used because it has Send + Sync bound - type ChunkIter<'b> = Box> + 'b>; + let schema = schema.clone(); let csv_reader = match build_csv_reader(path.as_path(), csv_options) { Ok(r) => r, Err(e) => return Box::new(iter::once(Err(e))) as ChunkIter<'a>, @@ -617,9 +619,14 @@ fn process_csv_paths_df<'a>( .into_iter() .map(move |batch_res| match batch_res { Ok(batch) => { + let casted_batch = if let Some(schema) = schema.as_deref() { + cast_columns(batch, schema)? + } else { + batch + }; let arrays = indices .iter() - .map(|&idx| batch.column(idx).clone()) + .map(|&idx| casted_batch.column(idx).clone()) .collect::>(); Ok(DFChunk::new(arrays)) } diff --git a/raphtory/src/serialise/parquet/mod.rs b/raphtory/src/serialise/parquet/mod.rs index 83966bd597..17fc8514c7 100644 --- a/raphtory/src/serialise/parquet/mod.rs +++ b/raphtory/src/serialise/parquet/mod.rs @@ -260,7 +260,15 @@ fn decode_graph_storage( let exclude = vec![TIME_COL]; let (c_props, g_type) = collect_prop_columns(&c_graph_path, &exclude)?; let c_props = c_props.iter().map(|s| s.as_str()).collect::>(); - load_graph_props_from_parquet(&g, &c_graph_path, TIME_COL, &[], &c_props, batch_size)?; + load_graph_props_from_parquet( + &g, + &c_graph_path, + TIME_COL, + &[], + &c_props, + batch_size, + None, + )?; g_type.ok_or_else(|| GraphError::LoadFailure("Graph type not found".to_string()))? }; @@ -278,7 +286,15 @@ fn decode_graph_storage( let exclude = vec![TIME_COL]; let (t_props, _) = collect_prop_columns(&t_graph_path, &exclude)?; let t_props = t_props.iter().map(|s| s.as_str()).collect::>(); - load_graph_props_from_parquet(&g, &t_graph_path, TIME_COL, &t_props, &[], batch_size)?; + load_graph_props_from_parquet( + &g, + &t_graph_path, + TIME_COL, + &t_props, + &[], + batch_size, + None, + )?; } let t_node_path = path.as_ref().join(NODES_T_PATH); @@ -301,6 +317,7 @@ fn decode_graph_storage( &[], None, batch_size, + None, )?; } @@ -322,6 +339,7 @@ fn decode_graph_storage( &c_prop_columns, None, batch_size, + None, )?; } @@ -346,6 +364,7 @@ fn decode_graph_storage( None, Some(LAYER_COL), batch_size, + None, )?; } @@ -360,6 +379,7 @@ fn decode_graph_storage( None, Some(LAYER_COL), batch_size, + None, )?; } @@ -381,6 +401,7 @@ fn decode_graph_storage( None, Some(LAYER_COL), batch_size, + None, )?; } From ac02d729de5a0f0676206304f9121d38ed17e017 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Mon, 15 Dec 2025 20:52:47 -0500 Subject: [PATCH 42/55] Post merge cleanup --- python/python/raphtory/__init__.pyi | 842 ++++++------------ python/python/raphtory/iterables/__init__.pyi | 476 ++++++---- .../python/raphtory/node_state/__init__.pyi | 455 +++------- raphtory/src/python/graph/io/arrow_loaders.rs | 1 - raphtory/src/python/packages/base_modules.rs | 5 +- 5 files changed, 712 insertions(+), 1067 deletions(-) diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 41e385b8bc..750c5da72f 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1,7 +1,6 @@ """ Raphtory graph analytics library """ - from __future__ import annotations ############################################################################### @@ -29,51 +28,8 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = [ - "GraphView", - "Graph", - "PersistentGraph", - "Node", - "NodeFilterBuilder", - "Nodes", - 
"PathFromNode", - "PathFromGraph", - "MutableNode", - "Edge", - "Edges", - "NestedEdges", - "MutableEdge", - "Properties", - "PyPropValueList", - "Metadata", - "MetadataView", - "TemporalProperties", - "PropertiesView", - "TemporalProperty", - "EventTime", - "OptionalEventTime", - "History", - "HistoryTimestamp", - "HistoryDateTime", - "HistoryEventId", - "Intervals", - "WindowSet", - "IndexSpecBuilder", - "IndexSpec", - "version", - "graphql", - "algorithms", - "graph_loader", - "graph_gen", - "vectors", - "node_state", - "filter", - "iterables", - "nullmodels", - "plottingutils", -] - -class GraphView(object): +__all__ = ['GraphView', 'Graph', 'PersistentGraph', 'Node', 'NodeFilterBuilder', 'Nodes', 'PathFromNode', 'PathFromGraph', 'MutableNode', 'Edge', 'Edges', 'NestedEdges', 'MutableEdge', 'Properties', 'PyPropValueList', 'PropType', 'Metadata', 'MetadataView', 'TemporalProperties', 'PropertiesView', 'TemporalProperty', 'EventTime', 'OptionalEventTime', 'History', 'HistoryTimestamp', 'HistoryDateTime', 'HistoryEventId', 'Intervals', 'WindowSet', 'IndexSpecBuilder', 'IndexSpec', 'version', 'graphql', 'algorithms', 'graph_loader', 'graph_gen', 'vectors', 'node_state', 'filter', 'iterables', 'nullmodels', 'plottingutils'] +class GraphView(object): """Graph view is a read-only version of a graph at a certain point in time.""" def __eq__(self, value): @@ -265,9 +221,7 @@ class GraphView(object): GraphView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -468,12 +422,7 @@ class GraphView(object): Properties: Properties paired with their names """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -610,14 +559,7 @@ class GraphView(object): GraphView: Returns the subgraph """ - def to_networkx( - self, - explode_edges: bool = False, - include_node_properties: bool = True, - include_edge_properties: bool = True, - include_update_history: bool = True, - include_property_history: bool = True, - ) -> nx.MultiDiGraph: + def to_networkx(self, explode_edges: bool = False, include_node_properties: bool = True, include_edge_properties: bool = True, include_update_history: bool = True, include_property_history: bool = True) -> nx.MultiDiGraph: """ Returns a graph with NetworkX. @@ -636,19 +578,7 @@ class GraphView(object): nx.MultiDiGraph: A Networkx MultiDiGraph. 
""" - def to_pyvis( - self, - explode_edges: bool = False, - edge_color: str = "#000000", - shape: str = "dot", - node_image: Optional[str] = None, - edge_weight: Optional[str] = None, - edge_label: Optional[str] = None, - colour_nodes_by_type: bool = False, - directed: bool = True, - notebook: bool = False, - **kwargs: Any, - ) -> pyvis.network.Network: + def to_pyvis(self, explode_edges: bool = False, edge_color: str = '#000000', shape: str = 'dot', node_image: Optional[str] = None, edge_weight: Optional[str] = None, edge_label: Optional[str] = None, colour_nodes_by_type: bool = False, directed: bool = True, notebook: bool = False, **kwargs: Any) -> pyvis.network.Network: """ Draw a graph with PyVis. Pyvis is a required dependency. If you intend to use this function make sure that you install Pyvis @@ -709,14 +639,7 @@ class GraphView(object): GraphView: The layered view """ - def vectorise( - self, - embedding: Callable[[list], list], - nodes: bool | str = True, - edges: bool | str = True, - cache: Optional[str] = None, - verbose: bool = False, - ) -> VectorisedGraph: + def vectorise(self, embedding: Callable[[list], list], nodes: bool | str = True, edges: bool | str = True, cache: Optional[str] = None, verbose: bool = False) -> VectorisedGraph: """ Create a VectorisedGraph from the current graph. @@ -752,7 +675,7 @@ class GraphView(object): Optional[int]: """ -class Graph(GraphView): +class Graph(GraphView): """ A temporal graph with event semantics. @@ -763,16 +686,10 @@ class Graph(GraphView): def __new__(cls, num_shards: Optional[int] = None) -> Graph: """Create and return a new object. See help(type) for accurate signature.""" - def __reduce__(self): ... - def add_edge( - self, - timestamp: TimeInput, - src: str | int, - dst: str | int, - properties: Optional[PropInput] = None, - layer: Optional[str] = None, - event_id: Optional[int] = None, - ) -> MutableEdge: + def __reduce__(self): + ... + + def add_edge(self, timestamp: TimeInput, src: str|int, dst: str|int, properties: Optional[PropInput] = None, layer: Optional[str] = None, event_id: Optional[int] = None) -> MutableEdge: """ Adds a new edge with the given source and destination nodes and properties to the graph. @@ -805,14 +722,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def add_node( - self, - timestamp: TimeInput, - id: str | int, - properties: Optional[PropInput] = None, - node_type: Optional[str] = None, - event_id: Optional[int] = None, - ) -> MutableNode: + def add_node(self, timestamp: TimeInput, id: str|int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, event_id: Optional[int] = None) -> MutableNode: """ Adds a new node with the given id and properties to the graph. @@ -830,12 +740,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def add_properties( - self, - timestamp: TimeInput, - properties: PropInput, - event_id: Optional[int] = None, - ) -> None: + def add_properties(self, timestamp: TimeInput, properties: PropInput, event_id: Optional[int] = None) -> None: """ Adds properties to the graph. 
@@ -912,14 +817,7 @@ class Graph(GraphView): None: """ - def create_node( - self, - timestamp: TimeInput, - id: str | int, - properties: Optional[PropInput] = None, - node_type: Optional[str] = None, - event_id: Optional[int] = None, - ) -> MutableNode: + def create_node(self, timestamp: TimeInput, id: str|int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, event_id: Optional[int] = None) -> MutableNode: """ Creates a new node with the given id and properties to the graph. It fails if the node already exists. @@ -949,7 +847,7 @@ class Graph(GraphView): Graph: """ - def edge(self, src: str | int, dst: str | int) -> MutableEdge: + def edge(self, src: str|int, dst: str|int) -> MutableEdge: """ Gets the edge with the specified source and destination nodes @@ -1042,9 +940,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def import_edges_as( - self, edges: List[Edge], new_ids: List[Tuple[int, int]], merge: bool = False - ) -> None: + def import_edges_as(self, edges: List[Edge], new_ids: List[Tuple[int, int]], merge: bool = False) -> None: """ Import multiple edges into the graph with new ids. @@ -1079,9 +975,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def import_node_as( - self, node: Node, new_id: str | int, merge: bool = False - ) -> MutableNode: + def import_node_as(self, node: Node, new_id: str|int, merge: bool = False) -> MutableNode: """ Import a single node into the graph with new id. @@ -1116,9 +1010,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def import_nodes_as( - self, nodes: List[Node], new_ids: List[str | int], merge: bool = False - ) -> None: + def import_nodes_as(self, nodes: List[Node], new_ids: List[str|int], merge: bool = False) -> None: """ Import multiple nodes into the graph with new ids. @@ -1163,16 +1055,7 @@ class Graph(GraphView): Graph: the loaded graph with initialised cache """ - def load_edge_metadata_from_df( - self, - data: Any, - src: str, - dst: str, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edge_metadata_from_df(self, data: Any, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, @@ -1194,16 +1077,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edge_props_from_pandas( - self, - df: DataFrame, - src: str, - dst: str, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edge_props_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge properties from a Pandas DataFrame. @@ -1223,16 +1097,7 @@ class Graph(GraphView): GraphError: If the operation fails. 
""" - def load_edge_props_from_parquet( - self, - parquet_path: str, - src: str, - dst: str, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edge_props_from_parquet(self, parquet_path: str, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge properties from parquet file @@ -1252,18 +1117,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_df( - self, - data: Any, - time: str, - src: str, - dst: str, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edges_from_df(self, data: Any, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, @@ -1287,18 +1141,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_pandas( - self, - df: DataFrame, - time: str, - src: str, - dst: str, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges from a Pandas DataFrame into the graph. @@ -1320,18 +1163,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_parquet( - self, - parquet_path: str, - time: str, - src: str, - dst: str, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edges_from_parquet(self, parquet_path: str, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges from a Parquet file into the graph. @@ -1365,15 +1197,7 @@ class Graph(GraphView): Graph: """ - def load_node_metadata_from_df( - self, - data: Any, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_node_metadata_from_df(self, data: Any, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load node metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). 
This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, @@ -1394,15 +1218,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_node_props_from_pandas( - self, - df: DataFrame, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_node_props_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load node properties from a Pandas DataFrame. @@ -1421,15 +1237,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_node_props_from_parquet( - self, - parquet_path: str, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_node_props_from_parquet(self, parquet_path: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load node properties from a parquet file. @@ -1448,17 +1256,32 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_df( - self, - data: Any, - time: str, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_nodes(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, schema: Any = None, csv_options=None) -> None: + """ + Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + + Arguments: + data (Any): The data source containing the nodes. + time (str): The column name for the timestamps. + id (str): The column name for the node IDs. + node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. + node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. + properties (List[str], optional): List of node property column names. Defaults to None. + metadata (List[str], optional): List of node metadata column names. Defaults to None. + shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + schema (list[tuple[str, DataType | PropType | str]], optional): A list of (column_name, column_type) tuples to cast column types to. Defaults to None. + + Returns: + None: This function does not return a value if the operation is successful. + + Raises: + GraphError: If the operation fails. 
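        Note:
            schema also accepts a dict mapping column names to column types.
            csv_options only applies when loading CSV files and accepts a dict with the keys
            delimiter, comment, escape, quote, terminator (single characters or ints 0-255)
            and has_header, allow_truncated_rows (bools). A sketch, assuming a CSV file on disk:
            g.load_nodes("nodes.csv", time="time", id="id", csv_options={"delimiter": ";"})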
+ """ + + def load_nodes_from_df(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, @@ -1481,17 +1304,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_pandas( - self, - df: DataFrame, - time: str, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_nodes_from_pandas(self, df: DataFrame, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load nodes from a Pandas DataFrame into the graph. @@ -1512,17 +1325,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_parquet( - self, - parquet_path: str, - time: str, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_nodes_from_parquet(self, parquet_path: str, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load nodes from a Parquet file into the graph. @@ -1543,7 +1346,7 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def node(self, id: str | int) -> MutableNode: + def node(self, id: str|int) -> MutableNode: """ Gets the node with the specified id @@ -1624,22 +1427,16 @@ class Graph(GraphView): None: """ -class PersistentGraph(GraphView): +class PersistentGraph(GraphView): """A temporal graph that allows edges and nodes to be deleted.""" def __new__(cls) -> PersistentGraph: """Create and return a new object. See help(type) for accurate signature.""" - def __reduce__(self): ... - def add_edge( - self, - timestamp: int, - src: str | int, - dst: str | int, - properties: Optional[PropInput] = None, - layer: Optional[str] = None, - event_id: Optional[int] = None, - ) -> None: + def __reduce__(self): + ... + + def add_edge(self, timestamp: int, src: str | int, dst: str | int, properties: Optional[PropInput] = None, layer: Optional[str] = None, event_id: Optional[int] = None) -> None: """ Adds a new edge with the given source and destination nodes and properties to the graph. @@ -1672,14 +1469,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def add_node( - self, - timestamp: TimeInput, - id: str | int, - properties: Optional[PropInput] = None, - node_type: Optional[str] = None, - event_id: Optional[int] = None, - ) -> None: + def add_node(self, timestamp: TimeInput, id: str | int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, event_id: Optional[int] = None) -> None: """ Adds a new node with the given id and properties to the graph. 
@@ -1697,9 +1487,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def add_properties( - self, timestamp: TimeInput, properties: dict, event_id: Optional[int] = None - ) -> None: + def add_properties(self, timestamp: TimeInput, properties: dict, event_id: Optional[int] = None) -> None: """ Adds properties to the graph. @@ -1775,14 +1563,7 @@ class PersistentGraph(GraphView): None: """ - def create_node( - self, - timestamp: TimeInput, - id: str | int, - properties: Optional[PropInput] = None, - node_type: Optional[str] = None, - event_id: Optional[int] = None, - ) -> MutableNode: + def create_node(self, timestamp: TimeInput, id: str | int, properties: Optional[PropInput] = None, node_type: Optional[str] = None, event_id: Optional[int] = None) -> MutableNode: """ Creates a new node with the given id and properties to the graph. It fails if the node already exists. @@ -1800,14 +1581,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def delete_edge( - self, - timestamp: int, - src: str | int, - dst: str | int, - layer: Optional[str] = None, - event_id: Optional[int] = None, - ) -> MutableEdge: + def delete_edge(self, timestamp: int, src: str | int, dst: str | int, layer: Optional[str] = None, event_id: Optional[int] = None) -> MutableEdge: """ Deletes an edge given the timestamp, src and dst nodes and layer (optional). @@ -1920,9 +1694,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def import_edges_as( - self, edges: List[Edge], new_ids: list[Tuple[GID, GID]], merge: bool = False - ) -> None: + def import_edges_as(self, edges: List[Edge], new_ids: list[Tuple[GID, GID]], merge: bool = False) -> None: """ Import multiple edges into the graph with new ids. @@ -1959,9 +1731,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def import_node_as( - self, node: Node, new_id: str | int, merge: bool = False - ) -> Node: + def import_node_as(self, node: Node, new_id: str|int, merge: bool = False) -> Node: """ Import a single node into the graph with new id. @@ -1998,9 +1768,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def import_nodes_as( - self, nodes: List[Node], new_ids: List[str | int], merge: bool = False - ) -> None: + def import_nodes_as(self, nodes: List[Node], new_ids: List[str|int], merge: bool = False) -> None: """ Import multiple nodes into the graph with new ids. @@ -2034,15 +1802,7 @@ class PersistentGraph(GraphView): PersistentGraph: the loaded graph with initialised cache """ - def load_edge_deletions_from_df( - self, - data: Any, - time: str, - src: str, - dst: str, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edge_deletions_from_df(self, data: Any, time: str, src: str, dst: str, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge deletions into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, @@ -2063,15 +1823,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. 
""" - def load_edge_deletions_from_pandas( - self, - df: DataFrame, - time: str, - src: str, - dst: str, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edge_deletions_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges deletions from a Pandas DataFrame into the graph. @@ -2090,15 +1842,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edge_deletions_from_parquet( - self, - parquet_path: str, - time: str, - src: str, - dst: str, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edge_deletions_from_parquet(self, parquet_path: str, time: str, src: str, dst: str, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges deletions from a Parquet file into the graph. @@ -2117,16 +1861,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edge_metadata_from_df( - self, - data: Any, - src: str, - dst: str, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edge_metadata_from_df(self, data: Any, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, @@ -2148,16 +1883,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edge_props_from_pandas( - self, - df: DataFrame, - src: str, - dst: str, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edge_props_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge properties from a Pandas DataFrame. @@ -2177,16 +1903,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edge_props_from_parquet( - self, - parquet_path: str, - src: str, - dst: str, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edge_props_from_parquet(self, parquet_path: str, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edge properties from parquet file @@ -2206,18 +1923,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. 
""" - def load_edges_from_df( - self, - data: Any, - time: str, - src: str, - dst: str, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edges_from_df(self, data: Any, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, @@ -2241,18 +1947,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_pandas( - self, - df: DataFrame, - time: str, - src: str, - dst: str, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges from a Pandas DataFrame into the graph. @@ -2274,18 +1969,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_parquet( - self, - parquet_path: str, - time: str, - src: str, - dst: str, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - layer: Optional[str] = None, - layer_col: Optional[str] = None, - ) -> None: + def load_edges_from_parquet(self, parquet_path: str, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: """ Load edges from a Parquet file into the graph. @@ -2319,15 +2003,7 @@ class PersistentGraph(GraphView): PersistentGraph: """ - def load_node_metadata_from_df( - self, - data: Any, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_node_metadata_from_df(self, data: Any, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load node metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, @@ -2348,15 +2024,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. 
""" - def load_node_props_from_pandas( - self, - df: DataFrame, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_node_props_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load node properties from a Pandas DataFrame. @@ -2375,15 +2043,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_node_props_from_parquet( - self, - parquet_path: str, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_node_props_from_parquet(self, parquet_path: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load node properties from a parquet file. @@ -2402,17 +2062,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_df( - self, - data: Any, - time: str, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_nodes_from_df(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, @@ -2435,17 +2085,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_pandas( - self, - df: DataFrame, - time: str, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_nodes_from_pandas(self, df: DataFrame, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load nodes from a Pandas DataFrame into the graph. @@ -2466,17 +2106,7 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_parquet( - self, - parquet_path: str, - time: str, - id: str, - node_type: Optional[str] = None, - node_type_col: Optional[str] = None, - properties: Optional[List[str]] = None, - metadata: Optional[List[str]] = None, - shared_metadata: Optional[PropInput] = None, - ) -> None: + def load_nodes_from_parquet(self, parquet_path: str, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: """ Load nodes from a Parquet file into the graph. 
@@ -2567,7 +2197,7 @@ class PersistentGraph(GraphView): None: """ -class Node(object): +class Node(object): """A node (or node) in the graph.""" def __eq__(self, value): @@ -2724,9 +2354,7 @@ class Node(object): Node: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2799,7 +2427,7 @@ class Node(object): """ @property - def id(self) -> str | int: + def id(self) -> (str|int): """ Returns the id of the node. This is a unique identifier for the node. @@ -2954,12 +2582,7 @@ class Node(object): Properties: A list of properties. """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3081,7 +2704,7 @@ class Node(object): Optional[int]: """ -class NodeFilterBuilder(object): +class NodeFilterBuilder(object): """ A builder for constructing node filters @@ -3117,9 +2740,7 @@ class NodeFilterBuilder(object): filter.FilterExpr: """ - def fuzzy_search( - self, value, levenshtein_distance: int, prefix_match: bool - ) -> filter.FilterExpr: + def fuzzy_search(self, value, levenshtein_distance: int, prefix_match: bool) -> filter.FilterExpr: """ Returns a filter expression that checks if the specified properties approximately match the specified string. @@ -3160,7 +2781,7 @@ class NodeFilterBuilder(object): """ Returns a filter expression that checks if the specified iterable of strings does not contain a given value. - + Arguments: value (str): @@ -3168,7 +2789,7 @@ class NodeFilterBuilder(object): filter.FilterExpr: """ -class Nodes(object): +class Nodes(object): """A list of nodes that can be iterated over.""" def __bool__(self): @@ -3339,9 +2960,7 @@ class Nodes(object): Nodes: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3560,12 +3179,7 @@ class Nodes(object): PropertiesView: A view of the node properties. """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3654,9 +3268,7 @@ class Nodes(object): OptionalEventTime: The earliest time that this Nodes is valid or None if the Nodes is valid for all times. """ - def to_df( - self, include_property_history: bool = False, convert_datetime: bool = False - ) -> DataFrame: + def to_df(self, include_property_history: bool = False, convert_datetime: bool = False) -> DataFrame: """ Converts the graph's nodes into a Pandas DataFrame. 
@@ -3717,7 +3329,8 @@ class Nodes(object): Optional[int]: """ -class PathFromNode(object): +class PathFromNode(object): + def __bool__(self): """True if self else False""" @@ -3873,9 +3486,7 @@ class PathFromNode(object): PathFromNode: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -4085,12 +3696,7 @@ class PathFromNode(object): PropertiesView: """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -4223,7 +3829,8 @@ class PathFromNode(object): Optional[int]: """ -class PathFromGraph(object): +class PathFromGraph(object): + def __bool__(self): """True if self else False""" @@ -4379,9 +3986,7 @@ class PathFromGraph(object): PathFromGraph: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -4600,12 +4205,7 @@ class PathFromGraph(object): NestedPropsIterable: """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -4738,7 +4338,8 @@ class PathFromGraph(object): Optional[int]: """ -class MutableNode(Node): +class MutableNode(Node): + def __repr__(self): """Return repr(self).""" @@ -4755,12 +4356,7 @@ class MutableNode(Node): None: """ - def add_updates( - self, - t: TimeInput, - properties: Optional[PropInput] = None, - event_id: Optional[int] = None, - ) -> None: + def add_updates(self, t: TimeInput, properties: Optional[PropInput] = None, event_id: Optional[int] = None) -> None: """ Add updates to a node in the graph at a specified time. This function allows for the addition of property updates to a node within the graph. The updates are time-stamped, meaning they are applied at the specified time. @@ -4805,7 +4401,7 @@ class MutableNode(Node): None: """ -class Edge(object): +class Edge(object): """ PyEdge is a Python class that represents an edge in the graph. An edge is a directed connection between two nodes. @@ -4958,9 +4554,7 @@ class Edge(object): Edge: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -5138,12 +4732,7 @@ class Edge(object): Properties: Properties on the Edge. 
""" - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -5283,7 +4872,7 @@ class Edge(object): Optional[int]: """ -class Edges(object): +class Edges(object): """A list of edges that can be iterated over.""" def __bool__(self): @@ -5434,9 +5023,7 @@ class Edges(object): Edges: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -5618,12 +5205,7 @@ class Edges(object): PropertiesView: """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -5730,12 +5312,7 @@ class Edges(object): EventTimeIterable: Iterable of `EventTime`s. """ - def to_df( - self, - include_property_history: bool = True, - convert_datetime: bool = False, - explode: bool = False, - ) -> DataFrame: + def to_df(self, include_property_history: bool = True, convert_datetime: bool = False, explode: bool = False) -> DataFrame: """ Converts the graph's edges into a Pandas DataFrame. @@ -5788,7 +5365,8 @@ class Edges(object): Optional[int]: """ -class NestedEdges(object): +class NestedEdges(object): + def __bool__(self): """True if self else False""" @@ -5929,9 +5507,7 @@ class NestedEdges(object): NestedEdges: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -6113,12 +5689,7 @@ class NestedEdges(object): PyNestedPropsIterable: """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -6261,7 +5832,8 @@ class NestedEdges(object): Optional[int]: """ -class MutableEdge(Edge): +class MutableEdge(Edge): + def __repr__(self): """Return repr(self).""" @@ -6279,13 +5851,7 @@ class MutableEdge(Edge): None: """ - def add_updates( - self, - t: TimeInput, - properties: Optional[PropInput] = None, - layer: Optional[str] = None, - event_id: Optional[int] = None, - ) -> None: + def add_updates(self, t: TimeInput, properties: Optional[PropInput] = None, layer: Optional[str] = None, event_id: Optional[int] = None) -> None: """ Add updates to an edge in the graph at a specified time. 
This function allows for the addition of property updates to an edge within the graph. The updates are time-stamped, meaning they are applied at the specified time. @@ -6303,9 +5869,7 @@ class MutableEdge(Edge): GraphError: If the operation fails. """ - def delete( - self, t: TimeInput, layer: Optional[str] = None, event_id: Optional[int] = None - ) -> None: + def delete(self, t: TimeInput, layer: Optional[str] = None, event_id: Optional[int] = None) -> None: """ Mark the edge as deleted at the specified time. @@ -6335,7 +5899,7 @@ class MutableEdge(Edge): None: """ -class Properties(object): +class Properties(object): """A view of the properties of an entity""" def __contains__(self, key): @@ -6393,6 +5957,18 @@ class Properties(object): PropValue: """ + def get_dtype_of(self, key: str) -> PropType: + """ + Get the PropType of a property. Specifically, returns the PropType of the latest value for this property if it exists. + If not, it returns the PropType for the static property matching this name. + + Arguments: + key (str): the name of the property. + + Returns: + PropType: + """ + def items(self) -> list[Tuple[str, PropValue]]: """ Get a list of key-value pairs @@ -6426,7 +6002,8 @@ class Properties(object): list[PropValue]: """ -class PyPropValueList(object): +class PyPropValueList(object): + def __eq__(self, value): """Return self==value.""" @@ -6462,8 +6039,12 @@ class PyPropValueList(object): PropValue: The average of each property values, or None if count is zero. """ - def collect(self): ... - def count(self): ... + def collect(self): + ... + + def count(self): + ... + def drop_none(self) -> list[PropValue]: """ Drop none. @@ -6512,7 +6093,90 @@ class PyPropValueList(object): PropValue: """ -class Metadata(object): +class PropType(object): + + def __eq__(self, value): + """Return self==value.""" + + def __ge__(self, value): + """Return self>=value.""" + + def __gt__(self, value): + """Return self>value.""" + + def __le__(self, value): + """Return self<=value.""" + + def __lt__(self, value): + """Return self tuple[int, int]: + def as_tuple(self) -> tuple[int,int]: """ Return this entry as a tuple of (timestamp, event_id), where the timestamp is in milliseconds. @@ -7027,7 +6702,7 @@ class EventTime(object): int: Milliseconds since the Unix epoch. """ -class OptionalEventTime(object): +class OptionalEventTime(object): """ Raphtory’s optional EventTime type. Instances of OptionalEventTime may contain an EventTime, or be empty. This is used for functions that may not return data (such as earliest_time and latest_time) because the data is unavailable. @@ -7121,7 +6796,7 @@ class OptionalEventTime(object): int | None: Milliseconds since the Unix epoch. """ -class History(object): +class History(object): """History of updates for an object. Provides access to time entries and derived views such as timestamps, datetimes, event ids, and intervals.""" def __contains__(self, key): @@ -7272,7 +6947,7 @@ class History(object): HistoryTimestamp: Timestamp (as int) view of this history. """ -class HistoryTimestamp(object): +class HistoryTimestamp(object): """History view that exposes timestamps in milliseconds since the Unix epoch.""" def __contains__(self, key): @@ -7345,7 +7020,7 @@ class HistoryTimestamp(object): list[int]: List of timestamps. 
""" -class HistoryDateTime(object): +class HistoryDateTime(object): """History view that exposes UTC datetimes.""" def __contains__(self, key): @@ -7411,7 +7086,7 @@ class HistoryDateTime(object): TimeError: If a timestamp cannot be converted to a datetime. """ -class HistoryEventId(object): +class HistoryEventId(object): """History view that exposes event ids of time entries. They are used for ordering within the same timestamp.""" def __contains__(self, key): @@ -7484,7 +7159,7 @@ class HistoryEventId(object): list[int]: List of event ids. """ -class Intervals(object): +class Intervals(object): """View over the intervals between consecutive timestamps, expressed in milliseconds.""" def __contains__(self, key): @@ -7589,7 +7264,8 @@ class Intervals(object): list[int]: List of intervals in milliseconds. """ -class WindowSet(object): +class WindowSet(object): + def __iter__(self): """Implement iter(self).""" @@ -7607,7 +7283,8 @@ class WindowSet(object): Iterable: The time index. """ -class IndexSpecBuilder(object): +class IndexSpecBuilder(object): + def __new__(cls, graph) -> IndexSpecBuilder: """Create and return a new object. See help(type) for accurate signature.""" @@ -7711,7 +7388,8 @@ class IndexSpecBuilder(object): dict[str, Any]: """ -class IndexSpec(object): +class IndexSpec(object): + def __repr__(self): """Return repr(self).""" diff --git a/python/python/raphtory/iterables/__init__.pyi b/python/python/raphtory/iterables/__init__.pyi index 271dfa8518..07e168700c 100644 --- a/python/python/raphtory/iterables/__init__.pyi +++ b/python/python/raphtory/iterables/__init__.pyi @@ -25,56 +25,9 @@ from os import PathLike import networkx as nx # type: ignore import pyvis # type: ignore -__all__ = [ - "NestedUtcDateTimeIterable", - "NestedGIDIterable", - "GIDIterable", - "StringIterable", - "OptionArcStringIterable", - "UsizeIterable", - "OptionI64Iterable", - "NestedOptionArcStringIterable", - "NestedStringIterable", - "NestedOptionI64Iterable", - "NestedI64VecIterable", - "NestedUsizeIterable", - "BoolIterable", - "ArcStringIterable", - "NestedVecUtcDateTimeIterable", - "OptionVecUtcDateTimeIterable", - "GIDGIDIterable", - "NestedGIDGIDIterable", - "NestedBoolIterable", - "U64Iterable", - "OptionUtcDateTimeIterable", - "ArcStringVecIterable", - "NestedArcStringVecIterable", - "NestedEventTimeIterable", - "NestedArcStringIterable", - "NestedOptionEventTimeIterable", - "NestedHistoryIterable", - "EventTimeIterable", - "OptionEventTimeIterable", - "HistoryIterable", - "HistoryTimestampIterable", - "IntervalsIterable", - "HistoryEventIdIterable", - "HistoryDateTimeIterable", - "OptionUsizeIterable", - "ResultOptionUtcDateTimeIterable", - "I64Iterable", - "ResultUtcDateTimeIterable", - "NestedHistoryTimestampIterable", - "NestedIntervalsIterable", - "NestedHistoryEventIdIterable", - "NestedHistoryDateTimeIterable", - "NestedOptionUsizeIterable", - "NestedResultOptionUtcDateTimeIterable", - "NestedI64Iterable", - "NestedResultUtcDateTimeIterable", -] - -class NestedUtcDateTimeIterable(object): +__all__ = ['NestedUtcDateTimeIterable', 'NestedGIDIterable', 'GIDIterable', 'StringIterable', 'OptionArcStringIterable', 'UsizeIterable', 'OptionI64Iterable', 'NestedOptionArcStringIterable', 'NestedStringIterable', 'NestedOptionI64Iterable', 'NestedI64VecIterable', 'NestedUsizeIterable', 'BoolIterable', 'ArcStringIterable', 'NestedVecUtcDateTimeIterable', 'OptionVecUtcDateTimeIterable', 'GIDGIDIterable', 'NestedGIDGIDIterable', 'NestedBoolIterable', 'U64Iterable', 'OptionUtcDateTimeIterable', 
'ArcStringVecIterable', 'NestedArcStringVecIterable', 'NestedEventTimeIterable', 'NestedArcStringIterable', 'NestedOptionEventTimeIterable', 'NestedHistoryIterable', 'EventTimeIterable', 'OptionEventTimeIterable', 'HistoryIterable', 'HistoryTimestampIterable', 'IntervalsIterable', 'HistoryEventIdIterable', 'HistoryDateTimeIterable', 'OptionUsizeIterable', 'ResultOptionUtcDateTimeIterable', 'I64Iterable', 'ResultUtcDateTimeIterable', 'NestedHistoryTimestampIterable', 'NestedIntervalsIterable', 'NestedHistoryEventIdIterable', 'NestedHistoryDateTimeIterable', 'NestedOptionUsizeIterable', 'NestedResultOptionUtcDateTimeIterable', 'NestedI64Iterable', 'NestedResultUtcDateTimeIterable'] +class NestedUtcDateTimeIterable(object): + def __eq__(self, value): """Return self==value.""" @@ -102,9 +55,11 @@ class NestedUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class NestedGIDIterable(object): -class NestedGIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -132,11 +87,17 @@ class NestedGIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self): + ... + + def max(self): + ... + + def min(self): + ... + +class GIDIterable(object): -class GIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -164,11 +125,17 @@ class GIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self): + ... + + def max(self): + ... + + def min(self): + ... + +class StringIterable(object): -class StringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -196,9 +163,11 @@ class StringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class OptionArcStringIterable(object): -class OptionArcStringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -226,9 +195,11 @@ class OptionArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class UsizeIterable(object): -class UsizeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -256,13 +227,23 @@ class UsizeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def mean(self): ... - def min(self): ... - def sum(self): ... + def collect(self): + ... + + def max(self): + ... + + def mean(self): + ... + + def min(self): + ... + + def sum(self): + ... + +class OptionI64Iterable(object): -class OptionI64Iterable(object): def __eq__(self, value): """Return self==value.""" @@ -290,11 +271,17 @@ class OptionI64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self): + ... + + def max(self): + ... + + def min(self): + ... + +class NestedOptionArcStringIterable(object): -class NestedOptionArcStringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -322,9 +309,11 @@ class NestedOptionArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... 
+ +class NestedStringIterable(object): -class NestedStringIterable(object): def __eq__(self, value): """Return self==value.""" @@ -352,9 +341,11 @@ class NestedStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class NestedOptionI64Iterable(object): -class NestedOptionI64Iterable(object): def __eq__(self, value): """Return self==value.""" @@ -382,11 +373,17 @@ class NestedOptionI64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self): + ... + + def max(self): + ... + + def min(self): + ... + +class NestedI64VecIterable(object): -class NestedI64VecIterable(object): def __eq__(self, value): """Return self==value.""" @@ -414,9 +411,11 @@ class NestedI64VecIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class NestedUsizeIterable(object): -class NestedUsizeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -444,13 +443,23 @@ class NestedUsizeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def mean(self): ... - def min(self): ... - def sum(self): ... + def collect(self): + ... + + def max(self): + ... + + def mean(self): + ... + + def min(self): + ... + + def sum(self): + ... + +class BoolIterable(object): -class BoolIterable(object): def __eq__(self, value): """Return self==value.""" @@ -478,9 +487,11 @@ class BoolIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class ArcStringIterable(object): -class ArcStringIterable(object): def __iter__(self): """Implement iter(self).""" @@ -490,9 +501,11 @@ class ArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class NestedVecUtcDateTimeIterable(object): -class NestedVecUtcDateTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -520,9 +533,11 @@ class NestedVecUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class OptionVecUtcDateTimeIterable(object): -class OptionVecUtcDateTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -550,9 +565,11 @@ class OptionVecUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class GIDGIDIterable(object): -class GIDGIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -580,11 +597,17 @@ class GIDGIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self): + ... + + def max(self): + ... + + def min(self): + ... + +class NestedGIDGIDIterable(object): -class NestedGIDGIDIterable(object): def __eq__(self, value): """Return self==value.""" @@ -612,11 +635,17 @@ class NestedGIDGIDIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self): + ... + + def max(self): + ... + + def min(self): + ... + +class NestedBoolIterable(object): -class NestedBoolIterable(object): def __eq__(self, value): """Return self==value.""" @@ -644,9 +673,11 @@ class NestedBoolIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... 
+ +class U64Iterable(object): -class U64Iterable(object): def __eq__(self, value): """Return self==value.""" @@ -674,13 +705,23 @@ class U64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def mean(self): ... - def min(self): ... - def sum(self): ... + def collect(self): + ... + + def max(self): + ... + + def mean(self): + ... + + def min(self): + ... + + def sum(self): + ... + +class OptionUtcDateTimeIterable(object): -class OptionUtcDateTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -708,9 +749,11 @@ class OptionUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class ArcStringVecIterable(object): -class ArcStringVecIterable(object): def __eq__(self, value): """Return self==value.""" @@ -738,9 +781,11 @@ class ArcStringVecIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class NestedArcStringVecIterable(object): -class NestedArcStringVecIterable(object): def __eq__(self, value): """Return self==value.""" @@ -768,9 +813,11 @@ class NestedArcStringVecIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class NestedEventTimeIterable(object): -class NestedEventTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -798,7 +845,9 @@ class NestedEventTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + @property def dt(self) -> NestedResultUtcDateTimeIterable: """ @@ -820,8 +869,12 @@ class NestedEventTimeIterable(object): NestedUsizeIterable: Nested iterable of event ids associated to each EventTime. """ - def max(self): ... - def min(self): ... + def max(self): + ... + + def min(self): + ... + @property def t(self) -> NestedI64Iterable: """ @@ -831,7 +884,8 @@ class NestedEventTimeIterable(object): NestedI64Iterable: Nested iterable of millisecond timestamps since the Unix epoch for each EventTime. """ -class NestedArcStringIterable(object): +class NestedArcStringIterable(object): + def __iter__(self): """Implement iter(self).""" @@ -841,9 +895,11 @@ class NestedArcStringIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class NestedOptionEventTimeIterable(object): -class NestedOptionEventTimeIterable(object): def __eq__(self, value): """Return self==value.""" @@ -871,7 +927,9 @@ class NestedOptionEventTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + @property def dt(self) -> NestedResultOptionUtcDateTimeIterable: """ @@ -893,8 +951,12 @@ class NestedOptionEventTimeIterable(object): NestedOptionUsizeIterable: Nested iterable of event ids associated to each EventTime, if available. """ - def max(self): ... - def min(self): ... + def max(self): + ... + + def min(self): + ... + @property def t(self) -> NestedOptionI64Iterable: """ @@ -904,7 +966,8 @@ class NestedOptionEventTimeIterable(object): NestedOptionI64Iterable: Nested iterable of millisecond timestamps since the Unix epoch for each EventTime, if available. 
""" -class NestedHistoryIterable(object): +class NestedHistoryIterable(object): + def __iter__(self): """Implement iter(self).""" @@ -966,7 +1029,8 @@ class NestedHistoryIterable(object): NestedHistoryTimestampIterable: Iterable of iterables of HistoryTimestamp objects. """ -class EventTimeIterable(object): +class EventTimeIterable(object): + def __eq__(self, value): """Return self==value.""" @@ -994,7 +1058,9 @@ class EventTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + @property def dt(self) -> ResultUtcDateTimeIterable: """ @@ -1016,8 +1082,12 @@ class EventTimeIterable(object): UsizeIterable: Iterable of event ids associated to each EventTime. """ - def max(self): ... - def min(self): ... + def max(self): + ... + + def min(self): + ... + @property def t(self) -> I64Iterable: """ @@ -1027,7 +1097,8 @@ class EventTimeIterable(object): I64Iterable: Iterable of millisecond timestamps since the Unix epoch for each EventTime. """ -class OptionEventTimeIterable(object): +class OptionEventTimeIterable(object): + def __eq__(self, value): """Return self==value.""" @@ -1055,7 +1126,9 @@ class OptionEventTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + @property def dt(self) -> ResultOptionUtcDateTimeIterable: """ @@ -1077,8 +1150,12 @@ class OptionEventTimeIterable(object): OptionUsizeIterable: Iterable of event ids associated to each EventTime, if available. """ - def max(self): ... - def min(self): ... + def max(self): + ... + + def min(self): + ... + @property def t(self) -> OptionI64Iterable: """ @@ -1088,7 +1165,8 @@ class OptionEventTimeIterable(object): OptionI64Iterable: Iterable of millisecond timestamps since the Unix epoch for each EventTime, if available. """ -class HistoryIterable(object): +class HistoryIterable(object): + def __iter__(self): """Implement iter(self).""" @@ -1150,7 +1228,8 @@ class HistoryIterable(object): HistoryTimestampIterable: Iterable of HistoryTimestamp objects, one for each item. """ -class HistoryTimestampIterable(object): +class HistoryTimestampIterable(object): + def __iter__(self): """Implement iter(self).""" @@ -1176,7 +1255,8 @@ class HistoryTimestampIterable(object): list[list[int]]: List of timestamps in milliseconds per history. """ -class IntervalsIterable(object): +class IntervalsIterable(object): + def __iter__(self): """Implement iter(self).""" @@ -1202,7 +1282,8 @@ class IntervalsIterable(object): list[list[int]]: List of intervals per history. """ -class HistoryEventIdIterable(object): +class HistoryEventIdIterable(object): + def __iter__(self): """Implement iter(self).""" @@ -1228,7 +1309,8 @@ class HistoryEventIdIterable(object): list[list[int]]: List of event ids per history. """ -class HistoryDateTimeIterable(object): +class HistoryDateTimeIterable(object): + def __iter__(self): """Implement iter(self).""" @@ -1249,7 +1331,8 @@ class HistoryDateTimeIterable(object): TimeError: If a timestamp cannot be converted to a datetime. """ -class OptionUsizeIterable(object): +class OptionUsizeIterable(object): + def __eq__(self, value): """Return self==value.""" @@ -1277,11 +1360,17 @@ class OptionUsizeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self): + ... + + def max(self): + ... + + def min(self): + ... 
+ +class ResultOptionUtcDateTimeIterable(object): -class ResultOptionUtcDateTimeIterable(object): def __iter__(self): """Implement iter(self).""" @@ -1291,9 +1380,11 @@ class ResultOptionUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class I64Iterable(object): -class I64Iterable(object): def __eq__(self, value): """Return self==value.""" @@ -1321,13 +1412,23 @@ class I64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def mean(self): ... - def min(self): ... - def sum(self): ... + def collect(self): + ... + + def max(self): + ... + + def mean(self): + ... + + def min(self): + ... + + def sum(self): + ... + +class ResultUtcDateTimeIterable(object): -class ResultUtcDateTimeIterable(object): def __iter__(self): """Implement iter(self).""" @@ -1337,9 +1438,11 @@ class ResultUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class NestedHistoryTimestampIterable(object): -class NestedHistoryTimestampIterable(object): def __iter__(self): """Implement iter(self).""" @@ -1381,7 +1484,8 @@ class NestedHistoryTimestampIterable(object): list[list[list[int]]]: List of timestamps in milliseconds per nested history. """ -class NestedIntervalsIterable(object): +class NestedIntervalsIterable(object): + def __iter__(self): """Implement iter(self).""" @@ -1423,7 +1527,8 @@ class NestedIntervalsIterable(object): list[list[list[int]]]: List of intervals per nested history. """ -class NestedHistoryEventIdIterable(object): +class NestedHistoryEventIdIterable(object): + def __iter__(self): """Implement iter(self).""" @@ -1465,7 +1570,8 @@ class NestedHistoryEventIdIterable(object): list[list[list[int]]]: List of event ids per nested history. """ -class NestedHistoryDateTimeIterable(object): +class NestedHistoryDateTimeIterable(object): + def __iter__(self): """Implement iter(self).""" @@ -1497,7 +1603,8 @@ class NestedHistoryDateTimeIterable(object): TimeError: If a timestamp cannot be converted to a datetime. """ -class NestedOptionUsizeIterable(object): +class NestedOptionUsizeIterable(object): + def __eq__(self, value): """Return self==value.""" @@ -1525,11 +1632,17 @@ class NestedOptionUsizeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def min(self): ... + def collect(self): + ... + + def max(self): + ... + + def min(self): + ... + +class NestedResultOptionUtcDateTimeIterable(object): -class NestedResultOptionUtcDateTimeIterable(object): def __iter__(self): """Implement iter(self).""" @@ -1539,9 +1652,11 @@ class NestedResultOptionUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... + +class NestedI64Iterable(object): -class NestedI64Iterable(object): def __eq__(self, value): """Return self==value.""" @@ -1569,13 +1684,23 @@ class NestedI64Iterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def mean(self): ... - def min(self): ... - def sum(self): ... + def collect(self): + ... + + def max(self): + ... + + def mean(self): + ... + + def min(self): + ... + + def sum(self): + ... 
+ +class NestedResultUtcDateTimeIterable(object): -class NestedResultUtcDateTimeIterable(object): def __iter__(self): """Implement iter(self).""" @@ -1585,4 +1710,5 @@ class NestedResultUtcDateTimeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... + def collect(self): + ... diff --git a/python/python/raphtory/node_state/__init__.pyi b/python/python/raphtory/node_state/__init__.pyi index 3fc06a5864..a91301bb5d 100644 --- a/python/python/raphtory/node_state/__init__.pyi +++ b/python/python/raphtory/node_state/__init__.pyi @@ -25,58 +25,9 @@ import networkx as nx # type: ignore import pyvis # type: ignore from raphtory.iterables import * -__all__ = [ - "NodeGroups", - "DegreeView", - "NodeStateUsize", - "NodeStateOptionUsize", - "NodeStateU64", - "NodeStateOptionI64", - "NodeStateOptionEventTime", - "NodeStateOptionDateTime", - "IdView", - "NodeStateGID", - "EarliestTimeView", - "EarliestTimestampView", - "EarliestEventIdView", - "EarliestDateTimeView", - "LatestTimeView", - "LatestTimestampView", - "LatestEventIdView", - "LatestDateTimeView", - "NameView", - "NodeStateString", - "HistoryView", - "HistoryTimestampView", - "HistoryDateTimeView", - "HistoryEventIdView", - "IntervalsView", - "IntervalsFloatView", - "IntervalsIntegerView", - "EdgeHistoryCountView", - "UsizeIterable", - "NodeTypeView", - "NodeStateOptionStr", - "NodeStateListDateTime", - "NodeStateWeightedSP", - "NodeStateF64", - "NodeStateOptionF64", - "NodeStateNodes", - "NodeStateReachability", - "NodeStateListF64", - "NodeStateMotifs", - "NodeStateHits", - "NodeStateHistory", - "NodeStateHistoryTimestamp", - "NodeStateHistoryDateTime", - "NodeStateHistoryEventId", - "NodeStateIntervals", - "NodeStateSEIR", - "NodeLayout", - "NodeStateF64String", -] - -class NodeGroups(object): +__all__ = ['NodeGroups', 'DegreeView', 'NodeStateUsize', 'NodeStateOptionUsize', 'NodeStateU64', 'NodeStateOptionI64', 'NodeStateOptionEventTime', 'NodeStateOptionDateTime', 'IdView', 'NodeStateGID', 'EarliestTimeView', 'EarliestTimestampView', 'EarliestEventIdView', 'EarliestDateTimeView', 'LatestTimeView', 'LatestTimestampView', 'LatestEventIdView', 'LatestDateTimeView', 'NameView', 'NodeStateString', 'HistoryView', 'HistoryTimestampView', 'HistoryDateTimeView', 'HistoryEventIdView', 'IntervalsView', 'IntervalsFloatView', 'IntervalsIntegerView', 'EdgeHistoryCountView', 'UsizeIterable', 'NodeTypeView', 'NodeStateOptionStr', 'NodeStateListDateTime', 'NodeStateWeightedSP', 'NodeStateF64', 'NodeStateOptionF64', 'NodeStateNodes', 'NodeStateReachability', 'NodeStateListF64', 'NodeStateMotifs', 'NodeStateHits', 'NodeStateHistory', 'NodeStateHistoryTimestamp', 'NodeStateHistoryDateTime', 'NodeStateHistoryEventId', 'NodeStateIntervals', 'NodeStateSEIR', 'NodeLayout', 'NodeStateF64String'] +class NodeGroups(object): + def __bool__(self): """True if self else False""" @@ -119,7 +70,7 @@ class NodeGroups(object): Iterator[Tuple[Any, GraphView]]: Iterator over subgraphs with corresponding value """ -class DegreeView(object): +class DegreeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -272,9 +223,7 @@ class DegreeView(object): DegreeView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. 
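The node-state classes stubbed below share a common read API. A small sketch of that read side follows; note that the entry point producing the view is an assumption (g.nodes.degree() yielding a DegreeView does not appear in this diff), and only get() and iteration are taken from these stubs.

    from raphtory import Graph

    g = Graph()
    g.add_edge(1, 1, 2)  # add_edge(timestamp, src, dst) is assumed here
    g.add_edge(2, 2, 3)

    # Assumed entry point returning a lazy DegreeView over all nodes.
    degrees = g.nodes.degree()
    print(degrees.get(1, default=0))  # per-node lookup with a fallback
    for value in degrees:             # lazy iteration over the values
        print(value)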
@@ -429,12 +378,7 @@ class DegreeView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -614,7 +558,8 @@ class DegreeView(object): Optional[int]: """ -class NodeStateUsize(object): +class NodeStateUsize(object): + def __eq__(self, value): """Return self==value.""" @@ -807,7 +752,8 @@ class NodeStateUsize(object): Iterator[int]: Iterator over values """ -class NodeStateOptionUsize(object): +class NodeStateOptionUsize(object): + def __eq__(self, value): """Return self==value.""" @@ -849,9 +795,7 @@ class NodeStateOptionUsize(object): NodeStateOptionUsize: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[Optional[int]] = None - ) -> Optional[Optional[int]]: + def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: """ Get value for node @@ -985,7 +929,8 @@ class NodeStateOptionUsize(object): Iterator[Optional[int]]: Iterator over values """ -class NodeStateU64(object): +class NodeStateU64(object): + def __eq__(self, value): """Return self==value.""" @@ -1170,7 +1115,8 @@ class NodeStateU64(object): Iterator[int]: Iterator over values """ -class NodeStateOptionI64(object): +class NodeStateOptionI64(object): + def __eq__(self, value): """Return self==value.""" @@ -1212,9 +1158,7 @@ class NodeStateOptionI64(object): NodeStateOptionI64: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[Optional[int]] = None - ) -> Optional[Optional[int]]: + def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: """ Get value for node @@ -1348,7 +1292,8 @@ class NodeStateOptionI64(object): Iterator[Optional[int]]: Iterator over values """ -class NodeStateOptionEventTime(object): +class NodeStateOptionEventTime(object): + def __eq__(self, value): """Return self==value.""" @@ -1390,9 +1335,7 @@ class NodeStateOptionEventTime(object): NodeStateOptionEventTime: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[Optional[EventTime]] = None - ) -> Optional[Optional[EventTime]]: + def get(self, node: NodeInput, default: Optional[Optional[EventTime]] = None) -> Optional[Optional[EventTime]]: """ Get value for node @@ -1526,7 +1469,8 @@ class NodeStateOptionEventTime(object): Iterator[Optional[EventTime]]: Iterator over values """ -class NodeStateOptionDateTime(object): +class NodeStateOptionDateTime(object): + def __eq__(self, value): """Return self==value.""" @@ -1568,9 +1512,7 @@ class NodeStateOptionDateTime(object): NodeStateOptionDateTime: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[Optional[datetime]] = None - ) -> Optional[Optional[datetime]]: + def get(self, node: NodeInput, default: Optional[Optional[datetime]] = None) -> Optional[Optional[datetime]]: """ Get value for node @@ -1704,7 +1646,7 @@ class NodeStateOptionDateTime(object): Iterator[Optional[datetime]]: Iterator over values """ -class IdView(object): +class IdView(object): """A lazy view over node values""" def __eq__(self, value): @@ -1890,7 +1832,8 
@@ class IdView(object): Iterator[GID]: Iterator over values """ -class NodeStateGID(object): +class NodeStateGID(object): + def __eq__(self, value): """Return self==value.""" @@ -2058,7 +2001,7 @@ class NodeStateGID(object): Iterator[GID]: Iterator over values """ -class EarliestTimeView(object): +class EarliestTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -2229,9 +2172,7 @@ class EarliestTimeView(object): EarliestTimeView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2250,9 +2191,7 @@ class EarliestTimeView(object): WindowSet: A `WindowSet` object. """ - def get( - self, node: NodeInput, default: Optional[Optional[EventTime]] = None - ) -> Optional[Optional[EventTime]]: + def get(self, node: NodeInput, default: Optional[Optional[EventTime]] = None) -> Optional[Optional[EventTime]]: """ Get value for node @@ -2380,12 +2319,7 @@ class EarliestTimeView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -2565,7 +2499,7 @@ class EarliestTimeView(object): Optional[int]: """ -class EarliestTimestampView(object): +class EarliestTimestampView(object): """A lazy view over node values""" def __eq__(self, value): @@ -2718,9 +2652,7 @@ class EarliestTimestampView(object): EarliestTimestampView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -2739,9 +2671,7 @@ class EarliestTimestampView(object): WindowSet: A `WindowSet` object. """ - def get( - self, node: NodeInput, default: Optional[Optional[int]] = None - ) -> Optional[Optional[int]]: + def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: """ Get value for node @@ -2869,12 +2799,7 @@ class EarliestTimestampView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. 
If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3045,7 +2970,7 @@ class EarliestTimestampView(object): Optional[int]: """ -class EarliestEventIdView(object): +class EarliestEventIdView(object): """A lazy view over node values""" def __eq__(self, value): @@ -3198,9 +3123,7 @@ class EarliestEventIdView(object): EarliestEventIdView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3219,9 +3142,7 @@ class EarliestEventIdView(object): WindowSet: A `WindowSet` object. """ - def get( - self, node: NodeInput, default: Optional[Optional[int]] = None - ) -> Optional[Optional[int]]: + def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: """ Get value for node @@ -3349,12 +3270,7 @@ class EarliestEventIdView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -3525,7 +3441,7 @@ class EarliestEventIdView(object): Optional[int]: """ -class EarliestDateTimeView(object): +class EarliestDateTimeView(object): """A lazy view over EarliestDateTime values for each node.""" def __eq__(self, value): @@ -3694,9 +3610,7 @@ class EarliestDateTimeView(object): EarliestDateTimeView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -3858,12 +3772,7 @@ class EarliestDateTimeView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -4050,7 +3959,7 @@ class EarliestDateTimeView(object): Optional[int]: """ -class LatestTimeView(object): +class LatestTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -4221,9 +4130,7 @@ class LatestTimeView(object): LatestTimeView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -4242,9 +4149,7 @@ class LatestTimeView(object): WindowSet: A `WindowSet` object. 
""" - def get( - self, node: NodeInput, default: Optional[Optional[int]] = None - ) -> Optional[Optional[int]]: + def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: """ Get value for node @@ -4372,12 +4277,7 @@ class LatestTimeView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -4557,7 +4457,7 @@ class LatestTimeView(object): Optional[int]: """ -class LatestTimestampView(object): +class LatestTimestampView(object): """A lazy view over node values""" def __eq__(self, value): @@ -4710,9 +4610,7 @@ class LatestTimestampView(object): LatestTimestampView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -4731,9 +4629,7 @@ class LatestTimestampView(object): WindowSet: A `WindowSet` object. """ - def get( - self, node: NodeInput, default: Optional[Optional[int]] = None - ) -> Optional[Optional[int]]: + def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: """ Get value for node @@ -4861,12 +4757,7 @@ class LatestTimestampView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -5037,7 +4928,7 @@ class LatestTimestampView(object): Optional[int]: """ -class LatestEventIdView(object): +class LatestEventIdView(object): """A lazy view over node values""" def __eq__(self, value): @@ -5190,9 +5081,7 @@ class LatestEventIdView(object): LatestEventIdView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -5211,9 +5100,7 @@ class LatestEventIdView(object): WindowSet: A `WindowSet` object. """ - def get( - self, node: NodeInput, default: Optional[Optional[int]] = None - ) -> Optional[Optional[int]]: + def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: """ Get value for node @@ -5341,12 +5228,7 @@ class LatestEventIdView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. 
If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -5517,7 +5399,7 @@ class LatestEventIdView(object): Optional[int]: """ -class LatestDateTimeView(object): +class LatestDateTimeView(object): """A lazy view over EarliestDateTime values for each node.""" def __eq__(self, value): @@ -5686,9 +5568,7 @@ class LatestDateTimeView(object): LatestDateTimeView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -5707,9 +5587,7 @@ class LatestDateTimeView(object): WindowSet: A `WindowSet` object. """ - def get( - self, node: NodeInput, default: Optional[datetime] = None - ) -> Optional[datetime]: + def get(self, node: NodeInput, default: Optional[datetime] = None) -> Optional[datetime]: """ Get value for node @@ -5852,12 +5730,7 @@ class LatestDateTimeView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -6044,7 +5917,7 @@ class LatestDateTimeView(object): Optional[int]: """ -class NameView(object): +class NameView(object): """A lazy view over node values""" def __eq__(self, value): @@ -6238,7 +6111,8 @@ class NameView(object): Iterator[str]: Iterator over values """ -class NodeStateString(object): +class NodeStateString(object): + def __eq__(self, value): """Return self==value.""" @@ -6414,7 +6288,7 @@ class NodeStateString(object): Iterator[str]: Iterator over values """ -class HistoryView(object): +class HistoryView(object): """A lazy view over History objects for each node.""" def __eq__(self, value): @@ -6590,9 +6464,7 @@ class HistoryView(object): HistoryView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -6619,9 +6491,7 @@ class HistoryView(object): History: a history object containing all time entries """ - def get( - self, node: NodeInput, default: Optional[History] = None - ) -> Optional[History]: + def get(self, node: NodeInput, default: Optional[History] = None) -> Optional[History]: """ Get value for node @@ -6708,12 +6578,7 @@ class HistoryView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. 
If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -6871,7 +6736,7 @@ class HistoryView(object): Optional[int]: """ -class HistoryTimestampView(object): +class HistoryTimestampView(object): """A lazy view over node values""" def __eq__(self, value): @@ -6920,9 +6785,7 @@ class HistoryTimestampView(object): NodeStateHistoryTimestamp: the computed `NodeState` """ - def get( - self, node: NodeInput, default: Optional[HistoryTimestamp] = None - ) -> Optional[HistoryTimestamp]: + def get(self, node: NodeInput, default: Optional[HistoryTimestamp] = None) -> Optional[HistoryTimestamp]: """ Get value for node @@ -6977,7 +6840,7 @@ class HistoryTimestampView(object): Iterator[HistoryTimestamp]: Iterator over values """ -class HistoryDateTimeView(object): +class HistoryDateTimeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -7026,9 +6889,7 @@ class HistoryDateTimeView(object): NodeStateHistoryDateTime: the computed `NodeState` """ - def get( - self, node: NodeInput, default: Optional[HistoryDateTime] = None - ) -> Optional[HistoryDateTime]: + def get(self, node: NodeInput, default: Optional[HistoryDateTime] = None) -> Optional[HistoryDateTime]: """ Get value for node @@ -7083,7 +6944,7 @@ class HistoryDateTimeView(object): Iterator[HistoryDateTime]: Iterator over values """ -class HistoryEventIdView(object): +class HistoryEventIdView(object): """A lazy view over node values""" def __eq__(self, value): @@ -7132,9 +6993,7 @@ class HistoryEventIdView(object): NodeStateHistoryEventId: the computed `NodeState` """ - def get( - self, node: NodeInput, default: Optional[HistoryEventId] = None - ) -> Optional[HistoryEventId]: + def get(self, node: NodeInput, default: Optional[HistoryEventId] = None) -> Optional[HistoryEventId]: """ Get value for node @@ -7189,7 +7048,7 @@ class HistoryEventIdView(object): Iterator[HistoryEventId]: Iterator over values """ -class IntervalsView(object): +class IntervalsView(object): """A lazy view over node values""" def __eq__(self, value): @@ -7238,9 +7097,7 @@ class IntervalsView(object): NodeStateIntervals: the computed `NodeState` """ - def get( - self, node: NodeInput, default: Optional[Intervals] = None - ) -> Optional[Intervals]: + def get(self, node: NodeInput, default: Optional[Intervals] = None) -> Optional[Intervals]: """ Get value for node @@ -7327,7 +7184,7 @@ class IntervalsView(object): Iterator[Intervals]: Iterator over values """ -class IntervalsFloatView(object): +class IntervalsFloatView(object): """A lazy view over node values""" def __eq__(self, value): @@ -7387,9 +7244,7 @@ class IntervalsFloatView(object): NodeStateOptionF64: the computed `NodeState` """ - def get( - self, node: NodeInput, default: Optional[Optional[float]] = None - ) -> Optional[Optional[float]]: + def get(self, node: NodeInput, default: Optional[Optional[float]] = None) -> Optional[Optional[float]]: """ Get value for node @@ -7515,7 +7370,7 @@ class IntervalsFloatView(object): Iterator[Optional[float]]: Iterator over values """ -class IntervalsIntegerView(object): +class IntervalsIntegerView(object): """A lazy view over node values""" def __eq__(self, value): @@ -7575,9 +7430,7 @@ class IntervalsIntegerView(object): NodeStateOptionI64: the computed `NodeState` """ - def get( - self, node: NodeInput, default: Optional[Optional[int]] = None - ) -> Optional[Optional[int]]: + def get(self, node: NodeInput, default: Optional[Optional[int]] = None) -> Optional[Optional[int]]: """ Get value for node @@ 
-7703,7 +7556,7 @@ class IntervalsIntegerView(object): Iterator[Optional[int]]: Iterator over values """ -class EdgeHistoryCountView(object): +class EdgeHistoryCountView(object): """A lazy view over node values""" def __eq__(self, value): @@ -7856,9 +7709,7 @@ class EdgeHistoryCountView(object): EdgeHistoryCountView: The layered view """ - def expanding( - self, step: int | str, alignment_unit: str | None = None - ) -> WindowSet: + def expanding(self, step: int | str, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `step` size using an expanding window. @@ -8005,12 +7856,7 @@ class EdgeHistoryCountView(object): Nodes: The nodes """ - def rolling( - self, - window: int | str, - step: int | str | None = None, - alignment_unit: str | None = None, - ) -> WindowSet: + def rolling(self, window: int | str, step: int | str | None = None, alignment_unit: str | None = None) -> WindowSet: """ Creates a `WindowSet` with the given `window` size and optional `step` using a rolling window. If `alignment_unit` is not "unaligned" and a `step` larger than `window` is provided, some time entries @@ -8190,7 +8036,8 @@ class EdgeHistoryCountView(object): Optional[int]: """ -class UsizeIterable(object): +class UsizeIterable(object): + def __eq__(self, value): """Return self==value.""" @@ -8218,13 +8065,22 @@ class UsizeIterable(object): def __repr__(self): """Return repr(self).""" - def collect(self): ... - def max(self): ... - def mean(self): ... - def min(self): ... - def sum(self): ... + def collect(self): + ... + + def max(self): + ... + + def mean(self): + ... -class NodeTypeView(object): + def min(self): + ... + + def sum(self): + ... + +class NodeTypeView(object): """A lazy view over node values""" def __eq__(self, value): @@ -8284,9 +8140,7 @@ class NodeTypeView(object): NodeStateOptionStr: the computed `NodeState` """ - def get( - self, node: NodeInput, default: Optional[Optional[str]] = None - ) -> Optional[Optional[str]]: + def get(self, node: NodeInput, default: Optional[Optional[str]] = None) -> Optional[Optional[str]]: """ Get value for node @@ -8420,7 +8274,8 @@ class NodeTypeView(object): Iterator[Optional[str]]: Iterator over values """ -class NodeStateOptionStr(object): +class NodeStateOptionStr(object): + def __eq__(self, value): """Return self==value.""" @@ -8462,9 +8317,7 @@ class NodeStateOptionStr(object): NodeStateOptionStr: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[Optional[str]] = None - ) -> Optional[Optional[str]]: + def get(self, node: NodeInput, default: Optional[Optional[str]] = None) -> Optional[Optional[str]]: """ Get value for node @@ -8598,7 +8451,8 @@ class NodeStateOptionStr(object): Iterator[Optional[str]]: Iterator over values """ -class NodeStateListDateTime(object): +class NodeStateListDateTime(object): + def __eq__(self, value): """Return self==value.""" @@ -8640,9 +8494,7 @@ class NodeStateListDateTime(object): NodeStateListDateTime: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[list[datetime]] = None - ) -> Optional[list[datetime]]: + def get(self, node: NodeInput, default: Optional[list[datetime]] = None) -> Optional[list[datetime]]: """ Get value for node @@ -8768,7 +8620,8 @@ class NodeStateListDateTime(object): Iterator[list[datetime]]: Iterator over values """ -class NodeStateWeightedSP(object): +class NodeStateWeightedSP(object): + def __eq__(self, value): """Return self==value.""" @@ -8799,9 +8652,7 @@ class 
NodeStateWeightedSP(object): def __repr__(self): """Return repr(self).""" - def get( - self, node: NodeInput, default: Optional[Tuple[float, Nodes]] = None - ) -> Optional[Tuple[float, Nodes]]: + def get(self, node: NodeInput, default: Optional[Tuple[float, Nodes]] = None) -> Optional[Tuple[float, Nodes]]: """ Get value for node @@ -8856,7 +8707,8 @@ class NodeStateWeightedSP(object): Iterator[Tuple[float, Nodes]]: Iterator over values """ -class NodeStateF64(object): +class NodeStateF64(object): + def __eq__(self, value): """Return self==value.""" @@ -9041,7 +8893,8 @@ class NodeStateF64(object): Iterator[float]: Iterator over values """ -class NodeStateOptionF64(object): +class NodeStateOptionF64(object): + def __eq__(self, value): """Return self==value.""" @@ -9083,9 +8936,7 @@ class NodeStateOptionF64(object): NodeStateOptionF64: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[Optional[float]] = None - ) -> Optional[Optional[float]]: + def get(self, node: NodeInput, default: Optional[Optional[float]] = None) -> Optional[Optional[float]]: """ Get value for node @@ -9211,7 +9062,8 @@ class NodeStateOptionF64(object): Iterator[Optional[float]]: Iterator over values """ -class NodeStateNodes(object): +class NodeStateNodes(object): + def __eq__(self, value): """Return self==value.""" @@ -9297,7 +9149,8 @@ class NodeStateNodes(object): Iterator[Nodes]: Iterator over values """ -class NodeStateReachability(object): +class NodeStateReachability(object): + def __eq__(self, value): """Return self==value.""" @@ -9328,9 +9181,7 @@ class NodeStateReachability(object): def __repr__(self): """Return repr(self).""" - def get( - self, node: NodeInput, default: Optional[list[Tuple[int, str]]] = None - ) -> Optional[list[Tuple[int, str]]]: + def get(self, node: NodeInput, default: Optional[list[Tuple[int, str]]] = None) -> Optional[list[Tuple[int, str]]]: """ Get value for node @@ -9385,7 +9236,8 @@ class NodeStateReachability(object): Iterator[list[Tuple[int, str]]]: Iterator over values """ -class NodeStateListF64(object): +class NodeStateListF64(object): + def __eq__(self, value): """Return self==value.""" @@ -9416,9 +9268,7 @@ class NodeStateListF64(object): def __repr__(self): """Return repr(self).""" - def get( - self, node: NodeInput, default: Optional[list[float]] = None - ) -> Optional[list[float]]: + def get(self, node: NodeInput, default: Optional[list[float]] = None) -> Optional[list[float]]: """ Get value for node @@ -9473,7 +9323,8 @@ class NodeStateListF64(object): Iterator[list[float]]: Iterator over values """ -class NodeStateMotifs(object): +class NodeStateMotifs(object): + def __eq__(self, value): """Return self==value.""" @@ -9515,9 +9366,7 @@ class NodeStateMotifs(object): NodeStateMotifs: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[list[int]] = None - ) -> Optional[list[int]]: + def get(self, node: NodeInput, default: Optional[list[int]] = None) -> Optional[list[int]]: """ Get value for node @@ -9643,7 +9492,8 @@ class NodeStateMotifs(object): Iterator[list[int]]: Iterator over values """ -class NodeStateHits(object): +class NodeStateHits(object): + def __eq__(self, value): """Return self==value.""" @@ -9685,9 +9535,7 @@ class NodeStateHits(object): NodeStateHits: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[Tuple[float, float]] = None - ) -> Optional[Tuple[float, float]]: + def get(self, node: NodeInput, default: 
Optional[Tuple[float, float]] = None) -> Optional[Tuple[float, float]]: """ Get value for node @@ -9813,7 +9661,7 @@ class NodeStateHits(object): Iterator[Tuple[float, float]]: Iterator over values """ -class NodeStateHistory(object): +class NodeStateHistory(object): """A NodeState of History objects for each node.""" def __eq__(self, value): @@ -9888,9 +9736,7 @@ class NodeStateHistory(object): History: A history object containing all time entries. """ - def get( - self, node: NodeInput, default: Optional[History] = None - ) -> Optional[History]: + def get(self, node: NodeInput, default: Optional[History] = None) -> Optional[History]: """ Get History object for the node. @@ -9971,7 +9817,8 @@ class NodeStateHistory(object): Iterator[History]: Iterator over History objects. """ -class NodeStateHistoryTimestamp(object): +class NodeStateHistoryTimestamp(object): + def __eq__(self, value): """Return self==value.""" @@ -10002,9 +9849,7 @@ class NodeStateHistoryTimestamp(object): def __repr__(self): """Return repr(self).""" - def get( - self, node: NodeInput, default: Optional[HistoryTimestamp] = None - ) -> Optional[HistoryTimestamp]: + def get(self, node: NodeInput, default: Optional[HistoryTimestamp] = None) -> Optional[HistoryTimestamp]: """ Get value for node @@ -10059,7 +9904,8 @@ class NodeStateHistoryTimestamp(object): Iterator[HistoryTimestamp]: Iterator over values """ -class NodeStateHistoryDateTime(object): +class NodeStateHistoryDateTime(object): + def __eq__(self, value): """Return self==value.""" @@ -10090,9 +9936,7 @@ class NodeStateHistoryDateTime(object): def __repr__(self): """Return repr(self).""" - def get( - self, node: NodeInput, default: Optional[HistoryDateTime] = None - ) -> Optional[HistoryDateTime]: + def get(self, node: NodeInput, default: Optional[HistoryDateTime] = None) -> Optional[HistoryDateTime]: """ Get value for node @@ -10147,7 +9991,8 @@ class NodeStateHistoryDateTime(object): Iterator[HistoryDateTime]: Iterator over values """ -class NodeStateHistoryEventId(object): +class NodeStateHistoryEventId(object): + def __eq__(self, value): """Return self==value.""" @@ -10178,9 +10023,7 @@ class NodeStateHistoryEventId(object): def __repr__(self): """Return repr(self).""" - def get( - self, node: NodeInput, default: Optional[HistoryEventId] = None - ) -> Optional[HistoryEventId]: + def get(self, node: NodeInput, default: Optional[HistoryEventId] = None) -> Optional[HistoryEventId]: """ Get value for node @@ -10235,7 +10078,8 @@ class NodeStateHistoryEventId(object): Iterator[HistoryEventId]: Iterator over values """ -class NodeStateIntervals(object): +class NodeStateIntervals(object): + def __eq__(self, value): """Return self==value.""" @@ -10266,9 +10110,7 @@ class NodeStateIntervals(object): def __repr__(self): """Return repr(self).""" - def get( - self, node: NodeInput, default: Optional[Intervals] = None - ) -> Optional[Intervals]: + def get(self, node: NodeInput, default: Optional[Intervals] = None) -> Optional[Intervals]: """ Get value for node @@ -10363,7 +10205,8 @@ class NodeStateIntervals(object): Iterator[Intervals]: Iterator over values """ -class NodeStateSEIR(object): +class NodeStateSEIR(object): + def __eq__(self, value): """Return self==value.""" @@ -10405,9 +10248,7 @@ class NodeStateSEIR(object): NodeStateSEIR: The k smallest values as a node state """ - def get( - self, node: NodeInput, default: Optional[Infected] = None - ) -> Optional[Infected]: + def get(self, node: NodeInput, default: Optional[Infected] = None) -> Optional[Infected]: """ 
Get value for node @@ -10533,7 +10374,8 @@ class NodeStateSEIR(object): Iterator[Infected]: Iterator over values """ -class NodeLayout(object): +class NodeLayout(object): + def __eq__(self, value): """Return self==value.""" @@ -10564,9 +10406,7 @@ class NodeLayout(object): def __repr__(self): """Return repr(self).""" - def get( - self, node: NodeInput, default: Optional[list[float]] = None - ) -> Optional[list[float]]: + def get(self, node: NodeInput, default: Optional[list[float]] = None) -> Optional[list[float]]: """ Get value for node @@ -10621,7 +10461,8 @@ class NodeLayout(object): Iterator[list[float]]: Iterator over values """ -class NodeStateF64String(object): +class NodeStateF64String(object): + def __eq__(self, value): """Return self==value.""" @@ -10652,9 +10493,7 @@ class NodeStateF64String(object): def __repr__(self): """Return repr(self).""" - def get( - self, node: NodeInput, default: Optional[Tuple[float, str]] = None - ) -> Optional[Tuple[float, str]]: + def get(self, node: NodeInput, default: Optional[Tuple[float, str]] = None) -> Optional[Tuple[float, str]]: """ Get value for node diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index 17f4110280..813ac72c63 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -25,7 +25,6 @@ use pyo3::{ prelude::*, types::{PyCapsule, PyDict}, }; -use pyo3::{prelude::*, types::PyCapsule}; use pyo3_arrow::PyRecordBatchReader; use raphtory_api::core::entities::properties::prop::{arrow_dtype_from_prop_type, Prop, PropType}; use std::{ diff --git a/raphtory/src/python/packages/base_modules.rs b/raphtory/src/python/packages/base_modules.rs index fe6496a56f..4451ec625d 100644 --- a/raphtory/src/python/packages/base_modules.rs +++ b/raphtory/src/python/packages/base_modules.rs @@ -59,7 +59,10 @@ use crate::{ }, }; use pyo3::prelude::*; -use raphtory_api::python::{timeindex::{PyEventTime, PyOptionalEventTime}, prop::PyPropType}; +use raphtory_api::python::{ + prop::PyPropType, + timeindex::{PyEventTime, PyOptionalEventTime}, +}; pub fn add_raphtory_classes(m: &Bound) -> PyResult<()> { //Graph classes From 3b4cdc61ea4e78382b280f72804bf72a83c4b6b3 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Tue, 16 Dec 2025 00:11:54 -0500 Subject: [PATCH 43/55] Added test for loading from directories (pure parquet, pure csv, mixed parquet/csv). Make sure each ingestion path returns the same node ids. 
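In outline, the check added here is (a minimal sketch, with placeholder paths standing in for the dataset_tests files the test hardcodes): build a fresh Graph per source, load the same node data, and require the sorted node ids from every ingestion path to agree.

    # Sketch only; the paths are placeholders. Graph, load_nodes and
    # nodes.id.collect are the calls the test itself uses.
    from pathlib import Path
    from raphtory import Graph

    def node_ids(source):
        g = Graph()
        g.load_nodes(data=source, time="block_timestamp", id="inputs_address")
        return sorted(g.nodes.id.collect())

    baseline = node_ids("dataset_tests/flattened_data_subset.parquet")    # single file
    assert node_ids(Path("dataset_tests/parquet_directory")) == baseline  # directory of files
    assert node_ids(Path("dataset_tests/mixed_directory")) == baseline    # mixed parquet/csv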
--- python/tests/test_load_from_df.py | 50 ++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py index 8af08b84b1..a395a2cf9a 100644 --- a/python/tests/test_load_from_df.py +++ b/python/tests/test_load_from_df.py @@ -43,64 +43,78 @@ def test_load_edges_from_polars_df(graph_type): assert _collect_edges(g_from_df) == expected def test_different_data_sources(): - num_nodes_ingested = [] + nodes_list = [] ######### PARQUET ######### - parquet_dir_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/parquet_subset" - parquet_file_path_str = parquet_dir_path_str + "/flattened_data_subset.parquet" + parquet_dir_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/parquet_directory" + parquet_file_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/flattened_data_subset.parquet" # test path string for parquet file g = Graph() g.load_nodes(data=parquet_file_path_str, time="block_timestamp", id="inputs_address") - num_nodes_ingested.append(len(g.nodes)) + nodes_list.append(sorted(g.nodes.id.collect())) del g # test Path object for parquet file file_path_obj = Path(parquet_file_path_str) g = Graph() g.load_nodes(data=file_path_obj, time="block_timestamp", id="inputs_address") - num_nodes_ingested.append(len(g.nodes)) + nodes_list.append(sorted(g.nodes.id.collect())) del g # test path string for parquet directory g = Graph() g.load_nodes(data=parquet_dir_path_str, time="block_timestamp", id="inputs_address") - num_nodes_ingested.append(len(g.nodes)) + nodes_list.append(sorted(g.nodes.id.collect())) del g # test Path object for parquet directory dir_path_obj = Path(parquet_dir_path_str) g = Graph() g.load_nodes(data=dir_path_obj, time="block_timestamp", id="inputs_address") - num_nodes_ingested.append(len(g.nodes)) + nodes_list.append(sorted(g.nodes.id.collect())) del g ######### CSV ######### - csv_dir_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/csv_subset" - csv_file_path_str = csv_dir_path_str + "/flattened_data_subset.csv" + csv_dir_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/csv_directory" + csv_file_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/flattened_data_subset.csv" # test path string for CSV file g = Graph() g.load_nodes(data=csv_file_path_str, time="block_timestamp", id="inputs_address") - num_nodes_ingested.append(len(g.nodes)) + nodes_list.append(sorted(g.nodes.id.collect())) del g # test Path object for CSV file file_path_obj = Path(csv_file_path_str) g = Graph() g.load_nodes(data=file_path_obj, time="block_timestamp", id="inputs_address") - num_nodes_ingested.append(len(g.nodes)) + nodes_list.append(sorted(g.nodes.id.collect())) del g # test path string for CSV directory g = Graph() g.load_nodes(data=csv_dir_path_str, time="block_timestamp", id="inputs_address") - num_nodes_ingested.append(len(g.nodes)) + nodes_list.append(sorted(g.nodes.id.collect())) del g # test Path object for CSV directory dir_path_obj = Path(csv_dir_path_str) g = Graph() g.load_nodes(data=dir_path_obj, time="block_timestamp", id="inputs_address") - num_nodes_ingested.append(len(g.nodes)) + nodes_list.append(sorted(g.nodes.id.collect())) + del g + + ######### mixed directory ######### + mixed_dir_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/mixed_directory" + # test path string + g = Graph() + g.load_nodes(data=mixed_dir_path_str, time="block_timestamp", id="inputs_address") +
nodes_list.append(sorted(g.nodes.id.collect())) + del g + + # test Path object + g = Graph() + g.load_nodes(data=Path(mixed_dir_path_str), time="block_timestamp", id="inputs_address") + nodes_list.append(sorted(g.nodes.id.collect())) del g ######### arrow_c_stream ######### @@ -108,20 +122,20 @@ def test_different_data_sources(): df_pd = pd.read_parquet(parquet_file_path_str) g = Graph() g.load_nodes(data=df_pd, time="block_timestamp", id="inputs_address") - num_nodes_ingested.append(len(g.nodes)) + nodes_list.append(sorted(g.nodes.id.collect())) del g, df_pd # test polars df_pl = pl.read_parquet(parquet_file_path_str) g = Graph() g.load_nodes(data=df_pl, time="block_timestamp", id="inputs_address") - num_nodes_ingested.append(len(g.nodes)) + nodes_list.append(sorted(g.nodes.id.collect())) del g, df_pl # sanity check, make sure we ingested the same number of nodes each time - print(f"Number of tests ran: {len(num_nodes_ingested)}") - for i in range(len(num_nodes_ingested)-1): - assert num_nodes_ingested[0] == num_nodes_ingested[i+1] + print(f"Number of tests ran: {len(nodes_list)}") + for i in range(len(nodes_list)-1): + assert nodes_list[0] == nodes_list[i+1], f"Nodes list assertion failed at item i={i}" def test_schema_casting(): # time/id as regular ints (I64), value column as explicit int32 From 1b013067473aec13e14f94a20d4b2f881ecde295 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Wed, 17 Dec 2025 03:53:27 -0500 Subject: [PATCH 44/55] Added btc_dataset tests for loading/casting from different sources as well as failures from malformed inputs --- .../data/btc_dataset/csv_directory/part1.csv | 51 +++++ .../data/btc_dataset/csv_directory/part2.csv | 51 +++++ .../data/btc_dataset/csv_directory/part3.csv | 51 +++++ .../data/btc_dataset/csv_directory/part4.csv | 51 +++++ .../tests/data/btc_dataset/flattened_data.csv | 201 ++++++++++++++++++ .../data/btc_dataset/flattened_data.parquet | Bin 0 -> 19941 bytes .../malformed_files/missing_col.parquet | Bin 0 -> 2843 bytes .../timestamp_malformed.parquet | Bin 0 -> 2355 bytes .../btc_dataset/mixed_directory/part1.parquet | Bin 0 -> 7023 bytes .../btc_dataset/mixed_directory/part2.csv | 51 +++++ .../btc_dataset/mixed_directory/part3.parquet | Bin 0 -> 7314 bytes .../btc_dataset/mixed_directory/part4.csv | 51 +++++ .../parquet_directory/part1.parquet | Bin 0 -> 7023 bytes .../parquet_directory/part2.parquet | Bin 0 -> 7110 bytes .../parquet_directory/part3.parquet | Bin 0 -> 7314 bytes .../parquet_directory/part4.parquet | Bin 0 -> 7188 bytes python/tests/test_load_from_df.py | 165 +++++++++++++- raphtory/src/python/graph/graph.rs | 143 ------------- 18 files changed, 667 insertions(+), 148 deletions(-) create mode 100644 python/tests/data/btc_dataset/csv_directory/part1.csv create mode 100644 python/tests/data/btc_dataset/csv_directory/part2.csv create mode 100644 python/tests/data/btc_dataset/csv_directory/part3.csv create mode 100644 python/tests/data/btc_dataset/csv_directory/part4.csv create mode 100644 python/tests/data/btc_dataset/flattened_data.csv create mode 100644 python/tests/data/btc_dataset/flattened_data.parquet create mode 100644 python/tests/data/btc_dataset/malformed_files/missing_col.parquet create mode 100644 python/tests/data/btc_dataset/malformed_files/timestamp_malformed.parquet create mode 100644 python/tests/data/btc_dataset/mixed_directory/part1.parquet create mode 100644 python/tests/data/btc_dataset/mixed_directory/part2.csv create mode 100644 python/tests/data/btc_dataset/mixed_directory/part3.parquet create mode 
100644 python/tests/data/btc_dataset/mixed_directory/part4.csv create mode 100644 python/tests/data/btc_dataset/parquet_directory/part1.parquet create mode 100644 python/tests/data/btc_dataset/parquet_directory/part2.parquet create mode 100644 python/tests/data/btc_dataset/parquet_directory/part3.parquet create mode 100644 python/tests/data/btc_dataset/parquet_directory/part4.parquet diff --git a/python/tests/data/btc_dataset/csv_directory/part1.csv b/python/tests/data/btc_dataset/csv_directory/part1.csv new file mode 100644 index 0000000000..b36f39434a --- /dev/null +++ b/python/tests/data/btc_dataset/csv_directory/part1.csv @@ -0,0 +1,51 @@ +block_timestamp,inputs_address,outputs_address +2025-11-10 00:28:09,bc1q40a3a5xlwv0l4n4xs3mpvauscgkj78rcuhvrhs,bc1q9xh9exh34tq566nfvpwwupum6efh3yqc0aja5d +2025-11-10 05:47:18,bc1q2zn0548tc0njq9xxt0uamxxh48s8t7c9dw0a45,3K1t5MthuwcyNn4BWHp4eFSyTY8ifDjTkk +2025-11-10 12:17:32,bc1qxsp4nqu7zq424wdz4v7c6q8jylhtlskap86kpd,36jsyuJHY2Eqs48yjUb6xyzARSPYKKSrTF +2025-11-10 00:28:09,bc1qvpah3cjguuwmjjgwp2g9t52f786jnw7km3jgau,34WkZeqyNBb5FXMxLZJfveHNm2kqngt4w4 +2025-11-10 13:42:54,bc1q6xku4eta99267etxf84yy3qc2pyw04pq8dvqye,bc1q6xku4eta99267etxf84yy3qc2pyw04pq8dvqye +2025-11-10 13:42:54,bc1q50uxvgu5paqj22fg27s352wggev70xs9f8lhnk,bc1qxgxf44zkxrz8ld33kjvf85ekmmkcaexs7xkfyg +2025-11-10 05:47:18,bc1q5v45pufhugrqmp9a7taq92y4uqu8awpqcsrjq2,33bdcPeLq1Mk8TrHZ6sPwjpNbfHf6Y5UCg +2025-11-10 15:38:35,bc1qhkar0wasylsfhwamp8d023e2tqzcjfzjlx6wrd,bc1q4mhkgdfmattuya75rescq5u3ms4u3m56x8nzcz +2025-11-10 18:01:50,bc1qw8wrek2m7nlqldll66ajnwr9mh64syvkt67zlu,bc1qxwea59rqnldu5hl2zg3pt9c0av7eee4dfjnj7u +2025-11-10 17:59:43,bc1qn5yd54kvaa60jqw0ncx93ru7ctlk0727653ynz,bc1qehhvcqru3k3qfn95r9w3nf8jxnlflnw4d9duah +2025-11-10 15:34:09,bc1qr7c9p424qed5mqy33luxpu35wmxdl8vd383xze,bc1q26h4h4v5u9m0nk8mv9mn2pfpmcyemeuntee8xs +2025-11-10 20:29:45,bc1qsaqmj46l3jl42vczyqa34fqukdwdxwndj3f5ah,bc1qsaqmj46l3jl42vczyqa34fqukdwdxwndj3f5ah +2025-11-10 20:29:45,bc1qk4jzsk3qn6dra7re4cs4tjezhz4udr86cvgs0x,1NoeLVMHeSC9gvw1FdAnmHTNrHX9iTNXoK +2025-11-10 00:28:09,bc1qlmgaecwta0h2k3wc0yx5gfeagyj2u4kq7p525x,bc1qvrwer4355chxf9zcyd8lweaesk30r797f96v7v +2025-11-10 20:57:25,bc1q2dshq3fv3nxpu0h5t06a2drcpl89u9xvncdena,bc1q6vqedrdgh9j97jp4nhamyjqpf27rtldwr5zm07 +2025-11-10 14:59:57,bc1qf4g86wre0gmzxxh2c0p5892ne4rdrlxe28pazd,bc1qvmf3qpe3lt8asy8g86vxvjnzkn366gn2q0qyly +2025-11-10 18:01:50,bc1qex279aerh885fr62m0lnv3htvja2kw9km9c2aa,bc1qex279aerh885fr62m0lnv3htvja2kw9km9c2aa +2025-11-10 00:28:09,bc1qdj7cz87pd3st0ah6yhy5ltmv423ltd0xxycn7r,bc1qyn66t4wue704rz4r37mye65hsqypg0juq89068 +2025-11-10 14:56:34,bc1qjpdqs9fjgapv305p2w7lxh7zk49rjjrqzwqguf,3QNQ49CBeC63Z3r3C2D3s5grMVKZQTLQ1J +2025-11-10 17:23:00,bc1qxw0vxcnwta0n8q4s7apjtsdc72uqc8dt5wvraw,3Hs4uUpc2ntg7ajWjoiHBEksU5182GzDft +2025-11-10 13:42:54,3NtA69Kn6Z6w8A7eQNXyz5ijYZxnynvT7N,3QCCr2s4NHn2BR2oTKNcZPFGEf5cjt4BDP +2025-11-10 17:59:43,bc1qcpl66gg9wpyj4g9umh9ye0mtsgtzmmlmrcjpr5,bc1qdkwxe7cuq0z35tfanuavvmpxrkqlhy72esnj98 +2025-11-10 17:23:00,bc1qlrex5kmzc4kdvwjl0j7sm2pvewvwaylrnrlns7,bc1qs2en2wud3q99wq7ykwdwrwzkpxfluchycw6wvs +2025-11-10 00:28:09,bc1qhazv3u50uvv5lw83sgd2dnlt80j2fn2j39h4rz,3CFP97ZCKiy6dmv2wRuJtUUYZcGMXjzfKA +2025-11-10 17:23:00,bc1qch5udvlvm9luneywcfjxjfvy6wuy6hysku8vw7,bc1q3qsupsucu6sse2uwpc2hdysf3mu5wfl9rnexvu +2025-11-10 14:56:34,bc1qenk9rwm8phgfy0w9zl2fdsejxhxc86sm67y5we,bc1qzclu4epvxu9e6a5c2mk650fzdw4626352lyf2e +2025-11-10 15:38:35,bc1qxgjzdgp0yrh69llsgepm0nfrj2h0l9cjvk9p7w,bc1qzttelzj04e87qcvfujke7wdtfldug4zmugmwh8 +2025-11-10 
14:59:57,bc1qdyr4723zc8azryex3weyspdhhdptlwurfzej5v,156kgpUXXggfZeyFdgxxs6Eouok9MscqZr +2025-11-10 20:57:25,bc1qsc5q87uem04xzedure8u90452hgx8h00y96ssa,bc1qmcx2377el75jxxg8y5673srdgj7v0jgjn4y0u2 +2025-11-10 14:59:57,1GbUhB5Aub17yjLaFf1fPNFN8icPzXn3sU,bc1qsj84dmnupc25zhecdxfhqnhtg4g63w8h0q260g +2025-11-10 20:29:45,bc1qpsys7sfk5u7ue3lffwzszzvffhtku78kr0vva4,bc1qt67jql0p02yvjncgqvfjzwnc2fwhjszm3d9jhmgsnkpxkaf3j6sq6cdn7c +2025-11-10 18:01:50,1NKKRqJDwJjdnYL1Wn7QsUjxdDxqJAL6vG,3GEeyC3sDSDQYMnsfYdMUJGXpGoXnz1Q5B +2025-11-10 18:01:50,bc1q77flh60ngm58qzpuhcvuxtzp2z6qc3ww52udm3,12tPKMdmLCReFCjLiif7bng5VX8G7WTHjw +2025-11-10 15:34:09,bc1qr7whs7crqjpqshv7qk9ex4742wagf9gnznjulc,bc1qehv33h4ez6drn55yqwa0v8mwzpzcgqsu2gkejyy3e24m4w2az2jq0mcke9 +2025-11-10 14:12:11,1N6iknsjUx9R9JiarC136TVCJzUSpjr8V5,37exsSwT2HfBqgeM68kzKXvG4aJXW6ERnE +2025-11-10 14:12:11,bc1qtz6tnlusht3la6p6gnjfgd3ad6zneuqk64k5j8,1HcmTmdkbmVbuof1vurum74PMcmC5Lf7PM +2025-11-10 05:47:18,bc1qm34lsc65zpw79lxes69zkqmk6ee3ewf0j77s3h,bc1qq4e2xm40vgmz79rrh4l3zrrf7eg96l2q3tw4kv +2025-11-10 14:59:57,bc1q9r8pgy2xquxj4ph8gjatle587dsvqwjrmyflx4,bc1q0prfw6u83g4pj9vyzpzgxpq5u4xtn3kj8ltg2a +2025-11-10 13:42:54,bc1qemdd6pq7qar7hs33jvvsv4tlt5edasxk996hda,39ohXNyW4mXo652kA5CzsH87HDwuDmmNuY +2025-11-10 15:34:09,bc1qmzhda5986ssuf45x2qzymtuxlc55qs7ddds7ae,bc1qwujtjmpk7eavveax6e2crygzsz59pl7qnudp8a +2025-11-10 14:56:34,bc1qzkunkrkz6q5ut6wwcjktn3fshgshx686d0xqtg,bc1qpzuj9jl3gxarhdt6rzxynk0cs9a7vw0acp5chf +2025-11-10 20:29:45,bc1qnlp6y2cxtnsm4u6xfn6yjvkvjp4u8x08fsp4yq,bc1qkc2quddu4nutdn7p0gcz4kpenex6ukxqjmxj62 +2025-11-10 14:56:34,bc1ph6td9w6mfvj0s00vj3240pclurgmeenw4a2c84vppnswufwmp5mspmp28a,3MP4GcjaoPcFunef59ZxtjNkwb3gEw2ZPS +2025-11-10 12:17:32,12RVBQfdCa4ctMCFrwPGKz1iLSs5wwdyoV,bc1q2nalwjxe947fe4zl4rmd6g97vfrqn3af5ema38 +2025-11-10 20:29:45,bc1qqf89nhg5pkrmzafsranjn6rku5nv484vtagfuy,bc1q8pkcam0ut9e3c2azzj5292q4tdszztqn7mrxfq +2025-11-10 17:59:43,bc1qns9f7yfx3ry9lj6yz7c9er0vwa0ye2eklpzqfw,bc1q8gelljdlq5dyg20hsacga6my0k462k0muewp64 +2025-11-10 12:17:32,bc1q2sc0phtfha7n50sd250g6gpx7w36y6hr75cdd9,bc1qvdsw7yv7e6pz0p8pxfweqhu0qhj89tvyc0wv4j +2025-11-10 20:29:45,bc1pw86pslqfvc6hhxalwjcsdp24te6hnyfxhvckkf8fjmf3wlp42vwq9yqwtt,bc1pykfp9h5x7umf3ksrgnff44k5yen7kps5gz2ee8znkltp7udth84sq37w06 +2025-11-10 05:47:18,37jdMXYbvg3dKzJ4pGSYiABiXoBy4putZq,bc1qrs2d75werh8nmvr4makktpyuu4l7uex9yr05ja +2025-11-10 15:38:35,bc1qep5xck85tsry58d36xxvdpwwh3s4vm04v7t4cw,bc1q95lyl6wtxs5zt7dna9qwjfvg3k84n5z87fuyuc diff --git a/python/tests/data/btc_dataset/csv_directory/part2.csv b/python/tests/data/btc_dataset/csv_directory/part2.csv new file mode 100644 index 0000000000..b606b24e44 --- /dev/null +++ b/python/tests/data/btc_dataset/csv_directory/part2.csv @@ -0,0 +1,51 @@ +block_timestamp,inputs_address,outputs_address +2025-11-10 17:59:43,bc1qthv7smt28rt2ctyg5ec3vkvx0hl4ckks9m75c3,bc1q2g6nxqvv2s09plap4x4j6pgnc2y42lmjy8fpxf +2025-11-10 20:29:45,bc1qt9nvg8mwuqmjlruc8z9m4kns2hd3gk2ylsdryp,3JDMdeEtsEfnABR6AYaHNU8fy9B7uDJduH +2025-11-10 12:17:32,bc1q4996ykxqxnhey7sh9fzvn3rre3mswta69myft7,bc1qlxsa2yndh2xt4rpmw5pt7eqaelkzd54psvx4xa +2025-11-10 05:47:18,bc1q449ruh9ga8pzd4uzds83zqckwp6yqxfvra7fvq645ev7sqefj9jsdkpegd,bc1p5xpw59psu6yyzmee8s8ptqy6gvpfls0zsegp5krw0x6dp2mtru2qcxq0tk +2025-11-10 12:17:32,3FXf66gb6NNMA52WSBBjJyMmJ3t75hLRDM,1EHF3NhZPLWEaEppSHqK5AVGB8zX5xTXEm +2025-11-10 17:23:00,bc1q4pmrg0q6ywlsd4vutmmv65g7n4mgp7tf2hz9gx,3Hqj4K5kofSnRa8MjJPt43yw5gd5f5sdKG +2025-11-10 14:59:57,bc1q3aus5ka7pq2wcjuesxqlaqwhxgckxxlx6t6ejy,bc1qwl8jvjvkyeun7wj09xkcpwt4g7sqajlzush8j2 +2025-11-10 
15:38:35,bc1qh2cwxv899psevx5yhr9cvuvqpcwqr3ej4ns87a,bc1qkuwtp76a89y40nvp6235xh9u5rpumv0arpm8rq +2025-11-10 14:12:11,bc1qa7a3ndesd0n2srxzdhnkksu0hkc96rp5uyyq95,bc1q3e0xr3pqz0rad4uakctg7j2crnf3v53na9c567 +2025-11-10 14:12:11,bc1qvxs7r6w2z3aguqzdecql7t5vkr44q9gccgetyn,bc1q0qdfdjjp9w872r406tlff34tdy9wa7wvdgutd9 +2025-11-10 14:56:34,bc1qsw6zezpduq9985vxs48uhffv82jzxgl8rm5mpd,bc1qsw6zezpduq9985vxs48uhffv82jzxgl8rm5mpd +2025-11-10 05:47:18,bc1q2alxcv39eku3cvctdudkqf2y2fvqu2t49wlrj2,bc1q2alxcv39eku3cvctdudkqf2y2fvqu2t49wlrj2 +2025-11-10 12:17:32,bc1qm34lsc65zpw79lxes69zkqmk6ee3ewf0j77s3h,bc1qkqd2ewnd8mw358qw24vmwgepz8n8cta0y2pe97 +2025-11-10 05:47:18,1AGD42b91ycvAcMrCCQ1G5iiBZooPP9xqX,1AGD42b91ycvAcMrCCQ1G5iiBZooPP9xqX +2025-11-10 20:57:25,bc1qmv76fxychuzcuvt3uy5uy3seu6jvxrsfz4993w,14s7DCukciJLAFJsYDzPK7ZfFP2thzU1Yt +2025-11-10 14:56:34,35C2L1pCgwzBHNcDcVL1a5RuoefeWqyjAR,bc1qhfsad8wuvke9eczj5g4e287hz93g7t8nwn9gxj +2025-11-10 15:34:09,3HYjQZaytMNixSZwU6Dkd7QBQhs2yFcp3E,14BuunTtFx9HMvgJzJQnxR5VNgjXCKckSv +2025-11-10 21:05:57,bc1qx54ge94tzqtfjzqy0dm9z9q6yqdm75v3ed8g39,166dELRZqWwRjXXVdbahWLi7muS9A2jjJv +2025-11-10 14:59:57,bc1pthxvxwdparr9pg5s9dxl4s4u3f744qwn9atl3gcltvjx76hyg4kqx230gh,bc1pthxvxwdparr9pg5s9dxl4s4u3f744qwn9atl3gcltvjx76hyg4kqx230gh +2025-11-10 14:59:57,38Vfgc9RT5EptuLMQTrnJ65eZgQtDpneom,bc1qeq2flx8mfj3gn3j58uwaj5u2xzpt8qmvet46nl +2025-11-10 14:12:11,bc1q7cyrfmck2ffu2ud3rn5l5a8yv6f0chkp0zpemf,bc1qx0m6mzl4756vwg23jxkdpew03wwfze856phhxl +2025-11-10 17:23:00,bc1qkqw4qj5gplgkns6ysg6xd6986a4xrwpg9fsy36,bc1q9stxzl5x02rrq0cfmlfh4epumn6zvq6asa9n0u +2025-11-10 00:28:09,bc1q6nlqnu6jad5fygq7v5kua2zpr2h5x288szq5km,35AtUZgvWh9uAHhX6fMidJcbVTJTjC4Mcs +2025-11-10 14:56:34,bc1qm2safrrv8e6696sgtksv36yg066qcafc39mu3y,34Yq1C3TS1V5S8w3CNx3SN2W3CdjoGu9QN +2025-11-10 14:12:11,1GbUhB5Aub17yjLaFf1fPNFN8icPzXn3sU,bc1qw3qld7muf6feg5jlypcdfrmq4hzanx92xagswe +2025-11-10 05:47:18,1NfJSiqBw4fb74KgVQrPsk5W5aqitAD1Xv,1L6yJi7TcjztgX7W8ds1zz2NGEnT7GdoAz +2025-11-10 05:47:18,bc1qd9gg9qhkswtp26kujvc29zc6daj4yzv6qsgur3,3MqsziNKsuFwM9unjhAsKYZCQ7mNfYTSv5 +2025-11-10 20:57:25,bc1qdvdt8vjn4lpep3hr6kr90uwmjr7rf906ac5lxn,bc1qqz5ll7nwghwjg9e9d0wjpx54cs5rc6uu48emtf +2025-11-10 12:17:32,37jdMXYbvg3dKzJ4pGSYiABiXoBy4putZq,19WpQ6KYi2SGKRnsgkeX5ALgZjgSMPiqmu +2025-11-10 15:34:09,bc1qz9t7rmkf3wn3yuz48e0fhg8dvzf7hsmf3a6at9,3R1Ku4KBj6y9ekKkCJXnrVkUzGNjZtEXah +2025-11-10 15:38:35,bc1qmkavqx59gg4aucmctctww3nve6x9keln9s07zz,1HzijFYeyDDUKym1siU5hBiuyu9WVRPcVj +2025-11-10 20:57:25,bc1qkplg5aqltwln2ks4shezddemdffh9pmt73xnpd,bc1qgx3c07j5enqz0pfwpkchpx6cwh690zst9l4dz0 +2025-11-10 00:28:09,bc1qt2xxqsy0tnvcvml47t4ugrvm8p8h9skkv886uv,bc1q2sa6jadt6gry89778csh2lmu2xaw2javvq9srn +2025-11-10 20:57:25,bc1qlpygchhl3j07mh9z7gcsqzjfapyyurm9amvhh8,bc1q0z4v89tecuy5e5cr3hdj6ts8zz0ky79mmattsv +2025-11-10 21:05:57,15QKr87rKdX8g7kmJ2DBWEgbWiGrnxBTnM,3Ae1fKTuLvPNTqjAsuXznf6cFySEtcArho +2025-11-10 15:34:09,bc1q2z994nye47fnwxxy4nwukfg9kkq0m5xe7hxpq2,bc1q4saygf3hk4cl0hej8e5rr2wpdza8zga6fqshqt8mrlzgpt348chqgs532d +2025-11-10 20:57:25,bc1qagt6ng896jhghzuxhqzrcmkn0nq7tzpjejghgz,bc1q3rqkcktkqnem5yfzmrzya289m2xjl9vl37fgmxdwkhl3n7f9l35qk2u36p +2025-11-10 00:28:09,bc1qwthqxlv39qwxt6j5c5zdhxprjfkpy4qgre6nvl,bc1qc3h9tdncgalkv3yeaw5fxsn0ktdavxmvzmcm09ge4k59psxxx60qmwcmx6 +2025-11-10 20:57:25,bc1qlll42hhmtn7pwz6srexps6v8zm2tqp2p7tx4pt,bc1qzheza7hkc4jyp67vhtgrkaxpddwl3z0eada6fe +2025-11-10 14:12:11,bc1pah7lzc5rcms23lvnsrsj68atcagcg25j6kzlk7aytjtludwrl5lqclvw3t,bc1pmspgjsaxqfdvzkg7du7sgpclldwf0fajrkn36qgkrat99ugjz2ushjvq57 +2025-11-10 
18:01:50,bc1qmgv6leqa0rpu40ycpndxg9hggrqdxcdgtmvp34,18N8SP9Ui6WKLtDWks6DfwMMMeJT4W5TDB +2025-11-10 17:23:00,bc1q65h44f9taql5ew5nzd2xzkmq35wcag2uugvj82,bc1qhzvug7trqm7n36g5azdqa968rl9ww5rvwzt69w +2025-11-10 20:57:25,bc1qrgl0yu3zuglvcgrdsglc5a8zdsud6n023naqaz,bc1qlg7shfvat4p0fg0n9rzap3qxryu45sljuxy4qp +2025-11-10 14:56:34,bc1q4dd592zay7mpw9arzj02pr4hr6e5yj603r29sy,bc1qepmkx8n0flmw3qq9puwwj0rpeq4atxe8tktef2 +2025-11-10 17:23:00,bc1qp68ctlw77lkyqjyxfn200ergk079jqpxny4yj2,bc1qp68ctlw77lkyqjyxfn200ergk079jqpxny4yj2 +2025-11-10 14:12:11,bc1qa3tac82ncmpd34cy2g7a0xvhm2eh9wec5dcnl0,bc1q6f3kdsu9jxvk4uv62nzmdfcmy0pu6k7y7p7w3f +2025-11-10 17:59:43,bc1qdkkaprszrgxmn6umkrs7fzufpzrefdzz7p5heu,bc1qc2qw3a5m035jcaughewp8w2mst0w788fym5fmn +2025-11-10 13:42:54,bc1qryhgpmfv03qjhhp2dj8nw8g4ewg08jzmgy3cyx,bc1q08hcxrtxl28erd7tmevja57u8af76at6qukmfu +2025-11-10 14:56:34,bc1qjq2l9469dqklhejlvkr3va9qrd343mwrtzu4v4,bc1qlnrcexfn0z6n64zq8jj7ulful6y76gs5yvme3w +2025-11-10 20:29:45,bc1qkhl6epe73jec02jv3sp2lvdfhppyg874wxs9l9,bc1q7t4vyehjsexdme84qhdgd4dawcn54djh0m78fz diff --git a/python/tests/data/btc_dataset/csv_directory/part3.csv b/python/tests/data/btc_dataset/csv_directory/part3.csv new file mode 100644 index 0000000000..a4311629f1 --- /dev/null +++ b/python/tests/data/btc_dataset/csv_directory/part3.csv @@ -0,0 +1,51 @@ +block_timestamp,inputs_address,outputs_address +2025-11-10 21:05:57,bc1qedynrxg6cslv6x09mf08xl3k2ezxj44dnf5lql4tfjtltl5csr5saaxpq7,bc1p3szdkat5d9p5chwy5jre2fz6srsarmhl2vset0v77wd92pz3n2hs5ly07y +2025-11-10 13:42:54,bc1q3vm68x9hgdftwhwx6fxv39l0l9tz3l3vvpln0c,bc1q3vm68x9hgdftwhwx6fxv39l0l9tz3l3vvpln0c +2025-11-10 15:38:35,bc1qul9vy6f3lvr70tmnkpk26gv07vc2j7m8fghu5g9vmlr94w36tqksasw04q,1ysUBxNE2bwbQSDncJHWpuAhK9fdPUTb8 +2025-11-10 12:17:32,bc1qm34lsc65zpw79lxes69zkqmk6ee3ewf0j77s3h,bc1qjccwa37c4eckaj7sym6qc5933g6650yawv7uu8 +2025-11-10 12:17:32,bc1qv4km03y5kmpqan8hhytj2ysw7a9uw3jnz53f7n,bc1qjwm26n2yavglfe0l5335ffkhgla95z5wcfzshc +2025-11-10 20:29:45,bc1p72yx4zjhrrzes32ful3nwzxkteukyt4fq0ulkncpj8aaj32yt80shukvy6,bc1q85ywegp3yn7c7nnavgkfv0gjgsvaz4p3yfck62 +2025-11-10 14:56:34,bc1q5v8yfeevlwrxr6hhf9fvtm3gs39k3rup3x48af,bc1qvjf34jq086xt6k8dlkt40h8sh7qpq4z0rn5g95 +2025-11-10 14:12:11,bc1qsqcwf6pg4ke8ahmy78fvwadseav2lgd3rxj6dc,bc1qf63zft4rmknjpefue92gqnkqppk3jvtty53nr7zanrqd0kathhqsjv8gfz +2025-11-10 14:12:11,bc1q58rmazkczke70g9gwsggpnq9rarnn4mxchmnzq,bc1qzp7dh2r0xkyw4menu7th9k8nk3qp540myml243 +2025-11-10 12:17:32,1r2qgPBgdMNiNvUhWSSD5dAiqNb3pZDDq,bc1qkfaejemkvpze78lw86cwhpnfr62q30vzycgvx4 +2025-11-10 18:01:50,bc1qk35enqhuhtcfslw3h6zguaed4lv5cm57npd6l2,bc1qarwf6apwh52pngnpnlrm8dpzkdw0zvchxz4w7g +2025-11-10 21:05:57,bc1qmy4v6rlj36aal9yu2jfk4k43ef7gfzdvu0xe23,bc1q3r6vhv6r03qgh6jkqns2nls3yzpssagw3st4d3 +2025-11-10 14:59:57,bc1qhaper2xq6ajm4u2398vsxc0x74lqx7ufmy2c0varj8y5usfyx0dq6ndfnf,bc1qvh6jqvnjas2e3ftygrg465r3y57qvy94a8mqaxakanmhwzjmfz3qjmgq7f +2025-11-10 05:47:18,bc1qm34lsc65zpw79lxes69zkqmk6ee3ewf0j77s3h,bc1q9wf2z7rfa5a4uyng2d3lkqpr06ugvsn2grm6r0 +2025-11-10 00:28:09,bc1qv6xz6gvg7654exxm2m65qqyj56a9qayxkvn5yh,3BNb5kNf69bDqU8wRGUXbEUK6WwzwiE2n6 +2025-11-10 14:12:11,bc1q5pa50wkmfqk7zjh3sndq5n95he3ehty5vmlqex,bc1qs7qhzl77j5fd3s0x098ga8uwgnu3jc9ucf3lnjkre9tr70tjg3gs6vlr3p +2025-11-10 21:05:57,bc1q27gjd4z6prxzhd7kw505p0qs58vn49eahhvsft,1FmsCG9dHV1ea3CeyayphxreisWTFDmwUb +2025-11-10 21:05:57,bc1q6mtzzhl2vffy90cmux747dj4v7lkhu9rvfnwh2,bc1q6mtzzhl2vffy90cmux747dj4v7lkhu9rvfnwh2 +2025-11-10 17:59:43,bc1qhp3yg84skqr6f7m43junkhpzcxv02u0uzwyfye,bc1qmtd34z56we7qgdra7k9zu3zqzne6l2z6x6k83u +2025-11-10 
17:23:00,bc1qdmyqv5p4e876y3l76c05a636zzg8jam7rcfg8v,bc1qxslzk2hya95w23u3vmzjtxv80lspnfy4fdpx7k +2025-11-10 00:28:09,bc1qamgjuxaywqls56h7rg7afga3m6rgqwfkew688k,bc1qul5ls7sawrl8vh7yt729esad26s47xcz7llz4s +2025-11-10 20:29:45,bc1q4vxcxw7mpg9dcryqu0kav8awrn7qk5e6wgs3hg,bc1q7t0k8uez6zkcm7g0vsqq47kmr8x9ye3xvtawywvqwt7zafnsd6aqhpe7a7 +2025-11-10 18:01:50,bc1q5s4qwh68655xguj43ppxgwhh2r05z3ggx8prfv,1DExp9PkcKvDL4HAqbykBctzkDHWiLE6Mi +2025-11-10 17:59:43,bc1qwx9tp5jmzz9a3mzafnx2htp7x0ueu5q62mxnyn,15WkUpnBrCoRK5v3Phj4aKoMjeBHo4ix8E +2025-11-10 14:56:34,bc1p7mx3edp7h05s57p57cj0wd4jvqvutaw5szwklvruep8sts8xe6uszux6sv,bc1ptp8fyw450mz85a5tf09vy4thw56q0qw0ntm83tvd6ep3hzje52yq0jt965 +2025-11-10 12:17:32,bc1pprfs443mkx3q3902hgetdkq5rg64vzmn6emnn5f6saxxfe8uk8vqlt7a2d,15E8oCgYt5bJBaExy18h2wJdm7vkjRHKv5 +2025-11-10 20:29:45,bc1q3zph9r2h0e2v29s8phj57u4sd5l2y6pu48a945,bc1qhghnchwd5j5av0vslgmz76l598zr3v8yrrcvaq +2025-11-10 20:29:45,bc1qt3x6hvy29mwsshka8d3zzp5ylf4yjhc0jvlljp,bc1q2x4xyl2fvqnmvy2vt6u24e67lvkmq6hxqkkjte +2025-11-10 15:34:09,bc1qnfw0gkk05qxl38mslc69hc6vc64mksyw6zzxhg,bc1qsqfl939qdz0vpwkynqvawntv2gzv2394wjp8a8 +2025-11-10 13:42:54,bc1qcjqyhhd2mlgm2x76rw7pfqesqxae2kkkp9lrek,bc1qhm8d7ece35rk9umwd4d2myk36rnseq8wwmaury +2025-11-10 20:57:25,bc1qwlwvahdkphj3rwwc6ln6ww8jkdy7lw4vw3ce5z,bc1qstl923xpdarm44573p8jpeh7lu2mp2qztp5pcx +2025-11-10 14:56:34,bc1q760uuh9m67gdnjq594ywkyf6m9axutmxuy5xpr,bc1q48rrl4cdyujdls96yse82e5l7y7lmsyfnjfy6m +2025-11-10 21:05:57,bc1qn3c2efvw6vmwqtmqf0tv45pv7g6ftrms9yhrd5,12sxYzAsmziEjtAvKkxqE4w1uvDaTUXwXW +2025-11-10 00:28:09,bc1qegxth00stfmkuneeg4me6zlvyyl904u0lcaz2c,388JzjgiUn8JDgzaWTHxTZ9UbF3jPFMJg3 +2025-11-10 17:23:00,3H8XNZk9YwcpUAqRYPLcoVNMH3o18UrYhQ,3GKVcNr6xnXmyEpVu7h8kEusryGfy16bU3 +2025-11-10 17:59:43,bc1que6q8d3wt5e7xz6x8qtp30euh70fgvkhyduwqf,3HTtPcoAUpq1Mma2mE5mHUrLDCXahJg9Jn +2025-11-10 20:57:25,bc1qh8aq5wxlrq94m467gd8l40rv3v6ja98fe00f3w,bc1qrh7f96680090mup2njzlasmwlex50l2ntjur4u +2025-11-10 15:38:35,bc1qs0chk7re599jqdr7z3vpsftc9ut7du7scxnyrr,bc1p2ncdhtdzf28enksd2syp4033atm4jq6j9v2py7uqe2n48jf5jelqe5jycq +2025-11-10 21:05:57,bc1qyujm66rpnnfnn6zkm329gj75r6e73tk6zv5j50,1PMS23kYZv4VT3zPBSG6j8w4tjG9VVLr24 +2025-11-10 14:59:57,bc1qsnw4rsdg6tgrnrnxvjq5qnyy2j23w82um58tw9,bc1qd6t23tfrg82zpapv0tepvsm59r030mafdqx4pp +2025-11-10 21:05:57,bc1qr4v4nl2ay04tt2uz6lscy47wj277ntf8cee9rj,3Miq328XhYZVgKv8vbkxF4KxbGrFPnccA4 +2025-11-10 14:12:11,bc1qp74l26w7t27r4jedcsx3lcwn50cljwtqs0yan3kjhx8e42py26uq35f4j8,bc1qhh5ju5mu7w55p4zz3sfl938xuqu6zhdpnx9rlz8m3umqz8vkfltsmfeyt8 +2025-11-10 14:56:34,bc1qu4yaxft2tzuva4x2tukuwuus5d9hdzj02ttxnm,bc1qng6tdu6sc2aw4x42lmg6ntqtxd4ut43357dhhv +2025-11-10 05:47:18,bc1q4euuq7lcs2tnt8fgdxaylm8034q7snaekt7p24,3FN1CiruvagBhricdJMiySa1vD9h3emN3g +2025-11-10 20:57:25,bc1qy9ed5tc7vhs9dkad6hhkw8t4wh56jyg0czxmk5,bc1ql3xnnz4kw3wr080zrk7wysmzjz7sr3rjspt82a +2025-11-10 17:23:00,bc1qa5j77xexgsl46ahrzeuktv82g86qdalxn6e3at,bc1qvhket3vgyyy3yulclmr8efq4luwvy90a4qwjle +2025-11-10 20:57:25,bc1q2zsrq6vhkkzyr35hzkjgur38nhpj86jmez92cs,bc1qeldfg4ytnzsjk553mr2cdlk66usfz22pyz67dh +2025-11-10 14:56:34,bc1pseekhxtjhmvs0xlc5mzmqs0dl8xcjek26ujwpg6hqhkp3gc50f3qjuhlxp,bc1pv2tdl4x285n96a5e9hh36mt9xw098ktqz5hnywe86cph0pplwaqs56yw85 +2025-11-10 17:59:43,bc1q2gchdrgwlh5tzwm0spph77a7mj5pq0rc38aahk,bc1pcsc7utumn86cklrkzyusxdgc9e3fwj6mxu9j327vwts0cv8e2a3s92ddg4 +2025-11-10 12:17:32,bc1q0qfzuge7vr5s2xkczrjkccmxemlyyn8mhx298v,14pXPZVfyeL6gxKZDVagbQnxQhXMpM2Thb diff --git a/python/tests/data/btc_dataset/csv_directory/part4.csv b/python/tests/data/btc_dataset/csv_directory/part4.csv new file mode 100644 
index 0000000000..a529f74dd7 --- /dev/null +++ b/python/tests/data/btc_dataset/csv_directory/part4.csv @@ -0,0 +1,51 @@ +block_timestamp,inputs_address,outputs_address +2025-11-10 20:57:25,bc1qhcntczjrk7n83736ww45zrhhtxgwll023qy3eu,1ETdQMChucreiaNBjPDYV3R278ChRG3c7m +2025-11-10 17:23:00,3FkQ5nZWyHs7u63PgafTQ5jBK3TrLDcfRx,15qdGZi9vDYp7cADq2jqzpo6WvpVmX7d4p +2025-11-10 13:42:54,bc1ql6sgtq0uwh67un03dz4nt26n739qyu2xatgz92,bc1q7tpvm5d0yyv9lxme5y73s46n9cv803ue3gwevg +2025-11-10 00:28:09,bc1qv2gc72d0j8dmz530vuxlgyj25j4kz5s0050trj,bc1qv2gc72d0j8dmz530vuxlgyj25j4kz5s0050trj +2025-11-10 17:59:43,bc1qcr4jzax2wjyt5lkpqkhs6nuvrmfhmapsh9j8rp,bc1qe6asyl5njvyqc39qf2y7g5vqzscd9jysjwqs0k +2025-11-10 18:01:50,bc1q4y8s9l2yck6dvcejpmn90phdernngxcjwapqge,bc1qyktx2nxpjtrn07ftkpxsjv9c9atx7xh0nmrcdm +2025-11-10 14:12:11,bc1p6qvr6ceqgzmxdqrcrg2nzq39e3sd288sxa2reac6e285qq8epu0qgkdv9g,bc1p6qvr6ceqgzmxdqrcrg2nzq39e3sd288sxa2reac6e285qq8epu0qgkdv9g +2025-11-10 15:34:09,bc1qc5yxr9qkps7gfpkeg9xvptaz494n5eh4s00eru,bc1qcq5r4zg5f75ef0ps66nsvj3m03mjln9rcqjegm +2025-11-10 20:57:25,bc1q28f9lrqqaxly2jf2azfs36dyl7r6efcy8wkwew,bc1qz94dj90ymf87w77wfdpe7xyq6j9ngj6x4agyt2 +2025-11-10 14:56:34,bc1qtjuc2dqz34tkzs4uwame8rhyvpgge0gy6knhmy,1HYadqXeegRjnqDAYBkj92o5hF1pPJu4sb +2025-11-10 20:57:25,12rLYV7AQfpuxZpPXdfUqZCs7VCNp95qq8,bc1qu8xl7f8jkv2a58j0mas98r7gwecqqmnw6lkwt7 +2025-11-10 20:57:25,3FkTTwxagg6p7rs4d3GnQpZbADpatufQzo,bc1qfyfqzgvzxw2s2w7vms4yalys96aeqlq4rm0jxx +2025-11-10 17:23:00,3NhHCdt4RYXPjVQiYiWyRRyqqP7ik8SS9t,1636RnPVv6j8mTyaRmprjSpQCPAjD5UGiA +2025-11-10 17:23:00,bc1q5u709x2l7lsleprw264k5xj4rpmmmhhrurpkq7,bc1q7p54e7uarkxjlgc6qzwmugn7ygvmcwss262l50 +2025-11-10 20:29:45,bc1qrz8a6d4z2xnd2e3lnkd45v9jc5vd65t6a0pgzw,bc1qz4cfzstee7f208cxdca8v70ht5fcv5lypc63rv +2025-11-10 15:38:35,bc1q769n0hz7a9j038jdkwwd4lq3xwrkaskv6y8sp7,bc1qwrgtmyau0h2ar0guws8a9c0sa9vmjzqrlt7620 +2025-11-10 14:59:57,3M2nVoRZJgkxNHG7W7zp48xob11mbCTCKA,bc1p2d25ns4cf85dkk6jyytjeg4fcfv22lja6wwe77eltpwc2zd2yzqqyky6jm +2025-11-10 14:59:57,bc1pq7s7kpp90z2d4s7hzfxj32n72acd0987z3u2wm55ltst9fwelelq4emgpv,bc1pysearh9me9sa4kfkne6hu3jq96shjwdtrja2lf8uk4wn2uq5jjms72j3h4 +2025-11-10 17:23:00,bc1qjnmp4gxpp4u056dapqngwq2asw2ty74htrphx2,17Q8GjyhXZcf5RMmHGU7vznVvKpwGB88PQ +2025-11-10 05:47:18,1Nvv7ihqwz6Hh9rG4Bk64K7nGT7y1g2Wa7,bc1ql5zu6awgrz7wwwq6369kvc8fqz24n2ts6hn7y2 +2025-11-10 15:34:09,bc1q8a42mx0xfeyqy90zfkludfdu3c43w4a0jfw2ps,3QRLixkesAcqc268rikzbaRShSwSVydSSE +2025-11-10 14:56:34,bc1q5gpyv756638njr84s8uzeq3v59e97ha42hj52s,3FZLUQmcFTibssUKCJiNPtu9pXexXgoVun +2025-11-10 18:01:50,1LtjGorQ6FeNuC9S1oThWZQ6b79VvqH6Xp,1MqBMKLfsYq1xMwvwzZe39VAkXV3RYmNmk +2025-11-10 14:56:34,bc1pwjzpf2p4drax2mympx474phluyzjmpl7udnw9hmx43hxgc5w28vspms2c3,bc1qwrcm425acde6757923pxjkvlees9tww5xuqf0y +2025-11-10 14:12:11,bc1qrjkmdkhewjktx059nckpyqqlxazvr7kyeg57sllw7ksgenfwda2szu8kgy,bc1p2pxfzgune7ked0gldvt3j7zzusjv4t2uphhaf7pqg3srqdpuvngse97236 +2025-11-10 00:28:09,bc1qk90plyzwtzweulus6mmf9sd0zndplwqp26u8fy,bc1qeku5u5emyu6lgazyd6zntmfvkzffnk4yxm7l559hl0rku600798q533jtx +2025-11-10 14:12:11,bc1qmlj36ml4nay4tm5gj0h4u769uwpww6ucyqf0p2,bc1qp7ekeam9lauvcmltkhfxlyzd3udzmd39syyhh3 +2025-11-10 14:56:34,bc1q63qkttxua3aw9umnqqptkw2468rs5jne547umd,bc1q793gj5eml2u9hpgqm6y3xjgmfu669xy0aqzfa0 +2025-11-10 05:47:18,bc1qmptzs6czc3mlp6hy3932kka5687vn5fd9cnecu,bc1q9yn6zdkjjlh0z5y6sqpdvwq7pwkeh5r0ka28ad +2025-11-10 15:38:35,bc1q04qcmmnmd47dc8mjn9fcvxcjdk5f6edg6f40mw,3FxCJ2XUyFEup66QDaxgcHF8az6P19wien +2025-11-10 12:17:32,bc1q7t4vyehjsexdme84qhdgd4dawcn54djh0m78fz,bc1qcu0wrnx0002g2ka4sr0nnrldtff6dvq433unh2 +2025-11-10 
20:29:45,bc1q3ms0mj7jtt9nd5smhv50uvd27czetxxlnlzkuq,bc1qcu3a5u765fdhddzccjy3k02uzewarlwd7yyn82u0fv42508n650s6w5um7 +2025-11-10 00:28:09,bc1q6dadscrdytuwjeedk9fr80xwmnl5prqvhwy7aga4k3fmxwhzvf6shuzpmh,bc1qdfcstw5dcud0dusq3tscs5khczqc7esn7psa3p +2025-11-10 20:29:45,bc1qtkya7nnflevqx2tgjajycw2gjl0y4w7626lad0,1ACE9sy42uw8Tns84KjueMj8koezrcZRdG +2025-11-10 20:57:25,bc1qm34lsc65zpw79lxes69zkqmk6ee3ewf0j77s3h,bc1p52jjweup7chageaggu4cj8jl3avylha8zrr7lgqkth82tancdrxqlhnvzm +2025-11-10 05:47:18,bc1q3dc2ec45m8s49u9rlv9y8ruyr644utc3hv7ncm,bc1quf7hjdq99rldlyqmxaz9sd3unm6j5fv6yulty2 +2025-11-10 15:38:35,bc1qsh343fpz0mtlfl3k4xzu5qru0uprdyh786lfsu,38qZsSoHitwa7XDicxT8xewyXPu2VAvhhh +2025-11-10 17:23:00,bc1qtgvxhy2armx5xajkfmdks5h8y7cqmqgwwfdrp2,bc1qtgvxhy2armx5xajkfmdks5h8y7cqmqgwwfdrp2 +2025-11-10 15:34:09,bc1qr7whs7crqjpqshv7qk9ex4742wagf9gnznjulc,bc1q20n8jugfv9c224fdfxe4vgugyd2gh7uaytt9kc +2025-11-10 00:28:09,bc1qnfw0gkk05qxl38mslc69hc6vc64mksyw6zzxhg,bc1qjhv7fcemmjx4temc4d2z500jv5fud2y3rtwwg3 +2025-11-10 20:29:45,bc1q3jzyfvu60rc3um5rah7y6j3gks3m7jffqpw9ef,bc1qc29w7mwejcuklrhqf0e8l6zjys6x4sqzkns2u4 +2025-11-10 20:29:45,3J8dPt32vzUdQzvwXpicG2XFcaG96dnRZt,bc1qre4tzdx5r8ckzhn9ffrxwusvugdfvee9n0v5y7 +2025-11-10 21:05:57,bc1q2dzekmutn0s8wh5ty9kywgddcl7j796zju8aql,bc1q3u7r770vyc5v6hae8v5pdv846wq430jhdfz40j +2025-11-10 20:57:25,32bZeQ89m2oPeM6wLKeYdvzsPNBDb3bGAP,3Psvpa4LQtEf2tR9i3VJwGRgHQsFPJ8rf1 +2025-11-10 00:28:09,1VEmWQLu9iohP6RMmabnKcDJuCkyk3E85,bc1qchpdg4wnyaswyfggfatrrwz9snasrc92wgzhfy +2025-11-10 20:29:45,bc1qlw4565huuxsr03dz3sepexjv6ujmfy2amye98d,bc1q87s3wsnzdhlclqpymykpkdm44ryv66av5fv08q +2025-11-10 05:47:18,3FfS44EtZhTBb1XXQPXcjiVqxpub9gncz8,bc1qft2zpj0wl4zqghk4lad6qr9www4zrrseuv0y5e +2025-11-10 20:29:45,bc1qsggexuj2xdmne5kvj2mnu4ur2m2qjpwlyrtqtf,bc1qfyaje5au3xzwcdmt2ecct5xneywrqaf4p46m22 +2025-11-10 14:12:11,bc1q4k8t9a9jrzhcnlyretxgz4kqc5hlyruvra5q5p,bc1qlyy7f7cu2rptc9n42khv3lxrc3pzwp58aa8qlp +2025-11-10 00:28:09,bc1qlwjqwjugrv5c5wzg3hmtj9m72tqj5mnqeazrzy,bc1qlac25q65m2wjz9fjzqg382txhycjc495gs6ez2 diff --git a/python/tests/data/btc_dataset/flattened_data.csv b/python/tests/data/btc_dataset/flattened_data.csv new file mode 100644 index 0000000000..7d89529e74 --- /dev/null +++ b/python/tests/data/btc_dataset/flattened_data.csv @@ -0,0 +1,201 @@ +block_timestamp,inputs_address,outputs_address +2025-11-10 00:28:09,bc1q40a3a5xlwv0l4n4xs3mpvauscgkj78rcuhvrhs,bc1q9xh9exh34tq566nfvpwwupum6efh3yqc0aja5d +2025-11-10 05:47:18,bc1q2zn0548tc0njq9xxt0uamxxh48s8t7c9dw0a45,3K1t5MthuwcyNn4BWHp4eFSyTY8ifDjTkk +2025-11-10 12:17:32,bc1qxsp4nqu7zq424wdz4v7c6q8jylhtlskap86kpd,36jsyuJHY2Eqs48yjUb6xyzARSPYKKSrTF +2025-11-10 00:28:09,bc1qvpah3cjguuwmjjgwp2g9t52f786jnw7km3jgau,34WkZeqyNBb5FXMxLZJfveHNm2kqngt4w4 +2025-11-10 13:42:54,bc1q6xku4eta99267etxf84yy3qc2pyw04pq8dvqye,bc1q6xku4eta99267etxf84yy3qc2pyw04pq8dvqye +2025-11-10 13:42:54,bc1q50uxvgu5paqj22fg27s352wggev70xs9f8lhnk,bc1qxgxf44zkxrz8ld33kjvf85ekmmkcaexs7xkfyg +2025-11-10 05:47:18,bc1q5v45pufhugrqmp9a7taq92y4uqu8awpqcsrjq2,33bdcPeLq1Mk8TrHZ6sPwjpNbfHf6Y5UCg +2025-11-10 15:38:35,bc1qhkar0wasylsfhwamp8d023e2tqzcjfzjlx6wrd,bc1q4mhkgdfmattuya75rescq5u3ms4u3m56x8nzcz +2025-11-10 18:01:50,bc1qw8wrek2m7nlqldll66ajnwr9mh64syvkt67zlu,bc1qxwea59rqnldu5hl2zg3pt9c0av7eee4dfjnj7u +2025-11-10 17:59:43,bc1qn5yd54kvaa60jqw0ncx93ru7ctlk0727653ynz,bc1qehhvcqru3k3qfn95r9w3nf8jxnlflnw4d9duah +2025-11-10 15:34:09,bc1qr7c9p424qed5mqy33luxpu35wmxdl8vd383xze,bc1q26h4h4v5u9m0nk8mv9mn2pfpmcyemeuntee8xs +2025-11-10 
20:29:45,bc1qsaqmj46l3jl42vczyqa34fqukdwdxwndj3f5ah,bc1qsaqmj46l3jl42vczyqa34fqukdwdxwndj3f5ah +2025-11-10 20:29:45,bc1qk4jzsk3qn6dra7re4cs4tjezhz4udr86cvgs0x,1NoeLVMHeSC9gvw1FdAnmHTNrHX9iTNXoK +2025-11-10 00:28:09,bc1qlmgaecwta0h2k3wc0yx5gfeagyj2u4kq7p525x,bc1qvrwer4355chxf9zcyd8lweaesk30r797f96v7v +2025-11-10 20:57:25,bc1q2dshq3fv3nxpu0h5t06a2drcpl89u9xvncdena,bc1q6vqedrdgh9j97jp4nhamyjqpf27rtldwr5zm07 +2025-11-10 14:59:57,bc1qf4g86wre0gmzxxh2c0p5892ne4rdrlxe28pazd,bc1qvmf3qpe3lt8asy8g86vxvjnzkn366gn2q0qyly +2025-11-10 18:01:50,bc1qex279aerh885fr62m0lnv3htvja2kw9km9c2aa,bc1qex279aerh885fr62m0lnv3htvja2kw9km9c2aa +2025-11-10 00:28:09,bc1qdj7cz87pd3st0ah6yhy5ltmv423ltd0xxycn7r,bc1qyn66t4wue704rz4r37mye65hsqypg0juq89068 +2025-11-10 14:56:34,bc1qjpdqs9fjgapv305p2w7lxh7zk49rjjrqzwqguf,3QNQ49CBeC63Z3r3C2D3s5grMVKZQTLQ1J +2025-11-10 17:23:00,bc1qxw0vxcnwta0n8q4s7apjtsdc72uqc8dt5wvraw,3Hs4uUpc2ntg7ajWjoiHBEksU5182GzDft +2025-11-10 13:42:54,3NtA69Kn6Z6w8A7eQNXyz5ijYZxnynvT7N,3QCCr2s4NHn2BR2oTKNcZPFGEf5cjt4BDP +2025-11-10 17:59:43,bc1qcpl66gg9wpyj4g9umh9ye0mtsgtzmmlmrcjpr5,bc1qdkwxe7cuq0z35tfanuavvmpxrkqlhy72esnj98 +2025-11-10 17:23:00,bc1qlrex5kmzc4kdvwjl0j7sm2pvewvwaylrnrlns7,bc1qs2en2wud3q99wq7ykwdwrwzkpxfluchycw6wvs +2025-11-10 00:28:09,bc1qhazv3u50uvv5lw83sgd2dnlt80j2fn2j39h4rz,3CFP97ZCKiy6dmv2wRuJtUUYZcGMXjzfKA +2025-11-10 17:23:00,bc1qch5udvlvm9luneywcfjxjfvy6wuy6hysku8vw7,bc1q3qsupsucu6sse2uwpc2hdysf3mu5wfl9rnexvu +2025-11-10 14:56:34,bc1qenk9rwm8phgfy0w9zl2fdsejxhxc86sm67y5we,bc1qzclu4epvxu9e6a5c2mk650fzdw4626352lyf2e +2025-11-10 15:38:35,bc1qxgjzdgp0yrh69llsgepm0nfrj2h0l9cjvk9p7w,bc1qzttelzj04e87qcvfujke7wdtfldug4zmugmwh8 +2025-11-10 14:59:57,bc1qdyr4723zc8azryex3weyspdhhdptlwurfzej5v,156kgpUXXggfZeyFdgxxs6Eouok9MscqZr +2025-11-10 20:57:25,bc1qsc5q87uem04xzedure8u90452hgx8h00y96ssa,bc1qmcx2377el75jxxg8y5673srdgj7v0jgjn4y0u2 +2025-11-10 14:59:57,1GbUhB5Aub17yjLaFf1fPNFN8icPzXn3sU,bc1qsj84dmnupc25zhecdxfhqnhtg4g63w8h0q260g +2025-11-10 20:29:45,bc1qpsys7sfk5u7ue3lffwzszzvffhtku78kr0vva4,bc1qt67jql0p02yvjncgqvfjzwnc2fwhjszm3d9jhmgsnkpxkaf3j6sq6cdn7c +2025-11-10 18:01:50,1NKKRqJDwJjdnYL1Wn7QsUjxdDxqJAL6vG,3GEeyC3sDSDQYMnsfYdMUJGXpGoXnz1Q5B +2025-11-10 18:01:50,bc1q77flh60ngm58qzpuhcvuxtzp2z6qc3ww52udm3,12tPKMdmLCReFCjLiif7bng5VX8G7WTHjw +2025-11-10 15:34:09,bc1qr7whs7crqjpqshv7qk9ex4742wagf9gnznjulc,bc1qehv33h4ez6drn55yqwa0v8mwzpzcgqsu2gkejyy3e24m4w2az2jq0mcke9 +2025-11-10 14:12:11,1N6iknsjUx9R9JiarC136TVCJzUSpjr8V5,37exsSwT2HfBqgeM68kzKXvG4aJXW6ERnE +2025-11-10 14:12:11,bc1qtz6tnlusht3la6p6gnjfgd3ad6zneuqk64k5j8,1HcmTmdkbmVbuof1vurum74PMcmC5Lf7PM +2025-11-10 05:47:18,bc1qm34lsc65zpw79lxes69zkqmk6ee3ewf0j77s3h,bc1qq4e2xm40vgmz79rrh4l3zrrf7eg96l2q3tw4kv +2025-11-10 14:59:57,bc1q9r8pgy2xquxj4ph8gjatle587dsvqwjrmyflx4,bc1q0prfw6u83g4pj9vyzpzgxpq5u4xtn3kj8ltg2a +2025-11-10 13:42:54,bc1qemdd6pq7qar7hs33jvvsv4tlt5edasxk996hda,39ohXNyW4mXo652kA5CzsH87HDwuDmmNuY +2025-11-10 15:34:09,bc1qmzhda5986ssuf45x2qzymtuxlc55qs7ddds7ae,bc1qwujtjmpk7eavveax6e2crygzsz59pl7qnudp8a +2025-11-10 14:56:34,bc1qzkunkrkz6q5ut6wwcjktn3fshgshx686d0xqtg,bc1qpzuj9jl3gxarhdt6rzxynk0cs9a7vw0acp5chf +2025-11-10 20:29:45,bc1qnlp6y2cxtnsm4u6xfn6yjvkvjp4u8x08fsp4yq,bc1qkc2quddu4nutdn7p0gcz4kpenex6ukxqjmxj62 +2025-11-10 14:56:34,bc1ph6td9w6mfvj0s00vj3240pclurgmeenw4a2c84vppnswufwmp5mspmp28a,3MP4GcjaoPcFunef59ZxtjNkwb3gEw2ZPS +2025-11-10 12:17:32,12RVBQfdCa4ctMCFrwPGKz1iLSs5wwdyoV,bc1q2nalwjxe947fe4zl4rmd6g97vfrqn3af5ema38 +2025-11-10 20:29:45,bc1qqf89nhg5pkrmzafsranjn6rku5nv484vtagfuy,bc1q8pkcam0ut9e3c2azzj5292q4tdszztqn7mrxfq 
+2025-11-10 17:59:43,bc1qns9f7yfx3ry9lj6yz7c9er0vwa0ye2eklpzqfw,bc1q8gelljdlq5dyg20hsacga6my0k462k0muewp64 +2025-11-10 12:17:32,bc1q2sc0phtfha7n50sd250g6gpx7w36y6hr75cdd9,bc1qvdsw7yv7e6pz0p8pxfweqhu0qhj89tvyc0wv4j +2025-11-10 20:29:45,bc1pw86pslqfvc6hhxalwjcsdp24te6hnyfxhvckkf8fjmf3wlp42vwq9yqwtt,bc1pykfp9h5x7umf3ksrgnff44k5yen7kps5gz2ee8znkltp7udth84sq37w06 +2025-11-10 05:47:18,37jdMXYbvg3dKzJ4pGSYiABiXoBy4putZq,bc1qrs2d75werh8nmvr4makktpyuu4l7uex9yr05ja +2025-11-10 15:38:35,bc1qep5xck85tsry58d36xxvdpwwh3s4vm04v7t4cw,bc1q95lyl6wtxs5zt7dna9qwjfvg3k84n5z87fuyuc +2025-11-10 17:59:43,bc1qthv7smt28rt2ctyg5ec3vkvx0hl4ckks9m75c3,bc1q2g6nxqvv2s09plap4x4j6pgnc2y42lmjy8fpxf +2025-11-10 20:29:45,bc1qt9nvg8mwuqmjlruc8z9m4kns2hd3gk2ylsdryp,3JDMdeEtsEfnABR6AYaHNU8fy9B7uDJduH +2025-11-10 12:17:32,bc1q4996ykxqxnhey7sh9fzvn3rre3mswta69myft7,bc1qlxsa2yndh2xt4rpmw5pt7eqaelkzd54psvx4xa +2025-11-10 05:47:18,bc1q449ruh9ga8pzd4uzds83zqckwp6yqxfvra7fvq645ev7sqefj9jsdkpegd,bc1p5xpw59psu6yyzmee8s8ptqy6gvpfls0zsegp5krw0x6dp2mtru2qcxq0tk +2025-11-10 12:17:32,3FXf66gb6NNMA52WSBBjJyMmJ3t75hLRDM,1EHF3NhZPLWEaEppSHqK5AVGB8zX5xTXEm +2025-11-10 17:23:00,bc1q4pmrg0q6ywlsd4vutmmv65g7n4mgp7tf2hz9gx,3Hqj4K5kofSnRa8MjJPt43yw5gd5f5sdKG +2025-11-10 14:59:57,bc1q3aus5ka7pq2wcjuesxqlaqwhxgckxxlx6t6ejy,bc1qwl8jvjvkyeun7wj09xkcpwt4g7sqajlzush8j2 +2025-11-10 15:38:35,bc1qh2cwxv899psevx5yhr9cvuvqpcwqr3ej4ns87a,bc1qkuwtp76a89y40nvp6235xh9u5rpumv0arpm8rq +2025-11-10 14:12:11,bc1qa7a3ndesd0n2srxzdhnkksu0hkc96rp5uyyq95,bc1q3e0xr3pqz0rad4uakctg7j2crnf3v53na9c567 +2025-11-10 14:12:11,bc1qvxs7r6w2z3aguqzdecql7t5vkr44q9gccgetyn,bc1q0qdfdjjp9w872r406tlff34tdy9wa7wvdgutd9 +2025-11-10 14:56:34,bc1qsw6zezpduq9985vxs48uhffv82jzxgl8rm5mpd,bc1qsw6zezpduq9985vxs48uhffv82jzxgl8rm5mpd +2025-11-10 05:47:18,bc1q2alxcv39eku3cvctdudkqf2y2fvqu2t49wlrj2,bc1q2alxcv39eku3cvctdudkqf2y2fvqu2t49wlrj2 +2025-11-10 12:17:32,bc1qm34lsc65zpw79lxes69zkqmk6ee3ewf0j77s3h,bc1qkqd2ewnd8mw358qw24vmwgepz8n8cta0y2pe97 +2025-11-10 05:47:18,1AGD42b91ycvAcMrCCQ1G5iiBZooPP9xqX,1AGD42b91ycvAcMrCCQ1G5iiBZooPP9xqX +2025-11-10 20:57:25,bc1qmv76fxychuzcuvt3uy5uy3seu6jvxrsfz4993w,14s7DCukciJLAFJsYDzPK7ZfFP2thzU1Yt +2025-11-10 14:56:34,35C2L1pCgwzBHNcDcVL1a5RuoefeWqyjAR,bc1qhfsad8wuvke9eczj5g4e287hz93g7t8nwn9gxj +2025-11-10 15:34:09,3HYjQZaytMNixSZwU6Dkd7QBQhs2yFcp3E,14BuunTtFx9HMvgJzJQnxR5VNgjXCKckSv +2025-11-10 21:05:57,bc1qx54ge94tzqtfjzqy0dm9z9q6yqdm75v3ed8g39,166dELRZqWwRjXXVdbahWLi7muS9A2jjJv +2025-11-10 14:59:57,bc1pthxvxwdparr9pg5s9dxl4s4u3f744qwn9atl3gcltvjx76hyg4kqx230gh,bc1pthxvxwdparr9pg5s9dxl4s4u3f744qwn9atl3gcltvjx76hyg4kqx230gh +2025-11-10 14:59:57,38Vfgc9RT5EptuLMQTrnJ65eZgQtDpneom,bc1qeq2flx8mfj3gn3j58uwaj5u2xzpt8qmvet46nl +2025-11-10 14:12:11,bc1q7cyrfmck2ffu2ud3rn5l5a8yv6f0chkp0zpemf,bc1qx0m6mzl4756vwg23jxkdpew03wwfze856phhxl +2025-11-10 17:23:00,bc1qkqw4qj5gplgkns6ysg6xd6986a4xrwpg9fsy36,bc1q9stxzl5x02rrq0cfmlfh4epumn6zvq6asa9n0u +2025-11-10 00:28:09,bc1q6nlqnu6jad5fygq7v5kua2zpr2h5x288szq5km,35AtUZgvWh9uAHhX6fMidJcbVTJTjC4Mcs +2025-11-10 14:56:34,bc1qm2safrrv8e6696sgtksv36yg066qcafc39mu3y,34Yq1C3TS1V5S8w3CNx3SN2W3CdjoGu9QN +2025-11-10 14:12:11,1GbUhB5Aub17yjLaFf1fPNFN8icPzXn3sU,bc1qw3qld7muf6feg5jlypcdfrmq4hzanx92xagswe +2025-11-10 05:47:18,1NfJSiqBw4fb74KgVQrPsk5W5aqitAD1Xv,1L6yJi7TcjztgX7W8ds1zz2NGEnT7GdoAz +2025-11-10 05:47:18,bc1qd9gg9qhkswtp26kujvc29zc6daj4yzv6qsgur3,3MqsziNKsuFwM9unjhAsKYZCQ7mNfYTSv5 +2025-11-10 20:57:25,bc1qdvdt8vjn4lpep3hr6kr90uwmjr7rf906ac5lxn,bc1qqz5ll7nwghwjg9e9d0wjpx54cs5rc6uu48emtf +2025-11-10 
12:17:32,37jdMXYbvg3dKzJ4pGSYiABiXoBy4putZq,19WpQ6KYi2SGKRnsgkeX5ALgZjgSMPiqmu +2025-11-10 15:34:09,bc1qz9t7rmkf3wn3yuz48e0fhg8dvzf7hsmf3a6at9,3R1Ku4KBj6y9ekKkCJXnrVkUzGNjZtEXah +2025-11-10 15:38:35,bc1qmkavqx59gg4aucmctctww3nve6x9keln9s07zz,1HzijFYeyDDUKym1siU5hBiuyu9WVRPcVj +2025-11-10 20:57:25,bc1qkplg5aqltwln2ks4shezddemdffh9pmt73xnpd,bc1qgx3c07j5enqz0pfwpkchpx6cwh690zst9l4dz0 +2025-11-10 00:28:09,bc1qt2xxqsy0tnvcvml47t4ugrvm8p8h9skkv886uv,bc1q2sa6jadt6gry89778csh2lmu2xaw2javvq9srn +2025-11-10 20:57:25,bc1qlpygchhl3j07mh9z7gcsqzjfapyyurm9amvhh8,bc1q0z4v89tecuy5e5cr3hdj6ts8zz0ky79mmattsv +2025-11-10 21:05:57,15QKr87rKdX8g7kmJ2DBWEgbWiGrnxBTnM,3Ae1fKTuLvPNTqjAsuXznf6cFySEtcArho +2025-11-10 15:34:09,bc1q2z994nye47fnwxxy4nwukfg9kkq0m5xe7hxpq2,bc1q4saygf3hk4cl0hej8e5rr2wpdza8zga6fqshqt8mrlzgpt348chqgs532d +2025-11-10 20:57:25,bc1qagt6ng896jhghzuxhqzrcmkn0nq7tzpjejghgz,bc1q3rqkcktkqnem5yfzmrzya289m2xjl9vl37fgmxdwkhl3n7f9l35qk2u36p +2025-11-10 00:28:09,bc1qwthqxlv39qwxt6j5c5zdhxprjfkpy4qgre6nvl,bc1qc3h9tdncgalkv3yeaw5fxsn0ktdavxmvzmcm09ge4k59psxxx60qmwcmx6 +2025-11-10 20:57:25,bc1qlll42hhmtn7pwz6srexps6v8zm2tqp2p7tx4pt,bc1qzheza7hkc4jyp67vhtgrkaxpddwl3z0eada6fe +2025-11-10 14:12:11,bc1pah7lzc5rcms23lvnsrsj68atcagcg25j6kzlk7aytjtludwrl5lqclvw3t,bc1pmspgjsaxqfdvzkg7du7sgpclldwf0fajrkn36qgkrat99ugjz2ushjvq57 +2025-11-10 18:01:50,bc1qmgv6leqa0rpu40ycpndxg9hggrqdxcdgtmvp34,18N8SP9Ui6WKLtDWks6DfwMMMeJT4W5TDB +2025-11-10 17:23:00,bc1q65h44f9taql5ew5nzd2xzkmq35wcag2uugvj82,bc1qhzvug7trqm7n36g5azdqa968rl9ww5rvwzt69w +2025-11-10 20:57:25,bc1qrgl0yu3zuglvcgrdsglc5a8zdsud6n023naqaz,bc1qlg7shfvat4p0fg0n9rzap3qxryu45sljuxy4qp +2025-11-10 14:56:34,bc1q4dd592zay7mpw9arzj02pr4hr6e5yj603r29sy,bc1qepmkx8n0flmw3qq9puwwj0rpeq4atxe8tktef2 +2025-11-10 17:23:00,bc1qp68ctlw77lkyqjyxfn200ergk079jqpxny4yj2,bc1qp68ctlw77lkyqjyxfn200ergk079jqpxny4yj2 +2025-11-10 14:12:11,bc1qa3tac82ncmpd34cy2g7a0xvhm2eh9wec5dcnl0,bc1q6f3kdsu9jxvk4uv62nzmdfcmy0pu6k7y7p7w3f +2025-11-10 17:59:43,bc1qdkkaprszrgxmn6umkrs7fzufpzrefdzz7p5heu,bc1qc2qw3a5m035jcaughewp8w2mst0w788fym5fmn +2025-11-10 13:42:54,bc1qryhgpmfv03qjhhp2dj8nw8g4ewg08jzmgy3cyx,bc1q08hcxrtxl28erd7tmevja57u8af76at6qukmfu +2025-11-10 14:56:34,bc1qjq2l9469dqklhejlvkr3va9qrd343mwrtzu4v4,bc1qlnrcexfn0z6n64zq8jj7ulful6y76gs5yvme3w +2025-11-10 20:29:45,bc1qkhl6epe73jec02jv3sp2lvdfhppyg874wxs9l9,bc1q7t4vyehjsexdme84qhdgd4dawcn54djh0m78fz +2025-11-10 21:05:57,bc1qedynrxg6cslv6x09mf08xl3k2ezxj44dnf5lql4tfjtltl5csr5saaxpq7,bc1p3szdkat5d9p5chwy5jre2fz6srsarmhl2vset0v77wd92pz3n2hs5ly07y +2025-11-10 13:42:54,bc1q3vm68x9hgdftwhwx6fxv39l0l9tz3l3vvpln0c,bc1q3vm68x9hgdftwhwx6fxv39l0l9tz3l3vvpln0c +2025-11-10 15:38:35,bc1qul9vy6f3lvr70tmnkpk26gv07vc2j7m8fghu5g9vmlr94w36tqksasw04q,1ysUBxNE2bwbQSDncJHWpuAhK9fdPUTb8 +2025-11-10 12:17:32,bc1qm34lsc65zpw79lxes69zkqmk6ee3ewf0j77s3h,bc1qjccwa37c4eckaj7sym6qc5933g6650yawv7uu8 +2025-11-10 12:17:32,bc1qv4km03y5kmpqan8hhytj2ysw7a9uw3jnz53f7n,bc1qjwm26n2yavglfe0l5335ffkhgla95z5wcfzshc +2025-11-10 20:29:45,bc1p72yx4zjhrrzes32ful3nwzxkteukyt4fq0ulkncpj8aaj32yt80shukvy6,bc1q85ywegp3yn7c7nnavgkfv0gjgsvaz4p3yfck62 +2025-11-10 14:56:34,bc1q5v8yfeevlwrxr6hhf9fvtm3gs39k3rup3x48af,bc1qvjf34jq086xt6k8dlkt40h8sh7qpq4z0rn5g95 +2025-11-10 14:12:11,bc1qsqcwf6pg4ke8ahmy78fvwadseav2lgd3rxj6dc,bc1qf63zft4rmknjpefue92gqnkqppk3jvtty53nr7zanrqd0kathhqsjv8gfz +2025-11-10 14:12:11,bc1q58rmazkczke70g9gwsggpnq9rarnn4mxchmnzq,bc1qzp7dh2r0xkyw4menu7th9k8nk3qp540myml243 +2025-11-10 12:17:32,1r2qgPBgdMNiNvUhWSSD5dAiqNb3pZDDq,bc1qkfaejemkvpze78lw86cwhpnfr62q30vzycgvx4 +2025-11-10 
18:01:50,bc1qk35enqhuhtcfslw3h6zguaed4lv5cm57npd6l2,bc1qarwf6apwh52pngnpnlrm8dpzkdw0zvchxz4w7g +2025-11-10 21:05:57,bc1qmy4v6rlj36aal9yu2jfk4k43ef7gfzdvu0xe23,bc1q3r6vhv6r03qgh6jkqns2nls3yzpssagw3st4d3 +2025-11-10 14:59:57,bc1qhaper2xq6ajm4u2398vsxc0x74lqx7ufmy2c0varj8y5usfyx0dq6ndfnf,bc1qvh6jqvnjas2e3ftygrg465r3y57qvy94a8mqaxakanmhwzjmfz3qjmgq7f +2025-11-10 05:47:18,bc1qm34lsc65zpw79lxes69zkqmk6ee3ewf0j77s3h,bc1q9wf2z7rfa5a4uyng2d3lkqpr06ugvsn2grm6r0 +2025-11-10 00:28:09,bc1qv6xz6gvg7654exxm2m65qqyj56a9qayxkvn5yh,3BNb5kNf69bDqU8wRGUXbEUK6WwzwiE2n6 +2025-11-10 14:12:11,bc1q5pa50wkmfqk7zjh3sndq5n95he3ehty5vmlqex,bc1qs7qhzl77j5fd3s0x098ga8uwgnu3jc9ucf3lnjkre9tr70tjg3gs6vlr3p +2025-11-10 21:05:57,bc1q27gjd4z6prxzhd7kw505p0qs58vn49eahhvsft,1FmsCG9dHV1ea3CeyayphxreisWTFDmwUb +2025-11-10 21:05:57,bc1q6mtzzhl2vffy90cmux747dj4v7lkhu9rvfnwh2,bc1q6mtzzhl2vffy90cmux747dj4v7lkhu9rvfnwh2 +2025-11-10 17:59:43,bc1qhp3yg84skqr6f7m43junkhpzcxv02u0uzwyfye,bc1qmtd34z56we7qgdra7k9zu3zqzne6l2z6x6k83u +2025-11-10 17:23:00,bc1qdmyqv5p4e876y3l76c05a636zzg8jam7rcfg8v,bc1qxslzk2hya95w23u3vmzjtxv80lspnfy4fdpx7k +2025-11-10 00:28:09,bc1qamgjuxaywqls56h7rg7afga3m6rgqwfkew688k,bc1qul5ls7sawrl8vh7yt729esad26s47xcz7llz4s +2025-11-10 20:29:45,bc1q4vxcxw7mpg9dcryqu0kav8awrn7qk5e6wgs3hg,bc1q7t0k8uez6zkcm7g0vsqq47kmr8x9ye3xvtawywvqwt7zafnsd6aqhpe7a7 +2025-11-10 18:01:50,bc1q5s4qwh68655xguj43ppxgwhh2r05z3ggx8prfv,1DExp9PkcKvDL4HAqbykBctzkDHWiLE6Mi +2025-11-10 17:59:43,bc1qwx9tp5jmzz9a3mzafnx2htp7x0ueu5q62mxnyn,15WkUpnBrCoRK5v3Phj4aKoMjeBHo4ix8E +2025-11-10 14:56:34,bc1p7mx3edp7h05s57p57cj0wd4jvqvutaw5szwklvruep8sts8xe6uszux6sv,bc1ptp8fyw450mz85a5tf09vy4thw56q0qw0ntm83tvd6ep3hzje52yq0jt965 +2025-11-10 12:17:32,bc1pprfs443mkx3q3902hgetdkq5rg64vzmn6emnn5f6saxxfe8uk8vqlt7a2d,15E8oCgYt5bJBaExy18h2wJdm7vkjRHKv5 +2025-11-10 20:29:45,bc1q3zph9r2h0e2v29s8phj57u4sd5l2y6pu48a945,bc1qhghnchwd5j5av0vslgmz76l598zr3v8yrrcvaq +2025-11-10 20:29:45,bc1qt3x6hvy29mwsshka8d3zzp5ylf4yjhc0jvlljp,bc1q2x4xyl2fvqnmvy2vt6u24e67lvkmq6hxqkkjte +2025-11-10 15:34:09,bc1qnfw0gkk05qxl38mslc69hc6vc64mksyw6zzxhg,bc1qsqfl939qdz0vpwkynqvawntv2gzv2394wjp8a8 +2025-11-10 13:42:54,bc1qcjqyhhd2mlgm2x76rw7pfqesqxae2kkkp9lrek,bc1qhm8d7ece35rk9umwd4d2myk36rnseq8wwmaury +2025-11-10 20:57:25,bc1qwlwvahdkphj3rwwc6ln6ww8jkdy7lw4vw3ce5z,bc1qstl923xpdarm44573p8jpeh7lu2mp2qztp5pcx +2025-11-10 14:56:34,bc1q760uuh9m67gdnjq594ywkyf6m9axutmxuy5xpr,bc1q48rrl4cdyujdls96yse82e5l7y7lmsyfnjfy6m +2025-11-10 21:05:57,bc1qn3c2efvw6vmwqtmqf0tv45pv7g6ftrms9yhrd5,12sxYzAsmziEjtAvKkxqE4w1uvDaTUXwXW +2025-11-10 00:28:09,bc1qegxth00stfmkuneeg4me6zlvyyl904u0lcaz2c,388JzjgiUn8JDgzaWTHxTZ9UbF3jPFMJg3 +2025-11-10 17:23:00,3H8XNZk9YwcpUAqRYPLcoVNMH3o18UrYhQ,3GKVcNr6xnXmyEpVu7h8kEusryGfy16bU3 +2025-11-10 17:59:43,bc1que6q8d3wt5e7xz6x8qtp30euh70fgvkhyduwqf,3HTtPcoAUpq1Mma2mE5mHUrLDCXahJg9Jn +2025-11-10 20:57:25,bc1qh8aq5wxlrq94m467gd8l40rv3v6ja98fe00f3w,bc1qrh7f96680090mup2njzlasmwlex50l2ntjur4u +2025-11-10 15:38:35,bc1qs0chk7re599jqdr7z3vpsftc9ut7du7scxnyrr,bc1p2ncdhtdzf28enksd2syp4033atm4jq6j9v2py7uqe2n48jf5jelqe5jycq +2025-11-10 21:05:57,bc1qyujm66rpnnfnn6zkm329gj75r6e73tk6zv5j50,1PMS23kYZv4VT3zPBSG6j8w4tjG9VVLr24 +2025-11-10 14:59:57,bc1qsnw4rsdg6tgrnrnxvjq5qnyy2j23w82um58tw9,bc1qd6t23tfrg82zpapv0tepvsm59r030mafdqx4pp +2025-11-10 21:05:57,bc1qr4v4nl2ay04tt2uz6lscy47wj277ntf8cee9rj,3Miq328XhYZVgKv8vbkxF4KxbGrFPnccA4 +2025-11-10 14:12:11,bc1qp74l26w7t27r4jedcsx3lcwn50cljwtqs0yan3kjhx8e42py26uq35f4j8,bc1qhh5ju5mu7w55p4zz3sfl938xuqu6zhdpnx9rlz8m3umqz8vkfltsmfeyt8 +2025-11-10 
14:56:34,bc1qu4yaxft2tzuva4x2tukuwuus5d9hdzj02ttxnm,bc1qng6tdu6sc2aw4x42lmg6ntqtxd4ut43357dhhv +2025-11-10 05:47:18,bc1q4euuq7lcs2tnt8fgdxaylm8034q7snaekt7p24,3FN1CiruvagBhricdJMiySa1vD9h3emN3g +2025-11-10 20:57:25,bc1qy9ed5tc7vhs9dkad6hhkw8t4wh56jyg0czxmk5,bc1ql3xnnz4kw3wr080zrk7wysmzjz7sr3rjspt82a +2025-11-10 17:23:00,bc1qa5j77xexgsl46ahrzeuktv82g86qdalxn6e3at,bc1qvhket3vgyyy3yulclmr8efq4luwvy90a4qwjle +2025-11-10 20:57:25,bc1q2zsrq6vhkkzyr35hzkjgur38nhpj86jmez92cs,bc1qeldfg4ytnzsjk553mr2cdlk66usfz22pyz67dh +2025-11-10 14:56:34,bc1pseekhxtjhmvs0xlc5mzmqs0dl8xcjek26ujwpg6hqhkp3gc50f3qjuhlxp,bc1pv2tdl4x285n96a5e9hh36mt9xw098ktqz5hnywe86cph0pplwaqs56yw85 +2025-11-10 17:59:43,bc1q2gchdrgwlh5tzwm0spph77a7mj5pq0rc38aahk,bc1pcsc7utumn86cklrkzyusxdgc9e3fwj6mxu9j327vwts0cv8e2a3s92ddg4 +2025-11-10 12:17:32,bc1q0qfzuge7vr5s2xkczrjkccmxemlyyn8mhx298v,14pXPZVfyeL6gxKZDVagbQnxQhXMpM2Thb +2025-11-10 20:57:25,bc1qhcntczjrk7n83736ww45zrhhtxgwll023qy3eu,1ETdQMChucreiaNBjPDYV3R278ChRG3c7m +2025-11-10 17:23:00,3FkQ5nZWyHs7u63PgafTQ5jBK3TrLDcfRx,15qdGZi9vDYp7cADq2jqzpo6WvpVmX7d4p +2025-11-10 13:42:54,bc1ql6sgtq0uwh67un03dz4nt26n739qyu2xatgz92,bc1q7tpvm5d0yyv9lxme5y73s46n9cv803ue3gwevg +2025-11-10 00:28:09,bc1qv2gc72d0j8dmz530vuxlgyj25j4kz5s0050trj,bc1qv2gc72d0j8dmz530vuxlgyj25j4kz5s0050trj +2025-11-10 17:59:43,bc1qcr4jzax2wjyt5lkpqkhs6nuvrmfhmapsh9j8rp,bc1qe6asyl5njvyqc39qf2y7g5vqzscd9jysjwqs0k +2025-11-10 18:01:50,bc1q4y8s9l2yck6dvcejpmn90phdernngxcjwapqge,bc1qyktx2nxpjtrn07ftkpxsjv9c9atx7xh0nmrcdm +2025-11-10 14:12:11,bc1p6qvr6ceqgzmxdqrcrg2nzq39e3sd288sxa2reac6e285qq8epu0qgkdv9g,bc1p6qvr6ceqgzmxdqrcrg2nzq39e3sd288sxa2reac6e285qq8epu0qgkdv9g +2025-11-10 15:34:09,bc1qc5yxr9qkps7gfpkeg9xvptaz494n5eh4s00eru,bc1qcq5r4zg5f75ef0ps66nsvj3m03mjln9rcqjegm +2025-11-10 20:57:25,bc1q28f9lrqqaxly2jf2azfs36dyl7r6efcy8wkwew,bc1qz94dj90ymf87w77wfdpe7xyq6j9ngj6x4agyt2 +2025-11-10 14:56:34,bc1qtjuc2dqz34tkzs4uwame8rhyvpgge0gy6knhmy,1HYadqXeegRjnqDAYBkj92o5hF1pPJu4sb +2025-11-10 20:57:25,12rLYV7AQfpuxZpPXdfUqZCs7VCNp95qq8,bc1qu8xl7f8jkv2a58j0mas98r7gwecqqmnw6lkwt7 +2025-11-10 20:57:25,3FkTTwxagg6p7rs4d3GnQpZbADpatufQzo,bc1qfyfqzgvzxw2s2w7vms4yalys96aeqlq4rm0jxx +2025-11-10 17:23:00,3NhHCdt4RYXPjVQiYiWyRRyqqP7ik8SS9t,1636RnPVv6j8mTyaRmprjSpQCPAjD5UGiA +2025-11-10 17:23:00,bc1q5u709x2l7lsleprw264k5xj4rpmmmhhrurpkq7,bc1q7p54e7uarkxjlgc6qzwmugn7ygvmcwss262l50 +2025-11-10 20:29:45,bc1qrz8a6d4z2xnd2e3lnkd45v9jc5vd65t6a0pgzw,bc1qz4cfzstee7f208cxdca8v70ht5fcv5lypc63rv +2025-11-10 15:38:35,bc1q769n0hz7a9j038jdkwwd4lq3xwrkaskv6y8sp7,bc1qwrgtmyau0h2ar0guws8a9c0sa9vmjzqrlt7620 +2025-11-10 14:59:57,3M2nVoRZJgkxNHG7W7zp48xob11mbCTCKA,bc1p2d25ns4cf85dkk6jyytjeg4fcfv22lja6wwe77eltpwc2zd2yzqqyky6jm +2025-11-10 14:59:57,bc1pq7s7kpp90z2d4s7hzfxj32n72acd0987z3u2wm55ltst9fwelelq4emgpv,bc1pysearh9me9sa4kfkne6hu3jq96shjwdtrja2lf8uk4wn2uq5jjms72j3h4 +2025-11-10 17:23:00,bc1qjnmp4gxpp4u056dapqngwq2asw2ty74htrphx2,17Q8GjyhXZcf5RMmHGU7vznVvKpwGB88PQ +2025-11-10 05:47:18,1Nvv7ihqwz6Hh9rG4Bk64K7nGT7y1g2Wa7,bc1ql5zu6awgrz7wwwq6369kvc8fqz24n2ts6hn7y2 +2025-11-10 15:34:09,bc1q8a42mx0xfeyqy90zfkludfdu3c43w4a0jfw2ps,3QRLixkesAcqc268rikzbaRShSwSVydSSE +2025-11-10 14:56:34,bc1q5gpyv756638njr84s8uzeq3v59e97ha42hj52s,3FZLUQmcFTibssUKCJiNPtu9pXexXgoVun +2025-11-10 18:01:50,1LtjGorQ6FeNuC9S1oThWZQ6b79VvqH6Xp,1MqBMKLfsYq1xMwvwzZe39VAkXV3RYmNmk +2025-11-10 14:56:34,bc1pwjzpf2p4drax2mympx474phluyzjmpl7udnw9hmx43hxgc5w28vspms2c3,bc1qwrcm425acde6757923pxjkvlees9tww5xuqf0y +2025-11-10 
14:12:11,bc1qrjkmdkhewjktx059nckpyqqlxazvr7kyeg57sllw7ksgenfwda2szu8kgy,bc1p2pxfzgune7ked0gldvt3j7zzusjv4t2uphhaf7pqg3srqdpuvngse97236 +2025-11-10 00:28:09,bc1qk90plyzwtzweulus6mmf9sd0zndplwqp26u8fy,bc1qeku5u5emyu6lgazyd6zntmfvkzffnk4yxm7l559hl0rku600798q533jtx +2025-11-10 14:12:11,bc1qmlj36ml4nay4tm5gj0h4u769uwpww6ucyqf0p2,bc1qp7ekeam9lauvcmltkhfxlyzd3udzmd39syyhh3 +2025-11-10 14:56:34,bc1q63qkttxua3aw9umnqqptkw2468rs5jne547umd,bc1q793gj5eml2u9hpgqm6y3xjgmfu669xy0aqzfa0 +2025-11-10 05:47:18,bc1qmptzs6czc3mlp6hy3932kka5687vn5fd9cnecu,bc1q9yn6zdkjjlh0z5y6sqpdvwq7pwkeh5r0ka28ad +2025-11-10 15:38:35,bc1q04qcmmnmd47dc8mjn9fcvxcjdk5f6edg6f40mw,3FxCJ2XUyFEup66QDaxgcHF8az6P19wien +2025-11-10 12:17:32,bc1q7t4vyehjsexdme84qhdgd4dawcn54djh0m78fz,bc1qcu0wrnx0002g2ka4sr0nnrldtff6dvq433unh2 +2025-11-10 20:29:45,bc1q3ms0mj7jtt9nd5smhv50uvd27czetxxlnlzkuq,bc1qcu3a5u765fdhddzccjy3k02uzewarlwd7yyn82u0fv42508n650s6w5um7 +2025-11-10 00:28:09,bc1q6dadscrdytuwjeedk9fr80xwmnl5prqvhwy7aga4k3fmxwhzvf6shuzpmh,bc1qdfcstw5dcud0dusq3tscs5khczqc7esn7psa3p +2025-11-10 20:29:45,bc1qtkya7nnflevqx2tgjajycw2gjl0y4w7626lad0,1ACE9sy42uw8Tns84KjueMj8koezrcZRdG +2025-11-10 20:57:25,bc1qm34lsc65zpw79lxes69zkqmk6ee3ewf0j77s3h,bc1p52jjweup7chageaggu4cj8jl3avylha8zrr7lgqkth82tancdrxqlhnvzm +2025-11-10 05:47:18,bc1q3dc2ec45m8s49u9rlv9y8ruyr644utc3hv7ncm,bc1quf7hjdq99rldlyqmxaz9sd3unm6j5fv6yulty2 +2025-11-10 15:38:35,bc1qsh343fpz0mtlfl3k4xzu5qru0uprdyh786lfsu,38qZsSoHitwa7XDicxT8xewyXPu2VAvhhh +2025-11-10 17:23:00,bc1qtgvxhy2armx5xajkfmdks5h8y7cqmqgwwfdrp2,bc1qtgvxhy2armx5xajkfmdks5h8y7cqmqgwwfdrp2 +2025-11-10 15:34:09,bc1qr7whs7crqjpqshv7qk9ex4742wagf9gnznjulc,bc1q20n8jugfv9c224fdfxe4vgugyd2gh7uaytt9kc +2025-11-10 00:28:09,bc1qnfw0gkk05qxl38mslc69hc6vc64mksyw6zzxhg,bc1qjhv7fcemmjx4temc4d2z500jv5fud2y3rtwwg3 +2025-11-10 20:29:45,bc1q3jzyfvu60rc3um5rah7y6j3gks3m7jffqpw9ef,bc1qc29w7mwejcuklrhqf0e8l6zjys6x4sqzkns2u4 +2025-11-10 20:29:45,3J8dPt32vzUdQzvwXpicG2XFcaG96dnRZt,bc1qre4tzdx5r8ckzhn9ffrxwusvugdfvee9n0v5y7 +2025-11-10 21:05:57,bc1q2dzekmutn0s8wh5ty9kywgddcl7j796zju8aql,bc1q3u7r770vyc5v6hae8v5pdv846wq430jhdfz40j +2025-11-10 20:57:25,32bZeQ89m2oPeM6wLKeYdvzsPNBDb3bGAP,3Psvpa4LQtEf2tR9i3VJwGRgHQsFPJ8rf1 +2025-11-10 00:28:09,1VEmWQLu9iohP6RMmabnKcDJuCkyk3E85,bc1qchpdg4wnyaswyfggfatrrwz9snasrc92wgzhfy +2025-11-10 20:29:45,bc1qlw4565huuxsr03dz3sepexjv6ujmfy2amye98d,bc1q87s3wsnzdhlclqpymykpkdm44ryv66av5fv08q +2025-11-10 05:47:18,3FfS44EtZhTBb1XXQPXcjiVqxpub9gncz8,bc1qft2zpj0wl4zqghk4lad6qr9www4zrrseuv0y5e +2025-11-10 20:29:45,bc1qsggexuj2xdmne5kvj2mnu4ur2m2qjpwlyrtqtf,bc1qfyaje5au3xzwcdmt2ecct5xneywrqaf4p46m22 +2025-11-10 14:12:11,bc1q4k8t9a9jrzhcnlyretxgz4kqc5hlyruvra5q5p,bc1qlyy7f7cu2rptc9n42khv3lxrc3pzwp58aa8qlp +2025-11-10 00:28:09,bc1qlwjqwjugrv5c5wzg3hmtj9m72tqj5mnqeazrzy,bc1qlac25q65m2wjz9fjzqg382txhycjc495gs6ez2 diff --git a/python/tests/data/btc_dataset/flattened_data.parquet b/python/tests/data/btc_dataset/flattened_data.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2102af87e8142fbbfb10ec230632b4367b4928c2 GIT binary patch literal 19941 zcmcJ%d$c82c^}vf0=JO5QOoL%gt0B1fFiXerS7iUyQ&=0;oP_G%lG|4Hm+N>tLoJ2 z?y6n8c2&`K8V2GHCivi3t;`AG)x=YITC%g?;_m!G}(E5o6xremBnk%bqTY1-G+|U zb!O=k(P_gHr4uw|$ERmfL$yt$sPM{!kT4>)G%3+h#2edar5id~T%^}7KQV4BDpVM& zs48>edUoxVF6k^)D#z2J^ktQlWM+08r(R`fPj;CMW9{X_2@GO#QkT7-gng2=OjlPv zH`b1>cuI1+$jU6pGR8dIMdju(vt-w#HM9Hd%I78p?E-40jps2VDYa4ft!q}w_UkM) zY+^HNc13^f;>X4n=2c)4OCHd?>`XILtxzV_c^hO-6PS+K_Vm7q^vXQ3SY{@fMH;{F zO3$>yQl)_oT3rN*8B#C4cJZl6Y9+l)&9Y!Y<=GXtd}-AQ@8jN5LFF*NiKNkD9>-Z8 
zdED2vXT&5mwQqDSjY954T|$(VmbRdTUctHuWL%nIV-^^i5!2dW9tkR6WR9!cwkiC8 z7ap!X4z0++OmQR1dn_E{8-hA6DY#VyRn~Ii2(PDN%v)l+9lY7QdEt$PQ(DsYL{iJZw}~o!C#b1zDo+ov zd(4e`$lTR}9W(6=ZN9Z}*!6Uj938hsS2Vl!#@3B7W`VINa<%9ZD{@sHyB#<3T1IuB z=UHC)iKx_gl2yE=Y2N!*8Z)g zMga+mtaglqgat`VH@2!hTsfvHXtE}EGga_T`(e^1Vbd|KIu>^_RZgR?JT>M;>Q*{; zL>z^kq1`?sVIXJ_>Iz$9 z(N2P{vTS1ZzT@>($6HhLPKqFo15syMRbkH)+F-I@Sd%_29b56-u-cvns^X4v4U3XE zYMt0HI+sc5O=o*%>0~dSr9*WxV|U4#w;0ZZn;VN8&dA^F4+~T7UAwqD5werE3{$G` zrDhg}x|e<5gkfB#%68JqXd2HNoZr2Nq#8O!|!WLfbC(k7RD2mU+rJH@Ob6h7kIm{=SJ@ zIU?#vTdFp(MC?R~S7)3$b|9NlCsp2sS!+#qD9?j{iPA2;%8sRJCQTz7tIlf515dW8 z>#{gd|9YJFao|xG>qDy0qAe-=F0WOa`IMH@4uSw1XD^%dsVdSc#e7j!Gp+q3tqU`h zaU|oGISiY$tfT1;Ei%D6;A8JmNc6BSay!3Uw}ME=4YgvM~h z-dtMkXGUu~l4>1vr~B9Lj3;FoI&Kk1RHRkjd!eknA}LsvDq1uazP!eBRo&L{xIkF3 zyRbE@&dn0m^@!LU%S?Ml#|cj}(U+n6z6p_tc5xlXo?TEw280?Bi$rT{lVLwrHuZzR zy?*CAZwu`(A+s`Ud=|&8m+8cpfgo1RSzO>!apR|H=!8ignmR*zXmsgzrLODqe{{>V zlVJVu=%k6vV6|Vd#L~{uXgE7MJe}<&t&MuCbbBUK6LPiC=dS<-Uo>(i(AQVHncQF>mdLBfC|Rd5I;&B2o;Qp}c46g_uRo9j7)AgijrCl*idee(k*$3P&GrOiuT3J@< zM!p~Mx+^fNW2srsdl9J8bsdV&S&oXsuyIJzw^8O)InBl3H5Vqt%UZuNU7jk_Z~Qt? zL0X2SBbZA?YRlCbV*20%ue&*1nzzV_JJb2i&|k0S=C)>*=x8*1e0sXM>9*zJOgzpT zn}x`;vFd%*)TZh%c2jc2lBTU>*yCxN|NDQuN$1GgjF^jbKfAo)&-?ppGalVmr#$2b zWtR-MU;n?}Ts}&+j=ip4-x#$!$9j*=r-8jSyA?~)E&9S-xZIChYDL_&>b|VQq%S)o z$lcz>BP;_f#Kz=-6PapqyN=tY)q(J;$`uinZqR0yw3Hdzm^odzh=69~XQ(M!8|g4Y zLDLe`h~mMe_g}c_?1z!>Ztv0sQLDA}t=+0vVU!<7TlKss_-TImy<^+&sxbFc5{3$i zz^n?IQP1fb78-t>3ZoZ19}MoCq@}jXghnEZupw9{Bi05C!PB$aN{h%1WoNQ$kAHs5 zP&gKtIxnDM7nOEHQ{|CfR3t`>bsX83G`;f9Nlv5}R#oG0#*iVSI+YE=NMtZ1CNK0& zH&OHQdgZ7d&d(eU-f&?YxF}G{IK}Q0grzEJ zeB$;#3%tbY`i7Mflb@+OX*v%-UJtyYd*tpe ztG%dZMdY|FiKDnzZCv)1pQnXUls0OMgeMX5&fODT$7P#gqn5g@Swek^DA9^42~*Ko zWmIuiG}*MI&oUJKah%tMEp*Q$N=YGEs1lAS2Wd zo`XQI0#*QG6<+D#JSzyOOM0)fbD>?Y>Jx*A%EAidv`d&_R>YN`xo<3(0|=#UJ4?GV z=}!p{L6EWaa>?HO$o=Y+?u<~|MlA~UW%x<4s{ z6mYeYeHFEN!Bn1B(hhqSioW7u(DwjAF;~EdRToDBC5d5{NgN9jBu=57$l^LOoTSgA z&h)#_d~pm!nYk8ogEGxxo@5B+X5+bKg^`%KuIgS{4KPp|XN(JOn+f*~k~F3iBx`~& z7FfBCZD}Cc>=*0XK!#mWwGs1W*09!a^U!eG%uEUIQSDhl5#kwU7Q!XUxQqhSQt5eE zjrQeFP0S`|PK#$4gtd-!i&%~om!Y7qdoweeMr4KIUw{0O+bVN`9YQRc$~Nk}NJUCm z)EG8`MPlbp7{!W4E;e1|TBvF2GL@bLG%v4RoSigQnj2;Zgd<9?aN-y%jC4}lo~yK( z6g@>?F3y&yO}f_VlensSE=>}u%q(=@rZrb-R|C`h~$PjmV9(j3QB#Zsk=4DuvdM^P(>wnqriSl+osF6d;3b zG<)&E&dxjyhNE)h#1zN#^O%aAacWXtl(C9yKa`m^W7bE?P_+F`4IOZl?$~zLi=g5A7)IgGLzu%;xorPdy)df>)^S!8V|DCZ_8&vh|JDwz9Id zRhXl$HX5%=93*EMcC8VVtO&w_e&-lVrpIH9z&TjFZF7<{S|TA+hBfJRZQ6jW7`*l3 z7<39Xbee}{YUAmcvIt6AxLExrkL!-&!Ixb9r3ta4B(QoWP}RhNooWiWZIqHaO<}p* zi{nOy^|gzi8ne9a`#2*&XPs;Kxx#?$AVKMwrLl6W29Y$z?vI6u6>rJ3tXOF0mYD!k z#G?1x#vsa2z3#$p@Cz5mLEe?vIu>_q)|r{je1m$-WPKkwiI>}zA4bk+eq!w9QKDL} z(`6AKa4yxXH`_{Fy)##&fr__8(UxkT8*F+xq)AGr81DOKn+y|03PK`V_n6A zyDp8XL`@L`&rsS%DzQu<+DOMSsSMhi5wNVN!Yl6_>(;FWP4d2XaXH|Mq9t)H?AB0R z(GpAtVpR90+j)yZM%b}IB}Ig>{lw6Ll{6)4R8Ldc>lFJ?apA~XIxS~PB9k!LTt9d3 zA6&UDa5ZTmoTRN;nyvvf&1+P7w2GM3^ax};FA5qm>9uVL$elV3a;pv6ru>Pq*^Agk z#bI#LAg+QKNT{tW8PJS$458ogTx;;$UE|tpS=@BQ&9#)^$N(eyUeGQJ5oTlGNSZ85 z2K3^%2(=NVsX>v@OedF_&)nE&z&BQ&$_|xl-^Mf<`$^e>B_MegB{HyGk-9)nV|q6Kw(4yI7{kU zold)uA-%UK_FELItr(oYZ@Dmi!Vv*jlk2k1%h0GV{gta&i4hCyDwi$_`GDRx=22UN zm5_Cq1K4sNSvhBY)^uG4)Wt*iLcj3qV+kx>$sl5N1OlpP8_Z#e1xgZPYKJJ$ zBDEg;FRz+ZR$~d^5y2hVo>@%x?^2ug%$yC!yJB|PwD zYi7O%B=)mJ*QGQ%uQ1ajZXIrc+a*k)YzVDn@GJLX>2+Qk)&w~cEKW;ml`5@(%>p+L zE`9OB-F4j-gE!qhwzyKIo%xc~MUB)HV7F(vW0+QH%fjPnZ3{A>7sj0nVzu_|CI&)C zL9xVfs-4^J1hXmDa#k;4{krfC@Xx^qx&n2Fj{r zO`N8ESD7^KQ*hgr=@c>Y6-#p7yTq5*AAQ6EK;&uM)=8W<(zx{MZ+(B>=eT{)x~-pp zMP@1iHefLda{;76@r=tv#aS!fbKjVtcnzvZXP{eqof}e!*tR`8PpBx3$~Tc5WA)Cl 
zQKDFhxZNP>lNLFwN>bm?Tb^fKS2%gx64!ZlFp2%5_WPtt?ZPo_6T8WxeHF(w!dRA} z5(D?l)!@>PT)MfKZqefSpj(!a_qpp(6 zPmO&nmfmZLPP&?AsVLJ}vO+ag9>%#RKq4eg_4FT2tjU8FDt)_2(ma}u_=Q~e|YKpTaQ%S^BG8KBt(aU+Abq- zv+i@&4cfJpTc$!H6N{8bAh|{D0qR-><*_9%{p4K({hBdxLIkX`^x6ztCL~@TN|Oa$ z21w6CzjJh|`O^R`iSiM7d(*1Z9vBr019z&pYXme4BkEXM#QD`Xy;_iJ?P%W~ZiPa% z$71s^2=~h4Ice|DZ3y@LKfU)0_u%R5?&{WyA|~ugT7kJ#Y>DHO;k@u_6>jyXuXy;z zM!Y;1)YkUV;byYGH98s{blclqS#H{+)Y;i_)zvqGKQ-KzWOgPqE-FnfJ#*DTIa!H3 z&*QjKl}I1vca5vw@fgxQX^Vh>a4pinqK%vQvdCZWcy7WT4FCSkbyDn~ZXd5iX}ht!WFOeQu$=bvWM(EmncJOP9sZvm zejc0HPK9udo&;d%<34Or2^Kc-{J?M>#63lHPN4rMXdm> zXiz|0Q5KOd2})H`ceWMRm5AG`|L)!!8%<-6;u3&r8R>Azng#K?Y8Okpb~h7|18?vT zADB3vg_La|hjt}um_AH_n!*6Yie-X^G?GvgA+J68OOq1tziVvD7=i(aUQ0Tv=cUeq8!N0vUPO+o1uGa`WT!E)%c^3|@GhRp&HHjM!3G2ug!zWXkW5X2sKD!pt&7;8xHf zH3X8PEJdAaVlk(ZG$}Z>Y?TMs9=$YyN>}%i`Mq!Epo`-l z&-=!J9e$n{d4P)2ck-lgLm$N)79Oe|j*J#sMt<~r`n%Tl#(g0(IDFMMh&W57zFRY*=* z2vi7}1%+mXvRPn^F?7}}l+PjtMNJ!b#H;d_wqBBkSRazcPG|dNUPfAn0cgqRUu!0P z7dDE0_JIKDzXG4ou>=G!WMexC!xH$Mhu(d+%oQittWDDNd%;%U=tD8`m&oCw?=873 zD7KGl@INoCk)Y>kuIj>&4sveYxoM}PAn-Fgv0c_DP-4sMOl>14$9&6ib8@=L*O^{h zV4T%LfY;Y6Zy)WvPnf1X7UBx~eYg!Ee86Vx>;)BKlQ5QZ3%b zs)>4Vjy{d?8xomEODe8C{^)(6`=QBPdhR}8b*lTw1hJEVWe1m&Q0xv4QLXRbLHOXy zuRQbeukawSbB|n`D*7)?bGd);&6N;9+JANag-?IUtZ>0Rcm8EBzvC6J{FV!MzUr>K zFJ5}}J(utO*88qp{R{U$@S1OX?YDo&cRu*g;4i-J^$-80M;^WQU5`Ef#9uaMh>5wj z9rwHc${YUb>#qN`Hx6g#<`)*1mcRYUl~rSHedBvJx3+h7_x2ACkB;B;)c1bho4?;X z@n08kunD6i&0d!mr=qN6ts32S{abGQzz_cQn{R#FfA{ux{EZ*F^*7)7!|(cA@BWeT zdw%q9|Jc*-{X3H%|L_0q-}{O8{p9;U@WG$@>7V)g|KMjo^vutF_~$?J3m^T(UwYj? z{726|^p=18%O890pZwF0|H?o6=P*S4&iMS5cfA8n5?DB^5dk=;GmeyLP-1Hhg&_ zCn-`CN>c0c`(LsP;&&j|wxMP9X^Rw{1tv=BCUht^Tb}wJhpf=1pqK~ef9Ixo68M{Z zt(;j;o!x5rn8{6@h>eqQIb=t4Z!Wt0wuzO;DLA6st80Y_MJq0SNO}m1pwx2;=u-54 zf9>+y#+pMHtjZz_6qYu@`W3Z{DQj%bxfO&-k=P0mm4gWI5r|B95kl|?l?&oGbdt6} zoC59$+(5nPSRgF6VsK*0h2ESxdE@2<5uwQaj^|tzHRnz{b*Mm2en~cn>1@wzobt8( z^<};@=SGd5Sqz3nzP!6pEg!n0-HpT3)v27^RGL?oNvR*VAt>=Ka5CH=hk?SVY}XE5 z*4R@ed%_wJaa9nZ5Or;6mPL#}0?si6&Z#pnskF}x==ZR^fB-KAYNgtNqr?G%XaKB= zJ}pcLRs|`IvdbVQJu!JnHV7Cio|tq6V^|>y1eI0wR%P0VVu*3F>_lWFs&qilJA>i< zH@7ynEO%~}&oT4ZtjswwZ%P_f>-($6Tf1vpGb;F(-=f4sT4v=CB1sjb=h&8-U)RCyr2eN`I0 zN$b!nl-D%5Xsfi$;?5>q7KsZX`*$Xi@PcS)C=jo7X?Ll{jOjiVZI~%PMhaxQk<-OG zx47xr$8)Qrjs||)>is~?P ztAe);Ohs=S_YjROE}B-koO#qI(9Qw0`aoMqwQvn(T}ae>$9-M%46O5%S}Oe}R0+61 z9n>LKd1UpuigF#l{s(W(P?knw@9;2+!eibo22tBews5LWQ+FLZ<``y_w@q>%N|SB# z%%(}(MquTbZAvVj#BPivg22)lYN{1XWa3!JEDCo(`i$Qac=X>-eHU^KGbs*0`5?5irYfz?<>O0(8h`t-{t28c40qK+9^ zA_2dRQ34tuuvzdSAY?jO9T9JE{%3Exr}5!NcVOj*r;L(xNauRF?AXimTFvMAhB}&t zK9$s{y;B=3a>KnA*b%H@1P2OrQISL03xuP-I1#;qR1NvD^{P0iS=F~)ks7{qJ-dO4 z&KHO`;kC!JN$QhQ1%a{(RpVI+Bl1x+2#y8J6eRQ{2Tx4~=RbOLebZX<6Yq4>Uj#7~ zQui3t!bYl3%xFQALq9)pV}d}`pl~GfJLs|goxLs7gC;kruzMfBak9; zL)8GoFk1CJkcmr5s}A6$smsF7t2P9e{n3dNaZr3gR??u0h!IQAk35!lMvDED8oA=o z>R~KF)C)lN0|0S`T2wgL*qVa`H_AA1+`8#}LpN4(CIED4C|u~BsQN=Ql~q)P$Yd$) zcmdRhXw?%${JuytFsCZ0V}PmBw6(!5e{oz%64(?`J9dh^sjS>f(^_;&S%5^mbvrP! 
z34Lbb(yYsvu3JfakYfedZVh)-WTp;4JWOS9cc`fVFeypIineSTA`Lu7Pgt#$KsvzQ z>MWAwN#}&Po#F4J=AB;;_(Clg!eThP&4x$b^2VM6c6-)V^DBW`p5~OSl^*GeASP`+ z#nYOK+J>sev$TgRP{^jWTJLNoz)or`9jRE?^{}?UXesKl179z~Od7r9k)Ua%jh4Y5 zlGl|YrQeoDot}U1t(k@8MROxQ-dsCa@D_yFSuR)UaDQpm=?`hUd$^DfhT+ZSGO<=^ zdK&H&+n%$YtZden*=ZU2w>n7!FHVPK3r-KjK8o`K3QA#Nay)bVK4>#*nd*RsRMq8Pp?2XEUuft1yB$@VGYrRD?-Y<2VeKITu>JM#{^lMDvoEa0q4WYrs|KNvgNmb&4v7P9!!;wjhL%{kQJb8VF4nA4 z#csXWy36ZLw9>C^740_N--wdKxm7>iX$I#XzQtIuu(o|%9_a1l@Nhpk@#2HEk)5j@ zcSw?CWhxbX`mSB^b&V32o_W=cZ)TJjgs+I5^XE9jykFygr`vKqRTb@QXL-u--e4ngcAb;c|S)!u3(G;-BpAuIwgr*1CW7`3pC#qjF}> z+})Ykr#p@|=Qdh%XM-G=b3t;tq};8InID~K6JA}+OUOdd9!b^-KM1S5wBp_?S{Gzx zBsCvAl-*oo-O9+`^^?Ai4($UckTZQxHkKBOU3)1w9rnY!Z<*_*>_;1`QZ4FrR~1P- zl&eR_b6a-45gzUCH1yizgGt%bEVBz8#X5;V00jmrZdhA<32B}oSUEhe!@>FfhI=5k z*y_=U>@2Nr7ZTa?kPg?P<0RTy-yD_sqp!KSJ+rE;)meDY;OSXS=T;7jYCqlUmo}2) zdg0KE&%giH%yK_U7LRy0KfkxyLGJmn# zS*ZPC6`x+exO>}@UKfRCoLYWnKtFdlttz5L(8D)?+!MlnS=LToWql-S({lW{j3hNl zaQ))!ZL=yp&w->pG9?w-1p0Pn+F=AbSVMd_3sm=+Nz0TdlZol4!K%1O z!$@&i+L#@O5*N0zFw#2knl^{9=I4eRapWol?Y3>1QRW(KJDW=2sNZ_ehQZ59I>Deg zKvYSg!v}#zabj>U!07o|+=pE;N}$b^AsCJ{I@4|<7#x8kLL>A*(*rAjjjmv$aTR>I z1R)seAhjQQ>eh_2;p}X>dn0zRx>nC0q>{~tdVPJJuk2a}ba#IC%m4`HNg=Sn`EKLO zV6N$d(sRMNWiE`ZRipcwp%zEJLYR-k#;Yv>Rmv#bs`rFhwpFJrDl;glaHGK32lJ?a z;OG=am?3JGr7M(1wx|RzEw665Q$xcKpZ)ktv}piSA^pP&;wEjATB>0n=wbNrQC=9wg9s*Mhsp`_RuliZt(WK%eoVs-+NVlmBE_rPW0DQ-tNiZ7hg3_d|!K}?OWVWJ=n~< z9Hueqnr6fRemYM#wo(r~G1fU08`61AltE(5s0ke|Oyel?T-w9Q6ZSIpXTEdn&`zTe zLCb>egS*7}DQqxN63GUd2;L8&#_YXf(j-XUsP-KOKU3-iSz230?8w+IMQNcJDBvHV z*B@QKU53o;LsUnwMJ0lVin}B#i?kFXg(bO$-M}m$Z+ZosN6_T!I4)(yoJU19dg?Z{|kSz2)XRd7gk+-za`)}4l9zQ)oE3mGu4jA7C8?1A?`-JD-&g}a&h ztIhnHwLC0Ox^&jB`*eQ!V6?Wt)<>^jyh#tzJyFb7bEn&@Fsp9HiRG=Ht|xqU`P3S< z&cfp#xh-mefCY})$a{x+AWjS*Q>%_OW#ER;EmY@@Swml&nlSr>Q__`2QoD@a8P^NW z>0ESF)0351Z=vmGoS5hpSYw+s*iQz@yeOszmEWiblIhgcgH|d9cgN&h{c%=%UWKtxGmE(sRw`OUYMysszBLAZ2+n? 
z?TWJTbWuax?Hjm}EYvoR=UjXI<|M|hvANGpTBVSBpaH_{0b-t2h2*89b?&L^jQf&x z<`NUx5y8>2sBH=dkpS*RMd0Kwd)R1%KW$T*SWZ>JiXOn!5M&b8a>*USX@>le<+2OW zJ<+jz@X&AFB(go~hcfR+3rRg}RsoL?$64u}2oGhi@$J z)|>w6a8H0&%{`JY(0q9h3i%ugvX#hP`NE@jRPm*AS8p)KFkC=oL5if$JelhZtw2Tw z`YlnFrM6~o!>Jg=b^G9 z7jA5>?+`ORI&Q4}U9;bu-C1IZqtW!WCpsGfB=H#Zz%id|y5ISuR3ANtaunL$>0uXoL9kkV6`lWl8 zfxe-kv;c}grY1#fkFP#(*Nw%EnYj^!6EB*LtC1h9tdF`KZ>E`dW0U6_=4W0x&R_~I zz?W!KSB7Ksa5v}cS(#NL0aD1|^zs;f4PKi~)Pb1l(5Hj8EQg1sl_`x#jIKs- z1DSek!m}WZtgbG4nWU7Oc}0ArF$Vip*b_X1o?&>`pZxS~(U3aGU{P`4kpr{H-8eQ8 z1YNY3xlW44L5fxm&5?0LYzXM49xQTbL~!WiC%1(!eOuK4Jvg3b=%4A7Yy&vbktuY- z;9PPcc!MmerQyQ`PCQe(BnV)mK6h)z5{H||`(ek|Sk$f_&+mKD2_WKDe7G*w$!>fy z7|!2V*bTPU=VIj}QhOV-$>#jgzPU|oXD;4eGJShg-JoT#bUbpK`6FTb!}$_aT`x}A zK_m9_Lp!hrpMBNXu0@m6!05UL9Y{I+xS%ot`q1_OyA67^O@sG2j!Ze<1neK!mz^{ye3Kj z*N3scwcFSz&{`Md*B}4L^M97pW9q5@hE9(wpPE4AsH{Gsp-nl|Y6&?=qTdLGCc0cu z3j1Zkqx{;P;;FKCp%}YHmxqoG3o-;#q=vTh{`hr8l(5!9-(-C+zjAYC`N#{(Lzu_5 zlcJm-9?hnSOHOILI3qS!;IsR}?~T#%l-UqN(uR1{Nw5W_>r^)8(l5)r&@4-JJ@~16 zCt(+sebn@=CK5f34fL4K%Q}QXkC!k#RJj3)c`$t44fr6pi_Lvl#+-b&^R{z1$#=xo z+~zQur+Z7I!SI1GtcDi1l~<*JLlIhbVJ%WoVRuoJ`|$V?Mlw2Bdi5Q>b?FaZ^%R5` zJ0ynVw}J0Djcvp=h0IPbz3-m8nOQae>fcOs1y^|Ip|J(hvJojQVIK4ip!g<7`$h%R zAtT21N56jz$26gZw0x*kFkQoa1&c5YcA+0Ogk*^aPZGcM-4DF67TPCply^ORswvdx zy?bvX=7UOgbM8tg>tTv=Ax7m=qBkN=z|&V~9L+-b94%d-kCP@zE^V|*#@65=>!!Ws zEG1ohc+-)DXS7$$(^1viTI&+C>86eEVD^bskXoWM z%kCClb<5n^UK_P3mqQ=D$$(R%v_J8-cj6trv)=_fI}3vkeB$Qf@!H;2?l10+PNdvh zom&}gY}U#ZhrB(EPWM&uj_U8yem9nf7@wl&~2W+)jC5L|K4Voy6`z-6d%6BMR&b zFc7>^&>_k|sHTV{2rgo>zx7{WF-O~*0z2F(mPKCQO*;{5c-_-J42#t2+T6}4b>qxH zgB&y9ws%Uv1r%hLe`zdio^lUesGe&4Jgd_r~DrLxT#SC9C>x-jvT0XuJN7zT+*>RGZf3iR4S z+6mzxE0J+j5tHkWp4={d1&9d)3u@#z2zuX7Ix~d>x#!xeG9B1cM;%c{*x->EP603_ znWj)DW*!^^x5`?hu|Nezpuktx5@b&r`)FvhA%xjNdgi^CF5H?K&Mjb0Ea+CwF1p36 zRWzKfCr*0G(I;}e9V}hH^S!sxdYvS&-ykr@UWA_PNLha3KveJ=blHOosVX~*V62H9 zQhTT$D)g+y1vmWaMfQj2j~V>tm6v858Qi{mOpL-wR1_jc6N36YY`KLt{iq8_6eH+# z@YbY*A9!G#z`GLqJkOIBEswd68nLGa+{83g*!E_Hl^uabMw1e}*_tQ5LXM2F(z%nt zGSW#zTQu`bn-A6Cqj!$c>Pw*L%V1H<=9kqW!nbC z2->jN;||Q^LE~6VV;vYt9H5)cxOemyZkd}Bf4IO}+o~5rQg6E>bALrIZAZ&na&dFT zslu6SkA8Ymf;>Y;)i`;Qu!dL2uW38&1FuRiw1mZSLO(f%3)BuF(UMBQCR|t~g?^W{Fp!E~qdDL6 z9N2F@a^c4^4-#a_XbuZzk4Ac2-^9v-XUI={%ca1ByeEH_E9Ip~sLs@t^3AIBKVMhM zSC~@ZN%=;Wl>h2T`SUTPz>o4LZj@jBwNLyHpZxXT_{}SKJg`R3-TzAI#}MMKyz<ru{SjXyn+z3blBz5!Ek?%b8Tarn{*eQ&jVvlj&LWXRW_TYK%7 z554+S)nDh>)$z+8c&(@Za+1 zO&)j*?wWH~UUm*Y?!n6&9)0Bl594p0U{upf-uEl z<&M|>|6pkPn$cbT@H@^w@O4M{)zkOl5I^_y`Kup(=lO^K<-1?@@Z@iv|C2HP`7Q6S z9pk&-{uN`q`lBD7KAwj!T>bdpe%ZMP9=rdQ!Yk06fB)o;w+u#TDsJEGpJu=ga`4>` zzUg}(dvNgLuWvp(#=q*_vtQpbC}26p>4UFYYy*5c90B&c`Oi4v#mm3B?!?(w1YXUj zt9AO>Lgk|Snj_45Qz5^1dOXJ;biw%^E?uZB`+>(E{F9;l>4p8ZBl+6nUo{X_jQrE! 
z>x|>{1U+^2H@p5APg+3F8T4fTFQs3@CkEz?A-l#54X3!=is|v3@!IRf8j4r|1*a3!{KlX z|DXQ1IviTV*=V>p97e;r=GTD^<41(^XYD8ROdT)nTZd$$Ia%JXPm1-?_l#yw4tI3)Wd5c1GFP1B+ZkWl@1IOg zqk|bkJ~_9NoRD_rEgHv%+v4P4@9g_div8MO&ca|h(_gxFdUUv* z1xs0dynAFEz^ik(^W=2AH9i+lKYuhG;}V@YS!&(2x#=9xc=~)&YH;-r|J36`NG9&Cf7+=nCW>^l1BfMN_AUpc0klRU$GZD&u8rt^wxAI{RR zyss0oQXU^{7%v{5!T1-a_YqI$8}}$rmNDnce`<~!jPs?A^&6i1_2xJ@pyG(QDduxJ zZz?#L(R|-E_~C3e9nX`wJM!}VB$)fg^ZKRr(;Xiy8YkwH(=~;8ZpBft0?%!FHrIG= z`1eW^yf|;<_;3XrpR3NUkv!S1B}277w}NLhgkSl~YvDNaiw$u?th4V}A6Y%l8-*|K zoTcf~{_?r_=X6iKxbo-E)<5>ka9C_&zfHHt*528tusNo`oc)~sn4cb7a`9}x>iO#V zt?5?nzv}ZVXP^9v&rdJ41h3rkzTwZfSaaLm@!>{~cX9o#FYUkKbaUg^QurcT2v)9wJU(Wt{_G5GU^C)kdBT|QFYy5D-IKuOpzK?kE;`g21D&9DIcQ`zE t|AWG-O2zBPeIXt{3tSI=DKI_wV#s>n4X9LJ{NK6%`@*?%oA?87|1UU=qZ$AJ literal 0 HcmV?d00001 diff --git a/python/tests/data/btc_dataset/malformed_files/missing_col.parquet b/python/tests/data/btc_dataset/malformed_files/missing_col.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1edb128ead89229523ec41878a18e51e674a5ae7 GIT binary patch literal 2843 zcmcIm&2J+~74MxHOppdPS;dwu%^|Yb2u0a!yT5xi9NL*Ub|PGZLf6jfJ$xx2d4 zU)3KTA)!5SL7KxHa)Sd$#DNnB;Ed2}&z$$L;<(~3psF2avhyJ#0VTQG^?tA3`@L7c za{E{tb?Oi5*Wa!Gt#(*nuQMMp%oq3nxWzDx4m**+0v?D3$}<)Tflws8A7%h2(Dqze zi6M5f*h#j&zjMtk0#+216taOEsYQ{p82Clu2uh-)GE|M61%gODizF0+2+NB|;Dp>* z2$>9}NO4Q#q$u$~s7PLDP|qv`4i8-$|7_lh}>8PWOf5ftbxfQli9ia^Q!G!E(IG zrO^Vq*1|=Fl*je2U*t+2n;z%O0g8};kR$<8MXdUcBqU|#rINfr^{-zBVrhtimjNKL zZj`eDEL1+mGE9-j%A72Td>Jh2OnsyN!)PwF7t%*lM)~Fr`%qY2ukUm z>rzoJw9)DD^y$;n_^elX2@`K-M&+=pi@l4Z;&66gWoCcqb6ynKsgMh6zqzwczPv8% z!V-jqSHuei89eW~nWc!P=ldQ6W|7E+XO(vQgD-!<>jpeF52MbJr<}$8nUoyoZaCDf zz9mh?^Id!G^J~F(Jln8*kfyis&Nz`1B(vg z7B(NIDrs3Jo2DrkmK(S-Ui;}w({VBw#hCZ_$O=?3R&zeE6t@VFg@Rl#R09L&y(+m= znr55-kHx)59j(VA)2t=XF?~>fz|xdetm>Q6pq4j)k^QRu@foW28V zthXMuwl|qa^@lBysjar&j8VVWSa1B{dyRjrZ2q&hx$^(K1+8tOd8U4kA=4^(JZ}88 z)~dBOnRQacZHjN!QgQrhrSYeg&A)uO`OPho|GPtYo7^v7lWV;EsIk7e()jHf)7oy< zLJ$}rY5wZ|^Y%P2%;FMWA?yc9`?HOo{A_!pyanE8gaT0U}1q=4iLFhW1y-guq=4b{9& z;2jSCJ%g%Au?ftjMsqy|aY-8zmLQI=a!MpK<79pnP%GEjVRzUzq29j9ejD83 zDaJG(YNOG!$!E(u)BK|LDIL7Ft7+N^`OxuH(*&(+YduY~wOzZib-Ulfb86gsg&&;| zn6AmFqEz@m({hafitlW{WZsdzf+AR3G2hy5`NJE}9=ACYPyJICA&!(XNmW zg6hm(^Yq6hX%oCYO%`c$7}oGe92zwY&_B)e6-rJkvo+y}z!ZC#P=M99Ri s^8Yc@+z3G&VKe;%hT$jGCEd8Wgd5B2+1e&=Bl<%6bB$q+$=~9C0d<>v=>Px# literal 0 HcmV?d00001 diff --git a/python/tests/data/btc_dataset/malformed_files/timestamp_malformed.parquet b/python/tests/data/btc_dataset/malformed_files/timestamp_malformed.parquet new file mode 100644 index 0000000000000000000000000000000000000000..65476444279511b0304b94348b6e7e47c07c6222 GIT binary patch literal 2355 zcmcIm%Wm676r~+kb%L~UQ&W%tF9e|x3d43OK14A{7nEu}T-%A2Mv_$&#&Af9k0HhP zLqR{GMHXG;Bf83}i>|sVf~@)xK|i1$&>6~b61yo36diywntLwqJ#+6FP>g_!>eJpGilUr(xC?ee!`6Zj*e*@ccj@8|y-9D-1-h`+pgzBI$)$AJfTP^3 ztm1idWo47XNuGa5zbqW$7$nWB8^3I0Oo4TeX8;F;6eTf(c!y5|&URxE1#o7WV$+9_ z9{YN*{n6fYcHuAr-wYw+n4XlTArk>RO?AE*G(!tj9s8}(40k4Vw>hHaoL#^3hY#DHcTfGEt@lEAR?ZP)QWg5 z1d_xyL@i9GO+L#w53+8SFudzE)!554;wdmu8qXrZ1)j;W(-|uUoWLftnHGyo8c5S7 z(j9C2lf7p#FSyZEk7mASyAlvX;7M%8M_$wfiR;0@H$ArAdZt^zXA%%(D46OA?nqN* zSWaU@ZvoBe!bE8!@#)vkl4jy-7Hf+R@{o#9qXFj}?;b|ElU$j=vW!I2Ql9Rotwt7hl|>$~&ck3mg>$ zr5|oQts0K1rDt%CBHIb7_bcCiw^OMuw`Vz~U9#rer&R~o8g{Q<>QEKGJvB6>p8bo% zGW}VYxp|1lG2mR)$g56dyV(UWS4<6t#BSresDcpSj825m_dAt04BpkjRRUM>|DHcD zlCObq?qf<%tH@A%;Af;DZU%gRo)8|f<_E^yAy&4&$LujxOuTxY{cUiMBN$b>Ee{3{ zNB5VPp!BNn1sR;&mSy<_KV*C@%e>s0$sJjq$?aKgYqj3N6Jp%Iz>iKaO!j!>QF6R5 z%ZZEuypkYoNHF3k_Zhsj#J-K9Nq5MP+2dI04MW8_G9DT&W&Ajq_3d*Tl3c|eAgw!G 
z^v(HfRA+*I`@mG#w62VVrHu;=#qNZY$NdJz45cfXpe`|BR7a2-=*n0&4%@OZu@O?b zgXwjAVYh?1brxf8Ai(Yu%u6=Zk>H-Pl7;gS4v{*lC)!YAv~dd&Zhc!afIT$Tc8-UW z@j%Bhm{Y>5E7X;4Djl}TxdmMrEn|X_kf`IqV$61|N$&vjJ$fHkS8-Z(wRb!{9StmK z4;L`Y^CYJu!8#ouPt_hu&TCLtY=mnxKz)PM66YSbgtyP4um|2`bj&RAI#RnG9Alu9 zb12R*P)b^Kv^$x$taYM|IvKPjE|23(5Oj`Rh2`^l9hv*l zQJYWtPTsROJndlySrjWIX1kvqnn&30KqAt>x|8KzOUFIeA(0W;@8=@S^~F5GdL{Og zC%MwFxX31-$@CR*?VL2itMAXRX1BY$o%zS+zkg=_pU!N~U!9-(@Z22#yScx- zaqp|2FBj%M_iIlr{Fd&gfAWg|72V%>wR7QB-GAurt>^zt_ZJfJ%wOpK@4x)dPnt%&!1UH-ulXSe&w(A=yyiD-}oEdpZ)jW{=`>x|6OBn>CAcOrQiM0hyGl@zWjLa z5AT%=^Kic>c-xbMxn}E}VaRqJFxs$mO}G-(z%cZtkOBJgZxL=CLQ9JZpBz zZ=8Si@xS#i{MA=p9m;?FviQwkieBt$y58X%PtAYt+NKuL?(j#KzP6-?$9^Y+hG%-H zN>phikiklrDK9-K{4l1rllw9%^GGaRyg3C8GZ1tN-(WOztEw=hmsV8-9pMzV?_y1<~hEl4O(ok^_x&;D(?O2p4 zJ5Eg+da~uTs#ro&c&-a9n-o>xz`8awAMjckkY^4qvzoM=$dFYTO2oa40uY427A68J z3`uDlm2d+miCDaJ>E=lZ5tl(E!#qp5>)C~uxuAwJla8l&=8K$WptF1u#a?bGPt=JB zBITvr!3Ho1D6+<F-*+>dC}6v=ch5Gjfl;RSvdFXoIqc| zf|4d`pv1Xj`DG}KO55=yNkfnLs_={m#HR9%x+mY`RAvY+J58M9GjvryvVAf8YM+qLSQBZHma)j znVm0P{KAxSoavL-&csX8KpZGLsUo|Hp_@~hXN}528R%UvRLUqTpM7Nh%}+2VgTnSW zEd=&$AT!^=1yW`1sYM+<=Ah_W?gq0)mU6l_*%=J$22JSEsA4rM_w2#W=T5X0EGrCM z#cK*fSEi9$6C*7|STt#xq`6OdE>EUOPAU|qjSpj7Dw-J77AfE*QKjyvrPY8?MT%`_=!k{*k+aw^sg3v0ee8&>0W!FgU-yZMVJ?fl6lY&ib}2Ho14N%nPW?m z8n9{zmN{{xYd{2|uyP{9s9j44ueEolH{OqyQCE&Tc1<_EwV)Gh57q|G#NTd)%oO`e z7x$)I)WQ})jHF)FOoBi)qG`$?h>BR+PMjNM>A}S_dv^yL8@t*1s#>R*9c^|FnY|0zs3L}sxCV?-S-^&~Z6B76%qT-mOUAYr*poTafvh)GnS zy+|j&FMVd}<_-^QP-U{BkVj5Py&@sVv9TyKMf0={k_xslN>Yq1p4pj~+mSF$T9%@O zMN%MwJyFH3YeiUo_atp1>>*eChmZk8708-8Eo7DW2xY>?80*m4Po_;QS)9k(UPu;} zQa+6fW(Fb(MO0airNcBU!q#b6!mS$kRl!6GrBwyYsPcYtV0kCND<1TV zt(CQ0ZTB{s&SdkBKuY2I_+aUS(KO2f$7NB7c$}w=7l_z&aF z)2YR-gUZZnH=$PD=&Vh&qbbj*2_SL8n=Fu@nF66RUQ`5;XES6743H68A+Ky@S~?ZWZsPyAFZ~~eJrb={)C{^axnJS7i z|M8M(Q@k}i8keDoH=1?Gdv}f|-Q~&fc)5mL7NcyoKf+Pv#||n)ULyyaR#lalD;1dn zmO36wyMVsZ@C46C@4ypl?&+l#I6T_HE-ZF_rm=NwgKwU~m~&5j;@X8(?`r>*8=L>~ z?Dj{u_QJOoCOq z7soR$i;7yV_m9A>OhBim`(vxBo9^zN?W2v2JNe$)j0+ydBa+pFrGg#}db(jPoBkomUGw^aaP6IXfTB-k`#S_Tdxahk@yM=D`gaZrbgXTE*c9Akf* zY-XLU*xAebBTH;6$_L}1A6Q3dexer(o+8#Zm$CLi@tyt`%jstql zYEV3W{qD}nN)7}Z^ch&*1;=|E1Anx=*1HuTpB8X=b$jRh6vwI}wl6cIF;NkCOnPOR z@+yzBB&uyd1f#AlM&F(aKp0RGo0;pX%&udlji#EIS3x5EsP>hmO40i3%G$PTk5)D& zwT06Xs9m{U?C&3q{NC1(Ho-<$`>UA=$%XW#t}p~hrMD8{S_Eb)kqQzwXQV3i)!@Zx zqw_u_ysV^4ED!lW*FR(g4OY+smd5@`9RSh-{-PjBLk&nAJM+sx(wNu^7lHQl5H_g{ zQx!QM0WWos6^DF(I1IyJMCvshR+X@B9n0g`-O^z*(x7s0n)($mZJQ)EqE!_-HL`3| zXb+-xY0!`|SQ`?whbyQ9amu9L18O4T<0^Sl z)Y_x`Fe?MvDCUDeMN~AYiCr3{pk$3xb1cF^+UKYR%k+Z$Ur-CWrvYb$hfG70Q43(>*Q>Dh;SeX5?Dd-at! 
zNK~3;1WBW>2@D}!Sv;e3Qq}NADi~yKI1-u5(%t-y1xQl0AzH963bVu!h zez2T{WXp2mW@A|Pptn9ev~KOPTaUkdx9_KWDUQeK!B`##ol@p9wc+-bpRSabwxULhaAq$t7`oh#S6ag1x zu)rD-7a6nDyb7}RPT_lOK+Xz0$>~I)Ezl2EpwQ9aa{0PN|euWfRRUoD+Ve4{iRO1~nf$ zyYR#~Irifhib+aDp?_{jzWCv*&$f_fZvKKMe`vxqh!Zb(80#V~J~p>`^=;A<$Fg`2 z+GTv@>Xm1&&t08=>oL-agXV{ z*TVF%&n`|FCe;i6aUxSD7C(ONQ=h(ZZSnN=g&B=JFgknv>>~40qS+T8@T^5WJD89J zzwlo$oQi*;sQGLO>=mSS&2}!6DX$-*SnyL!e7dEl`WHLR?`hFYCdub-Tzf~ApZ4O1 zHF=o+fFhDj{A2tcdK`}_@r$1&|AA0>Sb_Io_}^d@_+0y0~E_8QyKY94^(>${9{qw)vevIyF zx7*#(f9+?Z+lAfbu)EgnhTWBLHnzO{Q*_l1-uelBt-EsiuG`rvGy3grS9SF~-Om;| zZae+1n^5yF+qct1j(P`h2nOZ2e^89s*5s3uUg*)@`jBn!CvF?Q6aC{N9=|8t(G(;r?vBF*_*yeiGn*qVCI%kA}Mm?j^-&@5ngR zAZ~c)cB^lp=SA)OqgKZr>Wq7pySdWZ07dP3?fTv!Qh2!A48dAF>aT0vx86^0K4!}u z+#du-hr6+#9_ahZOpZD_L|TWzyLIY}(?r{AH@Q7&?W$!Dm(jcTH3sY1=x|`1(mPuJ zwRRs-Yj3?rG45-d_kYU9rPlLNb$wsn_p&iQMEnT2v9@PxH;E5BiX6BG87?QSKDSeM z;-v=^ue`rq@B2?ZI$Sfx=Iz#}w4Ez{#MUF9#WP#$bJNf3B|fzq7!B7O@6eSq8T59w zNxO1uWnG_9ck+&&qr_(eJ_c~M#?}Njnr{-EMozDF?YDXLV;fVap1(Tte;qH~F5A}e z)`rK<{%n>Gj`n5Nw#{lw>$&=DA2?sNy3>Yg^T7P|+01v%Z$y8%uT)DCgR03eJU?aCOYy%+BmCja;6)^WFJ;cbw0@*95{myR$pb z?#$fo&hE_a0#b@98l@;T5}>E3LKIOX(gL3l2qF3+R8=afs`O3ji$qlo&UW2=KuPe=eCC1^E3C&+`MPzzwTL|dF#yJ&4WR6(+pnvkKYRZ zRG%K4|Kj|KFWUBjXD>bd728h!@N?nkZTsJ!f9co%*tUP`Y`*DBwteA^D}V5!ZBKCZ z#Mf+l?|**$J$?U&e)G}Ozhm2PoPY64AGYn^|Ng%|*vp^$-5);iRol+Jxck6yXI>dgB2nP}#rnPW4@9zK2lkC50;x7Ro|c>FHCgTdhKAG_B+qkHaq@FB2B z@h?o$!C$U@4K4l3`#<~53(tPwlYjO8U$b(1+HX8I^Yz*FnbGvL|MKRWix+MGD4Z{7 z)XK6+aHzP(jT4C2@f!{^K)WV&-AKlW7u>DlwBi*Fgq{$0!Zg6~I0=2+?6R;$T zE5l5K<3_`PJ3Od5$f4Fbmri)`Wd3r^1Kr{na$R~(C1jRo04LhUh0taI47K8fd!=aWJe zp^0&xxEb{c6RqdR+K@<5q5)4LST-q+!kMRUUiM2tx-9H6>N!b}7?mWsEP|4hQo5oE znUG!ah_&8)35sNEA&E9N$D4YG?OnUJgrecN*c`$9aA~P{)3r$k5tRk2Srx{L8^~~% z)PzPZ*C-UolX-+Wm^=BEiD+GoOP)eZ2oKsCq;ckEE~K%S#9gFor=b8|JbB@}jO4Tg zO@T=&+#0~Z*QyDf#&dbmcy%mV5IUNaso$bn#ckt61(s1P(c5lV7wfFi6AvSoHjMxR zqoQb2gh^14w#l?aE&9O(_ls6aA6%vzgOEwWxx&}Q!BXC}Z)(#Y*_ zUN#+zyv!g6k>aD z!Yji#@pbQ%L?mh5kk%AjAGu;?Ncv{2GS_^n1dAZa^!`nVxF#UhS#~anRztwy_kr=uHKomEeM2Bie z#>1uY^0=U?Xb1&amgNC2Df;t?;YJe{WfmGNjR`TA*cd;}qSoWa)dq$Xb#o`5pLp1* zYZeIXC6P=E6u7O)+8B#~G<7AKx1ZfeKUI%qv{hwcy%nSY z8XwdG`IQNJHI}Y+B?qK++^Pa~hi$oNUC*Sz^JEqKA*Df?+Whr$ZW9&2#;br=x+#r9 z)j%E^iJXw9rYe-v5(%@EH9mDg-U3_~5XEll_l2l-Thhc8sJdDcw*gh1)uyf$iUJmO zsROF2k)lWB(z-4@9x71D%9gjt#y=On1Kq+?lF+R3a^TvLH`;(ks0uyJR1jq(vZW+5 ze&yP9gwr;0!mbh7j(dY3TjmP6h1(5UPvEpTBuASCnR~sZu7iavB~Y(Fv=9i2F^LQd^?y)Z__f z1dYb-cFEd^gvN*TUa|Erg#~DjF2~dYj z7gN;@HowUeZY(xh{3e;j7matTYp(ipmF;rO_zaOOumt6Ycq zig`s|xSdlT)CylWSY~srU6I_nH71S!Bkha(nD$U2$5wG*+N(`R;X0_ zmXY+a*yUc-m%AowjZKBJ_9_-+zRX~eBi9kF;J}4FqAVsfr6SXLM?Ie#ABTqJGA=PF zprgAsOmx%7;)Ilzr;*%-9>`@PbID8LM|qb;#_4p(n$9&|Uw|3WlLL!{;WR6x z0+K7(cDYg{F;G=*)H9}+wG|gyrbGb> zvp>4Da|Q32&WtYsi7%eKev>G#U>*0o6vYv=QSStPLj2k!vWBV6)}dwTGR;aFR6SQu z6H95a<|WMgP3D({T3iXD)!lhY;392Fo3o~*W3_jEc}bX^(cIwjM_wP@+POXvRRrA* zVMDzliAv&SL07}B=p?n&)K(?yL9m`X`Rz%gc$WdzTE{d1);4Imv}&~PaAi%|x^a{@ z-jqbu9C76-%bK{anb%ex2&f_vz{>TSu=6 zv=lblQlHl7!c^7^W2quc3YJrw-ew;;dvkf|Qj-T;S$IV+t-GtkV$@~}Vbx_zt9#@1 zOL${ERT$dKc5=B;E*@=PK^ktc)FoGrHd4BYEsNS@%9O5+@|Fq)`b+7 zY3T~rz@^yZIuiwEO_613WjXMLi7H|qz@UgaN9UH}$U+k%D-DuP+q;2l(%dJua$cA) z8%G`uDMV!!)WUL0OSsw$;!-MF_{IoQm+f@TiU$BTd1SMv>mm>4zReEI<24XDC^}1L z^U$(?TVwsQJV`i;8sx|P{cjU$AoHQnZ_*^vyZ&{S)iJOZm2{zzLZu&eI;v( zORkx(^%A+hvv2nI9{G#spzp7BX)@lC{@PN~k-h7y&Go}zXS58{t>uli1kRG*`_Rgj z-EgzSjocUQrF^&cn4ev$RoSk@?K~ds96$H$>h)?XJR0ug#r%dKKwLs%b*EfkTHGgW zEeY1-{2xt9=EVWVzT*UrsB<9GjuRyerwwvAkX2fjZhi6i4{s7H3?o)WT?~9GGZg{V z=B@)Fsf5k5I1RMrDPCPrAYDI=QA+Lg2&HXUO#N?b;~Ie3!J&3{ufuL@;o1sLedAW? 
zO0c`TURn}zoK1=dR{&OVnfRc~Nv@rW=2{69*bL_g5=TYj=J`F>pMx9Y0s?=Z9USf^ zSG2E3S+ne3X+|sM@|Fz4p=-0u60^j*Mxyo%vR9<8gKD^T!*A-M#`dC?%O)s!=L=Yi zqVshY^Gb=B+ADUS*`;NCQH@myh;bViaFJkH6;%_tb>-R;WfMQsvyXh?L(7}c)U6svMw!e{Yfa}bD8FY89_4?}S>Dl?88B}*CTkHS!z{XmISIGq2fV=H64$sh8ex>Yv|m?lbqEyZ`_4<=mO`mNE`z?j6|2L$-PHoyX3c z8$=dTzu}478{PqWy5C11KlR*W=e~LF-1pw_@Xw$Az6RH+k3M?lZqV*j+7#Ol-*M_6 zA3OJn^>hFJ_>Zyu9Np!$;663`V{oUwbL~`g?*3EX+8mrYfBINXWJJ{I$$cN39m|L| z&xA+ZhKibf_t~f3cmC|`t=DI!GI~ev^!0)QyA1gMBbeH&q@+43tx`TX%f}=8g=gE_CT~eu9+@79rllGm z$-WoNUvMrsvsQJpw|9TF@B-{*gVQI5+uQHmd-tt0a{9aXzpMXohD*cY@T&dmf36IN z?r^t`Q_oz4DCGIP8T`q8~bh_i5K{XzUjj1Ts< zEluC;!gjikIoLxM!{)0o&5s1P`r77~$Gu+d-unya)#n<4wc>DZ)43&|xA>R)bFg0D z_Ka$@YIVN)6FSxw=j+A#Q^(#-$I%|j4?vJvefD~*qrG`UcLRs+FYq4EWf6>t*iECw zpRU*2{?i`rEjuH4x%Vln=O#apYb=!6RM*yS_Vb#KZs`pi?yp&P8`RT1(#y?yI;=Mq z*Q||(<5zGV@=$K(Bj8Tg*ciK=9XF0|Mb1j^+CLg=KlU+o%k!6}{%_-DIFws9-um#k zx-*?+gQI_$KKsW~FIx$$ZSSzJS-RSXYIn!{wdu@P&F{Bb;dQvW>xA=x^||f#aDTJ2 zy>|brx8rZv2e;k&V7Oq08~qYfVgJDf<|3Q6N7I+-$LV9Me;$ZNAAl;J`uP5)b71Y$ z_uG7NYrW}FZO62CI2@clo0GDrY4t>y=TA%%*V)^N>FlkPb>>OSN^boQzJLEL2itMAXRX1BY$o%zS+zkg=_pU!N~U!9-(@Z22#yScx- zaqp|2FBj%M_iIlr{Fd&gfAWg|72V%>wR7QB-GAurt>^zt_ZJfJ%wOpK@4x)dPnt%&!1UH-ulXSe&w(A=yyiD-}oEdpZ)jW{=`>x|6OBn>CAcOrQiM0hyGl@zWjLa z5AT%=^Kic>c-xbMxn}E}VaRqJFxs$mO}G-(z%cZtkOBJgZxL=CLQ9JZpBz zZ=8Si@xS#i{MA=p9m;?FviQwkieBt$y58X%PtAYt+NKuL?(j#KzP6-?$9^Y+hG%-H zN>phikiklrDK9-K{4l1rllw9%^GGaRyg3C8GZ1tN-(WOztEw=hmsV8-9pMzV?_y1<~hEl4O(ok^_x&;D(?O2p4 zJ5Eg+da~uTs#ro&c&-a9n-o>xz`8awAMjckkY^4qvzoM=$dFYTO2oa40uY427A68J z3`uDlm2d+miCDaJ>E=lZ5tl(E!#qp5>)C~uxuAwJla8l&=8K$WptF1u#a?bGPt=JB zBITvr!3Ho1D6+<F-*+>dC}6v=ch5Gjfl;RSvdFXoIqc| zf|4d`pv1Xj`DG}KO55=yNkfnLs_={m#HR9%x+mY`RAvY+J58M9GjvryvVAf8YM+qLSQBZHma)j znVm0P{KAxSoavL-&csX8KpZGLsUo|Hp_@~hXN}528R%UvRLUqTpM7Nh%}+2VgTnSW zEd=&$AT!^=1yW`1sYM+<=Ah_W?gq0)mU6l_*%=J$22JSEsA4rM_w2#W=T5X0EGrCM z#cK*fSEi9$6C*7|STt#xq`6OdE>EUOPAU|qjSpj7Dw-J77AfE*QKjyvrPY8?MT%`_=!k{*k+aw^sg3v0ee8&>0W!FgU-yZMVJ?fl6lY&ib}2Ho14N%nPW?m z8n9{zmN{{xYd{2|uyP{9s9j44ueEolH{OqyQCE&Tc1<_EwV)Gh57q|G#NTd)%oO`e z7x$)I)WQ})jHF)FOoBi)qG`$?h>BR+PMjNM>A}S_dv^yL8@t*1s#>R*9c^|FnY|0zs3L}sxCV?-S-^&~Z6B76%qT-mOUAYr*poTafvh)GnS zy+|j&FMVd}<_-^QP-U{BkVj5Py&@sVv9TyKMf0={k_xslN>Yq1p4pj~+mSF$T9%@O zMN%MwJyFH3YeiUo_atp1>>*eChmZk8708-8Eo7DW2xY>?80*m4Po_;QS)9k(UPu;} zQa+6fW(Fb(MO0airNcBU!q#b6!mS$kRl!6GrBwyYsPcYtV0kCND<1TV zt(CQ0ZTB{s&SdkBKuY2I_+aUS(KO2f$7NB7c$}w=7l_z&aF z)2YR-gUZZnH=$PD=&Vh&qbbj*2_SL8n=Fu@nF66RUQ`5;XES6743H68A+Ky@S~?ZWZsPyAFZ~~eJrb={)C{^axnJS7i z|M8M(Q@k}i8keDoH=1?Gdv}f|-Q~&fc)5mL7NcyoKf+Pv#||n)ULyyaR#lalD;1dn zmO36wyMVsZ@C46C@4ypl?&+l#I6T_HE-ZF_rm=NwgKwU~m~&5j;@X8(?`r>*8=L>~ z?Dj{u_QJOoCOq z7soR$i;7yV_m9A>OhBim`(vxBo9^zN?W2v2JNe$)j0+ydBa+pFrGg#}db(jPoBkomUGw^aaP6IXfTB-k`#S_Tdxahk@yM=D`gaZrbgXTE*c9Akf* zY-XLU*xAebBTH;6$_L}1A6Q3dexer(o+8#Zm$CLi@tyt`%jstql zYEV3W{qD}nN)7}Z^ch&*1;=|E1Anx=*1HuTpB8X=b$jRh6vwI}wl6cIF;NkCOnPOR z@+yzBB&uyd1f#AlM&F(aKp0RGo0;pX%&udlji#EIS3x5EsP>hmO40i3%G$PTk5)D& zwT06Xs9m{U?C&3q{NC1(Ho-<$`>UA=$%XW#t}p~hrMD8{S_Eb)kqQzwXQV3i)!@Zx zqw_u_ysV^4ED!lW*FR(g4OY+smd5@`9RSh-{-PjBLk&nAJM+sx(wNu^7lHQl5H_g{ zQx!QM0WWos6^DF(I1IyJMCvshR+X@B9n0g`-O^z*(x7s0n)($mZJQ)EqE!_-HL`3| zXb+-xY0!`|SQ`?whbyQ9amu9L18O4T<0^Sl z)Y_x`Fe?MvDCUDeMN~AYiCr3{pk$3xb1cF^+UKYR%k+Z$Ur-CWrvYb$hfG70Q43(>*Q>Dh;SeX5?Dd-at! 
zSCyY925F`NsG|a+sx9-hXf*AfxFupgC8!JyKtj1Hse!@<(wdV}kGQe9r--~(bELrf`Yof=caA3Sg#F1CZmYm4Y`SL2-kzxwkd0a=$sTgKS3V~6jd3^ z5OthNHJ%rlgEv-CPIGHq1I>sfb4Qm+nmGm{du0`x%;XrGb1WK5xp;Bz>02ry*o}nU zYH2ECZGbVX-L}9=R%H}B%<(7yb6>rcE9J*N6rf@?3=0QjX=CTDL7hxX<_1v|7+CnF zlb^^V2;9e@P&7X}BsxaqI zb~YY+mGtnDsNaBg2AzBC+%p$CkIg@D&h4Cdrb3wK|iozd`pqqEl!E)?b) zmzlnBpJy$Y)$thT=-K~a!d>>y){4!xKv9jSt~p&>2+Gv!D4z5Ff`4F2r?`&H{K&8_ zM4o@>;`twm^5b6odP!bSzfTZRjNuV_138XH1jG6#@xL!rUN68KO!(O)nAyvG@T`w2 zqm&OW&>01*5u!d)!}3eA=GUI%gm34F>Pq zf6v`Ka`rnXe_{G#^;UYl-j4a3{#@<#oZeE>TkZ9d-f}V1CRgaa zu{>lWI>>QW3uihE}xS&7nO= zN5kHDW4SjzqWRBaKJ|U*Jt~UioF`HNVFqN_Ic1Qg-+*qFKz)h$7P4|!XU5SPR zJp`-S;o7>9ee=!aW}{-Mi`KT{gZ)7U>7Isa#z!9PyGDks*UHo#na_J8Fvzcrr+PKK zhfD5j&ou(;<>CI8b(h{X@~=+Mkxun(&d5h=M(1lkrsKxQ`Br6p)46Y?W3=zG0}y6L zpHsaFwBMC@FSPJ*DWA%7g@)sZ?h&;7=6b#7Kk9IQ)f(AXraoo#+_HmW-F#5VX1X?Z zGtcV{x~n&EI9%7!ekf)<$dz`3^u*@!y0KBue2;q77Y=h+Y_Sn=W_xUo9c|`~wy!3j-wu!fCcW@Sq2hsL(|*;lUYOhZ-Q zw|;%L@-^$HtopftYkL;#hQ{Xx>TtNF&8XqOb1(jS)8IC&4|+?ow>jNnMw~vFz+6se z{n6}Y_Bd;{r|ki4n*&hCGanyrSqH{G({Yn8?(R1`su`G#_IjPO=UG&h0@qJL#-5rb duJiX2)A_q8>)bO2mfZdA+&mEf8qcD literal 0 HcmV?d00001 diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py index a395a2cf9a..b73754b43d 100644 --- a/python/tests/test_load_from_df.py +++ b/python/tests/test_load_from_df.py @@ -46,8 +46,8 @@ def test_different_data_sources(): nodes_list = [] ######### PARQUET ######### - parquet_dir_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/parquet_directory" - parquet_file_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/flattened_data_subset.parquet" + parquet_dir_path_str = str(Path(__file__).parent) + "/data/btc_dataset/parquet_directory" + parquet_file_path_str = str(Path(__file__).parent) + "/data/btc_dataset/flattened_data.parquet" # test path string for parquet file g = Graph() g.load_nodes(data=parquet_file_path_str, time="block_timestamp", id="inputs_address") @@ -75,8 +75,8 @@ def test_different_data_sources(): del g ######### CSV ######### - csv_dir_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/csv_directory" - csv_file_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/flattened_data_subset.csv" + csv_dir_path_str = str(Path(__file__).parent) + "/data/btc_dataset/csv_directory" + csv_file_path_str = str(Path(__file__).parent) + "/data/btc_dataset/flattened_data.csv" # test path string for CSV file g = Graph() g.load_nodes(data=csv_file_path_str, time="block_timestamp", id="inputs_address") @@ -104,7 +104,7 @@ def test_different_data_sources(): del g ######### mixed directory ######### - mixed_dir_path_str = "/Users/arien/RustroverProjects/Raphtory/dataset_tests/mixed_directory" + mixed_dir_path_str = str(Path(__file__).parent) + "/data/btc_dataset/mixed_directory" # test path string g = Graph() g.load_nodes(data=mixed_dir_path_str, time="block_timestamp", id="inputs_address") @@ -389,6 +389,161 @@ def test_nested_schema_casting(): # also check PropType.map of pyarrow types, mix and match assert dtype_pyarrow == PropType.map({"a": pa.int64(), "b": pa.int64()}) +def _btc_root() -> Path: + return Path(__file__).parent / "data" / "btc_dataset" + +def _csv_expected_earliest_dt(paths: list[Path]): + df = pd.concat([pd.read_csv(p) for p in paths], ignore_index=True) + return pd.to_datetime(df["block_timestamp"], utc=True).min().to_pydatetime() + +def _parquet_expected_earliest_dt(paths: list[Path]): + df = pd.concat([pd.read_parquet(p) for p in paths], ignore_index=True) + return 
pd.to_datetime(df["block_timestamp"], utc=True).min().to_pydatetime() + +@pytest.mark.parametrize("schema_value", [PropType.datetime(), pa.timestamp("ms", tz="UTC")]) +def test_casting_btc_csv_file(schema_value): + csv_path = _btc_root() / "flattened_data.csv" + expected_earliest = _csv_expected_earliest_dt([csv_path]) + + # Pick a node id from the file + df = pd.read_csv(csv_path) + some_node_id = df["inputs_address"].iloc[0] + + g = Graph() + g.load_nodes( + data=str(csv_path), + time="block_timestamp", + id="inputs_address", + properties=["block_timestamp"], + schema={"block_timestamp": schema_value}, + ) + + dtype = g.node(some_node_id).properties.get_dtype_of("block_timestamp") + assert dtype == PropType.datetime() + assert dtype == pa.timestamp("ms", tz="UTC") + assert g.earliest_time.dt == expected_earliest + +@pytest.mark.parametrize("schema_value", [PropType.datetime(), pa.timestamp("ms", tz="UTC")]) +def test_casting_btc_csv_directory(schema_value): + csv_dir = _btc_root() / "csv_directory" + csv_paths = sorted(p for p in csv_dir.iterdir() if p.suffix == ".csv") + expected_earliest = _csv_expected_earliest_dt(csv_paths) + + df0 = pd.read_csv(csv_paths[0]) + some_node_id = df0["inputs_address"].iloc[0] + + g = Graph() + g.load_nodes( + data=str(csv_dir), + time="block_timestamp", + id="inputs_address", + properties=["block_timestamp"], + schema={"block_timestamp": schema_value}, + ) + + dtype = g.node(some_node_id).properties.get_dtype_of("block_timestamp") + assert dtype == PropType.datetime() + assert dtype == pa.timestamp("ms", tz="UTC") + assert g.earliest_time.dt == expected_earliest + +@pytest.mark.parametrize("schema_value", [PropType.datetime(), pa.timestamp("ms", tz="UTC")]) +def test_casting_btc_parquet_file(schema_value): + pq_path = _btc_root() / "flattened_data.parquet" + expected_earliest = _parquet_expected_earliest_dt([pq_path]) + + df = pd.read_parquet(pq_path) + some_node_id = df["inputs_address"].iloc[0] + + g = Graph() + g.load_nodes( + data=str(pq_path), + time="block_timestamp", + id="inputs_address", + properties=["block_timestamp"], + schema={"block_timestamp": schema_value}, + ) + + dtype = g.node(some_node_id).properties.get_dtype_of("block_timestamp") + assert dtype == PropType.datetime() + assert dtype == pa.timestamp("ms", tz="UTC") + assert g.earliest_time.dt == expected_earliest + +@pytest.mark.parametrize("schema_value", [PropType.datetime(), pa.timestamp("ms", tz="UTC")]) +def test_casting_btc_parquet_directory(schema_value): + pq_dir = _btc_root() / "parquet_directory" + pq_paths = sorted(p for p in pq_dir.iterdir() if p.suffix == ".parquet") + expected_earliest = _parquet_expected_earliest_dt(pq_paths) + + df0 = pd.read_parquet(pq_paths[0]) + some_node_id = df0["inputs_address"].iloc[0] + + g = Graph() + g.load_nodes( + data=str(pq_dir), + time="block_timestamp", + id="inputs_address", + properties=["block_timestamp"], + schema={"block_timestamp": schema_value}, + ) + + dtype = g.node(some_node_id).properties.get_dtype_of("block_timestamp") + assert dtype == PropType.datetime() + assert dtype == pa.timestamp("ms", tz="UTC") + assert g.earliest_time.dt == expected_earliest + +@pytest.mark.parametrize("schema_value", [PropType.datetime(), pa.timestamp("ms", tz="UTC")]) +def test_casting_btc_mixed_directory(schema_value): + mixed_dir = _btc_root() / "mixed_directory" + csv_paths = sorted(p for p in mixed_dir.iterdir() if p.suffix == ".csv") + pq_paths = sorted(p for p in mixed_dir.iterdir() if p.suffix == ".parquet") + + # Compute expected earliest 
across both formats + expected_csv = _csv_expected_earliest_dt(csv_paths) + expected_pq = _parquet_expected_earliest_dt(pq_paths) + expected_earliest = min(expected_csv, expected_pq) + + # Use an id from one of the files + some_node_id = pd.read_csv(csv_paths[0])["inputs_address"].iloc[0] + + g = Graph() + g.load_nodes( + data=str(mixed_dir), + time="block_timestamp", + id="inputs_address", + properties=["block_timestamp"], + schema={"block_timestamp": schema_value}, + ) + + dtype = g.node(some_node_id).properties.get_dtype_of("block_timestamp") + assert dtype == PropType.datetime() + assert dtype == pa.timestamp("ms", tz="UTC") + assert g.earliest_time.dt == expected_earliest + +def test_malformed_files(): + malformed_dir = _btc_root() / "malformed_files" + + with pytest.raises(Exception) as missing_col_error: + g = Graph() + g.load_nodes( + data=malformed_dir / "missing_col.parquet", + time="block_timestamp", + id="inputs_address", + properties=["block_timestamp"], + ) + assert "columns are not present" in str(missing_col_error.value) + assert "block_timestamp" in str(missing_col_error.value) + + with pytest.raises(Exception) as malformed_timestamp_error: + g = Graph() + g.load_nodes( + data=malformed_dir / "timestamp_malformed.parquet", + time="block_timestamp", + id="inputs_address", + properties=["block_timestamp"], + schema={"block_timestamp": pa.timestamp("ms", tz="UTC")} + ) + print(f"Error: {malformed_timestamp_error.value}") + if fpd: import pandas diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index bd224e015b..acd91e14a5 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -778,149 +778,6 @@ impl PyGraph { } } - /// Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) - /// - /// Arguments: - /// data (Any): The data source containing the nodes. - /// time (str): The column name for the timestamps. - /// id (str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - /// node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. - /// properties (List[str], optional): List of node property column names. Defaults to None. - /// metadata (List[str], optional): List of node metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. 
- #[pyo3( - signature = (data, time, id, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None) - )] - fn load_nodes_from_df<'py>( - &self, - data: &Bound<'py, PyAny>, - time: &str, - id: &str, - node_type: Option<&str>, - node_type_col: Option<&str>, - properties: Option>, - metadata: Option>, - shared_metadata: Option>, - ) -> Result<(), GraphError> { - let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_nodes_from_arrow_c_stream( - &self.graph, - data, - time, - id, - node_type, - node_type_col, - &properties, - &metadata, - shared_metadata.as_ref(), - None, // TODO: Add schema - ) - } - - /// Load nodes from a Pandas DataFrame into the graph. - /// - /// Arguments: - /// df (DataFrame): The Pandas DataFrame containing the nodes. - /// time (str): The column name for the timestamps. - /// id (str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. - /// properties (List[str], optional): List of node property column names. Defaults to None. - /// metadata (List[str], optional): List of node metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. - #[pyo3( - signature = (df, time, id, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None) - )] - fn load_nodes_from_pandas<'py>( - &self, - df: &Bound<'py, PyAny>, - time: &str, - id: &str, - node_type: Option<&str>, - node_type_col: Option<&str>, - properties: Option>, - metadata: Option>, - shared_metadata: Option>, - ) -> Result<(), GraphError> { - let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_nodes_from_pandas( - &self.graph, - df, - time, - id, - node_type, - node_type_col, - &properties, - &metadata, - shared_metadata.as_ref(), - ) - } - - /// Load nodes from a Parquet file into the graph. - /// - /// Arguments: - /// parquet_path (str): Parquet file or directory of Parquet files containing the nodes - /// time (str): The column name for the timestamps. - /// id (str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. - /// properties (List[str], optional): List of node property column names. Defaults to None. - /// metadata (List[str], optional): List of node metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. 
- #[pyo3( - signature = (parquet_path, time, id, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None) - )] - fn load_nodes_from_parquet( - &self, - parquet_path: PathBuf, - time: &str, - id: &str, - node_type: Option<&str>, - node_type_col: Option<&str>, - properties: Option>, - metadata: Option>, - shared_metadata: Option>, - ) -> Result<(), GraphError> { - let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_nodes_from_parquet( - &self.graph, - parquet_path.as_path(), - time, - id, - node_type, - node_type_col, - &properties, - &metadata, - shared_metadata.as_ref(), - None, - None, - ) - } - /// Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) From 3b43e94e1ff421186f6b1baa6462cdc35fa0cdda Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Thu, 18 Dec 2025 03:43:44 -0500 Subject: [PATCH 45/55] Fixed error message displaying incorrectly when the time column is malformed (or any column). Added tests for malformed inputs in csv. --- python/python/raphtory/__init__.pyi | 65 ------------------ .../malformed_files/extra_field.csv | 2 + .../malformed_files/impossible_date.csv | 2 + .../malformed_files/missing_field.csv | 2 + .../malformed_files/missing_id_col.csv | 2 + .../malformed_files/missing_prop_col.csv | 2 + .../malformed_files/missing_timestamp_col.csv | 11 +++ ....parquet => missing_timestamp_col.parquet} | Bin .../btc_dataset/malformed_files/null_id.csv | 2 + .../malformed_files/null_timestamp.csv | 2 + .../out_of_range_timestamp.csv | 2 + .../malformed_files/semicolon_delimiter.csv | 2 + .../malformed_files/timestamp_malformed.csv | 11 +++ python/tests/test_load_from_df.py | 53 ++++++++++++-- raphtory/src/io/parquet_loaders.rs | 13 ++-- 15 files changed, 94 insertions(+), 77 deletions(-) create mode 100644 python/tests/data/btc_dataset/malformed_files/extra_field.csv create mode 100644 python/tests/data/btc_dataset/malformed_files/impossible_date.csv create mode 100644 python/tests/data/btc_dataset/malformed_files/missing_field.csv create mode 100644 python/tests/data/btc_dataset/malformed_files/missing_id_col.csv create mode 100644 python/tests/data/btc_dataset/malformed_files/missing_prop_col.csv create mode 100644 python/tests/data/btc_dataset/malformed_files/missing_timestamp_col.csv rename python/tests/data/btc_dataset/malformed_files/{missing_col.parquet => missing_timestamp_col.parquet} (100%) create mode 100644 python/tests/data/btc_dataset/malformed_files/null_id.csv create mode 100644 python/tests/data/btc_dataset/malformed_files/null_timestamp.csv create mode 100644 python/tests/data/btc_dataset/malformed_files/out_of_range_timestamp.csv create mode 100644 python/tests/data/btc_dataset/malformed_files/semicolon_delimiter.csv create mode 100644 python/tests/data/btc_dataset/malformed_files/timestamp_malformed.csv diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index 750c5da72f..b14a149d12 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -1281,71 +1281,6 @@ class Graph(GraphView): GraphError: If the operation fails. 
""" - def load_nodes_from_df(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: - """ - Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) - - Arguments: - data (Any): The data source containing the nodes. - time (str): The column name for the timestamps. - id (str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. - properties (List[str], optional): List of node property column names. Defaults to None. - metadata (List[str], optional): List of node metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - - Returns: - None: This function does not return a value if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - - def load_nodes_from_pandas(self, df: DataFrame, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: - """ - Load nodes from a Pandas DataFrame into the graph. - - Arguments: - df (DataFrame): The Pandas DataFrame containing the nodes. - time (str): The column name for the timestamps. - id (str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. - properties (List[str], optional): List of node property column names. Defaults to None. - metadata (List[str], optional): List of node metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - - def load_nodes_from_parquet(self, parquet_path: str, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: - """ - Load nodes from a Parquet file into the graph. - - Arguments: - parquet_path (str): Parquet file or directory of Parquet files containing the nodes - time (str): The column name for the timestamps. - id (str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. 
Defaults to None. - properties (List[str], optional): List of node property column names. Defaults to None. - metadata (List[str], optional): List of node metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - def node(self, id: str|int) -> MutableNode: """ Gets the node with the specified id diff --git a/python/tests/data/btc_dataset/malformed_files/extra_field.csv b/python/tests/data/btc_dataset/malformed_files/extra_field.csv new file mode 100644 index 0000000000..345e506560 --- /dev/null +++ b/python/tests/data/btc_dataset/malformed_files/extra_field.csv @@ -0,0 +1,2 @@ +block_timestamp,inputs_address,outputs_address +2025-11-10 00:28:09,bc1qabc,bc1qdef,EXTRA \ No newline at end of file diff --git a/python/tests/data/btc_dataset/malformed_files/impossible_date.csv b/python/tests/data/btc_dataset/malformed_files/impossible_date.csv new file mode 100644 index 0000000000..bafdb9c919 --- /dev/null +++ b/python/tests/data/btc_dataset/malformed_files/impossible_date.csv @@ -0,0 +1,2 @@ +block_timestamp,inputs_address,outputs_address +2025-99-99 99:99:99,bc1qabc,bc1qdef \ No newline at end of file diff --git a/python/tests/data/btc_dataset/malformed_files/missing_field.csv b/python/tests/data/btc_dataset/malformed_files/missing_field.csv new file mode 100644 index 0000000000..ffd157fb94 --- /dev/null +++ b/python/tests/data/btc_dataset/malformed_files/missing_field.csv @@ -0,0 +1,2 @@ +block_timestamp,inputs_address,outputs_address +2025-11-10 00:28:09,bc1qabc \ No newline at end of file diff --git a/python/tests/data/btc_dataset/malformed_files/missing_id_col.csv b/python/tests/data/btc_dataset/malformed_files/missing_id_col.csv new file mode 100644 index 0000000000..5e8f04854f --- /dev/null +++ b/python/tests/data/btc_dataset/malformed_files/missing_id_col.csv @@ -0,0 +1,2 @@ +block_timestamp,outputs_address +2025-11-10 00:28:09,bc1qdef \ No newline at end of file diff --git a/python/tests/data/btc_dataset/malformed_files/missing_prop_col.csv b/python/tests/data/btc_dataset/malformed_files/missing_prop_col.csv new file mode 100644 index 0000000000..54da35f624 --- /dev/null +++ b/python/tests/data/btc_dataset/malformed_files/missing_prop_col.csv @@ -0,0 +1,2 @@ +block_timestamp,inputs_address +2025-11-10 00:28:09,bc1qabc \ No newline at end of file diff --git a/python/tests/data/btc_dataset/malformed_files/missing_timestamp_col.csv b/python/tests/data/btc_dataset/malformed_files/missing_timestamp_col.csv new file mode 100644 index 0000000000..8ef862e666 --- /dev/null +++ b/python/tests/data/btc_dataset/malformed_files/missing_timestamp_col.csv @@ -0,0 +1,11 @@ +inputs_address,outputs_address +bc1q40a3a5xlwv0l4n4xs3mpvauscgkj78rcuhvrhs,bc1q9xh9exh34tq566nfvpwwupum6efh3yqc0aja5d +bc1q2zn0548tc0njq9xxt0uamxxh48s8t7c9dw0a45,3K1t5MthuwcyNn4BWHp4eFSyTY8ifDjTkk +bc1qxsp4nqu7zq424wdz4v7c6q8jylhtlskap86kpd,36jsyuJHY2Eqs48yjUb6xyzARSPYKKSrTF +bc1qvpah3cjguuwmjjgwp2g9t52f786jnw7km3jgau,34WkZeqyNBb5FXMxLZJfveHNm2kqngt4w4 +bc1q6xku4eta99267etxf84yy3qc2pyw04pq8dvqye,bc1q6xku4eta99267etxf84yy3qc2pyw04pq8dvqye +bc1q50uxvgu5paqj22fg27s352wggev70xs9f8lhnk,bc1qxgxf44zkxrz8ld33kjvf85ekmmkcaexs7xkfyg +bc1q5v45pufhugrqmp9a7taq92y4uqu8awpqcsrjq2,33bdcPeLq1Mk8TrHZ6sPwjpNbfHf6Y5UCg 
+bc1qhkar0wasylsfhwamp8d023e2tqzcjfzjlx6wrd,bc1q4mhkgdfmattuya75rescq5u3ms4u3m56x8nzcz +bc1qw8wrek2m7nlqldll66ajnwr9mh64syvkt67zlu,bc1qxwea59rqnldu5hl2zg3pt9c0av7eee4dfjnj7u +bc1qn5yd54kvaa60jqw0ncx93ru7ctlk0727653ynz,bc1qehhvcqru3k3qfn95r9w3nf8jxnlflnw4d9duah diff --git a/python/tests/data/btc_dataset/malformed_files/missing_col.parquet b/python/tests/data/btc_dataset/malformed_files/missing_timestamp_col.parquet similarity index 100% rename from python/tests/data/btc_dataset/malformed_files/missing_col.parquet rename to python/tests/data/btc_dataset/malformed_files/missing_timestamp_col.parquet diff --git a/python/tests/data/btc_dataset/malformed_files/null_id.csv b/python/tests/data/btc_dataset/malformed_files/null_id.csv new file mode 100644 index 0000000000..0dbc5578ba --- /dev/null +++ b/python/tests/data/btc_dataset/malformed_files/null_id.csv @@ -0,0 +1,2 @@ +block_timestamp,inputs_address,outputs_address +2025-11-10 00:28:09,,bc1qdef \ No newline at end of file diff --git a/python/tests/data/btc_dataset/malformed_files/null_timestamp.csv b/python/tests/data/btc_dataset/malformed_files/null_timestamp.csv new file mode 100644 index 0000000000..686da66859 --- /dev/null +++ b/python/tests/data/btc_dataset/malformed_files/null_timestamp.csv @@ -0,0 +1,2 @@ +block_timestamp,inputs_address,outputs_address +,bc1qabc,bc1qdef \ No newline at end of file diff --git a/python/tests/data/btc_dataset/malformed_files/out_of_range_timestamp.csv b/python/tests/data/btc_dataset/malformed_files/out_of_range_timestamp.csv new file mode 100644 index 0000000000..4b41edc6a4 --- /dev/null +++ b/python/tests/data/btc_dataset/malformed_files/out_of_range_timestamp.csv @@ -0,0 +1,2 @@ +block_timestamp,inputs_address,outputs_address +999999999999999999999,bc1qabc,bc1qdef \ No newline at end of file diff --git a/python/tests/data/btc_dataset/malformed_files/semicolon_delimiter.csv b/python/tests/data/btc_dataset/malformed_files/semicolon_delimiter.csv new file mode 100644 index 0000000000..d02492edf6 --- /dev/null +++ b/python/tests/data/btc_dataset/malformed_files/semicolon_delimiter.csv @@ -0,0 +1,2 @@ +block_timestamp;inputs_address;outputs_address +2025-11-10 00:28:09;bc1qabc;bc1qdef \ No newline at end of file diff --git a/python/tests/data/btc_dataset/malformed_files/timestamp_malformed.csv b/python/tests/data/btc_dataset/malformed_files/timestamp_malformed.csv new file mode 100644 index 0000000000..9e3ab079f5 --- /dev/null +++ b/python/tests/data/btc_dataset/malformed_files/timestamp_malformed.csv @@ -0,0 +1,11 @@ +block_timestamp,inputs_address +not-a-timestamp,bc1q40a3a5xlwv0l4n4xs3mpvauscgkj78rcuhvrhs +not-a-timestamp,bc1q2zn0548tc0njq9xxt0uamxxh48s8t7c9dw0a45 +not-a-timestamp,bc1qxsp4nqu7zq424wdz4v7c6q8jylhtlskap86kpd +not-a-timestamp,bc1qvpah3cjguuwmjjgwp2g9t52f786jnw7km3jgau +not-a-timestamp,bc1q6xku4eta99267etxf84yy3qc2pyw04pq8dvqye +not-a-timestamp,bc1q50uxvgu5paqj22fg27s352wggev70xs9f8lhnk +not-a-timestamp,bc1q5v45pufhugrqmp9a7taq92y4uqu8awpqcsrjq2 +not-a-timestamp,bc1qhkar0wasylsfhwamp8d023e2tqzcjfzjlx6wrd +not-a-timestamp,bc1qw8wrek2m7nlqldll66ajnwr9mh64syvkt67zlu +not-a-timestamp,bc1qn5yd54kvaa60jqw0ncx93ru7ctlk0727653ynz diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py index b73754b43d..ff266bcb99 100644 --- a/python/tests/test_load_from_df.py +++ b/python/tests/test_load_from_df.py @@ -522,18 +522,37 @@ def test_casting_btc_mixed_directory(schema_value): def test_malformed_files(): malformed_dir = _btc_root() / "malformed_files" - with 
pytest.raises(Exception) as missing_col_error: + # missing time column in Parquet/CSV/dataframe + with pytest.raises(Exception, match="columns are not present within the dataframe: block_timestamp, block_timestamp"): g = Graph() g.load_nodes( - data=malformed_dir / "missing_col.parquet", + data=malformed_dir / "missing_timestamp_col.parquet", time="block_timestamp", id="inputs_address", properties=["block_timestamp"], ) - assert "columns are not present" in str(missing_col_error.value) - assert "block_timestamp" in str(missing_col_error.value) - with pytest.raises(Exception) as malformed_timestamp_error: + with pytest.raises(Exception, match="Column 'block_timestamp' not found in file"): + g = Graph() + g.load_nodes( + data=malformed_dir / "missing_timestamp_col.csv", + time="block_timestamp", + id="inputs_address", + properties=["block_timestamp"], + ) + + with pytest.raises(Exception, match="columns are not present within the dataframe: block_timestamp, block_timestamp"): + df = pd.read_parquet(malformed_dir / "missing_timestamp_col.parquet") + g = Graph() + g.load_nodes( + data=df, + time="block_timestamp", + id="inputs_address", + properties=["block_timestamp"], + ) + + # timestamp column can't be parsed to a timestamp/dt (malformed) in Parquet/CSV/dataframe + with pytest.raises(Exception, match="Missing value for timestamp"): g = Graph() g.load_nodes( data=malformed_dir / "timestamp_malformed.parquet", @@ -542,7 +561,29 @@ def test_malformed_files(): properties=["block_timestamp"], schema={"block_timestamp": pa.timestamp("ms", tz="UTC")} ) - print(f"Error: {malformed_timestamp_error.value}") + + with pytest.raises(Exception, match="Missing value for timestamp") as e: + g = Graph() + g.load_nodes( + data=malformed_dir / "timestamp_malformed.csv", + time="block_timestamp", + id="inputs_address", + properties=["block_timestamp"], + schema={"block_timestamp": pa.timestamp("ms", tz="UTC")} + ) + + with pytest.raises(Exception, match="Missing value for timestamp"): + df = pd.read_parquet(malformed_dir / "timestamp_malformed.parquet") + g = Graph() + g.load_nodes( + data=df, + time="block_timestamp", + id="inputs_address", + properties=["block_timestamp"], + schema={"block_timestamp": pa.timestamp("ms", tz="UTC")} + ) + + if fpd: import pandas diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 5f23598078..f79c90b5ab 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -15,6 +15,7 @@ use std::{ path::{Path, PathBuf}, sync::Arc, }; +use std::error::Error; #[cfg(feature = "storage")] use {arrow::array::StructArray, pometry_storage::RAError}; @@ -59,7 +60,7 @@ pub fn load_nodes_from_parquet< node_type_col, graph, ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(e.to_string()))?; } Ok(()) @@ -143,7 +144,7 @@ pub fn load_edges_from_parquet< layer_col, graph, ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(e.to_string()))?; Ok(()) } @@ -186,7 +187,7 @@ pub fn load_node_props_from_parquet< shared_metadata, graph, ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(e.to_string()))?; } Ok(()) @@ -231,7 +232,7 @@ pub fn load_edge_props_from_parquet< layer_col, graph, ) - .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?; + .map_err(|e| GraphError::LoadFailure(e.to_string()))?; } 
Ok(())
@@ -264,7 +265,7 @@ pub fn load_edge_deletions_from_parquet<
     )?;
     df_view.check_cols_exist(&cols_to_check)?;
     load_edge_deletions_from_df(df_view, time, src, dst, layer, layer_col, graph)
-        .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?;
+        .map_err(|e| GraphError::LoadFailure(e.to_string()))?;
     }
     Ok(())
 }
@@ -291,7 +292,7 @@ pub fn load_graph_props_from_parquet<
-        .map_err(|e| GraphError::LoadFailure(format!("Failed to load graph {e:?}")))?;
+        .map_err(|e| GraphError::LoadFailure(e.to_string()))?;

Date: Thu, 18 Dec 2025 03:44:54 -0500
Subject: [PATCH 46/55] Added malformed parquet test files

---
 .../malformed_files/extra_field.parquet         | Bin 0 -> 2322 bytes
 .../malformed_files/impossible_date.parquet     | Bin 0 -> 2392 bytes
 .../malformed_files/missing_field.parquet       | Bin 0 -> 2357 bytes
 .../malformed_files/missing_id_col.parquet      | Bin 0 -> 1820 bytes
 .../malformed_files/missing_prop_col.parquet    | Bin 0 -> 1816 bytes
 .../btc_dataset/malformed_files/null_id.parquet | Bin 0 -> 2357 bytes
 .../malformed_files/null_timestamp.parquet      | Bin 0 -> 2296 bytes
 .../out_of_range_timestamp.parquet              | Bin 0 -> 2385 bytes
 .../malformed_files/semicolon_delimiter.parquet | Bin 0 -> 1576 bytes
 9 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 python/tests/data/btc_dataset/malformed_files/extra_field.parquet
 create mode 100644 python/tests/data/btc_dataset/malformed_files/impossible_date.parquet
 create mode 100644 python/tests/data/btc_dataset/malformed_files/missing_field.parquet
 create mode 100644 python/tests/data/btc_dataset/malformed_files/missing_id_col.parquet
 create mode 100644 python/tests/data/btc_dataset/malformed_files/missing_prop_col.parquet
 create mode 100644 python/tests/data/btc_dataset/malformed_files/null_id.parquet
 create mode 100644 python/tests/data/btc_dataset/malformed_files/null_timestamp.parquet
 create mode 100644 python/tests/data/btc_dataset/malformed_files/out_of_range_timestamp.parquet
 create mode 100644 python/tests/data/btc_dataset/malformed_files/semicolon_delimiter.parquet

[GIT binary patch payloads for the nine new parquet fixtures omitted]

From 8fbe73a4f7944339094766d58ac1839773451799 Mon Sep 17 00:00:00 2001
From: arienandalibi
Date: Thu, 18 Dec 2025 04:18:06 -0500
Subject: [PATCH 47/55] Fixed the CSV loader to return the same error as other
 loaders when a column is not found. Removed the extra_field parquet test
 because it didn't work. Cleaned up the test file.
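A minimal illustration of the behaviour this commit standardises (editor's sketch, not part of the patch; the fixture paths and the expected wording are taken from the tests in this series): a missing required column is now reported identically whether the input is CSV or Parquet.

import pytest
from raphtory import Graph

# Both fixtures lack the `block_timestamp` column; after this fix the CSV path
# surfaces the same "columns are not present within the dataframe" wording as
# the Parquet path, so one match string covers both.
for fixture in ("missing_timestamp_col.csv", "missing_timestamp_col.parquet"):
    g = Graph()
    with pytest.raises(Exception, match="columns are not present within the dataframe"):
        g.load_nodes(
            data=f"python/tests/data/btc_dataset/malformed_files/{fixture}",
            time="block_timestamp",
            id="inputs_address",
        )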
--- .../malformed_files/extra_field.parquet | Bin 2322 -> 0 bytes python/tests/test_load_from_df.py | 92 ++++++------------ raphtory/src/io/parquet_loaders.rs | 2 +- raphtory/src/python/graph/io/arrow_loaders.rs | 7 +- 4 files changed, 36 insertions(+), 65 deletions(-) delete mode 100644 python/tests/data/btc_dataset/malformed_files/extra_field.parquet diff --git a/python/tests/data/btc_dataset/malformed_files/extra_field.parquet b/python/tests/data/btc_dataset/malformed_files/extra_field.parquet deleted file mode 100644 index 96d02c52c1c1d865e3aaad8d234defdf1a2f78e9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2322 zcmcIm(Qe{K6dk;~Y_^SdS1Z|qDh~)LXdkjECShAvQKf+d10`(mHrP;A$YUEY*v7$j z2t9v(yuSkJPVZdU|Y)wf_B!=78N7Y2S;GmMGp`^(UaaTwfz30!h3d5}R{{AMzb;xChH{5&Uc!jCcG zrT%*NeTCU?L;#Dk3p9Oux1@H`n$W)QV^nR`Xw67?Uk%+IlF@>|wElzjCG8T)rJ z;rm$TPcHK-N7-+?z0Um6wQS*+M*^4G5PpAzk~@jGr@4maCw_Q*nYLWRyy)s z{Vw(OH#@0xH0(0WT$yFV%e1RGCWNO~_Ad>qdzNV%-MzmD1-DPhx`1{3zsk~ zDVJ&6A`@*gqg&$5w8>;Vr9#fliEoWvI-3)Bv%A?eSeIU8Uj}#gD25W7SeD-_@1i12 z{I>Bay=MJGjByjb^!gIxeO&D0GRA#e>a)3G@fJR%lSkM1QL_|{p=B%#uVOsKunz8w zM5i|x!}U4D7eA{ycG!_x`)aOnPAaW{xX0Fet4P!j(|*-i%-IExII?X@t$B4g?ki%} zua;^n7BGuQg`p-EvYH~=`kp)>Dz@q+Y;_#lCX(EHNUScE zL0fsQ?#X#wIt5+WGL*d6&J{+GhiV87MVy+gLe^A^Hq};j3YO-yhDM3uK}VGbz>#fJ zT~b~o(nXF3C<+++Gk2;ka=smtqt4G|TXjB@#wjz&or#07SEYGy9 z!`K4rW8W9 zcf;yMeTc*3QVsS9TQ@p8w(d4OlG|q-$JYKF;;dd1OY|gP#X*D#;G@N-L|21vqc=tfMyOR1RGa zB2H=jK|NP!J;K#o6d`nv%1mU5>0w{1Ng;{K4^Obp%eHw^cUz$i^~82 diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py index ff266bcb99..867f8042f2 100644 --- a/python/tests/test_load_from_df.py +++ b/python/tests/test_load_from_df.py @@ -522,66 +522,38 @@ def test_casting_btc_mixed_directory(schema_value): def test_malformed_files(): malformed_dir = _btc_root() / "malformed_files" - # missing time column in Parquet/CSV/dataframe - with pytest.raises(Exception, match="columns are not present within the dataframe: block_timestamp, block_timestamp"): - g = Graph() - g.load_nodes( - data=malformed_dir / "missing_timestamp_col.parquet", - time="block_timestamp", - id="inputs_address", - properties=["block_timestamp"], - ) - - with pytest.raises(Exception, match="Column 'block_timestamp' not found in file"): - g = Graph() - g.load_nodes( - data=malformed_dir / "missing_timestamp_col.csv", - time="block_timestamp", - id="inputs_address", - properties=["block_timestamp"], - ) - - with pytest.raises(Exception, match="columns are not present within the dataframe: block_timestamp, block_timestamp"): - df = pd.read_parquet(malformed_dir / "missing_timestamp_col.parquet") - g = Graph() - g.load_nodes( - data=df, - time="block_timestamp", - id="inputs_address", - properties=["block_timestamp"], - ) - - # timestamp column can't be parsed to a timestamp/dt (malformed) in Parquet/CSV/dataframe - with pytest.raises(Exception, match="Missing value for timestamp"): - g = Graph() - g.load_nodes( - data=malformed_dir / "timestamp_malformed.parquet", - time="block_timestamp", - id="inputs_address", - properties=["block_timestamp"], - schema={"block_timestamp": pa.timestamp("ms", tz="UTC")} - ) - - with pytest.raises(Exception, match="Missing value for timestamp") as e: - g = Graph() - g.load_nodes( - data=malformed_dir / "timestamp_malformed.csv", - time="block_timestamp", - id="inputs_address", - properties=["block_timestamp"], - schema={"block_timestamp": pa.timestamp("ms", tz="UTC")} - ) - - with pytest.raises(Exception, match="Missing value for timestamp"): - df 
= pd.read_parquet(malformed_dir / "timestamp_malformed.parquet") - g = Graph() - g.load_nodes( - data=df, - time="block_timestamp", - id="inputs_address", - properties=["block_timestamp"], - schema={"block_timestamp": pa.timestamp("ms", tz="UTC")} - ) + for malformed_file in malformed_dir.iterdir(): + # currently couldn't create a parquet file malformed with an extra column in a row + if "extra_field" in malformed_file.name: + with pytest.raises(Exception, match="Encountered unequal lengths between records"): + g = Graph() + g.load_nodes( + data=malformed_file, + time="block_timestamp", + id="inputs_address", + properties=["block_timestamp"], + ) + + if "missing_timestamp_col" in malformed_file.name: + with pytest.raises(Exception, match="columns are not present within the dataframe: block_timestamp"): + g = Graph() + g.load_nodes( + data=malformed_file, + time="block_timestamp", + id="inputs_address", + properties=["block_timestamp"], + ) + + if "timestamp_malformed" in malformed_file.name: + with pytest.raises(Exception, match="Missing value for timestamp"): + g = Graph() + g.load_nodes( + data=malformed_file, + time="block_timestamp", + id="inputs_address", + properties=["block_timestamp"], + schema={"block_timestamp": pa.timestamp("ms", tz="UTC")} + ) diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index f79c90b5ab..d797ac3c12 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -10,12 +10,12 @@ use parquet::arrow::{arrow_reader::ParquetRecordBatchReaderBuilder, ProjectionMa use raphtory_api::core::entities::properties::prop::{Prop, PropType}; use std::{ collections::HashMap, + error::Error, fs, fs::File, path::{Path, PathBuf}, sync::Arc, }; -use std::error::Error; #[cfg(feature = "storage")] use {arrow::array::StructArray, pometry_storage::RAError}; diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index 813ac72c63..75099d6e07 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -607,10 +607,9 @@ fn process_csv_paths_df<'a>( { indices.push(idx); } else { - return Box::new(iter::once(Err(GraphError::LoadFailure(format!( - "Column '{required_col}' not found in file {}", - path.display() - ))))) as ChunkIter<'a>; + return Box::new(iter::once(Err(GraphError::ColumnDoesNotExist( + required_col.to_string(), + )))) as ChunkIter<'a>; } } Box::new( From 53dec1baef8e7087a1cfefb7e199cf6ba1533955 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 19 Dec 2025 01:24:08 -0500 Subject: [PATCH 48/55] Added tests for malformed files --- .../semicolon_delimiter.parquet | Bin 1576 -> 0 bytes python/tests/test_load_from_df.py | 132 +++++++++++++++++- 2 files changed, 129 insertions(+), 3 deletions(-) delete mode 100644 python/tests/data/btc_dataset/malformed_files/semicolon_delimiter.parquet diff --git a/python/tests/data/btc_dataset/malformed_files/semicolon_delimiter.parquet b/python/tests/data/btc_dataset/malformed_files/semicolon_delimiter.parquet deleted file mode 100644 index f0df9a5e3ec6d763be41f0d4f2b7e2b39157cab2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1576 zcmb_d&yLzg7@t@PN~JwSk&Lj!0TG4dz;11Wvt_qA3}^wX+BMJd=qW-ketCuvl0{iL zm84Titcl`<{KJLVxg@r9YY{u&9e2Sa{wZH>g4z?l#FwhC(936Ep;q}CA&B6+8V4)) z-~osDheA_9l~QD{T;JjWIC;)h&IsMwn|bW{ZWLR7c3^Ye3b zT?K<>PN)ji2CDKWf`Upah`MT7ifOGuLqXz zf_VMg>R1Qk3(uv_{NL*r!O!C?%mYXQ&)zyNvm7LTn0+F)WbN89!+kfl94odU-XNj$ 
zW1iy9wc_m#p!+)Y$j;hjES)g3cDrrLwD#^U^0om(W$8>j6YC!VJ$nC~JZmuyRZg&` z-RKv^MXUVr=zBH}%O1w~Elf6k!nlpQOWenJiF?Z&>vsRarwqLM#H$!+c7~B#$$1sy zlo7BqBi$L}m>|B2VC$Z%K26NQv~9?fJsD185{$hYuS<+~=~DHN8100_*Qh&~-l^;D zQkTR??Tyw%K1f7Y3ak}*#P7%EJM|TC9vO7jmedg~U7~hNTM+}FTe^}uhIVJj{f#*s z0X{w_t*K}aVM?8qInXlGnB1uz^KFKve%KhYvLgC>hzWZ$|FECQ@xmF<805s6E~Pgz zO}^B>kvozz?82NZX1cbq{pp>ZsVwGnrYoD7FcF3(pq}mbGqgxT> z_B*ZIPGC|yn1n=b=edp7SIM~7PSqfQ%V_!Idg*4_XB diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py index 867f8042f2..498be21382 100644 --- a/python/tests/test_load_from_df.py +++ b/python/tests/test_load_from_df.py @@ -523,7 +523,7 @@ def test_malformed_files(): malformed_dir = _btc_root() / "malformed_files" for malformed_file in malformed_dir.iterdir(): - # currently couldn't create a parquet file malformed with an extra column in a row + # couldn't create a parquet file malformed with an extra column in a row if "extra_field" in malformed_file.name: with pytest.raises(Exception, match="Encountered unequal lengths between records"): g = Graph() @@ -531,7 +531,61 @@ def test_malformed_files(): data=malformed_file, time="block_timestamp", id="inputs_address", - properties=["block_timestamp"], + properties=["outputs_address"], + ) + + if "impossible_date" in malformed_file.name: + with pytest.raises(Exception) as e: + g = Graph() + g.load_nodes( + data=malformed_file, + time="block_timestamp", + id="inputs_address", + properties=["outputs_address"], + ) + assert ("Error during parsing of time string" in str(e.value)) or ("Error parsing timestamp from '2025-99-99 99:99:99'" in str(e.value)) + + # csv file raises exception but parquet file doesn't + if "missing_field.csv" in malformed_file.name: + with pytest.raises(Exception, match="Encountered unequal lengths between records on CSV file"): + g = Graph() + g.load_nodes( + data=malformed_file, + time="block_timestamp", + id="inputs_address", + properties=["outputs_address"], + ) + + if "missing_field.parquet" in malformed_file.name: + g = Graph() + g.load_nodes( + data=malformed_file, + time="block_timestamp", + id="inputs_address", + properties=["outputs_address"], + ) + n = g.node("bc1qabc") + assert n.history[0] == "2025-11-10 00:28:09" + assert n.properties.get("outputs_address") is None + + if "missing_id_col" in malformed_file.name: + with pytest.raises(Exception, match="columns are not present within the dataframe: inputs_address"): + g = Graph() + g.load_nodes( + data=malformed_file, + time="block_timestamp", + id="inputs_address", + properties=["outputs_address"], + ) + + if "missing_prop_col" in malformed_file.name: + with pytest.raises(Exception, match="columns are not present within the dataframe: outputs_address"): + g = Graph() + g.load_nodes( + data=malformed_file, + time="block_timestamp", + id="inputs_address", + properties=["outputs_address"], ) if "missing_timestamp_col" in malformed_file.name: @@ -541,9 +595,81 @@ def test_malformed_files(): data=malformed_file, time="block_timestamp", id="inputs_address", - properties=["block_timestamp"], + properties=["outputs_address"], + ) + + if "null_id.csv" in malformed_file.name: + with pytest.raises(Exception, match="Null not supported as node id"): + g = Graph() + g.load_nodes( + data=malformed_file, + time="block_timestamp", + id="inputs_address", + properties=["outputs_address"], + ) + + # in parquet, null value gets interpreted as Float64 + if "null_id.parquet" in malformed_file.name: + with pytest.raises(Exception, match="Float64 not supported as node id type"): + g = 
Graph()
+                g.load_nodes(
+                    data=malformed_file,
+                    time="block_timestamp",
+                    id="inputs_address",
+                    properties=["outputs_address"],
+                )
+
+        if "null_timestamp.csv" in malformed_file.name:
+            with pytest.raises(Exception, match="Null not supported for time column"):
+                g = Graph()
+                g.load_nodes(
+                    data=malformed_file,
+                    time="block_timestamp",
+                    id="inputs_address",
+                    properties=["outputs_address"],
+                )
+
+        if "null_timestamp.parquet" in malformed_file.name:
+            with pytest.raises(Exception, match="Missing value for timestamp"):
+                g = Graph()
+                g.load_nodes(
+                    data=malformed_file,
+                    time="block_timestamp",
+                    id="inputs_address",
+                    properties=["outputs_address"],
+                )
+
+        if "out_of_range_timestamp" in malformed_file.name:
+            with pytest.raises(Exception, match="'999999999999999999999' is not a valid datetime"):
+                g = Graph()
+                g.load_nodes(
+                    data=malformed_file,
+                    time="block_timestamp",
+                    id="inputs_address",
+                    properties=["outputs_address"],
+                )
+
+        # not applicable to parquet: only the CSV variant of this fixture exists
+        if "semicolon_delimiter" in malformed_file.name:
+            with pytest.raises(Exception, match="the following columns are not present within the dataframe"):
+                g = Graph()
+                g.load_nodes(
+                    data=malformed_file,
+                    time="block_timestamp",
+                    id="inputs_address",
+                    properties=["outputs_address"],
+                )
+            g = Graph()
+
+            g.load_nodes(
+                data=malformed_file,
+                time="block_timestamp",
+                id="inputs_address",
+                properties=["outputs_address"],
+                csv_options={"delimiter": ';'}
+            )
+            assert g.node("bc1qabc").history[0] == "2025-11-10 00:28:09"
+
         if "timestamp_malformed" in malformed_file.name:
             with pytest.raises(Exception, match="Missing value for timestamp"):
                 g = Graph()

From b961c2ad88751685fc2c0fa5bf80570ba23a9157 Mon Sep 17 00:00:00 2001
From: arienandalibi
Date: Fri, 19 Dec 2025 01:51:11 -0500
Subject: [PATCH 49/55] Added tests for compressed CSV files (gz and bz2
 compression).
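A minimal sketch of what the new fixtures exercise, assuming load_nodes infers gzip/bzip2 decompression from the .csv.gz/.csv.bz2 extension (illustrative, not part of the patch; the path is one of the fixtures added below):

from raphtory import Graph

# Loading the gzip-compressed copy of flattened_data.csv is expected to behave
# exactly like loading the plain CSV; only the on-disk encoding differs.
g = Graph()
g.load_nodes(
    data="python/tests/data/btc_dataset/flattened_data.csv.gz",
    time="block_timestamp",
    id="inputs_address",
    properties=["block_timestamp"],
)
assert g.earliest_time is not None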
---
 .../data/btc_dataset/flattened_data.csv.bz2   | Bin 0 -> 11920 bytes
 .../data/btc_dataset/flattened_data.csv.gz    | Bin 0 -> 11623 bytes
 .../btc_dataset/mixed_directory/part4.csv     |  51 -------
 .../btc_dataset/mixed_directory/part4.csv.gz  | Bin 0 -> 3119 bytes
 python/tests/test_load_from_df.py             |  38 +++++++++++--
 5 files changed, 33 insertions(+), 56 deletions(-)
 create mode 100644 python/tests/data/btc_dataset/flattened_data.csv.bz2
 create mode 100644 python/tests/data/btc_dataset/flattened_data.csv.gz
 delete mode 100644 python/tests/data/btc_dataset/mixed_directory/part4.csv
 create mode 100644 python/tests/data/btc_dataset/mixed_directory/part4.csv.gz

diff --git a/python/tests/data/btc_dataset/flattened_data.csv.bz2 b/python/tests/data/btc_dataset/flattened_data.csv.bz2
new file mode 100644
index 0000000000000000000000000000000000000000..bc0f6e04d9c6259e2b33b04b4963ab03591b2ecd
GIT binary patch
literal 11920
[GIT binary patch payload omitted]
zeFu@JVPSIGr~~K|{Bn14{=T+fqt5jD&c*fZd-={z_KQJ7Y44yq+H+-)0&kq|It*fB zf@nRZenX8ci7Be%HQVX;W&zC0Qqh!PTD!ih>qJv&DrhPVD_l)*TmLWTp#N*`Ww?=5 zmRW@v5vJEFOZ#Z;~6_RVf9QIPp%C^*CCXAt70T=T`TDlr5Xw_6n zo1vqrEl4@(ycuoxhEy}hw%S&ZV(v3Ha%!b$l2b%`Jvpd2Zu;d-)N}qjSGXN)Vr~W0 zH)bOY%Ry_{IA+MH(b=^5iuF>G>E!dHc(^^+kvGm{cW37Q;^#B` z255h-zggS;sA$;$S!fG<`+8Q)7F{X( zFlQ@qp)eq_3~eGh7$sCm?&|o;Ln{LcL}z+oD~8=3^ZfB9DmG}UICLHEwx-Je{-wv? z<7=8+Sq90J4bPMFlMZBw`)WsRnG1R_{d!2zl6<_9jTf^Q&)Xx9piqT(Ujr9|DZ!-H zI@3|gL!}Qxg8)7ev2$shU53tSA=hP5t=$rHUL6R%Oj`y^c4k|bj*i>paxnYA2exIJ zb+_1+3ww00lK6ckTzXY)7JlWc0!wWSBw@4vz7Ls!RWeRwYh3Sp>0;;AB=$l5Z+`qJ zL2z91I{3*2szx_SHRRoGtfB!eSZWkLw!41s8#O{hRpF6hId(1(vV`40+d*sQWmvg4 zQrY9$*^iA8yk~iCX<-DcjsX%;X4=xp$V;E5B!9n+Hy~k#uGq1siPAKRhUvL{WTHhG zk`Ys7JH9~OA{qz6SBSQ$1{4#svR`%Col>$>g-naK<L$II(T(#X~+n^|$;*A@J3kYRQ+93=wnl5lah|0>v7cn4)#V& zsd4S>VNUDB!nBj7A@1-@X;qsoEOr{OwNo*Lo(;SUY}l=-ew{Z;K=XQDT)uBHBdOJC>Z|9HaH>U@35CAWsJy4IuG_^EUw$^%Rp{qckKy@8+v*?8a#zl(^jbkE67UPy-w_!|jZZLwR zRqn7#)OeV!4W;D}7@Edukwiym{6K;>Ni>~jU>^H8p|pVLu`xDKV4y}vtH>m@py2)d!mbki5|-3jX-dhro*1T`_pR{&TEG^FZnX-I zs%y^>u(``E^|w6S)p44m7)68}!Gu_6#spmW1RuVAeb18&r`&Tu`t^OPZ|vK5{XKnu zF5>(5&&N08lq);MgtQQ9)*gjER_Wo62RTsPZm$gn?G9gFzdk)!{^jxgErh3=^XvD! z`z;RcMdlav>p3oeeqZ)$L&9I{1NkEF;`#9X9 zygGps==b1k0KebgNh`Mq;8viLP)r8^U2bwaOBseOCEP%4?8;I4YXQ=VNy}uKr+0W* z_~e=b&Y9JHrBjg#(=gA8PC?0KOoD+>V?U{K=s}mqs^TK)Bt@rYv2W1OT4T!tsPU@P znvJvt2iTB7xVSGWv?&3Q9L-i!OC58msl{s<=NO2!;}Aqw7ts>BcP;eypcW&ox(ql9ADqs}V3OIyP8gCq}SpxmkKsGYS($FZu zd$&yqpjP++n?+(oqdlCKqyto@J?9MKpq=uT6qt>&VOh z5nbQ_!89>Udb%%o6a!x*Kct(XEToFbm|1rf7ZSV^duj%iCaLk-O$_u*9QB6|d2a1u z1)_rY)+|G_m6jdFghwKUA|vW-YJHf`uffYx`SLkBLLM$ytL&>UJ*(vYWJvFFIZ8nP z-ocYYQj1+$m)vz7P+(#xtlhG#v)IP5C0vgnjLScFtp2%DZbN4cHQP2U0tc)2kH0)2 z0OeI~InT>nr|5s*39@GQq>ZH(qH=BuoI21nD6w@;f_}gmh~2oiPVM@I;4YAaqov1B ztqUdsUq#tsGE$0Z7}vhb$u2W?Ya{qnC{xX4;AnHwbsw9@#Omx}XH|?mYzYeELXU1tl((jze#yG>*KgwT_}eZ2_Aar*z?dcljRrMSvgN^uxarLd*q=Nv1UK(Wv}*VM?k@W(WA&%{VhNjZM!3)_tMeTdz{%hV{2% z{d<@GpZL2Apj&zq)!3jA05pm*OSG$~iU3Ntq+427P5fx}{|7rdru~qSOKVL=o3tDW zP;u9p9gnePKDag${O5Z9KfB!2GVaYBRt}@ZE7P;E z5^Ysvy!L#mx{2qw<`lyol^Ay^TlU^@p~ue)h$<%=x1|ex(s(PwRFV>c&(zSN{nFSo ze>m$5BV8%&_R<)EXeRp|H=*wlNlh0!qs3TJ_ve%T5WKz0hmS+qzwYHNNndZ|PqEi< ze|L8A2H!4E??1u&usepnK5U;yuzefe;q!I6{HCrR!k7E&ejQK#IP)jZpY4x_uW@+) zrSC~gvFvTR;TPBOd@2+C+~ePWqsIx=0(5B^xQt4TY+KjVnsQ-baiT1@Hrt& zu#Pb|KY*gbAf3!+C(+ok^@c5S=nuBrHwj8%sY1_u_O946xk*@r*0K}AT7~Uh@BjQl z`IqrKLZ7!+{do3v^78U<|H94tv(GNRwNBynphM#Zsxw`hQT0kssHrOP>(D|rYN{4w z+`EG1@jvj^NIpjcniaOZFzp7|C8D?j3tEq+xvZPt#^0FCjKnO$ToV^|Yx2i8oIn1^ z(dyZV93mDRKf;Tqej6I~tyS}-eQ$F>Rng=**6rWozB26bVHs*Jg^rX*u)2*83j&kg z;hbB4xtbjA8SDLg{J{R4j8Ep{Z4TVRZdFj*%?y@;%^TcOl4NzQhxj)$4zMTKf|j9& lkJkVdY!=WYjvI~!tOAi0{I~xJ009600|4>0OkTWG004u|vTOhV literal 0 HcmV?d00001 diff --git a/python/tests/data/btc_dataset/mixed_directory/part4.csv b/python/tests/data/btc_dataset/mixed_directory/part4.csv deleted file mode 100644 index a529f74dd7..0000000000 --- a/python/tests/data/btc_dataset/mixed_directory/part4.csv +++ /dev/null @@ -1,51 +0,0 @@ -block_timestamp,inputs_address,outputs_address -2025-11-10 20:57:25,bc1qhcntczjrk7n83736ww45zrhhtxgwll023qy3eu,1ETdQMChucreiaNBjPDYV3R278ChRG3c7m -2025-11-10 17:23:00,3FkQ5nZWyHs7u63PgafTQ5jBK3TrLDcfRx,15qdGZi9vDYp7cADq2jqzpo6WvpVmX7d4p -2025-11-10 13:42:54,bc1ql6sgtq0uwh67un03dz4nt26n739qyu2xatgz92,bc1q7tpvm5d0yyv9lxme5y73s46n9cv803ue3gwevg -2025-11-10 00:28:09,bc1qv2gc72d0j8dmz530vuxlgyj25j4kz5s0050trj,bc1qv2gc72d0j8dmz530vuxlgyj25j4kz5s0050trj -2025-11-10 
17:59:43,bc1qcr4jzax2wjyt5lkpqkhs6nuvrmfhmapsh9j8rp,bc1qe6asyl5njvyqc39qf2y7g5vqzscd9jysjwqs0k -2025-11-10 18:01:50,bc1q4y8s9l2yck6dvcejpmn90phdernngxcjwapqge,bc1qyktx2nxpjtrn07ftkpxsjv9c9atx7xh0nmrcdm -2025-11-10 14:12:11,bc1p6qvr6ceqgzmxdqrcrg2nzq39e3sd288sxa2reac6e285qq8epu0qgkdv9g,bc1p6qvr6ceqgzmxdqrcrg2nzq39e3sd288sxa2reac6e285qq8epu0qgkdv9g -2025-11-10 15:34:09,bc1qc5yxr9qkps7gfpkeg9xvptaz494n5eh4s00eru,bc1qcq5r4zg5f75ef0ps66nsvj3m03mjln9rcqjegm -2025-11-10 20:57:25,bc1q28f9lrqqaxly2jf2azfs36dyl7r6efcy8wkwew,bc1qz94dj90ymf87w77wfdpe7xyq6j9ngj6x4agyt2 -2025-11-10 14:56:34,bc1qtjuc2dqz34tkzs4uwame8rhyvpgge0gy6knhmy,1HYadqXeegRjnqDAYBkj92o5hF1pPJu4sb -2025-11-10 20:57:25,12rLYV7AQfpuxZpPXdfUqZCs7VCNp95qq8,bc1qu8xl7f8jkv2a58j0mas98r7gwecqqmnw6lkwt7 -2025-11-10 20:57:25,3FkTTwxagg6p7rs4d3GnQpZbADpatufQzo,bc1qfyfqzgvzxw2s2w7vms4yalys96aeqlq4rm0jxx -2025-11-10 17:23:00,3NhHCdt4RYXPjVQiYiWyRRyqqP7ik8SS9t,1636RnPVv6j8mTyaRmprjSpQCPAjD5UGiA -2025-11-10 17:23:00,bc1q5u709x2l7lsleprw264k5xj4rpmmmhhrurpkq7,bc1q7p54e7uarkxjlgc6qzwmugn7ygvmcwss262l50 -2025-11-10 20:29:45,bc1qrz8a6d4z2xnd2e3lnkd45v9jc5vd65t6a0pgzw,bc1qz4cfzstee7f208cxdca8v70ht5fcv5lypc63rv -2025-11-10 15:38:35,bc1q769n0hz7a9j038jdkwwd4lq3xwrkaskv6y8sp7,bc1qwrgtmyau0h2ar0guws8a9c0sa9vmjzqrlt7620 -2025-11-10 14:59:57,3M2nVoRZJgkxNHG7W7zp48xob11mbCTCKA,bc1p2d25ns4cf85dkk6jyytjeg4fcfv22lja6wwe77eltpwc2zd2yzqqyky6jm -2025-11-10 14:59:57,bc1pq7s7kpp90z2d4s7hzfxj32n72acd0987z3u2wm55ltst9fwelelq4emgpv,bc1pysearh9me9sa4kfkne6hu3jq96shjwdtrja2lf8uk4wn2uq5jjms72j3h4 -2025-11-10 17:23:00,bc1qjnmp4gxpp4u056dapqngwq2asw2ty74htrphx2,17Q8GjyhXZcf5RMmHGU7vznVvKpwGB88PQ -2025-11-10 05:47:18,1Nvv7ihqwz6Hh9rG4Bk64K7nGT7y1g2Wa7,bc1ql5zu6awgrz7wwwq6369kvc8fqz24n2ts6hn7y2 -2025-11-10 15:34:09,bc1q8a42mx0xfeyqy90zfkludfdu3c43w4a0jfw2ps,3QRLixkesAcqc268rikzbaRShSwSVydSSE -2025-11-10 14:56:34,bc1q5gpyv756638njr84s8uzeq3v59e97ha42hj52s,3FZLUQmcFTibssUKCJiNPtu9pXexXgoVun -2025-11-10 18:01:50,1LtjGorQ6FeNuC9S1oThWZQ6b79VvqH6Xp,1MqBMKLfsYq1xMwvwzZe39VAkXV3RYmNmk -2025-11-10 14:56:34,bc1pwjzpf2p4drax2mympx474phluyzjmpl7udnw9hmx43hxgc5w28vspms2c3,bc1qwrcm425acde6757923pxjkvlees9tww5xuqf0y -2025-11-10 14:12:11,bc1qrjkmdkhewjktx059nckpyqqlxazvr7kyeg57sllw7ksgenfwda2szu8kgy,bc1p2pxfzgune7ked0gldvt3j7zzusjv4t2uphhaf7pqg3srqdpuvngse97236 -2025-11-10 00:28:09,bc1qk90plyzwtzweulus6mmf9sd0zndplwqp26u8fy,bc1qeku5u5emyu6lgazyd6zntmfvkzffnk4yxm7l559hl0rku600798q533jtx -2025-11-10 14:12:11,bc1qmlj36ml4nay4tm5gj0h4u769uwpww6ucyqf0p2,bc1qp7ekeam9lauvcmltkhfxlyzd3udzmd39syyhh3 -2025-11-10 14:56:34,bc1q63qkttxua3aw9umnqqptkw2468rs5jne547umd,bc1q793gj5eml2u9hpgqm6y3xjgmfu669xy0aqzfa0 -2025-11-10 05:47:18,bc1qmptzs6czc3mlp6hy3932kka5687vn5fd9cnecu,bc1q9yn6zdkjjlh0z5y6sqpdvwq7pwkeh5r0ka28ad -2025-11-10 15:38:35,bc1q04qcmmnmd47dc8mjn9fcvxcjdk5f6edg6f40mw,3FxCJ2XUyFEup66QDaxgcHF8az6P19wien -2025-11-10 12:17:32,bc1q7t4vyehjsexdme84qhdgd4dawcn54djh0m78fz,bc1qcu0wrnx0002g2ka4sr0nnrldtff6dvq433unh2 -2025-11-10 20:29:45,bc1q3ms0mj7jtt9nd5smhv50uvd27czetxxlnlzkuq,bc1qcu3a5u765fdhddzccjy3k02uzewarlwd7yyn82u0fv42508n650s6w5um7 -2025-11-10 00:28:09,bc1q6dadscrdytuwjeedk9fr80xwmnl5prqvhwy7aga4k3fmxwhzvf6shuzpmh,bc1qdfcstw5dcud0dusq3tscs5khczqc7esn7psa3p -2025-11-10 20:29:45,bc1qtkya7nnflevqx2tgjajycw2gjl0y4w7626lad0,1ACE9sy42uw8Tns84KjueMj8koezrcZRdG -2025-11-10 20:57:25,bc1qm34lsc65zpw79lxes69zkqmk6ee3ewf0j77s3h,bc1p52jjweup7chageaggu4cj8jl3avylha8zrr7lgqkth82tancdrxqlhnvzm -2025-11-10 05:47:18,bc1q3dc2ec45m8s49u9rlv9y8ruyr644utc3hv7ncm,bc1quf7hjdq99rldlyqmxaz9sd3unm6j5fv6yulty2 -2025-11-10 
15:38:35,bc1qsh343fpz0mtlfl3k4xzu5qru0uprdyh786lfsu,38qZsSoHitwa7XDicxT8xewyXPu2VAvhhh -2025-11-10 17:23:00,bc1qtgvxhy2armx5xajkfmdks5h8y7cqmqgwwfdrp2,bc1qtgvxhy2armx5xajkfmdks5h8y7cqmqgwwfdrp2 -2025-11-10 15:34:09,bc1qr7whs7crqjpqshv7qk9ex4742wagf9gnznjulc,bc1q20n8jugfv9c224fdfxe4vgugyd2gh7uaytt9kc -2025-11-10 00:28:09,bc1qnfw0gkk05qxl38mslc69hc6vc64mksyw6zzxhg,bc1qjhv7fcemmjx4temc4d2z500jv5fud2y3rtwwg3 -2025-11-10 20:29:45,bc1q3jzyfvu60rc3um5rah7y6j3gks3m7jffqpw9ef,bc1qc29w7mwejcuklrhqf0e8l6zjys6x4sqzkns2u4 -2025-11-10 20:29:45,3J8dPt32vzUdQzvwXpicG2XFcaG96dnRZt,bc1qre4tzdx5r8ckzhn9ffrxwusvugdfvee9n0v5y7 -2025-11-10 21:05:57,bc1q2dzekmutn0s8wh5ty9kywgddcl7j796zju8aql,bc1q3u7r770vyc5v6hae8v5pdv846wq430jhdfz40j -2025-11-10 20:57:25,32bZeQ89m2oPeM6wLKeYdvzsPNBDb3bGAP,3Psvpa4LQtEf2tR9i3VJwGRgHQsFPJ8rf1 -2025-11-10 00:28:09,1VEmWQLu9iohP6RMmabnKcDJuCkyk3E85,bc1qchpdg4wnyaswyfggfatrrwz9snasrc92wgzhfy -2025-11-10 20:29:45,bc1qlw4565huuxsr03dz3sepexjv6ujmfy2amye98d,bc1q87s3wsnzdhlclqpymykpkdm44ryv66av5fv08q -2025-11-10 05:47:18,3FfS44EtZhTBb1XXQPXcjiVqxpub9gncz8,bc1qft2zpj0wl4zqghk4lad6qr9www4zrrseuv0y5e -2025-11-10 20:29:45,bc1qsggexuj2xdmne5kvj2mnu4ur2m2qjpwlyrtqtf,bc1qfyaje5au3xzwcdmt2ecct5xneywrqaf4p46m22 -2025-11-10 14:12:11,bc1q4k8t9a9jrzhcnlyretxgz4kqc5hlyruvra5q5p,bc1qlyy7f7cu2rptc9n42khv3lxrc3pzwp58aa8qlp -2025-11-10 00:28:09,bc1qlwjqwjugrv5c5wzg3hmtj9m72tqj5mnqeazrzy,bc1qlac25q65m2wjz9fjzqg382txhycjc495gs6ez2 diff --git a/python/tests/data/btc_dataset/mixed_directory/part4.csv.gz b/python/tests/data/btc_dataset/mixed_directory/part4.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..210234f12be4800955ad5f35ba732a75b20d3efc GIT binary patch literal 3119 zcmV+~4AAo*iwFo$^+ag`|8QY)bTlqwb9MlvSlMpeI-0)krxP~reO=y}sC@Pv)KxPVuP!ibLI@STa9;K__92Q+E+c51;;zD6WWFGj| zcQ8pfX1R@jjnsALpg-@1*XtV?IR&CKy>J!7ZcF<#76Y(Bw&K`zxLX(Ve>nQLYT@A= z!Po12L8h`DWK&i=Ss$#9WKP@Sly=NVJ(zy+uSX?eu}pY2TM4|StU#e7-a?IB45mbr zL+tD?ky=8BbKJdNN2QO$1PZrAY+M@e=ArXi$Yq|#otw;j80G8HV%HtayEp4HcT}y` z@6l`nAK1FnU3arvxcGGf(D?zJ9}a(9_atl!;WCWX?IJ9E8Nplw93+ea^nKhJSR~^_ zf*yy^OP{(hTCok||MU7UFT&>#{imNCmwg$6^^uOsTR9H<=2uqHfMzT;YHgBBIvx#p zL3PB6VyQYG37O+&pzEMB(>yFZ7&*F=_yWBerX>WnPX!nSSXB`cQ6{<&sd(wL%`(3p ztpgHf=!#Q4&6H*(yrg>x#0>Lj$d1@pR`9nfW75)ky}p{nfe00%)z*koW={5tE}I`m z*^PzRS-Wy}`0$g7@F3-QYv%Cw;^!M{2JnLQ`-8v!m{5HBZ7v6}eExY)FK(1i`=h^p z5bAq)yo~hz@~a>IsQc?9Qom1B^~P?1;ofwP#i3tlLl+Oh&6!L#uk^PO8du!i<(-Wq z@pOquz^nPjKR#W&^=wV*roOyJP(_70ZgtNff=o9TQDIYw17R{u0WGdG`~G`2`l>%% ziWS}de7H9EH_tzxf0WzX62diow*9wn!+LfgklfDK_Zu<2yDRM0`Ne$mH<#BJ<}Lny z^?dQ$FaPl{oM<=fV4_o;q+c>1$l~3g#XIM;Uee-iphq_H7)hGgVs|r*oP?TP8fRL@ z&G8%~AYj7X->L)}M;vErsXZeCRj|(jNI1<_AiNER<4q8}64v>#{y9^Ks~T4+sRCWk zcfnb|(N3>e@eNPKa{`y`?>Ont;V}{=!`$gg*k z4({pl?();cD-Qqx@Elu0Jr>pyQ_9+)hZI*E0Fz-&yCkJ@T7BlA3Q%fj%vFeSzm59G zCj*VtdOvg(2o$NVYBvzf6fiEjp{ErlkR8U;8rMN(ImtF*(v5z5y{_OB|+vu|nxsnfh zSgz0;OVB4eU)|Ai7{L#A?B#@OA}o(fr8(!&FfrJT_YHUe&0vj0w^;=z*|lc~xV_tz zETOd8Dmx{iM1mZ_j96z>20os_o7>ONyOr^R2M$QTJlp!jZolbo`P+RF-@d&&$#r=2 zWuur7*w1F^QS4Kd0d9DZ1Jy0OHW;)B?;k&Zzj6Hi-Sbn7-#=Y`eExd9rolhR{b78$ zPxEip@$h*yS1-#Ac`v`F%i-JMnMLb;4?yP1E!a4>PyEG2pbbx-HZe zIbs^8DN`FqIjbxfh&AMZ<}g9jw_svI4UO*mocN+Q$9Y7wN)t5ch; zO3fCP-O&l-K~LRc6X`lS^Z`R?*8OBX++-ke6Pj5;tHYz|G)j&3QuaBM#7nEDKYB<{ zrIpML6H6O+(`vP9qFx0|qB;SGSc=y01r88c%P{HbHe)3Q zK1+UdH$$0871o$ZcNG^BgBM$F0+p@Qco-;Jq{R&}5}~M`5ud zQjA2Q&gF)~{qiGt_+H+>OP-LMx2#R}hxa|J9PL#-I=&IEkxzq6lgopI4HAa%7T_M_aJp@Ye?<- zncyyxjFY1$QP8MfM81f!rfj4Xc2G;-?b*!}_Dk4wE|iT;WuS6<)^$@&V^Z}_zg`!L z$2DWYljuZ>0j`lpY&EZeQyFLKBZK~A?yD_~&a;~27Iv_XhMB@M7|qlbl$&sqiHSyx 
z7nkqam?MyKe>cY-eKJXYF};0}b>WY<;_9Rib`VXG6I^|!Z3B~$4All_iIh-g)fq}7 ze9gsUU`(Tewhd}F%H{_X;-)tfWLudu>+7=6X>4H9Jy_Yy#j=M<&s%jT5dj4Uk|XT; zh=w#Q(>9cTNo64jrIkay(V4rWgH==;5r%e1OeMHRjE#-j=s04qYK|vO>;Kfg|3*aA z5JBZ@=hjJ0(4xI29G282-Cu9BZj-J%;vi67UTZthJQRa*)HKuV}xqHRoL5n$BK>6V5n%TG>!6Pj+cw!>kcVDI9@ z$)GveI6= z7p7@*h;XzKI%<>(J`YkIQ4WSoU6zI?J1x2if$S$z+h*N%u^6hIBS`4Ck>JN(T(1yp z^}D#KTYm7*{0cn0=j>`AV!nM`j|G-!tzySZ&u!I>Jk_%26t^@AwMp5$_lC=_+tT5@ z4ffH!1|X`mF0FGH`>gR=23ztd1fQs(LwjlJS(uJ#C%RDDZKdr9L^IiMxDkDiNN&8? z87)*ro%#J<2A&?}O+PsBaxK4*{P~moDYhD~zrK0<1fQ-huFv3g+&n{{Z`OATthd86 zy#JW5ZpRNd@%{D3eo=?tcK&ex&i%OgoQCHYeNCE+Wl!@b{`OKj7u)<=E!=IQY8aC7~@&GUWOeR^t0 z;q}NvY6aDsE>Eb2QCrklm4szzCL1+Yi!yCp!SZA-v2F5uGN4Ie^9yz}z|9Gz1vtxYcROEB+2ShkM?h39N<8(1I007%LF311? literal 0 HcmV?d00001 diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py index 498be21382..e108eb3d45 100644 --- a/python/tests/test_load_from_df.py +++ b/python/tests/test_load_from_df.py @@ -46,8 +46,8 @@ def test_different_data_sources(): nodes_list = [] ######### PARQUET ######### - parquet_dir_path_str = str(Path(__file__).parent) + "/data/btc_dataset/parquet_directory" - parquet_file_path_str = str(Path(__file__).parent) + "/data/btc_dataset/flattened_data.parquet" + parquet_dir_path_str = str(_btc_root() / "parquet_directory") + parquet_file_path_str = str(_btc_root() / "flattened_data.parquet") # test path string for parquet file g = Graph() g.load_nodes(data=parquet_file_path_str, time="block_timestamp", id="inputs_address") @@ -75,8 +75,8 @@ def test_different_data_sources(): del g ######### CSV ######### - csv_dir_path_str = str(Path(__file__).parent) + "/data/btc_dataset/csv_directory" - csv_file_path_str = str(Path(__file__).parent) + "/data/btc_dataset/flattened_data.csv" + csv_dir_path_str = str(_btc_root() / "csv_directory") + csv_file_path_str = str(_btc_root() / "flattened_data.csv") # test path string for CSV file g = Graph() g.load_nodes(data=csv_file_path_str, time="block_timestamp", id="inputs_address") @@ -90,6 +90,34 @@ def test_different_data_sources(): nodes_list.append(sorted(g.nodes.id.collect())) del g + # test path string for bz2 compressed CSV file + g = Graph() + compressed_file_path = csv_file_path_str + ".bz2" + g.load_nodes(data=compressed_file_path, time="block_timestamp", id="inputs_address") + nodes_list.append(sorted(g.nodes.id.collect())) + del g + + # test Path object for bz2 compressed CSV file + file_path_obj = Path(compressed_file_path) + g = Graph() + g.load_nodes(data=file_path_obj, time="block_timestamp", id="inputs_address") + nodes_list.append(sorted(g.nodes.id.collect())) + del g + + # test path string for gzip compressed CSV file + g = Graph() + compressed_file_path = csv_file_path_str + ".gz" + g.load_nodes(data=compressed_file_path, time="block_timestamp", id="inputs_address") + nodes_list.append(sorted(g.nodes.id.collect())) + del g + + # test Path object for gzip compressed CSV file + file_path_obj = Path(compressed_file_path) + g = Graph() + g.load_nodes(data=file_path_obj, time="block_timestamp", id="inputs_address") + nodes_list.append(sorted(g.nodes.id.collect())) + del g + # test path string for CSV directory g = Graph() g.load_nodes(data=csv_dir_path_str, time="block_timestamp", id="inputs_address") @@ -132,7 +160,7 @@ def test_different_data_sources(): nodes_list.append(sorted(g.nodes.id.collect())) del g, df_pl - # sanity check, make sure we ingested the same number of nodes each time + # sanity check, make sure we ingested the same nodes each time print(f"Number of 
tests ran: {len(nodes_list)}") for i in range(len(nodes_list)-1): assert nodes_list[0] == nodes_list[i+1], f"Nodes list assertion failed at item i={i}" From 152ac279ab18bbf93f8b2099ab0c5f651f201eff Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 19 Dec 2025 02:00:34 -0500 Subject: [PATCH 50/55] Added test for directory with no CSV/Parquet files --- .../data/btc_dataset/empty_directory/readme.txt | 1 + python/tests/test_load_from_df.py | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 python/tests/data/btc_dataset/empty_directory/readme.txt diff --git a/python/tests/data/btc_dataset/empty_directory/readme.txt b/python/tests/data/btc_dataset/empty_directory/readme.txt new file mode 100644 index 0000000000..0fdc877bba --- /dev/null +++ b/python/tests/data/btc_dataset/empty_directory/readme.txt @@ -0,0 +1 @@ +There are no CSV/Parquet files in this directory. Ingestion should fail. \ No newline at end of file diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py index e108eb3d45..9227358d74 100644 --- a/python/tests/test_load_from_df.py +++ b/python/tests/test_load_from_df.py @@ -547,9 +547,18 @@ def test_casting_btc_mixed_directory(schema_value): assert dtype == pa.timestamp("ms", tz="UTC") assert g.earliest_time.dt == expected_earliest -def test_malformed_files(): - malformed_dir = _btc_root() / "malformed_files" +def test_malformed_files_and_directory(): + empty_dir = _btc_root() / "empty_directory" + with pytest.raises(Exception, match="Paths must either point to a Parquet/CSV file, or a directory containing Parquet/CSV files"): + g = Graph() + g.load_nodes( + data=empty_dir, + time="block_timestamp", + id="inputs_address", + properties=["outputs_address"], + ) + malformed_dir = _btc_root() / "malformed_files" for malformed_file in malformed_dir.iterdir(): # couldn't create a parquet file malformed with an extra column in a row if "extra_field" in malformed_file.name: From 265732af7920ed5d0ab578f2e84c36887e48f937 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 19 Dec 2025 02:41:50 -0500 Subject: [PATCH 51/55] Added load functions for edges, node_metadata, edge_metadata, and edge_deletions for csv files/directories --- raphtory/src/python/graph/io/arrow_loaders.rs | 196 ++++++++++++++++-- 1 file changed, 176 insertions(+), 20 deletions(-) diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index 75099d6e07..a06ea51b46 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -416,6 +416,30 @@ impl<'a> FromPyObject<'a> for CsvReadOptions { } } +fn collect_csv_paths(path: &PathBuf) -> Result, GraphError> { + let mut csv_paths = Vec::new(); + if path.is_dir() { + for entry in fs::read_dir(path)? { + let entry = entry?; + let p = entry.path(); + let s = p.to_string_lossy(); + if s.ends_with(".csv") || s.ends_with(".csv.gz") || s.ends_with(".csv.bz2") { + csv_paths.push(p); + } + } + } else { + csv_paths.push(path.clone()); + } + + if csv_paths.is_empty() { + return Err(GraphError::LoadFailure(format!( + "No CSV files found at path '{}'", + path.display() + ))); + } + Ok(csv_paths) +} + // Load from CSV files using arrow-csv pub(crate) fn load_nodes_from_csv_path< 'py, @@ -441,26 +465,7 @@ pub(crate) fn load_nodes_from_csv_path< } // get the CSV file paths - let mut csv_paths = Vec::new(); - if path.is_dir() { - for entry in fs::read_dir(path)? 
{ - let entry = entry?; - let p = entry.path(); - let s = p.to_string_lossy(); - if s.ends_with(".csv") || s.ends_with(".csv.gz") || s.ends_with(".csv.bz2") { - csv_paths.push(p); - } - } - } else { - csv_paths.push(path.clone()); - } - - if csv_paths.is_empty() { - return Err(GraphError::LoadFailure(format!( - "No CSV files found at path '{}'", - path.display() - ))); - } + let csv_paths = collect_csv_paths(path)?; let df_view = process_csv_paths_df(&csv_paths, cols_to_check.clone(), csv_options, schema)?; df_view.check_cols_exist(&cols_to_check)?; @@ -477,6 +482,157 @@ pub(crate) fn load_nodes_from_csv_path< ) } +pub(crate) fn load_edges_from_csv_path< + 'py, + G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, +>( + graph: &G, + path: &PathBuf, + time: &str, + src: &str, + dst: &str, + properties: &[&str], + metadata: &[&str], + shared_metadata: Option<&HashMap>, + layer: Option<&str>, + layer_col: Option<&str>, + csv_options: Option<&CsvReadOptions>, + schema: Option>>, +) -> Result<(), GraphError> { + let mut cols_to_check = vec![src, dst, time]; + cols_to_check.extend_from_slice(properties); + cols_to_check.extend_from_slice(metadata); + if let Some(layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); + } + + // get the CSV file paths + let csv_paths = collect_csv_paths(path)?; + + let df_view = process_csv_paths_df(&csv_paths, cols_to_check.clone(), csv_options, schema)?; + df_view.check_cols_exist(&cols_to_check)?; + load_edges_from_df( + df_view, + time, + src, + dst, + properties, + metadata, + shared_metadata, + layer, + layer_col, + graph, + ) +} + +pub(crate) fn load_node_metadata_from_csv_path< + 'py, + G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, +>( + graph: &G, + path: &PathBuf, + id: &str, + node_type: Option<&str>, + node_type_col: Option<&str>, + metadata: &[&str], + shared_metadata: Option<&HashMap>, + csv_options: Option<&CsvReadOptions>, + schema: Option>>, +) -> Result<(), GraphError> { + let mut cols_to_check = vec![id]; + cols_to_check.extend_from_slice(metadata); + if let Some(ref node_type_col) = node_type_col { + cols_to_check.push(node_type_col.as_ref()); + } + + // get the CSV file paths + let csv_paths = collect_csv_paths(path)?; + + let df_view = process_csv_paths_df(&csv_paths, cols_to_check.clone(), csv_options, schema)?; + df_view.check_cols_exist(&cols_to_check)?; + load_node_props_from_df( + df_view, + id, + node_type, + node_type_col, + metadata, + shared_metadata, + graph, + ) +} + +pub(crate) fn load_edge_metadata_from_csv_path< + 'py, + G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, +>( + graph: &G, + path: &PathBuf, + src: &str, + dst: &str, + metadata: &[&str], + shared_metadata: Option<&HashMap>, + layer: Option<&str>, + layer_col: Option<&str>, + csv_options: Option<&CsvReadOptions>, + schema: Option>>, +) -> Result<(), GraphError> { + let mut cols_to_check = vec![src, dst]; + if let Some(ref layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); + } + cols_to_check.extend_from_slice(metadata); + + // get the CSV file paths + let csv_paths = collect_csv_paths(path)?; + + let df_view = process_csv_paths_df(&csv_paths, cols_to_check.clone(), csv_options, schema)?; + df_view.check_cols_exist(&cols_to_check)?; + load_edges_props_from_df( + df_view, + src, + dst, + metadata, + shared_metadata, + layer, + layer_col, + graph, + ) +} + +pub(crate) fn load_edge_deletions_from_csv_path< + 'py, + G: StaticGraphViewOps + PropertyAdditionOps + 
AdditionOps, +>( + graph: &G, + path: &PathBuf, + time: &str, + src: &str, + dst: &str, + layer: Option<&str>, + layer_col: Option<&str>, + csv_options: Option<&CsvReadOptions>, +) -> Result<(), GraphError> { + let mut cols_to_check = vec![src, dst, time]; + if let Some(ref layer_col) = layer_col { + cols_to_check.push(layer_col.as_ref()); + } + + // get the CSV file paths + let csv_paths = collect_csv_paths(path)?; + + let df_view = process_csv_paths_df(&csv_paths, cols_to_check.clone(), csv_options, None)?; + df_view.check_cols_exist(&cols_to_check)?; + load_edge_deletions_from_df( + df_view, + time, + src, + dst, + layer, + layer_col, + graph.core_graph(), + ) +} + fn get_csv_reader(filename: &str, file: File) -> Box { // Support bz2 and gz compression if filename.ends_with(".csv.gz") { From 10b41bddc8fdd39ab0742967c19f2bc890b9707a Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 19 Dec 2025 03:58:58 -0500 Subject: [PATCH 52/55] Added pyarrow.DataType import to gen-stubs.py and pyarrow-stubs to dev optional-dependencies in pyproject.toml. General clean-up before adding other functions (load_edge, load_node_metadata, ...) in python graph. --- python/pyproject.toml | 2 +- python/python/raphtory/__init__.pyi | 4 +- .../python/raphtory/algorithms/__init__.pyi | 1 + python/python/raphtory/filter/__init__.pyi | 1 + python/python/raphtory/graph_gen/__init__.pyi | 1 + .../python/raphtory/graph_loader/__init__.pyi | 1 + python/python/raphtory/graphql/__init__.pyi | 1 + python/python/raphtory/iterables/__init__.pyi | 1 + .../python/raphtory/node_state/__init__.pyi | 1 + python/python/raphtory/vectors/__init__.pyi | 1 + python/scripts/gen-stubs.py | 1 + raphtory/src/io/parquet_loaders.rs | 13 ++++++ raphtory/src/python/graph/graph.rs | 46 ++++--------------- raphtory/src/python/graph/io/arrow_loaders.rs | 31 +++++++++++++ 14 files changed, 65 insertions(+), 40 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 27e1952a0a..5d8310936a 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -33,7 +33,7 @@ pyvis = ["pyvis >= 0.3.2"] networkx = ["networkx >= 2.6.3"] export = ["raphtory[pyvis,networkx]"] all = ["raphtory[export,plot]"] -dev = ["docstring_parser >= 0.16", "pandas-stubs", "maturin>=1.8.3", "tox>=4.25"] +dev = ["docstring_parser >= 0.16", "pandas-stubs", "pyarrow-stubs", "maturin>=1.8.3", "tox>=4.25"] test = ["raphtory[all]", "requests >= 2.31.0", "pyjwt[crypto] >= 2.10.1", "pytest >= 8", "pytest-benchmark >= 5.1.0", "polars >= 1.35.2", "fireducks; sys_platform != 'win32' and python_version < '3.14'", "duckdb >= 1.4.2"] tox = ["nbmake"] diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi index b14a149d12..24ffdb14b0 100644 --- a/python/python/raphtory/__init__.pyi +++ b/python/python/raphtory/__init__.pyi @@ -23,6 +23,7 @@ import numpy as np from numpy.typing import NDArray from datetime import datetime from pandas import DataFrame +from pyarrow import DataType from os import PathLike import networkx as nx # type: ignore import pyvis # type: ignore @@ -1256,7 +1257,7 @@ class Graph(GraphView): GraphError: If the operation fails. 
""" - def load_nodes(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, schema: Any = None, csv_options=None) -> None: + def load_nodes(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, schema: Optional[list[tuple[str, DataType | PropType | str]]] = None, csv_options: Optional[dict[str, str | bool]] = None) -> None: """ Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. @@ -1273,6 +1274,7 @@ class Graph(GraphView): metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. schema (list[tuple[str, DataType | PropType | str]], optional): A list of (column_name, column_type) tuples to cast column types to. Defaults to None. + csv_options (dict[str, str | bool], optional): Allows specifying delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Returns: None: This function does not return a value if the operation is successful. diff --git a/python/python/raphtory/algorithms/__init__.pyi b/python/python/raphtory/algorithms/__init__.pyi index 43997ee0a6..e7f5fee0f5 100644 --- a/python/python/raphtory/algorithms/__init__.pyi +++ b/python/python/raphtory/algorithms/__init__.pyi @@ -23,6 +23,7 @@ import numpy as np from numpy.typing import NDArray from datetime import datetime from pandas import DataFrame +from pyarrow import DataType from os import PathLike import networkx as nx # type: ignore import pyvis # type: ignore diff --git a/python/python/raphtory/filter/__init__.pyi b/python/python/raphtory/filter/__init__.pyi index 59a3a90922..0629934002 100644 --- a/python/python/raphtory/filter/__init__.pyi +++ b/python/python/raphtory/filter/__init__.pyi @@ -20,6 +20,7 @@ import numpy as np from numpy.typing import NDArray from datetime import datetime from pandas import DataFrame +from pyarrow import DataType from os import PathLike import networkx as nx # type: ignore import pyvis # type: ignore diff --git a/python/python/raphtory/graph_gen/__init__.pyi b/python/python/raphtory/graph_gen/__init__.pyi index 6605cf3f8c..23f524982d 100644 --- a/python/python/raphtory/graph_gen/__init__.pyi +++ b/python/python/raphtory/graph_gen/__init__.pyi @@ -24,6 +24,7 @@ import numpy as np from numpy.typing import NDArray from datetime import datetime from pandas import DataFrame +from pyarrow import DataType from os import PathLike import networkx as nx # type: ignore import pyvis # type: ignore diff --git a/python/python/raphtory/graph_loader/__init__.pyi b/python/python/raphtory/graph_loader/__init__.pyi index e23ec57cd2..c1704f4bc4 100644 --- a/python/python/raphtory/graph_loader/__init__.pyi +++ b/python/python/raphtory/graph_loader/__init__.pyi @@ -24,6 +24,7 @@ import numpy as np from numpy.typing import NDArray from datetime import datetime from pandas import DataFrame +from pyarrow import DataType from os import PathLike import networkx as nx # type: ignore import 
pyvis # type: ignore diff --git a/python/python/raphtory/graphql/__init__.pyi b/python/python/raphtory/graphql/__init__.pyi index 061f1a5e27..b38bec1fc3 100644 --- a/python/python/raphtory/graphql/__init__.pyi +++ b/python/python/raphtory/graphql/__init__.pyi @@ -20,6 +20,7 @@ import numpy as np from numpy.typing import NDArray from datetime import datetime from pandas import DataFrame +from pyarrow import DataType from os import PathLike import networkx as nx # type: ignore import pyvis # type: ignore diff --git a/python/python/raphtory/iterables/__init__.pyi b/python/python/raphtory/iterables/__init__.pyi index 07e168700c..12233100d5 100644 --- a/python/python/raphtory/iterables/__init__.pyi +++ b/python/python/raphtory/iterables/__init__.pyi @@ -21,6 +21,7 @@ import numpy as np from numpy.typing import NDArray from datetime import datetime from pandas import DataFrame +from pyarrow import DataType from os import PathLike import networkx as nx # type: ignore import pyvis # type: ignore diff --git a/python/python/raphtory/node_state/__init__.pyi b/python/python/raphtory/node_state/__init__.pyi index a91301bb5d..358a56e8a2 100644 --- a/python/python/raphtory/node_state/__init__.pyi +++ b/python/python/raphtory/node_state/__init__.pyi @@ -20,6 +20,7 @@ import numpy as np from numpy.typing import NDArray from datetime import datetime from pandas import DataFrame +from pyarrow import DataType from os import PathLike import networkx as nx # type: ignore import pyvis # type: ignore diff --git a/python/python/raphtory/vectors/__init__.pyi b/python/python/raphtory/vectors/__init__.pyi index c29c7c88ad..4b88acf829 100644 --- a/python/python/raphtory/vectors/__init__.pyi +++ b/python/python/raphtory/vectors/__init__.pyi @@ -20,6 +20,7 @@ import numpy as np from numpy.typing import NDArray from datetime import datetime from pandas import DataFrame +from pyarrow import DataType from os import PathLike import networkx as nx # type: ignore import pyvis # type: ignore diff --git a/python/scripts/gen-stubs.py b/python/scripts/gen-stubs.py index ee83330014..b973b596ae 100755 --- a/python/scripts/gen-stubs.py +++ b/python/scripts/gen-stubs.py @@ -15,6 +15,7 @@ "from numpy.typing import NDArray", "from datetime import datetime", "from pandas import DataFrame", + "from pyarrow import DataType", "from os import PathLike", "import networkx as nx # type: ignore", "import pyvis # type: ignore", diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index d797ac3c12..2081fa25c0 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -11,6 +11,7 @@ use raphtory_api::core::entities::properties::prop::{Prop, PropType}; use std::{ collections::HashMap, error::Error, + ffi::OsStr, fs, fs::File, path::{Path, PathBuf}, @@ -19,6 +20,18 @@ use std::{ #[cfg(feature = "storage")] use {arrow::array::StructArray, pometry_storage::RAError}; +pub(crate) fn is_parquet_path(path: &PathBuf) -> Result { + if path.is_dir() { + Ok(fs::read_dir(&path)?.any(|entry| { + entry.map_or(false, |e| { + e.path().extension().and_then(OsStr::to_str) == Some("parquet") + }) + })) + } else { + Ok(path.extension().and_then(OsStr::to_str) == Some("parquet")) + } +} + pub fn load_nodes_from_parquet< G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index acd91e14a5..9bd8fba109 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -19,9 
+19,9 @@ use crate::{ index::PyIndexSpec, io::{ arrow_loaders::{ - load_edge_metadata_from_arrow_c_stream, load_edges_from_arrow_c_stream, - load_node_metadata_from_arrow_c_stream, load_nodes_from_arrow_c_stream, - load_nodes_from_csv_path, CsvReadOptions, + convert_py_schema, is_csv_path, load_edge_metadata_from_arrow_c_stream, + load_edges_from_arrow_c_stream, load_node_metadata_from_arrow_c_stream, + load_nodes_from_arrow_c_stream, load_nodes_from_csv_path, CsvReadOptions, }, pandas_loaders::*, }, @@ -638,7 +638,6 @@ impl PyGraph { PyGraph::py_from_db_graph(self.graph.event_graph()) } - // TODO: Fix DataType in schema argument below /// Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), /// a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. /// The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, @@ -654,6 +653,7 @@ impl PyGraph { /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. /// schema (list[tuple[str, DataType | PropType | str]], optional): A list of (column_name, column_type) tuples to cast column types to. Defaults to None. + /// csv_options (dict[str, str | bool], optional): Allows specifying delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. /// /// Returns: /// None: This function does not return a value if the operation is successful. @@ -678,15 +678,7 @@ impl PyGraph { ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - let column_schema = schema.map(|s| { - if let Ok(list) = s.extract::>() { - Ok(list.into_iter().collect::>()) - } else if let Ok(map) = s.extract::>() { - Ok(map) - } else { - Err(GraphError::from(PyValueError::new_err("Argument 'schema' must either be a list of (column_name, column_type) tuples or a dict mapping {'column_name' : column_type}"))) - } - }).transpose()?; + let column_schema = convert_py_schema(schema)?; if data.hasattr("__arrow_c_stream__")? 
{ load_nodes_from_arrow_c_stream( &self.graph, @@ -701,31 +693,9 @@ impl PyGraph { column_schema, ) } else if let Ok(path) = data.extract::() { - // handles Strings too - let is_parquet = if path.is_dir() { - fs::read_dir(&path)?.any(|entry| { - entry.map_or(false, |e| { - e.path().extension().and_then(OsStr::to_str) == Some("parquet") - }) - }) - } else { - path.extension().and_then(OsStr::to_str) == Some("parquet") - }; - - let is_csv = if path.is_dir() { - fs::read_dir(&path)?.any(|entry| { - entry.map_or(false, |e| { - let p = e.path(); - let s = p.to_string_lossy(); - s.ends_with(".csv") || s.ends_with(".csv.gz") || s.ends_with(".csv.bz2") - }) - }) - } else { - let path_str = path.to_string_lossy(); - path_str.ends_with(".csv") - || path_str.ends_with(".csv.gz") - || path_str.ends_with(".csv.bz2") - }; + // extracting PathBuf handles Strings too + let is_parquet = is_parquet_path(&path)?; + let is_csv = is_csv_path(&path)?; // fail before loading anything at all to avoid loading partial data if !is_csv && csv_options.is_some() { diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index a06ea51b46..979d456941 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -39,6 +39,20 @@ use std::{ const CHUNK_SIZE: usize = 1_000_000; // split large chunks so progress bar updates reasonably +pub(crate) fn convert_py_schema( + schema: Option>, +) -> Result>, GraphError> { + schema.map(|s| { + if let Ok(list) = s.extract::>() { + Ok(list.into_iter().collect::>()) + } else if let Ok(map) = s.extract::>() { + Ok(map) + } else { + Err(GraphError::from(PyValueError::new_err("Argument 'schema' must either be a list of (column_name, column_type) tuples or a dict mapping {'column_name' : column_type}"))) + } + }).transpose() +} + pub(crate) fn load_nodes_from_arrow_c_stream< 'py, G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, @@ -362,6 +376,23 @@ fn split_into_chunks(batch: &RecordBatch, indices: &[usize]) -> Vec Result { + if path.is_dir() { + Ok(fs::read_dir(&path)?.any(|entry| { + entry.map_or(false, |e| { + let p = e.path(); + let s = p.to_string_lossy(); + s.ends_with(".csv") || s.ends_with(".csv.gz") || s.ends_with(".csv.bz2") + }) + })) + } else { + let path_str = path.to_string_lossy(); + Ok(path_str.ends_with(".csv") + || path_str.ends_with(".csv.gz") + || path_str.ends_with(".csv.bz2")) + } +} + /// CSV options we support, passed as Python dict pub(crate) struct CsvReadOptions { delimiter: Option, From da68b8c1b69aecd06eec1abbe1ce631ace7df8ad Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 19 Dec 2025 23:08:26 -0500 Subject: [PATCH 53/55] Removed load_*_from_df, load_*_from_pandas, and load_*_from_parquet functions. Added load_edges, load_node_metadata, load_edge_metadata functions to PyGraph and PyPersistentGraph. Removed Pandas loaders. 
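As an illustration of the unified loader API after this change, a minimal
sketch (the CSV file name is hypothetical; the column names follow the
btc_dataset fixtures used in the tests, and the schema/csv_options keys
mirror convert_py_schema and CsvReadOptions introduced earlier in this series):

    import pyarrow as pa
    from raphtory import Graph

    g = Graph()
    # Single entry point: accepts Arrow streams, CSV/Parquet files, or directories.
    g.load_edges(
        data="edges.csv",
        time="block_timestamp",
        src="inputs_address",
        dst="outputs_address",
        schema={"block_timestamp": pa.timestamp("ms", tz="UTC")},
        csv_options={"delimiter": ",", "has_header": True},
    )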
---
 python/python/raphtory/__init__.pyi           | 428 ++------
 raphtory/src/io/parquet_loaders.rs            |   5 +-
 raphtory/src/python/graph/disk_graph.rs       |   6 +-
 raphtory/src/python/graph/graph.rs            | 569 +++++-------
 .../src/python/graph/graph_with_deletions.rs  | 863 ++++++++----------
 raphtory/src/python/graph/io/arrow_loaders.rs |  48 +-
 raphtory/src/python/graph/io/mod.rs           |   1 -
 .../src/python/graph/io/pandas_loaders.rs     | 287 ------
 raphtory/src/serialise/parquet/mod.rs         |   8 +-
 9 files changed, 737 insertions(+), 1478 deletions(-)
 delete mode 100644 raphtory/src/python/graph/io/pandas_loaders.rs

diff --git a/python/python/raphtory/__init__.pyi b/python/python/raphtory/__init__.pyi
index 24ffdb14b0..9b0a253104 100644
--- a/python/python/raphtory/__init__.pyi
+++ b/python/python/raphtory/__init__.pyi
@@ -1056,11 +1056,12 @@ class Graph(GraphView):
             Graph: the loaded graph with initialised cache
         """

-    def load_edge_metadata_from_df(self, data: Any, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None:
+    def load_edge_metadata(self, data: Any, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None, schema: Optional[list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str]] = None, csv_options: Optional[dict[str, str | bool]] = None) -> None:
         """
-        Load edge metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method).
-        This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes,
-        Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query)
+        Load edge metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method),
+        a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files.
+        The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes,
+        Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query).

         Arguments:
             data (Any): The data source containing edge information.
             src (str): The column name for the source node.
             dst (str): The column name for the destination node.
             metadata (List[str], optional): List of edge metadata column names. Defaults to None.
             shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None.
             layer (str, optional): The edge layer name. Defaults to None.
             layer_col (str, optional): The edge layer column name in a dataframe. Defaults to None.
+            schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None.
+            csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None.

         Returns:
             None: This function does not return a value if the operation is successful.

         Raises:
             GraphError: If the operation fails.
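+
+        Example:
+            A minimal sketch (the file name and column names are hypothetical,
+            and the corresponding edges are assumed to already exist in the graph):
+
+            >>> g = Graph()
+            >>> g.load_edge_metadata(data="edges.csv", src="src", dst="dst", metadata=["weight"])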
""" - def load_edge_props_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: - """ - Load edge properties from a Pandas DataFrame. - - Arguments: - df (DataFrame): The Pandas DataFrame containing edge information. - src (str): The column name for the source node. - dst (str): The column name for the destination node. - metadata (List[str], optional): List of edge metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - layer (str, optional): The edge layer name. Defaults to None. - layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - - def load_edge_props_from_parquet(self, parquet_path: str, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edges(self, data: Any, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None, schema: Optional[list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str]] = None, csv_options: Optional[dict[str, str | bool]] = None) -> None: """ - Load edge properties from parquet file - - Arguments: - parquet_path (str): Parquet file or directory of Parquet files path containing edge information. - src (str): The column name for the source node. - dst (str): The column name for the destination node. - metadata (List[str], optional): List of edge metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - layer (str, optional): The edge layer name. Defaults to None. - layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - - def load_edges_from_df(self, data: Any, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: - """ - Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). 
Arguments: data (Any): The data source containing the edges. time (str): The column name for the update timestamps. - src (str): The column name for the source node ids. - dst (str): The column name for the destination node ids. + src (str): The column name for the source node IDs. + dst (str): The column name for the destination node IDs. properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. layer_col (str, optional): The edge layer column name in a dataframe. Cannot be used in combination with layer. Defaults to None. + schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. + csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. Returns: None: This function does not return a value if the operation is successful. @@ -1142,50 +1108,6 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: - """ - Load edges from a Pandas DataFrame into the graph. - - Arguments: - df (DataFrame): The Pandas DataFrame containing the edges. - time (str): The column name for the update timestamps. - src (str): The column name for the source node ids. - dst (str): The column name for the destination node ids. - properties (List[str], optional): List of edge property column names. Defaults to None. - metadata (List[str], optional): List of edge metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. - layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - - def load_edges_from_parquet(self, parquet_path: str, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: - """ - Load edges from a Parquet file into the graph. - - Arguments: - parquet_path (str): Parquet file or directory of Parquet files path containing edges - time (str): The column name for the update timestamps. - src (str): The column name for the source node ids. - dst (str): The column name for the destination node ids. - properties (List[str], optional): List of edge property column names. Defaults to None. 
- metadata (List[str], optional): List of edge metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. - layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - @staticmethod def load_from_file(path: str) -> Graph: """ @@ -1198,11 +1120,12 @@ class Graph(GraphView): Graph: """ - def load_node_metadata_from_df(self, data: Any, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_node_metadata(self, data: Any, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, schema: Optional[list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str]] = None, csv_options: Optional[dict[str, str | bool]] = None) -> None: """ - Load node metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + Load node metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). Arguments: data (Any): The data source containing node information. @@ -1211,6 +1134,8 @@ class Graph(GraphView): node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. + csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. Returns: None: This function does not return a value if the operation is successful. @@ -1219,50 +1144,12 @@ class Graph(GraphView): GraphError: If the operation fails. """ - def load_node_props_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: - """ - Load node properties from a Pandas DataFrame. 
- - Arguments: - df (DataFrame): The Pandas DataFrame containing node information. - id(str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. - metadata (List[str], optional): List of node metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - - def load_node_props_from_parquet(self, parquet_path: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: - """ - Load node properties from a parquet file. - - Arguments: - parquet_path (str): Parquet file or directory of Parquet files path containing node information. - id(str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. - metadata (List[str], optional): List of node metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - - def load_nodes(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, schema: Optional[list[tuple[str, DataType | PropType | str]]] = None, csv_options: Optional[dict[str, str | bool]] = None) -> None: + def load_nodes(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, schema: Optional[list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str]] = None, csv_options: Optional[dict[str, str | bool]] = None) -> None: """ Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, - Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). Arguments: data (Any): The data source containing the nodes. @@ -1273,8 +1160,8 @@ class Graph(GraphView): properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. 
shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - schema (list[tuple[str, DataType | PropType | str]], optional): A list of (column_name, column_type) tuples to cast column types to. Defaults to None. - csv_options (dict[str, str | bool], optional): Allows specifying delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. + schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. + csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. Returns: None: This function does not return a value if the operation is successful. @@ -1739,11 +1626,12 @@ class PersistentGraph(GraphView): PersistentGraph: the loaded graph with initialised cache """ - def load_edge_deletions_from_df(self, data: Any, time: str, src: str, dst: str, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edge_deletions(self, data: Any, time: str, src: str, dst: str, layer: Optional[str] = None, layer_col: Optional[str] = None, schema: Optional[list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str]] = None, csv_options: Optional[dict[str, str | bool]] = None) -> None: """ - Load edge deletions into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + Load edge deletions into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). Arguments: data (Any): The data source containing the edges. @@ -1752,6 +1640,8 @@ class PersistentGraph(GraphView): dst (str): The column name for the destination node ids. layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. layer_col (str, optional): The edge layer col name in the data source. Cannot be used in combination with layer. Defaults to None. + schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. + csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. Returns: None: This function does not return a value, if the operation is successful. @@ -1760,49 +1650,12 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. 
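+
+        Example:
+            A minimal sketch, assuming a dataframe df with hypothetical
+            "time", "src" and "dst" columns:
+
+                g = PersistentGraph()
+                g.load_edge_deletions(df, time="time", src="src", dst="dst")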
""" - def load_edge_deletions_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: - """ - Load edges deletions from a Pandas DataFrame into the graph. - - Arguments: - df (DataFrame): The Pandas DataFrame containing the edges. - time (str): The column name for the update timestamps. - src (str): The column name for the source node ids. - dst (str): The column name for the destination node ids. - layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. - layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - - def load_edge_deletions_from_parquet(self, parquet_path: str, time: str, src: str, dst: str, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edge_metadata(self, data: Any, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None, schema: Optional[list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str]] = None, csv_options: Optional[dict[str, str | bool]] = None) -> None: """ - Load edges deletions from a Parquet file into the graph. - - Arguments: - parquet_path (str): Parquet file or directory of Parquet files path containing node information. - src (str): The column name for the source node ids. - dst (str): The column name for the destination node ids. - time (str): The column name for the update timestamps. - layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. - layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - - def load_edge_metadata_from_df(self, data: Any, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: - """ - Load edge metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + Load edge metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). Arguments: data (Any): The data source containing edge information. @@ -1812,6 +1665,8 @@ class PersistentGraph(GraphView): shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. layer (str, optional): The edge layer name. Defaults to None. 
layer_col (str, optional): The edge layer column name in a dataframe. Defaults to None. + schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. + csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. Returns: None: This function does not return a value if the operation is successful. @@ -1820,62 +1675,25 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edge_props_from_pandas(self, df: DataFrame, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: + def load_edges(self, data: Any, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None, schema: Optional[list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str]] = None, csv_options: Optional[dict[str, str | bool]] = None) -> None: """ - Load edge properties from a Pandas DataFrame. - - Arguments: - df (DataFrame): The Pandas DataFrame containing edge information. - src (str): The column name for the source node. - dst (str): The column name for the destination node. - metadata (List[str], optional): List of edge metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - layer (str, optional): The edge layer name. Defaults to None. - layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - - def load_edge_props_from_parquet(self, parquet_path: str, src: str, dst: str, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: - """ - Load edge properties from parquet file - - Arguments: - parquet_path (str): Parquet file or directory of Parquet files path containing edge information. - src (str): The column name for the source node. - dst (str): The column name for the destination node. - metadata (List[str], optional): List of edge metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - layer (str, optional): The edge layer name. Defaults to None. - layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. 
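+    # A minimal sketch of load_edge_metadata, assuming a hypothetical directory
+    # "edge_meta/" of Parquet files with "src", "dst" and "weight_class" columns:
+    #
+    #     g = PersistentGraph()
+    #     g.load_edge_metadata("edge_meta/", src="src", dst="dst",
+    #                          metadata=["weight_class"])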
- """ - - def load_edges_from_df(self, data: Any, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: - """ - Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). Arguments: data (Any): The data source containing the edges. time (str): The column name for the update timestamps. - src (str): The column name for the source node ids. - dst (str): The column name for the destination node ids. + src (str): The column name for the source node IDs. + dst (str): The column name for the destination node IDs. properties (List[str], optional): List of edge property column names. Defaults to None. metadata (List[str], optional): List of edge metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. layer_col (str, optional): The edge layer column name in a dataframe. Cannot be used in combination with layer. Defaults to None. + schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. + csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. Returns: None: This function does not return a value if the operation is successful. @@ -1884,50 +1702,6 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_edges_from_pandas(self, df: DataFrame, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: - """ - Load edges from a Pandas DataFrame into the graph. - - Arguments: - df (DataFrame): The Pandas DataFrame containing the edges. - time (str): The column name for the update timestamps. - src (str): The column name for the source node ids. - dst (str): The column name for the destination node ids. - properties (List[str], optional): List of edge property column names. Defaults to None. - metadata (List[str], optional): List of edge metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. 
- layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. - layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - - def load_edges_from_parquet(self, parquet_path: str, time: str, src: str, dst: str, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, layer: Optional[str] = None, layer_col: Optional[str] = None) -> None: - """ - Load edges from a Parquet file into the graph. - - Arguments: - parquet_path (str): Parquet file or directory of Parquet files path containing edges - time (str): The column name for the update timestamps. - src (str): The column name for the source node ids. - dst (str): The column name for the destination node ids. - properties (List[str], optional): List of edge property column names. Defaults to None. - metadata (List[str], optional): List of edge metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. - layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - @staticmethod def load_from_file(path: str) -> PersistentGraph: """ @@ -1940,11 +1714,12 @@ class PersistentGraph(GraphView): PersistentGraph: """ - def load_node_metadata_from_df(self, data: Any, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_node_metadata(self, data: Any, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, schema: Optional[list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str]] = None, csv_options: Optional[dict[str, str | bool]] = None) -> None: """ - Load node metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + Load node metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). Arguments: data (Any): The data source containing node information. @@ -1953,6 +1728,8 @@ class PersistentGraph(GraphView): node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. 
metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. + csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. Returns: None: This function does not return a value if the operation is successful. @@ -1961,49 +1738,12 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_node_props_from_pandas(self, df: DataFrame, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: - """ - Load node properties from a Pandas DataFrame. - - Arguments: - df (DataFrame): The Pandas DataFrame containing node information. - id(str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. - metadata (List[str], optional): List of node metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - - def load_node_props_from_parquet(self, parquet_path: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: + def load_nodes(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None, schema: Optional[list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str]] = None, csv_options: Optional[dict[str, str | bool]] = None) -> None: """ - Load node properties from a parquet file. - - Arguments: - parquet_path (str): Parquet file or directory of Parquet files path containing node information. - id(str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. - metadata (List[str], optional): List of node metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. 
- """ - - def load_nodes_from_df(self, data: Any, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: - """ - Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). Arguments: data (Any): The data source containing the nodes. @@ -2014,6 +1754,8 @@ class PersistentGraph(GraphView): properties (List[str], optional): List of node property column names. Defaults to None. metadata (List[str], optional): List of node metadata column names. Defaults to None. shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. + csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. Returns: None: This function does not return a value if the operation is successful. @@ -2022,48 +1764,6 @@ class PersistentGraph(GraphView): GraphError: If the operation fails. """ - def load_nodes_from_pandas(self, df: DataFrame, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: - """ - Load nodes from a Pandas DataFrame into the graph. - - Arguments: - df (DataFrame): The Pandas DataFrame containing the nodes. - time (str): The column name for the timestamps. - id (str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. - properties (List[str], optional): List of node property column names. Defaults to None. - metadata (List[str], optional): List of node metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. 
- """ - - def load_nodes_from_parquet(self, parquet_path: str, time: str, id: str, node_type: Optional[str] = None, node_type_col: Optional[str] = None, properties: Optional[List[str]] = None, metadata: Optional[List[str]] = None, shared_metadata: Optional[PropInput] = None) -> None: - """ - Load nodes from a Parquet file into the graph. - - Arguments: - parquet_path (str): Parquet file or directory of Parquet files containing the nodes - time (str): The column name for the timestamps. - id (str): The column name for the node IDs. - node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. - properties (List[str], optional): List of node property column names. Defaults to None. - metadata (List[str], optional): List of node metadata column names. Defaults to None. - shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - - Returns: - None: This function does not return a value, if the operation is successful. - - Raises: - GraphError: If the operation fails. - """ - def node(self, id: str | int) -> Optional[MutableNode]: """ Gets the node with the specified id diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index 2081fa25c0..a7e9380767 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -10,7 +10,6 @@ use parquet::arrow::{arrow_reader::ParquetRecordBatchReaderBuilder, ProjectionMa use raphtory_api::core::entities::properties::prop::{Prop, PropType}; use std::{ collections::HashMap, - error::Error, ffi::OsStr, fs, fs::File, @@ -162,7 +161,7 @@ pub fn load_edges_from_parquet< Ok(()) } -pub fn load_node_props_from_parquet< +pub fn load_node_metadata_from_parquet< G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( graph: &G, @@ -206,7 +205,7 @@ pub fn load_node_props_from_parquet< Ok(()) } -pub fn load_edge_props_from_parquet< +pub fn load_edge_metadata_from_parquet< G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, >( graph: &G, diff --git a/raphtory/src/python/graph/disk_graph.rs b/raphtory/src/python/graph/disk_graph.rs index b80f8aa2bb..f28e74418d 100644 --- a/raphtory/src/python/graph/disk_graph.rs +++ b/raphtory/src/python/graph/disk_graph.rs @@ -1,6 +1,5 @@ //! A columnar temporal graph. //! 
-use super::io::pandas_loaders::*; use crate::{ db::{ api::storage::graph::storage_ops::disk_storage::IntoGraph, @@ -9,7 +8,10 @@ use crate::{ errors::GraphError, io::parquet_loaders::read_struct_arrays, prelude::Graph, - python::{graph::graph::PyGraph, types::repr::StructReprBuilder}, + python::{ + graph::{graph::PyGraph, io::arrow_loaders::convert_py_prop_args}, + types::repr::StructReprBuilder, + }, }; use arrow::{array::StructArray, datatypes::Field}; use itertools::Itertools; diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 9bd8fba109..5d7791d149 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -17,13 +17,12 @@ use crate::{ edge::PyEdge, graph_with_deletions::PyPersistentGraph, index::PyIndexSpec, - io::{ - arrow_loaders::{ - convert_py_schema, is_csv_path, load_edge_metadata_from_arrow_c_stream, - load_edges_from_arrow_c_stream, load_node_metadata_from_arrow_c_stream, - load_nodes_from_arrow_c_stream, load_nodes_from_csv_path, CsvReadOptions, - }, - pandas_loaders::*, + io::arrow_loaders::{ + convert_py_prop_args, convert_py_schema, is_csv_path, + load_edge_metadata_from_arrow_c_stream, load_edge_metadata_from_csv_path, + load_edges_from_arrow_c_stream, load_edges_from_csv_path, + load_node_metadata_from_arrow_c_stream, load_node_metadata_from_csv_path, + load_nodes_from_arrow_c_stream, load_nodes_from_csv_path, CsvReadOptions, }, node::PyNode, views::graph_view::PyGraphView, @@ -38,18 +37,13 @@ use crate::{ }; use pyo3::{exceptions::PyValueError, prelude::*, pybacked::PyBackedStr, types::PyDict}; use raphtory_api::{ - core::{ - entities::{properties::prop::PropType, GID}, - storage::arc_str::ArcStr, - }, + core::{entities::GID, storage::arc_str::ArcStr}, python::timeindex::EventTimeComponent, }; use raphtory_storage::core_ops::CoreGraphOps; use std::{ collections::HashMap, - ffi::OsStr, fmt::{Debug, Formatter}, - fs, path::PathBuf, sync::Arc, }; @@ -641,7 +635,7 @@ impl PyGraph { /// Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), /// a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. /// The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, - /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). /// /// Arguments: /// data (Any): The data source containing the nodes. @@ -652,8 +646,8 @@ impl PyGraph { /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - /// schema (list[tuple[str, DataType | PropType | str]], optional): A list of (column_name, column_type) tuples to cast column types to. Defaults to None. - /// csv_options (dict[str, str | bool], optional): Allows specifying delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. 
+ /// schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. + /// csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. /// /// Returns: /// None: This function does not return a value if the operation is successful. @@ -663,9 +657,9 @@ impl PyGraph { #[pyo3( signature = (data, time, id, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None, schema = None, csv_options = None) )] - fn load_nodes<'py>( + fn load_nodes( &self, - data: &Bound<'py, PyAny>, + data: &Bound, time: &str, id: &str, node_type: Option<&str>, @@ -673,7 +667,7 @@ impl PyGraph { properties: Option>, metadata: Option>, shared_metadata: Option>, - schema: Option>, + schema: Option>, csv_options: Option, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); @@ -708,7 +702,7 @@ impl PyGraph { // wrap in Arc to avoid cloning the entire schema for Parquet, CSV, and inner loops in CSV path let arced_schema = column_schema.map(Arc::new); - // support directories with mixed parquet and CSV files + // if-if instead of if-else to support directories with mixed parquet and CSV files if is_parquet { load_nodes_from_parquet( &self.graph, @@ -748,20 +742,23 @@ impl PyGraph { } } - /// Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + /// a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + /// The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). /// /// Arguments: /// data (Any): The data source containing the edges. /// time (str): The column name for the update timestamps. - /// src (str): The column name for the source node ids. - /// dst (str): The column name for the destination node ids. + /// src (str): The column name for the source node IDs. + /// dst (str): The column name for the destination node IDs. /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. /// layer_col (str, optional): The edge layer column name in a dataframe. Cannot be used in combination with layer. Defaults to None. 
+ /// schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. + /// csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. /// /// Returns: /// None: This function does not return a value if the operation is successful. @@ -769,9 +766,9 @@ impl PyGraph { /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (data, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) + signature = (data, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, schema = None, csv_options = None) )] - fn load_edges_from_df( + fn load_edges( &self, data: &Bound, time: &str, @@ -782,127 +779,88 @@ impl PyGraph { shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + schema: Option>, + csv_options: Option, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edges_from_arrow_c_stream( - &self.graph, - data, - time, - src, - dst, - &properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - None, // TODO: Add schema - ) - } + let column_schema = convert_py_schema(schema)?; + if data.hasattr("__arrow_c_stream__")? { + load_edges_from_arrow_c_stream( + &self.graph, + data, + time, + src, + dst, + &properties, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + column_schema, + ) + } else if let Ok(path) = data.extract::() { + // extracting PathBuf handles Strings too + let is_parquet = is_parquet_path(&path)?; + let is_csv = is_csv_path(&path)?; - /// Load edges from a Pandas DataFrame into the graph. - /// - /// Arguments: - /// df (DataFrame): The Pandas DataFrame containing the edges. - /// time (str): The column name for the update timestamps. - /// src (str): The column name for the source node ids. - /// dst (str): The column name for the destination node ids. - /// properties (List[str], optional): List of edge property column names. Defaults to None. - /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. - /// layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. 
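+            // The input was not an Arrow stream, so it is treated as a filesystem
+            // path; the extension checks above decide which loader(s) run below,
+            // which also lets mixed Parquet/CSV directories be ingested in one call.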
- #[pyo3( - signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) - )] - fn load_edges_from_pandas( - &self, - df: &Bound, - time: &str, - src: &str, - dst: &str, - properties: Option>, - metadata: Option>, - shared_metadata: Option>, - layer: Option<&str>, - layer_col: Option<&str>, - ) -> Result<(), GraphError> { - let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edges_from_pandas( - &self.graph, - df, - time, - src, - dst, - &properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - ) - } + // fail before loading anything at all to avoid loading partial data + if !is_csv && csv_options.is_some() { + return Err(GraphError::from(PyValueError::new_err(format!( + "CSV options were passed but no CSV files were detected at {}.", + path.display() + )))); + } - /// Load edges from a Parquet file into the graph. - /// - /// Arguments: - /// parquet_path (str): Parquet file or directory of Parquet files path containing edges - /// time (str): The column name for the update timestamps. - /// src (str): The column name for the source node ids. - /// dst (str): The column name for the destination node ids. - /// properties (List[str], optional): List of edge property column names. Defaults to None. - /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. - /// layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. 
- #[pyo3( - signature = (parquet_path, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) - )] - fn load_edges_from_parquet( - &self, - parquet_path: PathBuf, - time: &str, - src: &str, - dst: &str, - properties: Option>, - metadata: Option>, - shared_metadata: Option>, - layer: Option<&str>, - layer_col: Option<&str>, - ) -> Result<(), GraphError> { - let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edges_from_parquet( - &self.graph, - parquet_path.as_path(), - time, - src, - dst, - &properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - None, - None, - ) + // wrap in Arc to avoid cloning the entire schema for Parquet, CSV, and inner loops in CSV path + let arced_schema = column_schema.map(Arc::new); + + // if-if instead of if-else to support directories with mixed parquet and CSV files + if is_parquet { + load_edges_from_parquet( + &self.graph, + &path, + time, + src, + dst, + &properties, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + None, + arced_schema.clone(), + )?; + } + if is_csv { + load_edges_from_csv_path( + &self.graph, + &path, + time, + src, + dst, + &properties, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + csv_options.as_ref(), + arced_schema.clone(), + )?; + } + if !is_parquet && !is_csv { + return Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' contains invalid path. Paths must either point to a Parquet/CSV file, or a directory containing Parquet/CSV files"))); + } + Ok(()) + } else { + Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' invalid. Valid data sources are: a single Parquet or CSV file, a directory containing Parquet or CSV files, and objects that implement an __arrow_c_stream__ method."))) + } } - /// Load node metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// Load node metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + /// a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + /// The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). /// /// Arguments: /// data (Any): The data source containing node information. @@ -911,6 +869,8 @@ impl PyGraph { /// node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + /// schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. 
+ /// csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. /// /// Returns: /// None: This function does not return a value if the operation is successful. @@ -918,9 +878,9 @@ impl PyGraph { /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (data, id, node_type = None, node_type_col = None, metadata = None, shared_metadata = None) + signature = (data, id, node_type = None, node_type_col = None, metadata = None, shared_metadata = None, schema = None, csv_options = None) )] - fn load_node_metadata_from_df( + fn load_node_metadata( &self, data: &Bound, id: &str, @@ -928,103 +888,78 @@ impl PyGraph { node_type_col: Option<&str>, metadata: Option>, shared_metadata: Option>, + schema: Option>, + csv_options: Option, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_node_metadata_from_arrow_c_stream( - &self.graph, - data, - id, - node_type, - node_type_col, - &metadata, - shared_metadata.as_ref(), - None, // TODO: Add schema - ) - } + let column_schema = convert_py_schema(schema)?; + if data.hasattr("__arrow_c_stream__")? { + load_node_metadata_from_arrow_c_stream( + &self.graph, + data, + id, + node_type, + node_type_col, + &metadata, + shared_metadata.as_ref(), + column_schema, + ) + } else if let Ok(path) = data.extract::() { + // extracting PathBuf handles Strings too + let is_parquet = is_parquet_path(&path)?; + let is_csv = is_csv_path(&path)?; - /// Load node properties from a Pandas DataFrame. - /// - /// Arguments: - /// df (DataFrame): The Pandas DataFrame containing node information. - /// id(str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. - /// metadata (List[str], optional): List of node metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. - #[pyo3( - signature = (df, id, node_type = None, node_type_col = None, metadata = None, shared_metadata = None) - )] - fn load_node_props_from_pandas( - &self, - df: &Bound, - id: &str, - node_type: Option<&str>, - node_type_col: Option<&str>, - metadata: Option>, - shared_metadata: Option>, - ) -> Result<(), GraphError> { - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_node_props_from_pandas( - &self.graph, - df, - id, - node_type, - node_type_col, - &metadata, - shared_metadata.as_ref(), - ) - } + // fail before loading anything at all to avoid loading partial data + if !is_csv && csv_options.is_some() { + return Err(GraphError::from(PyValueError::new_err(format!( + "CSV options were passed but no CSV files were detected at {}.", + path.display() + )))); + } - /// Load node properties from a parquet file. - /// - /// Arguments: - /// parquet_path (str): Parquet file or directory of Parquet files path containing node information. - /// id(str): The column name for the node IDs. 
- /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. - /// metadata (List[str], optional): List of node metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. - #[pyo3( - signature = (parquet_path, id, node_type = None, node_type_col = None, metadata = None, shared_metadata= None) - )] - fn load_node_props_from_parquet( - &self, - parquet_path: PathBuf, - id: &str, - node_type: Option<&str>, - node_type_col: Option<&str>, - metadata: Option>, - shared_metadata: Option>, - ) -> Result<(), GraphError> { - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_node_props_from_parquet( - &self.graph, - parquet_path.as_path(), - id, - node_type, - node_type_col, - &metadata, - shared_metadata.as_ref(), - None, - None, - ) + // wrap in Arc to avoid cloning the entire schema for Parquet, CSV, and inner loops in CSV path + let arced_schema = column_schema.map(Arc::new); + + // if-if instead of if-else to support directories with mixed parquet and CSV files + if is_parquet { + load_node_metadata_from_parquet( + &self.graph, + path.as_path(), + id, + node_type, + node_type_col, + &metadata, + shared_metadata.as_ref(), + None, + arced_schema.clone(), + )?; + } + if is_csv { + load_node_metadata_from_csv_path( + &self.graph, + &path, + id, + node_type, + node_type_col, + &metadata, + shared_metadata.as_ref(), + csv_options.as_ref(), + arced_schema, + )?; + } + if !is_parquet && !is_csv { + return Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' contains invalid path. Paths must either point to a Parquet/CSV file, or a directory containing Parquet/CSV files"))); + } + Ok(()) + } else { + Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' invalid. Valid data sources are: a single Parquet or CSV file, a directory containing Parquet or CSV files, and objects that implement an __arrow_c_stream__ method."))) + } } - /// Load edge metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// Load edge metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + /// a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + /// The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). /// /// Arguments: /// data (Any): The data source containing edge information. @@ -1034,6 +969,8 @@ impl PyGraph { /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. /// layer (str, optional): The edge layer name. 
Defaults to None. /// layer_col (str, optional): The edge layer column name in a dataframe. Defaults to None. + /// schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. + /// csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. /// /// Returns: /// None: This function does not return a value if the operation is successful. @@ -1041,9 +978,9 @@ impl PyGraph { /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (data, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None) + signature = (data, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None, schema = None, csv_options = None) )] - fn load_edge_metadata_from_df( + fn load_edge_metadata( &self, data: &Bound, src: &str, @@ -1052,105 +989,75 @@ impl PyGraph { shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + schema: Option>, + csv_options: Option, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edge_metadata_from_arrow_c_stream( - &self.graph, - data, - src, - dst, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - None, // TODO: Add schema - ) - } + let column_schema = convert_py_schema(schema)?; + if data.hasattr("__arrow_c_stream__")? { + load_edge_metadata_from_arrow_c_stream( + &self.graph, + data, + src, + dst, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + column_schema, + ) + } else if let Ok(path) = data.extract::() { + // extracting PathBuf handles Strings too + let is_parquet = is_parquet_path(&path)?; + let is_csv = is_csv_path(&path)?; - /// Load edge properties from a Pandas DataFrame. - /// - /// Arguments: - /// df (DataFrame): The Pandas DataFrame containing edge information. - /// src (str): The column name for the source node. - /// dst (str): The column name for the destination node. - /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - /// layer (str, optional): The edge layer name. Defaults to None. - /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. 
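+        // The optional metadata column list has been normalised (None became an
+        // empty list); the user-supplied schema, if any, is parsed next before a
+        // loader is chosen.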
- #[pyo3( - signature = (df, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None) - )] - fn load_edge_props_from_pandas( - &self, - df: &Bound, - src: &str, - dst: &str, - metadata: Option>, - shared_metadata: Option>, - layer: Option<&str>, - layer_col: Option<&str>, - ) -> Result<(), GraphError> { - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edge_props_from_pandas( - &self.graph, - df, - src, - dst, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - ) - } + // fail before loading anything at all to avoid loading partial data + if !is_csv && csv_options.is_some() { + return Err(GraphError::from(PyValueError::new_err(format!( + "CSV options were passed but no CSV files were detected at {}.", + path.display() + )))); + } - /// Load edge properties from parquet file - /// - /// Arguments: - /// parquet_path (str): Parquet file or directory of Parquet files path containing edge information. - /// src (str): The column name for the source node. - /// dst (str): The column name for the destination node. - /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - /// layer (str, optional): The edge layer name. Defaults to None. - /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. - #[pyo3( - signature = (parquet_path, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None) - )] - fn load_edge_props_from_parquet( - &self, - parquet_path: PathBuf, - src: &str, - dst: &str, - metadata: Option>, - shared_metadata: Option>, - layer: Option<&str>, - layer_col: Option<&str>, - ) -> Result<(), GraphError> { - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edge_props_from_parquet( - &self.graph, - parquet_path.as_path(), - src, - dst, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - None, - None, - ) + // wrap in Arc to avoid cloning the entire schema for Parquet, CSV, and inner loops in CSV path + let arced_schema = column_schema.map(Arc::new); + + // if-if instead of if-else to support directories with mixed parquet and CSV files + if is_parquet { + load_edge_metadata_from_parquet( + &self.graph, + path.as_path(), + src, + dst, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + None, + arced_schema.clone(), + )?; + } + if is_csv { + load_edge_metadata_from_csv_path( + &self.graph, + &path, + src, + dst, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + csv_options.as_ref(), + arced_schema, + )?; + } + if !is_parquet && !is_csv { + return Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' contains invalid path. Paths must either point to a Parquet/CSV file, or a directory containing Parquet/CSV files"))); + } + Ok(()) + } else { + Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' invalid. 
Valid data sources are: a single Parquet or CSV file, a directory containing Parquet or CSV files, and objects that implement an __arrow_c_stream__ method."))) + } } /// Create graph index diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index 6ff6e8aa09..03b1ca0386 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -5,10 +5,7 @@ //! create windows, and query the graph with a variety of algorithms. //! It is a wrapper around a set of shards, which are the actual graph data structures. //! In Python, this class wraps around the rust graph. -use super::{ - graph::{PyGraph, PyGraphEncoder}, - io::pandas_loaders::*, -}; +use super::graph::{PyGraph, PyGraphEncoder}; use crate::{ db::{ api::mutation::{AdditionOps, PropertyAdditionOps}, @@ -22,9 +19,12 @@ use crate::{ edge::PyEdge, index::PyIndexSpec, io::arrow_loaders::{ - load_edge_deletions_from_arrow_c_stream, load_edge_metadata_from_arrow_c_stream, - load_edges_from_arrow_c_stream, load_node_metadata_from_arrow_c_stream, - load_nodes_from_arrow_c_stream, + convert_py_prop_args, convert_py_schema, is_csv_path, + load_edge_deletions_from_arrow_c_stream, load_edge_deletions_from_csv_path, + load_edge_metadata_from_arrow_c_stream, load_edge_metadata_from_csv_path, + load_edges_from_arrow_c_stream, load_edges_from_csv_path, + load_node_metadata_from_arrow_c_stream, load_node_metadata_from_csv_path, + load_nodes_from_arrow_c_stream, load_nodes_from_csv_path, CsvReadOptions, }, node::PyNode, views::graph_view::PyGraphView, @@ -33,7 +33,7 @@ use crate::{ }, serialise::StableEncode, }; -use pyo3::{prelude::*, pybacked::PyBackedStr}; +use pyo3::{exceptions::PyValueError, prelude::*, pybacked::PyBackedStr}; use raphtory_api::{ core::{ entities::{properties::prop::Prop, GID}, @@ -46,6 +46,7 @@ use std::{ collections::HashMap, fmt::{Debug, Formatter}, path::PathBuf, + sync::Arc, }; /// A temporal graph that allows edges and nodes to be deleted. @@ -572,9 +573,10 @@ impl PyPersistentGraph { PyPersistentGraph::py_from_db_graph(self.graph.persistent_graph()) } - /// Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// Load nodes into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + /// a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + /// The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). /// /// Arguments: /// data (Any): The data source containing the nodes. @@ -585,6 +587,8 @@ impl PyPersistentGraph { /// properties (List[str], optional): List of node property column names. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. 
+ /// schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. + /// csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. /// /// Returns: /// None: This function does not return a value if the operation is successful. @@ -592,11 +596,11 @@ impl PyPersistentGraph { /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (data, time, id, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None) + signature = (data, time, id, node_type = None, node_type_col = None, properties = None, metadata= None, shared_metadata = None, schema = None, csv_options = None) )] - fn load_nodes_from_df<'py>( + fn load_nodes( &self, - data: &Bound<'py, PyAny>, + data: &Bound, time: &str, id: &str, node_type: Option<&str>, @@ -604,127 +608,98 @@ impl PyPersistentGraph { properties: Option>, metadata: Option>, shared_metadata: Option>, + schema: Option>, + csv_options: Option, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_nodes_from_arrow_c_stream( - &self.graph, - data, - time, - id, - node_type, - node_type_col, - &properties, - &metadata, - shared_metadata.as_ref(), - None, // TODO: Add schema - ) - } + let column_schema = convert_py_schema(schema)?; + if data.hasattr("__arrow_c_stream__")? { + load_nodes_from_arrow_c_stream( + &self.graph, + data, + time, + id, + node_type, + node_type_col, + &properties, + &metadata, + shared_metadata.as_ref(), + column_schema, + ) + } else if let Ok(path) = data.extract::() { + // extracting PathBuf handles Strings too + let is_parquet = is_parquet_path(&path)?; + let is_csv = is_csv_path(&path)?; - /// Load nodes from a Pandas DataFrame into the graph. - /// - /// Arguments: - /// df (DataFrame): The Pandas DataFrame containing the nodes. - /// time (str): The column name for the timestamps. - /// id (str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. - /// properties (List[str], optional): List of node property column names. Defaults to None. - /// metadata (List[str], optional): List of node metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. 
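
The node loader has the same two-branch shape (Arrow C stream vs. filesystem path); a minimal sketch, assuming a local nodes.parquet with matching columns:

    import polars as pl
    from raphtory import PersistentGraph

    nodes = pl.DataFrame({"time": [1, 2], "id": [1, 2], "name": ["a", "b"]})
    pg = PersistentGraph()
    pg.load_nodes(data=nodes, time="time", id="id", properties=["name"])  # stream branch
    pg.load_nodes(data="nodes.parquet", time="time", id="id", properties=["name"])  # path branch
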
- #[pyo3(signature = (df,time,id, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None))] - fn load_nodes_from_pandas( - &self, - df: &Bound, - time: &str, - id: &str, - node_type: Option<&str>, - node_type_col: Option<&str>, - properties: Option>, - metadata: Option>, - shared_metadata: Option>, - ) -> Result<(), GraphError> { - let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_nodes_from_pandas( - &self.graph, - df, - time, - id, - node_type, - node_type_col, - &properties, - &metadata, - shared_metadata.as_ref(), - ) - } + // fail before loading anything at all to avoid loading partial data + if !is_csv && csv_options.is_some() { + return Err(GraphError::from(PyValueError::new_err(format!( + "CSV options were passed but no CSV files were detected at {}.", + path.display() + )))); + } - /// Load nodes from a Parquet file into the graph. - /// - /// Arguments: - /// parquet_path (str): Parquet file or directory of Parquet files containing the nodes - /// time (str): The column name for the timestamps. - /// id (str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. - /// properties (List[str], optional): List of node property column names. Defaults to None. - /// metadata (List[str], optional): List of node metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. 
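
The format-specific loader removed here is subsumed by the single method above; a hedged before/after, with a hypothetical path:

    # before this patch series:
    # pg.load_nodes_from_parquet(parquet_path="nodes.parquet", time="time", id="id")
    # after:
    pg.load_nodes(data="nodes.parquet", time="time", id="id")
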
- #[pyo3(signature = (parquet_path, time,id, node_type = None, node_type_col = None, properties = None, metadata = None, shared_metadata = None))] - fn load_nodes_from_parquet( - &self, - parquet_path: PathBuf, - time: &str, - id: &str, - node_type: Option<&str>, - node_type_col: Option<&str>, - properties: Option>, - metadata: Option>, - shared_metadata: Option>, - ) -> Result<(), GraphError> { - let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_nodes_from_parquet( - &self.graph, - parquet_path.as_path(), - time, - id, - node_type, - node_type_col, - &properties, - &metadata, - shared_metadata.as_ref(), - None, - None, - ) + // wrap in Arc to avoid cloning the entire schema for Parquet, CSV, and inner loops in CSV path + let arced_schema = column_schema.map(Arc::new); + + // if-if instead of if-else to support directories with mixed parquet and CSV files + if is_parquet { + load_nodes_from_parquet( + &self.graph, + path.as_path(), + time, + id, + node_type, + node_type_col, + &properties, + &metadata, + shared_metadata.as_ref(), + None, + arced_schema.clone(), + )?; + } + if is_csv { + load_nodes_from_csv_path( + &self.graph, + &path, + time, + id, + node_type, + node_type_col, + &properties, + &metadata, + shared_metadata.as_ref(), + csv_options.as_ref(), + arced_schema, + )?; + } + if !is_parquet && !is_csv { + return Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' contains invalid path. Paths must either point to a Parquet/CSV file, or a directory containing Parquet/CSV files"))); + } + Ok(()) + } else { + Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' invalid. Valid data sources are: a single Parquet or CSV file, a directory containing Parquet or CSV files, and objects that implement an __arrow_c_stream__ method."))) + } } - /// Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// Load edges into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + /// a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + /// The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). /// /// Arguments: /// data (Any): The data source containing the edges. /// time (str): The column name for the update timestamps. - /// src (str): The column name for the source node ids. - /// dst (str): The column name for the destination node ids. + /// src (str): The column name for the source node IDs. + /// dst (str): The column name for the destination node IDs. /// properties (List[str], optional): List of edge property column names. Defaults to None. /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. 
Defaults to None. /// layer_col (str, optional): The edge layer column name in a dataframe. Cannot be used in combination with layer. Defaults to None. + /// schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. + /// csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. /// /// Returns: /// None: This function does not return a value if the operation is successful. @@ -732,9 +707,9 @@ impl PyPersistentGraph { /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (data, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None) + signature = (data, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None, schema = None, csv_options = None) )] - fn load_edges_from_df( + fn load_edges( &self, data: &Bound, time: &str, @@ -745,123 +720,88 @@ impl PyPersistentGraph { shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + schema: Option>, + csv_options: Option, ) -> Result<(), GraphError> { let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edges_from_arrow_c_stream( - &self.graph, - data, - time, - src, - dst, - &properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - None, // TODO: Add schema - ) - } + let column_schema = convert_py_schema(schema)?; + if data.hasattr("__arrow_c_stream__")? { + load_edges_from_arrow_c_stream( + &self.graph, + data, + time, + src, + dst, + &properties, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + column_schema, + ) + } else if let Ok(path) = data.extract::() { + // extracting PathBuf handles Strings too + let is_parquet = is_parquet_path(&path)?; + let is_csv = is_csv_path(&path)?; - /// Load edges from a Pandas DataFrame into the graph. - /// - /// Arguments: - /// df (DataFrame): The Pandas DataFrame containing the edges. - /// time (str): The column name for the update timestamps. - /// src (str): The column name for the source node ids. - /// dst (str): The column name for the destination node ids. - /// properties (List[str], optional): List of edge property column names. Defaults to None. - /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. - /// layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. 
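
Where CSV type inference falls short, the new schema argument casts columns on load. A sketch; the string spellings of the types are an assumption, since the docstring also admits DataType/PropType values:

    pg.load_edges(
        data="edges.csv",                    # hypothetical file
        time="time", src="src", dst="dst",
        properties=["value"],
        schema={"time": "int64", "value": "float64"},  # assumed string type names
        csv_options={"delimiter": ","},
    )
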
- #[pyo3(signature = (df, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None))] - fn load_edges_from_pandas( - &self, - df: &Bound, - time: &str, - src: &str, - dst: &str, - properties: Option>, - metadata: Option>, - shared_metadata: Option>, - layer: Option<&str>, - layer_col: Option<&str>, - ) -> Result<(), GraphError> { - let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edges_from_pandas( - &self.graph, - df, - time, - src, - dst, - &properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - ) - } + // fail before loading anything at all to avoid loading partial data + if !is_csv && csv_options.is_some() { + return Err(GraphError::from(PyValueError::new_err(format!( + "CSV options were passed but no CSV files were detected at {}.", + path.display() + )))); + } - /// Load edges from a Parquet file into the graph. - /// - /// Arguments: - /// parquet_path (str): Parquet file or directory of Parquet files path containing edges - /// time (str): The column name for the update timestamps. - /// src (str): The column name for the source node ids. - /// dst (str): The column name for the destination node ids. - /// properties (List[str], optional): List of edge property column names. Defaults to None. - /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. - /// layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. 
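
Note the deliberate if-if (rather than if-else) over is_parquet/is_csv in the path branch: a directory holding both formats is fully ingested in one call. Sketch, with a hypothetical layout:

    # data/edges/2020.parquet
    # data/edges/2021.csv
    pg.load_edges(data="data/edges", time="time", src="src", dst="dst")
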
- #[pyo3(signature = (parquet_path, time, src, dst, properties = None, metadata = None, shared_metadata = None, layer = None, layer_col = None))] - fn load_edges_from_parquet( - &self, - parquet_path: PathBuf, - time: &str, - src: &str, - dst: &str, - properties: Option>, - metadata: Option>, - shared_metadata: Option>, - layer: Option<&str>, - layer_col: Option<&str>, - ) -> Result<(), GraphError> { - let properties = convert_py_prop_args(properties.as_deref()).unwrap_or_default(); - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edges_from_parquet( - &self.graph, - parquet_path.as_path(), - time, - src, - dst, - &properties, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - None, - None, - ) + // wrap in Arc to avoid cloning the entire schema for Parquet, CSV, and inner loops in CSV path + let arced_schema = column_schema.map(Arc::new); + + // if-if instead of if-else to support directories with mixed parquet and CSV files + if is_parquet { + load_edges_from_parquet( + &self.graph, + &path, + time, + src, + dst, + &properties, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + None, + arced_schema.clone(), + )?; + } + if is_csv { + load_edges_from_csv_path( + &self.graph, + &path, + time, + src, + dst, + &properties, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + csv_options.as_ref(), + arced_schema.clone(), + )?; + } + if !is_parquet && !is_csv { + return Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' contains invalid path. Paths must either point to a Parquet/CSV file, or a directory containing Parquet/CSV files"))); + } + Ok(()) + } else { + Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' invalid. Valid data sources are: a single Parquet or CSV file, a directory containing Parquet or CSV files, and objects that implement an __arrow_c_stream__ method."))) + } } - /// Load edge deletions into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// Load edge deletions into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + /// a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + /// The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). /// /// Arguments: /// data (Any): The data source containing the edges. @@ -870,14 +810,16 @@ impl PyPersistentGraph { /// dst (str): The column name for the destination node ids. /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. /// layer_col (str, optional): The edge layer col name in the data source. Cannot be used in combination with layer. Defaults to None. + /// schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. 
+ /// csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. /// /// Returns: /// None: This function does not return a value, if the operation is successful. /// /// Raises: /// GraphError: If the operation fails. - #[pyo3(signature = (data, time, src, dst, layer = None, layer_col = None))] - fn load_edge_deletions_from_df( + #[pyo3(signature = (data, time, src, dst, layer = None, layer_col = None, schema = None, csv_options = None))] + fn load_edge_deletions( &self, data: &Bound, time: &str, @@ -885,79 +827,77 @@ impl PyPersistentGraph { dst: &str, layer: Option<&str>, layer_col: Option<&str>, + schema: Option>, + csv_options: Option, ) -> Result<(), GraphError> { - load_edge_deletions_from_arrow_c_stream(&self.graph, data, time, src, dst, layer, layer_col) - } + let column_schema = convert_py_schema(schema)?; + if data.hasattr("__arrow_c_stream__")? { + load_edge_deletions_from_arrow_c_stream( + &self.graph, + data, + time, + src, + dst, + layer, + layer_col, + column_schema, + ) + } else if let Ok(path) = data.extract::() { + // extracting PathBuf handles Strings too + let is_parquet = is_parquet_path(&path)?; + let is_csv = is_csv_path(&path)?; - /// Load edges deletions from a Pandas DataFrame into the graph. - /// - /// Arguments: - /// df (DataFrame): The Pandas DataFrame containing the edges. - /// time (str): The column name for the update timestamps. - /// src (str): The column name for the source node ids. - /// dst (str): The column name for the destination node ids. - /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. - /// layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. - #[pyo3(signature = (df, time, src, dst, layer = None, layer_col = None))] - fn load_edge_deletions_from_pandas( - &self, - df: &Bound, - time: &str, - src: &str, - dst: &str, - layer: Option<&str>, - layer_col: Option<&str>, - ) -> Result<(), GraphError> { - load_edge_deletions_from_pandas(&self.graph, df, time, src, dst, layer, layer_col) - } + // fail before loading anything at all to avoid loading partial data + if !is_csv && csv_options.is_some() { + return Err(GraphError::from(PyValueError::new_err(format!( + "CSV options were passed but no CSV files were detected at {}.", + path.display() + )))); + } - /// Load edges deletions from a Parquet file into the graph. - /// - /// Arguments: - /// parquet_path (str): Parquet file or directory of Parquet files path containing node information. - /// src (str): The column name for the source node ids. - /// dst (str): The column name for the destination node ids. - /// time (str): The column name for the update timestamps. - /// layer (str, optional): A value to use as the layer for all edges. Cannot be used in combination with layer_col. Defaults to None. - /// layer_col (str, optional): The edge layer col name in dataframe. Cannot be used in combination with layer. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. 
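
On the persistent graph, deletions take the same source kinds as additions; a sketch assuming a deletions.csv whose rows name the edges to close:

    pg.load_edge_deletions(
        data="deletions.csv",
        time="time", src="src", dst="dst",
        csv_options={"has_header": True},
    )
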
- #[pyo3(signature = (parquet_path, time, src, dst, layer = None, layer_col = None))] - fn load_edge_deletions_from_parquet( - &self, - parquet_path: PathBuf, - time: &str, - src: &str, - dst: &str, - layer: Option<&str>, - layer_col: Option<&str>, - ) -> Result<(), GraphError> { - load_edge_deletions_from_parquet( - &self.graph, - parquet_path.as_path(), - time, - src, - dst, - layer, - layer_col, - None, - None, - ) + // wrap in Arc to avoid cloning the entire schema for Parquet, CSV, and inner loops in CSV path + let arced_schema = column_schema.map(Arc::new); + + // if-if instead of if-else to support directories with mixed parquet and CSV files + if is_parquet { + load_edge_deletions_from_parquet( + &self.graph, + path.as_path(), + time, + src, + dst, + layer, + layer_col, + None, + arced_schema.clone(), + )?; + } + if is_csv { + load_edge_deletions_from_csv_path( + &self.graph, + &path, + time, + src, + dst, + layer, + layer_col, + csv_options.as_ref(), + arced_schema, + )?; + } + if !is_parquet && !is_csv { + return Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' contains invalid path. Paths must either point to a Parquet/CSV file, or a directory containing Parquet/CSV files"))); + } + Ok(()) + } else { + Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' invalid. Valid data sources are: a single Parquet or CSV file, a directory containing Parquet or CSV files, and objects that implement an __arrow_c_stream__ method."))) + } } - /// Load node metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// Load node metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + /// a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + /// The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). /// /// Arguments: /// data (Any): The data source containing node information. @@ -966,6 +906,8 @@ impl PyPersistentGraph { /// node_type_col (str, optional): The node type column name in a dataframe. Cannot be used in combination with node_type. Defaults to None. /// metadata (List[str], optional): List of node metadata column names. Defaults to None. /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. + /// schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. + /// csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. /// /// Returns: /// None: This function does not return a value if the operation is successful. @@ -973,9 +915,9 @@ impl PyPersistentGraph { /// Raises: /// GraphError: If the operation fails. 
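
shared_metadata attaches the same key/value pairs to every node touched by the load, on top of any per-row metadata columns; a sketch reusing the polars frame from the earlier node example, with an assumed shared key:

    pg.load_node_metadata(
        data=nodes,
        id="id",
        metadata=["name"],
        shared_metadata={"source": "inventory"},  # added to every node
    )
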
#[pyo3( - signature = (data, id, node_type = None, node_type_col = None, metadata = None, shared_metadata = None) + signature = (data, id, node_type = None, node_type_col = None, metadata = None, shared_metadata = None, schema = None, csv_options = None) )] - fn load_node_metadata_from_df( + fn load_node_metadata( &self, data: &Bound, id: &str, @@ -983,99 +925,78 @@ impl PyPersistentGraph { node_type_col: Option<&str>, metadata: Option>, shared_metadata: Option>, + schema: Option>, + csv_options: Option, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_node_metadata_from_arrow_c_stream( - &self.graph, - data, - id, - node_type, - node_type_col, - &metadata, - shared_metadata.as_ref(), - None, // TODO: Add schema - ) - } + let column_schema = convert_py_schema(schema)?; + if data.hasattr("__arrow_c_stream__")? { + load_node_metadata_from_arrow_c_stream( + &self.graph, + data, + id, + node_type, + node_type_col, + &metadata, + shared_metadata.as_ref(), + column_schema, + ) + } else if let Ok(path) = data.extract::() { + // extracting PathBuf handles Strings too + let is_parquet = is_parquet_path(&path)?; + let is_csv = is_csv_path(&path)?; - /// Load node properties from a Pandas DataFrame. - /// - /// Arguments: - /// df (DataFrame): The Pandas DataFrame containing node information. - /// id(str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. - /// metadata (List[str], optional): List of node metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. - #[pyo3(signature = (df, id, node_type=None, node_type_col=None, metadata = None, shared_metadata = None))] - fn load_node_props_from_pandas( - &self, - df: &Bound, - id: &str, - node_type: Option<&str>, - node_type_col: Option<&str>, - metadata: Option>, - shared_metadata: Option>, - ) -> Result<(), GraphError> { - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_node_props_from_pandas( - &self.graph, - df, - id, - node_type, - node_type_col, - &metadata, - shared_metadata.as_ref(), - ) - } + // fail before loading anything at all to avoid loading partial data + if !is_csv && csv_options.is_some() { + return Err(GraphError::from(PyValueError::new_err(format!( + "CSV options were passed but no CSV files were detected at {}.", + path.display() + )))); + } - /// Load node properties from a parquet file. - /// - /// Arguments: - /// parquet_path (str): Parquet file or directory of Parquet files path containing node information. - /// id(str): The column name for the node IDs. - /// node_type (str, optional): A value to use as the node type for all nodes. Cannot be used in combination with node_type_col. Defaults to None. - /// node_type_col (str, optional): The node type col name in dataframe. Cannot be used in combination with node_type. Defaults to None. - /// metadata (List[str], optional): List of node metadata column names. Defaults to None. 
- /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every node. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. - #[pyo3(signature = (parquet_path, id, node_type = None, node_type_col=None, metadata = None, shared_metadata = None))] - fn load_node_props_from_parquet( - &self, - parquet_path: PathBuf, - id: &str, - node_type: Option<&str>, - node_type_col: Option<&str>, - metadata: Option>, - shared_metadata: Option>, - ) -> Result<(), GraphError> { - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_node_props_from_parquet( - &self.graph, - parquet_path.as_path(), - id, - node_type, - node_type_col, - &metadata, - shared_metadata.as_ref(), - None, - None, - ) + // wrap in Arc to avoid cloning the entire schema for Parquet, CSV, and inner loops in CSV path + let arced_schema = column_schema.map(Arc::new); + + // if-if instead of if-else to support directories with mixed parquet and CSV files + if is_parquet { + load_node_metadata_from_parquet( + &self.graph, + path.as_path(), + id, + node_type, + node_type_col, + &metadata, + shared_metadata.as_ref(), + None, + arced_schema.clone(), + )?; + } + if is_csv { + load_node_metadata_from_csv_path( + &self.graph, + &path, + id, + node_type, + node_type_col, + &metadata, + shared_metadata.as_ref(), + csv_options.as_ref(), + arced_schema, + )?; + } + if !is_parquet && !is_csv { + return Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' contains invalid path. Paths must either point to a Parquet/CSV file, or a directory containing Parquet/CSV files"))); + } + Ok(()) + } else { + Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' invalid. Valid data sources are: a single Parquet or CSV file, a directory containing Parquet or CSV files, and objects that implement an __arrow_c_stream__ method."))) + } } - /// Load edge metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method). - /// This includes, but is not limited to: Pandas dataframes, FireDucks(.pandas) dataframes, - /// Polars dataframes, Arrow tables, DuckDB (eg. DuckDBPyRelation obtained from running an SQL query) + /// Load edge metadata into the graph from any data source that supports the ArrowStreamExportable protocol (by providing an __arrow_c_stream__() method), + /// a path to a CSV or Parquet file, or a directory containing multiple CSV or Parquet files. + /// The following are known to support the ArrowStreamExportable protocol: Pandas dataframes, FireDucks(.pandas) dataframes, + /// Polars dataframes, Arrow tables, DuckDB (e.g. DuckDBPyRelation obtained from running an SQL query). /// /// Arguments: /// data (Any): The data source containing edge information. @@ -1085,6 +1006,8 @@ impl PyPersistentGraph { /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. /// layer (str, optional): The edge layer name. Defaults to None. /// layer_col (str, optional): The edge layer column name in a dataframe. Defaults to None. + /// schema (list[tuple[str, DataType | PropType | str]] | dict[str, DataType | PropType | str], optional): A list of (column_name, column_type) tuples or dict of {"column_name": column_type} to cast columns to. Defaults to None. 
+ /// csv_options (dict[str, str | bool], optional): A dictionary of CSV reading options such as delimiter, comment, escape, quote, and terminator characters, as well as allow_truncated_rows and has_header flags. Defaults to None. /// /// Returns: /// None: This function does not return a value if the operation is successful. @@ -1092,9 +1015,9 @@ impl PyPersistentGraph { /// Raises: /// GraphError: If the operation fails. #[pyo3( - signature = (data, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None) + signature = (data, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None, schema = None, csv_options = None) )] - fn load_edge_metadata_from_df( + fn load_edge_metadata( &self, data: &Bound, src: &str, @@ -1103,101 +1026,75 @@ impl PyPersistentGraph { shared_metadata: Option>, layer: Option<&str>, layer_col: Option<&str>, + schema: Option>, + csv_options: Option, ) -> Result<(), GraphError> { let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edge_metadata_from_arrow_c_stream( - &self.graph, - data, - src, - dst, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - None, // TODO: Add schema - ) - } + let column_schema = convert_py_schema(schema)?; + if data.hasattr("__arrow_c_stream__")? { + load_edge_metadata_from_arrow_c_stream( + &self.graph, + data, + src, + dst, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + column_schema, + ) + } else if let Ok(path) = data.extract::() { + // extracting PathBuf handles Strings too + let is_parquet = is_parquet_path(&path)?; + let is_csv = is_csv_path(&path)?; - /// Load edge properties from a Pandas DataFrame. - /// - /// Arguments: - /// df (DataFrame): The Pandas DataFrame containing edge information. - /// src (str): The column name for the source node. - /// dst (str): The column name for the destination node. - /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - /// layer (str, optional): The edge layer name. Defaults to None. - /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. - #[pyo3(signature = (df, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None))] - fn load_edge_props_from_pandas( - &self, - df: &Bound, - src: &str, - dst: &str, - metadata: Option>, - shared_metadata: Option>, - layer: Option<&str>, - layer_col: Option<&str>, - ) -> Result<(), GraphError> { - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edge_props_from_pandas( - &self.graph, - df, - src, - dst, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - ) - } + // fail before loading anything at all to avoid loading partial data + if !is_csv && csv_options.is_some() { + return Err(GraphError::from(PyValueError::new_err(format!( + "CSV options were passed but no CSV files were detected at {}.", + path.display() + )))); + } - /// Load edge properties from parquet file - /// - /// Arguments: - /// parquet_path (str): Parquet file or directory of Parquet files path containing edge information. - /// src (str): The column name for the source node. - /// dst (str): The column name for the destination node. 
- /// metadata (List[str], optional): List of edge metadata column names. Defaults to None. - /// shared_metadata (PropInput, optional): A dictionary of metadata properties that will be added to every edge. Defaults to None. - /// layer (str, optional): The edge layer name. Defaults to None. - /// layer_col (str, optional): The edge layer col name in dataframe. Defaults to None. - /// - /// Returns: - /// None: This function does not return a value, if the operation is successful. - /// - /// Raises: - /// GraphError: If the operation fails. - #[pyo3(signature = (parquet_path, src, dst, metadata = None, shared_metadata = None, layer = None, layer_col = None))] - fn load_edge_props_from_parquet( - &self, - parquet_path: PathBuf, - src: &str, - dst: &str, - metadata: Option>, - shared_metadata: Option>, - layer: Option<&str>, - layer_col: Option<&str>, - ) -> Result<(), GraphError> { - let metadata = convert_py_prop_args(metadata.as_deref()).unwrap_or_default(); - load_edge_props_from_parquet( - &self.graph, - parquet_path.as_path(), - src, - dst, - &metadata, - shared_metadata.as_ref(), - layer, - layer_col, - None, - None, - ) + // wrap in Arc to avoid cloning the entire schema for Parquet, CSV, and inner loops in CSV path + let arced_schema = column_schema.map(Arc::new); + + // if-if instead of if-else to support directories with mixed parquet and CSV files + if is_parquet { + load_edge_metadata_from_parquet( + &self.graph, + path.as_path(), + src, + dst, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + None, + arced_schema.clone(), + )?; + } + if is_csv { + load_edge_metadata_from_csv_path( + &self.graph, + &path, + src, + dst, + &metadata, + shared_metadata.as_ref(), + layer, + layer_col, + csv_options.as_ref(), + arced_schema, + )?; + } + if !is_parquet && !is_csv { + return Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' contains invalid path. Paths must either point to a Parquet/CSV file, or a directory containing Parquet/CSV files"))); + } + Ok(()) + } else { + Err(GraphError::PythonError(PyValueError::new_err("Argument 'data' invalid. 
Valid data sources are: a single Parquet or CSV file, a directory containing Parquet or CSV files, and objects that implement an __arrow_c_stream__ method."))) + } } /// Create graph index diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index 979d456941..75da355ad4 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -9,7 +9,6 @@ use crate::{ }, }, prelude::{AdditionOps, PropertyAdditionOps}, - python::graph::io::pandas_loaders::is_jupyter, serialise::incremental::InternalCache, }; use arrow::{ @@ -22,7 +21,9 @@ use bzip2::read::BzDecoder; use flate2::read::GzDecoder; use pyo3::{ exceptions::PyValueError, + ffi::c_str, prelude::*, + pybacked::PyBackedStr, types::{PyCapsule, PyDict}, }; use pyo3_arrow::PyRecordBatchReader; @@ -33,12 +34,18 @@ use std::{ fs, fs::File, iter, + ops::Deref, path::{Path, PathBuf}, sync::Arc, }; +use tracing::error; const CHUNK_SIZE: usize = 1_000_000; // split large chunks so progress bar updates reasonably +pub(crate) fn convert_py_prop_args(properties: Option<&[PyBackedStr]>) -> Option> { + properties.map(|p| p.iter().map(|p| p.deref()).collect()) +} + pub(crate) fn convert_py_schema( schema: Option>, ) -> Result>, GraphError> { @@ -202,13 +209,14 @@ pub(crate) fn load_edge_deletions_from_arrow_c_stream< dst: &str, layer: Option<&str>, layer_col: Option<&str>, + schema: Option>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![src, dst, time]; if let Some(ref layer_col) = layer_col { cols_to_check.push(layer_col.as_ref()); } - let df_view = process_arrow_c_stream_df(data, cols_to_check.clone(), None)?; + let df_view = process_arrow_c_stream_df(data, cols_to_check.clone(), schema)?; df_view.check_cols_exist(&cols_to_check)?; load_edge_deletions_from_df( df_view, @@ -642,6 +650,7 @@ pub(crate) fn load_edge_deletions_from_csv_path< layer: Option<&str>, layer_col: Option<&str>, csv_options: Option<&CsvReadOptions>, + schema: Option>>, ) -> Result<(), GraphError> { let mut cols_to_check = vec![src, dst, time]; if let Some(ref layer_col) = layer_col { @@ -651,7 +660,7 @@ pub(crate) fn load_edge_deletions_from_csv_path< // get the CSV file paths let csv_paths = collect_csv_paths(path)?; - let df_view = process_csv_paths_df(&csv_paths, cols_to_check.clone(), csv_options, None)?; + let df_view = process_csv_paths_df(&csv_paths, cols_to_check.clone(), csv_options, schema)?; df_view.check_cols_exist(&cols_to_check)?; load_edge_deletions_from_df( df_view, @@ -826,3 +835,36 @@ fn process_csv_paths_df<'a>( // we don't know the total number of rows until we read all files Ok(DFView::new(names, chunks, None)) } + +pub(crate) fn is_jupyter(py: Python) { + let code = c_str!( + r#" +try: + shell = get_ipython().__class__.__name__ + if shell == 'ZMQInteractiveShell': + result = True # Jupyter notebook or qtconsole + elif shell == 'TerminalInteractiveShell': + result = False # Terminal running IPython + else: + result = False # Other type, assuming not a Jupyter environment +except NameError: + result = False # Probably standard Python interpreter +"# + ); + + if let Err(e) = py.run(code, None, None) { + error!("Error checking if running in a jupyter notebook: {}", e); + return; + } + + match py.eval(c_str!("result"), None, None) { + Ok(x) => { + if let Ok(x) = x.extract() { + kdam::set_notebook(x); + } + } + Err(e) => { + error!("Error checking if running in a jupyter notebook: {}", e); + } + }; +} diff --git a/raphtory/src/python/graph/io/mod.rs 
b/raphtory/src/python/graph/io/mod.rs index b3b8faa385..187456c137 100644 --- a/raphtory/src/python/graph/io/mod.rs +++ b/raphtory/src/python/graph/io/mod.rs @@ -1,7 +1,6 @@ use pyo3::{create_exception, exceptions::PyException}; pub mod arrow_loaders; -pub mod pandas_loaders; create_exception!(exceptions, ArrowErrorException, PyException); create_exception!(exceptions, GraphLoadException, PyException); diff --git a/raphtory/src/python/graph/io/pandas_loaders.rs b/raphtory/src/python/graph/io/pandas_loaders.rs deleted file mode 100644 index 1bac0e00db..0000000000 --- a/raphtory/src/python/graph/io/pandas_loaders.rs +++ /dev/null @@ -1,287 +0,0 @@ -use crate::{ - db::api::view::StaticGraphViewOps, - errors::GraphError, - io::arrow::{dataframe::*, df_loaders::*}, - prelude::{AdditionOps, PropertyAdditionOps}, - serialise::incremental::InternalCache, -}; -use arrow::array::ArrayRef; -use pyo3::{ - ffi::c_str, - prelude::*, - pybacked::PyBackedStr, - types::{IntoPyDict, PyDict}, -}; -use pyo3_arrow::PyArray; -use raphtory_api::core::entities::properties::prop::Prop; -use std::{collections::HashMap, ops::Deref}; -use tracing::error; - -pub(crate) fn convert_py_prop_args(properties: Option<&[PyBackedStr]>) -> Option> { - properties.map(|p| p.iter().map(|p| p.deref()).collect()) -} - -pub(crate) fn load_nodes_from_pandas< - 'py, - G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, ->( - graph: &G, - df: &Bound<'py, PyAny>, - time: &str, - id: &str, - node_type: Option<&str>, - node_type_col: Option<&str>, - properties: &[&str], - metadata: &[&str], - shared_metadata: Option<&HashMap>, -) -> Result<(), GraphError> { - let mut cols_to_check = vec![id, time]; - cols_to_check.extend_from_slice(properties); - cols_to_check.extend_from_slice(metadata); - if let Some(ref node_type_col) = node_type_col { - cols_to_check.push(node_type_col.as_ref()); - } - - let df_view = process_pandas_py_df(df, cols_to_check.clone())?; - df_view.check_cols_exist(&cols_to_check)?; - load_nodes_from_df( - df_view, - time, - id, - properties, - metadata, - shared_metadata, - node_type, - node_type_col, - graph, - ) -} - -pub(crate) fn load_edges_from_pandas< - 'py, - G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, ->( - graph: &G, - df: &Bound<'py, PyAny>, - time: &str, - src: &str, - dst: &str, - properties: &[&str], - metadata: &[&str], - shared_metadata: Option<&HashMap>, - layer: Option<&str>, - layer_col: Option<&str>, -) -> Result<(), GraphError> { - let mut cols_to_check = vec![src, dst, time]; - cols_to_check.extend_from_slice(properties); - cols_to_check.extend_from_slice(metadata); - if let Some(layer_col) = layer_col { - cols_to_check.push(layer_col.as_ref()); - } - - let df_view = process_pandas_py_df(df, cols_to_check.clone())?; - df_view.check_cols_exist(&cols_to_check)?; - load_edges_from_df( - df_view, - time, - src, - dst, - properties, - metadata, - shared_metadata, - layer, - layer_col, - graph, - ) -} - -pub(crate) fn load_node_props_from_pandas< - 'py, - G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, ->( - graph: &G, - df: &Bound<'py, PyAny>, - id: &str, - node_type: Option<&str>, - node_type_col: Option<&str>, - metadata: &[&str], - shared_metadata: Option<&HashMap>, -) -> Result<(), GraphError> { - let mut cols_to_check = vec![id]; - cols_to_check.extend_from_slice(metadata); - if let Some(ref node_type_col) = node_type_col { - cols_to_check.push(node_type_col.as_ref()); - } - let df_view = process_pandas_py_df(df, 
cols_to_check.clone())?; - df_view.check_cols_exist(&cols_to_check)?; - load_node_props_from_df( - df_view, - id, - node_type, - node_type_col, - metadata, - shared_metadata, - graph, - ) -} - -pub(crate) fn load_edge_props_from_pandas< - 'py, - G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps + InternalCache, ->( - graph: &G, - df: &Bound<'py, PyAny>, - src: &str, - dst: &str, - metadata: &[&str], - shared_metadata: Option<&HashMap>, - layer: Option<&str>, - layer_col: Option<&str>, -) -> Result<(), GraphError> { - let mut cols_to_check = vec![src, dst]; - if let Some(ref layer_col) = layer_col { - cols_to_check.push(layer_col.as_ref()); - } - cols_to_check.extend_from_slice(metadata); - let df_view = process_pandas_py_df(df, cols_to_check.clone())?; - df_view.check_cols_exist(&cols_to_check)?; - load_edges_props_from_df( - df_view, - src, - dst, - metadata, - shared_metadata, - layer, - layer_col, - graph, - ) -} - -pub fn load_edge_deletions_from_pandas< - 'py, - G: StaticGraphViewOps + PropertyAdditionOps + AdditionOps, ->( - graph: &G, - df: &Bound<'py, PyAny>, - time: &str, - src: &str, - dst: &str, - layer: Option<&str>, - layer_col: Option<&str>, -) -> Result<(), GraphError> { - let mut cols_to_check = vec![src, dst, time]; - if let Some(ref layer_col) = layer_col { - cols_to_check.push(layer_col.as_ref()); - } - - let df_view = process_pandas_py_df(df, cols_to_check.clone())?; - df_view.check_cols_exist(&cols_to_check)?; - load_edge_deletions_from_df( - df_view, - time, - src, - dst, - layer, - layer_col, - graph.core_graph(), - ) -} - -pub(crate) fn process_pandas_py_df<'a>( - df: &Bound<'a, PyAny>, - col_names: Vec<&str>, -) -> PyResult> + 'a>> { - let py = df.py(); - is_jupyter(py); - py.import("pandas")?; - let module = py.import("pyarrow")?; - let pa_table = module.getattr("Table")?; - - let df_columns: Vec = df.getattr("columns")?.extract()?; - - let cols_to_drop: Vec = df_columns - .into_iter() - .filter(|x| !col_names.contains(&x.as_str())) - .collect(); - - let dropped_df = if !cols_to_drop.is_empty() { - let drop_method = df.getattr("drop")?; - &drop_method.call((cols_to_drop,), Some(&vec![("axis", 1)].into_py_dict(py)?))? - } else { - df - }; - - let table = pa_table.call_method("from_pandas", (dropped_df.clone(),), None)?; - let kwargs = PyDict::new(py); - kwargs.set_item("max_chunksize", 1000000)?; - let rb = table - .call_method("to_batches", (), Some(&kwargs))? - .extract::>>()?; - let names: Vec = if let Some(batch0) = rb.first() { - let schema = batch0.getattr("schema")?; - schema.getattr("names")?.extract::>()? 
- } else { - vec![] - } - .into_iter() - .filter(|x| col_names.contains(&x.as_str())) - .collect(); - - let names_len = names.len(); - let chunks = rb.into_iter().map(move |rb| { - let chunk = (0..names_len) - .map(|i| { - let array = rb.call_method1("column", (i,)).map_err(GraphError::from)?; - let arr = array_to_rust(&array).map_err(GraphError::from)?; - Ok::<_, GraphError>(arr) - }) - .collect::, GraphError>>()?; - - Ok(DFChunk { chunk }) - }); - let num_rows: usize = dropped_df.call_method0("__len__")?.extract()?; - - Ok(DFView { - names, - chunks, - num_rows: Some(num_rows), - }) -} - -pub fn array_to_rust(obj: &Bound) -> PyResult { - let (array, _) = PyArray::extract_bound(obj)?.into_inner(); - Ok(array) -} - -pub(crate) fn is_jupyter(py: Python) { - let code = c_str!( - r#" -try: - shell = get_ipython().__class__.__name__ - if shell == 'ZMQInteractiveShell': - result = True # Jupyter notebook or qtconsole - elif shell == 'TerminalInteractiveShell': - result = False # Terminal running IPython - else: - result = False # Other type, assuming not a Jupyter environment -except NameError: - result = False # Probably standard Python interpreter -"# - ); - - if let Err(e) = py.run(code, None, None) { - error!("Error checking if running in a jupyter notebook: {}", e); - return; - } - - match py.eval(c_str!("result"), None, None) { - Ok(x) => { - if let Ok(x) = x.extract() { - kdam::set_notebook(x); - } - } - Err(e) => { - error!("Error checking if running in a jupyter notebook: {}", e); - } - }; -} diff --git a/raphtory/src/serialise/parquet/mod.rs b/raphtory/src/serialise/parquet/mod.rs index 17fc8514c7..b0a1a426d1 100644 --- a/raphtory/src/serialise/parquet/mod.rs +++ b/raphtory/src/serialise/parquet/mod.rs @@ -5,8 +5,8 @@ use crate::{ }, errors::GraphError, io::parquet_loaders::{ - load_edge_deletions_from_parquet, load_edge_props_from_parquet, load_edges_from_parquet, - load_graph_props_from_parquet, load_node_props_from_parquet, load_nodes_from_parquet, + load_edge_deletions_from_parquet, load_edge_metadata_from_parquet, load_edges_from_parquet, + load_graph_props_from_parquet, load_node_metadata_from_parquet, load_nodes_from_parquet, }, prelude::*, serialise::parquet::{ @@ -330,7 +330,7 @@ fn decode_graph_storage( .map(|s| s.as_str()) .collect::>(); - load_node_props_from_parquet( + load_node_metadata_from_parquet( &g, &c_node_path, NODE_ID, @@ -391,7 +391,7 @@ fn decode_graph_storage( .map(|s| s.as_str()) .collect::>(); - load_edge_props_from_parquet( + load_edge_metadata_from_parquet( &g, &c_edge_path, SRC_COL, From c4d7136987df04d3893d67f4acdab033b2f5789c Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 19 Dec 2025 23:21:01 -0500 Subject: [PATCH 54/55] Fixed some python tests --- .../test_graphdb/test_graphdb.py | 2 +- python/tests/test_ingestion_equivalence_df.py | 116 +++++------------- python/tests/test_load_from_df.py | 14 +-- 3 files changed, 38 insertions(+), 94 deletions(-) diff --git a/python/tests/test_base_install/test_graphdb/test_graphdb.py b/python/tests/test_base_install/test_graphdb/test_graphdb.py index 51db3f2c56..6d4ab07448 100644 --- a/python/tests/test_base_install/test_graphdb/test_graphdb.py +++ b/python/tests/test_base_install/test_graphdb/test_graphdb.py @@ -2941,7 +2941,7 @@ def test_NaN_NaT_as_properties(): df = pd.DataFrame(data) g = Graph() - g.load_nodes_from_pandas(time="time", id="id", df=df, properties=["floats"]) + g.load_nodes(time="time", id="id", data=df, properties=["floats"]) @with_disk_graph def check(g): diff --git 
a/python/tests/test_ingestion_equivalence_df.py b/python/tests/test_ingestion_equivalence_df.py index 5c04b6d728..823a5f3c5c 100644 --- a/python/tests/test_ingestion_equivalence_df.py +++ b/python/tests/test_ingestion_equivalence_df.py @@ -51,18 +51,7 @@ def dataframes(): def test_edge_ingestion_equivalence(dataframes, graph_type): # reference graph g_pd = graph_type() - g_pd.load_edges_from_pandas( - df=dataframes["pandas"]["edges"], - time="timestamp", - src="source", - dst="destination", - properties=["data_size_MB", "transaction_type"], - metadata=["is_encrypted"], - ) - - # Pandas streaming - g_pd_stream = graph_type() - g_pd_stream.load_edges_from_df( + g_pd.load_edges( data=dataframes["pandas"]["edges"], time="timestamp", src="source", @@ -70,13 +59,10 @@ def test_edge_ingestion_equivalence(dataframes, graph_type): properties=["data_size_MB", "transaction_type"], metadata=["is_encrypted"], ) - assert ( - g_pd == g_pd_stream - ), "Pandas streaming edge ingestion failed equivalence check" # Polars g_pl = graph_type() - g_pl.load_edges_from_df( + g_pl.load_edges( data=dataframes["polars"]["edges"], time="timestamp", src="source", @@ -88,7 +74,7 @@ def test_edge_ingestion_equivalence(dataframes, graph_type): # Arrow g_arrow = graph_type() - g_arrow.load_edges_from_df( + g_arrow.load_edges( data=dataframes["arrow"]["edges"], time="timestamp", src="source", @@ -100,7 +86,7 @@ def test_edge_ingestion_equivalence(dataframes, graph_type): # DuckDB g_duckdb = graph_type() - g_duckdb.load_edges_from_df( + g_duckdb.load_edges( data=dataframes["duckdb"]["edges"], time="timestamp", src="source", @@ -113,7 +99,7 @@ def test_edge_ingestion_equivalence(dataframes, graph_type): if fpd: # FireDucks g_fd = graph_type() - g_fd.load_edges_from_df( + g_fd.load_edges( data=dataframes["fireducks"]["edges"], time="timestamp", src="source", @@ -128,30 +114,17 @@ def test_edge_ingestion_equivalence(dataframes, graph_type): def test_node_ingestion_equivalence(dataframes, graph_type): # reference graph g_pd = graph_type() - g_pd.load_nodes_from_pandas( - df=dataframes["pandas"]["nodes"], - time="timestamp", - id="server_id", - properties=["OS_version", "uptime_days"], - metadata=["primary_function", "server_name", "hardware_type"], - ) - - # Pandas streaming - g_pd_stream = graph_type() - g_pd_stream.load_nodes_from_df( + g_pd.load_nodes( data=dataframes["pandas"]["nodes"], time="timestamp", id="server_id", properties=["OS_version", "uptime_days"], metadata=["primary_function", "server_name", "hardware_type"], ) - assert ( - g_pd == g_pd_stream - ), "Pandas streaming node ingestion failed equivalence check" # Polars g_pl = graph_type() - g_pl.load_nodes_from_df( + g_pl.load_nodes( data=dataframes["polars"]["nodes"], time="timestamp", id="server_id", @@ -162,7 +135,7 @@ def test_node_ingestion_equivalence(dataframes, graph_type): # Arrow g_arrow = graph_type() - g_arrow.load_nodes_from_df( + g_arrow.load_nodes( data=dataframes["arrow"]["nodes"], time="timestamp", id="server_id", @@ -173,7 +146,7 @@ def test_node_ingestion_equivalence(dataframes, graph_type): # DuckDB g_duckdb = graph_type() - g_duckdb.load_nodes_from_df( + g_duckdb.load_nodes( data=dataframes["duckdb"]["nodes"], time="timestamp", id="server_id", @@ -186,7 +159,7 @@ def test_node_ingestion_equivalence(dataframes, graph_type): # FireDucks print("Testing fireducks...") g_fd = graph_type() - g_fd.load_nodes_from_df( + g_fd.load_nodes( data=dataframes["fireducks"]["nodes"], time="timestamp", id="server_id", @@ -200,79 +173,50 @@ def 
test_node_ingestion_equivalence(dataframes, graph_type): def test_metadata_update_equivalence(dataframes, graph_type): # reference graph g_pd = graph_type() - g_pd.load_edges_from_pandas( - df=dataframes["pandas"]["edges"], - time="timestamp", - src="source", - dst="destination", - ) - g_pd.load_nodes_from_pandas( - df=dataframes["pandas"]["nodes"], - time="timestamp", - id="server_id", - ) - # update metadata - g_pd.load_node_props_from_pandas( - df=dataframes["pandas"]["nodes"], - id="server_id", - metadata=["primary_function", "server_name", "hardware_type"], - ) - g_pd.load_edge_props_from_pandas( - df=dataframes["pandas"]["edges"], - src="source", - dst="destination", - metadata=["is_encrypted"], - ) - - # Pandas streaming - g_pd_stream = graph_type() - g_pd_stream.load_edges_from_df( + g_pd.load_edges( data=dataframes["pandas"]["edges"], time="timestamp", src="source", dst="destination", ) - g_pd_stream.load_nodes_from_df( + g_pd.load_nodes( data=dataframes["pandas"]["nodes"], time="timestamp", id="server_id", ) # update metadata - g_pd_stream.load_node_metadata_from_df( + g_pd.load_node_metadata( data=dataframes["pandas"]["nodes"], id="server_id", metadata=["primary_function", "server_name", "hardware_type"], ) - g_pd_stream.load_edge_metadata_from_df( + g_pd.load_edge_metadata( data=dataframes["pandas"]["edges"], src="source", dst="destination", metadata=["is_encrypted"], ) - assert ( - g_pd == g_pd_stream - ), "Pandas streaming metadata ingestion failed equivalence check" # Polars g_pl = graph_type() - g_pl.load_edges_from_df( + g_pl.load_edges( data=dataframes["polars"]["edges"], time="timestamp", src="source", dst="destination", ) - g_pl.load_nodes_from_df( + g_pl.load_nodes( data=dataframes["polars"]["nodes"], time="timestamp", id="server_id", ) # update metadata - g_pl.load_node_metadata_from_df( + g_pl.load_node_metadata( data=dataframes["polars"]["nodes"], id="server_id", metadata=["primary_function", "server_name", "hardware_type"], ) - g_pl.load_edge_metadata_from_df( + g_pl.load_edge_metadata( data=dataframes["polars"]["edges"], src="source", dst="destination", @@ -282,24 +226,24 @@ def test_metadata_update_equivalence(dataframes, graph_type): # Arrow g_arrow = graph_type() - g_arrow.load_edges_from_df( + g_arrow.load_edges( data=dataframes["arrow"]["edges"], time="timestamp", src="source", dst="destination", ) - g_arrow.load_nodes_from_df( + g_arrow.load_nodes( data=dataframes["arrow"]["nodes"], time="timestamp", id="server_id", ) # update metadata - g_arrow.load_node_metadata_from_df( + g_arrow.load_node_metadata( data=dataframes["arrow"]["nodes"], id="server_id", metadata=["primary_function", "server_name", "hardware_type"], ) - g_arrow.load_edge_metadata_from_df( + g_arrow.load_edge_metadata( data=dataframes["arrow"]["edges"], src="source", dst="destination", @@ -309,24 +253,24 @@ def test_metadata_update_equivalence(dataframes, graph_type): # DuckDB g_duckdb = graph_type() - g_duckdb.load_edges_from_df( + g_duckdb.load_edges( data=dataframes["duckdb"]["edges"], time="timestamp", src="source", dst="destination", ) - g_duckdb.load_nodes_from_df( + g_duckdb.load_nodes( data=dataframes["duckdb"]["nodes"], time="timestamp", id="server_id", ) # update metadata - g_duckdb.load_node_metadata_from_df( + g_duckdb.load_node_metadata( data=dataframes["duckdb"]["nodes"], id="server_id", metadata=["primary_function", "server_name", "hardware_type"], ) - g_duckdb.load_edge_metadata_from_df( + g_duckdb.load_edge_metadata( data=dataframes["duckdb"]["edges"], src="source", 
dst="destination", @@ -337,24 +281,24 @@ def test_metadata_update_equivalence(dataframes, graph_type): if fpd: # FireDucks g_fd = graph_type() - g_fd.load_edges_from_df( + g_fd.load_edges( data=dataframes["fireducks"]["edges"], time="timestamp", src="source", dst="destination", ) - g_fd.load_nodes_from_df( + g_fd.load_nodes( data=dataframes["fireducks"]["nodes"], time="timestamp", id="server_id", ) # update metadata - g_fd.load_node_metadata_from_df( + g_fd.load_node_metadata( data=dataframes["fireducks"]["nodes"], id="server_id", metadata=["primary_function", "server_name", "hardware_type"], ) - g_fd.load_edge_metadata_from_df( + g_fd.load_edge_metadata( data=dataframes["fireducks"]["edges"], src="source", dst="destination", diff --git a/python/tests/test_load_from_df.py b/python/tests/test_load_from_df.py index 9227358d74..16ec8ffd2a 100644 --- a/python/tests/test_load_from_df.py +++ b/python/tests/test_load_from_df.py @@ -28,12 +28,12 @@ def test_load_edges_from_polars_df(graph_type): ) g_to_pandas = graph_type() - g_to_pandas.load_edges_from_pandas( - df=df.to_pandas(), time="time", src="src", dst="dst", properties=["value"] + g_to_pandas.load_edges( + data=df.to_pandas(), time="time", src="src", dst="dst", properties=["value"] ) g_from_df = graph_type() - g_from_df.load_edges_from_df( + g_from_df.load_edges( data=df, time="time", src="src", dst="dst", properties=["value"] ) @@ -736,7 +736,7 @@ def test_load_edges_from_fireducks_df(graph_type): ) g = graph_type() - g.load_edges_from_df( + g.load_edges( data=df, time="time", src="src", dst="dst", properties=["value"] ) assert [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)] == _collect_edges(g) @@ -761,13 +761,13 @@ def test_fireducks_matches_pandas_for_same_edges(graph_type): ) g_fireducks = graph_type() - g_fireducks.load_edges_from_df( + g_fireducks.load_edges( data=df_fireducks, time="time", src="src", dst="dst", properties=["value"] ) g_pandas = graph_type() - g_pandas.load_edges_from_pandas( - df=df_pandas, time="time", src="src", dst="dst", properties=["value"] + g_pandas.load_edges( + data=df_pandas, time="time", src="src", dst="dst", properties=["value"] ) expected = [(1, 1, 2, 10.0), (2, 2, 3, 20.0), (3, 3, 4, 30.0)] From 259dcfb4c191117f8779578dcbfa2103923bdcc1 Mon Sep 17 00:00:00 2001 From: arienandalibi Date: Fri, 19 Dec 2025 23:54:00 -0500 Subject: [PATCH 55/55] Fixed cast_columns function to not be imported from a python feature folder which is not available in the crate root. Fixed parquet_loaders.rs. 
--- raphtory/src/io/parquet_loaders.rs | 56 ++++++++++++++-- raphtory/src/python/graph/io/arrow_loaders.rs | 64 ++++--------------- 2 files changed, 63 insertions(+), 57 deletions(-) diff --git a/raphtory/src/io/parquet_loaders.rs b/raphtory/src/io/parquet_loaders.rs index a7e9380767..dacd949958 100644 --- a/raphtory/src/io/parquet_loaders.rs +++ b/raphtory/src/io/parquet_loaders.rs @@ -3,11 +3,17 @@ use crate::{ errors::{GraphError, InvalidPathReason::PathDoesNotExist}, io::arrow::{dataframe::*, df_loaders::*}, prelude::{AdditionOps, DeletionOps, PropertyAdditionOps}, - python::graph::io::arrow_loaders::cast_columns, serialise::incremental::InternalCache, }; +use arrow::{ + array::{Array, RecordBatch, StructArray}, + compute::cast, + datatypes::{DataType, Field, Fields}, +}; use parquet::arrow::{arrow_reader::ParquetRecordBatchReaderBuilder, ProjectionMask}; -use raphtory_api::core::entities::properties::prop::{Prop, PropType}; +#[cfg(feature = "storage")] +use pometry_storage::RAError; +use raphtory_api::core::entities::properties::prop::{arrow_dtype_from_prop_type, Prop, PropType}; use std::{ collections::HashMap, ffi::OsStr, @@ -16,8 +22,6 @@ use std::{ path::{Path, PathBuf}, sync::Arc, }; -#[cfg(feature = "storage")] -use {arrow::array::StructArray, pometry_storage::RAError}; pub(crate) fn is_parquet_path(path: &PathBuf) -> Result { if path.is_dir() { @@ -394,6 +398,50 @@ pub fn get_parquet_file_paths(parquet_path: &Path) -> Result, Graph Ok(parquet_files) } +pub(crate) fn cast_columns( + batch: RecordBatch, + schema: &HashMap, +) -> Result { + let old_schema_ref = batch.schema(); + let old_fields = old_schema_ref.fields(); + + let mut target_fields: Vec = Vec::with_capacity(old_fields.len()); + + for field in old_fields.iter() { + if let Some(target_prop_type) = schema.get(field.name()) { + let target_dtype = arrow_dtype_from_prop_type(target_prop_type); + target_fields.push( + Field::new(field.name(), target_dtype, field.is_nullable()) + .with_metadata(field.metadata().clone()), + ); + } else { + // schema doesn't say anything about this column + target_fields.push(field.as_ref().clone()); + } + } + let struct_array = StructArray::from(batch); + let target_struct_type = DataType::Struct(Fields::from(target_fields)); + + // cast whole RecordBatch at once + let casted = cast(&struct_array, &target_struct_type).map_err(|e| { + GraphError::LoadFailure(format!( + "Failed to cast RecordBatch to target schema {:?}: {e}", + target_struct_type + )) + })?; + + let casted_struct = casted + .as_any() + .downcast_ref::() + .ok_or_else(|| { + GraphError::LoadFailure( + "Internal error: casting RecordBatch did not return StructArray".to_string(), + ) + })?; + + Ok(RecordBatch::from(casted_struct)) +} + #[cfg(feature = "storage")] pub fn read_struct_arrays( path: &Path, diff --git a/raphtory/src/python/graph/io/arrow_loaders.rs b/raphtory/src/python/graph/io/arrow_loaders.rs index 75da355ad4..37ab3c0a8f 100644 --- a/raphtory/src/python/graph/io/arrow_loaders.rs +++ b/raphtory/src/python/graph/io/arrow_loaders.rs @@ -1,20 +1,22 @@ use crate::{ db::api::view::StaticGraphViewOps, errors::GraphError, - io::arrow::{ - dataframe::{DFChunk, DFView}, - df_loaders::{ - load_edge_deletions_from_df, load_edges_from_df, load_edges_props_from_df, - load_node_props_from_df, load_nodes_from_df, + io::{ + arrow::{ + dataframe::{DFChunk, DFView}, + df_loaders::{ + load_edge_deletions_from_df, load_edges_from_df, load_edges_props_from_df, + load_node_props_from_df, load_nodes_from_df, + }, }, + 
parquet_loaders::cast_columns, }, prelude::{AdditionOps, PropertyAdditionOps}, serialise::incremental::InternalCache, }; use arrow::{ - array::{Array, RecordBatch, RecordBatchReader, StructArray}, - compute::cast, - datatypes::{DataType, Field, Fields, SchemaRef}, + array::{RecordBatch, RecordBatchReader}, + datatypes::SchemaRef, }; use arrow_csv::{reader::Format, ReaderBuilder}; use bzip2::read::BzDecoder; @@ -27,7 +29,7 @@ use pyo3::{ types::{PyCapsule, PyDict}, }; use pyo3_arrow::PyRecordBatchReader; -use raphtory_api::core::entities::properties::prop::{arrow_dtype_from_prop_type, Prop, PropType}; +use raphtory_api::core::entities::properties::prop::{Prop, PropType}; use std::{ cmp::min, collections::HashMap, @@ -311,50 +313,6 @@ pub(crate) fn process_arrow_c_stream_df<'a>( Ok(DFView::new(names, chunks, len_from_python)) } -pub(crate) fn cast_columns( - batch: RecordBatch, - schema: &HashMap, -) -> Result { - let old_schema_ref = batch.schema(); - let old_fields = old_schema_ref.fields(); - - let mut target_fields: Vec = Vec::with_capacity(old_fields.len()); - - for field in old_fields.iter() { - if let Some(target_prop_type) = schema.get(field.name()) { - let target_dtype = arrow_dtype_from_prop_type(target_prop_type); - target_fields.push( - Field::new(field.name(), target_dtype, field.is_nullable()) - .with_metadata(field.metadata().clone()), - ); - } else { - // schema doesn't say anything about this column - target_fields.push(field.as_ref().clone()); - } - } - let struct_array = StructArray::from(batch); - let target_struct_type = DataType::Struct(Fields::from(target_fields)); - - // cast whole RecordBatch at once - let casted = cast(&struct_array, &target_struct_type).map_err(|e| { - GraphError::LoadFailure(format!( - "Failed to cast RecordBatch to target schema {:?}: {e}", - target_struct_type - )) - })?; - - let casted_struct = casted - .as_any() - .downcast_ref::() - .ok_or_else(|| { - GraphError::LoadFailure( - "Internal error: casting RecordBatch did not return StructArray".to_string(), - ) - })?; - - Ok(RecordBatch::from(casted_struct)) -} - /// Splits a RecordBatch into chunks of CHUNK_SIZE owned by DFChunk objects fn split_into_chunks(batch: &RecordBatch, indices: &[usize]) -> Vec> { // many times, all the data will be passed as a single RecordBatch, meaning the progress bar