From 1d02d7a7a3d69ca1438ae18e0237cb6554a953a4 Mon Sep 17 00:00:00 2001 From: Fabian Murariu Date: Thu, 23 Oct 2025 10:58:40 +0100 Subject: [PATCH 01/47] changes to test_utils for vacuum support --- db4-storage/src/pages/test_utils/checkers.rs | 52 +++++++++++++------- db4-storage/src/pages/test_utils/fixtures.rs | 7 +-- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/db4-storage/src/pages/test_utils/checkers.rs b/db4-storage/src/pages/test_utils/checkers.rs index 633b11d6a5..cf40ed3368 100644 --- a/db4-storage/src/pages/test_utils/checkers.rs +++ b/db4-storage/src/pages/test_utils/checkers.rs @@ -23,25 +23,18 @@ use crate::{ use super::fixtures::{AddEdge, Fixture, NodeFixture}; -pub fn check_edges_support< +pub fn make_graph_from_edges< NS: NodeSegmentOps, ES: EdgeSegmentOps, EXT: PersistentStrategy, >( - edges: Vec<(impl Into, impl Into, Option)>, // src, dst, optional layer_id + edges: &[(VID, VID, Option)], // src, dst, optional layer_id + graph_dir: &Path, par_load: bool, - check_load: bool, make_graph: impl FnOnce(&Path) -> GraphStore, -) { - let mut edges = edges - .into_iter() - .map(|(src, dst, layer_id)| (src.into(), dst.into(), layer_id)) - .collect::>(); - - let graph_dir = tempfile::tempdir().unwrap(); - let graph = make_graph(graph_dir.path()); - let mut nodes = HashSet::new(); - for (_, _, layer) in &edges { +) -> GraphStore { + let graph = make_graph(graph_dir); + for (_, _, layer) in edges { if let Some(layer) = layer { for layer in 0..=*layer { let name = layer.to_string(); @@ -54,12 +47,6 @@ pub fn check_edges_support< } } } - - for (src, dst, _) in &edges { - nodes.insert(*src); - nodes.insert(*dst); - } - if par_load { edges .par_iter() @@ -94,6 +81,33 @@ pub fn check_edges_support< }) .expect("Failed to add edge"); } + graph +} + +pub fn check_edges_support< + NS: NodeSegmentOps, + ES: EdgeSegmentOps, + EXT: PersistentStrategy, +>( + edges: Vec<(impl Into, impl Into, Option)>, // src, dst, optional layer_id + par_load: bool, + check_load: bool, + make_graph: impl FnOnce(&Path) -> GraphStore, +) { + let mut edges = edges + .into_iter() + .map(|(src, dst, layer_id)| (src.into(), dst.into(), layer_id)) + .collect::>(); + + let graph_dir = tempfile::tempdir().unwrap(); + let graph = make_graph_from_edges(&edges, graph_dir.path(), par_load, make_graph); + + let mut nodes = HashSet::new(); + + for (src, dst, _) in &edges { + nodes.insert(*src); + nodes.insert(*dst); + } let actual_num_nodes = graph.nodes().num_nodes() as usize; assert_eq!(actual_num_nodes, nodes.len()); diff --git a/db4-storage/src/pages/test_utils/fixtures.rs b/db4-storage/src/pages/test_utils/fixtures.rs index bcf27636e3..60bf71933a 100644 --- a/db4-storage/src/pages/test_utils/fixtures.rs +++ b/db4-storage/src/pages/test_utils/fixtures.rs @@ -113,13 +113,10 @@ pub fn edges_strat_with_layers( let num_edges = 0..(num_nodes * num_nodes); let srcs = (0usize..num_nodes).prop_map(VID); let dsts = (0usize..num_nodes).prop_map(VID); - let layer_ids = (1usize..MAX_LAYERS).prop_map(|i| Some(i as usize)); + let layer_ids = (1usize..MAX_LAYERS).prop_map(Some); num_edges.prop_flat_map(move |num_edges| { - collection::vec( - (srcs.clone(), dsts.clone(), layer_ids.clone()), - num_edges as usize, - ) + collection::vec((srcs.clone(), dsts.clone(), layer_ids.clone()), num_edges) }) }) } From 432acb53078e479d5ed2e807fe22c7b4c11568e8 Mon Sep 17 00:00:00 2001 From: Fabian Murariu Date: Sat, 25 Oct 2025 18:53:36 +0100 Subject: [PATCH 02/47] some apis for vacuum --- db4-storage/src/api/edges.rs | 5 +++++ db4-storage/src/api/nodes.rs | 5 +++++ db4-storage/src/pages/locked/edges.rs | 8 ++++++++ db4-storage/src/pages/locked/nodes.rs | 12 ++++++++++++ db4-storage/src/pages/mod.rs | 8 ++++++++ db4-storage/src/segments/edge.rs | 7 +++++++ db4-storage/src/segments/node.rs | 7 +++++++ raphtory/src/serialise/serialise.rs | 6 ++++-- 8 files changed, 56 insertions(+), 2 deletions(-) diff --git a/db4-storage/src/api/edges.rs b/db4-storage/src/api/edges.rs index 2eeee3938b..815225b04a 100644 --- a/db4-storage/src/api/edges.rs +++ b/db4-storage/src/api/edges.rs @@ -84,6 +84,11 @@ pub trait EdgeSegmentOps: Send + Sync + std::fmt::Debug + 'static { ) -> Option>; fn locked(self: &Arc) -> Self::ArcLockedSegment; + + fn vacuum( + &self, + locked_head: impl DerefMut, + ) -> Result<(), StorageError>; } pub trait LockedESegment: Send + Sync + std::fmt::Debug { diff --git a/db4-storage/src/api/nodes.rs b/db4-storage/src/api/nodes.rs index 4fc7751fac..bababd3b4c 100644 --- a/db4-storage/src/api/nodes.rs +++ b/db4-storage/src/api/nodes.rs @@ -115,6 +115,11 @@ pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { fn est_size(&self) -> usize; fn increment_est_size(&self, size: usize) -> usize; + + fn vacuum( + &self, + locked_head: impl DerefMut, + ) -> Result<(), StorageError>; } pub trait LockedNSSegment: std::fmt::Debug + Send + Sync { diff --git a/db4-storage/src/pages/locked/edges.rs b/db4-storage/src/pages/locked/edges.rs index 28d3ef0d5c..080351f36c 100644 --- a/db4-storage/src/pages/locked/edges.rs +++ b/db4-storage/src/pages/locked/edges.rs @@ -3,6 +3,7 @@ use std::ops::{Deref, DerefMut}; use crate::{ LocalPOS, api::edges::EdgeSegmentOps, + error::StorageError, pages::{edge_page::writer::EdgeWriter, layer_counter::GraphStats, resolve_pos}, segments::edge::MemEdgeSegment, }; @@ -111,4 +112,11 @@ impl<'a, EXT, ES: EdgeSegmentOps> WriteLockedEdgePages<'a, ES> }) .is_some() } + + pub fn vacuum(&mut self) -> Result<(), StorageError> { + for LockedEdgePage { page, lock, .. } in &mut self.writers { + page.vacuum(lock.deref_mut())?; + } + Ok(()) + } } diff --git a/db4-storage/src/pages/locked/nodes.rs b/db4-storage/src/pages/locked/nodes.rs index 7213daa2e0..1c25a9fe91 100644 --- a/db4-storage/src/pages/locked/nodes.rs +++ b/db4-storage/src/pages/locked/nodes.rs @@ -1,6 +1,7 @@ use crate::{ LocalPOS, api::nodes::NodeSegmentOps, + error::StorageError, pages::{layer_counter::GraphStats, node_page::writer::NodeWriter, resolve_pos}, segments::node::MemNodeSegment, }; @@ -43,6 +44,10 @@ impl<'a, EXT, NS: NodeSegmentOps> LockedNodePage<'a, NS> { NodeWriter::new(self.page, self.layer_counter, self.lock.deref_mut()) } + pub fn vacuum(&mut self) { + self.page.vacuum(self.lock.deref_mut()); + } + #[inline(always)] pub fn page_id(&self) -> usize { self.page_id @@ -101,4 +106,11 @@ impl<'a, EXT, NS: NodeSegmentOps> WriteLockedNodePages<'a, NS> pub fn len(&self) -> usize { self.writers.len() } + + pub fn vacuum(&mut self) -> Result<(), StorageError> { + for LockedNodePage { page, lock, .. } in &mut self.writers { + page.vacuum(lock.deref_mut())?; + } + Ok(()) + } } diff --git a/db4-storage/src/pages/mod.rs b/db4-storage/src/pages/mod.rs index 1816b76b57..58d7623f00 100644 --- a/db4-storage/src/pages/mod.rs +++ b/db4-storage/src/pages/mod.rs @@ -389,6 +389,14 @@ impl< pub fn get_free_writer(&self) -> EdgeWriter<'_, RwLockWriteGuard<'_, MemEdgeSegment>, ES> { self.edges().get_free_writer() } + + pub fn vacuum(self: &Arc) -> Result<(), StorageError> { + let mut locked_nodes = self.nodes.write_locked(); + let mut locked_edges = self.edges.write_locked(); + locked_nodes.vacuum()?; + locked_edges.vacuum()?; + Ok(()) + } } impl Drop for GraphStore { diff --git a/db4-storage/src/segments/edge.rs b/db4-storage/src/segments/edge.rs index 7041de6d8f..51b58060f2 100644 --- a/db4-storage/src/segments/edge.rs +++ b/db4-storage/src/segments/edge.rs @@ -559,6 +559,13 @@ impl>> EdgeSegmentOps for EdgeSegm } } + fn vacuum( + &self, + _locked_head: impl DerefMut, + ) -> Result<(), StorageError> { + Ok(()) + } + fn num_layers(&self) -> usize { self.head().layers.len() } diff --git a/db4-storage/src/segments/node.rs b/db4-storage/src/segments/node.rs index 7c66560de4..bfd977b362 100644 --- a/db4-storage/src/segments/node.rs +++ b/db4-storage/src/segments/node.rs @@ -535,6 +535,13 @@ impl>> NodeSegmentOps for NodeSegm fn increment_est_size(&self, size: usize) -> usize { self.est_size.fetch_add(size, Ordering::Relaxed) } + + fn vacuum( + &self, + _locked_head: impl DerefMut, + ) -> Result<(), StorageError> { + Ok(()) + } } #[cfg(test)] diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index 399bb107ad..980790b98b 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -5,8 +5,10 @@ use crate::prelude::IndexMutationOps; use crate::{ db::api::{mutation::AdditionOps, view::StaticGraphViewOps}, errors::GraphError, - serialise::parquet::{ParquetDecoder, ParquetEncoder}, - serialise::GraphFolder, + serialise::{ + parquet::{ParquetDecoder, ParquetEncoder}, + GraphFolder, + }, }; use std::{fs, fs::File}; use tempfile; From 9aa60468dae2d045e0697e1c0760b9e3cdc88874 Mon Sep 17 00:00:00 2001 From: Fabian Murariu Date: Mon, 27 Oct 2025 14:36:27 +0000 Subject: [PATCH 03/47] make edge deletion loading sequential --- db4-storage/src/api/nodes.rs | 2 +- raphtory/src/io/arrow/df_loaders.rs | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/db4-storage/src/api/nodes.rs b/db4-storage/src/api/nodes.rs index bababd3b4c..93168f73f9 100644 --- a/db4-storage/src/api/nodes.rs +++ b/db4-storage/src/api/nodes.rs @@ -344,7 +344,7 @@ pub trait NodeRefOps<'a>: Copy + Clone + Send + Sync + 'a { self.c_prop(0, NODE_ID_IDX) .and_then(|prop| prop.into_u64().map(GidRef::U64)) }) - .expect("Node GID should be present") + .unwrap_or_else(|| panic!("GID should be present, for node {:?}", self.vid())) } fn node_type_id(&self) -> usize { diff --git a/raphtory/src/io/arrow/df_loaders.rs b/raphtory/src/io/arrow/df_loaders.rs index 6a93a683f5..2f8e5e0594 100644 --- a/raphtory/src/io/arrow/df_loaders.rs +++ b/raphtory/src/io/arrow/df_loaders.rs @@ -771,14 +771,14 @@ pub(crate) fn load_edge_deletions_from_df< }; src_col - .par_iter() - .zip(dst_col.par_iter()) - .zip(time_col.par_iter()) - .zip(secondary_index_col.par_iter()) - .zip(layer.par_iter()) + .iter() + .zip(dst_col.iter()) + .zip(time_col.iter()) + .zip(secondary_index_col.iter()) + .zip(layer.iter()) .try_for_each(|((((src, dst), time), secondary_index), layer)| { - let src = src.ok_or(LoadError::MissingSrcError)?; - let dst = dst.ok_or(LoadError::MissingDstError)?; + // let src = src.ok_or(LoadError::MissingSrcError)?; + // let dst = dst.ok_or(LoadError::MissingDstError)?; graph.delete_edge((time, secondary_index), src, dst, layer)?; Ok::<(), GraphError>(()) })?; From c13f5065e2d6a4b6e2ebc860831c2744c7899ec2 Mon Sep 17 00:00:00 2001 From: Fabian Murariu Date: Mon, 27 Oct 2025 17:09:44 +0000 Subject: [PATCH 04/47] add vecuum error --- db4-storage/src/lib.rs | 3 +++ db4-storage/src/segments/node.rs | 6 ++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/db4-storage/src/lib.rs b/db4-storage/src/lib.rs index 2b1450d5a7..28584817d7 100644 --- a/db4-storage/src/lib.rs +++ b/db4-storage/src/lib.rs @@ -102,6 +102,9 @@ pub mod error { GenericFailure(String), #[error(transparent)] InvalidNodeId(#[from] InvalidNodeId), + + #[error("Failed to vacuum storage")] + VacuumError, } } diff --git a/db4-storage/src/segments/node.rs b/db4-storage/src/segments/node.rs index bfd977b362..d1216260b5 100644 --- a/db4-storage/src/segments/node.rs +++ b/db4-storage/src/segments/node.rs @@ -101,8 +101,7 @@ impl MemNodeSegment { } pub fn swap_out_layers(&mut self) -> Vec> { - let layers = self - .layers + self.layers .iter_mut() .map(|head_guard| { let mut old_head = SegmentContainer::new( @@ -113,8 +112,7 @@ impl MemNodeSegment { std::mem::swap(&mut *head_guard, &mut old_head); old_head }) - .collect::>(); - layers + .collect::>() } pub fn get_or_create_layer(&mut self, layer_id: usize) -> &mut SegmentContainer { From 5ab054930d40f769ca257e7d9cb8bfdb9ef20c8a Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 3 Nov 2025 09:21:45 +0100 Subject: [PATCH 05/47] create empty segments for new layers so they aren't lost on write --- .../src/mutation/addition_ops_ext.rs | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index f97ecd79b6..bd86b3318a 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -1,3 +1,7 @@ +use crate::mutation::{ + addition_ops::{EdgeWriteLock, InternalAdditionOps, SessionAdditionOps}, + MutationError, +}; use db4_graph::{TemporalGraph, TransactionManager, WriteLockedGraph}; use raphtory_api::core::{ entities::properties::{ @@ -15,6 +19,7 @@ use raphtory_core::{ storage::timeindex::TimeIndexEntry, }; use storage::{ + api::{edges::EdgeSegmentOps, nodes::NodeSegmentOps}, pages::{node_page::writer::node_info_as_props, session::WriteSession}, persist::strategy::PersistentStrategy, properties::props_meta_writer::PropsMetaWriter, @@ -22,11 +27,6 @@ use storage::{ Extension, WalImpl, ES, NS, }; -use crate::mutation::{ - addition_ops::{EdgeWriteLock, InternalAdditionOps, SessionAdditionOps}, - MutationError, -}; - pub struct WriteS<'a, EXT: PersistentStrategy, ES = ES>> { static_session: WriteSession<'a, NS, ES, EXT>, } @@ -223,6 +223,16 @@ impl InternalAdditionOps for TemporalGraph { if id > MAX_LAYER { Err(TooManyLayers)?; } + let edge_segment = self.storage().edges().get_or_create_segment(0); + let mut edge_segment_head = edge_segment.head_mut(); + edge_segment_head.get_or_create_layer(id); + edge_segment.notify_write(edge_segment_head)?; + + let node_segment = self.storage().nodes().get_or_create_segment(0); + + let mut node_segment_head = node_segment.head_mut(); + node_segment_head.get_or_create_layer(id); + node_segment.notify_write(node_segment_head)?; } Ok(id) } From 40eb155f2e4fd93312f2f6c4eacce7122f184c11 Mon Sep 17 00:00:00 2001 From: Fabian Murariu Date: Tue, 4 Nov 2025 12:23:17 +0000 Subject: [PATCH 06/47] rename ParquetProp to SerdeProp and move to raphtory-api --- Cargo.toml | 1 + db4-storage/Cargo.toml | 1 + db4-storage/src/properties/mod.rs | 32 +++++++-- .../entities/properties/prop/prop_enum.rs | 65 ++++++++++++++++++- raphtory/src/serialise/parquet/graph.rs | 4 +- raphtory/src/serialise/parquet/model.rs | 52 ++------------- 6 files changed, 101 insertions(+), 54 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c3ecb1472b..440728e31a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -159,6 +159,7 @@ parquet = { version = "56.2.0" } arrow-json = { version = "56.2.0" } arrow-buffer = { version = "56.2.0" } arrow-schema = { version = "56.2.0" } +serde_arrow = {version = "0.13.6", features = ["arrow-56"]} arrow-array = { version = "56.2.0", features = ["chrono-tz"] } arrow-ipc = { version = "56.2.0" } arrow-csv = { version = "56.2.0" } diff --git a/db4-storage/Cargo.toml b/db4-storage/Cargo.toml index 643c18291b..ca254d2260 100644 --- a/db4-storage/Cargo.toml +++ b/db4-storage/Cargo.toml @@ -28,6 +28,7 @@ arrow.workspace = true arrow-array.workspace = true arrow-csv.workspace = true arrow-schema.workspace = true +serde_arrow.workspace = true parquet.workspace = true bytemuck.workspace = true rayon.workspace = true diff --git a/db4-storage/src/properties/mod.rs b/db4-storage/src/properties/mod.rs index 2bd37509be..9063710cc4 100644 --- a/db4-storage/src/properties/mod.rs +++ b/db4-storage/src/properties/mod.rs @@ -1,13 +1,14 @@ use crate::error::StorageError; use arrow_array::{ ArrayRef, BooleanArray, Decimal128Array, Float32Array, Float64Array, Int32Array, Int64Array, - StringViewArray, TimestampMillisecondArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, + StringViewArray, StructArray, TimestampMillisecondArray, UInt8Array, UInt16Array, UInt32Array, + UInt64Array, }; -use arrow_schema::DECIMAL128_MAX_PRECISION; +use arrow_schema::{DECIMAL128_MAX_PRECISION, Field, Fields}; use bigdecimal::ToPrimitive; use raphtory_api::core::entities::properties::{ meta::PropMapper, - prop::{Prop, PropType}, + prop::{Prop, PropType, SerdeMap, SerdeProp, arrow_dtype_from_prop_type}, }; use raphtory_core::{ entities::{ @@ -16,6 +17,7 @@ use raphtory_core::{ }, storage::{PropColumn, TColumns, timeindex::TimeIndexEntry}, }; +use serde_arrow::ArrayBuilder; use std::sync::Arc; pub mod props_meta_writer; @@ -193,7 +195,29 @@ impl Properties { } // PropColumn::Array(lazy_vec) => todo!(), // PropColumn::List(lazy_vec) => todo!(), - // PropColumn::Map(lazy_vec) => todo!(), + PropColumn::Map(lazy_vec) => { + let dt = meta + .get_dtype(col_id) + .as_ref() + .map(arrow_dtype_from_prop_type) + .unwrap(); + let fields = match dt { + arrow::datatypes::DataType::Struct(fields) => fields, + _ => panic!("Expected Struct data type for Map property"), + }; + let array_iter = indices.map(|i| lazy_vec.get_opt(i).cloned()); + + let mut builder = ArrayBuilder::from_arrow(&fields).unwrap(); + + for prop in array_iter { + builder.push(prop.as_ref().map(|m| SerdeMap(m))).unwrap(); + } + + let arrays = builder.to_arrow().unwrap(); + let struct_array = StructArray::new(fields, arrays, None); + + Some(Arc::new(struct_array)) + } _ => None, //todo!("Unsupported column type"), } } diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index e6ba93f125..c2a88fa024 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -8,8 +8,11 @@ use crate::core::{ use bigdecimal::{num_bigint::BigInt, BigDecimal}; use chrono::{DateTime, NaiveDateTime, Utc}; use itertools::Itertools; -use rustc_hash::FxHashMap; -use serde::{Deserialize, Serialize}; +use rustc_hash::{FxBuildHasher, FxHashMap}; +use serde::{ + ser::{SerializeMap, SerializeSeq}, + Deserialize, Serialize, +}; use std::{ cmp::Ordering, collections::HashMap, @@ -148,6 +151,64 @@ impl PartialOrd for Prop { } } +pub struct SerdeProp<'a>(pub &'a Prop); +pub struct SedeList<'a>(pub &'a Vec); +pub struct SerdeMap<'a>(pub &'a HashMap); + +impl<'a> Serialize for SedeList<'a> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut state = serializer.serialize_seq(Some(self.0.len()))?; + for prop in self.0.iter() { + state.serialize_element(&SerdeProp(prop))?; + } + state.end() + } +} + +impl<'a> Serialize for SerdeMap<'a> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut state = serializer.serialize_map(Some(self.0.len()))?; + for (k, v) in self.0.iter() { + state.serialize_entry(k, &SerdeProp(v))?; + } + state.end() + } +} + +impl<'a> Serialize for SerdeProp<'a> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self.0 { + Prop::I32(i) => serializer.serialize_i32(*i), + Prop::I64(i) => serializer.serialize_i64(*i), + Prop::F32(f) => serializer.serialize_f32(*f), + Prop::F64(f) => serializer.serialize_f64(*f), + Prop::U8(u) => serializer.serialize_u8(*u), + Prop::U16(u) => serializer.serialize_u16(*u), + Prop::U32(u) => serializer.serialize_u32(*u), + Prop::U64(u) => serializer.serialize_u64(*u), + Prop::Str(s) => serializer.serialize_str(s), + Prop::Bool(b) => serializer.serialize_bool(*b), + Prop::DTime(dt) => serializer.serialize_i64(dt.timestamp_millis()), + Prop::NDTime(dt) => serializer.serialize_i64(dt.and_utc().timestamp_millis()), + Prop::List(l) => SedeList(l).serialize(serializer), + Prop::Map(m) => SerdeMap(m).serialize(serializer), + Prop::Decimal(dec) => serializer.serialize_str(&dec.to_string()), + _ => { + todo!("Serializer not implemented") + } + } + } +} + pub fn validate_prop(prop: Prop) -> Result { match prop { Prop::Decimal(ref bd) => { diff --git a/raphtory/src/serialise/parquet/graph.rs b/raphtory/src/serialise/parquet/graph.rs index e470dfda11..dc07581564 100644 --- a/raphtory/src/serialise/parquet/graph.rs +++ b/raphtory/src/serialise/parquet/graph.rs @@ -2,7 +2,7 @@ use crate::{ errors::GraphError, prelude::{GraphViewOps, Prop, PropertiesOps}, serialise::parquet::{ - model::ParquetProp, run_encode, EVENT_GRAPH_TYPE, GRAPH_C_PATH, GRAPH_TYPE, GRAPH_T_PATH, + model::SerdeProp, run_encode, EVENT_GRAPH_TYPE, GRAPH_C_PATH, GRAPH_TYPE, GRAPH_T_PATH, PERSISTENT_GRAPH_TYPE, SECONDARY_INDEX_COL, TIME_COL, }, }; @@ -85,7 +85,7 @@ impl Serialize for Row { let mut state = serializer.serialize_map(Some(self.row.len()))?; for (k, v) in self.row.iter() { - state.serialize_entry(k, &ParquetProp(v))?; + state.serialize_entry(k, &SerdeProp(v))?; } state.serialize_entry(TIME_COL, &self.t.0)?; diff --git a/raphtory/src/serialise/parquet/model.rs b/raphtory/src/serialise/parquet/model.rs index 6981f84bb4..f0f0216be9 100644 --- a/raphtory/src/serialise/parquet/model.rs +++ b/raphtory/src/serialise/parquet/model.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use super::{ Prop, DST_COL, LAYER_COL, NODE_ID_COL, SECONDARY_INDEX_COL, SRC_COL, TIME_COL, TYPE_COL, }; @@ -19,48 +21,6 @@ use serde::{ Serialize, }; -pub(crate) struct ParquetProp<'a>(pub &'a Prop); - -impl<'a> Serialize for ParquetProp<'a> { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - match self.0 { - Prop::I32(i) => serializer.serialize_i32(*i), - Prop::I64(i) => serializer.serialize_i64(*i), - Prop::F32(f) => serializer.serialize_f32(*f), - Prop::F64(f) => serializer.serialize_f64(*f), - Prop::U8(u) => serializer.serialize_u8(*u), - Prop::U16(u) => serializer.serialize_u16(*u), - Prop::U32(u) => serializer.serialize_u32(*u), - Prop::U64(u) => serializer.serialize_u64(*u), - Prop::Str(s) => serializer.serialize_str(s), - Prop::Bool(b) => serializer.serialize_bool(*b), - Prop::DTime(dt) => serializer.serialize_i64(dt.timestamp_millis()), - Prop::NDTime(dt) => serializer.serialize_i64(dt.and_utc().timestamp_millis()), - Prop::List(l) => { - let mut state = serializer.serialize_seq(Some(l.len()))?; - for prop in l.iter() { - state.serialize_element(&ParquetProp(prop))?; - } - state.end() - } - Prop::Map(m) => { - let mut state = serializer.serialize_map(Some(m.len()))?; - for (k, v) in m.iter() { - state.serialize_entry(k, &ParquetProp(v))?; - } - state.end() - } - Prop::Decimal(dec) => serializer.serialize_str(&dec.to_string()), - _ => { - todo!("Serializer not implemented") - } - } - } -} - #[derive(Debug)] struct ParquetGID(GID); @@ -101,7 +61,7 @@ impl<'a, G: StaticGraphViewOps> Serialize for ParquetTEdge<'a, G> { state.serialize_entry(LAYER_COL, &layer)?; for (name, prop) in edge.properties().temporal().iter_latest() { - state.serialize_entry(&name, &ParquetProp(&prop))?; + state.serialize_entry(&name, &SerdeProp(&prop))?; } state.end() @@ -127,7 +87,7 @@ impl<'a, G: StaticGraphViewOps> Serialize for ParquetCEdge<'a, G> { state.serialize_entry(LAYER_COL, &layer)?; for (name, prop) in edge.metadata().iter_filtered() { - state.serialize_entry(&name, &ParquetProp(&prop))?; + state.serialize_entry(&name, &SerdeProp(&prop))?; } state.end() @@ -178,7 +138,7 @@ impl<'a> Serialize for ParquetTNode<'a> { state.serialize_entry(TYPE_COL, &self.node.node_type())?; for (name, prop) in self.props.iter() { - state.serialize_entry(&self.cols[*name], &ParquetProp(prop))?; + state.serialize_entry(&self.cols[*name], &SerdeProp(prop))?; } state.end() @@ -200,7 +160,7 @@ impl<'a> Serialize for ParquetCNode<'a> { state.serialize_entry(TYPE_COL, &self.node.node_type())?; for (name, prop) in self.node.metadata().iter_filtered() { - state.serialize_entry(&name, &ParquetProp(&prop))?; + state.serialize_entry(&name, &SerdeProp(&prop))?; } state.end() From 8ec18f41aef64f14b5dcb5884d64679e69d46e3f Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 5 Nov 2025 14:56:59 +0100 Subject: [PATCH 07/47] rename _node_ methods as they are also used for edges --- db4-storage/src/properties/mod.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/db4-storage/src/properties/mod.rs b/db4-storage/src/properties/mod.rs index 2bd37509be..ae63de8e55 100644 --- a/db4-storage/src/properties/mod.rs +++ b/db4-storage/src/properties/mod.rs @@ -31,8 +31,8 @@ pub struct Properties { t_properties: TColumns, earliest: Option, latest: Option, - has_node_additions: bool, - has_node_properties: bool, + has_additions: bool, + has_properties: bool, has_deletions: bool, pub additions_count: usize, } @@ -107,12 +107,16 @@ impl Properties { self.times_from_props.get(row) } - pub fn has_node_properties(&self) -> bool { - self.has_node_properties + pub fn has_properties(&self) -> bool { + self.has_properties } - pub fn has_node_additions(&self) -> bool { - self.has_node_additions + pub fn set_has_properties(&mut self) { + self.has_properties = true + } + + pub fn has_additions(&self) -> bool { + self.has_additions } pub fn has_deletions(&self) -> bool { @@ -263,7 +267,7 @@ impl<'a> PropMutEntry<'a> { self.ensure_times_from_props(); self.set_time(t, t_prop_row); - self.properties.has_node_properties = true; + self.properties.has_properties = true; self.properties.update_earliest_latest(t); } @@ -287,7 +291,7 @@ impl<'a> PropMutEntry<'a> { .resize_with(self.row + 1, Default::default); } - self.properties.has_node_additions = true; + self.properties.has_additions = true; let prop_timestamps = &mut self.properties.additions[self.row]; prop_timestamps.set(t, edge_id); From bcae349c9cc4307d00aa881f0afa56d40cc153d5 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 5 Nov 2025 14:57:25 +0100 Subject: [PATCH 08/47] mark edge segment dirty without triggering a write for metadata-only updates --- db4-storage/src/api/edges.rs | 4 ++++ db4-storage/src/segments/edge.rs | 2 ++ 2 files changed, 6 insertions(+) diff --git a/db4-storage/src/api/edges.rs b/db4-storage/src/api/edges.rs index 815225b04a..d51361a2d2 100644 --- a/db4-storage/src/api/edges.rs +++ b/db4-storage/src/api/edges.rs @@ -53,6 +53,10 @@ pub trait EdgeSegmentOps: Send + Sync + std::fmt::Debug + 'static { fn try_head_mut(&self) -> Option>; + /// mark segment as dirty without triggering a write + fn mark_dirty(&self); + + /// notify that an edge was added (might need to write to disk) fn notify_write( &self, head_lock: impl DerefMut, diff --git a/db4-storage/src/segments/edge.rs b/db4-storage/src/segments/edge.rs index 51b58060f2..e290c70948 100644 --- a/db4-storage/src/segments/edge.rs +++ b/db4-storage/src/segments/edge.rs @@ -575,6 +575,8 @@ impl>> EdgeSegmentOps for EdgeSegm .get_layer(layer_id) .map_or(0, |layer| layer.len()) } + + fn mark_dirty(&self) {} } #[cfg(test)] From aaff34ac2b2d442c5dd7521afa3b9fc77893af86 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 5 Nov 2025 14:57:48 +0100 Subject: [PATCH 09/47] ensure we make an empty segment when there is metadata that needs to be preserved --- db4-storage/src/pages/edge_store.rs | 22 +++++++++++++- db4-storage/src/pages/node_store.rs | 47 +++++++++++++++++++---------- 2 files changed, 52 insertions(+), 17 deletions(-) diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index 24abddb9ae..e48e052bcb 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -119,14 +119,34 @@ impl, EXT: Config> EdgeStorageInner pub fn new_with_meta(edges_path: Option, edge_meta: Arc, ext: EXT) -> Self { let free_pages = (0..N).map(RwLock::new).collect::>(); - Self { + let empty = Self { segments: boxcar::Vec::new(), layer_counter: GraphStats::new().into(), free_pages: free_pages.try_into().unwrap(), edges_path, prop_meta: edge_meta, ext, + }; + let layer_mapper = empty.edge_meta().layer_meta(); + let prop_mapper = empty.edge_meta().temporal_prop_mapper(); + let metadata_mapper = empty.edge_meta().metadata_mapper(); + if layer_mapper.num_fields() > 0 + || prop_mapper.num_fields() > 0 + || metadata_mapper.num_fields() > 0 + { + let segment = empty.get_or_create_segment(0); + let mut head = segment.head_mut(); + for layer in layer_mapper.ids() { + head.get_or_create_layer(layer); + } + if prop_mapper.num_fields() > 0 { + head.get_or_create_layer(0) + .properties_mut() + .set_has_properties() + } + segment.mark_dirty(); } + empty } pub fn new(edges_path: Option, ext: EXT) -> Self { diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs index 7ce0384b75..129b184bd7 100644 --- a/db4-storage/src/pages/node_store.rs +++ b/db4-storage/src/pages/node_store.rs @@ -92,22 +92,6 @@ impl, EXT: Config> ReadLockedNodeStorage NodeStorageInner { - pub fn new_with_meta( - nodes_path: Option, - node_meta: Arc, - edge_meta: Arc, - ext: EXT, - ) -> Self { - Self { - pages: boxcar::Vec::new(), - stats: GraphStats::new().into(), - nodes_path, - node_meta, - edge_meta, - ext, - } - } - pub fn prop_meta(&self) -> &Arc { &self.node_meta } @@ -147,6 +131,37 @@ impl NodeStorageInner { } impl, EXT: Config> NodeStorageInner { + pub fn new_with_meta( + nodes_path: Option, + node_meta: Arc, + edge_meta: Arc, + ext: EXT, + ) -> Self { + let empty = Self { + pages: boxcar::Vec::new(), + stats: GraphStats::new().into(), + nodes_path, + node_meta, + edge_meta, + ext, + }; + let layer_mapper = empty.node_meta.layer_meta(); + let prop_mapper = empty.node_meta.temporal_prop_mapper(); + let metadata_mapper = empty.node_meta.metadata_mapper(); + if layer_mapper.num_fields() > 0 + || prop_mapper.num_fields() > 0 + || metadata_mapper.num_fields() > 0 + { + let segment = empty.get_or_create_segment(0); + let mut head = segment.head_mut(); + if prop_mapper.num_fields() > 0 { + head.get_or_create_layer(0) + .properties_mut() + .set_has_properties() + } + } + empty + } pub fn locked(self: &Arc) -> ReadLockedNodeStorage { let locked_segments = self .pages From 35b1d497d5cf7fc7e909bf03df573d671463f509 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Wed, 5 Nov 2025 15:06:04 +0100 Subject: [PATCH 10/47] update rust-version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c3ecb1472b..ec9e9086ec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ readme = "README.md" homepage = "https://github.com/Raphtory/raphtory/" keywords = ["graph", "temporal-graph", "temporal"] authors = ["Pometry"] -rust-version = "1.86.0" +rust-version = "1.88.0" edition = "2021" # debug symbols are using a lot of resources From 8259bafcb63874540bf42faf76e82b7090347c88 Mon Sep 17 00:00:00 2001 From: Fabian Murariu Date: Wed, 5 Nov 2025 15:19:24 +0000 Subject: [PATCH 11/47] support for Prop::Map --- Cargo.lock | 32 ++++++++++++++ db4-storage/src/pages/test_utils/checkers.rs | 2 +- db4-storage/src/pages/test_utils/fixtures.rs | 25 +++++------ db4-storage/src/pages/test_utils/props.rs | 18 ++++---- db4-storage/src/properties/mod.rs | 8 +++- raphtory-api/Cargo.toml | 3 +- .../entities/properties/prop/prop_array.rs | 2 +- .../entities/properties/prop/prop_enum.rs | 44 ++++++++++++++++++- raphtory-api/src/core/storage/mod.rs | 3 +- raphtory/src/serialise/parquet/graph.rs | 7 ++- raphtory/src/serialise/parquet/model.rs | 6 +-- 11 files changed, 114 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e14a3d2de2..c7ac2ec9b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2132,6 +2132,7 @@ dependencies = [ "roaring", "rustc-hash 2.1.1", "serde", + "serde_arrow", "serde_json", "sysinfo", "tempfile", @@ -2712,6 +2713,7 @@ version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e54c115d4f30f52c67202f079c5f9d8b49db4691f460fdb0b4c2e838261b2ba5" dependencies = [ + "bytemuck", "cfg-if", "crunchy", "num-traits", @@ -3554,6 +3556,21 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "marrow" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64369333feea08a4c974cc5d7bad82197999624d0c9508bec4b97ea9fc0e3f63" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "bytemuck", + "half", + "serde", +] + [[package]] name = "matchers" version = "0.2.0" @@ -5743,6 +5760,21 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_arrow" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "197c925e607eaed897d7912f53895097c6994fdc04fe5f7a2e61eb3898de1d26" +dependencies = [ + "arrow-array", + "arrow-schema", + "bytemuck", + "chrono", + "half", + "marrow", + "serde", +] + [[package]] name = "serde_core" version = "1.0.228" diff --git a/db4-storage/src/pages/test_utils/checkers.rs b/db4-storage/src/pages/test_utils/checkers.rs index cf40ed3368..62b2af0f72 100644 --- a/db4-storage/src/pages/test_utils/checkers.rs +++ b/db4-storage/src/pages/test_utils/checkers.rs @@ -297,7 +297,7 @@ pub fn check_graph_with_nodes_support< let actual_prop = actual_props .unwrap_or_else(|| panic!("Failed to get prop {name} for {node:?}")); assert!( - const_props.contains(&actual_prop), + const_props.iter().any(|c_prop| c_prop == &actual_prop), "failed to get const prop {name} for {node:?}, expected {const_props:?}, got {actual_prop:?}" ); } diff --git a/db4-storage/src/pages/test_utils/fixtures.rs b/db4-storage/src/pages/test_utils/fixtures.rs index 60bf71933a..e7a45a566e 100644 --- a/db4-storage/src/pages/test_utils/fixtures.rs +++ b/db4-storage/src/pages/test_utils/fixtures.rs @@ -99,7 +99,7 @@ pub fn edges_strat(size: usize) -> impl Strategy> { let srcs = (0usize..num_nodes).prop_map(VID); let dsts = (0usize..num_nodes).prop_map(VID); num_edges.prop_flat_map(move |num_edges| { - collection::vec((srcs.clone(), dsts.clone()), num_edges as usize) + collection::vec((srcs.clone(), dsts.clone()), num_edges) }) }) } @@ -121,19 +121,16 @@ pub fn edges_strat_with_layers( }) } -pub fn build_raw_edges( - len: usize, - num_nodes: usize, -) -> impl Strategy< - Value = Vec<( - VID, - VID, - i64, - Vec<(String, Prop)>, - Vec<(String, Prop)>, - Option<&'static str>, - )>, -> { +pub type EdgeValues = ( + VID, + VID, + i64, + Vec<(String, Prop)>, + Vec<(String, Prop)>, + Option<&'static str>, +); + +pub fn build_raw_edges(len: usize, num_nodes: usize) -> impl Strategy> { proptest::collection::hash_map((0i32..1000).prop_map(|i| i.to_string()), prop_type(), 0..20) .prop_flat_map(move |schema| { let (t_props, c_props) = make_props(&schema); diff --git a/db4-storage/src/pages/test_utils/props.rs b/db4-storage/src/pages/test_utils/props.rs index 28b63f2981..c7510c6bcc 100644 --- a/db4-storage/src/pages/test_utils/props.rs +++ b/db4-storage/src/pages/test_utils/props.rs @@ -19,15 +19,15 @@ pub fn prop_type() -> impl Strategy { PropType::Decimal { scale: 7 }, // decimal breaks the tests because of polars-parquet ]); - // leaf.prop_recursive(3, 10, 10, |inner| { - // let dict = proptest::collection::hash_map(r"\w{1,10}", inner.clone(), 1..10) - // .prop_map(|map| PropType::map(map)); - // let list = inner - // .clone() - // .prop_map(|p_type| PropType::List(Box::new(p_type))); - // prop_oneof![inner, list, dict] - // }) - leaf + leaf.prop_recursive(3, 10, 10, |inner| { + let dict = proptest::collection::hash_map(r"\w{1,10}", inner.clone(), 1..10) + .prop_map(PropType::map); + // let list = inner + // .clone() + // .prop_map(|p_type| PropType::List(Box::new(p_type))); + // prop_oneof![inner, list, dict] + prop_oneof![inner, dict] + }) } pub fn make_props( diff --git a/db4-storage/src/properties/mod.rs b/db4-storage/src/properties/mod.rs index 9063710cc4..d66b903ea8 100644 --- a/db4-storage/src/properties/mod.rs +++ b/db4-storage/src/properties/mod.rs @@ -17,6 +17,7 @@ use raphtory_core::{ }, storage::{PropColumn, TColumns, timeindex::TimeIndexEntry}, }; +use rustc_hash::FxHashMap; use serde_arrow::ArrayBuilder; use std::sync::Arc; @@ -209,8 +210,13 @@ impl Properties { let mut builder = ArrayBuilder::from_arrow(&fields).unwrap(); + let empty_map = FxHashMap::default(); for prop in array_iter { - builder.push(prop.as_ref().map(|m| SerdeMap(m))).unwrap(); + let item = prop + .as_ref() + .map(|m| SerdeMap(m)) + .unwrap_or_else(|| SerdeMap(&empty_map)); + builder.push(item).unwrap(); } let arrays = builder.to_arrow().unwrap(); diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index 8331cd7a1c..7ef0218659 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -37,6 +37,7 @@ sorted_vector_map = { workspace = true } arrow-array = { workspace = true, optional = true } arrow-ipc = { workspace = true, optional = true } arrow-schema = { workspace = true, optional = true } +serde_arrow = { workspace = true, optional = true } itertools = { workspace = true } iter-enum = { workspace = true } minijinja = { workspace = true, optional = true } @@ -55,6 +56,6 @@ python = [ proto = [] vectors = [] template = ["dep:minijinja"] -arrow = ["dep:arrow-array", "dep:arrow-ipc", "dep:arrow-schema"] +arrow = ["dep:arrow-array", "dep:arrow-ipc", "dep:arrow-schema", "dep:serde_arrow"] search = [] io = ["dep:serde_json"] diff --git a/raphtory-api/src/core/entities/properties/prop/prop_array.rs b/raphtory-api/src/core/entities/properties/prop/prop_array.rs index 8ab7ee0676..30e413cb04 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_array.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_array.rs @@ -218,7 +218,7 @@ impl Prop { pub fn arrow_dtype_from_prop_type(prop_type: &PropType) -> DataType { match prop_type { - PropType::Str => DataType::LargeUtf8, + PropType::Str => DataType::Utf8View, PropType::U8 => DataType::UInt8, PropType::U16 => DataType::UInt16, PropType::I32 => DataType::Int32, diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index c2a88fa024..188e6737f9 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -5,6 +5,9 @@ use crate::core::{ }, storage::arc_str::ArcStr, }; +#[cfg(feature = "arrow")] +use arrow_array::StructArray; +use arrow_schema::DataType; use bigdecimal::{num_bigint::BigInt, BigDecimal}; use chrono::{DateTime, NaiveDateTime, Utc}; use itertools::Itertools; @@ -13,6 +16,8 @@ use serde::{ ser::{SerializeMap, SerializeSeq}, Deserialize, Serialize, }; +#[cfg(feature = "arrow")] +use std::borrow::Borrow; use std::{ cmp::Ordering, collections::HashMap, @@ -79,8 +84,8 @@ impl<'a> From> for Prop { PropRef::Bool(b) => Prop::Bool(b), PropRef::List(v) => Prop::List(v.clone()), PropRef::Map(m) => Prop::Map(m.clone()), - PropRef::NDTime(dt) => Prop::NDTime(dt.clone()), - PropRef::DTime(dt) => Prop::DTime(dt.clone()), + PropRef::NDTime(dt) => Prop::NDTime(*dt), + PropRef::DTime(dt) => Prop::DTime(*dt), #[cfg(feature = "arrow")] PropRef::Array(arr) => Prop::Array(arr.clone()), PropRef::Decimal(d) => Prop::Decimal(d.clone()), @@ -330,6 +335,41 @@ impl Prop { } } +#[cfg(feature = "arrow")] +pub fn struct_array_from_props>( + dt: &DataType, + props: impl IntoIterator>, +) -> StructArray { + use serde_arrow::ArrayBuilder; + + let fields = match dt { + DataType::Struct(fields) => fields, + _ => panic!("Expected DataType::Struct, got {:?}", dt), + }; + + let mut builder = ArrayBuilder::from_arrow(fields) + .unwrap_or_else(|e| panic!("Failed to make array builder {e}")); + + let empty_map = FxHashMap::default(); + + for p in props { + match p.map(|p| p.borrow()) { + Some(Prop::Map(map)) => + builder.push(SerdeMap(map)) + .unwrap_or_else(|e| panic!("Failed to push map to array builder {e}")), + _ => { + builder + .push(SerdeMap(&empty_map)) + .unwrap_or_else(|e| panic!("Failed to push empty map to array builder {e}")); + } + } + + let arrays = builder + .to_arrow() + .unwrap_or_else(|e| panic!("Failed to convert to arrow array {e}")); + StructArray::new(fields.clone(), arrays, None) +} + impl Display for Prop { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { diff --git a/raphtory-api/src/core/storage/mod.rs b/raphtory-api/src/core/storage/mod.rs index c198014d22..ad33155ba7 100644 --- a/raphtory-api/src/core/storage/mod.rs +++ b/raphtory-api/src/core/storage/mod.rs @@ -1,5 +1,5 @@ use dashmap::DashMap; -use rustc_hash::FxHasher; +use rustc_hash::{FxBuildHasher, FxHasher}; use std::hash::BuildHasherDefault; pub mod arc_str; @@ -9,5 +9,6 @@ pub mod sorted_vec_map; pub mod timeindex; pub type FxDashMap = DashMap>; +pub type FxHashMap = std::collections::HashMap; pub type ArcRwLockReadGuard = lock_api::ArcRwLockReadGuard; diff --git a/raphtory/src/serialise/parquet/graph.rs b/raphtory/src/serialise/parquet/graph.rs index dc07581564..6b409e6c06 100644 --- a/raphtory/src/serialise/parquet/graph.rs +++ b/raphtory/src/serialise/parquet/graph.rs @@ -2,14 +2,17 @@ use crate::{ errors::GraphError, prelude::{GraphViewOps, Prop, PropertiesOps}, serialise::parquet::{ - model::SerdeProp, run_encode, EVENT_GRAPH_TYPE, GRAPH_C_PATH, GRAPH_TYPE, GRAPH_T_PATH, + run_encode, EVENT_GRAPH_TYPE, GRAPH_C_PATH, GRAPH_TYPE, GRAPH_T_PATH, PERSISTENT_GRAPH_TYPE, SECONDARY_INDEX_COL, TIME_COL, }, }; use arrow::datatypes::{DataType, Field}; use itertools::Itertools; use parquet::format::KeyValue; -use raphtory_api::{core::storage::arc_str::ArcStr, GraphType}; +use raphtory_api::{ + core::{entities::properties::prop::SerdeProp, storage::arc_str::ArcStr}, + GraphType, +}; use raphtory_core::storage::timeindex::TimeIndexEntry; use raphtory_storage::graph::graph::GraphStorage; use serde::{ser::SerializeMap, Serialize}; diff --git a/raphtory/src/serialise/parquet/model.rs b/raphtory/src/serialise/parquet/model.rs index f0f0216be9..a34b661c25 100644 --- a/raphtory/src/serialise/parquet/model.rs +++ b/raphtory/src/serialise/parquet/model.rs @@ -1,5 +1,3 @@ -use std::collections::HashMap; - use super::{ Prop, DST_COL, LAYER_COL, NODE_ID_COL, SECONDARY_INDEX_COL, SRC_COL, TIME_COL, TYPE_COL, }; @@ -12,12 +10,12 @@ use crate::{ }; use arrow::datatypes::DataType; use raphtory_api::core::{ - entities::GidType, + entities::{properties::prop::SerdeProp, GidType}, storage::{arc_str::ArcStr, timeindex::TimeIndexEntry}, }; use raphtory_storage::graph::graph::GraphStorage; use serde::{ - ser::{Error, SerializeMap, SerializeSeq}, + ser::{Error, SerializeMap}, Serialize, }; From a18336988f53c9b701ea75be1cda783a9f81bb33 Mon Sep 17 00:00:00 2001 From: Fabian Murariu Date: Wed, 5 Nov 2025 16:08:46 +0000 Subject: [PATCH 12/47] support for Prop::Map refactor --- db4-storage/src/properties/mod.rs | 30 +++++-------------- .../entities/properties/prop/prop_enum.rs | 24 ++++++++++----- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/db4-storage/src/properties/mod.rs b/db4-storage/src/properties/mod.rs index d66b903ea8..8dd66b2b0e 100644 --- a/db4-storage/src/properties/mod.rs +++ b/db4-storage/src/properties/mod.rs @@ -8,7 +8,9 @@ use arrow_schema::{DECIMAL128_MAX_PRECISION, Field, Fields}; use bigdecimal::ToPrimitive; use raphtory_api::core::entities::properties::{ meta::PropMapper, - prop::{Prop, PropType, SerdeMap, SerdeProp, arrow_dtype_from_prop_type}, + prop::{ + Prop, PropType, SerdeMap, SerdeProp, arrow_dtype_from_prop_type, struct_array_from_props, + }, }; use raphtory_core::{ entities::{ @@ -17,8 +19,6 @@ use raphtory_core::{ }, storage::{PropColumn, TColumns, timeindex::TimeIndexEntry}, }; -use rustc_hash::FxHashMap; -use serde_arrow::ArrayBuilder; use std::sync::Arc; pub mod props_meta_writer; @@ -202,25 +202,11 @@ impl Properties { .as_ref() .map(arrow_dtype_from_prop_type) .unwrap(); - let fields = match dt { - arrow::datatypes::DataType::Struct(fields) => fields, - _ => panic!("Expected Struct data type for Map property"), - }; - let array_iter = indices.map(|i| lazy_vec.get_opt(i).cloned()); - - let mut builder = ArrayBuilder::from_arrow(&fields).unwrap(); - - let empty_map = FxHashMap::default(); - for prop in array_iter { - let item = prop - .as_ref() - .map(|m| SerdeMap(m)) - .unwrap_or_else(|| SerdeMap(&empty_map)); - builder.push(item).unwrap(); - } - - let arrays = builder.to_arrow().unwrap(); - let struct_array = StructArray::new(fields, arrays, None); + let array_iter = indices + .map(|i| lazy_vec.get_opt(i)) + .map(|e| e.map(|m| SerdeMap(m))); + + let struct_array = struct_array_from_props(&dt, |sm| *sm, array_iter); Some(Arc::new(struct_array)) } diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index 188e6737f9..e4b4593a15 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -158,6 +158,7 @@ impl PartialOrd for Prop { pub struct SerdeProp<'a>(pub &'a Prop); pub struct SedeList<'a>(pub &'a Vec); +#[derive(Clone, Copy)] pub struct SerdeMap<'a>(pub &'a HashMap); impl<'a> Serialize for SedeList<'a> { @@ -242,6 +243,13 @@ impl Prop { Prop::Map(h_map.into()) } + pub fn as_map(&self) -> Option> { + match self { + Prop::Map(map) => Some(SerdeMap(map)), + _ => None, + } + } + pub fn dtype(&self) -> PropType { match self { Prop::Str(_) => PropType::Str, @@ -336,8 +344,9 @@ impl Prop { } #[cfg(feature = "arrow")] -pub fn struct_array_from_props>( +pub fn struct_array_from_props

( dt: &DataType, + as_serde_map: impl Fn(&P) -> SerdeMap<'_> + Copy, props: impl IntoIterator>, ) -> StructArray { use serde_arrow::ArrayBuilder; @@ -353,14 +362,13 @@ pub fn struct_array_from_props>( let empty_map = FxHashMap::default(); for p in props { - match p.map(|p| p.borrow()) { - Some(Prop::Map(map)) => - builder.push(SerdeMap(map)) + match p.as_ref().map(as_serde_map) { + Some(map) => builder + .push(map) .unwrap_or_else(|e| panic!("Failed to push map to array builder {e}")), - _ => { - builder - .push(SerdeMap(&empty_map)) - .unwrap_or_else(|e| panic!("Failed to push empty map to array builder {e}")); + _ => builder + .push(SerdeMap(&empty_map)) + .unwrap_or_else(|e| panic!("Failed to push empty map to array builder {e}")), } } From 3a208c032cb4bfdfc11097de43a40c45e7816882 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 6 Nov 2025 11:31:36 +0100 Subject: [PATCH 13/47] initialise layers in materialize_at as doing it in resolve deadlocks in the parquet loaders --- .../src/core/entities/properties/meta.rs | 5 ++- raphtory-api/src/core/storage/dict_mapper.rs | 2 +- .../src/mutation/addition_ops_ext.rs | 10 ----- raphtory/src/db/api/view/graph.rs | 40 ++++++++++--------- 4 files changed, 27 insertions(+), 30 deletions(-) diff --git a/raphtory-api/src/core/entities/properties/meta.rs b/raphtory-api/src/core/entities/properties/meta.rs index 677d7a2f34..ab00261a27 100644 --- a/raphtory-api/src/core/entities/properties/meta.rs +++ b/raphtory-api/src/core/entities/properties/meta.rs @@ -45,9 +45,12 @@ impl Meta { pub fn set_metadata_mapper(&mut self, meta: PropMapper) { self.metadata_mapper = meta; } - pub fn set_temporal_prop_meta(&mut self, meta: PropMapper) { + pub fn set_temporal_prop_mapper(&mut self, meta: PropMapper) { self.temporal_prop_mapper = meta; } + pub fn set_layer_mapper(&mut self, meta: DictMapper) { + self.layer_mapper = meta; + } pub fn metadata_mapper(&self) -> &PropMapper { &self.metadata_mapper } diff --git a/raphtory-api/src/core/storage/dict_mapper.rs b/raphtory-api/src/core/storage/dict_mapper.rs index fc96009230..972479d7d2 100644 --- a/raphtory-api/src/core/storage/dict_mapper.rs +++ b/raphtory-api/src/core/storage/dict_mapper.rs @@ -13,7 +13,7 @@ use std::{ sync::Arc, }; -#[derive(Serialize, Deserialize, Default, Debug)] +#[derive(Serialize, Deserialize, Default, Debug, Clone)] pub struct DictMapper { map: Arc>>, reverse_map: Arc>>, diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index bd86b3318a..e89adaaaad 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -223,16 +223,6 @@ impl InternalAdditionOps for TemporalGraph { if id > MAX_LAYER { Err(TooManyLayers)?; } - let edge_segment = self.storage().edges().get_or_create_segment(0); - let mut edge_segment_head = edge_segment.head_mut(); - edge_segment_head.get_or_create_layer(id); - edge_segment.notify_write(edge_segment_head)?; - - let node_segment = self.storage().nodes().get_or_create_segment(0); - - let mut node_segment_head = node_segment.head_mut(); - node_segment_head.get_or_create_layer(id); - node_segment.notify_write(node_segment_head)?; } Ok(id) } diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index e5510103c6..30ba895157 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -252,20 +252,10 @@ impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { let mut edge_meta = Meta::new_for_edges(); node_meta.set_metadata_mapper(self.node_meta().metadata_mapper().deep_clone()); - node_meta.set_temporal_prop_meta(self.node_meta().temporal_prop_mapper().deep_clone()); + node_meta.set_temporal_prop_mapper(self.node_meta().temporal_prop_mapper().deep_clone()); edge_meta.set_metadata_mapper(self.edge_meta().metadata_mapper().deep_clone()); - edge_meta.set_temporal_prop_meta(self.edge_meta().temporal_prop_mapper().deep_clone()); - - let mut temporal_graph = TemporalGraph::new_with_meta( - path.map(|p| p.into()), - node_meta, - edge_meta, - storage.extension().clone(), - ) - .unwrap(); - - // Copy all graph properties - temporal_graph.graph_meta = self.graph_meta().deep_clone().into(); + edge_meta.set_temporal_prop_mapper(self.edge_meta().temporal_prop_mapper().deep_clone()); + let layer_meta = edge_meta.layer_meta(); let layer_map: Vec<_> = match self.layer_ids() { LayerIds::None => { @@ -276,29 +266,43 @@ impl<'graph, G: GraphView + 'graph> GraphViewOps<'graph> for G { let layers = storage.edge_meta().layer_meta().keys(); let mut layer_map = vec![0; storage.edge_meta().layer_meta().num_all_fields()]; for (id, name) in storage.edge_meta().layer_meta().ids().zip(layers.iter()) { - let new_id = temporal_graph.resolve_layer(Some(&name))?.inner(); + let new_id = layer_meta.get_or_create_id(name).inner(); layer_map[id] = new_id; } layer_map } LayerIds::One(l_id) => { let mut layer_map = vec![0; storage.edge_meta().layer_meta().num_all_fields()]; - let new_id = temporal_graph - .resolve_layer(Some(&storage.edge_meta().get_layer_name_by_id(*l_id)))?; - layer_map[*l_id] = new_id.inner(); + let new_id = layer_meta + .get_or_create_id(&storage.edge_meta().get_layer_name_by_id(*l_id)) + .inner(); + layer_map[*l_id] = new_id; layer_map } LayerIds::Multiple(ids) => { let mut layer_map = vec![0; storage.edge_meta().layer_meta().num_all_fields()]; let layers = storage.edge_meta().layer_meta().all_keys(); for id in ids { - let new_id = temporal_graph.resolve_layer(Some(&layers[id]))?.inner(); + let new_id = layer_meta.get_or_create_id(&layers[id]).inner(); layer_map[id] = new_id; } layer_map } }; + node_meta.set_layer_mapper(layer_meta.clone()); + + let mut temporal_graph = TemporalGraph::new_with_meta( + path.map(|p| p.into()), + node_meta, + edge_meta, + storage.extension().clone(), + ) + .unwrap(); + + // Copy all graph properties + temporal_graph.graph_meta = self.graph_meta().deep_clone().into(); + if let Some(earliest) = self.earliest_time() { temporal_graph.update_time(TimeIndexEntry::start(earliest)); } else { From 02327f928c6ebefd08271b089a0369ac1df56b0a Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 7 Nov 2025 09:39:04 +0100 Subject: [PATCH 14/47] much more useful location in panic message for graph/search assert functions --- raphtory/src/db/graph/assertions.rs | 6 ++++++ raphtory/src/db/graph/graph.rs | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/raphtory/src/db/graph/assertions.rs b/raphtory/src/db/graph/assertions.rs index 63ba2a3d88..03008434f2 100644 --- a/raphtory/src/db/graph/assertions.rs +++ b/raphtory/src/db/graph/assertions.rs @@ -152,6 +152,7 @@ impl ApplyFilter for SearchEdges } } +#[track_caller] pub fn assert_filter_nodes_results( init_graph: impl FnOnce(Graph) -> Graph, transform: impl GraphTransformer, @@ -169,6 +170,7 @@ pub fn assert_filter_nodes_results( ) } +#[track_caller] pub fn assert_filter_neighbours_results( init_graph: impl FnOnce(Graph) -> Graph, transform: impl GraphTransformer, @@ -188,6 +190,7 @@ pub fn assert_filter_neighbours_results( ) } +#[track_caller] pub fn assert_search_nodes_results( init_graph: impl FnOnce(Graph) -> Graph, transform: impl GraphTransformer, @@ -208,6 +211,7 @@ pub fn assert_search_nodes_results( } } +#[track_caller] pub fn assert_filter_edges_results( init_graph: impl FnOnce(Graph) -> Graph, transform: impl GraphTransformer, @@ -225,6 +229,7 @@ pub fn assert_filter_edges_results( ) } +#[track_caller] pub fn assert_search_edges_results( init_graph: impl FnOnce(Graph) -> Graph, transform: impl GraphTransformer, @@ -245,6 +250,7 @@ pub fn assert_search_edges_results( } } +#[track_caller] fn assert_results( init_graph: impl FnOnce(Graph) -> Graph, pre_transform: impl Fn(&Graph) -> (), diff --git a/raphtory/src/db/graph/graph.rs b/raphtory/src/db/graph/graph.rs index fff97f26d1..61ace66bf0 100644 --- a/raphtory/src/db/graph/graph.rs +++ b/raphtory/src/db/graph/graph.rs @@ -89,6 +89,7 @@ pub fn graph_equal<'graph1, 'graph2, G1: GraphViewOps<'graph1>, G2: GraphViewOps } } +#[track_caller] pub fn assert_node_equal< 'graph, G1: GraphViewOps<'graph>, @@ -102,6 +103,7 @@ pub fn assert_node_equal< assert_node_equal_layer(n1, n2, "", false) } +#[track_caller] pub fn assert_node_equal_layer< 'graph, G1: GraphViewOps<'graph>, @@ -248,6 +250,7 @@ pub fn assert_node_equal_layer< } } +#[track_caller] pub fn assert_nodes_equal< 'graph, G1: GraphViewOps<'graph>, @@ -261,6 +264,7 @@ pub fn assert_nodes_equal< assert_nodes_equal_layer(nodes1, nodes2, "", false); } +#[track_caller] pub fn assert_nodes_equal_layer< 'graph, G1: GraphViewOps<'graph>, @@ -287,6 +291,7 @@ pub fn assert_nodes_equal_layer< } } +#[track_caller] pub fn assert_edges_equal< 'graph1, 'graph2, @@ -301,6 +306,7 @@ pub fn assert_edges_equal< assert_edges_equal_layer(edges1, edges2, "", false); } +#[track_caller] pub fn assert_edges_equal_layer< 'graph1, 'graph2, @@ -408,6 +414,7 @@ pub fn assert_edges_equal_layer< } } +#[track_caller] fn assert_graph_equal_layer<'graph, G1: GraphViewOps<'graph>, G2: GraphViewOps<'graph>>( g1: &G1, g2: &G2, @@ -457,6 +464,7 @@ fn assert_graph_equal_layer<'graph, G1: GraphViewOps<'graph>, G2: GraphViewOps<' assert_edges_equal_layer(&g1.edges(), &g2.edges(), layer_tag, persistent); } +#[track_caller] fn assert_graph_equal_inner<'graph, G1: GraphViewOps<'graph>, G2: GraphViewOps<'graph>>( g1: &G1, g2: &G2, @@ -485,6 +493,7 @@ fn assert_graph_equal_inner<'graph, G1: GraphViewOps<'graph>, G2: GraphViewOps<' }) } +#[track_caller] pub fn assert_graph_equal<'graph, G1: GraphViewOps<'graph>, G2: GraphViewOps<'graph>>( g1: &G1, g2: &G2, @@ -493,6 +502,7 @@ pub fn assert_graph_equal<'graph, G1: GraphViewOps<'graph>, G2: GraphViewOps<'gr } /// Equality check for materialized persistent graph that ignores the updates generated by the materialise at graph.earliest_time() +#[track_caller] pub fn assert_persistent_materialize_graph_equal< 'graph, G1: GraphViewOps<'graph>, From 183c67dfa7845444f36cad594b3f6d5b1bea05d3 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 7 Nov 2025 13:02:57 +0100 Subject: [PATCH 15/47] add dirty flag support for nodes --- db4-storage/src/api/nodes.rs | 2 ++ db4-storage/src/pages/node_store.rs | 1 + db4-storage/src/segments/node.rs | 2 ++ 3 files changed, 5 insertions(+) diff --git a/db4-storage/src/api/nodes.rs b/db4-storage/src/api/nodes.rs index 93168f73f9..b27f83b767 100644 --- a/db4-storage/src/api/nodes.rs +++ b/db4-storage/src/api/nodes.rs @@ -89,6 +89,8 @@ pub trait NodeSegmentOps: Send + Sync + std::fmt::Debug + 'static { head_lock: impl DerefMut, ) -> Result<(), StorageError>; + fn mark_dirty(&self); + fn check_node(&self, pos: LocalPOS, layer_id: usize) -> bool; fn get_out_edge( diff --git a/db4-storage/src/pages/node_store.rs b/db4-storage/src/pages/node_store.rs index 129b184bd7..bc4c007cb5 100644 --- a/db4-storage/src/pages/node_store.rs +++ b/db4-storage/src/pages/node_store.rs @@ -159,6 +159,7 @@ impl, EXT: Config> NodeStorageInner .properties_mut() .set_has_properties() } + segment.mark_dirty(); } empty } diff --git a/db4-storage/src/segments/node.rs b/db4-storage/src/segments/node.rs index d1216260b5..63b1d9f05d 100644 --- a/db4-storage/src/segments/node.rs +++ b/db4-storage/src/segments/node.rs @@ -479,6 +479,8 @@ impl>> NodeSegmentOps for NodeSegm Ok(()) } + fn mark_dirty(&self) {} + fn check_node(&self, _pos: LocalPOS, _layer_id: usize) -> bool { false } From e3590883e74f28eae2d2ff33355414f5fc7c29ed Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 7 Nov 2025 18:00:11 +0100 Subject: [PATCH 16/47] start triaging tests that are known to fail for now --- .../graph/storage_ops/time_semantics.rs | 1 + raphtory/src/search/mod.rs | 76 ++----------------- 2 files changed, 8 insertions(+), 69 deletions(-) diff --git a/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs b/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs index 98c1481129..3eeaf7ed15 100644 --- a/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs +++ b/raphtory/src/db/api/storage/graph/storage_ops/time_semantics.rs @@ -810,6 +810,7 @@ mod test_graph_storage { }; #[test] + #[ignore = "TODO: #2372"] fn test_search_edges_latest() { let g = Graph::new(); let g = init_graph_for_edges_tests(g); diff --git a/raphtory/src/search/mod.rs b/raphtory/src/search/mod.rs index 3b8a9409b5..dc9c63dd13 100644 --- a/raphtory/src/search/mod.rs +++ b/raphtory/src/search/mod.rs @@ -263,34 +263,14 @@ mod test_index { let result = graph.encode(path); match result { - Err(GraphError::IOError { source }) => { - assert!( - format!("{source}").contains("Cannot write graph into non empty folder"), - ); + Err(GraphError::NonEmptyGraphFolder(err_path)) => { + assert_eq!(path, err_path); } Ok(_) => panic!("Expected error on second encode, got Ok"), Err(e) => panic!("Unexpected error type: {:?}", e), } } - #[test] - fn test_write_updates_to_already_encoded_graph_succeeds() { - let graph = init_graph(); - graph.create_index().unwrap(); - let binding = tempfile::TempDir::new().unwrap(); - let path = binding.path(); - - graph - .add_node(1, "Ozai", [("prop", 1)], Some("fire_nation")) - .unwrap(); - - // TODO: This test currently fails since graph updates are not propagated - // to the search index. - - let graph = Graph::decode(path, None).unwrap(); - assert_search_results(&graph, &NodeFilter::name().eq("Ozai"), vec!["Ozai"]); - } - #[test] fn test_create_index_persist_index_on_encode_update_index_load_persisted_index_on_decode() { let graph = init_graph(); @@ -503,53 +483,6 @@ mod test_index { assert_search_results(&graph, &filter, vec!["Alice"]); } - #[test] - fn test_cached_graph_view() { - global_info_logger(); - let graph = init_graph(); - graph.create_index().unwrap(); - - let binding = tempfile::TempDir::new().unwrap(); - let path = binding.path(); - - graph - .add_node( - 2, - "Tommy", - vec![("p1", Prop::U64(5u64))], - Some("water_tribe"), - ) - .unwrap(); - - let graph = Graph::decode(path, None).unwrap(); - let filter = NodeFilter::name().eq("Tommy"); - assert_search_results(&graph, &filter, vec!["Tommy"]); - } - - #[test] - fn test_cached_graph_view_create_index_after_graph_is_cached() { - global_info_logger(); - let graph = init_graph(); - - let binding = tempfile::TempDir::new().unwrap(); - let path = binding.path(); - // Creates index in a temp dir within graph dir - graph.create_index().unwrap(); - - graph - .add_node( - 2, - "Tommy", - vec![("p1", Prop::U64(5u64))], - Some("water_tribe"), - ) - .unwrap(); - - let graph = Graph::decode(path, None).unwrap(); - let filter = NodeFilter::name().eq("Tommy"); - assert_search_results(&graph, &filter, vec!["Tommy"]); - } - #[test] #[ignore] fn test_too_many_open_files_graph_index() { @@ -698,6 +631,7 @@ mod test_index { } #[test] + #[ignore = "TODO: #2372"] fn test_with_all_props_index_spec() { let graph = init_graph(); let index_spec = IndexSpecBuilder::new(graph.clone()) @@ -729,6 +663,7 @@ mod test_index { } #[test] + #[ignore = "TODO: #2372"] fn test_with_selected_props_index_spec() { let graph = init_graph(); let index_spec = IndexSpecBuilder::new(graph.clone()) @@ -798,6 +733,7 @@ mod test_index { } #[test] + #[ignore = "TODO: #2372"] fn test_mixed_node_and_edge_props_index_spec() { let graph = init_graph(); @@ -851,6 +787,7 @@ mod test_index { } #[test] + #[ignore = "TODO: #2372"] fn test_get_index_spec_updated_index() { let graph = init_graph(); @@ -884,6 +821,7 @@ mod test_index { } #[test] + #[ignore = "TODO: #2372"] fn test_get_index_spec_updated_index_persisted_and_loaded() { let graph = init_graph(); From 5d9619b3d95d5eb422bd5e440eeb5bcf8dba4254 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 7 Nov 2025 18:00:29 +0100 Subject: [PATCH 17/47] don't overwrite an existing file --- raphtory/src/serialise/serialise.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index 980790b98b..d81ed18fab 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -36,7 +36,7 @@ impl StableEncode for T { let folder: GraphFolder = path.into(); if folder.write_as_zip_format { - let file = File::create(&folder.get_base_path())?; + let file = File::create_new(&folder.get_base_path())?; self.encode_parquet_to_zip(file)?; #[cfg(feature = "search")] From 7b10b989ae801e08a6520152b293502903913f89 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 7 Nov 2025 18:00:47 +0100 Subject: [PATCH 18/47] is_decodable needs to check for zip --- raphtory/src/serialise/graph_folder.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/raphtory/src/serialise/graph_folder.rs b/raphtory/src/serialise/graph_folder.rs index f549aa5932..1d1c2913a4 100644 --- a/raphtory/src/serialise/graph_folder.rs +++ b/raphtory/src/serialise/graph_folder.rs @@ -123,7 +123,9 @@ impl GraphFolder { ); // Either decode a graph serialized using encode or load using underlying storage. - let graph = if MaterializedGraph::is_decodable(self.get_graph_path()) { + let graph = if self.is_zip() + || MaterializedGraph::is_decodable(self.get_graph_path()) + { MaterializedGraph::decode(self, None)? } else { // We currently do not have a way of figuring out the graph type From 247283c6073fcadb839073601f0f9b19e67502e6 Mon Sep 17 00:00:00 2001 From: Fabian Murariu Date: Fri, 7 Nov 2025 18:13:22 +0000 Subject: [PATCH 19/47] various changes for ArrowRow and PropRef --- Cargo.lock | 1 + db4-storage/src/properties/mod.rs | 11 +- raphtory-api/Cargo.toml | 2 +- .../core/entities/properties/prop/arrow.rs | 338 +++++++++++++++++- .../src/core/entities/properties/prop/mod.rs | 4 + .../entities/properties/prop/prop_enum.rs | 41 ++- .../entities/properties/prop/prop_ref_enum.rs | 193 +++++++++- raphtory-core/src/storage/mod.rs | 24 +- raphtory/src/test_utils.rs | 20 +- 9 files changed, 570 insertions(+), 64 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c7ac2ec9b8..d7a7ebeadd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5099,6 +5099,7 @@ dependencies = [ "rayon", "rustc-hash 2.1.1", "serde", + "serde_arrow", "serde_json", "sorted_vector_map", "thiserror 2.0.17", diff --git a/db4-storage/src/properties/mod.rs b/db4-storage/src/properties/mod.rs index 8dd66b2b0e..3d5939b20e 100644 --- a/db4-storage/src/properties/mod.rs +++ b/db4-storage/src/properties/mod.rs @@ -1,16 +1,13 @@ use crate::error::StorageError; use arrow_array::{ ArrayRef, BooleanArray, Decimal128Array, Float32Array, Float64Array, Int32Array, Int64Array, - StringViewArray, StructArray, TimestampMillisecondArray, UInt8Array, UInt16Array, UInt32Array, - UInt64Array, + StringViewArray, TimestampMillisecondArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, }; -use arrow_schema::{DECIMAL128_MAX_PRECISION, Field, Fields}; +use arrow_schema::DECIMAL128_MAX_PRECISION; use bigdecimal::ToPrimitive; use raphtory_api::core::entities::properties::{ meta::PropMapper, - prop::{ - Prop, PropType, SerdeMap, SerdeProp, arrow_dtype_from_prop_type, struct_array_from_props, - }, + prop::{Prop, PropType, SerdeMap, arrow_dtype_from_prop_type, struct_array_from_props}, }; use raphtory_core::{ entities::{ @@ -246,7 +243,7 @@ impl Properties { } } - pub(crate) fn t_len(&self) -> usize { + pub fn t_len(&self) -> usize { self.t_properties.len() } diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index 7ef0218659..da4cfda2b6 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -47,7 +47,7 @@ display-error-chain = { workspace = true, optional = true } proptest.workspace = true [features] -default = [] +default = ["arrow"] # Enables generating the pyo3 python bindings python = [ "dep:pyo3", "dep:pyo3-arrow", "dep:display-error-chain" diff --git a/raphtory-api/src/core/entities/properties/prop/arrow.rs b/raphtory-api/src/core/entities/properties/prop/arrow.rs index 22bc5c1b43..1c7e5a60b5 100644 --- a/raphtory-api/src/core/entities/properties/prop/arrow.rs +++ b/raphtory-api/src/core/entities/properties/prop/arrow.rs @@ -1,5 +1,333 @@ -use crate::core::{ - entities::properties::{prop::Prop, prop_array::PropArray}, - PropType, -}; -use std::sync::Arc; +use arrow_array::{cast::AsArray, types::*, Array, ArrowPrimitiveType, StructArray}; +use arrow_schema::{DataType, TimeUnit}; +use chrono::DateTime; +use serde::{ser::SerializeMap, Serialize}; + +use crate::core::entities::properties::prop::{Prop, PropRef}; + +#[derive(Debug, Clone, Copy)] +pub struct ArrowRow<'a> { + array: &'a StructArray, + index: usize, +} + +impl<'a> PartialEq for ArrowRow<'a> { + // this has the downside of returning false for rows with same fields but different order of columns + fn eq(&self, other: &Self) -> bool { + if self.array.num_columns() != other.array.num_columns() { + return false; + } + + for col in 0..self.array.num_columns() { + let self_prop = self.prop_ref(col); + let other_prop = other.prop_ref(col); + if self_prop != other_prop { + return false; + } + } + true + } +} + +impl<'a> Serialize for ArrowRow<'a> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut state = serializer.serialize_map(Some(self.array.num_columns()))?; + for col in 0..self.array.num_columns() { + let field = &self.array.fields()[col]; + let key = field.name(); + let value = self.prop_ref(col); + state.serialize_entry(key, &value)?; + } + state.end() + } +} + +impl<'a> ArrowRow<'a> { + pub fn primitive_value(&self, col: usize) -> Option { + let primitive_array = self.array.column(col).as_primitive_opt::()?; + (primitive_array.len() > self.index && !primitive_array.is_null(self.index)).then(|| primitive_array.value(self.index)) + } + + fn primitive_dt(&self, col: usize) -> Option<(T::Native, &DataType)> { + let col = self.array.column(col).as_primitive_opt::()?; + (col.len() > self.index && !col.is_null(self.index)).then(|| (col.value(self.index), col.data_type())) + } + + fn primitive_prop(&self, col: usize) -> Option { + let (value, dt) = self.primitive_dt::(col)?; + let prop = T::prop(value, dt); + Some(prop) + } + + fn primitive_prop_ref(self, col: usize) -> Option> { + let col = self.array.column(col).as_primitive_opt::()?; + let (value, dt) = + (col.len() > self.index && !col.is_null(self.index)).then(|| (col.value(self.index), col.data_type()))?; + let prop_ref = T::prop_ref(value, dt); + Some(prop_ref) + } + + fn struct_prop(&self, col: usize) -> Option { + let column = self.array.column(col).as_struct_opt()?; + let row = ArrowRow::new(column, self.index); + row.into_prop() + } + + fn struct_prop_ref(&self, col: usize) -> Option> { + let column = self.array.column(col).as_struct_opt()?; + let row = ArrowRow::new(column, self.index); + (column.len() > self.index).then(|| PropRef::from(row)) + } + + pub fn bool_value(&self, col: usize) -> Option { + let column = self.array.column(col); + match column.data_type() { + DataType::Boolean => { + let col = column.as_boolean(); + (col.len() > self.index && !col.is_null(self.index)).then(|| col.value(self.index)) + } + _ => None, + } + } + + pub fn str_value(self, col: usize) -> Option<&'a str> { + let column = self.array.column(col); + let len = column.len(); + let valid = len > self.index && !column.is_null(self.index); + match column.data_type() { + DataType::Utf8 => { + valid.then(|| column.as_string::().value(self.index)) + } + DataType::LargeUtf8 => { + valid.then(|| column.as_string::().value(self.index)) + } + DataType::Utf8View => { + valid.then(|| column.as_string_view().value(self.index)) + } + _ => None, + } + } + + pub fn prop_value(self, col: usize) -> Option { + let dtype = self.array.fields().get(col)?.data_type(); + match dtype { + DataType::Null => None, + DataType::Boolean => self.bool_value(col).map(|b| b.into()), + DataType::Int32 => self.primitive_prop::(col), + DataType::Int64 => self.primitive_prop::(col), + DataType::UInt8 => self.primitive_prop::(col), + DataType::UInt16 => self.primitive_prop::(col), + DataType::UInt32 => self.primitive_prop::(col), + DataType::UInt64 => self.primitive_prop::(col), + DataType::Float32 => self.primitive_prop::(col), + DataType::Float64 => self.primitive_prop::(col), + DataType::Timestamp(unit, _) => match unit { + TimeUnit::Second => self.primitive_prop::(col), + TimeUnit::Millisecond => self.primitive_prop::(col), + TimeUnit::Microsecond => self.primitive_prop::(col), + TimeUnit::Nanosecond => self.primitive_prop::(col), + }, + DataType::Date32 => self.primitive_prop::(col), + DataType::Date64 => self.primitive_prop::(col), + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => { + self.str_value(col).map(|v| v.into()) + } + DataType::Decimal128(_, _) => self.primitive_prop::(col), + DataType::Struct(_) => self.struct_prop(col), + _ => None, + } + } + + pub fn prop_ref(self, col: usize) -> Option> { + let dtype = self.array.fields().get(col)?.data_type(); + match dtype { + DataType::Null => None, + DataType::Boolean => self.bool_value(col).map(|b| b.into()), + DataType::Int32 => self.primitive_prop_ref::(col), + DataType::Int64 => self.primitive_prop_ref::(col), + DataType::UInt8 => self.primitive_prop_ref::(col), + DataType::UInt16 => self.primitive_prop_ref::(col), + DataType::UInt32 => self.primitive_prop_ref::(col), + DataType::UInt64 => self.primitive_prop_ref::(col), + DataType::Float32 => self.primitive_prop_ref::(col), + DataType::Float64 => self.primitive_prop_ref::(col), + DataType::Timestamp(unit, _) => match unit { + TimeUnit::Second => self.primitive_prop_ref::(col), + TimeUnit::Millisecond => self.primitive_prop_ref::(col), + TimeUnit::Microsecond => self.primitive_prop_ref::(col), + TimeUnit::Nanosecond => self.primitive_prop_ref::(col), + }, + DataType::Date32 => self.primitive_prop_ref::(col), + DataType::Date64 => self.primitive_prop_ref::(col), + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => { + self.str_value(col).map(|v| v.into()) + } + DataType::Decimal128(_, _) => self.primitive_prop_ref::(col), + DataType::Struct(_) => self.struct_prop_ref(col), + _ => None, + } + } + + pub fn into_prop(self) -> Option { + let map = Prop::map( + self.array + .fields() + .iter() + .enumerate() + .filter_map(|(col, field)| Some((field.name().as_ref(), self.prop_value(col)?))), + ); + match map { + Prop::Map(m) if m.is_empty() => None, + _ => Some(map), + } + } + + pub fn is_valid(&self, col: usize) -> bool { + self.array.column(col).is_valid(self.index) + } +} + +impl<'a> ArrowRow<'a> { + pub fn new(array: &'a StructArray, index: usize) -> Self { + Self { array, index } + } + + pub fn get(&self, column: usize) -> Option<&T> { + self.array.column(column).as_any().downcast_ref() + } +} + +pub trait DirectConvert: ArrowPrimitiveType { + fn prop_ref(native: Self::Native, dtype: &DataType) -> PropRef<'static>; + fn prop(native: Self::Native, dtype: &DataType) -> Prop { + Self::prop_ref(native, dtype).into() + } +} + +impl DirectConvert for UInt8Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for UInt16Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for UInt32Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for UInt64Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for Int32Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for Int64Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for Float32Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for Float64Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(native) + } +} + +impl DirectConvert for Date64Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from(DateTime::from_timestamp_millis(native).unwrap()) + } +} + +impl DirectConvert for Date32Type { + fn prop_ref(native: Self::Native, _dtype: &DataType) -> PropRef<'static> { + PropRef::from( + Date32Type::to_naive_date(native) + .and_hms_opt(0, 0, 0) + .unwrap() + .and_utc(), + ) + } +} + +impl DirectConvert for TimestampNanosecondType { + fn prop_ref(native: Self::Native, dtype: &DataType) -> PropRef<'static> { + match dtype { + DataType::Timestamp(_, tz) => match tz { + None => PropRef::from(DateTime::from_timestamp_nanos(native).naive_utc()), + Some(_) => PropRef::from(DateTime::from_timestamp_nanos(native)), + }, + _ => unreachable!(), + } + } +} + +impl DirectConvert for TimestampMicrosecondType { + fn prop_ref(native: Self::Native, dtype: &DataType) -> PropRef<'static> { + match dtype { + DataType::Timestamp(_, tz) => match tz { + None => PropRef::from(DateTime::from_timestamp_micros(native).unwrap().naive_utc()), + Some(_) => PropRef::from(DateTime::from_timestamp_micros(native).unwrap()), + }, + _ => unreachable!(), + } + } +} + +impl DirectConvert for TimestampMillisecondType { + fn prop_ref(native: Self::Native, dtype: &DataType) -> PropRef<'static> { + match dtype { + DataType::Timestamp(_, tz) => match tz { + None => PropRef::from(DateTime::from_timestamp_millis(native).unwrap().naive_utc()), + Some(_) => PropRef::from(DateTime::from_timestamp_millis(native).unwrap()), + }, + _ => unreachable!(), + } + } +} + +impl DirectConvert for TimestampSecondType { + fn prop_ref(native: Self::Native, dtype: &DataType) -> PropRef<'static> { + match dtype { + DataType::Timestamp(_, tz) => match tz { + None => PropRef::from(DateTime::from_timestamp(native, 0).unwrap().naive_utc()), + Some(_) => PropRef::from(DateTime::from_timestamp(native, 0).unwrap()), + }, + _ => unreachable!(), + } + } +} + +impl DirectConvert for Decimal128Type { + fn prop_ref(native: Self::Native, dtype: &DataType) -> PropRef<'static> { + match dtype { + DataType::Decimal128(_, scale) => PropRef::Decimal { + num: native, + scale: *scale as i8, + }, + _ => unreachable!(), + } + } +} diff --git a/raphtory-api/src/core/entities/properties/prop/mod.rs b/raphtory-api/src/core/entities/properties/prop/mod.rs index a7e4ff1952..eb13449c2e 100644 --- a/raphtory-api/src/core/entities/properties/prop/mod.rs +++ b/raphtory-api/src/core/entities/properties/prop/mod.rs @@ -1,4 +1,6 @@ #[cfg(feature = "arrow")] +pub mod arrow; +#[cfg(feature = "arrow")] mod prop_array; mod prop_enum; mod prop_ref_enum; @@ -10,6 +12,8 @@ mod serde; #[cfg(feature = "template")] mod template; +#[cfg(feature = "arrow")] +pub use arrow::*; #[cfg(feature = "arrow")] pub use prop_array::*; pub use prop_enum::*; diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index e4b4593a15..db8a1f346b 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -1,6 +1,6 @@ use crate::core::{ entities::{ - properties::prop::{prop_ref_enum::PropRef, PropType}, + properties::prop::{prop_ref_enum::PropRef, PropNum, PropType}, GidRef, }, storage::arc_str::ArcStr, @@ -16,8 +16,6 @@ use serde::{ ser::{SerializeMap, SerializeSeq}, Deserialize, Serialize, }; -#[cfg(feature = "arrow")] -use std::borrow::Borrow; use std::{ cmp::Ordering, collections::HashMap, @@ -70,25 +68,31 @@ impl From> for Prop { } impl<'a> From> for Prop { - fn from(prop_ref: PropRef<'a>) -> Self { - match prop_ref { - PropRef::Str(s) => Prop::str(s), - PropRef::U8(u) => Prop::U8(u), - PropRef::U16(u) => Prop::U16(u), - PropRef::I32(i) => Prop::I32(i), - PropRef::I64(i) => Prop::I64(i), - PropRef::U32(u) => Prop::U32(u), - PropRef::U64(u) => Prop::U64(u), - PropRef::F32(f) => Prop::F32(f), - PropRef::F64(f) => Prop::F64(f), + fn from(value: PropRef<'a>) -> Self { + match value { + PropRef::Str(s) => Prop::Str(s.into()), + PropRef::Num(n) => match n { + PropNum::U8(u) => Prop::U8(u), + PropNum::U16(u) => Prop::U16(u), + PropNum::I32(i) => Prop::I32(i), + PropNum::I64(i) => Prop::I64(i), + PropNum::U32(u) => Prop::U32(u), + PropNum::U64(u) => Prop::U64(u), + PropNum::F32(f) => Prop::F32(f), + PropNum::F64(f) => Prop::F64(f), + }, PropRef::Bool(b) => Prop::Bool(b), PropRef::List(v) => Prop::List(v.clone()), - PropRef::Map(m) => Prop::Map(m.clone()), - PropRef::NDTime(dt) => Prop::NDTime(*dt), - PropRef::DTime(dt) => Prop::DTime(*dt), + PropRef::Map(m) => m + .into_prop() + .unwrap_or_else(|| Prop::Map(Arc::new(Default::default()))), + PropRef::NDTime(dt) => Prop::NDTime(dt), + PropRef::DTime(dt) => Prop::DTime(dt), #[cfg(feature = "arrow")] PropRef::Array(arr) => Prop::Array(arr.clone()), - PropRef::Decimal(d) => Prop::Decimal(d.clone()), + PropRef::Decimal { num, scale } => { + Prop::Decimal(BigDecimal::from_bigint(num.into(), scale as i64)) + } } } } @@ -594,6 +598,7 @@ impl From<&Prop> for Prop { } } + pub trait IntoPropMap { fn into_prop_map(self) -> Prop; } diff --git a/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs index 9283b20398..74ef755d38 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs @@ -1,3 +1,5 @@ +use num_traits::ToPrimitive; +use serde::Serialize; use std::sync::Arc; use bigdecimal::BigDecimal; @@ -6,12 +8,145 @@ use rustc_hash::FxHashMap; #[cfg(feature = "arrow")] use crate::core::entities::properties::prop::PropArray; -use crate::core::{entities::properties::prop::Prop, storage::arc_str::ArcStr}; +use crate::core::{ + entities::properties::prop::{ArrowRow, Prop, SedeList, SerdeMap}, + storage::arc_str::ArcStr, +}; -#[derive(Debug, PartialEq, Clone)] -// TODO: this needs more refinement, as it's not generic enough for all the storage types +#[derive(Debug, PartialEq, Clone, Copy)] pub enum PropRef<'a> { Str(&'a str), + Num(PropNum), + Bool(bool), + List(&'a Arc>), + Map(PropMapRef<'a>), + NDTime(NaiveDateTime), + DTime(DateTime), + #[cfg(feature = "arrow")] + Array(&'a PropArray), + Decimal { + num: i128, + scale: i8, + }, +} + +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum PropMapRef<'a> { + Mem(&'a Arc>), + Arrow(ArrowRow<'a>), +} + +impl<'a> PropMapRef<'a> { + pub fn into_prop(self) -> Option { + match self { + PropMapRef::Mem(map) => Some(Prop::Map(map.clone())), + PropMapRef::Arrow(row) => row.into_prop(), + } + } +} + +impl<'a> From for PropRef<'a> { + fn from(b: bool) -> Self { + PropRef::Bool(b) + } +} + +impl<'a> From<&'a str> for PropRef<'a> { + fn from(s: &'a str) -> Self { + PropRef::Str(s) + } +} + +impl From for PropRef<'_> { + fn from(n: u8) -> Self { + PropRef::Num(PropNum::U8(n)) + } +} + +impl From for PropRef<'_> { + fn from(n: u16) -> Self { + PropRef::Num(PropNum::U16(n)) + } +} + +impl From for PropRef<'_> { + fn from(n: i32) -> Self { + PropRef::Num(PropNum::I32(n)) + } +} + +impl From for PropRef<'_> { + fn from(n: i64) -> Self { + PropRef::Num(PropNum::I64(n)) + } +} + +impl From for PropRef<'_> { + fn from(n: u32) -> Self { + PropRef::Num(PropNum::U32(n)) + } +} + +impl From for PropRef<'_> { + fn from(n: u64) -> Self { + PropRef::Num(PropNum::U64(n)) + } +} + +impl From for PropRef<'_> { + fn from(n: f32) -> Self { + PropRef::Num(PropNum::F32(n)) + } +} + +impl From for PropRef<'_> { + fn from(n: f64) -> Self { + PropRef::Num(PropNum::F64(n)) + } +} + +impl From for PropRef<'_> { + fn from(dt: NaiveDateTime) -> Self { + PropRef::NDTime(dt) + } +} + +impl From> for PropRef<'_> { + fn from(dt: DateTime) -> Self { + PropRef::DTime(dt) + } +} + +impl<'a> From<&'a BigDecimal> for PropRef<'a> { + fn from(decimal: &'a BigDecimal) -> Self { + let (num, scale) = decimal.as_bigint_and_exponent(); + let num = num.to_i128().unwrap_or_else(|| { + panic!( + "BigDecimal value {} is out of range for i128 representation", + decimal + ) + }); + PropRef::Decimal { + num, + scale: scale as i8, + } + } +} + +impl<'a> From> for PropRef<'a> { + fn from(row: ArrowRow<'a>) -> Self { + PropRef::Map(PropMapRef::Arrow(row)) + } +} + +impl<'a> From<&'a Arc>> for PropRef<'a> { + fn from(map: &'a Arc>) -> Self { + PropRef::Map(PropMapRef::Mem(map)) + } +} + +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum PropNum { U8(u8), U16(u16), I32(i32), @@ -20,14 +155,6 @@ pub enum PropRef<'a> { U64(u64), F32(f32), F64(f64), - Bool(bool), - List(&'a Arc>), - Map(&'a Arc>), - NDTime(&'a NaiveDateTime), - DTime(&'a DateTime), - #[cfg(feature = "arrow")] - Array(&'a PropArray), - Decimal(&'a BigDecimal), } impl<'a> PropRef<'a> { @@ -39,3 +166,47 @@ impl<'a> PropRef<'a> { } } } + +impl<'a> Serialize for PropMapRef<'a> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + PropMapRef::Mem(map) => SerdeMap(map).serialize(serializer), + PropMapRef::Arrow(row) => row.serialize(serializer), + } + } +} + +impl<'a> Serialize for PropRef<'a> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + PropRef::Str(s) => serializer.serialize_str(s), + PropRef::Num(n) => match n { + PropNum::U8(v) => serializer.serialize_u8(*v), + PropNum::U16(v) => serializer.serialize_u16(*v), + PropNum::I32(v) => serializer.serialize_i32(*v), + PropNum::I64(v) => serializer.serialize_i64(*v), + PropNum::U32(v) => serializer.serialize_u32(*v), + PropNum::U64(v) => serializer.serialize_u64(*v), + PropNum::F32(v) => serializer.serialize_f32(*v), + PropNum::F64(v) => serializer.serialize_f64(*v), + }, + PropRef::Bool(b) => serializer.serialize_bool(*b), + PropRef::List(lst) => SedeList(lst).serialize(serializer), + PropRef::Map(map_ref) => map_ref.serialize(serializer), + PropRef::NDTime(dt) => serializer.serialize_i64(dt.and_utc().timestamp_millis()), + PropRef::DTime(dt) => serializer.serialize_i64(dt.timestamp_millis()), + #[cfg(feature = "arrow")] + PropRef::Array(arr) => arr.serialize(serializer), + PropRef::Decimal { num, scale } => { + let decimal = BigDecimal::new((*num).into(), (*scale).into()); + decimal.serialize(serializer) + } + } + } +} diff --git a/raphtory-core/src/storage/mod.rs b/raphtory-core/src/storage/mod.rs index 314d2f03e1..0eb748a111 100644 --- a/raphtory-core/src/storage/mod.rs +++ b/raphtory-core/src/storage/mod.rs @@ -460,22 +460,22 @@ impl PropColumn { pub fn get_ref(&self, index: usize) -> Option> { match self { PropColumn::Bool(col) => col.get_opt(index).map(|prop| PropRef::Bool(*prop)), - PropColumn::I64(col) => col.get_opt(index).map(|prop| PropRef::I64(*prop)), - PropColumn::U32(col) => col.get_opt(index).map(|prop| PropRef::U32(*prop)), - PropColumn::U64(col) => col.get_opt(index).map(|prop| PropRef::U64(*prop)), - PropColumn::F32(col) => col.get_opt(index).map(|prop| PropRef::F32(*prop)), - PropColumn::F64(col) => col.get_opt(index).map(|prop| PropRef::F64(*prop)), + PropColumn::I64(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), + PropColumn::U32(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), + PropColumn::U64(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), + PropColumn::F32(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), + PropColumn::F64(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), PropColumn::Str(col) => col.get_opt(index).map(|prop| PropRef::Str(prop.as_ref())), #[cfg(feature = "arrow")] PropColumn::Array(col) => col.get_opt(index).map(PropRef::Array), - PropColumn::U8(col) => col.get_opt(index).map(|prop| PropRef::U8(*prop)), - PropColumn::U16(col) => col.get_opt(index).map(|prop| PropRef::U16(*prop)), - PropColumn::I32(col) => col.get_opt(index).map(|prop| PropRef::I32(*prop)), + PropColumn::U8(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), + PropColumn::U16(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), + PropColumn::I32(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), PropColumn::List(col) => col.get_opt(index).map(PropRef::List), - PropColumn::Map(col) => col.get_opt(index).map(PropRef::Map), - PropColumn::NDTime(col) => col.get_opt(index).map(PropRef::NDTime), - PropColumn::DTime(col) => col.get_opt(index).map(PropRef::DTime), - PropColumn::Decimal(col) => col.get_opt(index).map(PropRef::Decimal), + PropColumn::Map(col) => col.get_opt(index).map(PropRef::from), + PropColumn::NDTime(col) => col.get_opt(index).copied().map(PropRef::from), + PropColumn::DTime(col) => col.get_opt(index).copied().map(PropRef::from), + PropColumn::Decimal(col) => col.get_opt(index).map(PropRef::from), PropColumn::Empty(_) => None, } } diff --git a/raphtory/src/test_utils.rs b/raphtory/src/test_utils.rs index 24d07d7204..216877f5b1 100644 --- a/raphtory/src/test_utils.rs +++ b/raphtory/src/test_utils.rs @@ -191,18 +191,18 @@ pub fn prop_type() -> impl Strategy { PropType::Bool, PropType::DTime, PropType::NDTime, - // PropType::Decimal { scale }, + PropType::Decimal { scale: 7 }, ]); - // leaf.prop_recursive(3, 10, 10, |inner| { - // let dict = proptest::collection::hash_map(r"\w{1,10}", inner.clone(), 1..10) - // .prop_map(PropType::map); - // let list = inner - // .clone() - // .prop_map(|p_type| PropType::List(Box::new(p_type))); - // prop_oneof![inner, list, dict] - // }) - leaf + leaf.prop_recursive(3, 10, 10, |inner| { + let dict = proptest::collection::hash_map(r"\w{1,10}", inner.clone(), 1..10) + .prop_map(PropType::map); + // let list = inner + // .clone() + // .prop_map(|p_type| PropType::List(Box::new(p_type))); + // prop_oneof![inner, list, dict] + prop_oneof![inner, dict] + }) } #[derive(Debug, Clone)] From c93af86943fb10e5171deebfe97dda7169a2a53b Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 10 Nov 2025 10:29:53 +0100 Subject: [PATCH 20/47] make search feature not default for graphql/python --- python/Cargo.toml | 4 ++-- raphtory-graphql/Cargo.toml | 1 - raphtory-graphql/src/graph.rs | 6 ++++- raphtory-graphql/src/model/graph/graph.rs | 8 +++---- raphtory/Cargo.toml | 1 - raphtory/src/python/graph/graph.rs | 11 +++++++-- .../src/python/graph/graph_with_deletions.rs | 11 +++++++-- raphtory/src/python/packages/base_modules.rs | 23 ++++++++----------- 8 files changed, 39 insertions(+), 26 deletions(-) diff --git a/python/Cargo.toml b/python/Cargo.toml index da0c2f4b1e..8cb56c78dc 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -21,16 +21,16 @@ crate-type = ["cdylib"] pyo3 = { workspace = true } raphtory = { workspace = true, features = [ "python", - "search", "vectors", "proto", ] } raphtory-graphql = { workspace = true, features = [ - "python", "search" + "python", ] } [features] extension-module = ["pyo3/extension-module"] +search = ["raphtory/search", "raphtory-graphql/search"] [build-dependencies] pyo3-build-config = { workspace = true } diff --git a/raphtory-graphql/Cargo.toml b/raphtory-graphql/Cargo.toml index 097cc51d26..4b349c5568 100644 --- a/raphtory-graphql/Cargo.toml +++ b/raphtory-graphql/Cargo.toml @@ -15,7 +15,6 @@ homepage.workspace = true [dependencies] raphtory = { workspace = true, features = [ 'vectors', - 'search', "io", ] } tempfile = { workspace = true } diff --git a/raphtory-graphql/src/graph.rs b/raphtory-graphql/src/graph.rs index 267312b3b9..12b5b63902 100644 --- a/raphtory-graphql/src/graph.rs +++ b/raphtory-graphql/src/graph.rs @@ -18,7 +18,7 @@ use raphtory::{ graph::{edge::EdgeView, node::NodeView, views::deletion_graph::PersistentGraph}, }, errors::{GraphError, GraphResult}, - prelude::{EdgeViewOps, Graph, IndexMutationOps, NodeViewOps, StableDecode}, + prelude::{EdgeViewOps, Graph, NodeViewOps, StableDecode}, serialise::GraphFolder, vectors::{cache::VectorCache, vectorised_graph::VectorisedGraph}, }; @@ -28,6 +28,9 @@ use raphtory_storage::{ }; use tracing::info; +#[cfg(feature = "search")] +use raphtory::prelude::IndexMutationOps; + #[derive(Clone)] pub struct GraphWithVectors { pub graph: MaterializedGraph, @@ -118,6 +121,7 @@ impl GraphWithVectors { info!("Graph loaded = {}", folder.get_original_path_str()); + #[cfg(feature = "search")] if create_index { graph.create_index()?; } diff --git a/raphtory-graphql/src/model/graph/graph.rs b/raphtory-graphql/src/model/graph/graph.rs index ab60ec2489..64c602d83f 100644 --- a/raphtory-graphql/src/model/graph/graph.rs +++ b/raphtory-graphql/src/model/graph/graph.rs @@ -27,10 +27,7 @@ use raphtory::{ db::{ api::{ properties::dyn_props::DynProperties, - view::{ - DynamicGraph, IntoDynamic, NodeViewOps, SearchableGraphOps, StaticGraphViewOps, - TimeOps, - }, + view::{DynamicGraph, IntoDynamic, NodeViewOps, StaticGraphViewOps, TimeOps}, }, graph::{ node::NodeView, @@ -48,6 +45,9 @@ use std::{ sync::Arc, }; +#[cfg(feature = "search")] +use raphtory::db::api::view::SearchableGraphOps; + #[derive(ResolvedObject, Clone)] #[graphql(name = "Graph")] pub(crate) struct GqlGraph { diff --git a/raphtory/Cargo.toml b/raphtory/Cargo.toml index b0d15e8671..83d1da96aa 100644 --- a/raphtory/Cargo.toml +++ b/raphtory/Cargo.toml @@ -148,7 +148,6 @@ vectors = [ python = [ "io", "arrow", - "search", "vectors", "proto", "dep:pyo3", diff --git a/raphtory/src/python/graph/graph.rs b/raphtory/src/python/graph/graph.rs index 0fd9f56eda..e606d393fc 100644 --- a/raphtory/src/python/graph/graph.rs +++ b/raphtory/src/python/graph/graph.rs @@ -14,8 +14,8 @@ use crate::{ prelude::*, python::{ graph::{ - edge::PyEdge, graph_with_deletions::PyPersistentGraph, index::PyIndexSpec, - io::pandas_loaders::*, node::PyNode, views::graph_view::PyGraphView, + edge::PyEdge, graph_with_deletions::PyPersistentGraph, io::pandas_loaders::*, + node::PyNode, views::graph_view::PyGraphView, }, types::iterable::FromIterable, utils::{PyNodeRef, PyTime}, @@ -34,6 +34,9 @@ use std::{ path::PathBuf, }; +#[cfg(feature = "search")] +use crate::python::graph::index::PyIndexSpec; + /// A temporal graph with event semantics. /// /// Arguments: @@ -990,6 +993,7 @@ impl PyGraph { /// /// Returns: /// None: + #[cfg(feature = "search")] fn create_index(&self) -> Result<(), GraphError> { self.graph.create_index() } @@ -1001,6 +1005,7 @@ impl PyGraph { /// /// Returns: /// None: + #[cfg(feature = "search")] fn create_index_with_spec(&self, py_spec: &PyIndexSpec) -> Result<(), GraphError> { self.graph.create_index_with_spec(py_spec.spec.clone()) } @@ -1012,6 +1017,7 @@ impl PyGraph { /// /// Returns: /// None: + #[cfg(feature = "search")] fn create_index_in_ram(&self) -> Result<(), GraphError> { self.graph.create_index_in_ram() } @@ -1029,6 +1035,7 @@ impl PyGraph { /// /// Returns: /// None: + #[cfg(feature = "search")] fn create_index_in_ram_with_spec(&self, py_spec: &PyIndexSpec) -> Result<(), GraphError> { self.graph .create_index_in_ram_with_spec(py_spec.spec.clone()) diff --git a/raphtory/src/python/graph/graph_with_deletions.rs b/raphtory/src/python/graph/graph_with_deletions.rs index e632271767..67cc9ad62c 100644 --- a/raphtory/src/python/graph/graph_with_deletions.rs +++ b/raphtory/src/python/graph/graph_with_deletions.rs @@ -16,9 +16,9 @@ use crate::{ }, errors::GraphError, io::parquet_loaders::*, - prelude::{DeletionOps, GraphViewOps, ImportOps, IndexMutationOps}, + prelude::{DeletionOps, GraphViewOps, ImportOps}, python::{ - graph::{edge::PyEdge, index::PyIndexSpec, node::PyNode, views::graph_view::PyGraphView}, + graph::{edge::PyEdge, node::PyNode, views::graph_view::PyGraphView}, utils::{PyNodeRef, PyTime}, }, serialise::StableEncode, @@ -35,6 +35,9 @@ use std::{ path::PathBuf, }; +#[cfg(feature = "search")] +use crate::{prelude::IndexMutationOps, python::graph::index::PyIndexSpec}; + /// A temporal graph that allows edges and nodes to be deleted. #[derive(Clone)] #[pyclass(name = "PersistentGraph", extends = PyGraphView, frozen, module="raphtory")] @@ -1007,6 +1010,7 @@ impl PyPersistentGraph { /// /// Returns: /// None: + #[cfg(feature = "search")] fn create_index(&self) -> Result<(), GraphError> { self.graph.create_index() } @@ -1017,6 +1021,7 @@ impl PyPersistentGraph { /// /// Returns: /// None: + #[cfg(feature = "search")] fn create_index_with_spec(&self, py_spec: &PyIndexSpec) -> Result<(), GraphError> { self.graph.create_index_with_spec(py_spec.spec.clone()) } @@ -1028,6 +1033,7 @@ impl PyPersistentGraph { /// /// Returns: /// None: + #[cfg(feature = "search")] fn create_index_in_ram(&self) -> Result<(), GraphError> { self.graph.create_index_in_ram() } @@ -1045,6 +1051,7 @@ impl PyPersistentGraph { /// /// Returns: /// None: + #[cfg(feature = "search")] fn create_index_in_ram_with_spec(&self, py_spec: &PyIndexSpec) -> Result<(), GraphError> { self.graph .create_index_in_ram_with_spec(py_spec.spec.clone()) diff --git a/raphtory/src/python/packages/base_modules.rs b/raphtory/src/python/packages/base_modules.rs index 6035ccd3f0..605e4d0ea3 100644 --- a/raphtory/src/python/packages/base_modules.rs +++ b/raphtory/src/python/packages/base_modules.rs @@ -2,7 +2,7 @@ use crate::{ add_classes, add_functions, python::{ - algorithm::max_weight_matching::PyMatching, + algorithm::{epidemics::PyInfected, max_weight_matching::PyMatching}, graph::{ edge::{PyEdge, PyMutableEdge}, edges::{PyEdges, PyNestedEdges}, @@ -10,7 +10,8 @@ use crate::{ graph_with_deletions::PyPersistentGraph, node::{PyMutableNode, PyNode, PyNodes, PyPathFromGraph, PyPathFromNode}, properties::{ - PyMetadata, PyPropValueList, PyProperties, PyTemporalProp, PyTemporalProperties, + PropertiesView, PyMetadata, PyPropValueList, PyProperties, PyTemporalProp, + PyTemporalProperties, }, views::graph_view::PyGraphView, }, @@ -21,7 +22,7 @@ use crate::{ vectors::{PyVectorSelection, PyVectorisedGraph}, }, types::wrappers::{ - document::PyDocument, + document::{PyDocument, PyEmbedding}, iterables::{ ArcStringIterable, ArcStringVecIterable, BoolIterable, GIDGIDIterable, GIDIterable, NestedArcStringVecIterable, NestedBoolIterable, NestedGIDGIDIterable, @@ -37,6 +38,9 @@ use crate::{ }; use pyo3::prelude::*; +#[cfg(feature = "search")] +use crate::python::graph::index::{PyIndexSpec, PyIndexSpecBuilder}; + pub fn add_raphtory_classes(m: &Bound) -> PyResult<()> { //Graph classes add_classes!( @@ -61,10 +65,11 @@ pub fn add_raphtory_classes(m: &Bound) -> PyResult<()> { PropertiesView, PyTemporalProp, PyWindowSet, - PyIndexSpecBuilder, - PyIndexSpec ); + #[cfg(feature = "search")] + add_classes!(PyIndexSpecBuilder, PyIndexSpec); + #[pyfunction] /// Return Raphtory version. /// @@ -196,11 +201,3 @@ pub fn base_vectors_module(py: Python<'_>) -> Result, PyErr> } pub use crate::python::graph::node_state::base_node_state_module; -use crate::python::{ - algorithm::epidemics::PyInfected, - graph::{ - index::{PyIndexSpec, PyIndexSpecBuilder}, - properties::PropertiesView, - }, - types::wrappers::document::PyEmbedding, -}; From 0f8f038cd5fc9ca1309f9021aed797dcfc9c7576 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 10 Nov 2025 11:42:21 +0100 Subject: [PATCH 21/47] storage feature no longer exists --- .github/workflows/test_during_pr.yml | 12 --- .../test_python_disk_storage_workflow.yml | 67 --------------- .../test_rust_disk_storage_workflow.yml | 82 ------------------- .github/workflows/test_rust_workflow.yml | 7 +- 4 files changed, 6 insertions(+), 162 deletions(-) delete mode 100644 .github/workflows/test_python_disk_storage_workflow.yml delete mode 100644 .github/workflows/test_rust_disk_storage_workflow.yml diff --git a/.github/workflows/test_during_pr.yml b/.github/workflows/test_during_pr.yml index b92b9c96bb..94112e01f1 100644 --- a/.github/workflows/test_during_pr.yml +++ b/.github/workflows/test_during_pr.yml @@ -19,11 +19,6 @@ jobs: uses: ./.github/workflows/test_rust_workflow.yml secrets: inherit needs: rust-format-check - call-test-rust-storage-workflow-in-local-repo: - name: Run Rust storage tests - uses: ./.github/workflows/test_rust_disk_storage_workflow.yml - secrets: inherit - needs: rust-format-check call-test-python-workflow-in-local-repo: name: Run Python tests uses: ./.github/workflows/test_python_workflow.yml @@ -31,13 +26,6 @@ jobs: test_python_lower: false secrets: inherit needs: rust-format-check - call-test-python-disk-storage-workflow-in-local-repo: - name: Run Python storage tests - uses: ./.github/workflows/test_python_disk_storage_workflow.yml - with: - test_python_lower: false - secrets: inherit - needs: rust-format-check call-benchmark-workflow-in-local-repo: name: Run benchmarks uses: ./.github/workflows/benchmark.yml diff --git a/.github/workflows/test_python_disk_storage_workflow.yml b/.github/workflows/test_python_disk_storage_workflow.yml deleted file mode 100644 index 0171e2e840..0000000000 --- a/.github/workflows/test_python_disk_storage_workflow.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: Run Python storage test -permissions: { } -on: - workflow_call: - inputs: - skip_tests: - type: boolean - default: false - required: false - test_python_lower: - type: boolean - default: false - required: false -# DO NOT CHANGE NAME OF WORKFLOW, USED IN OTHER WORKFLOWS KEEP "Rust Tests" -jobs: - select-strategy: - runs-on: ubuntu-latest - outputs: - python-versions: ${{ steps.set-matrix.outputs.python-versions }} - steps: - - id: set-matrix - run: | - echo "python-versions=[\"3.9\",\"3.13\"]" >> $GITHUB_OUTPUT - python-test: - if: ${{ !inputs.skip_tests }} - name: Python Tests - needs: select-strategy - strategy: - matrix: - python: ${{ fromJson(needs.select-strategy.outputs.python-versions) }} - os: [ macos-latest, ubuntu-latest, windows-latest ] - runs-on: '${{ matrix.os }}' - steps: - - uses: actions/checkout@v3 - name: Checkout - - uses: maxim-lobanov/setup-xcode@v1 - name: Xcode version - if: "contains(matrix.os, 'macOS')" - with: - xcode-version: latest-stable - - uses: ./.github/actions/setup_rust - name: Setup Rust - - name: Install Protoc - uses: arduino/setup-protoc@v3 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - - uses: webfactory/ssh-agent@v0.7.0 - name: Load raphtory-disk_graph key - with: - ssh-private-key: ${{ secrets.RA_SSH_PRIVATE_KEY }} - - uses: Swatinem/rust-cache@v2 - name: Cargo cache - with: - cache-all-crates: true - - name: Setup Python ${{ matrix.python }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python }} - cache: 'pip' - - name: Activate pometry-storage in Cargo.toml - run: make pull-storage - - name: Install Python dependencies - run: | - python -m pip install tox - - name: Run Python tests - run: | - cd python && tox run -e storage diff --git a/.github/workflows/test_rust_disk_storage_workflow.yml b/.github/workflows/test_rust_disk_storage_workflow.yml deleted file mode 100644 index d8d0bafad9..0000000000 --- a/.github/workflows/test_rust_disk_storage_workflow.yml +++ /dev/null @@ -1,82 +0,0 @@ -name: Run Rust test -permissions: { } -on: - workflow_call: - inputs: - skip_tests: - type: boolean - default: false - required: false -# DO NOT CHANGE NAME OF WORKFLOW, USED IN OTHER WORKFLOWS KEEP "Rust Tests" -jobs: - rust-test: - if: ${{ !inputs.skip_tests }} - name: Rust Tests - runs-on: '${{ matrix.os }}' - env: - RUST_BACKTRACE: 1 - strategy: - matrix: - include: - - { os: macos-latest, flags: "" } - - { os: ubuntu-latest, flags: "-C link-arg=-fuse-ld=lld" } - - { os: windows-latest, flags: "" } - steps: - - uses: maxim-lobanov/setup-xcode@v1 - name: Xcode version - if: "contains(matrix.os, 'macOS')" - with: - xcode-version: latest-stable - - uses: actions/checkout@v3 - name: Checkout - - uses: ./.github/actions/setup_rust - name: Setup Rust - - name: Free up space (ubuntu) - if: "contains(matrix.os, 'ubuntu')" - run: | - sudo rm -rf /usr/share/dotnet - sudo rm -rf /usr/local/lib/android - sudo rm -rf /opt/ghc - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - - name: Install LLD - if: "contains(matrix.os, 'ubuntu')" - run: | - sudo apt-get install lld - - uses: webfactory/ssh-agent@v0.7.0 - name: Load pometry-storage key - with: - ssh-private-key: ${{ secrets.RA_SSH_PRIVATE_KEY }} - - name: Rust version - run: rustc --version --verbose - - uses: Swatinem/rust-cache@v2 - name: Cargo cache - with: - cache-all-crates: true - - name: Install Protoc - uses: arduino/setup-protoc@v3 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - - name: Install nextest - uses: taiki-e/install-action@v2 - with: - tool: nextest@0.9.99 - - name: Install cargo-hack - uses: taiki-e/install-action@cargo-hack - - name: Activate pometry-storage in Cargo.toml - run: make pull-storage - - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - name: Run all Tests (disk_graph) - env: - RUSTFLAGS: -Awarnings ${{ matrix.flags }} - TEMPDIR: ${{ runner.temp }} - run: | - cargo nextest run --all --no-default-features --features "storage" --cargo-profile build-fast - - name: Check all features - env: - RUSTFLAGS: -Awarnings - run: | - cargo hack check --workspace --all-targets --each-feature --skip extension-module,default - - diff --git a/.github/workflows/test_rust_workflow.yml b/.github/workflows/test_rust_workflow.yml index c84a380c84..e967e2f844 100644 --- a/.github/workflows/test_rust_workflow.yml +++ b/.github/workflows/test_rust_workflow.yml @@ -60,7 +60,12 @@ jobs: RUSTFLAGS: -Awarnings TEMPDIR: ${{ runner.temp }} run: | - cargo nextest run --all --no-default-features --cargo-profile build-fast + cargo nextest run --workspace --no-default-features --cargo-profile build-fast + - name: Check all features + env: + RUSTFLAGS: -Awarnings + run: | + cargo hack check --workspace --all-targets --each-feature --skip extension-module,default doc-test: if: ${{ !inputs.skip_tests }} name: "Doc tests" From 9786a200e171f5e36870583c5f89cedf96d79b2c Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 10 Nov 2025 11:42:48 +0100 Subject: [PATCH 22/47] proto feature shouldn't be part of io --- raphtory/Cargo.toml | 2 -- raphtory/src/serialise/parquet/model.rs | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/raphtory/Cargo.toml b/raphtory/Cargo.toml index 83d1da96aa..af617fff31 100644 --- a/raphtory/Cargo.toml +++ b/raphtory/Cargo.toml @@ -123,7 +123,6 @@ io = [ "dep:tokio", "dep:parquet", "dep:tempfile", - "proto", "kdam", ] @@ -149,7 +148,6 @@ python = [ "io", "arrow", "vectors", - "proto", "dep:pyo3", "dep:numpy", "dep:num", diff --git a/raphtory/src/serialise/parquet/model.rs b/raphtory/src/serialise/parquet/model.rs index 6981f84bb4..4909c7e08f 100644 --- a/raphtory/src/serialise/parquet/model.rs +++ b/raphtory/src/serialise/parquet/model.rs @@ -54,8 +54,8 @@ impl<'a> Serialize for ParquetProp<'a> { state.end() } Prop::Decimal(dec) => serializer.serialize_str(&dec.to_string()), - _ => { - todo!("Serializer not implemented") + Prop::Array(_) => { + todo!("Serializer not implemented for Array (TODO: ") } } } From f98b77c0874e221bc1c44a83038240fb3f3c5b1d Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 10 Nov 2025 12:03:50 +0100 Subject: [PATCH 23/47] make the proto feature actually protbuf specific --- python/Cargo.toml | 2 +- raphtory-benchmark/Cargo.toml | 4 +++- raphtory/Cargo.toml | 2 +- raphtory/src/errors.rs | 4 ++-- raphtory/src/lib.rs | 4 ++-- raphtory/src/serialise/mod.rs | 7 ++++++- raphtory/src/serialise/parquet/model.rs | 2 +- raphtory/tests/serialise_test.rs | 1 + 8 files changed, 17 insertions(+), 9 deletions(-) diff --git a/python/Cargo.toml b/python/Cargo.toml index 8cb56c78dc..a88d5f460d 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -22,7 +22,6 @@ pyo3 = { workspace = true } raphtory = { workspace = true, features = [ "python", "vectors", - "proto", ] } raphtory-graphql = { workspace = true, features = [ "python", @@ -31,6 +30,7 @@ raphtory-graphql = { workspace = true, features = [ [features] extension-module = ["pyo3/extension-module"] search = ["raphtory/search", "raphtory-graphql/search"] +proto = ["raphtory/proto"] [build-dependencies] pyo3-build-config = { workspace = true } diff --git a/raphtory-benchmark/Cargo.toml b/raphtory-benchmark/Cargo.toml index 7654285609..aa53e69535 100644 --- a/raphtory-benchmark/Cargo.toml +++ b/raphtory-benchmark/Cargo.toml @@ -9,7 +9,6 @@ edition = "2021" criterion = { workspace = true } raphtory = { workspace = true, features = [ "io", - "proto", "vectors", ] } raphtory-api = { workspace = true } @@ -61,10 +60,12 @@ harness = false [[bench]] name = "proto_encode" harness = false +required-features = ["proto"] [[bench]] name = "proto_decode" harness = false +required-features = ["proto"] [[bench]] name = "search_bench" @@ -82,3 +83,4 @@ required-features = ["search"] [features] search = ["raphtory/search"] +proto = ["raphtory/proto"] diff --git a/raphtory/Cargo.toml b/raphtory/Cargo.toml index af617fff31..1245bb359f 100644 --- a/raphtory/Cargo.toml +++ b/raphtory/Cargo.toml @@ -123,6 +123,7 @@ io = [ "dep:tokio", "dep:parquet", "dep:tempfile", + "dep:zip", "kdam", ] @@ -168,7 +169,6 @@ arrow = [ proto = [ "dep:prost", "dep:prost-types", - "dep:zip", "dep:prost-build", "dep:memmap2", "arrow", diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index ff4fcacc56..97611eae5f 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -255,7 +255,7 @@ pub enum GraphError { #[error("The path {0} does not contain a vector DB")] VectorDbDoesntExist(String), - #[cfg(feature = "proto")] + #[cfg(feature = "io")] #[error("zip operation failed")] ZipError { #[from] @@ -327,7 +327,7 @@ pub enum GraphError { #[error("Protobuf decode error{0}")] EncodeError(#[from] prost::EncodeError), - #[cfg(feature = "proto")] + #[cfg(feature = "io")] #[error("Cannot write graph into non empty folder {0}")] NonEmptyGraphFolder(PathBuf), diff --git a/raphtory/src/lib.rs b/raphtory/src/lib.rs index e3785180f9..81620cf235 100644 --- a/raphtory/src/lib.rs +++ b/raphtory/src/lib.rs @@ -106,7 +106,7 @@ pub mod io; pub mod api; pub mod core; pub mod errors; -#[cfg(feature = "proto")] +#[cfg(feature = "io")] pub mod serialise; pub mod storage; @@ -149,7 +149,7 @@ pub mod prelude { }, }; - #[cfg(feature = "proto")] + #[cfg(all(feature = "io", feature = "arrow"))] pub use crate::serialise::{ parquet::{ParquetDecoder, ParquetEncoder}, StableDecode, StableEncode, diff --git a/raphtory/src/serialise/mod.rs b/raphtory/src/serialise/mod.rs index 250c9c62b5..316d3403f0 100644 --- a/raphtory/src/serialise/mod.rs +++ b/raphtory/src/serialise/mod.rs @@ -1,9 +1,14 @@ mod graph_folder; pub mod metadata; +#[cfg(feature = "arrow")] pub(crate) mod parquet; + +#[cfg(feature = "proto")] pub mod proto; mod serialise; pub use graph_folder::{GraphFolder, GRAPH_PATH, INDEX_PATH, META_PATH, VECTORS_PATH}; -pub use proto::proto_generated::Graph as ProtoGraph; pub use serialise::{StableDecode, StableEncode}; + +#[cfg(feature = "proto")] +pub use proto::proto_generated::Graph as ProtoGraph; diff --git a/raphtory/src/serialise/parquet/model.rs b/raphtory/src/serialise/parquet/model.rs index 4909c7e08f..020dab6740 100644 --- a/raphtory/src/serialise/parquet/model.rs +++ b/raphtory/src/serialise/parquet/model.rs @@ -55,7 +55,7 @@ impl<'a> Serialize for ParquetProp<'a> { } Prop::Decimal(dec) => serializer.serialize_str(&dec.to_string()), Prop::Array(_) => { - todo!("Serializer not implemented for Array (TODO: ") + todo!("Serializer not implemented for Array (TODO: #2377)") } } } diff --git a/raphtory/tests/serialise_test.rs b/raphtory/tests/serialise_test.rs index e544e6207f..eae58876ee 100644 --- a/raphtory/tests/serialise_test.rs +++ b/raphtory/tests/serialise_test.rs @@ -237,6 +237,7 @@ mod serialise_test { } #[test] + #[ignore = "TODO: #2377"] fn test_all_the_metadata_on_edge() { let mut props = vec![]; write_props_to_vec(&mut props); From d6fc77459db2ec2c03a5cc10d7bedf467d34d6fc Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 10 Nov 2025 12:40:25 +0100 Subject: [PATCH 24/47] disk storage is only possibly enabled when the graph has a path --- db4-graph/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db4-graph/src/lib.rs b/db4-graph/src/lib.rs index 86ae8ca333..9250e8f485 100644 --- a/db4-graph/src/lib.rs +++ b/db4-graph/src/lib.rs @@ -207,7 +207,7 @@ impl, ES = ES>> TemporalGraph { } pub fn disk_storage_enabled(&self) -> bool { - Extension::disk_storage_enabled() + self.graph_dir().is_some() && Extension::disk_storage_enabled() } pub fn extension(&self) -> &EXT { self.storage().extension() From c4e6f4343b9311df957d56e25190050942173cdc Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 10 Nov 2025 13:35:30 +0100 Subject: [PATCH 25/47] ignore broken tests and mark with issues --- raphtory-graphql/src/data.rs | 118 ++++++++++-------- raphtory-graphql/src/lib.rs | 14 +-- .../src/model/graph/mutable_graph.rs | 69 +++++----- raphtory/tests/serialise_test.rs | 6 + 4 files changed, 108 insertions(+), 99 deletions(-) diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index bb32448d66..8205a1abc5 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -330,7 +330,11 @@ pub(crate) mod data_tests { data::Data, }; use itertools::Itertools; - use raphtory::{db::api::view::MaterializedGraph, errors::GraphError, prelude::*}; + use raphtory::{ + db::api::view::{internal::InternalStorageOps, MaterializedGraph}, + errors::GraphError, + prelude::*, + }; use std::{collections::HashMap, fs, path::Path, time::Duration}; use tokio::time::sleep; @@ -517,42 +521,45 @@ pub(crate) mod data_tests { let (loaded_graph1, _) = data.get_graph("test_graph1").await.unwrap(); let (loaded_graph2, _) = data.get_graph("test_graph2").await.unwrap(); - assert!( - !loaded_graph1.is_dirty(), - "Graph1 should not be dirty when loaded from disk" - ); - assert!( - !loaded_graph2.is_dirty(), - "Graph2 should not be dirty when loaded from disk" - ); - - // Modify only graph1 to make it dirty - loaded_graph1.set_dirty(true); - assert!( - loaded_graph1.is_dirty(), - "Graph1 should be dirty after modification" - ); - - // Drop the Data instance - this should trigger serialization - drop(data); - - // Check modification times after drop - let graph1_metadata_after = fs::metadata(&graph1_path).unwrap(); - let graph2_metadata_after = fs::metadata(&graph2_path).unwrap(); - let graph1_modified_time = graph1_metadata_after.modified().unwrap(); - let graph2_modified_time = graph2_metadata_after.modified().unwrap(); - - // Graph1 (dirty) modification time should be different - assert_ne!( - graph1_original_time, graph1_modified_time, - "Graph1 (dirty) should have been written to disk on drop" - ); - - // Graph2 (not dirty) modification time should be the same - assert_eq!( - graph2_original_time, graph2_modified_time, - "Graph2 (not dirty) should not have been written to disk on drop" - ); + // TODO: This test doesn't work with disk storage right now, make sure modification dates actually update correctly! + if loaded_graph1.graph.disk_storage_enabled() { + assert!( + !loaded_graph1.is_dirty(), + "Graph1 should not be dirty when loaded from disk" + ); + assert!( + !loaded_graph2.is_dirty(), + "Graph2 should not be dirty when loaded from disk" + ); + + // Modify only graph1 to make it dirty + loaded_graph1.set_dirty(true); + assert!( + loaded_graph1.is_dirty(), + "Graph1 should be dirty after modification" + ); + + // Drop the Data instance - this should trigger serialization + drop(data); + + // Check modification times after drop + let graph1_metadata_after = fs::metadata(&graph1_path).unwrap(); + let graph2_metadata_after = fs::metadata(&graph2_path).unwrap(); + let graph1_modified_time = graph1_metadata_after.modified().unwrap(); + let graph2_modified_time = graph2_metadata_after.modified().unwrap(); + + // Graph1 (dirty) modification time should be different + assert_ne!( + graph1_original_time, graph1_modified_time, + "Graph1 (dirty) should have been written to disk on drop" + ); + + // Graph2 (not dirty) modification time should be the same + assert_eq!( + graph2_original_time, graph2_modified_time, + "Graph2 (not dirty) should not have been written to disk on drop" + ); + } } #[tokio::test] @@ -621,22 +628,25 @@ pub(crate) mod data_tests { sleep(Duration::from_secs(3)).await; data.cache.run_pending_tasks().await; - // Check modification times after eviction - let graph1_metadata_after = fs::metadata(&graph1_path).unwrap(); - let graph2_metadata_after = fs::metadata(&graph2_path).unwrap(); - let graph1_modified_time = graph1_metadata_after.modified().unwrap(); - let graph2_modified_time = graph2_metadata_after.modified().unwrap(); - - // Graph1 (dirty) modification time should be different - assert_ne!( - graph1_original_time, graph1_modified_time, - "Graph1 (dirty) should have been written to disk on eviction" - ); - - // Graph2 (not dirty) modification time should be the same - assert_eq!( - graph2_original_time, graph2_modified_time, - "Graph2 (not dirty) should not have been written to disk on eviction" - ); + // TODO: This test doesn't work with disk storage right now, make sure modification dates actually update correctly! + if loaded_graph1.graph.disk_storage_enabled() { + // Check modification times after eviction + let graph1_metadata_after = fs::metadata(&graph1_path).unwrap(); + let graph2_metadata_after = fs::metadata(&graph2_path).unwrap(); + let graph1_modified_time = graph1_metadata_after.modified().unwrap(); + let graph2_modified_time = graph2_metadata_after.modified().unwrap(); + + // Graph1 (dirty) modification time should be different + assert_ne!( + graph1_original_time, graph1_modified_time, + "Graph1 (dirty) should have been written to disk on eviction" + ); + + // Graph2 (not dirty) modification time should be the same + assert_eq!( + graph2_original_time, graph2_modified_time, + "Graph2 (not dirty) should not have been written to disk on eviction" + ); + } } } diff --git a/raphtory-graphql/src/lib.rs b/raphtory-graphql/src/lib.rs index 6fcd9755ac..be5eab06c4 100644 --- a/raphtory-graphql/src/lib.rs +++ b/raphtory-graphql/src/lib.rs @@ -402,12 +402,7 @@ mod graphql_test { async fn query_nodefilter() { let graph = Graph::new(); graph - .add_node( - 0, - 1, - [("pgraph", Prop::from_arr::(vec![3u8]))], - None, - ) + .add_node(0, 1, [("pgraph", Prop::I32(0))], None) .unwrap(); let graph: MaterializedGraph = graph.into(); @@ -867,12 +862,7 @@ mod graphql_test { async fn query_properties() { let graph = Graph::new(); graph - .add_node( - 0, - 1, - [("pgraph", Prop::from_arr::(vec![3u8]))], - None, - ) + .add_node(0, 1, [("pgraph", Prop::I32(0))], None) .unwrap(); let graph = graph.into(); diff --git a/raphtory-graphql/src/model/graph/mutable_graph.rs b/raphtory-graphql/src/model/graph/mutable_graph.rs index 1e65461cd3..98e6cefa4e 100644 --- a/raphtory-graphql/src/model/graph/mutable_graph.rs +++ b/raphtory-graphql/src/model/graph/mutable_graph.rs @@ -807,17 +807,18 @@ mod tests { assert!(result.is_ok()); assert!(result.unwrap()); - let query = "node1".to_string(); - let embedding = &fake_embedding(vec![query]).await.unwrap().remove(0); - let limit = 5; - let result = mutable_graph - .graph - .vectors - .unwrap() - .nodes_by_similarity(embedding, limit, None); - - assert!(result.is_ok()); - assert!(result.unwrap().get_documents().unwrap().len() == 2); + // TODO: #2380 (embeddings aren't working right now) + // let query = "node1".to_string(); + // let embedding = &fake_embedding(vec![query]).await.unwrap().remove(0); + // let limit = 5; + // let result = mutable_graph + // .graph + // .vectors + // .unwrap() + // .nodes_by_similarity(embedding, limit, None); + // + // assert!(result.is_ok()); + // assert!(result.unwrap().get_documents().unwrap().len() == 2); } #[tokio::test] @@ -877,17 +878,18 @@ mod tests { assert!(result.is_ok()); assert!(result.unwrap()); - let query = "complex_node_1".to_string(); - let embedding = &fake_embedding(vec![query]).await.unwrap().remove(0); - let limit = 5; - let result = mutable_graph - .graph - .vectors - .unwrap() - .nodes_by_similarity(embedding, limit, None); - - assert!(result.is_ok()); - assert!(result.unwrap().get_documents().unwrap().len() == 3); + // TODO: #2380 (embeddings aren't working right now) + // let query = "complex_node_1".to_string(); + // let embedding = &fake_embedding(vec![query]).await.unwrap().remove(0); + // let limit = 5; + // let result = mutable_graph + // .graph + // .vectors + // .unwrap() + // .nodes_by_similarity(embedding, limit, None); + // + // assert!(result.is_ok()); + // assert!(result.unwrap().get_documents().unwrap().len() == 3); } #[tokio::test] @@ -951,17 +953,18 @@ mod tests { assert!(result.is_ok()); assert!(result.unwrap()); + // TODO: #2380 (embeddings aren't working right now) // Test that edge embeddings were generated. - let query = "node1 appeared with node2".to_string(); - let embedding = &fake_embedding(vec![query]).await.unwrap().remove(0); - let limit = 5; - let result = mutable_graph - .graph - .vectors - .unwrap() - .edges_by_similarity(embedding, limit, None); - - assert!(result.is_ok()); - assert!(result.unwrap().get_documents().unwrap().len() == 2); + // let query = "node1 appeared with node2".to_string(); + // let embedding = &fake_embedding(vec![query]).await.unwrap().remove(0); + // let limit = 5; + // let result = mutable_graph + // .graph + // .vectors + // .unwrap() + // .edges_by_similarity(embedding, limit, None); + // + // assert!(result.is_ok()); + // assert!(result.unwrap().get_documents().unwrap().len() == 2); } } diff --git a/raphtory/tests/serialise_test.rs b/raphtory/tests/serialise_test.rs index eae58876ee..d030456ab3 100644 --- a/raphtory/tests/serialise_test.rs +++ b/raphtory/tests/serialise_test.rs @@ -131,6 +131,7 @@ mod serialise_test { } #[test] + #[ignore = "TODO: #2377"] fn edge_t_props() { let tempdir = TempDir::new().unwrap(); let temp_file = tempdir.path().join("graph"); @@ -183,6 +184,7 @@ mod serialise_test { } #[test] + #[ignore = "TODO: #2377"] fn test_all_the_t_props_on_node() { let mut props = vec![]; write_props_to_vec(&mut props); @@ -210,6 +212,7 @@ mod serialise_test { } #[test] + #[ignore = "TODO: #2377"] fn test_all_the_t_props_on_edge() { let mut props = vec![]; write_props_to_vec(&mut props); @@ -265,6 +268,7 @@ mod serialise_test { } #[test] + #[ignore = "TODO: #2377"] fn test_all_the_metadata_on_node() { let mut props = vec![]; write_props_to_vec(&mut props); @@ -290,6 +294,7 @@ mod serialise_test { } #[test] + #[ignore = "TODO: #2377"] fn graph_metadata() { let mut props = vec![]; write_props_to_vec(&mut props); @@ -311,6 +316,7 @@ mod serialise_test { } #[test] + #[ignore = "TODO: #2377"] fn graph_temp_properties() { let mut props = vec![]; write_props_to_vec(&mut props); From ab73b77b1fad867836132f6a47f47beb73e631a4 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 10 Nov 2025 13:35:42 +0100 Subject: [PATCH 26/47] minor tweaks to the features --- raphtory/Cargo.toml | 2 +- raphtory/src/serialise/serialise.rs | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/raphtory/Cargo.toml b/raphtory/Cargo.toml index 1245bb359f..d144d19737 100644 --- a/raphtory/Cargo.toml +++ b/raphtory/Cargo.toml @@ -121,10 +121,10 @@ io = [ "dep:csv", "dep:reqwest", "dep:tokio", - "dep:parquet", "dep:tempfile", "dep:zip", "kdam", + "arrow", ] # search diff --git a/raphtory/src/serialise/serialise.rs b/raphtory/src/serialise/serialise.rs index d81ed18fab..88ff739193 100644 --- a/raphtory/src/serialise/serialise.rs +++ b/raphtory/src/serialise/serialise.rs @@ -1,7 +1,3 @@ -use std::path::Path; - -#[cfg(feature = "search")] -use crate::prelude::IndexMutationOps; use crate::{ db::api::{mutation::AdditionOps, view::StaticGraphViewOps}, errors::GraphError, @@ -10,9 +6,12 @@ use crate::{ GraphFolder, }, }; -use std::{fs, fs::File}; +use std::{fs, fs::File, path::Path}; use tempfile; +#[cfg(feature = "search")] +use crate::prelude::IndexMutationOps; + pub trait StableEncode: StaticGraphViewOps + AdditionOps { /// Encode the graph into bytes. fn encode_to_bytes(&self) -> Vec; From bf98721db783641b20a5b55f03f2d9902d9c1fff Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 10 Nov 2025 13:50:51 +0100 Subject: [PATCH 27/47] fmt --- .../core/entities/properties/prop/arrow.rs | 22 ++++++++----------- .../entities/properties/prop/prop_enum.rs | 1 - 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/raphtory-api/src/core/entities/properties/prop/arrow.rs b/raphtory-api/src/core/entities/properties/prop/arrow.rs index 1c7e5a60b5..c6be00fda2 100644 --- a/raphtory-api/src/core/entities/properties/prop/arrow.rs +++ b/raphtory-api/src/core/entities/properties/prop/arrow.rs @@ -48,12 +48,14 @@ impl<'a> Serialize for ArrowRow<'a> { impl<'a> ArrowRow<'a> { pub fn primitive_value(&self, col: usize) -> Option { let primitive_array = self.array.column(col).as_primitive_opt::()?; - (primitive_array.len() > self.index && !primitive_array.is_null(self.index)).then(|| primitive_array.value(self.index)) + (primitive_array.len() > self.index && !primitive_array.is_null(self.index)) + .then(|| primitive_array.value(self.index)) } fn primitive_dt(&self, col: usize) -> Option<(T::Native, &DataType)> { let col = self.array.column(col).as_primitive_opt::()?; - (col.len() > self.index && !col.is_null(self.index)).then(|| (col.value(self.index), col.data_type())) + (col.len() > self.index && !col.is_null(self.index)) + .then(|| (col.value(self.index), col.data_type())) } fn primitive_prop(&self, col: usize) -> Option { @@ -64,8 +66,8 @@ impl<'a> ArrowRow<'a> { fn primitive_prop_ref(self, col: usize) -> Option> { let col = self.array.column(col).as_primitive_opt::()?; - let (value, dt) = - (col.len() > self.index && !col.is_null(self.index)).then(|| (col.value(self.index), col.data_type()))?; + let (value, dt) = (col.len() > self.index && !col.is_null(self.index)) + .then(|| (col.value(self.index), col.data_type()))?; let prop_ref = T::prop_ref(value, dt); Some(prop_ref) } @@ -98,15 +100,9 @@ impl<'a> ArrowRow<'a> { let len = column.len(); let valid = len > self.index && !column.is_null(self.index); match column.data_type() { - DataType::Utf8 => { - valid.then(|| column.as_string::().value(self.index)) - } - DataType::LargeUtf8 => { - valid.then(|| column.as_string::().value(self.index)) - } - DataType::Utf8View => { - valid.then(|| column.as_string_view().value(self.index)) - } + DataType::Utf8 => valid.then(|| column.as_string::().value(self.index)), + DataType::LargeUtf8 => valid.then(|| column.as_string::().value(self.index)), + DataType::Utf8View => valid.then(|| column.as_string_view().value(self.index)), _ => None, } } diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index db8a1f346b..50a8dd4837 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -598,7 +598,6 @@ impl From<&Prop> for Prop { } } - pub trait IntoPropMap { fn into_prop_map(self) -> Prop; } From f03920acf7f4ffe5801d74ed3f996b046ecd54c5 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 10 Nov 2025 14:30:44 +0100 Subject: [PATCH 28/47] don't remove empty maps --- .../core/entities/properties/prop/arrow.rs | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/raphtory-api/src/core/entities/properties/prop/arrow.rs b/raphtory-api/src/core/entities/properties/prop/arrow.rs index c6be00fda2..b8d4386124 100644 --- a/raphtory-api/src/core/entities/properties/prop/arrow.rs +++ b/raphtory-api/src/core/entities/properties/prop/arrow.rs @@ -168,16 +168,19 @@ impl<'a> ArrowRow<'a> { } pub fn into_prop(self) -> Option { - let map = Prop::map( - self.array - .fields() - .iter() - .enumerate() - .filter_map(|(col, field)| Some((field.name().as_ref(), self.prop_value(col)?))), - ); - match map { - Prop::Map(m) if m.is_empty() => None, - _ => Some(map), + if self.index >= self.array.len() || self.array.is_null(self.index) { + None + } else { + let map = Prop::map( + self.array + .fields() + .iter() + .enumerate() + .filter_map(|(col, field)| { + Some((field.name().as_ref(), self.prop_value(col)?)) + }), + ); + Some(map) } } From 974a5acbbd544eed3bfb7a8acae178b40d61c2b9 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 10 Nov 2025 14:31:03 +0100 Subject: [PATCH 29/47] shouldn't depend on proto --- examples/rust/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/rust/Cargo.toml b/examples/rust/Cargo.toml index f4a7622bf7..09d8c27d8b 100644 --- a/examples/rust/Cargo.toml +++ b/examples/rust/Cargo.toml @@ -7,7 +7,7 @@ keywords = ["graph", "temporal-graph", "temporal", "examples"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -raphtory = { workspace = true, features = ["io", "proto"] } +raphtory = { workspace = true, features = ["io"] } chrono = { workspace = true } regex = { workspace = true } serde = { workspace = true } From e60cbdf8deee604420a2baeb3b0e0704a9b17e8b Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Tue, 11 Nov 2025 10:00:20 +0100 Subject: [PATCH 30/47] add one level of nesting so we can have nulls in the map properties --- .../entities/properties/prop/prop_enum.rs | 38 +++++++++---------- .../entities/properties/prop/prop_ref_enum.rs | 4 +- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index 50a8dd4837..7017c47001 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -5,9 +5,10 @@ use crate::core::{ }, storage::arc_str::ArcStr, }; +use arrow_array::cast::AsArray; #[cfg(feature = "arrow")] use arrow_array::StructArray; -use arrow_schema::DataType; +use arrow_schema::{DataType, Field, FieldRef}; use bigdecimal::{num_bigint::BigInt, BigDecimal}; use chrono::{DateTime, NaiveDateTime, Utc}; use itertools::Itertools; @@ -161,11 +162,16 @@ impl PartialOrd for Prop { } pub struct SerdeProp<'a>(pub &'a Prop); -pub struct SedeList<'a>(pub &'a Vec); +pub struct SerdeList<'a>(pub &'a Vec); #[derive(Clone, Copy)] pub struct SerdeMap<'a>(pub &'a HashMap); -impl<'a> Serialize for SedeList<'a> { +#[derive(Clone, Copy, Serialize)] +pub struct SerdeRow<'a> { + value: Option>, +} + +impl<'a> Serialize for SerdeList<'a> { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, @@ -209,7 +215,7 @@ impl<'a> Serialize for SerdeProp<'a> { Prop::Bool(b) => serializer.serialize_bool(*b), Prop::DTime(dt) => serializer.serialize_i64(dt.timestamp_millis()), Prop::NDTime(dt) => serializer.serialize_i64(dt.and_utc().timestamp_millis()), - Prop::List(l) => SedeList(l).serialize(serializer), + Prop::List(l) => SerdeList(l).serialize(serializer), Prop::Map(m) => SerdeMap(m).serialize(serializer), Prop::Decimal(dec) => serializer.serialize_str(&dec.to_string()), _ => { @@ -355,31 +361,23 @@ pub fn struct_array_from_props

( ) -> StructArray { use serde_arrow::ArrayBuilder; - let fields = match dt { - DataType::Struct(fields) => fields, - _ => panic!("Expected DataType::Struct, got {:?}", dt), - }; + let fields = [FieldRef::new(Field::new("value", dt.clone(), true))]; - let mut builder = ArrayBuilder::from_arrow(fields) + let mut builder = ArrayBuilder::from_arrow(&fields) .unwrap_or_else(|e| panic!("Failed to make array builder {e}")); - let empty_map = FxHashMap::default(); - for p in props { - match p.as_ref().map(as_serde_map) { - Some(map) => builder - .push(map) - .unwrap_or_else(|e| panic!("Failed to push map to array builder {e}")), - _ => builder - .push(SerdeMap(&empty_map)) - .unwrap_or_else(|e| panic!("Failed to push empty map to array builder {e}")), - } + builder + .push(SerdeRow { + value: p.as_ref().map(as_serde_map), + }) + .unwrap_or_else(|e| panic!("Failed to push map to array builder {e}")) } let arrays = builder .to_arrow() .unwrap_or_else(|e| panic!("Failed to convert to arrow array {e}")); - StructArray::new(fields.clone(), arrays, None) + arrays.first().unwrap().as_struct().clone() } impl Display for Prop { diff --git a/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs index 74ef755d38..5874c2ec9f 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs @@ -9,7 +9,7 @@ use rustc_hash::FxHashMap; #[cfg(feature = "arrow")] use crate::core::entities::properties::prop::PropArray; use crate::core::{ - entities::properties::prop::{ArrowRow, Prop, SedeList, SerdeMap}, + entities::properties::prop::{ArrowRow, Prop, SerdeList, SerdeMap}, storage::arc_str::ArcStr, }; @@ -197,7 +197,7 @@ impl<'a> Serialize for PropRef<'a> { PropNum::F64(v) => serializer.serialize_f64(*v), }, PropRef::Bool(b) => serializer.serialize_bool(*b), - PropRef::List(lst) => SedeList(lst).serialize(serializer), + PropRef::List(lst) => SerdeList(lst).serialize(serializer), PropRef::Map(map_ref) => map_ref.serialize(serializer), PropRef::NDTime(dt) => serializer.serialize_i64(dt.and_utc().timestamp_millis()), PropRef::DTime(dt) => serializer.serialize_i64(dt.timestamp_millis()), From f59dfe560b19bd9e5618954d981e28af53e1fc0b Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Tue, 11 Nov 2025 10:18:37 +0100 Subject: [PATCH 31/47] simplify and generalise struct_array_from_props --- db4-storage/src/properties/mod.rs | 2 +- .../src/core/entities/properties/prop/prop_enum.rs | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/db4-storage/src/properties/mod.rs b/db4-storage/src/properties/mod.rs index 7d8de7ad03..4f4e44f7ef 100644 --- a/db4-storage/src/properties/mod.rs +++ b/db4-storage/src/properties/mod.rs @@ -207,7 +207,7 @@ impl Properties { .map(|i| lazy_vec.get_opt(i)) .map(|e| e.map(|m| SerdeMap(m))); - let struct_array = struct_array_from_props(&dt, |sm| *sm, array_iter); + let struct_array = struct_array_from_props(&dt, array_iter); Some(Arc::new(struct_array)) } diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index 7017c47001..4fdeb57118 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -167,8 +167,8 @@ pub struct SerdeList<'a>(pub &'a Vec); pub struct SerdeMap<'a>(pub &'a HashMap); #[derive(Clone, Copy, Serialize)] -pub struct SerdeRow<'a> { - value: Option>, +pub struct SerdeRow { + value: Option

, } impl<'a> Serialize for SerdeList<'a> { @@ -354,9 +354,8 @@ impl Prop { } #[cfg(feature = "arrow")] -pub fn struct_array_from_props

( +pub fn struct_array_from_props( dt: &DataType, - as_serde_map: impl Fn(&P) -> SerdeMap<'_> + Copy, props: impl IntoIterator>, ) -> StructArray { use serde_arrow::ArrayBuilder; @@ -368,9 +367,7 @@ pub fn struct_array_from_props

( for p in props { builder - .push(SerdeRow { - value: p.as_ref().map(as_serde_map), - }) + .push(SerdeRow { value: p }) .unwrap_or_else(|e| panic!("Failed to push map to array builder {e}")) } From 7466de7968b6faf669899a91b4956076b8521a41 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Tue, 11 Nov 2025 11:18:41 +0100 Subject: [PATCH 32/47] add missing validity check for struct_prop_ref --- raphtory-api/src/core/entities/properties/prop/arrow.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/raphtory-api/src/core/entities/properties/prop/arrow.rs b/raphtory-api/src/core/entities/properties/prop/arrow.rs index b8d4386124..c2bb1b9b9f 100644 --- a/raphtory-api/src/core/entities/properties/prop/arrow.rs +++ b/raphtory-api/src/core/entities/properties/prop/arrow.rs @@ -80,8 +80,12 @@ impl<'a> ArrowRow<'a> { fn struct_prop_ref(&self, col: usize) -> Option> { let column = self.array.column(col).as_struct_opt()?; - let row = ArrowRow::new(column, self.index); - (column.len() > self.index).then(|| PropRef::from(row)) + if self.index < column.len() && column.is_valid(self.index) { + let row = ArrowRow::new(column, self.index); + Some(PropRef::from(row)) + } else { + None + } } pub fn bool_value(&self, col: usize) -> Option { From c5ff640a8b314e792faf1ef89390b72a3b92e64b Mon Sep 17 00:00:00 2001 From: Fabian Murariu Date: Mon, 10 Nov 2025 16:40:28 +0000 Subject: [PATCH 33/47] major change to the Prop enum to support both List types # Conflicts: # raphtory-api/src/core/entities/properties/prop/prop_enum.rs --- Cargo.lock | 21 ++ Cargo.toml | 1 + db4-storage/src/pages/test_utils/props.rs | 20 +- db4-storage/src/properties/mod.rs | 20 +- raphtory-api/Cargo.toml | 1 + .../core/entities/properties/prop/arrow.rs | 27 +- .../entities/properties/prop/prop_array.rs | 326 ++++++++++-------- .../entities/properties/prop/prop_enum.rs | 229 ++++-------- .../entities/properties/prop/prop_ref_enum.rs | 67 +--- .../entities/properties/prop/prop_type.rs | 29 +- .../entities/properties/prop/prop_unwrap.rs | 13 +- .../core/entities/properties/prop/serde.rs | 4 +- .../core/entities/properties/prop/template.rs | 4 +- raphtory-api/src/python/prop.rs | 18 +- .../src/entities/properties/tprop.rs | 31 +- raphtory-core/src/storage/mod.rs | 38 +- raphtory-graphql/src/model/graph/filtering.rs | 2 +- raphtory-graphql/src/model/graph/property.rs | 3 +- raphtory-graphql/src/python/client/mod.rs | 16 +- .../src/db/api/properties/temporal_props.rs | 4 +- raphtory/src/errors.rs | 6 +- raphtory/src/io/arrow/prop_handler.rs | 4 +- raphtory/src/python/graph/edges.rs | 4 +- raphtory/src/python/graph/node.rs | 2 +- raphtory/src/python/types/repr.rs | 12 +- raphtory/src/python/types/wrappers/prop.rs | 1 - raphtory/src/python/utils/export.rs | 4 +- raphtory/src/serialise/proto/ext.rs | 18 +- raphtory/src/test_utils.rs | 13 +- 29 files changed, 406 insertions(+), 532 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d7a7ebeadd..9b7f598a9b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2236,6 +2236,26 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "derive_more" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "derive_utils" version = "0.15.0" @@ -5085,6 +5105,7 @@ dependencies = [ "bytemuck", "chrono", "dashmap", + "derive_more", "display-error-chain", "iter-enum", "itertools 0.13.0", diff --git a/Cargo.toml b/Cargo.toml index f2bd506464..df3ad02b39 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,6 +60,7 @@ async-graphql = { version = "7.0.16", features = ["dynamic-schema"] } bincode = "1.3.3" async-graphql-poem = "7.0.16" dynamic-graphql = "0.10.1" +derive_more = "2.0.1" reqwest = { version = "0.12.8", default-features = false, features = [ "rustls-tls", "multipart", diff --git a/db4-storage/src/pages/test_utils/props.rs b/db4-storage/src/pages/test_utils/props.rs index c7510c6bcc..4c91400288 100644 --- a/db4-storage/src/pages/test_utils/props.rs +++ b/db4-storage/src/pages/test_utils/props.rs @@ -2,7 +2,7 @@ use bigdecimal::BigDecimal; use chrono::{DateTime, NaiveDateTime, Utc}; use itertools::Itertools; use proptest::prelude::*; -use raphtory_api::core::entities::properties::prop::{DECIMAL_MAX, Prop, PropType}; +use raphtory_api::core::entities::properties::prop::{DECIMAL_MAX, Prop, PropArray, PropType}; use std::collections::HashMap; pub fn prop_type() -> impl Strategy { @@ -22,11 +22,10 @@ pub fn prop_type() -> impl Strategy { leaf.prop_recursive(3, 10, 10, |inner| { let dict = proptest::collection::hash_map(r"\w{1,10}", inner.clone(), 1..10) .prop_map(PropType::map); - // let list = inner - // .clone() - // .prop_map(|p_type| PropType::List(Box::new(p_type))); - // prop_oneof![inner, list, dict] - prop_oneof![inner, dict] + let list = inner + .clone() + .prop_map(|p_type| PropType::List(Box::new(p_type))); + prop_oneof![inner, list, dict] }) } @@ -100,7 +99,7 @@ pub(crate) fn prop(p_type: &PropType) -> impl Strategy + use<> { }) .boxed(), PropType::List(p_type) => proptest::collection::vec(prop(p_type), 0..10) - .prop_map(|props| Prop::List(props.into())) + .prop_map(|props| Prop::List(PropArray::Vec(props.into()))) .boxed(), PropType::Map(p_types) => { let prop_types: Vec> = p_types @@ -109,17 +108,16 @@ pub(crate) fn prop(p_type: &PropType) -> impl Strategy + use<> { .collect::>() .into_iter() .map(|(name, p_type)| { - let pt_strat = prop(&p_type) + prop(&p_type) .prop_map(move |prop| (name.clone(), prop.clone())) - .boxed(); - pt_strat + .boxed() }) .collect_vec(); let props = proptest::sample::select(prop_types).prop_flat_map(|prop| prop); proptest::collection::vec(props, 1..10) - .prop_map(|props| Prop::map(props)) + .prop_map(Prop::map) .boxed() } PropType::Decimal { scale } => { diff --git a/db4-storage/src/properties/mod.rs b/db4-storage/src/properties/mod.rs index 4f4e44f7ef..2521a59f54 100644 --- a/db4-storage/src/properties/mod.rs +++ b/db4-storage/src/properties/mod.rs @@ -7,7 +7,10 @@ use arrow_schema::DECIMAL128_MAX_PRECISION; use bigdecimal::ToPrimitive; use raphtory_api::core::entities::properties::{ meta::PropMapper, - prop::{Prop, PropType, SerdeMap, arrow_dtype_from_prop_type, struct_array_from_props}, + prop::{ + Prop, PropType, SerdeList, SerdeMap, arrow_dtype_from_prop_type, list_array_from_props, + struct_array_from_props, + }, }; use raphtory_core::{ entities::{ @@ -211,6 +214,21 @@ impl Properties { Some(Arc::new(struct_array)) } + PropColumn::List(lazy_vec) => { + let dt = meta + .get_dtype(col_id) + .as_ref() + .map(arrow_dtype_from_prop_type) + .unwrap(); + + let array_iter = indices + .map(|i| lazy_vec.get_opt(i)) + .map(|opt_list| opt_list.map(|list| SerdeList(list))); + + let list_array = list_array_from_props(&dt, |lst| *lst, array_iter); + + Some(Arc::new(list_array)) + } _ => None, //todo!("Unsupported column type"), } } diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index da4cfda2b6..9fc711e9fe 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -22,6 +22,7 @@ thiserror = { workspace = true } bytemuck = { workspace = true } chrono.workspace = true dashmap = { workspace = true } +derive_more = { workspace = true } rustc-hash = { workspace = true } lock_api = { workspace = true } parking_lot = { workspace = true } diff --git a/raphtory-api/src/core/entities/properties/prop/arrow.rs b/raphtory-api/src/core/entities/properties/prop/arrow.rs index c2bb1b9b9f..9d762dc607 100644 --- a/raphtory-api/src/core/entities/properties/prop/arrow.rs +++ b/raphtory-api/src/core/entities/properties/prop/arrow.rs @@ -1,4 +1,6 @@ -use arrow_array::{cast::AsArray, types::*, Array, ArrowPrimitiveType, StructArray}; +use arrow_array::{ + cast::AsArray, types::*, Array, ArrowPrimitiveType, OffsetSizeTrait, StructArray, +}; use arrow_schema::{DataType, TimeUnit}; use chrono::DateTime; use serde::{ser::SerializeMap, Serialize}; @@ -18,6 +20,7 @@ impl<'a> PartialEq for ArrowRow<'a> { return false; } + //FIXME: it could be that the fields don't match in order but the values are the same for col in 0..self.array.num_columns() { let self_prop = self.prop_ref(col); let other_prop = other.prop_ref(col); @@ -73,9 +76,23 @@ impl<'a> ArrowRow<'a> { } fn struct_prop(&self, col: usize) -> Option { - let column = self.array.column(col).as_struct_opt()?; - let row = ArrowRow::new(column, self.index); - row.into_prop() + let col = self.array.column(col).as_struct_opt()?; + let row = ArrowRow::new(col, self.index); + if col.len() > self.index && !col.is_null(self.index) { + row.into_prop() + } else { + None + } + } + + fn list_prop(&self, col: usize) -> Option { + let col = self.array.column(col).as_list_opt::()?; + let row = col.value(self.index); + if col.len() > self.index && !col.is_null(self.index) { + Some(row.into()) + } else { + None + } } fn struct_prop_ref(&self, col: usize) -> Option> { @@ -137,6 +154,8 @@ impl<'a> ArrowRow<'a> { } DataType::Decimal128(_, _) => self.primitive_prop::(col), DataType::Struct(_) => self.struct_prop(col), + DataType::List(_) => self.list_prop::(col), + DataType::LargeList(_) => self.list_prop::(col), _ => None, } } diff --git a/raphtory-api/src/core/entities/properties/prop/prop_array.rs b/raphtory-api/src/core/entities/properties/prop/prop_array.rs index 30e413cb04..5f43f355b9 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_array.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_array.rs @@ -1,52 +1,51 @@ use crate::{ - core::entities::properties::prop::{Prop, PropType}, + core::entities::properties::prop::{ArrowRow, DirectConvert, Prop, PropType}, iter::{BoxedLIter, IntoDynBoxed}, }; use arrow_array::{ - cast::AsArray, - types::{ - Float32Type, Float64Type, Int32Type, Int64Type, UInt16Type, UInt32Type, UInt64Type, - UInt8Type, - }, - Array, ArrayRef, ArrowPrimitiveType, PrimitiveArray, RecordBatch, + cast::AsArray, types::*, Array, ArrayRef, ArrowPrimitiveType, OffsetSizeTrait, PrimitiveArray, }; -use arrow_ipc::{reader::StreamReader, writer::StreamWriter}; -use arrow_schema::{ArrowError, DataType, Field, Fields, Schema}; -use serde::{Deserialize, Serialize, Serializer}; +use arrow_schema::{DataType, Field, Fields, TimeUnit}; +use serde::{ser::SerializeSeq, Deserialize, Serialize, Serializer}; use std::{ hash::{Hash, Hasher}, sync::Arc, }; -use thiserror::Error; -#[derive(Default, Debug, Clone)] +#[derive(Debug, Clone, derive_more::From)] pub enum PropArray { - #[default] - Empty, + Vec(Arc>), Array(ArrayRef), } -#[derive(Error, Debug)] -pub enum DeserialisationError { - #[error("Failed to deserialize ArrayRef")] - DeserialisationError, - #[error(transparent)] - ArrowError(#[from] ArrowError), +impl Default for PropArray { + fn default() -> Self { + PropArray::Vec(vec![].into()) + } +} + +impl From> for PropArray { + fn from(vec: Vec) -> Self { + PropArray::Vec(Arc::from(vec)) + } } impl Hash for PropArray { fn hash(&self, state: &mut H) { - if let PropArray::Array(array) = self { - let data = array.to_data(); - let dtype = array.data_type(); - dtype.hash(state); - data.offset().hash(state); - data.len().hash(state); - for buffer in data.buffers() { - buffer.hash(state); + match self { + PropArray::Array(array) => { + let data = array.to_data(); + let dtype = array.data_type(); + dtype.hash(state); + data.offset().hash(state); + data.len().hash(state); + for buffer in data.buffers() { + buffer.hash(state); + } + } + PropArray::Vec(ps) => { + ps.hash(state); } - } else { - PropArray::Empty.hash(state); } } } @@ -55,48 +54,25 @@ impl PropArray { pub fn len(&self) -> usize { match self { PropArray::Array(arr) => arr.len(), - PropArray::Empty => 0, + PropArray::Vec(ps) => ps.len(), } } pub fn is_empty(&self) -> bool { match self { - PropArray::Empty => true, + PropArray::Vec(ps) => ps.is_empty(), PropArray::Array(arr) => arr.is_empty(), } } pub fn dtype(&self) -> PropType { match self { - PropArray::Empty => PropType::Empty, + PropArray::Vec(ps) if ps.is_empty() => PropType::Empty, + PropArray::Vec(ps) => ps[0].dtype(), PropArray::Array(a) => PropType::from(a.data_type()), } } - pub fn to_vec_u8(&self) -> Vec { - // assuming we can allocate this can't fail - let mut bytes = vec![]; - if let PropArray::Array(value) = self { - let schema = Schema::new(vec![Field::new("data", value.data_type().clone(), true)]); - let mut writer = StreamWriter::try_new(&mut bytes, &schema).unwrap(); - let rb = RecordBatch::try_new(schema.into(), vec![value.clone()]).unwrap(); - writer.write(&rb).unwrap(); - writer.finish().unwrap(); - } - bytes - } - - pub fn from_vec_u8(bytes: &[u8]) -> Result { - if bytes.is_empty() { - return Ok(PropArray::Empty); - } - let mut reader = StreamReader::try_new(bytes, None)?; - let rb = reader - .next() - .ok_or(DeserialisationError::DeserialisationError)??; - Ok(PropArray::Array(rb.column(0).clone())) - } - pub fn into_array_ref(self) -> Option { match self { PropArray::Array(arr) => Some(arr), @@ -111,78 +87,114 @@ impl PropArray { } } - pub fn iter_prop(&self) -> impl Iterator + '_ { - self.iter_prop_inner().into_iter().flatten() + // TODO: need something that returns PropRef instead to avoid allocations + pub fn iter(&self) -> impl Iterator + '_ { + self.iter_all().flatten() + } + + pub fn iter_all(&self) -> BoxedLIter<'_, Option> { + match self { + PropArray::Vec(ps) => ps.iter().cloned().map(Some).into_dyn_boxed(), + PropArray::Array(arr) => { + let dtype = arr.data_type(); + match dtype { + DataType::Boolean => arr + .as_boolean() + .iter() + .map(|p| p.map(Prop::Bool)) + .into_dyn_boxed(), + DataType::Int32 => as_primitive_iter::(arr), + DataType::Int64 => as_primitive_iter::(arr), + DataType::UInt8 => as_primitive_iter::(arr), + DataType::UInt16 => as_primitive_iter::(arr), + DataType::UInt32 => as_primitive_iter::(arr), + DataType::UInt64 => as_primitive_iter::(arr), + DataType::Float32 => as_primitive_iter::(arr), + DataType::Float64 => as_primitive_iter::(arr), + DataType::Timestamp(unit, _) => match unit { + TimeUnit::Second => as_primitive_iter::(arr), + TimeUnit::Millisecond => as_primitive_iter::(arr), + TimeUnit::Microsecond => as_primitive_iter::(arr), + TimeUnit::Nanosecond => as_primitive_iter::(arr), + }, + DataType::Date32 => as_primitive_iter::(arr), + DataType::Date64 => as_primitive_iter::(arr), + DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => as_str_iter(arr), + DataType::Decimal128(_, _) => as_primitive_iter::(arr), + DataType::Struct(_) => as_struct_iter(arr), + DataType::List(_) => as_list_iter::(arr), + DataType::LargeList(_) => as_list_iter::(arr), + _ => std::iter::empty().into_dyn_boxed(), + } + } + } } +} - fn iter_prop_inner(&self) -> Option> { - let arr = self.as_array_ref()?; +fn as_primitive_iter(arr: &ArrayRef) -> BoxedLIter<'_, Option> { + arr.as_primitive_opt::() + .into_iter() + .flat_map(|primitive_array| { + let dt = arr.data_type(); + primitive_array.iter().map(|v| v.map(|v| TT::prop(v, dt))) + }) + .into_dyn_boxed() +} - arr.as_primitive_opt::() - .map(|arr| { - arr.into_iter() - .map(|v| Prop::I32(v.unwrap_or_default())) - .into_dyn_boxed() - }) - .or_else(|| { - arr.as_primitive_opt::().map(|arr| { - arr.into_iter() - .map(|v| Prop::F64(v.unwrap_or_default())) - .into_dyn_boxed() - }) - }) - .or_else(|| { - arr.as_primitive_opt::().map(|arr| { - arr.into_iter() - .map(|v| Prop::F32(v.unwrap_or_default())) - .into_dyn_boxed() - }) - }) - .or_else(|| { - arr.as_primitive_opt::().map(|arr| { - arr.into_iter() - .map(|v| Prop::U64(v.unwrap_or_default())) - .into_dyn_boxed() - }) - }) - .or_else(|| { - arr.as_primitive_opt::().map(|arr| { - arr.into_iter() - .map(|v| Prop::U32(v.unwrap_or_default())) - .into_dyn_boxed() - }) - }) - .or_else(|| { - arr.as_primitive_opt::().map(|arr| { - arr.into_iter() - .map(|v| Prop::I64(v.unwrap_or_default())) - .into_dyn_boxed() - }) - }) - .or_else(|| { - arr.as_primitive_opt::().map(|arr| { - arr.into_iter() - .map(|v| Prop::U16(v.unwrap_or_default())) - .into_dyn_boxed() - }) - }) - .or_else(|| { - arr.as_primitive_opt::().map(|arr| { - arr.into_iter() - .map(|v| Prop::U8(v.unwrap_or_default())) - .into_dyn_boxed() - }) - }) +fn as_str_iter(arr: &ArrayRef) -> BoxedLIter<'_, Option> { + match arr.data_type() { + DataType::Utf8 => arr + .as_string::() + .into_iter() + .map(|opt_str| opt_str.map(|s| Prop::str(s.to_string()))) + .into_dyn_boxed(), + DataType::LargeUtf8 => arr + .as_string::() + .into_iter() + .map(|opt_str| opt_str.map(|s| Prop::str(s.to_string()))) + .into_dyn_boxed(), + DataType::Utf8View => arr + .as_string_view() + .into_iter() + .map(|opt_str| opt_str.map(|s| Prop::str(s.to_string()))) + .into_dyn_boxed(), + _ => panic!("as_str_iter called on non-string array"), } } +fn as_struct_iter(arr: &ArrayRef) -> BoxedLIter<'_, Option> { + let arr = arr.as_struct(); + (0..arr.len()) + .map(|row| (!arr.is_null(row)).then(|| ArrowRow::new(arr, row))) + .map(|arrow_row| arrow_row.and_then(|row| row.into_prop())) + .into_dyn_boxed() +} + +fn as_list_iter(arr: &ArrayRef) -> BoxedLIter<'_, Option> { + let arr = arr.as_list::(); + (0..arr.len()) + .map(|i| { + if arr.is_null(i) { + None + } else { + let value_array = arr.value(i); + let prop_array = PropArray::Array(value_array); + Some(Prop::List(prop_array)) + } + }) + .into_dyn_boxed() +} + impl Serialize for PropArray { fn serialize(&self, serializer: S) -> Result where S: Serializer, { - let bytes = self.to_vec_u8(); - bytes.serialize(serializer) + let mut state = serializer.serialize_seq(Some(self.len()))?; + for prop in self.iter_all() { + state.serialize_element(&prop)?; + } + state.end() } } @@ -191,17 +203,54 @@ impl<'de> Deserialize<'de> for PropArray { where D: serde::Deserializer<'de>, { - let bytes = Vec::::deserialize(deserializer)?; - PropArray::from_vec_u8(&bytes).map_err(serde::de::Error::custom) + let vec: Vec = Deserialize::deserialize(deserializer)?; + Ok(PropArray::Vec(Arc::from(vec))) } } impl PartialEq for PropArray { fn eq(&self, other: &Self) -> bool { match (self, other) { - (PropArray::Empty, PropArray::Empty) => true, + (PropArray::Vec(l), PropArray::Vec(r)) => l.eq(r), (PropArray::Array(a), PropArray::Array(b)) => a.eq(b), - _ => false, + _ => { + let mut l_iter = self.iter_all(); + let mut r_iter = other.iter_all(); + loop { + match (l_iter.next(), r_iter.next()) { + (Some(lv), Some(rv)) => { + if lv != rv { + return false; + } + } + (None, None) => return true, + _ => return false, + } + } + } + } + } +} + +impl PartialOrd for PropArray { + fn partial_cmp(&self, other: &Self) -> Option { + match (self, other) { + (PropArray::Vec(l), PropArray::Vec(r)) => l.partial_cmp(r), + _ => { + let mut l_iter = self.iter_all(); + let mut r_iter = other.iter_all(); + loop { + match (l_iter.next(), r_iter.next()) { + (Some(lv), Some(rv)) => match lv.partial_cmp(&rv) { + Some(std::cmp::Ordering::Equal) => continue, + other => return other, + }, + (None, None) => return Some(std::cmp::Ordering::Equal), + (None, Some(_)) => return Some(std::cmp::Ordering::Less), + (Some(_), None) => return Some(std::cmp::Ordering::Greater), + } + } + } } } } @@ -212,7 +261,7 @@ impl Prop { PrimitiveArray: From>, { let array = PrimitiveArray::::from(vals); - Prop::Array(PropArray::Array(Arc::new(array))) + Prop::List(PropArray::Array(Arc::new(array))) } } @@ -232,12 +281,8 @@ pub fn arrow_dtype_from_prop_type(prop_type: &PropType) -> DataType { PropType::DTime => { DataType::Timestamp(arrow_schema::TimeUnit::Millisecond, Some("UTC".into())) } - PropType::Array(d_type) => { - DataType::List(Field::new("data", arrow_dtype_from_prop_type(d_type), true).into()) - } - PropType::List(d_type) => { - DataType::List(Field::new("data", arrow_dtype_from_prop_type(d_type), true).into()) + DataType::LargeList(Field::new("data", arrow_dtype_from_prop_type(d_type), true).into()) } PropType::Map(d_type) => { let fields = d_type @@ -263,29 +308,6 @@ pub fn arrow_dtype_from_prop_type(prop_type: &PropType) -> DataType { } } -pub fn prop_type_from_arrow_dtype(arrow_dtype: &DataType) -> PropType { - match arrow_dtype { - DataType::LargeUtf8 | DataType::Utf8 | DataType::Utf8View => PropType::Str, - DataType::UInt8 => PropType::U8, - DataType::UInt16 => PropType::U16, - DataType::Int32 => PropType::I32, - DataType::Int64 => PropType::I64, - DataType::UInt32 => PropType::U32, - DataType::UInt64 => PropType::U64, - DataType::Float32 => PropType::F32, - DataType::Float64 => PropType::F64, - DataType::Boolean => PropType::Bool, - DataType::Decimal128(_, scale) => PropType::Decimal { - scale: *scale as i64, - }, - DataType::List(field) => { - let d_type = field.data_type(); - PropType::Array(Box::new(prop_type_from_arrow_dtype(d_type))) - } - _ => panic!("{:?} not supported as disk_graph property", arrow_dtype), - } -} - pub trait PropArrayUnwrap: Sized { fn into_array(self) -> Option; fn unwrap_array(self) -> ArrayRef { @@ -301,7 +323,7 @@ impl PropArrayUnwrap for Option

{ impl PropArrayUnwrap for Prop { fn into_array(self) -> Option { - if let Prop::Array(v) = self { + if let Prop::List(v) = self { v.into_array_ref() } else { None diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index 4fdeb57118..55b555c626 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -6,9 +6,10 @@ use crate::core::{ storage::arc_str::ArcStr, }; use arrow_array::cast::AsArray; +use arrow_array::ArrayRef; #[cfg(feature = "arrow")] -use arrow_array::StructArray; use arrow_schema::{DataType, Field, FieldRef}; +use arrow_array::{LargeListArray, ListArray, StructArray}; use bigdecimal::{num_bigint::BigInt, BigDecimal}; use chrono::{DateTime, NaiveDateTime, Utc}; use itertools::Itertools; @@ -38,7 +39,7 @@ pub const DECIMAL_MAX: i128 = 99999999999999999999999999999999999999i128; // equ pub struct InvalidBigDecimal(BigDecimal); /// Denotes the types of properties allowed to be stored in the graph. -#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)] +#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, derive_more::From)] pub enum Prop { Str(ArcStr), U8(u8), @@ -50,12 +51,10 @@ pub enum Prop { F32(f32), F64(f64), Bool(bool), - List(Arc>), + List(PropArray), Map(Arc>), NDTime(NaiveDateTime), DTime(DateTime), - #[cfg(feature = "arrow")] - Array(PropArray), Decimal(BigDecimal), } @@ -83,14 +82,12 @@ impl<'a> From> for Prop { PropNum::F64(f) => Prop::F64(f), }, PropRef::Bool(b) => Prop::Bool(b), - PropRef::List(v) => Prop::List(v.clone()), + PropRef::List(v) => Prop::List(v.clone().into()), PropRef::Map(m) => m .into_prop() .unwrap_or_else(|| Prop::Map(Arc::new(Default::default()))), PropRef::NDTime(dt) => Prop::NDTime(dt), PropRef::DTime(dt) => Prop::DTime(dt), - #[cfg(feature = "arrow")] - PropRef::Array(arr) => Prop::Array(arr.clone()), PropRef::Decimal { num, scale } => { Prop::Decimal(BigDecimal::from_bigint(num.into(), scale as i64)) } @@ -118,8 +115,6 @@ impl Hash for Prop { } Prop::Bool(b) => b.hash(state), Prop::NDTime(dt) => dt.hash(state), - #[cfg(feature = "arrow")] - Prop::Array(b) => b.hash(state), Prop::DTime(dt) => dt.hash(state), Prop::List(v) => { for prop in v.iter() { @@ -162,7 +157,8 @@ impl PartialOrd for Prop { } pub struct SerdeProp<'a>(pub &'a Prop); -pub struct SerdeList<'a>(pub &'a Vec); +#[derive(Clone, Copy)] +pub struct SerdeList<'a>(pub &'a PropArray); #[derive(Clone, Copy)] pub struct SerdeMap<'a>(pub &'a HashMap); @@ -178,7 +174,7 @@ impl<'a> Serialize for SerdeList<'a> { { let mut state = serializer.serialize_seq(Some(self.0.len()))?; for prop in self.0.iter() { - state.serialize_element(&SerdeProp(prop))?; + state.serialize_element(&SerdeProp(&prop))?; } state.end() } @@ -218,9 +214,6 @@ impl<'a> Serialize for SerdeProp<'a> { Prop::List(l) => SerdeList(l).serialize(serializer), Prop::Map(m) => SerdeMap(m).serialize(serializer), Prop::Decimal(dec) => serializer.serialize_str(&dec.to_string()), - _ => { - todo!("Serializer not implemented") - } } } } @@ -272,26 +265,9 @@ impl Prop { Prop::F32(_) => PropType::F32, Prop::F64(_) => PropType::F64, Prop::Bool(_) => PropType::Bool, - Prop::List(list) => { - let list_type = list - .iter() - .map(|p| Ok(p.dtype())) - .reduce(|a, b| unify_types(&a?, &b?, &mut false)) - .transpose() - .map(|e| e.unwrap_or(PropType::Empty)) - .unwrap_or_else(|e| panic!("Cannot unify types for list {list:?}: {e:?}")); - PropType::List(Box::new(list_type)) - } + Prop::List(list) => PropType::List(Box::new(list.dtype())), Prop::Map(map) => PropType::map(map.iter().map(|(k, v)| (k, v.dtype()))), Prop::NDTime(_) => PropType::NDTime, - #[cfg(feature = "arrow")] - Prop::Array(arr) => { - let arrow_dtype = arr - .as_array_ref() - .expect("Should not call dtype on empty PropArray") - .data_type(); - PropType::Array(Box::new(prop_type_from_arrow_dtype(arrow_dtype))) - } Prop::DTime(_) => PropType::DTime, Prop::Decimal(d) => PropType::Decimal { scale: d.as_bigint_and_scale().1, @@ -303,6 +279,12 @@ impl Prop { Prop::Str(s.into()) } + pub fn list, I: IntoIterator>(vals: I) -> Prop { + Prop::List(PropArray::Vec( + vals.into_iter().map_into().collect::>().into(), + )) + } + pub fn add(self, other: Prop) -> Option { match (self, other) { (Prop::U8(a), Prop::U8(b)) => Some(Prop::U8(a + b)), @@ -353,6 +335,45 @@ impl Prop { } } +#[cfg(feature = "arrow")] +pub fn list_array_from_props

( + dt: &DataType, + as_serde_map: impl Fn(&P) -> SerdeList<'_> + Copy, + props: impl IntoIterator>, +) -> LargeListArray { + use arrow_array::LargeListArray; + use arrow_schema::{Field, Fields}; + use serde_arrow::ArrayBuilder; + + let fields: Fields = vec![Field::new("list", dt.clone(), false)].into(); + + let mut builder = ArrayBuilder::from_arrow(&fields) + .unwrap_or_else(|e| panic!("Failed to make array builder {e}")); + + let empty_list = PropArray::default(); + for p in props { + match p.as_ref().map(as_serde_map) { + todo!("USE SerdeRow"); + Some(list) => builder + .push(Value { list }) + .unwrap_or_else(|e| panic!("Failed to push list to array builder {e}")), + _ => builder + .push(SerdeList(&empty_list)) + .unwrap_or_else(|e| panic!("Failed to push empty list to array builder {e}")), + } + } + + let arrays = builder + .to_arrow() + .unwrap_or_else(|e| panic!("Failed to convert to arrow array {e}")); + arrays[0] + .clone() + .as_any() + .downcast_ref::() + .unwrap() + .clone() +} + #[cfg(feature = "arrow")] pub fn struct_array_from_props( dt: &DataType, @@ -392,8 +413,6 @@ impl Display for Prop { Prop::Bool(value) => write!(f, "{}", value), Prop::DTime(value) => write!(f, "{}", value), Prop::NDTime(value) => write!(f, "{}", value), - #[cfg(feature = "arrow")] - Prop::Array(value) => write!(f, "{:?}", value), Prop::List(value) => { write!( f, @@ -437,144 +456,24 @@ impl Display for Prop { } } -impl From for Prop { - fn from(value: ArcStr) -> Self { - Prop::Str(value) - } -} - -impl From<&ArcStr> for Prop { - fn from(value: &ArcStr) -> Self { - Prop::Str(value.clone()) - } -} - -impl From for Prop { - fn from(value: String) -> Self { - Prop::Str(value.into()) - } -} - -impl From<&String> for Prop { - fn from(s: &String) -> Self { - Prop::Str(s.as_str().into()) - } -} - -impl From> for Prop { - fn from(s: Arc) -> Self { - Prop::Str(s.into()) - } -} - -impl From<&Arc> for Prop { - fn from(value: &Arc) -> Self { - Prop::Str(value.clone().into()) - } -} - impl From<&str> for Prop { fn from(s: &str) -> Self { - Prop::Str(s.to_owned().into()) - } -} - -impl From for Prop { - fn from(i: i32) -> Self { - Prop::I32(i) - } -} - -impl From for Prop { - fn from(i: u8) -> Self { - Prop::U8(i) - } -} - -impl From for Prop { - fn from(i: u16) -> Self { - Prop::U16(i) - } -} - -impl From for Prop { - fn from(i: i64) -> Self { - Prop::I64(i) - } -} - -impl From for Prop { - fn from(d: BigDecimal) -> Self { - Prop::Decimal(d) - } -} - -impl From for Prop { - fn from(u: u32) -> Self { - Prop::U32(u) - } -} - -impl From for Prop { - fn from(u: u64) -> Self { - Prop::U64(u) - } -} - -impl From for Prop { - fn from(f: f32) -> Self { - Prop::F32(f) - } -} - -impl From for Prop { - fn from(f: f64) -> Self { - Prop::F64(f) - } -} - -impl From> for Prop { - fn from(f: DateTime) -> Self { - Prop::DTime(f) - } -} - -impl From for Prop { - fn from(value: NaiveDateTime) -> Self { - Prop::NDTime(value) - } -} - -impl From for Prop { - fn from(b: bool) -> Self { - Prop::Bool(b) + Prop::Str(s.into()) } } -impl From>> for Prop { - fn from(value: Arc>) -> Self { - Prop::List(value) +impl From for Prop { + fn from(s: String) -> Self { + Prop::Str(s.into()) } } -#[cfg(feature = "arrow")] -impl From for Prop { - fn from(value: PropArray) -> Self { - Prop::Array(value) - } -} impl From> for Prop { fn from(value: HashMap) -> Self { Prop::Map(Arc::new(value.into_iter().collect())) } } -impl From>> for Prop { - fn from(value: Arc>) -> Self { - Prop::Map(value) - } -} - impl From> for Prop { fn from(value: FxHashMap) -> Self { Prop::Map(Arc::new(value)) @@ -583,7 +482,7 @@ impl From> for Prop { impl From> for Prop { fn from(value: Vec) -> Self { - Prop::List(Arc::new(value)) + Prop::List(Arc::new(value).into()) } } @@ -593,6 +492,13 @@ impl From<&Prop> for Prop { } } +#[cfg(feature = "arrow")] +impl From for Prop { + fn from(value: ArrayRef) -> Self { + Prop::List(PropArray::from(value)) + } +} + pub trait IntoPropMap { fn into_prop_map(self) -> Prop; } @@ -613,7 +519,8 @@ pub trait IntoPropList { impl, K: Into> IntoPropList for I { fn into_prop_list(self) -> Prop { - Prop::List(Arc::new(self.into_iter().map(|v| v.into()).collect())) + let vec = self.into_iter().map(|v| v.into()).collect::>(); + Prop::List(Arc::new(vec).into()) } } diff --git a/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs index 5874c2ec9f..b189c73d76 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs @@ -18,16 +18,11 @@ pub enum PropRef<'a> { Str(&'a str), Num(PropNum), Bool(bool), - List(&'a Arc>), + List(&'a PropArray), Map(PropMapRef<'a>), NDTime(NaiveDateTime), DTime(DateTime), - #[cfg(feature = "arrow")] - Array(&'a PropArray), - Decimal { - num: i128, - scale: i8, - }, + Decimal { num: i128, scale: i8 }, } #[derive(Debug, PartialEq, Clone, Copy)] @@ -45,6 +40,12 @@ impl<'a> PropMapRef<'a> { } } +impl> From for PropRef<'static> { + fn from(n: T) -> Self { + PropRef::Num(n.into()) + } +} + impl<'a> From for PropRef<'a> { fn from(b: bool) -> Self { PropRef::Bool(b) @@ -57,54 +58,6 @@ impl<'a> From<&'a str> for PropRef<'a> { } } -impl From for PropRef<'_> { - fn from(n: u8) -> Self { - PropRef::Num(PropNum::U8(n)) - } -} - -impl From for PropRef<'_> { - fn from(n: u16) -> Self { - PropRef::Num(PropNum::U16(n)) - } -} - -impl From for PropRef<'_> { - fn from(n: i32) -> Self { - PropRef::Num(PropNum::I32(n)) - } -} - -impl From for PropRef<'_> { - fn from(n: i64) -> Self { - PropRef::Num(PropNum::I64(n)) - } -} - -impl From for PropRef<'_> { - fn from(n: u32) -> Self { - PropRef::Num(PropNum::U32(n)) - } -} - -impl From for PropRef<'_> { - fn from(n: u64) -> Self { - PropRef::Num(PropNum::U64(n)) - } -} - -impl From for PropRef<'_> { - fn from(n: f32) -> Self { - PropRef::Num(PropNum::F32(n)) - } -} - -impl From for PropRef<'_> { - fn from(n: f64) -> Self { - PropRef::Num(PropNum::F64(n)) - } -} - impl From for PropRef<'_> { fn from(dt: NaiveDateTime) -> Self { PropRef::NDTime(dt) @@ -145,7 +98,7 @@ impl<'a> From<&'a Arc>> for PropRef<'a> { } } -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, PartialEq, Clone, Copy, derive_more::From)] pub enum PropNum { U8(u8), U16(u16), @@ -201,8 +154,6 @@ impl<'a> Serialize for PropRef<'a> { PropRef::Map(map_ref) => map_ref.serialize(serializer), PropRef::NDTime(dt) => serializer.serialize_i64(dt.and_utc().timestamp_millis()), PropRef::DTime(dt) => serializer.serialize_i64(dt.timestamp_millis()), - #[cfg(feature = "arrow")] - PropRef::Array(arr) => arr.serialize(serializer), PropRef::Decimal { num, scale } => { let decimal = BigDecimal::new((*num).into(), (*scale).into()); decimal.serialize(serializer) diff --git a/raphtory-api/src/core/entities/properties/prop/prop_type.rs b/raphtory-api/src/core/entities/properties/prop/prop_type.rs index d4f9275a6c..0a34493c6e 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_type.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_type.rs @@ -38,7 +38,6 @@ pub enum PropType { Map(Arc>), NDTime, DTime, - Array(Box), Decimal { scale: i64, }, @@ -69,7 +68,6 @@ impl Display for PropType { } PropType::NDTime => "NDTime", PropType::DTime => "DTime", - PropType::Array(p_type) => return write!(f, "Array<{}>", p_type), PropType::Decimal { scale } => return write!(f, "Decimal({})", scale), }; @@ -148,24 +146,20 @@ impl PropType { PropType::Map(p_map) => { p_map.iter().map(|(_, v)| v.est_size()).sum::() * CONTAINER_SIZE } - PropType::Array(p_type) => p_type.est_size() * CONTAINER_SIZE, PropType::Decimal { .. } => 16, PropType::Empty => 0, } } } -#[cfg(any(feature = "arrow"))] -mod arrow { +pub mod arrow { use crate::core::entities::properties::prop::PropType; - use arrow_schema::DataType; + use arrow_schema::{DataType, TimeUnit}; impl From<&DataType> for PropType { fn from(value: &DataType) -> Self { match value { - DataType::Utf8 => PropType::Str, - DataType::LargeUtf8 => PropType::Str, - DataType::Utf8View => PropType::Str, + DataType::Utf8View | DataType::LargeUtf8 | DataType::Utf8 => PropType::Str, DataType::UInt8 => PropType::U8, DataType::UInt16 => PropType::U16, DataType::Int32 => PropType::I32, @@ -178,7 +172,18 @@ mod arrow { scale: *scale as i64, }, DataType::Boolean => PropType::Bool, - + DataType::Timestamp(TimeUnit::Millisecond, None) => PropType::NDTime, + DataType::Timestamp(TimeUnit::Microsecond, tz) if tz.as_deref() == Some("UTC") => { + PropType::DTime + } + DataType::Struct(fields) => PropType::map( + fields + .iter() + .map(|f| (f.name().to_string(), PropType::from(f.data_type()))), + ), + DataType::List(field) | DataType::LargeList(field) => { + PropType::List(Box::new(PropType::from(field.data_type()))) + } _ => PropType::Empty, } } @@ -213,9 +218,6 @@ pub fn unify_types(l: &PropType, r: &PropType, unified: &mut bool) -> Result { unify_types(l_type, r_type, unified).map(|t| PropType::List(Box::new(t))) } - (PropType::Array(l_type), PropType::Array(r_type)) => { - unify_types(l_type, r_type, unified).map(|t| PropType::Array(Box::new(t))) - } (PropType::Map(l_map), PropType::Map(r_map)) => { // maps need to be merged and only overlapping keys need to be unified @@ -272,7 +274,6 @@ pub fn check_for_unification(l: &PropType, r: &PropType) -> Option { (PropType::NDTime, PropType::NDTime) => None, (PropType::DTime, PropType::DTime) => None, (PropType::List(l_type), PropType::List(r_type)) => check_for_unification(l_type, r_type), - (PropType::Array(l_type), PropType::Array(r_type)) => check_for_unification(l_type, r_type), (PropType::Map(l_map), PropType::Map(r_map)) => { let keys_check = l_map .keys() diff --git a/raphtory-api/src/core/entities/properties/prop/prop_unwrap.rs b/raphtory-api/src/core/entities/properties/prop/prop_unwrap.rs index f9e29bc1b5..133d12b3f7 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_unwrap.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_unwrap.rs @@ -1,4 +1,7 @@ -use crate::core::{entities::properties::prop::Prop, storage::arc_str::ArcStr}; +use crate::core::{ + entities::properties::prop::{Prop, PropArray}, + storage::arc_str::ArcStr, +}; use bigdecimal::BigDecimal; use chrono::NaiveDateTime; use rustc_hash::FxHashMap; @@ -55,8 +58,8 @@ pub trait PropUnwrap: Sized { self.into_bool().unwrap() } - fn into_list(self) -> Option>>; - fn unwrap_list(self) -> Arc> { + fn into_list(self) -> Option; + fn unwrap_list(self) -> PropArray { self.into_list().unwrap() } @@ -116,7 +119,7 @@ impl PropUnwrap for Option

{ self.and_then(|p| p.into_bool()) } - fn into_list(self) -> Option>> { + fn into_list(self) -> Option { self.and_then(|p| p.into_list()) } @@ -218,7 +221,7 @@ impl PropUnwrap for Prop { } } - fn into_list(self) -> Option>> { + fn into_list(self) -> Option { if let Prop::List(v) = self { Some(v) } else { diff --git a/raphtory-api/src/core/entities/properties/prop/serde.rs b/raphtory-api/src/core/entities/properties/prop/serde.rs index 56b35b2679..fd33605a90 100644 --- a/raphtory-api/src/core/entities/properties/prop/serde.rs +++ b/raphtory-api/src/core/entities/properties/prop/serde.rs @@ -14,7 +14,7 @@ impl TryFrom for Prop { .map(|num| num.into()) .or_else(|| value.as_f64().map(|num| num.into())) .ok_or(format!("Number conversion error for: {}", value)), - Value::String(value) => Ok(value.into()), + Value::String(value) => Ok(value.as_str().into()), Value::Array(value) => value .into_iter() .map(|item| item.try_into()) @@ -49,7 +49,7 @@ impl From for Value { .map(Value::Number) .unwrap_or(Value::Null), Prop::Bool(value) => Value::Bool(value), - Prop::List(values) => Value::Array(values.iter().cloned().map(Value::from).collect()), + Prop::List(values) => Value::Array(values.iter().map(Value::from).collect()), Prop::Map(map) => { let json_map: serde_json::Map = map .iter() diff --git a/raphtory-api/src/core/entities/properties/prop/template.rs b/raphtory-api/src/core/entities/properties/prop/template.rs index 21f55ed2e5..12209991e1 100644 --- a/raphtory-api/src/core/entities/properties/prop/template.rs +++ b/raphtory-api/src/core/entities/properties/prop/template.rs @@ -17,9 +17,7 @@ impl From for Value { Prop::Str(value) => Value::from(value.0.to_owned()), Prop::DTime(value) => Value::from(value.timestamp_millis()), Prop::NDTime(value) => Value::from(value.and_utc().timestamp_millis()), - #[cfg(feature = "arrow")] - Prop::Array(value) => Value::from(value.to_vec_u8()), - Prop::List(value) => value.iter().cloned().collect(), + Prop::List(value) => value.iter().collect(), Prop::Map(value) => value .iter() .map(|(key, value)| (key.to_string(), value.clone())) diff --git a/raphtory-api/src/python/prop.rs b/raphtory-api/src/python/prop.rs index a6875b2876..1afd56d145 100644 --- a/raphtory-api/src/python/prop.rs +++ b/raphtory-api/src/python/prop.rs @@ -53,18 +53,13 @@ impl<'py> IntoPyObject<'py> for Prop { Prop::F64(f64) => f64.into_pyobject(py)?.into_any(), Prop::DTime(dtime) => dtime.into_pyobject(py)?.into_any(), Prop::NDTime(ndtime) => ndtime.into_pyobject(py)?.into_any(), - #[cfg(feature = "arrow")] - Prop::Array(blob) => { - if let Some(arr_ref) = blob.into_array_ref() { - PyArray::from_array_ref(arr_ref).into_pyarrow(py)? - } else { - py.None().into_bound(py) - } - } Prop::I32(v) => v.into_pyobject(py)?.into_any(), Prop::U32(v) => v.into_pyobject(py)?.into_any(), Prop::F32(v) => v.into_pyobject(py)?.into_any(), - Prop::List(v) => v.deref().clone().into_pyobject(py)?.into_any(), // Fixme: optimise the clone here? + Prop::List(PropArray::Array(arr_ref)) => { + PyArray::from_array_ref(arr_ref).into_pyarrow(py)? + } + Prop::List(PropArray::Vec(v)) => v.deref().clone().into_pyobject(py)?.into_any(), // Fixme: optimise the clone here? Prop::Map(v) => v.deref().clone().into_pyobject(py)?.into_any(), Prop::Decimal(d) => { let decl_cls = get_decimal_cls(py)?; @@ -109,13 +104,12 @@ impl<'source> FromPyObject<'source> for Prop { if let Ok(s) = ob.extract::() { return Ok(Prop::Str(s.into())); } - #[cfg(feature = "arrow")] if let Ok(arrow) = ob.extract::() { let (arr, _) = arrow.into_inner(); - return Ok(Prop::Array(PropArray::Array(arr))); + return Ok(Prop::List(PropArray::Array(arr))); } if let Ok(list) = ob.extract() { - return Ok(Prop::List(Arc::new(list))); + return Ok(Prop::List(PropArray::Vec(Arc::new(list)))); } if let Ok(map) = ob.extract() { return Ok(Prop::Map(Arc::new(map))); diff --git a/raphtory-core/src/entities/properties/tprop.rs b/raphtory-core/src/entities/properties/tprop.rs index 786798fe01..273c4640dd 100644 --- a/raphtory-core/src/entities/properties/tprop.rs +++ b/raphtory-core/src/entities/properties/tprop.rs @@ -35,10 +35,8 @@ pub enum TProp { F64(TCell), Bool(TCell), DTime(TCell>), - #[cfg(feature = "arrow")] - Array(TCell), + List(TCell), NDTime(TCell), - List(TCell>>), Map(TCell>>), Decimal(TCell), } @@ -64,7 +62,6 @@ pub enum TPropVariants< F64, Bool, DTime, - #[cfg(feature = "arrow")] Array, NDTime, List, Map, @@ -82,8 +79,6 @@ pub enum TPropVariants< F64(F64), Bool(Bool), DTime(DTime), - #[cfg(feature = "arrow")] - Array(Array), NDTime(NDTime), List(List), Map(Map), @@ -177,8 +172,6 @@ impl TProp { Prop::Bool(value) => TProp::Bool(TCell::new(t, value)), Prop::DTime(value) => TProp::DTime(TCell::new(t, value)), Prop::NDTime(value) => TProp::NDTime(TCell::new(t, value)), - #[cfg(feature = "arrow")] - Prop::Array(value) => TProp::Array(TCell::new(t, value)), Prop::List(value) => TProp::List(TCell::new(t, value)), Prop::Map(value) => TProp::Map(TCell::new(t, value)), Prop::Decimal(value) => TProp::Decimal(TCell::new(t, value)), @@ -199,8 +192,6 @@ impl TProp { TProp::F64(_) => PropType::F64, TProp::Bool(_) => PropType::Bool, TProp::DTime(_) => PropType::DTime, - #[cfg(feature = "arrow")] - TProp::Array(_) => PropType::Array(Box::new(PropType::Empty)), TProp::NDTime(_) => PropType::NDTime, TProp::List(_) => PropType::List(Box::new(PropType::Empty)), TProp::Map(_) => PropType::Map(HashMap::new().into()), @@ -251,10 +242,6 @@ impl TProp { (TProp::NDTime(cell), Prop::NDTime(a)) => { cell.set(t, a); } - #[cfg(feature = "arrow")] - (TProp::Array(cell), Prop::Array(a)) => { - cell.set(t, a); - } (TProp::List(cell), Prop::List(a)) => { cell.set(t, a); } @@ -328,11 +315,6 @@ impl TProp { cell.iter_window(r) .map(|(t, value)| (*t, Prop::NDTime(*value))), ), - #[cfg(feature = "arrow")] - TProp::Array(cell) => TPropVariants::Array( - cell.iter_window(r) - .map(|(t, value)| (*t, Prop::Array(value.clone()))), - ), TProp::List(cell) => TPropVariants::List( cell.iter_window(r) .map(|(t, value)| (*t, Prop::List(value.clone()))), @@ -389,11 +371,6 @@ impl TProp { TProp::NDTime(cell) => { TPropVariants::NDTime(cell.iter().map(|(t, value)| (*t, Prop::NDTime(*value)))) } - #[cfg(feature = "arrow")] - TProp::Array(cell) => TPropVariants::Array( - cell.iter() - .map(|(t, value)| (*t, Prop::Array(value.clone()))), - ), TProp::List(cell) => TPropVariants::List( cell.iter() .map(|(t, value)| (*t, Prop::List(value.clone()))), @@ -425,10 +402,6 @@ impl<'a> TPropOps<'a> for &'a TProp { TProp::Bool(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::Bool(*v))), TProp::DTime(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::DTime(*v))), TProp::NDTime(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::NDTime(*v))), - #[cfg(feature = "arrow")] - TProp::Array(cell) => cell - .last_before(t) - .map(|(t, v)| (t, Prop::Array(v.clone()))), TProp::List(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::List(v.clone()))), TProp::Map(cell) => cell.last_before(t).map(|(t, v)| (t, Prop::Map(v.clone()))), TProp::Decimal(cell) => cell @@ -452,8 +425,6 @@ impl<'a> TPropOps<'a> for &'a TProp { TProp::Bool(cell) => cell.at(ti).map(|v| Prop::Bool(*v)), TProp::DTime(cell) => cell.at(ti).map(|v| Prop::DTime(*v)), TProp::NDTime(cell) => cell.at(ti).map(|v| Prop::NDTime(*v)), - #[cfg(feature = "arrow")] - TProp::Array(cell) => cell.at(ti).map(|v| Prop::Array(v.clone())), TProp::List(cell) => cell.at(ti).map(|v| Prop::List(v.clone())), TProp::Map(cell) => cell.at(ti).map(|v| Prop::Map(v.clone())), TProp::Decimal(cell) => cell.at(ti).map(|v| Prop::Decimal(v.clone())), diff --git a/raphtory-core/src/storage/mod.rs b/raphtory-core/src/storage/mod.rs index 0eb748a111..a1ceac42fc 100644 --- a/raphtory-core/src/storage/mod.rs +++ b/raphtory-core/src/storage/mod.rs @@ -15,7 +15,7 @@ use chrono::DateTime; use either::Either; use lazy_vec::LazyVec; use raphtory_api::core::{ - entities::properties::prop::{prop_type_from_arrow_dtype, Prop, PropRef, PropType}, + entities::properties::prop::{Prop, PropRef, PropType}, storage::arc_str::ArcStr, }; use rustc_hash::FxHashMap; @@ -138,9 +138,7 @@ pub enum PropColumn { F32(LazyVec), F64(LazyVec), Str(LazyVec), - #[cfg(feature = "arrow")] - Array(LazyVec), - List(LazyVec>>), + List(LazyVec), Map(LazyVec>>), NDTime(LazyVec), DTime(LazyVec>), @@ -193,8 +191,6 @@ impl PropColumn { PropColumn::F32(_) => PropType::F32, PropColumn::F64(_) => PropType::F64, PropColumn::Str(_) => PropType::Str, - #[cfg(feature = "arrow")] - PropColumn::Array(_) => PropType::Array(Box::new(PropType::Empty)), PropColumn::List(_) => PropType::List(Box::new(PropType::Empty)), PropColumn::Map(_) => PropType::Map(HashMap::new().into()), PropColumn::NDTime(_) => PropType::NDTime, @@ -209,9 +205,9 @@ impl PropColumn { } } - fn init_from_prop_type(&mut self, prop_type: PropType) { + fn init_from_prop_type(&mut self, prop_type: impl Into) { if let PropColumn::Empty(len) = self { - match prop_type { + match prop_type.into() { PropType::Bool => *self = PropColumn::Bool(LazyVec::with_len(*len)), PropType::I64 => *self = PropColumn::I64(LazyVec::with_len(*len)), PropType::U32 => *self = PropColumn::U32(LazyVec::with_len(*len)), @@ -219,8 +215,6 @@ impl PropColumn { PropType::F32 => *self = PropColumn::F32(LazyVec::with_len(*len)), PropType::F64 => *self = PropColumn::F64(LazyVec::with_len(*len)), PropType::Str => *self = PropColumn::Str(LazyVec::with_len(*len)), - #[cfg(feature = "arrow")] - PropType::Array(_) => *self = PropColumn::Array(LazyVec::with_len(*len)), PropType::U8 => *self = PropColumn::U8(LazyVec::with_len(*len)), PropType::U16 => *self = PropColumn::U16(LazyVec::with_len(*len)), PropType::I32 => *self = PropColumn::I32(LazyVec::with_len(*len)), @@ -229,13 +223,15 @@ impl PropColumn { PropType::NDTime => *self = PropColumn::NDTime(LazyVec::with_len(*len)), PropType::DTime => *self = PropColumn::DTime(LazyVec::with_len(*len)), PropType::Decimal { .. } => *self = PropColumn::Decimal(LazyVec::with_len(*len)), - _ => {} + PropType::Empty => { + panic!("Cannot initialize PropColumn from Empty PropType") + } } } } pub fn append(&mut self, col: &dyn Array, mask: &BooleanArray) { - self.init_from_prop_type(prop_type_from_arrow_dtype(col.data_type())); + self.init_from_prop_type(col.data_type()); match self { PropColumn::Bool(v) => v.append(col.as_boolean(), mask), PropColumn::I64(v) => v.append(col.as_primitive::(), mask), @@ -301,8 +297,6 @@ impl PropColumn { (PropColumn::F32(col), Prop::F32(v)) => col.upsert(index, v), (PropColumn::F64(col), Prop::F64(v)) => col.upsert(index, v), (PropColumn::Str(col), Prop::Str(v)) => col.upsert(index, v), - #[cfg(feature = "arrow")] - (PropColumn::Array(col), Prop::Array(v)) => col.upsert(index, v), (PropColumn::U8(col), Prop::U8(v)) => col.upsert(index, v), (PropColumn::U16(col), Prop::U16(v)) => col.upsert(index, v), (PropColumn::I32(col), Prop::I32(v)) => col.upsert(index, v), @@ -331,8 +325,6 @@ impl PropColumn { (PropColumn::F32(col), Prop::F32(v)) => col.check(index, v)?, (PropColumn::F64(col), Prop::F64(v)) => col.check(index, v)?, (PropColumn::Str(col), Prop::Str(v)) => col.check(index, v)?, - #[cfg(feature = "arrow")] - (PropColumn::Array(col), Prop::Array(v)) => col.check(index, v)?, (PropColumn::U8(col), Prop::U8(v)) => col.check(index, v)?, (PropColumn::U16(col), Prop::U16(v)) => col.check(index, v)?, (PropColumn::I32(col), Prop::I32(v)) => col.check(index, v)?, @@ -362,8 +354,6 @@ impl PropColumn { (PropColumn::F32(col), Prop::F32(v)) => col.push(Some(v)), (PropColumn::F64(col), Prop::F64(v)) => col.push(Some(v)), (PropColumn::Str(col), Prop::Str(v)) => col.push(Some(v)), - #[cfg(feature = "arrow")] - (PropColumn::Array(col), Prop::Array(v)) => col.push(Some(v)), (PropColumn::U16(col), Prop::U16(v)) => col.push(Some(v)), (PropColumn::I32(col), Prop::I32(v)) => col.push(Some(v)), (PropColumn::List(col), Prop::List(v)) => col.push(Some(v)), @@ -391,8 +381,6 @@ impl PropColumn { Prop::F32(_) => *self = PropColumn::F32(LazyVec::with_len(*len)), Prop::F64(_) => *self = PropColumn::F64(LazyVec::with_len(*len)), Prop::Str(_) => *self = PropColumn::Str(LazyVec::with_len(*len)), - #[cfg(feature = "arrow")] - Prop::Array(_) => *self = PropColumn::Array(LazyVec::with_len(*len)), Prop::U8(_) => *self = PropColumn::U8(LazyVec::with_len(*len)), Prop::U16(_) => *self = PropColumn::U16(LazyVec::with_len(*len)), Prop::I32(_) => *self = PropColumn::I32(LazyVec::with_len(*len)), @@ -418,8 +406,6 @@ impl PropColumn { PropColumn::F32(col) => col.push(None), PropColumn::F64(col) => col.push(None), PropColumn::Str(col) => col.push(None), - #[cfg(feature = "arrow")] - PropColumn::Array(col) => col.push(None), PropColumn::U8(col) => col.push(None), PropColumn::U16(col) => col.push(None), PropColumn::I32(col) => col.push(None), @@ -442,9 +428,7 @@ impl PropColumn { PropColumn::U64(col) => col.get_opt(index).map(|prop| (*prop).into()), PropColumn::F32(col) => col.get_opt(index).map(|prop| (*prop).into()), PropColumn::F64(col) => col.get_opt(index).map(|prop| (*prop).into()), - PropColumn::Str(col) => col.get_opt(index).map(|prop| prop.into()), - #[cfg(feature = "arrow")] - PropColumn::Array(col) => col.get_opt(index).map(|prop| Prop::Array(prop.clone())), + PropColumn::Str(col) => col.get_opt(index).map(|prop| prop.clone().into()), PropColumn::U8(col) => col.get_opt(index).map(|prop| (*prop).into()), PropColumn::U16(col) => col.get_opt(index).map(|prop| (*prop).into()), PropColumn::I32(col) => col.get_opt(index).map(|prop| (*prop).into()), @@ -466,8 +450,6 @@ impl PropColumn { PropColumn::F32(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), PropColumn::F64(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), PropColumn::Str(col) => col.get_opt(index).map(|prop| PropRef::Str(prop.as_ref())), - #[cfg(feature = "arrow")] - PropColumn::Array(col) => col.get_opt(index).map(PropRef::Array), PropColumn::U8(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), PropColumn::U16(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), PropColumn::I32(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), @@ -489,8 +471,6 @@ impl PropColumn { PropColumn::F32(col) => col.len(), PropColumn::F64(col) => col.len(), PropColumn::Str(col) => col.len(), - #[cfg(feature = "arrow")] - PropColumn::Array(col) => col.len(), PropColumn::U8(col) => col.len(), PropColumn::U16(col) => col.len(), PropColumn::I32(col) => col.len(), diff --git a/raphtory-graphql/src/model/graph/filtering.rs b/raphtory-graphql/src/model/graph/filtering.rs index ca2b3693e1..ae25199894 100644 --- a/raphtory-graphql/src/model/graph/filtering.rs +++ b/raphtory-graphql/src/model/graph/filtering.rs @@ -637,7 +637,7 @@ fn build_property_filter( let prop_value = match (&prop, operator) { (Some(Prop::List(list)), Operator::IsIn | Operator::IsNotIn) => { - PropertyFilterValue::Set(Arc::new(list.iter().cloned().collect())) + PropertyFilterValue::Set(Arc::new(list.iter().collect())) } (Some(p), _) => PropertyFilterValue::Single(p.clone()), (None, _) => PropertyFilterValue::None, diff --git a/raphtory-graphql/src/model/graph/property.rs b/raphtory-graphql/src/model/graph/property.rs index 142eb038eb..76afdd3af7 100644 --- a/raphtory-graphql/src/model/graph/property.rs +++ b/raphtory-graphql/src/model/graph/property.rs @@ -165,7 +165,7 @@ fn prop_to_gql(prop: &Prop) -> GqlValue { .map(|number| GqlValue::Number(number)) .unwrap_or(GqlValue::Null), Prop::Bool(b) => GqlValue::Boolean(*b), - Prop::List(l) => GqlValue::List(l.iter().map(|pp| prop_to_gql(pp)).collect()), + Prop::List(l) => GqlValue::List(l.iter().map(|pp| prop_to_gql(&pp)).collect()), Prop::Map(m) => GqlValue::Object( m.iter() .map(|(k, v)| (Name::new(k.to_string()), prop_to_gql(v))) @@ -173,7 +173,6 @@ fn prop_to_gql(prop: &Prop) -> GqlValue { ), Prop::DTime(t) => GqlValue::Number(t.timestamp_millis().into()), Prop::NDTime(t) => GqlValue::Number(t.and_utc().timestamp_millis().into()), - Prop::Array(a) => GqlValue::List(a.iter_prop().map(|p| prop_to_gql(&p)).collect()), Prop::Decimal(d) => GqlValue::String(d.to_string()), } } diff --git a/raphtory-graphql/src/python/client/mod.rs b/raphtory-graphql/src/python/client/mod.rs index 579e6b019e..91b79c9a74 100644 --- a/raphtory-graphql/src/python/client/mod.rs +++ b/raphtory-graphql/src/python/client/mod.rs @@ -235,11 +235,7 @@ fn inner_collection(value: &Prop) -> String { Prop::F64(value) => format!("{{ f64: {} }}", value), Prop::Bool(value) => format!("{{ bool: {} }}", value), Prop::List(value) => { - let vec: Vec = value.iter().map(inner_collection).collect(); - format!("{{ list: [{}] }}", vec.join(", ")) - } - Prop::Array(value) => { - let vec: Vec = value.iter_prop().map(|v| inner_collection(&v)).collect(); + let vec: Vec = value.iter().map(|p| inner_collection(&p)).collect(); format!("{{ list: [{}] }}", vec.join(", ")) } Prop::Map(value) => { @@ -268,15 +264,7 @@ fn to_graphql_valid(key: &String, value: &Prop) -> String { Prop::F64(value) => format!("{{ key: \"{}\", value: {{ f64: {} }} }}", key, value), Prop::Bool(value) => format!("{{ key: \"{}\", value: {{ bool: {} }} }}", key, value), Prop::List(value) => { - let vec: Vec = value.iter().map(inner_collection).collect(); - format!( - "{{ key: \"{}\", value: {{ list: [{}] }} }}", - key, - vec.join(", ") - ) - } - Prop::Array(value) => { - let vec: Vec = value.iter_prop().map(|v| inner_collection(&v)).collect(); + let vec: Vec = value.iter().map(|p| inner_collection(&p)).collect(); format!( "{{ key: \"{}\", value: {{ list: [{}] }} }}", key, diff --git a/raphtory/src/db/api/properties/temporal_props.rs b/raphtory/src/db/api/properties/temporal_props.rs index 70e9def86b..d8dc79c144 100644 --- a/raphtory/src/db/api/properties/temporal_props.rs +++ b/raphtory/src/db/api/properties/temporal_props.rs @@ -2,7 +2,7 @@ use crate::db::api::{properties::internal::InternalPropertiesOps, view::BoxedLIt use bigdecimal::BigDecimal; use chrono::{DateTime, NaiveDateTime, Utc}; use raphtory_api::core::{ - entities::properties::prop::{Prop, PropType, PropUnwrap}, + entities::properties::prop::{Prop, PropArray, PropType, PropUnwrap}, storage::{arc_str::ArcStr, timeindex::TimeIndexEntry}, }; use rustc_hash::FxHashMap; @@ -277,7 +277,7 @@ impl PropUnwrap for TemporalPropertyView

{ self.latest().into_bool() } - fn into_list(self) -> Option>> { + fn into_list(self) -> Option { self.latest().into_list() } diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index 97611eae5f..95212063ff 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -28,7 +28,7 @@ use tracing::error; use { arrow::{datatypes::DataType, error::ArrowError}, parquet::errors::ParquetError, - raphtory_api::core::entities::{properties::prop::DeserialisationError, GidType, VID}, + raphtory_api::core::entities::{GidType, VID}, }; #[cfg(feature = "python")] @@ -331,10 +331,6 @@ pub enum GraphError { #[error("Cannot write graph into non empty folder {0}")] NonEmptyGraphFolder(PathBuf), - #[cfg(feature = "arrow")] - #[error(transparent)] - DeserialisationError(#[from] DeserialisationError), - #[cfg(feature = "proto")] #[error("Cache is not initialised")] CacheNotInnitialised, diff --git a/raphtory/src/io/arrow/prop_handler.rs b/raphtory/src/io/arrow/prop_handler.rs index 3ba0aac7db..aa3849d8ff 100644 --- a/raphtory/src/io/arrow/prop_handler.rs +++ b/raphtory/src/io/arrow/prop_handler.rs @@ -19,7 +19,7 @@ use arrow::{ use bigdecimal::BigDecimal; use chrono::{DateTime, Utc}; use raphtory_api::core::{ - entities::properties::prop::{IntoPropList, PropType}, + entities::properties::prop::{IntoPropList, PropArray, PropType}, storage::{arc_str::ArcStr, dict_mapper::MaybeNew}, }; use rayon::prelude::*; @@ -197,7 +197,7 @@ fn arr_as_prop(arr: ArrayRef) -> Prop { .map(|elem| Prop::Decimal(BigDecimal::new(elem.into(), *scale as i64))) .into_prop_list() } - DataType::Null => Prop::List(vec![].into()), + DataType::Null => Prop::List(PropArray::default()), dt => panic!("Data type not recognized {dt:?}"), } } diff --git a/raphtory/src/python/graph/edges.rs b/raphtory/src/python/graph/edges.rs index 431dd20495..350958f924 100644 --- a/raphtory/src/python/graph/edges.rs +++ b/raphtory/src/python/graph/edges.rs @@ -354,8 +354,8 @@ impl PyEdges { ); let row_header: Vec = vec![ - Prop::from(item.src().name()), - Prop::from(item.dst().name()), + Prop::Str(item.src().name().into()), + Prop::Str(item.dst().name().into()), Prop::from(item.layer_name().unwrap_or(ArcStr::from(""))), ]; diff --git a/raphtory/src/python/graph/node.rs b/raphtory/src/python/graph/node.rs index a6eeab3929..f866b301d8 100644 --- a/raphtory/src/python/graph/node.rs +++ b/raphtory/src/python/graph/node.rs @@ -787,7 +787,7 @@ impl PyNodes { ); let row_header: Vec = vec![ - Prop::from(item.name()), + Prop::Str(item.name().into()), Prop::from(item.node_type().unwrap_or_else(|| ArcStr::from(""))), ]; diff --git a/raphtory/src/python/types/repr.rs b/raphtory/src/python/types/repr.rs index ab4ca1fe47..3d95af7869 100644 --- a/raphtory/src/python/types/repr.rs +++ b/raphtory/src/python/types/repr.rs @@ -7,7 +7,10 @@ use bigdecimal::BigDecimal; use chrono::{DateTime, NaiveDateTime, TimeZone}; use itertools::Itertools; use pyo3::{prelude::PyAnyMethods, Bound, PyAny, PyObject, Python}; -use raphtory_api::core::{entities::GID, storage::arc_str::ArcStr}; +use raphtory_api::core::{ + entities::{properties::prop::PropArray, GID}, + storage::arc_str::ArcStr, +}; use std::{collections::HashMap, error::Error, ops::Deref, sync::Arc}; pub fn iterator_repr, V: Repr>(iter: I) -> String { @@ -238,6 +241,13 @@ impl Repr for Vec { } } +impl Repr for PropArray { + fn repr(&self) -> String { + let repr = self.iter().map(|v| v.repr()).join(", "); + format!("[{}]", repr) + } +} + impl Repr for Arc<[T]> { fn repr(&self) -> String { self.deref().repr() diff --git a/raphtory/src/python/types/wrappers/prop.rs b/raphtory/src/python/types/wrappers/prop.rs index 314dd11807..53575c3f06 100644 --- a/raphtory/src/python/types/wrappers/prop.rs +++ b/raphtory/src/python/types/wrappers/prop.rs @@ -12,7 +12,6 @@ impl Repr for Prop { Prop::F64(v) => v.repr(), Prop::DTime(v) => v.repr(), Prop::NDTime(v) => v.repr(), - Prop::Array(v) => format!("{:?}", v), Prop::I32(v) => v.repr(), Prop::U32(v) => v.repr(), Prop::F32(v) => v.repr(), diff --git a/raphtory/src/python/utils/export.rs b/raphtory/src/python/utils/export.rs index d125d01d7e..b84ae7caa9 100644 --- a/raphtory/src/python/utils/export.rs +++ b/raphtory/src/python/utils/export.rs @@ -83,7 +83,7 @@ pub(crate) fn extract_properties

( let mut prop_vec = vec![]; prop_view.iter().for_each(|(time, prop)| { let prop_time = Prop::DTime(time.dt().unwrap()); - prop_vec.push(Prop::List(Arc::from(vec![prop_time, prop]))) + prop_vec.push(Prop::List(vec![prop_time, prop].into())) }); let wrapped = Prop::from(prop_vec); let _ = properties_map.insert(column_name, wrapped); @@ -92,7 +92,7 @@ pub(crate) fn extract_properties

( .iter() .map(|(k, v)| Prop::from(vec![Prop::from(k), v])) .collect_vec(); - let wrapped = Prop::List(Arc::from(vec_props)); + let wrapped = Prop::List(vec_props.into()); let _ = properties_map.insert(column_name, wrapped); } }); diff --git a/raphtory/src/serialise/proto/ext.rs b/raphtory/src/serialise/proto/ext.rs index e189f6bc70..062efff30d 100644 --- a/raphtory/src/serialise/proto/ext.rs +++ b/raphtory/src/serialise/proto/ext.rs @@ -55,7 +55,7 @@ fn as_proto_prop_type(p_type: &PropType) -> Option { fn as_proto_prop_type2(p_type: &PropType) -> Option { match p_type { - PropType::Array(tpe) => { + PropType::List(tpe) => { let prop_type = as_proto_prop_type(tpe)?; Some(PType { kind: Some(proto_generated::prop_type::p_type::Kind::Array(ArrayType { @@ -78,7 +78,7 @@ fn as_prop_type2(p_type: PType) -> Option { proto_generated::prop_type::p_type::Kind::Scalar(scalar) => as_prop_type(scalar.p_type()), proto_generated::prop_type::p_type::Kind::Array(array) => { let p_type = as_prop_type(array.p_type())?; - Some(PropType::Array(Box::new(p_type))) + Some(PropType::List(Box::new(p_type))) } proto_generated::prop_type::p_type::Kind::Decimal(decimal) => Some(PropType::Decimal { scale: decimal.scale as i64, @@ -627,13 +627,14 @@ fn as_prop_value(value: Option<&prop::Value>) -> Result, GraphError prop::Value::F32(f) => Some(Prop::F32(*f)), prop::Value::F64(f) => Some(Prop::F64(*f)), prop::Value::Str(s) => Some(Prop::Str(ArcStr::from(s.as_str()))), - prop::Value::Prop(props) => Some(Prop::List(Arc::new( + prop::Value::Prop(props) => Some(Prop::List( props .properties .iter() .filter_map(|prop| as_prop_value(prop.value.as_ref()).transpose()) - .collect::, _>>()?, - ))), + .collect::, _>>()? + .into(), + )), prop::Value::Map(dict) => Some(Prop::Map(Arc::new( dict.map .iter() @@ -663,7 +664,7 @@ fn as_prop_value(value: Option<&prop::Value>) -> Result, GraphError prop::Value::DTime(dt) => Some(Prop::DTime( DateTime::parse_from_rfc3339(dt).unwrap().into(), )), - prop::Value::Array(blob) => Some(Prop::Array(PropArray::from_vec_u8(&blob.data)?)), + prop::Value::Array(_) => None, _ => None, }; Ok(value) @@ -699,7 +700,7 @@ fn as_proto_prop(prop: &Prop) -> proto_generated::Prop { Prop::F64(f) => Some(prop::Value::F64(*f)), Prop::Str(s) => Some(prop::Value::Str(s.to_string())), Prop::List(list) => { - let properties = list.iter().map(as_proto_prop).collect(); + let properties = list.iter().map(|p| as_proto_prop(&p)).collect(); Some(prop::Value::Prop(prop::Props { properties })) } Prop::Map(map) => { @@ -732,9 +733,6 @@ fn as_proto_prop(prop: &Prop) -> proto_generated::Prop { Prop::DTime(dt) => Some(prop::Value::DTime( dt.to_rfc3339_opts(chrono::SecondsFormat::AutoSi, true), )), - Prop::Array(blob) => Some(prop::Value::Array(Array { - data: blob.to_vec_u8(), - })), Prop::Decimal(bd) => Some(prop::Value::Decimal(bd.to_string())), }; diff --git a/raphtory/src/test_utils.rs b/raphtory/src/test_utils.rs index 216877f5b1..e800dcc6f3 100644 --- a/raphtory/src/test_utils.rs +++ b/raphtory/src/test_utils.rs @@ -197,11 +197,10 @@ pub fn prop_type() -> impl Strategy { leaf.prop_recursive(3, 10, 10, |inner| { let dict = proptest::collection::hash_map(r"\w{1,10}", inner.clone(), 1..10) .prop_map(PropType::map); - // let list = inner - // .clone() - // .prop_map(|p_type| PropType::List(Box::new(p_type))); - // prop_oneof![inner, list, dict] - prop_oneof![inner, dict] + let list = inner + .clone() + .prop_map(|p_type| PropType::List(Box::new(p_type))); + prop_oneof![inner, list, dict] }) } @@ -561,7 +560,7 @@ pub fn build_graph_from_edge_list<'a>( src, dst, [ - ("str_prop", str_prop.into_prop()), + ("str_prop", str_prop.as_str().into_prop()), ("int_prop", int_prop.into_prop()), ], None, @@ -672,7 +671,7 @@ pub fn add_node_props<'a>( ) { for (node, str_prop, int_prop) in nodes { let props = [ - str_prop.as_ref().map(|v| ("str_prop", v.into_prop())), + str_prop.as_deref().map(|v| ("str_prop", v.into_prop())), int_prop.as_ref().map(|v| ("int_prop", (*v).into())), ] .into_iter() From 5cdf62525c52e36515221f8b5e54456d5c4e4c46 Mon Sep 17 00:00:00 2001 From: Fabian Murariu Date: Wed, 12 Nov 2025 12:06:16 +0000 Subject: [PATCH 34/47] progress with complex properties, proptest still failing --- db4-storage/src/pages/test_utils/checkers.rs | 2 +- db4-storage/src/pages/test_utils/props.rs | 3 +- db4-storage/src/properties/mod.rs | 7 +--- .../src/properties/props_meta_writer.rs | 20 ++++++++++ raphtory-api/Cargo.toml | 2 +- .../core/entities/properties/prop/arrow.rs | 15 +++++++- .../entities/properties/prop/prop_array.rs | 15 ++++++-- .../entities/properties/prop/prop_enum.rs | 37 ++++++------------- .../entities/properties/prop/prop_ref_enum.rs | 6 +-- .../entities/properties/prop/prop_type.rs | 6 +-- raphtory-core/src/storage/mod.rs | 6 ++- 11 files changed, 73 insertions(+), 46 deletions(-) diff --git a/db4-storage/src/pages/test_utils/checkers.rs b/db4-storage/src/pages/test_utils/checkers.rs index 62b2af0f72..0f701c88c3 100644 --- a/db4-storage/src/pages/test_utils/checkers.rs +++ b/db4-storage/src/pages/test_utils/checkers.rs @@ -336,7 +336,7 @@ pub fn check_graph_with_nodes_support< assert_eq!( actual_props, props, - "Expected properties for node ({node:?}) to be {props:?}, but got {actual_props:?}" + "Expected temporal properties for node ({node:?}) to be {props:?}, but got {actual_props:?}" ); } }; diff --git a/db4-storage/src/pages/test_utils/props.rs b/db4-storage/src/pages/test_utils/props.rs index 4c91400288..37684f7995 100644 --- a/db4-storage/src/pages/test_utils/props.rs +++ b/db4-storage/src/pages/test_utils/props.rs @@ -98,7 +98,8 @@ pub(crate) fn prop(p_type: &PropType) -> impl Strategy + use<> { ) }) .boxed(), - PropType::List(p_type) => proptest::collection::vec(prop(p_type), 0..10) + // TODO: empty lists are a type nightmare + PropType::List(p_type) => proptest::collection::vec(prop(p_type), 1..10) .prop_map(|props| Prop::List(PropArray::Vec(props.into()))) .boxed(), PropType::Map(p_types) => { diff --git a/db4-storage/src/properties/mod.rs b/db4-storage/src/properties/mod.rs index 2521a59f54..945fcf78c8 100644 --- a/db4-storage/src/properties/mod.rs +++ b/db4-storage/src/properties/mod.rs @@ -198,8 +198,6 @@ impl Properties { .unwrap(), )) } - // PropColumn::Array(lazy_vec) => todo!(), - // PropColumn::List(lazy_vec) => todo!(), PropColumn::Map(lazy_vec) => { let dt = meta .get_dtype(col_id) @@ -223,13 +221,12 @@ impl Properties { let array_iter = indices .map(|i| lazy_vec.get_opt(i)) - .map(|opt_list| opt_list.map(|list| SerdeList(list))); + .map(|opt_list| opt_list.map(SerdeList)); - let list_array = list_array_from_props(&dt, |lst| *lst, array_iter); + let list_array = list_array_from_props(&dt, array_iter); Some(Arc::new(list_array)) } - _ => None, //todo!("Unsupported column type"), } } diff --git a/db4-storage/src/properties/props_meta_writer.rs b/db4-storage/src/properties/props_meta_writer.rs index 0ed0014860..8c20634287 100644 --- a/db4-storage/src/properties/props_meta_writer.rs +++ b/db4-storage/src/properties/props_meta_writer.rs @@ -268,6 +268,26 @@ mod test { assert_eq!(meta.temporal_prop_mapper().keys().len(), 2); } + #[test] + fn complex_props_meta_writer() { + let meta = Meta::default(); + let prop_list_map = Prop::list([Prop::map([("a", 1)]), Prop::map([("b", 2f64)])]); + let props = vec![("a", prop_list_map.clone())]; + + let writer = PropsMetaWriter::temporal(&meta, props.into_iter()).unwrap(); + let props = writer.into_props_temporal().unwrap(); + assert_eq!(props.len(), 1); + + assert_eq!(props, vec![(0, prop_list_map.clone())]); + + let expected_d_type = prop_list_map.dtype(); + + assert_eq!( + meta.temporal_prop_mapper().d_types().first().unwrap(), + &expected_d_type + ); + } + #[test] fn test_fail_typecheck() { let meta = Meta::default(); diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index 9fc711e9fe..ebe042288e 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -22,7 +22,7 @@ thiserror = { workspace = true } bytemuck = { workspace = true } chrono.workspace = true dashmap = { workspace = true } -derive_more = { workspace = true } +derive_more = { workspace = true, features = ["from"] } rustc-hash = { workspace = true } lock_api = { workspace = true } parking_lot = { workspace = true } diff --git a/raphtory-api/src/core/entities/properties/prop/arrow.rs b/raphtory-api/src/core/entities/properties/prop/arrow.rs index 9d762dc607..937cdc2df9 100644 --- a/raphtory-api/src/core/entities/properties/prop/arrow.rs +++ b/raphtory-api/src/core/entities/properties/prop/arrow.rs @@ -1,3 +1,5 @@ +use std::borrow::Cow; + use arrow_array::{ cast::AsArray, types::*, Array, ArrowPrimitiveType, OffsetSizeTrait, StructArray, }; @@ -5,7 +7,7 @@ use arrow_schema::{DataType, TimeUnit}; use chrono::DateTime; use serde::{ser::SerializeMap, Serialize}; -use crate::core::entities::properties::prop::{Prop, PropRef}; +use crate::core::entities::properties::prop::{Prop, PropArray, PropRef}; #[derive(Debug, Clone, Copy)] pub struct ArrowRow<'a> { @@ -105,6 +107,16 @@ impl<'a> ArrowRow<'a> { } } + fn list_prop_ref(&self, col: usize) -> Option> { + let column = self.array.column(col).as_list_opt::()?; + if self.index < column.len() && column.is_valid(self.index) { + let list_array = column.value(self.index); + Some(PropRef::List(Cow::Owned(PropArray::from(list_array)))) + } else { + None + } + } + pub fn bool_value(&self, col: usize) -> Option { let column = self.array.column(col); match column.data_type() { @@ -186,6 +198,7 @@ impl<'a> ArrowRow<'a> { } DataType::Decimal128(_, _) => self.primitive_prop_ref::(col), DataType::Struct(_) => self.struct_prop_ref(col), + DataType::LargeList(_) => self.list_prop_ref(col), _ => None, } } diff --git a/raphtory-api/src/core/entities/properties/prop/prop_array.rs b/raphtory-api/src/core/entities/properties/prop/prop_array.rs index 5f43f355b9..d492b11cb4 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_array.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_array.rs @@ -1,5 +1,7 @@ use crate::{ - core::entities::properties::prop::{ArrowRow, DirectConvert, Prop, PropType}, + core::entities::properties::prop::{ + unify_types, ArrowRow, DirectConvert, Prop, PropType, SerdeProp, + }, iter::{BoxedLIter, IntoDynBoxed}, }; use arrow_array::{ @@ -68,7 +70,14 @@ impl PropArray { pub fn dtype(&self) -> PropType { match self { PropArray::Vec(ps) if ps.is_empty() => PropType::Empty, - PropArray::Vec(ps) => ps[0].dtype(), + PropArray::Vec(ps) => ps + .iter() + .map(|p| p.dtype()) + .reduce(|dt1, dt2| { + unify_types(&dt1, &dt2, &mut false) + .unwrap_or_else(|e| panic!("Failed to unify props {e}")) + }) + .unwrap(), PropArray::Array(a) => PropType::from(a.data_type()), } } @@ -192,7 +201,7 @@ impl Serialize for PropArray { { let mut state = serializer.serialize_seq(Some(self.len()))?; for prop in self.iter_all() { - state.serialize_element(&prop)?; + state.serialize_element(&prop.as_ref().map(SerdeProp))?; } state.end() } diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index 55b555c626..3cf297d431 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -5,11 +5,9 @@ use crate::core::{ }, storage::arc_str::ArcStr, }; -use arrow_array::cast::AsArray; -use arrow_array::ArrayRef; +use arrow_array::{cast::AsArray, ArrayRef, LargeListArray, StructArray}; #[cfg(feature = "arrow")] use arrow_schema::{DataType, Field, FieldRef}; -use arrow_array::{LargeListArray, ListArray, StructArray}; use bigdecimal::{num_bigint::BigInt, BigDecimal}; use chrono::{DateTime, NaiveDateTime, Utc}; use itertools::Itertools; @@ -82,7 +80,7 @@ impl<'a> From> for Prop { PropNum::F64(f) => Prop::F64(f), }, PropRef::Bool(b) => Prop::Bool(b), - PropRef::List(v) => Prop::List(v.clone().into()), + PropRef::List(v) => Prop::List(v.as_ref().clone()), PropRef::Map(m) => m .into_prop() .unwrap_or_else(|| Prop::Map(Arc::new(Default::default()))), @@ -157,7 +155,7 @@ impl PartialOrd for Prop { } pub struct SerdeProp<'a>(pub &'a Prop); -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub struct SerdeList<'a>(pub &'a PropArray); #[derive(Clone, Copy)] pub struct SerdeMap<'a>(pub &'a HashMap); @@ -336,42 +334,29 @@ impl Prop { } #[cfg(feature = "arrow")] -pub fn list_array_from_props

( +pub fn list_array_from_props( dt: &DataType, - as_serde_map: impl Fn(&P) -> SerdeList<'_> + Copy, props: impl IntoIterator>, ) -> LargeListArray { - use arrow_array::LargeListArray; use arrow_schema::{Field, Fields}; use serde_arrow::ArrayBuilder; - let fields: Fields = vec![Field::new("list", dt.clone(), false)].into(); + let fields: Fields = vec![Field::new("value", dt.clone(), true)].into(); let mut builder = ArrayBuilder::from_arrow(&fields) .unwrap_or_else(|e| panic!("Failed to make array builder {e}")); - let empty_list = PropArray::default(); - for p in props { - match p.as_ref().map(as_serde_map) { - todo!("USE SerdeRow"); - Some(list) => builder - .push(Value { list }) - .unwrap_or_else(|e| panic!("Failed to push list to array builder {e}")), - _ => builder - .push(SerdeList(&empty_list)) - .unwrap_or_else(|e| panic!("Failed to push empty list to array builder {e}")), - } + for value in props { + builder.push(SerdeRow { value }).unwrap_or_else(|e| { + panic!("Failed to push list to array builder {e} for type {fields:?}",) + }); } let arrays = builder .to_arrow() .unwrap_or_else(|e| panic!("Failed to convert to arrow array {e}")); - arrays[0] - .clone() - .as_any() - .downcast_ref::() - .unwrap() - .clone() + + arrays.first().unwrap().as_list::().clone() } #[cfg(feature = "arrow")] diff --git a/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs index b189c73d76..c5ec3dc539 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs @@ -1,6 +1,6 @@ use num_traits::ToPrimitive; use serde::Serialize; -use std::sync::Arc; +use std::{borrow::Cow, sync::Arc}; use bigdecimal::BigDecimal; use chrono::{DateTime, NaiveDateTime, Utc}; @@ -13,12 +13,12 @@ use crate::core::{ storage::arc_str::ArcStr, }; -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, PartialEq, Clone)] pub enum PropRef<'a> { Str(&'a str), Num(PropNum), Bool(bool), - List(&'a PropArray), + List(Cow<'a, PropArray>), Map(PropMapRef<'a>), NDTime(NaiveDateTime), DTime(DateTime), diff --git a/raphtory-api/src/core/entities/properties/prop/prop_type.rs b/raphtory-api/src/core/entities/properties/prop/prop_type.rs index 0a34493c6e..a6c7dd0d4d 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_type.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_type.rs @@ -420,15 +420,15 @@ mod test { ); assert!(unify); - let l = PropType::Array(Box::new(PropType::map([("a".to_string(), PropType::U8)]))); - let r = PropType::Array(Box::new(PropType::map([ + let l = PropType::List(Box::new(PropType::map([("a".to_string(), PropType::U8)]))); + let r = PropType::List(Box::new(PropType::map([ ("a".to_string(), PropType::Empty), ("b".to_string(), PropType::Str), ]))); let mut unify = false; assert_eq!( unify_types(&l, &r, &mut unify), - Ok(PropType::Array(Box::new(PropType::map([ + Ok(PropType::List(Box::new(PropType::map([ ("a".to_string(), PropType::U8), ("b".to_string(), PropType::Str) ])))) diff --git a/raphtory-core/src/storage/mod.rs b/raphtory-core/src/storage/mod.rs index a1ceac42fc..172e924fe6 100644 --- a/raphtory-core/src/storage/mod.rs +++ b/raphtory-core/src/storage/mod.rs @@ -20,7 +20,7 @@ use raphtory_api::core::{ }; use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; -use std::{collections::HashMap, fmt::Debug, sync::Arc}; +use std::{borrow::Cow, collections::HashMap, fmt::Debug, sync::Arc}; use thiserror::Error; #[cfg(feature = "arrow")] @@ -453,7 +453,9 @@ impl PropColumn { PropColumn::U8(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), PropColumn::U16(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), PropColumn::I32(col) => col.get_opt(index).map(|prop| PropRef::from(*prop)), - PropColumn::List(col) => col.get_opt(index).map(PropRef::List), + PropColumn::List(col) => col + .get_opt(index) + .map(|prop| PropRef::List(Cow::Borrowed(prop))), PropColumn::Map(col) => col.get_opt(index).map(PropRef::from), PropColumn::NDTime(col) => col.get_opt(index).copied().map(PropRef::from), PropColumn::DTime(col) => col.get_opt(index).copied().map(PropRef::from), From 934178d6191e1f04a60f74ce3fba6e0fe0c70c93 Mon Sep 17 00:00:00 2001 From: Fabian Murariu Date: Wed, 12 Nov 2025 16:37:04 +0000 Subject: [PATCH 35/47] more fixes for complex properties --- db4-storage/src/pages/edge_store.rs | 2 +- db4-storage/src/pages/locked/nodes.rs | 2 +- db4-storage/src/pages/test_utils/fixtures.rs | 7 +-- db4-storage/src/pages/test_utils/props.rs | 5 +- db4-storage/src/segments/edge.rs | 2 +- db4-storage/src/segments/mod.rs | 4 -- .../entities/properties/prop/prop_enum.rs | 1 - raphtory-core/src/storage/lazy_vec.rs | 23 +------- .../src/mutation/addition_ops_ext.rs | 1 - .../algorithms/dynamics/temporal/epidemics.rs | 2 +- .../src/db/api/state/node_state_ord_ops.rs | 2 +- raphtory/src/db/api/storage/storage.rs | 1 - raphtory/src/db/api/view/graph.rs | 10 +--- raphtory/src/db/replay/mod.rs | 1 - raphtory/tests/db_tests.rs | 8 ++- .../tests/exploded_edge_property_filter.rs | 2 +- raphtory/tests/views_test.rs | 54 +++++++++---------- 17 files changed, 42 insertions(+), 85 deletions(-) diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index e48e052bcb..76771bf586 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -20,7 +20,7 @@ use parking_lot::{RwLock, RwLockWriteGuard}; use raphtory_api::core::entities::{EID, VID, properties::meta::Meta}; use raphtory_core::{ entities::{ELID, LayerIds}, - storage::timeindex::{AsTime, TimeIndexEntry, TimeIndexOps}, + storage::timeindex::{AsTime, TimeIndexEntry}, }; use rayon::prelude::*; diff --git a/db4-storage/src/pages/locked/nodes.rs b/db4-storage/src/pages/locked/nodes.rs index 1c25a9fe91..5225754204 100644 --- a/db4-storage/src/pages/locked/nodes.rs +++ b/db4-storage/src/pages/locked/nodes.rs @@ -45,7 +45,7 @@ impl<'a, EXT, NS: NodeSegmentOps> LockedNodePage<'a, NS> { } pub fn vacuum(&mut self) { - self.page.vacuum(self.lock.deref_mut()); + let _ = self.page.vacuum(self.lock.deref_mut()); } #[inline(always)] diff --git a/db4-storage/src/pages/test_utils/fixtures.rs b/db4-storage/src/pages/test_utils/fixtures.rs index e7a45a566e..160bb798d3 100644 --- a/db4-storage/src/pages/test_utils/fixtures.rs +++ b/db4-storage/src/pages/test_utils/fixtures.rs @@ -55,11 +55,8 @@ pub fn make_edges(num_edges: usize, num_nodes: usize) -> impl Strategy impl Strategy { assert!(num_nodes > 0); - let schema = proptest::collection::hash_map( - (0i32..1000).prop_map(|i| i.to_string()), - prop_type(), - 0..30, - ); + let schema = + proptest::collection::hash_map((0i32..10).prop_map(|i| i.to_string()), prop_type(), 0..30); schema.prop_flat_map(move |schema| { let (t_props, c_props) = make_props(&schema); diff --git a/db4-storage/src/pages/test_utils/props.rs b/db4-storage/src/pages/test_utils/props.rs index 37684f7995..51153167a8 100644 --- a/db4-storage/src/pages/test_utils/props.rs +++ b/db4-storage/src/pages/test_utils/props.rs @@ -20,8 +20,9 @@ pub fn prop_type() -> impl Strategy { ]); leaf.prop_recursive(3, 10, 10, |inner| { - let dict = proptest::collection::hash_map(r"\w{1,10}", inner.clone(), 1..10) - .prop_map(PropType::map); + let keys = (0..1_000_000).prop_map(|i| format!("k_{i}")); + let dict = + proptest::collection::hash_map(keys, inner.clone(), 1..10).prop_map(PropType::map); let list = inner .clone() .prop_map(|p_type| PropType::List(Box::new(p_type))); diff --git a/db4-storage/src/segments/edge.rs b/db4-storage/src/segments/edge.rs index e290c70948..adef5bb383 100644 --- a/db4-storage/src/segments/edge.rs +++ b/db4-storage/src/segments/edge.rs @@ -9,7 +9,7 @@ use crate::{ segments::edge_entry::MemEdgeRef, utils::Iter4, }; -use arrow_array::{Array, ArrayRef, BooleanArray}; +use arrow_array::{ArrayRef, BooleanArray}; use parking_lot::lock_api::ArcRwLockReadGuard; use raphtory_api::core::entities::{ VID, diff --git a/db4-storage/src/segments/mod.rs b/db4-storage/src/segments/mod.rs index 8d8bd36a02..13f76d479f 100644 --- a/db4-storage/src/segments/mod.rs +++ b/db4-storage/src/segments/mod.rs @@ -76,10 +76,6 @@ impl PageIndex { fn par_iter(&self) -> impl IndexedParallelIterator> { self.0.par_iter().map(|i| i.index()) } - - fn len(&self) -> usize { - self.0.len() - } } #[derive(Default)] diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index 3cf297d431..2516525564 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -28,7 +28,6 @@ use thiserror::Error; #[cfg(feature = "arrow")] use crate::core::entities::properties::prop::prop_array::*; -use crate::core::entities::properties::prop::unify_types; pub const DECIMAL_MAX: i128 = 99999999999999999999999999999999999999i128; // equivalent to parquet decimal(38, 0) diff --git a/raphtory-core/src/storage/lazy_vec.rs b/raphtory-core/src/storage/lazy_vec.rs index 227ac62250..d8da2041e4 100644 --- a/raphtory-core/src/storage/lazy_vec.rs +++ b/raphtory-core/src/storage/lazy_vec.rs @@ -1,7 +1,6 @@ use arrow_array::BooleanArray; -use raphtory_api::iter::BoxedLIter; use serde::{Deserialize, Serialize}; -use std::{fmt::Debug, iter}; +use std::fmt::Debug; #[derive(thiserror::Error, Debug, PartialEq)] #[error("Cannot set previous value '{previous_value:?}' to '{new_value:?}' in position '{index}'")] @@ -275,24 +274,6 @@ where LazyVec::LazyVec1(A::default(), TupleCol::from(inner)) } - pub(crate) fn filled_ids(&self) -> BoxedLIter<'_, usize> { - match self { - LazyVec::Empty => Box::new(iter::empty()), - LazyVec::LazyVec1(_, tuples) => Box::new( - tuples - .iter() - .enumerate() - .filter_map(|(id, value)| value.map(|_| id)), - ), - LazyVec::LazyVecN(_, vector) => Box::new( - vector - .iter() - .enumerate() - .filter_map(|(id, value)| value.map(|_| id)), - ), - } - } - #[cfg(test)] fn iter(&self) -> Box + Send + '_> { match self { @@ -465,8 +446,6 @@ mod lazy_vec_tests { }) .unwrap(); assert_eq!(vec.get(9), Some(&1)); - - assert_eq!(vec.filled_ids().collect_vec(), vec![1, 5, 6, 8, 9]); } #[test] diff --git a/raphtory-storage/src/mutation/addition_ops_ext.rs b/raphtory-storage/src/mutation/addition_ops_ext.rs index e89adaaaad..e770f8a537 100644 --- a/raphtory-storage/src/mutation/addition_ops_ext.rs +++ b/raphtory-storage/src/mutation/addition_ops_ext.rs @@ -19,7 +19,6 @@ use raphtory_core::{ storage::timeindex::TimeIndexEntry, }; use storage::{ - api::{edges::EdgeSegmentOps, nodes::NodeSegmentOps}, pages::{node_page::writer::node_info_as_props, session::WriteSession}, persist::strategy::PersistentStrategy, properties::props_meta_writer::PropsMetaWriter, diff --git a/raphtory/src/algorithms/dynamics/temporal/epidemics.rs b/raphtory/src/algorithms/dynamics/temporal/epidemics.rs index 9207f25a54..e3d6cd3c50 100644 --- a/raphtory/src/algorithms/dynamics/temporal/epidemics.rs +++ b/raphtory/src/algorithms/dynamics/temporal/epidemics.rs @@ -29,7 +29,7 @@ pub struct Probability(f64); impl Probability { pub fn sample(self, rng: &mut R) -> bool { - rng.gen_bool(self.0) + rng.random_bool(self.0) } } diff --git a/raphtory/src/db/api/state/node_state_ord_ops.rs b/raphtory/src/db/api/state/node_state_ord_ops.rs index b90b952d65..cbbe32a209 100644 --- a/raphtory/src/db/api/state/node_state_ord_ops.rs +++ b/raphtory/src/db/api/state/node_state_ord_ops.rs @@ -9,7 +9,7 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator}; use std::{ cmp::{Ordering, Reverse}, collections::BinaryHeap, - fmt::{Binary, Debug, Formatter}, + fmt::{Debug, Formatter}, ops::Deref, }; diff --git a/raphtory/src/db/api/storage/storage.rs b/raphtory/src/db/api/storage/storage.rs index 470e3fc96f..adf3d02c24 100644 --- a/raphtory/src/db/api/storage/storage.rs +++ b/raphtory/src/db/api/storage/storage.rs @@ -37,7 +37,6 @@ use std::{ }; use storage::{Extension, WalImpl}; -use crate::prelude::Graph; #[cfg(feature = "search")] use { crate::{ diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index 30ba895157..ecb2bb7c11 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -17,13 +17,8 @@ use crate::{ node::NodeView, nodes::Nodes, views::{ - cached_view::CachedView, - filter::{ - model::{AsEdgeFilter, AsNodeFilter}, - node_type_filtered_graph::NodeTypeFilteredGraph, - }, - node_subgraph::NodeSubgraph, - valid_graph::ValidGraph, + cached_view::CachedView, filter::node_type_filtered_graph::NodeTypeFilteredGraph, + node_subgraph::NodeSubgraph, valid_graph::ValidGraph, }, }, }, @@ -62,7 +57,6 @@ use std::{ path::Path, sync::{atomic::Ordering, Arc}, }; -use storage::Extension; /// This trait GraphViewOps defines operations for accessing /// information about a graph. The trait has associated types diff --git a/raphtory/src/db/replay/mod.rs b/raphtory/src/db/replay/mod.rs index 3f583b98af..2c356faa3a 100644 --- a/raphtory/src/db/replay/mod.rs +++ b/raphtory/src/db/replay/mod.rs @@ -3,7 +3,6 @@ use raphtory_api::core::{ entities::{properties::prop::Prop, EID, GID, VID}, storage::{dict_mapper::MaybeNew, timeindex::TimeIndexEntry}, }; -use raphtory_storage::mutation::addition_ops::InternalAdditionOps; use storage::{ api::edges::EdgeSegmentOps, error::StorageError, diff --git a/raphtory/tests/db_tests.rs b/raphtory/tests/db_tests.rs index 37bb76e654..9eab14354d 100644 --- a/raphtory/tests/db_tests.rs +++ b/raphtory/tests/db_tests.rs @@ -2007,10 +2007,8 @@ fn test_graph_metadata() { fn test_graph_metadata2() { let g = Graph::new(); - let as_props: Vec<(&str, Prop)> = vec![( - "mylist", - Prop::List(Arc::from(vec![Prop::I64(1), Prop::I64(2)])), - )]; + let as_props: Vec<(&str, Prop)> = + vec![("mylist", Prop::list(vec![Prop::I64(1), Prop::I64(2)]))]; g.add_metadata(as_props.clone()).unwrap(); @@ -2075,7 +2073,7 @@ fn test_graph_temporal_props() { .enumerate() .map(|(i, props)| { let (name, value) = props; - let value = Prop::from(value); + let value = Prop::from(value.as_str()); (name.as_str().into(), value, i % 2) }) .partition(|(_, _, i)| *i == 0); diff --git a/raphtory/tests/exploded_edge_property_filter.rs b/raphtory/tests/exploded_edge_property_filter.rs index 6512bddabf..d6d9dcf4bf 100644 --- a/raphtory/tests/exploded_edge_property_filter.rs +++ b/raphtory/tests/exploded_edge_property_filter.rs @@ -38,7 +38,7 @@ fn build_filtered_graph( *src, *dst, [ - ("str_prop", str_prop.into()), + ("str_prop", str_prop.as_str().into()), ("int_prop", Prop::I64(*int_prop)), ], None, diff --git a/raphtory/tests/views_test.rs b/raphtory/tests/views_test.rs index 1fcdd3cbb2..a1dce955db 100644 --- a/raphtory/tests/views_test.rs +++ b/raphtory/tests/views_test.rs @@ -559,7 +559,6 @@ pub(crate) mod test_filters_window_graph { use raphtory_storage::mutation::{ addition_ops::InternalAdditionOps, property_addition_ops::InternalPropertyAdditionOps, }; - use std::sync::Arc; use raphtory::prelude::GraphViewOps; @@ -748,7 +747,7 @@ pub(crate) mod test_filters_window_graph { ("q1", Prop::U64(0u64)), ( "x", - Prop::List(Arc::from(vec![Prop::U64(1), Prop::U64(6), Prop::U64(9)])), + Prop::list(vec![Prop::U64(1), Prop::U64(6), Prop::U64(9)]), ), ], None, @@ -1256,11 +1255,11 @@ pub(crate) mod test_filters_window_graph { TestVariants::EventOnly, ); - let filter = PropertyFilter::property("x").eq(Prop::List(Arc::new(vec![ + let filter = PropertyFilter::property("x").eq(Prop::list(vec![ Prop::U64(1), Prop::U64(6), Prop::U64(9), - ]))); + ])); let expected_results = vec!["N14"]; // TODO: List(U64) not supported as disk_graph property // assert_filter_nodes_results_w!( @@ -1375,11 +1374,11 @@ pub(crate) mod test_filters_window_graph { TestVariants::PersistentOnly, ); - let filter = PropertyFilter::property("x").eq(Prop::List(Arc::new(vec![ + let filter = PropertyFilter::property("x").eq(Prop::list(vec![ Prop::U64(1), Prop::U64(6), Prop::U64(9), - ]))); + ])); let expected_results = vec!["N14"]; // TODO: List(U64) not supported as disk_graph property // assert_filter_nodes_results_pg_w!( @@ -1494,11 +1493,11 @@ pub(crate) mod test_filters_window_graph { vec![TestGraphVariants::Graph], ); - let filter = PropertyFilter::property("x").ne(Prop::List(Arc::new(vec![ + let filter = PropertyFilter::property("x").ne(Prop::list(vec![ Prop::U64(1), Prop::U64(6), Prop::U64(9), - ]))); + ])); let expected_results = Vec::<&str>::new(); assert_filter_nodes_results( init_graph, @@ -1604,11 +1603,11 @@ pub(crate) mod test_filters_window_graph { TestVariants::PersistentOnly, ); - let filter = PropertyFilter::property("x").ne(Prop::List(Arc::new(vec![ + let filter = PropertyFilter::property("x").ne(Prop::list(vec![ Prop::U64(1), Prop::U64(6), Prop::U64(9), - ]))); + ])); let expected_results = Vec::<&str>::new(); assert_filter_nodes_results( init_graph, @@ -1844,11 +1843,11 @@ pub(crate) mod test_filters_window_graph { TestVariants::PersistentOnly, ); - let filter = PropertyFilter::property("x").le(Prop::List(Arc::new(vec![ + let filter = PropertyFilter::property("x").le(Prop::list(vec![ Prop::U64(1), Prop::U64(2), Prop::U64(3), - ]))); + ])); let expected_results = Vec::<&str>::new(); assert_filter_nodes_results( init_graph, @@ -1921,11 +1920,11 @@ pub(crate) mod test_filters_window_graph { vec![TestGraphVariants::Graph], ); - let filter = PropertyFilter::property("x").gt(Prop::List(Arc::new(vec![ + let filter = PropertyFilter::property("x").gt(Prop::list(vec![ Prop::U64(1), Prop::U64(6), Prop::U64(9), - ]))); + ])); let expected_results = Vec::<&str>::new(); assert_filter_nodes_results( init_graph, @@ -2750,7 +2749,6 @@ pub(crate) mod test_filters_window_graph { prelude::{AdditionOps, GraphViewOps, PropertyAdditionOps, PropertyFilter}, }; use raphtory_api::core::{entities::properties::prop::Prop, storage::arc_str::ArcStr}; - use std::sync::Arc; use crate::test_filters_window_graph::WindowGraphTransformer; @@ -2978,7 +2976,7 @@ pub(crate) mod test_filters_window_graph { ("q1", Prop::U64(0u64)), ( "x", - Prop::List(Arc::from(vec![Prop::U64(1), Prop::U64(6), Prop::U64(9)])), + Prop::list(vec![Prop::U64(1), Prop::U64(6), Prop::U64(9)]), ), ], None, @@ -3288,11 +3286,11 @@ pub(crate) mod test_filters_window_graph { TestVariants::EventOnly, ); - let filter = PropertyFilter::property("x").eq(Prop::List(Arc::new(vec![ + let filter = PropertyFilter::property("x").eq(Prop::list(vec![ Prop::U64(1), Prop::U64(6), Prop::U64(9), - ]))); + ])); let expected_results = vec!["N14->N15"]; // TODO: List(U64) not supported as disk_graph property // assert_filter_edges_results_w!( @@ -3409,11 +3407,11 @@ pub(crate) mod test_filters_window_graph { TestVariants::PersistentOnly, ); - let filter = PropertyFilter::property("x").eq(Prop::List(Arc::new(vec![ + let filter = PropertyFilter::property("x").eq(Prop::list(vec![ Prop::U64(1), Prop::U64(6), Prop::U64(9), - ]))); + ])); let expected_results = vec!["N14->N15"]; // TODO: List(U64) not supported as disk_graph property // assert_filter_edges_results_pg_w!( @@ -3527,11 +3525,11 @@ pub(crate) mod test_filters_window_graph { TestVariants::EventOnly, ); - let filter = PropertyFilter::property("x").ne(Prop::List(Arc::new(vec![ + let filter = PropertyFilter::property("x").ne(Prop::list(vec![ Prop::U64(1), Prop::U64(6), Prop::U64(9), - ]))); + ])); let expected_results = Vec::<&str>::new(); assert_filter_edges_results( init_graph, @@ -3642,11 +3640,11 @@ pub(crate) mod test_filters_window_graph { TestVariants::PersistentOnly, ); - let filter = PropertyFilter::property("x").ne(Prop::List(Arc::new(vec![ + let filter = PropertyFilter::property("x").ne(Prop::list(vec![ Prop::U64(1), Prop::U64(6), Prop::U64(9), - ]))); + ])); let expected_results = Vec::<&str>::new(); assert_filter_edges_results( init_graph2, @@ -3938,11 +3936,9 @@ pub(crate) mod test_filters_window_graph { TestVariants::EventOnly, ); - let filter = PropertyFilter::property("x").gt(Prop::List(Arc::new(vec![ - Prop::U64(1), - Prop::U64(6), - Prop::U64(9), - ]))); + let filter = PropertyFilter::property("x").gt(Prop::List( + vec![Prop::U64(1), Prop::U64(6), Prop::U64(9)].into(), + )); let expected_results = Vec::<&str>::new(); assert_filter_edges_results( init_graph, From 1c4f0cc87ee23613f7b7ebe8e48853fb7956afbb Mon Sep 17 00:00:00 2001 From: Fabian Murariu Date: Thu, 13 Nov 2025 13:05:43 +0000 Subject: [PATCH 36/47] disable empty lists for now for proptests --- db4-storage/src/pages/edge_store.rs | 2 +- db4-storage/src/segments/mod.rs | 1 - raphtory/src/test_utils.rs | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/db4-storage/src/pages/edge_store.rs b/db4-storage/src/pages/edge_store.rs index 76771bf586..ec4f192fe7 100644 --- a/db4-storage/src/pages/edge_store.rs +++ b/db4-storage/src/pages/edge_store.rs @@ -442,7 +442,7 @@ impl, EXT: Config> EdgeStorageInner ) -> EdgeWriter<'a, RwLockWriteGuard<'a, MemEdgeSegment>, ES> { // optimistic first try to get a free page 3 times let num_edges = self.num_edges(); - let slot_idx = num_edges as usize % N; + let slot_idx = num_edges % N; let maybe_free_page = self.free_pages[slot_idx..] .iter() .cycle() diff --git a/db4-storage/src/segments/mod.rs b/db4-storage/src/segments/mod.rs index 13f76d479f..7f9b8688ef 100644 --- a/db4-storage/src/segments/mod.rs +++ b/db4-storage/src/segments/mod.rs @@ -313,7 +313,6 @@ impl SegmentContainer { self.data .iter_all() .chain(iter::repeat(None)) - // .take(self.max_page_len as usize) .take(max_local_pos + 1) .enumerate() .map(|(i, v)| { diff --git a/raphtory/src/test_utils.rs b/raphtory/src/test_utils.rs index e800dcc6f3..089b932994 100644 --- a/raphtory/src/test_utils.rs +++ b/raphtory/src/test_utils.rs @@ -149,7 +149,7 @@ pub fn prop(p_type: &PropType) -> BoxedStrategy { ) }) .boxed(), - PropType::List(p_type) => proptest::collection::vec(prop(p_type), 0..10) + PropType::List(p_type) => proptest::collection::vec(prop(p_type), 1..10) .prop_map(|props| Prop::List(props.into())) .boxed(), PropType::Map(p_types) => { @@ -158,7 +158,7 @@ pub fn prop(p_type: &PropType) -> BoxedStrategy { .map(|(k, v)| (k.clone(), v.clone())) .collect(); let len = key_val.len(); - let samples = proptest::sample::subsequence(key_val, 0..=len); + let samples = proptest::sample::subsequence(key_val, 1..=len); samples .prop_flat_map(|key_vals| { let props: Vec<_> = key_vals From 8f53eb79e56c347934a04df6734513ab40641605 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 13 Nov 2025 16:26:36 +0100 Subject: [PATCH 37/47] enable generating empty list and map --- raphtory/src/test_utils.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/raphtory/src/test_utils.rs b/raphtory/src/test_utils.rs index 089b932994..376c2b938d 100644 --- a/raphtory/src/test_utils.rs +++ b/raphtory/src/test_utils.rs @@ -149,7 +149,7 @@ pub fn prop(p_type: &PropType) -> BoxedStrategy { ) }) .boxed(), - PropType::List(p_type) => proptest::collection::vec(prop(p_type), 1..10) + PropType::List(p_type) => proptest::collection::vec(prop(p_type), 0..10) .prop_map(|props| Prop::List(props.into())) .boxed(), PropType::Map(p_types) => { @@ -158,7 +158,7 @@ pub fn prop(p_type: &PropType) -> BoxedStrategy { .map(|(k, v)| (k.clone(), v.clone())) .collect(); let len = key_val.len(); - let samples = proptest::sample::subsequence(key_val, 1..=len); + let samples = proptest::sample::subsequence(key_val, 0..=len); samples .prop_flat_map(|key_vals| { let props: Vec<_> = key_vals @@ -195,7 +195,7 @@ pub fn prop_type() -> impl Strategy { ]); leaf.prop_recursive(3, 10, 10, |inner| { - let dict = proptest::collection::hash_map(r"\w{1,10}", inner.clone(), 1..10) + let dict = proptest::collection::hash_map(r"\w{1,10}", inner.clone(), 0..10) .prop_map(PropType::map); let list = inner .clone() From cb2ec5ec179c0d5e697c7c7389b6e7b57df7e0f4 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Thu, 13 Nov 2025 20:12:46 +0100 Subject: [PATCH 38/47] fix test compile issues --- db4-storage/src/segments/edge.rs | 2 +- raphtory-core/src/storage/lazy_vec.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/db4-storage/src/segments/edge.rs b/db4-storage/src/segments/edge.rs index adef5bb383..19a9045945 100644 --- a/db4-storage/src/segments/edge.rs +++ b/db4-storage/src/segments/edge.rs @@ -582,7 +582,7 @@ impl>> EdgeSegmentOps for EdgeSegm #[cfg(test)] mod test { use super::*; - use arrow_array::{BooleanArray, StringArray}; + use arrow_array::{Array, BooleanArray, StringArray}; use raphtory_api::core::entities::properties::{prop::PropType, tprop::TPropOps}; use raphtory_core::storage::timeindex::TimeIndexEntry; diff --git a/raphtory-core/src/storage/lazy_vec.rs b/raphtory-core/src/storage/lazy_vec.rs index d8da2041e4..b5f5cfe5ad 100644 --- a/raphtory-core/src/storage/lazy_vec.rs +++ b/raphtory-core/src/storage/lazy_vec.rs @@ -1,6 +1,6 @@ use arrow_array::BooleanArray; use serde::{Deserialize, Serialize}; -use std::fmt::Debug; +use std::{fmt::Debug, iter}; #[derive(thiserror::Error, Debug, PartialEq)] #[error("Cannot set previous value '{previous_value:?}' to '{new_value:?}' in position '{index}'")] From 0dc46096ed818d06650ffe8324453148c36b5b62 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 14 Nov 2025 09:24:35 +0100 Subject: [PATCH 39/47] install cargo hack and latest nextest --- .github/workflows/test_rust_workflow.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_rust_workflow.yml b/.github/workflows/test_rust_workflow.yml index e967e2f844..3c341520ee 100644 --- a/.github/workflows/test_rust_workflow.yml +++ b/.github/workflows/test_rust_workflow.yml @@ -49,9 +49,9 @@ jobs: with: cache-all-crates: true - name: Install nextest - uses: taiki-e/install-action@v2 - with: - tool: nextest@0.9.99 + uses: taiki-e/install-action@nextest + - name: Install cargo-hack + uses: taiki-e/install-action@cargo-hack - uses: actions/setup-python@v5 with: python-version: '3.12' From ec09798e1b099762a812aafc7f64dc3f1dc9eafd Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 14 Nov 2025 12:39:49 +0100 Subject: [PATCH 40/47] add support for chunk size handling --- db4-storage/src/persist/strategy.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/db4-storage/src/persist/strategy.rs b/db4-storage/src/persist/strategy.rs index e1b0283a51..d8604d608c 100644 --- a/db4-storage/src/persist/strategy.rs +++ b/db4-storage/src/persist/strategy.rs @@ -9,12 +9,15 @@ use crate::segments::{ pub const DEFAULT_MAX_PAGE_LEN_NODES: u32 = 131_072; // 2^17 pub const DEFAULT_MAX_PAGE_LEN_EDGES: u32 = 1_048_576; // 2^20 +pub const DEFAULT_MAX_MEMORY_BYTES: usize = 32 * 1024 * 1024; pub trait Config: Default + std::fmt::Debug + Clone + Send + Sync + 'static + for<'a> Deserialize<'a> + Serialize { fn max_node_page_len(&self) -> u32; fn max_edge_page_len(&self) -> u32; + + fn max_memory_bytes(&self) -> usize; fn is_parallel(&self) -> bool; fn node_types(&self) -> &[String]; fn set_node_types(&mut self, types: impl IntoIterator>); @@ -70,6 +73,10 @@ impl Config for NoOpStrategy { self.max_edge_page_len } + fn max_memory_bytes(&self) -> usize { + usize::MAX + } + fn is_parallel(&self) -> bool { false } From d9ac48928ea69de864c1bdb6e5b438e454a9562d Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 14 Nov 2025 14:19:36 +0100 Subject: [PATCH 41/47] remove the arrow feature --- db4-storage/Cargo.toml | 2 +- raphtory-api/Cargo.toml | 10 ++++------ .../src/core/entities/properties/prop/mod.rs | 6 ++---- .../entities/properties/prop/prop_enum.rs | 9 ++------- .../entities/properties/prop/prop_ref_enum.rs | 18 ++++++++---------- raphtory-api/src/python/prop.rs | 6 +++--- raphtory-core/Cargo.toml | 5 ++--- .../src/entities/properties/tprop.rs | 5 ++--- raphtory-core/src/storage/mod.rs | 3 +-- raphtory-storage/Cargo.toml | 4 ++-- raphtory/Cargo.toml | 19 +++++-------------- .../src/db/api/properties/temporal_props.rs | 5 ++--- raphtory/src/db/api/view/graph.rs | 9 +++++++-- raphtory/src/errors.rs | 15 +++------------ raphtory/src/io/mod.rs | 3 +-- raphtory/src/lib.rs | 2 +- raphtory/src/serialise/mod.rs | 2 +- raphtory/src/serialise/proto/ext.rs | 1 - raphtory/tests/proto_test.rs | 1 - raphtory/tests/serialise_test.rs | 9 ++------- 20 files changed, 49 insertions(+), 85 deletions(-) diff --git a/db4-storage/Cargo.toml b/db4-storage/Cargo.toml index ca254d2260..c3db7ff3c3 100644 --- a/db4-storage/Cargo.toml +++ b/db4-storage/Cargo.toml @@ -13,7 +13,7 @@ edition = "2024" [dependencies] raphtory-api.workspace = true raphtory-api-macros.workspace = true -raphtory-core = { workspace = true, features = ["arrow"] } +raphtory-core = { workspace = true } # db4-common = {path = "../db4-common"} bitvec = { workspace = true, features = ["serde"] } diff --git a/raphtory-api/Cargo.toml b/raphtory-api/Cargo.toml index ebe042288e..4c632688a9 100644 --- a/raphtory-api/Cargo.toml +++ b/raphtory-api/Cargo.toml @@ -35,10 +35,10 @@ twox-hash.workspace = true tracing-subscriber = { workspace = true } tracing = { workspace = true } sorted_vector_map = { workspace = true } -arrow-array = { workspace = true, optional = true } -arrow-ipc = { workspace = true, optional = true } -arrow-schema = { workspace = true, optional = true } -serde_arrow = { workspace = true, optional = true } +arrow-array = { workspace = true } +arrow-ipc = { workspace = true } +arrow-schema = { workspace = true } +serde_arrow = { workspace = true } itertools = { workspace = true } iter-enum = { workspace = true } minijinja = { workspace = true, optional = true } @@ -48,7 +48,6 @@ display-error-chain = { workspace = true, optional = true } proptest.workspace = true [features] -default = ["arrow"] # Enables generating the pyo3 python bindings python = [ "dep:pyo3", "dep:pyo3-arrow", "dep:display-error-chain" @@ -57,6 +56,5 @@ python = [ proto = [] vectors = [] template = ["dep:minijinja"] -arrow = ["dep:arrow-array", "dep:arrow-ipc", "dep:arrow-schema", "dep:serde_arrow"] search = [] io = ["dep:serde_json"] diff --git a/raphtory-api/src/core/entities/properties/prop/mod.rs b/raphtory-api/src/core/entities/properties/prop/mod.rs index eb13449c2e..b0bab3edac 100644 --- a/raphtory-api/src/core/entities/properties/prop/mod.rs +++ b/raphtory-api/src/core/entities/properties/prop/mod.rs @@ -1,6 +1,5 @@ -#[cfg(feature = "arrow")] pub mod arrow; -#[cfg(feature = "arrow")] + mod prop_array; mod prop_enum; mod prop_ref_enum; @@ -12,9 +11,8 @@ mod serde; #[cfg(feature = "template")] mod template; -#[cfg(feature = "arrow")] pub use arrow::*; -#[cfg(feature = "arrow")] + pub use prop_array::*; pub use prop_enum::*; pub use prop_ref_enum::*; diff --git a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs index 2516525564..acde96f75d 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_enum.rs @@ -5,9 +5,6 @@ use crate::core::{ }, storage::arc_str::ArcStr, }; -use arrow_array::{cast::AsArray, ArrayRef, LargeListArray, StructArray}; -#[cfg(feature = "arrow")] -use arrow_schema::{DataType, Field, FieldRef}; use bigdecimal::{num_bigint::BigInt, BigDecimal}; use chrono::{DateTime, NaiveDateTime, Utc}; use itertools::Itertools; @@ -26,8 +23,9 @@ use std::{ }; use thiserror::Error; -#[cfg(feature = "arrow")] use crate::core::entities::properties::prop::prop_array::*; +use arrow_array::{cast::AsArray, ArrayRef, LargeListArray, StructArray}; +use arrow_schema::{DataType, Field, FieldRef}; pub const DECIMAL_MAX: i128 = 99999999999999999999999999999999999999i128; // equivalent to parquet decimal(38, 0) @@ -332,7 +330,6 @@ impl Prop { } } -#[cfg(feature = "arrow")] pub fn list_array_from_props( dt: &DataType, props: impl IntoIterator>, @@ -358,7 +355,6 @@ pub fn list_array_from_props( arrays.first().unwrap().as_list::().clone() } -#[cfg(feature = "arrow")] pub fn struct_array_from_props( dt: &DataType, props: impl IntoIterator>, @@ -476,7 +472,6 @@ impl From<&Prop> for Prop { } } -#[cfg(feature = "arrow")] impl From for Prop { fn from(value: ArrayRef) -> Self { Prop::List(PropArray::from(value)) diff --git a/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs index c5ec3dc539..f77d94dc20 100644 --- a/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs +++ b/raphtory-api/src/core/entities/properties/prop/prop_ref_enum.rs @@ -1,17 +1,15 @@ -use num_traits::ToPrimitive; -use serde::Serialize; -use std::{borrow::Cow, sync::Arc}; - +use crate::core::{ + entities::properties::prop::{Prop, SerdeList, SerdeMap}, + storage::arc_str::ArcStr, +}; use bigdecimal::BigDecimal; use chrono::{DateTime, NaiveDateTime, Utc}; +use num_traits::ToPrimitive; use rustc_hash::FxHashMap; +use serde::Serialize; +use std::{borrow::Cow, sync::Arc}; -#[cfg(feature = "arrow")] -use crate::core::entities::properties::prop::PropArray; -use crate::core::{ - entities::properties::prop::{ArrowRow, Prop, SerdeList, SerdeMap}, - storage::arc_str::ArcStr, -}; +use crate::core::entities::properties::prop::{ArrowRow, PropArray}; #[derive(Debug, PartialEq, Clone)] pub enum PropRef<'a> { diff --git a/raphtory-api/src/python/prop.rs b/raphtory-api/src/python/prop.rs index 1afd56d145..cfb7d3828f 100644 --- a/raphtory-api/src/python/prop.rs +++ b/raphtory-api/src/python/prop.rs @@ -9,7 +9,6 @@ use pyo3::{ }; use std::{ops::Deref, str::FromStr, sync::Arc}; -#[cfg(feature = "arrow")] mod array_ext { use pyo3::{intern, prelude::*, types::PyTuple}; use pyo3_arrow::PyArray; @@ -28,8 +27,9 @@ mod array_ext { } } -#[cfg(feature = "arrow")] -use {crate::core::entities::properties::prop::PropArray, array_ext::*, pyo3_arrow::PyArray}; +use crate::core::entities::properties::prop::PropArray; +use array_ext::*; +use pyo3_arrow::PyArray; static DECIMAL_CLS: GILOnceCell> = GILOnceCell::new(); diff --git a/raphtory-core/Cargo.toml b/raphtory-core/Cargo.toml index 0bfe27f5a3..8e40e4fdf8 100644 --- a/raphtory-core/Cargo.toml +++ b/raphtory-core/Cargo.toml @@ -13,7 +13,7 @@ rust-version.workspace = true edition.workspace = true [dependencies] -raphtory-api = { workspace = true, features = ["arrow"] } +raphtory-api = { workspace = true } dashmap = { workspace = true, features = ["raw-api"] } hashbrown = { workspace = true } either = { workspace = true } @@ -29,7 +29,7 @@ parking_lot = { workspace = true } itertools = { workspace = true } once_cell = { workspace = true } ouroboros = { workspace = true } -arrow-array = { workspace = true} +arrow-array = { workspace = true } regex = { workspace = true } pyo3 = { workspace = true, optional = true } @@ -37,5 +37,4 @@ pyo3 = { workspace = true, optional = true } proptest = { workspace = true } [features] -arrow = ["raphtory-api/arrow"] python = ["dep:pyo3", "raphtory-api/python"] diff --git a/raphtory-core/src/entities/properties/tprop.rs b/raphtory-core/src/entities/properties/tprop.rs index 273c4640dd..3ea03418fe 100644 --- a/raphtory-core/src/entities/properties/tprop.rs +++ b/raphtory-core/src/entities/properties/tprop.rs @@ -6,11 +6,10 @@ use bigdecimal::BigDecimal; use chrono::{DateTime, NaiveDateTime, Utc}; use either::Either; use iter_enum::{DoubleEndedIterator, ExactSizeIterator, FusedIterator, Iterator}; -#[cfg(feature = "arrow")] -use raphtory_api::core::entities::properties::prop::PropArray; + use raphtory_api::core::{ entities::properties::{ - prop::{Prop, PropType}, + prop::{Prop, PropArray, PropType}, tprop::TPropOps, }, storage::arc_str::ArcStr, diff --git a/raphtory-core/src/storage/mod.rs b/raphtory-core/src/storage/mod.rs index 172e924fe6..4405c3c272 100644 --- a/raphtory-core/src/storage/mod.rs +++ b/raphtory-core/src/storage/mod.rs @@ -23,7 +23,6 @@ use serde::{Deserialize, Serialize}; use std::{borrow::Cow, collections::HashMap, fmt::Debug, sync::Arc}; use thiserror::Error; -#[cfg(feature = "arrow")] use raphtory_api::core::entities::properties::prop::PropArray; pub mod lazy_vec; @@ -280,7 +279,7 @@ impl PropColumn { ), // PropColumn::List(v) => v.append(col, mask), // PropColumn::Map(v) => v.append(col, mask), - // #[cfg(feature = "arrow")] + // // PropColumn::Array(v) => v.append(col, mask), // PropColumn::Empty(_) => {} _ => { /* ignore unsupported types for now */ } diff --git a/raphtory-storage/Cargo.toml b/raphtory-storage/Cargo.toml index cfb193040f..64e96359bb 100644 --- a/raphtory-storage/Cargo.toml +++ b/raphtory-storage/Cargo.toml @@ -26,8 +26,8 @@ itertools = { workspace = true } thiserror = { workspace = true } bigdecimal = { workspace = true, optional = true } num-traits = { workspace = true, optional = true } -arrow-array = { workspace = true, optional = true } -arrow-schema = { workspace = true, optional = true } +arrow-array = { workspace = true } +arrow-schema = { workspace = true } [dev-dependencies] proptest = { workspace = true } diff --git a/raphtory/Cargo.toml b/raphtory/Cargo.toml index d144d19737..b710a80068 100644 --- a/raphtory/Cargo.toml +++ b/raphtory/Cargo.toml @@ -51,6 +51,9 @@ roaring = { workspace = true } strsim = { workspace = true } walkdir = { workspace = true } uuid = { workspace = true } +parquet = { workspace = true } +arrow-json = { workspace = true } +arrow = { workspace = true } # io optional dependencies csv = { workspace = true, optional = true } @@ -67,10 +70,7 @@ memmap2 = { workspace = true, optional = true } prost = { workspace = true, optional = true } prost-types = { workspace = true, optional = true } -# arrow otional dependencies -parquet = { workspace = true, optional = true } -arrow-json = { workspace = true, optional = true } -arrow = { workspace = true, optional = true, features = ["chrono-tz"] } + # search optional dependencies tantivy = { workspace = true, optional = true } @@ -124,7 +124,6 @@ io = [ "dep:tempfile", "dep:zip", "kdam", - "arrow", ] # search @@ -147,7 +146,6 @@ vectors = [ # Enables generating the pyo3 python bindings python = [ "io", - "arrow", "vectors", "dep:pyo3", "dep:numpy", @@ -158,20 +156,13 @@ python = [ "raphtory-core/python", "kdam/notebook", ] -arrow = [ - "raphtory-api/arrow", - "raphtory-core/arrow", - "dep:parquet", - "dep:arrow-json", - "dep:arrow", -] + proto = [ "dep:prost", "dep:prost-types", "dep:prost-build", "dep:memmap2", - "arrow", "io", ] diff --git a/raphtory/src/db/api/properties/temporal_props.rs b/raphtory/src/db/api/properties/temporal_props.rs index d8dc79c144..cc2fea4b8a 100644 --- a/raphtory/src/db/api/properties/temporal_props.rs +++ b/raphtory/src/db/api/properties/temporal_props.rs @@ -13,8 +13,8 @@ use std::{ sync::Arc, }; -#[cfg(feature = "arrow")] -use {arrow::array::ArrayRef, raphtory_api::core::entities::properties::prop::PropArrayUnwrap}; +use arrow::array::ArrayRef; +use raphtory_api::core::entities::properties::prop::PropArrayUnwrap; #[derive(Clone)] pub struct TemporalPropertyView { @@ -298,7 +298,6 @@ impl PropUnwrap for TemporalPropertyView

{ } } -#[cfg(feature = "arrow")] impl PropArrayUnwrap for TemporalPropertyView

{ fn into_array(self) -> Option { self.latest().into_array() diff --git a/raphtory/src/db/api/view/graph.rs b/raphtory/src/db/api/view/graph.rs index ecb2bb7c11..3d519ad20f 100644 --- a/raphtory/src/db/api/view/graph.rs +++ b/raphtory/src/db/api/view/graph.rs @@ -17,8 +17,13 @@ use crate::{ node::NodeView, nodes::Nodes, views::{ - cached_view::CachedView, filter::node_type_filtered_graph::NodeTypeFilteredGraph, - node_subgraph::NodeSubgraph, valid_graph::ValidGraph, + cached_view::CachedView, + filter::{ + model::{AsEdgeFilter, AsNodeFilter}, + node_type_filtered_graph::NodeTypeFilteredGraph, + }, + node_subgraph::NodeSubgraph, + valid_graph::ValidGraph, }, }, }, diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index 95212063ff..28a0572350 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -24,12 +24,9 @@ use std::{ }; use tracing::error; -#[cfg(feature = "arrow")] -use { - arrow::{datatypes::DataType, error::ArrowError}, - parquet::errors::ParquetError, - raphtory_api::core::entities::{GidType, VID}, -}; +use arrow::{datatypes::DataType, error::ArrowError}; +use parquet::errors::ParquetError; +use raphtory_api::core::entities::{GidType, VID}; #[cfg(feature = "python")] use pyo3::PyErr; @@ -68,7 +65,6 @@ pub enum InvalidPathReason { }, } -#[cfg(feature = "arrow")] #[derive(thiserror::Error, Debug)] pub enum LoadError { #[error("Only str columns are supported for layers, got {0:?}")] @@ -133,11 +129,9 @@ pub enum GraphError { #[error("You cannot set ‘{0}’ and ‘{1}’ at the same time. Please pick one or the other.")] WrongNumOfArgs(String, String), - #[cfg(feature = "arrow")] #[error("Arrow-rs error: {0}")] ArrowRs(#[from] ArrowError), - #[cfg(feature = "arrow")] #[error("Arrow-rs parquet error: {0}")] ParquetError(#[from] ParquetError), @@ -147,7 +141,6 @@ pub enum GraphError { source: InvalidPathReason, }, - #[cfg(feature = "arrow")] #[error("{source}")] LoadError { #[from] @@ -262,11 +255,9 @@ pub enum GraphError { source: zip::result::ZipError, }, - #[cfg(feature = "arrow")] #[error("Failed to load graph: {0}")] LoadFailure(String), - #[cfg(feature = "arrow")] #[error( "Failed to load graph as the following columns are not present within the dataframe: {0}" )] diff --git a/raphtory/src/io/mod.rs b/raphtory/src/io/mod.rs index 1fd56c86e8..c5f5abd6a8 100644 --- a/raphtory/src/io/mod.rs +++ b/raphtory/src/io/mod.rs @@ -1,7 +1,6 @@ -#[cfg(feature = "arrow")] pub mod arrow; pub mod csv_loader; pub mod json_loader; pub mod neo4j_loader; -#[cfg(feature = "arrow")] + pub mod parquet_loaders; diff --git a/raphtory/src/lib.rs b/raphtory/src/lib.rs index 81620cf235..78dfdb19a9 100644 --- a/raphtory/src/lib.rs +++ b/raphtory/src/lib.rs @@ -149,7 +149,7 @@ pub mod prelude { }, }; - #[cfg(all(feature = "io", feature = "arrow"))] + #[cfg(feature = "io")] pub use crate::serialise::{ parquet::{ParquetDecoder, ParquetEncoder}, StableDecode, StableEncode, diff --git a/raphtory/src/serialise/mod.rs b/raphtory/src/serialise/mod.rs index 316d3403f0..e6a139713b 100644 --- a/raphtory/src/serialise/mod.rs +++ b/raphtory/src/serialise/mod.rs @@ -1,6 +1,6 @@ mod graph_folder; pub mod metadata; -#[cfg(feature = "arrow")] + pub(crate) mod parquet; #[cfg(feature = "proto")] diff --git a/raphtory/src/serialise/proto/ext.rs b/raphtory/src/serialise/proto/ext.rs index 062efff30d..933c49deda 100644 --- a/raphtory/src/serialise/proto/ext.rs +++ b/raphtory/src/serialise/proto/ext.rs @@ -27,7 +27,6 @@ use raphtory_api::core::{ }; use std::{borrow::Borrow, collections::HashMap, sync::Arc}; -#[cfg(feature = "arrow")] use raphtory_api::core::entities::properties::prop::PropArray; fn as_proto_prop_type(p_type: &PropType) -> Option { diff --git a/raphtory/tests/proto_test.rs b/raphtory/tests/proto_test.rs index 2f4fbddc4e..36ab0c635d 100644 --- a/raphtory/tests/proto_test.rs +++ b/raphtory/tests/proto_test.rs @@ -29,7 +29,6 @@ mod proto_test { use std::{collections::HashMap, io::Cursor, iter, path::PathBuf, sync::Arc}; use tempfile::TempDir; - #[cfg(feature = "arrow")] use arrow::array::types::{Int32Type, UInt8Type}; use raphtory::test_utils::{build_edge_list, build_graph_from_edge_list}; diff --git a/raphtory/tests/serialise_test.rs b/raphtory/tests/serialise_test.rs index d030456ab3..6d77b963e4 100644 --- a/raphtory/tests/serialise_test.rs +++ b/raphtory/tests/serialise_test.rs @@ -1,7 +1,7 @@ #[cfg(test)] #[cfg(feature = "proto")] mod serialise_test { - #[cfg(feature = "arrow")] + use arrow::{array::types::Int32Type, datatypes::UInt8Type}; use chrono::{DateTime, NaiveDateTime}; use itertools::Itertools; @@ -141,7 +141,6 @@ mod serialise_test { g1.add_edge(3, "Alice", "Bob", [("kind", "friends")], None) .unwrap(); - #[cfg(feature = "arrow")] g1.add_edge( 3, "Alice", @@ -454,10 +453,7 @@ mod serialise_test { props.push(("weight", Prop::F64(75.5))); props.push(( "children", - Prop::List(Arc::new(vec![ - Prop::Str("Bob".into()), - Prop::Str("Charlie".into()), - ])), + Prop::from(vec![Prop::Str("Bob".into()), Prop::Str("Charlie".into())]), )); props.push(( "properties", @@ -481,7 +477,6 @@ mod serialise_test { ), )); - #[cfg(feature = "arrow")] props.push(( "array", Prop::from_arr::(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), From 659c3e75c55dd0420d6c8321443b90ad925f9d3f Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 14 Nov 2025 14:29:47 +0100 Subject: [PATCH 42/47] make sure all features compile --- raphtory-benchmark/benches/search_bench.rs | 8 ++------ raphtory/src/python/packages/base_modules.rs | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/raphtory-benchmark/benches/search_bench.rs b/raphtory-benchmark/benches/search_bench.rs index 97bf7ca7b7..03567243a6 100644 --- a/raphtory-benchmark/benches/search_bench.rs +++ b/raphtory-benchmark/benches/search_bench.rs @@ -1,10 +1,6 @@ use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; use once_cell::sync::Lazy; -use rand::{ - rng, - seq::{IteratorRandom, SliceRandom}, - Rng, -}; +use rand::{prelude::IndexedRandom, rng, seq::IteratorRandom, Rng}; use raphtory::{ db::{ api::{ @@ -37,7 +33,7 @@ use std::{iter, sync::Arc, time::Instant}; static GRAPH: Lazy> = Lazy::new(|| { let data_dir = "/tmp/graphs/raph_social/rf0.1"; // TODO Fix this // let data_dir = "/tmp/graphs/raph_social/rf1.0"; - let graph = Graph::decode(data_dir).unwrap(); + let graph = Graph::decode(data_dir, None).unwrap(); println!("Nodes count = {}", graph.count_nodes()); println!("Edges count = {}", graph.count_edges()); diff --git a/raphtory/src/python/packages/base_modules.rs b/raphtory/src/python/packages/base_modules.rs index 605e4d0ea3..b9fef4e309 100644 --- a/raphtory/src/python/packages/base_modules.rs +++ b/raphtory/src/python/packages/base_modules.rs @@ -68,7 +68,7 @@ pub fn add_raphtory_classes(m: &Bound) -> PyResult<()> { ); #[cfg(feature = "search")] - add_classes!(PyIndexSpecBuilder, PyIndexSpec); + add_classes!(m, PyIndexSpecBuilder, PyIndexSpec); #[pyfunction] /// Return Raphtory version. From 3f00651d95b23c83f6d46a3e4931c4827b8cbf07 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Fri, 14 Nov 2025 17:32:43 +0100 Subject: [PATCH 43/47] get python tests running again --- .../test_index.py | 0 .../test_index_spec.py | 0 python/tox.ini | 23 +++++++++---------- 3 files changed, 11 insertions(+), 12 deletions(-) rename python/tests/{test_base_install => test_search}/test_index.py (100%) rename python/tests/{test_base_install => test_search}/test_index_spec.py (100%) diff --git a/python/tests/test_base_install/test_index.py b/python/tests/test_search/test_index.py similarity index 100% rename from python/tests/test_base_install/test_index.py rename to python/tests/test_search/test_index.py diff --git a/python/tests/test_base_install/test_index_spec.py b/python/tests/test_search/test_index_spec.py similarity index 100% rename from python/tests/test_base_install/test_index_spec.py rename to python/tests/test_search/test_index_spec.py diff --git a/python/tox.ini b/python/tox.ini index b964ca5fbd..888e1380ed 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -20,6 +20,17 @@ pass_env = [testenv:.pkg] pass_env = MATURIN_PEP517_ARGS +[testenv:search] +extras = + test +wheel_build_env = .pkg_search +commands = pytest --nbmake --nbmake-timeout=1200 {tty:--color=yes} tests/test_search + +[testenv:.pkg_search] +set_env = + MATURIN_PEP517_ARGS="--features=search,extension-module" + + [testenv:export] extras = export @@ -39,18 +50,6 @@ deps = matplotlib commands = pytest --nbmake --nbmake-timeout=1200 {tty:--color=yes} ../examples/python/socio-patterns/example.ipynb -[testenv:storage] -extras = - test -set_env = - DISK_TEST_MARK=1 -wheel_build_env = .pkg_private -commands = pytest --nbmake --nbmake-timeout=1200 {tty:--color=yes} tests - -[testenv:.pkg_private] -set_env = - MATURIN_PEP517_ARGS="--features=storage,extension-module" - [testenv:docs] deps = -r ../docs/requirements.txt From 80765b1e60b6010e2f685017cd8e1ed7c27cdc2c Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 17 Nov 2025 10:41:41 +0100 Subject: [PATCH 44/47] add location tracing for io errors --- raphtory/src/errors.rs | 41 +++++++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index 28a0572350..f7d790b29b 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -2,10 +2,12 @@ use crate::{ core::storage::lazy_vec::IllegalSet, db::graph::views::filter::model::filter_operator::FilterOperator, prelude::GraphViewOps, }; +use arrow::{datatypes::DataType, error::ArrowError}; use itertools::Itertools; +use parquet::errors::ParquetError; use raphtory_api::core::entities::{ properties::prop::{PropError, PropType}, - GID, + GidType, GID, VID, }; use raphtory_core::{ entities::{ @@ -16,18 +18,16 @@ use raphtory_core::{ }; use raphtory_storage::mutation::MutationError; use std::{ + backtrace::Backtrace, fmt::Debug, - io, + io, panic, + panic::Location, path::{PathBuf, StripPrefixError}, sync::Arc, time::SystemTimeError, }; use tracing::error; -use arrow::{datatypes::DataType, error::ArrowError}; -use parquet::errors::ParquetError; -use raphtory_api::core::entities::{GidType, VID}; - #[cfg(feature = "python")] use pyo3::PyErr; @@ -227,10 +227,10 @@ pub enum GraphError { #[error("The loaded graph is of the wrong type. Did you mean Graph / PersistentGraph?")] GraphLoadError, - #[error("IO operation failed: {source}")] + #[error("{source} at {location}")] IOError { - #[from] source: io::Error, + location: &'static Location<'static>, }, #[error("IO operation failed: {0}")] @@ -463,3 +463,28 @@ impl From for io::Error { io::Error::other(error) } } + +impl From for GraphError { + #[track_caller] + fn from(source: io::Error) -> Self { + let location = Location::caller(); + GraphError::IOError { source, location } + } +} + +#[cfg(test)] +mod test { + use crate::errors::GraphError; + use std::io; + + #[test] + fn test_location_capture() { + fn inner() -> Result<(), GraphError> { + Err(io::Error::other(GraphError::IllegalSet("hi".to_string())))?; + Ok(()) + } + + let res = inner().err().unwrap(); + println!("{}", res); + } +} From 69fee1becdaf53c28bc5a0e82df493b81ba7145d Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Mon, 17 Nov 2025 12:31:32 +0100 Subject: [PATCH 45/47] fix pattern match for location --- raphtory/src/search/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raphtory/src/search/mod.rs b/raphtory/src/search/mod.rs index dc9c63dd13..69d810e807 100644 --- a/raphtory/src/search/mod.rs +++ b/raphtory/src/search/mod.rs @@ -364,7 +364,7 @@ mod test_index { .unwrap(); let result = graph.encode(folder); match result { - Err(GraphError::IOError { source }) => { + Err(GraphError::IOError { source, .. }) => { assert!( format!("{source}").to_lowercase().contains("file exists"), "{}", From e6fe6ce713ed49d52a30edfe461cc63bc5913451 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Tue, 18 Nov 2025 14:34:09 +0100 Subject: [PATCH 46/47] ignore and tag the broken tests --- .../src/model/graph/mutable_graph.rs | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/raphtory-graphql/src/model/graph/mutable_graph.rs b/raphtory-graphql/src/model/graph/mutable_graph.rs index 98e6cefa4e..bd801f71aa 100644 --- a/raphtory-graphql/src/model/graph/mutable_graph.rs +++ b/raphtory-graphql/src/model/graph/mutable_graph.rs @@ -25,7 +25,7 @@ pub struct BatchFailures { } fn split_failures( - results: impl IntoIterator>, + results: impl IntoIterator>, write_result: Result<(), GraphError>, ) -> (Vec, Option) { let mut succeeded = Vec::new(); @@ -131,7 +131,7 @@ impl GqlMutableGraph { fn as_properties( properties: Vec, -) -> Result, GraphError> { +) -> Result, GraphError> { let props: Result, GraphError> = properties .into_iter() .map(|p| { @@ -174,7 +174,7 @@ impl GqlMutableGraph { Ok::<_, GraphError>(node) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = node.update_embeddings().await; @@ -203,7 +203,7 @@ impl GqlMutableGraph { Ok::<_, GraphError>(node) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = node.update_embeddings().await; @@ -246,7 +246,7 @@ impl GqlMutableGraph { split_failures(nodes, Ok(())) }) - .await; + .await; self.post_mutation_ops().await; @@ -284,7 +284,7 @@ impl GqlMutableGraph { Ok::<_, GraphError>(edge) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = edge.update_embeddings().await; @@ -326,7 +326,7 @@ impl GqlMutableGraph { split_failures(edge_res, Ok(())) }) - .await; + .await; self.post_mutation_ops().await; let _ = self.graph.update_edge_embeddings(edge_pairs).await; @@ -353,7 +353,7 @@ impl GqlMutableGraph { Ok::<_, GraphError>(edge) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = edge.update_embeddings().await; @@ -378,7 +378,7 @@ impl GqlMutableGraph { .add_properties(t, as_properties(properties)?)?; Ok(true) }) - .await; + .await; self.post_mutation_ops().await; @@ -392,7 +392,7 @@ impl GqlMutableGraph { self_clone.graph.add_metadata(as_properties(properties)?)?; Ok(true) }) - .await; + .await; self.post_mutation_ops().await; @@ -408,7 +408,7 @@ impl GqlMutableGraph { .update_metadata(as_properties(properties)?)?; Ok(true) }) - .await; + .await; self.post_mutation_ops().await; @@ -486,7 +486,7 @@ impl GqlMutableNode { self_clone.node.add_metadata(as_properties(properties)?)?; Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; @@ -500,7 +500,7 @@ impl GqlMutableNode { self_clone.node.set_node_type(&new_type)?; Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; @@ -517,7 +517,7 @@ impl GqlMutableNode { Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; @@ -537,7 +537,7 @@ impl GqlMutableNode { .add_updates(time, as_properties(properties.unwrap_or(vec![]))?)?; Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = self.node.update_embeddings().await; @@ -603,7 +603,7 @@ impl GqlMutableEdge { self_clone.edge.delete(time, layer.as_str())?; Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = self.edge.update_embeddings().await; @@ -628,7 +628,7 @@ impl GqlMutableEdge { Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = self.edge.update_embeddings().await; @@ -653,7 +653,7 @@ impl GqlMutableEdge { Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = self.edge.update_embeddings().await; @@ -681,7 +681,7 @@ impl GqlMutableEdge { Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = self.edge.update_embeddings().await; @@ -778,6 +778,7 @@ mod tests { } #[tokio::test] + #[ignore = "TODO: #2384"] async fn test_add_nodes_simple() { let (mutable_graph, _tmp_dir) = create_mutable_graph().await; @@ -822,6 +823,7 @@ mod tests { } #[tokio::test] + #[ignore = "TODO: #2384"] async fn test_add_nodes_with_properties() { let (mutable_graph, _tmp_dir) = create_mutable_graph().await; @@ -893,6 +895,7 @@ mod tests { } #[tokio::test] + #[ignore = "TODO: #2384"] async fn test_add_edges_simple() { let (mutable_graph, _tmp_dir) = create_mutable_graph().await; From 9ac47d706e6aca806b9dc0dbc2789404b18934a5 Mon Sep 17 00:00:00 2001 From: Lucas Jeub Date: Tue, 18 Nov 2025 14:35:41 +0100 Subject: [PATCH 47/47] fmt --- .../src/model/graph/mutable_graph.rs | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/raphtory-graphql/src/model/graph/mutable_graph.rs b/raphtory-graphql/src/model/graph/mutable_graph.rs index bd801f71aa..27c9f7b732 100644 --- a/raphtory-graphql/src/model/graph/mutable_graph.rs +++ b/raphtory-graphql/src/model/graph/mutable_graph.rs @@ -25,7 +25,7 @@ pub struct BatchFailures { } fn split_failures( - results: impl IntoIterator>, + results: impl IntoIterator>, write_result: Result<(), GraphError>, ) -> (Vec, Option) { let mut succeeded = Vec::new(); @@ -131,7 +131,7 @@ impl GqlMutableGraph { fn as_properties( properties: Vec, -) -> Result, GraphError> { +) -> Result, GraphError> { let props: Result, GraphError> = properties .into_iter() .map(|p| { @@ -174,7 +174,7 @@ impl GqlMutableGraph { Ok::<_, GraphError>(node) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = node.update_embeddings().await; @@ -203,7 +203,7 @@ impl GqlMutableGraph { Ok::<_, GraphError>(node) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = node.update_embeddings().await; @@ -246,7 +246,7 @@ impl GqlMutableGraph { split_failures(nodes, Ok(())) }) - .await; + .await; self.post_mutation_ops().await; @@ -284,7 +284,7 @@ impl GqlMutableGraph { Ok::<_, GraphError>(edge) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = edge.update_embeddings().await; @@ -326,7 +326,7 @@ impl GqlMutableGraph { split_failures(edge_res, Ok(())) }) - .await; + .await; self.post_mutation_ops().await; let _ = self.graph.update_edge_embeddings(edge_pairs).await; @@ -353,7 +353,7 @@ impl GqlMutableGraph { Ok::<_, GraphError>(edge) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = edge.update_embeddings().await; @@ -378,7 +378,7 @@ impl GqlMutableGraph { .add_properties(t, as_properties(properties)?)?; Ok(true) }) - .await; + .await; self.post_mutation_ops().await; @@ -392,7 +392,7 @@ impl GqlMutableGraph { self_clone.graph.add_metadata(as_properties(properties)?)?; Ok(true) }) - .await; + .await; self.post_mutation_ops().await; @@ -408,7 +408,7 @@ impl GqlMutableGraph { .update_metadata(as_properties(properties)?)?; Ok(true) }) - .await; + .await; self.post_mutation_ops().await; @@ -486,7 +486,7 @@ impl GqlMutableNode { self_clone.node.add_metadata(as_properties(properties)?)?; Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; @@ -500,7 +500,7 @@ impl GqlMutableNode { self_clone.node.set_node_type(&new_type)?; Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; @@ -517,7 +517,7 @@ impl GqlMutableNode { Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; @@ -537,7 +537,7 @@ impl GqlMutableNode { .add_updates(time, as_properties(properties.unwrap_or(vec![]))?)?; Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = self.node.update_embeddings().await; @@ -603,7 +603,7 @@ impl GqlMutableEdge { self_clone.edge.delete(time, layer.as_str())?; Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = self.edge.update_embeddings().await; @@ -628,7 +628,7 @@ impl GqlMutableEdge { Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = self.edge.update_embeddings().await; @@ -653,7 +653,7 @@ impl GqlMutableEdge { Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = self.edge.update_embeddings().await; @@ -681,7 +681,7 @@ impl GqlMutableEdge { Ok::<_, GraphError>(()) }) - .await?; + .await?; self.post_mutation_ops().await; let _ = self.edge.update_embeddings().await;