Skip to content

Commit 969e405

Browse files
authored
redesign, refactor property filters (#2311)
* redesign, refactor property filters
* takes a reference
* rework validation
* fix arbitrary list, fix tests
* merge python traits, fix tests, add more validations
* rework gql filtering apis, fix tests
* add review suggestions
* Features/gql apis (#2350)
* impl nodes select filtering in gql
* change semantics of filters in gql, add missing filter apis in edges, fix all tests
* add more edge filter tests
* add filtering to path from node, add tests
* fix apply views
* rename filter-iter to select
* add select as args
* impl window filter (#2359)
* impl window filter
* impl window filter in python, add tests
* impl gql window filter, add tests
* ref
* impl review suggestions
* fixes
* fix py and gql
* add review suggestions
1 parent c210fed commit 969e405

File tree

60 files changed

+6273
-5808
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+6273
-5808
lines changed

python/python/raphtory/__init__.pyi

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ __all__ = [
4949
"IndexSpec",
5050
"Prop",
5151
"version",
52-
"DiskGraphStorage",
5352
"graphql",
5453
"algorithms",
5554
"graph_loader",
@@ -1376,17 +1375,6 @@ class Graph(GraphView):
13761375
MutableNode: The node object with the specified id, or None if the node does not exist
13771376
"""
13781377

1379-
def persist_as_disk_graph(self, graph_dir: str | PathLike) -> DiskGraphStorage:
1380-
"""
1381-
save graph in disk_graph format and memory map the result
1382-
1383-
Arguments:
1384-
graph_dir (str | PathLike): folder where the graph will be saved
1385-
1386-
Returns:
1387-
DiskGraphStorage: the persisted graph storage
1388-
"""
1389-
13901378
def persistent_graph(self) -> PersistentGraph:
13911379
"""
13921380
View graph with persistent semantics
@@ -1424,17 +1412,6 @@ class Graph(GraphView):
14241412
bytes:
14251413
"""
14261414

1427-
def to_disk_graph(self, graph_dir: str | PathLike) -> Graph:
1428-
"""
1429-
Persist graph on disk
1430-
1431-
Arguments:
1432-
graph_dir (str | PathLike): the folder where the graph will be persisted
1433-
1434-
Returns:
1435-
Graph: a view of the persisted graph
1436-
"""
1437-
14381415
def to_parquet(self, graph_dir: str | PathLike):
14391416
"""
14401417
Persist graph to parquet files
@@ -2209,7 +2186,6 @@ class PersistentGraph(GraphView):
22092186
bytes:
22102187
"""
22112188

2212-
def to_disk_graph(self, graph_dir): ...
22132189
def update_metadata(self, metadata: dict) -> None:
22142190
"""
22152191
Updates metadata of the graph.
@@ -6142,35 +6118,3 @@ class Prop(object):
61426118
def u8(value): ...
61436119

61446120
def version(): ...
6145-
6146-
class DiskGraphStorage(object):
6147-
def __repr__(self):
6148-
"""Return repr(self)."""
6149-
6150-
def append_node_temporal_properties(self, location, chunk_size=20000000): ...
6151-
def graph_dir(self): ...
6152-
@staticmethod
6153-
def load_from_dir(graph_dir): ...
6154-
@staticmethod
6155-
def load_from_pandas(graph_dir, edge_df, time_col, src_col, dst_col): ...
6156-
@staticmethod
6157-
def load_from_parquets(
6158-
graph_dir,
6159-
layer_parquet_cols,
6160-
node_properties=None,
6161-
chunk_size=10000000,
6162-
t_props_chunk_size=10000000,
6163-
num_threads=4,
6164-
node_type_col=None,
6165-
node_id_col=None,
6166-
): ...
6167-
def load_node_metadata(self, location, col_names=None, chunk_size=None): ...
6168-
def load_node_types(self, location, col_name, chunk_size=None): ...
6169-
def merge_by_sorted_gids(self, other, graph_dir):
6170-
"""
6171-
Merge this graph with another `DiskGraph`. Note that both graphs should have nodes that are
6172-
sorted by their global ids or the resulting graph will be nonsense!
6173-
"""
6174-
6175-
def to_events(self): ...
6176-
def to_persistent(self): ...

python/python/raphtory/algorithms/__init__.pyi

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ __all__ = [
7070
"max_weight_matching",
7171
"Matching",
7272
"Infected",
73-
"connected_components",
7473
]
7574

7675
def dijkstra_single_source_shortest_paths(
@@ -894,5 +893,3 @@ class Infected(object):
894893
Returns:
895894
int:
896895
"""
897-
898-
def connected_components(graph): ...

python/python/raphtory/filter/__init__.pyi

Lines changed: 39 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,11 @@ __all__ = [
3030
"EdgeEndpoint",
3131
"Edge",
3232
"ExplodedEdge",
33-
"Property",
34-
"Metadata",
35-
"TemporalPropertyFilterBuilder",
33+
"FilterOps",
34+
"PropertyFilterOps",
35+
"NodeWindow",
36+
"EdgeWindow",
37+
"ExplodedEdgeWindow",
3638
]
3739

3840
class FilterExpr(object):
@@ -51,41 +53,8 @@ class FilterExpr(object):
5153
def __ror__(self, value):
5254
"""Return value|self."""
5355

54-
class PropertyFilterOps(object):
55-
def __eq__(self, value):
56-
"""Return self==value."""
57-
58-
def __ge__(self, value):
59-
"""Return self>=value."""
60-
61-
def __gt__(self, value):
62-
"""Return self>value."""
63-
64-
def __le__(self, value):
65-
"""Return self<=value."""
66-
67-
def __lt__(self, value):
68-
"""Return self<value."""
69-
70-
def __ne__(self, value):
71-
"""Return self!=value."""
72-
73-
def all(self): ...
74-
def any(self): ...
75-
def avg(self): ...
76-
def contains(self, value): ...
77-
def ends_with(self, value): ...
78-
def fuzzy_search(self, prop_value, levenshtein_distance, prefix_match): ...
79-
def is_in(self, values): ...
80-
def is_none(self): ...
81-
def is_not_in(self, values): ...
82-
def is_some(self): ...
83-
def len(self): ...
84-
def max(self): ...
85-
def min(self): ...
86-
def not_contains(self, value): ...
87-
def starts_with(self, value): ...
88-
def sum(self): ...
56+
class PropertyFilterOps(FilterOps):
57+
def temporal(self): ...
8958

9059
class Node(object):
9160
@staticmethod
@@ -119,6 +88,8 @@ class Node(object):
11988

12089
@staticmethod
12190
def property(name): ...
91+
@staticmethod
92+
def window(py_start, py_end): ...
12293

12394
class EdgeFilterOp(object):
12495
def __eq__(self, value):
@@ -160,32 +131,18 @@ class Edge(object):
160131
def property(name): ...
161132
@staticmethod
162133
def src(): ...
134+
@staticmethod
135+
def window(py_start, py_end): ...
163136

164137
class ExplodedEdge(object):
165138
@staticmethod
166139
def metadata(name): ...
167140
@staticmethod
168141
def property(name): ...
142+
@staticmethod
143+
def window(py_start, py_end): ...
169144

170-
class Property(PropertyFilterOps):
171-
"""
172-
Construct a property filter
173-
174-
Arguments:
175-
name (str): the name of the property to filter
176-
"""
177-
178-
def temporal(self): ...
179-
180-
class Metadata(PropertyFilterOps):
181-
"""
182-
Construct a metadata filter
183-
184-
Arguments:
185-
name (str): the name of the property to filter
186-
"""
187-
188-
class TemporalPropertyFilterBuilder(object):
145+
class FilterOps(object):
189146
def __eq__(self, value):
190147
"""Return self==value."""
191148

@@ -207,9 +164,33 @@ class TemporalPropertyFilterBuilder(object):
207164
def all(self): ...
208165
def any(self): ...
209166
def avg(self): ...
167+
def contains(self, value): ...
168+
def ends_with(self, value): ...
210169
def first(self): ...
211-
def latest(self): ...
170+
def fuzzy_search(self, prop_value, levenshtein_distance, prefix_match): ...
171+
def is_in(self, values): ...
172+
def is_none(self): ...
173+
def is_not_in(self, values): ...
174+
def is_some(self): ...
175+
def last(self): ...
212176
def len(self): ...
213177
def max(self): ...
214178
def min(self): ...
179+
def not_contains(self, value): ...
180+
def starts_with(self, value): ...
215181
def sum(self): ...
182+
183+
class PropertyFilterOps(FilterOps):
184+
def temporal(self): ...
185+
186+
class NodeWindow(object):
187+
def metadata(self, name): ...
188+
def property(self, name): ...
189+
190+
class EdgeWindow(object):
191+
def metadata(self, name): ...
192+
def property(self, name): ...
193+
194+
class ExplodedEdgeWindow(object):
195+
def metadata(self, name): ...
196+
def property(self, name): ...

python/tests/test_base_install/test_filters/semantics/test_edge_property_filter_semantics.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,9 @@ def check(graph):
9696

9797

9898
@with_disk_variants(init_edges_graph, variants=["graph", "event_disk_graph"])
99-
def test_temporal_latest_semantics():
99+
def test_temporal_last_semantics():
100100
def check(graph):
101-
filter_expr = filter.Edge.property("p1").temporal().latest() == 1
101+
filter_expr = filter.Edge.property("p1").temporal().last() == 1
102102
result_ids = sorted(graph.filter(filter_expr).edges.id)
103103
expected_ids = sorted(
104104
[("N1", "N2"), ("N3", "N4"), ("N4", "N5"), ("N6", "N7"), ("N7", "N8")]
@@ -112,9 +112,9 @@ def check(graph):
112112
init_fn=combined([init_edges_graph, init_graph_for_secondary_indexes]),
113113
variants=["graph", "event_disk_graph"],
114114
)
115-
def test_temporal_latest_semantics_for_secondary_indexes3():
115+
def test_temporal_last_semantics_for_secondary_indexes3():
116116
def check(graph):
117-
filter_expr = filter.Edge.property("p1").temporal().latest() == 1
117+
filter_expr = filter.Edge.property("p1").temporal().last() == 1
118118
result_ids = sorted(graph.filter(filter_expr).edges.id)
119119
expected_ids = sorted(
120120
[

python/tests/test_base_install/test_filters/semantics/test_node_property_filter_semantics.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,9 @@ def check(graph):
5656

5757

5858
@with_disk_variants(init_nodes_graph)
59-
def test_temporal_latest_semantics():
59+
def test_temporal_last_semantics():
6060
def check(graph):
61-
filter_expr = filter.Node.property("p1").temporal().latest() == 1
61+
filter_expr = filter.Node.property("p1").temporal().last() == 1
6262
result_ids = sorted(graph.filter(filter_expr).nodes.id)
6363
expected_ids = sorted(["N1", "N3", "N4", "N6", "N7"])
6464
assert result_ids == expected_ids
@@ -69,9 +69,9 @@ def check(graph):
6969
@with_disk_variants(
7070
init_fn=combined([init_nodes_graph, init_graph_for_secondary_indexes]),
7171
)
72-
def test_temporal_latest_semantics_for_secondary_indexes():
72+
def test_temporal_last_semantics_for_secondary_indexes():
7373
def check(graph):
74-
filter_expr = filter.Node.property("p1").temporal().latest() == 1
74+
filter_expr = filter.Node.property("p1").temporal().last() == 1
7575
result_ids = sorted(graph.filter(filter_expr).nodes.id)
7676
expected_ids = sorted(["N1", "N16", "N3", "N4", "N6", "N7"])
7777
assert result_ids == expected_ids

0 commit comments

Comments
 (0)