Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions pandasaurus_cxg/graph_generator/graph_generator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import textwrap
import uuid
from enum import Enum
Expand Down Expand Up @@ -120,9 +121,7 @@ def generate_rdf_graph(self, merge: bool = False):
)
)

self.graph.add(
(dataset_class, URIRef(self.ns[ncname_safe(key)]), Literal(value))
)
self.graph.add((dataset_class, URIRef(self.ns[ncname_safe(key)]), Literal(value)))
has_source = URIRef(HAS_SOURCE["iri"])
self.graph.add((has_source, RDFS.label, Literal(HAS_SOURCE["label"])))

Expand Down Expand Up @@ -171,10 +170,25 @@ def generate_rdf_graph(self, merge: bool = False):
resource = self.ns[_uuid]
self.graph.add((resource, RDF.type, cell_set_class))
self.graph.add((resource, has_source, dataset_class))
# Collect author_cell_type keys here
payload = {}
for k, v in inner_dict.items():
if k in {"subcluster_of", "cluster_matches"}:
continue
self.graph.add((resource, self.ns[ncname_safe(k)], Literal(v)))
elif k in {"cell_count", "cell_type"}:
self.graph.add((resource, self.ns[k], Literal(v)))
else:
# Author annotations go into the JSON payload
payload[k] = v
# Add one JSON literal for all author annotations
if payload:
self.graph.add(
(
resource,
self.ns.author_cell_type_json,
Literal(json.dumps(payload, ensure_ascii=False)),
)
)

# add relationship between each resource based on their predicate in the co_annotation_report
subcluster = URIRef(SUBCLUSTER_OF.get("iri"))
Expand Down
4 changes: 2 additions & 2 deletions pandasaurus_cxg/graph_generator/graph_generator_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,8 @@ def ncname_safe(term: str) -> str:

"""
term = term.replace(" ", "_")
term = re.sub(r'^[^A-Za-z_]+', '', term)
return re.sub(r'[^A-Za-z0-9_\-\.]', '_', term)
term = re.sub(r"^[^A-Za-z_]+", "", term)
return re.sub(r"[^A-Za-z0-9_\-\.]", "_", term)


def parse_citation_field_into_dict(value: str) -> Dict[str, str]:
Expand Down
16 changes: 8 additions & 8 deletions test/graph_generator/test_graph_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def test_generate_rdf_graph_with_merge(graph_generator_instance_for_kidney, expe
)
== expected_stable_ids
)
assert len(graph_generator.graph) == 747
assert len(graph_generator.graph) == 584
assert (
len([[s, p, o] for s, p, o in graph_generator.graph.triples((None, RDF.type, None))]) == 146
)
Expand All @@ -216,7 +216,7 @@ def test_generate_rdf_graph_with_merge(graph_generator_instance_for_kidney, expe
)
]
)
== 90
== 77
)
assert (
len(
Expand All @@ -241,7 +241,7 @@ def test_generate_rdf_graph_with_merge(graph_generator_instance_for_kidney, expe
def test_generate_rdf_graph_without_merge(graph_generator_instance_for_kidney):
graph_generator = graph_generator_instance_for_kidney
graph_generator.generate_rdf_graph()
assert len(graph_generator.graph) == 2177
assert len(graph_generator.graph) == 1398
assert (
len([[s, p, o] for s, p, o in graph_generator.graph.triples((None, RDF.type, None))]) == 312
)
Expand Down Expand Up @@ -270,11 +270,11 @@ def test_enrich_rdf_graph_with_merge(graph_generator_instance_for_kidney):
graph_generator = graph_generator_instance_for_kidney
graph_generator.generate_rdf_graph(merge=True)

assert len(graph_generator.graph) == 747
assert len(graph_generator.graph) == 584

graph_generator.enrich_rdf_graph()

assert len(graph_generator.graph) == 1242
assert len(graph_generator.graph) == 1081
assert (
URIRef(CONSIST_OF.get("iri")),
RDFS.label,
Expand All @@ -291,19 +291,19 @@ def test_enrich_rdf_graph_with_merge(graph_generator_instance_for_kidney):
if str(s).startswith("http://purl.obolibrary.org/obo/CL_")
]
)
== 531
== 529
)


def test_enrich_rdf_graph_without_merge(graph_generator_instance_for_kidney):
graph_generator = graph_generator_instance_for_kidney
graph_generator.generate_rdf_graph()

assert len(graph_generator.graph) == 2177
assert len(graph_generator.graph) == 1398

graph_generator.enrich_rdf_graph()

assert len(graph_generator.graph) == 2674
assert len(graph_generator.graph) == 1895


def test_save_rdf_graph(graph_generator_instance_for_kidney):
Expand Down