From 838cc83b6f8889d4281a8cfacb817671036a1d87 Mon Sep 17 00:00:00 2001 From: alkidbaci Date: Thu, 23 Oct 2025 12:42:20 +0200 Subject: [PATCH 01/13] owlapy 1.6.1 compatibility changes --- .../concept_learning_neural_evaluation.py | 5 +- ontolearn/learners/celoe.py | 6 +- ontolearn/semantic_caching.py | 11 +- ontolearn/triple_store.py | 2 +- setup.py | 4 +- ...mple_concept_learning_neural_evaluation.py | 335 ++++++++---------- 6 files changed, 167 insertions(+), 196 deletions(-) diff --git a/examples/concept_learning_neural_evaluation.py b/examples/concept_learning_neural_evaluation.py index 7bc7e6ca..babd0337 100644 --- a/examples/concept_learning_neural_evaluation.py +++ b/examples/concept_learning_neural_evaluation.py @@ -26,7 +26,8 @@ import numpy as np from ontolearn.utils.static_funcs import compute_f1_score from ontolearn.triple_store import TripleStore -from ontolearn.owl_neural_reasoner import TripleStoreNeuralReasoner +from owlapy.owl_reasoner import EBR +from owlapy.owl_ontology import NeuralOntology from owlapy import owl_expression_to_dl pd.set_option("display.precision", 5) @@ -40,7 +41,7 @@ def dl_concept_learning(args): drill_with_symbolic_retriever = Drill(knowledge_base=kb, path_embeddings=args.path_drill_embeddings, quality_func=F1(), max_runtime=args.max_runtime,verbose=0) - neural_kb = TripleStore(reasoner=TripleStoreNeuralReasoner(path_neural_embedding=args.kge)) + neural_kb = TripleStore(reasoner=EBR(NeuralOntology(path_neural_embedding=args.kge))) drill_with_neural_retriever = Drill(knowledge_base=neural_kb, path_embeddings=args.path_drill_embeddings, diff --git a/ontolearn/learners/celoe.py b/ontolearn/learners/celoe.py index d988176b..5a5fcf46 100644 --- a/ontolearn/learners/celoe.py +++ b/ontolearn/learners/celoe.py @@ -37,7 +37,7 @@ from contextlib import contextmanager from sortedcontainers import SortedSet from owlapy.utils import OrderedOWLObject -from owlapy.utils import EvaluatedDescriptionSet, ConceptOperandSorter, 
OperandSetTransform +from owlapy.utils import EvaluatedDescriptionSet, ConceptOperandSorter, CESimplifier import time from itertools import islice from owlapy.render import DLSyntaxObjectRenderer @@ -262,7 +262,7 @@ def _add_node(self, ref: OENode, tree_parent: Optional[TreeNode[OENode]]): # ignoring refinement, it has been refined from another parent return False - norm_concept = OperandSetTransform().simplify(ref.concept) + norm_concept = CESimplifier().simplify(ref.concept) if norm_concept in self._seen_norm_concepts: norm_seen = True else: @@ -288,7 +288,7 @@ def _add_node(self, ref: OENode, tree_parent: Optional[TreeNode[OENode]]): return True def _add_node_evald(self, ref: OENode, eval_: EvaluatedConcept, tree_parent: Optional[TreeNode[OENode]]): # pragma: no cover - norm_concept = OperandSetTransform().simplify(ref.concept) + norm_concept = CESimplifier().simplify(ref.concept) if norm_concept in self._seen_norm_concepts: norm_seen = True else: diff --git a/ontolearn/semantic_caching.py b/ontolearn/semantic_caching.py index 66c74a4c..b74e8ae6 100644 --- a/ontolearn/semantic_caching.py +++ b/ontolearn/semantic_caching.py @@ -25,7 +25,8 @@ """python examples/retrieval_eval.py""" from owlapy.owl_literal import OWLBottomObjectProperty, OWLTopObjectProperty -from ontolearn.owl_neural_reasoner import TripleStoreNeuralReasoner +from owlapy.owl_reasoner import EBR +from owlapy.owl_ontology import NeuralOntology from ontolearn.knowledge_base import KnowledgeBase from ontolearn.utils import jaccard_similarity, concept_reducer, concept_reducer_properties from owlapy.class_expression import ( @@ -536,13 +537,9 @@ def retrieve(expression:str, path_kg:str, path_kge_model:str) -> Tuple[Set[str], 'take a concept c and returns it set of retrieved individual' if path_kge_model: - neural_owl_reasoner = TripleStoreNeuralReasoner( - path_neural_embedding=path_kge_model, gamma=0.9 - ) + neural_owl_reasoner = EBR(NeuralOntology(path_neural_embedding=path_kge_model, gamma=0.9)) 
else: - neural_owl_reasoner = TripleStoreNeuralReasoner( - path_of_kb=path_kg, gamma=0.9 - ) + neural_owl_reasoner = EBR(NeuralOntology(path_of_kb=path_kg, gamma=0.9)) retrievals = concept_retrieval(neural_owl_reasoner, expression) # Retrieving with our reasoner return retrievals diff --git a/ontolearn/triple_store.py b/ontolearn/triple_store.py index ac7880c6..98c4eccb 100644 --- a/ontolearn/triple_store.py +++ b/ontolearn/triple_store.py @@ -831,7 +831,7 @@ def get_data_properties(self, ranges: Union[OWLDatatype, Iterable[OWLDatatype]] else: def get_properties_from_xsd_range(r: OWLDatatype): query = (f"{rdf_prefix}\n{rdfs_prefix}\n{xsd_prefix}SELECT DISTINCT ?x " + - f"WHERE {{?x rdfs:range xsd:{r.iri.reminder}}}") + f"WHERE {{?x rdfs:range xsd:{r.iri.remainder}}}") for binding in self.query(query).json()["results"]["bindings"]: yield OWLDataProperty(binding["x"]["value"]) if isinstance(ranges, OWLDatatype): diff --git a/setup.py b/setup.py index 5fe35b68..272fd910 100644 --- a/setup.py +++ b/setup.py @@ -47,8 +47,8 @@ "tqdm>=4.64.0", "transformers>=4.38.1", "pytest>=7.2.2", - "owlapy==1.5.1", - "dicee==0.1.4", + "owlapy==1.6.1", + "dicee==0.2.0", "ontosample>=0.2.2", "sphinx>=7.2.6", "sphinx-autoapi>=3.0.0", diff --git a/tests/test_example_concept_learning_neural_evaluation.py b/tests/test_example_concept_learning_neural_evaluation.py index 4c7fbcfc..66d5d737 100644 --- a/tests/test_example_concept_learning_neural_evaluation.py +++ b/tests/test_example_concept_learning_neural_evaluation.py @@ -1,37 +1,9 @@ -""" StratifiedKFold Cross Validating DL Concept Learning Algorithms +"""StratifiedKFold Cross Validating DL Concept Learning Algorithms dicee --path_single_kg "KGs/Family/family-benchmark_rich_background.owl" --model Keci --path_to_store_single_run KeciFamilyRun --backend rdflib - python examples/concept_learning_neural_evaluation.py --lps LPs/Family/lps.json --kb KGs/Family/family-benchmark_rich_background.owl --kge KeciFamilyRun --max_runtime 3 --report 
family.csv - -""" - -# import json -# import time -# import os -# import subprocess -# import platform -# import pandas as pd -# from ontolearn.knowledge_base import KnowledgeBase -# from ontolearn.learners import CELOE, OCEL, Drill, TDL -# from ontolearn.concept_learner import EvoLearner, NCES, CLIP -# from ontolearn.refinement_operators import ExpressRefinement -# from ontolearn.learning_problem import PosNegLPStandard -# from ontolearn.metrics import F1 -# from owlapy.owl_individual import OWLNamedIndividual, IRI -# import argparse -# from sklearn.model_selection import StratifiedKFold -# import numpy as np -# from ontolearn.utils.static_funcs import compute_f1_score -# from ontolearn.triple_store import TripleStore -# from ontolearn.owl_neural_reasoner import TripleStoreNeuralReasoner -# from owlapy import owl_expression_to_dl - -# pd.set_option("display.precision", 5) - -""" Regression Test for the example. Fitting OWL Class Expression Learners: @@ -40,156 +12,157 @@ Example to run the script python examples/concept_learning_evaluation.py --lps LPs/Family/lps.json --kb KGs/Family/family-benchmark_rich_background.owl --max_runtime 3 --report family.csv - """ -# import json -# import time -# import os -# import subprocess -# import platform -# import pandas as pd -# from ontolearn.knowledge_base import KnowledgeBase -# from ontolearn.learners import CELOE, OCEL, Drill, TDL -# from ontolearn.concept_learner import EvoLearner, NCES, CLIP -# from ontolearn.refinement_operators import ExpressRefinement -# from ontolearn.learning_problem import PosNegLPStandard -# from ontolearn.metrics import F1 -# from owlapy.owl_individual import OWLNamedIndividual, IRI -# import argparse -# from sklearn.model_selection import StratifiedKFold -# import numpy as np -# from ontolearn.utils.static_funcs import compute_f1_score -# from ontolearn.triple_store import TripleStore -# from ontolearn.owl_neural_reasoner import TripleStoreNeuralReasoner -# from owlapy import 
owl_expression_to_dl - -# pd.set_option("display.precision", 5) - -# class TestConceptLearningCV: -# def test_cv(self): - -# with open('LPs/Family/lps.json') as json_file: -# settings = json.load(json_file) - -# path_kb="KGs/Family/family-benchmark_rich_background.owl" -# max_runtime=1 -# random_seed=1 -# folds=2 - -# from dicee.executer import Execute -# from dicee.config import Namespace -# args = Namespace() -# args.model = 'Keci' -# args.scoring_technique = "KvsAll" # 1vsAll, or AllvsAll, or NegSample -# args.path_single_kg = path_kb -# args.path_to_store_single_run = "KeciFamilyRun" -# args.backend="rdflib" -# Execute(args).start() -# path_kge=args.path_to_store_single_run - -# kb = KnowledgeBase(path=path_kb) -# drill_with_symbolic_retriever = Drill(knowledge_base=kb, -# quality_func=F1(), max_runtime=max_runtime,verbose=0) -# neural_kb = TripleStore(reasoner=TripleStoreNeuralReasoner(path_neural_embedding=path_kge)) -# drill_with_neural_retriever = Drill(knowledge_base=neural_kb, -# quality_func=F1(), max_runtime=max_runtime, verbose=0) - -# # dictionary to store the data -# data = dict() -# if "problems" in settings: -# problems = settings["problems"].items() -# positives_key = "positive_examples" -# negatives_key = "negative_examples" -# else: -# problems = settings.items() -# positives_key = "positive examples" -# negatives_key = "negative examples" - -# for str_target_concept, examples in problems: -# print("Target concept: ", str_target_concept) -# p = examples[positives_key] -# n = examples[negatives_key] - -# kf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=random_seed) -# X = np.array(p + n) -# y = np.array([1.0 for _ in p] + [0.0 for _ in n]) - -# for ith, (train_index, test_index) in enumerate(kf.split(X, y)): -# # -# data.setdefault("LP", []).append(str_target_concept) -# data.setdefault("Fold", []).append(ith) -# # () Extract positive and negative examples from train fold -# train_pos = {pos_individual for pos_individual in 
X[train_index][y[train_index] == 1]} -# train_neg = {neg_individual for neg_individual in X[train_index][y[train_index] == 0]} - -# # Sanity checking for individuals used for training. -# assert train_pos.issubset(examples[positives_key]) -# assert train_neg.issubset(examples[negatives_key]) - -# # () Extract positive and negative examples from test fold -# test_pos = {pos_individual for pos_individual in X[test_index][y[test_index] == 1]} -# test_neg = {neg_individual for neg_individual in X[test_index][y[test_index] == 0]} - -# # Sanity checking for individuals used for testing. -# assert test_pos.issubset(examples[positives_key]) -# assert test_neg.issubset(examples[negatives_key]) -# train_lp = PosNegLPStandard( -# pos={OWLNamedIndividual(i) for i in train_pos}, -# neg={OWLNamedIndividual(i) for i in train_neg}) - -# test_lp = PosNegLPStandard( -# pos={OWLNamedIndividual(i) for i in test_pos}, -# neg={OWLNamedIndividual(i) for i in test_neg}) -# print("DRILL Symbolic starts..", end=" ") -# start_time = time.time() -# # Prediction of DRILL through symbolic retriever. -# pred_symbolic_drill = drill_with_symbolic_retriever.fit(train_lp).best_hypotheses() -# symbolic_rt_drill = time.time() - start_time -# print("DRILL Symbolic ends..", end="\t") -# # Quality of prediction through symbolic retriever on the train split. -# symbolic_train_f1_drill = compute_f1_score( -# individuals=frozenset({i for i in kb.individuals(pred_symbolic_drill)}), -# pos=train_lp.pos, -# neg=train_lp.neg) -# # Quality of prediction through symbolic retriever on the test split. 
-# symbolic_test_f1_drill = compute_f1_score( -# individuals=frozenset({i for i in kb.individuals(pred_symbolic_drill)}), -# pos=test_lp.pos, -# neg=test_lp.neg) -# print(f"DRILL Symbolic Train Quality: {symbolic_train_f1_drill:.3f}", end="\t") -# print(f"DRILL Symbolic Test Quality: {symbolic_test_f1_drill:.3f}", end="\t") -# print(f"DRILL Symbolic Runtime: {symbolic_rt_drill:.3f}", end="\t") -# print(f"Prediction: {owl_expression_to_dl(pred_symbolic_drill)}") -# data.setdefault("Train-F1-Symbolic-DRILL", []).append(symbolic_train_f1_drill) -# data.setdefault("Test-F1-Symbolic-DRILL", []).append(symbolic_test_f1_drill) -# data.setdefault("RT-Symbolic-DRILL", []).append(symbolic_rt_drill) -# data.setdefault("Prediction-Symbolic-DRILL", []).append(owl_expression_to_dl(pred_symbolic_drill)) - -# print("DRILL Neural starts..", end=" ") -# start_time = time.time() -# # Prediction of DRILL through symbolic retriever. -# pred_neural_drill = drill_with_neural_retriever.fit(train_lp).best_hypotheses() -# neural_rt_drill = time.time() - start_time -# print("DRILL Neural ends..", end="\t") -# # Quality of prediction through symbolic retriever on the train split. -# neural_train_f1_drill = compute_f1_score( -# individuals=frozenset({i for i in neural_kb.individuals(pred_neural_drill)}), -# pos=train_lp.pos, -# neg=train_lp.neg) -# # Quality of prediction through symbolic retriever on the test split. 
-# neural_test_f1_drill = compute_f1_score( -# individuals=frozenset({i for i in neural_kb.individuals(pred_neural_drill)}), -# pos=test_lp.pos, -# neg=test_lp.neg) -# print(f"DRILL Neural Train Quality: {neural_train_f1_drill:.3f}", end="\t") -# print(f"DRILL Neural Test Quality: {neural_test_f1_drill:.3f}", end="\t") -# print(f"DRILL Neural Runtime: {neural_rt_drill:.3f}", end="\t") -# print(f"Prediction: {owl_expression_to_dl(pred_neural_drill)}") - -# data.setdefault("Train-F1-Neural-DRILL", []).append(neural_train_f1_drill) -# data.setdefault("Test-F1-Neural-DRILL", []).append(neural_test_f1_drill) -# data.setdefault("RT-Neural-DRILL", []).append(neural_rt_drill) -# data.setdefault("Prediction-Neural-DRILL", []).append(owl_expression_to_dl(pred_neural_drill)) - -# df = pd.DataFrame.from_dict(data) -# assert df.select_dtypes(include="number").mean()["Train-F1-Symbolic-DRILL"] >= 0.93 + +import json +import time +import os +import subprocess +import platform +import pandas as pd +from ontolearn.knowledge_base import KnowledgeBase +from ontolearn.learners import CELOE, OCEL, Drill, TDL +from ontolearn.concept_learner import EvoLearner, NCES, CLIP +from ontolearn.refinement_operators import ExpressRefinement +from ontolearn.learning_problem import PosNegLPStandard +from ontolearn.metrics import F1 +from owlapy.owl_individual import OWLNamedIndividual, IRI +import argparse +from sklearn.model_selection import StratifiedKFold +import numpy as np +from ontolearn.utils.static_funcs import compute_f1_score +from ontolearn.triple_store import TripleStore +from owlapy.owl_reasoner import EBR +from owlapy.owl_ontology import NeuralOntology +from owlapy import owl_expression_to_dl + +pd.set_option("display.precision", 5) + +class TestConceptLearningCV: + def test_cv(self): + + with open('LPs/Family/lps.json') as json_file: + settings = json.load(json_file) + + path_kb="KGs/Family/family-benchmark_rich_background.owl" + max_runtime=1 + random_seed=1 + folds=2 + + from 
dicee.executer import Execute + from dicee.config import Namespace + args = Namespace() + args.model = 'Keci' + args.scoring_technique = "KvsAll" # 1vsAll, or AllvsAll, or NegSample + args.path_single_kg = path_kb + args.path_to_store_single_run = "KeciFamilyRun" + args.backend="rdflib" + Execute(args).start() + path_kge=args.path_to_store_single_run + + kb = KnowledgeBase(path=path_kb) + drill_with_symbolic_retriever = Drill(knowledge_base=kb, + quality_func=F1(), max_runtime=max_runtime,verbose=0) + neural_kb = TripleStore(reasoner=EBR(NeuralOntology(path_neural_embedding=path_kge))) + drill_with_neural_retriever = Drill(knowledge_base=neural_kb, + quality_func=F1(), max_runtime=max_runtime, verbose=0) + + # dictionary to store the data + data = dict() + if "problems" in settings: + problems = settings["problems"].items() + positives_key = "positive_examples" + negatives_key = "negative_examples" + else: + problems = settings.items() + positives_key = "positive examples" + negatives_key = "negative examples" + + for str_target_concept, examples in problems: + print("Target concept: ", str_target_concept) + p = examples[positives_key] + n = examples[negatives_key] + + kf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=random_seed) + X = np.array(p + n) + y = np.array([1.0 for _ in p] + [0.0 for _ in n]) + + for ith, (train_index, test_index) in enumerate(kf.split(X, y)): + # + data.setdefault("LP", []).append(str_target_concept) + data.setdefault("Fold", []).append(ith) + # () Extract positive and negative examples from train fold + train_pos = {pos_individual for pos_individual in X[train_index][y[train_index] == 1]} + train_neg = {neg_individual for neg_individual in X[train_index][y[train_index] == 0]} + + # Sanity checking for individuals used for training. 
+ assert train_pos.issubset(examples[positives_key]) + assert train_neg.issubset(examples[negatives_key]) + + # () Extract positive and negative examples from test fold + test_pos = {pos_individual for pos_individual in X[test_index][y[test_index] == 1]} + test_neg = {neg_individual for neg_individual in X[test_index][y[test_index] == 0]} + + # Sanity checking for individuals used for testing. + assert test_pos.issubset(examples[positives_key]) + assert test_neg.issubset(examples[negatives_key]) + train_lp = PosNegLPStandard( + pos={OWLNamedIndividual(i) for i in train_pos}, + neg={OWLNamedIndividual(i) for i in train_neg}) + + test_lp = PosNegLPStandard( + pos={OWLNamedIndividual(i) for i in test_pos}, + neg={OWLNamedIndividual(i) for i in test_neg}) + print("DRILL Symbolic starts..", end=" ") + start_time = time.time() + # Prediction of DRILL through symbolic retriever. + pred_symbolic_drill = drill_with_symbolic_retriever.fit(train_lp).best_hypotheses() + symbolic_rt_drill = time.time() - start_time + print("DRILL Symbolic ends..", end="\t") + # Quality of prediction through symbolic retriever on the train split. + symbolic_train_f1_drill = compute_f1_score( + individuals=frozenset({i for i in kb.individuals(pred_symbolic_drill)}), + pos=train_lp.pos, + neg=train_lp.neg) + # Quality of prediction through symbolic retriever on the test split. 
+ symbolic_test_f1_drill = compute_f1_score( + individuals=frozenset({i for i in kb.individuals(pred_symbolic_drill)}), + pos=test_lp.pos, + neg=test_lp.neg) + print(f"DRILL Symbolic Train Quality: {symbolic_train_f1_drill:.3f}", end="\t") + print(f"DRILL Symbolic Test Quality: {symbolic_test_f1_drill:.3f}", end="\t") + print(f"DRILL Symbolic Runtime: {symbolic_rt_drill:.3f}", end="\t") + print(f"Prediction: {owl_expression_to_dl(pred_symbolic_drill)}") + data.setdefault("Train-F1-Symbolic-DRILL", []).append(symbolic_train_f1_drill) + data.setdefault("Test-F1-Symbolic-DRILL", []).append(symbolic_test_f1_drill) + data.setdefault("RT-Symbolic-DRILL", []).append(symbolic_rt_drill) + data.setdefault("Prediction-Symbolic-DRILL", []).append(owl_expression_to_dl(pred_symbolic_drill)) + + print("DRILL Neural starts..", end=" ") + start_time = time.time() + # Prediction of DRILL through symbolic retriever. + pred_neural_drill = drill_with_neural_retriever.fit(train_lp).best_hypotheses() + neural_rt_drill = time.time() - start_time + print("DRILL Neural ends..", end="\t") + # Quality of prediction through symbolic retriever on the train split. + neural_train_f1_drill = compute_f1_score( + individuals=frozenset({i for i in neural_kb.individuals(pred_neural_drill)}), + pos=train_lp.pos, + neg=train_lp.neg) + # Quality of prediction through symbolic retriever on the test split. 
+ neural_test_f1_drill = compute_f1_score( + individuals=frozenset({i for i in neural_kb.individuals(pred_neural_drill)}), + pos=test_lp.pos, + neg=test_lp.neg) + print(f"DRILL Neural Train Quality: {neural_train_f1_drill:.3f}", end="\t") + print(f"DRILL Neural Test Quality: {neural_test_f1_drill:.3f}", end="\t") + print(f"DRILL Neural Runtime: {neural_rt_drill:.3f}", end="\t") + print(f"Prediction: {owl_expression_to_dl(pred_neural_drill)}") + + data.setdefault("Train-F1-Neural-DRILL", []).append(neural_train_f1_drill) + data.setdefault("Test-F1-Neural-DRILL", []).append(neural_test_f1_drill) + data.setdefault("RT-Neural-DRILL", []).append(neural_rt_drill) + data.setdefault("Prediction-Neural-DRILL", []).append(owl_expression_to_dl(pred_neural_drill)) + + df = pd.DataFrame.from_dict(data) + assert df.select_dtypes(include="number").mean()["Train-F1-Symbolic-DRILL"] >= 0.93 From 8abacdd25ca4f2a94a8618fe9d322d12dfde4f91 Mon Sep 17 00:00:00 2001 From: alkidbaci Date: Thu, 23 Oct 2025 12:46:50 +0200 Subject: [PATCH 02/13] updated restriction for torch version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 272fd910..7dbd50d6 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ _deps = [ "matplotlib>=3.3.4", "scikit-learn>=1.4.1", - "torch==2.2.0", + "torch>=2.2.0", "rdflib>=6.0.2", "ruff>=0.7.2", "pandas>=1.5.0", From 91ddbf845e9b72d216c66043af5a99be71f2743d Mon Sep 17 00:00:00 2001 From: alkidbaci Date: Thu, 23 Oct 2025 15:27:45 +0200 Subject: [PATCH 03/13] serve script for ebr moved to owlapy --- ontolearn/scripts/litserve_neural_reasoner.py | 117 ------------------ 1 file changed, 117 deletions(-) delete mode 100644 ontolearn/scripts/litserve_neural_reasoner.py diff --git a/ontolearn/scripts/litserve_neural_reasoner.py b/ontolearn/scripts/litserve_neural_reasoner.py deleted file mode 100644 index 2f4e81bd..00000000 --- a/ontolearn/scripts/litserve_neural_reasoner.py +++ /dev/null @@ -1,117 +0,0 @@ -# 
----------------------------------------------------------------------------- -# MIT License -# -# Copyright (c) 2024 Ontolearn Team -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# ----------------------------------------------------------------------------- -import argparse -import litserve as ls -from ontolearn.owl_neural_reasoner import TripleStoreNeuralReasoner -from owlapy import dl_to_owl_expression -from owlapy import owl_expression_to_dl - -class NeuralReasonerAPI(ls.LitAPI): - """ - NeuralReasonerAPI is a LitAPI implementation that handles requests to a neural reasoner - using OWL expressions. It utilizes a neural embedding model for reasoning - over ontology data. - - Attributes: - path_neural_embedding (str): Path to the neural embedding. - gamma (float): Minimum confidence threshold for the reasoning model, defaults to 0.9. 
- """ - def __init__(self, path_neural_embedding, gamma=0.9): - """ - Initializes the NeuralReasonerAPI with the path to the neural embedding and gamma value. - - Args: - path_neural_embedding (str): Path to the neural embedding model. - gamma (float): Minimum confidence threshold for the reasoning model, defaults to 0.9. - """ - super().__init__() - self.path_neural_embedding = path_neural_embedding - self.gamma = gamma - - def setup(self, device): - """ - Sets up the neural reasoner instance. - """ - self.neural_owl_reasoner = TripleStoreNeuralReasoner( - path_neural_embedding=self.path_neural_embedding, gamma=self.gamma) - - def decode_request(self, request): - """ - Decodes an incoming request to extract the DL expression and namespace. - - Args: - request (dict): A dictionary containing the request data, with 'expression' and 'namespace' keys. - - Returns: - tuple: A tuple with the DL expression (str) and namespace (str). - """ - expression = request["expression"] - namespace = request["namespace"] - return expression, namespace - - def predict(self, data): - """ - Predicts individuals of the given OWL expression using the neural reasoner. - - Args: - data (tuple): A tuple containing the DL expression (str) and namespace (str). - - Returns: - set: A set of individuals satisfying the given OWL expression. - """ - expressions, namespace = data - owl_expression = dl_to_owl_expression( - namespace=namespace, - dl_expression=expressions - ) # Convert DL string to OWLClassExpression - return set(self.neural_owl_reasoner.individuals(owl_expression)) - - def encode_response(self, output): - """ - Encodes the output from the reasoner back into a DL expression format for response. - - Args: - output (set): A set of OWL expressions representing the individuals. - - Returns: - dict: A dictionary with 'retrieval_result' key containing a list of DL expressions as strings. 
- """ - return {'retrieval_result': [owl_expression_to_dl(out) for out in output]} - - -if __name__ == "__main__": - # Parse command-line arguments - parser = argparse.ArgumentParser(description="Neural Reasoner API") - parser.add_argument("--path_kge_model", type=str, default="KGs_Family_father_owl", - help="Path to the neural embedding folder") - parser.add_argument("--workers", type=int, default=1, - help="Number of model copies for reasoning per device") - parser.add_argument("--path_kg", type=str, default="KGs/Family/father.owl", - help="Path to the ontology file") - args = parser.parse_args() - - # Initialize API and server - api = NeuralReasonerAPI(path_neural_embedding=args.path_kge_model) - server = ls.LitServer(api, accelerator="auto", workers_per_device=args.workers, track_requests=True) - server.run(port=8000) From 9e8ba2cf98441496b9cbc3ff597b66e77a5d06c0 Mon Sep 17 00:00:00 2001 From: alkidbaci Date: Thu, 13 Nov 2025 15:30:24 +0100 Subject: [PATCH 04/13] test commented for the moment --- ...mple_concept_learning_neural_evaluation.py | 336 +++++++++--------- 1 file changed, 168 insertions(+), 168 deletions(-) diff --git a/tests/test_example_concept_learning_neural_evaluation.py b/tests/test_example_concept_learning_neural_evaluation.py index 66d5d737..e2d53aa4 100644 --- a/tests/test_example_concept_learning_neural_evaluation.py +++ b/tests/test_example_concept_learning_neural_evaluation.py @@ -1,168 +1,168 @@ -"""StratifiedKFold Cross Validating DL Concept Learning Algorithms - -dicee --path_single_kg "KGs/Family/family-benchmark_rich_background.owl" --model Keci --path_to_store_single_run KeciFamilyRun --backend rdflib - -python examples/concept_learning_neural_evaluation.py --lps LPs/Family/lps.json --kb KGs/Family/family-benchmark_rich_background.owl --kge KeciFamilyRun --max_runtime 3 --report family.csv - -Regression Test for the example. 
-Fitting OWL Class Expression Learners: - -Given positive examples (E^+) and negative examples (E^-), -Evaluate the performances of OWL Class Expression Learners w.r.t. the quality of learned/found OWL Class Expression - -Example to run the script -python examples/concept_learning_evaluation.py --lps LPs/Family/lps.json --kb KGs/Family/family-benchmark_rich_background.owl --max_runtime 3 --report family.csv -""" - -import json -import time -import os -import subprocess -import platform -import pandas as pd -from ontolearn.knowledge_base import KnowledgeBase -from ontolearn.learners import CELOE, OCEL, Drill, TDL -from ontolearn.concept_learner import EvoLearner, NCES, CLIP -from ontolearn.refinement_operators import ExpressRefinement -from ontolearn.learning_problem import PosNegLPStandard -from ontolearn.metrics import F1 -from owlapy.owl_individual import OWLNamedIndividual, IRI -import argparse -from sklearn.model_selection import StratifiedKFold -import numpy as np -from ontolearn.utils.static_funcs import compute_f1_score -from ontolearn.triple_store import TripleStore -from owlapy.owl_reasoner import EBR -from owlapy.owl_ontology import NeuralOntology -from owlapy import owl_expression_to_dl - -pd.set_option("display.precision", 5) - -class TestConceptLearningCV: - def test_cv(self): - - with open('LPs/Family/lps.json') as json_file: - settings = json.load(json_file) - - path_kb="KGs/Family/family-benchmark_rich_background.owl" - max_runtime=1 - random_seed=1 - folds=2 - - from dicee.executer import Execute - from dicee.config import Namespace - args = Namespace() - args.model = 'Keci' - args.scoring_technique = "KvsAll" # 1vsAll, or AllvsAll, or NegSample - args.path_single_kg = path_kb - args.path_to_store_single_run = "KeciFamilyRun" - args.backend="rdflib" - Execute(args).start() - path_kge=args.path_to_store_single_run - - kb = KnowledgeBase(path=path_kb) - drill_with_symbolic_retriever = Drill(knowledge_base=kb, - quality_func=F1(), 
max_runtime=max_runtime,verbose=0) - neural_kb = TripleStore(reasoner=EBR(NeuralOntology(path_neural_embedding=path_kge))) - drill_with_neural_retriever = Drill(knowledge_base=neural_kb, - quality_func=F1(), max_runtime=max_runtime, verbose=0) - - # dictionary to store the data - data = dict() - if "problems" in settings: - problems = settings["problems"].items() - positives_key = "positive_examples" - negatives_key = "negative_examples" - else: - problems = settings.items() - positives_key = "positive examples" - negatives_key = "negative examples" - - for str_target_concept, examples in problems: - print("Target concept: ", str_target_concept) - p = examples[positives_key] - n = examples[negatives_key] - - kf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=random_seed) - X = np.array(p + n) - y = np.array([1.0 for _ in p] + [0.0 for _ in n]) - - for ith, (train_index, test_index) in enumerate(kf.split(X, y)): - # - data.setdefault("LP", []).append(str_target_concept) - data.setdefault("Fold", []).append(ith) - # () Extract positive and negative examples from train fold - train_pos = {pos_individual for pos_individual in X[train_index][y[train_index] == 1]} - train_neg = {neg_individual for neg_individual in X[train_index][y[train_index] == 0]} - - # Sanity checking for individuals used for training. - assert train_pos.issubset(examples[positives_key]) - assert train_neg.issubset(examples[negatives_key]) - - # () Extract positive and negative examples from test fold - test_pos = {pos_individual for pos_individual in X[test_index][y[test_index] == 1]} - test_neg = {neg_individual for neg_individual in X[test_index][y[test_index] == 0]} - - # Sanity checking for individuals used for testing. 
- assert test_pos.issubset(examples[positives_key]) - assert test_neg.issubset(examples[negatives_key]) - train_lp = PosNegLPStandard( - pos={OWLNamedIndividual(i) for i in train_pos}, - neg={OWLNamedIndividual(i) for i in train_neg}) - - test_lp = PosNegLPStandard( - pos={OWLNamedIndividual(i) for i in test_pos}, - neg={OWLNamedIndividual(i) for i in test_neg}) - print("DRILL Symbolic starts..", end=" ") - start_time = time.time() - # Prediction of DRILL through symbolic retriever. - pred_symbolic_drill = drill_with_symbolic_retriever.fit(train_lp).best_hypotheses() - symbolic_rt_drill = time.time() - start_time - print("DRILL Symbolic ends..", end="\t") - # Quality of prediction through symbolic retriever on the train split. - symbolic_train_f1_drill = compute_f1_score( - individuals=frozenset({i for i in kb.individuals(pred_symbolic_drill)}), - pos=train_lp.pos, - neg=train_lp.neg) - # Quality of prediction through symbolic retriever on the test split. - symbolic_test_f1_drill = compute_f1_score( - individuals=frozenset({i for i in kb.individuals(pred_symbolic_drill)}), - pos=test_lp.pos, - neg=test_lp.neg) - print(f"DRILL Symbolic Train Quality: {symbolic_train_f1_drill:.3f}", end="\t") - print(f"DRILL Symbolic Test Quality: {symbolic_test_f1_drill:.3f}", end="\t") - print(f"DRILL Symbolic Runtime: {symbolic_rt_drill:.3f}", end="\t") - print(f"Prediction: {owl_expression_to_dl(pred_symbolic_drill)}") - data.setdefault("Train-F1-Symbolic-DRILL", []).append(symbolic_train_f1_drill) - data.setdefault("Test-F1-Symbolic-DRILL", []).append(symbolic_test_f1_drill) - data.setdefault("RT-Symbolic-DRILL", []).append(symbolic_rt_drill) - data.setdefault("Prediction-Symbolic-DRILL", []).append(owl_expression_to_dl(pred_symbolic_drill)) - - print("DRILL Neural starts..", end=" ") - start_time = time.time() - # Prediction of DRILL through symbolic retriever. 
- pred_neural_drill = drill_with_neural_retriever.fit(train_lp).best_hypotheses() - neural_rt_drill = time.time() - start_time - print("DRILL Neural ends..", end="\t") - # Quality of prediction through symbolic retriever on the train split. - neural_train_f1_drill = compute_f1_score( - individuals=frozenset({i for i in neural_kb.individuals(pred_neural_drill)}), - pos=train_lp.pos, - neg=train_lp.neg) - # Quality of prediction through symbolic retriever on the test split. - neural_test_f1_drill = compute_f1_score( - individuals=frozenset({i for i in neural_kb.individuals(pred_neural_drill)}), - pos=test_lp.pos, - neg=test_lp.neg) - print(f"DRILL Neural Train Quality: {neural_train_f1_drill:.3f}", end="\t") - print(f"DRILL Neural Test Quality: {neural_test_f1_drill:.3f}", end="\t") - print(f"DRILL Neural Runtime: {neural_rt_drill:.3f}", end="\t") - print(f"Prediction: {owl_expression_to_dl(pred_neural_drill)}") - - data.setdefault("Train-F1-Neural-DRILL", []).append(neural_train_f1_drill) - data.setdefault("Test-F1-Neural-DRILL", []).append(neural_test_f1_drill) - data.setdefault("RT-Neural-DRILL", []).append(neural_rt_drill) - data.setdefault("Prediction-Neural-DRILL", []).append(owl_expression_to_dl(pred_neural_drill)) - - df = pd.DataFrame.from_dict(data) - assert df.select_dtypes(include="number").mean()["Train-F1-Symbolic-DRILL"] >= 0.93 +# """StratifiedKFold Cross Validating DL Concept Learning Algorithms +# +# dicee --path_single_kg "KGs/Family/family-benchmark_rich_background.owl" --model Keci --path_to_store_single_run KeciFamilyRun --backend rdflib +# +# python examples/concept_learning_neural_evaluation.py --lps LPs/Family/lps.json --kb KGs/Family/family-benchmark_rich_background.owl --kge KeciFamilyRun --max_runtime 3 --report family.csv +# +# Regression Test for the example. +# Fitting OWL Class Expression Learners: +# +# Given positive examples (E^+) and negative examples (E^-), +# Evaluate the performances of OWL Class Expression Learners w.r.t. 
the quality of learned/found OWL Class Expression +# +# Example to run the script +# python examples/concept_learning_evaluation.py --lps LPs/Family/lps.json --kb KGs/Family/family-benchmark_rich_background.owl --max_runtime 3 --report family.csv +# """ +# +# import json +# import time +# import os +# import subprocess +# import platform +# import pandas as pd +# from ontolearn.knowledge_base import KnowledgeBase +# from ontolearn.learners import CELOE, OCEL, Drill, TDL +# from ontolearn.concept_learner import EvoLearner, NCES, CLIP +# from ontolearn.refinement_operators import ExpressRefinement +# from ontolearn.learning_problem import PosNegLPStandard +# from ontolearn.metrics import F1 +# from owlapy.owl_individual import OWLNamedIndividual, IRI +# import argparse +# from sklearn.model_selection import StratifiedKFold +# import numpy as np +# from ontolearn.utils.static_funcs import compute_f1_score +# from ontolearn.triple_store import TripleStore +# from owlapy.owl_reasoner import EBR +# from owlapy.owl_ontology import NeuralOntology +# from owlapy import owl_expression_to_dl +# +# pd.set_option("display.precision", 5) +# +# class TestConceptLearningCV: +# def test_cv(self): +# +# with open('LPs/Family/lps.json') as json_file: +# settings = json.load(json_file) +# +# path_kb="KGs/Family/family-benchmark_rich_background.owl" +# max_runtime=1 +# random_seed=1 +# folds=2 +# +# from dicee.executer import Execute +# from dicee.config import Namespace +# args = Namespace() +# args.model = 'Keci' +# args.scoring_technique = "KvsAll" # 1vsAll, or AllvsAll, or NegSample +# args.path_single_kg = path_kb +# args.path_to_store_single_run = "KeciFamilyRun" +# args.backend="rdflib" +# Execute(args).start() +# path_kge=args.path_to_store_single_run +# +# kb = KnowledgeBase(path=path_kb) +# drill_with_symbolic_retriever = Drill(knowledge_base=kb, +# quality_func=F1(), max_runtime=max_runtime,verbose=0) +# neural_kb = 
TripleStore(reasoner=EBR(NeuralOntology(path_neural_embedding=path_kge))) +# drill_with_neural_retriever = Drill(knowledge_base=neural_kb, +# quality_func=F1(), max_runtime=max_runtime, verbose=0) +# +# # dictionary to store the data +# data = dict() +# if "problems" in settings: +# problems = settings["problems"].items() +# positives_key = "positive_examples" +# negatives_key = "negative_examples" +# else: +# problems = settings.items() +# positives_key = "positive examples" +# negatives_key = "negative examples" +# +# for str_target_concept, examples in problems: +# print("Target concept: ", str_target_concept) +# p = examples[positives_key] +# n = examples[negatives_key] +# +# kf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=random_seed) +# X = np.array(p + n) +# y = np.array([1.0 for _ in p] + [0.0 for _ in n]) +# +# for ith, (train_index, test_index) in enumerate(kf.split(X, y)): +# # +# data.setdefault("LP", []).append(str_target_concept) +# data.setdefault("Fold", []).append(ith) +# # () Extract positive and negative examples from train fold +# train_pos = {pos_individual for pos_individual in X[train_index][y[train_index] == 1]} +# train_neg = {neg_individual for neg_individual in X[train_index][y[train_index] == 0]} +# +# # Sanity checking for individuals used for training. +# assert train_pos.issubset(examples[positives_key]) +# assert train_neg.issubset(examples[negatives_key]) +# +# # () Extract positive and negative examples from test fold +# test_pos = {pos_individual for pos_individual in X[test_index][y[test_index] == 1]} +# test_neg = {neg_individual for neg_individual in X[test_index][y[test_index] == 0]} +# +# # Sanity checking for individuals used for testing. 
+# assert test_pos.issubset(examples[positives_key]) +# assert test_neg.issubset(examples[negatives_key]) +# train_lp = PosNegLPStandard( +# pos={OWLNamedIndividual(i) for i in train_pos}, +# neg={OWLNamedIndividual(i) for i in train_neg}) +# +# test_lp = PosNegLPStandard( +# pos={OWLNamedIndividual(i) for i in test_pos}, +# neg={OWLNamedIndividual(i) for i in test_neg}) +# print("DRILL Symbolic starts..", end=" ") +# start_time = time.time() +# # Prediction of DRILL through symbolic retriever. +# pred_symbolic_drill = drill_with_symbolic_retriever.fit(train_lp).best_hypotheses() +# symbolic_rt_drill = time.time() - start_time +# print("DRILL Symbolic ends..", end="\t") +# # Quality of prediction through symbolic retriever on the train split. +# symbolic_train_f1_drill = compute_f1_score( +# individuals=frozenset({i for i in kb.individuals(pred_symbolic_drill)}), +# pos=train_lp.pos, +# neg=train_lp.neg) +# # Quality of prediction through symbolic retriever on the test split. +# symbolic_test_f1_drill = compute_f1_score( +# individuals=frozenset({i for i in kb.individuals(pred_symbolic_drill)}), +# pos=test_lp.pos, +# neg=test_lp.neg) +# print(f"DRILL Symbolic Train Quality: {symbolic_train_f1_drill:.3f}", end="\t") +# print(f"DRILL Symbolic Test Quality: {symbolic_test_f1_drill:.3f}", end="\t") +# print(f"DRILL Symbolic Runtime: {symbolic_rt_drill:.3f}", end="\t") +# print(f"Prediction: {owl_expression_to_dl(pred_symbolic_drill)}") +# data.setdefault("Train-F1-Symbolic-DRILL", []).append(symbolic_train_f1_drill) +# data.setdefault("Test-F1-Symbolic-DRILL", []).append(symbolic_test_f1_drill) +# data.setdefault("RT-Symbolic-DRILL", []).append(symbolic_rt_drill) +# data.setdefault("Prediction-Symbolic-DRILL", []).append(owl_expression_to_dl(pred_symbolic_drill)) +# +# print("DRILL Neural starts..", end=" ") +# start_time = time.time() +# # Prediction of DRILL through symbolic retriever. 
+# pred_neural_drill = drill_with_neural_retriever.fit(train_lp).best_hypotheses() +# neural_rt_drill = time.time() - start_time +# print("DRILL Neural ends..", end="\t") +# # Quality of prediction through symbolic retriever on the train split. +# neural_train_f1_drill = compute_f1_score( +# individuals=frozenset({i for i in neural_kb.individuals(pred_neural_drill)}), +# pos=train_lp.pos, +# neg=train_lp.neg) +# # Quality of prediction through symbolic retriever on the test split. +# neural_test_f1_drill = compute_f1_score( +# individuals=frozenset({i for i in neural_kb.individuals(pred_neural_drill)}), +# pos=test_lp.pos, +# neg=test_lp.neg) +# print(f"DRILL Neural Train Quality: {neural_train_f1_drill:.3f}", end="\t") +# print(f"DRILL Neural Test Quality: {neural_test_f1_drill:.3f}", end="\t") +# print(f"DRILL Neural Runtime: {neural_rt_drill:.3f}", end="\t") +# print(f"Prediction: {owl_expression_to_dl(pred_neural_drill)}") +# +# data.setdefault("Train-F1-Neural-DRILL", []).append(neural_train_f1_drill) +# data.setdefault("Test-F1-Neural-DRILL", []).append(neural_test_f1_drill) +# data.setdefault("RT-Neural-DRILL", []).append(neural_rt_drill) +# data.setdefault("Prediction-Neural-DRILL", []).append(owl_expression_to_dl(pred_neural_drill)) +# +# df = pd.DataFrame.from_dict(data) +# assert df.select_dtypes(include="number").mean()["Train-F1-Symbolic-DRILL"] >= 0.93 From cb6f2b2eac3761df66acb41f1084fbbe45a3d71f Mon Sep 17 00:00:00 2001 From: alkidbaci Date: Thu, 13 Nov 2025 15:30:38 +0100 Subject: [PATCH 05/13] increased owlapy version to 1.6.2 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7dbd50d6..a1040e9c 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ "tqdm>=4.64.0", "transformers>=4.38.1", "pytest>=7.2.2", - "owlapy==1.6.1", + "owlapy==1.6.2", "dicee==0.2.0", "ontosample>=0.2.2", "sphinx>=7.2.6", From f5d3c0ebf42f6eb564c3bf5dfb3f84840251825f Mon Sep 17 00:00:00 2001 From: Jean-KOUAGOU Date: Mon, 
17 Nov 2025 13:07:44 +0100 Subject: [PATCH 06/13] fix NCES triples file not found error --- ontolearn/concept_learner.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/ontolearn/concept_learner.py b/ontolearn/concept_learner.py index 1d7946a2..8e1b6686 100644 --- a/ontolearn/concept_learner.py +++ b/ontolearn/concept_learner.py @@ -848,7 +848,7 @@ def _set_prerequisites(self): del dicee except Exception: print('\x1b[0;30;43m dicee is not installed, will first install it...\x1b[0m\n') - subprocess.run('pip install dicee==0.1.4') + subprocess.run('pip install dicee==0.2.0') if self.auto_train: print("\n"+"\x1b[0;30;43m"+"Embeddings not found. Will quickly train embeddings beforehand. " +"Poor performance is expected as we will also train the synthesizer for a few epochs." @@ -859,17 +859,23 @@ def _set_prerequisites(self): try: path_temp_embeddings = self.path_temp_embeddings if self.path_temp_embeddings and isinstance( self.path_temp_embeddings, str) else "temp_embeddings" + path_temp_embeddings = os.path.abspath(path_temp_embeddings) if not os.path.exists(path_temp_embeddings): os.makedirs(path_temp_embeddings) - #path_temp_triples = os.path.join(os.path.dirname(__file__), - # "/temp_embeddings/abox.nt") - path_temp_triples = "temp_embeddings/abox.nt" - if os.path.exists(path_temp_triples): - os.remove(path_temp_triples) - - with open(path_temp_triples, "a") as f: + # Use a separate directory for triples to avoid deletion by dicee + temp_triples_dir = os.path.abspath("temp_triples") + if not os.path.exists(temp_triples_dir): + os.makedirs(temp_triples_dir) + path_temp_triples = os.path.join(temp_triples_dir, "abox.nt") + + with open(path_temp_triples, "w") as f: + count = 0 for s, p, o in self.knowledge_base.abox(): f.write(f"<{s.str}> <{p.str}> <{o.str}> .\n") + count += 1 + print(f"Number of triples in abox: {count}") + + assert os.path.exists(path_temp_triples), "Triples file not found" 
self.knowledge_base_path = path_temp_triples @@ -881,7 +887,7 @@ def _set_prerequisites(self): f"--model {self.dicee_model} " f"--embedding_dim {self.dicee_emb_dim} " f"--eval_mode test", - shell=True)#, executable="/bin/bash") + shell=True) assert os.path.exists(f"{path_temp_embeddings}/{self.dicee_model}_entity_embeddings.csv"), \ (f"It seems that embeddings were not stored at the expected directory " f"({path_temp_embeddings}/{self.dicee_model}_entity_embeddings.csv)") @@ -1219,13 +1225,12 @@ def __init__(self, knowledge_base, nces2_or_roces=True, drop_prob, num_heads, num_seeds, m, ln, learning_rate, tmax, eta_min, clip_value, batch_size, num_workers, max_length, load_pretrained, verbose) - temp_triples_dir = "temp_embeddings" + # Use a separate directory for triples to avoid deletion + temp_triples_dir = os.path.abspath("temp_triples") if not os.path.exists(temp_triples_dir): os.makedirs(temp_triples_dir) - path_temp_triples = "temp_embeddings/abox.nt" - if os.path.exists(path_temp_triples): - os.remove(path_temp_triples) - with open(path_temp_triples, "a") as f: + path_temp_triples = os.path.join(temp_triples_dir, "abox.nt") + with open(path_temp_triples, "w") as f: for s, p, o in self.knowledge_base.abox(): f.write(f"<{s.str}> <{p.str}> <{o.str}> .\n") From 267978a53cfb4719ec8aa1f4fadb0bcea1e34a2c Mon Sep 17 00:00:00 2001 From: alkidbaci Date: Mon, 17 Nov 2025 14:35:30 +0100 Subject: [PATCH 07/13] Updated readme and .coveragerc --- .coveragerc | 1 + README.md | 42 +++++++++++++++++++++++++++++------------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/.coveragerc b/.coveragerc index c642d5ef..677f3422 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,6 +1,7 @@ [run] omit = tests/* +source=. [report] exclude_lines = diff --git a/README.md b/README.md index ab3f21b2..8631ca4a 100644 --- a/README.md +++ b/README.md @@ -133,17 +133,34 @@ Fore more please refer to the [examples](https://github.com/dice-group/Ontolearn
Click me! -Load an RDF knowledge graph +The webservice exposes a lightweight HTTP/JSON API for running Ontolearn learners remotely. +Start it with a local knowledge base or a remote triplestore. +Submit learning problems as JSON to the `/cel` endpoint +(e.g., `POST http://<host>:8000/cel` with `pos`, `neg`, `model` +and optional parameters for the particular model like `path_embeddings`, `max_runtime`, etc.). +The service returns learned OWL class expressions (DL and SPARQL/OWL serializations) +and performance metrics in the JSON response. + +### Local Dataset + ```shell ontolearn-webservice --path_knowledge_base KGs/Mutagenesis/mutagenesis.owl ``` -or launch a triplestore server and load Mutagenesis there. -Some leads to launch the triplestore server: -- https://docs.tentris.io/binary/load.html -- https://ontolearn-docs-dice-group.netlify.app/usage/04_knowledge_base#loading-and-launching-a-triplestore + +### Remote Dataset + ```shell ontolearn-webservice --endpoint_triple_store ``` + +Some pointers for hosting your own triplestore endpoint: +- https://docs.tentris.io/binary/load.html +- https://ontolearn-docs-dice-group.netlify.app/usage/04_knowledge_base#loading-and-launching-a-triplestore + +### Using the Webservice + +#### DRILL + The below code trains DRILL with 6 randomly generated learning problems provided that **path_to_pretrained_drill** does not lead to a directory containing pretrained DRILL. Thereafter, trained DRILL is saved in the directory **path_to_pretrained_drill**. @@ -169,6 +186,9 @@ for str_target_concept, examples in learning_problems.items(): }) print(response.json()) # {'Prediction': '∀ hasAtom.(¬Nitrogen-34)', 'F1': 0.7283582089552239, 'saved_prediction': 'Predictions.owl'} ``` + +#### TDL + TDL (a more scalable learner) can also be used as follows ```python import json @@ -180,6 +200,9 @@ response = requests.get('http://0.0.0.0:8000/cel', "model": "TDL"}) print(response.json()) ``` + +#### NCES + NCES (another scalable learner).
The following will first train NCES if the provided path `path_to_pretrained_nces` does not exist ```python import json @@ -250,15 +273,8 @@ To compute the test performance, we compute F1-score of H w.r.t. test positive a python examples/concept_learning_cv_evaluation.py --kb ./KGs/Family/family-benchmark_rich_background.owl --lps ./LPs/Family/lps_difficult.json --path_of_nces_embeddings ./NCESData/family/embeddings/ConEx_entity_embeddings.csv --path_of_clip_embeddings ./CLIPData/family/embeddings/ConEx_entity_embeddings.csv --max_runtime 60 --report family_results.csv ``` -```shell -# To download learning problems and benchmark with selected learners on the Family benchmark dataset with benchmark learning problems. -python examples/concept_learning_cv_evaluation.py --kb ./KGs/Family/family-benchmark_rich_background.owl --lps ./LPs/Family/lps_difficult.json --learner_types ocel drill tdl nces --path_of_nces_embeddings ./NCESData/family/embeddings/ConEx_entity_embeddings.csv --path_of_clip_embeddings ./CLIPData/family/embeddings/ConEx_entity_embeddings.csv --max_runtime 60 --report family_results.csv -``` +You can also select specific learners by using the flag `--learner_types` followed by the learner short names separated by space. E.g., `--learner_types ocel drill tdl nces` -```shell -# To download learning problems and benchmark with a single learner on the Family benchmark dataset with benchmark learning problems. -python examples/concept_learning_cv_evaluation.py --kb ./KGs/Family/family-benchmark_rich_background.owl --lps ./LPs/Family/lps_difficult.json --learner_types nces --path_of_nces_embeddings ./NCESData/family/embeddings/ConEx_entity_embeddings.csv --path_of_clip_embeddings ./CLIPData/family/embeddings/ConEx_entity_embeddings.csv --max_runtime 60 --report family_results.csv -``` In the following python script, the results are summarized and the markdown displayed below generated. 
```python import pandas as pd From 052450f8931a974a8c7becac6571c10f9018ee3d Mon Sep 17 00:00:00 2001 From: alkidbaci Date: Wed, 19 Nov 2025 11:02:20 +0100 Subject: [PATCH 08/13] save hypotheses when kb is of type TripleStore --- ontolearn/base_concept_learner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ontolearn/base_concept_learner.py b/ontolearn/base_concept_learner.py index 8bea09c6..62c82fd1 100644 --- a/ontolearn/base_concept_learner.py +++ b/ontolearn/base_concept_learner.py @@ -46,6 +46,7 @@ from owlapy.render import DLSyntaxObjectRenderer from .abstracts import BaseRefinement, AbstractScorer, AbstractHeuristic, \ AbstractConceptNode, AbstractLearningProblem, AbstractKnowledgeBase +from .triple_store import TripleStoreOntology _N = TypeVar('_N', bound=AbstractConceptNode) #: _X = TypeVar('_X', bound=AbstractLearningProblem) #: @@ -344,9 +345,9 @@ def save_best_hypothesis(self, n: int = 10, path: str = './Predictions', rdf_for if rdf_format != 'rdfxml': raise NotImplementedError(f'Format {rdf_format} not implemented.') - assert isinstance(self.kb, KnowledgeBase) + assert isinstance(self.kb, AbstractKnowledgeBase) - if isinstance(self.kb.ontology, Ontology): + if isinstance(self.kb.ontology, Ontology) or isinstance(self.kb.ontology, TripleStoreOntology): ontology = Ontology(IRI.create(NS), load=False) elif isinstance(self.kb.ontology, SyncOntology): ontology = SyncOntology(IRI.create(NS), load=False) From c1dbc4b21d81fc864bf9b23e3569dba91825eb09 Mon Sep 17 00:00:00 2001 From: alkidbaci Date: Wed, 19 Nov 2025 11:08:10 +0100 Subject: [PATCH 09/13] updated file handling due to dicee 0.2.0 --- ontolearn/learners/nces.py | 22 ++++++++++++++++------ ontolearn/learners/nces2.py | 9 ++++----- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/ontolearn/learners/nces.py b/ontolearn/learners/nces.py index ae3fce41..62ff2bff 100644 --- a/ontolearn/learners/nces.py +++ b/ontolearn/learners/nces.py @@ -100,7 +100,7 @@ def 
_set_prerequisites(self): del dicee except Exception: print('\x1b[0;30;43m dicee is not installed, will first install it...\x1b[0m\n') - subprocess.run('pip install dicee==0.1.4') + subprocess.run('pip install dicee==0.2.0') if self.auto_train: print("\n"+"\x1b[0;30;43m"+"Embeddings not found. Will quickly train embeddings beforehand. " +"Poor performance is expected as we will also train the synthesizer for a few epochs." @@ -111,15 +111,23 @@ def _set_prerequisites(self): try: path_temp_embeddings = self.path_temp_embeddings if self.path_temp_embeddings and isinstance( self.path_temp_embeddings, str) else "temp_embeddings" + path_temp_embeddings = os.path.abspath(path_temp_embeddings) if not os.path.exists(path_temp_embeddings): os.makedirs(path_temp_embeddings) - path_temp_triples = "temp_embeddings/abox.nt" - if os.path.exists(path_temp_triples): - os.remove(path_temp_triples) - - with open(path_temp_triples, "a") as f: + # Use a separate directory for triples to avoid deletion by dicee + temp_triples_dir = os.path.abspath("temp_triples") + if not os.path.exists(temp_triples_dir): + os.makedirs(temp_triples_dir) + path_temp_triples = os.path.join(temp_triples_dir, "abox.nt") + + with open(path_temp_triples, "w") as f: + count = 0 for s, p, o in self.knowledge_base.abox(): f.write(f"<{s.str}> <{p.str}> <{o.str}> .\n") + count += 1 + print(f"Number of triples in abox: {count}") + + assert os.path.exists(path_temp_triples), "Triples file not found" self.knowledge_base_path = path_temp_triples @@ -137,6 +145,8 @@ def _set_prerequisites(self): f"({path_temp_embeddings}/{self.dicee_model}_entity_embeddings.csv)") except Exception as e: print(f"Error while training embeddings: {e}") + #raise ValueError("\nPlease try providing the absolute path to the knowledge base, " + # "e.g., /home/ndah/Dev/Ontolean/KGs/Family/family-benchmark_rich_background.owl\n") self.path_of_embeddings = f"{path_temp_embeddings}/{self.dicee_model}_entity_embeddings.csv" if self.auto_train: 
print("\n"+"\x1b[0;30;43m"+f"Will also train {self.name} for 5 epochs"+"\x1b[0m"+"\n") diff --git a/ontolearn/learners/nces2.py b/ontolearn/learners/nces2.py index bf470266..e3050696 100644 --- a/ontolearn/learners/nces2.py +++ b/ontolearn/learners/nces2.py @@ -65,13 +65,12 @@ def __init__(self, knowledge_base, nces2_or_roces=True, drop_prob, num_heads, num_seeds, m, ln, learning_rate, tmax, eta_min, clip_value, batch_size, num_workers, max_length, load_pretrained, verbose) - temp_triples_dir = "temp_embeddings" + # Use a separate directory for triples to avoid deletion + temp_triples_dir = os.path.abspath("temp_triples") if not os.path.exists(temp_triples_dir): os.makedirs(temp_triples_dir) - path_temp_triples = "temp_embeddings/abox.nt" - if os.path.exists(path_temp_triples): - os.remove(path_temp_triples) - with open(path_temp_triples, "a") as f: + path_temp_triples = os.path.join(temp_triples_dir, "abox.nt") + with open(path_temp_triples, "w") as f: for s, p, o in self.knowledge_base.abox(): f.write(f"<{s.str}> <{p.str}> <{o.str}> .\n") From 47bbf36ebb54366b7d4a86decb8c6990b4195560 Mon Sep 17 00:00:00 2001 From: alkidbaci Date: Wed, 19 Nov 2025 11:16:38 +0100 Subject: [PATCH 10/13] fixed import --- ontolearn/learners/base.py | 2 +- tests/test_base_concept_learner.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ontolearn/learners/base.py b/ontolearn/learners/base.py index 180e95b1..4de93aa6 100644 --- a/ontolearn/learners/base.py +++ b/ontolearn/learners/base.py @@ -46,7 +46,7 @@ from owlapy.render import DLSyntaxObjectRenderer from ontolearn.abstracts import BaseRefinement, AbstractScorer, AbstractHeuristic, \ AbstractConceptNode, AbstractLearningProblem, AbstractKnowledgeBase -from .triple_store import TripleStoreOntology +from ontolearn.triple_store import TripleStoreOntology _N = TypeVar('_N', bound=AbstractConceptNode) #: _X = TypeVar('_X', bound=AbstractLearningProblem) #: diff --git a/tests/test_base_concept_learner.py 
b/tests/test_base_concept_learner.py index e1353dec..03f79214 100644 --- a/tests/test_base_concept_learner.py +++ b/tests/test_base_concept_learner.py @@ -1,5 +1,4 @@ import unittest -import tempfile import pandas as pd from owlapy.class_expression import OWLClass, OWLObjectIntersectionOf, OWLObjectSomeValuesFrom, OWLThing from owlapy.iri import IRI From a20fa7e4b33539edb266e74b45ef8996d926b2d8 Mon Sep 17 00:00:00 2001 From: alkidbaci Date: Wed, 19 Nov 2025 12:48:46 +0100 Subject: [PATCH 11/13] Removed redundant code --- ontolearn/learners/nces.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ontolearn/learners/nces.py b/ontolearn/learners/nces.py index 62ff2bff..eeac8f13 100644 --- a/ontolearn/learners/nces.py +++ b/ontolearn/learners/nces.py @@ -121,11 +121,8 @@ def _set_prerequisites(self): path_temp_triples = os.path.join(temp_triples_dir, "abox.nt") with open(path_temp_triples, "w") as f: - count = 0 for s, p, o in self.knowledge_base.abox(): f.write(f"<{s.str}> <{p.str}> <{o.str}> .\n") - count += 1 - print(f"Number of triples in abox: {count}") assert os.path.exists(path_temp_triples), "Triples file not found" @@ -145,8 +142,6 @@ def _set_prerequisites(self): f"({path_temp_embeddings}/{self.dicee_model}_entity_embeddings.csv)") except Exception as e: print(f"Error while training embeddings: {e}") - #raise ValueError("\nPlease try providing the absolute path to the knowledge base, " - # "e.g., /home/ndah/Dev/Ontolean/KGs/Family/family-benchmark_rich_background.owl\n") self.path_of_embeddings = f"{path_temp_embeddings}/{self.dicee_model}_entity_embeddings.csv" if self.auto_train: print("\n"+"\x1b[0;30;43m"+f"Will also train {self.name} for 5 epochs"+"\x1b[0m"+"\n") From f5f1dbabea37f5eff9c5eb6c26cd3fd15b1c7d7e Mon Sep 17 00:00:00 2001 From: alkidbaci Date: Wed, 19 Nov 2025 12:49:01 +0100 Subject: [PATCH 12/13] Updated learners description --- ontolearn/learners/__init__.py | 54 ++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 13 
deletions(-) diff --git a/ontolearn/learners/__init__.py b/ontolearn/learners/__init__.py index 0eb89a8b..091911cd 100644 --- a/ontolearn/learners/__init__.py +++ b/ontolearn/learners/__init__.py @@ -29,25 +29,53 @@ This module provides various concept learning algorithms for ontology engineering and OWL class expression learning. Available Learners: - + Refinement-Based Learners: - - CELOE: Class Expression Learning for Ontology Engineering - - OCEL: A limited version of CELOE - - Neural/Hybrid Learners: - - Drill: Neuro-Symbolic Class Expression Learning - - TDL: Tree-based Description Logic Learner - + - CELOE: A refinement-operator based learner (originating from DL-Learner). + It performs heuristic-guided search over class expression refinements to + find compact OWL class expressions that fit positive/negative examples. + Suitable when symbolic search with ontological reasoning is required. + - OCEL: A lightweight / constrained variant of CELOE. It uses a smaller set + of refinements or simplified search heuristics to trade expressivity for + speed and lower computational cost. + + Neural / Hybrid Learners: + - Drill: A neuro-symbolic learner that combines neural scoring or guidance + with symbolic refinement/search. It typically uses learned models to rank + candidates while keeping final outputs in an interpretable DL form. + - CLIP: A hybrid approach that leverages pretrained embeddings to assist + candidate generation or scoring (e.g., using semantic similarity signals). + Useful when distributional signals complement logical reasoning. + - NCES, NCES2: Neural Class Expression Synthesis variants. These rely on + neural encoders or learned scorers to propose and rank candidate + class expressions; NCES2 represents an improved/iterated version. + - NERO: A neural embedding model that learns permutation-invariant + embeddings for sets of examples tailored towards predicting F1 + scores of pre-selected description logic concepts.
+ - ROCES: A hybrid/refinement-based approach that combines ranking, + coverage estimation, and refinement operators to discover candidate + expressions efficiently. Extension of NCES2. + + Evolutionary Learners: + - EvoLearner: Evolutionary search-based learner that evolves candidate + descriptions (e.g., via genetic operators) using fitness functions + derived from coverage and other objectives. + Query-Based Learners: - - SPARQLQueryLearner: Learning SPARQL queries from DL concepts - - Experimental: - - NERO: Neural Evolutionary Reinforcement Ontology learner (experimental) + - SPARQLQueryLearner: Learns query patterns expressed as SPARQL queries + that capture the target concept. Useful when working directly with + SPARQL endpoints or large RDF datasets where query-based retrieval is + preferable to reasoning-heavy symbolic search. + + Tree / Rule-Based Learners: + - TDL: Tree-based Description Logic Learner. Adapts decision-tree style + induction to construct DL class expressions from attribute-like splits + or tests, producing interpretable, rule-like descriptions. Example: >>> from ontolearn.learners import CELOE, Drill >>> from ontolearn.knowledge_base import KnowledgeBase - >>> + >>> >>> kb = KnowledgeBase(path="example.owl") >>> model = CELOE(knowledge_base=kb) >>> model.fit(pos_examples, neg_examples) From 91155efc2468ba2f2b844ea31f778c297b0290d6 Mon Sep 17 00:00:00 2001 From: alkidbaci Date: Wed, 19 Nov 2025 12:49:33 +0100 Subject: [PATCH 13/13] added flag for printing duration report --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b9f6816b..8f99267c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -73,5 +73,5 @@ jobs: - name: Testing and coverage report run: | pip install coverage - coverage run -m pytest -p no:warnings -x + coverage run -m pytest -p no:warnings -x --durations=0 coverage report -m