diff --git a/.coveragerc b/.coveragerc index c642d5ef..677f3422 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,6 +1,7 @@ [run] omit = tests/* +source=. [report] exclude_lines = diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b9f6816b..8f99267c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -73,5 +73,5 @@ jobs: - name: Testing and coverage report run: | pip install coverage - coverage run -m pytest -p no:warnings -x + coverage run -m pytest -p no:warnings -x --durations=0 coverage report -m diff --git a/README.md b/README.md index ab3f21b2..8631ca4a 100644 --- a/README.md +++ b/README.md @@ -133,17 +133,34 @@ Fore more please refer to the [examples](https://github.com/dice-group/Ontolearn
Click me! -Load an RDF knowledge graph +The webservice exposes a lightweight HTTP/JSON API for running Ontolearn learners remotely. +Start it with a local knowledge base or a remote triplestore. +Submit learning problems as JSON to the `/cel` endpoint +(e.g., `GET http://<host>:8000/cel` with `pos`, `neg`, `model` +and optional parameters for the particular model like `path_embeddings`, `max_runtime`, etc.). +The service returns learned OWL class expressions (DL and SPARQL/OWL serializations) +and performance metrics in the JSON response. + +### Local Dataset + ```shell ontolearn-webservice --path_knowledge_base KGs/Mutagenesis/mutagenesis.owl ``` -or launch a triplestore server and load Mutagenesis there. -Some leads to launch the triplestore server: -- https://docs.tentris.io/binary/load.html -- https://ontolearn-docs-dice-group.netlify.app/usage/04_knowledge_base#loading-and-launching-a-triplestore + +### Remote Dataset + ```shell ontolearn-webservice --endpoint_triple_store ``` + +Some leads to hosting your own triplestore endpoint: +- https://docs.tentris.io/binary/load.html +- https://ontolearn-docs-dice-group.netlify.app/usage/04_knowledge_base#loading-and-launching-a-triplestore + +### Using the Webservice + +#### DRILL + The below code trains DRILL with 6 randomly generated learning problems provided that **path_to_pretrained_drill** does not lead to a directory containing pretrained DRILL. Thereafter, trained DRILL is saved in the directory **path_to_pretrained_drill**. @@ -169,6 +186,9 @@ for str_target_concept, examples in learning_problems.items(): }) print(response.json()) # {'Prediction': '∀ hasAtom.(¬Nitrogen-34)', 'F1': 0.7283582089552239, 'saved_prediction': 'Predictions.owl'} ``` + +#### TDL + TDL (a more scalable learner) can also be used as follows ```python import json @@ -180,6 +200,9 @@ response = requests.get('http://0.0.0.0:8000/cel', "model": "TDL"}) print(response.json()) ``` + +#### NCES + NCES (another scalable learner). 
The following will first train NCES if the provided path `path_to_pretrained_nces` does not exist ```python import json @@ -250,15 +273,8 @@ To compute the test performance, we compute F1-score of H w.r.t. test positive a python examples/concept_learning_cv_evaluation.py --kb ./KGs/Family/family-benchmark_rich_background.owl --lps ./LPs/Family/lps_difficult.json --path_of_nces_embeddings ./NCESData/family/embeddings/ConEx_entity_embeddings.csv --path_of_clip_embeddings ./CLIPData/family/embeddings/ConEx_entity_embeddings.csv --max_runtime 60 --report family_results.csv ``` -```shell -# To download learning problems and benchmark with selected learners on the Family benchmark dataset with benchmark learning problems. -python examples/concept_learning_cv_evaluation.py --kb ./KGs/Family/family-benchmark_rich_background.owl --lps ./LPs/Family/lps_difficult.json --learner_types ocel drill tdl nces --path_of_nces_embeddings ./NCESData/family/embeddings/ConEx_entity_embeddings.csv --path_of_clip_embeddings ./CLIPData/family/embeddings/ConEx_entity_embeddings.csv --max_runtime 60 --report family_results.csv -``` +You can also select specific learners by using the flag `--learner_types` followed by the learner short names separated by space. E.g., `--learner_types ocel drill tdl nces` -```shell -# To download learning problems and benchmark with a single learner on the Family benchmark dataset with benchmark learning problems. -python examples/concept_learning_cv_evaluation.py --kb ./KGs/Family/family-benchmark_rich_background.owl --lps ./LPs/Family/lps_difficult.json --learner_types nces --path_of_nces_embeddings ./NCESData/family/embeddings/ConEx_entity_embeddings.csv --path_of_clip_embeddings ./CLIPData/family/embeddings/ConEx_entity_embeddings.csv --max_runtime 60 --report family_results.csv -``` In the following python script, the results are summarized and the markdown displayed below generated. 
```python import pandas as pd diff --git a/examples/concept_learning_neural_evaluation.py b/examples/concept_learning_neural_evaluation.py index a68fd4e9..971f2750 100644 --- a/examples/concept_learning_neural_evaluation.py +++ b/examples/concept_learning_neural_evaluation.py @@ -26,7 +26,8 @@ import numpy as np from ontolearn.utils.static_funcs import compute_f1_score from ontolearn.triple_store import TripleStore -from ontolearn.owl_neural_reasoner import TripleStoreNeuralReasoner +from owlapy.owl_reasoner import EBR +from owlapy.owl_ontology import NeuralOntology from owlapy import owl_expression_to_dl pd.set_option("display.precision", 5) @@ -40,7 +41,7 @@ def dl_concept_learning(args): drill_with_symbolic_retriever = Drill(knowledge_base=kb, path_embeddings=args.path_drill_embeddings, quality_func=F1(), max_runtime=args.max_runtime,verbose=0) - neural_kb = TripleStore(reasoner=TripleStoreNeuralReasoner(path_neural_embedding=args.kge)) + neural_kb = TripleStore(reasoner=EBR(NeuralOntology(path_neural_embedding=args.kge))) drill_with_neural_retriever = Drill(knowledge_base=neural_kb, path_embeddings=args.path_drill_embeddings, diff --git a/ontolearn/learners/__init__.py b/ontolearn/learners/__init__.py index 0eb89a8b..091911cd 100644 --- a/ontolearn/learners/__init__.py +++ b/ontolearn/learners/__init__.py @@ -29,25 +29,53 @@ This module provides various concept learning algorithms for ontology engineering and OWL class expression learning. Available Learners: - + Refinement-Based Learners: - - CELOE: Class Expression Learning for Ontology Engineering - - OCEL: A limited version of CELOE - - Neural/Hybrid Learners: - - Drill: Neuro-Symbolic Class Expression Learning - - TDL: Tree-based Description Logic Learner - + - CELOE: A refinement-operator based learner (originating from DL-Learner). + It performs heuristic-guided search over class expression refinements to + find compact OWL class expressions that fit positive/negative examples. 
+ Suitable when symbolic search with ontological reasoning is required. + - OCEL: A lightweight / constrained variant of CELOE. It uses a smaller set + of refinements or simplified search heuristics to trade expressivity for + speed and lower computational cost. + + Neural / Hybrid Learners: + - Drill: A neuro-symbolic learner that combines neural scoring or guidance + with symbolic refinement/search. It typically uses learned models to rank + candidates while keeping final outputs in an interpretable DL form. + - CLIP: A hybrid approach that leverages pretrained embeddings to assist + candidate generation or scoring (e.g., using semantic similarity signals). + Useful when distributional signals complement logical reasoning. + - NCES, NCES2: Neural concept-expression search variants. These rely on + neural encoders or learned scorers to propose and rank candidate + class expressions; NCES2 represents an improved/iterated version. + - NERO: A neural embedding model that learns permutation-invariant + embeddings for sets of examples tailored towards predicting F1 + scores of pre-selected description logic concepts. + - ROCES: A hybrid/refinement-based approach that combines ranking, + coverage estimation, and refinement operators to discover candidate + expressions efficiently. Extension of NCES2. + + Evolutionary Learners: + - EvoLearner: Evolutionary search-based learner that evolves candidate + descriptions (e.g., via genetic operators) using fitness functions + derived from coverage and other objectives. + Query-Based Learners: - - SPARQLQueryLearner: Learning SPARQL queries from DL concepts - - Experimental: - - NERO: Neural Evolutionary Reinforcement Ontology learner (experimental) + - SPARQLQueryLearner: Learns query patterns expressed as SPARQL queries + that capture the target concept. Useful when working directly with + SPARQL endpoints or large RDF datasets where query-based retrieval is + preferable to reasoning-heavy symbolic search. 
+ + Tree / Rule-Based Learners: + - TDL: Tree-based Description Logic Learner. Adapts decision-tree style + induction to construct DL class expressions from attribute-like splits + or tests, producing interpretable, rule-like descriptions. Example: >>> from ontolearn.learners import CELOE, Drill >>> from ontolearn.knowledge_base import KnowledgeBase - >>> + >>> >>> kb = KnowledgeBase(path="example.owl") >>> model = CELOE(knowledge_base=kb) >>> model.fit(pos_examples, neg_examples) diff --git a/ontolearn/learners/base.py b/ontolearn/learners/base.py index 49664007..4de93aa6 100644 --- a/ontolearn/learners/base.py +++ b/ontolearn/learners/base.py @@ -46,6 +46,7 @@ from owlapy.render import DLSyntaxObjectRenderer from ontolearn.abstracts import BaseRefinement, AbstractScorer, AbstractHeuristic, \ AbstractConceptNode, AbstractLearningProblem, AbstractKnowledgeBase +from ontolearn.triple_store import TripleStoreOntology _N = TypeVar('_N', bound=AbstractConceptNode) #: _X = TypeVar('_X', bound=AbstractLearningProblem) #: @@ -343,9 +344,9 @@ def save_best_hypothesis(self, n: int = 10, path: str = './Predictions', rdf_for if rdf_format != 'rdfxml': raise NotImplementedError(f'Format {rdf_format} not implemented.') - assert isinstance(self.kb, KnowledgeBase) + assert isinstance(self.kb, AbstractKnowledgeBase) - if isinstance(self.kb.ontology, Ontology): + if isinstance(self.kb.ontology, Ontology) or isinstance(self.kb.ontology, TripleStoreOntology): ontology = Ontology(IRI.create(NS), load=False) elif isinstance(self.kb.ontology, SyncOntology): ontology = SyncOntology(IRI.create(NS), load=False) diff --git a/ontolearn/learners/celoe.py b/ontolearn/learners/celoe.py index 8d10ac98..d7cd6c26 100644 --- a/ontolearn/learners/celoe.py +++ b/ontolearn/learners/celoe.py @@ -37,7 +37,7 @@ from contextlib import contextmanager from sortedcontainers import SortedSet from owlapy.utils import OrderedOWLObject -from owlapy.utils import EvaluatedDescriptionSet, ConceptOperandSorter, 
OperandSetTransform +from owlapy.utils import EvaluatedDescriptionSet, ConceptOperandSorter, CESimplifier import time from itertools import islice from owlapy.render import DLSyntaxObjectRenderer @@ -262,7 +262,7 @@ def _add_node(self, ref: OENode, tree_parent: Optional[TreeNode[OENode]]): # ignoring refinement, it has been refined from another parent return False - norm_concept = OperandSetTransform().simplify(ref.concept) + norm_concept = CESimplifier().simplify(ref.concept) if norm_concept in self._seen_norm_concepts: norm_seen = True else: @@ -288,7 +288,7 @@ def _add_node(self, ref: OENode, tree_parent: Optional[TreeNode[OENode]]): return True def _add_node_evald(self, ref: OENode, eval_: EvaluatedConcept, tree_parent: Optional[TreeNode[OENode]]): # pragma: no cover - norm_concept = OperandSetTransform().simplify(ref.concept) + norm_concept = CESimplifier().simplify(ref.concept) if norm_concept in self._seen_norm_concepts: norm_seen = True else: diff --git a/ontolearn/learners/nces.py b/ontolearn/learners/nces.py index ae3fce41..eeac8f13 100644 --- a/ontolearn/learners/nces.py +++ b/ontolearn/learners/nces.py @@ -100,7 +100,7 @@ def _set_prerequisites(self): del dicee except Exception: print('\x1b[0;30;43m dicee is not installed, will first install it...\x1b[0m\n') - subprocess.run('pip install dicee==0.1.4') + subprocess.run('pip install dicee==0.2.0') if self.auto_train: print("\n"+"\x1b[0;30;43m"+"Embeddings not found. Will quickly train embeddings beforehand. " +"Poor performance is expected as we will also train the synthesizer for a few epochs." 
@@ -111,16 +111,21 @@ def _set_prerequisites(self): try: path_temp_embeddings = self.path_temp_embeddings if self.path_temp_embeddings and isinstance( self.path_temp_embeddings, str) else "temp_embeddings" + path_temp_embeddings = os.path.abspath(path_temp_embeddings) if not os.path.exists(path_temp_embeddings): os.makedirs(path_temp_embeddings) - path_temp_triples = "temp_embeddings/abox.nt" - if os.path.exists(path_temp_triples): - os.remove(path_temp_triples) + # Use a separate directory for triples to avoid deletion by dicee + temp_triples_dir = os.path.abspath("temp_triples") + if not os.path.exists(temp_triples_dir): + os.makedirs(temp_triples_dir) + path_temp_triples = os.path.join(temp_triples_dir, "abox.nt") - with open(path_temp_triples, "a") as f: + with open(path_temp_triples, "w") as f: for s, p, o in self.knowledge_base.abox(): f.write(f"<{s.str}> <{p.str}> <{o.str}> .\n") + assert os.path.exists(path_temp_triples), "Triples file not found" + self.knowledge_base_path = path_temp_triples subprocess.run(f"dicee --path_single_kg {self.knowledge_base_path} " diff --git a/ontolearn/learners/nces2.py b/ontolearn/learners/nces2.py index bf470266..e3050696 100644 --- a/ontolearn/learners/nces2.py +++ b/ontolearn/learners/nces2.py @@ -65,13 +65,12 @@ def __init__(self, knowledge_base, nces2_or_roces=True, drop_prob, num_heads, num_seeds, m, ln, learning_rate, tmax, eta_min, clip_value, batch_size, num_workers, max_length, load_pretrained, verbose) - temp_triples_dir = "temp_embeddings" + # Use a separate directory for triples to avoid deletion + temp_triples_dir = os.path.abspath("temp_triples") if not os.path.exists(temp_triples_dir): os.makedirs(temp_triples_dir) - path_temp_triples = "temp_embeddings/abox.nt" - if os.path.exists(path_temp_triples): - os.remove(path_temp_triples) - with open(path_temp_triples, "a") as f: + path_temp_triples = os.path.join(temp_triples_dir, "abox.nt") + with open(path_temp_triples, "w") as f: for s, p, o in 
self.knowledge_base.abox(): f.write(f"<{s.str}> <{p.str}> <{o.str}> .\n") diff --git a/ontolearn/scripts/litserve_neural_reasoner.py b/ontolearn/scripts/litserve_neural_reasoner.py deleted file mode 100644 index 2f4e81bd..00000000 --- a/ontolearn/scripts/litserve_neural_reasoner.py +++ /dev/null @@ -1,117 +0,0 @@ -# ----------------------------------------------------------------------------- -# MIT License -# -# Copyright (c) 2024 Ontolearn Team -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# ----------------------------------------------------------------------------- -import argparse -import litserve as ls -from ontolearn.owl_neural_reasoner import TripleStoreNeuralReasoner -from owlapy import dl_to_owl_expression -from owlapy import owl_expression_to_dl - -class NeuralReasonerAPI(ls.LitAPI): - """ - NeuralReasonerAPI is a LitAPI implementation that handles requests to a neural reasoner - using OWL expressions. 
It utilizes a neural embedding model for reasoning - over ontology data. - - Attributes: - path_neural_embedding (str): Path to the neural embedding. - gamma (float): Minimum confidence threshold for the reasoning model, defaults to 0.9. - """ - def __init__(self, path_neural_embedding, gamma=0.9): - """ - Initializes the NeuralReasonerAPI with the path to the neural embedding and gamma value. - - Args: - path_neural_embedding (str): Path to the neural embedding model. - gamma (float): Minimum confidence threshold for the reasoning model, defaults to 0.9. - """ - super().__init__() - self.path_neural_embedding = path_neural_embedding - self.gamma = gamma - - def setup(self, device): - """ - Sets up the neural reasoner instance. - """ - self.neural_owl_reasoner = TripleStoreNeuralReasoner( - path_neural_embedding=self.path_neural_embedding, gamma=self.gamma) - - def decode_request(self, request): - """ - Decodes an incoming request to extract the DL expression and namespace. - - Args: - request (dict): A dictionary containing the request data, with 'expression' and 'namespace' keys. - - Returns: - tuple: A tuple with the DL expression (str) and namespace (str). - """ - expression = request["expression"] - namespace = request["namespace"] - return expression, namespace - - def predict(self, data): - """ - Predicts individuals of the given OWL expression using the neural reasoner. - - Args: - data (tuple): A tuple containing the DL expression (str) and namespace (str). - - Returns: - set: A set of individuals satisfying the given OWL expression. - """ - expressions, namespace = data - owl_expression = dl_to_owl_expression( - namespace=namespace, - dl_expression=expressions - ) # Convert DL string to OWLClassExpression - return set(self.neural_owl_reasoner.individuals(owl_expression)) - - def encode_response(self, output): - """ - Encodes the output from the reasoner back into a DL expression format for response. 
- - Args: - output (set): A set of OWL expressions representing the individuals. - - Returns: - dict: A dictionary with 'retrieval_result' key containing a list of DL expressions as strings. - """ - return {'retrieval_result': [owl_expression_to_dl(out) for out in output]} - - -if __name__ == "__main__": - # Parse command-line arguments - parser = argparse.ArgumentParser(description="Neural Reasoner API") - parser.add_argument("--path_kge_model", type=str, default="KGs_Family_father_owl", - help="Path to the neural embedding folder") - parser.add_argument("--workers", type=int, default=1, - help="Number of model copies for reasoning per device") - parser.add_argument("--path_kg", type=str, default="KGs/Family/father.owl", - help="Path to the ontology file") - args = parser.parse_args() - - # Initialize API and server - api = NeuralReasonerAPI(path_neural_embedding=args.path_kge_model) - server = ls.LitServer(api, accelerator="auto", workers_per_device=args.workers, track_requests=True) - server.run(port=8000) diff --git a/ontolearn/semantic_caching.py b/ontolearn/semantic_caching.py index 66c74a4c..b74e8ae6 100644 --- a/ontolearn/semantic_caching.py +++ b/ontolearn/semantic_caching.py @@ -25,7 +25,8 @@ """python examples/retrieval_eval.py""" from owlapy.owl_literal import OWLBottomObjectProperty, OWLTopObjectProperty -from ontolearn.owl_neural_reasoner import TripleStoreNeuralReasoner +from owlapy.owl_reasoner import EBR +from owlapy.owl_ontology import NeuralOntology from ontolearn.knowledge_base import KnowledgeBase from ontolearn.utils import jaccard_similarity, concept_reducer, concept_reducer_properties from owlapy.class_expression import ( @@ -536,13 +537,9 @@ def retrieve(expression:str, path_kg:str, path_kge_model:str) -> Tuple[Set[str], 'take a concept c and returns it set of retrieved individual' if path_kge_model: - neural_owl_reasoner = TripleStoreNeuralReasoner( - path_neural_embedding=path_kge_model, gamma=0.9 - ) + neural_owl_reasoner = 
EBR(NeuralOntology(path_neural_embedding=path_kge_model, gamma=0.9)) else: - neural_owl_reasoner = TripleStoreNeuralReasoner( - path_of_kb=path_kg, gamma=0.9 - ) + neural_owl_reasoner = EBR(NeuralOntology(path_of_kb=path_kg, gamma=0.9)) retrievals = concept_retrieval(neural_owl_reasoner, expression) # Retrieving with our reasoner return retrievals diff --git a/ontolearn/triple_store.py b/ontolearn/triple_store.py index ac7880c6..98c4eccb 100644 --- a/ontolearn/triple_store.py +++ b/ontolearn/triple_store.py @@ -831,7 +831,7 @@ def get_data_properties(self, ranges: Union[OWLDatatype, Iterable[OWLDatatype]] else: def get_properties_from_xsd_range(r: OWLDatatype): query = (f"{rdf_prefix}\n{rdfs_prefix}\n{xsd_prefix}SELECT DISTINCT ?x " + - f"WHERE {{?x rdfs:range xsd:{r.iri.reminder}}}") + f"WHERE {{?x rdfs:range xsd:{r.iri.remainder}}}") for binding in self.query(query).json()["results"]["bindings"]: yield OWLDataProperty(binding["x"]["value"]) if isinstance(ranges, OWLDatatype): diff --git a/setup.py b/setup.py index 5fe35b68..a1040e9c 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ _deps = [ "matplotlib>=3.3.4", "scikit-learn>=1.4.1", - "torch==2.2.0", + "torch>=2.2.0", "rdflib>=6.0.2", "ruff>=0.7.2", "pandas>=1.5.0", @@ -47,8 +47,8 @@ "tqdm>=4.64.0", "transformers>=4.38.1", "pytest>=7.2.2", - "owlapy==1.5.1", - "dicee==0.1.4", + "owlapy==1.6.2", + "dicee==0.2.0", "ontosample>=0.2.2", "sphinx>=7.2.6", "sphinx-autoapi>=3.0.0", diff --git a/tests/test_base_concept_learner.py b/tests/test_base_concept_learner.py index e1353dec..03f79214 100644 --- a/tests/test_base_concept_learner.py +++ b/tests/test_base_concept_learner.py @@ -1,5 +1,4 @@ import unittest -import tempfile import pandas as pd from owlapy.class_expression import OWLClass, OWLObjectIntersectionOf, OWLObjectSomeValuesFrom, OWLThing from owlapy.iri import IRI diff --git a/tests/test_example_concept_learning_neural_evaluation.py b/tests/test_example_concept_learning_neural_evaluation.py index 
4c7fbcfc..e2d53aa4 100644 --- a/tests/test_example_concept_learning_neural_evaluation.py +++ b/tests/test_example_concept_learning_neural_evaluation.py @@ -1,13 +1,19 @@ -""" StratifiedKFold Cross Validating DL Concept Learning Algorithms - -dicee --path_single_kg "KGs/Family/family-benchmark_rich_background.owl" --model Keci --path_to_store_single_run KeciFamilyRun --backend rdflib - - -python examples/concept_learning_neural_evaluation.py --lps LPs/Family/lps.json --kb KGs/Family/family-benchmark_rich_background.owl --kge KeciFamilyRun --max_runtime 3 --report family.csv - - -""" - +# """StratifiedKFold Cross Validating DL Concept Learning Algorithms +# +# dicee --path_single_kg "KGs/Family/family-benchmark_rich_background.owl" --model Keci --path_to_store_single_run KeciFamilyRun --backend rdflib +# +# python examples/concept_learning_neural_evaluation.py --lps LPs/Family/lps.json --kb KGs/Family/family-benchmark_rich_background.owl --kge KeciFamilyRun --max_runtime 3 --report family.csv +# +# Regression Test for the example. +# Fitting OWL Class Expression Learners: +# +# Given positive examples (E^+) and negative examples (E^-), +# Evaluate the performances of OWL Class Expression Learners w.r.t. the quality of learned/found OWL Class Expression +# +# Example to run the script +# python examples/concept_learning_evaluation.py --lps LPs/Family/lps.json --kb KGs/Family/family-benchmark_rich_background.owl --max_runtime 3 --report family.csv +# """ +# # import json # import time # import os @@ -26,56 +32,23 @@ # import numpy as np # from ontolearn.utils.static_funcs import compute_f1_score # from ontolearn.triple_store import TripleStore -# from ontolearn.owl_neural_reasoner import TripleStoreNeuralReasoner +# from owlapy.owl_reasoner import EBR +# from owlapy.owl_ontology import NeuralOntology # from owlapy import owl_expression_to_dl - +# # pd.set_option("display.precision", 5) - -""" -Regression Test for the example. 
-Fitting OWL Class Expression Learners: - -Given positive examples (E^+) and negative examples (E^-), -Evaluate the performances of OWL Class Expression Learners w.r.t. the quality of learned/found OWL Class Expression - -Example to run the script -python examples/concept_learning_evaluation.py --lps LPs/Family/lps.json --kb KGs/Family/family-benchmark_rich_background.owl --max_runtime 3 --report family.csv - -""" -# import json -# import time -# import os -# import subprocess -# import platform -# import pandas as pd -# from ontolearn.knowledge_base import KnowledgeBase -# from ontolearn.learners import CELOE, OCEL, Drill, TDL -# from ontolearn.concept_learner import EvoLearner, NCES, CLIP -# from ontolearn.refinement_operators import ExpressRefinement -# from ontolearn.learning_problem import PosNegLPStandard -# from ontolearn.metrics import F1 -# from owlapy.owl_individual import OWLNamedIndividual, IRI -# import argparse -# from sklearn.model_selection import StratifiedKFold -# import numpy as np -# from ontolearn.utils.static_funcs import compute_f1_score -# from ontolearn.triple_store import TripleStore -# from ontolearn.owl_neural_reasoner import TripleStoreNeuralReasoner -# from owlapy import owl_expression_to_dl - -# pd.set_option("display.precision", 5) - +# # class TestConceptLearningCV: # def test_cv(self): - +# # with open('LPs/Family/lps.json') as json_file: # settings = json.load(json_file) - +# # path_kb="KGs/Family/family-benchmark_rich_background.owl" # max_runtime=1 # random_seed=1 # folds=2 - +# # from dicee.executer import Execute # from dicee.config import Namespace # args = Namespace() @@ -86,14 +59,14 @@ # args.backend="rdflib" # Execute(args).start() # path_kge=args.path_to_store_single_run - +# # kb = KnowledgeBase(path=path_kb) # drill_with_symbolic_retriever = Drill(knowledge_base=kb, # quality_func=F1(), max_runtime=max_runtime,verbose=0) -# neural_kb = TripleStore(reasoner=TripleStoreNeuralReasoner(path_neural_embedding=path_kge)) +# 
neural_kb = TripleStore(reasoner=EBR(NeuralOntology(path_neural_embedding=path_kge))) # drill_with_neural_retriever = Drill(knowledge_base=neural_kb, # quality_func=F1(), max_runtime=max_runtime, verbose=0) - +# # # dictionary to store the data # data = dict() # if "problems" in settings: @@ -104,16 +77,16 @@ # problems = settings.items() # positives_key = "positive examples" # negatives_key = "negative examples" - +# # for str_target_concept, examples in problems: # print("Target concept: ", str_target_concept) # p = examples[positives_key] # n = examples[negatives_key] - +# # kf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=random_seed) # X = np.array(p + n) # y = np.array([1.0 for _ in p] + [0.0 for _ in n]) - +# # for ith, (train_index, test_index) in enumerate(kf.split(X, y)): # # # data.setdefault("LP", []).append(str_target_concept) @@ -121,22 +94,22 @@ # # () Extract positive and negative examples from train fold # train_pos = {pos_individual for pos_individual in X[train_index][y[train_index] == 1]} # train_neg = {neg_individual for neg_individual in X[train_index][y[train_index] == 0]} - +# # # Sanity checking for individuals used for training. # assert train_pos.issubset(examples[positives_key]) # assert train_neg.issubset(examples[negatives_key]) - +# # # () Extract positive and negative examples from test fold # test_pos = {pos_individual for pos_individual in X[test_index][y[test_index] == 1]} # test_neg = {neg_individual for neg_individual in X[test_index][y[test_index] == 0]} - +# # # Sanity checking for individuals used for testing. 
# assert test_pos.issubset(examples[positives_key]) # assert test_neg.issubset(examples[negatives_key]) # train_lp = PosNegLPStandard( # pos={OWLNamedIndividual(i) for i in train_pos}, # neg={OWLNamedIndividual(i) for i in train_neg}) - +# # test_lp = PosNegLPStandard( # pos={OWLNamedIndividual(i) for i in test_pos}, # neg={OWLNamedIndividual(i) for i in test_neg}) @@ -164,7 +137,7 @@ # data.setdefault("Test-F1-Symbolic-DRILL", []).append(symbolic_test_f1_drill) # data.setdefault("RT-Symbolic-DRILL", []).append(symbolic_rt_drill) # data.setdefault("Prediction-Symbolic-DRILL", []).append(owl_expression_to_dl(pred_symbolic_drill)) - +# # print("DRILL Neural starts..", end=" ") # start_time = time.time() # # Prediction of DRILL through symbolic retriever. @@ -185,11 +158,11 @@ # print(f"DRILL Neural Test Quality: {neural_test_f1_drill:.3f}", end="\t") # print(f"DRILL Neural Runtime: {neural_rt_drill:.3f}", end="\t") # print(f"Prediction: {owl_expression_to_dl(pred_neural_drill)}") - +# # data.setdefault("Train-F1-Neural-DRILL", []).append(neural_train_f1_drill) # data.setdefault("Test-F1-Neural-DRILL", []).append(neural_test_f1_drill) # data.setdefault("RT-Neural-DRILL", []).append(neural_rt_drill) # data.setdefault("Prediction-Neural-DRILL", []).append(owl_expression_to_dl(pred_neural_drill)) - +# # df = pd.DataFrame.from_dict(data) # assert df.select_dtypes(include="number").mean()["Train-F1-Symbolic-DRILL"] >= 0.93