From fd26d21ccb7f57bcbe618d337b8155da52c68104 Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Mon, 16 Feb 2026 13:37:21 +0100 Subject: [PATCH 1/3] Apply formatting to canon.py --- Makefile | 2 +- lib/pyld/canon.py | 96 ++++++++++++++++++++++++++--------------------- 2 files changed, 55 insertions(+), 43 deletions(-) diff --git a/Makefile b/Makefile index 9043adcb..cc9e4d6e 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ upgrade-submodules: git submodule update --remote --init --recursive # TODO: Expand to lib/ and tests/ as linting issues are resolved. -RUFF_TARGET = lib/pyld/context_resolver.py lib/pyld/identifier_issuer.py lib/pyld/iri_resolver.py lib/pyld/nquads.py lib/pyld/resolved_context.py tests/*.py +RUFF_TARGET = lib/pyld/context_resolver.py lib/pyld/identifier_issuer.py lib/pyld/iri_resolver.py lib/pyld/nquads.py lib/pyld/resolved_context.py lib/pyld/canon.py tests/*.py lint: ruff check $(RUFF_TARGET) diff --git a/lib/pyld/canon.py b/lib/pyld/canon.py index b53e62dc..a22d85e9 100644 --- a/lib/pyld/canon.py +++ b/lib/pyld/canon.py @@ -1,11 +1,11 @@ - +import copy import hashlib -from pyld.nquads import parse_nquads, serialize_nquad + from pyld.identifier_issuer import IdentifierIssuer -import copy +from pyld.nquads import parse_nquads, serialize_nquad -class URDNA2015(object): +class URDNA2015: """ URDNA2015 implements the URDNA2015 RDF Dataset Normalization Algorithm. """ @@ -20,11 +20,11 @@ def __init__(self): # 4.4) Normalization Algorithm def main(self, dataset, options): # handle invalid output format - if 'format' in options: - if (options['format'] != 'application/n-quads' and - options['format'] != 'application/nquads'): - raise UnknownFormatError( - 'Unknown output format.', options['format']) + if 'format' in options and ( + options['format'] != 'application/n-quads' + and options['format'] != 'application/nquads' + ): + raise UnknownFormatError('Unknown output format.', options['format']) # 1) Create the normalization state. @@ -49,8 +49,9 @@ def main(self, dataset, options): if key == 'predicate' or component['type'] != 'blank node': continue id_ = component['value'] - self.blank_node_info.setdefault( - id_, {'quads': []})['quads'].append(quad) + self.blank_node_info.setdefault(id_, {'quads': []})['quads'].append( + quad + ) # 3) Create a list of non-normalized blank node identifiers and # populate it using the keys from the blank node to quads map. @@ -105,7 +106,7 @@ def main(self, dataset, options): # 6) For each hash to identifier list mapping in hash to blank nodes # map, lexicographically-sorted by hash: - for hash, id_list in sorted(self.hash_to_blank_nodes.items()): + for _hash, id_list in sorted(self.hash_to_blank_nodes.items()): # 6.1) Create hash path list where each item will be a result of # running the Hash N-Degree Quads algorithm. hash_path_list = [] @@ -157,11 +158,12 @@ def main(self, dataset, options): for key, component in quad.items(): if key == 'predicate': continue - if(component['type'] == 'blank node' and not - component['value'].startswith( - self.canonical_issuer.prefix)): + if component['type'] == 'blank node' and not component[ + 'value' + ].startswith(self.canonical_issuer.prefix): component['value'] = self.canonical_issuer.get_id( - component['value']) + component['value'] + ) # 7.2) Add quad copy to the normalized dataset. normalized.append(serialize_nquad(quad)) @@ -170,8 +172,10 @@ def main(self, dataset, options): normalized.sort() # 8) Return the normalized dataset. - if (options.get('format') == 'application/n-quads' or - options.get('format') == 'application/nquads'): + if ( + options.get('format') == 'application/n-quads' + or options.get('format') == 'application/nquads' + ): return ''.join(normalized) return parse_nquads(''.join(normalized)) @@ -206,8 +210,7 @@ def hash_first_degree_quads(self, id_): # matches the reference blank node identifier then use the # blank node identifier _:a, otherwise, use the blank node # identifier _:z. - copy[key] = self.modify_first_degree_component( - id_, component, key) + copy[key] = self.modify_first_degree_component(id_, component, key) nquads.append(serialize_nquad(copy)) # 4) Sort nquads in lexicographical order. @@ -301,7 +304,7 @@ def hash_n_degree_quads(self, id_, issuer): for related in permutation: # 5.4.4.1) If a canonical identifier has been issued for # related, append it to path. - if(self.canonical_issuer.has_id(related)): + if self.canonical_issuer.has_id(related): path += self.canonical_issuer.get_id(related) # 5.4.4.2) Otherwise: else: @@ -320,9 +323,11 @@ def hash_n_degree_quads(self, id_, issuer): # path is greater than or equal to the length of chosen # path and path is lexicographically greater than chosen # path, then skip to the next permutation. - if(len(chosen_path) != 0 and - len(path) >= len(chosen_path) and - path > chosen_path): + if ( + len(chosen_path) != 0 + and len(path) >= len(chosen_path) + and path > chosen_path + ): skip_to_next_permutation = True break @@ -352,9 +357,11 @@ def hash_n_degree_quads(self, id_, issuer): # path is greater than or equal to the length of chosen # path and path is lexicographically greater than chosen # path, then skip to the next permutation. - if(len(chosen_path) != 0 and - len(path) >= len(chosen_path) and - path > chosen_path): + if ( + len(chosen_path) != 0 + and len(path) >= len(chosen_path) + and path > chosen_path + ): skip_to_next_permutation = True break @@ -394,9 +401,11 @@ def create_hash_to_related(self, id_, issuer): # object, and graph name and it is a blank node that is not # identified by identifier: for key, component in quad.items(): - if(key != 'predicate' and - component['type'] == 'blank node' and - component['value'] != id_): + if ( + key != 'predicate' + and component['type'] == 'blank node' + and component['value'] != id_ + ): # 3.1.1) Set hash to the result of the Hash Related Blank # Node algorithm, passing the blank node identifier for # component as related, quad, path identifier issuer as @@ -405,8 +414,7 @@ def create_hash_to_related(self, id_, issuer): # respectively. related = component['value'] position = self.POSITIONS[key] - hash = self.hash_related_blank_node( - related, quad, issuer, position) + hash = self.hash_related_blank_node(related, quad, issuer, position) # 3.1.2) Add a mapping of hash to the blank node identifier # for component to hash to related blank nodes map, adding @@ -467,8 +475,10 @@ def create_hash_to_related(self, id_, issuer): # algorithm, passing the blank node identifier for subject as # related, quad, path identifier issuer as issuer, and p as # position. - if(quad['subject']['type'] == 'blank node' and - quad['subject']['value'] != id_): + if ( + quad['subject']['type'] == 'blank node' + and quad['subject']['value'] != id_ + ): related = quad['subject']['value'] position = 'p' # 3.2) Otherwise, if quad's object is a blank node that does @@ -476,8 +486,10 @@ def create_hash_to_related(self, id_, issuer): # Node algorithm, passing the blank node identifier for object # as related, quad, path identifier issuer as issuer, and r # as position. - elif(quad['object']['type'] == 'blank node' and - quad['object']['value'] != id_): + elif ( + quad['object']['type'] == 'blank node' + and quad['object']['value'] != id_ + ): related = quad['object']['value'] position = 'r' # 3.3) Otherwise, continue to the next quad. @@ -487,8 +499,7 @@ def create_hash_to_related(self, id_, issuer): # 3.4) Add a mapping of hash to the blank node identifier for the # component that matched (subject or object) to hash to related # blank nodes map, adding an entry as necessary. - hash = self.hash_related_blank_node( - related, quad, issuer, position) + hash = self.hash_related_blank_node(related, quad, issuer, position) hash_to_related.setdefault(hash, []).append(related) return hash_to_related @@ -525,9 +536,10 @@ def permutations(elements): for i in range(length): e = elements[i] is_left = left[e] - if((k is None or e > k) and - ((is_left and i > 0 and e > elements[i - 1]) or - (not is_left and i < last and e > elements[i + 1]))): + if (k is None or e > k) and ( + (is_left and i > 0 and e > elements[i - 1]) + or (not is_left and i < last and e > elements[i + 1]) + ): k, pos = e, i # no more permutations @@ -551,4 +563,4 @@ class UnknownFormatError(ValueError): def __init__(self, message, format): Exception.__init__(self, message) - self.format = format \ No newline at end of file + self.format = format From f3e671f3a1a7f41eab864aca54a3d35b993ba14f Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Thu, 5 Feb 2026 09:37:56 +0100 Subject: [PATCH 2/3] Add stub for RDFC-1.0 and prepare tests. --- lib/pyld/canon.py | 8 ++++++++ lib/pyld/jsonld.py | 40 +++++++++++++++++++++++----------------- tests/runtests.py | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 17 deletions(-) diff --git a/lib/pyld/canon.py b/lib/pyld/canon.py index a22d85e9..05f675d7 100644 --- a/lib/pyld/canon.py +++ b/lib/pyld/canon.py @@ -508,6 +508,14 @@ def create_hash_to_related(self, id_, issuer): def create_hash(self): return hashlib.sha1() +class RDFC10(URDNA2015): + """ + RDFC10 implements the RDF Canonicalization algorithm version 1.0. + """ + # TODO: Stub that uses URDNA2015 for now + def __init__(self): + URDNA2015.__init__(self) + def permutations(elements): """ diff --git a/lib/pyld/jsonld.py b/lib/pyld/jsonld.py index b0b781e0..f44f28a9 100644 --- a/lib/pyld/jsonld.py +++ b/lib/pyld/jsonld.py @@ -23,7 +23,7 @@ import uuid from typing import Optional, Callable, Any -from pyld.canon import URDNA2015, URGNA2012, UnknownFormatError +from pyld.canon import URDNA2015, URGNA2012, RDFC10, UnknownFormatError from pyld.nquads import ParserError, parse_nquads, serialize_nquad, serialize_nquads from pyld.identifier_issuer import IdentifierIssuer from .context_resolver import ContextResolver @@ -266,8 +266,8 @@ def normalize(input_, options=None): :param input_: the JSON-LD input to normalize. :param [options]: the options to use. - [algorithm] the algorithm to use: `URDNA2015` or `URGNA2012` - (default: `URGNA2012`). + [algorithm] the algorithm to use: `RDFC10`, `URDNA2015` or `URGNA2012` + (default: `RDFC10`). [base] the base IRI to use. [inputFormat] the format if input is not JSON-LD: 'application/n-quads' for N-Quads. @@ -897,8 +897,8 @@ def normalize(self, input_, options): :param input_: the JSON-LD input to normalize. :param options: the options to use. - [algorithm] the algorithm to use: `URDNA2015` or `URGNA2012` - (default: `URGNA2012`). + [algorithm] the algorithm to use: `RDFC10`, `URDNA2015` or `URGNA2012` + (default: `RDFC10`). [base] the base IRI to use. [contextResolver] internal use only. [inputFormat] the format if input is not JSON-LD: @@ -912,7 +912,7 @@ def normalize(self, input_, options): """ # set default options options = options.copy() if options else {} - options.setdefault('algorithm', 'URGNA2012') + options.setdefault('algorithm', 'RDFC10') options.setdefault('base', input_ if _is_string(input_) else '') options.setdefault('documentLoader', _default_document_loader) options.setdefault('contextResolver', @@ -920,7 +920,7 @@ def normalize(self, input_, options): options.setdefault('extractAllScripts', True) options.setdefault('processingMode', 'json-ld-1.1') - if options['algorithm'] not in ['URDNA2015', 'URGNA2012']: + if options['algorithm'] not in ['RDFC10', 'URDNA2015', 'URGNA2012']: raise JsonLdError( 'Unsupported normalization algorithm.', 'jsonld.NormalizeError') @@ -946,17 +946,23 @@ def normalize(self, input_, options): 'jsonld.NormalizeError') from cause # do normalization - if options['algorithm'] == 'URDNA2015': - try: - return URDNA2015().main(dataset, options) - except UnknownFormatError as cause: - raise JsonLdError( - str(cause), - 'jsonld.UnknownFormat', - {'format': cause.format}) from cause - + if options['algorithm'] == 'RDFC10': + algorithm = RDFC10() + elif options['algorithm'] == 'URDNA2015': + algorithm = URDNA2015() # assume URGNA2012 - return URGNA2012().main(dataset, options) + else: + algorithm = URGNA2012() + + try: + # TODO: find a good way to expose identifier map + return algorithm.main(dataset, options) #, algorithm.hash_to_blank_nodes + except UnknownFormatError as cause: + raise JsonLdError( + str(cause), + 'jsonld.UnknownFormat', + {'format': cause.format}) from cause + def from_rdf(self, dataset, options): """ diff --git a/tests/runtests.py b/tests/runtests.py index 68354c7a..abfef0f3 100644 --- a/tests/runtests.py +++ b/tests/runtests.py @@ -91,12 +91,14 @@ 'https://w3c.github.io/json-ld-api/tests', 'https://w3c.github.io/json-ld-framing/tests', 'https://github.com/json-ld/normalization/tests', + 'https://w3c.github.io/rdf-canon/tests/vocab#' ] SPEC_DIRS = [ '../specifications/json-ld-api/tests/', '../specifications/json-ld-framing/tests/', '../specifications/normalization/tests/', + '../specifications/rdf-canon/tests/' ] # NOTE: The following TestRunner class can be removed because pytest now @@ -1094,6 +1096,44 @@ def write(self, filename): ), ], }, + 'rdfc:RDFC10EvalTest': { + 'pending': { + 'idRegex': [ + '.*#test060c$', + '.*#test075c$' + ] + }, + 'skip': { + 'idRegex': [] + }, + 'fn': 'normalize', + 'params': [ + read_test_property('action'), + create_test_options({ + 'algorithm': 'RDFC10', + 'inputFormat': 'application/n-quads', + 'format': 'application/n-quads' + }) + ] + }, + # 'rdfc:RDFC10MapTest': { + # 'pending': { + # 'idRegex': [ + # ] + # }, + # 'skip': { + # 'idRegex': [] + # }, + # 'fn': 'normalize', + # 'params': [ + # read_test_property('action'), + # create_test_options({ + # 'algorithm': 'RDFC10', + # 'inputFormat': 'application/n-quads', + # 'format': 'application/n-quads' + # }) + # ] + # } } From 573788244c21a33951ddc03863ec0b8a39703e79 Mon Sep 17 00:00:00 2001 From: Miel Vander Sande Date: Mon, 16 Feb 2026 10:46:04 +0100 Subject: [PATCH 3/3] Add rdf-canon to submodules --- .gitmodules | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitmodules b/.gitmodules index f4e5432a..87e7ce95 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "specifications/normalization"] path = specifications/normalization url = https://github.com/json-ld/normalization.git +[submodule "specifications/rdf-canon"] + path = specifications/rdf-canon + url = https://github.com/w3c/rdf-canon.git