Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@
[submodule "specifications/normalization"]
path = specifications/normalization
url = https://github.com/json-ld/normalization.git
[submodule "specifications/rdf-canon"]
path = specifications/rdf-canon
url = https://github.com/w3c/rdf-canon.git
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ upgrade-submodules:
git submodule update --remote --init --recursive

# TODO: Expand to lib/ and tests/ as linting issues are resolved.
RUFF_TARGET = lib/pyld/context_resolver.py lib/pyld/identifier_issuer.py lib/pyld/iri_resolver.py lib/pyld/nquads.py lib/pyld/resolved_context.py tests/*.py
RUFF_TARGET = lib/pyld/context_resolver.py lib/pyld/identifier_issuer.py lib/pyld/iri_resolver.py lib/pyld/nquads.py lib/pyld/resolved_context.py lib/pyld/canon.py tests/*.py

lint:
ruff check $(RUFF_TARGET)
Expand Down
104 changes: 62 additions & 42 deletions lib/pyld/canon.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@

import copy
import hashlib
from pyld.nquads import parse_nquads, serialize_nquad

from pyld.identifier_issuer import IdentifierIssuer
import copy
from pyld.nquads import parse_nquads, serialize_nquad


class URDNA2015(object):
class URDNA2015:
"""
URDNA2015 implements the URDNA2015 RDF Dataset Normalization Algorithm.
"""
Expand All @@ -20,11 +20,11 @@ def __init__(self):
# 4.4) Normalization Algorithm
def main(self, dataset, options):
# handle invalid output format
if 'format' in options:
if (options['format'] != 'application/n-quads' and
options['format'] != 'application/nquads'):
raise UnknownFormatError(
'Unknown output format.', options['format'])
if 'format' in options and (
options['format'] != 'application/n-quads'
and options['format'] != 'application/nquads'
):
raise UnknownFormatError('Unknown output format.', options['format'])

# 1) Create the normalization state.

Expand All @@ -49,8 +49,9 @@ def main(self, dataset, options):
if key == 'predicate' or component['type'] != 'blank node':
continue
id_ = component['value']
self.blank_node_info.setdefault(
id_, {'quads': []})['quads'].append(quad)
self.blank_node_info.setdefault(id_, {'quads': []})['quads'].append(
quad
)

# 3) Create a list of non-normalized blank node identifiers and
# populate it using the keys from the blank node to quads map.
Expand Down Expand Up @@ -105,7 +106,7 @@ def main(self, dataset, options):

# 6) For each hash to identifier list mapping in hash to blank nodes
# map, lexicographically-sorted by hash:
for hash, id_list in sorted(self.hash_to_blank_nodes.items()):
for _hash, id_list in sorted(self.hash_to_blank_nodes.items()):
# 6.1) Create hash path list where each item will be a result of
# running the Hash N-Degree Quads algorithm.
hash_path_list = []
Expand Down Expand Up @@ -157,11 +158,12 @@ def main(self, dataset, options):
for key, component in quad.items():
if key == 'predicate':
continue
if(component['type'] == 'blank node' and not
component['value'].startswith(
self.canonical_issuer.prefix)):
if component['type'] == 'blank node' and not component[
'value'
].startswith(self.canonical_issuer.prefix):
component['value'] = self.canonical_issuer.get_id(
component['value'])
component['value']
)

# 7.2) Add quad copy to the normalized dataset.
normalized.append(serialize_nquad(quad))
Expand All @@ -170,8 +172,10 @@ def main(self, dataset, options):
normalized.sort()

# 8) Return the normalized dataset.
if (options.get('format') == 'application/n-quads' or
options.get('format') == 'application/nquads'):
if (
options.get('format') == 'application/n-quads'
or options.get('format') == 'application/nquads'
):
return ''.join(normalized)
return parse_nquads(''.join(normalized))

Expand Down Expand Up @@ -206,8 +210,7 @@ def hash_first_degree_quads(self, id_):
# matches the reference blank node identifier then use the
# blank node identifier _:a, otherwise, use the blank node
# identifier _:z.
copy[key] = self.modify_first_degree_component(
id_, component, key)
copy[key] = self.modify_first_degree_component(id_, component, key)
nquads.append(serialize_nquad(copy))

# 4) Sort nquads in lexicographical order.
Expand Down Expand Up @@ -301,7 +304,7 @@ def hash_n_degree_quads(self, id_, issuer):
for related in permutation:
# 5.4.4.1) If a canonical identifier has been issued for
# related, append it to path.
if(self.canonical_issuer.has_id(related)):
if self.canonical_issuer.has_id(related):
path += self.canonical_issuer.get_id(related)
# 5.4.4.2) Otherwise:
else:
Expand All @@ -320,9 +323,11 @@ def hash_n_degree_quads(self, id_, issuer):
# path is greater than or equal to the length of chosen
# path and path is lexicographically greater than chosen
# path, then skip to the next permutation.
if(len(chosen_path) != 0 and
len(path) >= len(chosen_path) and
path > chosen_path):
if (
len(chosen_path) != 0
and len(path) >= len(chosen_path)
and path > chosen_path
):
skip_to_next_permutation = True
break

Expand Down Expand Up @@ -352,9 +357,11 @@ def hash_n_degree_quads(self, id_, issuer):
# path is greater than or equal to the length of chosen
# path and path is lexicographically greater than chosen
# path, then skip to the next permutation.
if(len(chosen_path) != 0 and
len(path) >= len(chosen_path) and
path > chosen_path):
if (
len(chosen_path) != 0
and len(path) >= len(chosen_path)
and path > chosen_path
):
skip_to_next_permutation = True
break

Expand Down Expand Up @@ -394,9 +401,11 @@ def create_hash_to_related(self, id_, issuer):
# object, and graph name and it is a blank node that is not
# identified by identifier:
for key, component in quad.items():
if(key != 'predicate' and
component['type'] == 'blank node' and
component['value'] != id_):
if (
key != 'predicate'
and component['type'] == 'blank node'
and component['value'] != id_
):
# 3.1.1) Set hash to the result of the Hash Related Blank
# Node algorithm, passing the blank node identifier for
# component as related, quad, path identifier issuer as
Expand All @@ -405,8 +414,7 @@ def create_hash_to_related(self, id_, issuer):
# respectively.
related = component['value']
position = self.POSITIONS[key]
hash = self.hash_related_blank_node(
related, quad, issuer, position)
hash = self.hash_related_blank_node(related, quad, issuer, position)

# 3.1.2) Add a mapping of hash to the blank node identifier
# for component to hash to related blank nodes map, adding
Expand Down Expand Up @@ -467,17 +475,21 @@ def create_hash_to_related(self, id_, issuer):
# algorithm, passing the blank node identifier for subject as
# related, quad, path identifier issuer as issuer, and p as
# position.
if(quad['subject']['type'] == 'blank node' and
quad['subject']['value'] != id_):
if (
quad['subject']['type'] == 'blank node'
and quad['subject']['value'] != id_
):
related = quad['subject']['value']
position = 'p'
# 3.2) Otherwise, if quad's object is a blank node that does
# not match identifier, to the result of the Hash Related Blank
# Node algorithm, passing the blank node identifier for object
# as related, quad, path identifier issuer as issuer, and r
# as position.
elif(quad['object']['type'] == 'blank node' and
quad['object']['value'] != id_):
elif (
quad['object']['type'] == 'blank node'
and quad['object']['value'] != id_
):
related = quad['object']['value']
position = 'r'
# 3.3) Otherwise, continue to the next quad.
Expand All @@ -487,8 +499,7 @@ def create_hash_to_related(self, id_, issuer):
# 3.4) Add a mapping of hash to the blank node identifier for the
# component that matched (subject or object) to hash to related
# blank nodes map, adding an entry as necessary.
hash = self.hash_related_blank_node(
related, quad, issuer, position)
hash = self.hash_related_blank_node(related, quad, issuer, position)
hash_to_related.setdefault(hash, []).append(related)

return hash_to_related
Expand All @@ -497,6 +508,14 @@ def create_hash_to_related(self, id_, issuer):
def create_hash(self):
return hashlib.sha1()

class RDFC10(URDNA2015):
    """
    RDFC10 implements the RDF Dataset Canonicalization algorithm,
    version 1.0 (RDFC-1.0).

    NOTE(review): currently a stub that inherits URDNA2015 behavior
    unchanged — RDFC-1.0-specific differences (e.g. SHA-256 hashing)
    are not implemented yet. No ``__init__`` override is needed: the
    previous one only delegated to the parent initializer, which Python
    already does implicitly (and an explicit call should use
    ``super().__init__()`` rather than naming the parent class).
    """


def permutations(elements):
"""
Expand Down Expand Up @@ -525,9 +544,10 @@ def permutations(elements):
for i in range(length):
e = elements[i]
is_left = left[e]
if((k is None or e > k) and
((is_left and i > 0 and e > elements[i - 1]) or
(not is_left and i < last and e > elements[i + 1]))):
if (k is None or e > k) and (
(is_left and i > 0 and e > elements[i - 1])
or (not is_left and i < last and e > elements[i + 1])
):
k, pos = e, i

# no more permutations
Expand All @@ -551,4 +571,4 @@ class UnknownFormatError(ValueError):

def __init__(self, message, format):
Exception.__init__(self, message)
self.format = format
self.format = format
40 changes: 23 additions & 17 deletions lib/pyld/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import uuid

from typing import Optional, Callable, Any
from pyld.canon import URDNA2015, URGNA2012, UnknownFormatError
from pyld.canon import URDNA2015, URGNA2012, RDFC10, UnknownFormatError
from pyld.nquads import ParserError, parse_nquads, serialize_nquad, serialize_nquads
from pyld.identifier_issuer import IdentifierIssuer
from .context_resolver import ContextResolver
Expand Down Expand Up @@ -266,8 +266,8 @@ def normalize(input_, options=None):

:param input_: the JSON-LD input to normalize.
:param [options]: the options to use.
[algorithm] the algorithm to use: `URDNA2015` or `URGNA2012`
(default: `URGNA2012`).
[algorithm] the algorithm to use: `RDFC10`, `URDNA2015` or `URGNA2012`
(default: `RDFC10`).
[base] the base IRI to use.
[inputFormat] the format if input is not JSON-LD:
'application/n-quads' for N-Quads.
Expand Down Expand Up @@ -897,8 +897,8 @@ def normalize(self, input_, options):

:param input_: the JSON-LD input to normalize.
:param options: the options to use.
[algorithm] the algorithm to use: `URDNA2015` or `URGNA2012`
(default: `URGNA2012`).
[algorithm] the algorithm to use: `RDFC10`, `URDNA2015` or `URGNA2012`
(default: `RDFC10`).
[base] the base IRI to use.
[contextResolver] internal use only.
[inputFormat] the format if input is not JSON-LD:
Expand All @@ -912,15 +912,15 @@ def normalize(self, input_, options):
"""
# set default options
options = options.copy() if options else {}
options.setdefault('algorithm', 'URGNA2012')
options.setdefault('algorithm', 'RDFC10')
options.setdefault('base', input_ if _is_string(input_) else '')
options.setdefault('documentLoader', _default_document_loader)
options.setdefault('contextResolver',
ContextResolver(_resolved_context_cache, options['documentLoader']))
options.setdefault('extractAllScripts', True)
options.setdefault('processingMode', 'json-ld-1.1')

if options['algorithm'] not in ['URDNA2015', 'URGNA2012']:
if options['algorithm'] not in ['RDFC10', 'URDNA2015', 'URGNA2012']:
raise JsonLdError(
'Unsupported normalization algorithm.',
'jsonld.NormalizeError')
Expand All @@ -946,17 +946,23 @@ def normalize(self, input_, options):
'jsonld.NormalizeError') from cause

# do normalization
if options['algorithm'] == 'URDNA2015':
try:
return URDNA2015().main(dataset, options)
except UnknownFormatError as cause:
raise JsonLdError(
str(cause),
'jsonld.UnknownFormat',
{'format': cause.format}) from cause

if options['algorithm'] == 'RDFC10':
algorithm = RDFC10()
elif options['algorithm'] == 'URDNA2015':
algorithm = URDNA2015()
# assume URGNA2012
return URGNA2012().main(dataset, options)
else:
algorithm = URGNA2012()

try:
# TODO: find a good way to expose identifier map
return algorithm.main(dataset, options) #, algorithm.hash_to_blank_nodes
except UnknownFormatError as cause:
raise JsonLdError(
str(cause),
'jsonld.UnknownFormat',
{'format': cause.format}) from cause


def from_rdf(self, dataset, options):
"""
Expand Down
40 changes: 40 additions & 0 deletions tests/runtests.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,14 @@
'https://w3c.github.io/json-ld-api/tests',
'https://w3c.github.io/json-ld-framing/tests',
'https://github.com/json-ld/normalization/tests',
'https://w3c.github.io/rdf-canon/tests/vocab#'
]

SPEC_DIRS = [
'../specifications/json-ld-api/tests/',
'../specifications/json-ld-framing/tests/',
'../specifications/normalization/tests/',
'../specifications/rdf-canon/tests/'
]

# NOTE: The following TestRunner class can be removed because pytest now
Expand Down Expand Up @@ -1094,6 +1096,44 @@ def write(self, filename):
),
],
},
'rdfc:RDFC10EvalTest': {
'pending': {
'idRegex': [
'.*#test060c$',
'.*#test075c$'
]
},
'skip': {
'idRegex': []
},
'fn': 'normalize',
'params': [
read_test_property('action'),
create_test_options({
'algorithm': 'RDFC10',
'inputFormat': 'application/n-quads',
'format': 'application/n-quads'
})
]
},
# 'rdfc:RDFC10MapTest': {
# 'pending': {
# 'idRegex': [
# ]
# },
# 'skip': {
# 'idRegex': []
# },
# 'fn': 'normalize',
# 'params': [
# read_test_property('action'),
# create_test_options({
# 'algorithm': 'RDFC10',
# 'inputFormat': 'application/n-quads',
# 'format': 'application/n-quads'
# })
# ]
# }
}


Expand Down