From 8cf21f52b6cbaf7e60c3011fd7b285791063cafd Mon Sep 17 00:00:00 2001 From: David de Boer Date: Wed, 4 Mar 2026 12:54:21 +0100 Subject: [PATCH] docs(pipeline): document custom TypeScript executor pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Clarify that Executor is an interface, not just SparqlConstructExecutor - Add TransformExecutor decorator example for post-processing quad streams - Show concrete date-cleaning use case (Dutch dates → ISO 8601) - Link to VocabularyExecutor as a real-world reference - Update opening description to reflect SPARQL-first but not SPARQL-only --- packages/pipeline/README.md | 65 +++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/packages/pipeline/README.md b/packages/pipeline/README.md index 1164437..0af14bc 100644 --- a/packages/pipeline/README.md +++ b/packages/pipeline/README.md @@ -1,9 +1,9 @@ # Pipeline -A framework for transforming large RDF datasets using pure [SPARQL](https://www.w3.org/TR/sparql11-query/) queries. +A framework for transforming large RDF datasets, primarily using [SPARQL](https://www.w3.org/TR/sparql11-query/) queries with TypeScript for the parts that are hard to express in SPARQL alone. - **SPARQL-native.** Data transformations are plain SPARQL query files — portable, transparent, testable and version-controlled. -- **Composable.** Decorators wrap executors and resolvers to add behaviour (provenance, vocabulary detection, data import) without subclassing. +- **Composable.** Executors are an interface: wrap a SPARQL executor with custom TypeScript to handle edge cases like date parsing or string normalisation (see [Executor](#executor)). - **Extensible.** A plugin system lets packages like [@lde/pipeline-void](../pipeline-void) (or your own plugins) hook into the pipeline lifecycle. ## Components @@ -61,7 +61,7 @@ const itemSelector: ItemSelector = { ### Executor -Generates RDF triples. 
`SparqlConstructExecutor` runs a SPARQL CONSTRUCT query with template substitution and variable bindings: +Generates RDF triples. The built-in `SparqlConstructExecutor` runs a SPARQL CONSTRUCT query with template substitution and variable bindings: ```typescript const executor = new SparqlConstructExecutor({ @@ -69,6 +69,65 @@ const executor = new SparqlConstructExecutor({ }); ``` +`Executor` is an interface, so you can implement your own for logic that's hard to express in pure SPARQL — for example, cleaning up messy date notations or converting locale-specific dates to ISO 8601. The decorator pattern lets you wrap a SPARQL executor and post-process its quad stream in TypeScript: + +```typescript +import { DataFactory } from 'n3'; +import type { Quad, Literal } from '@rdfjs/types'; +import type { Dataset, Distribution } from '@lde/dataset'; +import { + type Executor, + type ExecuteOptions, + NotSupported, +} from '@lde/pipeline'; + +class TransformExecutor implements Executor { + constructor( + private readonly inner: Executor, + private readonly transform: ( + quads: AsyncIterable<Quad>, + dataset: Dataset, + ) => AsyncIterable<Quad>, + ) {} + + async execute( + dataset: Dataset, + distribution: Distribution, + options?: ExecuteOptions, + ): Promise<AsyncIterable<Quad> | NotSupported> { + const result = await this.inner.execute(dataset, distribution, options); + if (result instanceof NotSupported) return result; + return this.transform(result, dataset); + } +} +``` + +Then use it to wrap any SPARQL executor: + +```typescript +new Stage({ + name: 'dates', + executors: new TransformExecutor( + await SparqlConstructExecutor.fromFile('dates.rq'), + async function* (quads) { + for await (const quad of quads) { + if (quad.object.termType === 'Literal' && isMessyDate(quad.object)) { + const cleaned = DataFactory.literal( + parseDutchDate(quad.object.value), + DataFactory.namedNode('http://www.w3.org/2001/XMLSchema#date'), + ); + yield DataFactory.quad(quad.subject, quad.predicate, cleaned); + } else { 
+ yield quad; + } + } + }, + ), +}); +``` + +This keeps SPARQL doing the heavy lifting while TypeScript handles the edge cases. See [@lde/pipeline-void](../pipeline-void)'s `VocabularyExecutor` for a real-world example of this pattern. + ### Writer Writes generated quads to a destination: