From f72d03d7c9c59a9852f24cf1632fce666bcc9488 Mon Sep 17 00:00:00 2001 From: James Ross Date: Fri, 13 Mar 2026 18:05:27 -0700 Subject: [PATCH 1/4] feat: add content attachment metadata --- CHANGELOG.md | 2 + README.md | 13 ++- ROADMAP.md | 16 ++- docs/ROADMAP/COMPLETED.md | 1 + docs/specs/CONTENT_ATTACHMENT.md | 16 ++- index.d.ts | 29 ++++- src/domain/services/KeyCodec.js | 14 +++ src/domain/services/PatchBuilderV2.js | 79 +++++++++++++- src/domain/warp/PatchSession.js | 10 +- src/domain/warp/_wiredMethods.d.ts | 8 ++ src/domain/warp/query.methods.js | 92 +++++++++++++--- src/infrastructure/adapters/CasBlobAdapter.js | 2 +- src/ports/BlobStoragePort.js | 2 +- .../api/content-attachment.test.js | 41 +++++++ test/type-check/consumer.ts | 9 +- test/unit/domain/WarpGraph.content.test.js | 82 ++++++++++++++ .../WarpGraph.apiSurface.test.js.snap | 14 ++- .../services/PatchBuilderV2.content.test.js | 100 ++++++++++++++++-- 18 files changed, 467 insertions(+), 63 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a0978edb..a3e7b917 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- **Content attachment metadata API** — `attachContent()` and `attachEdgeContent()` now accept optional `{ mime, size }` metadata hints, persist byte size alongside the `_content` OID, and expose `getContentMeta()` / `getEdgeContentMeta()` for structured `{ oid, mime, size }` reads without manual `_content.*` property handling. - **Streaming transitive closure traversal** — Added `transitiveClosureStream()` to the traversal stack so callers can consume reachability edges lazily as an `AsyncGenerator<{ from, to }>` without materializing the full closure array. The existing `transitiveClosure()` API remains and now collects from the stream for backward compatibility. 
- **First-class sync trust configuration** — `WarpGraph.open({ trust })` and `graph.syncWith(..., { trust })` now expose an explicit public trust-config surface for sync evaluation instead of relying on hidden controller wiring alone. - **Fluent `WarpStateV5` test builder** — Added `createStateBuilder()` in `test/helpers/stateBuilder.js` so state-heavy tests can seed nodes, edges, removals, properties, frontier state, and graph materialization through one fluent helper instead of ad hoc OR-Set/LWW mutation. @@ -19,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- **Roadmap reconciled after PR #67 / #68 merges** — `ROADMAP.md` and `docs/ROADMAP/COMPLETED.md` now reflect the merged pre-push gate regression work (`B168`) and the current `main` baseline before the issue-45 slice branches off. - **Large-graph traversal memory profile** — `topologicalSort()` now has a lightweight mode that avoids retaining discovery adjacency when callers do not need it. `levels()` and `transitiveReduction()` were refactored to re-fetch neighbors on demand instead of pinning full topo adjacency in memory, reducing steady-state large-graph working sets. - **Roadmap reconciled after B87 merge** — `ROADMAP.md` now treats the Markdown code-sample linter as merged work on `main`, advances the CI/tooling wave to start at `B88`, and records the follow-up backlog items for pre-push gate-message regression coverage (`B168`) and archived-doc status guardrails (`B169`). - **Surface validation accounting** — The declaration surface checker now distinguishes runtime-backed exports from type-only manifest entries and understands namespace declarations, which makes the type-surface contract tighter without forcing runtime exports for pure types. 
diff --git a/README.md b/README.md index f5bdc2d7..1fe88272 100644 --- a/README.md +++ b/README.md @@ -458,21 +458,28 @@ Attach content-addressed blobs to nodes and edges as first-class payloads (Paper ```javascript const patch = await graph.createPatch(); patch.addNode('adr:0007'); // sync — queues a NodeAdd op -await patch.attachContent('adr:0007', '# ADR 0007\n\nDecision text...'); // async — writes blob +await patch.attachContent('adr:0007', '# ADR 0007\n\nDecision text...', { + mime: 'text/markdown', +}); // async — writes blob + records metadata await patch.commit(); // Read content back const buffer = await graph.getContent('adr:0007'); // Uint8Array | null const oid = await graph.getContentOid('adr:0007'); // hex SHA or null +const meta = await graph.getContentMeta('adr:0007'); +// { oid: 'abc123...', mime: 'text/markdown', size: 28 } // Edge content works the same way (assumes nodes and edge already exist) const patch2 = await graph.createPatch(); -await patch2.attachEdgeContent('a', 'b', 'rel', 'edge payload'); +await patch2.attachEdgeContent('a', 'b', 'rel', 'edge payload', { + mime: 'text/plain', +}); await patch2.commit(); const edgeBuf = await graph.getEdgeContent('a', 'b', 'rel'); +const edgeMeta = await graph.getEdgeContentMeta('a', 'b', 'rel'); ``` -Content blobs survive `git gc` — their OIDs are embedded in the patch commit tree and checkpoint tree, keeping them reachable. If a live `_content` reference points at a missing blob anyway (for example due to manual corruption), `getContent()` / `getEdgeContent()` throw instead of silently returning empty bytes. +Content blobs survive `git gc` — their OIDs are embedded in the patch commit tree and checkpoint tree, keeping them reachable. `attachContent()` / `attachEdgeContent()` also persist byte-size metadata automatically and will store a MIME hint when provided.
Historical attachments created before metadata support may still return `mime: null` / `size: null` from the metadata APIs until they are rewritten. If a live `_content` reference points at a missing blob anyway (for example due to manual corruption), `getContent()` / `getEdgeContent()` throw instead of silently returning empty bytes. ### Writer API diff --git a/ROADMAP.md b/ROADMAP.md index d8ff691e..55c404e3 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,7 +1,7 @@ # ROADMAP — @git-stunts/git-warp > **Current version:** v14.0.0 -> **Last reconciled:** 2026-03-12 (main after PR #66 merge; 26 active standalone items remain after trust/serve hardening, type-surface cleanup, large-graph traversal work, test-infra extraction, the constructor-default lint cleanup, checkpoint content-anchor batching, tree-construction determinism fuzzing, CI gate dedupe, the explicit type-only export manifest split, and the merged Markdown code-sample lint gate) +> **Last reconciled:** 2026-03-13 (main after PR #67 / #68 merges; 25 active standalone items remain after trust/serve hardening, type-surface cleanup, large-graph traversal work, test-infra extraction, the constructor-default lint cleanup, checkpoint content-anchor batching, tree-construction determinism fuzzing, CI gate dedupe, the explicit type-only export manifest split, the merged Markdown code-sample lint gate, the pre-push gate regression harness, and the missing-content blob error hardening) > **Completed milestones:** [docs/ROADMAP/COMPLETED.md](docs/ROADMAP/COMPLETED.md) --- @@ -204,7 +204,7 @@ P1 is complete on `v15`: B36 and B37 landed as the shared test-foundation pass, ### P2 — CI & Tooling (one batch PR) -`B83`, `B85`, `B57`, `B86`, and `B87` are now merged on `main`. The repo now runs both markdownlint and the Markdown JS/TS code-sample linter in the CI fast gate and the local `scripts/hooks/pre-push` firewall. Remaining P2 work starts at B88. 
That merge also promoted one follow-up item, B168, so the local hook's gate labels and quick-mode messaging now have their own regression-coverage task. B123 is still the largest item and may need to split out if the PR gets too big. +`B83`, `B85`, `B57`, `B86`, `B87`, and `B168` are now merged on `main`. The repo now runs both markdownlint and the Markdown JS/TS code-sample linter in the CI fast gate and the local `scripts/hooks/pre-push` firewall, and the hook's gate labels/quick-mode messaging now have dedicated regression coverage. Remaining P2 work starts at B88. B123 is still the largest item and may need to split out if the PR gets too big. | ID | Item | Depends on | Effort | | ---- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------- | ------ | @@ -219,8 +219,6 @@ P1 is complete on `v15`: B36 and B37 landed as the shared test-foundation pass, | B128 | **DOCS CONSISTENCY PREFLIGHT** — automated pass in `release:preflight` verifying changelog/readme/guide updates for behavior changes in hot paths (materialize, checkpoint, sync). From BACKLOG 2026-02-28. | — | S | | B12 | **DOCS-VERSION-SYNC PRE-COMMIT CHECK** — grep version literals in .md files against `package.json` | — | S | | B43 | **VITEST EXPLICIT RUNTIME EXCLUDES** — prevent accidental local runs of Docker-only suites | — | S | -| B168 | **PRE-PUSH GATE LABEL REGRESSION TEST** — add a lightweight regression test or shared source for `scripts/hooks/pre-push` gate numbering and quick-mode messaging so local hook text cannot drift from the actual gate layout or CI ordering. From PR #66 review follow-up. | — | S | - ### P3 — Type Safety & Surface No hard dependencies. Pick up opportunistically after P2. 
@@ -337,9 +335,9 @@ Complete on `v15`: **B80** and **B99**. #### Wave 2: CI & Tooling (P2, one batch PR) -3. **B88, B119, B123, B128, B12, B43, B168** +3. **B88, B119, B123, B128, B12, B43** -Internal chain: **B97 already resolved** → B85 → B57. That chain is complete on `main`, and B168 remains the hook-message drift follow-up from the B87 review cycle. B123 remains the largest remaining item and may need to split out. +Internal chain: **B97 already resolved** → B85 → B57. That chain is complete on `main`, and B168 is merged as the hook-message drift follow-up from the B87 review cycle. B123 remains the largest remaining item and may need to split out. #### Wave 3: Type Surface (P3) @@ -397,8 +395,8 @@ B158 (P7) ──→ B159 (P7) CDC seek cache | **Milestone (M12)** | 18 | B66, B67, B70, B73, B75, B105–B115, B117, B118 | | **Milestone (M13)** | 1 | B116 (internal: DONE; wire-format: DEFERRED) | | **Milestone (M14)** | 16 | B130–B145 | -| **Standalone** | 26 | B12, B28, B34–B35, B43, B53, B54, B76, B79, B88, B96, B98, B102–B104, B119, B123, B127–B129, B147, B152, B155–B156, B168–B169 | -| **Standalone (done)** | 61 | B19, B22, B26, B36–B37, B44, B46, B47, B48–B52, B55, B57, B71, B72, B77, B78, B80–B87, B89–B95, B97, B99–B100, B120–B122, B124, B125, B126, B146, B148–B151, B153, B154, B157–B165, B167 | +| **Standalone** | 25 | B12, B28, B34–B35, B43, B53, B54, B76, B79, B88, B96, B98, B102–B104, B119, B123, B127–B129, B147, B152, B155–B156, B169 | +| **Standalone (done)** | 62 | B19, B22, B26, B36–B37, B44, B46, B47, B48–B52, B55, B57, B71, B72, B77, B78, B80–B87, B89–B95, B97, B99–B100, B120–B122, B124, B125, B126, B146, B148–B151, B153, B154, B157–B168 | | **Deferred** | 7 | B4, B7, B16, B20, B21, B27, B101 | | **Rejected** | 7 | B5, B6, B13, B17, B18, B25, B45 | | **Total tracked** | **146** total; 61 standalone done | | @@ -505,7 +503,7 @@ B158 (P7) ──→ B159 (P7) CDC seek cache Every milestone has a hard gate. No milestone blurs into the next. 
All milestones are complete: M10 → M12 → M13 (internal) → M11 → M14. M13 wire-format cutover remains deferred by ADR 3 readiness gates. -The active backlog is **26 standalone items** sorted into **8 priority tiers** (P0–P7) with **6 execution waves**. Wave 1 is complete, and Wave 2 now starts at B88 in the CI & Tooling pack, with B168 still active as the remaining hook-message drift follow-up. See [Execution Order](#execution-order) for the full sequence. +The active backlog is **25 standalone items** sorted into **8 priority tiers** (P0–P7) with **6 execution waves**. Wave 1 is complete, and Wave 2 now starts at B88 in the CI & Tooling pack. See [Execution Order](#execution-order) for the full sequence. Rejected items live in `GRAVEYARD.md`. Resurrections require an RFC. `BACKLOG.md` retired — all intake goes directly into this file (policy in `CLAUDE.md`). diff --git a/docs/ROADMAP/COMPLETED.md b/docs/ROADMAP/COMPLETED.md index 11329dd8..7b16eb7a 100644 --- a/docs/ROADMAP/COMPLETED.md +++ b/docs/ROADMAP/COMPLETED.md @@ -397,6 +397,7 @@ Investigation revealed the correct approach is a two-phase split: | B85 | ~~**TYPE-ONLY EXPORT MANIFEST SECTION**~~ — **DONE (`v15`).** Added explicit `typeExports` to `type-surface.m8.json` and taught `check-dts-surface` to fail on misplaced or duplicate entries across `exports` and `typeExports`. | | B86 | ~~**MARKDOWNLINT CI GATE**~~ — **DONE (`v15`).** Added `npm run lint:md` with focused `MD040` enforcement and wired it into CI, then labeled the repo’s bare fenced code blocks so Markdown docs/examples pass the new gate immediately. | | B87 | ~~**CODE SAMPLE LINTER**~~ — **DONE.** Added `scripts/lint-markdown-code-samples.js` and `npm run lint:md:code`, which extract fenced JavaScript/TypeScript samples from Markdown and syntax-check them with line-accurate diagnostics. Wired into the CI fast gate and local pre-push firewall alongside markdownlint, with failures for malformed mixed-marker fences and unterminated JS/TS blocks. 
| +| B168 | ~~**PRE-PUSH GATE LABEL REGRESSION TEST**~~ — **DONE.** Added a focused behavioral harness for `scripts/hooks/pre-push` that exercises the real shell hook with stubbed commands, proves quick mode skips Gate 8, and locks Gate 1–8 failure labels to their runtime output. | | B95 | ~~**NAMESPACE EXPORT SUPPORT**~~ — **DONE (`v15`).** The declaration surface validator now handles namespace exports in `.d.ts` parsing. | | B97 | ~~**AUDIT MANIFEST vs `index.js` DRIFT**~~ — **DONE (`v15`).** Runtime exports were reconciled with the public surface, and the surface checker now distinguishes runtime-backed vs type-only manifest entries. | | B99 | ~~**DETERMINISM FUZZER FOR TREE CONSTRUCTION**~~ — **DONE (`v15`).** Added seeded property tests that prove stable tree OIDs when `PatchBuilderV2` content anchor order is permuted internally and when `CheckpointService.createV5()` sees the same content properties in different insertion orders. | diff --git a/docs/specs/CONTENT_ATTACHMENT.md b/docs/specs/CONTENT_ATTACHMENT.md index a090e62f..2835ff01 100644 --- a/docs/specs/CONTENT_ATTACHMENT.md +++ b/docs/specs/CONTENT_ATTACHMENT.md @@ -92,11 +92,15 @@ The hybrid approach was implemented: dedicated methods that encapsulate CAS deta ```javascript const patch = await graph.createPatch(); patch.addNode('adr:0007'); -await patch.attachContent('adr:0007', '# ADR 0007\n\nDecision text...'); +await patch.attachContent('adr:0007', '# ADR 0007\n\nDecision text...', { + mime: 'text/markdown', +}); await patch.commit(); // Edge content -await patch.attachEdgeContent('a', 'b', 'rel', 'edge payload'); +await patch.attachEdgeContent('a', 'b', 'rel', 'edge payload', { + mime: 'text/plain', +}); ``` Both methods are async (they call `writeBlob()` internally) and return the builder for chaining. 
@@ -106,15 +110,18 @@ Both methods are async (they call `writeBlob()` internally) and return the build ```javascript const buffer = await graph.getContent('adr:0007'); // Uint8Array | null const oid = await graph.getContentOid('adr:0007'); // string | null +const meta = await graph.getContentMeta('adr:0007'); // Edge content const edgeBuf = await graph.getEdgeContent('a', 'b', 'rel'); const edgeOid = await graph.getEdgeContentOid('a', 'b', 'rel'); +const edgeMeta = await graph.getEdgeContentMeta('a', 'b', 'rel'); ``` `getContent()` returns raw `Uint8Array` bytes. Consumers wanting text should decode with `new TextDecoder().decode(buffer)`. If `_content` points at a missing blob OID, `getContent()` throws instead of silently returning empty bytes. `getEdgeContent()` has the same byte-decoding and missing-blob semantics for edge `_content` references. +`getContentMeta()` / `getEdgeContentMeta()` return `{ oid, mime, size }` when metadata exists, or `null` when no attachment exists. Historical attachments created before metadata support may still surface `mime: null` / `size: null`. #### Constant @@ -125,16 +132,15 @@ import { CONTENT_PROPERTY_KEY } from '@git-stunts/git-warp'; ### 3.4 Content Metadata -Optionally, additional system properties can store content metadata alongside the CAS reference: +git-warp stores content metadata in sibling system properties alongside the `_content` reference: | Property | Purpose | Example | |---|---|---| | `_content` | CAS blob SHA (required) | `"a1b2c3d4..."` | | `_content.size` | Byte length | `4096` | | `_content.mime` | MIME type hint | `"text/markdown"` | -| `_content.encoding` | Content encoding | `"utf-8"` | -Whether git-warp stores metadata or leaves it to consumers is an implementation decision. A minimal v1 could store only the SHA and let consumers handle metadata. +`attachContent()` / `attachEdgeContent()` always persist `_content.size` from the actual encoded byte length. 
If callers provide `{ mime }`, the MIME hint is stored in `_content.mime`; otherwise the metadata API returns `mime: null`. --- diff --git a/index.d.ts b/index.d.ts index 84f3ee57..d79c8793 100644 --- a/index.d.ts +++ b/index.d.ts @@ -1416,6 +1416,17 @@ export interface TemporalQuery { ): Promise; } +interface ContentAttachmentOptions { + mime?: string | null; + size?: number | null; +} + +interface ContentMeta { + oid: string; + mime: string | null; + size: number | null; +} + // ============================================================================ // PatchV2 & PatchBuilderV2 // ============================================================================ @@ -1460,9 +1471,9 @@ export class PatchBuilderV2 { /** Sets a property on an edge. */ setEdgeProperty(from: string, to: string, label: string, key: string, value: unknown): PatchBuilderV2; /** Attaches content to a node (writes blob + sets _content property). */ - attachContent(nodeId: string, content: Uint8Array | string): Promise; + attachContent(nodeId: string, content: Uint8Array | string, metadata?: ContentAttachmentOptions): Promise; /** Attaches content to an edge (writes blob + sets _content edge property). */ - attachEdgeContent(from: string, to: string, label: string, content: Uint8Array | string): Promise; + attachEdgeContent(from: string, to: string, label: string, content: Uint8Array | string, metadata?: ContentAttachmentOptions): Promise; /** Builds the PatchV2 object without committing. */ build(): PatchV2; /** Commits the patch to the graph and returns the commit SHA. */ @@ -1494,9 +1505,9 @@ export class PatchSession { /** Sets a property on an edge. */ setEdgeProperty(from: string, to: string, label: string, key: string, value: unknown): this; /** Attaches content to a node (writes blob + sets _content property). 
*/ - attachContent(nodeId: string, content: Uint8Array | string): Promise; + attachContent(nodeId: string, content: Uint8Array | string, metadata?: ContentAttachmentOptions): Promise; /** Attaches content to an edge (writes blob + sets _content edge property). */ - attachEdgeContent(from: string, to: string, label: string, content: Uint8Array | string): Promise; + attachEdgeContent(from: string, to: string, label: string, content: Uint8Array | string, metadata?: ContentAttachmentOptions): Promise; /** Builds the PatchV2 object without committing. */ build(): PatchV2; /** Commits the patch with CAS protection. */ @@ -1808,6 +1819,11 @@ export default class WarpGraph { */ getContentOid(nodeId: string): Promise; + /** + * Gets structured content metadata for a node attachment, or null if none is attached. + */ + getContentMeta(nodeId: string): Promise; + /** * Gets the content blob for a node, or null if none is attached. * Returns raw bytes; use `new TextDecoder().decode(result)` for text. @@ -1819,6 +1835,11 @@ export default class WarpGraph { */ getEdgeContentOid(from: string, to: string, label: string): Promise; + /** + * Gets structured content metadata for an edge attachment, or null if none is attached. + */ + getEdgeContentMeta(from: string, to: string, label: string): Promise; + /** * Gets the content blob for an edge, or null if none is attached. * Returns raw bytes; use `new TextDecoder().decode(result)` for text. diff --git a/src/domain/services/KeyCodec.js b/src/domain/services/KeyCodec.js index 1d6b87e6..fbe0ff7a 100644 --- a/src/domain/services/KeyCodec.js +++ b/src/domain/services/KeyCodec.js @@ -25,6 +25,20 @@ export const EDGE_PROP_PREFIX = '\x01'; */ export const CONTENT_PROPERTY_KEY = '_content'; +/** + * Well-known property key for attached content MIME metadata. + * Stores a MIME type hint for the blob referenced by `_content`. 
+ * @const {string} + */ +export const CONTENT_MIME_PROPERTY_KEY = '_content.mime'; + +/** + * Well-known property key for attached content byte-size metadata. + * Stores the byte length of the blob referenced by `_content`. + * @const {string} + */ +export const CONTENT_SIZE_PROPERTY_KEY = '_content.size'; + /** * Encodes an edge key to a string for Map storage. * diff --git a/src/domain/services/PatchBuilderV2.js b/src/domain/services/PatchBuilderV2.js index 5d264b6b..b7bff42a 100644 --- a/src/domain/services/PatchBuilderV2.js +++ b/src/domain/services/PatchBuilderV2.js @@ -24,7 +24,14 @@ import { createEdgePropSetV2, createPatchV2, } from '../types/WarpTypesV2.js'; -import { encodeEdgeKey, FIELD_SEPARATOR, EDGE_PROP_PREFIX, CONTENT_PROPERTY_KEY } from './KeyCodec.js'; +import { + encodeEdgeKey, + FIELD_SEPARATOR, + EDGE_PROP_PREFIX, + CONTENT_PROPERTY_KEY, + CONTENT_MIME_PROPERTY_KEY, + CONTENT_SIZE_PROPERTY_KEY, +} from './KeyCodec.js'; import { lowerCanonicalOp } from './OpNormalizer.js'; import { encodePatchMessage, decodePatchMessage, detectMessageKind } from './WarpMessageCodec.js'; import { buildWriterRef } from '../utils/RefLayout.js'; @@ -92,6 +99,60 @@ function _assertNoReservedBytes(value, label) { } } +/** + * Calculates the persisted byte length of attached content. + * + * String content is encoded as UTF-8 before hashing/storage, so metadata + * should reflect the encoded byte length rather than JavaScript code units. + * + * @param {Uint8Array|string} content + * @returns {number} + */ +function byteSizeOfContent(content) { + return typeof content === 'string' + ? new TextEncoder().encode(content).byteLength + : content.byteLength; +} + +/** + * Validates and normalizes optional content metadata for attachment APIs. + * + * Size is always persisted, either computed from the content bytes or + * validated against the provided hint when callers pass `{ size }`. 
+ * + * @param {Uint8Array|string} content + * @param {{ mime?: string|null, size?: number|null }|undefined} metadata + * @returns {{ mime: string|null, size: number }} + */ +function normalizeContentMetadata(content, metadata) { + if (metadata !== undefined && (metadata === null || typeof metadata !== 'object' || Array.isArray(metadata))) { + throw new Error('content metadata must be an object when provided'); + } + + const actualSize = byteSizeOfContent(content); + const providedSize = metadata?.size; + if (providedSize !== undefined && providedSize !== null) { + if (!Number.isInteger(providedSize) || providedSize < 0) { + throw new Error('content metadata size must be a non-negative integer'); + } + if (providedSize !== actualSize) { + throw new Error(`content metadata size ${providedSize} does not match actual byte size ${actualSize}`); + } + } + + const providedMime = metadata?.mime; + if (providedMime !== undefined && providedMime !== null) { + if (typeof providedMime !== 'string' || providedMime.trim() === '') { + throw new Error('content metadata mime must be a non-empty string when provided'); + } + } + + return { + mime: typeof providedMime === 'string' ? providedMime : null, + size: actualSize, + }; +} + /** * Fluent builder for creating WARP v5 patches with dots and observed-remove semantics. 
*/ @@ -532,18 +593,22 @@ export class PatchBuilderV2 { * * @param {string} nodeId - The node ID to attach content to * @param {Uint8Array|string} content - The content to attach + * @param {{ mime?: string|null, size?: number|null }} [metadata] - Optional metadata hint * @returns {Promise} This builder instance for method chaining */ - async attachContent(nodeId, content) { + async attachContent(nodeId, content, metadata = undefined) { this._assertNotCommitted(); // Validate identifiers before writing blob to avoid orphaned blobs _assertNoReservedBytes(nodeId, 'nodeId'); _assertNoReservedBytes(CONTENT_PROPERTY_KEY, 'key'); this._assertNodeExistsForContent(nodeId); + const normalizedMeta = normalizeContentMetadata(content, metadata); const oid = this._blobStorage - ? await this._blobStorage.store(content, { slug: `${this._graphName}/${nodeId}` }) + ? await this._blobStorage.store(content, { slug: `${this._graphName}/${nodeId}`, mime: normalizedMeta.mime, size: normalizedMeta.size }) : await this._persistence.writeBlob(content); this.setProperty(nodeId, CONTENT_PROPERTY_KEY, oid); + this.setProperty(nodeId, CONTENT_SIZE_PROPERTY_KEY, normalizedMeta.size); + this.setProperty(nodeId, CONTENT_MIME_PROPERTY_KEY, normalizedMeta.mime); this._contentBlobs.push(oid); return this; } @@ -556,9 +621,10 @@ export class PatchBuilderV2 { * @param {string} to - Target node ID * @param {string} label - Edge label * @param {Uint8Array|string} content - The content to attach + * @param {{ mime?: string|null, size?: number|null }} [metadata] - Optional metadata hint * @returns {Promise} This builder instance for method chaining */ - async attachEdgeContent(from, to, label, content) { + async attachEdgeContent(from, to, label, content, metadata = undefined) { this._assertNotCommitted(); // Validate identifiers before writing blob to avoid orphaned blobs _assertNoReservedBytes(from, 'from'); @@ -566,10 +632,13 @@ export class PatchBuilderV2 { _assertNoReservedBytes(label, 'label'); 
_assertNoReservedBytes(CONTENT_PROPERTY_KEY, 'key'); this._assertEdgeExists(from, to, label); + const normalizedMeta = normalizeContentMetadata(content, metadata); const oid = this._blobStorage - ? await this._blobStorage.store(content, { slug: `${this._graphName}/${from}/${to}/${label}` }) + ? await this._blobStorage.store(content, { slug: `${this._graphName}/${from}/${to}/${label}`, mime: normalizedMeta.mime, size: normalizedMeta.size }) : await this._persistence.writeBlob(content); this.setEdgeProperty(from, to, label, CONTENT_PROPERTY_KEY, oid); + this.setEdgeProperty(from, to, label, CONTENT_SIZE_PROPERTY_KEY, normalizedMeta.size); + this.setEdgeProperty(from, to, label, CONTENT_MIME_PROPERTY_KEY, normalizedMeta.mime); this._contentBlobs.push(oid); return this; } diff --git a/src/domain/warp/PatchSession.js b/src/domain/warp/PatchSession.js index 81e79339..bbf08bdb 100644 --- a/src/domain/warp/PatchSession.js +++ b/src/domain/warp/PatchSession.js @@ -148,12 +148,13 @@ export class PatchSession { * * @param {string} nodeId - The node ID to attach content to * @param {Uint8Array|string} content - The content to attach + * @param {{ mime?: string|null, size?: number|null }} [metadata] - Optional content metadata * @returns {Promise} This session for chaining * @throws {WriterError} SESSION_COMMITTED if already committed */ - async attachContent(nodeId, content) { + async attachContent(nodeId, content, metadata = undefined) { this._ensureNotCommitted(); - await this._builder.attachContent(nodeId, content); + await this._builder.attachContent(nodeId, content, metadata); return this; } @@ -164,13 +165,14 @@ export class PatchSession { * @param {string} to - Target node ID * @param {string} label - Edge label/type * @param {Uint8Array|string} content - The content to attach + * @param {{ mime?: string|null, size?: number|null }} [metadata] - Optional content metadata * @returns {Promise} This session for chaining * @throws {WriterError} SESSION_COMMITTED if already 
committed */ // eslint-disable-next-line max-params -- direct delegate matching PatchBuilderV2 signature - async attachEdgeContent(from, to, label, content) { + async attachEdgeContent(from, to, label, content, metadata = undefined) { this._ensureNotCommitted(); - await this._builder.attachEdgeContent(from, to, label, content); + await this._builder.attachEdgeContent(from, to, label, content, metadata); return this; } diff --git a/src/domain/warp/_wiredMethods.d.ts b/src/domain/warp/_wiredMethods.d.ts index cf9ad3bd..4c712bae 100644 --- a/src/domain/warp/_wiredMethods.d.ts +++ b/src/domain/warp/_wiredMethods.d.ts @@ -29,6 +29,12 @@ interface TranslationCostResult { breakdown: { nodeLoss: number; edgeLoss: number; propLoss: number }; } +interface ContentMeta { + oid: string; + mime: string | null; + size: number | null; +} + /** * Lightweight status snapshot. */ @@ -168,6 +174,8 @@ declare module '../WarpGraph.js' { hasNode(nodeId: string): Promise; getNodeProps(nodeId: string): Promise | null>; getEdgeProps(from: string, to: string, label: string): Promise | null>; + getContentMeta(nodeId: string): Promise; + getEdgeContentMeta(from: string, to: string, label: string): Promise; neighbors(nodeId: string, direction?: 'outgoing' | 'incoming' | 'both', edgeLabel?: string): Promise>; getStateSnapshot(): Promise; getNodes(): Promise; diff --git a/src/domain/warp/query.methods.js b/src/domain/warp/query.methods.js index 4be3bcbd..a31b2493 100644 --- a/src/domain/warp/query.methods.js +++ b/src/domain/warp/query.methods.js @@ -8,7 +8,16 @@ */ import { orsetContains, orsetElements } from '../crdt/ORSet.js'; -import { decodePropKey, isEdgePropKey, decodeEdgePropKey, encodeEdgeKey, decodeEdgeKey, CONTENT_PROPERTY_KEY } from '../services/KeyCodec.js'; +import { + decodePropKey, + isEdgePropKey, + decodeEdgePropKey, + encodeEdgeKey, + decodeEdgeKey, + CONTENT_PROPERTY_KEY, + CONTENT_MIME_PROPERTY_KEY, + CONTENT_SIZE_PROPERTY_KEY, +} from '../services/KeyCodec.js'; import { 
compareEventIds } from '../utils/EventId.js'; import { cloneStateV5 } from '../services/JoinReducer.js'; import QueryBuilder from '../services/QueryBuilder.js'; @@ -342,6 +351,36 @@ export async function translationCost(configA, configB) { return computeTranslationCost(configA, configB, s); } +/** + * Extracts structured content metadata from a property bag. + * + * Historical graphs may only have `_content`, in which case `mime` and `size` + * return as null until the content is re-attached through the metadata-aware + * APIs. + * + * @param {Record|null} props + * @returns {{ oid: string, mime: string|null, size: number|null }|null} + */ +function extractContentMeta(props) { + if (!props) { + return null; + } + const oid = props[CONTENT_PROPERTY_KEY]; + if (typeof oid !== 'string') { + return null; + } + const mimeValue = props[CONTENT_MIME_PROPERTY_KEY]; + const sizeValue = props[CONTENT_SIZE_PROPERTY_KEY]; + const size = typeof sizeValue === 'number' && Number.isInteger(sizeValue) && sizeValue >= 0 + ? sizeValue + : null; + return { + oid, + mime: typeof mimeValue === 'string' ? mimeValue : null, + size, + }; +} + /** * Gets the content blob OID for a node, or null if none is attached. * @@ -351,12 +390,21 @@ export async function translationCost(configA, configB) { * @throws {import('../errors/QueryError.js').default} If no cached state exists (code: `E_NO_STATE`) */ export async function getContentOid(nodeId) { + const meta = await getContentMeta.call(this, nodeId); + return meta?.oid ?? null; +} + +/** + * Gets structured content metadata for a node attachment, or null if none is attached. 
+ * + * @this {import('../WarpGraph.js').default} + * @param {string} nodeId - The node ID to check + * @returns {Promise<{ oid: string, mime: string|null, size: number|null }|null>} Content metadata or null + * @throws {import('../errors/QueryError.js').default} If no cached state exists (code: `E_NO_STATE`) + */ +export async function getContentMeta(nodeId) { const props = await getNodeProps.call(this, nodeId); - if (!props) { - return null; - } - const oid = props[CONTENT_PROPERTY_KEY]; - return (typeof oid === 'string') ? oid : null; + return extractContentMeta(props); } /** @@ -374,10 +422,11 @@ export async function getContentOid(nodeId) { * blob object. */ export async function getContent(nodeId) { - const oid = await getContentOid.call(this, nodeId); - if (!oid) { + const meta = await getContentMeta.call(this, nodeId); + if (!meta) { return null; } + const { oid } = meta; if (this._blobStorage) { return await this._blobStorage.retrieve(oid); } @@ -395,13 +444,23 @@ export async function getContent(nodeId) { * @throws {import('../errors/QueryError.js').default} If no cached state exists (code: `E_NO_STATE`) */ export async function getEdgeContentOid(from, to, label) { + const meta = await getEdgeContentMeta.call(this, from, to, label); + return meta?.oid ?? null; +} + +/** + * Gets structured content metadata for an edge attachment, or null if none is attached. 
+ * + * @this {import('../WarpGraph.js').default} + * @param {string} from - Source node ID + * @param {string} to - Target node ID + * @param {string} label - Edge label + * @returns {Promise<{ oid: string, mime: string|null, size: number|null }|null>} Content metadata or null + * @throws {import('../errors/QueryError.js').default} If no cached state exists (code: `E_NO_STATE`) + */ +export async function getEdgeContentMeta(from, to, label) { const props = await getEdgeProps.call(this, from, to, label); - if (!props) { - return null; - } - // getEdgeProps returns a plain object — use bracket access - const oid = props[CONTENT_PROPERTY_KEY]; - return (typeof oid === 'string') ? oid : null; + return extractContentMeta(props); } /** @@ -421,10 +480,11 @@ export async function getEdgeContentOid(from, to, label) { * blob object. */ export async function getEdgeContent(from, to, label) { - const oid = await getEdgeContentOid.call(this, from, to, label); - if (!oid) { + const meta = await getEdgeContentMeta.call(this, from, to, label); + if (!meta) { return null; } + const { oid } = meta; if (this._blobStorage) { return await this._blobStorage.retrieve(oid); } diff --git a/src/infrastructure/adapters/CasBlobAdapter.js b/src/infrastructure/adapters/CasBlobAdapter.js index 29ef2c6d..1d5ad2d8 100644 --- a/src/infrastructure/adapters/CasBlobAdapter.js +++ b/src/infrastructure/adapters/CasBlobAdapter.js @@ -96,7 +96,7 @@ export default class CasBlobAdapter extends BlobStoragePort { * * @override * @param {Uint8Array|string} content - * @param {{ slug?: string }} [options] + * @param {{ slug?: string, mime?: string|null, size?: number|null }} [options] * @returns {Promise} */ async store(content, options) { diff --git a/src/ports/BlobStoragePort.js b/src/ports/BlobStoragePort.js index 011795dc..cce9ca60 100644 --- a/src/ports/BlobStoragePort.js +++ b/src/ports/BlobStoragePort.js @@ -12,7 +12,7 @@ export default class BlobStoragePort { * Stores content and returns a storage 
identifier (e.g. CAS tree OID). * * @param {Uint8Array|string} _content - The content to store - * @param {{ slug?: string }} [_options] - Optional metadata + * @param {{ slug?: string, mime?: string|null, size?: number|null }} [_options] - Optional storage metadata * @returns {Promise} Storage identifier for retrieval * @abstract */ diff --git a/test/integration/api/content-attachment.test.js b/test/integration/api/content-attachment.test.js index a2bb79db..072f7881 100644 --- a/test/integration/api/content-attachment.test.js +++ b/test/integration/api/content-attachment.test.js @@ -45,6 +45,28 @@ describe('API: Content Attachment', () => { expect(oid.length).toBeGreaterThanOrEqual(40); }); + it('persists and reads content metadata for nodes', async () => { + const graph = await repo.openGraph('test', 'alice'); + + const patch = await graph.createPatch(); + patch.addNode('doc:1'); + await patch.attachContent('doc:1', '# Title\n', { + mime: 'text/markdown', + size: 8, + }); + await patch.commit(); + + await graph.materialize(); + const meta = await graph.getContentMeta('doc:1'); + + expect(meta).not.toBeNull(); + expect(meta).toMatchObject({ + mime: 'text/markdown', + size: 8, + }); + expect(meta?.oid).toMatch(/^[0-9a-f]+$/); + }); + it('returns null when no content attached', async () => { const graph = await repo.openGraph('test', 'alice'); @@ -82,6 +104,25 @@ describe('API: Content Attachment', () => { expect(oid).toMatch(/^[0-9a-f]+$/); }); + it('persists and reads content metadata for edges', async () => { + const graph = await repo.openGraph('test', 'alice'); + const binary = new Uint8Array([0xde, 0xad, 0xbe, 0xef]); + + const patch = await graph.createPatch(); + patch.addNode('a').addNode('b').addEdge('a', 'b', 'rel'); + await patch.attachEdgeContent('a', 'b', 'rel', binary); + await patch.commit(); + + await graph.materialize(); + const meta = await graph.getEdgeContentMeta('a', 'b', 'rel'); + + expect(meta).toEqual({ + oid: 
expect.stringMatching(/^[0-9a-f]+$/), + mime: null, + size: binary.byteLength, + }); + }); + it('multi-writer LWW: concurrent attachments resolve deterministically', async () => { const graph1 = await repo.openGraph('test', 'alice'); const graph2 = await repo.openGraph('test', 'bob'); diff --git a/test/type-check/consumer.ts b/test/type-check/consumer.ts index e7c44072..423931f9 100644 --- a/test/type-check/consumer.ts +++ b/test/type-check/consumer.ts @@ -266,10 +266,13 @@ const edges: Array<{ from: string; to: string; label: string; props: Record { }); }); + describe('getContentMeta()', () => { + it('returns structured metadata for a node attachment', async () => { + setupGraphState(graph, (/** @type {any} */ state) => { + addNode(state, 'doc:1', 1); + state.prop.set(encodePropKey('doc:1', '_content'), { eventId: null, value: 'abc123' }); + state.prop.set(encodePropKey('doc:1', '_content.mime'), { eventId: null, value: 'text/markdown' }); + state.prop.set(encodePropKey('doc:1', '_content.size'), { eventId: null, value: 42 }); + }); + + const meta = await graph.getContentMeta('doc:1'); + + expect(meta).toEqual({ + oid: 'abc123', + mime: 'text/markdown', + size: 42, + }); + }); + + it('returns null metadata fields when only the oid exists', async () => { + setupGraphState(graph, (/** @type {any} */ state) => { + addNode(state, 'doc:1', 1); + state.prop.set(encodePropKey('doc:1', '_content'), { eventId: null, value: 'abc123' }); + }); + + const meta = await graph.getContentMeta('doc:1'); + + expect(meta).toEqual({ + oid: 'abc123', + mime: null, + size: null, + }); + }); + + it('returns null when no content is attached', async () => { + setupGraphState(graph, (/** @type {any} */ state) => { + addNode(state, 'doc:1', 1); + }); + + expect(await graph.getContentMeta('doc:1')).toBeNull(); + }); + }); + describe('getContent()', () => { it('reads and returns the blob buffer', async () => { const buf = new TextEncoder().encode('# ADR 001\n\nSome content'); @@ -268,6 +310,46 
@@ describe('WarpGraph content attachment (query methods)', () => { }); }); + describe('getEdgeContentMeta()', () => { + it('returns structured metadata for an edge attachment', async () => { + setupGraphState(graph, (/** @type {any} */ state) => { + addNode(state, 'a', 1); + addNode(state, 'b', 2); + addEdge(state, 'a', 'b', 'rel', 3); + state.prop.set(encodeEdgePropKey('a', 'b', 'rel', '_content'), { + eventId: { lamport: 2, writerId: 'w1', patchSha: 'aabbccdd', opIndex: 0 }, + value: 'def456', + }); + state.prop.set(encodeEdgePropKey('a', 'b', 'rel', '_content.mime'), { + eventId: { lamport: 2, writerId: 'w1', patchSha: 'aabbccdd', opIndex: 1 }, + value: 'application/octet-stream', + }); + state.prop.set(encodeEdgePropKey('a', 'b', 'rel', '_content.size'), { + eventId: { lamport: 2, writerId: 'w1', patchSha: 'aabbccdd', opIndex: 2 }, + value: 6, + }); + }); + + const meta = await graph.getEdgeContentMeta('a', 'b', 'rel'); + + expect(meta).toEqual({ + oid: 'def456', + mime: 'application/octet-stream', + size: 6, + }); + }); + + it('returns null when no edge content is attached', async () => { + setupGraphState(graph, (/** @type {any} */ state) => { + addNode(state, 'a', 1); + addNode(state, 'b', 2); + addEdge(state, 'a', 'b', 'rel', 3); + }); + + expect(await graph.getEdgeContentMeta('a', 'b', 'rel')).toBeNull(); + }); + }); + describe('getEdgeContent()', () => { it('reads and returns the blob buffer', async () => { const buf = new TextEncoder().encode('edge content'); diff --git a/test/unit/domain/__snapshots__/WarpGraph.apiSurface.test.js.snap b/test/unit/domain/__snapshots__/WarpGraph.apiSurface.test.js.snap index 56560f9e..3c86e445 100644 --- a/test/unit/domain/__snapshots__/WarpGraph.apiSurface.test.js.snap +++ b/test/unit/domain/__snapshots__/WarpGraph.apiSurface.test.js.snap @@ -202,6 +202,11 @@ exports[`WarpGraph API surface > all prototype methods have correct property des "enumerable": false, "type": "method", }, + "getContentMeta": { + "configurable": 
true, + "enumerable": false, + "type": "method", + }, "getContentOid": { "configurable": true, "enumerable": false, @@ -212,6 +217,11 @@ exports[`WarpGraph API surface > all prototype methods have correct property des "enumerable": false, "type": "method", }, + "getEdgeContentMeta": { + "configurable": true, + "enumerable": false, + "type": "method", + }, "getEdgeContentOid": { "configurable": true, "enumerable": false, @@ -440,7 +450,7 @@ exports[`WarpGraph API surface > all prototype methods have correct property des } `; -exports[`WarpGraph API surface > prototype method count matches snapshot 1`] = `87`; +exports[`WarpGraph API surface > prototype method count matches snapshot 1`] = `89`; exports[`WarpGraph API surface > prototype methods match snapshot 1`] = ` [ @@ -484,8 +494,10 @@ exports[`WarpGraph API surface > prototype methods match snapshot 1`] = ` "fork", "gcPolicy", "getContent", + "getContentMeta", "getContentOid", "getEdgeContent", + "getEdgeContentMeta", "getEdgeContentOid", "getEdgeProps", "getEdges", diff --git a/test/unit/domain/services/PatchBuilderV2.content.test.js b/test/unit/domain/services/PatchBuilderV2.content.test.js index 9219e060..45c46bb1 100644 --- a/test/unit/domain/services/PatchBuilderV2.content.test.js +++ b/test/unit/domain/services/PatchBuilderV2.content.test.js @@ -37,7 +37,7 @@ function createMockState() { describe('PatchBuilderV2 content attachment', () => { describe('attachContent()', () => { - it('writes blob and sets _content property', async () => { + it('writes blob and sets content reference metadata properties', async () => { const state = createMockState(); orsetAdd(state.nodeAlive, 'node:1', createDot('w1', 1)); const persistence = createMockPersistence({ @@ -55,13 +55,55 @@ describe('PatchBuilderV2 content attachment', () => { expect(persistence.writeBlob).toHaveBeenCalledWith('hello world'); const patch = builder.build(); - expect(patch.ops).toHaveLength(1); - expect(patch.ops[0]).toMatchObject({ + 
expect(patch.ops).toHaveLength(3); + expect(patch.ops).toContainEqual(expect.objectContaining({ type: 'PropSet', node: 'node:1', key: '_content', value: 'abc123', + })); + expect(patch.ops).toContainEqual(expect.objectContaining({ + type: 'PropSet', + node: 'node:1', + key: '_content.size', + value: 11, + })); + expect(patch.ops).toContainEqual(expect.objectContaining({ + type: 'PropSet', + node: 'node:1', + key: '_content.mime', + value: null, + })); + }); + + it('accepts optional content metadata and persists it alongside the blob oid', async () => { + const state = createMockState(); + orsetAdd(state.nodeAlive, 'node:1', createDot('w1', 1)); + const persistence = createMockPersistence({ + writeBlob: vi.fn().mockResolvedValue('abc123'), }); + const builder = new PatchBuilderV2(/** @type {any} */ ({ + persistence, + writerId: 'w1', + lamport: 1, + versionVector: createVersionVector(), + getCurrentState: () => state, + })); + + await builder.attachContent('node:1', 'hello world', { + mime: 'text/plain', + size: 11, + }); + + const patch = builder.build(); + expect(patch.ops).toContainEqual(expect.objectContaining({ + key: '_content.mime', + value: 'text/plain', + })); + expect(patch.ops).toContainEqual(expect.objectContaining({ + key: '_content.size', + value: 11, + })); }); it('tracks blob OID in _contentBlobs', async () => { @@ -131,10 +173,28 @@ describe('PatchBuilderV2 content attachment', () => { expect(persistence.writeBlob).not.toHaveBeenCalled(); expect(builder._contentBlobs).toEqual([]); }); + + it('rejects mismatched size metadata before writing the blob', async () => { + const state = createMockState(); + orsetAdd(state.nodeAlive, 'node:1', createDot('w1', 1)); + const persistence = createMockPersistence(); + const builder = new PatchBuilderV2(/** @type {any} */ ({ + persistence, + writerId: 'w1', + lamport: 1, + versionVector: createVersionVector(), + getCurrentState: () => state, + })); + + await expect(builder.attachContent('node:1', 'hello', { size: 
9 })) + .rejects.toThrow('content metadata size 9 does not match actual byte size 5'); + expect(persistence.writeBlob).not.toHaveBeenCalled(); + expect(builder._contentBlobs).toEqual([]); + }); }); describe('attachEdgeContent()', () => { - it('writes blob and sets _content edge property', async () => { + it('writes blob and sets content reference metadata on the edge', async () => { const state = createMockState(); const edgeKey = encodeEdgeKey('a', 'b', 'rel'); orsetAdd(state.edgeAlive, edgeKey, createDot('w1', 1)); @@ -154,12 +214,22 @@ describe('PatchBuilderV2 content attachment', () => { expect(persistence.writeBlob).toHaveBeenCalledWith(Buffer.from('binary')); const patch = builder.build(); - expect(patch.ops).toHaveLength(1); - expect(patch.ops[0]).toMatchObject({ + expect(patch.ops).toHaveLength(3); + expect(patch.ops).toContainEqual(expect.objectContaining({ type: 'PropSet', key: '_content', value: 'def456', - }); + })); + expect(patch.ops).toContainEqual(expect.objectContaining({ + type: 'PropSet', + key: '_content.size', + value: 6, + })); + expect(patch.ops).toContainEqual(expect.objectContaining({ + type: 'PropSet', + key: '_content.mime', + value: null, + })); // Schema should be 3 (edge properties present) expect(patch.schema).toBe(3); }); @@ -270,15 +340,19 @@ describe('PatchBuilderV2 content attachment', () => { await builder.attachContent('node:1', 'hello world'); - expect(blobStorage.store).toHaveBeenCalledWith('hello world', { slug: 'g/node:1' }); + expect(blobStorage.store).toHaveBeenCalledWith('hello world', { + slug: 'g/node:1', + mime: null, + size: 11, + }); expect(persistence.writeBlob).not.toHaveBeenCalled(); const patch = builder.build(); - expect(patch.ops[0]).toMatchObject({ + expect(patch.ops).toContainEqual(expect.objectContaining({ type: 'PropSet', node: 'node:1', key: '_content', value: 'cas-tree-oid', - }); + })); }); it('falls back to persistence.writeBlob() when blobStorage is not provided', async () => { @@ -323,7 +397,11 @@ 
describe('PatchBuilderV2 content attachment', () => { await builder.attachEdgeContent('a', 'b', 'rel', 'edge-data'); - expect(blobStorage.store).toHaveBeenCalledWith('edge-data', { slug: 'g/a/b/rel' }); + expect(blobStorage.store).toHaveBeenCalledWith('edge-data', { + slug: 'g/a/b/rel', + mime: null, + size: 9, + }); expect(persistence.writeBlob).not.toHaveBeenCalled(); }); }); From 43bfe8cae003965863533d97bda86721a0d12b33 Mon Sep 17 00:00:00 2001 From: James Ross Date: Fri, 13 Mar 2026 19:38:42 -0700 Subject: [PATCH 2/4] fix: preserve content metadata attachment lineage --- CHANGELOG.md | 2 +- README.md | 4 +- ROADMAP.md | 2 +- docs/specs/CONTENT_ATTACHMENT.md | 8 +- src/domain/services/KeyCodec.js | 4 +- src/domain/warp/query.methods.js | 176 +++++++++++++++--- .../api/content-attachment.test.js | 12 ++ test/unit/domain/WarpGraph.content.test.js | 75 +++++++- 8 files changed, 238 insertions(+), 45 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3e7b917..08345b6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- **Content attachment metadata API** — `attachContent()` and `attachEdgeContent()` now accept optional `{ mime, size }` metadata hints, persist byte size alongside the `_content` OID, and expose `getContentMeta()` / `getEdgeContentMeta()` for structured `{ oid, mime, size }` reads without manual `_content.*` property handling. +- **Content attachment metadata API** — `attachContent()` and `attachEdgeContent()` now accept optional `{ mime, size }` metadata hints, persist logical content byte size alongside the `_content` OID, and expose `getContentMeta()` / `getEdgeContentMeta()` for structured `{ oid, mime, size }` reads without manual `_content.*` property handling. Metadata reads stay aligned with the current `_content` attachment instead of inheriting stale sibling props from later manual rewrites. 
- **Streaming transitive closure traversal** — Added `transitiveClosureStream()` to the traversal stack so callers can consume reachability edges lazily as an `AsyncGenerator<{ from, to }>` without materializing the full closure array. The existing `transitiveClosure()` API remains and now collects from the stream for backward compatibility. - **First-class sync trust configuration** — `WarpGraph.open({ trust })` and `graph.syncWith(..., { trust })` now expose an explicit public trust-config surface for sync evaluation instead of relying on hidden controller wiring alone. - **Fluent `WarpStateV5` test builder** — Added `createStateBuilder()` in `test/helpers/stateBuilder.js` so state-heavy tests can seed nodes, edges, removals, properties, frontier state, and graph materialization through one fluent helper instead of ad hoc OR-Set/LWW mutation. diff --git a/README.md b/README.md index 1fe88272..27bf9a19 100644 --- a/README.md +++ b/README.md @@ -467,7 +467,7 @@ await patch.commit(); const buffer = await graph.getContent('adr:0007'); // Uint8Array | null const oid = await graph.getContentOid('adr:0007'); // hex SHA or null const meta = await graph.getContentMeta('adr:0007'); -// { oid: 'abc123...', mime: 'text/markdown', size: 26 } +// { oid: 'abc123...', mime: 'text/markdown', size: 28 } // Edge content works the same way (assumes nodes and edge already exist) const patch2 = await graph.createPatch(); @@ -479,7 +479,7 @@ const edgeBuf = await graph.getEdgeContent('a', 'b', 'rel'); const edgeMeta = await graph.getEdgeContentMeta('a', 'b', 'rel'); ``` -Content blobs survive `git gc` — their OIDs are embedded in the patch commit tree and checkpoint tree, keeping them reachable. `attachContent()` / `attachEdgeContent()` also persist byte-size metadata automatically and will store a MIME hint when provided. Historical attachments created before metadata support may still return `mime: null` / `size: null` from the metadata APIs until they are rewritten. 
If a live `_content` reference points at a missing blob anyway (for example due to manual corruption), `getContent()` / `getEdgeContent()` throw instead of silently returning empty bytes. +Content blobs survive `git gc` — their OIDs are embedded in the patch commit tree and checkpoint tree, keeping them reachable. `attachContent()` / `attachEdgeContent()` also persist logical content byte-size metadata automatically and will store a MIME hint when provided. Historical attachments created before metadata support, or later manual `_content` rewrites that bypass the attachment helpers, may still return `mime: null` / `size: null` from the metadata APIs until they are re-attached through the metadata-aware APIs. If a live `_content` reference points at a missing blob anyway (for example due to manual corruption), `getContent()` / `getEdgeContent()` throw instead of silently returning empty bytes. ### Writer API diff --git a/ROADMAP.md b/ROADMAP.md index 55c404e3..2563bdc0 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -399,7 +399,7 @@ B158 (P7) ──→ B159 (P7) CDC seek cache | **Standalone (done)** | 62 | B19, B22, B26, B36–B37, B44, B46, B47, B48–B52, B55, B57, B71, B72, B77, B78, B80–B87, B89–B95, B97, B99–B100, B120–B122, B124, B125, B126, B146, B148–B151, B153, B154, B157–B168 | | **Deferred** | 7 | B4, B7, B16, B20, B21, B27, B101 | | **Rejected** | 7 | B5, B6, B13, B17, B18, B25, B45 | -| **Total tracked** | **146** total; 61 standalone done | | +| **Total tracked** | **146** total; 62 standalone done | | ### STANK.md Cross-Reference diff --git a/docs/specs/CONTENT_ATTACHMENT.md b/docs/specs/CONTENT_ATTACHMENT.md index 2835ff01..c98d5fae 100644 --- a/docs/specs/CONTENT_ATTACHMENT.md +++ b/docs/specs/CONTENT_ATTACHMENT.md @@ -121,7 +121,7 @@ const edgeMeta = await graph.getEdgeContentMeta('a', 'b', 'rel'); `getContent()` returns raw `Uint8Array` bytes. Consumers wanting text should decode with `new TextDecoder().decode(buffer)`. 
If `_content` points at a missing blob OID, `getContent()` throws instead of silently returning empty bytes. `getEdgeContent()` has the same byte-decoding and missing-blob semantics for edge `_content` references. -`getContentMeta()` / `getEdgeContentMeta()` return `{ oid, mime, size }` when metadata exists, or `null` when no attachment exists. Historical attachments created before metadata support may still surface `mime: null` / `size: null`. +`getContentMeta()` / `getEdgeContentMeta()` return `{ oid, mime, size }` when metadata exists, or `null` when no attachment exists. Historical attachments created before metadata support, or later manual `_content` rewrites that bypass the attachment helpers, may still surface `mime: null` / `size: null`. #### Constant @@ -132,15 +132,15 @@ import { CONTENT_PROPERTY_KEY } from '@git-stunts/git-warp'; ### 3.4 Content Metadata -git-warp stores content metadata in sibling system properties alongside the `_content` reference: +git-warp stores logical attachment metadata in sibling system properties alongside the `_content` reference: | Property | Purpose | Example | |---|---|---| | `_content` | CAS blob SHA (required) | `"a1b2c3d4..."` | -| `_content.size` | Byte length | `4096` | +| `_content.size` | Logical content byte length | `4096` | | `_content.mime` | MIME type hint | `"text/markdown"` | -`attachContent()` / `attachEdgeContent()` always persist `_content.size` from the actual encoded byte length. If callers provide `{ mime }`, the MIME hint is stored in `_content.mime`; otherwise the metadata API returns `mime: null`. +`attachContent()` / `attachEdgeContent()` always persist `_content.size` from the actual encoded byte length. If callers provide `{ mime }`, the MIME hint is stored in `_content.mime`; otherwise the metadata API returns `mime: null`. 
The read APIs only surface `_content.mime` / `_content.size` when they belong to the current `_content` attachment lineage, so a later manual `_content` rewrite does not inherit stale metadata from an older blob reference. --- diff --git a/src/domain/services/KeyCodec.js b/src/domain/services/KeyCodec.js index fbe0ff7a..03ed030f 100644 --- a/src/domain/services/KeyCodec.js +++ b/src/domain/services/KeyCodec.js @@ -27,14 +27,14 @@ export const CONTENT_PROPERTY_KEY = '_content'; /** * Well-known property key for attached content MIME metadata. - * Stores a MIME type hint for the blob referenced by `_content`. + * Stores a MIME type hint for the attached logical content referenced by `_content`. * @const {string} */ export const CONTENT_MIME_PROPERTY_KEY = '_content.mime'; /** * Well-known property key for attached content byte-size metadata. - * Stores the byte length of the blob referenced by `_content`. + * Stores the byte length of the attached logical content referenced by `_content`. * @const {string} */ export const CONTENT_SIZE_PROPERTY_KEY = '_content.size'; diff --git a/src/domain/warp/query.methods.js b/src/domain/warp/query.methods.js index a31b2493..4c717b28 100644 --- a/src/domain/warp/query.methods.js +++ b/src/domain/warp/query.methods.js @@ -10,8 +10,10 @@ import { orsetContains, orsetElements } from '../crdt/ORSet.js'; import { decodePropKey, + encodePropKey, isEdgePropKey, decodeEdgePropKey, + encodeEdgePropKey, encodeEdgeKey, decodeEdgeKey, CONTENT_PROPERTY_KEY, @@ -352,30 +354,130 @@ export async function translationCost(configA, configB) { } /** - * Extracts structured content metadata from a property bag. + * Returns true when two registers were written in the same patch lineage. * - * Historical graphs may only have `_content`, in which case `mime` and `size` - * return as null until the content is re-attached through the metadata-aware - * APIs. 
+ * Content metadata is stored in sibling properties, so the read path only + * treats `_content.mime` / `_content.size` as current when they come from the + * same patch as the live `_content` reference. This prevents stale metadata + * from surviving a later manual `_content` rewrite. * - * @param {Record|null} props - * @returns {{ oid: string, mime: string|null, size: number|null }|null} + * @param {import('../utils/EventId.js').EventId|null|undefined} contentEventId + * @param {import('../utils/EventId.js').EventId|null|undefined} candidateEventId + * @returns {boolean} + */ +function isSameAttachmentLineage(contentEventId, candidateEventId) { + return Boolean( + contentEventId + && candidateEventId + && contentEventId.lamport === candidateEventId.lamport + && contentEventId.writerId === candidateEventId.writerId + && contentEventId.patchSha === candidateEventId.patchSha + ); +} + +/** + * Filters an edge-property register against the edge birth event. + * + * @param {{ eventId: import('../utils/EventId.js').EventId|null, value: unknown }|undefined} register + * @param {import('../utils/EventId.js').EventId|undefined} birthEvent + * @returns {{ eventId: import('../utils/EventId.js').EventId|null, value: unknown }|null} + */ +function visibleEdgeRegister(register, birthEvent) { + if (!register) { + return null; + } + if (birthEvent && register.eventId && compareEventIds(register.eventId, birthEvent) < 0) { + return null; + } + return register; +} + +/** + * Looks up the current node attachment registers directly from materialized state. 
+ * + * @param {import('../services/JoinReducer.js').WarpStateV5} state + * @param {string} nodeId + * @returns {{ contentRegister: { eventId: import('../utils/EventId.js').EventId|null, value: string }, mimeRegister: { eventId: import('../utils/EventId.js').EventId|null, value: unknown }|null, sizeRegister: { eventId: import('../utils/EventId.js').EventId|null, value: unknown }|null }|null} + */ +function getNodeContentRegisters(state, nodeId) { + if (!orsetContains(state.nodeAlive, nodeId)) { + return null; + } + const contentRegister = state.prop.get(encodePropKey(nodeId, CONTENT_PROPERTY_KEY)); + if (!contentRegister || typeof contentRegister.value !== 'string') { + return null; + } + return { + contentRegister: /** @type {{ eventId: import('../utils/EventId.js').EventId|null, value: string }} */ (contentRegister), + mimeRegister: state.prop.get(encodePropKey(nodeId, CONTENT_MIME_PROPERTY_KEY)) || null, + sizeRegister: state.prop.get(encodePropKey(nodeId, CONTENT_SIZE_PROPERTY_KEY)) || null, + }; +} + +/** + * Looks up the current edge attachment registers directly from materialized state. 
+ * + * @param {import('../services/JoinReducer.js').WarpStateV5} state + * @param {string} from + * @param {string} to + * @param {string} label + * @returns {{ contentRegister: { eventId: import('../utils/EventId.js').EventId|null, value: string }, mimeRegister: { eventId: import('../utils/EventId.js').EventId|null, value: unknown }|null, sizeRegister: { eventId: import('../utils/EventId.js').EventId|null, value: unknown }|null }|null} */ -function extractContentMeta(props) { - if (!props) { +function getEdgeContentRegisters(state, from, to, label) { + const edgeKey = encodeEdgeKey(from, to, label); + if (!orsetContains(state.edgeAlive, edgeKey)) { + return null; + } + if (!orsetContains(state.nodeAlive, from) || !orsetContains(state.nodeAlive, to)) { return null; } - const oid = props[CONTENT_PROPERTY_KEY]; - if (typeof oid !== 'string') { + const birthEvent = state.edgeBirthEvent?.get(edgeKey); + const contentRegister = visibleEdgeRegister( + state.prop.get(encodeEdgePropKey(from, to, label, CONTENT_PROPERTY_KEY)), + birthEvent, + ); + if (!contentRegister || typeof contentRegister.value !== 'string') { return null; } - const mimeValue = props[CONTENT_MIME_PROPERTY_KEY]; - const sizeValue = props[CONTENT_SIZE_PROPERTY_KEY]; - const size = typeof sizeValue === 'number' && Number.isInteger(sizeValue) && sizeValue >= 0 - ? sizeValue + return { + contentRegister: /** @type {{ eventId: import('../utils/EventId.js').EventId|null, value: string }} */ (contentRegister), + mimeRegister: visibleEdgeRegister( + state.prop.get(encodeEdgePropKey(from, to, label, CONTENT_MIME_PROPERTY_KEY)), + birthEvent, + ), + sizeRegister: visibleEdgeRegister( + state.prop.get(encodeEdgePropKey(from, to, label, CONTENT_SIZE_PROPERTY_KEY)), + birthEvent, + ), + }; +} + +/** + * Extracts structured content metadata from a property bag. + * + * Historical graphs may only have `_content`, and manual `_content` rewrites + * can outlive older sibling metadata fields. 
In those cases `mime` and `size` + * return as null until the content is re-attached through the metadata-aware + * APIs. + * + * @param {{ eventId: import('../utils/EventId.js').EventId|null, value: string }} contentRegister + * @param {{ eventId: import('../utils/EventId.js').EventId|null, value: unknown }|null} mimeRegister + * @param {{ eventId: import('../utils/EventId.js').EventId|null, value: unknown }|null} sizeRegister + * @returns {{ oid: string, mime: string|null, size: number|null }|null} + */ +function extractContentMeta(contentRegister, mimeRegister, sizeRegister) { + const sizeValue = isSameAttachmentLineage(contentRegister.eventId, sizeRegister?.eventId) + ? sizeRegister?.value : null; + const mimeValue = isSameAttachmentLineage(contentRegister.eventId, mimeRegister?.eventId) + ? mimeRegister?.value + : null; + const size = + typeof sizeValue === 'number' && Number.isInteger(sizeValue) && sizeValue >= 0 + ? sizeValue + : null; return { - oid, + oid: contentRegister.value, mime: typeof mimeValue === 'string' ? mimeValue : null, size, }; @@ -390,8 +492,10 @@ function extractContentMeta(props) { * @throws {import('../errors/QueryError.js').default} If no cached state exists (code: `E_NO_STATE`) */ export async function getContentOid(nodeId) { - const meta = await getContentMeta.call(this, nodeId); - return meta?.oid ?? null; + await this._ensureFreshState(); + const s = /** @type {import('../services/JoinReducer.js').WarpStateV5} */ (this._cachedState); + const registers = getNodeContentRegisters(s, nodeId); + return registers?.contentRegister.value ?? 
null; } /** @@ -403,8 +507,12 @@ export async function getContentOid(nodeId) { * @throws {import('../errors/QueryError.js').default} If no cached state exists (code: `E_NO_STATE`) */ export async function getContentMeta(nodeId) { - const props = await getNodeProps.call(this, nodeId); - return extractContentMeta(props); + await this._ensureFreshState(); + const s = /** @type {import('../services/JoinReducer.js').WarpStateV5} */ (this._cachedState); + const registers = getNodeContentRegisters(s, nodeId); + return registers + ? extractContentMeta(registers.contentRegister, registers.mimeRegister, registers.sizeRegister) + : null; } /** @@ -422,11 +530,13 @@ export async function getContentMeta(nodeId) { * blob object. */ export async function getContent(nodeId) { - const meta = await getContentMeta.call(this, nodeId); - if (!meta) { + await this._ensureFreshState(); + const s = /** @type {import('../services/JoinReducer.js').WarpStateV5} */ (this._cachedState); + const registers = getNodeContentRegisters(s, nodeId); + if (!registers) { return null; } - const { oid } = meta; + const { value: oid } = registers.contentRegister; if (this._blobStorage) { return await this._blobStorage.retrieve(oid); } @@ -444,8 +554,10 @@ export async function getContent(nodeId) { * @throws {import('../errors/QueryError.js').default} If no cached state exists (code: `E_NO_STATE`) */ export async function getEdgeContentOid(from, to, label) { - const meta = await getEdgeContentMeta.call(this, from, to, label); - return meta?.oid ?? null; + await this._ensureFreshState(); + const s = /** @type {import('../services/JoinReducer.js').WarpStateV5} */ (this._cachedState); + const registers = getEdgeContentRegisters(s, from, to, label); + return registers?.contentRegister.value ?? 
null; } /** @@ -459,8 +571,12 @@ export async function getEdgeContentOid(from, to, label) { * @throws {import('../errors/QueryError.js').default} If no cached state exists (code: `E_NO_STATE`) */ export async function getEdgeContentMeta(from, to, label) { - const props = await getEdgeProps.call(this, from, to, label); - return extractContentMeta(props); + await this._ensureFreshState(); + const s = /** @type {import('../services/JoinReducer.js').WarpStateV5} */ (this._cachedState); + const registers = getEdgeContentRegisters(s, from, to, label); + return registers + ? extractContentMeta(registers.contentRegister, registers.mimeRegister, registers.sizeRegister) + : null; } /** @@ -480,11 +596,13 @@ export async function getEdgeContentMeta(from, to, label) { * blob object. */ export async function getEdgeContent(from, to, label) { - const meta = await getEdgeContentMeta.call(this, from, to, label); - if (!meta) { + await this._ensureFreshState(); + const s = /** @type {import('../services/JoinReducer.js').WarpStateV5} */ (this._cachedState); + const registers = getEdgeContentRegisters(s, from, to, label); + if (!registers) { return null; } - const { oid } = meta; + const { value: oid } = registers.contentRegister; if (this._blobStorage) { return await this._blobStorage.retrieve(oid); } diff --git a/test/integration/api/content-attachment.test.js b/test/integration/api/content-attachment.test.js index 072f7881..2f956281 100644 --- a/test/integration/api/content-attachment.test.js +++ b/test/integration/api/content-attachment.test.js @@ -282,6 +282,12 @@ describe('API: Content Attachment', () => { await graph.materialize(); + await expect(graph.getContentMeta('doc:1')).resolves.toEqual({ + oid: 'deadbeefdeadbeefdeadbeefdeadbeefdeadbeef', + mime: null, + size: null, + }); + await expect(graph.getContent('doc:1')) .rejects.toMatchObject({ code: PersistenceError.E_MISSING_OBJECT }); }); @@ -302,6 +308,12 @@ describe('API: Content Attachment', () => { await 
graph.materialize(); + await expect(graph.getEdgeContentMeta('a', 'b', 'rel')).resolves.toEqual({ + oid: 'deadbeefdeadbeefdeadbeefdeadbeefdeadbeef', + mime: null, + size: null, + }); + await expect(graph.getEdgeContent('a', 'b', 'rel')) .rejects.toMatchObject({ code: PersistenceError.E_MISSING_OBJECT }); }); diff --git a/test/unit/domain/WarpGraph.content.test.js b/test/unit/domain/WarpGraph.content.test.js index 8963c742..865f39c0 100644 --- a/test/unit/domain/WarpGraph.content.test.js +++ b/test/unit/domain/WarpGraph.content.test.js @@ -23,6 +23,15 @@ function addEdge(/** @type {any} */ state, /** @type {any} */ from, /** @type {a state.edgeBirthEvent.set(edgeKey, { lamport: 1, writerId: 'w1', patchSha: 'aabbccdd', opIndex: 0 }); } +function attachmentEvent( + /** @type {number} */ opIndex, + /** @type {string} */ patchSha = 'aabbccdd', + /** @type {number} */ lamport = 2, + /** @type {string} */ writerId = 'w1', +) { + return { lamport, writerId, patchSha, opIndex }; +} + describe('WarpGraph content attachment (query methods)', () => { /** @type {any} */ let mockPersistence; @@ -90,9 +99,9 @@ describe('WarpGraph content attachment (query methods)', () => { it('returns structured metadata for a node attachment', async () => { setupGraphState(graph, (/** @type {any} */ state) => { addNode(state, 'doc:1', 1); - state.prop.set(encodePropKey('doc:1', '_content'), { eventId: null, value: 'abc123' }); - state.prop.set(encodePropKey('doc:1', '_content.mime'), { eventId: null, value: 'text/markdown' }); - state.prop.set(encodePropKey('doc:1', '_content.size'), { eventId: null, value: 42 }); + state.prop.set(encodePropKey('doc:1', '_content'), { eventId: attachmentEvent(0), value: 'abc123' }); + state.prop.set(encodePropKey('doc:1', '_content.mime'), { eventId: attachmentEvent(1), value: 'text/markdown' }); + state.prop.set(encodePropKey('doc:1', '_content.size'), { eventId: attachmentEvent(2), value: 42 }); }); const meta = await graph.getContentMeta('doc:1'); @@ -104,6 
+113,32 @@ describe('WarpGraph content attachment (query methods)', () => { }); }); + it('ignores stale metadata when _content is rewritten later', async () => { + setupGraphState(graph, (/** @type {any} */ state) => { + addNode(state, 'doc:1', 1); + state.prop.set(encodePropKey('doc:1', '_content'), { + eventId: attachmentEvent(0, 'feedbabe', 3), + value: 'new456', + }); + state.prop.set(encodePropKey('doc:1', '_content.mime'), { + eventId: attachmentEvent(1, 'aabbccdd', 2), + value: 'text/markdown', + }); + state.prop.set(encodePropKey('doc:1', '_content.size'), { + eventId: attachmentEvent(2, 'aabbccdd', 2), + value: 42, + }); + }); + + const meta = await graph.getContentMeta('doc:1'); + + expect(meta).toEqual({ + oid: 'new456', + mime: null, + size: null, + }); + }); + it('returns null metadata fields when only the oid exists', async () => { setupGraphState(graph, (/** @type {any} */ state) => { addNode(state, 'doc:1', 1); @@ -317,15 +352,15 @@ describe('WarpGraph content attachment (query methods)', () => { addNode(state, 'b', 2); addEdge(state, 'a', 'b', 'rel', 3); state.prop.set(encodeEdgePropKey('a', 'b', 'rel', '_content'), { - eventId: { lamport: 2, writerId: 'w1', patchSha: 'aabbccdd', opIndex: 0 }, + eventId: attachmentEvent(0), value: 'def456', }); state.prop.set(encodeEdgePropKey('a', 'b', 'rel', '_content.mime'), { - eventId: { lamport: 2, writerId: 'w1', patchSha: 'aabbccdd', opIndex: 1 }, + eventId: attachmentEvent(1), value: 'application/octet-stream', }); state.prop.set(encodeEdgePropKey('a', 'b', 'rel', '_content.size'), { - eventId: { lamport: 2, writerId: 'w1', patchSha: 'aabbccdd', opIndex: 2 }, + eventId: attachmentEvent(2), value: 6, }); }); @@ -339,6 +374,34 @@ describe('WarpGraph content attachment (query methods)', () => { }); }); + it('ignores stale edge metadata when _content is rewritten later', async () => { + setupGraphState(graph, (/** @type {any} */ state) => { + addNode(state, 'a', 1); + addNode(state, 'b', 2); + addEdge(state, 
'a', 'b', 'rel', 3); + state.prop.set(encodeEdgePropKey('a', 'b', 'rel', '_content'), { + eventId: attachmentEvent(0, 'feedbabe', 3), + value: 'new-edge-oid', + }); + state.prop.set(encodeEdgePropKey('a', 'b', 'rel', '_content.mime'), { + eventId: attachmentEvent(1), + value: 'application/octet-stream', + }); + state.prop.set(encodeEdgePropKey('a', 'b', 'rel', '_content.size'), { + eventId: attachmentEvent(2), + value: 6, + }); + }); + + const meta = await graph.getEdgeContentMeta('a', 'b', 'rel'); + + expect(meta).toEqual({ + oid: 'new-edge-oid', + mime: null, + size: null, + }); + }); + it('returns null when no edge content is attached', async () => { setupGraphState(graph, (/** @type {any} */ state) => { addNode(state, 'a', 1); From bf13805c8b503bff5f10dfb28fe7dfe97bd9f1da Mon Sep 17 00:00:00 2001 From: James Ross Date: Fri, 13 Mar 2026 19:44:19 -0700 Subject: [PATCH 3/4] fix: tighten content metadata type surface --- CHANGELOG.md | 1 + ROADMAP.md | 2 +- contracts/type-surface.m8.json | 6 ++++++ index.d.ts | 4 ++-- src/infrastructure/adapters/CasBlobAdapter.js | 4 ++++ test/type-check/consumer.ts | 9 ++++++--- 6 files changed, 20 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 08345b6a..007a2250 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- **Content metadata review follow-ups** — `ContentMeta` and `ContentAttachmentOptions` are now exported as public type-only symbols, the consumer smoke test imports them directly, and the git-cas adapter docs now explicitly note that MIME/size hints are accepted for CRDT metadata but are not embedded in CAS manifests. - **Roadmap reconciled after PR #67 / #68 merges** — `ROADMAP.md` and `docs/ROADMAP/COMPLETED.md` now reflect the merged pre-push gate regression work (`B168`) and the current `main` baseline before the issue-45 slice branches off. 
- **Large-graph traversal memory profile** — `topologicalSort()` now has a lightweight mode that avoids retaining discovery adjacency when callers do not need it. `levels()` and `transitiveReduction()` were refactored to re-fetch neighbors on demand instead of pinning full topo adjacency in memory, reducing steady-state large-graph working sets. - **Roadmap reconciled after B87 merge** — `ROADMAP.md` now treats the Markdown code-sample linter as merged work on `main`, advances the CI/tooling wave to start at `B88`, and records the follow-up backlog items for pre-push gate-message regression coverage (`B168`) and archived-doc status guardrails (`B169`). diff --git a/ROADMAP.md b/ROADMAP.md index 2563bdc0..19d3bcfc 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -173,7 +173,7 @@ Archived to [COMPLETED.md](docs/ROADMAP/COMPLETED.md#milestone-11--compass-ii). ## Standalone Lane (Ongoing) -32 active items sorted into priority tiers. Guiding principles: (1) harden first — correctness, memory safety, test infra, CI gates before features; (2) large-graph support is forward-looking — medium priority; (3) CI & Tooling items batch into one PR. +25 active standalone items sorted into priority tiers. Guiding principles: (1) harden first — correctness, memory safety, test infra, CI gates before features; (2) large-graph support is forward-looking — medium priority; (3) CI & Tooling items batch into one PR. > Completed standalone items archived in [COMPLETED.md](docs/ROADMAP/COMPLETED.md#standalone-lane--completed-items). 
diff --git a/contracts/type-surface.m8.json b/contracts/type-surface.m8.json index f6a54d26..630c1346 100644 --- a/contracts/type-surface.m8.json +++ b/contracts/type-surface.m8.json @@ -1313,6 +1313,12 @@ "ComposeWormholesOptions": { "kind": "interface" }, + "ContentAttachmentOptions": { + "kind": "interface" + }, + "ContentMeta": { + "kind": "interface" + }, "CreateBTROptions": { "kind": "interface" }, diff --git a/index.d.ts b/index.d.ts index d79c8793..64631634 100644 --- a/index.d.ts +++ b/index.d.ts @@ -1416,12 +1416,12 @@ export interface TemporalQuery { ): Promise; } -interface ContentAttachmentOptions { +export interface ContentAttachmentOptions { mime?: string | null; size?: number | null; } -interface ContentMeta { +export interface ContentMeta { oid: string; mime: string | null; size: number | null; diff --git a/src/infrastructure/adapters/CasBlobAdapter.js b/src/infrastructure/adapters/CasBlobAdapter.js index 1d5ad2d8..53b6d230 100644 --- a/src/infrastructure/adapters/CasBlobAdapter.js +++ b/src/infrastructure/adapters/CasBlobAdapter.js @@ -106,6 +106,10 @@ export default class CasBlobAdapter extends BlobStoragePort { : content; const source = Readable.from([buf]); + // `mime` and `size` are accepted on the public store() contract because + // PatchBuilderV2 forwards higher-level attachment metadata here. That + // metadata is persisted as sibling CRDT properties, not encoded into the + // git-cas manifest, so this adapter only uses slug/encryption.
/** @type {{ source: *, slug: string, filename: string, encryptionKey?: Uint8Array }} */ const storeOpts = { source, diff --git a/test/type-check/consumer.ts b/test/type-check/consumer.ts index 423931f9..02ea27f3 100644 --- a/test/type-check/consumer.ts +++ b/test/type-check/consumer.ts @@ -32,6 +32,8 @@ import WarpGraph, { IndexRebuildService, HealthCheckService, CommitDagTraversalService, + ContentAttachmentOptions, + ContentMeta, NoOpLogger, ConsoleLogger, ClockAdapter, @@ -266,13 +268,14 @@ const edges: Array<{ from: string; to: string; label: string; props: Record Date: Fri, 13 Mar 2026 20:05:45 -0700 Subject: [PATCH 4/4] fix: reconcile content metadata surface manifest --- CHANGELOG.md | 1 + contracts/type-surface.m8.json | 48 ++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 007a2250..63062e79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - **Content metadata review follow-ups** — `ContentMeta` and `ContentAttachmentOptions` are now exported as public type-only symbols, the consumer smoke test imports them directly, and the git-cas adapter docs now explicitly note that MIME/size hints are accepted for CRDT metadata but are not embedded in CAS manifests. +- **Content metadata surface manifest follow-up** — the declaration contract manifest now matches the shipped API: `attachContent()` / `attachEdgeContent()` include the optional metadata parameter for both patch builders and patch sessions, and `WarpGraph` exports `getContentMeta()` / `getEdgeContentMeta()` in the tracked public surface. - **Roadmap reconciled after PR #67 / #68 merges** — `ROADMAP.md` and `docs/ROADMAP/COMPLETED.md` now reflect the merged pre-push gate regression work (`B168`) and the current `main` baseline before the issue-45 slice branches off. 
- **Large-graph traversal memory profile** — `topologicalSort()` now has a lightweight mode that avoids retaining discovery adjacency when callers do not need it. `levels()` and `transitiveReduction()` were refactored to re-fetch neighbors on demand instead of pinning full topo adjacency in memory, reducing steady-state large-graph working sets. - **Roadmap reconciled after B87 merge** — `ROADMAP.md` now treats the Markdown code-sample linter as merged work on `main`, advances the CI/tooling wave to start at `B88`, and records the follow-up backlog items for pre-push gate-message regression coverage (`B168`) and archived-doc status guardrails (`B169`). diff --git a/contracts/type-surface.m8.json b/contracts/type-surface.m8.json index 630c1346..38d3fa5c 100644 --- a/contracts/type-surface.m8.json +++ b/contracts/type-surface.m8.json @@ -246,6 +246,11 @@ { "name": "content", "type": "Buffer | string" + }, + { + "name": "metadata", + "type": "ContentAttachmentOptions", + "optional": true } ], "returns": "Promise" @@ -268,6 +273,11 @@ { "name": "content", "type": "Buffer | string" + }, + { + "name": "metadata", + "type": "ContentAttachmentOptions", + "optional": true } ], "returns": "Promise" @@ -399,6 +409,11 @@ { "name": "content", "type": "Buffer | string" + }, + { + "name": "metadata", + "type": "ContentAttachmentOptions", + "optional": true } ], "returns": "Promise" @@ -421,6 +436,11 @@ { "name": "content", "type": "Buffer | string" + }, + { + "name": "metadata", + "type": "ContentAttachmentOptions", + "optional": true } ], "returns": "Promise" @@ -694,6 +714,16 @@ ], "returns": "Promise" }, + "getContentMeta": { + "async": true, + "params": [ + { + "name": "nodeId", + "type": "string" + } + ], + "returns": "Promise" + }, "getEdgeContentOid": { "async": true, "params": [ @@ -730,6 +760,24 @@ ], "returns": "Promise" }, + "getEdgeContentMeta": { + "async": true, + "params": [ + { + "name": "from", + "type": "string" + }, + { + "name": "to", + "type": "string" + }, + 
{ + "name": "label", + "type": "string" + } + ], + "returns": "Promise" + }, "neighbors": { "async": true, "params": [