From 1ca350b4a273d9dbc4a52780ecffbf78c377cf55 Mon Sep 17 00:00:00 2001 From: Nico Martin Date: Sat, 28 Feb 2026 11:41:46 +0100 Subject: [PATCH 1/8] added CrossOriginStorage implementation --- packages/transformers/src/env.js | 7 + packages/transformers/src/utils/cache.js | 11 ++ .../utils/cache/CrossOriginStorageCache.js | 137 ++++++++++++++++++ 3 files changed, 155 insertions(+) create mode 100644 packages/transformers/src/utils/cache/CrossOriginStorageCache.js diff --git a/packages/transformers/src/env.js b/packages/transformers/src/env.js index cf72b929f..710fd8578 100644 --- a/packages/transformers/src/env.js +++ b/packages/transformers/src/env.js @@ -191,6 +191,11 @@ export const LogLevel = Object.freeze({ * This can improve performance by avoiding repeated downloads of WASM files. Note: Only the WASM binary is cached. * The MJS loader file still requires network access unless you use a Service Worker. * @property {string} cacheKey The cache key to use for storing models and WASM binaries. Defaults to 'transformers-cache'. + * @property {boolean} experimental_useCrossOriginStorage Whether to use the Cross-Origin Storage API to cache model files + * across origins, allowing different sites to share the same cached model weights. Defaults to `false`. + * Requires browser support for `navigator.crossOriginStorage`. The `experimental_` prefix indicates that the underlying + * browser API is not yet standardised and may change or be removed without a major version bump. See + * {@link https://github.com/WICG/cross-origin-storage} for the current spec status. */ /** @type {TransformersEnvironment} */ @@ -227,6 +232,8 @@ export const env = { useWasmCache: IS_WEB_CACHE_AVAILABLE || IS_FS_AVAILABLE, cacheKey: 'transformers-cache', + + experimental_useCrossOriginStorage: false, ////////////////////////////////////////////////////// }; diff --git a/packages/transformers/src/utils/cache.js b/packages/transformers/src/utils/cache.js index 4b96114b6..b2059fc9b 100644 --- a/packages/transformers/src/utils/cache.js +++ b/packages/transformers/src/utils/cache.js @@ -1,6 +1,7 @@ import { apis, env } from '../env.js'; import { FileCache } from './hub/files.js'; import { logger } from './logger.js'; +import { CrossOriginStorage } from './cache/CrossOriginStorageCache.js'; /** * @typedef {Object} CacheInterface @@ -36,6 +37,16 @@ export async function getCache(file_cache_dir = null) { cache = env.customCache; } + if (!cache && env.experimental_useCrossOriginStorage) { + if (!CrossOriginStorage.isAvailable()) { + throw Error( + '`env.experimental_useCrossOriginStorage=true`, but the Cross-Origin Storage API is not available in this environment. ' + + 'See https://github.com/WICG/cross-origin-storage for browser support and usage instructions.', + ); + } + cache = new CrossOriginStorage(); + } + if (!cache && env.useBrowserCache) { if (typeof caches === 'undefined') { throw Error('Browser cache is not available in this environment.'); diff --git a/packages/transformers/src/utils/cache/CrossOriginStorageCache.js b/packages/transformers/src/utils/cache/CrossOriginStorageCache.js new file mode 100644 index 000000000..452842655 --- /dev/null +++ b/packages/transformers/src/utils/cache/CrossOriginStorageCache.js @@ -0,0 +1,137 @@ +const HASH_ALGORITHM = 'SHA-256'; + +/** + * Name of the Cache API bucket used to persist the url→hash mapping. + * Kept separate from the main `transformers-cache` bucket so it can be + * managed (inspected / cleared) independently. + */ +const HASH_CACHE_NAME = 'experimental_transformers-hash-cache'; + +/** + * A cache implementation backed by the experimental `navigator.crossOriginStorage` API, + * which allows sharing cached files (identified by content hash) across origins. + * + * Implements {@link import('../cache.js').CacheInterface}. + * + * @see https://github.com/explainers-by-googlers/cross-origin-storage + */ +export class CrossOriginStorage { + /** + * Returns whether the `navigator.crossOriginStorage` API is available in the current environment. + * @returns {boolean} + */ + static isAvailable = () => typeof navigator !== 'undefined' && 'crossOriginStorage' in navigator; + + /** + * Looks up a cached response for the given URL by resolving its SHA-256 hash and requesting + * the corresponding file handle from cross-origin storage. + * + * Implements `CacheInterface.match`. + * + * @param {string} request The URL of the resource to look up. + * @returns {Promise} The cached `Response`, or `undefined` if not found. + */ + match = async (request) => { + const hashValue = await this._getFileHash(request); + if (!hashValue) { + return undefined; + } + const hash = { algorithm: HASH_ALGORITHM, value: hashValue }; + try { + // @ts-expect-error + const [handle] = await navigator.crossOriginStorage.requestFileHandles([hash]); + const blob = await handle.getFile(); + return new Response(blob); + } catch (err) { + return undefined; + } + }; + + /** + * Stores a response in cross-origin storage, keyed by the SHA-256 hash of its body. + * + * Implements `CacheInterface.put`. + * + * @param {string} request The URL of the resource (used to derive a cache key). + * @param {Response} response The response whose body will be written to the cache. + * @returns {Promise} + */ + put = async (request, response) => { + const blob = await response.blob(); + const hash = await this._getBlobHash(blob); + // @ts-expect-error + const [handle] = await navigator.crossOriginStorage.requestFileHandles([hash], { create: true }); + const writableStream = await handle.createWritable(); + await writableStream.write(blob); + await writableStream.close(); + }; + + /** + * Resolves the SHA-256 hash for a Hugging Face resource URL by reading its raw Git LFS + * pointer file. Uses a network-first strategy: always attempts a live fetch and persists + * the result to the Cache API. Falls back to the cached hash when the network is + * unavailable, so cross-origin storage lookups continue to work offline. + * + * Supports any `/resolve//` URL (not limited to `/resolve/main/onnx/`). + * + * @see https://huggingface.co/docs/hub/en/storage-backends#xet + * @param {string} url The resolved Hugging Face URL of the resource. + * @returns {Promise} The hex-encoded SHA-256 hash, or `null` if unavailable. + */ + _getFileHash = async (url) => { + if (!/\/resolve\//.test(url)) { + return null; + } + + const rawUrl = url.replace(/\/resolve\//, '/raw/'); + + try { + // Network-first: fetch the LFS pointer file and cache the hash for offline use. + const text = await fetch(rawUrl).then((response) => response.text()); + if (!text.includes('oid sha256:')) { + return null; + } + const hash = text.replace(/.*?\n^oid sha256:(\w+)\n.*?$/gm, '$1') || null; + if (hash) { + try { + const hashCache = await caches.open(HASH_CACHE_NAME); + await hashCache.put(rawUrl, new Response(hash)); + } catch { + // Cache API unavailable (e.g. non-secure context): hash still returned. + } + } + return hash; + } catch { + // Network unavailable: fall back to the last cached hash so offline lookups work. + try { + const hashCache = await caches.open(HASH_CACHE_NAME); + const cached = await hashCache.match(rawUrl); + if (cached) { + return cached.text(); + } + } catch { + // Cache API also unavailable_ nothing we can do. + } + return null; + } + }; + + /** + * Computes the SHA-256 hash of a `Blob`'s contents. + * + * @param {Blob} blob The blob to hash. + * @returns {Promise<{algorithm: string, value: string}>} An object containing the algorithm + * identifier (`"SHA-256"`) and the lowercase hex-encoded hash value. + */ + _getBlobHash = async (blob) => { + const arrayBuffer = await blob.arrayBuffer(); + const hashBuffer = await crypto.subtle.digest(HASH_ALGORITHM, arrayBuffer); + const hashArray = Array.from(new Uint8Array(hashBuffer)); + const hashHex = hashArray.map((byte) => byte.toString(16).padStart(2, '0')).join(''); + + return { + algorithm: HASH_ALGORITHM, + value: hashHex, + }; + }; +} From 469df358f30354bdd1d7a3c23e57213270a939c2 Mon Sep 17 00:00:00 2001 From: Nico Martin Date: Mon, 2 Mar 2026 15:46:07 +0100 Subject: [PATCH 2/8] added references to COS extension --- packages/transformers/src/env.js | 10 ++++------ packages/transformers/src/utils/cache.js | 4 +++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/transformers/src/env.js b/packages/transformers/src/env.js index fc708dfe7..4e319a4db 100644 --- a/packages/transformers/src/env.js +++ b/packages/transformers/src/env.js @@ -139,9 +139,7 @@ const DEFAULT_LOCAL_MODEL_PATH = '/models/'; const localModelPath = RUNNING_LOCALLY ? path.join(dirname__, DEFAULT_LOCAL_MODEL_PATH) : DEFAULT_LOCAL_MODEL_PATH; // Ensure default fetch is called with the correct receiver in browser environments. -const DEFAULT_FETCH = typeof globalThis.fetch === 'function' - ? globalThis.fetch.bind(globalThis) - : undefined; +const DEFAULT_FETCH = typeof globalThis.fetch === 'function' ? globalThis.fetch.bind(globalThis) : undefined; /** * Log levels for controlling output verbosity. @@ -202,9 +200,9 @@ export const LogLevel = Object.freeze({ * @property {string} cacheKey The cache key to use for storing models and WASM binaries. Defaults to 'transformers-cache'. * @property {boolean} experimental_useCrossOriginStorage Whether to use the Cross-Origin Storage API to cache model files * across origins, allowing different sites to share the same cached model weights. Defaults to `false`. - * Requires browser support for `navigator.crossOriginStorage`. The `experimental_` prefix indicates that the underlying - * browser API is not yet standardised and may change or be removed without a major version bump. See - * {@link https://github.com/WICG/cross-origin-storage} for the current spec status. + * Requires the Cross-Origin Storage Chrome extension: {@link https://chromewebstore.google.com/detail/cross-origin-storage/denpnpcgjgikjpoglpjefakmdcbmlgih}. + * The `experimental_` prefix indicates that the underlying browser API is not yet standardised and may change or be + * removed without a major version bump. For more information, see {@link https://github.com/WICG/cross-origin-storage}. * @property {(input: string | URL, init?: any) => Promise} fetch The fetch function to use. Defaults to `fetch`. */ diff --git a/packages/transformers/src/utils/cache.js b/packages/transformers/src/utils/cache.js index b2059fc9b..f8aa64234 100644 --- a/packages/transformers/src/utils/cache.js +++ b/packages/transformers/src/utils/cache.js @@ -41,7 +41,9 @@ export async function getCache(file_cache_dir = null) { if (!CrossOriginStorage.isAvailable()) { throw Error( '`env.experimental_useCrossOriginStorage=true`, but the Cross-Origin Storage API is not available in this environment. ' + - 'See https://github.com/WICG/cross-origin-storage for browser support and usage instructions.', + 'Install the Chrome extension to enable Cross-Origin Storage: ' + + 'https://chromewebstore.google.com/detail/cross-origin-storage/denpnpcgjgikjpoglpjefakmdcbmlgih. ' + + 'For more information about the API, see https://github.com/WICG/cross-origin-storage', ); } cache = new CrossOriginStorage(); From ac0976fc5324132583f83ff042d952f265722b13 Mon Sep 17 00:00:00 2001 From: Nico Martin Date: Mon, 2 Mar 2026 17:00:16 +0100 Subject: [PATCH 3/8] added fallback cache --- packages/transformers/src/utils/cache.js | 21 ++++++----- .../utils/cache/CrossOriginStorageCache.js | 37 ++++++++++++++++++- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/packages/transformers/src/utils/cache.js b/packages/transformers/src/utils/cache.js index ec911ad7c..e50f4725f 100644 --- a/packages/transformers/src/utils/cache.js +++ b/packages/transformers/src/utils/cache.js @@ -39,16 +39,19 @@ export async function getCache(file_cache_dir = null) { cache = env.customCache; } - if (!cache && env.experimental_useCrossOriginStorage) { - if (!CrossOriginStorage.isAvailable()) { - throw Error( - '`env.experimental_useCrossOriginStorage=true`, but the Cross-Origin Storage API is not available in this environment. ' + - 'Install the Chrome extension to enable Cross-Origin Storage: ' + - 'https://chromewebstore.google.com/detail/cross-origin-storage/denpnpcgjgikjpoglpjefakmdcbmlgih. ' + - 'For more information about the API, see https://github.com/WICG/cross-origin-storage', - ); + if (!cache && env.experimental_useCrossOriginStorage && CrossOriginStorage.isAvailable()) { + // When the browser cache is also enabled, open it and pass it as a per-request fallback + // so that any request for which no file hash can be resolved is served from (or stored + // in) the browser cache instead of going all the way back to the network. + let browserCache = null; + if (env.useBrowserCache && typeof caches !== 'undefined') { + try { + browserCache = await caches.open(env.cacheKey); + } catch (e) { + logger.warn('An error occurred while opening the browser cache for CrossOriginStorage fallback:', e); + } } - cache = new CrossOriginStorage(); + cache = new CrossOriginStorage(browserCache); } if (!cache && env.useBrowserCache) { diff --git a/packages/transformers/src/utils/cache/CrossOriginStorageCache.js b/packages/transformers/src/utils/cache/CrossOriginStorageCache.js index 452842655..6e72e6fda 100644 --- a/packages/transformers/src/utils/cache/CrossOriginStorageCache.js +++ b/packages/transformers/src/utils/cache/CrossOriginStorageCache.js @@ -16,6 +16,17 @@ const HASH_CACHE_NAME = 'experimental_transformers-hash-cache'; * @see https://github.com/explainers-by-googlers/cross-origin-storage */ export class CrossOriginStorage { + /** + * @param {import('../cache.js').CacheInterface|null} [fallbackCache] + * An optional fallback cache (e.g. a browser `Cache` opened via `caches.open()`) that is + * consulted when no file hash can be resolved for a given request. When provided, both + * `match` and `put` delegate to it as a fallback/secondary store. + */ + constructor(fallbackCache = null) { + /** @type {import('../cache.js').CacheInterface|null} */ + this._fallbackCache = fallbackCache; + } + /** * Returns whether the `navigator.crossOriginStorage` API is available in the current environment. * @returns {boolean} @@ -26,14 +37,21 @@ export class CrossOriginStorage { * Looks up a cached response for the given URL by resolving its SHA-256 hash and requesting * the corresponding file handle from cross-origin storage. * + * Falls back to the `fallbackCache` (if configured) when no file hash can be resolved for + * the request. + * * Implements `CacheInterface.match`. * * @param {string} request The URL of the resource to look up. - * @returns {Promise} The cached `Response`, or `undefined` if not found. + * @returns {Promise} The cached `Response`, or `undefined` if not found. */ match = async (request) => { const hashValue = await this._getFileHash(request); if (!hashValue) { + // No hash available — delegate to fallback cache if one is configured. + if (this._fallbackCache) { + return this._fallbackCache.match(request); + } return undefined; } const hash = { algorithm: HASH_ALGORITHM, value: hashValue }; @@ -43,6 +61,10 @@ export class CrossOriginStorage { const blob = await handle.getFile(); return new Response(blob); } catch (err) { + // Cross-origin storage lookup failed — delegate to fallback cache if one is configured. + if (this._fallbackCache) { + return this._fallbackCache.match(request); + } return undefined; } }; @@ -50,6 +72,9 @@ export class CrossOriginStorage { /** * Stores a response in cross-origin storage, keyed by the SHA-256 hash of its body. * + * When a `fallbackCache` is configured, also stores the response there so that subsequent + * requests that cannot resolve a hash still have a warm entry. + * * Implements `CacheInterface.put`. * * @param {string} request The URL of the resource (used to derive a cache key). @@ -64,6 +89,16 @@ export class CrossOriginStorage { const writableStream = await handle.createWritable(); await writableStream.write(blob); await writableStream.close(); + + // Populate the fallback cache as well so that a future miss (e.g. hash unavailable) + // can still be served without a full network round-trip. + if (this._fallbackCache) { + try { + await this._fallbackCache.put(request, new Response(blob)); + } catch { + // Fallback cache write failure is non-fatal. + } + } }; /** From a2bb6cdec6b58b3365b38e16778e3bc85eec22ad Mon Sep 17 00:00:00 2001 From: Nico Martin Date: Tue, 3 Mar 2026 15:12:28 +0100 Subject: [PATCH 4/8] refactored CrossOriginStorage --- packages/transformers/src/utils/cache.js | 13 +- .../utils/cache/CrossOriginStorageCache.js | 184 ++++++++++++------ 2 files changed, 126 insertions(+), 71 deletions(-) diff --git a/packages/transformers/src/utils/cache.js b/packages/transformers/src/utils/cache.js index e50f4725f..d9012e26f 100644 --- a/packages/transformers/src/utils/cache.js +++ b/packages/transformers/src/utils/cache.js @@ -40,18 +40,7 @@ export async function getCache(file_cache_dir = null) { } if (!cache && env.experimental_useCrossOriginStorage && CrossOriginStorage.isAvailable()) { - // When the browser cache is also enabled, open it and pass it as a per-request fallback - // so that any request for which no file hash can be resolved is served from (or stored - // in) the browser cache instead of going all the way back to the network. - let browserCache = null; - if (env.useBrowserCache && typeof caches !== 'undefined') { - try { - browserCache = await caches.open(env.cacheKey); - } catch (e) { - logger.warn('An error occurred while opening the browser cache for CrossOriginStorage fallback:', e); - } - } - cache = new CrossOriginStorage(browserCache); + cache = new CrossOriginStorage(); } if (!cache && env.useBrowserCache) { diff --git a/packages/transformers/src/utils/cache/CrossOriginStorageCache.js b/packages/transformers/src/utils/cache/CrossOriginStorageCache.js index 6e72e6fda..f7d65ec09 100644 --- a/packages/transformers/src/utils/cache/CrossOriginStorageCache.js +++ b/packages/transformers/src/utils/cache/CrossOriginStorageCache.js @@ -16,16 +16,7 @@ const HASH_CACHE_NAME = 'experimental_transformers-hash-cache'; * @see https://github.com/explainers-by-googlers/cross-origin-storage */ export class CrossOriginStorage { - /** - * @param {import('../cache.js').CacheInterface|null} [fallbackCache] - * An optional fallback cache (e.g. a browser `Cache` opened via `caches.open()`) that is - * consulted when no file hash can be resolved for a given request. When provided, both - * `match` and `put` delegate to it as a fallback/secondary store. - */ - constructor(fallbackCache = null) { - /** @type {import('../cache.js').CacheInterface|null} */ - this._fallbackCache = fallbackCache; - } + constructor() {} /** * Returns whether the `navigator.crossOriginStorage` API is available in the current environment. @@ -37,9 +28,6 @@ export class CrossOriginStorage { * Looks up a cached response for the given URL by resolving its SHA-256 hash and requesting * the corresponding file handle from cross-origin storage. * - * Falls back to the `fallbackCache` (if configured) when no file hash can be resolved for - * the request. - * * Implements `CacheInterface.match`. * * @param {string} request The URL of the resource to look up. @@ -48,10 +36,6 @@ export class CrossOriginStorage { match = async (request) => { const hashValue = await this._getFileHash(request); if (!hashValue) { - // No hash available — delegate to fallback cache if one is configured. - if (this._fallbackCache) { - return this._fallbackCache.match(request); - } return undefined; } const hash = { algorithm: HASH_ALGORITHM, value: hashValue }; @@ -61,59 +45,160 @@ export class CrossOriginStorage { const blob = await handle.getFile(); return new Response(blob); } catch (err) { - // Cross-origin storage lookup failed — delegate to fallback cache if one is configured. - if (this._fallbackCache) { - return this._fallbackCache.match(request); - } return undefined; } }; /** - * Stores a response in cross-origin storage, keyed by the SHA-256 hash of its body. + * Stores a response in cross-origin storage, keyed by its SHA-256 hash. + * + * For LFS-backed URLs the hash is resolved cheaply from the Git LFS pointer file + * (via `_getLfsFileHash`) without reading the response body a second time. * - * When a `fallbackCache` is configured, also stores the response there so that subsequent - * requests that cannot resolve a hash still have a warm entry. + * For non-LFS resources the hash is unknown upfront. In that case the body is consumed + * in the background: the stream is read to compute the content hash, the file is written + * into cross-origin storage, and the computed hash is persisted to `HASH_CACHE_NAME` + * so that future `match` calls can resolve the file without a network round-trip. * * Implements `CacheInterface.put`. * - * @param {string} request The URL of the resource (used to derive a cache key). + * @param {string} request The URL of the resource (used as the hash-cache key). * @param {Response} response The response whose body will be written to the cache. * @returns {Promise} */ put = async (request, response) => { - const blob = await response.blob(); - const hash = await this._getBlobHash(blob); + const hashValue = await this._getFileHash(request); + + console.log(request, hashValue); + + if (hashValue) { + // Fast path: LFS hash already known. Consume the body and store directly. + const blob = await response.blob(); + await this._storeBlobInCOS(blob, hashValue); + } else { + // Slow path: hash unknown. Process in the background so put() returns promptly. + // The caller already holds a reference to the original response; we receive it + // here only to buffer and hash its body. + this._processAndStore(request, response.body, response.headers); + } + }; + + /** + * Writes a blob into cross-origin storage using the given pre-computed hex hash string. + * + * @param {Blob} blob + * @param {string} hashHex Hex-encoded SHA-256 hash of `blob`. + * @returns {Promise} + */ + _storeBlobInCOS = async (blob, hashHex) => { + const hash = { algorithm: HASH_ALGORITHM, value: hashHex }; // @ts-expect-error const [handle] = await navigator.crossOriginStorage.requestFileHandles([hash], { create: true }); const writableStream = await handle.createWritable(); await writableStream.write(blob); await writableStream.close(); + }; + + /** + * Background task for non-LFS resources: consumes `stream`, computes the SHA-256 hash + * of the resulting blob, stores it in cross-origin storage, and persists the computed + * hash to `HASH_CACHE_NAME` keyed by `request` so future `match` calls can resolve the + * file without a network round-trip. + * + * Called fire-and-forget from `put` — errors are swallowed so failures never surface to + * the caller. + * + * @param {string} request The original resource URL. + * @param {ReadableStream} stream The response body stream to consume. + * @param {Headers} _headers The original response headers (reserved for future use). + * @returns {Promise} + */ + _processAndStore = async (request, stream, _headers) => { + try { + const blob = await new Response(stream).blob(); + const { value: hashHex } = await this._getBlobHash(blob); - // Populate the fallback cache as well so that a future miss (e.g. hash unavailable) - // can still be served without a full network round-trip. - if (this._fallbackCache) { + await this._storeBlobInCOS(blob, hashHex); + + // Persist the computed hash so future match() calls resolve without the network. try { - await this._fallbackCache.put(request, new Response(blob)); + const hashCache = await caches.open(HASH_CACHE_NAME); + await hashCache.put(request, new Response(hashHex)); } catch { - // Fallback cache write failure is non-fatal. + // Cache API unavailable (e.g. non-secure context): COS entry still written. } + } catch { + // Non-fatal: background store failure must not affect the caller. } }; /** - * Resolves the SHA-256 hash for a Hugging Face resource URL by reading its raw Git LFS - * pointer file. Uses a network-first strategy: always attempts a live fetch and persists - * the result to the Cache API. Falls back to the cached hash when the network is - * unavailable, so cross-origin storage lookups continue to work offline. + * Deletes the cache entry for the given request. + * + * Removes the hash entry from `HASH_CACHE_NAME`. Note: cross-origin storage itself does not + * expose a delete API, so only the local hash mapping is removed. After deletion, `match` + * will no longer be able to resolve the file from cross-origin storage. * - * Supports any `/resolve//` URL (not limited to `/resolve/main/onnx/`). + * Implements `CacheInterface.delete`. + * + * @param {string} request + * @returns {Promise} Resolves to `true` if the hash entry was deleted, `false` otherwise. + */ + delete = async (request) => { + try { + const hashCache = await caches.open(HASH_CACHE_NAME); + return await hashCache.delete(request); + } catch { + return false; + } + }; + + /** + * Resolves the SHA-256 hash for a given URL. + * + * Returns the cached hash immediately if one has been persisted to `HASH_CACHE_NAME`. + * Otherwise falls back to `_getLfsFileHash` to retrieve the hash from the Hugging Face + * LFS pointer file, persisting the result to `HASH_CACHE_NAME` for future lookups. + * + * Returns `null` if the hash cannot be determined (e.g. non-LFS URL with no cached entry). + * + * @param {string} url The resource URL to resolve a hash for. + * @returns {Promise} The hex-encoded SHA-256 hash, or `null` if unavailable. + */ + _getFileHash = async (url) => { + try { + const hashCache = await caches.open(HASH_CACHE_NAME); + const cached = await hashCache.match(url); + if (cached) { + return cached.text(); + } + + const hash = await this._getLfsFileHash(url); + if (hash) { + const hashCache = await caches.open(HASH_CACHE_NAME); + await hashCache.put(url, new Response(hash)); + return hash; + } + + return null; + } catch (e) { + return null; + } + }; + + /** + * Attempts to retrieve the SHA-256 hash for a Hugging Face resource URL from its raw + * Git LFS pointer file. + * + * Only applicable to URLs containing `/resolve/` (i.e. Hugging Face resolved file URLs). + * The `/resolve/` segment is rewritten to `/raw/` to fetch the LFS pointer directly. + * Returns `null` for non-LFS URLs or when the network request fails. * * @see https://huggingface.co/docs/hub/en/storage-backends#xet * @param {string} url The resolved Hugging Face URL of the resource. * @returns {Promise} The hex-encoded SHA-256 hash, or `null` if unavailable. */ - _getFileHash = async (url) => { + _getLfsFileHash = async (url) => { if (!/\/resolve\//.test(url)) { return null; } @@ -121,32 +206,13 @@ export class CrossOriginStorage { const rawUrl = url.replace(/\/resolve\//, '/raw/'); try { - // Network-first: fetch the LFS pointer file and cache the hash for offline use. + // fetch the LFS pointer file and return the sha256 hash if present. const text = await fetch(rawUrl).then((response) => response.text()); if (!text.includes('oid sha256:')) { return null; } - const hash = text.replace(/.*?\n^oid sha256:(\w+)\n.*?$/gm, '$1') || null; - if (hash) { - try { - const hashCache = await caches.open(HASH_CACHE_NAME); - await hashCache.put(rawUrl, new Response(hash)); - } catch { - // Cache API unavailable (e.g. non-secure context): hash still returned. - } - } - return hash; + return text.replace(/.*?\n^oid sha256:(\w+)\n.*?$/gm, '$1') || null; } catch { - // Network unavailable: fall back to the last cached hash so offline lookups work. - try { - const hashCache = await caches.open(HASH_CACHE_NAME); - const cached = await hashCache.match(rawUrl); - if (cached) { - return cached.text(); - } - } catch { - // Cache API also unavailable_ nothing we can do. - } return null; } }; From cd06794f7da978efca3a96191b6da51b4224b55e Mon Sep 17 00:00:00 2001 From: Nico Martin Date: Tue, 3 Mar 2026 15:27:41 +0100 Subject: [PATCH 5/8] clean-up --- .../utils/cache/CrossOriginStorageCache.js | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/packages/transformers/src/utils/cache/CrossOriginStorageCache.js b/packages/transformers/src/utils/cache/CrossOriginStorageCache.js index f7d65ec09..9f9c360d9 100644 --- a/packages/transformers/src/utils/cache/CrossOriginStorageCache.js +++ b/packages/transformers/src/utils/cache/CrossOriginStorageCache.js @@ -31,7 +31,7 @@ export class CrossOriginStorage { * Implements `CacheInterface.match`. * * @param {string} request The URL of the resource to look up. - * @returns {Promise} The cached `Response`, or `undefined` if not found. + * @returns {Promise} The cached `Response`, or `undefined` if not found. */ match = async (request) => { const hashValue = await this._getFileHash(request); @@ -52,8 +52,9 @@ export class CrossOriginStorage { /** * Stores a response in cross-origin storage, keyed by its SHA-256 hash. * - * For LFS-backed URLs the hash is resolved cheaply from the Git LFS pointer file - * (via `_getLfsFileHash`) without reading the response body a second time. + * For LFS-backed URLs the hash is resolved cheaply via `_getFileHash` (which checks + * `HASH_CACHE_NAME` first, then falls back to fetching the Git LFS pointer file) + * without reading the response body a second time. * * For non-LFS resources the hash is unknown upfront. In that case the body is consumed * in the background: the stream is read to compute the content hash, the file is written @@ -69,8 +70,6 @@ export class CrossOriginStorage { put = async (request, response) => { const hashValue = await this._getFileHash(request); - console.log(request, hashValue); - if (hashValue) { // Fast path: LFS hash already known. Consume the body and store directly. const blob = await response.blob(); @@ -79,7 +78,7 @@ export class CrossOriginStorage { // Slow path: hash unknown. Process in the background so put() returns promptly. // The caller already holds a reference to the original response; we receive it // here only to buffer and hash its body. - this._processAndStore(request, response.body, response.headers); + this._processAndStore(request, response.body); } }; @@ -110,10 +109,9 @@ export class CrossOriginStorage { * * @param {string} request The original resource URL. * @param {ReadableStream} stream The response body stream to consume. - * @param {Headers} _headers The original response headers (reserved for future use). * @returns {Promise} */ - _processAndStore = async (request, stream, _headers) => { + _processAndStore = async (request, stream) => { try { const blob = await new Response(stream).blob(); const { value: hashHex } = await this._getBlobHash(blob); @@ -136,8 +134,9 @@ export class CrossOriginStorage { * Deletes the cache entry for the given request. * * Removes the hash entry from `HASH_CACHE_NAME`. Note: cross-origin storage itself does not - * expose a delete API, so only the local hash mapping is removed. After deletion, `match` - * will no longer be able to resolve the file from cross-origin storage. + * expose a delete API, so only the local hash mapping is removed. For non-LFS URLs this + * permanently prevents `match` from resolving the file. For LFS-backed URLs, `match` will + * re-fetch the LFS pointer file on the next call and repopulate the hash cache automatically. * * Implements `CacheInterface.delete`. * From bd0b5cda112234682be9eda29efb8b017ba5e462 Mon Sep 17 00:00:00 2001 From: Nico Martin Date: Tue, 3 Mar 2026 15:36:54 +0100 Subject: [PATCH 6/8] some improvements --- .../utils/cache/CrossOriginStorageCache.js | 72 +++++++++++-------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/packages/transformers/src/utils/cache/CrossOriginStorageCache.js b/packages/transformers/src/utils/cache/CrossOriginStorageCache.js index 9f9c360d9..2f4e2ef91 100644 --- a/packages/transformers/src/utils/cache/CrossOriginStorageCache.js +++ b/packages/transformers/src/utils/cache/CrossOriginStorageCache.js @@ -2,11 +2,17 @@ const HASH_ALGORITHM = 'SHA-256'; /** * Name of the Cache API bucket used to persist the url→hash mapping. - * Kept separate from the main `transformers-cache` bucket so it can be - * managed (inspected / cleared) independently. */ const HASH_CACHE_NAME = 'experimental_transformers-hash-cache'; +/** + * Builds the hash descriptor object expected by the cross-origin storage API. + * + * @param {string} value Hex-encoded SHA-256 hash. + * @returns {{ algorithm: string, value: string }} + */ +const makeHashDescriptor = (value) => ({ algorithm: HASH_ALGORITHM, value }); + /** * A cache implementation backed by the experimental `navigator.crossOriginStorage` API, * which allows sharing cached files (identified by content hash) across origins. @@ -16,7 +22,17 @@ const HASH_CACHE_NAME = 'experimental_transformers-hash-cache'; * @see https://github.com/explainers-by-googlers/cross-origin-storage */ export class CrossOriginStorage { - constructor() {} + /** @type {Promise | null} */ + #hashCache = null; + + /** + * Returns (and lazily opens) the hash cache, reusing the same promise across concurrent callers. + * @returns {Promise} + */ + _getHashCache = () => { + this.#hashCache ??= caches.open(HASH_CACHE_NAME); + return this.#hashCache; + }; /** * Returns whether the `navigator.crossOriginStorage` API is available in the current environment. @@ -38,13 +54,12 @@ export class CrossOriginStorage { if (!hashValue) { return undefined; } - const hash = { algorithm: HASH_ALGORITHM, value: hashValue }; try { // @ts-expect-error - const [handle] = await navigator.crossOriginStorage.requestFileHandles([hash]); + const [handle] = await navigator.crossOriginStorage.requestFileHandles([makeHashDescriptor(hashValue)]); const blob = await handle.getFile(); return new Response(blob); - } catch (err) { + } catch { return undefined; } }; @@ -90,9 +105,10 @@ export class CrossOriginStorage { * @returns {Promise} */ _storeBlobInCOS = async (blob, hashHex) => { - const hash = { algorithm: HASH_ALGORITHM, value: hashHex }; // @ts-expect-error - const [handle] = await navigator.crossOriginStorage.requestFileHandles([hash], { create: true }); + const [handle] = await navigator.crossOriginStorage.requestFileHandles([makeHashDescriptor(hashHex)], { + create: true, + }); const writableStream = await handle.createWritable(); await writableStream.write(blob); await writableStream.close(); @@ -113,14 +129,18 @@ export class CrossOriginStorage { */ _processAndStore = async (request, stream) => { try { - const blob = await new Response(stream).blob(); - const { value: hashHex } = await this._getBlobHash(blob); + const chunks = []; + for await (const chunk of stream) { + chunks.push(chunk); + } + const blob = new Blob(chunks); + const hashHex = await this._getBlobHash(blob); await this._storeBlobInCOS(blob, hashHex); // Persist the computed hash so future match() calls resolve without the network. try { - const hashCache = await caches.open(HASH_CACHE_NAME); + const hashCache = await this._getHashCache(); await hashCache.put(request, new Response(hashHex)); } catch { // Cache API unavailable (e.g. non-secure context): COS entry still written. @@ -145,7 +165,7 @@ export class CrossOriginStorage { */ delete = async (request) => { try { - const hashCache = await caches.open(HASH_CACHE_NAME); + const hashCache = await this._getHashCache(); return await hashCache.delete(request); } catch { return false; @@ -166,7 +186,7 @@ export class CrossOriginStorage { */ _getFileHash = async (url) => { try { - const hashCache = await caches.open(HASH_CACHE_NAME); + const hashCache = await this._getHashCache(); const cached = await hashCache.match(url); if (cached) { return cached.text(); @@ -174,13 +194,12 @@ export class CrossOriginStorage { const hash = await this._getLfsFileHash(url); if (hash) { - const hashCache = await caches.open(HASH_CACHE_NAME); await hashCache.put(url, new Response(hash)); return hash; } return null; - } catch (e) { + } catch { return null; } }; @@ -198,19 +217,16 @@ export class CrossOriginStorage { * @returns {Promise} The hex-encoded SHA-256 hash, or `null` if unavailable. */ _getLfsFileHash = async (url) => { - if (!/\/resolve\//.test(url)) { + if (!url.includes('/resolve/')) { return null; } - const rawUrl = url.replace(/\/resolve\//, '/raw/'); + const rawUrl = url.replace('/resolve/', '/raw/'); try { - // fetch the LFS pointer file and return the sha256 hash if present. - const text = await fetch(rawUrl).then((response) => response.text()); - if (!text.includes('oid sha256:')) { - return null; - } - return text.replace(/.*?\n^oid sha256:(\w+)\n.*?$/gm, '$1') || null; + const text = await fetch(rawUrl).then((r) => r.text()); + const match = text.match(/^oid sha256:([0-9a-f]+)$/m); + return match ? match[1] : null; } catch { return null; } @@ -220,18 +236,12 @@ export class CrossOriginStorage { * Computes the SHA-256 hash of a `Blob`'s contents. * * @param {Blob} blob The blob to hash. - * @returns {Promise<{algorithm: string, value: string}>} An object containing the algorithm - * identifier (`"SHA-256"`) and the lowercase hex-encoded hash value. + * @returns {Promise} The lowercase hex-encoded SHA-256 hash. */ _getBlobHash = async (blob) => { const arrayBuffer = await blob.arrayBuffer(); const hashBuffer = await crypto.subtle.digest(HASH_ALGORITHM, arrayBuffer); const hashArray = Array.from(new Uint8Array(hashBuffer)); - const hashHex = hashArray.map((byte) => byte.toString(16).padStart(2, '0')).join(''); - - return { - algorithm: HASH_ALGORITHM, - value: hashHex, - }; + return hashArray.map((byte) => byte.toString(16).padStart(2, '0')).join(''); }; } From e46d50aae785a16e6802ac62454056f1eda16aa6 Mon Sep 17 00:00:00 2001 From: Nico Martin Date: Tue, 3 Mar 2026 16:42:09 +0100 Subject: [PATCH 7/8] added types --- .../utils/cache/CrossOriginStorageCache.js | 2 - .../src/utils/cache/cross-origin-storage.d.ts | 38 +++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 packages/transformers/src/utils/cache/cross-origin-storage.d.ts diff --git a/packages/transformers/src/utils/cache/CrossOriginStorageCache.js b/packages/transformers/src/utils/cache/CrossOriginStorageCache.js index 2f4e2ef91..8a1a9aa5e 100644 --- a/packages/transformers/src/utils/cache/CrossOriginStorageCache.js +++ b/packages/transformers/src/utils/cache/CrossOriginStorageCache.js @@ -55,7 +55,6 @@ export class CrossOriginStorage { return undefined; } try { - // @ts-expect-error const [handle] = await navigator.crossOriginStorage.requestFileHandles([makeHashDescriptor(hashValue)]); const blob = await handle.getFile(); return new Response(blob); @@ -105,7 +104,6 @@ export class CrossOriginStorage { * @returns {Promise} */ _storeBlobInCOS = async (blob, hashHex) => { - // @ts-expect-error const [handle] = await navigator.crossOriginStorage.requestFileHandles([makeHashDescriptor(hashHex)], { create: true, }); diff --git a/packages/transformers/src/utils/cache/cross-origin-storage.d.ts b/packages/transformers/src/utils/cache/cross-origin-storage.d.ts new file mode 100644 index 000000000..a66ec31d7 --- /dev/null +++ b/packages/transformers/src/utils/cache/cross-origin-storage.d.ts @@ -0,0 +1,38 @@ +/** + * Type definitions for the Cross-Origin Storage API + * Source: https://github.com/WICG/cross-origin-storage/blob/main/cross-origin-storage.d.ts + * @see https://github.com/WICG/cross-origin-storage + */ + +/** + * Represents the dictionary for hash algorithms and values. + */ +interface CrossOriginStorageRequestFileHandleHash { + value: string; + algorithm: string; +} + +/** + * Represents the options for requesting file handles. + */ +interface CrossOriginStorageRequestFileHandleOptions { + create?: boolean; +} + +/** + * The CrossOriginStorageManager interface. + * [SecureContext] + */ +interface CrossOriginStorageManager { + requestFileHandles( + hashes: CrossOriginStorageRequestFileHandleHash[], + options?: CrossOriginStorageRequestFileHandleOptions, + ): Promise; +} + +/** + * Augment the standard Navigator interface. + */ +interface Navigator { + readonly crossOriginStorage: CrossOriginStorageManager; +} From f4324f76c038d7e9ce32f9c26ccec8641c7b6746 Mon Sep 17 00:00:00 2001 From: Nico Martin Date: Tue, 3 Mar 2026 20:35:11 +0100 Subject: [PATCH 8/8] added type references --- .../transformers/src/utils/cache/CrossOriginStorageCache.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/transformers/src/utils/cache/CrossOriginStorageCache.js b/packages/transformers/src/utils/cache/CrossOriginStorageCache.js index 8a1a9aa5e..e4d7e47e9 100644 --- a/packages/transformers/src/utils/cache/CrossOriginStorageCache.js +++ b/packages/transformers/src/utils/cache/CrossOriginStorageCache.js @@ -1,3 +1,5 @@ +/// + const HASH_ALGORITHM = 'SHA-256'; /**