-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Open
Labels
bug: Something isn't working
Description
System Info
I regularly see this when running tests on node using vitest.
Environment/Platform
- Website/web-app
- Browser extension
- Server-side (e.g., Node.js, Deno, Bun)
- Desktop app (e.g., Electron)
- Other (e.g., VSCode extension)
Description
Load model from /.../transformers/.cache/Xenova/clip-vit-base-patch32/onnx/model_quantized.onnx failed:Protobuf parsing failed.
One process starts the download; the next process sees that the file already exists and tries to read it before the download has completed, which causes the Protobuf parsing error.
This will also happen in the real world whenever Node is run with multiple processes, as is typical.
Reproduction
download-model.js
#!/usr/bin/env node
/**
* Downloads a Transformers.js model (and optional tokenizer/processor files)
* into a shared cache directory.
*
* Usage:
* node download-model.js --model Xenova/all-MiniLM-L6-v2 --cacheDir ./.cache/hf
*
* Notes:
* - Uses Transformers.js to fetch model artifacts.
* - Multiple processes can run this concurrently; if the cache dir is shared,
* most files will already exist after the first finishes.
*/
import { env, pipeline } from "@sroussey/transformers";
import fs from "node:fs";
import path from "node:path";
import process from "node:process";
/**
 * Looks up the value that follows `--<name>` on the command line.
 *
 * @param {string} name - Flag name without the leading dashes.
 * @param {*} def - Value returned when the flag is absent or has no usable value.
 * @returns {*} The argument following the first `--<name>`, or `def` when the
 *   flag is missing or the following argument is missing/empty.
 */
function getArg(name, def) {
  const flagPosition = process.argv.indexOf(`--${name}`);
  if (flagPosition === -1) {
    return def;
  }
  // A flag in last position (or one followed by "") has no usable value.
  const value = process.argv[flagPosition + 1];
  return value ? value : def;
}
// Command-line options; each falls back to its default when the flag is not given.
const model = getArg("model", "Xenova/all-MiniLM-L6-v2");
// The cache directory is resolved to an absolute path.
const cacheDir = path.resolve(getArg("cacheDir", "./.cache/hf"));
const task = getArg("task", "feature-extraction"); // feature-extraction, text-generation, etc.
// Create the cache directory (and any missing parents) before anything else runs.
fs.mkdirSync(cacheDir, { recursive: true });
// Process id, used as the prefix of every log line.
const pid = process.pid;
// Start timestamp in milliseconds; main() subtracts it to report elapsed time.
const t0 = Date.now();
/**
 * Builds a Transformers.js pipeline for the selected task and model, which
 * triggers the download of the required files into the cache directory.
 * Logs a start line and, on success, the elapsed time since `t0`.
 *
 * @returns {Promise<void>} Resolves once the pipeline has been created (and,
 *   for the "feature-extraction" task, invoked once); rejects with whatever
 *   error pipeline creation or the warm-up call throws.
 */
async function main() {
  console.log(`[${pid}] Starting download for model="${model}" task="${task}" cache="${cacheDir}"`);

  // Local model loading is on by default; set it explicitly anyway.
  env.allowLocalModels = true;
  // To forbid remote fetches after the first run, also set:
  // env.allowRemoteModels = false;

  const pipelineOptions = {
    // Point Transformers.js at the shared cache directory.
    cache_dir: cacheDir,
    // progress_callback: (p) => console.log(`[${pid}]`, p), // can be noisy
  };

  // Creating the pipeline is what triggers the downloads.
  // In Node the device is typically "cpu".
  const pipe = await pipeline(task, model, pipelineOptions);

  // For feature-extraction, run one tiny forward pass so initialization
  // has definitely completed before reporting success.
  const needsWarmup = task === "feature-extraction";
  if (needsWarmup) {
    await pipe("warmup");
  }

  console.log(`[${pid}] Done in ${Date.now() - t0}ms`);
}
main().catch((err) => {
console.error(`[${pid}] Failed:`, err?.stack || err);
process.exit(1);
});

download-parallel.sh
#!/usr/bin/env bash
# Launches PROCS concurrent copies of download-model.js against one shared
# cache directory and exits non-zero if any of them fails.
set -euo pipefail

# Settings; each can be overridden through the environment.
MODEL="${MODEL:-Xenova/all-MiniLM-L6-v2}"
CACHE_DIR="${CACHE_DIR:-./.cache/hf}"
TASK="${TASK:-feature-extraction}"
PROCS="${PROCS:-6}"

mkdir -p "$CACHE_DIR"

# Print the effective settings followed by a blank line.
printf '%s\n' "Model: $MODEL" "Task: $TASK" "Cache dir: $CACHE_DIR" "Procs: $PROCS" ""

# Spawn the workers in the background, remembering each PID.
worker_pids=()
for _ in $(seq 1 "$PROCS"); do
  node ./download-model.js --model "$MODEL" --task "$TASK" --cacheDir "$CACHE_DIR" &
  worker_pids+=("$!")
done

# Reap every worker; note whether any of them exited non-zero.
failed=0
for worker in "${worker_pids[@]}"; do
  wait "$worker" || failed=1
done

if (( failed )); then
  echo "One or more downloads failed." >&2
  exit 1
fi

echo
echo "All processes finished."

First run fails, second run does not:
node ➜ /workspaces/…/packages/test/src (main) $ ./download-parallel.sh
Model: Xenova/all-MiniLM-L6-v2
Task: feature-extraction
Cache dir: ./.cache/hf
Procs: 6
onnxruntime cpuid_info warning: Unknown CPU vendor. cpuinfo_vendor value: 0
onnxruntime cpuid_info warning: Unknown CPU vendor. cpuinfo_vendor value: 0
onnxruntime cpuid_info warning: Unknown CPU vendor. cpuinfo_vendor value: 0
onnxruntime cpuid_info warning: Unknown CPU vendor. cpuinfo_vendor value: 0
onnxruntime cpuid_info warning: Unknown CPU vendor. cpuinfo_vendor value: 0
onnxruntime cpuid_info warning: Unknown CPU vendor. cpuinfo_vendor value: 0
[380815] Starting download for model="Xenova/all-MiniLM-L6-v2" task="feature-extraction" cache="/workspaces/workglow/libs/packages/test/src/.cache/hf"
[380812] Starting download for model="Xenova/all-MiniLM-L6-v2" task="feature-extraction" cache="/workspaces/workglow/libs/packages/test/src/.cache/hf"
[380810] Starting download for model="Xenova/all-MiniLM-L6-v2" task="feature-extraction" cache="/workspaces/workglow/libs/packages/test/src/.cache/hf"
[380811] Starting download for model="Xenova/all-MiniLM-L6-v2" task="feature-extraction" cache="/workspaces/workglow/libs/packages/test/src/.cache/hf"
[380813] Starting download for model="Xenova/all-MiniLM-L6-v2" task="feature-extraction" cache="/workspaces/workglow/libs/packages/test/src/.cache/hf"
[380814] Starting download for model="Xenova/all-MiniLM-L6-v2" task="feature-extraction" cache="/workspaces/workglow/libs/packages/test/src/.cache/hf"
[380814] Failed: Error: Load model from /workspaces/workglow/libs/packages/test/src/.cache/hf/Xenova/all-MiniLM-L6-v2/onnx/model.onnx failed:Protobuf parsing failed.
at new OnnxruntimeSessionHandler (/workspaces/workglow/libs/node_modules/.bun/onnxruntime-node@1.24.1/node_modules/onnxruntime-node/dist/backend.js:50:92)
at Immediate.<anonymous> (/workspaces/workglow/libs/node_modules/.bun/onnxruntime-node@1.24.1/node_modules/onnxruntime-node/dist/backend.js:136:29)
at process.processImmediate (node:internal/timers:504:21)
[380811] Failed: Error: Load model from /workspaces/workglow/libs/packages/test/src/.cache/hf/Xenova/all-MiniLM-L6-v2/onnx/model.onnx failed:Protobuf parsing failed.
at new OnnxruntimeSessionHandler (/workspaces/workglow/libs/node_modules/.bun/onnxruntime-node@1.24.1/node_modules/onnxruntime-node/dist/backend.js:50:92)
at Immediate.<anonymous> (/workspaces/workglow/libs/node_modules/.bun/onnxruntime-node@1.24.1/node_modules/onnxruntime-node/dist/backend.js:136:29)
at process.processImmediate (node:internal/timers:504:21)
[380813] Failed: Error: Load model from /workspaces/workglow/libs/packages/test/src/.cache/hf/Xenova/all-MiniLM-L6-v2/onnx/model.onnx failed:Protobuf parsing failed.
at new OnnxruntimeSessionHandler (/workspaces/workglow/libs/node_modules/.bun/onnxruntime-node@1.24.1/node_modules/onnxruntime-node/dist/backend.js:50:92)
at Immediate.<anonymous> (/workspaces/workglow/libs/node_modules/.bun/onnxruntime-node@1.24.1/node_modules/onnxruntime-node/dist/backend.js:136:29)
at process.processImmediate (node:internal/timers:504:21)
[380815] Failed: Error: Load model from /workspaces/workglow/libs/packages/test/src/.cache/hf/Xenova/all-MiniLM-L6-v2/onnx/model.onnx failed:Protobuf parsing failed.
at new OnnxruntimeSessionHandler (/workspaces/workglow/libs/node_modules/.bun/onnxruntime-node@1.24.1/node_modules/onnxruntime-node/dist/backend.js:50:92)
at Immediate.<anonymous> (/workspaces/workglow/libs/node_modules/.bun/onnxruntime-node@1.24.1/node_modules/onnxruntime-node/dist/backend.js:136:29)
at process.processImmediate (node:internal/timers:504:21)
[380810] Failed: Error: Load model from /workspaces/workglow/libs/packages/test/src/.cache/hf/Xenova/all-MiniLM-L6-v2/onnx/model.onnx failed:Protobuf parsing failed.
at new OnnxruntimeSessionHandler (/workspaces/workglow/libs/node_modules/.bun/onnxruntime-node@1.24.1/node_modules/onnxruntime-node/dist/backend.js:50:92)
at Immediate.<anonymous> (/workspaces/workglow/libs/node_modules/.bun/onnxruntime-node@1.24.1/node_modules/onnxruntime-node/dist/backend.js:136:29)
at process.processImmediate (node:internal/timers:504:21)
[380812] Done in 3161ms
One or more downloads failed.
node ➜ /workspaces/…/packages/test/src (main) $ ./download-parallel.sh
Model: Xenova/all-MiniLM-L6-v2
Task: feature-extraction
Cache dir: ./.cache/hf
Procs: 6
onnxruntime cpuid_info warning: Unknown CPU vendor. cpuinfo_vendor value: 0
onnxruntime cpuid_info warning: Unknown CPU vendor. cpuinfo_vendor value: 0
onnxruntime cpuid_info warning: Unknown CPU vendor. cpuinfo_vendor value: 0
onnxruntime cpuid_info warning: Unknown CPU vendor. cpuinfo_vendor value: 0
onnxruntime cpuid_info warning: Unknown CPU vendor. cpuinfo_vendor value: 0
onnxruntime cpuid_info warning: Unknown CPU vendor. cpuinfo_vendor value: 0
[381104] Starting download for model="Xenova/all-MiniLM-L6-v2" task="feature-extraction" cache="/workspaces/workglow/libs/packages/test/src/.cache/hf"
[381108] Starting download for model="Xenova/all-MiniLM-L6-v2" task="feature-extraction" cache="/workspaces/workglow/libs/packages/test/src/.cache/hf"
[381105] Starting download for model="Xenova/all-MiniLM-L6-v2" task="feature-extraction" cache="/workspaces/workglow/libs/packages/test/src/.cache/hf"
[381109] Starting download for model="Xenova/all-MiniLM-L6-v2" task="feature-extraction" cache="/workspaces/workglow/libs/packages/test/src/.cache/hf"
[381107] Starting download for model="Xenova/all-MiniLM-L6-v2" task="feature-extraction" cache="/workspaces/workglow/libs/packages/test/src/.cache/hf"
[381106] Starting download for model="Xenova/all-MiniLM-L6-v2" task="feature-extraction" cache="/workspaces/workglow/libs/packages/test/src/.cache/hf"
[381105] Done in 167ms
[381109] Done in 171ms
[381106] Done in 171ms
[381107] Done in 179ms
[381104] Done in 189ms
[381108] Done in 193ms
All processes finished.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
bug: Something isn't working