8 changes: 4 additions & 4 deletions packages/embedding-core/src/services/extractors.ts
@@ -41,7 +41,7 @@ export async function getFrameExtractor() {
 
   const processor = await AutoProcessor.from_pretrained(VISUAL_EMBEDDING_MODEL)
   const model = await CLIPVisionModelWithProjection.from_pretrained(VISUAL_EMBEDDING_MODEL, {
-    device: USE_GPU ? "cuda" : "auto",
+    device: USE_GPU ? "cuda" : "cpu",
     dtype: "fp16"
   })
   visualModelCache = { processor, model }
@@ -53,7 +53,7 @@ export async function getAudioExtractor() {
   if (!audioModelCache) {
     const processor = await AutoProcessor.from_pretrained(AUDIO_EMBEDDING_MODEL)
     const model = await ClapAudioModelWithProjection.from_pretrained(AUDIO_EMBEDDING_MODEL, {
-      device: USE_GPU ? "cuda" : "auto"
+      device: USE_GPU ? "cuda" : "cpu"
     })
 
     audioModelCache = { processor, model }
@@ -66,7 +66,7 @@ async function getTextToVisualExtractor() {
 
   const tokenizer = await AutoTokenizer.from_pretrained(VISUAL_EMBEDDING_MODEL)
   const model = await CLIPTextModelWithProjection.from_pretrained(VISUAL_EMBEDDING_MODEL, {
-    device: USE_GPU ? "cuda" : "auto",
+    device: USE_GPU ? "cuda" : "cpu",
     dtype: "fp16"
   })
   textToVisualModelCache = { tokenizer, model }
@@ -122,7 +122,7 @@ export async function getTextExtractor() {
   if (!textModelCache) {
 
     const embed = await pipeline('feature-extraction', TEXT_EMBEDDING_MODEL, {
-      device: USE_GPU ? "cuda" : "auto",
+      device: USE_GPU ? "cuda" : "cpu",
       dtype: "fp16"
     })
 
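Note: the USE_GPU ? "cuda" : "cpu" ternary now repeats in all four extractors. A minimal sketch of a shared helper that could consolidate it (the resolveDevice name and the @shared/constants/gpu import path are assumptions, not part of this PR):

// Hypothetical helper, not in this PR: one place to decide the device.
// Assumes USE_GPU is exported from packages/shared/src/constants/gpu.ts.
import { USE_GPU } from "@shared/constants/gpu";

export function resolveDevice(): "cuda" | "cpu" {
  // An explicit "cpu" fallback avoids "auto" re-probing for a backend
  // that the CUDA check has already ruled out.
  return USE_GPU ? "cuda" : "cpu";
}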
11 changes: 10 additions & 1 deletion packages/shared/src/constants/gpu.ts
@@ -1,3 +1,12 @@
+import { logger } from "@shared/services/logger";
 import { isGPUAvailable } from "@shared/utils/gpu";
 
-export const USE_GPU = isGPUAvailable()
+export let USE_GPU = false;
+
+(async function initGPU(): Promise<void> {
+  try {
+    USE_GPU = await isGPUAvailable();
+  } catch (error) {
+    logger.error({ error }, 'Failed to initialize GPU — defaulting to CPU');
+  }
+})()
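Since the initGPU IIFE is fired but not awaited, any module that reads USE_GPU synchronously at import time sees the initial false. A sketch of an awaitable alternative, assuming call sites can be made async (the useGPU name is hypothetical, not part of this PR):

// Sketch, not in this PR: memoize the probe as a promise so callers
// can await the real result instead of racing the IIFE.
import { isGPUAvailable } from "@shared/utils/gpu";

let gpuProbe: Promise<boolean> | null = null;

export function useGPU(): Promise<boolean> {
  // The ONNX Runtime probe runs at most once; errors fall back to CPU.
  gpuProbe ??= isGPUAvailable().catch(() => false);
  return gpuProbe;
}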
25 changes: 19 additions & 6 deletions packages/shared/src/utils/gpu.ts
@@ -1,11 +1,24 @@
-import { execSync } from 'child_process';
+import { logger } from '@shared/services/logger';
 
+let _gpuAvailable: boolean | null = null;
+
+const MINIMAL_ONNX_MODEL = Buffer.from([
+  0x08, 0x07, 0x12, 0x00, 0x2a, 0x00, 0x3a, 0x00,
+]);
+
+export async function isGPUAvailable(): Promise<boolean> {
+  if (_gpuAvailable !== null) return _gpuAvailable;
+
-export function isGPUAvailable(): boolean {
   try {
-    // Check if nvidia-smi is available
-    execSync('nvidia-smi', { stdio: 'ignore' });
-    return true;
+    const ort = await import('onnxruntime-node');
+    await ort.InferenceSession.create(MINIMAL_ONNX_MODEL, {
+      executionProviders: ['CUDAExecutionProvider'],
+    });
+    _gpuAvailable = true;
+    logger.info('GPU available');
   } catch {
-    return false;
+    _gpuAvailable = false;
+    logger.info('GPU unavailable — fallback to CPU');
   }
+  return _gpuAvailable;
 }
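For reference, a call-site sketch against the now-async isGPUAvailable (assuming the consuming module supports top-level await); the result is cached, so repeated calls resolve immediately:

// Sketch of a caller; not part of this PR.
import { isGPUAvailable } from "@shared/utils/gpu";

const gpu = await isGPUAvailable();
console.log(gpu ? "CUDA execution provider loaded" : "falling back to CPU");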