diff --git a/packages/embedding-core/src/services/extractors.ts b/packages/embedding-core/src/services/extractors.ts index aca0d423..01b07760 100644 --- a/packages/embedding-core/src/services/extractors.ts +++ b/packages/embedding-core/src/services/extractors.ts @@ -41,7 +41,7 @@ export async function getFrameExtractor() { const processor = await AutoProcessor.from_pretrained(VISUAL_EMBEDDING_MODEL) const model = await CLIPVisionModelWithProjection.from_pretrained(VISUAL_EMBEDDING_MODEL, { - device: USE_GPU ? "cuda" : "auto", + device: USE_GPU ? "cuda" : "cpu", dtype: "fp16" }) visualModelCache = { processor, model } @@ -53,7 +53,7 @@ export async function getAudioExtractor() { if (!audioModelCache) { const processor = await AutoProcessor.from_pretrained(AUDIO_EMBEDDING_MODEL) const model = await ClapAudioModelWithProjection.from_pretrained(AUDIO_EMBEDDING_MODEL, { - device: USE_GPU ? "cuda" : "auto" + device: USE_GPU ? "cuda" : "cpu" }) audioModelCache = { processor, model } @@ -66,7 +66,7 @@ async function getTextToVisualExtractor() { const tokenizer = await AutoTokenizer.from_pretrained(VISUAL_EMBEDDING_MODEL) const model = await CLIPTextModelWithProjection.from_pretrained(VISUAL_EMBEDDING_MODEL, { - device: USE_GPU ? "cuda" : "auto", + device: USE_GPU ? "cuda" : "cpu", dtype: "fp16" }) textToVisualModelCache = { tokenizer, model } @@ -122,7 +122,7 @@ export async function getTextExtractor() { if (!textModelCache) { const embed = await pipeline('feature-extraction', TEXT_EMBEDDING_MODEL, { - device: USE_GPU ? "cuda" : "auto", + device: USE_GPU ? 
"cuda" : "cpu", dtype: "fp16" }) diff --git a/packages/shared/src/constants/gpu.ts b/packages/shared/src/constants/gpu.ts index 02377dde..08a5fe08 100644 --- a/packages/shared/src/constants/gpu.ts +++ b/packages/shared/src/constants/gpu.ts @@ -1,3 +1,12 @@ +import { logger } from "@shared/services/logger"; import { isGPUAvailable } from "@shared/utils/gpu"; -export const USE_GPU = isGPUAvailable() +export let USE_GPU = false; + +(async function initGPU(): Promise<void> { + try { + USE_GPU = await isGPUAvailable(); + } catch (error) { + logger.error({ error }, 'Failed to initialize GPU — defaulting to CPU'); + } +})() \ No newline at end of file diff --git a/packages/shared/src/utils/gpu.ts b/packages/shared/src/utils/gpu.ts index 3dd850d3..5d2aa3b5 100644 --- a/packages/shared/src/utils/gpu.ts +++ b/packages/shared/src/utils/gpu.ts @@ -1,11 +1,24 @@ -import { execSync } from 'child_process'; +import { logger } from '@shared/services/logger'; + +let _gpuAvailable: boolean | null = null; + +const MINIMAL_ONNX_MODEL = Buffer.from([ + 0x08, 0x07, 0x12, 0x00, 0x2a, 0x00, 0x3a, 0x00, +]); + +export async function isGPUAvailable(): Promise<boolean> { + if (_gpuAvailable !== null) return _gpuAvailable; -export function isGPUAvailable(): boolean { try { - // Check if nvidia-smi is available - execSync('nvidia-smi', { stdio: 'ignore' }); - return true; + const ort = await import('onnxruntime-node'); + await ort.InferenceSession.create(MINIMAL_ONNX_MODEL, { + executionProviders: ['CUDAExecutionProvider'], + }); + _gpuAvailable = true; + logger.info('GPU available'); } catch { - return false; + _gpuAvailable = false; + logger.info('GPU unavailable — fallback to CPU'); } + return _gpuAvailable; } \ No newline at end of file