From c05bf3e16b1758911416085fb21328556adc1635 Mon Sep 17 00:00:00 2001 From: Nadav Oxenberg Date: Wed, 25 Feb 2026 15:05:40 +0200 Subject: [PATCH 1/8] feat: add Hebrew language support with Chatterbox TTS and ivrit-ai Whisper Add Hebrew (he) as a supported language across the full pipeline: - Chatterbox TTS backend for Hebrew zero-shot voice cloning - ivrit-ai Whisper models (v3, v3-turbo) for Hebrew speech-to-text - Hebrew diacritization via dicta-onnx for improved TTS quality - Audio trimming utility to handle Chatterbox trailing silence - Model management UI for downloading/unloading Hebrew models - Auto-sync language selection from voice profile Also includes: - datetime.utcnow() -> datetime.now(timezone.utc) migration (PEP 8) - UUID validation utility for API endpoints - .gitignore updates for data/cache, data/finetune, backend/models Co-Authored-By: Claude Opus 4.6 --- .gitignore | 5 + .../Generation/FloatingGenerateBox.tsx | 14 +- .../components/Generation/GenerationForm.tsx | 15 +- .../ServerSettings/ModelManagement.tsx | 57 ++- app/src/lib/api/client.ts | 6 + app/src/lib/api/types.ts | 1 + app/src/lib/constants/languages.ts | 5 +- app/src/lib/hooks/useGenerationForm.ts | 11 +- backend/backends/__init__.py | 20 +- backend/backends/chatterbox_backend.py | 342 ++++++++++++++++ backend/backends/mlx_backend.py | 76 +++- backend/backends/pytorch_backend.py | 90 +++-- backend/channels.py | 4 +- backend/config.py | 6 + backend/database.py | 20 +- backend/export_import.py | 4 +- backend/history.py | 4 +- backend/main.py | 371 ++++++++++++++---- backend/models.py | 6 +- backend/profiles.py | 14 +- backend/requirements.txt | 4 + backend/stories.py | 30 +- backend/tts.py | 13 +- backend/utils/audio.py | 130 ++++++ backend/utils/tasks.py | 6 +- bun.lock | 9 +- 26 files changed, 1073 insertions(+), 190 deletions(-) create mode 100644 backend/backends/chatterbox_backend.py diff --git a/.gitignore b/.gitignore index 05f7ef0d..581bd993 100644 --- a/.gitignore +++ b/.gitignore @@ -38,9 +38,14 @@ Thumbs.db data/profiles/* data/generations/* data/projects/* +data/cache/ +data/finetune/ data/voicebox.db !data/.gitkeep +# Model binaries (downloaded at runtime) +backend/models/ + # Logs *.log logs/ diff --git a/app/src/components/Generation/FloatingGenerateBox.tsx b/app/src/components/Generation/FloatingGenerateBox.tsx index a8d556a6..55791f9e 100644 --- a/app/src/components/Generation/FloatingGenerateBox.tsx +++ b/app/src/components/Generation/FloatingGenerateBox.tsx @@ -13,7 +13,7 @@ import { } from '@/components/ui/select'; import { Textarea } from '@/components/ui/textarea'; import { useToast } from '@/components/ui/use-toast'; -import { LANGUAGE_OPTIONS } from '@/lib/constants/languages'; +import { LANGUAGE_OPTIONS, type LanguageCode } from '@/lib/constants/languages'; import { useGenerationForm } from '@/lib/hooks/useGenerationForm'; import { useProfile, useProfiles } from '@/lib/hooks/useProfiles'; import { useAddStoryItem, useStory } from '@/lib/hooks/useStories'; @@ -75,6 +75,16 @@ export function FloatingGenerateBox({ }, }); + // Auto-sync language when selected profile changes + useEffect(() => { + if (selectedProfile?.language) { + const profileLang = selectedProfile.language as LanguageCode; + if (LANGUAGE_OPTIONS.some((l) => l.value === profileLang)) { + form.setValue('language', profileLang); + } + } + }, [selectedProfile?.language, form]); + // Click away handler to collapse the box useEffect(() => { function handleClickOutside(event: MouseEvent) { @@ -383,7 +393,7 @@ export function FloatingGenerateBox({ name="language" render={({ field }) => ( - diff --git a/app/src/components/Generation/GenerationForm.tsx b/app/src/components/Generation/GenerationForm.tsx index 31b100f8..46e54b83 100644 --- a/app/src/components/Generation/GenerationForm.tsx +++ b/app/src/components/Generation/GenerationForm.tsx @@ -1,3 +1,4 @@ +import { useEffect } from 'react'; import { Loader2, Mic } from 'lucide-react'; import { Button } from '@/components/ui/button'; import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'; @@ -19,7 +20,7 @@ import { SelectValue, } from '@/components/ui/select'; import { Textarea } from '@/components/ui/textarea'; -import { LANGUAGE_OPTIONS } from '@/lib/constants/languages'; +import { LANGUAGE_OPTIONS, type LanguageCode } from '@/lib/constants/languages'; import { useGenerationForm } from '@/lib/hooks/useGenerationForm'; import { useProfile } from '@/lib/hooks/useProfiles'; import { useUIStore } from '@/stores/uiStore'; @@ -30,6 +31,16 @@ export function GenerationForm() { const { form, handleSubmit, isPending } = useGenerationForm(); + // Auto-sync language when selected profile changes + useEffect(() => { + if (selectedProfile?.language) { + const profileLang = selectedProfile.language as LanguageCode; + if (LANGUAGE_OPTIONS.some((l) => l.value === profileLang)) { + form.setValue('language', profileLang); + } + } + }, [selectedProfile?.language, form]); + async function onSubmit(data: Parameters[0]) { await handleSubmit(data, selectedProfileId); } @@ -105,7 +116,7 @@ export function GenerationForm() { render={({ field }) => ( Language - diff --git a/app/src/components/ServerSettings/ModelManagement.tsx b/app/src/components/ServerSettings/ModelManagement.tsx index 4a5fd439..ad7a66ab 100644 --- a/app/src/components/ServerSettings/ModelManagement.tsx +++ b/app/src/components/ServerSettings/ModelManagement.tsx @@ -1,5 +1,5 @@ import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'; -import { Download, Loader2, Trash2 } from 'lucide-react'; +import { Download, Loader2, Power, Trash2 } from 'lucide-react'; import { useCallback, useState } from 'react'; import { AlertDialog, @@ -136,6 +136,27 @@ export function ModelManagement() { }, }); + const unloadMutation = useMutation({ + mutationFn: async (modelName: string) => { + return apiClient.unloadModel(modelName); + }, + onSuccess: async (_data, modelName) => { + const model = modelStatus?.models.find((m) => m.model_name === modelName); + toast({ + title: 'Model unloaded', + description: `${model?.display_name || modelName} has been unloaded from memory.`, + }); + await queryClient.invalidateQueries({ queryKey: ['modelStatus'], refetchType: 'all' }); + }, + onError: (error: Error) => { + toast({ + title: 'Unload failed', + description: error.message, + variant: 'destructive', + }); + }, + }); + const formatSize = (sizeMb?: number): string => { if (!sizeMb) return 'Unknown'; if (sizeMb < 1024) return `${sizeMb.toFixed(1)} MB`; @@ -164,7 +185,7 @@ export function ModelManagement() {
{modelStatus.models - .filter((m) => m.model_name.startsWith('qwen-tts')) + .filter((m) => m.model_name.startsWith('qwen-tts') || m.model_name === 'chatterbox-tts') .map((model) => ( unloadMutation.mutate(model.model_name)} isDownloading={downloadingModel === model.model_name} + isUnloading={unloadMutation.isPending && unloadMutation.variables === model.model_name} formatSize={formatSize} /> ))} @@ -206,7 +229,9 @@ export function ModelManagement() { }); setDeleteDialogOpen(true); }} + onUnload={() => unloadMutation.mutate(model.model_name)} isDownloading={downloadingModel === model.model_name} + isUnloading={unloadMutation.isPending && unloadMutation.variables === model.model_name} formatSize={formatSize} /> ))} @@ -271,14 +296,16 @@ interface ModelItemProps { }; onDownload: () => void; onDelete: () => void; + onUnload: () => void; isDownloading: boolean; // Local state - true if user just clicked download + isUnloading: boolean; formatSize: (sizeMb?: number) => string; } -function ModelItem({ model, onDownload, onDelete, isDownloading, formatSize }: ModelItemProps) { +function ModelItem({ model, onDownload, onDelete, onUnload, isDownloading, isUnloading, formatSize }: ModelItemProps) { // Use server's downloading state OR local state (for immediate feedback before server updates) const showDownloading = model.downloading || isDownloading; - + return (
@@ -305,9 +332,25 @@ function ModelItem({ model, onDownload, onDelete, isDownloading, formatSize }: M
{model.downloaded && !showDownloading ? (
-
- Ready -
+ {model.loaded ? ( + + ) : ( +
+ Ready +
+ )} +
+ ) : ( + <> +
+

{adapterDisplayName(adapter)}

+

+ {formatDate(adapter.completed_at)} + {adapter.is_active && ' \u00B7 Active'} +

+
+ + + + )} +
+ ))} +
+
+ ); +} diff --git a/app/src/components/FinetuneTab/FinetuneProgressToast.tsx b/app/src/components/FinetuneTab/FinetuneProgressToast.tsx new file mode 100644 index 00000000..d67d0148 --- /dev/null +++ b/app/src/components/FinetuneTab/FinetuneProgressToast.tsx @@ -0,0 +1,42 @@ +import { Loader2 } from 'lucide-react'; +import { Progress } from '@/components/ui/progress'; +import type { ActiveFinetuneTask } from '@/lib/api/types'; + +interface FinetuneProgressToastProps { + task: ActiveFinetuneTask; +} + +export function FinetuneProgressToast({ task }: FinetuneProgressToastProps) { + const progressPercent = + task.total_steps > 0 + ? Math.round((task.current_step / task.total_steps) * 100) + : 0; + + return ( +
+
+ + Fine-tuning in progress +
+ + {task.status === 'training' && ( + <> + +
+ + Epoch {task.current_epoch}/{task.total_epochs} + + {progressPercent}% +
+ {task.current_loss != null && ( +

Loss: {task.current_loss.toFixed(4)}

+ )} + + )} + + {task.status === 'preparing' && ( +

Preparing dataset...

+ )} +
+ ); +} diff --git a/app/src/components/FinetuneTab/FinetuneSampleManager.tsx b/app/src/components/FinetuneTab/FinetuneSampleManager.tsx new file mode 100644 index 00000000..b4b5f291 --- /dev/null +++ b/app/src/components/FinetuneTab/FinetuneSampleManager.tsx @@ -0,0 +1,584 @@ +import { memo, useEffect, useRef, useState } from 'react'; +import { Download, Mic, Pause, Play, RotateCcw, Square, Star, Trash2, Upload } from 'lucide-react'; +import { Visualizer } from 'react-sound-visualizer'; +import { Button } from '@/components/ui/button'; +import { + Dialog, + DialogContent, + DialogDescription, + DialogHeader, + DialogTitle, +} from '@/components/ui/dialog'; +import { Textarea } from '@/components/ui/textarea'; +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from '@/components/ui/table'; +import { Badge } from '@/components/ui/badge'; +import { useToast } from '@/components/ui/use-toast'; +import { apiClient } from '@/lib/api/client'; +import type { FinetuneSampleResponse } from '@/lib/api/types'; +import type { LanguageCode } from '@/lib/constants/languages'; +import { + useAddFinetuneSample, + useDeleteFinetuneSample, + useImportProfileSamples, + useSetRefAudio, +} from '@/lib/hooks/useFinetune'; +import { useProfileSamples } from '@/lib/hooks/useProfiles'; +import { useAudioRecording } from '@/lib/hooks/useAudioRecording'; +import { useTranscription } from '@/lib/hooks/useTranscription'; +import { formatAudioDuration } from '@/lib/utils/audio'; +import { cn } from '@/lib/utils/cn'; + +const MemoizedWaveform = memo(function MemoizedWaveform({ + audioStream, +}: { + audioStream: MediaStream; +}) { + return ( +
+ + {({ canvasRef }) => ( + + )} + +
+ ); +}); + +interface FinetuneSampleManagerProps { + profileId: string; + profileLanguage?: string; + samples: FinetuneSampleResponse[]; +} + +export function FinetuneSampleManager({ + profileId, + profileLanguage, + samples, +}: FinetuneSampleManagerProps) { + const { toast } = useToast(); + const addSample = useAddFinetuneSample(); + const deleteSample = useDeleteFinetuneSample(); + const importSamples = useImportProfileSamples(); + const setRefAudio = useSetRefAudio(); + const transcription = useTranscription(); + const { data: profileSamples } = useProfileSamples(profileId); + const fileInputRef = useRef(null); + const [playingId, setPlayingId] = useState(null); + const audioRef = useRef(null); + const [showRecorder, setShowRecorder] = useState(false); + const [audioStream, setAudioStream] = useState(null); + + // Review dialog state + const [reviewFile, setReviewFile] = useState(null); + const [reviewTranscript, setReviewTranscript] = useState(''); + const [isReviewOpen, setIsReviewOpen] = useState(false); + const [isPreviewPlaying, setIsPreviewPlaying] = useState(false); + const previewAudioRef = useRef(null); + const previewUrlRef = useRef(null); + + const { + isRecording, + duration, + error: recordingError, + startRecording, + stopRecording, + cancelRecording, + } = useAudioRecording({ + maxDurationSeconds: 59, + onRecordingComplete: (blob) => { + const file = new File([blob], `finetune-${Date.now()}.wav`, { + type: 'audio/wav', + }); + // Don't upload immediately — open review dialog + setReviewFile(file); + setReviewTranscript(''); + setIsReviewOpen(true); + setShowRecorder(false); + + // Stop waveform stream + if (audioStream) { + audioStream.getTracks().forEach((t) => t.stop()); + setAudioStream(null); + } + + // Auto-transcribe + transcription.mutate( + { file, language: (profileLanguage as LanguageCode) || undefined }, + { + onSuccess: (result) => { + setReviewTranscript(result.text); + }, + onError: () => { + // User can still type manually + }, + }, + ); + }, + }); + + // Cleanup preview URL on unmount + useEffect(() => { + return () => { + if (previewUrlRef.current) { + URL.revokeObjectURL(previewUrlRef.current); + } + }; + }, []); + + const totalDuration = samples.reduce((sum, s) => sum + s.duration_seconds, 0); + + const getDatasetHealth = () => { + if (samples.length >= 120 && totalDuration >= 1200) return { label: 'Excellent', color: 'bg-green-500' }; + if (samples.length >= 30 && totalDuration >= 300) return { label: 'Adequate', color: 'bg-yellow-500' }; + return { label: 'Need more samples', color: 'bg-red-500' }; + }; + + const health = getDatasetHealth(); + + const handleFileUpload = async (e: React.ChangeEvent) => { + const files = e.target.files; + if (!files) return; + + for (const file of Array.from(files)) { + try { + await addSample.mutateAsync({ profileId, file }); + toast({ title: 'Sample added', description: `${file.name} uploaded and auto-transcribed` }); + } catch (error) { + toast({ + title: 'Upload failed', + description: error instanceof Error ? error.message : 'Failed to upload sample', + variant: 'destructive', + }); + } + } + e.target.value = ''; + }; + + const handleStartRecording = async () => { + setShowRecorder(true); + try { + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + setAudioStream(stream); + } catch { + // Waveform won't show but recording can still work + } + startRecording(); + }; + + const handleStopRecording = () => { + stopRecording(); + }; + + const handleCancelRecording = () => { + cancelRecording(); + setShowRecorder(false); + if (audioStream) { + audioStream.getTracks().forEach((t) => t.stop()); + setAudioStream(null); + } + }; + + // Review dialog actions + const handleReviewSave = async () => { + if (!reviewFile) return; + const transcript = reviewTranscript.trim(); + if (!transcript) { + toast({ title: 'Transcript required', description: 'Please enter or edit the transcript before saving.', variant: 'destructive' }); + return; + } + try { + await addSample.mutateAsync({ profileId, file: reviewFile, transcript }); + toast({ title: 'Sample saved', description: 'Recording added to training set' }); + handleReviewClose(); + } catch (error) { + toast({ + title: 'Save failed', + description: error instanceof Error ? error.message : 'Failed to save sample', + variant: 'destructive', + }); + } + }; + + const handleReviewRetranscribe = () => { + if (!reviewFile) return; + transcription.mutate( + { file: reviewFile, language: (profileLanguage as LanguageCode) || undefined }, + { + onSuccess: (result) => { + setReviewTranscript(result.text); + }, + onError: () => { + toast({ title: 'Transcription failed', description: 'Please type the transcript manually.', variant: 'destructive' }); + }, + }, + ); + }; + + const handleReviewPlayPause = () => { + if (!reviewFile) return; + + if (isPreviewPlaying) { + previewAudioRef.current?.pause(); + setIsPreviewPlaying(false); + return; + } + + // Create object URL for playback + if (previewUrlRef.current) { + URL.revokeObjectURL(previewUrlRef.current); + } + const url = URL.createObjectURL(reviewFile); + previewUrlRef.current = url; + + const audio = new Audio(url); + audio.onended = () => setIsPreviewPlaying(false); + previewAudioRef.current = audio; + audio.play(); + setIsPreviewPlaying(true); + }; + + const handleReviewClose = () => { + setIsReviewOpen(false); + setReviewFile(null); + setReviewTranscript(''); + if (previewAudioRef.current) { + previewAudioRef.current.pause(); + previewAudioRef.current = null; + } + if (previewUrlRef.current) { + URL.revokeObjectURL(previewUrlRef.current); + previewUrlRef.current = null; + } + setIsPreviewPlaying(false); + }; + + const handleImport = async () => { + try { + const result = await importSamples.mutateAsync({ profileId }); + toast({ + title: 'Samples imported', + description: `${result.length} samples imported from profile`, + }); + } catch (error) { + toast({ + title: 'Import failed', + description: error instanceof Error ? error.message : 'Failed to import samples', + variant: 'destructive', + }); + } + }; + + const handlePlay = (sampleId: string) => { + if (playingId === sampleId) { + audioRef.current?.pause(); + setPlayingId(null); + return; + } + + if (audioRef.current) { + audioRef.current.pause(); + } + + const audio = new Audio(apiClient.getFinetuneSampleAudioUrl(sampleId)); + audio.onended = () => setPlayingId(null); + audioRef.current = audio; + audio.play(); + setPlayingId(sampleId); + }; + + const handleDelete = async (sampleId: string) => { + try { + await deleteSample.mutateAsync({ profileId, sampleId }); + } catch (error) { + toast({ + title: 'Delete failed', + description: error instanceof Error ? error.message : 'Failed to delete sample', + variant: 'destructive', + }); + } + }; + + const handleSetRef = async (sampleId: string) => { + try { + await setRefAudio.mutateAsync({ profileId, sampleId }); + toast({ title: 'Reference audio set' }); + } catch (error) { + toast({ + title: 'Failed to set reference', + description: error instanceof Error ? error.message : 'Error', + variant: 'destructive', + }); + } + }; + + return ( +
+ {/* Header */} +
+
+

Training Samples

+
+ {samples.length} samples + {Math.floor(totalDuration / 60)}m {Math.floor(totalDuration % 60)}s total +
+
+ {health.label} +
+
+
+ + {!showRecorder && ( +
+ + + {profileSamples && profileSamples.length > 0 && ( + + )} +
+ )} +
+ + + + {/* Recording UI */} + {showRecorder && ( +
+ {audioStream && } + + {isRecording ? ( + <> +
+
+
+ + {formatAudioDuration(duration)} + +
+
+
+ + +
+

+ {formatAudioDuration(60 - duration)} remaining +

+ + ) : ( + <> +
+ + Preparing... +
+ + + )} + + {recordingError && ( +

{recordingError}

+ )} +
+ )} + + {/* Review Dialog — shown after recording stops */} + { if (!open) handleReviewClose(); }}> + + + Review Recording + + Listen to your recording and verify the transcript before saving. + + + +
+ {/* Playback */} +
+ +
+ + {isPreviewPlaying ? 'Playing...' : 'Click to preview'} + + + {reviewFile?.name} + +
+
+ + {/* Transcript */} +
+
+ + +
+ + {transcription.isPending && !reviewTranscript ? ( +
+ + Transcribing audio... + +
+ ) : ( +