diff --git a/.gitignore b/.gitignore index 05f7ef0d..581bd993 100644 --- a/.gitignore +++ b/.gitignore @@ -38,9 +38,14 @@ Thumbs.db data/profiles/* data/generations/* data/projects/* +data/cache/ +data/finetune/ data/voicebox.db !data/.gitkeep +# Model binaries (downloaded at runtime) +backend/models/ + # Logs *.log logs/ diff --git a/CHANGELOG.md b/CHANGELOG.md index b7116d39..e6cff344 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Audio export failing when Tauri save dialog returns object instead of string path ### Added +- **Hebrew Language Support** - Full Hebrew voice cloning and transcription + - Chatterbox Multilingual TTS backend (`ResembleAI/chatterbox`) for Hebrew voice generation + - ivrit-ai Whisper models (`ivrit-ai/whisper-large-v3-turbo`) for Hebrew transcription + - Automatic backend routing: Hebrew requests use Chatterbox, all other languages use Qwen3-TTS + - Auto-download of Hebrew models on first use with progress tracking + - Trailing silence trimming for Chatterbox output +- **Model Unloading** - New `/models/{model_name}/unload` endpoint to free memory for individual models - **Makefile** - Comprehensive development workflow automation with commands for setup, development, building, testing, and code quality checks - Includes Python version detection and compatibility warnings - Self-documenting help system with `make help` diff --git a/app/src/components/FinetuneTab/AdapterSelector.tsx b/app/src/components/FinetuneTab/AdapterSelector.tsx new file mode 100644 index 00000000..b3324f49 --- /dev/null +++ b/app/src/components/FinetuneTab/AdapterSelector.tsx @@ -0,0 +1,171 @@ +import { useState } from 'react'; +import { Check, Pencil, Trash2, Zap } from 'lucide-react'; +import { Button } from '@/components/ui/button'; +import { Input } from '@/components/ui/input'; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select'; +import { useToast } from '@/components/ui/use-toast'; +import type { AdapterInfo } from '@/lib/api/types'; +import { useAdapters, useSetActiveAdapter, useUpdateAdapterLabel, useDeleteAdapter } from '@/lib/hooks/useFinetune'; +import { adapterDisplayName } from '@/lib/utils/adapters'; + +interface AdapterSelectorProps { + profileId: string; +} + +function formatDate(dateStr?: string) { + if (!dateStr) return ''; + const d = new Date(dateStr); + return d.toLocaleDateString(undefined, { month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit' }); +} + +export function AdapterSelector({ profileId }: AdapterSelectorProps) { + const { toast } = useToast(); + const { data: adapters, isLoading } = useAdapters(profileId); + const setActiveAdapter = useSetActiveAdapter(); + const updateLabel = useUpdateAdapterLabel(); + const deleteAdapter = useDeleteAdapter(); + + const [editingId, setEditingId] = useState(null); + const [editValue, setEditValue] = useState(''); + + if (isLoading || !adapters || adapters.length === 0) return null; + + const activeAdapter = adapters.find((a) => a.is_active); + const currentValue = activeAdapter?.job_id ?? 'none'; + + const handleChange = async (value: string) => { + const jobId = value === 'none' ? null : value; + try { + await setActiveAdapter.mutateAsync({ profileId, jobId }); + toast({ + title: jobId ? 'Adapter activated' : 'Adapter deactivated', + description: jobId ? 'Generation will use the selected adapter.' : 'Generation will use the base model.', + }); + } catch (error) { + toast({ + title: 'Failed to switch adapter', + description: error instanceof Error ? error.message : 'Error', + variant: 'destructive', + }); + } + }; + + const handleStartEdit = (adapter: AdapterInfo) => { + setEditingId(adapter.job_id); + setEditValue(adapter.label || ''); + }; + + const handleSaveLabel = async (jobId: string) => { + if (!editValue.trim()) { + setEditingId(null); + return; + } + try { + await updateLabel.mutateAsync({ profileId, jobId, label: editValue.trim() }); + setEditingId(null); + } catch { + toast({ title: 'Failed to rename adapter', variant: 'destructive' }); + } + }; + + const handleDelete = async (adapter: AdapterInfo) => { + const confirmed = window.confirm( + `Delete adapter "${adapterDisplayName(adapter)}"? This permanently removes the trained model files and cannot be undone.`, + ); + if (!confirmed) return; + + try { + await deleteAdapter.mutateAsync({ profileId, jobId: adapter.job_id }); + toast({ title: 'Adapter deleted' }); + } catch { + toast({ title: 'Failed to delete adapter', variant: 'destructive' }); + } + }; + + return ( +
+
+ +

Active Adapter

+
+ + + + {/* Adapter list with management actions */} +
+ {adapters.map((adapter) => ( +
+ {editingId === adapter.job_id ? ( +
+ setEditValue(e.target.value)} + onKeyDown={(e) => { + if (e.key === 'Enter') handleSaveLabel(adapter.job_id); + if (e.key === 'Escape') setEditingId(null); + }} + className="h-7 text-xs" + autoFocus + /> + +
+ ) : ( + <> +
+

{adapterDisplayName(adapter)}

+

+ {formatDate(adapter.completed_at)} + {adapter.is_active && ' \u00B7 Active'} +

+
+ + + + )} +
+ ))} +
+
+ ); +} diff --git a/app/src/components/FinetuneTab/FinetuneProgressToast.tsx b/app/src/components/FinetuneTab/FinetuneProgressToast.tsx new file mode 100644 index 00000000..d67d0148 --- /dev/null +++ b/app/src/components/FinetuneTab/FinetuneProgressToast.tsx @@ -0,0 +1,42 @@ +import { Loader2 } from 'lucide-react'; +import { Progress } from '@/components/ui/progress'; +import type { ActiveFinetuneTask } from '@/lib/api/types'; + +interface FinetuneProgressToastProps { + task: ActiveFinetuneTask; +} + +export function FinetuneProgressToast({ task }: FinetuneProgressToastProps) { + const progressPercent = + task.total_steps > 0 + ? Math.round((task.current_step / task.total_steps) * 100) + : 0; + + return ( +
+
+ + Fine-tuning in progress +
+ + {task.status === 'training' && ( + <> + +
+ + Epoch {task.current_epoch}/{task.total_epochs} + + {progressPercent}% +
+ {task.current_loss != null && ( +

Loss: {task.current_loss.toFixed(4)}

+ )} + + )} + + {task.status === 'preparing' && ( +

Preparing dataset...

+ )} +
+ ); +} diff --git a/app/src/components/FinetuneTab/FinetuneSampleManager.tsx b/app/src/components/FinetuneTab/FinetuneSampleManager.tsx new file mode 100644 index 00000000..b4b5f291 --- /dev/null +++ b/app/src/components/FinetuneTab/FinetuneSampleManager.tsx @@ -0,0 +1,584 @@ +import { memo, useEffect, useRef, useState } from 'react'; +import { Download, Mic, Pause, Play, RotateCcw, Square, Star, Trash2, Upload } from 'lucide-react'; +import { Visualizer } from 'react-sound-visualizer'; +import { Button } from '@/components/ui/button'; +import { + Dialog, + DialogContent, + DialogDescription, + DialogHeader, + DialogTitle, +} from '@/components/ui/dialog'; +import { Textarea } from '@/components/ui/textarea'; +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from '@/components/ui/table'; +import { Badge } from '@/components/ui/badge'; +import { useToast } from '@/components/ui/use-toast'; +import { apiClient } from '@/lib/api/client'; +import type { FinetuneSampleResponse } from '@/lib/api/types'; +import type { LanguageCode } from '@/lib/constants/languages'; +import { + useAddFinetuneSample, + useDeleteFinetuneSample, + useImportProfileSamples, + useSetRefAudio, +} from '@/lib/hooks/useFinetune'; +import { useProfileSamples } from '@/lib/hooks/useProfiles'; +import { useAudioRecording } from '@/lib/hooks/useAudioRecording'; +import { useTranscription } from '@/lib/hooks/useTranscription'; +import { formatAudioDuration } from '@/lib/utils/audio'; +import { cn } from '@/lib/utils/cn'; + +const MemoizedWaveform = memo(function MemoizedWaveform({ + audioStream, +}: { + audioStream: MediaStream; +}) { + return ( +
+ + {({ canvasRef }) => ( + + )} + +
+ ); +}); + +interface FinetuneSampleManagerProps { + profileId: string; + profileLanguage?: string; + samples: FinetuneSampleResponse[]; +} + +export function FinetuneSampleManager({ + profileId, + profileLanguage, + samples, +}: FinetuneSampleManagerProps) { + const { toast } = useToast(); + const addSample = useAddFinetuneSample(); + const deleteSample = useDeleteFinetuneSample(); + const importSamples = useImportProfileSamples(); + const setRefAudio = useSetRefAudio(); + const transcription = useTranscription(); + const { data: profileSamples } = useProfileSamples(profileId); + const fileInputRef = useRef(null); + const [playingId, setPlayingId] = useState(null); + const audioRef = useRef(null); + const [showRecorder, setShowRecorder] = useState(false); + const [audioStream, setAudioStream] = useState(null); + + // Review dialog state + const [reviewFile, setReviewFile] = useState(null); + const [reviewTranscript, setReviewTranscript] = useState(''); + const [isReviewOpen, setIsReviewOpen] = useState(false); + const [isPreviewPlaying, setIsPreviewPlaying] = useState(false); + const previewAudioRef = useRef(null); + const previewUrlRef = useRef(null); + + const { + isRecording, + duration, + error: recordingError, + startRecording, + stopRecording, + cancelRecording, + } = useAudioRecording({ + maxDurationSeconds: 59, + onRecordingComplete: (blob) => { + const file = new File([blob], `finetune-${Date.now()}.wav`, { + type: 'audio/wav', + }); + // Don't upload immediately — open review dialog + setReviewFile(file); + setReviewTranscript(''); + setIsReviewOpen(true); + setShowRecorder(false); + + // Stop waveform stream + if (audioStream) { + audioStream.getTracks().forEach((t) => t.stop()); + setAudioStream(null); + } + + // Auto-transcribe + transcription.mutate( + { file, language: (profileLanguage as LanguageCode) || undefined }, + { + onSuccess: (result) => { + setReviewTranscript(result.text); + }, + onError: () => { + // User can still type manually + }, + }, + ); + }, + }); + + // Cleanup preview URL on unmount + useEffect(() => { + return () => { + if (previewUrlRef.current) { + URL.revokeObjectURL(previewUrlRef.current); + } + }; + }, []); + + const totalDuration = samples.reduce((sum, s) => sum + s.duration_seconds, 0); + + const getDatasetHealth = () => { + if (samples.length >= 120 && totalDuration >= 1200) return { label: 'Excellent', color: 'bg-green-500' }; + if (samples.length >= 30 && totalDuration >= 300) return { label: 'Adequate', color: 'bg-yellow-500' }; + return { label: 'Need more samples', color: 'bg-red-500' }; + }; + + const health = getDatasetHealth(); + + const handleFileUpload = async (e: React.ChangeEvent) => { + const files = e.target.files; + if (!files) return; + + for (const file of Array.from(files)) { + try { + await addSample.mutateAsync({ profileId, file }); + toast({ title: 'Sample added', description: `${file.name} uploaded and auto-transcribed` }); + } catch (error) { + toast({ + title: 'Upload failed', + description: error instanceof Error ? error.message : 'Failed to upload sample', + variant: 'destructive', + }); + } + } + e.target.value = ''; + }; + + const handleStartRecording = async () => { + setShowRecorder(true); + try { + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + setAudioStream(stream); + } catch { + // Waveform won't show but recording can still work + } + startRecording(); + }; + + const handleStopRecording = () => { + stopRecording(); + }; + + const handleCancelRecording = () => { + cancelRecording(); + setShowRecorder(false); + if (audioStream) { + audioStream.getTracks().forEach((t) => t.stop()); + setAudioStream(null); + } + }; + + // Review dialog actions + const handleReviewSave = async () => { + if (!reviewFile) return; + const transcript = reviewTranscript.trim(); + if (!transcript) { + toast({ title: 'Transcript required', description: 'Please enter or edit the transcript before saving.', variant: 'destructive' }); + return; + } + try { + await addSample.mutateAsync({ profileId, file: reviewFile, transcript }); + toast({ title: 'Sample saved', description: 'Recording added to training set' }); + handleReviewClose(); + } catch (error) { + toast({ + title: 'Save failed', + description: error instanceof Error ? error.message : 'Failed to save sample', + variant: 'destructive', + }); + } + }; + + const handleReviewRetranscribe = () => { + if (!reviewFile) return; + transcription.mutate( + { file: reviewFile, language: (profileLanguage as LanguageCode) || undefined }, + { + onSuccess: (result) => { + setReviewTranscript(result.text); + }, + onError: () => { + toast({ title: 'Transcription failed', description: 'Please type the transcript manually.', variant: 'destructive' }); + }, + }, + ); + }; + + const handleReviewPlayPause = () => { + if (!reviewFile) return; + + if (isPreviewPlaying) { + previewAudioRef.current?.pause(); + setIsPreviewPlaying(false); + return; + } + + // Create object URL for playback + if (previewUrlRef.current) { + URL.revokeObjectURL(previewUrlRef.current); + } + const url = URL.createObjectURL(reviewFile); + previewUrlRef.current = url; + + const audio = new Audio(url); + audio.onended = () => setIsPreviewPlaying(false); + previewAudioRef.current = audio; + audio.play(); + setIsPreviewPlaying(true); + }; + + const handleReviewClose = () => { + setIsReviewOpen(false); + setReviewFile(null); + setReviewTranscript(''); + if (previewAudioRef.current) { + previewAudioRef.current.pause(); + previewAudioRef.current = null; + } + if (previewUrlRef.current) { + URL.revokeObjectURL(previewUrlRef.current); + previewUrlRef.current = null; + } + setIsPreviewPlaying(false); + }; + + const handleImport = async () => { + try { + const result = await importSamples.mutateAsync({ profileId }); + toast({ + title: 'Samples imported', + description: `${result.length} samples imported from profile`, + }); + } catch (error) { + toast({ + title: 'Import failed', + description: error instanceof Error ? error.message : 'Failed to import samples', + variant: 'destructive', + }); + } + }; + + const handlePlay = (sampleId: string) => { + if (playingId === sampleId) { + audioRef.current?.pause(); + setPlayingId(null); + return; + } + + if (audioRef.current) { + audioRef.current.pause(); + } + + const audio = new Audio(apiClient.getFinetuneSampleAudioUrl(sampleId)); + audio.onended = () => setPlayingId(null); + audioRef.current = audio; + audio.play(); + setPlayingId(sampleId); + }; + + const handleDelete = async (sampleId: string) => { + try { + await deleteSample.mutateAsync({ profileId, sampleId }); + } catch (error) { + toast({ + title: 'Delete failed', + description: error instanceof Error ? error.message : 'Failed to delete sample', + variant: 'destructive', + }); + } + }; + + const handleSetRef = async (sampleId: string) => { + try { + await setRefAudio.mutateAsync({ profileId, sampleId }); + toast({ title: 'Reference audio set' }); + } catch (error) { + toast({ + title: 'Failed to set reference', + description: error instanceof Error ? error.message : 'Error', + variant: 'destructive', + }); + } + }; + + return ( +
+ {/* Header */} +
+
+

Training Samples

+
+ {samples.length} samples + {Math.floor(totalDuration / 60)}m {Math.floor(totalDuration % 60)}s total +
+
+ {health.label} +
+
+
+ + {!showRecorder && ( +
+ + + {profileSamples && profileSamples.length > 0 && ( + + )} +
+ )} +
+ + + + {/* Recording UI */} + {showRecorder && ( +
+ {audioStream && } + + {isRecording ? ( + <> +
+
+
+ + {formatAudioDuration(duration)} + +
+
+
+ + +
+

+ {formatAudioDuration(60 - duration)} remaining +

+ + ) : ( + <> +
+ + Preparing... +
+ + + )} + + {recordingError && ( +

{recordingError}

+ )} +
+ )} + + {/* Review Dialog — shown after recording stops */} + { if (!open) handleReviewClose(); }}> + + + Review Recording + + Listen to your recording and verify the transcript before saving. + + + +
+ {/* Playback */} +
+ +
+ + {isPreviewPlaying ? 'Playing...' : 'Click to preview'} + + + {reviewFile?.name} + +
+
+ + {/* Transcript */} +
+
+ + +
+ + {transcription.isPending && !reviewTranscript ? ( +
+ + Transcribing audio... + +
+ ) : ( +