diff --git a/.gitignore b/.gitignore index 05f7ef0d..cb01402d 100644 --- a/.gitignore +++ b/.gitignore @@ -38,9 +38,14 @@ Thumbs.db data/profiles/* data/generations/* data/projects/* +data/cache/* +data/finetune/* data/voicebox.db !data/.gitkeep +# Model binaries (downloaded at runtime) +backend/models/ + # Logs *.log logs/ diff --git a/CHANGELOG.md b/CHANGELOG.md index b7116d39..e87d4790 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Audio export failing when Tauri save dialog returns object instead of string path ### Added +- **Hebrew Language Support** - Full Hebrew voice cloning and transcription + - Chatterbox Multilingual TTS backend (`ResembleAI/chatterbox`) for Hebrew voice generation + - ivrit-ai Whisper models (`ivrit-ai/whisper-large-v3-turbo` and `ivrit-ai/whisper-large-v3`) for Hebrew transcription + - Automatic backend routing: Hebrew requests use Chatterbox, all other languages use Qwen3-TTS + - Auto-download of Hebrew models on first use with progress tracking + - Trailing silence trimming for Chatterbox output +- **Model Unloading** - New `/models/{model_name}/unload` endpoint to free memory for individual models - **Makefile** - Comprehensive development workflow automation with commands for setup, development, building, testing, and code quality checks - Includes Python version detection and compatibility warnings - Self-documenting help system with `make help` @@ -68,9 +75,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 --- -## [Unreleased - Planned] +## Roadmap -### Planned - Real-time streaming synthesis - Conversation mode with multiple speakers - Voice effects (pitch shift, reverb, M3GAN-style) @@ -82,4 +88,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 --- +[Unreleased]: https://github.com/nadavox/voicebox/compare/v0.1.0...HEAD [0.1.0]: https://github.com/jamiepine/voicebox/releases/tag/v0.1.0 diff --git a/app/src/components/Generation/FloatingGenerateBox.tsx b/app/src/components/Generation/FloatingGenerateBox.tsx index a8d556a6..55791f9e 100644 --- a/app/src/components/Generation/FloatingGenerateBox.tsx +++ b/app/src/components/Generation/FloatingGenerateBox.tsx @@ -13,7 +13,7 @@ import { } from '@/components/ui/select'; import { Textarea } from '@/components/ui/textarea'; import { useToast } from '@/components/ui/use-toast'; -import { LANGUAGE_OPTIONS } from '@/lib/constants/languages'; +import { LANGUAGE_OPTIONS, type LanguageCode } from '@/lib/constants/languages'; import { useGenerationForm } from '@/lib/hooks/useGenerationForm'; import { useProfile, useProfiles } from '@/lib/hooks/useProfiles'; import { useAddStoryItem, useStory } from '@/lib/hooks/useStories'; @@ -75,6 +75,16 @@ export function FloatingGenerateBox({ }, }); + // Auto-sync language when selected profile changes + useEffect(() => { + if (selectedProfile?.language) { + const profileLang = selectedProfile.language as LanguageCode; + if (LANGUAGE_OPTIONS.some((l) => l.value === profileLang)) { + form.setValue('language', profileLang); + } + } + }, [selectedProfile?.language, form]); + // Click away handler to collapse the box useEffect(() => { function handleClickOutside(event: MouseEvent) { @@ -383,7 +393,7 @@ export function FloatingGenerateBox({ name="language" render={({ field }) => ( - diff --git a/app/src/components/Generation/GenerationForm.tsx b/app/src/components/Generation/GenerationForm.tsx index 31b100f8..46e54b83 100644 --- a/app/src/components/Generation/GenerationForm.tsx +++ b/app/src/components/Generation/GenerationForm.tsx @@ -1,3 +1,4 @@ +import { useEffect } from 'react'; import { Loader2, Mic } from 'lucide-react'; import { Button } from '@/components/ui/button'; import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'; @@ -19,7 +20,7 @@ import { SelectValue, } from '@/components/ui/select'; import { Textarea } from '@/components/ui/textarea'; -import { LANGUAGE_OPTIONS } from '@/lib/constants/languages'; +import { LANGUAGE_OPTIONS, type LanguageCode } from '@/lib/constants/languages'; import { useGenerationForm } from '@/lib/hooks/useGenerationForm'; import { useProfile } from '@/lib/hooks/useProfiles'; import { useUIStore } from '@/stores/uiStore'; @@ -30,6 +31,16 @@ export function GenerationForm() { const { form, handleSubmit, isPending } = useGenerationForm(); + // Auto-sync language when selected profile changes + useEffect(() => { + if (selectedProfile?.language) { + const profileLang = selectedProfile.language as LanguageCode; + if (LANGUAGE_OPTIONS.some((l) => l.value === profileLang)) { + form.setValue('language', profileLang); + } + } + }, [selectedProfile?.language, form]); + async function onSubmit(data: Parameters[0]) { await handleSubmit(data, selectedProfileId); } @@ -105,7 +116,7 @@ export function GenerationForm() { render={({ field }) => ( Language - diff --git a/app/src/components/ServerSettings/ModelManagement.tsx b/app/src/components/ServerSettings/ModelManagement.tsx index 4a5fd439..ad7a66ab 100644 --- a/app/src/components/ServerSettings/ModelManagement.tsx +++ b/app/src/components/ServerSettings/ModelManagement.tsx @@ -1,5 +1,5 @@ import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'; -import { Download, Loader2, Trash2 } from 'lucide-react'; +import { Download, Loader2, Power, Trash2 } from 'lucide-react'; import { useCallback, useState } from 'react'; import { AlertDialog, @@ -136,6 +136,27 @@ export function ModelManagement() { }, }); + const unloadMutation = useMutation({ + mutationFn: async (modelName: string) => { + return apiClient.unloadModel(modelName); + }, + onSuccess: async (_data, modelName) => { + const model = modelStatus?.models.find((m) => m.model_name === modelName); + toast({ + title: 'Model unloaded', + description: `${model?.display_name || modelName} has been unloaded from memory.`, + }); + await queryClient.invalidateQueries({ queryKey: ['modelStatus'], refetchType: 'all' }); + }, + onError: (error: Error) => { + toast({ + title: 'Unload failed', + description: error.message, + variant: 'destructive', + }); + }, + }); + const formatSize = (sizeMb?: number): string => { if (!sizeMb) return 'Unknown'; if (sizeMb < 1024) return `${sizeMb.toFixed(1)} MB`; @@ -164,7 +185,7 @@ export function ModelManagement() {
{modelStatus.models - .filter((m) => m.model_name.startsWith('qwen-tts')) + .filter((m) => m.model_name.startsWith('qwen-tts') || m.model_name === 'chatterbox-tts') .map((model) => ( unloadMutation.mutate(model.model_name)} isDownloading={downloadingModel === model.model_name} + isUnloading={unloadMutation.isPending && unloadMutation.variables === model.model_name} formatSize={formatSize} /> ))} @@ -206,7 +229,9 @@ export function ModelManagement() { }); setDeleteDialogOpen(true); }} + onUnload={() => unloadMutation.mutate(model.model_name)} isDownloading={downloadingModel === model.model_name} + isUnloading={unloadMutation.isPending && unloadMutation.variables === model.model_name} formatSize={formatSize} /> ))} @@ -271,14 +296,16 @@ interface ModelItemProps { }; onDownload: () => void; onDelete: () => void; + onUnload: () => void; isDownloading: boolean; // Local state - true if user just clicked download + isUnloading: boolean; formatSize: (sizeMb?: number) => string; } -function ModelItem({ model, onDownload, onDelete, isDownloading, formatSize }: ModelItemProps) { +function ModelItem({ model, onDownload, onDelete, onUnload, isDownloading, isUnloading, formatSize }: ModelItemProps) { // Use server's downloading state OR local state (for immediate feedback before server updates) const showDownloading = model.downloading || isDownloading; - + return (
@@ -305,9 +332,25 @@ function ModelItem({ model, onDownload, onDelete, isDownloading, formatSize }: M
{model.downloaded && !showDownloading ? (
-
- Ready -
+ {model.loaded ? ( + + ) : ( +
+ Ready +
+ )}