Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,14 @@ Thumbs.db
data/profiles/*
data/generations/*
data/projects/*
data/cache/*
data/finetune/*
data/voicebox.db
!data/.gitkeep

# Model binaries (downloaded at runtime)
backend/models/

# Logs
*.log
logs/
Expand Down
11 changes: 9 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Audio export failing when Tauri save dialog returns object instead of string path

### Added
- **Hebrew Language Support** - Full Hebrew voice cloning and transcription
- Chatterbox Multilingual TTS backend (`ResembleAI/chatterbox`) for Hebrew voice generation
- ivrit-ai Whisper models (`ivrit-ai/whisper-large-v3-turbo` and `ivrit-ai/whisper-large-v3`) for Hebrew transcription
- Automatic backend routing: Hebrew requests use Chatterbox, all other languages use Qwen3-TTS
- Auto-download of Hebrew models on first use with progress tracking
- Trailing silence trimming for Chatterbox output
- **Model Unloading** - New `/models/{model_name}/unload` endpoint to free memory for individual models
- **Makefile** - Comprehensive development workflow automation with commands for setup, development, building, testing, and code quality checks
- Includes Python version detection and compatibility warnings
- Self-documenting help system with `make help`
Expand All @@ -68,9 +75,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

---

## [Unreleased - Planned]
## Roadmap

### Planned
- Real-time streaming synthesis
- Conversation mode with multiple speakers
- Voice effects (pitch shift, reverb, M3GAN-style)
Expand All @@ -82,4 +88,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

---

[Unreleased]: https://github.com/nadavox/voicebox/compare/v0.1.0...HEAD
[0.1.0]: https://github.com/jamiepine/voicebox/releases/tag/v0.1.0
14 changes: 12 additions & 2 deletions app/src/components/Generation/FloatingGenerateBox.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import {
} from '@/components/ui/select';
import { Textarea } from '@/components/ui/textarea';
import { useToast } from '@/components/ui/use-toast';
import { LANGUAGE_OPTIONS } from '@/lib/constants/languages';
import { LANGUAGE_OPTIONS, type LanguageCode } from '@/lib/constants/languages';
import { useGenerationForm } from '@/lib/hooks/useGenerationForm';
import { useProfile, useProfiles } from '@/lib/hooks/useProfiles';
import { useAddStoryItem, useStory } from '@/lib/hooks/useStories';
Expand Down Expand Up @@ -75,6 +75,16 @@ export function FloatingGenerateBox({
},
});

// Keep the form's language field aligned with the selected profile.
// Runs whenever the profile's language (or the form instance) changes and
// only writes the value when it matches one of the offered LANGUAGE_OPTIONS.
useEffect(() => {
  const rawLanguage = selectedProfile?.language;
  if (!rawLanguage) {
    // No profile selected, or the profile carries no language: leave the form alone.
    return;
  }

  const profileLang = rawLanguage as LanguageCode;
  const isKnownLanguage = LANGUAGE_OPTIONS.some((option) => option.value === profileLang);
  if (isKnownLanguage) {
    form.setValue('language', profileLang);
  }
}, [selectedProfile?.language, form]);

// Click away handler to collapse the box
useEffect(() => {
function handleClickOutside(event: MouseEvent) {
Expand Down Expand Up @@ -383,7 +393,7 @@ export function FloatingGenerateBox({
name="language"
render={({ field }) => (
<FormItem className="flex-1 space-y-0">
<Select onValueChange={field.onChange} defaultValue={field.value}>
<Select onValueChange={field.onChange} value={field.value}>
<FormControl>
<SelectTrigger className="h-8 text-xs bg-card border-border rounded-full hover:bg-background/50 transition-all">
<SelectValue />
Expand Down
15 changes: 13 additions & 2 deletions app/src/components/Generation/GenerationForm.tsx
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { useEffect } from 'react';
import { Loader2, Mic } from 'lucide-react';
import { Button } from '@/components/ui/button';
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
Expand All @@ -19,7 +20,7 @@ import {
SelectValue,
} from '@/components/ui/select';
import { Textarea } from '@/components/ui/textarea';
import { LANGUAGE_OPTIONS } from '@/lib/constants/languages';
import { LANGUAGE_OPTIONS, type LanguageCode } from '@/lib/constants/languages';
import { useGenerationForm } from '@/lib/hooks/useGenerationForm';
import { useProfile } from '@/lib/hooks/useProfiles';
import { useUIStore } from '@/stores/uiStore';
Expand All @@ -30,6 +31,16 @@ export function GenerationForm() {

const { form, handleSubmit, isPending } = useGenerationForm();

// Mirror the selected profile's language into the form.
// The value is applied only if it appears in LANGUAGE_OPTIONS; otherwise the
// current form value is left untouched.
useEffect(() => {
  const rawLanguage = selectedProfile?.language;
  if (!rawLanguage) {
    // Nothing to sync when the profile (or its language) is absent.
    return;
  }

  const profileLang = rawLanguage as LanguageCode;
  const isKnownLanguage = LANGUAGE_OPTIONS.some((option) => option.value === profileLang);
  if (isKnownLanguage) {
    form.setValue('language', profileLang);
  }
}, [selectedProfile?.language, form]);

async function onSubmit(data: Parameters<typeof handleSubmit>[0]) {
await handleSubmit(data, selectedProfileId);
}
Expand Down Expand Up @@ -105,7 +116,7 @@ export function GenerationForm() {
render={({ field }) => (
<FormItem>
<FormLabel>Language</FormLabel>
<Select onValueChange={field.onChange} defaultValue={field.value}>
<Select onValueChange={field.onChange} value={field.value}>
<FormControl>
<SelectTrigger>
<SelectValue />
Expand Down
57 changes: 50 additions & 7 deletions app/src/components/ServerSettings/ModelManagement.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
import { Download, Loader2, Trash2 } from 'lucide-react';
import { Download, Loader2, Power, Trash2 } from 'lucide-react';
import { useCallback, useState } from 'react';
import {
AlertDialog,
Expand Down Expand Up @@ -136,6 +136,27 @@ export function ModelManagement() {
},
});

// Mutation that calls apiClient.unloadModel for a single model name.
// On success it shows a confirmation toast and invalidates the 'modelStatus'
// query; on failure it surfaces the error message in a destructive toast.
const unloadMutation = useMutation({
  mutationFn: (modelName: string) => apiClient.unloadModel(modelName),
  onSuccess: async (_data, modelName) => {
    // Prefer the human-readable name when the model is present in the cached status.
    const matched = modelStatus?.models.find((entry) => entry.model_name === modelName);
    const label = matched?.display_name || modelName;

    toast({
      title: 'Model unloaded',
      description: `${label} has been unloaded from memory.`,
    });

    await queryClient.invalidateQueries({
      queryKey: ['modelStatus'],
      refetchType: 'all',
    });
  },
  onError: (error: Error) => {
    const { message } = error;
    toast({
      title: 'Unload failed',
      description: message,
      variant: 'destructive',
    });
  },
});

const formatSize = (sizeMb?: number): string => {
if (!sizeMb) return 'Unknown';
if (sizeMb < 1024) return `${sizeMb.toFixed(1)} MB`;
Expand Down Expand Up @@ -164,7 +185,7 @@ export function ModelManagement() {
</h3>
<div className="space-y-2">
{modelStatus.models
.filter((m) => m.model_name.startsWith('qwen-tts'))
.filter((m) => m.model_name.startsWith('qwen-tts') || m.model_name === 'chatterbox-tts')
.map((model) => (
<ModelItem
key={model.model_name}
Expand All @@ -178,7 +199,9 @@ export function ModelManagement() {
});
setDeleteDialogOpen(true);
}}
onUnload={() => unloadMutation.mutate(model.model_name)}
isDownloading={downloadingModel === model.model_name}
isUnloading={unloadMutation.isPending && unloadMutation.variables === model.model_name}
formatSize={formatSize}
/>
))}
Expand Down Expand Up @@ -206,7 +229,9 @@ export function ModelManagement() {
});
setDeleteDialogOpen(true);
}}
onUnload={() => unloadMutation.mutate(model.model_name)}
isDownloading={downloadingModel === model.model_name}
isUnloading={unloadMutation.isPending && unloadMutation.variables === model.model_name}
formatSize={formatSize}
/>
))}
Expand Down Expand Up @@ -271,14 +296,16 @@ interface ModelItemProps {
};
onDownload: () => void;
onDelete: () => void;
onUnload: () => void;
isDownloading: boolean; // Local state - true if user just clicked download
isUnloading: boolean;
formatSize: (sizeMb?: number) => string;
}

function ModelItem({ model, onDownload, onDelete, isDownloading, formatSize }: ModelItemProps) {
function ModelItem({ model, onDownload, onDelete, onUnload, isDownloading, isUnloading, formatSize }: ModelItemProps) {
// Use server's downloading state OR local state (for immediate feedback before server updates)
const showDownloading = model.downloading || isDownloading;

return (
<div className="flex items-center justify-between p-3 border rounded-lg">
<div className="flex-1">
Expand All @@ -305,9 +332,25 @@ function ModelItem({ model, onDownload, onDelete, isDownloading, formatSize }: M
<div className="flex items-center gap-2">
{model.downloaded && !showDownloading ? (
<div className="flex items-center gap-2">
<div className="flex items-center gap-1 text-sm text-muted-foreground">
<span>Ready</span>
</div>
{model.loaded ? (
<Button
size="sm"
onClick={onUnload}
variant="outline"
disabled={isUnloading}
title="Unload model from memory"
>
{isUnloading ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<Power className="h-4 w-4" />
)}
</Button>
) : (
<div className="flex items-center gap-1 text-sm text-muted-foreground">
<span>Ready</span>
</div>
)}
<Button
size="sm"
onClick={onDelete}
Expand Down
11 changes: 7 additions & 4 deletions app/src/lib/api/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -310,13 +310,10 @@ class ApiClient {
}

async triggerModelDownload(modelName: string): Promise<{ message: string }> {
console.log('[API] triggerModelDownload called for:', modelName, 'at', new Date().toISOString());
const result = await this.request<{ message: string }>('/models/download', {
return this.request<{ message: string }>('/models/download', {
method: 'POST',
body: JSON.stringify({ model_name: modelName } as ModelDownloadRequest),
});
console.log('[API] triggerModelDownload response:', result);
return result;
}

async deleteModel(modelName: string): Promise<{ message: string }> {
Expand All @@ -325,6 +322,12 @@ class ApiClient {
});
}

/**
 * Send a POST to `/models/{modelName}/unload` for the given model.
 *
 * The model name is percent-encoded before being placed in the URL path so
 * that reserved characters (for example `/`, `?`, `#` or spaces) cannot change
 * which route is requested. Names made only of letters, digits, `-`, `_` and
 * `.` are sent unchanged.
 *
 * @param modelName - Identifier of the model to unload.
 * @returns A promise resolving to the `{ message }` payload produced by `request`.
 */
async unloadModel(modelName: string): Promise<{ message: string }> {
  const encodedName = encodeURIComponent(modelName);
  return this.request<{ message: string }>(`/models/${encodedName}/unload`, {
    method: 'POST',
  });
}

// Task Management
async getActiveTasks(): Promise<ActiveTasksResponse> {
return this.request<ActiveTasksResponse>('/tasks/active');
Expand Down
1 change: 1 addition & 0 deletions app/src/lib/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ export interface GenerationRequest {
language: LanguageCode;
seed?: number;
model_size?: '1.7B' | '0.6B';
instruct?: string;
}

export interface GenerationResponse {
Expand Down
5 changes: 3 additions & 2 deletions app/src/lib/constants/languages.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/**
* Supported languages for Qwen3-TTS
* Based on: https://github.com/QwenLM/Qwen3-TTS
* Supported languages for voice generation.
* Most languages use Qwen3-TTS; Hebrew uses Chatterbox TTS.
*/

export const SUPPORTED_LANGUAGES = {
Expand All @@ -14,6 +14,7 @@ export const SUPPORTED_LANGUAGES = {
pt: 'Portuguese',
es: 'Spanish',
it: 'Italian',
he: 'Hebrew',
} as const;

export type LanguageCode = keyof typeof SUPPORTED_LANGUAGES;
Expand Down
11 changes: 9 additions & 2 deletions app/src/lib/hooks/useGenerationForm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,15 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
try {
setIsGenerating(true);

const modelName = `qwen-tts-${data.modelSize}`;
const displayName = data.modelSize === '1.7B' ? 'Qwen TTS 1.7B' : 'Qwen TTS 0.6B';
// Determine model name for download tracking
// Hebrew uses Chatterbox TTS, other languages use Qwen
const isHebrew = data.language === 'he';
const modelName = isHebrew ? 'chatterbox-tts' : `qwen-tts-${data.modelSize}`;
const displayName = isHebrew
? 'Chatterbox TTS (Hebrew)'
: data.modelSize === '1.7B'
? 'Qwen TTS 1.7B'
: 'Qwen TTS 0.6B';

try {
const modelStatus = await apiClient.getModelStatus();
Expand Down
Loading