Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions server/lib/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ export const config = {

openaiApiKey: process.env.OPENAI_API_KEY || '',
replicateApiToken: process.env.REPLICATE_API_TOKEN || '',
mimoApiKey: process.env.MIMO_API_KEY || '',

// Speech-to-text
sttProvider: (process.env.STT_PROVIDER || 'local') as 'local' | 'openai',
Expand Down
28 changes: 28 additions & 0 deletions server/lib/tts-config.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,34 @@ async function loadTtsModule(opts: {
return mod;
}

/** Checks the Xiaomi section of the TTS config: its default values and how patches merge into it. */
describe('getTTSConfig', () => {
  /** Loads the TTS config module with the same English / female / Jenny fixture for every case. */
  const loadEnglishModule = () =>
    loadTtsModule({
      language: 'en',
      edgeVoiceGender: 'female',
      storedVoice: 'en-US-JennyNeural',
    });

  it('returns Xiaomi defaults when config file is missing', async () => {
    const ttsModule = await loadEnglishModule();

    const { xiaomi } = ttsModule.getTTSConfig();
    expect(xiaomi.model).toBe('mimo-v2-tts');
    expect(xiaomi.voice).toBe('mimo_default');
    expect(xiaomi.style).toBe('');
  });

  it('deep-merges Xiaomi patches without dropping defaults', async () => {
    const ttsModule = await loadEnglishModule();

    // Only `style` is patched; `model` and `voice` must keep their default values.
    const { xiaomi } = ttsModule.updateTTSConfig({ xiaomi: { style: 'Happy' } });
    expect(xiaomi.style).toBe('Happy');
    expect(xiaomi.model).toBe('mimo-v2-tts');
    expect(xiaomi.voice).toBe('mimo_default');
  });
});

describe('resolveEdgeTTSVoice', () => {
it('keeps explicit non-default English override', async () => {
const mod = await loadTtsModule({
Expand Down
14 changes: 14 additions & 0 deletions server/lib/tts-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ export interface TTSVoiceConfig {
/** Voice name (e.g. en-US-AriaNeural, en-GB-SoniaNeural) */
voice: string;
};
/** Xiaomi MiMo TTS settings */
xiaomi: {
/** Xiaomi model name */
model: string;
/** Built-in Xiaomi voice name */
voice: string;
/** Optional default Xiaomi style prompt */
style: string;
};
}

const DEFAULTS: TTSVoiceConfig = {
Expand All @@ -65,6 +74,11 @@ const DEFAULTS: TTSVoiceConfig = {
edge: {
voice: 'en-US-AriaNeural',
},
xiaomi: {
model: 'mimo-v2-tts',
voice: 'mimo_default',
style: '',
},
};

let cached: TTSVoiceConfig | null = null;
Expand Down
68 changes: 68 additions & 0 deletions server/routes/api-keys.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/** Tests for API key status and persistence routes. */
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { Hono } from 'hono';

/**
 * Route-level tests for `/api/keys`.
 *
 * Config, the env-file writer and the rate limiter are replaced with mocks,
 * so the tests exercise only the route logic and never touch a real env file.
 */
describe('api-keys routes', () => {
  beforeEach(() => {
    // Drop the module cache so the next dynamic import picks up this test's vi.doMock registrations.
    vi.resetModules();
  });

  afterEach(() => {
    vi.restoreAllMocks();
  });

  /**
   * Registers mocks for the route module's dependencies.
   *
   * @param overrides.mimoKey Initial value of `config.mimoApiKey` (defaults to empty).
   * @returns The mutable mock config object and the `writeEnvKey` spy, so tests can
   *          assert on persistence instead of only on the HTTP response.
   */
  function mockDeps(overrides: { mimoKey?: string } = {}) {
    const mockConfig: Record<string, unknown> = {
      openaiApiKey: '',
      replicateApiToken: '',
      mimoApiKey: overrides.mimoKey ?? '',
    };
    const writeEnvKey = vi.fn(async (_key: string, _value: string) => {});

    vi.doMock('../lib/config.js', () => ({
      config: mockConfig,
    }));

    vi.doMock('../lib/env-file.js', () => ({
      writeEnvKey,
    }));

    // The rate limiter is replaced with a pass-through middleware.
    vi.doMock('../middleware/rate-limit.js', () => ({
      rateLimitGeneral: vi.fn((_c: unknown, next: () => Promise<void>) => next()),
    }));

    return { mockConfig, writeEnvKey };
  }

  /** Imports the route module (after mocks are registered) and mounts it on a fresh Hono app. */
  async function buildApp() {
    const mod = await import('./api-keys.js');
    const app = new Hono();
    app.route('/', mod.default);
    return app;
  }

  it('reports xiaomiKeySet from config', async () => {
    mockDeps({ mimoKey: 'sk-mimo' });
    const app = await buildApp();

    const res = await app.request('/api/keys');
    expect(res.status).toBe(200);

    const json = await res.json() as Record<string, unknown>;
    expect(json.xiaomiKeySet).toBe(true);
  });

  it('writes MIMO_API_KEY from mimoApiKey input', async () => {
    const { writeEnvKey } = mockDeps();
    const app = await buildApp();

    const res = await app.request('/api/keys', {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ mimoApiKey: 'sk-mimo' }),
    });

    expect(res.status).toBe(200);

    const json = await res.json() as Record<string, unknown>;
    expect(json.ok).toBe(true);
    expect(json.xiaomiKeySet).toBe(true);

    // The test title promises a write: verify the key was persisted under the right env name.
    expect(writeEnvKey).toHaveBeenCalledWith('MIMO_API_KEY', 'sk-mimo');
  });
});
9 changes: 9 additions & 0 deletions server/routes/api-keys.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ app.get('/api/keys', rateLimitGeneral, (c) => {
return c.json({
openaiKeySet: !!config.openaiApiKey,
replicateKeySet: !!config.replicateApiToken,
xiaomiKeySet: !!config.mimoApiKey,
});
});

Expand All @@ -42,11 +43,19 @@ app.put('/api/keys', rateLimitGeneral, async (c) => {
results.push(val ? 'REPLICATE_API_TOKEN saved' : 'REPLICATE_API_TOKEN cleared');
}

if (body.mimoApiKey !== undefined) {
const val = body.mimoApiKey.trim();
await writeEnvKey('MIMO_API_KEY', val);
(config as Record<string, unknown>).mimoApiKey = val;
results.push(val ? 'MIMO_API_KEY saved' : 'MIMO_API_KEY cleared');
}

return c.json({
ok: true,
message: results.join(', ') || 'No changes',
openaiKeySet: !!config.openaiApiKey,
replicateKeySet: !!config.replicateApiToken,
xiaomiKeySet: !!config.mimoApiKey,
});
} catch {
return c.text('Invalid request', 400);
Expand Down
57 changes: 56 additions & 1 deletion server/routes/tts.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,18 @@ describe('TTS routes', () => {
function mockDeps(overrides: {
openaiKey?: string;
replicateToken?: string;
mimoKey?: string;
edgeResult?: { ok: boolean; buf?: Buffer; message?: string; status?: number; contentType?: string };
openaiResult?: { ok: boolean; buf?: Buffer; message?: string; status?: number };
replicateResult?: { ok: boolean; buf?: Buffer; message?: string; status?: number };
xiaomiResult?: { ok: boolean; buf?: Buffer; message?: string; status?: number; contentType?: string };
} = {}) {
vi.doMock('../lib/config.js', () => ({
config: {
auth: false, port: 3000, host: '127.0.0.1', sslPort: 3443,
openaiApiKey: overrides.openaiKey || '',
replicateApiToken: overrides.replicateToken || '',
mimoApiKey: overrides.mimoKey || '',
},
SESSION_COOKIE_NAME: 'nerve_session_3000',
}));
Expand All @@ -49,11 +52,17 @@ describe('TTS routes', () => {
overrides.replicateResult || { ok: true, buf: Buffer.from('fake-replicate-audio') }
),
}));
vi.doMock('../services/xiaomi-tts.js', () => ({
synthesizeXiaomi: vi.fn(async () =>
overrides.xiaomiResult || { ok: true, buf: Buffer.from('RIFFdemo'), contentType: 'audio/wav' }
),
}));
vi.doMock('../lib/tts-config.js', () => ({
getTTSConfig: vi.fn(() => ({
openai: { voice: 'alloy', model: 'tts-1' },
openai: { voice: 'alloy', model: 'tts-1', instructions: '' },
edge: { voice: 'en-US-JennyNeural' },
qwen: {},
xiaomi: { model: 'mimo-v2-tts', voice: 'mimo_default', style: 'Happy' },
})),
updateTTSConfig: vi.fn((patch: unknown) => patch),
}));
Expand Down Expand Up @@ -123,6 +132,41 @@ describe('TTS routes', () => {
expect(res.status).toBe(200);
});

// Explicit `provider: 'xiaomi'` with default mocks: the mocked synthesizer's WAV result is returned.
it('uses explicit Xiaomi provider and returns WAV audio', async () => {
  mockDeps();
  const app = await buildApp();

  const payload = JSON.stringify({ text: 'Hello', provider: 'xiaomi' });
  const res = await app.request('/api/tts', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload,
  });

  expect(res.status).toBe(200);
  expect(res.headers.get('Content-Type')).toBe('audio/wav');
});

// With OpenAI and Replicate credentials also configured, an explicit Xiaomi request still yields WAV audio.
it('honors explicit Xiaomi provider even when other keys exist', async () => {
  const allKeys = { openaiKey: 'sk-test', replicateToken: 'r8-test', mimoKey: 'sk-mimo' };
  mockDeps(allKeys);
  const app = await buildApp();

  const payload = JSON.stringify({ text: 'Hello', provider: 'xiaomi' });
  const res = await app.request('/api/tts', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload,
  });

  expect(res.status).toBe(200);
  expect(res.headers.get('Content-Type')).toBe('audio/wav');
});

// A failed Xiaomi synthesis result carrying status 502 must surface as the HTTP response status.
it('returns Xiaomi provider errors', async () => {
  const failure = { ok: false, message: 'Xiaomi failed', status: 502 };
  mockDeps({ xiaomiResult: failure });
  const app = await buildApp();

  const payload = JSON.stringify({ text: 'Hello', provider: 'xiaomi' });
  const res = await app.request('/api/tts', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload,
  });

  expect(res.status).toBe(502);
});

it('returns error from provider failure', async () => {
mockDeps({ edgeResult: { ok: false, message: 'Edge TTS failed', status: 500 } });
const app = await buildApp();
Expand Down Expand Up @@ -181,6 +225,17 @@ describe('TTS routes', () => {
expect(res.status).toBe(200);
});

// A patch using only the known Xiaomi keys (model, voice, style) with string values is accepted.
it('accepts valid Xiaomi config patch', async () => {
  mockDeps();
  const app = await buildApp();

  const patch = { xiaomi: { model: 'mimo-v2-tts', voice: 'default_en', style: 'Happy' } };
  const res = await app.request('/api/tts/config', {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(patch),
  });

  expect(res.status).toBe(200);
});

it('rejects non-string values', async () => {
mockDeps();
const app = await buildApp();
Expand Down
23 changes: 18 additions & 5 deletions server/routes/tts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import { getTtsCache, setTtsCache } from '../services/tts-cache.js';
import { synthesizeOpenAI } from '../services/openai-tts.js';
import { synthesizeReplicate } from '../services/replicate-tts.js';
import { synthesizeEdge } from '../services/edge-tts.js';
import { synthesizeXiaomi } from '../services/xiaomi-tts.js';
import { rateLimitTTS, rateLimitGeneral } from '../middleware/rate-limit.js';
import type { ContentfulStatusCode } from 'hono/utils/http-status';

Expand All @@ -40,7 +41,7 @@ const ttsSchema = z.object({
.refine((s) => s.trim().length > 0, 'Text cannot be empty or whitespace'),
voice: z.string().optional(),
// Accept both old ("qwen") and new ("replicate") values
provider: z.enum(['openai', 'replicate', 'qwen', 'edge']).optional(),
provider: z.enum(['openai', 'replicate', 'qwen', 'edge', 'xiaomi']).optional(),
model: z.string().optional(),
});

Expand Down Expand Up @@ -72,19 +73,28 @@ app.post(
const voice = rawVoice;

// Resolve effective provider: explicit > openai (if key) > replicate (if key) > edge
const useXiaomi = provider === 'xiaomi';
const useReplicate =
provider === 'replicate' ||
(!provider && !config.openaiApiKey && !!config.replicateApiToken);
const useEdge =
provider === 'edge' ||
(!provider && !config.openaiApiKey && !config.replicateApiToken);
const effectiveProvider = useEdge ? 'edge' : useReplicate ? 'replicate' : 'openai';
const effectiveProvider = useXiaomi
? 'xiaomi'
: useEdge
? 'edge'
: useReplicate
? 'replicate'
: 'openai';
console.log(`[tts] provider=${effectiveProvider} voice=${voice} text="${text.slice(0, 50)}..."`);

// Cache key includes provider + model + voice for proper isolation
const xiaomiStyle = effectiveProvider === 'xiaomi' ? getTTSConfig().xiaomi.style : '';

// Cache key includes provider + model + voice and Xiaomi style for proper isolation
const hash = crypto
.createHash('md5')
.update(`${effectiveProvider}:${model || ''}:${voice}:${text}`)
.update(`${effectiveProvider}:${model || ''}:${voice || ''}:${xiaomiStyle}:${text}`)
.digest('hex');

const cached = getTtsCache(hash);
Expand All @@ -95,7 +105,9 @@ app.post(
}

let result;
if (effectiveProvider === 'edge') {
if (effectiveProvider === 'xiaomi') {
result = await synthesizeXiaomi(text, { model, voice });
} else if (effectiveProvider === 'edge') {
result = await synthesizeEdge(text, voice);
} else if (effectiveProvider === 'replicate') {
result = await synthesizeReplicate(text, { model, voice });
Expand Down Expand Up @@ -131,6 +143,7 @@ const TTS_CONFIG_SCHEMA: Record<string, string[]> = {
qwen: ['mode', 'language', 'speaker', 'voiceDescription', 'styleInstruction'],
openai: ['model', 'voice', 'instructions'],
edge: ['voice'],
xiaomi: ['model', 'voice', 'style'],
};

/** Validate TTS config patch — only allow known keys with string values */
Expand Down
Loading
Loading