Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,17 @@ GROK_API_KEY=
GROK_BASE_URL=
GROK_MODELS=

# --- Local LLM Providers ------------------------------------------------------
# Ollama (local open source models)
OLLAMA_API_KEY=
OLLAMA_BASE_URL=
OLLAMA_MODELS=

# LLaMA.cpp (local open source models)
LLAMACPP_API_KEY=
LLAMACPP_BASE_URL=
LLAMACPP_MODELS=

# --- TTS (Text-to-Speech) ----------------------------------------------------

TTS_OPENAI_API_KEY=
Expand Down Expand Up @@ -137,3 +148,23 @@ DEFAULT_MODEL=
# LOG_LEVEL=info
# LOG_FORMAT=pretty
# LLM_THINKING_DISABLED=false

# --- HTTPS Development Server ------------------------------------------------
# Enable HTTPS for local development (required for microphone access in Chrome)
# HTTPS_ENABLE=true
# HTTP_PORT=3000
# HTTPS_PORT=3001
# HTTPS_CERT_PATH=./localhost.crt
# HTTPS_KEY_PATH=./localhost.key
# HOST=0.0.0.0

# --- Local LLM Providers ------------------------------------------------------
# Ollama (local open source models)
# OLLAMA_API_KEY=
# OLLAMA_BASE_URL=http://localhost:11434/v1
# OLLAMA_MODELS=

# LLaMA.cpp (local open source models)
# LLAMACPP_API_KEY=
# LLAMACPP_BASE_URL=http://localhost:8080/v1
# LLAMACPP_MODELS=
141 changes: 141 additions & 0 deletions app/api/ai/providers/ollama/tags/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/**
* Ollama API endpoint to get list of available local models
* GET /api/ai/providers/ollama/tags
*/

import { NextResponse } from 'next/server';
import type { ModelInfo } from '@/lib/types/provider';
import { getProvider } from '@/lib/ai/providers';
import { resolveBaseUrl } from '@/lib/server/provider-config';

/**
 * Response from Ollama /api/tags endpoint
 * @see https://ollama.com/docs/api
 */
interface OllamaModel {
  // Model tag as shown by `ollama list`, e.g. "llama3.1:8b".
  name: string;
  // Model identifier reported by Ollama (often identical to `name`).
  model: string;
  // Content digest of the model blob.
  digest: string;
  // Size on disk in bytes.
  size: number;
  // ISO-8601 timestamp of the last modification.
  modified_at: string;
  details: {
    // Human-readable parameter count, e.g. "7.6B" — used below to
    // estimate a context window.
    parameter_size: string;
    // Quantization label, e.g. "Q4_K_M".
    quantization_level: string;
  };
}

// Top-level shape of the /api/tags payload.
interface OllamaTagsResponse {
  models: OllamaModel[];
}

/**
 * Convert Ollama model info to OpenMAIC ModelInfo format.
 *
 * @param model - One entry from Ollama's /api/tags response.
 * @returns ModelInfo with a heuristically estimated context window and
 *          capability flags derived from the model name.
 */
function convertOllamaModelToModelInfo(model: OllamaModel): ModelInfo {
  // Estimate the context window from the parameter count. This is a rough
  // heuristic — the real context depends on how the user built/loaded the
  // model. Parse the leading number out of strings like "8B" / "7.6B"
  // instead of substring matching, which misclassified e.g. "72B" (matched
  // '2b') and "3.8B" (matched '8b').
  const paramSize = model.details?.parameter_size ?? '';
  const parsed = /([\d.]+)\s*b/i.exec(paramSize);
  const billions = parsed ? Number.parseFloat(parsed[1]) : NaN;

  let contextWindow = 8192; // conservative default for unknown sizes
  if (billions >= 1 && billions <= 2) {
    contextWindow = 32768;
  } else if ((billions >= 7 && billions <= 8) || (billions >= 13 && billions <= 14)) {
    contextWindow = 128000;
  } else if ((billions >= 30 && billions <= 34) || billions >= 70) {
    contextWindow = 200000;
  }

  // Capability heuristics by model name only — Ollama's tags payload does
  // not report capabilities directly.
  const hasVision = /vision|vl|multimodal/i.test(model.name);
  const isReasoning = /r1|reason|thinking/i.test(model.name);

  const capabilities: ModelInfo['capabilities'] = {
    streaming: true,
    vision: hasVision,
    // assumes non-vision local models support tools — TODO confirm per model
    tools: !hasVision,
  };

  if (isReasoning) {
    capabilities.thinking = {
      toggleable: false,
      budgetAdjustable: false,
      defaultEnabled: true,
    };
  }

  // Display name includes size/quantization only when Ollama reports them,
  // so missing details no longer render as "(undefined undefined)".
  const detailSuffix = [model.details?.parameter_size, model.details?.quantization_level]
    .filter(Boolean)
    .join(' ');

  return {
    id: model.name,
    name: detailSuffix ? `${model.name} (${detailSuffix})` : model.name,
    contextWindow,
    // Output budget is capped at a quarter of the context window.
    outputWindow: Math.floor(contextWindow / 4),
    capabilities,
  };
}

/**
 * GET /api/ai/providers/ollama/tags
 *
 * Lists the locally installed Ollama models as ModelInfo objects.
 * Responses:
 *  - 200 `{ models: ModelInfo[] }` on success (empty list if the payload
 *    has no `models` array),
 *  - 404 if the Ollama provider is not registered,
 *  - 400 if no base URL is configured,
 *  - upstream status if Ollama answers with an HTTP error,
 *  - 503 if the daemon cannot be reached (network error or timeout).
 */
export async function GET() {
  try {
    const provider = getProvider('ollama');
    if (!provider) {
      return NextResponse.json({ error: 'Ollama provider not configured' }, { status: 404 });
    }

    // Environment-configured URL wins; fall back to the provider default.
    const serverBaseUrl = resolveBaseUrl('ollama');
    const baseUrl = serverBaseUrl || provider.defaultBaseUrl;

    if (!baseUrl) {
      return NextResponse.json({ error: 'Base URL not configured for Ollama' }, { status: 400 });
    }

    // The configured URL normally points at the OpenAI-compatible /v1 root,
    // but the native tags endpoint lives at /api/tags — strip "/v1" and any
    // trailing slash first. (Was `let`, but it is never reassigned.)
    const cleanBaseUrl = baseUrl.replace(/\/v1\/?$/, '').replace(/\/$/, '');
    const tagsUrl = `${cleanBaseUrl}/api/tags`;

    // Short timeout: a local daemon should answer immediately or not at all.
    const response = await fetch(tagsUrl, {
      method: 'GET',
      signal: AbortSignal.timeout(5000),
    });

    if (!response.ok) {
      return NextResponse.json(
        {
          error: `Failed to connect to Ollama: ${response.status} ${response.statusText}`,
          details: `Is Ollama running at ${cleanBaseUrl}?`,
        },
        { status: response.status }
      );
    }

    const data = (await response.json()) as OllamaTagsResponse;

    // Defensive: tolerate an unexpected payload shape.
    if (!data.models || !Array.isArray(data.models)) {
      return NextResponse.json({ models: [] });
    }

    const models: ModelInfo[] = data.models.map(convertOllamaModelToModelInfo);

    return NextResponse.json({ models });
  } catch (error) {
    // fetch throws (rather than returning !ok) on network errors and on
    // AbortSignal timeout — both mean the daemon is unreachable.
    console.error('[Ollama Tags API] Error:', error);

    const message = error instanceof Error ? error.message : String(error);

    return NextResponse.json(
      {
        error: 'Connection error',
        details: `Could not connect to Ollama: ${message}`,
        hint: 'Please ensure Ollama is running and the Base URL is configured correctly',
      },
      { status: 503 }
    );
  }
}
4 changes: 4 additions & 0 deletions app/api/parse-pdf/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export async function POST(req: NextRequest) {
const providerId = formData.get('providerId') as PDFProviderId | null;
const apiKey = formData.get('apiKey') as string | null;
const baseUrl = formData.get('baseUrl') as string | null;
const prompt = formData.get('prompt') as string | null;

if (!pdfFile) {
return apiError('MISSING_REQUIRED_FIELD', 400, 'No PDF file provided');
Expand All @@ -49,6 +50,9 @@ export async function POST(req: NextRequest) {
baseUrl: clientBaseUrl
? clientBaseUrl
: resolvePDFBaseUrl(effectiveProviderId, baseUrl || undefined),
providerOptions: {
...(prompt && { prompt }),
},
};

// Convert PDF to buffer
Expand Down
3 changes: 3 additions & 0 deletions app/generation-preview/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,9 @@ function GenerationPreviewContent() {
if (currentSession.pdfProviderConfig?.baseUrl?.trim()) {
parseFormData.append('baseUrl', currentSession.pdfProviderConfig.baseUrl);
}
if (currentSession.pdfProviderConfig?.customPrompt?.trim()) {
parseFormData.append('prompt', currentSession.pdfProviderConfig.customPrompt);
}

const parseResponse = await fetch('/api/parse-pdf', {
method: 'POST',
Expand Down
2 changes: 1 addition & 1 deletion app/generation-preview/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export interface GenerationSessionState {
pdfStorageKey?: string;
pdfFileName?: string;
pdfProviderId?: string;
pdfProviderConfig?: { apiKey?: string; baseUrl?: string };
pdfProviderConfig?: { apiKey?: string; baseUrl?: string; customPrompt?: string };
// Web search context
researchContext?: string;
researchSources?: Array<{ title: string; url: string }>;
Expand Down
8 changes: 5 additions & 3 deletions components/ai-elements/prompt-input.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -1010,7 +1010,7 @@ type SpeechRecognitionResult = {
};

type SpeechRecognitionAlternative = {
script: string;
transcript: string;
confidence: number;
};

Expand Down Expand Up @@ -1041,6 +1041,8 @@ export const PromptInputSpeechButton = ({
const [isListening, setIsListening] = useState(false);
const [recognition, setRecognition] = useState<SpeechRecognition | null>(null);
const recognitionRef = useRef<SpeechRecognition | null>(null);
const { useSettingsStore } = require('@/lib/store/settings');
const asrLanguage = useSettingsStore((state: { asrLanguage: string }) => state.asrLanguage);

useEffect(() => {
if (
Expand All @@ -1052,7 +1054,7 @@ export const PromptInputSpeechButton = ({

speechRecognition.continuous = true;
speechRecognition.interimResults = true;
speechRecognition.lang = 'en-US';
speechRecognition.lang = asrLanguage || 'zh-CN';

speechRecognition.onstart = () => {
setIsListening(true);
Expand All @@ -1068,7 +1070,7 @@ export const PromptInputSpeechButton = ({
for (let i = event.resultIndex; i < event.results.length; i++) {
const result = event.results[i];
if (result.isFinal) {
finalScript += result[0]?.script ?? '';
finalScript += result[0]?.transcript ?? '';
}
}

Expand Down
Loading
Loading