Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,17 @@ GROK_API_KEY=
GROK_BASE_URL=
GROK_MODELS=

# --- Local LLM Providers ------------------------------------------------------
# Ollama (local open source models)
OLLAMA_API_KEY=
OLLAMA_BASE_URL=
OLLAMA_MODELS=

# LLaMA.cpp (local open source models)
LLAMACPP_API_KEY=
LLAMACPP_BASE_URL=
LLAMACPP_MODELS=

# --- TTS (Text-to-Speech) ----------------------------------------------------

TTS_OPENAI_API_KEY=
Expand Down Expand Up @@ -137,3 +148,23 @@ DEFAULT_MODEL=
# LOG_LEVEL=info
# LOG_FORMAT=pretty
# LLM_THINKING_DISABLED=false

# --- HTTPS Development Server ------------------------------------------------
# Enable HTTPS for local development (required for microphone access in Chrome)
# HTTPS_ENABLE=true
# HTTP_PORT=3000
# HTTPS_PORT=3001
# HTTPS_CERT_PATH=./localhost.crt
# HTTPS_KEY_PATH=./localhost.key
# HOST=0.0.0.0

# --- Local LLM Providers ------------------------------------------------------
# Ollama (local open source models)
# OLLAMA_API_KEY=
# OLLAMA_BASE_URL=http://localhost:11434/v1
# OLLAMA_MODELS=

# LLaMA.cpp (local open source models)
# LLAMACPP_API_KEY=
# LLAMACPP_BASE_URL=http://localhost:8080/v1
# LLAMACPP_MODELS=
141 changes: 141 additions & 0 deletions app/api/ai/providers/ollama/tags/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/**
* Ollama API endpoint to get list of available local models
* GET /api/ai/providers/ollama/tags
*/

import { NextResponse } from 'next/server';
import type { ModelInfo } from '@/lib/types/provider';
import { getProvider } from '@/lib/ai/providers';
import { resolveBaseUrl } from '@/lib/server/provider-config';

/**
 * Response from Ollama /api/tags endpoint
 * @see https://ollama.com/docs/api
 */
interface OllamaModel {
  // Model tag as shown by `ollama list`, e.g. "llama3.1:8b".
  name: string;
  // Model identifier reported by Ollama (often identical to `name`).
  model: string;
  // Content digest of the model blob.
  digest: string;
  // Size on disk in bytes.
  size: number;
  // ISO-8601 timestamp of the last modification.
  modified_at: string;
  details: {
    // Human-readable parameter count, e.g. "7.6B" — used below to
    // estimate a context window.
    parameter_size: string;
    // Quantization label, e.g. "Q4_K_M".
    quantization_level: string;
  };
}

// Top-level shape of the /api/tags payload.
interface OllamaTagsResponse {
  models: OllamaModel[];
}

/**
 * Convert Ollama model info to OpenMAIC ModelInfo format.
 *
 * @param model - One entry from Ollama's /api/tags response.
 * @returns ModelInfo with a heuristically estimated context window and
 *          capability flags derived from the model name.
 */
function convertOllamaModelToModelInfo(model: OllamaModel): ModelInfo {
  // Estimate the context window from the parameter count. This is a rough
  // heuristic — the real context depends on how the user built/loaded the
  // model. Parse the leading number out of strings like "8B" / "7.6B"
  // instead of substring matching, which misclassified e.g. "72B" (matched
  // '2b') and "3.8B" (matched '8b').
  const paramSize = model.details?.parameter_size ?? '';
  const parsed = /([\d.]+)\s*b/i.exec(paramSize);
  const billions = parsed ? Number.parseFloat(parsed[1]) : NaN;

  let contextWindow = 8192; // conservative default for unknown sizes
  if (billions >= 1 && billions <= 2) {
    contextWindow = 32768;
  } else if ((billions >= 7 && billions <= 8) || (billions >= 13 && billions <= 14)) {
    contextWindow = 128000;
  } else if ((billions >= 30 && billions <= 34) || billions >= 70) {
    contextWindow = 200000;
  }

  // Capability heuristics by model name only — Ollama's tags payload does
  // not report capabilities directly.
  const hasVision = /vision|vl|multimodal/i.test(model.name);
  const isReasoning = /r1|reason|thinking/i.test(model.name);

  const capabilities: ModelInfo['capabilities'] = {
    streaming: true,
    vision: hasVision,
    // assumes non-vision local models support tools — TODO confirm per model
    tools: !hasVision,
  };

  if (isReasoning) {
    capabilities.thinking = {
      toggleable: false,
      budgetAdjustable: false,
      defaultEnabled: true,
    };
  }

  // Display name includes size/quantization only when Ollama reports them,
  // so missing details no longer render as "(undefined undefined)".
  const detailSuffix = [model.details?.parameter_size, model.details?.quantization_level]
    .filter(Boolean)
    .join(' ');

  return {
    id: model.name,
    name: detailSuffix ? `${model.name} (${detailSuffix})` : model.name,
    contextWindow,
    // Output budget is capped at a quarter of the context window.
    outputWindow: Math.floor(contextWindow / 4),
    capabilities,
  };
}

/**
 * GET /api/ai/providers/ollama/tags
 *
 * Lists the locally installed Ollama models as ModelInfo objects.
 * Responses:
 *  - 200 `{ models: ModelInfo[] }` on success (empty list if the payload
 *    has no `models` array),
 *  - 404 if the Ollama provider is not registered,
 *  - 400 if no base URL is configured,
 *  - upstream status if Ollama answers with an HTTP error,
 *  - 503 if the daemon cannot be reached (network error or timeout).
 */
export async function GET() {
  try {
    const provider = getProvider('ollama');
    if (!provider) {
      return NextResponse.json({ error: 'Ollama provider not configured' }, { status: 404 });
    }

    // Environment-configured URL wins; fall back to the provider default.
    const serverBaseUrl = resolveBaseUrl('ollama');
    const baseUrl = serverBaseUrl || provider.defaultBaseUrl;

    if (!baseUrl) {
      return NextResponse.json({ error: 'Base URL not configured for Ollama' }, { status: 400 });
    }

    // The configured URL normally points at the OpenAI-compatible /v1 root,
    // but the native tags endpoint lives at /api/tags — strip "/v1" and any
    // trailing slash first. (Was `let`, but it is never reassigned.)
    const cleanBaseUrl = baseUrl.replace(/\/v1\/?$/, '').replace(/\/$/, '');
    const tagsUrl = `${cleanBaseUrl}/api/tags`;

    // Short timeout: a local daemon should answer immediately or not at all.
    const response = await fetch(tagsUrl, {
      method: 'GET',
      signal: AbortSignal.timeout(5000),
    });

    if (!response.ok) {
      return NextResponse.json(
        {
          error: `Failed to connect to Ollama: ${response.status} ${response.statusText}`,
          details: `Is Ollama running at ${cleanBaseUrl}?`,
        },
        { status: response.status }
      );
    }

    const data = (await response.json()) as OllamaTagsResponse;

    // Defensive: tolerate an unexpected payload shape.
    if (!data.models || !Array.isArray(data.models)) {
      return NextResponse.json({ models: [] });
    }

    const models: ModelInfo[] = data.models.map(convertOllamaModelToModelInfo);

    return NextResponse.json({ models });
  } catch (error) {
    // fetch throws (rather than returning !ok) on network errors and on
    // AbortSignal timeout — both mean the daemon is unreachable.
    console.error('[Ollama Tags API] Error:', error);

    const message = error instanceof Error ? error.message : String(error);

    return NextResponse.json(
      {
        error: 'Connection error',
        details: `Could not connect to Ollama: ${message}`,
        hint: 'Please ensure Ollama is running and the Base URL is configured correctly',
      },
      { status: 503 }
    );
  }
}
4 changes: 4 additions & 0 deletions app/api/parse-pdf/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export async function POST(req: NextRequest) {
const providerId = formData.get('providerId') as PDFProviderId | null;
const apiKey = formData.get('apiKey') as string | null;
const baseUrl = formData.get('baseUrl') as string | null;
const prompt = formData.get('prompt') as string | null;

if (!pdfFile) {
return apiError('MISSING_REQUIRED_FIELD', 400, 'No PDF file provided');
Expand All @@ -49,6 +50,9 @@ export async function POST(req: NextRequest) {
baseUrl: clientBaseUrl
? clientBaseUrl
: resolvePDFBaseUrl(effectiveProviderId, baseUrl || undefined),
providerOptions: {
...(prompt && { prompt }),
},
};

// Convert PDF to buffer
Expand Down
3 changes: 3 additions & 0 deletions app/generation-preview/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,9 @@ function GenerationPreviewContent() {
if (currentSession.pdfProviderConfig?.baseUrl?.trim()) {
parseFormData.append('baseUrl', currentSession.pdfProviderConfig.baseUrl);
}
if (currentSession.pdfProviderConfig?.customPrompt?.trim()) {
parseFormData.append('prompt', currentSession.pdfProviderConfig.customPrompt);
}

const parseResponse = await fetch('/api/parse-pdf', {
method: 'POST',
Expand Down
2 changes: 1 addition & 1 deletion app/generation-preview/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export interface GenerationSessionState {
pdfStorageKey?: string;
pdfFileName?: string;
pdfProviderId?: string;
pdfProviderConfig?: { apiKey?: string; baseUrl?: string };
pdfProviderConfig?: { apiKey?: string; baseUrl?: string; customPrompt?: string };
// Web search context
researchContext?: string;
researchSources?: Array<{ title: string; url: string }>;
Expand Down
8 changes: 5 additions & 3 deletions components/ai-elements/prompt-input.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -1010,7 +1010,7 @@ type SpeechRecognitionResult = {
};

type SpeechRecognitionAlternative = {
script: string;
transcript: string;
confidence: number;
};

Expand Down Expand Up @@ -1041,6 +1041,8 @@ export const PromptInputSpeechButton = ({
const [isListening, setIsListening] = useState(false);
const [recognition, setRecognition] = useState<SpeechRecognition | null>(null);
const recognitionRef = useRef<SpeechRecognition | null>(null);
const { useSettingsStore } = require('@/lib/store/settings');
const asrLanguage = useSettingsStore((state: { asrLanguage: string }) => state.asrLanguage);

useEffect(() => {
if (
Expand All @@ -1052,7 +1054,7 @@ export const PromptInputSpeechButton = ({

speechRecognition.continuous = true;
speechRecognition.interimResults = true;
speechRecognition.lang = 'en-US';
speechRecognition.lang = asrLanguage || 'zh-CN';

speechRecognition.onstart = () => {
setIsListening(true);
Expand All @@ -1068,7 +1070,7 @@ export const PromptInputSpeechButton = ({
for (let i = event.resultIndex; i < event.results.length; i++) {
const result = event.results[i];
if (result.isFinal) {
finalScript += result[0]?.script ?? '';
finalScript += result[0]?.transcript ?? '';
}
}

Expand Down
Loading
Loading