diff --git a/.env.example b/.env.example index b854b653d..790bf1bb8 100644 --- a/.env.example +++ b/.env.example @@ -52,6 +52,17 @@ GROK_API_KEY= GROK_BASE_URL= GROK_MODELS= +# --- Local LLM Providers ------------------------------------------------------ +# Ollama (local open source models) +OLLAMA_API_KEY= +OLLAMA_BASE_URL= +OLLAMA_MODELS= + +# LLaMA.cpp (local open source models) +LLAMA_CPP_API_KEY= +LLAMA_CPP_BASE_URL= +LLAMA_CPP_MODELS= + # --- TTS (Text-to-Speech) ---------------------------------------------------- TTS_OPENAI_API_KEY= @@ -137,3 +148,23 @@ DEFAULT_MODEL= # LOG_LEVEL=info # LOG_FORMAT=pretty # LLM_THINKING_DISABLED=false + +# --- HTTPS Development Server ------------------------------------------------ +# Enable HTTPS for local development (required for microphone access in Chrome) +# HTTPS_ENABLE=true +# HTTP_PORT=3000 +# HTTPS_PORT=3001 +# HTTPS_CERT_PATH=./localhost.crt +# HTTPS_KEY_PATH=./localhost.key +# HOST=0.0.0.0 + +# --- Local LLM Providers ------------------------------------------------------ +# Ollama (local open source models) +# OLLAMA_API_KEY= +# OLLAMA_BASE_URL=http://localhost:11434/v1 +# OLLAMA_MODELS= + +# LLaMA.cpp (local open source models) +# LLAMA_CPP_API_KEY= +# LLAMA_CPP_BASE_URL=http://localhost:8080/v1 +# LLAMA_CPP_MODELS= diff --git a/app/api/ai/providers/ollama/tags/route.ts b/app/api/ai/providers/ollama/tags/route.ts new file mode 100644 index 000000000..25e159ae6 --- /dev/null +++ b/app/api/ai/providers/ollama/tags/route.ts @@ -0,0 +1,141 @@ +/** + * Ollama API endpoint to get list of available local models + * GET /api/ai/providers/ollama/tags + */ + +import { NextResponse } from 'next/server'; +import type { ModelInfo } from '@/lib/types/provider'; +import { getProvider } from '@/lib/ai/providers'; +import { resolveBaseUrl } from '@/lib/server/provider-config'; + +/** + * Response from Ollama /api/tags endpoint + * @see https://ollama.com/docs/api + */ +interface OllamaModel { + name: string; + model: string; +
/**
 * One entry of the `models` array in the Ollama `/api/tags` response.
 *
 * `details` and its fields are declared optional because the converter below
 * must not crash on an entry that omits them.
 *
 * @see https://ollama.com/docs/api
 */
interface OllamaModel {
  name: string;
  model: string;
  digest: string;
  size: number;
  modified_at: string;
  details?: {
    parameter_size?: string;
    quantization_level?: string;
  };
}

interface OllamaTagsResponse {
  models: OllamaModel[];
}

/** Context window used when the parameter count is unknown or not listed below. */
const DEFAULT_CONTEXT_WINDOW = 8192;

/**
 * Rough context-window estimate keyed by parameter count in whole billions.
 * This is only a heuristic: the real window depends on how the user loaded
 * the model.
 */
const CONTEXT_WINDOW_BY_BILLIONS: ReadonlyMap<number, number> = new Map([
  [1, 32768],
  [2, 32768],
  [7, 128000],
  [8, 128000],
  [13, 128000],
  [14, 128000],
  [30, 200000],
  [34, 200000],
  [70, 200000],
]);

/**
 * Extract the whole-number part of a parameter count expressed in billions.
 *
 * The complete number in front of the `b` suffix is parsed, so `32B` is read
 * as 32 (not as something containing `2b`) and sizes written with a decimal
 * part, such as `8.0B`, are bucketed by their whole-number part.
 *
 * @returns the truncated billions value, or `undefined` when no `<number>b`
 *   token is present (missing value, or a size given in another unit).
 */
function parseWholeBillions(parameterSize: string | undefined): number | undefined {
  const match = /(\d+(?:\.\d+)?)\s*b/i.exec(parameterSize ?? '');
  return match ? Math.floor(Number(match[1])) : undefined;
}

/**
 * Convert Ollama model info to OpenMAIC ModelInfo format.
 *
 * - `contextWindow` is estimated from `details.parameter_size` through
 *   `CONTEXT_WINDOW_BY_BILLIONS`, falling back to `DEFAULT_CONTEXT_WINDOW`.
 * - `outputWindow` is a quarter of the context window, rounded down.
 * - Vision and reasoning support are guessed from the model name only.
 *
 * @param model entry from the `/api/tags` response; `details` may be absent.
 * @returns the model description used by the provider settings.
 */
function convertOllamaModelToModelInfo(model: OllamaModel): ModelInfo {
  const parameterSize = model.details?.parameter_size;
  const quantization = model.details?.quantization_level;

  const billions = parseWholeBillions(parameterSize);
  const contextWindow =
    (billions !== undefined ? CONTEXT_WINDOW_BY_BILLIONS.get(billions) : undefined) ??
    DEFAULT_CONTEXT_WINDOW;

  // Check if model is vision-capable by name
  const hasVision = /vision|vl|multimodal/i.test(model.name);

  // Check if model is a reasoning model (deepseek-r1, etc.)
  const isReasoning = /r1|reason|thinking/i.test(model.name);

  const capabilities: ModelInfo['capabilities'] = {
    streaming: true,
    vision: hasVision,
    tools: !hasVision, // Most Llama-based models support tools except vision-only
  };

  if (isReasoning) {
    capabilities.thinking = {
      toggleable: false,
      budgetAdjustable: false,
      defaultEnabled: true,
    };
  }

  // Only mention the details that are actually present, so the label never
  // contains the literal text "undefined".
  const detailParts = [parameterSize, quantization].filter(Boolean);
  const displayName =
    detailParts.length > 0 ? `${model.name} (${detailParts.join(' ')})` : model.name;

  return {
    id: model.name,
    name: displayName,
    contextWindow,
    outputWindow: Math.floor(contextWindow / 4),
    capabilities,
  };
}

/**
 * GET /api/ai/providers/ollama/tags
 *
 * Fetches `<base>/api/tags` from the configured Ollama server and returns the
 * models converted with `convertOllamaModelToModelInfo` as `{ models }`.
 *
 * Responses:
 * - 404 when the `ollama` provider is not registered,
 * - 400 when neither the server configuration nor the provider supplies a base URL,
 * - the upstream status when Ollama answers with a non-2xx response,
 * - 503 when the request throws (connection failure, 5 second timeout, invalid JSON).
 */
export async function GET() {
  try {
    // Get provider configuration
    const provider = getProvider('ollama');
    if (!provider) {
      return NextResponse.json({ error: 'Ollama provider not configured' }, { status: 404 });
    }

    // Get base URL from environment or use default
    const serverBaseUrl = resolveBaseUrl('ollama');
    const baseUrl = serverBaseUrl || provider.defaultBaseUrl;

    if (!baseUrl) {
      return NextResponse.json({ error: 'Base URL not configured for Ollama' }, { status: 400 });
    }

    // Strip trailing slash and /v1 if present (Ollama's tags endpoint is at /api/tags)
    const cleanBaseUrl = baseUrl.replace(/\/v1\/?$/, '').replace(/\/$/, '');
    const tagsUrl = `${cleanBaseUrl}/api/tags`;

    // Fetch models from local Ollama
    const response = await fetch(tagsUrl, {
      method: 'GET',
      signal: AbortSignal.timeout(5000), // 5 second timeout
    });

    if (!response.ok) {
      return NextResponse.json(
        {
          error: `Failed to connect to Ollama: ${response.status} ${response.statusText}`,
          details: `Is Ollama running at ${cleanBaseUrl}?`,
        },
        { status: response.status },
      );
    }

    const data = (await response.json()) as OllamaTagsResponse;

    // A body without a `models` array is treated as "no models installed".
    if (!data.models || !Array.isArray(data.models)) {
      return NextResponse.json({ models: [] });
    }

    // Convert to OpenMAIC format
    const models: ModelInfo[] = data.models.map(convertOllamaModelToModelInfo);

    return NextResponse.json({ models });
  } catch (error) {
    console.error('[Ollama Tags API] Error:', error);

    const message = error instanceof Error ? error.message : String(error);

    return NextResponse.json(
      {
        error: 'Connection error',
        details: `Could not connect to Ollama: ${message}`,
        hint: 'Please ensure Ollama is running and the Base URL is configured correctly',
      },
      { status: 503 },
    );
  }
}
clientBaseUrl : resolvePDFBaseUrl(effectiveProviderId, baseUrl || undefined), + providerOptions: { + ...(prompt && { prompt }), + }, }; // Convert PDF to buffer diff --git a/app/generation-preview/page.tsx b/app/generation-preview/page.tsx index b5380973b..ea0f16ec4 100644 --- a/app/generation-preview/page.tsx +++ b/app/generation-preview/page.tsx @@ -189,6 +189,9 @@ function GenerationPreviewContent() { if (currentSession.pdfProviderConfig?.baseUrl?.trim()) { parseFormData.append('baseUrl', currentSession.pdfProviderConfig.baseUrl); } + if (currentSession.pdfProviderConfig?.customPrompt?.trim()) { + parseFormData.append('prompt', currentSession.pdfProviderConfig.customPrompt); + } const parseResponse = await fetch('/api/parse-pdf', { method: 'POST', diff --git a/app/generation-preview/types.ts b/app/generation-preview/types.ts index 408ae81fd..82b04c167 100644 --- a/app/generation-preview/types.ts +++ b/app/generation-preview/types.ts @@ -21,7 +21,7 @@ export interface GenerationSessionState { pdfStorageKey?: string; pdfFileName?: string; pdfProviderId?: string; - pdfProviderConfig?: { apiKey?: string; baseUrl?: string }; + pdfProviderConfig?: { apiKey?: string; baseUrl?: string; customPrompt?: string }; // Web search context researchContext?: string; researchSources?: Array<{ title: string; url: string }>; diff --git a/components/ai-elements/prompt-input.tsx b/components/ai-elements/prompt-input.tsx index aa241855f..9311ccaa3 100644 --- a/components/ai-elements/prompt-input.tsx +++ b/components/ai-elements/prompt-input.tsx @@ -1010,7 +1010,7 @@ type SpeechRecognitionResult = { }; type SpeechRecognitionAlternative = { - script: string; + transcript: string; confidence: number; }; @@ -1041,6 +1041,8 @@ export const PromptInputSpeechButton = ({ const [isListening, setIsListening] = useState(false); const [recognition, setRecognition] = useState(null); const recognitionRef = useRef(null); + const { useSettingsStore } = require('@/lib/store/settings'); + const 
asrLanguage = useSettingsStore((state: { asrLanguage: string }) => state.asrLanguage); useEffect(() => { if ( @@ -1052,7 +1054,7 @@ export const PromptInputSpeechButton = ({ speechRecognition.continuous = true; speechRecognition.interimResults = true; - speechRecognition.lang = 'en-US'; + speechRecognition.lang = asrLanguage || 'zh-CN'; speechRecognition.onstart = () => { setIsListening(true); @@ -1068,7 +1070,7 @@ export const PromptInputSpeechButton = ({ for (let i = event.resultIndex; i < event.results.length; i++) { const result = event.results[i]; if (result.isFinal) { - finalScript += result[0]?.script ?? ''; + finalScript += result[0]?.transcript ?? ''; } } diff --git a/components/settings/pdf-settings.tsx b/components/settings/pdf-settings.tsx index bfa43bdda..36f01b4d6 100644 --- a/components/settings/pdf-settings.tsx +++ b/components/settings/pdf-settings.tsx @@ -44,7 +44,8 @@ export function PDFSettings({ selectedProviderId }: PDFSettingsProps) { const isServerConfigured = !!pdfProvidersConfig[selectedProviderId]?.isServerConfigured; const providerConfig = pdfProvidersConfig[selectedProviderId]; const hasBaseUrl = !!providerConfig?.baseUrl; - const needsRemoteConfig = selectedProviderId === 'mineru'; + // All PDF providers now support custom API Key and Base URL configuration + const needsRemoteConfig = true; // Reset state when provider changes const [prevSelectedProviderId, setPrevSelectedProviderId] = useState(selectedProviderId); @@ -98,114 +99,152 @@ export function PDFSettings({ selectedProviderId }: PDFSettingsProps) { )} - {/* Base URL + API Key Configuration (for remote providers like MinerU) */} - {(needsRemoteConfig || isServerConfigured) && ( - <> -
-
- -
- - setPDFProviderConfig(selectedProviderId, { baseUrl: e.target.value }) - } - className="text-sm" - /> - -
+ {/* Base URL + API Key Configuration - all providers support custom configuration */} + <> +
+
+ +
+ + setPDFProviderConfig(selectedProviderId, { baseUrl: e.target.value }) + } + className="text-sm" + /> +
+
-
- -
- - setPDFProviderConfig(selectedProviderId, { - apiKey: e.target.value, - }) - } - className="font-mono text-sm pr-10" - /> - -
+
+ +
+ + setPDFProviderConfig(selectedProviderId, { + apiKey: e.target.value, + }) + } + className="font-mono text-sm pr-10" + /> +
+
+ + {/* Custom OCR prompt (for WiseOCR) */} + {selectedProviderId === 'wiseocr' && ( +
+ + + setPDFProviderConfig(selectedProviderId, { customPrompt: e.target.value }) + } + className="text-sm" + /> +
+ )} - {/* Test result message */} - {testMessage && ( -
-
- {testStatus === 'success' && } - {testStatus === 'error' && } - {testMessage} -
+ {/* Test result message */} + {testMessage && ( +
+
+ {testStatus === 'success' && } + {testStatus === 'error' && } + {testMessage}
- )} - - {/* Request URL Preview */} - {(() => { - const effectiveBaseUrl = providerConfig?.baseUrl || ''; - if (!effectiveBaseUrl) return null; - const fullUrl = effectiveBaseUrl + '/file_parse'; - return ( -

- {t('settings.requestUrl')}: {fullUrl} -

- ); - })()} - - )} +
+ )} + + {/* Request URL Preview - show different endpoints based on provider */} + {(() => { + const effectiveBaseUrl = providerConfig?.baseUrl || ''; + if (!effectiveBaseUrl) return null; + let fullUrl = effectiveBaseUrl; + if (selectedProviderId === 'mineru' || selectedProviderId === 'wiseocr') { + fullUrl = effectiveBaseUrl.replace(/\/$/, '') + '/file_parse'; + } else if (selectedProviderId === 'unpdf') { + fullUrl = effectiveBaseUrl.replace(/\/$/, '') + '/v1/convert'; + } + return ( +

+ {t('settings.requestUrl')}: {fullUrl} +

+ ); + })()} + {/* Features List */}
diff --git a/lib/ai/providers.ts b/lib/ai/providers.ts index 63d4a41a0..db15ad4a3 100644 --- a/lib/ai/providers.ts +++ b/lib/ai/providers.ts @@ -945,6 +945,89 @@ export const PROVIDERS: Record = { }, ], }, + + ollama: { + id: 'ollama', + name: 'Ollama', + type: 'openai', + defaultBaseUrl: 'http://localhost:11434/v1', + requiresApiKey: false, + icon: '/logos/ollama.svg', + models: [ + // Common models pre-configured for selection + { + id: 'llama3.1', + name: 'Llama 3.1', + contextWindow: 128000, + outputWindow: 4096, + capabilities: { streaming: true, tools: true, vision: false }, + }, + { + id: 'llama3.2', + name: 'Llama 3.2', + contextWindow: 128000, + outputWindow: 4096, + capabilities: { streaming: true, tools: true, vision: false }, + }, + { + id: 'llama3.2-vision', + name: 'Llama 3.2 Vision', + contextWindow: 128000, + outputWindow: 4096, + capabilities: { streaming: true, tools: true, vision: true }, + }, + { + id: 'gemma3', + name: 'Gemma 3', + contextWindow: 128000, + outputWindow: 4096, + capabilities: { streaming: true, tools: true, vision: false }, + }, + { + id: 'qwen3', + name: 'Qwen 3', + contextWindow: 128000, + outputWindow: 4096, + capabilities: { streaming: true, tools: true, vision: false }, + }, + { + id: 'deepseek-r1', + name: 'DeepSeek R1', + contextWindow: 128000, + outputWindow: 4096, + capabilities: { + streaming: true, + tools: false, + vision: false, + thinking: { + toggleable: false, + budgetAdjustable: false, + defaultEnabled: true, + }, + }, + }, + ], + }, + + 'llama-cpp': { + id: 'llama-cpp', + name: 'LLaMA.cpp', + type: 'openai', + defaultBaseUrl: 'http://localhost:8080/v1', + requiresApiKey: false, + icon: '/logos/llama-cpp.svg', + models: [ + // LLaMA.cpp doesn't ship with pre-loaded models + // Users can run any model they have loaded + { + id: 'default', + name: 'Default Model', + contextWindow: 8192, + outputWindow: 2048, + capabilities: { streaming: true, tools: false, vision: false }, + }, + ], + }, }; /** diff --git 
a/lib/i18n/settings.ts b/lib/i18n/settings.ts index 3ba0be4f3..74d7d5759 100644 --- a/lib/i18n/settings.ts +++ b/lib/i18n/settings.ts @@ -227,6 +227,7 @@ export const settingsZhCN = { providerOpenAIWhisper: 'OpenAI ASR (gpt-4o-mini-transcribe)', providerBrowserNative: '浏览器原生 ASR', providerQwenASR: 'Qwen ASR(阿里云百炼)', + providerWiseOCR: 'WiseOCR(智诊科技)', providerUnpdf: 'unpdf(内置)', providerMinerU: 'MinerU', browserNativeTTSNote: '浏览器原生 TTS 无需配置,完全免费,使用系统内置语音', @@ -455,6 +456,15 @@ export const settingsZhCN = { pdfFeatures: '支持功能', pdfApiKey: 'API Key', pdfBaseUrl: 'Base URL', + wiseocrDescription: + 'WiseOCR 是智诊科技提供的商用 OCR 服务,基于视觉大模型对文档进行智能识别,支持 PDF 和图片,输出结构化 Markdown。', + wiseocrApiKeyRequired: '使用前需要在 WiseDiag 官网申请 API Key。', + wiseocrWarning: '注意', + wiseocrCostWarning: 'WiseOCR 为商用服务,使用可能产生费用。请查看 WiseDiag 官网了解定价详情。', + enterWiseocrApiKey: '输入 WiseOCR API Key', + wiseocrServerAddress: 'WiseOCR 服务器地址(如:https://openapi.wisediag.com)', + wiseocrApiKeyOptional: '仅在私有部署服务器启用认证时需要', + wiseocrCustomPrompt: '自定义 OCR 提示词', mineruDescription: 'MinerU 是一个商用 PDF 解析服务,支持高级功能如表格提取、公式识别和布局分析。', mineruApiKeyRequired: '使用前需要在 MinerU 官网申请 API Key。', @@ -817,6 +827,7 @@ export const settingsEnUS = { providerOpenAIWhisper: 'OpenAI ASR (gpt-4o-mini-transcribe)', providerBrowserNative: 'Browser Native ASR', providerQwenASR: 'Qwen ASR (Alibaba Cloud Bailian)', + providerWiseOCR: 'WiseOCR (WiseDiag)', providerUnpdf: 'unpdf (Built-in)', providerMinerU: 'MinerU', browserNativeTTSNote: @@ -1046,6 +1057,16 @@ export const settingsEnUS = { pdfFeatures: 'Supported Features', pdfApiKey: 'API Key', pdfBaseUrl: 'Base URL', + wiseocrDescription: + 'WiseOCR is a commercial OCR service provided by WiseDiag, based on vision large model for intelligent document recognition. 
Supports PDF and images, outputs structured Markdown.', + wiseocrApiKeyRequired: 'You need to apply for an API Key on the WiseDiag website before use.', + wiseocrWarning: 'Warning', + wiseocrCostWarning: + 'WiseOCR is a commercial service and may incur fees. Please check the WiseDiag website for pricing details.', + enterWiseocrApiKey: 'Enter WiseOCR API Key', + wiseocrServerAddress: 'WiseOCR server address (e.g., https://openapi.wisediag.com)', + wiseocrApiKeyOptional: 'Only required if private deployment server has authentication enabled', + wiseocrCustomPrompt: 'Custom OCR Prompt', mineruDescription: 'MinerU is a commercial PDF parsing service that supports advanced features such as table extraction, formula recognition, and layout analysis.', mineruApiKeyRequired: 'You need to apply for an API Key on the MinerU website before use.', diff --git a/lib/media/adapters/seedream-adapter.ts b/lib/media/adapters/seedream-adapter.ts index adc07a0bf..b6f1c23ae 100644 --- a/lib/media/adapters/seedream-adapter.ts +++ b/lib/media/adapters/seedream-adapter.ts @@ -83,8 +83,12 @@ export async function generateWithSeedream( options: ImageGenerationOptions, ): Promise { const baseUrl = config.baseUrl || DEFAULT_BASE_URL; + // If baseUrl already ends with /api/v3, don't duplicate it + const fullUrl = baseUrl.endsWith('/api/v3') + ? 
/** Path of the WiseOCR PDF endpoint on a WiseOCR server. */
const WISEOCR_ENDPOINT_PATH = '/v1/ocr/pdf';

/** Official WiseOCR endpoint, used when no base URL is configured. */
const WISEOCR_DEFAULT_ENDPOINT = `https://openapi.wisediag.com${WISEOCR_ENDPOINT_PATH}`;

/**
 * Resolve the URL the OCR request is sent to.
 *
 * - empty / missing value -> the official endpoint;
 * - a bare server address (no path, e.g. `https://openapi.wisediag.com`) ->
 *   that server plus the standard endpoint path, so the request does not go
 *   to the server root;
 * - anything that already carries a path -> used unchanged.
 */
function resolveWiseOCREndpoint(baseUrl?: string): string {
  const trimmed = baseUrl?.trim();
  if (!trimmed) {
    return WISEOCR_DEFAULT_ENDPOINT;
  }

  try {
    const parsed = new URL(trimmed);
    if (parsed.pathname === '' || parsed.pathname === '/') {
      return `${parsed.origin}${WISEOCR_ENDPOINT_PATH}${parsed.search}`;
    }
  } catch {
    // Not an absolute URL: pass it through unchanged and let fetch report it.
  }

  return trimmed;
}

/**
 * Parse PDF using WiseOCR API (WiseDiag)
 *
 * Official WiseOCR API endpoint:
 * POST https://openapi.wisediag.com/v1/ocr/pdf
 *
 * The PDF is uploaded as multipart form data (`file`, `dpi`, optional
 * `prompt`) with a bearer token, and the `markdown` field of the JSON reply
 * becomes the parsed text.
 *
 * @param config parser configuration; `apiKey` is required, `baseUrl` and
 *   `providerOptions.prompt` are optional.
 * @param pdfBuffer raw PDF bytes.
 * @returns the markdown as `text`, an empty `images` list and metadata taken
 *   from the reply (`total_pages`, `elapsed_seconds`, `usage`).
 * @throws Error when no API key is configured or the API answers with a
 *   non-2xx status.
 *
 * @see https://api-docs.wisediag.com/wiseocr
 */
async function parseWithWiseOCR(
  config: PDFParserConfig,
  pdfBuffer: Buffer,
): Promise<ParsedPdfContent> {
  if (!config.apiKey) {
    throw new Error(
      'WiseOCR API key is required. ' +
        'Please get your API key from https://www.wisediag.com/wiseocr',
    );
  }

  log.info('[WiseOCR] Parsing PDF with WiseOCR API');

  const fileName = 'document.pdf';

  // Create FormData for file upload
  const formData = new FormData();

  // Convert Buffer to Blob. The slice copies exactly this buffer's bytes,
  // because a Buffer may be a view into a larger ArrayBuffer.
  const arrayBuffer = pdfBuffer.buffer.slice(
    pdfBuffer.byteOffset,
    pdfBuffer.byteOffset + pdfBuffer.byteLength,
  );
  const blob = new Blob([arrayBuffer as ArrayBuffer], {
    type: 'application/pdf',
  });
  formData.append('file', blob, fileName);

  // Use default DPI 200 (recommended by WiseOCR)
  formData.append('dpi', '200');

  // Add optional prompt parameter for custom OCR instructions
  if (config.providerOptions?.prompt) {
    formData.append('prompt', config.providerOptions.prompt);
  }

  // Authorization header
  const headers: Record<string, string> = {
    Authorization: `Bearer ${config.apiKey}`,
  };

  // Use custom base URL if provided, otherwise default to official API
  const apiUrl = resolveWiseOCREndpoint(config.baseUrl);

  // POST to WiseOCR API
  const response = await fetch(apiUrl, {
    method: 'POST',
    headers,
    body: formData,
  });

  if (!response.ok) {
    const errorText = await response.text().catch(() => response.statusText);
    throw new Error(`WiseOCR API error (${response.status}): ${errorText}`);
  }

  const json = await response.json();

  // Extract result. The optional chaining and type checks keep a `null` or
  // unexpected body from throwing; such a reply yields empty text and 0 pages.
  const markdown: string = typeof json?.markdown === 'string' ? json.markdown : '';
  const pageCount: number = Number(json?.total_pages) || 0;

  log.info(
    `[WiseOCR] Parsed successfully: ${pageCount} pages, ` +
      `${markdown.length} chars of markdown`,
  );

  // The markdown is returned as the text; no separate image list is built here.
  return {
    text: markdown,
    images: [], // WiseOCR embeds images directly in markdown
    metadata: {
      pageCount,
      parser: 'wiseocr',
      elapsedSeconds: json?.elapsed_seconds,
      usage: json?.usage,
    },
  };
}
wiseocr: { apiKey: '', baseUrl: 'https://openapi.wisediag.com/v1/ocr/pdf', customPrompt: '', enabled: false }, unpdf: { apiKey: '', baseUrl: '', enabled: true }, mineru: { apiKey: '', baseUrl: '', enabled: false }, - } as Record, + } as Record, }); // Initialize default Image config diff --git a/lib/types/provider.ts b/lib/types/provider.ts index 007688877..ee2386ca0 100644 --- a/lib/types/provider.ts +++ b/lib/types/provider.ts @@ -16,7 +16,9 @@ export type BuiltInProviderId = | 'glm' | 'siliconflow' | 'doubao' - | 'grok'; + | 'grok' + | 'ollama' + | 'llama-cpp'; /** * Provider ID (built-in or custom) diff --git a/next.config.ts b/next.config.ts index f84e6f45d..d532eea6a 100644 --- a/next.config.ts +++ b/next.config.ts @@ -7,6 +7,14 @@ const nextConfig: NextConfig = { experimental: { proxyClientMaxBodySize: '200mb', }, + env: { + HTTPS_ENABLE: process.env.HTTPS_ENABLE, + HTTP_PORT: process.env.HTTP_PORT, + HTTPS_PORT: process.env.HTTPS_PORT, + HTTPS_CERT_PATH: process.env.HTTPS_CERT_PATH, + HTTPS_KEY_PATH: process.env.HTTPS_KEY_PATH, + HOST: process.env.HOST, + }, }; export default nextConfig; diff --git a/package.json b/package.json index 55835ca15..ddc809620 100644 --- a/package.json +++ b/package.json @@ -8,9 +8,10 @@ }, "scripts": { "postinstall": "cd packages/mathml2omml && npm run build && cd ../pptxgenjs && npm run build", - "dev": "next dev", + "dev": "node server.js", + "dev:https": "HTTPS_ENABLE=true node server.js", "build": "next build", - "start": "next start", + "start": "node server.js", "lint": "eslint", "check": "prettier . --check", "format": "prettier . 
/**
 * Report whether a dotted-quad IPv4 address lies in one of the private
 * (intranet) ranges:
 *   10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16
 *
 * Anything that is not exactly four decimal octets in 0-255 is rejected, so
 * malformed input such as "10" is not reported as private.
 *
 * @param {string} ip address to test
 * @returns {boolean} true only for a well-formed address inside those ranges
 */
function isPrivateIP(ip) {
  const parts = String(ip).split('.');
  if (parts.length !== 4) {
    return false;
  }

  const wellFormed = parts.every((part) => /^\d{1,3}$/.test(part) && Number(part) <= 255);
  if (!wellFormed) {
    return false;
  }

  const [first, second] = parts.map(Number);

  if (first === 10) {
    return true;
  }

  if (first === 172 && second >= 16 && second <= 31) {
    return true;
  }

  return first === 192 && second === 168;
}

/**
 * Collect the private IPv4 addresses of all non-internal network interfaces.
 *
 * Relies on the `os` module required at the top of the script.
 *
 * @returns {string[]} addresses in the order the interfaces are enumerated
 */
function getPrivateIPs() {
  const interfaces = os.networkInterfaces();
  const ips = [];

  for (const name of Object.keys(interfaces)) {
    const iface = interfaces[name] || [];
    for (const addr of iface) {
      // Some Node.js 18 releases report the family as the number 4 instead of 'IPv4'.
      const isIPv4 = addr.family === 'IPv4' || addr.family === 4;
      if (isIPv4 && !addr.internal && isPrivateIP(addr.address)) {
        ips.push(addr.address);
      }
    }
  }

  return ips;
}
getPrivateIPs(); +console.log(ips.join(' ')); diff --git a/scripts/setup-https.sh b/scripts/setup-https.sh new file mode 100755 index 000000000..04c8c34de --- /dev/null +++ b/scripts/setup-https.sh @@ -0,0 +1,55 @@ +#!/bin/bash +set -e + +echo "🔐 OpenMAIC HTTPS 证书生成工具" +echo "==================================" + +# 检测 mkcert 是否安装 +if ! command -v mkcert &> /dev/null; then + echo "❌ mkcert 未安装,请先安装 mkcert:" + echo "" + echo " macOS: brew install mkcert" + echo " Ubuntu/Debian: sudo apt install mkcert" + echo " CentOS/RHEL: 请参考 https://github.com/FiloSottile/mkcert#installation" + echo " Windows: choco install mkcert 或者 scoop install mkcert" + echo "" + echo "安装后请运行: mkcert -install" + exit 1 +fi + +echo "✅ mkcert 已检测到" + +# 初始化本地 CA +echo "🔧 初始化本地 CA..." +mkcert -install + +# 获取局域网 IP +echo "🌐 检测局域网 IP 地址..." +LAN_IPS=$(node "$(dirname "$0")/detect-lan-ip.js") +echo "📋 检测到内网 IP: $LAN_IPS" + +# 构建域名列表 +DOMAINS="localhost 127.0.0.1 $LAN_IPS" +echo "📋 将为以下域名生成证书: $DOMAINS" + +# 生成证书到项目根目录 +OUTPUT_DIR="$(dirname "$0")/.." +cd "$OUTPUT_DIR" + +echo "🚀 生成证书..." +mkcert -cert-file localhost.crt -key-file localhost.key $DOMAINS + +echo "" +echo "✅ 证书生成完成!" +echo "📍 证书位置: $(pwd)/localhost.crt" +echo "📍 私钥位置: $(pwd)/localhost.key" +echo "" +echo "📝 下一步操作:" +echo " 1. 编辑 .env 文件,添加 HTTPS_ENABLE=true" +echo " 2. 运行 pnpm dev 启动服务(同时启动 HTTP 和 HTTPS)" +echo " 3. 
// Custom Next.js server: always serves HTTP and, when HTTPS_ENABLE is set,
// also serves HTTPS with a locally generated certificate.

// Load environment variables from .env
require('dotenv').config();

const { createServer } = require('http');
const { createServer: createHttpsServer } = require('https');
const { readFileSync } = require('fs');
const { parse } = require('url');
const next = require('next');

/**
 * Read a TCP port from the environment.
 *
 * @param {string} name environment variable name
 * @param {number} fallback port used when the variable is unset or empty
 * @returns {number} the configured port; the process exits with code 1 when
 *   the value is not an integer in 0-65535
 */
function readPort(name, fallback) {
  const raw = process.env[name];
  if (!raw) {
    return fallback;
  }

  const port = Number(raw);
  if (!Number.isInteger(port) || port < 0 || port > 65535) {
    console.error(`❌ 端口配置无效: ${name}=${raw}`);
    process.exit(1);
  }
  return port;
}

// Read configuration from environment variables
const dev = process.env.NODE_ENV !== 'production';
const httpsEnabled = process.env.HTTPS_ENABLE === 'true' || process.env.HTTPS_ENABLE === '1';
const httpPort = readPort('HTTP_PORT', 3000);
const httpsPort = readPort('HTTPS_PORT', 3001);
const host = process.env.HOST || '0.0.0.0';
const certPath = process.env.HTTPS_CERT_PATH || './localhost.crt';
const keyPath = process.env.HTTPS_KEY_PATH || './localhost.key';

// Host shown in the "LAN access" hint: 0.0.0.0 is a bind address, not a URL host.
const displayHost = host === '0.0.0.0' ? 'your-lan-ip' : host;

// Initialize Next.js
const app = next({ dev, hostname: host, port: httpPort });
const handle = app.getRequestHandler();

/**
 * Request listener shared by the HTTP and HTTPS servers. A failure inside the
 * Next.js handler is logged and answered with a 500 instead of surfacing as
 * an unhandled rejection.
 */
async function handleRequest(req, res) {
  try {
    const parsedUrl = parse(req.url, true);
    await handle(req, res, parsedUrl);
  } catch (err) {
    console.error('Error occurred handling', req.url, err);
    res.statusCode = 500;
    res.end('internal server error');
  }
}

/**
 * Build an 'error' listener for a server, so that a listen failure (for
 * example a port already in use) prints a short message and exits rather
 * than crashing on an unhandled 'error' event.
 */
function exitOnListenError(label, port) {
  return (err) => {
    console.error(`❌ ${label} 服务器启动失败 (端口 ${port}): ${err.message}`);
    process.exit(1);
  };
}

app
  .prepare()
  .then(() => {
    // The HTTP server is always started
    const httpServer = createServer(handleRequest);
    httpServer.on('error', exitOnListenError('HTTP', httpPort));

    httpServer.listen(httpPort, host, () => {
      console.log(`✅ HTTP 服务器已启动`);
      console.log(`📍 本地访问: http://localhost:${httpPort}`);
      console.log(`📍 局域网访问: http://${displayHost}:${httpPort}`);
      console.log('');
    });

    // The HTTPS server is started only when enabled
    if (!httpsEnabled) {
      console.log(`ℹ️ HTTPS 未启用 (设置 HTTPS_ENABLE=true 启用)`);
      console.log('');
      return;
    }

    // Only the file reads are guarded here, so the "certificate could not be
    // read" message is printed for exactly that failure.
    let cert;
    let key;
    try {
      cert = readFileSync(certPath);
      key = readFileSync(keyPath);
    } catch (err) {
      console.error(`❌ HTTPS 证书文件读取失败`);
      console.error(` 证书路径: ${certPath}`);
      console.error(` 私钥路径: ${keyPath}`);
      console.error(` 错误信息: ${err.message}`);
      console.error('');
      console.error(`👉 请先运行: ./scripts/setup-https.sh 生成证书`);
      console.error('');
      console.error(`👉 如果证书位置自定义,请在 .env 文件中设置 HTTPS_CERT_PATH 和 HTTPS_KEY_PATH`);
      console.error('');
      process.exit(1);
    }

    const httpsServer = createHttpsServer({ cert, key }, handleRequest);
    httpsServer.on('error', exitOnListenError('HTTPS', httpsPort));

    httpsServer.listen(httpsPort, host, () => {
      console.log(`🔐 HTTPS 服务器已启动`);
      console.log(`📍 本地访问: https://localhost:${httpsPort}`);
      console.log(`📍 局域网访问: https://${displayHost}:${httpsPort}`);
      console.log('');
      console.log(`📝 提示: 如果浏览器提示不安全,请确保已运行 mkcert -install 信任本地 CA`);
      console.log('');
    });
  })
  .catch((err) => {
    console.error('❌ 启动失败:', err);
    process.exit(1);
  });