-
-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy path: cortex-scout.json
More file actions
23 lines (23 loc) · 1.74 KB
/
cortex-scout.json
File metadata and controls
23 lines (23 loc) · 1.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
{
"_comment": "Cortex Scout configuration. All fields are optional — env vars (and hardcoded defaults) are used as fallback for any missing key.",
"deep_research": {
"_doc": {
"enabled": "Set false to hide the deep_research tool entirely (same as DEEP_RESEARCH_ENABLED=0).",
"llm_base_url": "LLM endpoint. OpenAI default: https://api.openai.com/v1. Ollama: http://localhost:11434/v1. LM Studio: http://localhost:1234/v1.",
"llm_api_key": "API key. Leave blank ('') for key-less local endpoints (Ollama/LM Studio). Env fallback: OPENAI_API_KEY.",
"llm_model": "Model name. Env fallback: DEEP_RESEARCH_LLM_MODEL. Default: gpt-4o-mini.",
      "synthesis_enabled": "Set false to run search+scrape only and skip LLM synthesis entirely (same as DEEP_RESEARCH_SYNTHESIS=0). Env fallback: DEEP_RESEARCH_SYNTHESIS.",
"synthesis_max_sources": "Max source docs fed to the LLM. Env fallback: DEEP_RESEARCH_SYNTHESIS_MAX_SOURCES. Default: 8. For local 4B-class Ollama models, 1-2 is usually more reliable than 3+.",
"synthesis_max_chars_per_source": "Max chars per source document. Env fallback: DEEP_RESEARCH_SYNTHESIS_MAX_CHARS_PER_SOURCE. Default: 2500. For local 4B-class Ollama models, 600-1000 chars per source is a safer range.",
"synthesis_max_tokens": "Max tokens in the LLM response. Tune per model: 512-1024 for small 4k-ctx models, 2048+ for large models. Env fallback: DEEP_RESEARCH_SYNTHESIS_MAX_TOKENS. Default: 1024. For local 4B-class Ollama models, 512-768 is typically more stable."
},
"enabled": true,
"llm_base_url": "http://localhost:11434/v1",
"llm_api_key": "",
"llm_model": "qwen3.5:4b",
"synthesis_enabled": true,
"synthesis_max_sources": 2,
"synthesis_max_chars_per_source": 800,
"synthesis_max_tokens": 768
}
}