From ebd72ce127c868b8034c4e1699d0aa11058ac84b Mon Sep 17 00:00:00 2001 From: Yuang-Deng Date: Fri, 27 Feb 2026 14:22:18 +0800 Subject: [PATCH 1/7] feat(llm): add lazyllm onlinechat backend for chat model provider --- docker/.env.example | 8 +- src/memos/api/config.py | 21 +++++ src/memos/configs/llm.py | 13 +++ src/memos/llms/factory.py | 2 + src/memos/llms/lazyllm_onlinechat.py | 114 ++++++++++++++++++++++++++ src/memos/mem_os/product.py | 6 +- tests/configs/test_llm.py | 40 ++++++++- tests/llms/test_lazyllm_onlinechat.py | 80 ++++++++++++++++++ 8 files changed, 280 insertions(+), 4 deletions(-) create mode 100644 src/memos/llms/lazyllm_onlinechat.py create mode 100644 tests/llms/test_lazyllm_onlinechat.py diff --git a/docker/.env.example b/docker/.env.example index 3674cd69b..46f03ca8b 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -25,9 +25,15 @@ MOS_MAX_TOKENS=2048 # Top-P for LLM in the Product API MOS_TOP_P=0.9 # LLM for the Product API backend -MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm +MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm | lazyllm OPENAI_API_KEY=sk-xxx # [required] when provider=openai OPENAI_API_BASE=https://api.openai.com/v1 # [required] base for the key +# LazyLLM backend options (optional, used when provider=lazyllm) +MOS_LAZYLLM_SOURCE=openai +MOS_LAZYLLM_API_KEY=${OPENAI_API_KEY} +MOS_LAZYLLM_API_BASE=${OPENAI_API_BASE} +MOS_LAZYLLM_STREAM=false +MOS_LAZYLLM_SKIP_AUTH=false ## MemReader / retrieval LLM MEMRADER_MODEL=gpt-4o-mini diff --git a/src/memos/api/config.py b/src/memos/api/config.py index 65049b0c2..e4613b08f 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -297,6 +297,23 @@ def vllm_config() -> dict[str, Any]: "model_schema": os.getenv("MOS_MODEL_SCHEMA", "memos.configs.llm.VLLMLLMConfig"), } + @staticmethod + def lazyllm_config() -> dict[str, Any]: + """Get LazyLLM OnlineChat configuration.""" + return { + "model_name_or_path": os.getenv("MOS_CHAT_MODEL", 
"gpt-4o-mini"), + "temperature": float(os.getenv("MOS_CHAT_TEMPERATURE", "0.8")), + "max_tokens": int(os.getenv("MOS_MAX_TOKENS", "8000")), + "top_p": float(os.getenv("MOS_TOP_P", "0.9")), + "top_k": int(os.getenv("MOS_TOP_K", "50")), + "remove_think_prefix": True, + "source": os.getenv("MOS_LAZYLLM_SOURCE", "openai"), + "api_key": os.getenv("MOS_LAZYLLM_API_KEY", os.getenv("OPENAI_API_KEY")), + "api_base": os.getenv("MOS_LAZYLLM_API_BASE", os.getenv("OPENAI_API_BASE")), + "stream": os.getenv("MOS_LAZYLLM_STREAM", "false").lower() == "true", + "skip_auth": os.getenv("MOS_LAZYLLM_SKIP_AUTH", "false").lower() == "true", + } + @staticmethod def get_activation_config() -> dict[str, Any]: """Get Ollama configuration.""" @@ -786,12 +803,14 @@ def get_product_default_config() -> dict[str, Any]: openai_config = APIConfig.get_openai_config() qwen_config = APIConfig.qwen_config() vllm_config = APIConfig.vllm_config() + lazyllm_config = APIConfig.lazyllm_config() reader_config = APIConfig.get_reader_config() backend_model = { "openai": openai_config, "huggingface": qwen_config, "vllm": vllm_config, + "lazyllm": lazyllm_config, } backend = os.getenv("MOS_CHAT_MODEL_PROVIDER", "openai") mysql_config = APIConfig.get_mysql_config() @@ -905,6 +924,7 @@ def create_user_config(user_name: str, user_id: str) -> tuple["MOSConfig", "Gene openai_config = APIConfig.get_openai_config() qwen_config = APIConfig.qwen_config() vllm_config = APIConfig.vllm_config() + lazyllm_config = APIConfig.lazyllm_config() mysql_config = APIConfig.get_mysql_config() reader_config = APIConfig.get_reader_config() backend = os.getenv("MOS_CHAT_MODEL_PROVIDER", "openai") @@ -912,6 +932,7 @@ def create_user_config(user_name: str, user_id: str) -> tuple["MOSConfig", "Gene "openai": openai_config, "huggingface": qwen_config, "vllm": vllm_config, + "lazyllm": lazyllm_config, } # Create MOSConfig config_dict = { diff --git a/src/memos/configs/llm.py b/src/memos/configs/llm.py index 5487d117c..da03ef3fa 100644 --- 
a/src/memos/configs/llm.py +++ b/src/memos/configs/llm.py @@ -119,6 +119,18 @@ class VLLMLLMConfig(BaseLLMConfig): extra_body: Any = Field(default=None, description="Extra options for API") +class LazyLLMOnlineChatConfig(BaseLLMConfig): + source: str = Field(default="openai", description="LazyLLM online source name") + api_key: str | None = Field(default=None, description="API key for LazyLLM online source") + api_base: str | None = Field(default=None, description="Base URL for LazyLLM online source") + stream: bool = Field(default=False, description="Enable stream mode in LazyLLM module") + skip_auth: bool = Field(default=False, description="Skip LazyLLM API key validation") + type: str | None = Field(default=None, description="Optional model type for LazyLLM module") + extra_kwargs: dict[str, Any] | None = Field( + default=None, description="Extra kwargs for lazyllm.OnlineChatModule" + ) + + class LLMConfigFactory(BaseConfig): """Factory class for creating LLM configurations.""" @@ -135,6 +147,7 @@ class LLMConfigFactory(BaseConfig): "qwen": QwenLLMConfig, "deepseek": DeepSeekLLMConfig, "openai_new": OpenAIResponsesLLMConfig, + "lazyllm": LazyLLMOnlineChatConfig, } @field_validator("backend") diff --git a/src/memos/llms/factory.py b/src/memos/llms/factory.py index 8f4da662f..f85f9a5a3 100644 --- a/src/memos/llms/factory.py +++ b/src/memos/llms/factory.py @@ -5,6 +5,7 @@ from memos.llms.deepseek import DeepSeekLLM from memos.llms.hf import HFLLM from memos.llms.hf_singleton import HFSingletonLLM +from memos.llms.lazyllm_onlinechat import LazyLLMOnlineChatLLM from memos.llms.ollama import OllamaLLM from memos.llms.openai import AzureLLM, OpenAILLM from memos.llms.openai_new import OpenAIResponsesLLM @@ -26,6 +27,7 @@ class LLMFactory(BaseLLM): "qwen": QwenLLM, "deepseek": DeepSeekLLM, "openai_new": OpenAIResponsesLLM, + "lazyllm": LazyLLMOnlineChatLLM, } @classmethod diff --git a/src/memos/llms/lazyllm_onlinechat.py b/src/memos/llms/lazyllm_onlinechat.py new file 
mode 100644 index 000000000..5325c84f6 --- /dev/null +++ b/src/memos/llms/lazyllm_onlinechat.py @@ -0,0 +1,114 @@ +import json + +from collections.abc import Generator +from typing import Any + +from memos.configs.llm import LazyLLMOnlineChatConfig +from memos.llms.base import BaseLLM +from memos.llms.utils import remove_thinking_tags +from memos.log import get_logger +from memos.types import MessageList + + +logger = get_logger(__name__) + + +class LazyLLMOnlineChatLLM(BaseLLM): + """LazyLLM OnlineChat backend.""" + + def __init__(self, config: LazyLLMOnlineChatConfig): + self.config = config + try: + import lazyllm + except ImportError as exc: + raise ImportError( + "LazyLLM backend requires `lazyllm`. " + "Install with: pip install 'git+https://github.com/LazyAGI/LazyLLM.git@main'" + ) from exc + + module_kwargs: dict[str, Any] = { + "source": config.source, + "model": config.model_name_or_path, + "stream": config.stream, + "skip_auth": config.skip_auth, + } + if config.api_base: + module_kwargs["base_url"] = config.api_base + if config.api_key: + module_kwargs["api_key"] = config.api_key + if config.type: + module_kwargs["type"] = config.type + if config.extra_kwargs: + module_kwargs.update(config.extra_kwargs) + + self.client = lazyllm.OnlineChatModule(**module_kwargs) + logger.info("LazyLLM OnlineChat LLM instance initialized") + + def _normalize_messages(self, messages: MessageList | str) -> MessageList: + if isinstance(messages, str): + return [{"role": "user", "content": messages}] + return messages + + def generate(self, messages: MessageList | str, **kwargs) -> str | list[dict]: + normalized_messages = self._normalize_messages(messages) + runtime_model = kwargs.get("model_name_or_path", self.config.model_name_or_path) + + request_kwargs: dict[str, Any] = { + "messages": normalized_messages, + "stream_output": False, + "model_name": runtime_model, + "temperature": kwargs.get("temperature", self.config.temperature), + "max_tokens": kwargs.get("max_tokens", 
self.config.max_tokens), + "top_p": kwargs.get("top_p", self.config.top_p), + "top_k": kwargs.get("top_k", self.config.top_k), + } + if kwargs.get("tools"): + request_kwargs["tools"] = kwargs["tools"] + + response = self.client("", **request_kwargs) + if isinstance(response, dict): + tool_calls = response.get("tool_calls") + if isinstance(tool_calls, list) and len(tool_calls) > 0: + return self.tool_call_parser(tool_calls) + response_content = response.get("content", "") + reasoning_content = response.get("reasoning_content") + if isinstance(reasoning_content, str) and reasoning_content: + reasoning_content = f"{reasoning_content}" + if self.config.remove_think_prefix: + return remove_thinking_tags(response_content) + if reasoning_content: + return reasoning_content + (response_content or "") + return response_content or "" + if isinstance(response, str): + return remove_thinking_tags(response) if self.config.remove_think_prefix else response + return str(response) + + def generate_stream(self, messages: MessageList | str, **kwargs) -> Generator[str, None, None]: + if kwargs.get("tools"): + logger.info("stream api not support tools") + return + + response = self.generate(messages, **kwargs) + if isinstance(response, str): + yield response + return + yield json.dumps(response, ensure_ascii=False) + + def tool_call_parser(self, tool_calls: list[dict]) -> list[dict]: + parsed_calls = [] + for tool_call in tool_calls: + function_data = tool_call.get("function", {}) + arguments = function_data.get("arguments", {}) + if isinstance(arguments, str): + try: + arguments = json.loads(arguments) + except json.JSONDecodeError: + pass + parsed_calls.append( + { + "tool_call_id": tool_call.get("id", ""), + "function_name": function_data.get("name", ""), + "arguments": arguments, + } + ) + return parsed_calls diff --git a/src/memos/mem_os/product.py b/src/memos/mem_os/product.py index b2c74c384..1e5e48f49 100644 --- a/src/memos/mem_os/product.py +++ b/src/memos/mem_os/product.py 
@@ -1201,8 +1201,10 @@ def chat_with_references( ) elif self.config.chat_model.backend == "vllm": response_stream = self.chat_llm.generate_stream(current_messages) + else: + response_stream = self.chat_llm.generate_stream(current_messages) else: - if self.config.chat_model.backend in ["huggingface", "vllm", "openai"]: + if self.config.chat_model.backend in ["huggingface", "vllm", "openai", "lazyllm"]: response_stream = self.chat_llm.generate_stream(current_messages) else: response_stream = self.chat_llm.generate(current_messages) @@ -1219,7 +1221,7 @@ def chat_with_references( full_response = "" token_count = 0 # Use tiktoken for proper token-based chunking - if self.config.chat_model.backend not in ["huggingface", "vllm", "openai"]: + if self.config.chat_model.backend not in ["huggingface", "vllm", "openai", "lazyllm"]: # For non-huggingface backends, we need to collect the full response first full_response_text = "" for chunk in response_stream: diff --git a/tests/configs/test_llm.py b/tests/configs/test_llm.py index 6562c9a95..5ad84550a 100644 --- a/tests/configs/test_llm.py +++ b/tests/configs/test_llm.py @@ -1,6 +1,7 @@ from memos.configs.llm import ( BaseLLMConfig, HFLLMConfig, + LazyLLMOnlineChatConfig, LLMConfigFactory, OllamaLLMConfig, OpenAILLMConfig, @@ -140,10 +141,47 @@ def test_hf_llm_config(): check_config_instantiation_invalid(HFLLMConfig) +def test_lazyllm_online_chat_config(): + check_config_base_class( + LazyLLMOnlineChatConfig, + required_fields=[ + "model_name_or_path", + ], + optional_fields=[ + "temperature", + "max_tokens", + "top_p", + "top_k", + "remove_think_prefix", + "default_headers", + "source", + "api_key", + "api_base", + "stream", + "skip_auth", + "type", + "extra_kwargs", + ], + ) + + check_config_instantiation_valid( + LazyLLMOnlineChatConfig, + { + "model_name_or_path": "gpt-4o-mini", + "source": "openai", + "api_key": "sk-test", + "api_base": "https://api.openai.com/v1", + "stream": False, + }, + ) + + 
check_config_instantiation_invalid(LazyLLMOnlineChatConfig) + + def test_llm_config_factory(): check_config_factory_class( LLMConfigFactory, - expected_backends=["openai", "ollama", "huggingface"], + expected_backends=["openai", "ollama", "huggingface", "lazyllm"], ) check_config_instantiation_valid( diff --git a/tests/llms/test_lazyllm_onlinechat.py b/tests/llms/test_lazyllm_onlinechat.py new file mode 100644 index 000000000..59cf2796b --- /dev/null +++ b/tests/llms/test_lazyllm_onlinechat.py @@ -0,0 +1,80 @@ +import sys +import unittest + +from types import SimpleNamespace +from unittest.mock import MagicMock + +from memos.configs.llm import LLMConfigFactory +from memos.llms.factory import LLMFactory + + +class TestLazyLLMOnlineChatBackend(unittest.TestCase): + def test_generate_with_mocked_lazyllm_backend(self): + """Test LLMFactory with mocked lazyllm backend.""" + mock_client = MagicMock() + mock_client.return_value = {"content": "Hello from LazyLLM", "tool_calls": None} + + mock_lazyllm = SimpleNamespace(OnlineChatModule=MagicMock(return_value=mock_client)) + original_lazyllm = sys.modules.get("lazyllm") + sys.modules["lazyllm"] = mock_lazyllm + try: + config = LLMConfigFactory.model_validate( + { + "backend": "lazyllm", + "config": { + "model_name_or_path": "gpt-4o-mini", + "source": "openai", + "api_key": "sk-xxxx", + "api_base": "https://api.openai.com/v1", + }, + } + ) + llm = LLMFactory.from_config(config) + response = llm.generate([{"role": "user", "content": "hello"}]) + self.assertEqual(response, "Hello from LazyLLM") + finally: + if original_lazyllm is None: + sys.modules.pop("lazyllm", None) + else: + sys.modules["lazyllm"] = original_lazyllm + + def test_generate_with_tool_calls(self): + """Test lazyllm tool call parser compatibility.""" + mock_client = MagicMock() + mock_client.return_value = { + "content": None, + "tool_calls": [ + { + "id": "call_1", + "function": {"name": "search", "arguments": '{"query":"memos"}'}, + } + ], + } + + 
mock_lazyllm = SimpleNamespace(OnlineChatModule=MagicMock(return_value=mock_client)) + original_lazyllm = sys.modules.get("lazyllm") + sys.modules["lazyllm"] = mock_lazyllm + try: + config = LLMConfigFactory.model_validate( + { + "backend": "lazyllm", + "config": {"model_name_or_path": "gpt-4o-mini"}, + } + ) + llm = LLMFactory.from_config(config) + response = llm.generate([{"role": "user", "content": "search memos"}]) + self.assertEqual( + response, + [ + { + "tool_call_id": "call_1", + "function_name": "search", + "arguments": {"query": "memos"}, + } + ], + ) + finally: + if original_lazyllm is None: + sys.modules.pop("lazyllm", None) + else: + sys.modules["lazyllm"] = original_lazyllm From b3202b0f9d2dec01acc0bdd75a95b822c77d5559 Mon Sep 17 00:00:00 2001 From: Yuang-Deng Date: Fri, 27 Feb 2026 14:39:07 +0800 Subject: [PATCH 2/7] fix(llm): align lazyllm backend parser with lint rules --- src/memos/llms/lazyllm_onlinechat.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/memos/llms/lazyllm_onlinechat.py b/src/memos/llms/lazyllm_onlinechat.py index 5325c84f6..fa3f63980 100644 --- a/src/memos/llms/lazyllm_onlinechat.py +++ b/src/memos/llms/lazyllm_onlinechat.py @@ -1,6 +1,7 @@ import json from collections.abc import Generator +from contextlib import suppress from typing import Any from memos.configs.llm import LazyLLMOnlineChatConfig @@ -100,10 +101,8 @@ def tool_call_parser(self, tool_calls: list[dict]) -> list[dict]: function_data = tool_call.get("function", {}) arguments = function_data.get("arguments", {}) if isinstance(arguments, str): - try: + with suppress(json.JSONDecodeError): arguments = json.loads(arguments) - except json.JSONDecodeError: - pass parsed_calls.append( { "tool_call_id": tool_call.get("id", ""), From b4d1c8f432f2a6b3c40b3580b51a7e47f3a6fd3e Mon Sep 17 00:00:00 2001 From: Yuang-Deng Date: Fri, 27 Feb 2026 14:48:31 +0800 Subject: [PATCH 3/7] feat(llm): switch lazyllm api-key namespace prefix to MEMOS --- 
docker/.env.example | 6 +++++- src/memos/api/config.py | 3 ++- src/memos/configs/llm.py | 1 + src/memos/llms/lazyllm_onlinechat.py | 2 +- tests/configs/test_llm.py | 2 ++ tests/llms/test_lazyllm_onlinechat.py | 8 ++++++-- 6 files changed, 17 insertions(+), 5 deletions(-) diff --git a/docker/.env.example b/docker/.env.example index 46f03ca8b..424800c78 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -29,11 +29,15 @@ MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm | lazyl OPENAI_API_KEY=sk-xxx # [required] when provider=openai OPENAI_API_BASE=https://api.openai.com/v1 # [required] base for the key # LazyLLM backend options (optional, used when provider=lazyllm) +MOS_LAZYLLM_NAMESPACE=memos MOS_LAZYLLM_SOURCE=openai -MOS_LAZYLLM_API_KEY=${OPENAI_API_KEY} MOS_LAZYLLM_API_BASE=${OPENAI_API_BASE} MOS_LAZYLLM_STREAM=false MOS_LAZYLLM_SKIP_AUTH=false +# Optional override to force a single key (bypasses namespace supplier key lookup) +MOS_MEMOS_API_KEY= +# LazyLLM namespace API keys (namespace = memos => MEMOS_*_API_KEY) +MEMOS_OPENAI_API_KEY=${OPENAI_API_KEY} ## MemReader / retrieval LLM MEMRADER_MODEL=gpt-4o-mini diff --git a/src/memos/api/config.py b/src/memos/api/config.py index e4613b08f..72ded20ac 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -307,8 +307,9 @@ def lazyllm_config() -> dict[str, Any]: "top_p": float(os.getenv("MOS_TOP_P", "0.9")), "top_k": int(os.getenv("MOS_TOP_K", "50")), "remove_think_prefix": True, + "namespace": os.getenv("MOS_LAZYLLM_NAMESPACE", "memos"), "source": os.getenv("MOS_LAZYLLM_SOURCE", "openai"), - "api_key": os.getenv("MOS_LAZYLLM_API_KEY", os.getenv("OPENAI_API_KEY")), + "api_key": os.getenv("MOS_MEMOS_API_KEY"), "api_base": os.getenv("MOS_LAZYLLM_API_BASE", os.getenv("OPENAI_API_BASE")), "stream": os.getenv("MOS_LAZYLLM_STREAM", "false").lower() == "true", "skip_auth": os.getenv("MOS_LAZYLLM_SKIP_AUTH", "false").lower() == "true", diff --git a/src/memos/configs/llm.py 
b/src/memos/configs/llm.py index da03ef3fa..2b54f6240 100644 --- a/src/memos/configs/llm.py +++ b/src/memos/configs/llm.py @@ -120,6 +120,7 @@ class VLLMLLMConfig(BaseLLMConfig): class LazyLLMOnlineChatConfig(BaseLLMConfig): + namespace: str = Field(default="memos", description="LazyLLM config namespace") source: str = Field(default="openai", description="LazyLLM online source name") api_key: str | None = Field(default=None, description="API key for LazyLLM online source") api_base: str | None = Field(default=None, description="Base URL for LazyLLM online source") diff --git a/src/memos/llms/lazyllm_onlinechat.py b/src/memos/llms/lazyllm_onlinechat.py index fa3f63980..cfca16117 100644 --- a/src/memos/llms/lazyllm_onlinechat.py +++ b/src/memos/llms/lazyllm_onlinechat.py @@ -42,7 +42,7 @@ def __init__(self, config: LazyLLMOnlineChatConfig): if config.extra_kwargs: module_kwargs.update(config.extra_kwargs) - self.client = lazyllm.OnlineChatModule(**module_kwargs) + self.client = lazyllm.namespace(config.namespace).OnlineChatModule(**module_kwargs) logger.info("LazyLLM OnlineChat LLM instance initialized") def _normalize_messages(self, messages: MessageList | str) -> MessageList: diff --git a/tests/configs/test_llm.py b/tests/configs/test_llm.py index 5ad84550a..7d17c275f 100644 --- a/tests/configs/test_llm.py +++ b/tests/configs/test_llm.py @@ -154,6 +154,7 @@ def test_lazyllm_online_chat_config(): "top_k", "remove_think_prefix", "default_headers", + "namespace", "source", "api_key", "api_base", @@ -168,6 +169,7 @@ def test_lazyllm_online_chat_config(): LazyLLMOnlineChatConfig, { "model_name_or_path": "gpt-4o-mini", + "namespace": "memos", "source": "openai", "api_key": "sk-test", "api_base": "https://api.openai.com/v1", diff --git a/tests/llms/test_lazyllm_onlinechat.py b/tests/llms/test_lazyllm_onlinechat.py index 59cf2796b..69eca2da7 100644 --- a/tests/llms/test_lazyllm_onlinechat.py +++ b/tests/llms/test_lazyllm_onlinechat.py @@ -13,8 +13,9 @@ def 
test_generate_with_mocked_lazyllm_backend(self): """Test LLMFactory with mocked lazyllm backend.""" mock_client = MagicMock() mock_client.return_value = {"content": "Hello from LazyLLM", "tool_calls": None} + mock_namespace_module = SimpleNamespace(OnlineChatModule=MagicMock(return_value=mock_client)) - mock_lazyllm = SimpleNamespace(OnlineChatModule=MagicMock(return_value=mock_client)) + mock_lazyllm = SimpleNamespace(namespace=MagicMock(return_value=mock_namespace_module)) original_lazyllm = sys.modules.get("lazyllm") sys.modules["lazyllm"] = mock_lazyllm try: @@ -26,12 +27,14 @@ def test_generate_with_mocked_lazyllm_backend(self): "source": "openai", "api_key": "sk-xxxx", "api_base": "https://api.openai.com/v1", + "namespace": "memos", }, } ) llm = LLMFactory.from_config(config) response = llm.generate([{"role": "user", "content": "hello"}]) self.assertEqual(response, "Hello from LazyLLM") + mock_lazyllm.namespace.assert_called_once_with("memos") finally: if original_lazyllm is None: sys.modules.pop("lazyllm", None) @@ -50,8 +53,9 @@ def test_generate_with_tool_calls(self): } ], } + mock_namespace_module = SimpleNamespace(OnlineChatModule=MagicMock(return_value=mock_client)) - mock_lazyllm = SimpleNamespace(OnlineChatModule=MagicMock(return_value=mock_client)) + mock_lazyllm = SimpleNamespace(namespace=MagicMock(return_value=mock_namespace_module)) original_lazyllm = sys.modules.get("lazyllm") sys.modules["lazyllm"] = mock_lazyllm try: From 659af6e108f42c3c7b6de191ae88d7a960a88c07 Mon Sep 17 00:00:00 2001 From: Yuang-Deng Date: Fri, 27 Feb 2026 14:53:01 +0800 Subject: [PATCH 4/7] feat(llm): position lazyllm backend as unified supplier interface --- docker/.env.example | 2 ++ src/memos/api/config.py | 2 +- src/memos/configs/llm.py | 5 ++++- src/memos/llms/lazyllm_onlinechat.py | 14 ++++++++++++-- tests/llms/test_lazyllm_onlinechat.py | 26 ++++++++++++++++++++++++++ 5 files changed, 45 insertions(+), 4 deletions(-) diff --git a/docker/.env.example 
b/docker/.env.example index 424800c78..622afad3f 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -29,6 +29,8 @@ MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm | lazyl OPENAI_API_KEY=sk-xxx # [required] when provider=openai OPENAI_API_BASE=https://api.openai.com/v1 # [required] base for the key # LazyLLM backend options (optional, used when provider=lazyllm) +# MemOS delegates supplier support to LazyLLM. Any source supported by LazyLLM can be used. +# Example values for MOS_LAZYLLM_SOURCE: openai / qwen / glm / deepseek / kimi / siliconflow / minimax / ... MOS_LAZYLLM_NAMESPACE=memos MOS_LAZYLLM_SOURCE=openai MOS_LAZYLLM_API_BASE=${OPENAI_API_BASE} diff --git a/src/memos/api/config.py b/src/memos/api/config.py index 72ded20ac..85c774d4e 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -310,7 +310,7 @@ def lazyllm_config() -> dict[str, Any]: "namespace": os.getenv("MOS_LAZYLLM_NAMESPACE", "memos"), "source": os.getenv("MOS_LAZYLLM_SOURCE", "openai"), "api_key": os.getenv("MOS_MEMOS_API_KEY"), - "api_base": os.getenv("MOS_LAZYLLM_API_BASE", os.getenv("OPENAI_API_BASE")), + "api_base": os.getenv("MOS_LAZYLLM_API_BASE"), "stream": os.getenv("MOS_LAZYLLM_STREAM", "false").lower() == "true", "skip_auth": os.getenv("MOS_LAZYLLM_SKIP_AUTH", "false").lower() == "true", } diff --git a/src/memos/configs/llm.py b/src/memos/configs/llm.py index 2b54f6240..b69d015cf 100644 --- a/src/memos/configs/llm.py +++ b/src/memos/configs/llm.py @@ -121,7 +121,10 @@ class VLLMLLMConfig(BaseLLMConfig): class LazyLLMOnlineChatConfig(BaseLLMConfig): namespace: str = Field(default="memos", description="LazyLLM config namespace") - source: str = Field(default="openai", description="LazyLLM online source name") + source: str = Field( + default="openai", + description="LazyLLM supplier source name (for example: openai/qwen/glm/deepseek)", + ) api_key: str | None = Field(default=None, description="API key for LazyLLM online source") api_base: str | 
None = Field(default=None, description="Base URL for LazyLLM online source") stream: bool = Field(default=False, description="Enable stream mode in LazyLLM module") diff --git a/src/memos/llms/lazyllm_onlinechat.py b/src/memos/llms/lazyllm_onlinechat.py index cfca16117..42b47a9e1 100644 --- a/src/memos/llms/lazyllm_onlinechat.py +++ b/src/memos/llms/lazyllm_onlinechat.py @@ -15,7 +15,7 @@ class LazyLLMOnlineChatLLM(BaseLLM): - """LazyLLM OnlineChat backend.""" + """LazyLLM OnlineChat backend as a unified supplier interface.""" def __init__(self, config: LazyLLMOnlineChatConfig): self.config = config @@ -42,7 +42,17 @@ def __init__(self, config: LazyLLMOnlineChatConfig): if config.extra_kwargs: module_kwargs.update(config.extra_kwargs) - self.client = lazyllm.namespace(config.namespace).OnlineChatModule(**module_kwargs) + try: + self.client = lazyllm.namespace(config.namespace).OnlineChatModule(**module_kwargs) + except Exception as exc: + if "Unsupported source" in str(exc): + raise ValueError( + f"Unsupported LazyLLM source '{config.source}'. " + "MemOS uses LazyLLM as a unified supplier interface. 
" + "Please use a source supported by LazyLLM, or open an issue/PR in " + "https://github.com/LazyAGI/LazyLLM" + ) from exc + raise logger.info("LazyLLM OnlineChat LLM instance initialized") def _normalize_messages(self, messages: MessageList | str) -> MessageList: diff --git a/tests/llms/test_lazyllm_onlinechat.py b/tests/llms/test_lazyllm_onlinechat.py index 69eca2da7..94442d213 100644 --- a/tests/llms/test_lazyllm_onlinechat.py +++ b/tests/llms/test_lazyllm_onlinechat.py @@ -82,3 +82,29 @@ def test_generate_with_tool_calls(self): sys.modules.pop("lazyllm", None) else: sys.modules["lazyllm"] = original_lazyllm + + def test_init_with_unsupported_source(self): + """Test unsupported source message from LazyLLM.""" + mock_namespace_module = SimpleNamespace( + OnlineChatModule=MagicMock(side_effect=AssertionError("Unsupported source: unknown")) + ) + mock_lazyllm = SimpleNamespace(namespace=MagicMock(return_value=mock_namespace_module)) + original_lazyllm = sys.modules.get("lazyllm") + sys.modules["lazyllm"] = mock_lazyllm + try: + config = LLMConfigFactory.model_validate( + { + "backend": "lazyllm", + "config": { + "model_name_or_path": "test-model", + "source": "unknown", + }, + } + ) + with self.assertRaisesRegex(ValueError, "MemOS uses LazyLLM as a unified supplier interface"): + LLMFactory.from_config(config) + finally: + if original_lazyllm is None: + sys.modules.pop("lazyllm", None) + else: + sys.modules["lazyllm"] = original_lazyllm From ddbfeb7d3ba67bfb0b027e6b5ec1f9fe2d76d611 Mon Sep 17 00:00:00 2001 From: Yuang-Deng Date: Fri, 27 Feb 2026 14:55:48 +0800 Subject: [PATCH 5/7] refactor(llm): align lazyllm namespace and api-key prefix to MOS --- docker/.env.example | 8 ++++---- src/memos/api/config.py | 4 ++-- src/memos/configs/llm.py | 2 +- tests/configs/test_llm.py | 2 +- tests/llms/test_lazyllm_onlinechat.py | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docker/.env.example b/docker/.env.example index 622afad3f..3fc85a825 100644 --- 
a/docker/.env.example +++ b/docker/.env.example @@ -31,15 +31,15 @@ OPENAI_API_BASE=https://api.openai.com/v1 # [required] base for the key # LazyLLM backend options (optional, used when provider=lazyllm) # MemOS delegates supplier support to LazyLLM. Any source supported by LazyLLM can be used. # Example values for MOS_LAZYLLM_SOURCE: openai / qwen / glm / deepseek / kimi / siliconflow / minimax / ... -MOS_LAZYLLM_NAMESPACE=memos +MOS_LAZYLLM_NAMESPACE=mos MOS_LAZYLLM_SOURCE=openai MOS_LAZYLLM_API_BASE=${OPENAI_API_BASE} MOS_LAZYLLM_STREAM=false MOS_LAZYLLM_SKIP_AUTH=false # Optional override to force a single key (bypasses namespace supplier key lookup) -MOS_MEMOS_API_KEY= -# LazyLLM namespace API keys (namespace = memos => MEMOS_*_API_KEY) -MEMOS_OPENAI_API_KEY=${OPENAI_API_KEY} +MOS_LAZYLLM_API_KEY= +# LazyLLM namespace API keys (namespace = mos => MOS_*_API_KEY) +MOS_OPENAI_API_KEY=${OPENAI_API_KEY} ## MemReader / retrieval LLM MEMRADER_MODEL=gpt-4o-mini diff --git a/src/memos/api/config.py b/src/memos/api/config.py index 85c774d4e..8b9d43810 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -307,9 +307,9 @@ def lazyllm_config() -> dict[str, Any]: "top_p": float(os.getenv("MOS_TOP_P", "0.9")), "top_k": int(os.getenv("MOS_TOP_K", "50")), "remove_think_prefix": True, - "namespace": os.getenv("MOS_LAZYLLM_NAMESPACE", "memos"), + "namespace": os.getenv("MOS_LAZYLLM_NAMESPACE", "mos"), "source": os.getenv("MOS_LAZYLLM_SOURCE", "openai"), - "api_key": os.getenv("MOS_MEMOS_API_KEY"), + "api_key": os.getenv("MOS_LAZYLLM_API_KEY"), "api_base": os.getenv("MOS_LAZYLLM_API_BASE"), "stream": os.getenv("MOS_LAZYLLM_STREAM", "false").lower() == "true", "skip_auth": os.getenv("MOS_LAZYLLM_SKIP_AUTH", "false").lower() == "true", diff --git a/src/memos/configs/llm.py b/src/memos/configs/llm.py index b69d015cf..5aaf39216 100644 --- a/src/memos/configs/llm.py +++ b/src/memos/configs/llm.py @@ -120,7 +120,7 @@ class VLLMLLMConfig(BaseLLMConfig): class 
LazyLLMOnlineChatConfig(BaseLLMConfig): - namespace: str = Field(default="memos", description="LazyLLM config namespace") + namespace: str = Field(default="mos", description="LazyLLM config namespace") source: str = Field( default="openai", description="LazyLLM supplier source name (for example: openai/qwen/glm/deepseek)", diff --git a/tests/configs/test_llm.py b/tests/configs/test_llm.py index 7d17c275f..64d4fad82 100644 --- a/tests/configs/test_llm.py +++ b/tests/configs/test_llm.py @@ -169,7 +169,7 @@ def test_lazyllm_online_chat_config(): LazyLLMOnlineChatConfig, { "model_name_or_path": "gpt-4o-mini", - "namespace": "memos", + "namespace": "mos", "source": "openai", "api_key": "sk-test", "api_base": "https://api.openai.com/v1", diff --git a/tests/llms/test_lazyllm_onlinechat.py b/tests/llms/test_lazyllm_onlinechat.py index 94442d213..065104475 100644 --- a/tests/llms/test_lazyllm_onlinechat.py +++ b/tests/llms/test_lazyllm_onlinechat.py @@ -27,14 +27,14 @@ def test_generate_with_mocked_lazyllm_backend(self): "source": "openai", "api_key": "sk-xxxx", "api_base": "https://api.openai.com/v1", - "namespace": "memos", + "namespace": "mos", }, } ) llm = LLMFactory.from_config(config) response = llm.generate([{"role": "user", "content": "hello"}]) self.assertEqual(response, "Hello from LazyLLM") - mock_lazyllm.namespace.assert_called_once_with("memos") + mock_lazyllm.namespace.assert_called_once_with("mos") finally: if original_lazyllm is None: sys.modules.pop("lazyllm", None) From becb4f1a438e46e517ccaf9669a25fff72ffa32b Mon Sep 17 00:00:00 2001 From: Yuang-Deng Date: Fri, 27 Feb 2026 15:15:30 +0800 Subject: [PATCH 6/7] refactor(llm): remove single-key override for lazyllm namespace auth --- docker/.env.example | 2 -- src/memos/api/config.py | 1 - 2 files changed, 3 deletions(-) diff --git a/docker/.env.example b/docker/.env.example index 3fc85a825..4db15f4b6 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -36,8 +36,6 @@ MOS_LAZYLLM_SOURCE=openai 
MOS_LAZYLLM_API_BASE=${OPENAI_API_BASE} MOS_LAZYLLM_STREAM=false MOS_LAZYLLM_SKIP_AUTH=false -# Optional override to force a single key (bypasses namespace supplier key lookup) -MOS_LAZYLLM_API_KEY= # LazyLLM namespace API keys (namespace = mos => MOS_*_API_KEY) MOS_OPENAI_API_KEY=${OPENAI_API_KEY} diff --git a/src/memos/api/config.py b/src/memos/api/config.py index 8b9d43810..7c897e804 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -309,7 +309,6 @@ def lazyllm_config() -> dict[str, Any]: "remove_think_prefix": True, "namespace": os.getenv("MOS_LAZYLLM_NAMESPACE", "mos"), "source": os.getenv("MOS_LAZYLLM_SOURCE", "openai"), - "api_key": os.getenv("MOS_LAZYLLM_API_KEY"), "api_base": os.getenv("MOS_LAZYLLM_API_BASE"), "stream": os.getenv("MOS_LAZYLLM_STREAM", "false").lower() == "true", "skip_auth": os.getenv("MOS_LAZYLLM_SKIP_AUTH", "false").lower() == "true", From 7591cc756e242a5eeb6fa2d5a0e322407a9852c9 Mon Sep 17 00:00:00 2001 From: Yuang-Deng Date: Fri, 27 Feb 2026 15:45:34 +0800 Subject: [PATCH 7/7] refactor(llm): hardcode lazyllm namespace and remove MOS_LAZYLLM env knobs --- docker/.env.example | 13 ++++--------- src/memos/api/config.py | 5 ----- src/memos/configs/llm.py | 5 ++--- src/memos/llms/lazyllm_onlinechat.py | 16 +++++++++------- tests/configs/test_llm.py | 2 -- tests/llms/test_lazyllm_onlinechat.py | 1 - 6 files changed, 15 insertions(+), 27 deletions(-) diff --git a/docker/.env.example b/docker/.env.example index 4db15f4b6..17dd85776 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -28,16 +28,11 @@ MOS_TOP_P=0.9 MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm | lazyllm OPENAI_API_KEY=sk-xxx # [required] when provider=openai OPENAI_API_BASE=https://api.openai.com/v1 # [required] base for the key -# LazyLLM backend options (optional, used when provider=lazyllm) -# MemOS delegates supplier support to LazyLLM. Any source supported by LazyLLM can be used. 
-# Example values for MOS_LAZYLLM_SOURCE: openai / qwen / glm / deepseek / kimi / siliconflow / minimax / ...
-MOS_LAZYLLM_NAMESPACE=mos
-MOS_LAZYLLM_SOURCE=openai
-MOS_LAZYLLM_API_BASE=${OPENAI_API_BASE}
-MOS_LAZYLLM_STREAM=false
-MOS_LAZYLLM_SKIP_AUTH=false
-# LazyLLM namespace API keys (namespace = mos => MOS_*_API_KEY)
+# LazyLLM backend (provider=lazyllm): configure supplier keys under MOS namespace.
+# The namespace is fixed to `mos`, so the key pattern is MOS_<SOURCE>_API_KEY.
 MOS_OPENAI_API_KEY=${OPENAI_API_KEY}
+MOS_QWEN_API_KEY=
+MOS_DEEPSEEK_API_KEY=
 
 ## MemReader / retrieval LLM
 MEMRADER_MODEL=gpt-4o-mini
diff --git a/src/memos/api/config.py b/src/memos/api/config.py
index 7c897e804..7bfc30e13 100644
--- a/src/memos/api/config.py
+++ b/src/memos/api/config.py
@@ -307,11 +307,6 @@ def lazyllm_config() -> dict[str, Any]:
             "top_p": float(os.getenv("MOS_TOP_P", "0.9")),
             "top_k": int(os.getenv("MOS_TOP_K", "50")),
             "remove_think_prefix": True,
-            "namespace": os.getenv("MOS_LAZYLLM_NAMESPACE", "mos"),
-            "source": os.getenv("MOS_LAZYLLM_SOURCE", "openai"),
-            "api_base": os.getenv("MOS_LAZYLLM_API_BASE"),
-            "stream": os.getenv("MOS_LAZYLLM_STREAM", "false").lower() == "true",
-            "skip_auth": os.getenv("MOS_LAZYLLM_SKIP_AUTH", "false").lower() == "true",
         }
 
     @staticmethod
diff --git a/src/memos/configs/llm.py b/src/memos/configs/llm.py
index 5aaf39216..9e2e2b140 100644
--- a/src/memos/configs/llm.py
+++ b/src/memos/configs/llm.py
@@ -120,9 +120,8 @@ class VLLMLLMConfig(BaseLLMConfig):
 
 
 class LazyLLMOnlineChatConfig(BaseLLMConfig):
-    namespace: str = Field(default="mos", description="LazyLLM config namespace")
-    source: str = Field(
-        default="openai",
+    source: str | None = Field(
+        default=None,
         description="LazyLLM supplier source name (for example: openai/qwen/glm/deepseek)",
     )
     api_key: str | None = Field(default=None, description="API key for LazyLLM online source")
diff --git a/src/memos/llms/lazyllm_onlinechat.py b/src/memos/llms/lazyllm_onlinechat.py
index 
42b47a9e1..3c5437441 100644 --- a/src/memos/llms/lazyllm_onlinechat.py +++ b/src/memos/llms/lazyllm_onlinechat.py @@ -16,6 +16,7 @@ class LazyLLMOnlineChatLLM(BaseLLM): """LazyLLM OnlineChat backend as a unified supplier interface.""" + _NAMESPACE = "mos" def __init__(self, config: LazyLLMOnlineChatConfig): self.config = config @@ -27,12 +28,13 @@ def __init__(self, config: LazyLLMOnlineChatConfig): "Install with: pip install 'git+https://github.com/LazyAGI/LazyLLM.git@main'" ) from exc - module_kwargs: dict[str, Any] = { - "source": config.source, - "model": config.model_name_or_path, - "stream": config.stream, - "skip_auth": config.skip_auth, - } + module_kwargs: dict[str, Any] = {"model": config.model_name_or_path} + if config.source: + module_kwargs["source"] = config.source + if config.stream: + module_kwargs["stream"] = config.stream + if config.skip_auth: + module_kwargs["skip_auth"] = config.skip_auth if config.api_base: module_kwargs["base_url"] = config.api_base if config.api_key: @@ -43,7 +45,7 @@ def __init__(self, config: LazyLLMOnlineChatConfig): module_kwargs.update(config.extra_kwargs) try: - self.client = lazyllm.namespace(config.namespace).OnlineChatModule(**module_kwargs) + self.client = lazyllm.namespace(self._NAMESPACE).OnlineChatModule(**module_kwargs) except Exception as exc: if "Unsupported source" in str(exc): raise ValueError( diff --git a/tests/configs/test_llm.py b/tests/configs/test_llm.py index 64d4fad82..5ad84550a 100644 --- a/tests/configs/test_llm.py +++ b/tests/configs/test_llm.py @@ -154,7 +154,6 @@ def test_lazyllm_online_chat_config(): "top_k", "remove_think_prefix", "default_headers", - "namespace", "source", "api_key", "api_base", @@ -169,7 +168,6 @@ def test_lazyllm_online_chat_config(): LazyLLMOnlineChatConfig, { "model_name_or_path": "gpt-4o-mini", - "namespace": "mos", "source": "openai", "api_key": "sk-test", "api_base": "https://api.openai.com/v1", diff --git a/tests/llms/test_lazyllm_onlinechat.py 
b/tests/llms/test_lazyllm_onlinechat.py index 065104475..01a0633f3 100644 --- a/tests/llms/test_lazyllm_onlinechat.py +++ b/tests/llms/test_lazyllm_onlinechat.py @@ -27,7 +27,6 @@ def test_generate_with_mocked_lazyllm_backend(self): "source": "openai", "api_key": "sk-xxxx", "api_base": "https://api.openai.com/v1", - "namespace": "mos", }, } )