Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion docker/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,14 @@ MOS_MAX_TOKENS=2048
# Top-P for LLM in the Product API
MOS_TOP_P=0.9
# LLM for the Product API backend
MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm
MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm | lazyllm
OPENAI_API_KEY=sk-xxx # [required] when provider=openai
OPENAI_API_BASE=https://api.openai.com/v1 # [required] base for the key
# LazyLLM backend (provider=lazyllm): configure supplier keys under MOS namespace.
# The namespace is fixed to `mos`, so key pattern is MOS_<SOURCE>_API_KEY.
MOS_OPENAI_API_KEY=${OPENAI_API_KEY}
MOS_QWEN_API_KEY=
MOS_DEEPSEEK_API_KEY=

## MemReader / retrieval LLM
MEMRADER_MODEL=gpt-4o-mini
Expand Down
16 changes: 16 additions & 0 deletions src/memos/api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,18 @@ def vllm_config() -> dict[str, Any]:
"model_schema": os.getenv("MOS_MODEL_SCHEMA", "memos.configs.llm.VLLMLLMConfig"),
}

@staticmethod
def lazyllm_config() -> dict[str, Any]:
"""Get LazyLLM OnlineChat configuration."""
return {
"model_name_or_path": os.getenv("MOS_CHAT_MODEL", "gpt-4o-mini"),
"temperature": float(os.getenv("MOS_CHAT_TEMPERATURE", "0.8")),
"max_tokens": int(os.getenv("MOS_MAX_TOKENS", "8000")),
"top_p": float(os.getenv("MOS_TOP_P", "0.9")),
"top_k": int(os.getenv("MOS_TOP_K", "50")),
"remove_think_prefix": True,
}

@staticmethod
def get_activation_config() -> dict[str, Any]:
"""Get Ollama configuration."""
Expand Down Expand Up @@ -786,12 +798,14 @@ def get_product_default_config() -> dict[str, Any]:
openai_config = APIConfig.get_openai_config()
qwen_config = APIConfig.qwen_config()
vllm_config = APIConfig.vllm_config()
lazyllm_config = APIConfig.lazyllm_config()
reader_config = APIConfig.get_reader_config()

backend_model = {
"openai": openai_config,
"huggingface": qwen_config,
"vllm": vllm_config,
"lazyllm": lazyllm_config,
}
backend = os.getenv("MOS_CHAT_MODEL_PROVIDER", "openai")
mysql_config = APIConfig.get_mysql_config()
Expand Down Expand Up @@ -905,13 +919,15 @@ def create_user_config(user_name: str, user_id: str) -> tuple["MOSConfig", "Gene
openai_config = APIConfig.get_openai_config()
qwen_config = APIConfig.qwen_config()
vllm_config = APIConfig.vllm_config()
lazyllm_config = APIConfig.lazyllm_config()
mysql_config = APIConfig.get_mysql_config()
reader_config = APIConfig.get_reader_config()
backend = os.getenv("MOS_CHAT_MODEL_PROVIDER", "openai")
backend_model = {
"openai": openai_config,
"huggingface": qwen_config,
"vllm": vllm_config,
"lazyllm": lazyllm_config,
}
# Create MOSConfig
config_dict = {
Expand Down
16 changes: 16 additions & 0 deletions src/memos/configs/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,21 @@ class VLLMLLMConfig(BaseLLMConfig):
extra_body: Any = Field(default=None, description="Extra options for API")


class LazyLLMOnlineChatConfig(BaseLLMConfig):
    """Configuration for the LazyLLM OnlineChat LLM backend (factory backend "lazyllm").

    Inherits the common chat-model fields (model_name_or_path, temperature,
    max_tokens, top_p, top_k, remove_think_prefix, ...) from BaseLLMConfig and
    adds the LazyLLM-specific connection options below. All extras default to
    None/False so that LazyLLM's own defaults apply when they are not set.
    """

    # Supplier selected inside LazyLLM; when None, LazyLLM picks its default source.
    source: str | None = Field(
        default=None,
        description="LazyLLM supplier source name (for example: openai/qwen/glm/deepseek)",
    )
    api_key: str | None = Field(default=None, description="API key for LazyLLM online source")
    api_base: str | None = Field(default=None, description="Base URL for LazyLLM online source")
    stream: bool = Field(default=False, description="Enable stream mode in LazyLLM module")
    skip_auth: bool = Field(default=False, description="Skip LazyLLM API key validation")
    type: str | None = Field(default=None, description="Optional model type for LazyLLM module")
    # Escape hatch: merged verbatim into the OnlineChatModule constructor kwargs.
    extra_kwargs: dict[str, Any] | None = Field(
        default=None, description="Extra kwargs for lazyllm.OnlineChatModule"
    )


class LLMConfigFactory(BaseConfig):
"""Factory class for creating LLM configurations."""

Expand All @@ -135,6 +150,7 @@ class LLMConfigFactory(BaseConfig):
"qwen": QwenLLMConfig,
"deepseek": DeepSeekLLMConfig,
"openai_new": OpenAIResponsesLLMConfig,
"lazyllm": LazyLLMOnlineChatConfig,
}

@field_validator("backend")
Expand Down
2 changes: 2 additions & 0 deletions src/memos/llms/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from memos.llms.deepseek import DeepSeekLLM
from memos.llms.hf import HFLLM
from memos.llms.hf_singleton import HFSingletonLLM
from memos.llms.lazyllm_onlinechat import LazyLLMOnlineChatLLM
from memos.llms.ollama import OllamaLLM
from memos.llms.openai import AzureLLM, OpenAILLM
from memos.llms.openai_new import OpenAIResponsesLLM
Expand All @@ -26,6 +27,7 @@ class LLMFactory(BaseLLM):
"qwen": QwenLLM,
"deepseek": DeepSeekLLM,
"openai_new": OpenAIResponsesLLM,
"lazyllm": LazyLLMOnlineChatLLM,
}

@classmethod
Expand Down
125 changes: 125 additions & 0 deletions src/memos/llms/lazyllm_onlinechat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import json

from collections.abc import Generator
from contextlib import suppress
from typing import Any

from memos.configs.llm import LazyLLMOnlineChatConfig
from memos.llms.base import BaseLLM
from memos.llms.utils import remove_thinking_tags
from memos.log import get_logger
from memos.types import MessageList


logger = get_logger(__name__)


class LazyLLMOnlineChatLLM(BaseLLM):
    """LazyLLM OnlineChat backend as a unified supplier interface.

    Wraps ``lazyllm.OnlineChatModule`` so any supplier LazyLLM supports
    (e.g. openai/qwen/deepseek) can serve as the MemOS chat LLM behind the
    common :class:`BaseLLM` interface.
    """

    # Fixed LazyLLM namespace: with namespace "mos", supplier API keys are
    # resolved from MOS_<SOURCE>_API_KEY environment variables
    # (see docker/.env.example in this change).
    _NAMESPACE = "mos"

    def __init__(self, config: LazyLLMOnlineChatConfig):
        """Create the underlying ``OnlineChatModule`` from *config*.

        Raises:
            ImportError: if the optional ``lazyllm`` package is not installed.
            ValueError: if ``config.source`` is a supplier LazyLLM rejects.
        """
        self.config = config
        # `lazyllm` is an optional dependency; import lazily so the rest of
        # memos.llms stays importable without it.
        try:
            import lazyllm
        except ImportError as exc:
            raise ImportError(
                "LazyLLM backend requires `lazyllm`. "
                "Install with: pip install 'git+https://github.com/LazyAGI/LazyLLM.git@main'"
            ) from exc

        # Forward only the options the user actually set, so LazyLLM's own
        # defaults apply for everything left at None/False.
        module_kwargs: dict[str, Any] = {"model": config.model_name_or_path}
        if config.source:
            module_kwargs["source"] = config.source
        if config.stream:
            module_kwargs["stream"] = config.stream
        if config.skip_auth:
            module_kwargs["skip_auth"] = config.skip_auth
        if config.api_base:
            module_kwargs["base_url"] = config.api_base
        if config.api_key:
            module_kwargs["api_key"] = config.api_key
        if config.type:
            module_kwargs["type"] = config.type
        # extra_kwargs is an escape hatch and may override any key set above.
        if config.extra_kwargs:
            module_kwargs.update(config.extra_kwargs)

        try:
            self.client = lazyllm.namespace(self._NAMESPACE).OnlineChatModule(**module_kwargs)
        except Exception as exc:
            # Translate LazyLLM's "Unsupported source" failure into a
            # ValueError with MemOS-specific guidance; re-raise anything else.
            if "Unsupported source" in str(exc):
                raise ValueError(
                    f"Unsupported LazyLLM source '{config.source}'. "
                    "MemOS uses LazyLLM as a unified supplier interface. "
                    "Please use a source supported by LazyLLM, or open an issue/PR in "
                    "https://github.com/LazyAGI/LazyLLM"
                ) from exc
            raise
        logger.info("LazyLLM OnlineChat LLM instance initialized")

    def _normalize_messages(self, messages: MessageList | str) -> MessageList:
        """Wrap a bare prompt string as a single user message; pass lists through."""
        if isinstance(messages, str):
            return [{"role": "user", "content": messages}]
        return messages

    def generate(self, messages: MessageList | str, **kwargs) -> str | list[dict]:
        """Run one non-streaming chat completion.

        Per-call kwargs (model_name_or_path, temperature, max_tokens, top_p,
        top_k, tools) override the values captured in ``self.config``.

        Returns:
            The response text, or a list of parsed tool calls (see
            :meth:`tool_call_parser`) when the model answered with
            ``tool_calls``.
        """
        normalized_messages = self._normalize_messages(messages)
        runtime_model = kwargs.get("model_name_or_path", self.config.model_name_or_path)

        request_kwargs: dict[str, Any] = {
            "messages": normalized_messages,
            "stream_output": False,
            "model_name": runtime_model,
            "temperature": kwargs.get("temperature", self.config.temperature),
            "max_tokens": kwargs.get("max_tokens", self.config.max_tokens),
            "top_p": kwargs.get("top_p", self.config.top_p),
            "top_k": kwargs.get("top_k", self.config.top_k),
        }
        if kwargs.get("tools"):
            request_kwargs["tools"] = kwargs["tools"]

        # NOTE(review): the positional query is left empty because the whole
        # conversation is supplied via `messages` — presumably how
        # OnlineChatModule expects chat input; confirm against LazyLLM docs.
        response = self.client("", **request_kwargs)
        if isinstance(response, dict):
            # Tool-call answers take precedence over plain content.
            tool_calls = response.get("tool_calls")
            if isinstance(tool_calls, list) and len(tool_calls) > 0:
                return self.tool_call_parser(tool_calls)
            response_content = response.get("content", "")
            reasoning_content = response.get("reasoning_content")
            if isinstance(reasoning_content, str) and reasoning_content:
                reasoning_content = f"<think>{reasoning_content}</think>"
            # remove_think_prefix drops the reasoning entirely and strips any
            # <think> tags embedded in the content itself.
            if self.config.remove_think_prefix:
                return remove_thinking_tags(response_content)
            if reasoning_content:
                return reasoning_content + (response_content or "")
            return response_content or ""
        if isinstance(response, str):
            return remove_thinking_tags(response) if self.config.remove_think_prefix else response
        # Fallback for unexpected response types from LazyLLM.
        return str(response)

    def generate_stream(self, messages: MessageList | str, **kwargs) -> Generator[str, None, None]:
        """Yield the response as a stream of text chunks.

        NOTE(review): this is pseudo-streaming — the full response is produced
        by :meth:`generate` and yielded as a single chunk. Tool calls are not
        supported in stream mode: when `tools` is passed the generator logs
        and yields nothing.
        """
        if kwargs.get("tools"):
            logger.info("stream api not support tools")
            return

        response = self.generate(messages, **kwargs)
        if isinstance(response, str):
            yield response
            return
        # Non-string result (e.g. parsed tool calls) is serialized to JSON.
        yield json.dumps(response, ensure_ascii=False)

    def tool_call_parser(self, tool_calls: list[dict]) -> list[dict]:
        """Normalize raw tool-call dicts into MemOS shape.

        Each entry becomes {"tool_call_id", "function_name", "arguments"}.
        String arguments are JSON-decoded when possible; strings that fail to
        decode are passed through unchanged.
        """
        parsed_calls = []
        for tool_call in tool_calls:
            function_data = tool_call.get("function", {})
            arguments = function_data.get("arguments", {})
            if isinstance(arguments, str):
                with suppress(json.JSONDecodeError):
                    arguments = json.loads(arguments)
            parsed_calls.append(
                {
                    "tool_call_id": tool_call.get("id", ""),
                    "function_name": function_data.get("name", ""),
                    "arguments": arguments,
                }
            )
        return parsed_calls
6 changes: 4 additions & 2 deletions src/memos/mem_os/product.py
Original file line number Diff line number Diff line change
Expand Up @@ -1201,8 +1201,10 @@ def chat_with_references(
)
elif self.config.chat_model.backend == "vllm":
response_stream = self.chat_llm.generate_stream(current_messages)
else:
response_stream = self.chat_llm.generate_stream(current_messages)
else:
if self.config.chat_model.backend in ["huggingface", "vllm", "openai"]:
if self.config.chat_model.backend in ["huggingface", "vllm", "openai", "lazyllm"]:
response_stream = self.chat_llm.generate_stream(current_messages)
else:
response_stream = self.chat_llm.generate(current_messages)
Expand All @@ -1219,7 +1221,7 @@ def chat_with_references(
full_response = ""
token_count = 0
# Use tiktoken for proper token-based chunking
if self.config.chat_model.backend not in ["huggingface", "vllm", "openai"]:
if self.config.chat_model.backend not in ["huggingface", "vllm", "openai", "lazyllm"]:
# For non-huggingface backends, we need to collect the full response first
full_response_text = ""
for chunk in response_stream:
Expand Down
40 changes: 39 additions & 1 deletion tests/configs/test_llm.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from memos.configs.llm import (
BaseLLMConfig,
HFLLMConfig,
LazyLLMOnlineChatConfig,
LLMConfigFactory,
OllamaLLMConfig,
OpenAILLMConfig,
Expand Down Expand Up @@ -140,10 +141,47 @@ def test_hf_llm_config():
check_config_instantiation_invalid(HFLLMConfig)


def test_lazyllm_online_chat_config():
check_config_base_class(
LazyLLMOnlineChatConfig,
required_fields=[
"model_name_or_path",
],
optional_fields=[
"temperature",
"max_tokens",
"top_p",
"top_k",
"remove_think_prefix",
"default_headers",
"source",
"api_key",
"api_base",
"stream",
"skip_auth",
"type",
"extra_kwargs",
],
)

check_config_instantiation_valid(
LazyLLMOnlineChatConfig,
{
"model_name_or_path": "gpt-4o-mini",
"source": "openai",
"api_key": "sk-test",
"api_base": "https://api.openai.com/v1",
"stream": False,
},
)

check_config_instantiation_invalid(LazyLLMOnlineChatConfig)


def test_llm_config_factory():
check_config_factory_class(
LLMConfigFactory,
expected_backends=["openai", "ollama", "huggingface"],
expected_backends=["openai", "ollama", "huggingface", "lazyllm"],
)

check_config_instantiation_valid(
Expand Down
Loading