From ebd72ce127c868b8034c4e1699d0aa11058ac84b Mon Sep 17 00:00:00 2001 From: Yuang-Deng Date: Fri, 27 Feb 2026 14:22:18 +0800 Subject: [PATCH 1/7] feat(llm): add lazyllm onlinechat backend for chat model provider --- docker/.env.example | 8 +- src/memos/api/config.py | 21 +++++ src/memos/configs/llm.py | 13 +++ src/memos/llms/factory.py | 2 + src/memos/llms/lazyllm_onlinechat.py | 114 ++++++++++++++++++++++++++ src/memos/mem_os/product.py | 6 +- tests/configs/test_llm.py | 40 ++++++++- tests/llms/test_lazyllm_onlinechat.py | 80 ++++++++++++++++++ 8 files changed, 280 insertions(+), 4 deletions(-) create mode 100644 src/memos/llms/lazyllm_onlinechat.py create mode 100644 tests/llms/test_lazyllm_onlinechat.py diff --git a/docker/.env.example b/docker/.env.example index 3674cd69b..46f03ca8b 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -25,9 +25,15 @@ MOS_MAX_TOKENS=2048 # Top-P for LLM in the Product API MOS_TOP_P=0.9 # LLM for the Product API backend -MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm +MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm | lazyllm OPENAI_API_KEY=sk-xxx # [required] when provider=openai OPENAI_API_BASE=https://api.openai.com/v1 # [required] base for the key +# LazyLLM backend options (optional, used when provider=lazyllm) +MOS_LAZYLLM_SOURCE=openai +MOS_LAZYLLM_API_KEY=${OPENAI_API_KEY} +MOS_LAZYLLM_API_BASE=${OPENAI_API_BASE} +MOS_LAZYLLM_STREAM=false +MOS_LAZYLLM_SKIP_AUTH=false ## MemReader / retrieval LLM MEMRADER_MODEL=gpt-4o-mini diff --git a/src/memos/api/config.py b/src/memos/api/config.py index 65049b0c2..e4613b08f 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -297,6 +297,23 @@ def vllm_config() -> dict[str, Any]: "model_schema": os.getenv("MOS_MODEL_SCHEMA", "memos.configs.llm.VLLMLLMConfig"), } + @staticmethod + def lazyllm_config() -> dict[str, Any]: + """Get LazyLLM OnlineChat configuration.""" + return { + "model_name_or_path": os.getenv("MOS_CHAT_MODEL", 
"gpt-4o-mini"), + "temperature": float(os.getenv("MOS_CHAT_TEMPERATURE", "0.8")), + "max_tokens": int(os.getenv("MOS_MAX_TOKENS", "8000")), + "top_p": float(os.getenv("MOS_TOP_P", "0.9")), + "top_k": int(os.getenv("MOS_TOP_K", "50")), + "remove_think_prefix": True, + "source": os.getenv("MOS_LAZYLLM_SOURCE", "openai"), + "api_key": os.getenv("MOS_LAZYLLM_API_KEY", os.getenv("OPENAI_API_KEY")), + "api_base": os.getenv("MOS_LAZYLLM_API_BASE", os.getenv("OPENAI_API_BASE")), + "stream": os.getenv("MOS_LAZYLLM_STREAM", "false").lower() == "true", + "skip_auth": os.getenv("MOS_LAZYLLM_SKIP_AUTH", "false").lower() == "true", + } + @staticmethod def get_activation_config() -> dict[str, Any]: """Get Ollama configuration.""" @@ -786,12 +803,14 @@ def get_product_default_config() -> dict[str, Any]: openai_config = APIConfig.get_openai_config() qwen_config = APIConfig.qwen_config() vllm_config = APIConfig.vllm_config() + lazyllm_config = APIConfig.lazyllm_config() reader_config = APIConfig.get_reader_config() backend_model = { "openai": openai_config, "huggingface": qwen_config, "vllm": vllm_config, + "lazyllm": lazyllm_config, } backend = os.getenv("MOS_CHAT_MODEL_PROVIDER", "openai") mysql_config = APIConfig.get_mysql_config() @@ -905,6 +924,7 @@ def create_user_config(user_name: str, user_id: str) -> tuple["MOSConfig", "Gene openai_config = APIConfig.get_openai_config() qwen_config = APIConfig.qwen_config() vllm_config = APIConfig.vllm_config() + lazyllm_config = APIConfig.lazyllm_config() mysql_config = APIConfig.get_mysql_config() reader_config = APIConfig.get_reader_config() backend = os.getenv("MOS_CHAT_MODEL_PROVIDER", "openai") @@ -912,6 +932,7 @@ def create_user_config(user_name: str, user_id: str) -> tuple["MOSConfig", "Gene "openai": openai_config, "huggingface": qwen_config, "vllm": vllm_config, + "lazyllm": lazyllm_config, } # Create MOSConfig config_dict = { diff --git a/src/memos/configs/llm.py b/src/memos/configs/llm.py index 5487d117c..da03ef3fa 100644 --- 
a/src/memos/configs/llm.py +++ b/src/memos/configs/llm.py @@ -119,6 +119,18 @@ class VLLMLLMConfig(BaseLLMConfig): extra_body: Any = Field(default=None, description="Extra options for API") +class LazyLLMOnlineChatConfig(BaseLLMConfig): + source: str = Field(default="openai", description="LazyLLM online source name") + api_key: str | None = Field(default=None, description="API key for LazyLLM online source") + api_base: str | None = Field(default=None, description="Base URL for LazyLLM online source") + stream: bool = Field(default=False, description="Enable stream mode in LazyLLM module") + skip_auth: bool = Field(default=False, description="Skip LazyLLM API key validation") + type: str | None = Field(default=None, description="Optional model type for LazyLLM module") + extra_kwargs: dict[str, Any] | None = Field( + default=None, description="Extra kwargs for lazyllm.OnlineChatModule" + ) + + class LLMConfigFactory(BaseConfig): """Factory class for creating LLM configurations.""" @@ -135,6 +147,7 @@ class LLMConfigFactory(BaseConfig): "qwen": QwenLLMConfig, "deepseek": DeepSeekLLMConfig, "openai_new": OpenAIResponsesLLMConfig, + "lazyllm": LazyLLMOnlineChatConfig, } @field_validator("backend") diff --git a/src/memos/llms/factory.py b/src/memos/llms/factory.py index 8f4da662f..f85f9a5a3 100644 --- a/src/memos/llms/factory.py +++ b/src/memos/llms/factory.py @@ -5,6 +5,7 @@ from memos.llms.deepseek import DeepSeekLLM from memos.llms.hf import HFLLM from memos.llms.hf_singleton import HFSingletonLLM +from memos.llms.lazyllm_onlinechat import LazyLLMOnlineChatLLM from memos.llms.ollama import OllamaLLM from memos.llms.openai import AzureLLM, OpenAILLM from memos.llms.openai_new import OpenAIResponsesLLM @@ -26,6 +27,7 @@ class LLMFactory(BaseLLM): "qwen": QwenLLM, "deepseek": DeepSeekLLM, "openai_new": OpenAIResponsesLLM, + "lazyllm": LazyLLMOnlineChatLLM, } @classmethod diff --git a/src/memos/llms/lazyllm_onlinechat.py b/src/memos/llms/lazyllm_onlinechat.py new file 
mode 100644 index 000000000..5325c84f6 --- /dev/null +++ b/src/memos/llms/lazyllm_onlinechat.py @@ -0,0 +1,114 @@ +import json + +from collections.abc import Generator +from typing import Any + +from memos.configs.llm import LazyLLMOnlineChatConfig +from memos.llms.base import BaseLLM +from memos.llms.utils import remove_thinking_tags +from memos.log import get_logger +from memos.types import MessageList + + +logger = get_logger(__name__) + + +class LazyLLMOnlineChatLLM(BaseLLM): + """LazyLLM OnlineChat backend.""" + + def __init__(self, config: LazyLLMOnlineChatConfig): + self.config = config + try: + import lazyllm + except ImportError as exc: + raise ImportError( + "LazyLLM backend requires `lazyllm`. " + "Install with: pip install 'git+https://github.com/LazyAGI/LazyLLM.git@main'" + ) from exc + + module_kwargs: dict[str, Any] = { + "source": config.source, + "model": config.model_name_or_path, + "stream": config.stream, + "skip_auth": config.skip_auth, + } + if config.api_base: + module_kwargs["base_url"] = config.api_base + if config.api_key: + module_kwargs["api_key"] = config.api_key + if config.type: + module_kwargs["type"] = config.type + if config.extra_kwargs: + module_kwargs.update(config.extra_kwargs) + + self.client = lazyllm.OnlineChatModule(**module_kwargs) + logger.info("LazyLLM OnlineChat LLM instance initialized") + + def _normalize_messages(self, messages: MessageList | str) -> MessageList: + if isinstance(messages, str): + return [{"role": "user", "content": messages}] + return messages + + def generate(self, messages: MessageList | str, **kwargs) -> str | list[dict]: + normalized_messages = self._normalize_messages(messages) + runtime_model = kwargs.get("model_name_or_path", self.config.model_name_or_path) + + request_kwargs: dict[str, Any] = { + "messages": normalized_messages, + "stream_output": False, + "model_name": runtime_model, + "temperature": kwargs.get("temperature", self.config.temperature), + "max_tokens": kwargs.get("max_tokens", 
self.config.max_tokens), + "top_p": kwargs.get("top_p", self.config.top_p), + "top_k": kwargs.get("top_k", self.config.top_k), + } + if kwargs.get("tools"): + request_kwargs["tools"] = kwargs["tools"] + + response = self.client("", **request_kwargs) + if isinstance(response, dict): + tool_calls = response.get("tool_calls") + if isinstance(tool_calls, list) and len(tool_calls) > 0: + return self.tool_call_parser(tool_calls) + response_content = response.get("content", "") + reasoning_content = response.get("reasoning_content") + if isinstance(reasoning_content, str) and reasoning_content: + reasoning_content = f"{reasoning_content}" + if self.config.remove_think_prefix: + return remove_thinking_tags(response_content) + if reasoning_content: + return reasoning_content + (response_content or "") + return response_content or "" + if isinstance(response, str): + return remove_thinking_tags(response) if self.config.remove_think_prefix else response + return str(response) + + def generate_stream(self, messages: MessageList | str, **kwargs) -> Generator[str, None, None]: + if kwargs.get("tools"): + logger.info("stream api not support tools") + return + + response = self.generate(messages, **kwargs) + if isinstance(response, str): + yield response + return + yield json.dumps(response, ensure_ascii=False) + + def tool_call_parser(self, tool_calls: list[dict]) -> list[dict]: + parsed_calls = [] + for tool_call in tool_calls: + function_data = tool_call.get("function", {}) + arguments = function_data.get("arguments", {}) + if isinstance(arguments, str): + try: + arguments = json.loads(arguments) + except json.JSONDecodeError: + pass + parsed_calls.append( + { + "tool_call_id": tool_call.get("id", ""), + "function_name": function_data.get("name", ""), + "arguments": arguments, + } + ) + return parsed_calls diff --git a/src/memos/mem_os/product.py b/src/memos/mem_os/product.py index b2c74c384..1e5e48f49 100644 --- a/src/memos/mem_os/product.py +++ b/src/memos/mem_os/product.py 
@@ -1201,8 +1201,10 @@ def chat_with_references( ) elif self.config.chat_model.backend == "vllm": response_stream = self.chat_llm.generate_stream(current_messages) + else: + response_stream = self.chat_llm.generate_stream(current_messages) else: - if self.config.chat_model.backend in ["huggingface", "vllm", "openai"]: + if self.config.chat_model.backend in ["huggingface", "vllm", "openai", "lazyllm"]: response_stream = self.chat_llm.generate_stream(current_messages) else: response_stream = self.chat_llm.generate(current_messages) @@ -1219,7 +1221,7 @@ def chat_with_references( full_response = "" token_count = 0 # Use tiktoken for proper token-based chunking - if self.config.chat_model.backend not in ["huggingface", "vllm", "openai"]: + if self.config.chat_model.backend not in ["huggingface", "vllm", "openai", "lazyllm"]: # For non-huggingface backends, we need to collect the full response first full_response_text = "" for chunk in response_stream: diff --git a/tests/configs/test_llm.py b/tests/configs/test_llm.py index 6562c9a95..5ad84550a 100644 --- a/tests/configs/test_llm.py +++ b/tests/configs/test_llm.py @@ -1,6 +1,7 @@ from memos.configs.llm import ( BaseLLMConfig, HFLLMConfig, + LazyLLMOnlineChatConfig, LLMConfigFactory, OllamaLLMConfig, OpenAILLMConfig, @@ -140,10 +141,47 @@ def test_hf_llm_config(): check_config_instantiation_invalid(HFLLMConfig) +def test_lazyllm_online_chat_config(): + check_config_base_class( + LazyLLMOnlineChatConfig, + required_fields=[ + "model_name_or_path", + ], + optional_fields=[ + "temperature", + "max_tokens", + "top_p", + "top_k", + "remove_think_prefix", + "default_headers", + "source", + "api_key", + "api_base", + "stream", + "skip_auth", + "type", + "extra_kwargs", + ], + ) + + check_config_instantiation_valid( + LazyLLMOnlineChatConfig, + { + "model_name_or_path": "gpt-4o-mini", + "source": "openai", + "api_key": "sk-test", + "api_base": "https://api.openai.com/v1", + "stream": False, + }, + ) + + 
check_config_instantiation_invalid(LazyLLMOnlineChatConfig) + + def test_llm_config_factory(): check_config_factory_class( LLMConfigFactory, - expected_backends=["openai", "ollama", "huggingface"], + expected_backends=["openai", "ollama", "huggingface", "lazyllm"], ) check_config_instantiation_valid( diff --git a/tests/llms/test_lazyllm_onlinechat.py b/tests/llms/test_lazyllm_onlinechat.py new file mode 100644 index 000000000..59cf2796b --- /dev/null +++ b/tests/llms/test_lazyllm_onlinechat.py @@ -0,0 +1,80 @@ +import sys +import unittest + +from types import SimpleNamespace +from unittest.mock import MagicMock + +from memos.configs.llm import LLMConfigFactory +from memos.llms.factory import LLMFactory + + +class TestLazyLLMOnlineChatBackend(unittest.TestCase): + def test_generate_with_mocked_lazyllm_backend(self): + """Test LLMFactory with mocked lazyllm backend.""" + mock_client = MagicMock() + mock_client.return_value = {"content": "Hello from LazyLLM", "tool_calls": None} + + mock_lazyllm = SimpleNamespace(OnlineChatModule=MagicMock(return_value=mock_client)) + original_lazyllm = sys.modules.get("lazyllm") + sys.modules["lazyllm"] = mock_lazyllm + try: + config = LLMConfigFactory.model_validate( + { + "backend": "lazyllm", + "config": { + "model_name_or_path": "gpt-4o-mini", + "source": "openai", + "api_key": "sk-xxxx", + "api_base": "https://api.openai.com/v1", + }, + } + ) + llm = LLMFactory.from_config(config) + response = llm.generate([{"role": "user", "content": "hello"}]) + self.assertEqual(response, "Hello from LazyLLM") + finally: + if original_lazyllm is None: + sys.modules.pop("lazyllm", None) + else: + sys.modules["lazyllm"] = original_lazyllm + + def test_generate_with_tool_calls(self): + """Test lazyllm tool call parser compatibility.""" + mock_client = MagicMock() + mock_client.return_value = { + "content": None, + "tool_calls": [ + { + "id": "call_1", + "function": {"name": "search", "arguments": '{"query":"memos"}'}, + } + ], + } + + 
mock_lazyllm = SimpleNamespace(OnlineChatModule=MagicMock(return_value=mock_client)) + original_lazyllm = sys.modules.get("lazyllm") + sys.modules["lazyllm"] = mock_lazyllm + try: + config = LLMConfigFactory.model_validate( + { + "backend": "lazyllm", + "config": {"model_name_or_path": "gpt-4o-mini"}, + } + ) + llm = LLMFactory.from_config(config) + response = llm.generate([{"role": "user", "content": "search memos"}]) + self.assertEqual( + response, + [ + { + "tool_call_id": "call_1", + "function_name": "search", + "arguments": {"query": "memos"}, + } + ], + ) + finally: + if original_lazyllm is None: + sys.modules.pop("lazyllm", None) + else: + sys.modules["lazyllm"] = original_lazyllm From b3202b0f9d2dec01acc0bdd75a95b822c77d5559 Mon Sep 17 00:00:00 2001 From: Yuang-Deng Date: Fri, 27 Feb 2026 14:39:07 +0800 Subject: [PATCH 2/7] fix(llm): align lazyllm backend parser with lint rules --- src/memos/llms/lazyllm_onlinechat.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/memos/llms/lazyllm_onlinechat.py b/src/memos/llms/lazyllm_onlinechat.py index 5325c84f6..fa3f63980 100644 --- a/src/memos/llms/lazyllm_onlinechat.py +++ b/src/memos/llms/lazyllm_onlinechat.py @@ -1,6 +1,7 @@ import json from collections.abc import Generator +from contextlib import suppress from typing import Any from memos.configs.llm import LazyLLMOnlineChatConfig @@ -100,10 +101,8 @@ def tool_call_parser(self, tool_calls: list[dict]) -> list[dict]: function_data = tool_call.get("function", {}) arguments = function_data.get("arguments", {}) if isinstance(arguments, str): - try: + with suppress(json.JSONDecodeError): arguments = json.loads(arguments) - except json.JSONDecodeError: - pass parsed_calls.append( { "tool_call_id": tool_call.get("id", ""), From b4d1c8f432f2a6b3c40b3580b51a7e47f3a6fd3e Mon Sep 17 00:00:00 2001 From: Yuang-Deng Date: Fri, 27 Feb 2026 14:48:31 +0800 Subject: [PATCH 3/7] feat(llm): switch lazyllm api-key namespace prefix to MEMOS --- 
docker/.env.example | 6 +++++- src/memos/api/config.py | 3 ++- src/memos/configs/llm.py | 1 + src/memos/llms/lazyllm_onlinechat.py | 2 +- tests/configs/test_llm.py | 2 ++ tests/llms/test_lazyllm_onlinechat.py | 8 ++++++-- 6 files changed, 17 insertions(+), 5 deletions(-) diff --git a/docker/.env.example b/docker/.env.example index 46f03ca8b..424800c78 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -29,11 +29,15 @@ MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm | lazyl OPENAI_API_KEY=sk-xxx # [required] when provider=openai OPENAI_API_BASE=https://api.openai.com/v1 # [required] base for the key # LazyLLM backend options (optional, used when provider=lazyllm) +MOS_LAZYLLM_NAMESPACE=memos MOS_LAZYLLM_SOURCE=openai -MOS_LAZYLLM_API_KEY=${OPENAI_API_KEY} MOS_LAZYLLM_API_BASE=${OPENAI_API_BASE} MOS_LAZYLLM_STREAM=false MOS_LAZYLLM_SKIP_AUTH=false +# Optional override to force a single key (bypasses namespace supplier key lookup) +MOS_MEMOS_API_KEY= +# LazyLLM namespace API keys (namespace = memos => MEMOS_*_API_KEY) +MEMOS_OPENAI_API_KEY=${OPENAI_API_KEY} ## MemReader / retrieval LLM MEMRADER_MODEL=gpt-4o-mini diff --git a/src/memos/api/config.py b/src/memos/api/config.py index e4613b08f..72ded20ac 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -307,8 +307,9 @@ def lazyllm_config() -> dict[str, Any]: "top_p": float(os.getenv("MOS_TOP_P", "0.9")), "top_k": int(os.getenv("MOS_TOP_K", "50")), "remove_think_prefix": True, + "namespace": os.getenv("MOS_LAZYLLM_NAMESPACE", "memos"), "source": os.getenv("MOS_LAZYLLM_SOURCE", "openai"), - "api_key": os.getenv("MOS_LAZYLLM_API_KEY", os.getenv("OPENAI_API_KEY")), + "api_key": os.getenv("MOS_MEMOS_API_KEY"), "api_base": os.getenv("MOS_LAZYLLM_API_BASE", os.getenv("OPENAI_API_BASE")), "stream": os.getenv("MOS_LAZYLLM_STREAM", "false").lower() == "true", "skip_auth": os.getenv("MOS_LAZYLLM_SKIP_AUTH", "false").lower() == "true", diff --git a/src/memos/configs/llm.py 
b/src/memos/configs/llm.py index da03ef3fa..2b54f6240 100644 --- a/src/memos/configs/llm.py +++ b/src/memos/configs/llm.py @@ -120,6 +120,7 @@ class VLLMLLMConfig(BaseLLMConfig): class LazyLLMOnlineChatConfig(BaseLLMConfig): + namespace: str = Field(default="memos", description="LazyLLM config namespace") source: str = Field(default="openai", description="LazyLLM online source name") api_key: str | None = Field(default=None, description="API key for LazyLLM online source") api_base: str | None = Field(default=None, description="Base URL for LazyLLM online source") diff --git a/src/memos/llms/lazyllm_onlinechat.py b/src/memos/llms/lazyllm_onlinechat.py index fa3f63980..cfca16117 100644 --- a/src/memos/llms/lazyllm_onlinechat.py +++ b/src/memos/llms/lazyllm_onlinechat.py @@ -42,7 +42,7 @@ def __init__(self, config: LazyLLMOnlineChatConfig): if config.extra_kwargs: module_kwargs.update(config.extra_kwargs) - self.client = lazyllm.OnlineChatModule(**module_kwargs) + self.client = lazyllm.namespace(config.namespace).OnlineChatModule(**module_kwargs) logger.info("LazyLLM OnlineChat LLM instance initialized") def _normalize_messages(self, messages: MessageList | str) -> MessageList: diff --git a/tests/configs/test_llm.py b/tests/configs/test_llm.py index 5ad84550a..7d17c275f 100644 --- a/tests/configs/test_llm.py +++ b/tests/configs/test_llm.py @@ -154,6 +154,7 @@ def test_lazyllm_online_chat_config(): "top_k", "remove_think_prefix", "default_headers", + "namespace", "source", "api_key", "api_base", @@ -168,6 +169,7 @@ def test_lazyllm_online_chat_config(): LazyLLMOnlineChatConfig, { "model_name_or_path": "gpt-4o-mini", + "namespace": "memos", "source": "openai", "api_key": "sk-test", "api_base": "https://api.openai.com/v1", diff --git a/tests/llms/test_lazyllm_onlinechat.py b/tests/llms/test_lazyllm_onlinechat.py index 59cf2796b..69eca2da7 100644 --- a/tests/llms/test_lazyllm_onlinechat.py +++ b/tests/llms/test_lazyllm_onlinechat.py @@ -13,8 +13,9 @@ def 
test_generate_with_mocked_lazyllm_backend(self): """Test LLMFactory with mocked lazyllm backend.""" mock_client = MagicMock() mock_client.return_value = {"content": "Hello from LazyLLM", "tool_calls": None} + mock_namespace_module = SimpleNamespace(OnlineChatModule=MagicMock(return_value=mock_client)) - mock_lazyllm = SimpleNamespace(OnlineChatModule=MagicMock(return_value=mock_client)) + mock_lazyllm = SimpleNamespace(namespace=MagicMock(return_value=mock_namespace_module)) original_lazyllm = sys.modules.get("lazyllm") sys.modules["lazyllm"] = mock_lazyllm try: @@ -26,12 +27,14 @@ def test_generate_with_mocked_lazyllm_backend(self): "source": "openai", "api_key": "sk-xxxx", "api_base": "https://api.openai.com/v1", + "namespace": "memos", }, } ) llm = LLMFactory.from_config(config) response = llm.generate([{"role": "user", "content": "hello"}]) self.assertEqual(response, "Hello from LazyLLM") + mock_lazyllm.namespace.assert_called_once_with("memos") finally: if original_lazyllm is None: sys.modules.pop("lazyllm", None) @@ -50,8 +53,9 @@ def test_generate_with_tool_calls(self): } ], } + mock_namespace_module = SimpleNamespace(OnlineChatModule=MagicMock(return_value=mock_client)) - mock_lazyllm = SimpleNamespace(OnlineChatModule=MagicMock(return_value=mock_client)) + mock_lazyllm = SimpleNamespace(namespace=MagicMock(return_value=mock_namespace_module)) original_lazyllm = sys.modules.get("lazyllm") sys.modules["lazyllm"] = mock_lazyllm try: From 659af6e108f42c3c7b6de191ae88d7a960a88c07 Mon Sep 17 00:00:00 2001 From: Yuang-Deng Date: Fri, 27 Feb 2026 14:53:01 +0800 Subject: [PATCH 4/7] feat(llm): position lazyllm backend as unified supplier interface --- docker/.env.example | 2 ++ src/memos/api/config.py | 2 +- src/memos/configs/llm.py | 5 ++++- src/memos/llms/lazyllm_onlinechat.py | 14 ++++++++++++-- tests/llms/test_lazyllm_onlinechat.py | 26 ++++++++++++++++++++++++++ 5 files changed, 45 insertions(+), 4 deletions(-) diff --git a/docker/.env.example 
b/docker/.env.example index 424800c78..622afad3f 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -29,6 +29,8 @@ MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm | lazyl OPENAI_API_KEY=sk-xxx # [required] when provider=openai OPENAI_API_BASE=https://api.openai.com/v1 # [required] base for the key # LazyLLM backend options (optional, used when provider=lazyllm) +# MemOS delegates supplier support to LazyLLM. Any source supported by LazyLLM can be used. +# Example values for MOS_LAZYLLM_SOURCE: openai / qwen / glm / deepseek / kimi / siliconflow / minimax / ... MOS_LAZYLLM_NAMESPACE=memos MOS_LAZYLLM_SOURCE=openai MOS_LAZYLLM_API_BASE=${OPENAI_API_BASE} diff --git a/src/memos/api/config.py b/src/memos/api/config.py index 72ded20ac..85c774d4e 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -310,7 +310,7 @@ def lazyllm_config() -> dict[str, Any]: "namespace": os.getenv("MOS_LAZYLLM_NAMESPACE", "memos"), "source": os.getenv("MOS_LAZYLLM_SOURCE", "openai"), "api_key": os.getenv("MOS_MEMOS_API_KEY"), - "api_base": os.getenv("MOS_LAZYLLM_API_BASE", os.getenv("OPENAI_API_BASE")), + "api_base": os.getenv("MOS_LAZYLLM_API_BASE"), "stream": os.getenv("MOS_LAZYLLM_STREAM", "false").lower() == "true", "skip_auth": os.getenv("MOS_LAZYLLM_SKIP_AUTH", "false").lower() == "true", } diff --git a/src/memos/configs/llm.py b/src/memos/configs/llm.py index 2b54f6240..b69d015cf 100644 --- a/src/memos/configs/llm.py +++ b/src/memos/configs/llm.py @@ -121,7 +121,10 @@ class VLLMLLMConfig(BaseLLMConfig): class LazyLLMOnlineChatConfig(BaseLLMConfig): namespace: str = Field(default="memos", description="LazyLLM config namespace") - source: str = Field(default="openai", description="LazyLLM online source name") + source: str = Field( + default="openai", + description="LazyLLM supplier source name (for example: openai/qwen/glm/deepseek)", + ) api_key: str | None = Field(default=None, description="API key for LazyLLM online source") api_base: str | 
None = Field(default=None, description="Base URL for LazyLLM online source") stream: bool = Field(default=False, description="Enable stream mode in LazyLLM module") diff --git a/src/memos/llms/lazyllm_onlinechat.py b/src/memos/llms/lazyllm_onlinechat.py index cfca16117..42b47a9e1 100644 --- a/src/memos/llms/lazyllm_onlinechat.py +++ b/src/memos/llms/lazyllm_onlinechat.py @@ -15,7 +15,7 @@ class LazyLLMOnlineChatLLM(BaseLLM): - """LazyLLM OnlineChat backend.""" + """LazyLLM OnlineChat backend as a unified supplier interface.""" def __init__(self, config: LazyLLMOnlineChatConfig): self.config = config @@ -42,7 +42,17 @@ def __init__(self, config: LazyLLMOnlineChatConfig): if config.extra_kwargs: module_kwargs.update(config.extra_kwargs) - self.client = lazyllm.namespace(config.namespace).OnlineChatModule(**module_kwargs) + try: + self.client = lazyllm.namespace(config.namespace).OnlineChatModule(**module_kwargs) + except Exception as exc: + if "Unsupported source" in str(exc): + raise ValueError( + f"Unsupported LazyLLM source '{config.source}'. " + "MemOS uses LazyLLM as a unified supplier interface. 
" + "Please use a source supported by LazyLLM, or open an issue/PR in " + "https://github.com/LazyAGI/LazyLLM" + ) from exc + raise logger.info("LazyLLM OnlineChat LLM instance initialized") def _normalize_messages(self, messages: MessageList | str) -> MessageList: diff --git a/tests/llms/test_lazyllm_onlinechat.py b/tests/llms/test_lazyllm_onlinechat.py index 69eca2da7..94442d213 100644 --- a/tests/llms/test_lazyllm_onlinechat.py +++ b/tests/llms/test_lazyllm_onlinechat.py @@ -82,3 +82,29 @@ def test_generate_with_tool_calls(self): sys.modules.pop("lazyllm", None) else: sys.modules["lazyllm"] = original_lazyllm + + def test_init_with_unsupported_source(self): + """Test unsupported source message from LazyLLM.""" + mock_namespace_module = SimpleNamespace( + OnlineChatModule=MagicMock(side_effect=AssertionError("Unsupported source: unknown")) + ) + mock_lazyllm = SimpleNamespace(namespace=MagicMock(return_value=mock_namespace_module)) + original_lazyllm = sys.modules.get("lazyllm") + sys.modules["lazyllm"] = mock_lazyllm + try: + config = LLMConfigFactory.model_validate( + { + "backend": "lazyllm", + "config": { + "model_name_or_path": "test-model", + "source": "unknown", + }, + } + ) + with self.assertRaisesRegex(ValueError, "MemOS uses LazyLLM as a unified supplier interface"): + LLMFactory.from_config(config) + finally: + if original_lazyllm is None: + sys.modules.pop("lazyllm", None) + else: + sys.modules["lazyllm"] = original_lazyllm From ddbfeb7d3ba67bfb0b027e6b5ec1f9fe2d76d611 Mon Sep 17 00:00:00 2001 From: Yuang-Deng Date: Fri, 27 Feb 2026 14:55:48 +0800 Subject: [PATCH 5/7] refactor(llm): align lazyllm namespace and api-key prefix to MOS --- docker/.env.example | 8 ++++---- src/memos/api/config.py | 4 ++-- src/memos/configs/llm.py | 2 +- tests/configs/test_llm.py | 2 +- tests/llms/test_lazyllm_onlinechat.py | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docker/.env.example b/docker/.env.example index 622afad3f..3fc85a825 100644 --- 
a/docker/.env.example +++ b/docker/.env.example @@ -31,15 +31,15 @@ OPENAI_API_BASE=https://api.openai.com/v1 # [required] base for the key # LazyLLM backend options (optional, used when provider=lazyllm) # MemOS delegates supplier support to LazyLLM. Any source supported by LazyLLM can be used. # Example values for MOS_LAZYLLM_SOURCE: openai / qwen / glm / deepseek / kimi / siliconflow / minimax / ... -MOS_LAZYLLM_NAMESPACE=memos +MOS_LAZYLLM_NAMESPACE=mos MOS_LAZYLLM_SOURCE=openai MOS_LAZYLLM_API_BASE=${OPENAI_API_BASE} MOS_LAZYLLM_STREAM=false MOS_LAZYLLM_SKIP_AUTH=false # Optional override to force a single key (bypasses namespace supplier key lookup) -MOS_MEMOS_API_KEY= -# LazyLLM namespace API keys (namespace = memos => MEMOS_*_API_KEY) -MEMOS_OPENAI_API_KEY=${OPENAI_API_KEY} +MOS_LAZYLLM_API_KEY= +# LazyLLM namespace API keys (namespace = mos => MOS_*_API_KEY) +MOS_OPENAI_API_KEY=${OPENAI_API_KEY} ## MemReader / retrieval LLM MEMRADER_MODEL=gpt-4o-mini diff --git a/src/memos/api/config.py b/src/memos/api/config.py index 85c774d4e..8b9d43810 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -307,9 +307,9 @@ def lazyllm_config() -> dict[str, Any]: "top_p": float(os.getenv("MOS_TOP_P", "0.9")), "top_k": int(os.getenv("MOS_TOP_K", "50")), "remove_think_prefix": True, - "namespace": os.getenv("MOS_LAZYLLM_NAMESPACE", "memos"), + "namespace": os.getenv("MOS_LAZYLLM_NAMESPACE", "mos"), "source": os.getenv("MOS_LAZYLLM_SOURCE", "openai"), - "api_key": os.getenv("MOS_MEMOS_API_KEY"), + "api_key": os.getenv("MOS_LAZYLLM_API_KEY"), "api_base": os.getenv("MOS_LAZYLLM_API_BASE"), "stream": os.getenv("MOS_LAZYLLM_STREAM", "false").lower() == "true", "skip_auth": os.getenv("MOS_LAZYLLM_SKIP_AUTH", "false").lower() == "true", diff --git a/src/memos/configs/llm.py b/src/memos/configs/llm.py index b69d015cf..5aaf39216 100644 --- a/src/memos/configs/llm.py +++ b/src/memos/configs/llm.py @@ -120,7 +120,7 @@ class VLLMLLMConfig(BaseLLMConfig): class 
LazyLLMOnlineChatConfig(BaseLLMConfig): - namespace: str = Field(default="memos", description="LazyLLM config namespace") + namespace: str = Field(default="mos", description="LazyLLM config namespace") source: str = Field( default="openai", description="LazyLLM supplier source name (for example: openai/qwen/glm/deepseek)", diff --git a/tests/configs/test_llm.py b/tests/configs/test_llm.py index 7d17c275f..64d4fad82 100644 --- a/tests/configs/test_llm.py +++ b/tests/configs/test_llm.py @@ -169,7 +169,7 @@ def test_lazyllm_online_chat_config(): LazyLLMOnlineChatConfig, { "model_name_or_path": "gpt-4o-mini", - "namespace": "memos", + "namespace": "mos", "source": "openai", "api_key": "sk-test", "api_base": "https://api.openai.com/v1", diff --git a/tests/llms/test_lazyllm_onlinechat.py b/tests/llms/test_lazyllm_onlinechat.py index 94442d213..065104475 100644 --- a/tests/llms/test_lazyllm_onlinechat.py +++ b/tests/llms/test_lazyllm_onlinechat.py @@ -27,14 +27,14 @@ def test_generate_with_mocked_lazyllm_backend(self): "source": "openai", "api_key": "sk-xxxx", "api_base": "https://api.openai.com/v1", - "namespace": "memos", + "namespace": "mos", }, } ) llm = LLMFactory.from_config(config) response = llm.generate([{"role": "user", "content": "hello"}]) self.assertEqual(response, "Hello from LazyLLM") - mock_lazyllm.namespace.assert_called_once_with("memos") + mock_lazyllm.namespace.assert_called_once_with("mos") finally: if original_lazyllm is None: sys.modules.pop("lazyllm", None) From becb4f1a438e46e517ccaf9669a25fff72ffa32b Mon Sep 17 00:00:00 2001 From: Yuang-Deng Date: Fri, 27 Feb 2026 15:15:30 +0800 Subject: [PATCH 6/7] refactor(llm): remove single-key override for lazyllm namespace auth --- docker/.env.example | 2 -- src/memos/api/config.py | 1 - 2 files changed, 3 deletions(-) diff --git a/docker/.env.example b/docker/.env.example index 3fc85a825..4db15f4b6 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -36,8 +36,6 @@ MOS_LAZYLLM_SOURCE=openai 
MOS_LAZYLLM_API_BASE=${OPENAI_API_BASE} MOS_LAZYLLM_STREAM=false MOS_LAZYLLM_SKIP_AUTH=false -# Optional override to force a single key (bypasses namespace supplier key lookup) -MOS_LAZYLLM_API_KEY= # LazyLLM namespace API keys (namespace = mos => MOS_*_API_KEY) MOS_OPENAI_API_KEY=${OPENAI_API_KEY} diff --git a/src/memos/api/config.py b/src/memos/api/config.py index 8b9d43810..7c897e804 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -309,7 +309,6 @@ def lazyllm_config() -> dict[str, Any]: "remove_think_prefix": True, "namespace": os.getenv("MOS_LAZYLLM_NAMESPACE", "mos"), "source": os.getenv("MOS_LAZYLLM_SOURCE", "openai"), - "api_key": os.getenv("MOS_LAZYLLM_API_KEY"), "api_base": os.getenv("MOS_LAZYLLM_API_BASE"), "stream": os.getenv("MOS_LAZYLLM_STREAM", "false").lower() == "true", "skip_auth": os.getenv("MOS_LAZYLLM_SKIP_AUTH", "false").lower() == "true", From 7591cc756e242a5eeb6fa2d5a0e322407a9852c9 Mon Sep 17 00:00:00 2001 From: Yuang-Deng Date: Fri, 27 Feb 2026 15:45:34 +0800 Subject: [PATCH 7/7] refactor(llm): hardcode lazyllm namespace and remove MOS_LAZYLLM env knobs --- docker/.env.example | 13 ++++--------- src/memos/api/config.py | 5 ----- src/memos/configs/llm.py | 5 ++--- src/memos/llms/lazyllm_onlinechat.py | 16 +++++++++------- tests/configs/test_llm.py | 2 -- tests/llms/test_lazyllm_onlinechat.py | 1 - 6 files changed, 15 insertions(+), 27 deletions(-) diff --git a/docker/.env.example b/docker/.env.example index 4db15f4b6..17dd85776 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -28,16 +28,11 @@ MOS_TOP_P=0.9 MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm | lazyllm OPENAI_API_KEY=sk-xxx # [required] when provider=openai OPENAI_API_BASE=https://api.openai.com/v1 # [required] base for the key -# LazyLLM backend options (optional, used when provider=lazyllm) -# MemOS delegates supplier support to LazyLLM. Any source supported by LazyLLM can be used. 
-# Example values for MOS_LAZYLLM_SOURCE: openai / qwen / glm / deepseek / kimi / siliconflow / minimax / ...
-MOS_LAZYLLM_NAMESPACE=mos
-MOS_LAZYLLM_SOURCE=openai
-MOS_LAZYLLM_API_BASE=${OPENAI_API_BASE}
-MOS_LAZYLLM_STREAM=false
-MOS_LAZYLLM_SKIP_AUTH=false
-# LazyLLM namespace API keys (namespace = mos => MOS_*_API_KEY)
+# LazyLLM backend (provider=lazyllm): configure supplier keys under MOS namespace.
+# The namespace is fixed to `mos`, so the key pattern is MOS_<SOURCE>_API_KEY.
 MOS_OPENAI_API_KEY=${OPENAI_API_KEY}
+MOS_QWEN_API_KEY=
+MOS_DEEPSEEK_API_KEY=
 
 ## MemReader / retrieval LLM
 MEMRADER_MODEL=gpt-4o-mini
diff --git a/src/memos/api/config.py b/src/memos/api/config.py
index 7c897e804..7bfc30e13 100644
--- a/src/memos/api/config.py
+++ b/src/memos/api/config.py
@@ -307,11 +307,6 @@ def lazyllm_config() -> dict[str, Any]:
             "top_p": float(os.getenv("MOS_TOP_P", "0.9")),
             "top_k": int(os.getenv("MOS_TOP_K", "50")),
             "remove_think_prefix": True,
-            "namespace": os.getenv("MOS_LAZYLLM_NAMESPACE", "mos"),
-            "source": os.getenv("MOS_LAZYLLM_SOURCE", "openai"),
-            "api_base": os.getenv("MOS_LAZYLLM_API_BASE"),
-            "stream": os.getenv("MOS_LAZYLLM_STREAM", "false").lower() == "true",
-            "skip_auth": os.getenv("MOS_LAZYLLM_SKIP_AUTH", "false").lower() == "true",
         }
 
     @staticmethod
diff --git a/src/memos/configs/llm.py b/src/memos/configs/llm.py
index 5aaf39216..9e2e2b140 100644
--- a/src/memos/configs/llm.py
+++ b/src/memos/configs/llm.py
@@ -120,9 +120,8 @@ class VLLMLLMConfig(BaseLLMConfig):
 
 
 class LazyLLMOnlineChatConfig(BaseLLMConfig):
-    namespace: str = Field(default="mos", description="LazyLLM config namespace")
-    source: str = Field(
-        default="openai",
+    source: str | None = Field(
+        default=None,
         description="LazyLLM supplier source name (for example: openai/qwen/glm/deepseek)",
     )
     api_key: str | None = Field(default=None, description="API key for LazyLLM online source")
diff --git a/src/memos/llms/lazyllm_onlinechat.py b/src/memos/llms/lazyllm_onlinechat.py
index 
42b47a9e1..3c5437441 100644 --- a/src/memos/llms/lazyllm_onlinechat.py +++ b/src/memos/llms/lazyllm_onlinechat.py @@ -16,6 +16,7 @@ class LazyLLMOnlineChatLLM(BaseLLM): """LazyLLM OnlineChat backend as a unified supplier interface.""" + _NAMESPACE = "mos" def __init__(self, config: LazyLLMOnlineChatConfig): self.config = config @@ -27,12 +28,13 @@ def __init__(self, config: LazyLLMOnlineChatConfig): "Install with: pip install 'git+https://github.com/LazyAGI/LazyLLM.git@main'" ) from exc - module_kwargs: dict[str, Any] = { - "source": config.source, - "model": config.model_name_or_path, - "stream": config.stream, - "skip_auth": config.skip_auth, - } + module_kwargs: dict[str, Any] = {"model": config.model_name_or_path} + if config.source: + module_kwargs["source"] = config.source + if config.stream: + module_kwargs["stream"] = config.stream + if config.skip_auth: + module_kwargs["skip_auth"] = config.skip_auth if config.api_base: module_kwargs["base_url"] = config.api_base if config.api_key: @@ -43,7 +45,7 @@ def __init__(self, config: LazyLLMOnlineChatConfig): module_kwargs.update(config.extra_kwargs) try: - self.client = lazyllm.namespace(config.namespace).OnlineChatModule(**module_kwargs) + self.client = lazyllm.namespace(self._NAMESPACE).OnlineChatModule(**module_kwargs) except Exception as exc: if "Unsupported source" in str(exc): raise ValueError( diff --git a/tests/configs/test_llm.py b/tests/configs/test_llm.py index 64d4fad82..5ad84550a 100644 --- a/tests/configs/test_llm.py +++ b/tests/configs/test_llm.py @@ -154,7 +154,6 @@ def test_lazyllm_online_chat_config(): "top_k", "remove_think_prefix", "default_headers", - "namespace", "source", "api_key", "api_base", @@ -169,7 +168,6 @@ def test_lazyllm_online_chat_config(): LazyLLMOnlineChatConfig, { "model_name_or_path": "gpt-4o-mini", - "namespace": "mos", "source": "openai", "api_key": "sk-test", "api_base": "https://api.openai.com/v1", diff --git a/tests/llms/test_lazyllm_onlinechat.py 
b/tests/llms/test_lazyllm_onlinechat.py index 065104475..01a0633f3 100644 --- a/tests/llms/test_lazyllm_onlinechat.py +++ b/tests/llms/test_lazyllm_onlinechat.py @@ -27,7 +27,6 @@ def test_generate_with_mocked_lazyllm_backend(self): "source": "openai", "api_key": "sk-xxxx", "api_base": "https://api.openai.com/v1", - "namespace": "mos", }, } )