Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion docker/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,14 @@ MOS_MAX_TOKENS=2048
# Top-P for LLM in the Product API
MOS_TOP_P=0.9
# LLM for the Product API backend
MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm
MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm | lazyllm
OPENAI_API_KEY=sk-xxx # [required] when provider=openai
OPENAI_API_BASE=https://api.openai.com/v1 # [required] base for the key
# LazyLLM backend (provider=lazyllm): configure supplier keys under MOS namespace.
# The namespace is fixed to `mos`, so key pattern is MOS_<SOURCE>_API_KEY.
MOS_OPENAI_API_KEY=${OPENAI_API_KEY}
MOS_QWEN_API_KEY=
MOS_DEEPSEEK_API_KEY=

## MemReader / retrieval LLM
MEMRADER_MODEL=gpt-4o-mini
Expand Down
16 changes: 16 additions & 0 deletions src/memos/api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,18 @@ def vllm_config() -> dict[str, Any]:
"model_schema": os.getenv("MOS_MODEL_SCHEMA", "memos.configs.llm.VLLMLLMConfig"),
}

@staticmethod
def lazyllm_config() -> dict[str, Any]:
"""Get LazyLLM OnlineChat configuration."""
return {
"model_name_or_path": os.getenv("MOS_CHAT_MODEL", "gpt-4o-mini"),
"temperature": float(os.getenv("MOS_CHAT_TEMPERATURE", "0.8")),
"max_tokens": int(os.getenv("MOS_MAX_TOKENS", "8000")),
"top_p": float(os.getenv("MOS_TOP_P", "0.9")),
"top_k": int(os.getenv("MOS_TOP_K", "50")),
"remove_think_prefix": True,
}

@staticmethod
def get_activation_config() -> dict[str, Any]:
"""Get Ollama configuration."""
Expand Down Expand Up @@ -786,12 +798,14 @@ def get_product_default_config() -> dict[str, Any]:
openai_config = APIConfig.get_openai_config()
qwen_config = APIConfig.qwen_config()
vllm_config = APIConfig.vllm_config()
lazyllm_config = APIConfig.lazyllm_config()
reader_config = APIConfig.get_reader_config()

backend_model = {
"openai": openai_config,
"huggingface": qwen_config,
"vllm": vllm_config,
"lazyllm": lazyllm_config,
}
backend = os.getenv("MOS_CHAT_MODEL_PROVIDER", "openai")
mysql_config = APIConfig.get_mysql_config()
Expand Down Expand Up @@ -905,13 +919,15 @@ def create_user_config(user_name: str, user_id: str) -> tuple["MOSConfig", "Gene
openai_config = APIConfig.get_openai_config()
qwen_config = APIConfig.qwen_config()
vllm_config = APIConfig.vllm_config()
lazyllm_config = APIConfig.lazyllm_config()
mysql_config = APIConfig.get_mysql_config()
reader_config = APIConfig.get_reader_config()
backend = os.getenv("MOS_CHAT_MODEL_PROVIDER", "openai")
backend_model = {
"openai": openai_config,
"huggingface": qwen_config,
"vllm": vllm_config,
"lazyllm": lazyllm_config,
}
# Create MOSConfig
config_dict = {
Expand Down
16 changes: 16 additions & 0 deletions src/memos/configs/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,21 @@ class VLLMLLMConfig(BaseLLMConfig):
extra_body: Any = Field(default=None, description="Extra options for API")


class LazyLLMOnlineChatConfig(BaseLLMConfig):
    """Configuration for the LazyLLM OnlineChat LLM backend (factory backend "lazyllm").

    Inherits the common chat-model fields (model_name_or_path, temperature,
    max_tokens, top_p, top_k, remove_think_prefix, ...) from BaseLLMConfig and
    adds the LazyLLM-specific connection options below. All extras default to
    None/False so that LazyLLM's own defaults apply when they are not set.
    """

    # Supplier selected inside LazyLLM; when None, LazyLLM picks its default source.
    source: str | None = Field(
        default=None,
        description="LazyLLM supplier source name (for example: openai/qwen/glm/deepseek)",
    )
    api_key: str | None = Field(default=None, description="API key for LazyLLM online source")
    api_base: str | None = Field(default=None, description="Base URL for LazyLLM online source")
    stream: bool = Field(default=False, description="Enable stream mode in LazyLLM module")
    skip_auth: bool = Field(default=False, description="Skip LazyLLM API key validation")
    type: str | None = Field(default=None, description="Optional model type for LazyLLM module")
    # Escape hatch: merged verbatim into the OnlineChatModule constructor kwargs.
    extra_kwargs: dict[str, Any] | None = Field(
        default=None, description="Extra kwargs for lazyllm.OnlineChatModule"
    )


class LLMConfigFactory(BaseConfig):
"""Factory class for creating LLM configurations."""

Expand All @@ -135,6 +150,7 @@ class LLMConfigFactory(BaseConfig):
"qwen": QwenLLMConfig,
"deepseek": DeepSeekLLMConfig,
"openai_new": OpenAIResponsesLLMConfig,
"lazyllm": LazyLLMOnlineChatConfig,
}

@field_validator("backend")
Expand Down
2 changes: 2 additions & 0 deletions src/memos/llms/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from memos.llms.deepseek import DeepSeekLLM
from memos.llms.hf import HFLLM
from memos.llms.hf_singleton import HFSingletonLLM
from memos.llms.lazyllm_onlinechat import LazyLLMOnlineChatLLM
from memos.llms.ollama import OllamaLLM
from memos.llms.openai import AzureLLM, OpenAILLM
from memos.llms.openai_new import OpenAIResponsesLLM
Expand All @@ -26,6 +27,7 @@ class LLMFactory(BaseLLM):
"qwen": QwenLLM,
"deepseek": DeepSeekLLM,
"openai_new": OpenAIResponsesLLM,
"lazyllm": LazyLLMOnlineChatLLM,
}

@classmethod
Expand Down
125 changes: 125 additions & 0 deletions src/memos/llms/lazyllm_onlinechat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import json

from collections.abc import Generator
from contextlib import suppress
from typing import Any

from memos.configs.llm import LazyLLMOnlineChatConfig
from memos.llms.base import BaseLLM
from memos.llms.utils import remove_thinking_tags
from memos.log import get_logger
from memos.types import MessageList


logger = get_logger(__name__)


class LazyLLMOnlineChatLLM(BaseLLM):
    """LazyLLM OnlineChat backend as a unified supplier interface.

    Wraps ``lazyllm.OnlineChatModule`` so any supplier LazyLLM supports
    (e.g. openai/qwen/deepseek) can serve as the MemOS chat LLM behind the
    common :class:`BaseLLM` interface.
    """

    # Fixed LazyLLM namespace: with namespace "mos", supplier API keys are
    # resolved from MOS_<SOURCE>_API_KEY environment variables
    # (see docker/.env.example in this change).
    _NAMESPACE = "mos"

    def __init__(self, config: LazyLLMOnlineChatConfig):
        """Create the underlying ``OnlineChatModule`` from *config*.

        Raises:
            ImportError: if the optional ``lazyllm`` package is not installed.
            ValueError: if ``config.source`` is a supplier LazyLLM rejects.
        """
        self.config = config
        # `lazyllm` is an optional dependency; import lazily so the rest of
        # memos.llms stays importable without it.
        try:
            import lazyllm
        except ImportError as exc:
            raise ImportError(
                "LazyLLM backend requires `lazyllm`. "
                "Install with: pip install 'git+https://github.com/LazyAGI/LazyLLM.git@main'"
            ) from exc

        # Forward only the options the user actually set, so LazyLLM's own
        # defaults apply for everything left at None/False.
        module_kwargs: dict[str, Any] = {"model": config.model_name_or_path}
        if config.source:
            module_kwargs["source"] = config.source
        if config.stream:
            module_kwargs["stream"] = config.stream
        if config.skip_auth:
            module_kwargs["skip_auth"] = config.skip_auth
        if config.api_base:
            module_kwargs["base_url"] = config.api_base
        if config.api_key:
            module_kwargs["api_key"] = config.api_key
        if config.type:
            module_kwargs["type"] = config.type
        # extra_kwargs is an escape hatch and may override any key set above.
        if config.extra_kwargs:
            module_kwargs.update(config.extra_kwargs)

        try:
            self.client = lazyllm.namespace(self._NAMESPACE).OnlineChatModule(**module_kwargs)
        except Exception as exc:
            # Translate LazyLLM's "Unsupported source" failure into a
            # ValueError with MemOS-specific guidance; re-raise anything else.
            if "Unsupported source" in str(exc):
                raise ValueError(
                    f"Unsupported LazyLLM source '{config.source}'. "
                    "MemOS uses LazyLLM as a unified supplier interface. "
                    "Please use a source supported by LazyLLM, or open an issue/PR in "
                    "https://github.com/LazyAGI/LazyLLM"
                ) from exc
            raise
        logger.info("LazyLLM OnlineChat LLM instance initialized")

    def _normalize_messages(self, messages: MessageList | str) -> MessageList:
        """Wrap a bare prompt string as a single user message; pass lists through."""
        if isinstance(messages, str):
            return [{"role": "user", "content": messages}]
        return messages

    def generate(self, messages: MessageList | str, **kwargs) -> str | list[dict]:
        """Run one non-streaming chat completion.

        Per-call kwargs (model_name_or_path, temperature, max_tokens, top_p,
        top_k, tools) override the values captured in ``self.config``.

        Returns:
            The response text, or a list of parsed tool calls (see
            :meth:`tool_call_parser`) when the model answered with
            ``tool_calls``.
        """
        normalized_messages = self._normalize_messages(messages)
        runtime_model = kwargs.get("model_name_or_path", self.config.model_name_or_path)

        request_kwargs: dict[str, Any] = {
            "messages": normalized_messages,
            "stream_output": False,
            "model_name": runtime_model,
            "temperature": kwargs.get("temperature", self.config.temperature),
            "max_tokens": kwargs.get("max_tokens", self.config.max_tokens),
            "top_p": kwargs.get("top_p", self.config.top_p),
            "top_k": kwargs.get("top_k", self.config.top_k),
        }
        if kwargs.get("tools"):
            request_kwargs["tools"] = kwargs["tools"]

        # NOTE(review): the positional query is left empty because the whole
        # conversation is supplied via `messages` — presumably how
        # OnlineChatModule expects chat input; confirm against LazyLLM docs.
        response = self.client("", **request_kwargs)
        if isinstance(response, dict):
            # Tool-call answers take precedence over plain content.
            tool_calls = response.get("tool_calls")
            if isinstance(tool_calls, list) and len(tool_calls) > 0:
                return self.tool_call_parser(tool_calls)
            response_content = response.get("content", "")
            reasoning_content = response.get("reasoning_content")
            if isinstance(reasoning_content, str) and reasoning_content:
                reasoning_content = f"<think>{reasoning_content}</think>"
            # remove_think_prefix drops the reasoning entirely and strips any
            # <think> tags embedded in the content itself.
            if self.config.remove_think_prefix:
                return remove_thinking_tags(response_content)
            if reasoning_content:
                return reasoning_content + (response_content or "")
            return response_content or ""
        if isinstance(response, str):
            return remove_thinking_tags(response) if self.config.remove_think_prefix else response
        # Fallback for unexpected response types from LazyLLM.
        return str(response)

    def generate_stream(self, messages: MessageList | str, **kwargs) -> Generator[str, None, None]:
        """Yield the response as a stream of text chunks.

        NOTE(review): this is pseudo-streaming — the full response is produced
        by :meth:`generate` and yielded as a single chunk. Tool calls are not
        supported in stream mode: when `tools` is passed the generator logs
        and yields nothing.
        """
        if kwargs.get("tools"):
            logger.info("stream api not support tools")
            return

        response = self.generate(messages, **kwargs)
        if isinstance(response, str):
            yield response
            return
        # Non-string result (e.g. parsed tool calls) is serialized to JSON.
        yield json.dumps(response, ensure_ascii=False)

    def tool_call_parser(self, tool_calls: list[dict]) -> list[dict]:
        """Normalize raw tool-call dicts into MemOS shape.

        Each entry becomes {"tool_call_id", "function_name", "arguments"}.
        String arguments are JSON-decoded when possible; strings that fail to
        decode are passed through unchanged.
        """
        parsed_calls = []
        for tool_call in tool_calls:
            function_data = tool_call.get("function", {})
            arguments = function_data.get("arguments", {})
            if isinstance(arguments, str):
                with suppress(json.JSONDecodeError):
                    arguments = json.loads(arguments)
            parsed_calls.append(
                {
                    "tool_call_id": tool_call.get("id", ""),
                    "function_name": function_data.get("name", ""),
                    "arguments": arguments,
                }
            )
        return parsed_calls
6 changes: 4 additions & 2 deletions src/memos/mem_os/product.py
Original file line number Diff line number Diff line change
Expand Up @@ -1201,8 +1201,10 @@ def chat_with_references(
)
elif self.config.chat_model.backend == "vllm":
response_stream = self.chat_llm.generate_stream(current_messages)
else:
response_stream = self.chat_llm.generate_stream(current_messages)
else:
if self.config.chat_model.backend in ["huggingface", "vllm", "openai"]:
if self.config.chat_model.backend in ["huggingface", "vllm", "openai", "lazyllm"]:
response_stream = self.chat_llm.generate_stream(current_messages)
else:
response_stream = self.chat_llm.generate(current_messages)
Expand All @@ -1219,7 +1221,7 @@ def chat_with_references(
full_response = ""
token_count = 0
# Use tiktoken for proper token-based chunking
if self.config.chat_model.backend not in ["huggingface", "vllm", "openai"]:
if self.config.chat_model.backend not in ["huggingface", "vllm", "openai", "lazyllm"]:
# For non-huggingface backends, we need to collect the full response first
full_response_text = ""
for chunk in response_stream:
Expand Down
40 changes: 39 additions & 1 deletion tests/configs/test_llm.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from memos.configs.llm import (
BaseLLMConfig,
HFLLMConfig,
LazyLLMOnlineChatConfig,
LLMConfigFactory,
OllamaLLMConfig,
OpenAILLMConfig,
Expand Down Expand Up @@ -140,10 +141,47 @@ def test_hf_llm_config():
check_config_instantiation_invalid(HFLLMConfig)


def test_lazyllm_online_chat_config():
check_config_base_class(
LazyLLMOnlineChatConfig,
required_fields=[
"model_name_or_path",
],
optional_fields=[
"temperature",
"max_tokens",
"top_p",
"top_k",
"remove_think_prefix",
"default_headers",
"source",
"api_key",
"api_base",
"stream",
"skip_auth",
"type",
"extra_kwargs",
],
)

check_config_instantiation_valid(
LazyLLMOnlineChatConfig,
{
"model_name_or_path": "gpt-4o-mini",
"source": "openai",
"api_key": "sk-test",
"api_base": "https://api.openai.com/v1",
"stream": False,
},
)

check_config_instantiation_invalid(LazyLLMOnlineChatConfig)


def test_llm_config_factory():
check_config_factory_class(
LLMConfigFactory,
expected_backends=["openai", "ollama", "huggingface"],
expected_backends=["openai", "ollama", "huggingface", "lazyllm"],
)

check_config_instantiation_valid(
Expand Down
Loading