From ca841f8b4ca6a5bcb36d1f80c5247357d84c3c5c Mon Sep 17 00:00:00 2001
From: LittleMouse <n15978654429@gmail.com>
Date: Tue, 31 Dec 2024 18:12:49 +0800
Subject: [PATCH 01/16] api_yolo add "exit" function.

---
 src/api/api_yolo.cpp | 21 +++++++++++++++++++++
 src/api/api_yolo.h   |  8 ++++++++
 2 files changed, 29 insertions(+)

diff --git a/src/api/api_yolo.cpp b/src/api/api_yolo.cpp
index 2774cc0..e7bdbe9 100644
--- a/src/api/api_yolo.cpp
+++ b/src/api/api_yolo.cpp
@@ -42,6 +42,27 @@ String ApiYolo::setup(ApiYoloSetupConfig_t config, String request_id)
     return work_id;
 }
 
+String ApiYolo::exit(String work_id, String request_id)
+{
+    String cmd;  // serialized JSON "exit" command addressed to work_id
+    {
+        JsonDocument doc;  // scoped so the document is gone once the command has been serialized
+        doc["request_id"] = request_id;
+        doc["work_id"]    = work_id;
+        doc["action"]     = "exit";
+        serializeJson(doc, cmd);
+    }
+
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&work_id](ResponseMsg_t& msg) {
+            // Overwrite the passed-in work_id with the one carried by the response
+            work_id = msg.work_id;
+        },
+        100);  // NOTE(review): presumably the wait timeout in ms - confirm against ModuleMsg
+    return work_id;  // still the passed-in value if the callback above never ran
+}
+
 int ApiYolo::inference(String& work_id, uint8_t* input, size_t& raw_len, String request_id)
 {
     String cmd;
diff --git a/src/api/api_yolo.h b/src/api/api_yolo.h
index fbd6368..e065182 100644
--- a/src/api/api_yolo.h
+++ b/src/api/api_yolo.h
@@ -28,6 +28,14 @@ class ApiYolo {
      */
     String setup(ApiYoloSetupConfig_t config = ApiYoloSetupConfig_t(), String request_id = "yolo_setup");
 
+    /**
+     * @brief Exit module YOLO, return YOLO work_id
+     *
+     * @param work_id work_id sent with the exit command
+     * @param request_id Request id sent with the exit command
+     * @return String work_id taken from the response (the passed-in work_id if no response was handled)
+     */
+    String exit(String work_id, String request_id = "yolo_exit");
     /**
      * @brief Inference input data by module LLM
      *

From ee5a466a82c2155cbe317fe60039a13938bc19ec Mon Sep 17 00:00:00 2001
From: LittleMouse <n15978654429@gmail.com>
Date: Mon, 6 Jan 2025 15:30:21 +0800
Subject: [PATCH 02/16] add vlm, depth anything module. add doc. update api.

---
 docs/cn.md                     | 640 +++++++++++++++++++++++++++++++++
 docs/en.md                     |  41 +++
 src/M5ModuleLLM.cpp            |   2 +
 src/M5ModuleLLM.h              |  14 +
 src/api/api_depth_anything.cpp | 106 ++++++
 src/api/api_depth_anything.h   |  60 ++++
 src/api/api_melotts.cpp        |  21 ++
 src/api/api_melotts.h          |   9 +
 src/api/api_vlm.cpp            | 137 +++++++
 src/api/api_vlm.h              |  72 ++++
 src/api/api_yolo.h             |   1 +
 11 files changed, 1103 insertions(+)
 create mode 100644 docs/cn.md
 create mode 100644 docs/en.md
 create mode 100644 src/api/api_depth_anything.cpp
 create mode 100644 src/api/api_depth_anything.h
 create mode 100644 src/api/api_vlm.cpp
 create mode 100644 src/api/api_vlm.h

diff --git a/docs/cn.md b/docs/cn.md
new file mode 100644
index 0000000..137bd59
--- /dev/null
+++ b/docs/cn.md
@@ -0,0 +1,640 @@
+# M5Module-LLM Arduino API
+
+[M5Module-LLM](https://github.com/m5stack/M5Module-LLM) Arduino驱动库API文档。
+
+## M5ModuleLLM Class
+
+`M5ModuleLLM`用于初始化LLM Module, 并且提供内部成员用于快速初始化LLM的各个单元, 方便根据自己的需求构建应用。
+
+```cpp
+class M5ModuleLLM {
+public:
+    bool begin(Stream* targetPort);
+    bool checkConnection();
+    void update();
+
+    m5_module_llm::ApiSys sys;
+    m5_module_llm::ApiLlm llm;
+    m5_module_llm::ApiAudio audio;
+    m5_module_llm::ApiTts tts;
+    m5_module_llm::ApiMelotts melotts;
+    m5_module_llm::ApiKws kws;
+    m5_module_llm::ApiAsr asr;
+    m5_module_llm::ApiYolo yolo;
+    m5_module_llm::ModuleMsg msg;
+    m5_module_llm::ModuleComm comm;
+private:
+};
+```
+
+### begin
+
+**函数原型:**
+
+```cpp
+bool begin(Stream* targetPort);
+```
+
+**功能说明:**
+
+- 初始化LLM Module UART接口配置
+
+**传入参数:**
+
+- Stream* targetPort:
+    - 传入Serial指针
+
+**返回值:**
+
+- bool:
+    - true: 初始化成功
+    - false: 初始化失败
+
+### checkConnection
+
+**函数原型:**
+
+```cpp
+bool checkConnection();
+```
+
+**功能说明:**
+
+- 发送`sys.ping`指令, 检查LLM Module连接状态
+
+**传入参数:**
+
+- null
+
+**返回值:**
+
+- bool:
+    - true: 模组响应
+    - false: 模组无响应
+
+### update
+
+**函数原型:**
+
+```cpp
+void update();
+```
+
+**功能说明:**
+
+- 拉取LLM Module UART响应数据, 该API需包含在Loop中循环执行。
+
+**传入参数:**
+
+- null
+
+**返回值:**
+
+- null
+
+## ApiSys Class
+
+`M5ModuleLLM`的内部成员`ApiSys sys`用于控制SYS单元实现系统复位等操作。
+
+### ping
+
+**函数原型:**
+
+```cpp
+int ping();
+```
+
+**功能说明:**
+
+- 发送`sys.ping`指令, 检查LLM Module连接状态
+
+**传入参数:**
+
+- null
+
+**返回值:**
+
+- int:
+    - MODULE_LLM_OK / Error Code
+
+### reset
+
+**函数原型:**
+
+```cpp
+int reset(bool waitResetFinish = true);
+```
+
+**功能说明:**
+
+- 发送`sys.reset`指令, 复位软件服务。
+
+**传入参数:**
+
+- bool waitResetFinish:
+    - true:阻塞等待复位
+    - false:非阻塞执行复位
+
+**返回值:**
+
+- int:
+    - MODULE_LLM_OK / Error Code
+
+### reboot
+
+**函数原型:**
+
+```cpp
+int reboot();
+```
+
+**功能说明:**
+
+- 发送`sys.reboot`指令, 复位系统。
+
+**传入参数:**
+
+- null
+
+**返回值:**
+
+- int:
+    - MODULE_LLM_OK / Error Code
+
+## ApiAudio Class
+
+`M5ModuleLLM`的内部成员`ApiAudio audio`用于控制AUDIO单元的初始化和配置。
+
+### setup
+
+**函数原型:**
+
+```cpp
+String setup(ApiAudioSetupConfig_t config = ApiAudioSetupConfig_t(), String request_id = "audio_setup");
+```
+
+**功能说明:**
+
+- 初始化Audio单元, 开启系统声卡。(使用KWS和TTS前需开启该功能)
+
+**传入参数:**
+
+ApiAudioSetupConfig_t config:
+
+- Audio单元初始化配置:
+- String request_id:
+    - 会话id, 使用默认即可。
+
+```cpp
+struct ApiAudioSetupConfig_t {
+    int capcard      = 0;
+    int capdevice    = 0;
+    float capVolume  = 0.5;
+    int playcard     = 0;
+    int playdevice   = 1;
+    float playVolume = 0.15;
+};
+```
+
+| 参数         | 描述       | 输入值                             |
+|------------|----------|---------------------------------|
+| capcard    | 麦克风声卡的索引 | 系统默认声卡:0                        |
+| capdevice  | 麦克风设备索引  | 板载硅麦:0                          |
+| capVolume  | 输入的音量    | 0.0～10.0 (1<volume将增益, 默认值为0.5) |
+| playcard   | 扬声器声卡的索引 | 系统默认声卡:0                        |
+| playdevice | 扬声器设备索引  | 板载扬声器:1                         |
+| playVolume | 输出的音量    | 0.0～10.0 (1<volume将增益, 默认值为0.15) |
+
+**返回值:**
+
+- String:
+    - audio_work_id: audio单元work_id
+
+## ApiKws Class
+
+`M5ModuleLLM`的内部成员`ApiKws kws`用于控制KWS单元的初始化和配置。
+
+### setup
+
+**函数原型:**
+
+```cpp
+String setup(ApiKwsSetupConfig_t config = ApiKwsSetupConfig_t(), String request_id = "kws_setup");
+```
+
+**功能说明:**
+
+- 初始化KWS单元, 并配置唤醒关键字。
+
+**传入参数:**
+
+ApiKwsSetupConfig_t config:
+
+- KWS单元初始化配置:
+- String request_id:
+    - 会话id, 使用默认即可。
+
+```cpp
+struct ApiKwsSetupConfig_t {
+    String kws             = "HELLO";
+    String model           = "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01";
+    String response_format = "kws.bool";
+    String input           = "sys.pcm";
+    bool enoutput          = true;
+};
+```
+
+| 参数       | 描述         | 输入值                                                                                                                           |
+|----------|------------|-------------------------------------------------------------------------------------------------------------------------------|
+| model    | 转换模型       | 英文模型: "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01"<br>中文模型: "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01" |
+| kws      | KWS唤醒词文本设置 | 不允许中文/英文混合, 英文要求全大写                                                                                                           |
+| enoutput | 启用UART输出   | 启用: true<br>禁用: false                                                                                                         |
+
+**返回值:**
+
+- String:
+    - kws_work_id: kws单元work_id
+
+## ApiAsr Class
+
+`M5ModuleLLM`的内部成员`ApiAsr asr`用于控制ASR单元的初始化和配置。
+
+### setup
+
+**函数原型:**
+
+```cpp
+String setup(ApiAsrSetupConfig_t config = ApiAsrSetupConfig_t(), String request_id = "asr_setup");
+```
+
+**功能说明:**
+
+- 初始化ASR单元, 开启语音转文本功能。
+
+**传入参数:**
+
+ApiAsrSetupConfig_t config:
+
+- ASR单元初始化配置:
+- String request_id:
+    - 会话id, 使用默认即可。
+
+```cpp
+struct ApiAsrSetupConfig_t {
+    String model           = "sherpa-ncnn-streaming-zipformer-20M-2023-02-17";
+    String response_format = "asr.utf-8.stream";
+    String input           = ["sys.pcm", "kws.1000"];
+    bool enoutput          = true;
+    float rule1            = 2.4;
+    float rule2            = 1.2;
+    float rule3            = 30.0;
+};
+```
+
+| 参数              | 描述            | 输入值                                                                                                                 |
+|-----------------|---------------|---------------------------------------------------------------------------------------------------------------------|
+| model           | 转换模型          | 英文模型: "sherpa-ncnn-streaming-zipformer-20M-2023-02-17"<br>中文模型: "sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23" |
+| response_format | 输出格式          | 普通输出: "asr.utf-8"<br>流式输出: "asr.utf-8.stream"                                                                       |
+| input           | 输入            | KWS唤醒输入: "kws.xxx"(输入kws单元的work_id)<br>板载麦克风输入: "sys.pcm"<br>UART流式输入: "asr.wav.stream.base64"                      |
+| rule1           | 唤醒到未识别到内容超时时间 | 单位:秒                                                                                                                |
+| rule2           | 识别最大间隔时间      | 单位:秒                                                                                                                |
+| rule3           | 识别最长超时时间      | 单位:秒                                                                                                                |
+| enoutput        | 启用UART输出      | 启用: true<br>禁用: false                                                                                               |
+
+**返回值:**
+
+- String:
+    - asr_work_id: asr单元work_id
+
+## ApiLlm Class
+
+`M5ModuleLLM`的内部成员`ApiLlm llm`用于控制LLM单元的初始化和配置。
+
+### setup
+
+**函数原型:**
+
+```cpp
+String setup(ApiLlmSetupConfig_t config = ApiLlmSetupConfig_t(), String request_id = "llm_setup");
+```
+
+**功能说明:**
+
+- 初始化LLM单元, 支持配置LLM单元输入输出数据方式。
+
+**传入参数:**
+
+- ApiLlmSetupConfig_t config:
+    - LLM单元初始化配置:
+- String request_id:
+    - 会话id, 使用默认即可。
+
+```cpp
+struct ApiLlmSetupConfig_t {
+    String prompt;
+    String model           = "qwen2.5-0.5B-prefill-20e";
+    String response_format = "llm.utf-8.stream";
+    String input           = "llm.utf-8";
+    bool enoutput          = true;
+    bool enkws             = true;
+    int max_token_len      = 127;
+};
+```
+
+| 参数              | 描述                      | 输入值                                                                                             |
+|-----------------|-------------------------|-------------------------------------------------------------------------------------------------|
+| model           | 转换模型                    | 预置模型 "qwen2.5-0.5B-prefill-20e"                                                                 |
+| response_format | 输出格式                    | 普通输出: "llm.utf-8"<br>流式输出: "llm.utf-8.stream"                                                   |
+| input           | 输入                      | ASR输入: "asr.xxx"(输入asr单元的work_id)<br>UART输入: "llm.utf-8"<br>KWS唤醒打断: "kws.xxx"(输入kws单元的work_id) |
+| enkws           | KWS唤醒是否终止过程             | KWS打断过程: true<br>KWS不打断过程: false                                                                |
+| max_token_len   | 配置最大输出token(最大返回推理文本长度) | 最大值: 1024, 推荐使用127                                                                              |
+| prompt          | 模型初始化提示词                | String                                                                                          |
+| enoutput        | 启用UART输出                | 启用: true<br>禁用: false                                                                           |
+
+**返回值:**
+
+- String:
+    - llm_work_id: llm单元work_id
+
+### inference
+
+**函数原型:**
+
+```cpp
+int inference(String work_id, String input, String request_id = "llm_inference");
+```
+
+**功能说明:**
+
+- 输入数据, 开始推理。返回结果内容将进入`M5ModuleLLM.msg`中的`responseMsgList`列表容器中。
+
+**传入参数:**
+
+- String work_id:
+    - 调用的LLM单元work_id
+- String input:
+    - 输入文本
+- String request_id:
+    - 会话ID, 当同时存在多个会话的时候用于区分。
+
+**返回值:**
+
+- int:
+    - MODULE_LLM_OK / Error Code
+
+### inferenceAndWaitResult
+
+**函数原型:**
+
+```cpp
+int inferenceAndWaitResult(String work_id, String input, std::function<void(String&)> onResult, uint32_t timeout = 5000, String request_id = "llm_inference");
+```
+
+**功能说明:**
+
+- 输入数据, 开始推理。并阻塞等待返回结果, 然后调用callback函数。
+
+**传入参数:**
+
+- String work_id:
+    - 调用的LLM单元work_id
+- String input:
+    - 输入文本
+- void onResult(String&)
+    - 推理结果callback函数
+- uint32_t timeout:
+    - 等待推理超时时间
+- String request_id:
+    - 会话ID, 当同时存在多个会话的时候用于区分。
+
+**返回值:**
+
+- int:
+    - MODULE_LLM_OK / Error Code
+
+## ApiTts Class
+
+`M5ModuleLLM`的内部成员`ApiTts tts`用于控制TTS单元的初始化和配置。
+
+### setup
+
+**函数原型:**
+
+```cpp
+String setup(ApiTtsSetupConfig_t config = ApiTtsSetupConfig_t(), String request_id = "tts_setup");
+```
+
+**功能说明:**
+
+- 初始化TTS单元, 开启文本转语音功能。
+
+**传入参数:**
+
+ApiTtsSetupConfig_t config:
+
+- TTS单元初始化配置:
+- String request_id:
+    - 会话id, 使用默认即可。
+
+```cpp
+struct ApiTtsSetupConfig_t {
+    String model           = "single_speaker_english_fast";
+    String response_format = "tts.base64.wav";
+    String input           = "tts.utf-8.stream";
+    bool enoutput          = true;
+    bool enkws             = true;
+};
+```
+
+| 参数       | 描述          | 输入值                                                                                      |
+|----------|-------------|------------------------------------------------------------------------------------------|
+| model    | 转换模型        | 英文模型: "single_speaker_english_fast"<br>中文模型: "single_speaker_fast"                       |
+| input    | 输入          | LLM输入: "llm.xxx"(输入llm单元的work_id)<br>UART输入: "tts.utf-8"<br>UART流式输入: "tts.utf-8.stream" |
+| enkws    | KWS唤醒是否终止过程 | KWS打断过程: true<br>KWS不打断过程: false                                                         |
+| enoutput | 启用UART输出    | 启用: true<br>禁用: false                                                                    |
+
+**返回值:**
+
+- String:
+    - tts_work_id: tts单元work_id
+
+### inference
+
+**函数原型:**
+
+```cpp
+int inference(String work_id, String input, uint32_t timeout = 0, String request_id = "tts_inference");
+```
+
+**功能说明:**
+
+- 输入数据, 开始推理转换, 完成后将自动播放至扬声器。
+
+**传入参数:**
+
+- String work_id:
+    - 调用的TTS单元work_id
+- String input:
+    - 输入文本
+- uint32_t timeout:
+    - 等待推理超时时间
+- String request_id:
+    - 会话ID, 当同时存在多个会话的时候用于区分。
+
+**返回值:**
+
+- int:
+    - MODULE_LLM_OK / Error Code
+
+## ModuleMsg Class
+
+`M5ModuleLLM`的内部成员`ModuleMsg msg`提供了`responseMsgList`容器用于缓存接收LLM Module返回的各种信息。参考以下案例，在主循环中遍历获取返回结果。
+
+```cpp
+void loop()
+{
+    module_llm.update();
+
+    // Handle response msg
+    for (auto& msg : module_llm.msg.responseMsgList) {
+        // KWS msg
+        if (msg.work_id == kws_work_id) {
+            Serial.printf(">> Keyword detected\n");
+        }
+
+        // ASR msg
+        if (msg.work_id == asr_work_id) {
+            if (msg.object == "asr.utf-8.stream") {
+                // Parse and get asr result
+                JsonDocument doc;
+                deserializeJson(doc, msg.raw_msg);
+                String asr_result = doc["data"]["delta"].as<String>();
+                Serial.printf(">> %s\n", asr_result.c_str());
+            }
+        }
+    }
+    module_llm.msg.responseMsgList.clear();
+}
+
+```
+
+## VoiceAssistant Class
+
+`M5ModuleLLM_VoiceAssistant`用于快速创建LLM语音助手实例, 快速实现KWS(语音唤醒)->ASR(语音转文本)->LLM(大模型推理)->TTS(
+文本转语音)。
+
+- 初始化时候只需要将`M5ModuleLLM`实例传入构造函数, 并注册对应事件的回调函数即可完成语音助手创建。
+
+```cpp
+/*
+ * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#include <Arduino.h>
+#include <M5Unified.h>
+#include <M5ModuleLLM.h>
+
+M5ModuleLLM module_llm;
+M5ModuleLLM_VoiceAssistant voice_assistant(&module_llm);
+
+/* On ASR data callback */
+void on_asr_data_input(String data, bool isFinish, int index)
+{
+    M5.Display.setTextColor(TFT_GREEN, TFT_BLACK);
+    M5.Display.printf(">> %s\n", data.c_str());
+
+    /* If ASR data is finish */
+    if (isFinish) {
+        M5.Display.setTextColor(TFT_YELLOW, TFT_BLACK);
+        M5.Display.print(">> ");
+    }
+};
+
+/* On LLM data callback */
+void on_llm_data_input(String data, bool isFinish, int index)
+{
+    M5.Display.print(data);
+
+    /* If LLM data is finish */
+    if (isFinish) {
+        M5.Display.print("\n");
+    }
+};
+
+void setup()
+{
+    M5.begin();
+    M5.Display.setTextSize(2);
+    M5.Display.setTextScroll(true);
+
+    /* Init module serial port */
+    Serial2.begin(115200, SERIAL_8N1, 16, 17);  // Basic
+    // Serial2.begin(115200, SERIAL_8N1, 13, 14);  // Core2
+    // Serial2.begin(115200, SERIAL_8N1, 18, 17);  // CoreS3
+
+    /* Init module */
+    module_llm.begin(&Serial2);
+
+    /* Make sure module is connected */
+    M5.Display.printf(">> Check ModuleLLM connection..\n");
+    while (1) {
+        if (module_llm.checkConnection()) {
+            break;
+        }
+    }
+
+    /* Begin voice assistant preset */
+    M5.Display.printf(">> Begin voice assistant..\n");
+    int ret = voice_assistant.begin("HELLO");
+    if (ret != MODULE_LLM_OK) {
+        while (1) {
+            M5.Display.setTextColor(TFT_RED);
+            M5.Display.printf(">> Begin voice assistant failed\n");
+        }
+    }
+
+    /* Register on ASR data callback function */
+    voice_assistant.onAsrDataInput(on_asr_data_input);
+
+    /* Register on LLM data callback function */
+    voice_assistant.onLlmDataInput(on_llm_data_input);
+
+    M5.Display.printf(">> Voice assistant ready\n");
+}
+
+void loop()
+{
+    /* Keep voice assistant preset update */
+    voice_assistant.update();
+}
+```
+
+## Error Code
+
+```cpp
+enum ModuleLLMErrorCode_t {
+    MODULE_LLM_OK                              = 0,
+    MODULE_LLM_RESET_WARN                      = -1,
+    MODULE_LLM_JSON_FORMAT_ERROR               = -2,
+    MODULE_LLM_ACTION_MATCH_FAILED             = -3,
+    MODULE_LLM_INFERENCE_DATA_PUSH_FAILED      = -4,
+    MODULE_LLM_MODEL_LOADING_FAILED            = -5,
+    MODULE_LLM_UNIT_NOT_EXIST                  = -6,
+    MODULE_LLM_UNKNOWN_OPERATION               = -7,
+    MODULE_LLM_UNIT_RESOURCE_ALLOCATION_FAILED = -8,
+    MODULE_LLM_UNIT_CALL_FAILED                = -9,
+    MODULE_LLM_MODEL_INIT_FAILED               = -10,
+    MODULE_LLM_MODEL_RUN_FAILED                = -11,
+    MODULE_LLM_MODULE_NOT_INITIALISED          = -12,
+    MODULE_LLM_MODULE_ALREADY_WORKING          = -13,
+    MODULE_LLM_MODULE_NOT_WORKING              = -14,
+    MODULE_LLM_NO_UPDATEABLE_MODULES           = -15,
+    MODULE_LLM_NO_MODULES_AVAILABLE_FOR_UPDATE = -16,
+    MODULE_LLM_FILE_OPEN_FAILED                = -17,
+    MODULE_LLM_WAIT_RESPONSE_TIMEOUT           = -97,
+    MODULE_LLM_RESPONSE_PARSE_FAILED           = -98,
+    MODULE_LLM_ERROR_NONE                      = -99,
+};
+```
+
diff --git a/docs/en.md b/docs/en.md
new file mode 100644
index 0000000..1714e53
--- /dev/null
+++ b/docs/en.md
@@ -0,0 +1,41 @@
+# LLM Module Arduino Quick Start
+
+## Overview
+
+The `LLM Module` can be used with various M5 controllers. This tutorial demonstrates how to control the LLM Module using the `M5Core` series in the `Arduino IDE` with the LLM Module driver library.
+
+<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/guide/llm/llm/llm_module_device_01.jpg" width="70%" />
+
+## Environment Setup
+
+- 1.Arduino IDE Installation: Refer to the [Arduino IDE Installation Guide](/en/arduino/arduino_ide) to complete the IDE installation.
+
+- 2.Board Manager Installation: Refer to the [Basic Environment Setup Guide](/en/arduino/arduino_board) to complete the M5Stack board manager installation and select the `M5Core` development board.
+
+<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/arduino/m5core/quickstart_arduino_core_selectboard.png" width="70%" />
+
+- 3.Library Installation: Refer to the [Library Management Guide](/en/arduino/arduino_library) to install the `LLM Module` driver library. (Follow prompts to install the dependency library `M5Unified`)
+
+<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/guide/llm/llm/llm_arduino_lib_01.jpg" width="70%" />
+<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/guide/llm/llm/llm_arduino_lib_02.jpg" width="70%" />
+
+## Program Compilation & Upload
+
+Open the example program "kws_asr" in the driver library and click the upload button; the program will automatically compile and upload. The wake-up word used in the example program is "HELLO". Once the device has finished initializing, say the wake-up word to wake it up.
+
+<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/guide/llm/llm/llm_arduino_example_01.jpg" width="70%" />
+<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/guide/llm/llm/llm_arduino_example_02.jpg" width="70%" />
+<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/guide/llm/llm/llm_arduino_example_03.jpg" width="70%" />
+<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/guide/llm/llm/llm_arduino_example_04.jpg" width="70%" />
+
+- Examples:
+  - `kws_asr`: Uses KWS to wake up and triggers ASR for speech-to-text conversion. (KWS+ASR)
+  - `text_assistant`: Inputs text into the LLM model, performs inference, and outputs the result in text form. (LLM)
+  - `tts`: Uses the TTS unit to convert text to speech for playback. (TTS)
+  - `voice_assistant`: Uses KWS to wake up, triggers ASR for speech-to-text conversion, inputs the converted text into the LLM for inference, and outputs the inference result through TTS as speech. (KWS+ASR+LLM+TTS)
+
+## Related Links
+
+- [LLM Module Arduino Lib](https://github.com/m5stack/M5Module-LLM)
+- [LLM Module Arduino Lib API](/en/guide/llm/llm/arduino_api)
+
diff --git a/src/M5ModuleLLM.cpp b/src/M5ModuleLLM.cpp
index 0bac44b..2ac9119 100644
--- a/src/M5ModuleLLM.cpp
+++ b/src/M5ModuleLLM.cpp
@@ -12,6 +12,7 @@ bool M5ModuleLLM::begin(Stream* serialPort)
     msg.init(&comm);
     sys.init(&msg);
     llm.init(&msg);
+    vlm.init(&msg);
     audio.init(&msg);
     tts.init(&msg);
     melotts.init(&msg);
@@ -19,6 +20,7 @@ bool M5ModuleLLM::begin(Stream* serialPort)
     asr.init(&msg);
     yolo.init(&msg);
     camera.init(&msg);
+    depthanything.init(&msg);
     return true;
 }
 
diff --git a/src/M5ModuleLLM.h b/src/M5ModuleLLM.h
index 42bae02..c652089 100644
--- a/src/M5ModuleLLM.h
+++ b/src/M5ModuleLLM.h
@@ -9,12 +9,14 @@
 #include "utils/msg.h"
 #include "api/api_sys.h"
 #include "api/api_llm.h"
+#include "api/api_vlm.h"
 #include "api/api_audio.h"
 #include "api/api_tts.h"
 #include "api/api_melotts.h"
 #include "api/api_kws.h"
 #include "api/api_asr.h"
 #include "api/api_yolo.h"
+#include "api/api_depth_anything.h"
 #include "api/api_camera.h"
 #include "api/api_version.h"
 
@@ -55,6 +57,12 @@ class M5ModuleLLM {
      */
     m5_module_llm::ApiLlm llm;
 
+    /**
+     * @brief VLM module api set
+     *
+     */
+    m5_module_llm::ApiVlm vlm;
+
     /**
      * @brief Audio module api set
      *
@@ -97,6 +105,12 @@ class M5ModuleLLM {
      */
     m5_module_llm::ApiYolo yolo;
 
+    /**
+     * @brief DepthAnything module api set
+     *
+     */
+    m5_module_llm::ApiDepthAnything depthanything;
+
     /**
      * @brief MSG module to handle module response message
      *
diff --git a/src/api/api_depth_anything.cpp b/src/api/api_depth_anything.cpp
new file mode 100644
index 0000000..b8ceb2d
--- /dev/null
+++ b/src/api/api_depth_anything.cpp
@@ -0,0 +1,106 @@
+/*
+ * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#include "api_depth_anything.h"
+
+using namespace m5_module_llm;
+
+void ApiDepthAnything::init(ModuleMsg* moduleMsg)
+{
+    _module_msg = moduleMsg;  // keep the pointer; setup()/inference() send their commands through it
+}
+
+String ApiDepthAnything::setup(ApiDepthAnythingSetupConfig_t config, String request_id)
+{
+    String cmd;  // serialized JSON "setup" command
+    {
+        JsonDocument doc;
+        doc["request_id"]              = request_id;
+        doc["work_id"]                 = "depth_anything";
+        doc["action"]                  = "setup";
+        doc["object"]                  = "depth_anything.setup";
+        doc["data"]["model"]           = config.model;
+        doc["data"]["response_format"] = config.response_format;
+        JsonArray inputArray           = doc["data"]["input"].to<JsonArray>();  // data.input is sent as an array
+        for (const String& str : config.input) {
+            inputArray.add(str);
+        }
+        doc["data"]["enoutput"] = config.enoutput;
+        serializeJson(doc, cmd);
+    }
+
+    String work_id;  // only assigned by the response callback below
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&work_id](ResponseMsg_t& msg) {
+            // Take the work_id carried by the setup response
+            work_id = msg.work_id;
+        },
+        5000);  // NOTE(review): presumably the wait timeout in ms - confirm against ModuleMsg
+    return work_id;
+}
+
+int ApiDepthAnything::inference(String& work_id, uint8_t* input, size_t& raw_len, String request_id)
+{
+    String cmd;  // serialized JSON "inference" command, sent ahead of the raw payload
+    {
+        JsonDocument doc;
+        doc["RAW"]        = raw_len;  // same length that is passed to sendRaw() below
+        doc["request_id"] = request_id;
+        doc["work_id"]    = work_id;
+        doc["action"]     = "inference";
+        doc["object"]     = "cv.jpeg.base64";  // NOTE(review): says base64, yet input goes out via sendRaw() - confirm
+        serializeJson(doc, cmd);
+    }
+
+    _module_msg->sendCmd(cmd.c_str());
+    _module_msg->sendRaw(input, raw_len);
+    return MODULE_LLM_OK;  // results of sendCmd()/sendRaw() are not checked here
+}
+
+int ApiDepthAnything::inferenceAndWaitResult(String& work_id, uint8_t* input, size_t& raw_len,
+                                             std::function<void(String&)> onResult, uint32_t timeout, String request_id)
+{
+    inference(work_id, input, raw_len, request_id);  // return value is ignored
+
+    uint32_t time_out_count = millis();  // timestamp of the last activity (a millis() value, not a counter)
+    bool is_time_out        = false;
+    bool is_msg_finish      = false;
+    while (1) {
+        _module_msg->update();
+        _module_msg->takeMsg(request_id, [&time_out_count, &is_msg_finish, &onResult](ResponseMsg_t& msg) {
+            String response_msg;
+            {
+                JsonDocument doc;
+                deserializeJson(doc, msg.raw_msg);  // parse result is not checked
+                response_msg = doc["data"]["delta"].as<String>();
+                if (!doc["data"]["finish"].isNull()) {
+                    is_msg_finish = doc["data"]["finish"];
+                    if (is_msg_finish) {
+                        response_msg += '\n';  // the finishing message gets a trailing newline
+                    }
+                }
+            }
+            if (onResult) {
+                onResult(response_msg);
+            }
+            time_out_count = millis();  // every handled message restarts the timeout window
+        });
+
+        if (is_msg_finish) {
+            break;
+        }
+
+        if (millis() - time_out_count > timeout) {  // measured since the last handled message, not since the start
+            is_time_out = true;
+            break;
+        }
+    }
+
+    if (is_time_out) {
+        return MODULE_LLM_WAIT_RESPONSE_TIMEOUT;
+    }
+    return MODULE_LLM_OK;
+}
diff --git a/src/api/api_depth_anything.h b/src/api/api_depth_anything.h
new file mode 100644
index 0000000..c1138e7
--- /dev/null
+++ b/src/api/api_depth_anything.h
@@ -0,0 +1,60 @@
+/*
+ * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#pragma once
+#include "../utils/msg.h"
+#include <Arduino.h>
+
+namespace m5_module_llm {
+struct ApiDepthAnythingSetupConfig_t {
+    String model              = "depth_anything";             // sent as data.model in the setup command
+    String response_format    = "jpeg.base64.stream";         // sent as data.response_format
+    std::vector<String> input = {"depth_anything.jpeg.raw"};  // sent as the data.input array
+    bool enoutput             = true;                         // sent as data.enoutput
+};
+
+class ApiDepthAnything {
+public:
+    void init(ModuleMsg* moduleMsg);
+
+    /**
+     * @brief Setup module DepthAnything, return DepthAnything work_id
+     *
+     * @param config Setup options (model, response_format, input, enoutput)
+     * @param request_id Request id sent with the setup command
+     * @return String work_id taken from the setup response
+     */
+    String setup(ApiDepthAnythingSetupConfig_t config = ApiDepthAnythingSetupConfig_t(),
+                 String request_id                    = "depth_anything_setup");
+
+    /**
+     * @brief Send raw input data to module DepthAnything for inference, without waiting for the result
+     *
+     * @param work_id work_id sent with the inference command
+     * @param input Raw input buffer
+     * @param raw_len Length of the input data
+     * @param request_id Request id sent with the inference command
+     * @return int MODULE_LLM_OK
+     */
+    int inference(String& work_id, uint8_t* input, size_t& raw_len, String request_id = "depth_anything_inference");
+
+    /**
+     * @brief Inference input data by module DepthAnything, and wait inference result
+     *
+     * @param work_id work_id sent with the inference command
+     * @param input Raw input buffer
+     * @param raw_len Length of the input data
+     * @param onResult On inference result callback
+     * @param timeout Max wait in milliseconds, restarted on every received message
+     * @param request_id Request id used to send the command and to pick up its responses
+     * @return int MODULE_LLM_OK / MODULE_LLM_WAIT_RESPONSE_TIMEOUT
+     */
+    int inferenceAndWaitResult(String& work_id, uint8_t* input, size_t& raw_len, std::function<void(String&)> onResult,
+                               uint32_t timeout = 5000, String request_id = "depth_anything_inference");
+
+private:
+    ModuleMsg* _module_msg = nullptr;
+};
+}  // namespace m5_module_llm
diff --git a/src/api/api_melotts.cpp b/src/api/api_melotts.cpp
index b4e6f8b..adcadbf 100644
--- a/src/api/api_melotts.cpp
+++ b/src/api/api_melotts.cpp
@@ -44,6 +44,27 @@ String ApiMelotts::setup(ApiMelottsSetupConfig_t config, String request_id, Stri
     return work_id;
 }
 
+String ApiMelotts::exit(String work_id, String request_id)
+{
+    String cmd;  // serialized JSON "exit" command addressed to work_id
+    {
+        JsonDocument doc;  // scoped so the document is gone once the command has been serialized
+        doc["request_id"] = request_id;
+        doc["work_id"]    = work_id;
+        doc["action"]     = "exit";
+        serializeJson(doc, cmd);
+    }
+
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&work_id](ResponseMsg_t& msg) {
+            // Overwrite the passed-in work_id with the one carried by the response
+            work_id = msg.work_id;
+        },
+        100);  // NOTE(review): presumably the wait timeout in ms - confirm against ModuleMsg
+    return work_id;  // still the passed-in value if the callback above never ran
+}
+
 int ApiMelotts::inference(String work_id, String input, uint32_t timeout, String request_id)
 {
     String cmd;
diff --git a/src/api/api_melotts.h b/src/api/api_melotts.h
index 7a35219..22ab4ea 100644
--- a/src/api/api_melotts.h
+++ b/src/api/api_melotts.h
@@ -31,6 +31,15 @@ class ApiMelotts {
     String setup(ApiMelottsSetupConfig_t config = ApiMelottsSetupConfig_t(), String request_id = "melotts_setup",
                  String language = "en_US");
 
+    /**
+     * @brief Exit module MeloTTS, return MeloTTS work_id
+     *
+     * @param work_id work_id sent with the exit command
+     * @param request_id Request id sent with the exit command
+     * @return String work_id taken from the response (the passed-in work_id if no response was handled)
+     */
+    String exit(String work_id, String request_id = "melotts_exit");
+
     /**
      * @brief Inference input data by TTS module
      *
diff --git a/src/api/api_vlm.cpp b/src/api/api_vlm.cpp
new file mode 100644
index 0000000..651b986
--- /dev/null
+++ b/src/api/api_vlm.cpp
@@ -0,0 +1,137 @@
+/*
+ * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#include "api_vlm.h"
+#include "api_version.h"
+
+using namespace m5_module_llm;
+
+void ApiVlm::init(ModuleMsg* moduleMsg)  // remember the message handler the other ApiVlm methods send through
+{
+    _module_msg = moduleMsg;  // stored as-is, without a null check
+}
+
+String ApiVlm::setup(ApiVlmSetupConfig_t config, String request_id)  // send "vlm.setup", return the replied work id
+{
+    String cmd;  // serialized JSON command text
+    {
+        JsonDocument doc;
+        doc["request_id"]              = request_id;
+        doc["work_id"]                 = "vlm";
+        doc["action"]                  = "setup";
+        doc["object"]                  = "vlm.setup";
+        doc["data"]["model"]           = config.model;
+        doc["data"]["response_format"] = config.response_format;
+        doc["data"]["enoutput"]        = config.enoutput;
+        doc["data"]["enkws"]           = config.enkws;
+        doc["data"]["max_token_len"]   = config.max_token_len;
+        doc["data"]["prompt"]          = config.prompt;
+        if (!llm_version) {  // llm_version is defined outside this file; falsy selects the single-string payload
+            doc["data"]["model"] = "qwen2.5-0.5b";  // replaces config.model written above (NOTE(review): confirm)
+            doc["data"]["input"] = config.input.empty() ? String() : config.input[0];  // never index an empty vector
+        } else {
+            JsonArray inputArray = doc["data"]["input"].to<JsonArray>();
+            for (const String& str : config.input) {
+                inputArray.add(str);
+            }
+        }
+        serializeJson(doc, cmd);
+    }
+
+    String llm_work_id;  // stays empty unless the response callback below runs
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&llm_work_id](ResponseMsg_t& msg) {
+            // Keep the work id carried by the setup response
+            llm_work_id = msg.work_id;
+        },
+        20000);  // NOTE(review): presumably a response timeout in milliseconds - confirm against ModuleMsg
+    return llm_work_id;
+}
+
+String ApiVlm::exit(String work_id, String request_id)  // send "exit" for work_id, return replied work id
+{
+    String cmd;  // serialized JSON command text
+    {
+        JsonDocument doc;  // inner scope: doc is destroyed once cmd has been serialized
+        doc["request_id"] = request_id;
+        doc["work_id"]    = work_id;
+        doc["action"]     = "exit";
+        serializeJson(doc, cmd);
+    }
+
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&work_id](ResponseMsg_t& msg) {
+            // Overwrite the by-value work_id parameter with the id carried by the response
+            work_id = msg.work_id;
+        },
+        100);  // NOTE(review): presumably a response timeout in milliseconds - confirm against ModuleMsg
+    return work_id;  // still the caller's argument if the callback above was never invoked
+}
+
+int ApiVlm::inference(String work_id, String input, String request_id)  // send input as one finished stream chunk
+{
+    String cmd;  // serialized JSON command text
+    {
+        JsonDocument doc;
+        doc["request_id"]     = request_id;
+        doc["work_id"]        = work_id;
+        doc["action"]         = "inference";
+        doc["object"]         = "vlm.utf-8.stream";
+        doc["data"]["delta"]  = input;
+        doc["data"]["index"]  = 0;     // the whole input is sent as chunk 0 ...
+        doc["data"]["finish"] = true;  // ... and that chunk is flagged as the last one
+        serializeJson(doc, cmd);
+    }
+
+    _module_msg->sendCmd(cmd.c_str());  // no reply is awaited in this function
+    return MODULE_LLM_OK;               // returned unconditionally
+}
+
+int ApiVlm::inferenceAndWaitResult(String work_id, String input, std::function<void(String&)> onResult,
+                                   uint32_t timeout, String request_id)
+{  // Sends one inference request, then feeds each reply's delta to onResult until finish or timeout
+    inference(work_id, input, request_id);  // fire the request; replies are collected by the loop below
+
+    uint32_t time_out_count = millis();  // time of the request, later of the most recent handled reply
+    bool is_time_out        = false;
+    bool is_msg_finish      = false;
+    while (1) {
+        _module_msg->update();
+        _module_msg->takeMsg(request_id, [&time_out_count, &is_msg_finish, &onResult](ResponseMsg_t& msg) {
+            String response_msg;
+            {
+                JsonDocument doc;
+                if (deserializeJson(doc, msg.raw_msg)) return;  // unparsable reply: skip it, keep the timer running
+                response_msg = doc["data"]["delta"] | "";       // missing delta becomes "", not the text "null"
+                if (!doc["data"]["finish"].isNull()) {
+                    is_msg_finish = doc["data"]["finish"];
+                    if (is_msg_finish) {
+                        response_msg += '\n';  // the final chunk is handed over newline-terminated
+                    }
+                }
+            }
+            if (onResult) {  // an empty std::function is allowed; replies are then only consumed
+                onResult(response_msg);
+            }
+            time_out_count = millis();  // timeout is measured from the most recent handled reply
+        });
+
+        if (is_msg_finish) {
+            break;
+        }
+
+        if (millis() - time_out_count > timeout) {
+            is_time_out = true;
+            break;
+        }
+    }
+
+    if (is_time_out) {
+        return MODULE_LLM_WAIT_RESPONSE_TIMEOUT;
+    }
+    return MODULE_LLM_OK;
+}
diff --git a/src/api/api_vlm.h b/src/api/api_vlm.h
new file mode 100644
index 0000000..01878d7
--- /dev/null
+++ b/src/api/api_vlm.h
@@ -0,0 +1,72 @@
+/*
+ * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#pragma once
+#include "../utils/msg.h"
+#include <Arduino.h>
+
+namespace m5_module_llm {
+
+struct ApiVlmSetupConfig_t {  // Values ApiVlm::setup() copies into the "data" object of its setup command
+    String prompt;
+    String model              = "internvl2.5-1B-ax630c";  // setup() sends "qwen2.5-0.5b" instead if llm_version is falsy
+    String response_format    = "vlm.utf-8.stream";
+    std::vector<String> input = {"vlm.utf-8.stream"};  // sent as a JSON array, or only input[0] if llm_version is falsy
+    bool enoutput             = true;
+    bool enkws                = true;
+    // Alternative value kept for reference: max_token_len = 127
+    int max_token_len = 255;
+};
+
+class ApiVlm {
+public:
+    void init(ModuleMsg* moduleMsg);
+
+    /**
+     * @brief Setup module LLM, return LLM work_id
+     *
+     * @param config
+     * @param request_id
+     * @return String
+     */
+    String setup(ApiVlmSetupConfig_t config = ApiVlmSetupConfig_t(), String request_id = "vlm_setup");
+
+    /**
+     * @brief Exit module YOLO, return YOLO work_id
+     *
+     * @param work_id
+     * @param request_id
+     * @return String
+     */
+    String exit(String work_id, String request_id = "vlm_exit");
+
+    /**
+     * @brief Inference input data by module LLM
+     *
+     * @param work_id
+     * @param input
+     * @param request_id
+     * @return int
+     */
+    int inference(String work_id, String input, String request_id = "vlm_inference");
+
+    /**
+     * @brief Inference input data by module LLM, and wait inference result
+     *
+     * @param work_id
+     * @param input
+     * @param onResult On inference result callback
+     * @param timeout
+     * @param request_id
+     * @return int
+     */
+    int inferenceAndWaitResult(String work_id, String input, std::function<void(String&)> onResult,
+                               uint32_t timeout = 5000, String request_id = "vlm_inference");
+
+private:
+    ModuleMsg* _module_msg = nullptr;
+};
+
+}  // namespace m5_module_llm
diff --git a/src/api/api_yolo.h b/src/api/api_yolo.h
index e065182..ebccc1a 100644
--- a/src/api/api_yolo.h
+++ b/src/api/api_yolo.h
@@ -36,6 +36,7 @@ class ApiYolo {
      * @return String
      */
     String exit(String work_id, String request_id = "yolo_exit");
+
     /**
      * @brief Inference input data by module LLM
      *

From af55e48bc04f4da3edd97e2b220418f885fa4fd3 Mon Sep 17 00:00:00 2001
From: LittleMouse <n15978654429@gmail.com>
Date: Mon, 6 Jan 2025 17:22:47 +0800
Subject: [PATCH 03/16] update module api.

---
 src/api/api_asr.cpp            | 21 +++++++++++++++++++++
 src/api/api_asr.h              |  9 +++++++++
 src/api/api_audio.cpp          | 21 +++++++++++++++++++++
 src/api/api_audio.h            | 11 ++++++++++-
 src/api/api_camera.cpp         | 21 +++++++++++++++++++++
 src/api/api_camera.h           | 11 ++++++++++-
 src/api/api_depth_anything.cpp | 21 +++++++++++++++++++++
 src/api/api_depth_anything.h   | 11 ++++++++++-
 src/api/api_kws.cpp            | 21 +++++++++++++++++++++
 src/api/api_kws.h              |  9 +++++++++
 src/api/api_llm.cpp            | 21 +++++++++++++++++++++
 src/api/api_llm.h              |  9 +++++++++
 src/api/api_melotts.h          |  2 +-
 src/api/api_tts.cpp            | 21 +++++++++++++++++++++
 src/api/api_tts.h              |  9 +++++++++
 src/api/api_vlm.h              |  8 ++++----
 16 files changed, 218 insertions(+), 8 deletions(-)

diff --git a/src/api/api_asr.cpp b/src/api/api_asr.cpp
index 900d531..2766fa6 100644
--- a/src/api/api_asr.cpp
+++ b/src/api/api_asr.cpp
@@ -51,3 +51,24 @@ String ApiAsr::setup(ApiAsrSetupConfig_t config, String request_id, String langu
         10000);
     return work_id;
 }
+
+String ApiAsr::exit(String work_id, String request_id)  // send "exit" for work_id, return replied work id
+{
+    String cmd;  // serialized JSON command text
+    {
+        JsonDocument doc;  // inner scope: doc is destroyed once cmd has been serialized
+        doc["request_id"] = request_id;
+        doc["work_id"]    = work_id;
+        doc["action"]     = "exit";
+        serializeJson(doc, cmd);
+    }
+
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&work_id](ResponseMsg_t& msg) {
+            // Overwrite the by-value work_id parameter with the id carried by the response
+            work_id = msg.work_id;
+        },
+        100);  // NOTE(review): presumably a response timeout in milliseconds - confirm against ModuleMsg
+    return work_id;  // still the caller's argument if the callback above was never invoked
+}
diff --git a/src/api/api_asr.h b/src/api/api_asr.h
index 34d2364..0cd34a0 100644
--- a/src/api/api_asr.h
+++ b/src/api/api_asr.h
@@ -34,6 +34,15 @@ class ApiAsr {
     String setup(ApiAsrSetupConfig_t config = ApiAsrSetupConfig_t(), String request_id = "asr_setup",
                  String language = "en_US");
 
+    /**
+     * @brief Exit module ASR, return ASR work_id
+     *
+     * @param work_id
+     * @param request_id
+     * @return String
+     */
+    String exit(String work_id, String request_id = "asr_exit");
+
 private:
     ModuleMsg* _module_msg = nullptr;
 };
diff --git a/src/api/api_audio.cpp b/src/api/api_audio.cpp
index 343a586..caae9ae 100644
--- a/src/api/api_audio.cpp
+++ b/src/api/api_audio.cpp
@@ -40,3 +40,24 @@ String ApiAudio::setup(ApiAudioSetupConfig_t config, String request_id)
         5000);
     return work_id;
 }
+
+String ApiAudio::exit(String work_id, String request_id)  // send "exit" for work_id, return replied work id
+{
+    String cmd;  // serialized JSON command text
+    {
+        JsonDocument doc;  // inner scope: doc is destroyed once cmd has been serialized
+        doc["request_id"] = request_id;
+        doc["work_id"]    = work_id;
+        doc["action"]     = "exit";
+        serializeJson(doc, cmd);
+    }
+
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&work_id](ResponseMsg_t& msg) {
+            // Overwrite the by-value work_id parameter with the id carried by the response
+            work_id = msg.work_id;
+        },
+        100);  // NOTE(review): presumably a response timeout in milliseconds - confirm against ModuleMsg
+    return work_id;  // still the caller's argument if the callback above was never invoked
+}
diff --git a/src/api/api_audio.h b/src/api/api_audio.h
index a1ab88f..2bf9eff 100644
--- a/src/api/api_audio.h
+++ b/src/api/api_audio.h
@@ -23,7 +23,7 @@ class ApiAudio {
     void init(ModuleMsg* moduleMsg);
 
     /**
-     * @brief Setup module audio, return work_id
+     * @brief Setup module audio, return audio work_id
      *
      * @param config
      * @param request_id
@@ -31,6 +31,15 @@ class ApiAudio {
      */
     String setup(ApiAudioSetupConfig_t config = ApiAudioSetupConfig_t(), String request_id = "audio_setup");
 
+    /**
+     * @brief Exit module audio, return audio work_id
+     *
+     * @param work_id
+     * @param request_id
+     * @return String
+     */
+    String exit(String work_id, String request_id = "audio_exit");
+
 private:
     ModuleMsg* _module_msg = nullptr;
 };
diff --git a/src/api/api_camera.cpp b/src/api/api_camera.cpp
index 8b2c7da..395dc8b 100644
--- a/src/api/api_camera.cpp
+++ b/src/api/api_camera.cpp
@@ -39,3 +39,24 @@ String ApiCamera::setup(ApiCameraSetupConfig_t config, String request_id)
         5000);
     return work_id;
 }
+
+String ApiCamera::exit(String work_id, String request_id)  // send "exit" for work_id, return replied work id
+{
+    String cmd;  // serialized JSON command text
+    {
+        JsonDocument doc;  // inner scope: doc is destroyed once cmd has been serialized
+        doc["request_id"] = request_id;
+        doc["work_id"]    = work_id;
+        doc["action"]     = "exit";
+        serializeJson(doc, cmd);
+    }
+
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&work_id](ResponseMsg_t& msg) {
+            // Overwrite the by-value work_id parameter with the id carried by the response
+            work_id = msg.work_id;
+        },
+        100);  // NOTE(review): presumably a response timeout in milliseconds - confirm against ModuleMsg
+    return work_id;  // still the caller's argument if the callback above was never invoked
+}
diff --git a/src/api/api_camera.h b/src/api/api_camera.h
index f0d0ea3..973de50 100644
--- a/src/api/api_camera.h
+++ b/src/api/api_camera.h
@@ -22,7 +22,7 @@ class ApiCamera {
     void init(ModuleMsg* moduleMsg);
 
     /**
-     * @brief Setup module camera, return work_id
+     * @brief Setup module camera, return camera work_id
      *
      * @param config
      * @param request_id
@@ -30,6 +30,15 @@ class ApiCamera {
      */
     String setup(ApiCameraSetupConfig_t config = ApiCameraSetupConfig_t(), String request_id = "camera_setup");
 
+    /**
+     * @brief Exit module camera, return camera work_id
+     *
+     * @param work_id
+     * @param request_id
+     * @return String
+     */
+    String exit(String work_id, String request_id = "camera_exit");
+
 private:
     ModuleMsg* _module_msg = nullptr;
 };
diff --git a/src/api/api_depth_anything.cpp b/src/api/api_depth_anything.cpp
index b8ceb2d..85d0a97 100644
--- a/src/api/api_depth_anything.cpp
+++ b/src/api/api_depth_anything.cpp
@@ -42,6 +42,27 @@ String ApiDepthAnything::setup(ApiDepthAnythingSetupConfig_t config, String requ
     return work_id;
 }
 
+String ApiDepthAnything::exit(String work_id, String request_id)  // send "exit" for work_id, return replied work id
+{
+    String cmd;  // serialized JSON command text
+    {
+        JsonDocument doc;  // inner scope: doc is destroyed once cmd has been serialized
+        doc["request_id"] = request_id;
+        doc["work_id"]    = work_id;
+        doc["action"]     = "exit";
+        serializeJson(doc, cmd);
+    }
+
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&work_id](ResponseMsg_t& msg) {
+            // Overwrite the by-value work_id parameter with the id carried by the response
+            work_id = msg.work_id;
+        },
+        100);  // NOTE(review): presumably a response timeout in milliseconds - confirm against ModuleMsg
+    return work_id;  // still the caller's argument if the callback above was never invoked
+}
+
 int ApiDepthAnything::inference(String& work_id, uint8_t* input, size_t& raw_len, String request_id)
 {
     String cmd;
diff --git a/src/api/api_depth_anything.h b/src/api/api_depth_anything.h
index c1138e7..36cb450 100644
--- a/src/api/api_depth_anything.h
+++ b/src/api/api_depth_anything.h
@@ -20,7 +20,7 @@ class ApiDepthAnything {
     void init(ModuleMsg* moduleMsg);
 
     /**
-     * @brief Setup module YOLO, return YOLO work_id
+     * @brief Setup module DepthAnything, return DepthAnything work_id
      *
      * @param config
      * @param request_id
@@ -29,6 +29,15 @@ class ApiDepthAnything {
     String setup(ApiDepthAnythingSetupConfig_t config = ApiDepthAnythingSetupConfig_t(),
                  String request_id                    = "depth_anything_setup");
 
+    /**
+     * @brief Exit module DepthAnything, return DepthAnything work_id
+     *
+     * @param work_id
+     * @param request_id
+     * @return String
+     */
+    String exit(String work_id, String request_id = "depth_anything_exit");
+
     /**
      * @brief Inference input data by module LLM
      *
diff --git a/src/api/api_kws.cpp b/src/api/api_kws.cpp
index 07f417e..a4c3103 100644
--- a/src/api/api_kws.cpp
+++ b/src/api/api_kws.cpp
@@ -48,3 +48,24 @@ String ApiKws::setup(ApiKwsSetupConfig_t config, String request_id, String langu
         30000);
     return work_id;
 }
+
+String ApiKws::exit(String work_id, String request_id)  // send "exit" for work_id, return replied work id
+{
+    String cmd;  // serialized JSON command text
+    {
+        JsonDocument doc;  // inner scope: doc is destroyed once cmd has been serialized
+        doc["request_id"] = request_id;
+        doc["work_id"]    = work_id;
+        doc["action"]     = "exit";
+        serializeJson(doc, cmd);
+    }
+
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&work_id](ResponseMsg_t& msg) {
+            // Overwrite the by-value work_id parameter with the id carried by the response
+            work_id = msg.work_id;
+        },
+        100);  // NOTE(review): presumably a response timeout in milliseconds - confirm against ModuleMsg
+    return work_id;  // still the caller's argument if the callback above was never invoked
+}
diff --git a/src/api/api_kws.h b/src/api/api_kws.h
index 6400eec..71dc51c 100644
--- a/src/api/api_kws.h
+++ b/src/api/api_kws.h
@@ -31,6 +31,15 @@ class ApiKws {
     String setup(ApiKwsSetupConfig_t config = ApiKwsSetupConfig_t(), String request_id = "kws_setup",
                  String language = "en_US");
 
+    /**
+     * @brief Exit module KWS, return KWS work_id
+     *
+     * @param work_id
+     * @param request_id
+     * @return String
+     */
+    String exit(String work_id, String request_id = "kws_exit");
+
 private:
     ModuleMsg* _module_msg = nullptr;
 };
diff --git a/src/api/api_llm.cpp b/src/api/api_llm.cpp
index 57b4ee9..93e4fa9 100644
--- a/src/api/api_llm.cpp
+++ b/src/api/api_llm.cpp
@@ -51,6 +51,27 @@ String ApiLlm::setup(ApiLlmSetupConfig_t config, String request_id)
     return llm_work_id;
 }
 
+String ApiLlm::exit(String work_id, String request_id)  // send "exit" for work_id, return replied work id
+{
+    String cmd;  // serialized JSON command text
+    {
+        JsonDocument doc;  // inner scope: doc is destroyed once cmd has been serialized
+        doc["request_id"] = request_id;
+        doc["work_id"]    = work_id;
+        doc["action"]     = "exit";
+        serializeJson(doc, cmd);
+    }
+
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&work_id](ResponseMsg_t& msg) {
+            // Overwrite the by-value work_id parameter with the id carried by the response
+            work_id = msg.work_id;
+        },
+        100);  // NOTE(review): presumably a response timeout in milliseconds - confirm against ModuleMsg
+    return work_id;  // still the caller's argument if the callback above was never invoked
+}
+
 int ApiLlm::inference(String work_id, String input, String request_id)
 {
     String cmd;
diff --git a/src/api/api_llm.h b/src/api/api_llm.h
index 27536e9..45a3455 100644
--- a/src/api/api_llm.h
+++ b/src/api/api_llm.h
@@ -33,6 +33,15 @@ class ApiLlm {
      */
     String setup(ApiLlmSetupConfig_t config = ApiLlmSetupConfig_t(), String request_id = "llm_setup");
 
+    /**
+     * @brief Exit module LLM, return LLM work_id
+     *
+     * @param work_id
+     * @param request_id
+     * @return String
+     */
+    String exit(String work_id, String request_id = "llm_exit");
+
     /**
      * @brief Inference input data by module LLM
      *
diff --git a/src/api/api_melotts.h b/src/api/api_melotts.h
index 22ab4ea..2708c82 100644
--- a/src/api/api_melotts.h
+++ b/src/api/api_melotts.h
@@ -38,7 +38,7 @@ class ApiMelotts {
      * @param request_id
      * @return String
      */
-    String exit(String work_id, String request_id = "yolo_exit");
+    String exit(String work_id, String request_id = "tts_exit");
 
     /**
      * @brief Inference input data by TTS module
diff --git a/src/api/api_tts.cpp b/src/api/api_tts.cpp
index 6cfe661..7aee3c8 100644
--- a/src/api/api_tts.cpp
+++ b/src/api/api_tts.cpp
@@ -52,6 +52,27 @@ String ApiTts::setup(ApiTtsSetupConfig_t config, String request_id, String langu
     return work_id;
 }
 
+String ApiTts::exit(String work_id, String request_id)  // send "exit" for work_id, return replied work id
+{
+    String cmd;  // serialized JSON command text
+    {
+        JsonDocument doc;  // inner scope: doc is destroyed once cmd has been serialized
+        doc["request_id"] = request_id;
+        doc["work_id"]    = work_id;
+        doc["action"]     = "exit";
+        serializeJson(doc, cmd);
+    }
+
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&work_id](ResponseMsg_t& msg) {
+            // Overwrite the by-value work_id parameter with the id carried by the response
+            work_id = msg.work_id;
+        },
+        100);  // NOTE(review): presumably a response timeout in milliseconds - confirm against ModuleMsg
+    return work_id;  // still the caller's argument if the callback above was never invoked
+}
+
 int ApiTts::inference(String work_id, String input, uint32_t timeout, String request_id)
 {
     String cmd;
diff --git a/src/api/api_tts.h b/src/api/api_tts.h
index e5b06b9..80aea4e 100644
--- a/src/api/api_tts.h
+++ b/src/api/api_tts.h
@@ -32,6 +32,15 @@ class ApiTts {
     String setup(ApiTtsSetupConfig_t config = ApiTtsSetupConfig_t(), String request_id = "tts_setup",
                  String language = "en_US");
 
+    /**
+     * @brief Exit module TTS, return TTS work_id
+     *
+     * @param work_id
+     * @param request_id
+     * @return String
+     */
+    String exit(String work_id, String request_id = "tts_exit");
+
     /**
      * @brief Inference input data by TTS module
      *
diff --git a/src/api/api_vlm.h b/src/api/api_vlm.h
index 01878d7..cad9e30 100644
--- a/src/api/api_vlm.h
+++ b/src/api/api_vlm.h
@@ -25,7 +25,7 @@ class ApiVlm {
     void init(ModuleMsg* moduleMsg);
 
     /**
-     * @brief Setup module LLM, return LLM work_id
+     * @brief Setup module VLM, return VLM work_id
      *
      * @param config
      * @param request_id
@@ -34,7 +34,7 @@ class ApiVlm {
     String setup(ApiVlmSetupConfig_t config = ApiVlmSetupConfig_t(), String request_id = "vlm_setup");
 
     /**
-     * @brief Exit module YOLO, return YOLO work_id
+     * @brief Exit module VLM, return VLM work_id
      *
      * @param work_id
      * @param request_id
@@ -43,7 +43,7 @@ class ApiVlm {
     String exit(String work_id, String request_id = "vlm_exit");
 
     /**
-     * @brief Inference input data by module LLM
+     * @brief Inference input data by module VLM
      *
      * @param work_id
      * @param input
@@ -53,7 +53,7 @@ class ApiVlm {
     int inference(String work_id, String input, String request_id = "vlm_inference");
 
     /**
-     * @brief Inference input data by module LLM, and wait inference result
+     * @brief Inference input data by module VLM, and wait inference result
      *
      * @param work_id
      * @param input

From dfd4c1e5d8c94e52c60c603b5e33566f14961c0a Mon Sep 17 00:00:00 2001
From: LittleMouse <n15978654429@gmail.com>
Date: Mon, 6 Jan 2025 17:33:41 +0800
Subject: [PATCH 04/16] Increase llm setup timeout.

---
 src/api/api_llm.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/api/api_llm.cpp b/src/api/api_llm.cpp
index 93e4fa9..617c8bd 100644
--- a/src/api/api_llm.cpp
+++ b/src/api/api_llm.cpp
@@ -47,7 +47,7 @@ String ApiLlm::setup(ApiLlmSetupConfig_t config, String request_id)
             // Copy work id
             llm_work_id = msg.work_id;
         },
-        10000);
+        20000);
     return llm_work_id;
 }
 

From 6679384e761e45207735d604091ef879cf174b97 Mon Sep 17 00:00:00 2001
From: LittleMouse <n15978654429@gmail.com>
Date: Tue, 7 Jan 2025 09:26:03 +0800
Subject: [PATCH 05/16] fix arduino lib version

---
 library.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library.json b/library.json
index f274721..22e3a1c 100644
--- a/library.json
+++ b/library.json
@@ -14,7 +14,7 @@
         "M5GFX": "*",
         "ArduinoJson": "*"
     },
-    "version": "1.0.0",
+    "version": "1.4.0",
     "frameworks": "arduino",
     "platforms": "espressif32"
 }
\ No newline at end of file

From 31cdca83ea36eddbe314992cc84926d478df207c Mon Sep 17 00:00:00 2001
From: LittleMouse <n15978654429@gmail.com>
Date: Thu, 9 Jan 2025 15:46:46 +0800
Subject: [PATCH 06/16] Increase the timeout time of LLM setup. Update arduino
 lib version.

---
 library.properties  |  2 +-
 src/api/api_llm.cpp |  2 +-
 src/utils/comm.cpp  | 13 +++++++++++--
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/library.properties b/library.properties
index 020a707..d08cea6 100644
--- a/library.properties
+++ b/library.properties
@@ -1,5 +1,5 @@
 name=M5ModuleLLM
-version=1.0.0
+version=1.4.0
 author=M5Stack
 maintainer=M5Stack
 sentence=M5ModuleLLM is a library for M5ModuleLLM
diff --git a/src/api/api_llm.cpp b/src/api/api_llm.cpp
index 617c8bd..44ca3aa 100644
--- a/src/api/api_llm.cpp
+++ b/src/api/api_llm.cpp
@@ -47,7 +47,7 @@ String ApiLlm::setup(ApiLlmSetupConfig_t config, String request_id)
             // Copy work id
             llm_work_id = msg.work_id;
         },
-        20000);
+        30000);
     return llm_work_id;
 }
 
diff --git a/src/utils/comm.cpp b/src/utils/comm.cpp
index 7df0c4a..33b6bc5 100644
--- a/src/utils/comm.cpp
+++ b/src/utils/comm.cpp
@@ -7,6 +7,8 @@
 #include <ArduinoJson.h>
 
 using namespace m5_module_llm;
+const size_t JSON_BUFFER_SIZE = 2048;
+char jsonBuffer[JSON_BUFFER_SIZE];
 
 bool ModuleComm::init(Stream* serialPort)
 {
@@ -34,6 +36,7 @@ void ModuleComm::sendRaw(const uint8_t* data, size_t& raw_len)
 ModuleComm::Respond_t ModuleComm::getResponse(uint32_t timeout)
 {
     Respond_t ret;
+    String buffer;
 
     uint32_t time_out_count = millis();
     bool get_msg            = false;
@@ -43,7 +46,13 @@ ModuleComm::Respond_t ModuleComm::getResponse(uint32_t timeout)
         if (_serial->available()) {
             get_msg = true;
             while (_serial->available()) {
-                ret.msg += (char)_serial->read();
+                char c = (char)_serial->read();
+                buffer += c;
+
+                if (c == '\n') {
+                    ret.msg = buffer;
+                    return ret;;
+                }
             }
             get_msg_count  = millis();
             time_out_count = millis();
@@ -62,7 +71,7 @@ ModuleComm::Respond_t ModuleComm::getResponse(uint32_t timeout)
             break;
         }
 
-        delay(5);
+        // delay(5);
     }
 
     return ret;

From 8fd149869be6d2b1226dbdd9357baf6a2ea1d1a0 Mon Sep 17 00:00:00 2001
From: LittleMouse <n15978654429@gmail.com>
Date: Thu, 9 Jan 2025 19:02:35 +0800
Subject: [PATCH 07/16] update yolo demo

---
 examples/YOLO/YOLO.ino | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/examples/YOLO/YOLO.ino b/examples/YOLO/YOLO.ino
index cd6dc72..ea6d45d 100644
--- a/examples/YOLO/YOLO.ino
+++ b/examples/YOLO/YOLO.ino
@@ -119,5 +119,8 @@ void loop()
     }
 
     /* Clear handled messages */
+    module_llm.msg.clearMsg("yolo_setup");
     module_llm.msg.responseMsgList.clear();
+
+    usleep(500000);
 }
\ No newline at end of file

From 9a1fa03ce13dc85264987fa6221ac48e463e01a6 Mon Sep 17 00:00:00 2001
From: lovyan03 <42724151+lovyan03@users.noreply.github.com>
Date: Sat, 18 Jan 2025 11:14:47 +0900
Subject: [PATCH 08/16] Changed to automatically detect pin settings in sample
 code.

---
 examples/KWS_ASR/KWS_ASR.ino                         | 9 ++++++---
 examples/SerialTextAssistant/SerialTextAssistant.ino | 9 ++++++---
 examples/TTS/TTS.ino                                 | 9 ++++++---
 examples/TextAssistant/TextAssistant.ino             | 9 ++++++---
 examples/VoiceAssistant/VoiceAssistant.ino           | 9 ++++++---
 examples/YOLO/YOLO.ino                               | 9 ++++++---
 6 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/examples/KWS_ASR/KWS_ASR.ino b/examples/KWS_ASR/KWS_ASR.ino
index b7ad005..bd7e5f9 100644
--- a/examples/KWS_ASR/KWS_ASR.ino
+++ b/examples/KWS_ASR/KWS_ASR.ino
@@ -27,9 +27,12 @@ void setup()
     // language = "zh_CN";
 
     /* Init module serial port */
-    Serial2.begin(115200, SERIAL_8N1, 16, 17);  // Basic
-    // Serial2.begin(115200, SERIAL_8N1, 13, 14);  // Core2
-    // Serial2.begin(115200, SERIAL_8N1, 18, 17);  // CoreS3
+    // int rxd = 16, txd = 17;  // Basic
+    // int rxd = 13, txd = 14;  // Core2
+    // int rxd = 18, txd = 17;  // CoreS3
+    int rxd = M5.getPin(m5::pin_name_t::port_c_rxd);
+    int txd = M5.getPin(m5::pin_name_t::port_c_txd);
+    Serial2.begin(115200, SERIAL_8N1, rxd, txd);
 
     /* Init module */
     module_llm.begin(&Serial2);
diff --git a/examples/SerialTextAssistant/SerialTextAssistant.ino b/examples/SerialTextAssistant/SerialTextAssistant.ino
index 2587018..10c44b8 100644
--- a/examples/SerialTextAssistant/SerialTextAssistant.ino
+++ b/examples/SerialTextAssistant/SerialTextAssistant.ino
@@ -25,9 +25,12 @@ void setup()
     CommSerialPort.begin(115200);
 
     /* Init module serial port */
-    Serial2.begin(115200, SERIAL_8N1, 16, 17);  // Basic
-    // Serial2.begin(115200, SERIAL_8N1, 13, 14);  // Core2
-    // Serial2.begin(115200, SERIAL_8N1, 18, 17);  // CoreS3
+    // int rxd = 16, txd = 17;  // Basic
+    // int rxd = 13, txd = 14;  // Core2
+    // int rxd = 18, txd = 17;  // CoreS3
+    int rxd = M5.getPin(m5::pin_name_t::port_c_rxd);
+    int txd = M5.getPin(m5::pin_name_t::port_c_txd);
+    Serial2.begin(115200, SERIAL_8N1, rxd, txd);
 
     /* Init module */
     module_llm.begin(&Serial2);
diff --git a/examples/TTS/TTS.ino b/examples/TTS/TTS.ino
index 73d536d..5a110d5 100644
--- a/examples/TTS/TTS.ino
+++ b/examples/TTS/TTS.ino
@@ -22,9 +22,12 @@ void setup()
     // language = "zh_CN";
 
     /* Init module serial port */
-    Serial2.begin(115200, SERIAL_8N1, 16, 17);  // Basic
-    // Serial2.begin(115200, SERIAL_8N1, 13, 14);  // Core2
-    // Serial2.begin(115200, SERIAL_8N1, 18, 17);  // CoreS3
+    // int rxd = 16, txd = 17;  // Basic
+    // int rxd = 13, txd = 14;  // Core2
+    // int rxd = 18, txd = 17;  // CoreS3
+    int rxd = M5.getPin(m5::pin_name_t::port_c_rxd);
+    int txd = M5.getPin(m5::pin_name_t::port_c_txd);
+    Serial2.begin(115200, SERIAL_8N1, rxd, txd);
 
     /* Init module */
     module_llm.begin(&Serial2);
diff --git a/examples/TextAssistant/TextAssistant.ino b/examples/TextAssistant/TextAssistant.ino
index 17bc101..48dd552 100644
--- a/examples/TextAssistant/TextAssistant.ino
+++ b/examples/TextAssistant/TextAssistant.ino
@@ -17,9 +17,12 @@ void setup()
     M5.Display.setTextScroll(true);
 
     /* Init module serial port */
-    Serial2.begin(115200, SERIAL_8N1, 16, 17);  // Basic
-    // Serial2.begin(115200, SERIAL_8N1, 13, 14);  // Core2
-    // Serial2.begin(115200, SERIAL_8N1, 18, 17);  // CoreS3
+    // int rxd = 16, txd = 17;  // Basic
+    // int rxd = 13, txd = 14;  // Core2
+    // int rxd = 18, txd = 17;  // CoreS3
+    int rxd = M5.getPin(m5::pin_name_t::port_c_rxd);
+    int txd = M5.getPin(m5::pin_name_t::port_c_txd);
+    Serial2.begin(115200, SERIAL_8N1, rxd, txd);
 
     /* Init module */
     module_llm.begin(&Serial2);
diff --git a/examples/VoiceAssistant/VoiceAssistant.ino b/examples/VoiceAssistant/VoiceAssistant.ino
index 3a4979b..2f6ac00 100644
--- a/examples/VoiceAssistant/VoiceAssistant.ino
+++ b/examples/VoiceAssistant/VoiceAssistant.ino
@@ -42,9 +42,12 @@ void setup()
     M5.Display.setTextScroll(true);
 
     /* Init module serial port */
-    Serial2.begin(115200, SERIAL_8N1, 16, 17);  // Basic
-    // Serial2.begin(115200, SERIAL_8N1, 13, 14);  // Core2
-    // Serial2.begin(115200, SERIAL_8N1, 18, 17);  // CoreS3
+    // int rxd = 16, txd = 17;  // Basic
+    // int rxd = 13, txd = 14;  // Core2
+    // int rxd = 18, txd = 17;  // CoreS3
+    int rxd = M5.getPin(m5::pin_name_t::port_c_rxd);
+    int txd = M5.getPin(m5::pin_name_t::port_c_txd);
+    Serial2.begin(115200, SERIAL_8N1, rxd, txd);
 
     /* Init module */
     module_llm.begin(&Serial2);
diff --git a/examples/YOLO/YOLO.ino b/examples/YOLO/YOLO.ino
index cd6dc72..10ba72b 100644
--- a/examples/YOLO/YOLO.ino
+++ b/examples/YOLO/YOLO.ino
@@ -28,9 +28,12 @@ void setup()
     M5.Display.setTextScroll(true);
 
     /* Init module serial port */
-    Serial2.begin(115200, SERIAL_8N1, 16, 17);  // Basic
-    // Serial2.begin(115200, SERIAL_8N1, 13, 14);  // Core2
-    // Serial2.begin(115200, SERIAL_8N1, 18, 17);  // CoreS3
+    // int rxd = 16, txd = 17;  // Basic
+    // int rxd = 13, txd = 14;  // Core2
+    // int rxd = 18, txd = 17;  // CoreS3
+    int rxd = M5.getPin(m5::pin_name_t::port_c_rxd);
+    int txd = M5.getPin(m5::pin_name_t::port_c_txd);
+    Serial2.begin(115200, SERIAL_8N1, rxd, txd);
 
     /* Init module */
     module_llm.begin(&Serial2);

From 7d3dcdc2ce1552f62277d2611a377c2376e66af8 Mon Sep 17 00:00:00 2001
From: LittleMouse <n15978654429@gmail.com>
Date: Mon, 20 Jan 2025 11:08:30 +0800
Subject: [PATCH 09/16] update vad_whisper api & demo

---
 examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino | 108 +++++++++++++++++++
 src/M5ModuleLLM.cpp                          |   2 +
 src/M5ModuleLLM.h                            |  14 +++
 src/api/api_vad.cpp                          |  64 +++++++++++
 src/api/api_vad.h                            |  45 ++++++++
 src/api/api_whisper.cpp                      |  65 +++++++++++
 src/api/api_whisper.h                        |  47 ++++++++
 7 files changed, 345 insertions(+)
 create mode 100644 examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino
 create mode 100644 src/api/api_vad.cpp
 create mode 100644 src/api/api_vad.h
 create mode 100644 src/api/api_whisper.cpp
 create mode 100644 src/api/api_whisper.h

diff --git a/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino b/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino
new file mode 100644
index 0000000..8d84a1c
--- /dev/null
+++ b/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino
@@ -0,0 +1,108 @@
+/*
+ * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#include <Arduino.h>
+#include <M5Unified.h>
+#include <M5ModuleLLM.h>
+
+M5ModuleLLM module_llm;
+
+/* Must be capitalized */
+String wake_up_keyword = "HELLO";
+// String wake_up_keyword = "你好你好";
+String kws_work_id;
+String vad_work_id;
+String whisper_work_id;
+String language;
+
+void setup()
+{
+    M5.begin();
+    M5.Display.setTextSize(2);
+    M5.Display.setTextScroll(true);
+    // M5.Display.setFont(&fonts::efontCN_12);  // Support Chinese display
+
+    language = "en_US";
+    // language = "zh_CN";
+
+    /* Init module serial port */
+    Serial2.begin(115200, SERIAL_8N1, 16, 17);  // Basic
+    // Serial2.begin(115200, SERIAL_8N1, 13, 14);  // Core2
+    // Serial2.begin(115200, SERIAL_8N1, 18, 17);  // CoreS3
+
+    /* Init module */
+    module_llm.begin(&Serial2);
+
+    /* Block here until the module passes the connection check */
+    M5.Display.printf(">> Check ModuleLLM connection..\n");
+    while (1) {
+        if (module_llm.checkConnection()) {
+            break;
+        }
+    }
+
+    /* Reset ModuleLLM */
+    M5.Display.printf(">> Reset ModuleLLM..\n");
+    module_llm.sys.reset();
+
+    /* Setup Audio module */
+    M5.Display.printf(">> Setup audio..\n");
+    module_llm.audio.setup();
+
+    /* Setup KWS module with the wake-up keyword and save returned work id */
+    M5.Display.printf(">> Setup kws..\n");
+    m5_module_llm::ApiKwsSetupConfig_t kws_config;
+    kws_config.kws = wake_up_keyword;
+    kws_work_id    = module_llm.kws.setup(kws_config, "kws_setup", language);
+
+    /* Setup VAD module (inputs: "sys.pcm" and the KWS work id) and save returned work id */
+    M5.Display.printf(">> Setup vad..\n");
+    m5_module_llm::ApiVadSetupConfig_t vad_config;
+    vad_config.input = {"sys.pcm", kws_work_id};
+    vad_work_id      = module_llm.vad.setup(vad_config, "vad_setup");
+
+    /* Setup Whisper module (inputs: "sys.pcm", KWS and VAD work ids) and save returned work id */
+    M5.Display.printf(">> Setup whisper..\n");
+    m5_module_llm::ApiWhisperSetupConfig_t whisper_config;
+    whisper_config.input    = {"sys.pcm", kws_work_id, vad_work_id};
+    whisper_config.language = "en";
+    // whisper_config.language = "zh";
+    // whisper_config.language = "ja";
+    whisper_work_id = module_llm.whisper.setup(whisper_config, "whisper_setup");
+
+    M5.Display.printf(">> Setup ok\n>> Say \"%s\" to wakeup\n", wake_up_keyword.c_str());
+}
+
+void loop()
+{
+    /* Update ModuleLLM */
+    module_llm.update();
+
+    /* Handle module response messages */
+    for (auto& msg : module_llm.msg.responseMsgList) {
+        /* If KWS module message */
+        if (msg.work_id == kws_work_id) {
+            M5.Display.setTextColor(TFT_GREENYELLOW);
+            M5.Display.printf(">> Keyword detected\n");
+        }
+
+        /* If Whisper module message */
+        if (msg.work_id == whisper_work_id) {
+            /* Only handle messages whose object type is "asr.utf-8" */
+            if (msg.object == "asr.utf-8") {
+                /* Parse message json and read the recognized text from its "data" field */
+                JsonDocument doc;
+                deserializeJson(doc, msg.raw_msg);
+                String asr_result = doc["data"].as<String>();
+
+                M5.Display.setTextColor(TFT_YELLOW);
+                M5.Display.printf(">> %s\n", asr_result.c_str());
+            }
+        }
+    }
+
+    /* Clear handled messages */
+    module_llm.msg.responseMsgList.clear();
+}
\ No newline at end of file
diff --git a/src/M5ModuleLLM.cpp b/src/M5ModuleLLM.cpp
index 2ac9119..d4c63f1 100644
--- a/src/M5ModuleLLM.cpp
+++ b/src/M5ModuleLLM.cpp
@@ -20,6 +20,8 @@ bool M5ModuleLLM::begin(Stream* serialPort)
     asr.init(&msg);
     yolo.init(&msg);
     camera.init(&msg);
+    vad.init(&msg);
+    whisper.init(&msg);
     depthanything.init(&msg);
     return true;
 }
diff --git a/src/M5ModuleLLM.h b/src/M5ModuleLLM.h
index c652089..a1846fb 100644
--- a/src/M5ModuleLLM.h
+++ b/src/M5ModuleLLM.h
@@ -18,6 +18,8 @@
 #include "api/api_yolo.h"
 #include "api/api_depth_anything.h"
 #include "api/api_camera.h"
+#include "api/api_vad.h"
+#include "api/api_whisper.h"
 #include "api/api_version.h"
 
 class M5ModuleLLM {
@@ -105,6 +107,18 @@ class M5ModuleLLM {
      */
     m5_module_llm::ApiYolo yolo;
 
+    /**
+     * @brief VAD module api set
+     *
+     */
+    m5_module_llm::ApiVad vad;
+
+    /**
+     * @brief Whisper module api set
+     *
+     */
+    m5_module_llm::ApiWhisper whisper;
+
     /**
      * @brief DepthAnything module api set
      *
diff --git a/src/api/api_vad.cpp b/src/api/api_vad.cpp
new file mode 100644
index 0000000..9b646af
--- /dev/null
+++ b/src/api/api_vad.cpp
@@ -0,0 +1,64 @@
+/*
+ * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#include "api_vad.h"
+
+using namespace m5_module_llm;
+
+void ApiVad::init(ModuleMsg* moduleMsg)
+{
+    _module_msg = moduleMsg;  // kept for setup()/exit(), which send their commands through it
+}
+
+String ApiVad::setup(ApiVadSetupConfig_t config, String request_id)
+{
+    String cmd;
+    {
+        JsonDocument doc;
+        doc["request_id"]              = request_id;
+        doc["work_id"]                 = "vad";
+        doc["action"]                  = "setup";
+        doc["object"]                  = "vad.setup";
+        doc["data"]["model"]           = config.model;
+        doc["data"]["response_format"] = config.response_format;
+        doc["data"]["enoutput"]        = config.enoutput;
+        JsonArray inputArray           = doc["data"]["input"].to<JsonArray>();
+        for (const String& str : config.input) {
+            inputArray.add(str);
+        }
+        serializeJson(doc, cmd);
+    }
+
+    String work_id;  // stays empty unless the response callback below fills it
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&work_id](ResponseMsg_t& msg) {
+            // Keep the work_id carried by the response message
+            work_id = msg.work_id;
+        },
+        30000);  // timeout argument; presumably milliseconds - confirm in ModuleMsg
+    return work_id;
+}
+
+String ApiVad::exit(String work_id, String request_id)
+{
+    String cmd;
+    {
+        JsonDocument doc;
+        doc["request_id"] = request_id;
+        doc["work_id"]    = work_id;
+        doc["action"]     = "exit";
+        serializeJson(doc, cmd);
+    }
+
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&work_id](ResponseMsg_t& msg) {
+            // Overwrite the by-value work_id with the one in the response; otherwise it is returned unchanged
+            work_id = msg.work_id;
+        },
+        100);  // timeout argument; presumably milliseconds - confirm in ModuleMsg
+    return work_id;
+}
diff --git a/src/api/api_vad.h b/src/api/api_vad.h
new file mode 100644
index 0000000..15f2e11
--- /dev/null
+++ b/src/api/api_vad.h
@@ -0,0 +1,45 @@
+/*
+ * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#pragma once
+#include "../utils/msg.h"
+#include <Arduino.h>
+
+namespace m5_module_llm {
+
+struct ApiVadSetupConfig_t {
+    String model              = "silero-vad";
+    String response_format    = "vad.bool";
+    std::vector<String> input = {"sys.pcm"};
+    bool enoutput             = true;
+};
+
+class ApiVad {
+public:
+    void init(ModuleMsg* moduleMsg);
+
+    /**
+     * @brief Setup module VAD, return VAD work_id
+     *
+     * @param config Setup options (model, response_format, input, enoutput)
+     * @param request_id Request id sent with the setup command
+     * @return String work_id taken from the response message (empty if none was taken)
+     */
+    String setup(ApiVadSetupConfig_t config = ApiVadSetupConfig_t(), String request_id = "vad_setup");
+
+    /**
+     * @brief Exit module VAD, return VAD work_id
+     *
+     * @param work_id Work id of the VAD unit to exit
+     * @param request_id Request id sent with the exit command
+     * @return String work_id from the response message, or the given work_id if none was taken
+     */
+    String exit(String work_id, String request_id = "vad_exit");
+
+private:
+    ModuleMsg* _module_msg = nullptr;
+};
+
+}  // namespace m5_module_llm
diff --git a/src/api/api_whisper.cpp b/src/api/api_whisper.cpp
new file mode 100644
index 0000000..3b8f2f9
--- /dev/null
+++ b/src/api/api_whisper.cpp
@@ -0,0 +1,65 @@
+/*
+ * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#include "api_whisper.h"
+
+using namespace m5_module_llm;
+
+void ApiWhisper::init(ModuleMsg* moduleMsg)
+{
+    _module_msg = moduleMsg;  // kept for setup()/exit(), which send their commands through it
+}
+
+String ApiWhisper::setup(ApiWhisperSetupConfig_t config, String request_id, String language)
+{
+    String cmd;
+    {
+        JsonDocument doc;
+        doc["request_id"]              = request_id;
+        doc["work_id"]                 = "whisper";
+        doc["action"]                  = "setup";
+        doc["object"]                  = "whisper.setup";
+        doc["data"]["model"]           = config.model;
+        doc["data"]["response_format"] = config.response_format;
+        doc["data"]["language"]        = config.language;  // from config; the `language` parameter is never read
+        doc["data"]["enoutput"]        = config.enoutput;
+        JsonArray inputArray           = doc["data"]["input"].to<JsonArray>();
+        for (const String& str : config.input) {
+            inputArray.add(str);
+        }
+        serializeJson(doc, cmd);
+    }
+
+    String work_id;  // stays empty unless the response callback below fills it
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&work_id](ResponseMsg_t& msg) {
+            // Keep the work_id carried by the response message
+            work_id = msg.work_id;
+        },
+        10000);  // timeout argument; presumably milliseconds - confirm in ModuleMsg
+    return work_id;
+}
+
+String ApiWhisper::exit(String work_id, String request_id)
+{
+    String cmd;
+    {
+        JsonDocument doc;
+        doc["request_id"] = request_id;
+        doc["work_id"]    = work_id;
+        doc["action"]     = "exit";
+        serializeJson(doc, cmd);
+    }
+
+    _module_msg->sendCmdAndWaitToTakeMsg(
+        cmd.c_str(), request_id,
+        [&work_id](ResponseMsg_t& msg) {
+            // Overwrite the by-value work_id with the one in the response; otherwise it is returned unchanged
+            work_id = msg.work_id;
+        },
+        100);  // timeout argument; presumably milliseconds - confirm in ModuleMsg
+    return work_id;
+}
diff --git a/src/api/api_whisper.h b/src/api/api_whisper.h
new file mode 100644
index 0000000..40a5c2d
--- /dev/null
+++ b/src/api/api_whisper.h
@@ -0,0 +1,47 @@
+/*
+ * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#pragma once
+#include "../utils/msg.h"
+#include <Arduino.h>
+
+namespace m5_module_llm {
+
+struct ApiWhisperSetupConfig_t {
+    String model              = "whisper-tiny";
+    String response_format    = "asr.utf-8";
+    String language           = "en";
+    std::vector<String> input = {"sys.pcm"};
+    bool enoutput             = true;
+};
+
+class ApiWhisper {
+public:
+    void init(ModuleMsg* moduleMsg);
+
+    /**
+     * @brief Setup module Whisper, return Whisper work_id
+     *
+     * @param config Setup options (model, response_format, language, input, enoutput)
+     * @param request_id Request id sent with the setup command
+     * @return String work_id taken from the response message (empty if none was taken)
+     */
+    String setup(ApiWhisperSetupConfig_t config = ApiWhisperSetupConfig_t(), String request_id = "asr_setup",
+                 String language = "en_US");  // language is not read by setup(); config.language is sent
+
+    /**
+     * @brief Exit module Whisper, return Whisper work_id
+     *
+     * @param work_id Work id of the Whisper unit to exit
+     * @param request_id Request id sent with the exit command
+     * @return String work_id from the response message, or the given work_id if none was taken
+     */
+    String exit(String work_id, String request_id = "asr_exit");
+
+private:
+    ModuleMsg* _module_msg = nullptr;
+};
+
+}  // namespace m5_module_llm

From d46d46adc23a693b9821f953ebc416a242f7aa49 Mon Sep 17 00:00:00 2001
From: LittleMouse <n15978654429@gmail.com>
Date: Mon, 20 Jan 2025 11:12:45 +0800
Subject: [PATCH 10/16] update vad_whisper demo

---
 examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino b/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino
index 8d84a1c..db6645b 100644
--- a/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino
+++ b/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino
@@ -28,9 +28,12 @@ void setup()
     // language = "zh_CN";
 
     /* Init module serial port */
-    Serial2.begin(115200, SERIAL_8N1, 16, 17);  // Basic
-    // Serial2.begin(115200, SERIAL_8N1, 13, 14);  // Core2
-    // Serial2.begin(115200, SERIAL_8N1, 18, 17);  // CoreS3
+    // int rxd = 16, txd = 17;  // Basic
+    // int rxd = 13, txd = 14;  // Core2
+    // int rxd = 18, txd = 17;  // CoreS3
+    int rxd = M5.getPin(m5::pin_name_t::port_c_rxd);
+    int txd = M5.getPin(m5::pin_name_t::port_c_txd);
+    Serial2.begin(115200, SERIAL_8N1, rxd, txd);
 
     /* Init module */
     module_llm.begin(&Serial2);

From cb779441e13238d085551e381d792ffe1f5b549a Mon Sep 17 00:00:00 2001
From: LittleMouse <n15978654429@gmail.com>
Date: Mon, 20 Jan 2025 11:19:45 +0800
Subject: [PATCH 11/16] fix clang-format error.

---
 src/utils/comm.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils/comm.cpp b/src/utils/comm.cpp
index 33b6bc5..af350b0 100644
--- a/src/utils/comm.cpp
+++ b/src/utils/comm.cpp
@@ -51,7 +51,7 @@ ModuleComm::Respond_t ModuleComm::getResponse(uint32_t timeout)
 
                 if (c == '\n') {
                     ret.msg = buffer;
-                    return ret;;
+                    return ret;
                 }
             }
             get_msg_count  = millis();

From b75ce56a0fc4ff099797ffd8a386b2238ba61b49 Mon Sep 17 00:00:00 2001
From: LittleMouse <n15978654429@gmail.com>
Date: Mon, 20 Jan 2025 11:52:37 +0800
Subject: [PATCH 12/16] kws_vad_whisper demo add Japanese display.

---
 examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino b/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino
index db6645b..0dff8cd 100644
--- a/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino
+++ b/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino
@@ -23,6 +23,7 @@ void setup()
     M5.Display.setTextSize(2);
     M5.Display.setTextScroll(true);
     // M5.Display.setFont(&fonts::efontCN_12);  // Support Chinese display
+    // M5.Display.setFont(&fonts::efontJA_12);  // Support Japanese display
 
     language = "en_US";
     // language = "zh_CN";

From 3d61ba786b60efd9a2190bfa23b81d0967b35256 Mon Sep 17 00:00:00 2001
From: LittleMouse <n15978654429@gmail.com>
Date: Mon, 20 Jan 2025 16:40:09 +0800
Subject: [PATCH 13/16] Add delay to receive message function

---
 src/utils/comm.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils/comm.cpp b/src/utils/comm.cpp
index af350b0..21d6c72 100644
--- a/src/utils/comm.cpp
+++ b/src/utils/comm.cpp
@@ -71,7 +71,7 @@ ModuleComm::Respond_t ModuleComm::getResponse(uint32_t timeout)
             break;
         }
 
-        // delay(5);
+        delay(5);
     }
 
     return ret;

From ea5651e9c18ec2a535dce6f3d7e890ebcf7e2961 Mon Sep 17 00:00:00 2001
From: LittleMouse <n15978654429@gmail.com>
Date: Thu, 23 Jan 2025 15:58:49 +0800
Subject: [PATCH 14/16] update docs

---
 docs/cn.md |  518 +++++++++++++++++++++++----
 docs/en.md | 1009 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 1426 insertions(+), 101 deletions(-)

diff --git a/docs/cn.md b/docs/cn.md
index 137bd59..d8ef9c9 100644
--- a/docs/cn.md
+++ b/docs/cn.md
@@ -4,12 +4,12 @@
 
 ## M5ModuleLLM Class
 
-`M5ModuleLLM`用于初始化LLM Module, 并且提供内部成员用于快速初始化LLM的各个单元, 方便根据自己的需求构建应用。
+`M5ModuleLLM`用于初始化 LLM Module, 并且提供内部成员用于快速初始化 LLM 的各个单元, 方便根据自己的需求构建应用。
 
 ```cpp
 class M5ModuleLLM {
 public:
-    bool begin(Stream* targetPort);
+    bool begin(Stream * targetPort);
     bool checkConnection();
     void update();
 
@@ -21,6 +21,9 @@ public:
     m5_module_llm::ApiKws kws;
     m5_module_llm::ApiAsr asr;
     m5_module_llm::ApiAsr yolo;
+    m5_module_llm::ApiVad vad;
+    m5_module_llm::ApiWhisper whisper;
+    m5_module_llm::ApiDepthAnything depthanything;
     m5_module_llm::ModuleMsg msg;
     m5_module_llm::ModuleComm comm;
 private:
@@ -37,12 +40,12 @@ bool begin(Stream* targetPort);
 
 **功能说明:**
 
-- 初始化LLM Module UART接口配置
+- 初始化 LLM Module UART 接口配置
 
 **传入参数:**
 
-- Stream* targetPort:
-    - 传入Serial指针
+- Stream\* targetPort:
+    - 传入 Serial 指针
 
 **返回值:**
 
@@ -60,7 +63,7 @@ bool checkConnection();
 
 **功能说明:**
 
-- 发送`sys.ping`指令, 检查LLM Module连接状态
+- 发送 `sys.ping` 指令, 检查 LLM Module 连接状态
 
 **传入参数:**
 
@@ -82,7 +85,7 @@ void update();
 
 **功能说明:**
 
-- 拉取LLM Module UART响应数据, 该API需包含在Loop中循环执行。
+- 拉取 LLM Module UART 响应数据, 该 API 需包含在 Loop 中循环执行。
 
 **传入参数:**
 
@@ -94,7 +97,7 @@ void update();
 
 ## ApiSys Class
 
-`M5ModuleLLM`的内部成员`ApiSys sys`用于控制SYS单元实现系统复位等操作。
+`M5ModuleLLM` 的内部成员 `ApiSys sys` 用于控制 SYS 单元实现系统复位等操作。
 
 ### ping
 
@@ -106,7 +109,7 @@ int ping();
 
 **功能说明:**
 
-- 发送`sys.ping`指令, 检查LLM Module连接状态
+- 发送`sys.ping`指令, 检查 LLM Module 连接状态
 
 **传入参数:**
 
@@ -127,7 +130,7 @@ int reset(bool waitResetFinish = true);
 
 **功能说明:**
 
-- 发送`sys.reset`指令, 复位软件服务。
+- 发送 `sys.reset` 指令, 复位软件服务。
 
 **传入参数:**
 
@@ -150,7 +153,7 @@ int reboot();
 
 **功能说明:**
 
-- 发送`sys.reboot`指令, 复位系统。
+- 发送 `sys.reboot` 指令, 复位系统。
 
 **传入参数:**
 
@@ -163,7 +166,9 @@ int reboot();
 
 ## ApiAudio Class
 
-`M5ModuleLLM`的内部成员`ApiAudio audio`用于控制AUDIO单元的初始化和配置。
+注意：此函数在 1.3 及之后版本已经弃用，改为内部自动配置。
+
+`M5ModuleLLM` 的内部成员 `ApiAudio audio` 用于控制 Audio 单元的初始化和配置。
 
 ### setup
 
@@ -175,7 +180,7 @@ String setup(ApiAudioSetupConfig_t config = ApiAudioSetupConfig_t(), String requ
 
 **功能说明:**
 
-- 初始化Audio单元, 开启系统声卡。(使用KWS和TTS前需开启该功能)
+- 初始化 Audio 单元, 开启系统声卡。(使用 KWS 和 TTS 前需开启该功能)
 
 **传入参数:**
 
@@ -196,32 +201,78 @@ struct ApiAudioSetupConfig_t {
 };
 ```
 
-| 参数         | 描述       | 输入值                             |
-|------------|----------|---------------------------------|
-| capcard    | 麦克风声卡的索引 | 系统默认声卡:0                        |
-| capdevice  | 麦克风设备索引  | 板载硅麦:0                          |
-| capVolume  | 输入的音量    | 0.0～10.0 (1<volume将增益, 默认值为0.5) |
-| playcard   | 扬声器声卡的索引 | 系统默认声卡:0                        |
-| playdevice | 扬声器设备索引  | 板载扬声器:1                         |
-| playVolume | 输出的音量    | 0.0～10.0 (1<volume将增益, 默认值为0.5) |
+| 参数       | 描述             | 输入值                                   |
+| ---------- | ---------------- | ---------------------------------------- |
+| capcard    | 麦克风声卡的索引 | 系统默认声卡:0                           |
+| capdevice  | 麦克风设备索引   | 板载硅麦:0                               |
+| capVolume  | 输入的音量       | 0.0～10.0 (1\<volume将增益, 默认值为0.5) |
+| playcard   | 扬声器声卡的索引 | 系统默认声卡:0                           |
+| playdevice | 扬声器设备索引   | 板载扬声器:1                             |
+| playVolume | 输出的音量       | 0.0～10.0 (1\<volume将增益, 默认值为0.5) |
 
 **返回值:**
 
 - String:
     - audio_work_id: audio单元work_id
 
-## ApiKws Class
+## ApiCamera Class
 
-`M5ModuleLLM`的内部成员`ApiKws kws`用于控制KWS单元的初始化和配置。
+`M5ModuleLLM` 的内部成员 `ApiCamera camera` 用于控制 Camera 单元的初始化和配置。
 
 ### setup
 
 **函数原型:**
 
 ```cpp
-String setup(ApiKwsSetupConfig_t config = ApiKwsSetupConfig_t(), String request_id = "kws_setup");
+String setup(ApiCameraSetupConfig_t config = ApiCameraSetupConfig_t(), String request_id = "camera_setup");
+```
+
+**功能说明:**
+
+- 初始化 Camera 单元, 开启摄像头输入。(使用 UVC 前需开启该功能)
+
+**传入参数:**
+
+ApiCameraSetupConfig_t config:
+
+- Camera 单元初始化配置:
+- String request_id:
+    - 会话id, 使用默认即可。
+
+```cpp
+struct ApiCameraSetupConfig_t {
+    String response_format = "camera.raw";
+    String input           = "/dev/video0";
+    bool enoutput          = false;
+    int frame_width        = 320;
+    int frame_height       = 320;
+};
 ```
 
+| 参数         | 描述                 | 输入值                    |
+| ------------ | -------------------- | ------------------------- |
+| input        | UVC 的索引           | "/dev/video0"             |
+| enoutput     | 是否串口输出图像数据 | 启用: true<br>禁用: false |
+| frame_width  | 采集图像的宽         | 320                       |
+| frame_height | 采集图像的高         | 320                       |
+
+**返回值:**
+
+- String:
+    - camera_work_id: camera 单元 work_id
+
+## ApiKws Class
+
+`M5ModuleLLM` 的内部成员 `ApiKws kws` 用于控制 KWS 单元的初始化和配置。
+
+### setup
+
+**函数原型:**
+
+```cpp
+String setup(ApiKwsSetupConfig_t config = ApiKwsSetupConfig_t(), String request_id = "kws_setup",
+             String language = "en_US");
+```
 **功能说明:**
 
 - 初始化KWS单元, 并配置唤醒关键字。
@@ -244,32 +295,77 @@ struct ApiKwsSetupConfig_t {
 };
 ```
 
-| 参数       | 描述         | 输入值                                                                                                                           |
-|----------|------------|-------------------------------------------------------------------------------------------------------------------------------|
-| model    | 转换模型       | 英文模型: "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01"<br>中文模型: "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01" |
-| kws      | KWS唤醒词文本设置 | 不允许中文/英文混合, 英文要求全大写                                                                                                           |
-| enoutput | 启用UART输出   | 启用: true<br>禁用: false                                                                                                         |
+| 参数     | 描述              | 输入值                                                                                                                                |
+| -------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
+| model    | 转换模型          | 英文模型: "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01"<br>中文模型: "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01" |
+| kws      | KWS唤醒词文本设置 | 不允许中文/英文混合, 英文要求全大写                                                                                                   |
+| enoutput | 启用UART输出      | 启用: true<br>禁用: false                                                                                                             |
 
 **返回值:**
 
 - String:
     - kws_work_id: kws单元work_id
 
+## ApiVad Class
+
+`M5ModuleLLM` 的内部成员 `ApiVad vad` 用于控制 VAD 单元的初始化和配置。
+
+### setup
+
+**函数原型:**
+
+```cpp
+String setup(ApiVadSetupConfig_t config = ApiVadSetupConfig_t(), String request_id = "vad_setup");
+```
+
+**功能说明:**
+
+- 初始化 VAD 单元。
+
+**传入参数:**
+
+ApiVadSetupConfig_t config:
+
+- VAD 单元初始化配置:
+- String request_id:
+    - 会话id, 使用默认即可。
+
+```cpp
+struct ApiVadSetupConfig_t {
+    String model              = "silero-vad";
+    String response_format    = "vad.bool";
+    std::vector<String> input = {"sys.pcm", "kws.1000"};
+    bool enoutput             = true;
+};
+```
+
+| 参数     | 描述         | 输入值                                                                                                             |
+| -------- | ------------ | ------------------------------------------------------------------------------------------------------------------ |
+| model    | 转换模型     | 模型: "silero-vad"<br>                                                                                             |
+| input    | 输入         | KWS唤醒输入: "kws.xxx"(输入kws单元的work_id)<br>板载麦克风输入: "sys.pcm"<br>UART流式输入: "vad.wav.stream.base64" |
+| enoutput | 启用UART输出 | 启用: true<br>禁用: false                                                                                          |
+
+**返回值:**
+
+- String:
+    - vad_work_id: vad 单元 work_id
+
 ## ApiAsr Class
 
-`M5ModuleLLM`的内部成员`ApiAsr asr`用于控制ASR单元的初始化和配置。
+`M5ModuleLLM` 的内部成员 `ApiAsr asr` 用于控制 ASR 单元的初始化和配置。
 
 ### setup
 
 **函数原型:**
 
 ```cpp
-String setup(ApiAsrSetupConfig_t config = ApiAsrSetupConfig_t(), String request_id = "asr_setup");
+String setup(ApiAsrSetupConfig_t config = ApiAsrSetupConfig_t(), String request_id = "asr_setup",
+             String language = "en_US");
 ```
 
 **功能说明:**
 
-- 初始化ASR单元, 开启语音转文本功能。
+- 初始化 ASR 单元, 开启语音转文本功能。
 
 **传入参数:**
 
@@ -291,24 +387,71 @@ struct ApiAsrSetupConfig_t {
 };
 ```
 
-| 参数              | 描述            | 输入值                                                                                                                 |
-|-----------------|---------------|---------------------------------------------------------------------------------------------------------------------|
-| model           | 转换模型          | 英文模型: "sherpa-ncnn-streaming-zipformer-20M-2023-02-17"<br>中文模型: "sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23" |
-| response_format | 输出格式          | 普通输出: "asr.utf-8"<br>流式输出: "asr.utf-8.stream"                                                                       |
-| input           | 输入            | KWS唤醒输入: "kws.xxx"(输入kws单元的work_id)<br>板载麦克风输入: "sys.pcm"<br>UART流式输入: "asr.wav.stream.base64"                      |
-| rule1           | 唤醒到未识别到内容超时时间 | 单位:秒                                                                                                                |
-| rule2           | 识别最大间隔时间      | 单位:秒                                                                                                                |
-| rule3           | 识别最长超时时间      | 单位:秒                                                                                                                |
-| enoutput        | 启用UART输出      | 启用: true<br>禁用: false                                                                                               |
+| 参数            | 描述                       | 输入值                                                                                                                      |
+| --------------- | -------------------------- | --------------------------------------------------------------------------------------------------------------------------- |
+| model           | 转换模型                   | 英文模型: "sherpa-ncnn-streaming-zipformer-20M-2023-02-17"<br>中文模型: "sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23" |
+| response_format | 输出格式                   | 普通输出: "asr.utf-8"<br>流式输出: "asr.utf-8.stream"                                                                       |
+| input           | 输入                       | KWS唤醒输入: "kws.xxx"(输入kws单元的work_id)<br>板载麦克风输入: "sys.pcm"<br>UART流式输入: "asr.wav.stream.base64"          |
+| rule1           | 唤醒到未识别到内容超时时间 | 单位:秒                                                                                                                     |
+| rule2           | 识别最大间隔时间           | 单位:秒                                                                                                                     |
+| rule3           | 识别最长超时时间           | 单位:秒                                                                                                                     |
+| enoutput        | 启用UART输出               | 启用: true<br>禁用: false                                                                                                   |
 
 **返回值:**
 
 - String:
-    - asr_work_id: asr单元work_id
+    - asr_work_id: asr 单元 work_id
+
+## ApiWhisper Class
+
+`M5ModuleLLM`的内部成员`ApiWhisper whisper`用于控制 Whisper 单元的初始化和配置。
+
+### setup
+
+**函数原型:**
+
+```cpp
+String setup(ApiWhisperSetupConfig_t config = ApiWhisperSetupConfig_t(), String request_id = "asr_setup", String language = "en_US");
+```
+
+**功能说明:**
+
+- 初始化 Whisper 单元, 开启语音转文本功能。
+
+**传入参数:**
+
+ApiWhisperSetupConfig_t config:
+
+- Whisper 单元初始化配置:
+- String request_id:
+    - 会话id, 使用默认即可。
+
+```cpp
+struct ApiWhisperSetupConfig_t {
+    String model              = "whisper-tiny";
+    String response_format    = "asr.utf-8";
+    std::vector<String> input = {"sys.pcm", "kws.1000", "vad.1001"};
+    String language           = "en";
+    bool enoutput             = true;
+};
+```
+
+| 参数            | 描述               | 输入值                                                                                                             |
+| --------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------ |
+| model           | 转换模型           | 模型: "whisper-tiny"<br>                                                                                           |
+| response_format | 输出格式           | 普通输出: "asr.utf-8"<br>                                                                                          |
+| input           | 输入               | KWS唤醒输入: "kws.xxx"(输入kws单元的work_id)<br>板载麦克风输入: "sys.pcm"<br>UART流式输入: "asr.wav.stream.base64" |
+| language        | 用于语言识别的语言 | 默认 "en"<br> 可选 "zh", "ja"                                                                                      |
+| enoutput        | 启用UART输出       | 启用: true<br>禁用: false                                                                                          |
+
+**返回值:**
+
+- String:
+    - whisper_work_id: whisper 单元 work_id
 
 ## ApiLlm Class
 
-`M5ModuleLLM`的内部成员`ApiLlm llm`用于控制LLM单元的初始化和配置。
+`M5ModuleLLM` 的内部成员 `ApiLlm llm` 用于控制 LLM 单元的初始化和配置。
 
 ### setup
 
@@ -320,7 +463,7 @@ String setup(ApiLlmSetupConfig_t config = ApiLlmSetupConfig_t(), String request_
 
 **功能说明:**
 
-- 初始化LLM单元, 支持配置LLM单元输入输出数据方式。
+- 初始化 LLM 单元, 支持配置 LLM 单元输入输出数据方式。
 
 **传入参数:**
 
@@ -334,22 +477,20 @@ struct ApiLlmSetupConfig_t {
     String prompt;
     String model           = "qwen2.5-0.5B-prefill-20e";
     String response_format = "llm.utf-8.stream";
-    String input           = "llm.utf-8";
+    std::vector<String> input = {"llm.utf-8", "kws.1000"};
     bool enoutput          = true;
-    bool enkws             = true;
     int max_token_len      = 127;
 };
 ```
 
-| 参数              | 描述                      | 输入值                                                                                             |
-|-----------------|-------------------------|-------------------------------------------------------------------------------------------------|
-| model           | 转换模型                    | 预置模型 "qwen2.5-0.5B-prefill-20e"                                                                 |
-| response_format | 输出格式                    | 普通输出: "llm.utf-8"<br>流式输出: "llm.utf-8.stream"                                                   |
-| input           | 输入                      | ASR输入: "asr.xxx"(输入asr单元的work_id)<br>UART输入: "llm.utf-8"<br>KWS唤醒打断: "kws.xxx"(输入kws单元的work_id) |
-| enkws           | KWS唤醒是否终止过程             | KWS打断过程: true<br>KWS不打断过程: false                                                                |
-| max_length      | 配置最大输出token(最大返回推理文本长度) | 最大值: 1024, 推荐使用127                                                                              |
-| prompt          | 模型初始化提示词                | String                                                                                          |
-| enoutput        | 启用UART输出                | 启用: true<br>禁用: false                                                                           |
+| 参数            | 描述                                    | 输入值                                                                                                            |
+| --------------- | --------------------------------------- | ----------------------------------------------------------------------------------------------------------------- |
+| model           | 转换模型                                | 预置模型 "qwen2.5-0.5B-prefill-20e"                                                                               |
+| response_format | 输出格式                                | 普通输出: "llm.utf-8"<br>流式输出: "llm.utf-8.stream"                                                             |
+| input           | 输入                                    | ASR输入: "asr.xxx"(输入asr单元的work_id)<br>UART输入: "llm.utf-8"<br>KWS唤醒打断: "kws.xxx"(输入kws单元的work_id) |
+| max_token_len   | 配置最大输出token(最大返回推理文本长度) | 最大值: 1023                                                                                                      |
+| prompt          | 模型初始化系统提示词                    | String                                                                                                            |
+| enoutput        | 启用UART输出                            | 启用: true<br>禁用: false                                                                                         |
 
 **返回值:**
 
@@ -366,7 +507,7 @@ int inference(String work_id, String input, String request_id = "llm_inference")
 
 **功能说明:**
 
-- 输入数据, 开始推理。返回结果内容将进入`M5ModuleLLM.msg`中的`responseMsgList`列表容器中。
+- 输入数据, 开始推理。返回结果内容将进入 `M5ModuleLLM.msg` 中的 `responseMsgList` 列表容器中。
 
 **传入参数:**
 
@@ -392,16 +533,16 @@ int inferenceAndWaitResult(String work_id, String input, std::function<void(Stri
 
 **功能说明:**
 
-- 输入数据, 开始推理。并阻塞等待返回结果, 然后调用callback函数。
+- 输入数据, 开始推理。并阻塞等待返回结果, 然后调用 callback 函数。
 
 **传入参数:**
 
 - String work_id:
-    - 调用的LLM单元work_id
+    - 调用的 LLM 单元 work_id
 - String input:
     - 输入文本
 - void onResult(String&)
-    - 推理结果callback函数
+    - 推理结果 callback 函数
 - uint32_t timeout:
     - 等待推理超时时间
 - String request_id:
@@ -409,12 +550,117 @@ int inferenceAndWaitResult(String work_id, String input, std::function<void(Stri
 
 **返回值:**
 
+- int:
+    - MODULE_LLM_OK / Error Code
+
+## ApiVlm Class
+
+`M5ModuleLLM` 的内部成员 `ApiVlm vlm` 用于控制 VLM 单元的初始化和配置。
+
+### setup
+
+**函数原型:**
+
+```cpp
+String setup(ApiVlmSetupConfig_t config = ApiVlmSetupConfig_t(), String request_id = "vlm_setup");
+```
+
+**功能说明:**
+
+- 初始化 VLM 单元, 支持配置 VLM 单元输入输出数据方式。
+
+**传入参数:**
+
+- ApiVlmSetupConfig_t config:
+    - VLM 单元初始化配置:
+- String request_id:
+    - 会话id, 使用默认即可。
+
+```cpp
+struct ApiVlmSetupConfig_t {
+    String prompt;
+    String model           = "internvl2.5-1B-ax630c";
+    String response_format = "vlm.utf-8.stream";
+    String input           = ["vlm.utf-8", "kws.1000"];
+    bool enoutput          = true;
+    int max_token_len      = 1023;
+};
+```
+
+| 参数            | 描述                                    | 输入值                                                                                                            |
+| --------------- | --------------------------------------- | ----------------------------------------------------------------------------------------------------------------- |
+| model           | 转换模型                                | 预置模型 "internvl2.5-1B-ax630c"                                                                                  |
+| response_format | 输出格式                                | 普通输出: "vlm.utf-8"<br>流式输出: "vlm.utf-8.stream"                                                             |
+| input           | 输入                                    | ASR输入: "asr.xxx"(输入asr单元的work_id)<br>UART输入: "vlm.utf-8"<br>KWS唤醒打断: "kws.xxx"(输入kws单元的work_id) |
+| max_length      | 配置最大输出token(最大返回推理文本长度) | 最大值: 1023                                                                                                      |
+| prompt          | 模型初始化系统提示词                    | String                                                                                                            |
+| enoutput        | 启用UART输出                            | 启用: true<br>禁用: false                                                                                         |
+
+**返回值:**
+
+- String:
+    - vlm_work_id: vlm 单元 work_id
+
+### inference
+
+**函数原型:**
+
+```cpp
+int inference(String work_id, String input, String request_id = "vlm_inference");
+```
+
+**功能说明:**
+
+- 输入数据, 开始推理。返回结果内容将进入 `M5ModuleLLM.msg` 中的 `responseMsgList` 列表容器中。
+
+**传入参数:**
+
+- String work_id:
+    - 调用的 VLM 单元 work_id
+- String input:
+    - 输入文本
+- String request_id:
+    - 会话ID, 当同时存在多个会话的时候用于区分。
+
+**返回值:**
+
+- int:
+    - MODULE_LLM_OK / Error Code
+
+### inferenceAndWaitResult
+
+**函数原型:**
+
+```cpp
+int inferenceAndWaitResult(String work_id, String input, std::function<void(String&)> onResult,
+                           uint32_t timeout = 5000, String request_id = "vlm_inference");
+```
+
+**功能说明:**
+
+- 输入数据, 开始推理。并阻塞等待返回结果, 然后调用 callback 函数。
+
+**传入参数:**
+
+- String work_id:
+    - 调用的 VLM 单元 work_id
+- String input:
+    - 输入文本
+- void onResult(String&)
+    - 推理结果 callback 函数
+- uint32_t timeout:
+    - 等待推理超时时间
+- String request_id:
+    - 会话 ID, 当同时存在多个会话的时候用于区分。
+
+**返回值:**
+
 - int:
     - MODULE_LLM_OK / Error Code
 
 ## ApiTts Class
 
-`M5ModuleLLM`的内部成员`ApiTts tts`用于控制TTS单元的初始化和配置。
+`M5ModuleLLM` 的内部成员 `ApiTts tts` 用于控制 TTS 单元的初始化和配置。
 
 ### setup
 
@@ -439,19 +685,19 @@ ApiTtsSetupConfig_t config:
 ```cpp
 struct ApiTtsSetupConfig_t {
     String model           = "single_speaker_english_fast";
-    String response_format = "tts.base64.wav";
-    String input           = "tts.utf-8.stream";
-    bool enoutput          = true;
-    bool enkws             = true;
+    String response_format = "sys.pcm";
+    String input           = ["tts.utf-8.stream", "kws.1000"];
+    bool enoutput          = false;
+    bool enaudio           = true;
 };
 ```
 
-| 参数       | 描述          | 输入值                                                                                      |
-|----------|-------------|------------------------------------------------------------------------------------------|
-| model    | 转换模型        | 英文模型: "single_speaker_english_fast"<br>中文模型: "single_speaker_fast"                       |
-| input    | 输入          | LLM输入: "llm.xxx"(输入llm单元的work_id)<br>UART输入: "tts.utf-8"<br>UART流式输入: "tts.utf-8.stream" |
-| enkws    | KWS唤醒是否终止过程 | KWS打断过程: true<br>KWS不打断过程: false                                                         |
-| enoutput | 启用UART输出    | 启用: true<br>禁用: false                                                                    |
+| 参数     | 描述           | 输入值                                                                                                                                                |
+| -------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
+| model    | 转换模型       | 英文模型: "single_speaker_english_fast"<br>中文模型: "single_speaker_fast"                                                                            |
+| input    | 输入           | LLM输入: "llm.xxx"(输入llm单元的work_id)<br>UART输入: "tts.utf-8"<br>UART流式输入: "tts.utf-8.stream"<br>KWS唤醒打断: "kws.xxx"(输入kws单元的work_id) |
+| enoutput | 启用UART输出   | 启用: true<br>禁用: false                                                                                                                             |
+| enaudio  | 启用扬声器播放 | 启用: true<br>禁用: false                                                                                                                             |
 
 **返回值:**
 
@@ -468,7 +714,7 @@ int inference(String work_id, String input, uint32_t timeout = 0, String request
 
 **功能说明:**
 
-- 输入数据, 开始推理转换, 完成后将自动播放至扬声器。
+- 输入数据, 开始推理转换, 完成后扬声器将自动播放。
 
 **传入参数:**
 
@@ -486,9 +732,130 @@ int inference(String work_id, String input, uint32_t timeout = 0, String request
 - int:
     - MODULE_LLM_OK / Error Code
 
+## ApiMelotts Class
+
+`M5ModuleLLM` 的内部成员 `ApiMelotts melotts` 用于控制 Melotts 单元的初始化和配置。
+
+### setup
+
+**函数原型:**
+
+```cpp
+String setup(ApiMelottsSetupConfig_t config = ApiMelottsSetupConfig_t(), String request_id = "melotts_setup",
+             String language = "en_US");
+```
+
+**功能说明:**
+
+- 初始化 Melotts 单元, 开启文本转语音功能。
+
+**传入参数:**
+
+ApiMelottsSetupConfig_t config:
+
+- Melotts单元初始化配置:
+- String request_id:
+    - 会话id, 使用默认即可。
+
+```cpp
+struct ApiMelottsSetupConfig_t {
+    String model              = "melotts_zh-cn";
+    String response_format    = "sys.pcm";
+    std::vector<String> input = {"tts.utf-8.stream"};
+    bool enoutput             = false;
+    bool enaudio              = true;
+};
+```
+
+| 参数     | 描述           | 输入值                                                                                                        |
+| -------- | -------------- | ------------------------------------------------------------------------------------------------------------- |
+| model    | 转换模型       | 中英文模型: "melotts_zh-cn"<br>中文模型: "single_speaker_fast"                                                |
+| input    | 输入           | LLM输入: "llm.xxx"(输入llm单元的work_id)<br>UART输入: "melotts.utf-8"<br>UART流式输入: "melotts.utf-8.stream" |
+| enoutput | 启用UART输出   | 启用: true<br>禁用: false                                                                                     |
+| enaudio  | 启用扬声器播放 | 启用: true<br>禁用: false                                                                                     |
+
+**返回值:**
+
+- String:
+    - melotts_work_id: melotts 单元 work_id
+
+### inference
+
+**函数原型:**
+
+```cpp
+int inference(String work_id, String input, uint32_t timeout = 0, String request_id = "tts_inference");
+```
+
+**功能说明:**
+
+- 输入数据, 开始推理转换, 完成后扬声器将自动播放。
+
+**传入参数:**
+
+- String work_id:
+    - 调用的 Melotts 单元work_id
+- String input:
+    - 输入文本
+- uint32_t timeout:
+    - 等待推理超时时间
+- String request_id:
+    - 会话ID, 当同时存在多个会话的时候用于区分。
+
+**返回值:**
+
+- int:
+    - MODULE_LLM_OK / Error Code
+
+## ApiYolo Class
+
+`M5ModuleLLM` 的内部成员 `ApiYolo yolo` 用于控制 Yolo 单元的初始化和配置。
+
+### setup
+
+**函数原型:**
+
+```cpp
+String setup(ApiYoloSetupConfig_t config = ApiYoloSetupConfig_t(), String request_id = "yolo_setup");
+```
+
+**功能说明:**
+
+- 初始化 Yolo 单元, 开启图像检测功能。
+
+**传入参数:**
+
+ApiYoloSetupConfig_t config:
+
+- Yolo 单元初始化配置:
+- String request_id:
+    - 会话id, 使用默认即可。
+
+```cpp
+struct ApiYoloSetupConfig_t {
+    String model              = "yolo11n";
+    String response_format    = "yolo.box.stream";
+    std::vector<String> input = {"yolo.jpeg.base64"};
+    bool enoutput             = true;
+};
+```
+
+| 参数            | 描述         | 输入值                                                                                        |
+| --------------- | ------------ | --------------------------------------------------------------------------------------------- |
+| model           | 转换模型     | 检测模型: "yolo11n"<br>姿态模型: "yolo11n-pose"<br>手部姿态模型: "yolo11n-hand-pose"          |
+| response_format | 输出格式     | 检测输出: "yolo.box.stream"<br>姿态输出: "yolo.pose.stream"                                   |
+| input           | 输入         | UVC 输入: "camera.xxx"(输入 camera 单元的 work_id)<br>UART流式输入: "yolo.jpeg.base64.stream" |
+| enoutput        | 启用UART输出 | 启用: true<br>禁用: false                                                                     |
+
+**返回值:**
+
+- String:
+    - yolo_work_id: yolo 单元 work_id
+
 ## ModuleMsg Class
 
-`M5ModuleLLM`的内部成员`ModuleMsg msg`提供了`responseMsgList`容器用于用于缓存接收LLM Module返回的各种信息。参考以下案例，在主循环中遍历获取返回结果。
+`M5ModuleLLM` 的内部成员 `ModuleMsg msg` 提供了 `responseMsgList` 容器用于缓存接收 LLM Module
+返回的各种信息。参考以下案例，在主循环中遍历获取返回结果。
 
 ```cpp
 void loop()
@@ -520,10 +887,10 @@ void loop()
 
 ## VoiceAssistant Class
 
-`M5ModuleLLM_VoiceAssistant`用于快速创建LLM语音助手实例, 快速实现KWS(语音唤醒)->ASR(语音转文本)->LLM(大模型推理)->TTS(
-文本转语音)。
+`M5ModuleLLM_VoiceAssistant` 用于快速创建 LLM 语音助手实例, 快速实现 KWS(语音唤醒)->ASR(语音转文本)->LLM(大模型推理)->
+TTS(文本转语音)。
 
-- 初始化时候只需要将`M5ModuleLLM`实例传入构造函数, 并注册对应事件的回调函数即可完成语音助手创建。
+- 初始化时候只需要将 `M5ModuleLLM` 实例传入构造函数, 并注册对应事件的回调函数即可完成语音助手创建。
 
 ```cpp
 /*
@@ -637,4 +1004,3 @@ enum ModuleLLMErrorCode_t {
     MODULE_LLM_ERROR_NONE                      = -99,
 };
 ```
-
diff --git a/docs/en.md b/docs/en.md
index 1714e53..b1d7f57 100644
--- a/docs/en.md
+++ b/docs/en.md
@@ -1,41 +1,1000 @@
-# LLM Module Arduino Quick Start
+# M5Module-LLM Arduino API
 
-## Overview
+[M5Module-LLM](https://github.com/m5stack/M5Module-LLM) Arduino Driver Library API Documentation.
 
-The `LLM Module` can be used with various M5 controllers. This tutorial demonstrates how to control the LLM Module using the `M5Core` series in the `Arduino IDE` with the LLM Module driver library.
+## M5ModuleLLM Class
 
-<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/guide/llm/llm/llm_module_device_01.jpg" width="70%" />
+`M5ModuleLLM` is used to initialize the LLM Module and provides internal members for quick initialization of various LLM units, making it easier to build applications according to your needs.
 
-## Environment Setup
+```cpp
+class M5ModuleLLM {
+public:
+    bool begin(Stream * targetPort);
+    bool checkConnection();
+    void update();
 
-- 1.Arduino IDE Installation: Refer to the [Arduino IDE Installation Guide](/en/arduino/arduino_ide) to complete the IDE installation.
+    m5_module_llm::ApiSys sys;
+    m5_module_llm::ApiLlm llm;
+    m5_module_llm::ApiAudio audio;
+    m5_module_llm::ApiTts tts;
+    m5_module_llm::ApiMelotts melotts;
+    m5_module_llm::ApiKws kws;
+    m5_module_llm::ApiAsr asr;
+    m5_module_llm::ApiYolo yolo;
+    m5_module_llm::ApiVad vad;
+    m5_module_llm::ApiWhisper whisper;
+    m5_module_llm::ApiDepthAnything depthanything;
+    m5_module_llm::ModuleMsg msg;
+    m5_module_llm::ModuleComm comm;
+private:
+};
+```
 
-- 2.Board Manager Installation: Refer to the [Basic Environment Setup Guide](/en/arduino/arduino_board) to complete the M5Stack board manager installation and select the `M5Core` development board.
+### begin
 
-<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/arduino/m5core/quickstart_arduino_core_selectboard.png" width="70%" />
+**Function Prototype:**
 
-- 3.Library Installation: Refer to the [Library Management Guide](/en/arduino/arduino_library) to install the `LLM Module` driver library. (Follow prompts to install the dependency library `M5Unified`)
+```cpp
+bool begin(Stream* targetPort);
+```
 
-<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/guide/llm/llm/llm_arduino_lib_01.jpg" width="70%" />
-<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/guide/llm/llm/llm_arduino_lib_02.jpg" width="70%" />
+**Function Description:**
 
-## Program Compilation & Upload
+- Initializes the LLM Module UART interface configuration.
 
-Open the example program "kws_asr" in the driver library, click the upload button, and the program will automatically compile and upload.The wake-up word used in the example program is "HELLO". After waiting for the device to be initialized, it will be woken up using the keyword.
+**Parameters:**
 
-<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/guide/llm/llm/llm_arduino_example_01.jpg" width="70%" />
-<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/guide/llm/llm/llm_arduino_example_02.jpg" width="70%" />
-<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/guide/llm/llm/llm_arduino_example_03.jpg" width="70%" />
-<img src="https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/docs/static/assets/img/guide/llm/llm/llm_arduino_example_04.jpg" width="70%" />
+- Stream\* targetPort:
+    - Pass the Serial pointer.
 
-- Examples:
-  - `kws_asr`: Uses KWS to wake up and triggers ASR for speech-to-text conversion. (KWS+ASR)
-  - `text_assistant`: Inputs text into the LLM model, performs inference, and outputs the result in text form. (LLM)
-  - `tts`: Uses the TTS unit to convert text to speech for playback. (TTS)
-  - `voice_assistant`: Uses KWS to wake up, triggers ASR for speech-to-text conversion, inputs the converted text into the LLM for inference, and outputs the inference result through TTS as speech. (KWS+ASR+LLM+TTS)
+**Return Value:**
 
-## Related Links
+- bool:
+    - true: Initialization successful
+    - false: Initialization failed
 
-- [LLM Module Arduino Lib](https://github.com/m5stack/M5Module-LLM)
-- [LLM Module Arduino Lib API](/en/guide/llm/llm/arduino_api)
+### checkConnection
 
+**Function Prototype:**
+
+```cpp
+bool checkConnection();
+```
+
+**Function Description:**
+
+- Sends the `sys.ping` command to check the connection status of the LLM Module.
+
+**Parameters:**
+
+- None
+
+**Return Value:**
+
+- bool:
+    - true: Module responds
+    - false: No response from module
+
+### update
+
+**Function Prototype:**
+
+```cpp
+void update();
+```
+
+**Function Description:**
+
+- Pulls the LLM Module UART response data. This API should be called continuously inside the main loop.
+
+**Parameters:**
+
+- None
+
+**Return Value:**
+
+- None
+
+## ApiSys Class
+
+The internal member `ApiSys sys` of `M5ModuleLLM` is used to control the SYS unit, enabling operations like system reset.
+
+### ping
+
+**Function Prototype:**
+
+```cpp
+int ping();
+```
+
+**Function Description:**
+
+- Sends the `sys.ping` command to check the connection status of the LLM Module.
+
+**Parameters:**
+
+- None
+
+**Return Value:**
+
+- int:
+    - MODULE_LLM_OK / Error Code
+
+### reset
+
+**Function Prototype:**
+
+```cpp
+int reset(bool waitResetFinish = true);
+```
+
+**Function Description:**
+
+- Sends the `sys.reset` command to reset the software service.
+
+**Parameters:**
+
+- bool waitResetFinish:
+    - true: Blocks and waits for reset to finish
+    - false: Performs reset without blocking
+
+**Return Value:**
+
+- int:
+    - MODULE_LLM_OK / Error Code
+
+### reboot
+
+**Function Prototype:**
+
+```cpp
+int reboot();
+```
+
+**Function Description:**
+
+- Sends the `sys.reboot` command to reboot the system.
+
+**Parameters:**
+
+- None
+
+**Return Value:**
+
+- int:
+    - MODULE_LLM_OK / Error Code
+
+## ApiAudio Class
+
+Note: This function has been deprecated in version 1.3 and later, and is now automatically configured internally.
+
+The internal member `ApiAudio audio` of `M5ModuleLLM` is used to control the initialization and configuration of the Audio unit.
+
+### setup
+
+**Function prototype:**
+
+```cpp
+String setup(ApiAudioSetupConfig_t config = ApiAudioSetupConfig_t(), String request_id = "audio_setup");
+```
+
+**Function description:**
+
+- Initializes the Audio unit and activates the system sound card. (This feature must be enabled before using KWS and TTS)
+
+**Parameters:**
+
+ApiAudioSetupConfig_t config:
+
+- Audio unit initialization configuration:
+- String request_id:
+    - Session ID, default can be used.
+
+```cpp
+struct ApiAudioSetupConfig_t {
+    int capcard      = 0;
+    int capdevice    = 0;
+    float capVolume  = 0.5;
+    int playcard     = 0;
+    int playdevice   = 1;
+    float playVolume = 0.15;
+};
+```
+
+| Parameter   | Description       | Input Values                          |
+| ----------- | ----------------- | ------------------------------------- |
+| capcard     | Microphone sound card index | Default sound card: 0           |
+| capdevice   | Microphone device index   | Onboard silicon microphone: 0    |
+| capVolume   | Input volume      | 0.0～10.0 (values above 1 increase gain; default is 0.5) |
+| playcard    | Speaker sound card index | Default sound card: 0           |
+| playdevice  | Speaker device index   | Onboard speaker: 1                |
+| playVolume  | Output volume      | 0.0～10.0 (values above 1 increase gain; default is 0.15) |
+
+**Return Value:**
+
+- String:
+    - audio_work_id: audio unit work_id
+
+## ApiCamera Class
+
+The internal member `ApiCamera camera` of `M5ModuleLLM` is used to control the initialization and configuration of the Camera unit.
+
+### setup
+
+**Function prototype:**
+
+```cpp
+String setup(ApiCameraSetupConfig_t config = ApiCameraSetupConfig_t(), String request_id = "camera_setup");
+```
+
+**Function description:**
+
+- Initializes the Camera unit and activates camera input. (This feature must be enabled before using UVC)
+
+**Parameters:**
+
+ApiCameraSetupConfig_t config:
+
+- Camera unit initialization configuration:
+- String request_id:
+    - Session ID, default can be used.
+
+```cpp
+struct ApiCameraSetupConfig_t {
+    String response_format = "camera.raw";
+    String input           = "/dev/video0";
+    bool enoutput          = false;
+    int frame_width        = 320;
+    int frame_height       = 320;
+};
+```
+
+| Parameter     | Description             | Input Values                |
+| ------------- | ----------------------- | --------------------------- |
+| input         | UVC index               | "/dev/video0"               |
+| enoutput      | Whether to output image data via serial | Enable: true<br>Disable: false |
+| frame_width   | Image width             | 320                         |
+| frame_height  | Image height            | 320                         |
+
+**Return Value:**
+
+- String:
+    - camera_work_id: camera unit work_id
+
+## ApiKws Class
+
+The internal member `ApiKws kws` of `M5ModuleLLM` is used to control the initialization and configuration of the KWS unit.
+
+### setup
+
+**Function Prototype:**
+
+```cpp
+String setup(ApiKwsSetupConfig_t config = ApiKwsSetupConfig_t(), String request_id = "kws_setup",
+             String language = "en_US");
+```
+**Function Description:**
+
+- Initializes the KWS unit and configures the wake-up keyword.
+
+**Parameters:**
+
+ApiKwsSetupConfig_t config:
+
+- KWS unit initialization configuration:
+- String request_id:
+    - Session ID, default can be used.
+
+```cpp
+struct ApiKwsSetupConfig_t {
+    String kws             = "HELLO";
+    String model           = "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01";
+    String response_format = "kws.bool";
+    String input           = "sys.pcm";
+    bool enoutput          = true;
+};
+```
+
+| Parameter | Description         | Input Values                                                                                                                             |
+| --------- | ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
+| model     | Conversion Model    | English Model: "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01"<br>Chinese Model: "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01" |
+| kws       | KWS Wake-up Word Text | No mixing of Chinese and English; English must be in uppercase                                                                                             |
+| enoutput  | Enable UART Output  | Enable: true<br>Disable: false                                                                                                            |
+
+**Return Value:**
+
+- String:
+    - kws_work_id: KWS unit work_id
+
+## ApiVad Class
+
+The internal member `ApiVad vad` of `M5ModuleLLM` is used to control the initialization and configuration of the VAD unit.
+
+### setup
+
+**Function Prototype:**
+
+```cpp
+String setup(ApiVadSetupConfig_t config = ApiVadSetupConfig_t(), String request_id = "vad_setup");
+```
+
+**Function Description:**
+
+- Initializes the VAD unit.
+
+**Parameters:**
+
+ApiVadSetupConfig_t config:
+
+- VAD unit initialization configuration:
+- String request_id:
+    - Session ID, default can be used.
+
+```cpp
+struct ApiVadSetupConfig_t {
+    String model           = "silero-vad";
+    String response_format = "vad.bool";
+    String input           = {"sys.pcm", "kws.1000"};
+    bool enoutput          = true;
+};
+```
+
+| Parameter | Description      | Input Values                                                                                                          |
+| --------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
+| model     | Conversion Model | Model: "silero-vad"<br>                                                                                               |
+| input     | Input            | KWS Wake-up Input: "kws.xxx" (input the KWS unit's work_id)<br>Onboard Microphone Input: "sys.pcm"<br>UART Stream Input: "vad.wav.stream.base64" |
+| enoutput  | Enable UART Output | Enable: true<br>Disable: false                                                                                         |
+
+**Return Value:**
+
+- String:
+    - vad_work_id: VAD unit work_id
+
+## ApiAsr Class
+
+The internal member `ApiAsr asr` of `M5ModuleLLM` is used to control the initialization and configuration of the ASR unit.
+
+### setup
+
+**Function Prototype:**
+
+```cpp
+String setup(ApiAsrSetupConfig_t config = ApiAsrSetupConfig_t(), String request_id = "asr_setup",
+             String language = "en_US");
+```
+
+**Function Description:**
+
+- Initializes the ASR unit and enables speech-to-text functionality.
+
+**Input Parameters:**
+
+ApiAsrSetupConfig_t config:
+
+- ASR unit initialization configuration:
+- String request_id:
+    - Session ID, the default can be used.
+
+```cpp
+struct ApiAsrSetupConfig_t {
+    String model           = "sherpa-ncnn-streaming-zipformer-20M-2023-02-17";
+    String response_format = "asr.utf-8.stream";
+    String input           = ["sys.pcm", "kws.1000"];
+    bool enoutput          = true;
+    float rule1            = 2.4;
+    float rule2            = 1.2;
+    float rule3            = 30.0;
+};
+```
+
+| Parameter       | Description                          | Input Values                                                                                                                |
+| --------------- | ------------------------------------ | --------------------------------------------------------------------------------------------------------------------------- |
+| model           | Conversion model                     | English Model: "sherpa-ncnn-streaming-zipformer-20M-2023-02-17"<br>Chinese Model: "sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23" |
+| response_format | Output format                        | Normal output: "asr.utf-8"<br>Stream output: "asr.utf-8.stream"                                                              |
+| input           | Input                                | KWS wake input: "kws.xxx" (input kws unit work_id)<br>Onboard microphone input: "sys.pcm"<br>UART stream input: "asr.wav.stream.base64" |
+| rule1           | Timeout for unrecognized content wake | Unit: seconds                                                                                                               |
+| rule2           | Maximum recognition interval         | Unit: seconds                                                                                                               |
+| rule3           | Maximum recognition timeout          | Unit: seconds                                                                                                               |
+| enoutput        | Enable UART output                   | Enable: true<br>Disable: false                                                                                                |
+
+**Return Value:**
+
+- String:
+    - asr_work_id: ASR unit work_id
+
+## ApiWhisper Class
+
+The internal member `ApiWhisper whisper` of `M5ModuleLLM` is used to control the initialization and configuration of the Whisper unit.
+
+### setup
+
+**Function Prototype:**
+
+```cpp
+String setup(ApiWhisperSetupConfig_t config = ApiWhisperSetupConfig_t(), String request_id = "asr_setup");
+```
+
+**Function Description:**
+
+- Initializes the Whisper unit and enables speech-to-text functionality.
+
+**Input Parameters:**
+
+ApiWhisperSetupConfig_t config:
+
+- Whisper unit initialization configuration:
+- String request_id:
+    - Session ID, the default can be used.
+
+```cpp
+struct ApiWhisperSetupConfig_t {
+    String model           = "whisper-tiny";
+    String response_format = "asr.utf-8";
+    String input           = [ "sys.pcm", "kws.1000", "vad.1001" ];
+    String language        = "en";
+    bool enoutput          = true;
+};
+```
+
+| Parameter       | Description                          | Input Values                                                                                                                |
+| --------------- | ------------------------------------ | --------------------------------------------------------------------------------------------------------------------------- |
+| model           | Conversion model                     | Model: "whisper-tiny"<br>                                                                                                    |
+| response_format | Output format                        | Normal output: "asr.utf-8"<br>                                                                                               |
+| input           | Input                                | KWS wake input: "kws.xxx" (input kws unit work_id)<br>Onboard microphone input: "sys.pcm"<br>UART stream input: "asr.wav.stream.base64" |
+| language        | Language used for language recognition | Default: "en"<br>Optional: "zh", "ja"                                                                                         |
+| enoutput        | Enable UART output                   | Enable: true<br>Disable: false                                                                                                |
+
+**Return Value:**
+
+- String:
+    - whisper_work_id: Whisper unit work_id
+
+## ApiLlm Class
+
+The internal member `ApiLlm llm` of `M5ModuleLLM` is used to control the initialization and configuration of the LLM unit.
+
+### setup
+
+**Function prototype:**
+
+```cpp
+String setup(ApiLlmSetupConfig_t config = ApiLlmSetupConfig_t(), String request_id = "llm_setup");
+```
+
+**Function Description:**
+
+- Initializes the LLM unit and supports configuring the input and output data format for the LLM unit.
+
+**Parameters:**
+
+- ApiLlmSetupConfig_t config:
+    - LLM unit initialization configuration
+- String request_id:
+    - Session ID, the default value can be used.
+
+```cpp
+struct ApiLlmSetupConfig_t {
+    String prompt;
+    String model           = "qwen2.5-0.5B-prefill-20e";
+    String response_format = "llm.utf-8.stream";
+    String input           = ["llm.utf-8", "kws.1000"];
+    bool enoutput          = true;
+    int max_token_len      = 127;
+};
+```
+
+| Parameter        | Description                                 | Input Values                                                                                                      |
+| ---------------- | ------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- |
+| model            | Model used for conversion                   | Predefined model "qwen2.5-0.5B-prefill-20e"                                                                       |
+| response_format  | Output format                               | Normal output: "llm.utf-8"<br>Streaming output: "llm.utf-8.stream"                                                 |
+| input            | Input format                                | ASR input: "asr.xxx" (work_id of the ASR unit)<br>UART input: "llm.utf-8"<br>KWS wake-up interruption: "kws.xxx" (work_id of the KWS unit) |
+| max_length       | Configures the maximum output token length (maximum returned inference text length) | Maximum value: 1023                                                                                                 |
+| prompt           | Model initialization system prompt          | String                                                                                                            |
+| enoutput         | Enable UART output                          | Enable: true<br>Disable: false                                                                                     |
+
+**Return Value:**
+
+- String:
+    - `llm_work_id`: LLM unit work ID
+
+### inference
+
+**Function prototype:**
+
+```cpp
+int inference(String work_id, String input, String request_id = "llm_inference");
+```
+
+**Function Description:**
+
+- Sends input data to start inference. The result will be placed in the `responseMsgList` container in `M5ModuleLLM.msg`.
+
+**Parameters:**
+
+- String work_id:
+    - The LLM unit's work ID being called
+- String input:
+    - Input text
+- String request_id:
+    - Session ID, used to differentiate when multiple sessions exist.
+
+**Return Value:**
+
+- int:
+    - `MODULE_LLM_OK` / Error Code
+
+### inferenceAndWaitResult
+
+**Function prototype:**
+
+```cpp
+int inferenceAndWaitResult(String work_id, String input, std::function<void(String&)> onResult, uint32_t timeout = 5000, String request_id = "llm_inference");
+```
+
+**Function Description:**
+
+- Sends input data to start inference, blocks while waiting for the result, then calls the callback function.
+
+**Parameters:**
+
+- String work_id:
+    - The LLM unit's work ID being called
+- String input:
+    - Input text
+- void onResult(String&)
+    - Callback function for inference result
+- uint32_t timeout:
+    - Timeout for waiting for inference result
+- String request_id:
+    - Session ID, used to differentiate when multiple sessions exist.
+
+**Return Value:**
+
+- int:
+    - `MODULE_LLM_OK` / Error Code
+
+## ApiVlm Class
+
+The internal member `ApiVlm vlm` of `M5ModuleLLM` is used to control the initialization and configuration of the VLM unit.
+
+### setup
+
+**Function prototype:**
+
+```cpp
+String setup(ApiVlmSetupConfig_t config = ApiVlmSetupConfig_t(), String request_id = "vlm_setup");
+```
+
+**Function Description:**
+
+- Initializes the VLM unit and supports configuring the input and output data format for the VLM unit.
+
+**Parameters:**
+
+- ApiVlmSetupConfig_t config:
+    - VLM unit initialization configuration
+- String request_id:
+    - Session ID, the default value can be used.
+
+```cpp
+struct ApiVlmSetupConfig_t {
+    String prompt;
+    String model           = "internvl2.5-1B-ax630c";
+    String response_format = "vlm.utf-8.stream";
+    String input           = ["vlm.utf-8", "kws.1000"];
+    bool enoutput          = true;
+    int max_token_len      = 1023;
+};
+```
+
+| Parameter        | Description                                 | Input Values                                                                                                      |
+| ---------------- | ------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- |
+| model            | Model used for conversion                   | Predefined model "internvl2.5-1B-ax630c"                                                                          |
+| response_format  | Output format                               | Normal output: "vlm.utf-8"<br>Streaming output: "vlm.utf-8.stream"                                                 |
+| input            | Input format                                | ASR input: "asr.xxx" (work_id of the ASR unit)<br>UART input: "vlm.utf-8"<br>KWS wake-up interruption: "kws.xxx" (work_id of the KWS unit) |
+| max_token_len    | Configures the maximum output token length (maximum returned inference text length) | Maximum value: 1023                                                                                                 |
+| prompt           | Model initialization system prompt          | String                                                                                                            |
+| enoutput         | Enable UART output                          | Enable: true<br>Disable: false                                                                                     |
+
+**Return Value:**
+
+- String:
+    - `vlm_work_id`: VLM unit work ID
+
+### inference
+
+**Function prototype:**
+
+```cpp
+int inference(String work_id, String input, String request_id = "vlm_inference");
+```
+
+**Function Description:**
+
+- Sends input data to start inference. The result will be placed in the `responseMsgList` container in `M5ModuleLLM.msg`.
+
+**Parameters:**
+
+- String work_id:
+    - The VLM unit's work ID being called
+- String input:
+    - Input text
+- String request_id:
+    - Session ID, used to differentiate when multiple sessions exist.
+
+**Return Value:**
+
+- int:
+    - `MODULE_LLM_OK` / Error Code
+
+### inferenceAndWaitResult
+
+**Function prototype:**
+
+```cpp
+int inferenceAndWaitResult(String work_id, String input, std::function<void(String&)> onResult,
+                           uint32_t timeout = 5000, String request_id = "vlm_inference");
+```
+
+**Function Description:**
+
+- Sends input data to start inference, blocks while waiting for the result, then calls the callback function.
+
+**Parameters:**
+
+- String work_id:
+    - The VLM unit's work ID being called
+- String input:
+    - Input text
+- void onResult(String&)
+    - Callback function for inference result
+- uint32_t timeout:
+    - Timeout for waiting for inference result
+- String request_id:
+    - Session ID, used to differentiate when multiple sessions exist.
+
+**Return Value:**
+
+- int:
+    - `MODULE_LLM_OK` / Error Code
+
+## ApiTts Class
+
+The internal member `ApiTts tts` of `M5ModuleLLM` is used to control the initialization and configuration of the TTS unit.
+
+### setup
+
+**Function prototype:**
+
+```cpp
+String setup(ApiTtsSetupConfig_t config = ApiTtsSetupConfig_t(), String request_id = "tts_setup");
+```
+
+**Function description:**
+
+- Initializes the TTS unit and enables the text-to-speech functionality.
+
+**Parameters:**
+
+- ApiTtsSetupConfig_t config:
+    - TTS unit initialization configuration:
+- String request_id:
+    - Session ID, use the default if not needed.
+
+```cpp
+struct ApiTtsSetupConfig_t {
+    String model           = "single_speaker_english_fast";
+    String response_format = "sys.pcm";
+    String input           = ["tts.utf-8.stream", "kws.1000"];
+    bool enoutput          = false;
+    bool enaudio           = true;
+};
+```
+
+| Parameter | Description     | Input values                                                                                                                                          |
+| --------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
+| model    | Conversion model | English model: "single_speaker_english_fast"<br>Chinese model: "single_speaker_fast"                                                                   |
+| input    | Input          | LLM input: "llm.xxx" (input LLM unit's work_id)<br>UART input: "tts.utf-8"<br>UART stream input: "tts.utf-8.stream"<br>KWS wake-up interrupt: "kws.xxx" (input KWS unit's work_id) |
+| enoutput | Enable UART output | Enable: true<br>Disable: false                                                                                                                       |
+| enaudio  | Enable speaker playback | Enable: true<br>Disable: false                                                                                                                       |
+
+**Return value:**
+
+- String:
+    - tts_work_id: TTS unit work_id
+
+### inference
+
+**Function prototype:**
+
+```cpp
+int inference(String work_id, String input, uint32_t timeout = 0, String request_id = "tts_inference");
+```
+
+**Function description:**
+
+- Sends input text and starts the inference conversion. When it completes, the synthesized audio is played through the speaker automatically.
+
+**Parameters:**
+
+- String work_id:
+    - Work ID of the TTS unit to be called.
+- String input:
+    - Input text.
+- uint32_t timeout:
+    - Timeout for waiting for inference.
+- String request_id:
+    - Session ID, used to distinguish between multiple sessions.
+
+**Return value:**
+
+- int:
+    - MODULE_LLM_OK / Error Code
+
+## ApiMelotts Class
+
+The internal member `ApiMelotts melotts` of `M5ModuleLLM` is used to control the initialization and configuration of the Melotts unit.
+
+### setup
+
+**Function prototype:**
+
+```cpp
+String setup(ApiMelottsSetupConfig_t config = ApiMelottsSetupConfig_t(), String request_id = "melotts_setup", 
+             String language = "en_US");
+```
+
+**Function description:**
+
+- Initializes the Melotts unit and enables the text-to-speech functionality.
+
+**Parameters:**
+
+- ApiMelottsSetupConfig_t config:
+    - Melotts unit initialization configuration:
+- String request_id:
+    - Session ID, use the default if not needed.
+
+```cpp
+struct ApiMelottsSetupConfig_t {
+    String model              = "melotts_zh-cn";
+    String response_format    = "sys.pcm";
+    std::vector<String> input = {"tts.utf-8.stream"};
+    bool enoutput             = false;
+    bool enaudio              = true;
+};
+```
+
+| Parameter | Description     | Input values                                                                                                                                          |
+| --------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
+| model    | Conversion model | Chinese and English model: "melotts_zh-cn"<br>Chinese model: "single_speaker_fast"                                                                   |
+| input    | Input          | LLM input: "llm.xxx" (input LLM unit's work_id)<br>UART input: "melotts.utf-8"<br>UART stream input: "melotts.utf-8.stream" |
+| enoutput | Enable UART output | Enable: true<br>Disable: false                                                                                                                       |
+| enaudio  | Enable speaker playback | Enable: true<br>Disable: false                                                                                                                       |
+
+**Return value:**
+
+- String:
+    - melotts_work_id: Melotts unit work_id
+
+### inference
+
+**Function prototype:**
+
+```cpp
+int inference(String work_id, String input, uint32_t timeout = 0, String request_id = "tts_inference");
+```
+
+**Function description:**
+
+- Sends input text and starts the inference conversion. When it completes, the synthesized audio is played through the speaker automatically.
+
+**Parameters:**
+
+- String work_id:
+    - Work ID of the Melotts unit to be called.
+- String input:
+    - Input text.
+- uint32_t timeout:
+    - Timeout for waiting for inference.
+- String request_id:
+    - Session ID, used to distinguish between multiple sessions.
+
+**Return value:**
+
+- int:
+    - MODULE_LLM_OK / Error Code
+
+## ApiYolo Class
+
+The internal member `ApiYolo yolo` of `M5ModuleLLM` is used to control the initialization and configuration of the Yolo unit.
+
+### setup
+
+**Function prototype:**
+
+```cpp
+String setup(ApiYoloSetupConfig_t config = ApiYoloSetupConfig_t(), String request_id = "yolo_setup");
+```
+
+**Function description:**
+
+- Initializes the Yolo unit and enables image detection functionality.
+
+**Parameters:**
+
+- ApiYoloSetupConfig_t config:
+    - Yolo unit initialization configuration:
+- String request_id:
+    - Session ID, use the default if not needed.
+
+```cpp
+struct ApiYoloSetupConfig_t {
+    String model              = "yolo11n";
+    String response_format    = "yolo.box.stream";
+    std::vector<String> input = {"yolo.jpeg.base64"};
+    bool enoutput             = true;
+};
+```
+
+| Parameter | Description     | Input values                                                                                                                                          |
+| --------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
+| model    | Conversion model | Detection model: "yolo11n"<br>Pose model: "yolo11n-pose"<br>Hand pose model: "yolo11n-hand-pose"                                                      |
+| response_format | Output format | Detection output: "yolo.box.stream"<br>Pose output: "yolo.pose.stream"                                                                 |
+| input    | Input          | UVC input: "camera.xxx" (input camera unit's work_id)<br>UART stream input: "yolo.jpeg.base64.stream" |
+| enoutput | Enable UART output | Enable: true<br>Disable: false                                                                                                                       |
+
+**Return value:**
+
+- String:
+    - yolo_work_id: Yolo unit work_id
+
+## ModuleMsg Class
+
+The internal member `ModuleMsg msg` of `M5ModuleLLM` provides a container `responseMsgList` used to cache various information returned from the LLM Module. Refer to the following example, where the main loop iterates to retrieve the results.
+
+```cpp
+void loop()
+{
+    module_llm.update();
+
+    // Handle response msg
+    for (auto& msg : module_llm.msg.responseMsgList) {
+        // KWS msg
+        if (msg.work_id == kws_work_id) {
+            Serial.printf(">> Keyword detected\n");
+        }
+
+        // ASR msg
+        if (msg.work_id == asr_work_id) {
+            if (msg.object == "asr.utf-8.stream") {
+                // Parse and get asr result
+                JsonDocument doc;
+                deserializeJson(doc, msg.raw_msg);
+                String asr_result = doc["data"]["delta"].as<String>();
+                Serial.printf(">> %s\n", asr_result.c_str());
+            }
+        }
+    }
+    module_llm.msg.responseMsgList.clear();
+}
+```
+
+## VoiceAssistant Class
+
+`M5ModuleLLM_VoiceAssistant` is used to quickly create an LLM voice assistant instance, achieving a fast implementation of KWS (keyword spotting) -> ASR (speech-to-text) -> LLM (large model inference) -> TTS (text-to-speech).
+
+- During initialization, simply pass the `M5ModuleLLM` instance to the constructor, and register the corresponding event callback functions to complete the voice assistant setup.
+
+```cpp
+/*
+ * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD
+ *
+ * SPDX-License-Identifier: MIT
+ */
+#include <Arduino.h>
+#include <M5Unified.h>
+#include <M5ModuleLLM.h>
+
+M5ModuleLLM module_llm;
+M5ModuleLLM_VoiceAssistant voice_assistant(&module_llm);
+
+/* On ASR data callback */
+void on_asr_data_input(String data, bool isFinish, int index)
+{
+    M5.Display.setTextColor(TFT_GREEN, TFT_BLACK);
+    M5.Display.printf(">> %s\n", data.c_str());
+
+    /* If ASR data is finished */
+    if (isFinish) {
+        M5.Display.setTextColor(TFT_YELLOW, TFT_BLACK);
+        M5.Display.print(">> ");
+    }
+};
+
+/* On LLM data callback */
+void on_llm_data_input(String data, bool isFinish, int index)
+{
+    M5.Display.print(data);
+
+    /* If LLM data is finished */
+    if (isFinish) {
+        M5.Display.print("\n");
+    }
+};
+
+void setup()
+{
+    M5.begin();
+    M5.Display.setTextSize(2);
+    M5.Display.setTextScroll(true);
+
+    /* Init module serial port */
+    Serial2.begin(115200, SERIAL_8N1, 16, 17);  // Basic
+    // Serial2.begin(115200, SERIAL_8N1, 13, 14);  // Core2
+    // Serial2.begin(115200, SERIAL_8N1, 18, 17);  // CoreS3
+
+    /* Init module */
+    module_llm.begin(&Serial2);
+
+    /* Make sure module is connected */
+    M5.Display.printf(">> Check ModuleLLM connection..\n");
+    while (1) {
+        if (module_llm.checkConnection()) {
+            break;
+        }
+    }
+
+    /* Begin voice assistant preset */
+    M5.Display.printf(">> Begin voice assistant..\n");
+    int ret = voice_assistant.begin("HELLO");
+    if (ret != MODULE_LLM_OK) {
+        while (1) {
+            M5.Display.setTextColor(TFT_RED);
+            M5.Display.printf(">> Begin voice assistant failed\n");
+        }
+    }
+
+    /* Register on ASR data callback function */
+    voice_assistant.onAsrDataInput(on_asr_data_input);
+
+    /* Register on LLM data callback function */
+    voice_assistant.onLlmDataInput(on_llm_data_input);
+
+    M5.Display.printf(">> Voice assistant ready\n");
+}
+
+void loop()
+{
+    /* Keep voice assistant preset update */
+    voice_assistant.update();
+}
+```
+
+## Error Code
+
+```cpp
+enum ModuleLLMErrorCode_t {
+    MODULE_LLM_OK                              = 0,
+    MODULE_LLM_RESET_WARN                      = -1,
+    MODULE_LLM_JSON_FORMAT_ERROR               = -2,
+    MODULE_LLM_ACTION_MATCH_FAILED             = -3,
+    MODULE_LLM_INFERENCE_DATA_PUSH_FAILED      = -4,
+    MODULE_LLM_MODEL_LOADING_FAILED            = -5,
+    MODULE_LLM_UNIT_NOT_EXIST                  = -6,
+    MODULE_LLM_UNKNOWN_OPERATION               = -7,
+    MODULE_LLM_UNIT_RESOURCE_ALLOCATION_FAILED = -8,
+    MODULE_LLM_UNIT_CALL_FAILED                = -9,
+    MODULE_LLM_MODEL_INIT_FAILED               = -10,
+    MODULE_LLM_MODEL_RUN_FAILED                = -11,
+    MODULE_LLM_MODULE_NOT_INITIALISED          = -12,
+    MODULE_LLM_MODULE_ALREADY_WORKING          = -13,
+    MODULE_LLM_MODULE_NOT_WORKING              = -14,
+    MODULE_LLM_NO_UPDATEABLE_MODULES           = -15,
+    MODULE_LLM_NO_MODULES_AVAILABLE_FOR_UPDATE = -16,
+    MODULE_LLM_FILE_OPEN_FAILED                = -17,
+    MODULE_LLM_WAIT_RESPONSE_TIMEOUT           = -97,
+    MODULE_LLM_RESPONSE_PARSE_FAILED           = -98,
+    MODULE_LLM_ERROR_NONE                      = -99,
+};
+```
\ No newline at end of file

From e601909e688f4bbff29f691ccd8010c14f337ac6 Mon Sep 17 00:00:00 2001
From: Forairaaaaa <applesyqd@outlook.com>
Date: Tue, 25 Mar 2025 09:18:47 +0800
Subject: [PATCH 15/16] Update library.json

---
 library.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/library.json b/library.json
index 22e3a1c..ccb85d9 100644
--- a/library.json
+++ b/library.json
@@ -14,7 +14,7 @@
         "M5GFX": "*",
         "ArduinoJson": "*"
     },
-    "version": "1.4.0",
+    "version": "1.5.0",
     "frameworks": "arduino",
     "platforms": "espressif32"
-}
\ No newline at end of file
+}

From 9b0f4d0206551387fa210612aeca8256d228da7e Mon Sep 17 00:00:00 2001
From: Forairaaaaa <applesyqd@outlook.com>
Date: Tue, 25 Mar 2025 09:19:01 +0800
Subject: [PATCH 16/16] Update library.properties

---
 library.properties | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/library.properties b/library.properties
index d08cea6..81929ac 100644
--- a/library.properties
+++ b/library.properties
@@ -1,5 +1,5 @@
 name=M5ModuleLLM
-version=1.4.0
+version=1.5.0
 author=M5Stack
 maintainer=M5Stack
 sentence=M5ModuleLLM is a library for M5ModuleLLM
@@ -8,4 +8,4 @@ category=Device Control
 url=https://github.com/m5stack/M5Module-LLM.git
 architectures=esp32
 includes=M5ModuleLLM.h
-depends=M5Unified,ArduinoJson
\ No newline at end of file
+depends=M5Unified,ArduinoJson