From ca841f8b4ca6a5bcb36d1f80c5247357d84c3c5c Mon Sep 17 00:00:00 2001 From: LittleMouse Date: Tue, 31 Dec 2024 18:12:49 +0800 Subject: [PATCH 01/16] api_yolo add "exit" function. --- src/api/api_yolo.cpp | 21 +++++++++++++++++++++ src/api/api_yolo.h | 8 ++++++++ 2 files changed, 29 insertions(+) diff --git a/src/api/api_yolo.cpp b/src/api/api_yolo.cpp index 2774cc0..e7bdbe9 100644 --- a/src/api/api_yolo.cpp +++ b/src/api/api_yolo.cpp @@ -42,6 +42,27 @@ String ApiYolo::setup(ApiYoloSetupConfig_t config, String request_id) return work_id; } +String ApiYolo::exit(String work_id, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = work_id; + doc["action"] = "exit"; + serializeJson(doc, cmd); + } + + _module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&work_id](ResponseMsg_t& msg) { + // Copy work id + work_id = msg.work_id; + }, + 100); + return work_id; +} + int ApiYolo::inference(String& work_id, uint8_t* input, size_t& raw_len, String request_id) { String cmd; diff --git a/src/api/api_yolo.h b/src/api/api_yolo.h index fbd6368..e065182 100644 --- a/src/api/api_yolo.h +++ b/src/api/api_yolo.h @@ -28,6 +28,14 @@ class ApiYolo { */ String setup(ApiYoloSetupConfig_t config = ApiYoloSetupConfig_t(), String request_id = "yolo_setup"); + /** + * @brief Exit module YOLO, return YOLO work_id + * + * @param work_id + * @param request_id + * @return String + */ + String exit(String work_id, String request_id = "yolo_exit"); /** * @brief Inference input data by module LLM * From ee5a466a82c2155cbe317fe60039a13938bc19ec Mon Sep 17 00:00:00 2001 From: LittleMouse Date: Mon, 6 Jan 2025 15:30:21 +0800 Subject: [PATCH 02/16] add vlm, depth anything module. add doc. update api. 
--- docs/cn.md | 640 +++++++++++++++++++++++++++++++++ docs/en.md | 41 +++ src/M5ModuleLLM.cpp | 2 + src/M5ModuleLLM.h | 14 + src/api/api_depth_anything.cpp | 106 ++++++ src/api/api_depth_anything.h | 60 ++++ src/api/api_melotts.cpp | 21 ++ src/api/api_melotts.h | 9 + src/api/api_vlm.cpp | 137 +++++++ src/api/api_vlm.h | 72 ++++ src/api/api_yolo.h | 1 + 11 files changed, 1103 insertions(+) create mode 100644 docs/cn.md create mode 100644 docs/en.md create mode 100644 src/api/api_depth_anything.cpp create mode 100644 src/api/api_depth_anything.h create mode 100644 src/api/api_vlm.cpp create mode 100644 src/api/api_vlm.h diff --git a/docs/cn.md b/docs/cn.md new file mode 100644 index 0000000..137bd59 --- /dev/null +++ b/docs/cn.md @@ -0,0 +1,640 @@ +# M5Module-LLM Arduino API + +[M5Module-LLM](https://github.com/m5stack/M5Module-LLM) Arduino驱动库API文档。 + +## M5ModuleLLM Class + +`M5ModuleLLM`用于初始化LLM Module, 并且提供内部成员用于快速初始化LLM的各个单元, 方便根据自己的需求构建应用。 + +```cpp +class M5ModuleLLM { +public: + bool begin(Stream* targetPort); + bool checkConnection(); + void update(); + + m5_module_llm::ApiSys sys; + m5_module_llm::ApiLlm llm; + m5_module_llm::ApiAudio audio; + m5_module_llm::ApiTts tts; + m5_module_llm::ApiMelotts melotts; + m5_module_llm::ApiKws kws; + m5_module_llm::ApiAsr asr; + m5_module_llm::ApiYolo yolo; + m5_module_llm::ModuleMsg msg; + m5_module_llm::ModuleComm comm; +private: +}; +``` + +### begin + +**函数原型:** + +```cpp +bool begin(Stream* targetPort); +``` + +**功能说明:** + +- 初始化LLM Module UART接口配置 + +**传入参数:** + +- Stream* targetPort: + - 传入Serial指针 + +**返回值:** + +- bool: + - true: 初始化成功 + - false: 初始化失败 + +### checkConnection + +**函数原型:** + +```cpp +bool checkConnection(); +``` + +**功能说明:** + +- 发送`sys.ping`指令, 检查LLM Module连接状态 + +**传入参数:** + +- null + +**返回值:** + +- bool: + - true: 模组响应 + - false: 模组无响应 + +### update + +**函数原型:** + +```cpp +void update(); +``` + +**功能说明:** + +- 拉取LLM Module UART响应数据, 该API需包含在Loop中循环执行。 + +**传入参数:** + +- null + +**返回值:** + +- null + 
+## ApiSys Class + +`M5ModuleLLM`的内部成员`ApiSys sys`用于控制SYS单元实现系统复位等操作。 + +### ping + +**函数原型:** + +```cpp +int ping(); +``` + +**功能说明:** + +- 发送`sys.ping`指令, 检查LLM Module连接状态 + +**传入参数:** + +- null + +**返回值:** + +- int: + - MODULE_LLM_OK / Error Code + +### reset + +**函数原型:** + +```cpp +int reset(bool waitResetFinish = true); +``` + +**功能说明:** + +- 发送`sys.reset`指令, 复位软件服务。 + +**传入参数:** + +- bool waitResetFinish: + - true:阻塞等待复位 + - false:非阻塞执行复位 + +**返回值:** + +- int: + - MODULE_LLM_OK / Error Code + +### reboot + +**函数原型:** + +```cpp +int reboot(); +``` + +**功能说明:** + +- 发送`sys.reboot`指令, 复位系统。 + +**传入参数:** + +- null + +**返回值:** + +- int: + - MODULE_LLM_OK / Error Code + +## ApiAudio Class + +`M5ModuleLLM`的内部成员`ApiAudio audio`用于控制AUDIO单元的初始化和配置。 + +### setup + +**函数原型:** + +```cpp +String setup(ApiAudioSetupConfig_t config = ApiAudioSetupConfig_t(), String request_id = "audio_setup"); +``` + +**功能说明:** + +- 初始化Audio单元, 开启系统声卡。(使用KWS和TTS前需开启该功能) + +**传入参数:** + +ApiAudioSetupConfig_t config: + +- LLM单元初始化配置: +- String request_id: + - 会话id, 使用默认即可。 + +```cpp +struct ApiAudioSetupConfig_t { + int capcard = 0; + int capdevice = 0; + float capVolume = 0.5; + int playcard = 0; + int playdevice = 1; + float playVolume = 0.15; +}; +``` + +| 参数 | 描述 | 输入值 | +|------------|----------|---------------------------------| +| capcard | 麦克风声卡的索引 | 系统默认声卡:0 | +| capdevice | 麦克风设备索引 | 板载硅麦:0 | +| capVolume | 输入的音量 | 0.0~10.0 (1中文模型: "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01" | +| kws | KWS唤醒词文本设置 | 不允许中文/英文混合, 英文要求全大写 | +| enoutput | 启用UART输出 | 启用: true
禁用: false | + +**返回值:** + +- String: + - kws_work_id: kws单元work_id + +## ApiAsr Class + +`M5ModuleLLM`的内部成员`ApiAsr asr`用于控制ASR单元的初始化和配置。 + +### setup + +**函数原型:** + +```cpp +String setup(ApiAsrSetupConfig_t config = ApiAsrSetupConfig_t(), String request_id = "asr_setup"); +``` + +**功能说明:** + +- 初始化ASR单元, 开启语音转文本功能。 + +**传入参数:** + +ApiAsrSetupConfig_t config: + +- ASR单元初始化配置: +- String request_id: + - 会话id, 使用默认即可。 + +```cpp +struct ApiAsrSetupConfig_t { + String model = "sherpa-ncnn-streaming-zipformer-20M-2023-02-17"; + String response_format = "asr.utf-8.stream"; + String input = ["sys.pcm", "kws.1000"]; + bool enoutput = true; + float rule1 = 2.4; + float rule2 = 1.2; + float rule3 = 30.0; +}; +``` + +| 参数 | 描述 | 输入值 | +|-----------------|---------------|---------------------------------------------------------------------------------------------------------------------| +| model | 转换模型 | 英文模型: "sherpa-ncnn-streaming-zipformer-20M-2023-02-17"
中文模型: "sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23" | +| response_format | 输出格式 | 普通输出: "asr.utf-8"
流式输出: "asr.utf-8.stream" | +| input | 输入 | KWS唤醒输入: "kws.xxx"(输入kws单元的work_id)
板载麦克风输入: "sys.pcm"
UART流式输入: "asr.wav.stream.base64" | +| rule1 | 唤醒到未识别到内容超时时间 | 单位:秒 | +| rule2 | 识别最大间隔时间 | 单位:秒 | +| rule3 | 识别最长超时时间 | 单位:秒 | +| enoutput | 启用UART输出 | 启用: true
禁用: false | + +**返回值:** + +- String: + - asr_work_id: asr单元work_id + +## ApiLlm Class + +`M5ModuleLLM`的内部成员`ApiLlm llm`用于控制LLM单元的初始化和配置。 + +### setup + +**函数原型:** + +```cpp +String setup(ApiLlmSetupConfig_t config = ApiLlmSetupConfig_t(), String request_id = "llm_setup"); +``` + +**功能说明:** + +- 初始化LLM单元, 支持配置LLM单元输入输出数据方式。 + +**传入参数:** + +- ApiLlmSetupConfig_t config: + - LLM单元初始化配置: +- String request_id: + - 会话id, 使用默认即可。 + +```cpp +struct ApiLlmSetupConfig_t { + String prompt; + String model = "qwen2.5-0.5B-prefill-20e"; + String response_format = "llm.utf-8.stream"; + String input = "llm.utf-8"; + bool enoutput = true; + bool enkws = true; + int max_token_len = 127; +}; +``` + +| 参数 | 描述 | 输入值 | +|-----------------|-------------------------|-------------------------------------------------------------------------------------------------| +| model | 转换模型 | 预置模型 "qwen2.5-0.5B-prefill-20e" | +| response_format | 输出格式 | 普通输出: "llm.utf-8"
流式输出: "llm.utf-8.stream" | +| input | 输入 | ASR输入: "asr.xxx"(输入asr单元的work_id)
UART输入: "llm.utf-8"
KWS唤醒打断: "kws.xxx"(输入kws单元的work_id) | +| enkws | KWS唤醒是否终止过程 | KWS打断过程: true
KWS不打断过程: false | +| max_token_len | 配置最大输出token(最大返回推理文本长度) | 最大值: 1024, 推荐使用127 | +| prompt | 模型初始化提示词 | String | +| enoutput | 启用UART输出 | 启用: true
禁用: false | + +**返回值:** + +- String: + - llm_work_id: llm单元work_id + +### inference + +**函数原型:** + +```cpp +int inference(String work_id, String input, String request_id = "llm_inference"); +``` + +**功能说明:** + +- 输入数据, 开始推理。返回结果内容将进入`M5ModuleLLM.msg`中的`responseMsgList`列表容器中。 + +**传入参数:** + +- String work_id: + - 调用的LLM单元work_id +- String input: + - 输入文本 +- String request_id: + - 会话ID, 当同时存在多个会话的时候用于区分。 + +**返回值:** + +- int: + - MODULE_LLM_OK / Error Code + +### inferenceAndWaitResult + +**函数原型:** + +```cpp +int inferenceAndWaitResult(String work_id, String input, std::function<void(String&)> onResult, uint32_t timeout = 5000, String request_id = "llm_inference"); +``` + +**功能说明:** + +- 输入数据, 开始推理。并阻塞等待返回结果, 然后调用callback函数。 + +**传入参数:** + +- String work_id: + - 调用的LLM单元work_id +- String input: + - 输入文本 +- void onResult(String&) + - 推理结果callback函数 +- uint32_t timeout: + - 等待推理超时时间 +- String request_id: + - 会话ID, 当同时存在多个会话的时候用于区分。 + +**返回值:** + +- int: + - MODULE_LLM_OK / Error Code + +## ApiTts Class + +`M5ModuleLLM`的内部成员`ApiTts tts`用于控制TTS单元的初始化和配置。 + +### setup + +**函数原型:** + +```cpp +String setup(ApiTtsSetupConfig_t config = ApiTtsSetupConfig_t(), String request_id = "tts_setup"); +``` + +**功能说明:** + +- 初始化TTS单元, 开启文本转语音功能。 + +**传入参数:** + +ApiTtsSetupConfig_t config: + +- TTS单元初始化配置: +- String request_id: + - 会话id, 使用默认即可。 + +```cpp +struct ApiTtsSetupConfig_t { + String model = "single_speaker_english_fast"; + String response_format = "tts.base64.wav"; + String input = "tts.utf-8.stream"; + bool enoutput = true; + bool enkws = true; +}; +``` + +| 参数 | 描述 | 输入值 | +|----------|-------------|------------------------------------------------------------------------------------------| +| model | 转换模型 | 英文模型: "single_speaker_english_fast"
中文模型: "single_speaker_fast" | +| input | 输入 | LLM输入: "llm.xxx"(输入llm单元的work_id)
UART输入: "tts.utf-8"
UART流式输入: "tts.utf-8.stream" | +| enkws | KWS唤醒是否终止过程 | KWS打断过程: true
KWS不打断过程: false | +| enoutput | 启用UART输出 | 启用: true
禁用: false | + +**返回值:** + +- String: + - tts_work_id: tts单元work_id + +### inference + +**函数原型:** + +```cpp +int inference(String work_id, String input, uint32_t timeout = 0, String request_id = "tts_inference"); +``` + +**功能说明:** + +- 输入数据, 开始推理转换, 完成后将自动播放至扬声器。 + +**传入参数:** + +- String work_id: + - 调用的TTS单元work_id +- String input: + - 输入文本 +- uint32_t timeout: + - 等待推理超时时间 +- String request_id: + - 会话ID, 当同时存在多个会话的时候用于区分。 + +**返回值:** + +- int: + - MODULE_LLM_OK / Error Code + +## ModuleMsg Class + +`M5ModuleLLM`的内部成员`ModuleMsg msg`提供了`responseMsgList`容器用于用于缓存接收LLM Module返回的各种信息。参考以下案例,在主循环中遍历获取返回结果。 + +```cpp +void loop() +{ + module_llm.update(); + + // Handle response msg + for (auto& msg : module_llm.msg.responseMsgList) { + // KWS msg + if (msg.work_id == kws_work_id) { + Serial.printf(">> Keyword detected\n"); + } + + // ASR msg + if (msg.work_id == asr_work_id) { + if (msg.object == "asr.utf-8.stream") { + // Parse and get asr result + JsonDocument doc; + deserializeJson(doc, msg.raw_msg); + String asr_result = doc["data"]["delta"].as(); + Serial.printf(">> %s\n", asr_result.c_str()); + } + } + } + module_llm.msg.responseMsgList.clear(); +} + +``` + +## VoiceAssistant Class + +`M5ModuleLLM_VoiceAssistant`用于快速创建LLM语音助手实例, 快速实现KWS(语音唤醒)->ASR(语音转文本)->LLM(大模型推理)->TTS( +文本转语音)。 + +- 初始化时候只需要将`M5ModuleLLM`实例传入构造函数, 并注册对应事件的回调函数即可完成语音助手创建。 + +```cpp +/* + * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD + * + * SPDX-License-Identifier: MIT + */ +#include +#include +#include + +M5ModuleLLM module_llm; +M5ModuleLLM_VoiceAssistant voice_assistant(&module_llm); + +/* On ASR data callback */ +void on_asr_data_input(String data, bool isFinish, int index) +{ + M5.Display.setTextColor(TFT_GREEN, TFT_BLACK); + M5.Display.printf(">> %s\n", data.c_str()); + + /* If ASR data is finish */ + if (isFinish) { + M5.Display.setTextColor(TFT_YELLOW, TFT_BLACK); + M5.Display.print(">> "); + } +}; + +/* On LLM data callback */ +void on_llm_data_input(String data, bool isFinish, 
int index) +{ + M5.Display.print(data); + + /* If LLM data is finish */ + if (isFinish) { + M5.Display.print("\n"); + } +}; + +void setup() +{ + M5.begin(); + M5.Display.setTextSize(2); + M5.Display.setTextScroll(true); + + /* Init module serial port */ + Serial2.begin(115200, SERIAL_8N1, 16, 17); // Basic + // Serial2.begin(115200, SERIAL_8N1, 13, 14); // Core2 + // Serial2.begin(115200, SERIAL_8N1, 18, 17); // CoreS3 + + /* Init module */ + module_llm.begin(&Serial2); + + /* Make sure module is connected */ + M5.Display.printf(">> Check ModuleLLM connection..\n"); + while (1) { + if (module_llm.checkConnection()) { + break; + } + } + + /* Begin voice assistant preset */ + M5.Display.printf(">> Begin voice assistant..\n"); + int ret = voice_assistant.begin("HELLO"); + if (ret != MODULE_LLM_OK) { + while (1) { + M5.Display.setTextColor(TFT_RED); + M5.Display.printf(">> Begin voice assistant failed\n"); + } + } + + /* Register on ASR data callback function */ + voice_assistant.onAsrDataInput(on_asr_data_input); + + /* Register on LLM data callback function */ + voice_assistant.onLlmDataInput(on_llm_data_input); + + M5.Display.printf(">> Voice assistant ready\n"); +} + +void loop() +{ + /* Keep voice assistant preset update */ + voice_assistant.update(); +} +``` + +## Error Code + +```cpp +enum ModuleLLMErrorCode_t { + MODULE_LLM_OK = 0, + MODULE_LLM_RESET_WARN = -1, + MODULE_LLM_JSON_FORMAT_ERROR = -2, + MODULE_LLM_ACTION_MATCH_FAILED = -3, + MODULE_LLM_INFERENCE_DATA_PUSH_FAILED = -4, + MODULE_LLM_MODEL_LOADING_FAILED = -5, + MODULE_LLM_UNIT_NOT_EXIST = -6, + MODULE_LLM_UNKNOWN_OPERATION = -7, + MODULE_LLM_UNIT_RESOURCE_ALLOCATION_FAILED = -8, + MODULE_LLM_UNIT_CALL_FAILED = -9, + MODULE_LLM_MODEL_INIT_FAILED = -10, + MODULE_LLM_MODEL_RUN_FAILED = -11, + MODULE_LLM_MODULE_NOT_INITIALISED = -12, + MODULE_LLM_MODULE_ALREADY_WORKING = -13, + MODULE_LLM_MODULE_NOT_WORKING = -14, + MODULE_LLM_NO_UPDATEABLE_MODULES = -15, + MODULE_LLM_NO_MODULES_AVAILABLE_FOR_UPDATE = 
-16, + MODULE_LLM_FILE_OPEN_FAILED = -17, + MODULE_LLM_WAIT_RESPONSE_TIMEOUT = -97, + MODULE_LLM_RESPONSE_PARSE_FAILED = -98, + MODULE_LLM_ERROR_NONE = -99, +}; +``` + diff --git a/docs/en.md b/docs/en.md new file mode 100644 index 0000000..1714e53 --- /dev/null +++ b/docs/en.md @@ -0,0 +1,41 @@ +# LLM Module Arduino Quick Start + +## Overview + +The `LLM Module` can be used with various M5 controllers. This tutorial demonstrates how to control the LLM Module using the `M5Core` series in the `Arduino IDE` with the LLM Module driver library. + + + +## Environment Setup + +- 1.Arduino IDE Installation: Refer to the [Arduino IDE Installation Guide](/en/arduino/arduino_ide) to complete the IDE installation. + +- 2.Board Manager Installation: Refer to the [Basic Environment Setup Guide](/en/arduino/arduino_board) to complete the M5Stack board manager installation and select the `M5Core` development board. + + + +- 3.Library Installation: Refer to the [Library Management Guide](/en/arduino/arduino_library) to install the `LLM Module` driver library. (Follow prompts to install the dependency library `M5Unified`) + + + + +## Program Compilation & Upload + +Open the example program "kws_asr" in the driver library, click the upload button, and the program will automatically compile and upload. The wake-up word used in the example program is "HELLO". Once the device has finished initializing, wake it up by speaking the keyword. + + + + + + +- Examples: + - `kws_asr`: Uses KWS to wake up and triggers ASR for speech-to-text conversion. (KWS+ASR) + - `text_assistant`: Inputs text into the LLM model, performs inference, and outputs the result in text form. (LLM) + - `tts`: Uses the TTS unit to convert text to speech for playback. (TTS) + - `voice_assistant`: Uses KWS to wake up, triggers ASR for speech-to-text conversion, inputs the converted text into the LLM for inference, and outputs the inference result through TTS as speech.
(KWS+ASR+LLM+TTS) + +## Related Links + +- [LLM Module Arduino Lib](https://github.com/m5stack/M5Module-LLM) +- [LLM Module Arduino Lib API](/en/guide/llm/llm/arduino_api) + diff --git a/src/M5ModuleLLM.cpp b/src/M5ModuleLLM.cpp index 0bac44b..2ac9119 100644 --- a/src/M5ModuleLLM.cpp +++ b/src/M5ModuleLLM.cpp @@ -12,6 +12,7 @@ bool M5ModuleLLM::begin(Stream* serialPort) msg.init(&comm); sys.init(&msg); llm.init(&msg); + vlm.init(&msg); audio.init(&msg); tts.init(&msg); melotts.init(&msg); @@ -19,6 +20,7 @@ bool M5ModuleLLM::begin(Stream* serialPort) asr.init(&msg); yolo.init(&msg); camera.init(&msg); + depthanything.init(&msg); return true; } diff --git a/src/M5ModuleLLM.h b/src/M5ModuleLLM.h index 42bae02..c652089 100644 --- a/src/M5ModuleLLM.h +++ b/src/M5ModuleLLM.h @@ -9,12 +9,14 @@ #include "utils/msg.h" #include "api/api_sys.h" #include "api/api_llm.h" +#include "api/api_vlm.h" #include "api/api_audio.h" #include "api/api_tts.h" #include "api/api_melotts.h" #include "api/api_kws.h" #include "api/api_asr.h" #include "api/api_yolo.h" +#include "api/api_depth_anything.h" #include "api/api_camera.h" #include "api/api_version.h" @@ -55,6 +57,12 @@ class M5ModuleLLM { */ m5_module_llm::ApiLlm llm; + /** + * @brief VLM module api set + * + */ + m5_module_llm::ApiVlm vlm; + /** * @brief Audio module api set * @@ -97,6 +105,12 @@ class M5ModuleLLM { */ m5_module_llm::ApiYolo yolo; + /** + * @brief DepthAnything module api set + * + */ + m5_module_llm::ApiDepthAnything depthanything; + /** * @brief MSG module to handle module response message * diff --git a/src/api/api_depth_anything.cpp b/src/api/api_depth_anything.cpp new file mode 100644 index 0000000..b8ceb2d --- /dev/null +++ b/src/api/api_depth_anything.cpp @@ -0,0 +1,106 @@ +/* + * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD + * + * SPDX-License-Identifier: MIT + */ +#include "api_depth_anything.h" + +using namespace m5_module_llm; + +void ApiDepthAnything::init(ModuleMsg* moduleMsg) +{ + _module_msg 
= moduleMsg; +} + +String ApiDepthAnything::setup(ApiDepthAnythingSetupConfig_t config, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = "depth_anything"; + doc["action"] = "setup"; + doc["object"] = "depth_anything.setup"; + doc["data"]["model"] = config.model; + doc["data"]["response_format"] = config.response_format; + JsonArray inputArray = doc["data"]["input"].to(); + for (const String& str : config.input) { + inputArray.add(str); + } + doc["data"]["enoutput"] = config.enoutput; + serializeJson(doc, cmd); + } + + String work_id; + _module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&work_id](ResponseMsg_t& msg) { + // Copy work id + work_id = msg.work_id; + }, + 5000); + return work_id; +} + +int ApiDepthAnything::inference(String& work_id, uint8_t* input, size_t& raw_len, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["RAW"] = raw_len; + doc["request_id"] = request_id; + doc["work_id"] = work_id; + doc["action"] = "inference"; + doc["object"] = "cv.jpeg.base64"; + serializeJson(doc, cmd); + } + + _module_msg->sendCmd(cmd.c_str()); + _module_msg->sendRaw(input, raw_len); + return MODULE_LLM_OK; +} + +int ApiDepthAnything::inferenceAndWaitResult(String& work_id, uint8_t* input, size_t& raw_len, + std::function onResult, uint32_t timeout, String request_id) +{ + inference(work_id, input, raw_len, request_id); + + uint32_t time_out_count = millis(); + bool is_time_out = false; + bool is_msg_finish = false; + while (1) { + _module_msg->update(); + _module_msg->takeMsg(request_id, [&time_out_count, &is_msg_finish, &onResult](ResponseMsg_t& msg) { + String response_msg; + { + JsonDocument doc; + deserializeJson(doc, msg.raw_msg); + response_msg = doc["data"]["delta"].as(); + if (!doc["data"]["finish"].isNull()) { + is_msg_finish = doc["data"]["finish"]; + if (is_msg_finish) { + response_msg += '\n'; + } + } + } + if (onResult) { + onResult(response_msg); + } + 
time_out_count = millis(); + }); + + if (is_msg_finish) { + break; + } + + if (millis() - time_out_count > timeout) { + is_time_out = true; + break; + } + } + + if (is_time_out) { + return MODULE_LLM_WAIT_RESPONSE_TIMEOUT; + } + return MODULE_LLM_OK; +} diff --git a/src/api/api_depth_anything.h b/src/api/api_depth_anything.h new file mode 100644 index 0000000..c1138e7 --- /dev/null +++ b/src/api/api_depth_anything.h @@ -0,0 +1,60 @@ +/* + * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD + * + * SPDX-License-Identifier: MIT + */ +#pragma once +#include "../utils/msg.h" +#include + +namespace m5_module_llm { +struct ApiDepthAnythingSetupConfig_t { + String model = "depth_anything"; + String response_format = "jpeg.base64.stream"; + std::vector input = {"depth_anything.jpeg.raw"}; + bool enoutput = true; +}; + +class ApiDepthAnything { +public: + void init(ModuleMsg* moduleMsg); + + /** + * @brief Setup module YOLO, return YOLO work_id + * + * @param config + * @param request_id + * @return String + */ + String setup(ApiDepthAnythingSetupConfig_t config = ApiDepthAnythingSetupConfig_t(), + String request_id = "depth_anything_setup"); + + /** + * @brief Inference input data by module LLM + * + * @param raw_len + * @param work_id + * @param input + * @param request_id + * @return int + */ + int inference(String& work_id, uint8_t* input, size_t& raw_len, String request_id = "depth_anything_inference"); + + /** + * @brief Inference input data by module LLM, and wait inference result + * + * @param raw_len + * @param work_id + * @param input + * @param onResult On inference result callback + * @param timeout + * @param request_id + * @return int + */ + int inferenceAndWaitResult(String& work_id, uint8_t* input, size_t& raw_len, std::function onResult, + uint32_t timeout = 5000, String request_id = "depth_anything_inference"); + +private: + ModuleMsg* _module_msg = nullptr; +}; +} // namespace m5_module_llm diff --git a/src/api/api_melotts.cpp 
b/src/api/api_melotts.cpp index b4e6f8b..adcadbf 100644 --- a/src/api/api_melotts.cpp +++ b/src/api/api_melotts.cpp @@ -44,6 +44,27 @@ String ApiMelotts::setup(ApiMelottsSetupConfig_t config, String request_id, Stri return work_id; } +String ApiMelotts::exit(String work_id, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = work_id; + doc["action"] = "exit"; + serializeJson(doc, cmd); + } + + _module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&work_id](ResponseMsg_t& msg) { + // Copy work id + work_id = msg.work_id; + }, + 100); + return work_id; +} + int ApiMelotts::inference(String work_id, String input, uint32_t timeout, String request_id) { String cmd; diff --git a/src/api/api_melotts.h b/src/api/api_melotts.h index 7a35219..22ab4ea 100644 --- a/src/api/api_melotts.h +++ b/src/api/api_melotts.h @@ -31,6 +31,15 @@ class ApiMelotts { String setup(ApiMelottsSetupConfig_t config = ApiMelottsSetupConfig_t(), String request_id = "melotts_setup", String language = "en_US"); + /** + * @brief Exit module TTS, return TTS work_id + * + * @param work_id + * @param request_id + * @return String + */ + String exit(String work_id, String request_id = "yolo_exit"); + /** * @brief Inference input data by TTS module * diff --git a/src/api/api_vlm.cpp b/src/api/api_vlm.cpp new file mode 100644 index 0000000..651b986 --- /dev/null +++ b/src/api/api_vlm.cpp @@ -0,0 +1,137 @@ +/* + * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD + * + * SPDX-License-Identifier: MIT + */ +#include "api_vlm.h" +#include "api_version.h" + +using namespace m5_module_llm; + +void ApiVlm::init(ModuleMsg* moduleMsg) +{ + _module_msg = moduleMsg; +} + +String ApiVlm::setup(ApiVlmSetupConfig_t config, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = "vlm"; + doc["action"] = "setup"; + doc["object"] = "vlm.setup"; + doc["data"]["model"] = config.model; + 
doc["data"]["response_format"] = config.response_format; + doc["data"]["enoutput"] = config.enoutput; + doc["data"]["enkws"] = config.enkws; + doc["data"]["max_token_len"] = config.max_token_len; + doc["data"]["prompt"] = config.prompt; + if (!llm_version) { + doc["data"]["model"] = "qwen2.5-0.5b"; + doc["data"]["input"] = config.input[0]; + } else { + JsonArray inputArray = doc["data"]["input"].to(); + for (const String& str : config.input) { + inputArray.add(str); + } + } + serializeJson(doc, cmd); + } + + String llm_work_id; + _module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&llm_work_id](ResponseMsg_t& msg) { + // Copy work id + llm_work_id = msg.work_id; + }, + 20000); + return llm_work_id; +} + +String ApiVlm::exit(String work_id, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = work_id; + doc["action"] = "exit"; + serializeJson(doc, cmd); + } + + _module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&work_id](ResponseMsg_t& msg) { + // Copy work id + work_id = msg.work_id; + }, + 100); + return work_id; +} + +int ApiVlm::inference(String work_id, String input, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = work_id; + doc["action"] = "inference"; + doc["object"] = "vlm.utf-8.stream"; + doc["data"]["delta"] = input; + doc["data"]["index"] = 0; + doc["data"]["finish"] = true; + serializeJson(doc, cmd); + } + + _module_msg->sendCmd(cmd.c_str()); + return MODULE_LLM_OK; +} + +int ApiVlm::inferenceAndWaitResult(String work_id, String input, std::function onResult, + uint32_t timeout, String request_id) +{ + inference(work_id, input, request_id); + + uint32_t time_out_count = millis(); + bool is_time_out = false; + bool is_msg_finish = false; + while (1) { + _module_msg->update(); + _module_msg->takeMsg(request_id, [&time_out_count, &is_msg_finish, &onResult](ResponseMsg_t& msg) { + String response_msg; + 
{ + JsonDocument doc; + deserializeJson(doc, msg.raw_msg); + response_msg = doc["data"]["delta"].as(); + if (!doc["data"]["finish"].isNull()) { + is_msg_finish = doc["data"]["finish"]; + if (is_msg_finish) { + response_msg += '\n'; + } + } + } + if (onResult) { + onResult(response_msg); + } + time_out_count = millis(); + }); + + if (is_msg_finish) { + break; + } + + if (millis() - time_out_count > timeout) { + is_time_out = true; + break; + } + } + + if (is_time_out) { + return MODULE_LLM_WAIT_RESPONSE_TIMEOUT; + } + return MODULE_LLM_OK; +} diff --git a/src/api/api_vlm.h b/src/api/api_vlm.h new file mode 100644 index 0000000..01878d7 --- /dev/null +++ b/src/api/api_vlm.h @@ -0,0 +1,72 @@ +/* + * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD + * + * SPDX-License-Identifier: MIT + */ +#pragma once +#include "../utils/msg.h" +#include + +namespace m5_module_llm { + +struct ApiVlmSetupConfig_t { + String prompt; + String model = "internvl2.5-1B-ax630c"; + String response_format = "vlm.utf-8.stream"; + std::vector input = {"vlm.utf-8.stream"}; + bool enoutput = true; + bool enkws = true; + // int max_token_len = 127; + int max_token_len = 255; +}; + +class ApiVlm { +public: + void init(ModuleMsg* moduleMsg); + + /** + * @brief Setup module LLM, return LLM work_id + * + * @param config + * @param request_id + * @return String + */ + String setup(ApiVlmSetupConfig_t config = ApiVlmSetupConfig_t(), String request_id = "vlm_setup"); + + /** + * @brief Exit module YOLO, return YOLO work_id + * + * @param work_id + * @param request_id + * @return String + */ + String exit(String work_id, String request_id = "vlm_exit"); + + /** + * @brief Inference input data by module LLM + * + * @param work_id + * @param input + * @param request_id + * @return int + */ + int inference(String work_id, String input, String request_id = "vlm_inference"); + + /** + * @brief Inference input data by module LLM, and wait inference result + * + * @param work_id + * @param input + * @param 
onResult On inference result callback + * @param timeout + * @param request_id + * @return int + */ + int inferenceAndWaitResult(String work_id, String input, std::function onResult, + uint32_t timeout = 5000, String request_id = "vlm_inference"); + +private: + ModuleMsg* _module_msg = nullptr; +}; + +} // namespace m5_module_llm diff --git a/src/api/api_yolo.h b/src/api/api_yolo.h index e065182..ebccc1a 100644 --- a/src/api/api_yolo.h +++ b/src/api/api_yolo.h @@ -36,6 +36,7 @@ class ApiYolo { * @return String */ String exit(String work_id, String request_id = "yolo_exit"); + /** * @brief Inference input data by module LLM * From af55e48bc04f4da3edd97e2b220418f885fa4fd3 Mon Sep 17 00:00:00 2001 From: LittleMouse Date: Mon, 6 Jan 2025 17:22:47 +0800 Subject: [PATCH 03/16] update module api. --- src/api/api_asr.cpp | 21 +++++++++++++++++++++ src/api/api_asr.h | 9 +++++++++ src/api/api_audio.cpp | 21 +++++++++++++++++++++ src/api/api_audio.h | 11 ++++++++++- src/api/api_camera.cpp | 21 +++++++++++++++++++++ src/api/api_camera.h | 11 ++++++++++- src/api/api_depth_anything.cpp | 21 +++++++++++++++++++++ src/api/api_depth_anything.h | 11 ++++++++++- src/api/api_kws.cpp | 21 +++++++++++++++++++++ src/api/api_kws.h | 9 +++++++++ src/api/api_llm.cpp | 21 +++++++++++++++++++++ src/api/api_llm.h | 9 +++++++++ src/api/api_melotts.h | 2 +- src/api/api_tts.cpp | 21 +++++++++++++++++++++ src/api/api_tts.h | 9 +++++++++ src/api/api_vlm.h | 8 ++++---- 16 files changed, 218 insertions(+), 8 deletions(-) diff --git a/src/api/api_asr.cpp b/src/api/api_asr.cpp index 900d531..2766fa6 100644 --- a/src/api/api_asr.cpp +++ b/src/api/api_asr.cpp @@ -51,3 +51,24 @@ String ApiAsr::setup(ApiAsrSetupConfig_t config, String request_id, String langu 10000); return work_id; } + +String ApiAsr::exit(String work_id, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = work_id; + doc["action"] = "exit"; + serializeJson(doc, cmd); + } + + 
_module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&work_id](ResponseMsg_t& msg) { + // Copy work id + work_id = msg.work_id; + }, + 100); + return work_id; +} diff --git a/src/api/api_asr.h b/src/api/api_asr.h index 34d2364..0cd34a0 100644 --- a/src/api/api_asr.h +++ b/src/api/api_asr.h @@ -34,6 +34,15 @@ class ApiAsr { String setup(ApiAsrSetupConfig_t config = ApiAsrSetupConfig_t(), String request_id = "asr_setup", String language = "en_US"); + /** + * @brief Exit module ASR, return ASR work_id + * + * @param work_id + * @param request_id + * @return String + */ + String exit(String work_id, String request_id = "asr_exit"); + private: ModuleMsg* _module_msg = nullptr; }; diff --git a/src/api/api_audio.cpp b/src/api/api_audio.cpp index 343a586..caae9ae 100644 --- a/src/api/api_audio.cpp +++ b/src/api/api_audio.cpp @@ -40,3 +40,24 @@ String ApiAudio::setup(ApiAudioSetupConfig_t config, String request_id) 5000); return work_id; } + +String ApiAudio::exit(String work_id, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = work_id; + doc["action"] = "exit"; + serializeJson(doc, cmd); + } + + _module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&work_id](ResponseMsg_t& msg) { + // Copy work id + work_id = msg.work_id; + }, + 100); + return work_id; +} diff --git a/src/api/api_audio.h b/src/api/api_audio.h index a1ab88f..2bf9eff 100644 --- a/src/api/api_audio.h +++ b/src/api/api_audio.h @@ -23,7 +23,7 @@ class ApiAudio { void init(ModuleMsg* moduleMsg); /** - * @brief Setup module audio, return work_id + * @brief Setup module audio, return audio work_id * * @param config * @param request_id @@ -31,6 +31,15 @@ class ApiAudio { */ String setup(ApiAudioSetupConfig_t config = ApiAudioSetupConfig_t(), String request_id = "audio_setup"); + /** + * @brief Exit module audio, return audio work_id + * + * @param work_id + * @param request_id + * @return String + */ + String exit(String 
work_id, String request_id = "audio_exit"); + private: ModuleMsg* _module_msg = nullptr; }; diff --git a/src/api/api_camera.cpp b/src/api/api_camera.cpp index 8b2c7da..395dc8b 100644 --- a/src/api/api_camera.cpp +++ b/src/api/api_camera.cpp @@ -39,3 +39,24 @@ String ApiCamera::setup(ApiCameraSetupConfig_t config, String request_id) 5000); return work_id; } + +String ApiCamera::exit(String work_id, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = work_id; + doc["action"] = "exit"; + serializeJson(doc, cmd); + } + + _module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&work_id](ResponseMsg_t& msg) { + // Copy work id + work_id = msg.work_id; + }, + 100); + return work_id; +} diff --git a/src/api/api_camera.h b/src/api/api_camera.h index f0d0ea3..973de50 100644 --- a/src/api/api_camera.h +++ b/src/api/api_camera.h @@ -22,7 +22,7 @@ class ApiCamera { void init(ModuleMsg* moduleMsg); /** - * @brief Setup module camera, return work_id + * @brief Setup module camera, return camera work_id * * @param config * @param request_id @@ -30,6 +30,15 @@ class ApiCamera { */ String setup(ApiCameraSetupConfig_t config = ApiCameraSetupConfig_t(), String request_id = "camera_setup"); + /** + * @brief Exit module camera, return camera work_id + * + * @param work_id + * @param request_id + * @return String + */ + String exit(String work_id, String request_id = "camera_exit"); + private: ModuleMsg* _module_msg = nullptr; }; diff --git a/src/api/api_depth_anything.cpp b/src/api/api_depth_anything.cpp index b8ceb2d..85d0a97 100644 --- a/src/api/api_depth_anything.cpp +++ b/src/api/api_depth_anything.cpp @@ -42,6 +42,27 @@ String ApiDepthAnything::setup(ApiDepthAnythingSetupConfig_t config, String requ return work_id; } +String ApiDepthAnything::exit(String work_id, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = work_id; + doc["action"] = "exit"; + 
serializeJson(doc, cmd); + } + + _module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&work_id](ResponseMsg_t& msg) { + // Copy work id + work_id = msg.work_id; + }, + 100); + return work_id; +} + int ApiDepthAnything::inference(String& work_id, uint8_t* input, size_t& raw_len, String request_id) { String cmd; diff --git a/src/api/api_depth_anything.h b/src/api/api_depth_anything.h index c1138e7..36cb450 100644 --- a/src/api/api_depth_anything.h +++ b/src/api/api_depth_anything.h @@ -20,7 +20,7 @@ class ApiDepthAnything { void init(ModuleMsg* moduleMsg); /** - * @brief Setup module YOLO, return YOLO work_id + * @brief Setup module DepthAnything, return DepthAnything work_id * * @param config * @param request_id @@ -29,6 +29,15 @@ class ApiDepthAnything { String setup(ApiDepthAnythingSetupConfig_t config = ApiDepthAnythingSetupConfig_t(), String request_id = "depth_anything_setup"); + /** + * @brief Exit module DepthAnything, return DepthAnything work_id + * + * @param work_id + * @param request_id + * @return String + */ + String exit(String work_id, String request_id = "depth_anything_exit"); + /** * @brief Inference input data by module LLM * diff --git a/src/api/api_kws.cpp b/src/api/api_kws.cpp index 07f417e..a4c3103 100644 --- a/src/api/api_kws.cpp +++ b/src/api/api_kws.cpp @@ -48,3 +48,24 @@ String ApiKws::setup(ApiKwsSetupConfig_t config, String request_id, String langu 30000); return work_id; } + +String ApiKws::exit(String work_id, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = work_id; + doc["action"] = "exit"; + serializeJson(doc, cmd); + } + + _module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&work_id](ResponseMsg_t& msg) { + // Copy work id + work_id = msg.work_id; + }, + 100); + return work_id; +} diff --git a/src/api/api_kws.h b/src/api/api_kws.h index 6400eec..71dc51c 100644 --- a/src/api/api_kws.h +++ b/src/api/api_kws.h @@ -31,6 +31,15 @@ class 
ApiKws { String setup(ApiKwsSetupConfig_t config = ApiKwsSetupConfig_t(), String request_id = "kws_setup", String language = "en_US"); + /** + * @brief Exit module KWS, return KWS work_id + * + * @param work_id + * @param request_id + * @return String + */ + String exit(String work_id, String request_id = "kws_exit"); + private: ModuleMsg* _module_msg = nullptr; }; diff --git a/src/api/api_llm.cpp b/src/api/api_llm.cpp index 57b4ee9..93e4fa9 100644 --- a/src/api/api_llm.cpp +++ b/src/api/api_llm.cpp @@ -51,6 +51,27 @@ String ApiLlm::setup(ApiLlmSetupConfig_t config, String request_id) return llm_work_id; } +String ApiLlm::exit(String work_id, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = work_id; + doc["action"] = "exit"; + serializeJson(doc, cmd); + } + + _module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&work_id](ResponseMsg_t& msg) { + // Copy work id + work_id = msg.work_id; + }, + 100); + return work_id; +} + int ApiLlm::inference(String work_id, String input, String request_id) { String cmd; diff --git a/src/api/api_llm.h b/src/api/api_llm.h index 27536e9..45a3455 100644 --- a/src/api/api_llm.h +++ b/src/api/api_llm.h @@ -33,6 +33,15 @@ class ApiLlm { */ String setup(ApiLlmSetupConfig_t config = ApiLlmSetupConfig_t(), String request_id = "llm_setup"); + /** + * @brief Exit module LLM, return LLM work_id + * + * @param work_id + * @param request_id + * @return String + */ + String exit(String work_id, String request_id = "llm_exit"); + /** * @brief Inference input data by module LLM * diff --git a/src/api/api_melotts.h b/src/api/api_melotts.h index 22ab4ea..2708c82 100644 --- a/src/api/api_melotts.h +++ b/src/api/api_melotts.h @@ -38,7 +38,7 @@ class ApiMelotts { * @param request_id * @return String */ - String exit(String work_id, String request_id = "yolo_exit"); + String exit(String work_id, String request_id = "tts_exit"); /** * @brief Inference input data by TTS module 
diff --git a/src/api/api_tts.cpp b/src/api/api_tts.cpp index 6cfe661..7aee3c8 100644 --- a/src/api/api_tts.cpp +++ b/src/api/api_tts.cpp @@ -52,6 +52,27 @@ String ApiTts::setup(ApiTtsSetupConfig_t config, String request_id, String langu return work_id; } +String ApiTts::exit(String work_id, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = work_id; + doc["action"] = "exit"; + serializeJson(doc, cmd); + } + + _module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&work_id](ResponseMsg_t& msg) { + // Copy work id + work_id = msg.work_id; + }, + 100); + return work_id; +} + int ApiTts::inference(String work_id, String input, uint32_t timeout, String request_id) { String cmd; diff --git a/src/api/api_tts.h b/src/api/api_tts.h index e5b06b9..80aea4e 100644 --- a/src/api/api_tts.h +++ b/src/api/api_tts.h @@ -32,6 +32,15 @@ class ApiTts { String setup(ApiTtsSetupConfig_t config = ApiTtsSetupConfig_t(), String request_id = "tts_setup", String language = "en_US"); + /** + * @brief Exit module TTS, return TTS work_id + * + * @param work_id + * @param request_id + * @return String + */ + String exit(String work_id, String request_id = "tts_exit"); + /** * @brief Inference input data by TTS module * diff --git a/src/api/api_vlm.h b/src/api/api_vlm.h index 01878d7..cad9e30 100644 --- a/src/api/api_vlm.h +++ b/src/api/api_vlm.h @@ -25,7 +25,7 @@ class ApiVlm { void init(ModuleMsg* moduleMsg); /** - * @brief Setup module LLM, return LLM work_id + * @brief Setup module VLLM, return VLLM work_id * * @param config * @param request_id @@ -34,7 +34,7 @@ class ApiVlm { String setup(ApiVlmSetupConfig_t config = ApiVlmSetupConfig_t(), String request_id = "vlm_setup"); /** - * @brief Exit module YOLO, return YOLO work_id + * @brief Exit module VLLM, return VLLM work_id * * @param work_id * @param request_id @@ -43,7 +43,7 @@ class ApiVlm { String exit(String work_id, String request_id = "vlm_exit"); /** - * @brief 
Inference input data by module LLM + * @brief Inference input data by module VLLM * * @param work_id * @param input @@ -53,7 +53,7 @@ class ApiVlm { int inference(String work_id, String input, String request_id = "vlm_inference"); /** - * @brief Inference input data by module LLM, and wait inference result + * @brief Inference input data by module VLLM, and wait inference result * * @param work_id * @param input From dfd4c1e5d8c94e52c60c603b5e33566f14961c0a Mon Sep 17 00:00:00 2001 From: LittleMouse Date: Mon, 6 Jan 2025 17:33:41 +0800 Subject: [PATCH 04/16] Increase llm setup timeout. --- src/api/api_llm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/api_llm.cpp b/src/api/api_llm.cpp index 93e4fa9..617c8bd 100644 --- a/src/api/api_llm.cpp +++ b/src/api/api_llm.cpp @@ -47,7 +47,7 @@ String ApiLlm::setup(ApiLlmSetupConfig_t config, String request_id) // Copy work id llm_work_id = msg.work_id; }, - 10000); + 20000); return llm_work_id; } From 6679384e761e45207735d604091ef879cf174b97 Mon Sep 17 00:00:00 2001 From: LittleMouse Date: Tue, 7 Jan 2025 09:26:03 +0800 Subject: [PATCH 05/16] fix arduino lib version --- library.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library.json b/library.json index f274721..22e3a1c 100644 --- a/library.json +++ b/library.json @@ -14,7 +14,7 @@ "M5GFX": "*", "ArduinoJson": "*" }, - "version": "1.0.0", + "version": "1.4.0", "frameworks": "arduino", "platforms": "espressif32" } \ No newline at end of file From 31cdca83ea36eddbe314992cc84926d478df207c Mon Sep 17 00:00:00 2001 From: LittleMouse Date: Thu, 9 Jan 2025 15:46:46 +0800 Subject: [PATCH 06/16] Increase the timeout time of LLM setup. Update arduino lib version. 
--- library.properties | 2 +- src/api/api_llm.cpp | 2 +- src/utils/comm.cpp | 13 +++++++++++-- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/library.properties b/library.properties index 020a707..d08cea6 100644 --- a/library.properties +++ b/library.properties @@ -1,5 +1,5 @@ name=M5ModuleLLM -version=1.0.0 +version=1.4.0 author=M5Stack maintainer=M5Stack sentence=M5ModuleLLM is a library for M5ModuleLLM diff --git a/src/api/api_llm.cpp b/src/api/api_llm.cpp index 617c8bd..44ca3aa 100644 --- a/src/api/api_llm.cpp +++ b/src/api/api_llm.cpp @@ -47,7 +47,7 @@ String ApiLlm::setup(ApiLlmSetupConfig_t config, String request_id) // Copy work id llm_work_id = msg.work_id; }, - 20000); + 30000); return llm_work_id; } diff --git a/src/utils/comm.cpp b/src/utils/comm.cpp index 7df0c4a..33b6bc5 100644 --- a/src/utils/comm.cpp +++ b/src/utils/comm.cpp @@ -7,6 +7,8 @@ #include using namespace m5_module_llm; +const size_t JSON_BUFFER_SIZE = 2048; +char jsonBuffer[JSON_BUFFER_SIZE]; bool ModuleComm::init(Stream* serialPort) { @@ -34,6 +36,7 @@ void ModuleComm::sendRaw(const uint8_t* data, size_t& raw_len) ModuleComm::Respond_t ModuleComm::getResponse(uint32_t timeout) { Respond_t ret; + String buffer; uint32_t time_out_count = millis(); bool get_msg = false; @@ -43,7 +46,13 @@ ModuleComm::Respond_t ModuleComm::getResponse(uint32_t timeout) if (_serial->available()) { get_msg = true; while (_serial->available()) { - ret.msg += (char)_serial->read(); + char c = (char)_serial->read(); + buffer += c; + + if (c == '\n') { + ret.msg = buffer; + return ret;; + } } get_msg_count = millis(); time_out_count = millis(); @@ -62,7 +71,7 @@ ModuleComm::Respond_t ModuleComm::getResponse(uint32_t timeout) break; } - delay(5); + // delay(5); } return ret; From 8fd149869be6d2b1226dbdd9357baf6a2ea1d1a0 Mon Sep 17 00:00:00 2001 From: LittleMouse Date: Thu, 9 Jan 2025 19:02:35 +0800 Subject: [PATCH 07/16] update yolo demo --- examples/YOLO/YOLO.ino | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/examples/YOLO/YOLO.ino b/examples/YOLO/YOLO.ino index cd6dc72..ea6d45d 100644 --- a/examples/YOLO/YOLO.ino +++ b/examples/YOLO/YOLO.ino @@ -119,5 +119,8 @@ void loop() } /* Clear handled messages */ + module_llm.msg.clearMsg("yolo_setup"); module_llm.msg.responseMsgList.clear(); + + usleep(500000); } \ No newline at end of file From 9a1fa03ce13dc85264987fa6221ac48e463e01a6 Mon Sep 17 00:00:00 2001 From: lovyan03 <42724151+lovyan03@users.noreply.github.com> Date: Sat, 18 Jan 2025 11:14:47 +0900 Subject: [PATCH 08/16] Changed to automatically detect pin settings in sample code. --- examples/KWS_ASR/KWS_ASR.ino | 9 ++++++--- examples/SerialTextAssistant/SerialTextAssistant.ino | 9 ++++++--- examples/TTS/TTS.ino | 9 ++++++--- examples/TextAssistant/TextAssistant.ino | 9 ++++++--- examples/VoiceAssistant/VoiceAssistant.ino | 9 ++++++--- examples/YOLO/YOLO.ino | 9 ++++++--- 6 files changed, 36 insertions(+), 18 deletions(-) diff --git a/examples/KWS_ASR/KWS_ASR.ino b/examples/KWS_ASR/KWS_ASR.ino index b7ad005..bd7e5f9 100644 --- a/examples/KWS_ASR/KWS_ASR.ino +++ b/examples/KWS_ASR/KWS_ASR.ino @@ -27,9 +27,12 @@ void setup() // language = "zh_CN"; /* Init module serial port */ - Serial2.begin(115200, SERIAL_8N1, 16, 17); // Basic - // Serial2.begin(115200, SERIAL_8N1, 13, 14); // Core2 - // Serial2.begin(115200, SERIAL_8N1, 18, 17); // CoreS3 + // int rxd = 16, txd = 17; // Basic + // int rxd = 13, txd = 14; // Core2 + // int rxd = 18, txd = 17; // CoreS3 + int rxd = M5.getPin(m5::pin_name_t::port_c_rxd); + int txd = M5.getPin(m5::pin_name_t::port_c_txd); + Serial2.begin(115200, SERIAL_8N1, rxd, txd); /* Init module */ module_llm.begin(&Serial2); diff --git a/examples/SerialTextAssistant/SerialTextAssistant.ino b/examples/SerialTextAssistant/SerialTextAssistant.ino index 2587018..10c44b8 100644 --- a/examples/SerialTextAssistant/SerialTextAssistant.ino +++ b/examples/SerialTextAssistant/SerialTextAssistant.ino @@ -25,9 +25,12 @@ void setup() 
CommSerialPort.begin(115200); /* Init module serial port */ - Serial2.begin(115200, SERIAL_8N1, 16, 17); // Basic - // Serial2.begin(115200, SERIAL_8N1, 13, 14); // Core2 - // Serial2.begin(115200, SERIAL_8N1, 18, 17); // CoreS3 + // int rxd = 16, txd = 17; // Basic + // int rxd = 13, txd = 14; // Core2 + // int rxd = 18, txd = 17; // CoreS3 + int rxd = M5.getPin(m5::pin_name_t::port_c_rxd); + int txd = M5.getPin(m5::pin_name_t::port_c_txd); + Serial2.begin(115200, SERIAL_8N1, rxd, txd); /* Init module */ module_llm.begin(&Serial2); diff --git a/examples/TTS/TTS.ino b/examples/TTS/TTS.ino index 73d536d..5a110d5 100644 --- a/examples/TTS/TTS.ino +++ b/examples/TTS/TTS.ino @@ -22,9 +22,12 @@ void setup() // language = "zh_CN"; /* Init module serial port */ - Serial2.begin(115200, SERIAL_8N1, 16, 17); // Basic - // Serial2.begin(115200, SERIAL_8N1, 13, 14); // Core2 - // Serial2.begin(115200, SERIAL_8N1, 18, 17); // CoreS3 + // int rxd = 16, txd = 17; // Basic + // int rxd = 13, txd = 14; // Core2 + // int rxd = 18, txd = 17; // CoreS3 + int rxd = M5.getPin(m5::pin_name_t::port_c_rxd); + int txd = M5.getPin(m5::pin_name_t::port_c_txd); + Serial2.begin(115200, SERIAL_8N1, rxd, txd); /* Init module */ module_llm.begin(&Serial2); diff --git a/examples/TextAssistant/TextAssistant.ino b/examples/TextAssistant/TextAssistant.ino index 17bc101..48dd552 100644 --- a/examples/TextAssistant/TextAssistant.ino +++ b/examples/TextAssistant/TextAssistant.ino @@ -17,9 +17,12 @@ void setup() M5.Display.setTextScroll(true); /* Init module serial port */ - Serial2.begin(115200, SERIAL_8N1, 16, 17); // Basic - // Serial2.begin(115200, SERIAL_8N1, 13, 14); // Core2 - // Serial2.begin(115200, SERIAL_8N1, 18, 17); // CoreS3 + // int rxd = 16, txd = 17; // Basic + // int rxd = 13, txd = 14; // Core2 + // int rxd = 18, txd = 17; // CoreS3 + int rxd = M5.getPin(m5::pin_name_t::port_c_rxd); + int txd = M5.getPin(m5::pin_name_t::port_c_txd); + Serial2.begin(115200, SERIAL_8N1, rxd, txd); /* Init 
module */ module_llm.begin(&Serial2); diff --git a/examples/VoiceAssistant/VoiceAssistant.ino b/examples/VoiceAssistant/VoiceAssistant.ino index 3a4979b..2f6ac00 100644 --- a/examples/VoiceAssistant/VoiceAssistant.ino +++ b/examples/VoiceAssistant/VoiceAssistant.ino @@ -42,9 +42,12 @@ void setup() M5.Display.setTextScroll(true); /* Init module serial port */ - Serial2.begin(115200, SERIAL_8N1, 16, 17); // Basic - // Serial2.begin(115200, SERIAL_8N1, 13, 14); // Core2 - // Serial2.begin(115200, SERIAL_8N1, 18, 17); // CoreS3 + // int rxd = 16, txd = 17; // Basic + // int rxd = 13, txd = 14; // Core2 + // int rxd = 18, txd = 17; // CoreS3 + int rxd = M5.getPin(m5::pin_name_t::port_c_rxd); + int txd = M5.getPin(m5::pin_name_t::port_c_txd); + Serial2.begin(115200, SERIAL_8N1, rxd, txd); /* Init module */ module_llm.begin(&Serial2); diff --git a/examples/YOLO/YOLO.ino b/examples/YOLO/YOLO.ino index cd6dc72..10ba72b 100644 --- a/examples/YOLO/YOLO.ino +++ b/examples/YOLO/YOLO.ino @@ -28,9 +28,12 @@ void setup() M5.Display.setTextScroll(true); /* Init module serial port */ - Serial2.begin(115200, SERIAL_8N1, 16, 17); // Basic - // Serial2.begin(115200, SERIAL_8N1, 13, 14); // Core2 - // Serial2.begin(115200, SERIAL_8N1, 18, 17); // CoreS3 + // int rxd = 16, txd = 17; // Basic + // int rxd = 13, txd = 14; // Core2 + // int rxd = 18, txd = 17; // CoreS3 + int rxd = M5.getPin(m5::pin_name_t::port_c_rxd); + int txd = M5.getPin(m5::pin_name_t::port_c_txd); + Serial2.begin(115200, SERIAL_8N1, rxd, txd); /* Init module */ module_llm.begin(&Serial2); From 7d3dcdc2ce1552f62277d2611a377c2376e66af8 Mon Sep 17 00:00:00 2001 From: LittleMouse Date: Mon, 20 Jan 2025 11:08:30 +0800 Subject: [PATCH 09/16] update vad_whisper api & demo --- examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino | 108 +++++++++++++++++++ src/M5ModuleLLM.cpp | 2 + src/M5ModuleLLM.h | 14 +++ src/api/api_vad.cpp | 64 +++++++++++ src/api/api_vad.h | 45 ++++++++ src/api/api_whisper.cpp | 65 +++++++++++ 
src/api/api_whisper.h | 47 ++++++++ 7 files changed, 345 insertions(+) create mode 100644 examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino create mode 100644 src/api/api_vad.cpp create mode 100644 src/api/api_vad.h create mode 100644 src/api/api_whisper.cpp create mode 100644 src/api/api_whisper.h diff --git a/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino b/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino new file mode 100644 index 0000000..8d84a1c --- /dev/null +++ b/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino @@ -0,0 +1,108 @@ +/* + * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD + * + * SPDX-License-Identifier: MIT + */ +#include +#include +#include + +M5ModuleLLM module_llm; + +/* Must be capitalized */ +String wake_up_keyword = "HELLO"; +// String wake_up_keyword = "你好你好"; +String kws_work_id; +String vad_work_id; +String whisper_work_id; +String language; + +void setup() +{ + M5.begin(); + M5.Display.setTextSize(2); + M5.Display.setTextScroll(true); + // M5.Display.setFont(&fonts::efontCN_12); // Support Chinese display + + language = "en_US"; + // language = "zh_CN"; + + /* Init module serial port */ + Serial2.begin(115200, SERIAL_8N1, 16, 17); // Basic + // Serial2.begin(115200, SERIAL_8N1, 13, 14); // Core2 + // Serial2.begin(115200, SERIAL_8N1, 18, 17); // CoreS3 + + /* Init module */ + module_llm.begin(&Serial2); + + /* Make sure module is connected */ + M5.Display.printf(">> Check ModuleLLM connection..\n"); + while (1) { + if (module_llm.checkConnection()) { + break; + } + } + + /* Reset ModuleLLM */ + M5.Display.printf(">> Reset ModuleLLM..\n"); + module_llm.sys.reset(); + + /* Setup Audio module */ + M5.Display.printf(">> Setup audio..\n"); + module_llm.audio.setup(); + + /* Setup KWS module and save returned work id */ + M5.Display.printf(">> Setup kws..\n"); + m5_module_llm::ApiKwsSetupConfig_t kws_config; + kws_config.kws = wake_up_keyword; + kws_work_id = module_llm.kws.setup(kws_config, "kws_setup", language); + + /* Setup VAD module and save 
returned work id */ + M5.Display.printf(">> Setup vad..\n"); + m5_module_llm::ApiVadSetupConfig_t vad_config; + vad_config.input = {"sys.pcm", kws_work_id}; + vad_work_id = module_llm.vad.setup(vad_config, "vad_setup"); + + /* Setup Whisper module and save returned work id */ + M5.Display.printf(">> Setup whisper..\n"); + m5_module_llm::ApiWhisperSetupConfig_t whisper_config; + whisper_config.input = {"sys.pcm", kws_work_id, vad_work_id}; + whisper_config.language = "en"; + // whisper_config.language = "zh"; + // whisper_config.language = "ja"; + whisper_work_id = module_llm.whisper.setup(whisper_config, "whisper_setup"); + + M5.Display.printf(">> Setup ok\n>> Say \"%s\" to wakeup\n", wake_up_keyword.c_str()); +} + +void loop() +{ + /* Update ModuleLLM */ + module_llm.update(); + + /* Handle module response messages */ + for (auto& msg : module_llm.msg.responseMsgList) { + /* If KWS module message */ + if (msg.work_id == kws_work_id) { + M5.Display.setTextColor(TFT_GREENYELLOW); + M5.Display.printf(">> Keyword detected\n"); + } + + /* If ASR module message */ + if (msg.work_id == whisper_work_id) { + /* Check message object type */ + if (msg.object == "asr.utf-8") { + /* Parse message json and get ASR result */ + JsonDocument doc; + deserializeJson(doc, msg.raw_msg); + String asr_result = doc["data"].as(); + + M5.Display.setTextColor(TFT_YELLOW); + M5.Display.printf(">> %s\n", asr_result.c_str()); + } + } + } + + /* Clear handled messages */ + module_llm.msg.responseMsgList.clear(); +} \ No newline at end of file diff --git a/src/M5ModuleLLM.cpp b/src/M5ModuleLLM.cpp index 2ac9119..d4c63f1 100644 --- a/src/M5ModuleLLM.cpp +++ b/src/M5ModuleLLM.cpp @@ -20,6 +20,8 @@ bool M5ModuleLLM::begin(Stream* serialPort) asr.init(&msg); yolo.init(&msg); camera.init(&msg); + vad.init(&msg); + whisper.init(&msg); depthanything.init(&msg); return true; } diff --git a/src/M5ModuleLLM.h b/src/M5ModuleLLM.h index c652089..a1846fb 100644 --- a/src/M5ModuleLLM.h +++ b/src/M5ModuleLLM.h 
@@ -18,6 +18,8 @@ #include "api/api_yolo.h" #include "api/api_depth_anything.h" #include "api/api_camera.h" +#include "api/api_vad.h" +#include "api/api_whisper.h" #include "api/api_version.h" class M5ModuleLLM { @@ -105,6 +107,18 @@ class M5ModuleLLM { */ m5_module_llm::ApiYolo yolo; + /** + * @brief VAD module api set + * + */ + m5_module_llm::ApiVad vad; + + /** + * @brief Whisper module api set + * + */ + m5_module_llm::ApiWhisper whisper; + /** * @brief DepthAnything module api set * diff --git a/src/api/api_vad.cpp b/src/api/api_vad.cpp new file mode 100644 index 0000000..9b646af --- /dev/null +++ b/src/api/api_vad.cpp @@ -0,0 +1,64 @@ +/* + * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD + * + * SPDX-License-Identifier: MIT + */ +#include "api_vad.h" + +using namespace m5_module_llm; + +void ApiVad::init(ModuleMsg* moduleMsg) +{ + _module_msg = moduleMsg; +} + +String ApiVad::setup(ApiVadSetupConfig_t config, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = "vad"; + doc["action"] = "setup"; + doc["object"] = "vad.setup"; + doc["data"]["model"] = config.model; + doc["data"]["response_format"] = config.response_format; + doc["data"]["enoutput"] = config.enoutput; + JsonArray inputArray = doc["data"]["input"].to(); + for (const String& str : config.input) { + inputArray.add(str); + } + serializeJson(doc, cmd); + } + + String work_id; + _module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&work_id](ResponseMsg_t& msg) { + // Copy work id + work_id = msg.work_id; + }, + 30000); + return work_id; +} + +String ApiVad::exit(String work_id, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = work_id; + doc["action"] = "exit"; + serializeJson(doc, cmd); + } + + _module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&work_id](ResponseMsg_t& msg) { + // Copy work id + work_id = msg.work_id; + }, + 100); + 
return work_id; +} diff --git a/src/api/api_vad.h b/src/api/api_vad.h new file mode 100644 index 0000000..15f2e11 --- /dev/null +++ b/src/api/api_vad.h @@ -0,0 +1,45 @@ +/* + * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD + * + * SPDX-License-Identifier: MIT + */ +#pragma once +#include "../utils/msg.h" +#include + +namespace m5_module_llm { + +struct ApiVadSetupConfig_t { + String model = "silero-vad"; + String response_format = "vad.bool"; + std::vector input = {"sys.pcm"}; + bool enoutput = true; +}; + +class ApiVad { +public: + void init(ModuleMsg* moduleMsg); + + /** + * @brief Setup module VAD, return VAD work_id + * + * @param config + * @param request_id + * @return String + */ + String setup(ApiVadSetupConfig_t config = ApiVadSetupConfig_t(), String request_id = "vad_setup"); + + /** + * @brief Exit module VAD, return VAD work_id + * + * @param work_id + * @param request_id + * @return String + */ + String exit(String work_id, String request_id = "vad_exit"); + +private: + ModuleMsg* _module_msg = nullptr; +}; + +} // namespace m5_module_llm diff --git a/src/api/api_whisper.cpp b/src/api/api_whisper.cpp new file mode 100644 index 0000000..3b8f2f9 --- /dev/null +++ b/src/api/api_whisper.cpp @@ -0,0 +1,65 @@ +/* + * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD + * + * SPDX-License-Identifier: MIT + */ +#include "api_whisper.h" + +using namespace m5_module_llm; + +void ApiWhisper::init(ModuleMsg* moduleMsg) +{ + _module_msg = moduleMsg; +} + +String ApiWhisper::setup(ApiWhisperSetupConfig_t config, String request_id, String language) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = "whisper"; + doc["action"] = "setup"; + doc["object"] = "whisper.setup"; + doc["data"]["model"] = config.model; + doc["data"]["response_format"] = config.response_format; + doc["data"]["language"] = config.language; + doc["data"]["enoutput"] = config.enoutput; + JsonArray inputArray = doc["data"]["input"].to(); + for 
(const String& str : config.input) { + inputArray.add(str); + } + serializeJson(doc, cmd); + } + + String work_id; + _module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&work_id](ResponseMsg_t& msg) { + // Copy work id + work_id = msg.work_id; + }, + 10000); + return work_id; +} + +String ApiWhisper::exit(String work_id, String request_id) +{ + String cmd; + { + JsonDocument doc; + doc["request_id"] = request_id; + doc["work_id"] = work_id; + doc["action"] = "exit"; + serializeJson(doc, cmd); + } + + _module_msg->sendCmdAndWaitToTakeMsg( + cmd.c_str(), request_id, + [&work_id](ResponseMsg_t& msg) { + // Copy work id + work_id = msg.work_id; + }, + 100); + return work_id; +} diff --git a/src/api/api_whisper.h b/src/api/api_whisper.h new file mode 100644 index 0000000..40a5c2d --- /dev/null +++ b/src/api/api_whisper.h @@ -0,0 +1,47 @@ +/* + * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD + * + * SPDX-License-Identifier: MIT + */ +#pragma once +#include "../utils/msg.h" +#include + +namespace m5_module_llm { + +struct ApiWhisperSetupConfig_t { + String model = "whisper-tiny"; + String response_format = "asr.utf-8"; + String language = "en"; + std::vector input = {"sys.pcm"}; + bool enoutput = true; +}; + +class ApiWhisper { +public: + void init(ModuleMsg* moduleMsg); + + /** + * @brief Setup module ASR, return ASR work_id + * + * @param config + * @param request_id + * @return String + */ + String setup(ApiWhisperSetupConfig_t config = ApiWhisperSetupConfig_t(), String request_id = "asr_setup", + String language = "en_US"); + + /** + * @brief Exit module ASR, return ASR work_id + * + * @param work_id + * @param request_id + * @return String + */ + String exit(String work_id, String request_id = "asr_exit"); + +private: + ModuleMsg* _module_msg = nullptr; +}; + +} // namespace m5_module_llm From d46d46adc23a693b9821f953ebc416a242f7aa49 Mon Sep 17 00:00:00 2001 From: LittleMouse Date: Mon, 20 Jan 2025 11:12:45 +0800 Subject: [PATCH 10/16] update 
vad_whisper demo --- examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino b/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino index 8d84a1c..db6645b 100644 --- a/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino +++ b/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino @@ -28,9 +28,12 @@ void setup() // language = "zh_CN"; /* Init module serial port */ - Serial2.begin(115200, SERIAL_8N1, 16, 17); // Basic - // Serial2.begin(115200, SERIAL_8N1, 13, 14); // Core2 - // Serial2.begin(115200, SERIAL_8N1, 18, 17); // CoreS3 + // int rxd = 16, txd = 17; // Basic + // int rxd = 13, txd = 14; // Core2 + // int rxd = 18, txd = 17; // CoreS3 + int rxd = M5.getPin(m5::pin_name_t::port_c_rxd); + int txd = M5.getPin(m5::pin_name_t::port_c_txd); + Serial2.begin(115200, SERIAL_8N1, rxd, txd); /* Init module */ module_llm.begin(&Serial2); From cb779441e13238d085551e381d792ffe1f5b549a Mon Sep 17 00:00:00 2001 From: LittleMouse Date: Mon, 20 Jan 2025 11:19:45 +0800 Subject: [PATCH 11/16] fix clang-format error. --- src/utils/comm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/comm.cpp b/src/utils/comm.cpp index 33b6bc5..af350b0 100644 --- a/src/utils/comm.cpp +++ b/src/utils/comm.cpp @@ -51,7 +51,7 @@ ModuleComm::Respond_t ModuleComm::getResponse(uint32_t timeout) if (c == '\n') { ret.msg = buffer; - return ret;; + return ret; } } get_msg_count = millis(); From b75ce56a0fc4ff099797ffd8a386b2238ba61b49 Mon Sep 17 00:00:00 2001 From: LittleMouse Date: Mon, 20 Jan 2025 11:52:37 +0800 Subject: [PATCH 12/16] kws_vad_whisper demo add Japanese display. 
--- examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino b/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino index db6645b..0dff8cd 100644 --- a/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino +++ b/examples/KWS_VAD_Whisper/KWS_VAD_Whisper.ino @@ -23,6 +23,7 @@ void setup() M5.Display.setTextSize(2); M5.Display.setTextScroll(true); // M5.Display.setFont(&fonts::efontCN_12); // Support Chinese display + // M5.Display.setFont(&fonts::efontJA_12); // Support Japanese display language = "en_US"; // language = "zh_CN"; From 3d61ba786b60efd9a2190bfa23b81d0967b35256 Mon Sep 17 00:00:00 2001 From: LittleMouse Date: Mon, 20 Jan 2025 16:40:09 +0800 Subject: [PATCH 13/16] Add delay to receive message function --- src/utils/comm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/comm.cpp b/src/utils/comm.cpp index af350b0..21d6c72 100644 --- a/src/utils/comm.cpp +++ b/src/utils/comm.cpp @@ -71,7 +71,7 @@ ModuleComm::Respond_t ModuleComm::getResponse(uint32_t timeout) break; } - // delay(5); + delay(5); } return ret; From ea5651e9c18ec2a535dce6f3d7e890ebcf7e2961 Mon Sep 17 00:00:00 2001 From: LittleMouse Date: Thu, 23 Jan 2025 15:58:49 +0800 Subject: [PATCH 14/16] update docs --- docs/cn.md | 518 +++++++++++++++++++++++---- docs/en.md | 1009 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 1426 insertions(+), 101 deletions(-) diff --git a/docs/cn.md b/docs/cn.md index 137bd59..d8ef9c9 100644 --- a/docs/cn.md +++ b/docs/cn.md @@ -4,12 +4,12 @@ ## M5ModuleLLM Class -`M5ModuleLLM`用于初始化LLM Module, 并且提供内部成员用于快速初始化LLM的各个单元, 方便根据自己的需求构建应用。 +`M5ModuleLLM`用于初始化 LLM Module, 并且提供内部成员用于快速初始化 LLM 的各个单元, 方便根据自己的需求构建应用。 ```cpp class M5ModuleLLM { public: - bool begin(Stream* targetPort); + bool begin(Stream * targetPort); bool checkConnection(); void update(); @@ -21,6 +21,9 @@ public: m5_module_llm::ApiKws kws; m5_module_llm::ApiAsr asr; m5_module_llm::ApiAsr yolo; 
+ m5_module_llm::ApiVad vad; + m5_module_llm::ApiWhisper whisper; + m5_module_llm::ApiDepthAnything depthanything; m5_module_llm::ModuleMsg msg; m5_module_llm::ModuleComm comm; private: @@ -37,12 +40,12 @@ bool begin(Stream* targetPort); **功能说明:** -- 初始化LLM Module UART接口配置 +- 初始化 LLM Module UART 接口配置 **传入参数:** -- Stream* targetPort: - - 传入Serial指针 +- Stream\* targetPort: + - 传入 Serial 指针 **返回值:** @@ -60,7 +63,7 @@ bool checkConnection(); **功能说明:** -- 发送`sys.ping`指令, 检查LLM Module连接状态 +- 发送 `sys.ping` 指令, 检查 LLM Module 连接状态 **传入参数:** @@ -82,7 +85,7 @@ void update(); **功能说明:** -- 拉取LLM Module UART响应数据, 该API需包含在Loop中循环执行。 +- 拉取 LLM Module UART 响应数据, 该 API 需包含在 Loop 中循环执行。 **传入参数:** @@ -94,7 +97,7 @@ void update(); ## ApiSys Class -`M5ModuleLLM`的内部成员`ApiSys sys`用于控制SYS单元实现系统复位等操作。 +`M5ModuleLLM` 的内部成员 `ApiSys sys` 用于控制 SYS 单元实现系统复位等操作。 ### ping @@ -106,7 +109,7 @@ int ping(); **功能说明:** -- 发送`sys.ping`指令, 检查LLM Module连接状态 +- 发送`sys.ping`指令, 检查 LLM Module 连接状态 **传入参数:** @@ -127,7 +130,7 @@ int reset(bool waitResetFinish = true); **功能说明:** -- 发送`sys.reset`指令, 复位软件服务。 +- 发送 `sys.reset` 指令, 复位软件服务。 **传入参数:** @@ -150,7 +153,7 @@ int reboot(); **功能说明:** -- 发送`sys.reboot`指令, 复位系统。 +- 发送 `sys.reboot` 指令, 复位系统。 **传入参数:** @@ -163,7 +166,9 @@ int reboot(); ## ApiAudio Class -`M5ModuleLLM`的内部成员`ApiAudio audio`用于控制AUDIO单元的初始化和配置。 +注意:此函数在 1.3 及之后版本已经弃用,改为内部自动配置。 + +`M5ModuleLLM` 的内部成员 `ApiAudio audio` 用于控制 Audio 单元的初始化和配置。 ### setup @@ -175,7 +180,7 @@ String setup(ApiAudioSetupConfig_t config = ApiAudioSetupConfig_t(), String requ **功能说明:** -- 初始化Audio单元, 开启系统声卡。(使用KWS和TTS前需开启该功能) +- 初始化 Audio 单元, 开启系统声卡。(使用 KWS 和 TTS 前需开启该功能) **传入参数:** @@ -196,32 +201,78 @@ struct ApiAudioSetupConfig_t { }; ``` -| 参数 | 描述 | 输入值 | -|------------|----------|---------------------------------| -| capcard | 麦克风声卡的索引 | 系统默认声卡:0 | -| capdevice | 麦克风设备索引 | 板载硅麦:0 | -| capVolume | 输入的音量 | 0.0~10.0 (1禁用: false | +| frame_width | 采集图像的宽 | 320 | +| frame_height | 采集图像的高 | 320 | + +**返回值:** + +- String: + - 
camera_work_id: camera 单元 work_id + +## ApiKws Class + +`M5ModuleLLM` 的内部成员 `ApiKws kws` 用于控制 KWS 单元的初始化和配置。 + +### setup + +**函数原型:** + +```cpp +String setup(ApiKwsSetupConfig_t config = ApiKwsSetupConfig_t(), String request_id = "kws_setup", + String language = "en_US");``` + **功能说明:** - 初始化KWS单元, 并配置唤醒关键字。 @@ -244,32 +295,77 @@ struct ApiKwsSetupConfig_t { }; ``` -| 参数 | 描述 | 输入值 | -|----------|------------|-------------------------------------------------------------------------------------------------------------------------------| -| model | 转换模型 | 英文模型: "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01"
中文模型: "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01" | -| kws | KWS唤醒词文本设置 | 不允许中文/英文混合, 英文要求全大写 | -| enoutput | 启用UART输出 | 启用: true
禁用: false | +| 参数 | 描述 | 输入值 | +| -------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------- | +| model | 转换模型 | 英文模型: "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01"
中文模型: "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01" | +| kws | KWS唤醒词文本设置 | 不允许中文/英文混合, 英文要求全大写 | +| enoutput | 启用UART输出 | 启用: true
禁用: false | **返回值:** - String: - kws_work_id: kws单元work_id +## ApiVad Class + +`M5ModuleLLM` 的内部成员 `ApiVad vad` 用于控制 VAD 单元的初始化和配置。 + +### setup + +**函数原型:** + +```cpp +String setup(ApiVadSetupConfig_t config = ApiVadSetupConfig_t(), String request_id = "vad_setup"); +``` + +**功能说明:** + +- 初始化 VAD 单元。 + +**传入参数:** + +ApiVadSetupConfig_t config: + +- VAD 单元初始化配置: +- String request_id: + - 会话id, 使用默认即可。 + +```cpp +struct ApiKwsSetupConfig_t { + String model = "silero-vad"; + String response_format = "vad.bool"; + String input = {"sys.pcm", "kws.1000"}; + bool enoutput = true; +}; +``` + +| 参数 | 描述 | 输入值 | +| -------- | ------------ | ------------------------------------------------------------------------------------------------------------------ | +| model | 转换模型 | 模型: "silero-vad"
| +| input | 输入 | KWS唤醒输入: "kws.xxx"(输入kws单元的work_id)
板载麦克风输入: "sys.pcm"
UART流式输入: "vad.wav.stream.base64" | +| enoutput | 启用UART输出 | 启用: true
禁用: false | + +**返回值:** + +- String: + - vad_work_id: vad 单元 work_id + ## ApiAsr Class -`M5ModuleLLM`的内部成员`ApiAsr asr`用于控制ASR单元的初始化和配置。 +`M5ModuleLLM` 的内部成员 `ApiAsr asr` 用于控制 ASR 单元的初始化和配置。 ### setup **函数原型:** ```cpp -String setup(ApiAsrSetupConfig_t config = ApiAsrSetupConfig_t(), String request_id = "asr_setup"); +String setup(ApiAsrSetupConfig_t config = ApiAsrSetupConfig_t(), String request_id = "asr_setup", + String language = "en_US"); ``` **功能说明:** -- 初始化ASR单元, 开启语音转文本功能。 +- 初始化 ASR 单元, 开启语音转文本功能。 **传入参数:** @@ -291,24 +387,71 @@ struct ApiAsrSetupConfig_t { }; ``` -| 参数 | 描述 | 输入值 | -|-----------------|---------------|---------------------------------------------------------------------------------------------------------------------| -| model | 转换模型 | 英文模型: "sherpa-ncnn-streaming-zipformer-20M-2023-02-17"
中文模型: "sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23" | -| response_format | 输出格式 | 普通输出: "asr.utf-8"
流式输出: "asr.utf-8.stream" | -| input | 输入 | KWS唤醒输入: "kws.xxx"(输入kws单元的work_id)
板载麦克风输入: "sys.pcm"
UART流式输入: "asr.wav.stream.base64" | -| rule1 | 唤醒到未识别到内容超时时间 | 单位:秒 | -| rule2 | 识别最大间隔时间 | 单位:秒 | -| rule3 | 识别最长超时时间 | 单位:秒 | -| enoutput | 启用UART输出 | 启用: true
禁用: false | +| 参数 | 描述 | 输入值 | +| --------------- | -------------------------- | --------------------------------------------------------------------------------------------------------------------------- | +| model | 转换模型 | 英文模型: "sherpa-ncnn-streaming-zipformer-20M-2023-02-17"
中文模型: "sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23" | +| response_format | 输出格式 | 普通输出: "asr.utf-8"
流式输出: "asr.utf-8.stream" | +| input | 输入 | KWS唤醒输入: "kws.xxx"(输入kws单元的work_id)
板载麦克风输入: "sys.pcm"
UART流式输入: "asr.wav.stream.base64" | +| rule1 | 唤醒到未识别到内容超时时间 | 单位:秒 | +| rule2 | 识别最大间隔时间 | 单位:秒 | +| rule3 | 识别最长超时时间 | 单位:秒 | +| enoutput | 启用UART输出 | 启用: true
禁用: false | **返回值:** - String: - - asr_work_id: asr单元work_id + - asr_work_id: asr 单元 work_id + +## ApiWhisper Class + +`M5ModuleLLM`的内部成员`ApiWhisper whisper`用于控制 Whisper 单元的初始化和配置。 + +### setup + +**函数原型:** + +```cpp +String setup(ApiWhisperSetupConfig_t config = ApiWhisperSetupConfig_t(), String request_id = "asr_setup", +``` + +**功能说明:** + +- 初始化 Whisper 单元, 开启语音转文本功能。 + +**传入参数:** + +ApiWhisperSetupConfig_t config: + +- Whisper 单元初始化配置: +- String request_id: + - 会话id, 使用默认即可。 + +```cpp +struct ApiAsrSetupConfig_t { + String model = "whisper-tiny"; + String response_format = "asr.utf-8"; + String input = [ "sys.pcm", "kws.1000", "vad.1001" ]; + String language = "en"; + bool enoutput = true; +}; +``` + +| 参数 | 描述 | 输入值 | +| --------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------ | +| model | 转换模型 | 模型: "whisper-tiny"
| +| response_format | 输出格式 | 普通输出: "asr.utf-8"
| +| input | 输入 | KWS唤醒输入: "kws.xxx"(输入kws单元的work_id)
板载麦克风输入: "sys.pcm"
UART流式输入: "asr.wav.stream.base64" | +| language | 用于语言识别的语言 | 默认 “en”
可选 “zh”, "ja" | +| enoutput | 启用UART输出 | 启用: true
禁用: false | + +**返回值:** + +- String: + - whisper_work_id: whisper 单元 work_id ## ApiLlm Class -`M5ModuleLLM`的内部成员`ApiLlm llm`用于控制LLM单元的初始化和配置。 +`M5ModuleLLM` 的内部成员 `ApiLlm llm` 用于控制 LLM 单元的初始化和配置。 ### setup @@ -320,7 +463,7 @@ String setup(ApiLlmSetupConfig_t config = ApiLlmSetupConfig_t(), String request_ **功能说明:** -- 初始化LLM单元, 支持配置LLM单元输入输出数据方式。 +- 初始化 LLM 单元, 支持配置 LLM 单元输入输出数据方式。 **传入参数:** @@ -334,22 +477,20 @@ struct ApiLlmSetupConfig_t { String prompt; String model = "qwen2.5-0.5B-prefill-20e"; String response_format = "llm.utf-8.stream"; - String input = "llm.utf-8"; + String input = ["llm.utf-8", "kws.1000"]; bool enoutput = true; - bool enkws = true; int max_token_len = 127; }; ``` -| 参数 | 描述 | 输入值 | -|-----------------|-------------------------|-------------------------------------------------------------------------------------------------| -| model | 转换模型 | 预置模型 "qwen2.5-0.5B-prefill-20e" | -| response_format | 输出格式 | 普通输出: "llm.utf-8"
流式输出: "llm.utf-8.stream" | -| input | 输入 | ASR输入: "asr.xxx"(输入asr单元的work_id)
UART输入: "llm.utf-8"
KWS唤醒打断: "kws.xxx"(输入kws单元的work_id) | -| enkws | KWS唤醒是否终止过程 | KWS打断过程: true
KWS不打断过程: false | -| max_length | 配置最大输出token(最大返回推理文本长度) | 最大值: 1024, 推荐使用127 | -| prompt | 模型初始化提示词 | String | -| enoutput | 启用UART输出 | 启用: true
禁用: false | +| 参数 | 描述 | 输入值 | +| --------------- | --------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | +| model | 转换模型 | 预置模型 "qwen2.5-0.5B-prefill-20e" | +| response_format | 输出格式 | 普通输出: "llm.utf-8"
流式输出: "llm.utf-8.stream" | +| input | 输入 | ASR输入: "asr.xxx"(输入asr单元的work_id)
UART输入: "llm.utf-8"
KWS唤醒打断: "kws.xxx"(输入kws单元的work_id) | +| max_length | 配置最大输出token(最大返回推理文本长度) | 最大值: 1023 | +| prompt | 模型初始化系统提示词 | String | +| enoutput | 启用UART输出 | 启用: true
禁用: false | **返回值:** @@ -366,7 +507,7 @@ int inference(String work_id, String input, String request_id = "llm_inference") **功能说明:** -- 输入数据, 开始推理。返回结果内容将进入`M5ModuleLLM.msg`中的`responseMsgList`列表容器中。 +- 输入数据, 开始推理。返回结果内容将进入 `M5ModuleLLM.msg` 中的 `responseMsgList` 列表容器中。 **传入参数:** @@ -392,16 +533,16 @@ int inferenceAndWaitResult(String work_id, String input, std::function流式输出: "vlm.utf-8.stream" | +| input | 输入 | ASR输入: "asr.xxx"(输入asr单元的work_id)
UART输入: "llm.utf-8"
KWS唤醒打断: "kws.xxx"(输入kws单元的work_id) | +| max_length | 配置最大输出token(最大返回推理文本长度) | 最大值: 1023 | +| prompt | 模型初始化系统提示词 | String | +| enoutput | 启用UART输出 | 启用: true
禁用: false | + +**返回值:** + +- String: + - vlm_work_id: vlm 单元 work_id + +### inference + +**函数原型:** + +```cpp +int inference(String work_id, String input, String request_id = "vlm_inference"); +``` + +**功能说明:** + +- 输入数据, 开始推理。返回结果内容将进入 `M5ModuleLLM.msg` 中的 `responseMsgList` 列表容器中。 + +**传入参数:** + +- String work_id: + - 调用的LLM单元work_id +- String input: + - 输入文本 +- String request_id: + - 会话ID, 当同时存在多个会话的时候用于区分。 + +**返回值:** + +- int: + - MODULE_LLM_OK / Error Code + +### inferenceAndWaitResult + +**函数原型:** + +```cpp +int inferenceAndWaitResult(String work_id, String input, std::function onResult, + uint32_t timeout = 5000, String request_id = "vlm_inference"); +``` + +**功能说明:** + +- 输入数据, 开始推理。并阻塞等待返回结果, 然后调用 callback 函数。 + +**传入参数:** + +- String work_id: + - 调用的 VLM 单元 work_id +- String input: + - 输入文本 +- void onResult(String&) + - 推理结果 callback 函数 +- uint32_t timeout: + - 等待推理超时时间 +- String request_id: + - 会话 ID, 当同时存在多个会话的时候用于区分。 + +**返回值:** + - int: - MODULE_LLM_OK / Error Code ## ApiTts Class -`M5ModuleLLM`的内部成员`ApiTts tts`用于控制TTS单元的初始化和配置。 +`M5ModuleLLM` 的内部成员 `ApiTts tts` 用于控制 TTS 单元的初始化和配置。 ### setup @@ -439,19 +685,19 @@ ApiTtsSetupConfig_t config: ```cpp struct ApiTtsSetupConfig_t { String model = "single_speaker_english_fast"; - String response_format = "tts.base64.wav"; - String input = "tts.utf-8.stream"; - bool enoutput = true; - bool enkws = true; + String response_format = "sys.pcm"; + String input = ["tts.utf-8.stream", "kws.1000"]; + bool enoutput = false; + bool enaudio = true; }; ``` -| 参数 | 描述 | 输入值 | -|----------|-------------|------------------------------------------------------------------------------------------| -| model | 转换模型 | 英文模型: "single_speaker_english_fast"
中文模型: "single_speaker_fast" | -| input | 输入 | LLM输入: "llm.xxx"(输入llm单元的work_id)
UART输入: "tts.utf-8"
UART流式输入: "tts.utf-8.stream" | -| enkws | KWS唤醒是否终止过程 | KWS打断过程: true
KWS不打断过程: false | -| enoutput | 启用UART输出 | 启用: true
禁用: false | +| 参数 | 描述 | 输入值 | +| -------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | +| model | 转换模型 | 英文模型: "single_speaker_english_fast"
中文模型: "single_speaker_fast" | +| input | 输入 | LLM输入: "llm.xxx"(输入llm单元的work_id)
UART输入: "tts.utf-8"
UART流式输入: "tts.utf-8.stream"
KWS唤醒打断: "kws.xxx"(输入kws单元的work_id) | +| enoutput | 启用UART输出 | 启用: true
禁用: false | +| enaudio | 启用扬声器播放 | 启用: true
禁用: false | **返回值:** @@ -468,7 +714,7 @@ int inference(String work_id, String input, uint32_t timeout = 0, String request **功能说明:** -- 输入数据, 开始推理转换, 完成后将自动播放至扬声器。 +- 输入数据, 开始推理转换, 完成后扬声器将自动播放。 **传入参数:** @@ -486,9 +732,130 @@ int inference(String work_id, String input, uint32_t timeout = 0, String request - int: - MODULE_LLM_OK / Error Code +## ApiMelotts Class + +`M5ModuleLLM` 的内部成员 `ApiMelotts melotts` 用于控制 Melotts 单元的初始化和配置。 + +### setup + +**函数原型:** + +```cpp +String setup(ApiMelottsSetupConfig_t config = ApiMelottsSetupConfig_t(), String request_id = "melotts_setup", + String language = "en_US"); +``` + +**功能说明:** + +- 初始化 Melotts 单元, 开启文本转语音功能。 + +**传入参数:** + +ApiMelottsSetupConfig_t config: + +- Melotts单元初始化配置: +- String request_id: + - 会话id, 使用默认即可。 + +```cpp +struct ApiMelottsSetupConfig_t { + String model = "melotts_zh-cn"; + String response_format = "sys.pcm"; + std::vector input = {"tts.utf-8.stream"}; + bool enoutput = false; + bool enaudio = true; +}; +``` + +| 参数 | 描述 | 输入值 | +| -------- | -------------- | ------------------------------------------------------------------------------------------------------------- | +| model | 转换模型 | 中英文模型: "melotts_zh-cn"
中文模型: "single_speaker_fast" | +| input | 输入 | LLM输入: "llm.xxx"(输入llm单元的work_id)
UART输入: "melotts.utf-8"
UART流式输入: "melotts.utf-8.stream" | +| enoutput | 启用UART输出 | 启用: true
禁用: false | +| enaudio | 启用扬声器播放 | 启用: true
禁用: false | + +**返回值:** + +- String: + - melotts_work_id: melotts 单元 work_id + +### inference + +**函数原型:** + +```cpp +int inference(String work_id, String input, uint32_t timeout = 0, String request_id = "tts_inference"); +``` + +**功能说明:** + +- 输入数据, 开始推理转换, 完成后扬声器将自动播放。 + +**传入参数:** + +- String work_id: + - 调用的 Melotts 单元work_id +- String input: + - 输入文本 +- uint32_t timeout: + - 等待推理超时时间 +- String request_id: + - 会话ID, 当同时存在多个会话的时候用于区分。 + +**返回值:** + +- int: + - MODULE_LLM_OK / Error Code + +## ApiYolo Class + +`M5ModuleLLM` 的内部成员 `ApiYolo yolo` 用于控制 Yolo 单元的初始化和配置。 + +### setup + +**函数原型:** + +```cpp +String setup(ApiYoloSetupConfig_t config = ApiYoloSetupConfig_t(), String request_id = "yolo_setup"); +``` + +**功能说明:** + +- 初始化 Yolo 单元, 开启图像检测功能。 + +**传入参数:** + +ApiYoloSetupConfig_t config: + +- Yolo 单元初始化配置: +- String request_id: + - 会话id, 使用默认即可。 + +```cpp +struct ApiYoloSetupConfig_t { + String model = "yolo11n"; + String response_format = "yolo.box.stream"; + std::vector input = {"yolo.jpeg.base64"}; + bool enoutput = true; +}; +``` + +| 参数 | 描述 | 输入值 | +| --------------- | ------------ | --------------------------------------------------------------------------------------------- | +| model | 转换模型 | 检测模型: "yolo11n"
姿态模型: "yolo11n-pose"
手部姿态模型: "yolo11n-hand-pose" | +| response_format | 输出格式 | 检测输出: "yolo.box.stream"
姿态输出: "yolo.pose.stream" | +| input | 输入 | UVC 输入: "camera.xxx"(输入 camera 单元的 work_id)
UART流式输入: "yolo.jpeg.base64.stream" | +| enoutput | 启用UART输出 | 启用: true
禁用: false | + +**返回值:** + +- String: + - yolo_work_id: yolo 单元 work_id + ## ModuleMsg Class -`M5ModuleLLM`的内部成员`ModuleMsg msg`提供了`responseMsgList`容器用于用于缓存接收LLM Module返回的各种信息。参考以下案例,在主循环中遍历获取返回结果。 +`M5ModuleLLM` 的内部成员 `ModuleMsg msg` 提供了 `responseMsgList` 容器用于用于缓存接收 LLM Module +返回的各种信息。参考以下案例,在主循环中遍历获取返回结果。 ```cpp void loop() @@ -520,10 +887,10 @@ void loop() ## VoiceAssistant Class -`M5ModuleLLM_VoiceAssistant`用于快速创建LLM语音助手实例, 快速实现KWS(语音唤醒)->ASR(语音转文本)->LLM(大模型推理)->TTS( -文本转语音)。 +`M5ModuleLLM_VoiceAssistant` 用于快速创建 LLM 语音助手实例, 快速实现 KWS(语音唤醒)->ASR(语音转文本)->LLM(大模型推理)-> +TTS(文本转语音)。 -- 初始化时候只需要将`M5ModuleLLM`实例传入构造函数, 并注册对应事件的回调函数即可完成语音助手创建。 +- 初始化时候只需要将 `M5ModuleLLM` 实例传入构造函数, 并注册对应事件的回调函数即可完成语音助手创建。 ```cpp /* @@ -637,4 +1004,3 @@ enum ModuleLLMErrorCode_t { MODULE_LLM_ERROR_NONE = -99, }; ``` - diff --git a/docs/en.md b/docs/en.md index 1714e53..b1d7f57 100644 --- a/docs/en.md +++ b/docs/en.md @@ -1,41 +1,1000 @@ -# LLM Module Arduino Quick Start +# M5Module-LLM Arduino API -## Overview +[M5Module-LLM](https://github.com/m5stack/M5Module-LLM) Arduino Driver Library API Documentation. -The `LLM Module` can be used with various M5 controllers. This tutorial demonstrates how to control the LLM Module using the `M5Core` series in the `Arduino IDE` with the LLM Module driver library. +## M5ModuleLLM Class - +`M5ModuleLLM` is used to initialize the LLM Module and provides internal members for quick initialization of various LLM units, making it easier to build applications according to your needs. -## Environment Setup +```cpp +class M5ModuleLLM { +public: + bool begin(Stream * targetPort); + bool checkConnection(); + void update(); -- 1.Arduino IDE Installation: Refer to the [Arduino IDE Installation Guide](/en/arduino/arduino_ide) to complete the IDE installation. 
+ m5_module_llm::ApiSys sys; + m5_module_llm::ApiLlm llm; + m5_module_llm::ApiAudio audio; + m5_module_llm::ApiTts tts; + m5_module_llm::ApiMelotts melotts; + m5_module_llm::ApiKws kws; + m5_module_llm::ApiAsr asr; + m5_module_llm::ApiYolo yolo; + m5_module_llm::ApiVad vad; + m5_module_llm::ApiWhisper whisper; + m5_module_llm::ApiDepthAnything depthanything; + m5_module_llm::ModuleMsg msg; + m5_module_llm::ModuleComm comm; +private: +}; +``` -- 2.Board Manager Installation: Refer to the [Basic Environment Setup Guide](/en/arduino/arduino_board) to complete the M5Stack board manager installation and select the `M5Core` development board. +### begin - +**Function Prototype:** -- 3.Library Installation: Refer to the [Library Management Guide](/en/arduino/arduino_library) to install the `LLM Module` driver library. (Follow prompts to install the dependency library `M5Unified`) +```cpp +bool begin(Stream* targetPort); +``` - - +**Function Description:** -## Program Compilation & Upload +- Initializes the LLM Module UART interface configuration. -Open the example program "kws_asr" in the driver library, click the upload button, and the program will automatically compile and upload.The wake-up word used in the example program is "HELLO". After waiting for the device to be initialized, it will be woken up using the keyword. +**Parameters:** - - - - +- Stream\* targetPort: + - Pass the Serial pointer. -- Examples: - - `kws_asr`: Uses KWS to wake up and triggers ASR for speech-to-text conversion. (KWS+ASR) - - `text_assistant`: Inputs text into the LLM model, performs inference, and outputs the result in text form. (LLM) - - `tts`: Uses the TTS unit to convert text to speech for playback. (TTS) - - `voice_assistant`: Uses KWS to wake up, triggers ASR for speech-to-text conversion, inputs the converted text into the LLM for inference, and outputs the inference result through TTS as speech.
(KWS+ASR+LLM+TTS) +**Return Value:** -## Related Links +- bool: + - true: Initialization successful + - false: Initialization failed -- [LLM Module Arduino Lib](https://github.com/m5stack/M5Module-LLM) -- [LLM Module Arduino Lib API](/en/guide/llm/llm/arduino_api) +### checkConnection +**Function Prototype:** + +```cpp +bool checkConnection(); +``` + +**Function Description:** + +- Sends the `sys.ping` command to check the connection status of the LLM Module. + +**Parameters:** + +- None + +**Return Value:** + +- bool: + - true: Module responds + - false: No response from module + +### update + +**Function Prototype:** + +```cpp +void update(); +``` + +**Function Description:** + +- Pulls the LLM Module UART response data, this API should be included in the Loop and executed continuously. + +**Parameters:** + +- None + +**Return Value:** + +- None + +## ApiSys Class + +The internal member `ApiSys sys` of `M5ModuleLLM` is used to control the SYS unit, enabling operations like system reset. + +### ping + +**Function Prototype:** + +```cpp +int ping(); +``` + +**Function Description:** + +- Sends the `sys.ping` command to check the connection status of the LLM Module. + +**Parameters:** + +- None + +**Return Value:** + +- int: + - MODULE_LLM_OK / Error Code + +### reset + +**Function Prototype:** + +```cpp +int reset(bool waitResetFinish = true); +``` + +**Function Description:** + +- Sends the `sys.reset` command to reset the software service. + +**Parameters:** + +- bool waitResetFinish: + - true: Blocks and waits for reset to finish + - false: Performs reset without blocking + +**Return Value:** + +- int: + - MODULE_LLM_OK / Error Code + +### reboot + +**Function Prototype:** + +```cpp +int reboot(); +``` + +**Function Description:** + +- Sends the `sys.reboot` command to reboot the system. 
+ +**Parameters:** + +- None + +**Return Value:** + +- int: + - MODULE_LLM_OK / Error Code + +## ApiAudio Class + +Note: This function has been deprecated in version 1.3 and later, and is now automatically configured internally. + +The internal member `ApiAudio audio` of `M5ModuleLLM` is used to control the initialization and configuration of the Audio unit. + +### setup + +**Function prototype:** + +```cpp +String setup(ApiAudioSetupConfig_t config = ApiAudioSetupConfig_t(), String request_id = "audio_setup"); +``` + +**Function description:** + +- Initializes the Audio unit and activates the system sound card. (This feature must be enabled before using KWS and TTS) + +**Parameters:** + +ApiAudioSetupConfig_t config: + +- LLM unit initialization configuration: +- String request_id: + - Session ID, default can be used. + +```cpp +struct ApiAudioSetupConfig_t { + int capcard = 0; + int capdevice = 0; + float capVolume = 0.5; + int playcard = 0; + int playdevice = 1; + float playVolume = 0.15; +}; +``` + +| Parameter | Description | Input Values | +| ----------- | ----------------- | ------------------------------------- | +| capcard | Microphone sound card index | Default sound card: 0 | +| capdevice | Microphone device index | Onboard silicon microphone: 0 | +| capVolume | Input volume | 0.0~10.0 (1Disable: false | +| frame_width | Image width | 320 | +| frame_height | Image height | 320 | + +**Return Value:** + +- String: + - camera_work_id: camera unit work_id + +## ApiKws Class + +The internal member `ApiKws kws` of `M5ModuleLLM` is used to control the initialization and configuration of the KWS unit. + +### setup + +**Function Prototype:** + +```cpp +String setup(ApiKwsSetupConfig_t config = ApiKwsSetupConfig_t(), String request_id = "kws_setup", + String language = "en_US");``` + +**Function Description:** + +- Initializes the KWS unit and configures the wake-up keyword. 
+ +**Parameters:** + +ApiKwsSetupConfig_t config: + +- KWS unit initialization configuration: +- String request_id: + - Session ID, default can be used. + +```cpp +struct ApiKwsSetupConfig_t { + String kws = "HELLO"; + String model = "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01"; + String response_format = "kws.bool"; + String input = "sys.pcm"; + bool enoutput = true; +}; +``` + +| Parameter | Description | Input Values | +| --------- | ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- | +| model | Conversion Model | English Model: "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01"
Chinese Model: "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01" | +| kws | KWS Wake-up Word Text | No mixing of Chinese and English; English must be in uppercase | +| enoutput | Enable UART Output | Enable: true
 Disable: false | + +**Return Value:** + +- String: + - kws_work_id: KWS unit work_id + +## ApiVad Class + +The internal member `ApiVad vad` of `M5ModuleLLM` is used to control the initialization and configuration of the VAD unit. + +### setup + +**Function Prototype:** + +```cpp +String setup(ApiVadSetupConfig_t config = ApiVadSetupConfig_t(), String request_id = "vad_setup"); +``` + +**Function Description:** + +- Initializes the VAD unit. + +**Parameters:** + +ApiVadSetupConfig_t config: + +- VAD unit initialization configuration: +- String request_id: + - Session ID; the default can be used. + +```cpp +struct ApiVadSetupConfig_t { + String model = "silero-vad"; + String response_format = "vad.bool"; + String input = {"sys.pcm", "kws.1000"}; + bool enoutput = true; +}; +``` + +| Parameter | Description | Input Values | +| --------- | --------------- | --------------------------------------------------------------------------------------------------------------------- | +| model | Conversion Model | Model: "silero-vad"
| +| input | Input | KWS Wake-up Input: "kws.xxx" (input the KWS unit's work_id)
Onboard Microphone Input: "sys.pcm"
UART Stream Input: "vad.wav.stream.base64" | +| enoutput | Enable UART Output | Enable: true
Disable: false | + +**Return Value:** + +- String: + - vad_work_id: VAD unit work_id + +## ApiAsr Class + +The internal member `ApiAsr asr` of `M5ModuleLLM` is used to control the initialization and configuration of the ASR unit. + +### setup + +**Function Prototype:** + +```cpp +String setup(ApiAsrSetupConfig_t config = ApiAsrSetupConfig_t(), String request_id = "asr_setup", + String language = "en_US"); +``` + +**Function Description:** + +- Initializes the ASR unit and enables speech-to-text functionality. + +**Input Parameters:** + +ApiAsrSetupConfig_t config: + +- ASR unit initialization configuration: +- String request_id: + - Session ID, the default can be used. + +```cpp +struct ApiAsrSetupConfig_t { + String model = "sherpa-ncnn-streaming-zipformer-20M-2023-02-17"; + String response_format = "asr.utf-8.stream"; + String input = ["sys.pcm", "kws.1000"]; + bool enoutput = true; + float rule1 = 2.4; + float rule2 = 1.2; + float rule3 = 30.0; +}; +``` + +| Parameter | Description | Input Values | +| --------------- | ------------------------------------ | --------------------------------------------------------------------------------------------------------------------------- | +| model | Conversion model | English Model: "sherpa-ncnn-streaming-zipformer-20M-2023-02-17"
Chinese Model: "sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23" | +| response_format | Output format | Normal output: "asr.utf-8"
Stream output: "asr.utf-8.stream" | +| input | Input | KWS wake input: "kws.xxx" (input kws unit work_id)
Onboard microphone input: "sys.pcm"
UART stream input: "asr.wav.stream.base64" | +| rule1 | Timeout for unrecognized content wake | Unit: seconds | +| rule2 | Maximum recognition interval | Unit: seconds | +| rule3 | Maximum recognition timeout | Unit: seconds | +| enoutput | Enable UART output | Enable: true
 Disable: false | + +**Return Value:** + +- String: + - asr_work_id: ASR unit work_id + +## ApiWhisper Class + +The internal member `ApiWhisper whisper` of `M5ModuleLLM` is used to control the initialization and configuration of the Whisper unit. + +### setup + +**Function Prototype:** + +```cpp +String setup(ApiWhisperSetupConfig_t config = ApiWhisperSetupConfig_t(), String request_id = "asr_setup", +``` + +**Function Description:** + +- Initializes the Whisper unit and enables speech-to-text functionality. + +**Input Parameters:** + +ApiWhisperSetupConfig_t config: + +- Whisper unit initialization configuration: +- String request_id: + - Session ID, the default can be used. + +```cpp +struct ApiWhisperSetupConfig_t { + String model = "whisper-tiny"; + String response_format = "asr.utf-8"; + String input = [ "sys.pcm", "kws.1000", "vad.1001" ]; + String language = "en"; + bool enoutput = true; +}; +``` + +| Parameter | Description | Input Values | +| --------------- | ------------------------------------ | --------------------------------------------------------------------------------------------------------------------------- | +| model | Conversion model | Model: "whisper-tiny"
| +| response_format | Output format | Normal output: "asr.utf-8"
| +| input | Input | KWS wake input: "kws.xxx" (input kws unit work_id)
Onboard microphone input: "sys.pcm"
 UART stream input: "asr.wav.stream.base64" | +| language | Language used for speech recognition | Default: "en"
Optional: "zh", "ja" | +| enoutput | Enable UART output | Enable: true
Disable: false | + +**Return Value:** + +- String: + - whisper_work_id: Whisper unit work_id + +## ApiLlm Class + +The internal member `ApiLlm llm` of `M5ModuleLLM` is used to control the initialization and configuration of the LLM unit. + +### setup + +**Function prototype:** + +```cpp +String setup(ApiLlmSetupConfig_t config = ApiLlmSetupConfig_t(), String request_id = "llm_setup"); +``` + +**Function Description:** + +- Initializes the LLM unit and supports configuring the input and output data format for the LLM unit. + +**Parameters:** + +- ApiLlmSetupConfig_t config: + - LLM unit initialization configuration +- String request_id: + - Session ID, the default value can be used. + +```cpp +struct ApiLlmSetupConfig_t { + String prompt; + String model = "qwen2.5-0.5B-prefill-20e"; + String response_format = "llm.utf-8.stream"; + String input = ["llm.utf-8", "kws.1000"]; + bool enoutput = true; + int max_token_len = 127; +}; +``` + +| Parameter | Description | Input Values | +| ---------------- | ------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | +| model | Model used for conversion | Predefined model "qwen2.5-0.5B-prefill-20e" | +| response_format | Output format | Normal output: "llm.utf-8"
Streaming output: "llm.utf-8.stream" | +| input | Input format | ASR input: "asr.xxx" (work_id of the ASR unit)
UART input: "llm.utf-8"
 KWS wake-up interruption: "kws.xxx" (work_id of the KWS unit) | +| max_token_len | Configures the maximum output token length (maximum returned inference text length) | Maximum value: 1023 | +| prompt | Model initialization system prompt | String | +| enoutput | Enable UART output | Enable: true
 Disable: false | + +**Return Value:** + +- String: + - `llm_work_id`: LLM unit work ID + +### inference + +**Function prototype:** + +```cpp +int inference(String work_id, String input, String request_id = "llm_inference"); +``` + +**Function Description:** + +- Sends input data to start inference. The result will be placed in the `responseMsgList` container in `M5ModuleLLM.msg`. + +**Parameters:** + +- String work_id: + - Work ID of the LLM unit to call +- String input: + - Input text +- String request_id: + - Session ID, used to differentiate when multiple sessions exist. + +**Return Value:** + +- int: + - `MODULE_LLM_OK` / Error Code + +### inferenceAndWaitResult + +**Function prototype:** + +```cpp +int inferenceAndWaitResult(String work_id, String input, std::function<void(String&)> onResult, uint32_t timeout = 5000, String request_id = "llm_inference"); +``` + +**Function Description:** + +- Sends input data to start inference, blocks while waiting for the result, then calls the callback function. + +**Parameters:** + +- String work_id: + - Work ID of the LLM unit to call +- String input: + - Input text +- void onResult(String&) + - Callback function for inference result +- uint32_t timeout: + - Timeout for waiting for inference result +- String request_id: + - Session ID, used to differentiate when multiple sessions exist. + +**Return Value:** + +- int: + - `MODULE_LLM_OK` / Error Code + +## ApiVlm Class + +The internal member `ApiVlm vlm` of `M5ModuleLLM` is used to control the initialization and configuration of the VLM unit. + +### setup + +**Function prototype:** + +```cpp +String setup(ApiVlmSetupConfig_t config = ApiVlmSetupConfig_t(), String request_id = "vlm_setup"); +``` + +**Function Description:** + +- Initializes the VLM unit and supports configuring the input and output data format for the VLM unit.
+ +**Parameters:** + +- ApiVlmSetupConfig_t config: + - VLM unit initialization configuration +- String request_id: + - Session ID, the default value can be used. + +```cpp +struct ApiVlmSetupConfig_t { + String prompt; + String model = "internvl2.5-1B-ax630c"; + String response_format = "vlm.utf-8.stream"; + String input = ["vlm.utf-8", "kws.1000"]; + bool enoutput = true; + int max_token_len = 1023; +}; +``` + +| Parameter | Description | Input Values | +| ---------------- | ------------------------------------------- | ----------------------------------------------------------------------------------------------------------------- | +| model | Model used for conversion | Predefined model "internvl2.5-1B-ax630c" | +| response_format | Output format | Normal output: "vlm.utf-8"
Streaming output: "vlm.utf-8.stream" | +| input | Input format | ASR input: "asr.xxx" (work_id of the ASR unit)
 UART input: "vlm.utf-8"
 KWS wake-up interruption: "kws.xxx" (work_id of the KWS unit) | +| max_token_len | Configures the maximum output token length (maximum returned inference text length) | Maximum value: 1023 | +| prompt | Model initialization system prompt | String | +| enoutput | Enable UART output | Enable: true
 Disable: false | + +**Return Value:** + +- String: + - `vlm_work_id`: VLM unit work ID + +### inference + +**Function prototype:** + +```cpp +int inference(String work_id, String input, String request_id = "vlm_inference"); +``` + +**Function Description:** + +- Sends input data to start inference. The result will be placed in the `responseMsgList` container in `M5ModuleLLM.msg`. + +**Parameters:** + +- String work_id: + - Work ID of the VLM unit to call +- String input: + - Input text +- String request_id: + - Session ID, used to differentiate when multiple sessions exist. + +**Return Value:** + +- int: + - `MODULE_LLM_OK` / Error Code + +### inferenceAndWaitResult + +**Function prototype:** + +```cpp +int inferenceAndWaitResult(String work_id, String input, std::function<void(String&)> onResult, + uint32_t timeout = 5000, String request_id = "vlm_inference"); +``` + +**Function Description:** + +- Sends input data to start inference, blocks while waiting for the result, then calls the callback function. + +**Parameters:** + +- String work_id: + - Work ID of the VLM unit to call +- String input: + - Input text +- void onResult(String&) + - Callback function for inference result +- uint32_t timeout: + - Timeout for waiting for inference result +- String request_id: + - Session ID, used to differentiate when multiple sessions exist. + +**Return Value:** + +- int: + - `MODULE_LLM_OK` / Error Code + +## ApiTts Class + +The internal member `ApiTts tts` of `M5ModuleLLM` is used to control the initialization and configuration of the TTS unit. + +### setup + +**Function prototype:** + +```cpp +String setup(ApiTtsSetupConfig_t config = ApiTtsSetupConfig_t(), String request_id = "tts_setup"); +``` + +**Function description:** + +- Initializes the TTS unit and enables the text-to-speech functionality. + +**Parameters:** + +- ApiTtsSetupConfig_t config: + - TTS unit initialization configuration: +- String request_id: + - Session ID, use the default if not needed.
+ +```cpp +struct ApiTtsSetupConfig_t { + String model = "single_speaker_english_fast"; + String response_format = "sys.pcm"; + String input = ["tts.utf-8.stream", "kws.1000"]; + bool enoutput = false; + bool enaudio = true; +}; +``` + +| Parameter | Description | Input values | +| --------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | +| model | Conversion model | English model: "single_speaker_english_fast"
Chinese model: "single_speaker_fast" | +| input | Input | LLM input: "llm.xxx" (input LLM unit's work_id)
UART input: "tts.utf-8"
UART stream input: "tts.utf-8.stream"
KWS wake-up interrupt: "kws.xxx" (input KWS unit's work_id) | +| enoutput | Enable UART output | Enable: true
Disable: false | +| enaudio | Enable speaker playback | Enable: true
 Disable: false | + +**Return value:** + +- String: + - tts_work_id: TTS unit work_id + +### inference + +**Function prototype:** + +```cpp +int inference(String work_id, String input, uint32_t timeout = 0, String request_id = "tts_inference"); +``` + +**Function description:** + +- Sends the input data and starts the inference conversion. After completion, the speaker plays the audio automatically. + +**Parameters:** + +- String work_id: + - Work ID of the TTS unit to be called. +- String input: + - Input text. +- uint32_t timeout: + - Timeout for waiting for inference. +- String request_id: + - Session ID, used to distinguish between multiple sessions. + +**Return value:** + +- int: + - MODULE_LLM_OK / Error Code + +## ApiMelotts Class + +The internal member `ApiMelotts melotts` of `M5ModuleLLM` is used to control the initialization and configuration of the Melotts unit. + +### setup + +**Function prototype:** + +```cpp +String setup(ApiMelottsSetupConfig_t config = ApiMelottsSetupConfig_t(), String request_id = "melotts_setup", + String language = "en_US"); +``` + +**Function description:** + +- Initializes the Melotts unit and enables the text-to-speech functionality. + +**Parameters:** + +- ApiMelottsSetupConfig_t config: + - Melotts unit initialization configuration: +- String request_id: + - Session ID, use the default if not needed. + +```cpp +struct ApiMelottsSetupConfig_t { + String model = "melotts_zh-cn"; + String response_format = "sys.pcm"; + std::vector<String> input = {"tts.utf-8.stream"}; + bool enoutput = false; + bool enaudio = true; +}; +``` + +| Parameter | Description | Input values | +| --------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | +| model | Conversion model | Chinese and English model: "melotts_zh-cn"
Chinese model: "single_speaker_fast" | +| input | Input | LLM input: "llm.xxx" (input LLM unit's work_id)
UART input: "melotts.utf-8"
UART stream input: "melotts.utf-8.stream" | +| enoutput | Enable UART output | Enable: true
Disable: false | +| enaudio | Enable speaker playback | Enable: true
 Disable: false | + +**Return value:** + +- String: + - melotts_work_id: Melotts unit work_id + +### inference + +**Function prototype:** + +```cpp +int inference(String work_id, String input, uint32_t timeout = 0, String request_id = "tts_inference"); +``` + +**Function description:** + +- Sends the input data and starts the inference conversion. After completion, the speaker plays the audio automatically. + +**Parameters:** + +- String work_id: + - Work ID of the Melotts unit to be called. +- String input: + - Input text. +- uint32_t timeout: + - Timeout for waiting for inference. +- String request_id: + - Session ID, used to distinguish between multiple sessions. + +**Return value:** + +- int: + - MODULE_LLM_OK / Error Code + +## ApiYolo Class + +The internal member `ApiYolo yolo` of `M5ModuleLLM` is used to control the initialization and configuration of the Yolo unit. + +### setup + +**Function prototype:** + +```cpp +String setup(ApiYoloSetupConfig_t config = ApiYoloSetupConfig_t(), String request_id = "yolo_setup"); +``` + +**Function description:** + +- Initializes the Yolo unit and enables image detection functionality. + +**Parameters:** + +- ApiYoloSetupConfig_t config: + - Yolo unit initialization configuration: +- String request_id: + - Session ID, use the default if not needed. + +```cpp +struct ApiYoloSetupConfig_t { + String model = "yolo11n"; + String response_format = "yolo.box.stream"; + std::vector<String> input = {"yolo.jpeg.base64"}; + bool enoutput = true; +}; +``` + +| Parameter | Description | Input values | +| --------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | +| model | Conversion model | Detection model: "yolo11n"
Pose model: "yolo11n-pose"
Hand pose model: "yolo11n-hand-pose" | +| response_format | Output format | Detection output: "yolo.box.stream"
Pose output: "yolo.pose.stream" | +| input | Input | UVC input: "camera.xxx" (input camera unit's work_id)
UART stream input: "yolo.jpeg.base64.stream" | +| enoutput | Enable UART output | Enable: true
 Disable: false | + +**Return value:** + +- String: + - yolo_work_id: Yolo unit work_id + +## ModuleMsg Class + +The internal member `ModuleMsg msg` of `M5ModuleLLM` provides a container, `responseMsgList`, that caches the messages returned from the LLM Module. In the following example, the main loop iterates over this container to retrieve the results and then clears it. + +```cpp +void loop() +{ + module_llm.update(); + + // Handle response msg + for (auto& msg : module_llm.msg.responseMsgList) { + // KWS msg + if (msg.work_id == kws_work_id) { + Serial.printf(">> Keyword detected\n"); + } + + // ASR msg + if (msg.work_id == asr_work_id) { + if (msg.object == "asr.utf-8.stream") { + // Parse and get asr result + JsonDocument doc; + deserializeJson(doc, msg.raw_msg); + String asr_result = doc["data"]["delta"].as<String>(); + Serial.printf(">> %s\n", asr_result.c_str()); + } + } + } + module_llm.msg.responseMsgList.clear(); +} +``` + +## VoiceAssistant Class + +`M5ModuleLLM_VoiceAssistant` is used to quickly create an LLM voice assistant instance, providing a ready-made pipeline of KWS (keyword spotting) -> ASR (speech-to-text) -> LLM (large model inference) -> TTS (text-to-speech). + +- During initialization, simply pass a pointer to the `M5ModuleLLM` instance to the constructor, and register the corresponding event callback functions to complete the voice assistant setup.
+ +```cpp +/* + * SPDX-FileCopyrightText: 2024 M5Stack Technology CO LTD + * + * SPDX-License-Identifier: MIT + */ +#include <Arduino.h> +#include <M5Unified.h> +#include <M5ModuleLLM.h> + +M5ModuleLLM module_llm; +M5ModuleLLM_VoiceAssistant voice_assistant(&module_llm); + +/* On ASR data callback */ +void on_asr_data_input(String data, bool isFinish, int index) +{ + M5.Display.setTextColor(TFT_GREEN, TFT_BLACK); + M5.Display.printf(">> %s\n", data.c_str()); + + /* If ASR data is finish */ + if (isFinish) { + M5.Display.setTextColor(TFT_YELLOW, TFT_BLACK); + M5.Display.print(">> "); + } +}; + +/* On LLM data callback */ +void on_llm_data_input(String data, bool isFinish, int index) +{ + M5.Display.print(data); + + /* If LLM data is finish */ + if (isFinish) { + M5.Display.print("\n"); + } +}; + +void setup() +{ + M5.begin(); + M5.Display.setTextSize(2); + M5.Display.setTextScroll(true); + + /* Init module serial port */ + Serial2.begin(115200, SERIAL_8N1, 16, 17); // Basic + // Serial2.begin(115200, SERIAL_8N1, 13, 14); // Core2 + // Serial2.begin(115200, SERIAL_8N1, 18, 17); // CoreS3 + + /* Init module */ + module_llm.begin(&Serial2); + + /* Make sure module is connected */ + M5.Display.printf(">> Check ModuleLLM connection..\n"); + while (1) { + if (module_llm.checkConnection()) { + break; + } + } + + /* Begin voice assistant preset */ + M5.Display.printf(">> Begin voice assistant..\n"); + int ret = voice_assistant.begin("HELLO"); + if (ret != MODULE_LLM_OK) { + while (1) { + M5.Display.setTextColor(TFT_RED); + M5.Display.printf(">> Begin voice assistant failed\n"); + } + } + + /* Register on ASR data callback function */ + voice_assistant.onAsrDataInput(on_asr_data_input); + + /* Register on LLM data callback function */ + voice_assistant.onLlmDataInput(on_llm_data_input); + + M5.Display.printf(">> Voice assistant ready\n"); +} + +void loop() +{ + /* Keep voice assistant preset update */ + voice_assistant.update(); +} +``` + +## Error Code + +```cpp +enum ModuleLLMErrorCode_t { + MODULE_LLM_OK = 0, + 
MODULE_LLM_RESET_WARN = -1, + MODULE_LLM_JSON_FORMAT_ERROR = -2, + MODULE_LLM_ACTION_MATCH_FAILED = -3, + MODULE_LLM_INFERENCE_DATA_PUSH_FAILED = -4, + MODULE_LLM_MODEL_LOADING_FAILED = -5, + MODULE_LLM_UNIT_NOT_EXIST = -6, + MODULE_LLM_UNKNOWN_OPERATION = -7, + MODULE_LLM_UNIT_RESOURCE_ALLOCATION_FAILED = -8, + MODULE_LLM_UNIT_CALL_FAILED = -9, + MODULE_LLM_MODEL_INIT_FAILED = -10, + MODULE_LLM_MODEL_RUN_FAILED = -11, + MODULE_LLM_MODULE_NOT_INITIALISED = -12, + MODULE_LLM_MODULE_ALREADY_WORKING = -13, + MODULE_LLM_MODULE_NOT_WORKING = -14, + MODULE_LLM_NO_UPDATEABLE_MODULES = -15, + MODULE_LLM_NO_MODULES_AVAILABLE_FOR_UPDATE = -16, + MODULE_LLM_FILE_OPEN_FAILED = -17, + MODULE_LLM_WAIT_RESPONSE_TIMEOUT = -97, + MODULE_LLM_RESPONSE_PARSE_FAILED = -98, + MODULE_LLM_ERROR_NONE = -99, +}; +``` \ No newline at end of file From e601909e688f4bbff29f691ccd8010c14f337ac6 Mon Sep 17 00:00:00 2001 From: Forairaaaaa Date: Tue, 25 Mar 2025 09:18:47 +0800 Subject: [PATCH 15/16] Update library.json --- library.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library.json b/library.json index 22e3a1c..ccb85d9 100644 --- a/library.json +++ b/library.json @@ -14,7 +14,7 @@ "M5GFX": "*", "ArduinoJson": "*" }, - "version": "1.4.0", + "version": "1.5.0", "frameworks": "arduino", "platforms": "espressif32" -} \ No newline at end of file +} From 9b0f4d0206551387fa210612aeca8256d228da7e Mon Sep 17 00:00:00 2001 From: Forairaaaaa Date: Tue, 25 Mar 2025 09:19:01 +0800 Subject: [PATCH 16/16] Update library.properties --- library.properties | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library.properties b/library.properties index d08cea6..81929ac 100644 --- a/library.properties +++ b/library.properties @@ -1,5 +1,5 @@ name=M5ModuleLLM -version=1.4.0 +version=1.5.0 author=M5Stack maintainer=M5Stack sentence=M5ModuleLLM is a library for M5ModuleLLM @@ -8,4 +8,4 @@ category=Device Control url=https://github.com/m5stack/M5Module-LLM.git 
architectures=esp32 includes=M5ModuleLLM.h -depends=M5Unified,ArduinoJson \ No newline at end of file +depends=M5Unified,ArduinoJson