Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ option(MLLM_BUILD_ARM_BACKEND "Enable MLLM ARM backend" OFF)
option(MLLM_BUILD_OPENCL_BACKEND "Enable MLLM OpenCL backend" OFF)
option(MLLM_BUILD_CUDA_BACKEND "Enable MLLM CUDA backend" OFF)
option(MLLM_BUILD_QNN_BACKEND "Enable MLLM QNN backend" OFF)
option(MLLM_BUILD_SDK_C_BINDING "Enable MLLM C SDK binding" OFF)
option(MLLM_BUILD_SDK_C_BINDING "Enable MLLM C SDK binding" ON)
option(MLLM_BUILD_EXPERIMENTS "Enable MLLM experiments" OFF)

# Extension Enable
Expand Down
315 changes: 315 additions & 0 deletions docs/service/mllm_cli.rst

Large diffs are not rendered by default.

88 changes: 88 additions & 0 deletions mllm-cli/cmd/mllm-client/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.
package main

import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"io"
"log"
"mllm-cli/pkg/api"
"net/http"
"os"
"strings"
)

func main() {
serverURL := "http://localhost:8080/v1/chat/completions"
var history []api.RequestMessage
var currentSessionID string

fmt.Println("\n--- MLLM Refactored Interactive Client ---")
fmt.Println("Supports multi-turn sessions. Type /exit to quit.")
log.Printf("Connecting to: %s", serverURL)

reader := bufio.NewReader(os.Stdin)
for {
fmt.Print("\n> ")
userInput, _ := reader.ReadString('\n')
cleanedInput := strings.TrimSpace(userInput)
if cleanedInput == "" { continue }
if cleanedInput == "/exit" || cleanedInput == "/quit" { return }

history = append(history, api.RequestMessage{Role: "user", Content: cleanedInput})
apiRequest := api.OpenAIRequest{
Model: "Qwen3-0.6B-w4a32kai",
Messages: history,
Stream: true,
SessionID: currentSessionID,
}
requestBody, _ := json.Marshal(apiRequest)

req, _ := http.NewRequest("POST", serverURL, bytes.NewBuffer(requestBody))
Comment on lines +42 to +44
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Handle errors from json.Marshal and http.NewRequest.

Both json.Marshal and http.NewRequest can return errors, but they are silently ignored. This could lead to nil pointer dereferences or invalid requests.

Apply this diff:

-		requestBody, _ := json.Marshal(apiRequest)
+		requestBody, err := json.Marshal(apiRequest)
+		if err != nil {
+			log.Printf("ERROR: Failed to marshal request: %v", err)
+			history = history[:len(history)-1]
+			continue
+		}
 
-		req, _ := http.NewRequest("POST", serverURL, bytes.NewBuffer(requestBody))
+		req, err := http.NewRequest("POST", serverURL, bytes.NewBuffer(requestBody))
+		if err != nil {
+			log.Printf("ERROR: Failed to create request: %v", err)
+			history = history[:len(history)-1]
+			continue
+		}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
requestBody, _ := json.Marshal(apiRequest)
req, _ := http.NewRequest("POST", serverURL, bytes.NewBuffer(requestBody))
requestBody, err := json.Marshal(apiRequest)
if err != nil {
log.Printf("ERROR: Failed to marshal request: %v", err)
history = history[:len(history)-1]
continue
}
req, err := http.NewRequest("POST", serverURL, bytes.NewBuffer(requestBody))
if err != nil {
log.Printf("ERROR: Failed to create request: %v", err)
history = history[:len(history)-1]
continue
}
🤖 Prompt for AI Agents
In mllm-cli/cmd/mllm-client/main.go around lines 42 to 44, both json.Marshal and
http.NewRequest ignore returned errors; change the code to capture each error
into a variable, check it immediately, and handle failures (log or print a clear
error message and exit/return). Specifically, replace the blank-ignored error
for json.Marshal with "requestBody, err := json.Marshal(apiRequest)" and if err
!= nil handle it, then create the request with "req, err :=
http.NewRequest(...)" and check err != nil and handle it before using req.

req.Header.Set("Content-Type", "application/json")
req.Header.Set("Accept", "text/event-stream")
resp, err := http.DefaultClient.Do(req)
if err != nil {
log.Printf("ERROR: Request failed: %v", err)
history = history[:len(history)-1]
continue
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
bodyBytes, _ := io.ReadAll(resp.Body)
log.Printf("ERROR: Server returned status %s: %s", resp.Status, string(bodyBytes))
history = history[:len(history)-1]
continue
}

sessionIDFromHeader := resp.Header.Get("X-Session-ID")
if sessionIDFromHeader != "" && currentSessionID != sessionIDFromHeader {
currentSessionID = sessionIDFromHeader
log.Printf("[Session Manager] New session established. ID: %s", currentSessionID)
}

var fullResponse strings.Builder
scanner := bufio.NewScanner(resp.Body)
fmt.Print("Assistant: ")
for scanner.Scan() {
line := scanner.Text()
if strings.HasPrefix(line, "data: ") {
jsonData := strings.TrimPrefix(line, "data: ")
if jsonData == "[DONE]" { break }
var chunk api.OpenAIResponseChunk
if json.Unmarshal([]byte(jsonData), &chunk) == nil && len(chunk.Choices) > 0 {
content := chunk.Choices[0].Delta.Content
fmt.Print(content)
fullResponse.WriteString(content)
}
}
}
fmt.Println()
if err := scanner.Err(); err != nil { log.Printf("ERROR reading stream: %v", err) }
history = append(history, api.RequestMessage{Role: "assistant", Content: fullResponse.String()})
}
}
72 changes: 72 additions & 0 deletions mllm-cli/cmd/mllm-server/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.
package main

import (
"context"
"flag"
"log"
"mllm-cli/mllm"
pkgmllm "mllm-cli/pkg/mllm"
"mllm-cli/pkg/server"
"os"
"os/signal"
"path/filepath"
"syscall"
"time"
)

func main() {
modelPath := flag.String("model-path", "", "Path to the MLLM model directory.")
flag.Parse()

if *modelPath == "" {
log.Fatal("FATAL: --model-path argument is required.")
}

if !mllm.InitializeContext() {
log.Fatal("FATAL: InitializeContext failed!")
}
mllm.SetLogLevel(2)
if !mllm.StartService(4) {
log.Fatal("FATAL: StartService failed!")
}
defer mllm.StopService()
defer mllm.ShutdownContext()

mllmService := pkgmllm.NewService()

log.Printf("Loading model and creating session from: %s", *modelPath)
session, err := mllm.NewSession(*modelPath)
if err != nil {
log.Fatalf("FATAL: Failed to create session: %v", err)
}

sessionID := filepath.Base(*modelPath)
if !session.Insert(sessionID) {
session.Close()
log.Fatalf("FATAL: Failed to insert session with ID '%s'", sessionID)
}
mllmService.RegisterSession(sessionID, session)
log.Printf("Session created and registered successfully with ID: %s", sessionID)
Comment on lines +45 to +51
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Session ID derivation may cause collisions with multiple models.

Using filepath.Base(*modelPath) as the session ID could cause collisions if multiple models have the same directory name in different paths. For example, /path1/model and /path2/model would both get session ID "model".

Consider one of these approaches:

  1. Use the full model path or a hash:
-	sessionID := filepath.Base(*modelPath)
+	// Use a hash of the full path to avoid collisions
+	sessionID := fmt.Sprintf("%x", md5.Sum([]byte(*modelPath)))[:16]
  1. Allow explicit session ID via flag:
+	sessionID := flag.String("session-id", "", "Session ID (defaults to model directory name)")
 	flag.Parse()
 	// ...
-	sessionID := filepath.Base(*modelPath)
+	sid := *sessionID
+	if sid == "" {
+		sid = filepath.Base(*modelPath)
+	}

The second approach provides flexibility while maintaining the current default behavior.

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In mllm-cli/cmd/mllm-server/main.go around lines 45 to 51, deriving the
sessionID with filepath.Base(*modelPath) can collide for models with identical
directory names; change to accept an explicit session ID via a CLI flag and fall
back to a deterministic unique ID when not provided (for example: use the model
path combined with a short hash or a hash-only of the full model path, or the
base name concatenated with the first 8 chars of a SHA256 hex of *modelPath) so
IDs are unique; implement: add a new flag (e.g., --session-id), if set use that
value (validate allowed chars/length), otherwise compute sessionID = baseName +
"-" + shortHash(*modelPath) (or full-hash), then use that sessionID for
session.Insert/registration and update log messages accordingly.


httpServer := server.NewServer(":8080", mllmService)

go httpServer.Start()

shutdownChan := make(chan os.Signal, 1)
signal.Notify(shutdownChan, syscall.SIGINT, syscall.SIGTERM)
<-shutdownChan

log.Println("Received shutdown signal. Starting graceful shutdown...")

ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := httpServer.Shutdown(ctx); err != nil {
log.Printf("HTTP server shutdown failed: %v", err)
}

mllmService.Shutdown()

log.Println("Server gracefully stopped.")
}
8 changes: 6 additions & 2 deletions mllm-cli/go.mod
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
module mllm-cli

go 1.25.0
go 1.22.2

require github.com/charmbracelet/bubbles v0.21.0
require (
github.com/charmbracelet/bubbles v0.21.0
github.com/gorilla/websocket v1.5.3 //
)

require (
github.com/atotto/clipboard v0.1.4 // indirect
Expand All @@ -28,4 +31,5 @@ require (
golang.org/x/sys v0.35.0 // indirect
golang.org/x/term v0.34.0
golang.org/x/text v0.3.8 // indirect
github.com/google/uuid v1.6.0
)
108 changes: 104 additions & 4 deletions mllm-cli/mllm/c.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,31 @@
package mllm

/*
#cgo CFLAGS: -fPIC -I${SRCDIR}/SDK/include/
#cgo CFLAGS: -std=c11
#cgo LDFLAGS: -L${SRCDIR}/SDK/lib/
#cgo LDFLAGS: -lMllmSdkC
#cgo LDFLAGS: -Wl,-rpath ${SRCDIR}/SDK/lib
#cgo LDFLAGS: -lMllmSdkC -lMllmRT -lMllmCPUBackend

#include <mllm/mllm-c.h>
#include <stdlib.h>

static void* MllmCAny_get_v_custom_ptr(MllmCAny handle) {
return handle.v_custom_ptr;
}

static MllmCAny MllmCAny_set_v_custom_ptr_null(MllmCAny handle) {
handle.v_custom_ptr = NULL;
return handle;
}
Comment on lines +13 to +20
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Avoid peeking into C struct internals; add stable C API instead.

Accessing MllmCAny.v_custom_ptr directly is ABI-fragile. If the C struct layout changes, this breaks at compile or (worse) runtime. Prefer an official C helper (e.g., mllmAnyIsValid, mllmAnyNull) exported by the C SDK, or provide a dedicated C API to query/free validity instead of field access from Go.

*/
import "C"
import "unsafe"
import "fmt"
import "runtime"


type Session struct {
cHandle C.MllmCAny
sessionID string
}
Comment on lines +28 to +31
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Concurrent use-after-free risk; guard Session with locks and a closed flag.

Methods can race with Close(), leading to use-after-free of the C handle. Add an RWMutex and a closed flag; serialize Close vs. Insert/SendRequest/PollResponse/SessionID.

Apply this diff:

@@
-import "C"
-import "unsafe"
-import "fmt"
-import "runtime"
+import "C"
+import "unsafe"
+import "fmt"
+import "runtime"
+import "sync"
@@
 type Session struct {
     cHandle C.MllmCAny
     sessionID string
+    mu     sync.RWMutex
+    closed bool
 }
@@
-func (s *Session) Close() {
-    if C.MllmCAny_get_v_custom_ptr(s.cHandle) != nil {
-        fmt.Println("[Go Close] Mllm Session manually closed.") 
-        C.freeSession(s.cHandle)
-        s.cHandle = C.MllmCAny_set_v_custom_ptr_null(s.cHandle)
-        runtime.SetFinalizer(s, nil)
-    }
-}
+func (s *Session) Close() {
+    s.mu.Lock()
+    defer s.mu.Unlock()
+    if s.closed {
+        return
+    }
+    if C.MllmCAny_get_v_custom_ptr(s.cHandle) != nil {
+        fmt.Println("[Go Close] Mllm Session manually closed.")
+        C.freeSession(s.cHandle)
+        s.cHandle = C.MllmCAny_set_v_custom_ptr_null(s.cHandle)
+    }
+    s.closed = true
+    runtime.SetFinalizer(s, nil)
+}
@@
-func (s *Session) Insert(sessionID string) bool {
+func (s *Session) Insert(sessionID string) bool {
+    s.mu.Lock()
+    defer s.mu.Unlock()
+    if s.closed {
+        fmt.Println("[Go Insert] Error: session already closed.")
+        return false
+    }
     cSessionID := C.CString(sessionID)
     defer C.free(unsafe.Pointer(cSessionID))
     result := C.insertSession(cSessionID, s.cHandle)
     if isOk(result) {
         s.sessionID = sessionID 
     }
     return isOk(result)
 }
@@
-func (s *Session) SendRequest(jsonRequest string) bool {
-    if s.sessionID == "" {
+func (s *Session) SendRequest(jsonRequest string) bool {
+    s.mu.RLock()
+    defer s.mu.RUnlock()
+    if s.closed {
+        fmt.Println("[Go SendRequest] Error: session is closed.")
+        return false
+    }
+    if s.sessionID == "" {
         fmt.Println("[Go SendRequest] Error: sessionID is not set on this session.")
         return false 
     }
     cSessionID := C.CString(s.sessionID)
     cJsonRequest := C.CString(jsonRequest)
     defer C.free(unsafe.Pointer(cSessionID))
     defer C.free(unsafe.Pointer(cJsonRequest))
 
     result := C.sendRequest(cSessionID, cJsonRequest)
     return isOk(result)
 }
@@
-func (s *Session) PollResponse(requestID string) string { 
-    if requestID == "" {
+func (s *Session) PollResponse(requestID string) string { 
+    s.mu.RLock()
+    defer s.mu.RUnlock()
+    if s.closed {
+        fmt.Println("[Go PollResponse] Error: session is closed.")
+        return ""
+    }
+    if requestID == "" {
         fmt.Println("[Go PollResponse] Error: requestID cannot be empty.")
         return ""
     }
     cRequestID := C.CString(requestID)
     defer C.free(unsafe.Pointer(cRequestID))
 
     cResponse := C.pollResponse(cRequestID) 
     if cResponse == nil {
         return ""
     }
     defer C.freeResponseString(cResponse)
     
     return C.GoString(cResponse)
 }
@@
-func (s *Session) SessionID() string {
-    return s.sessionID
-}
+func (s *Session) SessionID() string {
+    s.mu.RLock()
+    defer s.mu.RUnlock()
+    return s.sessionID
+}

Also applies to: 76-83, 85-93, 95-107, 109-124, 126-128

🤖 Prompt for AI Agents
In mllm-cli/mllm/c.go around lines 28-31 (and similarly for ranges 76-83, 85-93,
95-107, 109-124, 126-128), the Session struct and its methods are vulnerable to
concurrent use-after-free when Close() runs while other methods access cHandle;
add a sync.RWMutex field (e.g., mu) and a boolean closed flag to Session;
acquire mu.RLock() at the start of Insert/SendRequest/PollResponse/SessionID and
return an error if closed, release RLock before returning; make Close() acquire
mu.Lock(), check and set closed=true, safely free the C handle and set it to nil
under the write lock, then release the lock; ensure all accesses to cHandle and
closed are done under the appropriate lock to serialize Close vs. other
operations.


func isOk(any C.MllmCAny) bool {
return C.isOk(any) == 1
Expand All @@ -26,3 +41,88 @@ func InitializeContext() bool {
func ShutdownContext() bool {
return isOk(C.shutdownContext())
}

func StartService(workerThreads int) bool {
result := C.startService(C.size_t(workerThreads))
return isOk(result)
}
Comment on lines +45 to +48
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Validate workerThreads to prevent size_t underflow on negative input.

Casting a negative int to C.size_t yields a huge value. Guard against workerThreads < 0.

Apply this diff:

 func StartService(workerThreads int) bool {
-    result := C.startService(C.size_t(workerThreads))
+    if workerThreads < 0 {
+        fmt.Println("[Go StartService] Error: workerThreads cannot be negative.")
+        return false
+    }
+    result := C.startService(C.size_t(workerThreads))
     return isOk(result)
 }
🤖 Prompt for AI Agents
In mllm-cli/mllm/c.go around lines 45 to 48, the StartService function casts
workerThreads directly to C.size_t which underflows for negative ints; validate
that workerThreads is non-negative before casting, returning false (or an error)
immediately if workerThreads < 0, otherwise cast to C.size_t and call
C.startService as before. Ensure the validation is explicit and documented with
a short comment so callers can see negative values are rejected.


func StopService() bool {
result := C.stopService()
return isOk(result)
}

func SetLogLevel(level int) {
C.setLogLevel(C.int(level))
}

func NewSession(modelPath string) (*Session, error) {
cModelPath := C.CString(modelPath)
defer C.free(unsafe.Pointer(cModelPath))

handle := C.createQwen3Session(cModelPath)
if !isOk(handle) {
return nil, fmt.Errorf("底层C API createQwen3Session 失败")
}
s := &Session{cHandle: handle}
runtime.SetFinalizer(s, func(s *Session) {
fmt.Println("[Go Finalizer] Mllm Session automatically released.")
C.freeSession(s.cHandle)
})

return s, nil
}

func (s *Session) Close() {
if C.MllmCAny_get_v_custom_ptr(s.cHandle) != nil {
fmt.Println("[Go Close] Mllm Session manually closed.")
C.freeSession(s.cHandle)
s.cHandle = C.MllmCAny_set_v_custom_ptr_null(s.cHandle)
runtime.SetFinalizer(s, nil)
}
}

func (s *Session) Insert(sessionID string) bool {
cSessionID := C.CString(sessionID)
defer C.free(unsafe.Pointer(cSessionID))
result := C.insertSession(cSessionID, s.cHandle)
if isOk(result) {
s.sessionID = sessionID
}
return isOk(result)
}

func (s *Session) SendRequest(jsonRequest string) bool {
if s.sessionID == "" {
fmt.Println("[Go SendRequest] Error: sessionID is not set on this session.")
return false
}
cSessionID := C.CString(s.sessionID)
cJsonRequest := C.CString(jsonRequest)
defer C.free(unsafe.Pointer(cSessionID))
defer C.free(unsafe.Pointer(cJsonRequest))

result := C.sendRequest(cSessionID, cJsonRequest)
return isOk(result)
}

func (s *Session) PollResponse(requestID string) string {
if requestID == "" {
fmt.Println("[Go PollResponse] Error: requestID cannot be empty.")
return ""
}
cRequestID := C.CString(requestID)
defer C.free(unsafe.Pointer(cRequestID))

cResponse := C.pollResponse(cRequestID)
if cResponse == nil {
return ""
}
defer C.freeResponseString(cResponse)

return C.GoString(cResponse)
}

func (s *Session) SessionID() string {
return s.sessionID
}
33 changes: 33 additions & 0 deletions mllm-cli/pkg/api/types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.
package api

type RequestMessage struct {
Role string `json:"role"`
Content string `json:"content"`
}

type OpenAIRequest struct {
Model string `json:"model"`
Messages []RequestMessage `json:"messages"`
Stream bool `json:"stream"`
EnableThinking bool `json:"enable_thinking,omitempty"`
Thinking bool `json:"thinking,omitempty"` // <-- 新增此行,用于接收客户端可能发送的 "thinking": true
SessionID string `json:"session_id,omitempty"`
}

type ResponseDelta struct {
Content string `json:"content"`
}

type ResponseChoice struct {
Delta ResponseDelta `json:"delta"`
}

type OpenAIResponseChunk struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
Model string `json:"model"`
Choices []ResponseChoice `json:"choices"`
}
Loading