UbiquitousLearning · chenghuaWang · Nov 4, 2025 · Oct 29, 2025 · Nov 4, 2025 · coderabbitai
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -19,7 +19,7 @@ option(MLLM_BUILD_ARM_BACKEND "Enable MLLM ARM backend" OFF)
 option(MLLM_BUILD_OPENCL_BACKEND "Enable MLLM OpenCL backend" OFF)
 option(MLLM_BUILD_CUDA_BACKEND "Enable MLLM CUDA backend" OFF)
 option(MLLM_BUILD_QNN_BACKEND "Enable MLLM QNN backend" OFF)
-option(MLLM_BUILD_SDK_C_BINDING "Enable MLLM C SDK binding" OFF)
+option(MLLM_BUILD_SDK_C_BINDING "Enable MLLM C SDK binding" ON)
 option(MLLM_BUILD_EXPERIMENTS "Enable MLLM experiments" OFF)
 
 # Extension Enable

diff --git a/mllm-cli/cmd/mllm-client/main.go b/mllm-cli/cmd/mllm-client/main.go
@@ -0,0 +1,88 @@
+// Copyright (c) MLLM Team.
+// Licensed under the MIT License.
+package main
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"mllm-cli/pkg/api" 
+	"net/http"
+	"os"
+	"strings"
+)
+
+// main runs an interactive, multi-turn chat loop against the MLLM server's
+// streaming chat-completions endpoint.
+//
+// Each non-empty line read from stdin is appended to the conversation history
+// and sent to the server. When a turn fails, the user message is removed from
+// the history again so the next attempt starts from a consistent state. The
+// loop ends on "/exit", "/quit", or when stdin is closed.
+func main() {
+	serverURL := "http://localhost:8080/v1/chat/completions"
+	var history []api.RequestMessage
+	var currentSessionID string
+
+	fmt.Println("\n--- MLLM Refactored Interactive Client ---")
+	fmt.Println("Supports multi-turn sessions. Type /exit to quit.")
+	log.Printf("Connecting to: %s", serverURL)
+
+	reader := bufio.NewReader(os.Stdin)
+	for {
+		fmt.Print("\n> ")
+		userInput, err := reader.ReadString('\n')
+		cleanedInput := strings.TrimSpace(userInput)
+		if err != nil && cleanedInput == "" {
+			// stdin is closed or broken and nothing is left to send; without
+			// this check the loop would spin forever printing prompts.
+			if err != io.EOF {
+				log.Printf("ERROR: Failed to read input: %v", err)
+			}
+			fmt.Println()
+			return
+		}
+		if cleanedInput == "" {
+			continue
+		}
+		if cleanedInput == "/exit" || cleanedInput == "/quit" {
+			return
+		}
+
+		history = append(history, api.RequestMessage{Role: "user", Content: cleanedInput})
+		reply, sessionID, ok := streamChatTurn(serverURL, history, currentSessionID)
+		if !ok {
+			// Roll back the user message that never reached the model.
+			history = history[:len(history)-1]
+			continue
+		}
+		currentSessionID = sessionID
+		history = append(history, api.RequestMessage{Role: "assistant", Content: reply})
+	}
+}
+
+// streamChatTurn sends history to serverURL as one streaming request and
+// echoes the streamed reply to stdout.
+//
+// It returns the accumulated reply text, the session ID to use for the next
+// turn (the X-Session-ID response header when the server sent a different,
+// non-empty one, otherwise sessionID unchanged), and whether the turn
+// succeeded. Failures are logged here. Keeping the turn in its own function
+// lets the deferred Body.Close run after every turn rather than only when the
+// program exits.
+func streamChatTurn(serverURL string, history []api.RequestMessage, sessionID string) (string, string, bool) {
+	apiRequest := api.OpenAIRequest{
+		Model:     "Qwen3-0.6B-w4a32kai",
+		Messages:  history,
+		Stream:    true,
+		SessionID: sessionID,
+	}
+	requestBody, err := json.Marshal(apiRequest)
+	if err != nil {
+		log.Printf("ERROR: Failed to marshal request: %v", err)
+		return "", sessionID, false
+	}
+
+	req, err := http.NewRequest("POST", serverURL, bytes.NewBuffer(requestBody))
+	if err != nil {
+		log.Printf("ERROR: Failed to create request: %v", err)
+		return "", sessionID, false
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Accept", "text/event-stream")
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		log.Printf("ERROR: Request failed: %v", err)
+		return "", sessionID, false
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// Best effort: the body is only used to enrich the log line.
+		bodyBytes, _ := io.ReadAll(resp.Body)
+		log.Printf("ERROR: Server returned status %s: %s", resp.Status, string(bodyBytes))
+		return "", sessionID, false
+	}
+
+	if sessionIDFromHeader := resp.Header.Get("X-Session-ID"); sessionIDFromHeader != "" && sessionIDFromHeader != sessionID {
+		sessionID = sessionIDFromHeader
+		log.Printf("[Session Manager] New session established. ID: %s", sessionID)
+	}
+
+	var fullResponse strings.Builder
+	scanner := bufio.NewScanner(resp.Body)
+	fmt.Print("Assistant: ")
+	for scanner.Scan() {
+		line := scanner.Text()
+		if !strings.HasPrefix(line, "data: ") {
+			continue
+		}
+		jsonData := strings.TrimPrefix(line, "data: ")
+		if jsonData == "[DONE]" {
+			break
+		}
+		// Chunks that do not decode or carry no choices are skipped.
+		var chunk api.OpenAIResponseChunk
+		if json.Unmarshal([]byte(jsonData), &chunk) == nil && len(chunk.Choices) > 0 {
+			content := chunk.Choices[0].Delta.Content
+			fmt.Print(content)
+			fullResponse.WriteString(content)
+		}
+	}
+	fmt.Println()
+	if err := scanner.Err(); err != nil {
+		// Whatever was received before the error is still kept as the reply.
+		log.Printf("ERROR reading stream: %v", err)
+	}
+	return fullResponse.String(), sessionID, true
+}
diff --git a/mllm-cli/cmd/mllm-server/main.go b/mllm-cli/cmd/mllm-server/main.go
@@ -0,0 +1,72 @@
+// Copyright (c) MLLM Team.
+// Licensed under the MIT License.
+package main
+
+import (
+	"context"
+	"flag"
+	"log"
+	"mllm-cli/mllm"
+	pkgmllm "mllm-cli/pkg/mllm"
+	"mllm-cli/pkg/server"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"syscall"
+	"time"
+)
+
+// main runs the server and exits with the status code runServer returns.
+//
+// os.Exit is called only here, after runServer has returned, so the deferred
+// cleanup registered inside runServer also runs on failure paths. Calling
+// log.Fatal inside runServer would exit the process without running it.
+func main() {
+	os.Exit(runServer())
+}
+
+// runServer parses the --model-path flag, initializes the MLLM runtime,
+// creates and registers one session, serves HTTP on :8080, and blocks until
+// SIGINT or SIGTERM arrives. It returns 0 after a shutdown triggered by a
+// signal and 1 when startup fails.
+func runServer() int {
+	modelPath := flag.String("model-path", "", "Path to the MLLM model directory.")
+	flag.Parse()
+
+	if *modelPath == "" {
+		log.Print("FATAL: --model-path argument is required.")
+		return 1
+	}
+
+	if !mllm.InitializeContext() {
+		log.Print("FATAL: InitializeContext failed!")
+		return 1
+	}
+	mllm.SetLogLevel(2)
+	if !mllm.StartService(4) {
+		log.Print("FATAL: StartService failed!")
+		return 1
+	}
+	defer mllm.StopService()
+	defer mllm.ShutdownContext()
+
+	mllmService := pkgmllm.NewService()
+
+	log.Printf("Loading model and creating session from: %s", *modelPath)
+	session, err := mllm.NewSession(*modelPath)
+	if err != nil {
+		log.Printf("FATAL: Failed to create session: %v", err)
+		return 1
+	}
+
+	// The session ID is the last path element of the model directory.
+	sessionID := filepath.Base(*modelPath)
+	if !session.Insert(sessionID) {
+		session.Close()
+		log.Printf("FATAL: Failed to insert session with ID '%s'", sessionID)
+		return 1
+	}
+	mllmService.RegisterSession(sessionID, session)
+	log.Printf("Session created and registered successfully with ID: %s", sessionID)
+
+	httpServer := server.NewServer(":8080", mllmService)
+
+	go httpServer.Start()
+
+	shutdownChan := make(chan os.Signal, 1)
+	signal.Notify(shutdownChan, syscall.SIGINT, syscall.SIGTERM)
+	<-shutdownChan
+
+	log.Println("Received shutdown signal. Starting graceful shutdown...")
+
+	// Give the HTTP server up to five seconds to shut down.
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+	if err := httpServer.Shutdown(ctx); err != nil {
+		log.Printf("HTTP server shutdown failed: %v", err)
+	}
+
+	mllmService.Shutdown()
+
+	log.Println("Server gracefully stopped.")
+	return 0
+}
diff --git a/mllm-cli/go.mod b/mllm-cli/go.mod
@@ -1,8 +1,11 @@
 module mllm-cli
 
-go 1.25.0
+go 1.22.2
 
-require github.com/charmbracelet/bubbles v0.21.0
+require (
+	github.com/charmbracelet/bubbles v0.21.0
+	github.com/gorilla/websocket v1.5.3 //
+)
 
 require (
 	github.com/atotto/clipboard v0.1.4 // indirect
@@ -28,4 +31,5 @@ require (
 	golang.org/x/sys v0.35.0 // indirect
 	golang.org/x/term v0.34.0
 	golang.org/x/text v0.3.8 // indirect
+	github.com/google/uuid v1.6.0
 )
diff --git a/mllm-cli/mllm/c.go b/mllm-cli/mllm/c.go
@@ -4,16 +4,31 @@
 package mllm
 
 /*
-#cgo CFLAGS: -fPIC -I${SRCDIR}/SDK/include/
 #cgo CFLAGS: -std=c11
-#cgo LDFLAGS: -L${SRCDIR}/SDK/lib/
-#cgo LDFLAGS: -lMllmSdkC
-#cgo LDFLAGS: -Wl,-rpath ${SRCDIR}/SDK/lib
+#cgo LDFLAGS: -lMllmSdkC -lMllmRT -lMllmCPUBackend
 
 #include <mllm/mllm-c.h>
 #include <stdlib.h>
+
+// Returns the v_custom_ptr member of handle.
+// NOTE(review): presumably needed because Go cannot read this member directly
+// through cgo - confirm against the MllmCAny definition in mllm-c.h.
+static void* MllmCAny_get_v_custom_ptr(MllmCAny handle) {
+    return handle.v_custom_ptr;
+}
+
+// Returns a copy of handle whose v_custom_ptr member is NULL. The argument is
+// passed by value, so the caller must store the returned value to see the change.
+static MllmCAny MllmCAny_set_v_custom_ptr_null(MllmCAny handle) {
+    handle.v_custom_ptr = NULL;
+    return handle;
+}
 */
 import "C"
+import "unsafe"
+import "fmt"
+import "runtime"
+
+
+// Session pairs a C-side MllmCAny handle with the ID it was inserted under.
+type Session struct {
+    // cHandle is the handle returned by the C createQwen3Session call.
+    cHandle C.MllmCAny
+    // sessionID is set by a successful Insert and is empty until then.
+    sessionID string
+}
 
 func isOk(any C.MllmCAny) bool {
 	return C.isOk(any) == 1
@@ -26,3 +41,88 @@ func InitializeContext() bool {
+// ShutdownContext calls the C shutdownContext function and reports whether its result passes isOk.
 func ShutdownContext() bool {
 	return isOk(C.shutdownContext())
 }
+
+// StartService passes workerThreads to the C startService function and
+// reports whether its result passes isOk.
+//
+// A negative workerThreads is rejected without calling into C: converting it
+// to the unsigned C.size_t would wrap around to an enormous value.
+func StartService(workerThreads int) bool {
+	if workerThreads < 0 {
+		return false
+	}
+	return isOk(C.startService(C.size_t(workerThreads)))
+}
+
+// StopService calls the C stopService function and reports whether its result passes isOk.
+func StopService() bool {
+	return isOk(C.stopService())
+}
+
+// SetLogLevel converts level to a C int and passes it to the C setLogLevel function.
+func SetLogLevel(level int) {
+	cLevel := C.int(level)
+	C.setLogLevel(cLevel)
+}
+
+// NewSession creates a Session by passing modelPath to the C
+// createQwen3Session function.
+//
+// It returns an error when the returned handle does not pass isOk; the error
+// text is in Chinese and reads "underlying C API createQwen3Session failed".
+// On success a finalizer is attached that frees the C handle when the Session
+// is garbage collected; Close frees it earlier and removes the finalizer.
+func NewSession(modelPath string) (*Session, error) {
+	cPath := C.CString(modelPath)
+	defer C.free(unsafe.Pointer(cPath))
+
+	created := C.createQwen3Session(cPath)
+	if !isOk(created) {
+		return nil, fmt.Errorf("底层C API createQwen3Session 失败")
+	}
+
+	session := &Session{cHandle: created}
+	// Safety net for sessions that are dropped without an explicit Close.
+	releaseOnGC := func(sess *Session) {
+		fmt.Println("[Go Finalizer] Mllm Session automatically released.")
+		C.freeSession(sess.cHandle)
+	}
+	runtime.SetFinalizer(session, releaseOnGC)
+	return session, nil
+}
+
+// Close frees the C session handle, nulls the handle's custom pointer, and
+// removes the finalizer. It does nothing when the custom pointer is already
+// nil, so calling it again after a successful Close is a no-op.
+func (s *Session) Close() {
+	if C.MllmCAny_get_v_custom_ptr(s.cHandle) == nil {
+		return
+	}
+	fmt.Println("[Go Close] Mllm Session manually closed.")
+	C.freeSession(s.cHandle)
+	s.cHandle = C.MllmCAny_set_v_custom_ptr_null(s.cHandle)
+	// The handle is already freed, so the finalizer must not free it again.
+	runtime.SetFinalizer(s, nil)
+}
+
+// Insert passes sessionID and the session handle to the C insertSession
+// function. When the result passes isOk, sessionID is stored on the Session.
+// It reports whether the insert succeeded.
+func (s *Session) Insert(sessionID string) bool {
+	cID := C.CString(sessionID)
+	defer C.free(unsafe.Pointer(cID))
+
+	inserted := isOk(C.insertSession(cID, s.cHandle))
+	if inserted {
+		s.sessionID = sessionID
+	}
+	return inserted
+}
+
+// SendRequest passes jsonRequest to the C sendRequest function under this
+// session's stored ID and reports whether the result passes isOk. It prints
+// an error and returns false when no session ID has been stored yet.
+func (s *Session) SendRequest(jsonRequest string) bool {
+	if s.sessionID == "" {
+		fmt.Println("[Go SendRequest] Error: sessionID is not set on this session.")
+		return false
+	}
+
+	cID := C.CString(s.sessionID)
+	defer C.free(unsafe.Pointer(cID))
+	cPayload := C.CString(jsonRequest)
+	defer C.free(unsafe.Pointer(cPayload))
+
+	return isOk(C.sendRequest(cID, cPayload))
+}
+
+// PollResponse passes requestID to the C pollResponse function and returns
+// the result as a Go string. It returns "" when requestID is empty (after
+// printing an error) or when the C call returns a nil pointer.
+func (s *Session) PollResponse(requestID string) string {
+	if requestID == "" {
+		fmt.Println("[Go PollResponse] Error: requestID cannot be empty.")
+		return ""
+	}
+
+	cID := C.CString(requestID)
+	defer C.free(unsafe.Pointer(cID))
+
+	raw := C.pollResponse(cID)
+	if raw == nil {
+		return ""
+	}
+	// Copy the text into Go memory before releasing the C buffer.
+	response := C.GoString(raw)
+	C.freeResponseString(raw)
+	return response
+}
+
+// SessionID returns the ID stored by a successful Insert, or "" if none has been stored.
+func (s *Session) SessionID() string {
+    return s.sessionID
+}
diff --git a/mllm-cli/pkg/api/types.go b/mllm-cli/pkg/api/types.go
@@ -0,0 +1,33 @@
+// Copyright (c) MLLM Team.
+// Licensed under the MIT License.
+package api
+
+// RequestMessage is a single chat message, JSON-encoded under the keys "role" and "content".
+type RequestMessage struct {
+	Role    string `json:"role"`
+	Content string `json:"content"`
+}
+
+// OpenAIRequest is the JSON body of a chat request: the model name, the
+// message list, a streaming flag, and optional thinking and session fields.
+// The fields tagged omitempty are left out of the encoded JSON when they hold
+// their zero value.
+type OpenAIRequest struct {
+	Model          string           `json:"model"`
+	Messages       []RequestMessage `json:"messages"`
+	Stream         bool             `json:"stream"`
+	EnableThinking bool             `json:"enable_thinking,omitempty"` 
+	Thinking       bool             `json:"thinking,omitempty"`       // Added to accept a "thinking": true field that clients may send.
+	SessionID      string           `json:"session_id,omitempty"`     
+}
+
+// ResponseDelta holds the text carried by one streamed chunk, JSON-encoded under "content".
+type ResponseDelta struct {
+	Content string `json:"content"`
+}
+
+// ResponseChoice is one entry of a chunk's "choices" array; it wraps a ResponseDelta under "delta".
+type ResponseChoice struct {
+	Delta ResponseDelta `json:"delta"`
+}
+
+// OpenAIResponseChunk is one JSON object of a streamed chat response. Its
+// fields map to the keys "id", "object", "created", "model" and "choices".
+type OpenAIResponseChunk struct {
+	ID      string           `json:"id"`
+	Object  string           `json:"object"`
+	Created int64            `json:"created"`
+	Model   string           `json:"model"`
+	Choices []ResponseChoice `json:"choices"`
+}