-
Notifications
You must be signed in to change notification settings - Fork 148
feat: Add mllm-cli support for Qwen3 and update docs #493
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| // Copyright (c) MLLM Team. | ||
| // Licensed under the MIT License. | ||
| package main | ||
|
|
||
| import ( | ||
| "bufio" | ||
| "bytes" | ||
| "encoding/json" | ||
| "fmt" | ||
| "io" | ||
| "log" | ||
| "mllm-cli/pkg/api" | ||
| "net/http" | ||
| "os" | ||
| "strings" | ||
| ) | ||
|
|
||
| func main() { | ||
| serverURL := "http://localhost:8080/v1/chat/completions" | ||
| var history []api.RequestMessage | ||
| var currentSessionID string | ||
|
|
||
| fmt.Println("\n--- MLLM Refactored Interactive Client ---") | ||
| fmt.Println("Supports multi-turn sessions. Type /exit to quit.") | ||
| log.Printf("Connecting to: %s", serverURL) | ||
|
|
||
| reader := bufio.NewReader(os.Stdin) | ||
| for { | ||
| fmt.Print("\n> ") | ||
| userInput, _ := reader.ReadString('\n') | ||
| cleanedInput := strings.TrimSpace(userInput) | ||
| if cleanedInput == "" { continue } | ||
| if cleanedInput == "/exit" || cleanedInput == "/quit" { return } | ||
|
|
||
| history = append(history, api.RequestMessage{Role: "user", Content: cleanedInput}) | ||
| apiRequest := api.OpenAIRequest{ | ||
| Model: "Qwen3-0.6B-w4a32kai", | ||
| Messages: history, | ||
| Stream: true, | ||
| SessionID: currentSessionID, | ||
| } | ||
| requestBody, _ := json.Marshal(apiRequest) | ||
|
|
||
| req, _ := http.NewRequest("POST", serverURL, bytes.NewBuffer(requestBody)) | ||
| req.Header.Set("Content-Type", "application/json") | ||
| req.Header.Set("Accept", "text/event-stream") | ||
| resp, err := http.DefaultClient.Do(req) | ||
| if err != nil { | ||
| log.Printf("ERROR: Request failed: %v", err) | ||
| history = history[:len(history)-1] | ||
| continue | ||
| } | ||
| defer resp.Body.Close() | ||
|
|
||
| if resp.StatusCode != http.StatusOK { | ||
| bodyBytes, _ := io.ReadAll(resp.Body) | ||
| log.Printf("ERROR: Server returned status %s: %s", resp.Status, string(bodyBytes)) | ||
| history = history[:len(history)-1] | ||
| continue | ||
| } | ||
|
|
||
| sessionIDFromHeader := resp.Header.Get("X-Session-ID") | ||
| if sessionIDFromHeader != "" && currentSessionID != sessionIDFromHeader { | ||
| currentSessionID = sessionIDFromHeader | ||
| log.Printf("[Session Manager] New session established. ID: %s", currentSessionID) | ||
| } | ||
|
|
||
| var fullResponse strings.Builder | ||
| scanner := bufio.NewScanner(resp.Body) | ||
| fmt.Print("Assistant: ") | ||
| for scanner.Scan() { | ||
| line := scanner.Text() | ||
| if strings.HasPrefix(line, "data: ") { | ||
| jsonData := strings.TrimPrefix(line, "data: ") | ||
| if jsonData == "[DONE]" { break } | ||
| var chunk api.OpenAIResponseChunk | ||
| if json.Unmarshal([]byte(jsonData), &chunk) == nil && len(chunk.Choices) > 0 { | ||
| content := chunk.Choices[0].Delta.Content | ||
| fmt.Print(content) | ||
| fullResponse.WriteString(content) | ||
| } | ||
| } | ||
| } | ||
| fmt.Println() | ||
| if err := scanner.Err(); err != nil { log.Printf("ERROR reading stream: %v", err) } | ||
| history = append(history, api.RequestMessage{Role: "assistant", Content: fullResponse.String()}) | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
| // Copyright (c) MLLM Team. | ||
| // Licensed under the MIT License. | ||
| package main | ||
|
|
||
| import ( | ||
| "context" | ||
| "flag" | ||
| "log" | ||
| "mllm-cli/mllm" | ||
| pkgmllm "mllm-cli/pkg/mllm" | ||
| "mllm-cli/pkg/server" | ||
| "os" | ||
| "os/signal" | ||
| "path/filepath" | ||
| "syscall" | ||
| "time" | ||
| ) | ||
|
|
||
| func main() { | ||
| modelPath := flag.String("model-path", "", "Path to the MLLM model directory.") | ||
| flag.Parse() | ||
|
|
||
| if *modelPath == "" { | ||
| log.Fatal("FATAL: --model-path argument is required.") | ||
| } | ||
|
|
||
| if !mllm.InitializeContext() { | ||
| log.Fatal("FATAL: InitializeContext failed!") | ||
| } | ||
| mllm.SetLogLevel(2) | ||
| if !mllm.StartService(4) { | ||
| log.Fatal("FATAL: StartService failed!") | ||
| } | ||
| defer mllm.StopService() | ||
| defer mllm.ShutdownContext() | ||
|
|
||
| mllmService := pkgmllm.NewService() | ||
|
|
||
| log.Printf("Loading model and creating session from: %s", *modelPath) | ||
| session, err := mllm.NewSession(*modelPath) | ||
| if err != nil { | ||
| log.Fatalf("FATAL: Failed to create session: %v", err) | ||
| } | ||
|
|
||
| sessionID := filepath.Base(*modelPath) | ||
| if !session.Insert(sessionID) { | ||
| session.Close() | ||
| log.Fatalf("FATAL: Failed to insert session with ID '%s'", sessionID) | ||
| } | ||
| mllmService.RegisterSession(sessionID, session) | ||
| log.Printf("Session created and registered successfully with ID: %s", sessionID) | ||
|
Comment on lines
+45
to
+51
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Session ID derivation may cause collisions with multiple models. Using Consider one of these approaches:
- sessionID := filepath.Base(*modelPath)
+ // Use a hash of the full path to avoid collisions
+ sessionID := fmt.Sprintf("%x", md5.Sum([]byte(*modelPath)))[:16]
+ sessionID := flag.String("session-id", "", "Session ID (defaults to model directory name)")
flag.Parse()
// ...
- sessionID := filepath.Base(*modelPath)
+ sid := *sessionID
+ if sid == "" {
+ sid = filepath.Base(*modelPath)
+ }The second approach provides flexibility while maintaining the current default behavior.
🤖 Prompt for AI Agents |
||
|
|
||
| httpServer := server.NewServer(":8080", mllmService) | ||
|
|
||
| go httpServer.Start() | ||
|
|
||
| shutdownChan := make(chan os.Signal, 1) | ||
| signal.Notify(shutdownChan, syscall.SIGINT, syscall.SIGTERM) | ||
| <-shutdownChan | ||
|
|
||
| log.Println("Received shutdown signal. Starting graceful shutdown...") | ||
|
|
||
| ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) | ||
| defer cancel() | ||
| if err := httpServer.Shutdown(ctx); err != nil { | ||
| log.Printf("HTTP server shutdown failed: %v", err) | ||
| } | ||
|
|
||
| mllmService.Shutdown() | ||
|
|
||
| log.Println("Server gracefully stopped.") | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,16 +4,31 @@ | |
| package mllm | ||
|
|
||
| /* | ||
| #cgo CFLAGS: -fPIC -I${SRCDIR}/SDK/include/ | ||
| #cgo CFLAGS: -std=c11 | ||
| #cgo LDFLAGS: -L${SRCDIR}/SDK/lib/ | ||
| #cgo LDFLAGS: -lMllmSdkC | ||
| #cgo LDFLAGS: -Wl,-rpath ${SRCDIR}/SDK/lib | ||
| #cgo LDFLAGS: -lMllmSdkC -lMllmRT -lMllmCPUBackend | ||
|
|
||
| #include <mllm/mllm-c.h> | ||
| #include <stdlib.h> | ||
|
|
||
| static void* MllmCAny_get_v_custom_ptr(MllmCAny handle) { | ||
| return handle.v_custom_ptr; | ||
| } | ||
|
|
||
| static MllmCAny MllmCAny_set_v_custom_ptr_null(MllmCAny handle) { | ||
| handle.v_custom_ptr = NULL; | ||
| return handle; | ||
| } | ||
|
Comment on lines
+13
to
+20
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Avoid peeking into C struct internals; add stable C API instead. Accessing MllmCAny.v_custom_ptr directly is ABI-fragile. If the C struct layout changes, this breaks at compile or (worse) runtime. Prefer an official C helper (e.g., mllmAnyIsValid, mllmAnyNull) exported by the C SDK, or provide a dedicated C API to query/free validity instead of field access from Go. |
||
| */ | ||
| import "C" | ||
| import "unsafe" | ||
| import "fmt" | ||
| import "runtime" | ||
|
|
||
|
|
||
| type Session struct { | ||
| cHandle C.MllmCAny | ||
| sessionID string | ||
| } | ||
|
Comment on lines
+28
to
+31
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Concurrent use-after-free risk; guard Session with locks and a closed flag. Methods can race with Close(), leading to use-after-free of the C handle. Add an RWMutex and a closed flag; serialize Close vs. Insert/SendRequest/PollResponse/SessionID. Apply this diff: @@
-import "C"
-import "unsafe"
-import "fmt"
-import "runtime"
+import "C"
+import "unsafe"
+import "fmt"
+import "runtime"
+import "sync"
@@
type Session struct {
cHandle C.MllmCAny
sessionID string
+ mu sync.RWMutex
+ closed bool
}
@@
-func (s *Session) Close() {
- if C.MllmCAny_get_v_custom_ptr(s.cHandle) != nil {
- fmt.Println("[Go Close] Mllm Session manually closed.")
- C.freeSession(s.cHandle)
- s.cHandle = C.MllmCAny_set_v_custom_ptr_null(s.cHandle)
- runtime.SetFinalizer(s, nil)
- }
-}
+func (s *Session) Close() {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.closed {
+ return
+ }
+ if C.MllmCAny_get_v_custom_ptr(s.cHandle) != nil {
+ fmt.Println("[Go Close] Mllm Session manually closed.")
+ C.freeSession(s.cHandle)
+ s.cHandle = C.MllmCAny_set_v_custom_ptr_null(s.cHandle)
+ }
+ s.closed = true
+ runtime.SetFinalizer(s, nil)
+}
@@
-func (s *Session) Insert(sessionID string) bool {
+func (s *Session) Insert(sessionID string) bool {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.closed {
+ fmt.Println("[Go Insert] Error: session already closed.")
+ return false
+ }
cSessionID := C.CString(sessionID)
defer C.free(unsafe.Pointer(cSessionID))
result := C.insertSession(cSessionID, s.cHandle)
if isOk(result) {
s.sessionID = sessionID
}
return isOk(result)
}
@@
-func (s *Session) SendRequest(jsonRequest string) bool {
- if s.sessionID == "" {
+func (s *Session) SendRequest(jsonRequest string) bool {
+ s.mu.RLock()
+ defer s.mu.RUnlock()
+ if s.closed {
+ fmt.Println("[Go SendRequest] Error: session is closed.")
+ return false
+ }
+ if s.sessionID == "" {
fmt.Println("[Go SendRequest] Error: sessionID is not set on this session.")
return false
}
cSessionID := C.CString(s.sessionID)
cJsonRequest := C.CString(jsonRequest)
defer C.free(unsafe.Pointer(cSessionID))
defer C.free(unsafe.Pointer(cJsonRequest))
result := C.sendRequest(cSessionID, cJsonRequest)
return isOk(result)
}
@@
-func (s *Session) PollResponse(requestID string) string {
- if requestID == "" {
+func (s *Session) PollResponse(requestID string) string {
+ s.mu.RLock()
+ defer s.mu.RUnlock()
+ if s.closed {
+ fmt.Println("[Go PollResponse] Error: session is closed.")
+ return ""
+ }
+ if requestID == "" {
fmt.Println("[Go PollResponse] Error: requestID cannot be empty.")
return ""
}
cRequestID := C.CString(requestID)
defer C.free(unsafe.Pointer(cRequestID))
cResponse := C.pollResponse(cRequestID)
if cResponse == nil {
return ""
}
defer C.freeResponseString(cResponse)
return C.GoString(cResponse)
}
@@
-func (s *Session) SessionID() string {
- return s.sessionID
-}
+func (s *Session) SessionID() string {
+ s.mu.RLock()
+ defer s.mu.RUnlock()
+ return s.sessionID
+}Also applies to: 76-83, 85-93, 95-107, 109-124, 126-128 🤖 Prompt for AI Agents |
||
|
|
||
| func isOk(any C.MllmCAny) bool { | ||
| return C.isOk(any) == 1 | ||
|
|
@@ -26,3 +41,88 @@ func InitializeContext() bool { | |
| func ShutdownContext() bool { | ||
| return isOk(C.shutdownContext()) | ||
| } | ||
|
|
||
| func StartService(workerThreads int) bool { | ||
| result := C.startService(C.size_t(workerThreads)) | ||
| return isOk(result) | ||
| } | ||
|
Comment on lines
+45
to
+48
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Validate workerThreads to prevent size_t underflow on negative input. Casting a negative int to C.size_t yields a huge value. Guard against workerThreads < 0. Apply this diff: func StartService(workerThreads int) bool {
- result := C.startService(C.size_t(workerThreads))
+ if workerThreads < 0 {
+ fmt.Println("[Go StartService] Error: workerThreads cannot be negative.")
+ return false
+ }
+ result := C.startService(C.size_t(workerThreads))
return isOk(result)
}🤖 Prompt for AI Agents |
||
|
|
||
| func StopService() bool { | ||
| result := C.stopService() | ||
| return isOk(result) | ||
| } | ||
|
|
||
| func SetLogLevel(level int) { | ||
| C.setLogLevel(C.int(level)) | ||
| } | ||
|
|
||
| func NewSession(modelPath string) (*Session, error) { | ||
| cModelPath := C.CString(modelPath) | ||
| defer C.free(unsafe.Pointer(cModelPath)) | ||
|
|
||
| handle := C.createQwen3Session(cModelPath) | ||
| if !isOk(handle) { | ||
| return nil, fmt.Errorf("底层C API createQwen3Session 失败") | ||
| } | ||
| s := &Session{cHandle: handle} | ||
| runtime.SetFinalizer(s, func(s *Session) { | ||
| fmt.Println("[Go Finalizer] Mllm Session automatically released.") | ||
| C.freeSession(s.cHandle) | ||
| }) | ||
|
|
||
| return s, nil | ||
| } | ||
|
|
||
| func (s *Session) Close() { | ||
| if C.MllmCAny_get_v_custom_ptr(s.cHandle) != nil { | ||
| fmt.Println("[Go Close] Mllm Session manually closed.") | ||
| C.freeSession(s.cHandle) | ||
| s.cHandle = C.MllmCAny_set_v_custom_ptr_null(s.cHandle) | ||
| runtime.SetFinalizer(s, nil) | ||
| } | ||
| } | ||
|
|
||
| func (s *Session) Insert(sessionID string) bool { | ||
| cSessionID := C.CString(sessionID) | ||
| defer C.free(unsafe.Pointer(cSessionID)) | ||
| result := C.insertSession(cSessionID, s.cHandle) | ||
| if isOk(result) { | ||
| s.sessionID = sessionID | ||
| } | ||
| return isOk(result) | ||
| } | ||
|
|
||
| func (s *Session) SendRequest(jsonRequest string) bool { | ||
| if s.sessionID == "" { | ||
| fmt.Println("[Go SendRequest] Error: sessionID is not set on this session.") | ||
| return false | ||
| } | ||
| cSessionID := C.CString(s.sessionID) | ||
| cJsonRequest := C.CString(jsonRequest) | ||
| defer C.free(unsafe.Pointer(cSessionID)) | ||
| defer C.free(unsafe.Pointer(cJsonRequest)) | ||
|
|
||
| result := C.sendRequest(cSessionID, cJsonRequest) | ||
| return isOk(result) | ||
| } | ||
|
|
||
| func (s *Session) PollResponse(requestID string) string { | ||
| if requestID == "" { | ||
| fmt.Println("[Go PollResponse] Error: requestID cannot be empty.") | ||
| return "" | ||
| } | ||
| cRequestID := C.CString(requestID) | ||
| defer C.free(unsafe.Pointer(cRequestID)) | ||
|
|
||
| cResponse := C.pollResponse(cRequestID) | ||
| if cResponse == nil { | ||
| return "" | ||
| } | ||
| defer C.freeResponseString(cResponse) | ||
|
|
||
| return C.GoString(cResponse) | ||
| } | ||
|
|
||
| func (s *Session) SessionID() string { | ||
| return s.sessionID | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| // Copyright (c) MLLM Team. | ||
| // Licensed under the MIT License. | ||
| package api | ||
|
|
||
| type RequestMessage struct { | ||
| Role string `json:"role"` | ||
| Content string `json:"content"` | ||
| } | ||
|
|
||
| type OpenAIRequest struct { | ||
| Model string `json:"model"` | ||
| Messages []RequestMessage `json:"messages"` | ||
| Stream bool `json:"stream"` | ||
| EnableThinking bool `json:"enable_thinking,omitempty"` | ||
| Thinking bool `json:"thinking,omitempty"` // <-- 新增此行,用于接收客户端可能发送的 "thinking": true | ||
| SessionID string `json:"session_id,omitempty"` | ||
| } | ||
|
|
||
| type ResponseDelta struct { | ||
| Content string `json:"content"` | ||
| } | ||
|
|
||
| type ResponseChoice struct { | ||
| Delta ResponseDelta `json:"delta"` | ||
| } | ||
|
|
||
| type OpenAIResponseChunk struct { | ||
| ID string `json:"id"` | ||
| Object string `json:"object"` | ||
| Created int64 `json:"created"` | ||
| Model string `json:"model"` | ||
| Choices []ResponseChoice `json:"choices"` | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Handle errors from json.Marshal and http.NewRequest.
Both
json.Marshalandhttp.NewRequestcan return errors, but they are silently ignored. This could lead to nil pointer dereferences or invalid requests.Apply this diff:
📝 Committable suggestion
🤖 Prompt for AI Agents