ml-explore · davidkoski · Dec 17, 2025 · Dec 17, 2025 · davidkoski · Dec 17, 2025
diff --git a/...assets/AccentColor.colorset/Contents.json → ...assets/AccentColor.colorset/Contents.json b/...assets/AccentColor.colorset/Contents.json → ...assets/AccentColor.colorset/Contents.json
diff --git a/Applications/LLMBasic/Assets.xcassets/AppIcon.appiconset/Contents.json b/Applications/LLMBasic/Assets.xcassets/AppIcon.appiconset/Contents.json
@@ -0,0 +1,85 @@
+{
+  "images" : [
+    {
+      "idiom" : "universal",
+      "platform" : "ios",
+      "size" : "1024x1024"
+    },
+    {
+      "appearances" : [
+        {
+          "appearance" : "luminosity",
+          "value" : "dark"
+        }
+      ],
+      "idiom" : "universal",
+      "platform" : "ios",
+      "size" : "1024x1024"
+    },
+    {
+      "appearances" : [
+        {
+          "appearance" : "luminosity",
+          "value" : "tinted"
+        }
+      ],
+      "idiom" : "universal",
+      "platform" : "ios",
+      "size" : "1024x1024"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "1x",
+      "size" : "16x16"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "2x",
+      "size" : "16x16"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "1x",
+      "size" : "32x32"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "2x",
+      "size" : "32x32"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "1x",
+      "size" : "128x128"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "2x",
+      "size" : "128x128"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "1x",
+      "size" : "256x256"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "2x",
+      "size" : "256x256"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "1x",
+      "size" : "512x512"
+    },
+    {
+      "idiom" : "mac",
+      "scale" : "2x",
+      "size" : "512x512"
+    }
+  ],
+  "info" : {
+    "author" : "xcode",
+    "version" : 1
+  }
+}
diff --git a/...ons/VLMEval/Assets.xcassets/Contents.json → ...ns/LLMBasic/Assets.xcassets/Contents.json b/...ons/VLMEval/Assets.xcassets/Contents.json → ...ns/LLMBasic/Assets.xcassets/Contents.json
diff --git a/Applications/LLMBasic/ChatModel.swift b/Applications/LLMBasic/ChatModel.swift
@@ -0,0 +1,108 @@
+// Copyright © 2025 Apple Inc.
+
+import MLXLLM
+import MLXLMCommon
+import SwiftUI
+
+/// Registry entry for the model that gets downloaded and loaded.
+/// Swap this for another `LLMRegistry` entry to try a different model.
+private let modelConfiguration = LLMRegistry.gemma3_1B_qat_4bit
+
+/// The system prompt handed to the chat session.
+private let instructions: String = "You are a friendly and helpful chatbot."
+
+/// Parameters that control generation.
+private let generateParameters: GenerateParameters = .init(temperature: 0.5)
+
+/// Downloads and loads the weights for the model -- we have one of these in the process.
+///
+/// The loader is a small state machine: `idle` -> `loading` -> `loaded`. Concurrent
+/// callers of ``model()`` share a single load task, and a failed load returns the
+/// loader to `idle` so that a later call can try again.
+@MainActor @Observable public class ModelLoader {
+
+    /// Where the loader is in its lifecycle.
+    enum State {
+        /// nothing has been requested yet (or the previous attempt failed)
+        case idle
+        /// a load is in flight; additional callers await the same task
+        case loading(Task<ModelContainer, Error>)
+        /// the model is ready to use
+        case loaded(ModelContainer)
+    }
+
+    /// Fraction of the download that has completed, as reported by the load callback.
+    public var progress = 0.0
+
+    /// `true` once the model has been loaded successfully.
+    public var isLoaded: Bool {
+        switch state {
+        case .idle, .loading: false
+        case .loaded: true
+        }
+    }
+
+    private var state = State.idle
+
+    /// Returns the loaded model, starting the download/load on first use.
+    ///
+    /// - Returns: the shared `ModelContainer`.
+    /// - Throws: whatever error the load task fails with. After a failure the loader
+    ///   is back in the idle state, so calling this again starts a fresh attempt.
+    public func model() async throws -> ModelContainer {
+        switch state {
+        case .idle:
+            let task = Task {
+                // download and report progress
+                try await loadModelContainer(configuration: modelConfiguration) { value in
+                    // publish progress from the main actor, where this object lives
+                    Task { @MainActor in
+                        self.progress = value.fractionCompleted
+                    }
+                }
+            }
+            state = .loading(task)
+
+            do {
+                let model = try await task.value
+                state = .loaded(model)
+                return model
+            } catch {
+                // do not keep the failed task around -- otherwise every later call
+                // would rethrow the same cached error and a retry would be impossible
+                state = .idle
+                throw error
+            }
+
+        case .loading(let task):
+            // somebody else already started the load -- wait for the same result
+            return try await task.value
+
+        case .loaded(let model):
+            return model
+        }
+    }
+}
+
+/// View model for the ChatSession.
+///
+/// Holds the visible conversation and runs at most one generation at a time.
+@MainActor @Observable public class ChatModel {
+
+    private let session: ChatSession
+
+    /// back and forth conversation between the user and LLM
+    public var messages = [Chat.Message]()
+
+    /// the in-flight generation, `nil` when idle
+    private var task: Task<Void, Error>?
+
+    /// `true` while a response is being generated
+    public var isBusy: Bool {
+        task != nil
+    }
+
+    /// Creates a chat session on top of an already loaded model, using the
+    /// file-level `instructions` and `generateParameters`.
+    ///
+    /// - Parameter model: the loaded model container to chat with.
+    public init(model: ModelContainer) {
+        self.session = ChatSession(
+            model,
+            instructions: instructions,
+            generateParameters: generateParameters)
+    }
+
+    /// Requests cancellation of the in-flight generation, if there is one.
+    public func cancel() {
+        task?.cancel()
+    }
+
+    /// Appends `message` to the conversation and streams the model's reply into a
+    /// new assistant message.
+    ///
+    /// Does nothing if a response is already being generated.
+    ///
+    /// - Parameter message: the user's prompt.
+    public func respond(_ message: String) {
+        guard task == nil else { return }
+
+        messages.append(.init(role: .user, content: message))
+        // placeholder that is shown until the first chunk arrives
+        messages.append(.init(role: .assistant, content: "..."))
+        let lastIndex = messages.count - 1
+
+        task = Task {
+            // Clear the task however the stream ends. If this only happened after
+            // the loop, a thrown error would leave `isBusy` stuck at true and every
+            // later prompt would be rejected by the guard above.
+            defer { self.task = nil }
+
+            var first = true
+            for try await item in session.streamResponse(to: message) {
+                if first {
+                    // replace the "..." placeholder with the first chunk
+                    self.messages[lastIndex].content = item
+                    first = false
+                } else {
+                    self.messages[lastIndex].content += item
+                }
+            }
+        }
+    }
+}
diff --git a/Applications/LLMBasic/ContentView.swift b/Applications/LLMBasic/ContentView.swift
@@ -0,0 +1,82 @@
+// Copyright © 2025 Apple Inc.
+
+import MLXLMCommon
+import SwiftUI
+
+/// Root view: shows load progress, then a simple transcript with a prompt field.
+struct ContentView: View {
+
+    /// provided by the application
+    let loader: ModelLoader
+
+    /// once loaded this will hold the chat session
+    @State var session: ChatModel?
+
+    /// description of a load failure, shown instead of the chat when set
+    @State var error: String?
+
+    /// prompt for the LLM (text field)
+    @State var prompt = ""
+
+    @FocusState var promptFocused
+
+    var body: some View {
+        VStack {
+            if let error {
+                Text("Error: \(error)")
+
+            } else if !loader.isLoaded {
+                ProgressView("Loading", value: loader.progress, total: 1)
+
+            } else if let session {
+                // show the chat messages
+                ScrollView(.vertical) {
+                    ForEach(session.messages.enumerated(), id: \.offset) { _, message in
+                        // user messages are bold, assistant messages are plain
+                        let bold = message.role == .user
+
+                        HStack {
+                            Text(message.content)
+                                .bold(bold)
+                            Spacer()
+                        }
+                        .padding(.bottom, 4)
+                    }
+
+                    Spacer()
+
+                    if session.isBusy {
+                        // a stop button -- cmd-. to interrupt
+                        HStack {
+                            Button("Stop", action: { session.cancel() })
+                                .keyboardShortcut(".")
+                            Spacer()
+                        }
+                    } else {
+                        // message from the user -> LLM
+                        TextField("Prompt", text: $prompt)
+                            .onSubmit(respond)
+                            .focused($promptFocused)
+                            .onAppear {
+                                // put the cursor back in the field whenever it reappears
+                                promptFocused = true
+                            }
+                    }
+                }
+                .defaultScrollAnchor(.bottom)
+            }
+        }
+        .padding()
+        .task {
+            // load the model and create the chat session on top of it
+            do {
+                let model = try await loader.model()
+                self.session = ChatModel(model: model)
+            } catch {
+                self.error = error.localizedDescription
+            }
+        }
+        .onDisappear {
+            // stop any generation that is still running
+            self.session?.cancel()
+        }
+    }
+
+    /// Sends the current prompt to the chat session and clears the text field.
+    ///
+    /// Blank prompts (empty or whitespace only) are ignored so that pressing
+    /// return in an empty field does not add an empty message to the conversation.
+    private func respond() {
+        guard !prompt.allSatisfy(\.isWhitespace) else { return }
+
+        session?.respond(prompt)
+        prompt = ""
+    }
+}
diff --git a/Applications/VLMEval/VLMEval.entitlements → Applications/LLMBasic/LLMBasic.entitlements b/Applications/VLMEval/VLMEval.entitlements → Applications/LLMBasic/LLMBasic.entitlements
@@ -4,13 +4,5 @@
 <dict>
 	<key>com.apple.developer.kernel.increased-memory-limit</key>
 	<true/>
-	<key>com.apple.security.app-sandbox</key>
-	<true/>
-	<key>com.apple.security.device.usb</key>
-	<true/>
-	<key>com.apple.security.files.user-selected.read-only</key>
-	<true/>
-	<key>com.apple.security.network.client</key>
-	<true/>
 </dict>
 </plist>
diff --git a/Applications/LLMBasic/LLMBasicApp.swift b/Applications/LLMBasic/LLMBasicApp.swift
@@ -0,0 +1,22 @@
+// Copyright © 2025 Apple Inc.
+
+import MLX
+import MLXLLM
+import MLXLMCommon
+import SwiftUI
+
+/// Application entry point: configures MLX and shows the chat window.
+@main
+struct LLMBasicApp: App {
+
+    /// the one model loader for the process, handed to the content view
+    @State var loader = ModelLoader()
+
+    init() {
+        // limit the MLX buffer cache size
+        let bufferCacheLimit = 20 * 1024 * 1024
+        MLX.GPU.set(cacheLimit: bufferCacheLimit)
+    }
+
+    var body: some Scene {
+        WindowGroup {
+            ContentView(loader: loader)
+        }
+    }
+}
diff --git a/Applications/LLMBasic/README.md b/Applications/LLMBasic/README.md
@@ -0,0 +1,23 @@
+#  LLMBasic
+
+A minimal example of:
+
+- loading a model, including downloading weights
+- setting up a ChatSession
+- a simple UI for a back and forth session with the model
+
+`ChatModel.swift` has a few parameters at the top of the file if you want to try a
+different model or system prompt.
+
+The goal of this example is to be a **minimal** application that loads and interacts with
+an LLM.
+
+See `LLMEval` and `MLXChatExample` for more fully featured applications.
+
+As always, you must set the Team on the LLMBasic target.
+
+Some notes about the setup:
+
+- this downloads models from Hugging Face, so LLMBasic -> Signing & Capabilities has "Outgoing Connections (Client)" enabled in the App Sandbox
+- LLM models are large so this uses the Increased Memory Limit entitlement on iOS to allow ... increased memory limits for devices that have more memory
+- `MLX.GPU.set(cacheLimit: 20 * 1024 * 1024)` is used to limit the buffer cache size
diff --git a/Applications/LLMEval/README.md b/Applications/LLMEval/README.md
@@ -14,6 +14,9 @@ Some notes about the setup:
 - LLM models are large so this uses the Increased Memory Limit entitlement on iOS to allow ... increased memory limits for devices that have more memory
 - `MLX.GPU.set(cacheLimit: 20 * 1024 * 1024)` is used to limit the buffer cache size
 
+`MLXChatExample` is a more fully featured multi-turn chat example that supports VLMs.
+`LLMBasic` is a **minimal** LLM chat example.
+
 ### Trying Different Models
 
 The example app uses an 8 billion parameter quantized Qwen3 model by default, see [LLMEvaluator.swift](ViewModels/LLMEvaluator.swift#L52):
@@ -33,19 +36,6 @@ For example:
     var modelConfiguration = LLMRegistry.phi4bit
 ```
 
-### Troubleshooting
-
-If the program crashes with a very deep stack trace, you may need to build
-in Release configuration. This seems to depend on the size of the model.
-
-There are a couple options:
-
-- Build Release
-- Force the model evaluation to run on the main thread, e.g. using @MainActor
-- Build `Cmlx` with optimizations by modifying `mlx/Package.swift` and adding `.unsafeOptions(["-O3"]),` around line 87
-
-See discussion here: https://github.com/ml-explore/mlx-swift-examples/issues/3
-
 ### Performance
 
 Different models have difference performance characteristics. For example Gemma 2B may outperform Phi-2 in terms of tokens / second.

diff --git a/Applications/LLMEval/Views/ContentView.swift b/Applications/LLMEval/Views/ContentView.swift
@@ -1,6 +1,5 @@
 // Copyright © 2025 Apple Inc.
 
-import AsyncAlgorithms
 import MLX
 import MLXLLM
 import MLXLMCommon