@@ -1,26 +1,42 @@
 package de.kherud.llama;
 
-import java.lang.annotation.Native;
+
 import java.util.Iterator;
 import java.util.NoSuchElementException;
 
 /**
- * This iterator is used by {@link LlamaModel#generate(InferenceParameters)}. In addition to implementing {@link Iterator},
- * it allows to cancel ongoing inference (see {@link #cancel()}).
+ * Iterates over a stream of outputs from the model.
  */
-public final class LlamaIterator implements Iterator<LlamaOutput> {
+public class LlamaIterator implements Iterator<LlamaOutput> {
 
     private final LlamaModel model;
+    private final boolean isChat;
     private final int taskId;
 
-    @Native
-    @SuppressWarnings("FieldMayBeFinal")
-    private boolean hasNext = true;
+    /**
+     * Whether there is a next token to receive.
+     */
+    public boolean hasNext = true;
 
-    LlamaIterator(LlamaModel model, InferenceParameters parameters) {
+    /**
+     * Creates a new iterator.
+     *
+     * @param model      the llama model to use for generation
+     * @param parameters parameters for the inference
+     * @param isChat     whether this is a chat completion (true) or a regular
+     *                   completion (false)
+     */
+    LlamaIterator(LlamaModel model, InferenceParameters parameters, boolean isChat) {
         this.model = model;
-        parameters.setStream(true);
-        taskId = model.requestCompletion(parameters.toString());
+        this.isChat = isChat;
+
+        if (isChat) {
+            String prompt = model.applyTemplate(parameters);
+            parameters.setPrompt(prompt);
+            this.taskId = model.requestChat(parameters.toString());
+        } else {
+            this.taskId = model.requestCompletion(parameters.toString());
+        }
     }
 
     @Override
@@ -33,19 +49,38 @@ public LlamaOutput next() {
         if (!hasNext) {
             throw new NoSuchElementException();
         }
-        LlamaOutput output = model.receiveCompletion(taskId);
-        hasNext = !output.stop;
-        if (output.stop) {
-            model.releaseTask(taskId);
+
+        try {
+            if (isChat) {
+                String response = model.streamChatCompletion(taskId);
+                // Check for completion by examining the JSON response.
+                // This is a simplification; the actual implementation might need
+                // more sophisticated handling.
+                if (response != null && response.contains("\"finish_reason\":")) {
+                    hasNext = false;
+                }
+                return new LlamaOutput(response, !hasNext);
+            } else {
+                StreamingOutput output = model.streamCompletion(taskId);
+                hasNext = !output.isFinal;
+                return new LlamaOutput(output.text, output.isFinal);
+            }
+        } catch (Exception e) {
+            model.releaseTask(taskId);
+            hasNext = false;
+            throw new RuntimeException(e);
         }
-        return output;
     }
 
     /**
-     * Cancel the ongoing generation process.
+     * Cancel the ongoing generation process. This will stop the model from
+     * generating more tokens and release resources.
      */
     public void cancel() {
-        model.cancelCompletion(taskId);
-        hasNext = false;
+        if (hasNext) {
+            model.cancelCompletion(taskId);
+            model.releaseTask(taskId);
+            hasNext = false;
+        }
     }
-}
+}
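
Usage note: below is a minimal sketch of driving this iterator for a regular (non-chat) completion. Only the iterator calls (hasNext, next, cancel) mirror the diff above; how the model is obtained and the InferenceParameters constructor are assumptions, marked as such in the comments. The snippet sits in de.kherud.llama because the constructor is package-private.

    package de.kherud.llama;

    public class LlamaIteratorSketch {
        public static void main(String[] args) {
            LlamaModel model = null; // assumption: replace with a loaded model
            InferenceParameters params = new InferenceParameters(""); // assumed constructor
            params.setPrompt("Once upon a time"); // setPrompt is used by the constructor above

            LlamaIterator it = new LlamaIterator(model, params, false); // regular completion

            int tokenBudget = 64; // stop early after a fixed number of outputs
            while (it.hasNext()) {
                LlamaOutput output = it.next();
                System.out.print(output); // assumes LlamaOutput prints its text
                if (--tokenBudget == 0) {
                    it.cancel(); // stops generation and releases the native task
                    break;
                }
            }
        }
    }

For a chat completion, the same loop applies with isChat = true; per the constructor above, the model's chat template is then applied to the prompt and the task is requested via requestChat.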
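The comment inside next() already flags the finish_reason substring check as a simplification: it also fires when the generated text itself happens to contain the characters "finish_reason":. A stricter alternative is to parse each chunk before ending the stream. The sketch below is an assumption-laden illustration: it presumes Gson on the classpath and an OpenAI-style chunk shape ({"choices":[{"finish_reason":...}]}), neither of which this commit establishes, and the helper name isFinalChunk is hypothetical.

    import com.google.gson.JsonElement;
    import com.google.gson.JsonObject;
    import com.google.gson.JsonParser;

    final class ChunkInspector { // hypothetical helper, not part of this commit
        static boolean isFinalChunk(String response) {
            if (response == null) {
                return false;
            }
            JsonObject chunk = JsonParser.parseString(response).getAsJsonObject();
            if (!chunk.has("choices")) {
                return false; // unexpected shape: treat as a non-final chunk
            }
            for (JsonElement choice : chunk.getAsJsonArray("choices")) {
                JsonElement reason = choice.getAsJsonObject().get("finish_reason");
                // finish_reason stays JSON null until the stream ends
                if (reason != null && !reason.isJsonNull()) {
                    return true;
                }
            }
            return false;
        }
    }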