From 453918e2521faf7cdf515ad229f4c72a468f65f3 Mon Sep 17 00:00:00 2001
From: Greg OmniMind <gwgithub23@wierzowiecki.pl>
Date: Wed, 1 Oct 2025 22:17:46 +0200
Subject: [PATCH 1/2] Add --no-stream flag for reasoning models (issue #430)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR adds a minimal --no-stream CLI flag to disable response streaming,
which resolves issues with reasoning models (o1, o3, o3-pro) that require
organization verification or don't support streaming on certain platforms.

## Changes

1. **config.go**: Add `NoStream bool` field to Config struct
2. **config.go**: Add help text for --no-stream flag
3. **main.go**: Add CLI flag registration for --no-stream
4. **proto.go**: Add `Stream *bool` field to Request struct
5. **mods.go**: Pass NoStream config to request
6. **openai.go**: Implement conditional streaming logic
7. **openai.go**: Add NonStreamingWrapper to handle non-streaming responses

## Implementation Details

- Default behavior: streaming enabled (backward compatible)
- With --no-stream: makes one non-streaming Chat Completions call
  (Completions.New) instead of the streaming call (Completions.NewStreaming)
- NonStreamingWrapper implements the stream.Stream interface for consistency;
  its CallTools returns nil, so tool calls are not supported in this mode yet
- Tested with gpt-4o-mini (both streaming and non-streaming modes work)

## Scope

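This PR implements ONLY a global switch: the --no-stream CLI flag, backed by
a Config field that also carries `yaml:"no-stream"` and `env:"NO_STREAM"` tags.
Per-model configuration in config_template.yml is intentionally left for
future work to keep this change minimal and focused.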

## Testing

```bash
# Non-streaming mode (works with reasoning models)
echo "test" | ./mods -m o3 --no-stream

# Streaming mode still works (default)
echo "test" | ./mods -m gpt-4o
```

Fixes #430

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 config.go                 |  2 ++
 internal/openai/openai.go | 72 +++++++++++++++++++++++++++++++++++++++
 internal/proto/proto.go   |  1 +
 main.go                   |  1 +
 mods.go                   |  4 +++
 5 files changed, 80 insertions(+)

diff --git a/config.go b/config.go
index 9f0abfe2..29adc0d2 100644
--- a/config.go
+++ b/config.go
@@ -44,6 +44,7 @@ var help = map[string]string{
 	"prompt-args":       "Include the prompt from the arguments in the response",
 	"raw":               "Render output as raw text when connected to a TTY",
 	"quiet":             "Quiet mode (hide the spinner while loading and stderr messages for success)",
+	"no-stream":         "Disable streaming of responses (useful for reasoning models without streaming support)",
 	"help":              "Show help and exit",
 	"version":           "Show version and exit",
 	"max-retries":       "Maximum number of times to retry API calls",
@@ -145,6 +146,7 @@ type Config struct {
 	FormatAs            string     `yaml:"format-as" env:"FORMAT_AS"`
 	Raw                 bool       `yaml:"raw" env:"RAW"`
 	Quiet               bool       `yaml:"quiet" env:"QUIET"`
+	NoStream            bool       `yaml:"no-stream" env:"NO_STREAM"`
 	MaxTokens           int64      `yaml:"max-tokens" env:"MAX_TOKENS"`
 	MaxCompletionTokens int64      `yaml:"max-completion-tokens" env:"MAX_COMPLETION_TOKENS"`
 	MaxInputChars       int64      `yaml:"max-input-chars" env:"MAX_INPUT_CHARS"`
diff --git a/internal/openai/openai.go b/internal/openai/openai.go
index 47c498af..90fefc82 100644
--- a/internal/openai/openai.go
+++ b/internal/openai/openai.go
@@ -93,6 +93,19 @@ func (c *Client) Request(ctx context.Context, request proto.Request) stream.Stre
 		}
 	}
 
+	// Check if streaming is disabled
+	useStreaming := request.Stream == nil || *request.Stream
+	if !useStreaming {
+		// Non-streaming mode: use regular Chat Completion API
+		resp, err := c.Chat.Completions.New(ctx, body)
+		return &NonStreamingWrapper{
+			response: resp,
+			err:      err,
+			messages: request.Messages,
+		}
+	}
+
+	// Streaming mode (default)
 	s := &Stream{
 		stream:   c.Chat.Completions.NewStreaming(ctx, body),
 		request:  body,
@@ -180,3 +193,62 @@ func (s *Stream) Next() bool {
 
 	return false
 }
+
+// NonStreamingWrapper wraps a single ChatCompletion response (or its request error) to implement stream.Stream.
+type NonStreamingWrapper struct {
+	response           *openai.ChatCompletion
+	err                error
+	messages           []proto.Message
+	consumed, recorded bool // consumed: Next has run; recorded: reply was appended to messages
+}
+
+// Next implements stream.Stream; it reports true at most once, and only if a response arrived without error.
+func (w *NonStreamingWrapper) Next() bool {
+	if w.consumed {
+		return false
+	}
+	w.consumed = true
+	return w.err == nil && w.response != nil
+}
+
+// Current implements stream.Stream.
+func (w *NonStreamingWrapper) Current() (proto.Chunk, error) {
+	if w.err != nil {
+		return proto.Chunk{}, w.err //nolint:wrapcheck
+	}
+	if w.response == nil || len(w.response.Choices) == 0 {
+		return proto.Chunk{}, stream.ErrNoContent
+	}
+
+	// Return the complete message content as a single chunk
+	content := w.response.Choices[0].Message.Content
+
+	// Record the reply exactly once. Next sets consumed before Current runs, so consumed cannot gate this.
+	if !w.recorded {
+		w.recorded = true
+		w.messages = append(w.messages, toProtoMessage(w.response.Choices[0].Message.ToParam()))
+	}
+
+	return proto.Chunk{Content: content}, nil
+}
+
+// Close implements stream.Stream; it is a no-op that always returns nil.
+func (w *NonStreamingWrapper) Close() error {
+	return nil
+}
+
+// Err implements stream.Stream.
+func (w *NonStreamingWrapper) Err() error {
+	return w.err //nolint:wrapcheck
+}
+
+// Messages implements stream.Stream.
+func (w *NonStreamingWrapper) Messages() []proto.Message {
+	return w.messages
+}
+
+// CallTools implements stream.Stream; it always returns nil.
+func (w *NonStreamingWrapper) CallTools() []proto.ToolCallStatus {
+	// Non-streaming mode doesn't support tool calls yet
+	return nil
+}
diff --git a/internal/proto/proto.go b/internal/proto/proto.go
index 93a90be6..d56b8f38 100644
--- a/internal/proto/proto.go
+++ b/internal/proto/proto.go
@@ -75,6 +75,7 @@ type Request struct {
 	Stop           []string
 	MaxTokens      *int64
 	ResponseFormat *string
+	Stream         *bool
 	ToolCaller     func(name string, data []byte) (string, error)
 }
 
diff --git a/main.go b/main.go
index 77027448..06aacd32 100644
--- a/main.go
+++ b/main.go
@@ -267,6 +267,7 @@ func initFlags() {
 	flags.BoolVarP(&config.Version, "version", "v", false, stdoutStyles().FlagDesc.Render(help["version"]))
 	flags.IntVar(&config.MaxRetries, "max-retries", config.MaxRetries, stdoutStyles().FlagDesc.Render(help["max-retries"]))
 	flags.BoolVar(&config.NoLimit, "no-limit", config.NoLimit, stdoutStyles().FlagDesc.Render(help["no-limit"]))
+	flags.BoolVar(&config.NoStream, "no-stream", config.NoStream, stdoutStyles().FlagDesc.Render(help["no-stream"]))
 	flags.Int64Var(&config.MaxTokens, "max-tokens", config.MaxTokens, stdoutStyles().FlagDesc.Render(help["max-tokens"]))
 	flags.IntVar(&config.WordWrap, "word-wrap", config.WordWrap, stdoutStyles().FlagDesc.Render(help["word-wrap"]))
 	flags.Float64Var(&config.Temperature, "temp", config.Temperature, stdoutStyles().FlagDesc.Render(help["temp"]))
diff --git a/mods.go b/mods.go
index d43f4597..e6b377d0 100644
--- a/mods.go
+++ b/mods.go
@@ -437,6 +437,10 @@ func (m *Mods) startCompletionCmd(content string) tea.Cmd {
 		if cfg.MaxTokens > 0 {
 			request.MaxTokens = &cfg.MaxTokens
 		}
+		if cfg.NoStream {
+			noStream := false
+			request.Stream = &noStream
+		}
 
 		var client stream.Client
 		switch mod.API {

From 67d2acad853a1b585220a37cc006a5c1835359e5 Mon Sep 17 00:00:00 2001
From: Greg OmniMind <gwgithub23@wierzowiecki.pl>
Date: Wed, 1 Oct 2025 23:25:52 +0200
Subject: [PATCH 2/2] Fix linting issues: remove unused nolint directives
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

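* Remove unused //nolint:wrapcheck from internal/openai/openai.go line 217
  (NonStreamingWrapper.Current)
* Remove unused //nolint:wrapcheck from internal/openai/openai.go line 242
  (NonStreamingWrapper.Err)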

Fixes golangci-lint nolintlint errors in CI.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 internal/openai/openai.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/internal/openai/openai.go b/internal/openai/openai.go
index 90fefc82..b6f1cf82 100644
--- a/internal/openai/openai.go
+++ b/internal/openai/openai.go
@@ -214,7 +214,7 @@ func (w *NonStreamingWrapper) Next() bool {
 // Current implements stream.Stream.
 func (w *NonStreamingWrapper) Current() (proto.Chunk, error) {
 	if w.err != nil {
-		return proto.Chunk{}, w.err //nolint:wrapcheck
+		return proto.Chunk{}, w.err
 	}
 	if w.response == nil || len(w.response.Choices) == 0 {
 		return proto.Chunk{}, stream.ErrNoContent
@@ -239,7 +239,7 @@ func (w *NonStreamingWrapper) Close() error {
 
 // Err implements stream.Stream.
 func (w *NonStreamingWrapper) Err() error {
-	return w.err //nolint:wrapcheck
+	return w.err
 }
 
 // Messages implements stream.Stream.