From 453918e2521faf7cdf515ad229f4c72a468f65f3 Mon Sep 17 00:00:00 2001
From: Greg OmniMind
Date: Wed, 1 Oct 2025 22:17:46 +0200
Subject: [PATCH 1/2] Add --no-stream flag for reasoning models (issue #430)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR adds a minimal --no-stream CLI flag to disable response streaming,
which resolves issues with reasoning models (o1, o3, o3-pro) that require
organization verification or don't support streaming on certain platforms.

## Changes

1. **config.go**: Add `NoStream bool` field to Config struct
2. **config.go**: Add help text for --no-stream flag
3. **main.go**: Add CLI flag registration for --no-stream
4. **proto.go**: Add `Stream *bool` field to Request struct
5. **mods.go**: Pass NoStream config to request
6. **openai.go**: Implement conditional streaming logic
7. **openai.go**: Add NonStreamingWrapper to handle non-streaming responses

## Implementation Details

- Default behavior: streaming enabled (backward compatible)
- With --no-stream: uses the non-streaming Chat Completions call
  (`Chat.Completions.New`) instead of the streaming one (`NewStreaming`);
  the whole reply is delivered as a single chunk
- NonStreamingWrapper implements stream.Stream interface for consistency
- Tool calls are not supported in non-streaming mode yet
  (`NonStreamingWrapper.CallTools` returns nil)
- Tested with gpt-4o-mini (both streaming and non-streaming modes work)

## Scope

This PR implements ONLY the global --no-stream CLI flag (the new Config
field also declares the `no-stream` YAML key and the `NO_STREAM` env tag).
Per-model configuration in config_template.yml is intentionally left for
future work to keep this change minimal and focused.

## Testing ```bash # Non-streaming mode (works with reasoning models) echo "test" | ./mods -m o3 --no-stream # Streaming mode still works (default) echo "test" | ./mods -m gpt-4o ``` Fixes #430 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- config.go | 2 ++ internal/openai/openai.go | 72 +++++++++++++++++++++++++++++++++++++++ internal/proto/proto.go | 1 + main.go | 1 + mods.go | 4 +++ 5 files changed, 80 insertions(+) diff --git a/config.go b/config.go index 9f0abfe2..29adc0d2 100644 --- a/config.go +++ b/config.go @@ -44,6 +44,7 @@ var help = map[string]string{ "prompt-args": "Include the prompt from the arguments in the response", "raw": "Render output as raw text when connected to a TTY", "quiet": "Quiet mode (hide the spinner while loading and stderr messages for success)", + "no-stream": "Disable streaming of responses (useful for reasoning models without streaming support)", "help": "Show help and exit", "version": "Show version and exit", "max-retries": "Maximum number of times to retry API calls", @@ -145,6 +146,7 @@ type Config struct { FormatAs string `yaml:"format-as" env:"FORMAT_AS"` Raw bool `yaml:"raw" env:"RAW"` Quiet bool `yaml:"quiet" env:"QUIET"` + NoStream bool `yaml:"no-stream" env:"NO_STREAM"` MaxTokens int64 `yaml:"max-tokens" env:"MAX_TOKENS"` MaxCompletionTokens int64 `yaml:"max-completion-tokens" env:"MAX_COMPLETION_TOKENS"` MaxInputChars int64 `yaml:"max-input-chars" env:"MAX_INPUT_CHARS"` diff --git a/internal/openai/openai.go b/internal/openai/openai.go index 47c498af..90fefc82 100644 --- a/internal/openai/openai.go +++ b/internal/openai/openai.go @@ -93,6 +93,19 @@ func (c *Client) Request(ctx context.Context, request proto.Request) stream.Stre } } + // Check if streaming is disabled + useStreaming := request.Stream == nil || *request.Stream + if !useStreaming { + // Non-streaming mode: use regular Chat Completion API + resp, err := c.Chat.Completions.New(ctx, body) + return 
&NonStreamingWrapper{ + response: resp, + err: err, + messages: request.Messages, + } + } + + // Streaming mode (default) s := &Stream{ stream: c.Chat.Completions.NewStreaming(ctx, body), request: body, @@ -180,3 +193,62 @@ func (s *Stream) Next() bool { return false } + +// NonStreamingWrapper wraps a single ChatCompletion response to implement stream.Stream. +type NonStreamingWrapper struct { + response *openai.ChatCompletion + err error + messages []proto.Message + consumed bool +} + +// Next implements stream.Stream. +func (w *NonStreamingWrapper) Next() bool { + if w.consumed { + return false + } + w.consumed = true + return w.err == nil && w.response != nil +} + +// Current implements stream.Stream. +func (w *NonStreamingWrapper) Current() (proto.Chunk, error) { + if w.err != nil { + return proto.Chunk{}, w.err //nolint:wrapcheck + } + if w.response == nil || len(w.response.Choices) == 0 { + return proto.Chunk{}, stream.ErrNoContent + } + + // Return the complete message content as a single chunk + content := w.response.Choices[0].Message.Content + + // Add the response to messages + if !w.consumed { + msg := w.response.Choices[0].Message.ToParam() + w.messages = append(w.messages, toProtoMessage(msg)) + } + + return proto.Chunk{Content: content}, nil +} + +// Close implements stream.Stream. +func (w *NonStreamingWrapper) Close() error { + return nil +} + +// Err implements stream.Stream. +func (w *NonStreamingWrapper) Err() error { + return w.err //nolint:wrapcheck +} + +// Messages implements stream.Stream. +func (w *NonStreamingWrapper) Messages() []proto.Message { + return w.messages +} + +// CallTools implements stream.Stream. 
+func (w *NonStreamingWrapper) CallTools() []proto.ToolCallStatus { + // Non-streaming mode doesn't support tool calls yet + return nil +} diff --git a/internal/proto/proto.go b/internal/proto/proto.go index 93a90be6..d56b8f38 100644 --- a/internal/proto/proto.go +++ b/internal/proto/proto.go @@ -75,6 +75,7 @@ type Request struct { Stop []string MaxTokens *int64 ResponseFormat *string + Stream *bool ToolCaller func(name string, data []byte) (string, error) } diff --git a/main.go b/main.go index 77027448..06aacd32 100644 --- a/main.go +++ b/main.go @@ -267,6 +267,7 @@ func initFlags() { flags.BoolVarP(&config.Version, "version", "v", false, stdoutStyles().FlagDesc.Render(help["version"])) flags.IntVar(&config.MaxRetries, "max-retries", config.MaxRetries, stdoutStyles().FlagDesc.Render(help["max-retries"])) flags.BoolVar(&config.NoLimit, "no-limit", config.NoLimit, stdoutStyles().FlagDesc.Render(help["no-limit"])) + flags.BoolVar(&config.NoStream, "no-stream", config.NoStream, stdoutStyles().FlagDesc.Render(help["no-stream"])) flags.Int64Var(&config.MaxTokens, "max-tokens", config.MaxTokens, stdoutStyles().FlagDesc.Render(help["max-tokens"])) flags.IntVar(&config.WordWrap, "word-wrap", config.WordWrap, stdoutStyles().FlagDesc.Render(help["word-wrap"])) flags.Float64Var(&config.Temperature, "temp", config.Temperature, stdoutStyles().FlagDesc.Render(help["temp"])) diff --git a/mods.go b/mods.go index d43f4597..e6b377d0 100644 --- a/mods.go +++ b/mods.go @@ -437,6 +437,10 @@ func (m *Mods) startCompletionCmd(content string) tea.Cmd { if cfg.MaxTokens > 0 { request.MaxTokens = &cfg.MaxTokens } + if cfg.NoStream { + noStream := false + request.Stream = &noStream + } var client stream.Client switch mod.API { From 67d2acad853a1b585220a37cc006a5c1835359e5 Mon Sep 17 00:00:00 2001 From: Greg OmniMind Date: Wed, 1 Oct 2025 23:25:52 +0200 Subject: [PATCH 2/2] Fix linting issues: remove unused nolint directives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit

* Remove unused //nolint:wrapcheck from line 217 of
  internal/openai/openai.go (in `NonStreamingWrapper.Current`)
* Remove unused //nolint:wrapcheck from line 242 of
  internal/openai/openai.go (in `NonStreamingWrapper.Err`)

Fixes golangci-lint nolintlint errors in CI.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 internal/openai/openai.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/internal/openai/openai.go b/internal/openai/openai.go
index 90fefc82..b6f1cf82 100644
--- a/internal/openai/openai.go
+++ b/internal/openai/openai.go
@@ -214,7 +214,7 @@ func (w *NonStreamingWrapper) Next() bool {
 // Current implements stream.Stream.
 func (w *NonStreamingWrapper) Current() (proto.Chunk, error) {
 	if w.err != nil {
-		return proto.Chunk{}, w.err //nolint:wrapcheck
+		return proto.Chunk{}, w.err
 	}
 	if w.response == nil || len(w.response.Choices) == 0 {
 		return proto.Chunk{}, stream.ErrNoContent
@@ -239,7 +239,7 @@ func (w *NonStreamingWrapper) Close() error {
 
 // Err implements stream.Stream.
 func (w *NonStreamingWrapper) Err() error {
-	return w.err //nolint:wrapcheck
+	return w.err
 }
 
 // Messages implements stream.Stream.