Commit 4158511

Merge pull request #3 from Khan/release/v1.41.2
Add Verbosity parameter to Chat Completion Request (sashabaranov#1064)
2 parents: 6f645e0 + c53c997

File tree

5 files changed: +187, −5 lines

chat.go

Lines changed: 25 additions & 1 deletion
@@ -248,13 +248,24 @@ func (r *ChatCompletionResponseFormatJSONSchema) UnmarshalJSON(data []byte) erro
 	return nil
 }
 
+// ChatCompletionRequestExtensions contains third-party OpenAI API extensions
+// (e.g., vendor-specific implementations like vLLM).
+type ChatCompletionRequestExtensions struct {
+	// GuidedChoice is a vLLM-specific extension that restricts the model's output
+	// to one of the predefined string choices provided in this field. This feature
+	// is used to constrain the model's responses to a controlled set of options,
+	// ensuring predictable and consistent outputs in scenarios where specific
+	// choices are required.
+	GuidedChoice []string `json:"guided_choice,omitempty"`
+}
+
 // ChatCompletionRequest represents a request structure for chat completion API.
 type ChatCompletionRequest struct {
 	Model    string                  `json:"model"`
 	Messages []ChatCompletionMessage `json:"messages"`
 	// MaxTokens The maximum number of tokens that can be generated in the chat completion.
 	// This value can be used to control costs for text generated via API.
-	// This value is now deprecated in favor of max_completion_tokens, and is not compatible with o1 series models.
+	// Deprecated: use MaxCompletionTokens. Not compatible with o1-series models.
 	// refs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
 	MaxTokens int `json:"max_tokens,omitempty"`
 	// MaxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,
@@ -309,6 +320,19 @@ type ChatCompletionRequest struct {
 	ChatTemplateKwargs map[string]any `json:"chat_template_kwargs,omitempty"`
 	// Specifies the latency tier to use for processing the request.
 	ServiceTier ServiceTier `json:"service_tier,omitempty"`
+	// Verbosity determines how many output tokens are generated. Lowering the number of
+	// tokens reduces overall latency. It can be set to "low", "medium", or "high".
+	// Note: This field is only confirmed to work with gpt-5, gpt-5-mini and gpt-5-nano.
+	// Also, it is not in the API reference of chat completion at the time of writing,
+	// though it is supported by the API.
+	Verbosity string `json:"verbosity,omitempty"`
+	// A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies.
+	// The IDs should be a string that uniquely identifies each user.
+	// We recommend hashing their username or email address, in order to avoid sending us any identifying information.
+	// https://platform.openai.com/docs/api-reference/chat/create#chat_create-safety_identifier
+	SafetyIdentifier string `json:"safety_identifier,omitempty"`
+	// Embedded struct for non-OpenAI extensions
+	ChatCompletionRequestExtensions
 }
 
 type StreamOptions struct {
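
For orientation, here is how a caller might exercise the new fields end to end. A minimal sketch, not from the commit itself: the token, prompt, and SafetyIdentifier value are placeholders, and GuidedChoice only has an effect against a vLLM-compatible server, not the OpenAI API.

package main

import (
	"context"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	client := openai.NewClient("your-api-key") // placeholder token
	resp, err := client.CreateChatCompletion(context.Background(), openai.ChatCompletionRequest{
		Model: openai.GPT5Mini,
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleUser, Content: "Is the sky blue?"},
		},
		// New in this commit: output-length hint for the gpt-5 family.
		Verbosity: "low",
		// New in this commit: stable, non-identifying per-user hash.
		SafetyIdentifier: "hashed-user-id", // placeholder value
		// Embedded extension struct; honored by vLLM-style servers only.
		ChatCompletionRequestExtensions: openai.ChatCompletionRequestExtensions{
			GuidedChoice: []string{"yes", "no"},
		},
	})
	if err != nil {
		fmt.Println("chat completion failed:", err)
		return
	}
	fmt.Println(resp.Choices[0].Message.Content)
}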

chat_test.go

Lines changed: 120 additions & 0 deletions
@@ -331,6 +331,126 @@ func TestO3ModelsChatCompletionsBetaLimitations(t *testing.T) {
 	}
 }
 
+func TestGPT5ModelsChatCompletionsBetaLimitations(t *testing.T) {
+	tests := []struct {
+		name          string
+		in            openai.ChatCompletionRequest
+		expectedError error
+	}{
+		{
+			name: "log_probs_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionTokens: 1000,
+				LogProbs:            true,
+				Model:               openai.GPT5,
+			},
+			expectedError: openai.ErrReasoningModelLimitationsLogprobs,
+		},
+		{
+			name: "set_temperature_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionTokens: 1000,
+				Model:               openai.GPT5Mini,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				Temperature: float32(2),
+			},
+			expectedError: openai.ErrReasoningModelLimitationsOther,
+		},
+		{
+			name: "set_top_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionTokens: 1000,
+				Model:               openai.GPT5Nano,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				Temperature: float32(1),
+				TopP:        float32(0.1),
+			},
+			expectedError: openai.ErrReasoningModelLimitationsOther,
+		},
+		{
+			name: "set_n_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionTokens: 1000,
+				Model:               openai.GPT5ChatLatest,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				Temperature: float32(1),
+				TopP:        float32(1),
+				N:           2,
+			},
+			expectedError: openai.ErrReasoningModelLimitationsOther,
+		},
+		{
+			name: "set_presence_penalty_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionTokens: 1000,
+				Model:               openai.GPT5,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				PresencePenalty: float32(0.1),
+			},
+			expectedError: openai.ErrReasoningModelLimitationsOther,
+		},
+		{
+			name: "set_frequency_penalty_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionTokens: 1000,
+				Model:               openai.GPT5Mini,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				FrequencyPenalty: float32(0.1),
+			},
+			expectedError: openai.ErrReasoningModelLimitationsOther,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			config := openai.DefaultConfig("whatever")
+			config.BaseURL = "http://localhost/v1"
+			client := openai.NewClientWithConfig(config)
+			ctx := context.Background()
+
+			_, err := client.CreateChatCompletion(ctx, tt.in)
+			checks.HasError(t, err)
+			msg := fmt.Sprintf("CreateChatCompletion should return wrong model error, returned: %s", err)
+			checks.ErrorIs(t, err, tt.expectedError, msg)
+		})
+	}
+}
+
 func TestChatRequestOmitEmpty(t *testing.T) {
 	data, err := json.Marshal(openai.ChatCompletionRequest{
 		// We set model b/c it's required, so omitempty doesn't make sense

completion.go

Lines changed: 8 additions & 0 deletions
@@ -49,6 +49,10 @@ const (
 	GPT4Dot1Nano20250414    = "gpt-4.1-nano-2025-04-14"
 	GPT4Dot5Preview         = "gpt-4.5-preview"
 	GPT4Dot5Preview20250227 = "gpt-4.5-preview-2025-02-27"
+	GPT5                    = "gpt-5"
+	GPT5Mini                = "gpt-5-mini"
+	GPT5Nano                = "gpt-5-nano"
+	GPT5ChatLatest          = "gpt-5-chat-latest"
 	GPT3Dot5Turbo0125       = "gpt-3.5-turbo-0125"
 	GPT3Dot5Turbo1106       = "gpt-3.5-turbo-1106"
 	GPT3Dot5Turbo0613       = "gpt-3.5-turbo-0613"
@@ -142,6 +146,10 @@ var disabledModelsForEndpoints = map[string]map[string]bool{
 		GPT4Dot1Mini20250414: true,
 		GPT4Dot1Nano:         true,
 		GPT4Dot1Nano20250414: true,
+		GPT5:                 true,
+		GPT5Mini:             true,
+		GPT5Nano:             true,
+		GPT5ChatLatest:       true,
 	},
 	chatCompletionsSuffix: {
 		CodexCodeDavinci002: true,
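
The net effect of the second hunk is that the legacy completions endpoint now rejects gpt-5 models client-side, before any HTTP request is made. A minimal sketch mirroring the test below (dummy token and base URL, as in the test file):

package main

import (
	"context"
	"errors"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	config := openai.DefaultConfig("whatever") // dummy token; the call fails before any request is sent
	config.BaseURL = "http://localhost/v1"
	client := openai.NewClientWithConfig(config)

	_, err := client.CreateCompletion(context.Background(), openai.CompletionRequest{
		Model:     openai.GPT5, // now listed under the completions endpoint in disabledModelsForEndpoints
		MaxTokens: 5,
	})
	fmt.Println(errors.Is(err, openai.ErrCompletionUnsupportedModel)) // true
}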

completion_test.go

Lines changed: 29 additions & 0 deletions
@@ -300,3 +300,32 @@ func TestCompletionWithGPT4oModels(t *testing.T) {
 		})
 	}
 }
+
+// TestCompletionWithGPT5Models Tests that GPT5 models are not supported for completion endpoint.
+func TestCompletionWithGPT5Models(t *testing.T) {
+	config := openai.DefaultConfig("whatever")
+	config.BaseURL = "http://localhost/v1"
+	client := openai.NewClientWithConfig(config)
+
+	models := []string{
+		openai.GPT5,
+		openai.GPT5Mini,
+		openai.GPT5Nano,
+		openai.GPT5ChatLatest,
+	}
+
+	for _, model := range models {
+		t.Run(model, func(t *testing.T) {
+			_, err := client.CreateCompletion(
+				context.Background(),
+				openai.CompletionRequest{
+					MaxTokens: 5,
+					Model:     model,
+				},
+			)
+			if !errors.Is(err, openai.ErrCompletionUnsupportedModel) {
+				t.Fatalf("CreateCompletion should return ErrCompletionUnsupportedModel for %s model, but returned: %v", model, err)
+			}
+		})
+	}
+}

reasoning_validator.go

Lines changed: 5 additions & 4 deletions
@@ -28,21 +28,22 @@ var (
 	ErrReasoningModelLimitationsOther = errors.New("this model has beta-limitations, temperature, top_p and n are fixed at 1, while presence_penalty and frequency_penalty are fixed at 0") //nolint:lll
 )
 
-// ReasoningValidator handles validation for o-series model requests.
+// ReasoningValidator handles validation for reasoning model requests.
 type ReasoningValidator struct{}
 
-// NewReasoningValidator creates a new validator for o-series models.
+// NewReasoningValidator creates a new validator for reasoning models.
 func NewReasoningValidator() *ReasoningValidator {
 	return &ReasoningValidator{}
 }
 
-// Validate performs all validation checks for o-series models.
+// Validate performs all validation checks for reasoning models.
 func (v *ReasoningValidator) Validate(request ChatCompletionRequest) error {
 	o1Series := strings.HasPrefix(request.Model, "o1")
 	o3Series := strings.HasPrefix(request.Model, "o3")
 	o4Series := strings.HasPrefix(request.Model, "o4")
+	gpt5Series := strings.HasPrefix(request.Model, "gpt-5")
 
-	if !o1Series && !o3Series && !o4Series {
+	if !o1Series && !o3Series && !o4Series && !gpt5Series {
 		return nil
 	}
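
Since the prefix check now also matches "gpt-5", gpt-5 requests are subject to the same beta limitations as o1/o3/o4 models. A minimal sketch mirroring the set_temperature_unsupported case exercised in chat_test.go above:

package main

import (
	"errors"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	v := openai.NewReasoningValidator()
	err := v.Validate(openai.ChatCompletionRequest{
		Model:               openai.GPT5,
		MaxCompletionTokens: 1000,
		Temperature:         float32(2), // temperature is fixed at 1 for reasoning models, so this trips the check
	})
	fmt.Println(errors.Is(err, openai.ErrReasoningModelLimitationsOther)) // true
}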
