Commit 4158511

Merge pull request #3 from Khan/release/v1.41.2
Add Verbosity parameter to Chat Completion Request (sashabaranov#1064)
2 parents: 6f645e0 + c53c997

File tree

5 files changed: +187, −5 lines

chat.go

Lines changed: 25 additions & 1 deletion
@@ -248,13 +248,24 @@ func (r *ChatCompletionResponseFormatJSONSchema) UnmarshalJSON(data []byte) erro
 	return nil
 }
 
+// ChatCompletionRequestExtensions contains third-party OpenAI API extensions
+// (e.g., vendor-specific implementations like vLLM).
+type ChatCompletionRequestExtensions struct {
+	// GuidedChoice is a vLLM-specific extension that restricts the model's output
+	// to one of the predefined string choices provided in this field. This feature
+	// is used to constrain the model's responses to a controlled set of options,
+	// ensuring predictable and consistent outputs in scenarios where specific
+	// choices are required.
+	GuidedChoice []string `json:"guided_choice,omitempty"`
+}
+
 // ChatCompletionRequest represents a request structure for chat completion API.
 type ChatCompletionRequest struct {
 	Model    string                  `json:"model"`
 	Messages []ChatCompletionMessage `json:"messages"`
 	// MaxTokens The maximum number of tokens that can be generated in the chat completion.
 	// This value can be used to control costs for text generated via API.
-	// This value is now deprecated in favor of max_completion_tokens, and is not compatible with o1 series models.
+	// Deprecated: use MaxCompletionTokens. Not compatible with o1-series models.
 	// refs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
 	MaxTokens int `json:"max_tokens,omitempty"`
 	// MaxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,
@@ -309,6 +320,19 @@ type ChatCompletionRequest struct {
 	ChatTemplateKwargs map[string]any `json:"chat_template_kwargs,omitempty"`
 	// Specifies the latency tier to use for processing the request.
 	ServiceTier ServiceTier `json:"service_tier,omitempty"`
+	// Verbosity determines how many output tokens are generated. Lowering the number of
+	// tokens reduces overall latency. It can be set to "low", "medium", or "high".
+	// Note: This field is only confirmed to work with gpt-5, gpt-5-mini and gpt-5-nano.
+	// Also, it is not in the API reference of chat completion at the time of writing,
+	// though it is supported by the API.
+	Verbosity string `json:"verbosity,omitempty"`
+	// A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies.
+	// The IDs should be a string that uniquely identifies each user.
+	// We recommend hashing their username or email address, in order to avoid sending us any identifying information.
+	// https://platform.openai.com/docs/api-reference/chat/create#chat_create-safety_identifier
+	SafetyIdentifier string `json:"safety_identifier,omitempty"`
+	// Embedded struct for non-OpenAI extensions
+	ChatCompletionRequestExtensions
 }
 
 type StreamOptions struct {
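
For orientation, here is how a caller might exercise the new fields end to end. A minimal sketch, not from the commit itself: the token, prompt, and SafetyIdentifier value are placeholders, and GuidedChoice only has an effect against a vLLM-compatible server, not the OpenAI API.

package main

import (
	"context"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	client := openai.NewClient("your-api-key") // placeholder token
	resp, err := client.CreateChatCompletion(context.Background(), openai.ChatCompletionRequest{
		Model: openai.GPT5Mini,
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleUser, Content: "Is the sky blue?"},
		},
		// New in this commit: output-length hint for the gpt-5 family.
		Verbosity: "low",
		// New in this commit: stable, non-identifying per-user hash.
		SafetyIdentifier: "hashed-user-id", // placeholder value
		// Embedded extension struct; honored by vLLM-style servers only.
		ChatCompletionRequestExtensions: openai.ChatCompletionRequestExtensions{
			GuidedChoice: []string{"yes", "no"},
		},
	})
	if err != nil {
		fmt.Println("chat completion failed:", err)
		return
	}
	fmt.Println(resp.Choices[0].Message.Content)
}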

chat_test.go

Lines changed: 120 additions & 0 deletions
@@ -331,6 +331,126 @@ func TestO3ModelsChatCompletionsBetaLimitations(t *testing.T) {
 	}
 }
 
+func TestGPT5ModelsChatCompletionsBetaLimitations(t *testing.T) {
+	tests := []struct {
+		name          string
+		in            openai.ChatCompletionRequest
+		expectedError error
+	}{
+		{
+			name: "log_probs_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionTokens: 1000,
+				LogProbs:            true,
+				Model:               openai.GPT5,
+			},
+			expectedError: openai.ErrReasoningModelLimitationsLogprobs,
+		},
+		{
+			name: "set_temperature_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionTokens: 1000,
+				Model:               openai.GPT5Mini,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				Temperature: float32(2),
+			},
+			expectedError: openai.ErrReasoningModelLimitationsOther,
+		},
+		{
+			name: "set_top_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionTokens: 1000,
+				Model:               openai.GPT5Nano,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				Temperature: float32(1),
+				TopP:        float32(0.1),
+			},
+			expectedError: openai.ErrReasoningModelLimitationsOther,
+		},
+		{
+			name: "set_n_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionTokens: 1000,
+				Model:               openai.GPT5ChatLatest,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				Temperature: float32(1),
+				TopP:        float32(1),
+				N:           2,
+			},
+			expectedError: openai.ErrReasoningModelLimitationsOther,
+		},
+		{
+			name: "set_presence_penalty_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionTokens: 1000,
+				Model:               openai.GPT5,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				PresencePenalty: float32(0.1),
+			},
+			expectedError: openai.ErrReasoningModelLimitationsOther,
+		},
+		{
+			name: "set_frequency_penalty_unsupported",
+			in: openai.ChatCompletionRequest{
+				MaxCompletionTokens: 1000,
+				Model:               openai.GPT5Mini,
+				Messages: []openai.ChatCompletionMessage{
+					{
+						Role: openai.ChatMessageRoleUser,
+					},
+					{
+						Role: openai.ChatMessageRoleAssistant,
+					},
+				},
+				FrequencyPenalty: float32(0.1),
+			},
+			expectedError: openai.ErrReasoningModelLimitationsOther,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			config := openai.DefaultConfig("whatever")
+			config.BaseURL = "http://localhost/v1"
+			client := openai.NewClientWithConfig(config)
+			ctx := context.Background()
+
+			_, err := client.CreateChatCompletion(ctx, tt.in)
+			checks.HasError(t, err)
+			msg := fmt.Sprintf("CreateChatCompletion should return wrong model error, returned: %s", err)
+			checks.ErrorIs(t, err, tt.expectedError, msg)
+		})
+	}
+}
+
 func TestChatRequestOmitEmpty(t *testing.T) {
 	data, err := json.Marshal(openai.ChatCompletionRequest{
 		// We set model b/c it's required, so omitempty doesn't make sense

completion.go

Lines changed: 8 additions & 0 deletions
@@ -49,6 +49,10 @@ const (
 	GPT4Dot1Nano20250414    = "gpt-4.1-nano-2025-04-14"
 	GPT4Dot5Preview         = "gpt-4.5-preview"
 	GPT4Dot5Preview20250227 = "gpt-4.5-preview-2025-02-27"
+	GPT5                    = "gpt-5"
+	GPT5Mini                = "gpt-5-mini"
+	GPT5Nano                = "gpt-5-nano"
+	GPT5ChatLatest          = "gpt-5-chat-latest"
 	GPT3Dot5Turbo0125       = "gpt-3.5-turbo-0125"
 	GPT3Dot5Turbo1106       = "gpt-3.5-turbo-1106"
 	GPT3Dot5Turbo0613       = "gpt-3.5-turbo-0613"
@@ -142,6 +146,10 @@ var disabledModelsForEndpoints = map[string]map[string]bool{
 		GPT4Dot1Mini20250414: true,
 		GPT4Dot1Nano:         true,
 		GPT4Dot1Nano20250414: true,
+		GPT5:                 true,
+		GPT5Mini:             true,
+		GPT5Nano:             true,
+		GPT5ChatLatest:       true,
 	},
 	chatCompletionsSuffix: {
 		CodexCodeDavinci002: true,
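
The net effect of the second hunk is that the legacy completions endpoint now rejects gpt-5 models client-side, before any HTTP request is made. A minimal sketch mirroring the test below (dummy token and base URL, as in the test file):

package main

import (
	"context"
	"errors"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	config := openai.DefaultConfig("whatever") // dummy token; the call fails before any request is sent
	config.BaseURL = "http://localhost/v1"
	client := openai.NewClientWithConfig(config)

	_, err := client.CreateCompletion(context.Background(), openai.CompletionRequest{
		Model:     openai.GPT5, // now listed under the completions endpoint in disabledModelsForEndpoints
		MaxTokens: 5,
	})
	fmt.Println(errors.Is(err, openai.ErrCompletionUnsupportedModel)) // true
}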

completion_test.go

Lines changed: 29 additions & 0 deletions
@@ -300,3 +300,32 @@ func TestCompletionWithGPT4oModels(t *testing.T) {
 		})
 	}
 }
+
+// TestCompletionWithGPT5Models Tests that GPT5 models are not supported for completion endpoint.
+func TestCompletionWithGPT5Models(t *testing.T) {
+	config := openai.DefaultConfig("whatever")
+	config.BaseURL = "http://localhost/v1"
+	client := openai.NewClientWithConfig(config)
+
+	models := []string{
+		openai.GPT5,
+		openai.GPT5Mini,
+		openai.GPT5Nano,
+		openai.GPT5ChatLatest,
+	}
+
+	for _, model := range models {
+		t.Run(model, func(t *testing.T) {
+			_, err := client.CreateCompletion(
+				context.Background(),
+				openai.CompletionRequest{
+					MaxTokens: 5,
+					Model:     model,
+				},
+			)
+			if !errors.Is(err, openai.ErrCompletionUnsupportedModel) {
+				t.Fatalf("CreateCompletion should return ErrCompletionUnsupportedModel for %s model, but returned: %v", model, err)
+			}
+		})
+	}
+}

reasoning_validator.go

Lines changed: 5 additions & 4 deletions
@@ -28,21 +28,22 @@ var (
 	ErrReasoningModelLimitationsOther = errors.New("this model has beta-limitations, temperature, top_p and n are fixed at 1, while presence_penalty and frequency_penalty are fixed at 0") //nolint:lll
 )
 
-// ReasoningValidator handles validation for o-series model requests.
+// ReasoningValidator handles validation for reasoning model requests.
 type ReasoningValidator struct{}
 
-// NewReasoningValidator creates a new validator for o-series models.
+// NewReasoningValidator creates a new validator for reasoning models.
 func NewReasoningValidator() *ReasoningValidator {
 	return &ReasoningValidator{}
 }
 
-// Validate performs all validation checks for o-series models.
+// Validate performs all validation checks for reasoning models.
 func (v *ReasoningValidator) Validate(request ChatCompletionRequest) error {
 	o1Series := strings.HasPrefix(request.Model, "o1")
 	o3Series := strings.HasPrefix(request.Model, "o3")
 	o4Series := strings.HasPrefix(request.Model, "o4")
+	gpt5Series := strings.HasPrefix(request.Model, "gpt-5")
 
-	if !o1Series && !o3Series && !o4Series {
+	if !o1Series && !o3Series && !o4Series && !gpt5Series {
 		return nil
 	}
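
Since the prefix check now also matches "gpt-5", gpt-5 requests are subject to the same beta limitations as o1/o3/o4 models. A minimal sketch mirroring the set_temperature_unsupported case exercised in chat_test.go above:

package main

import (
	"errors"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	v := openai.NewReasoningValidator()
	err := v.Validate(openai.ChatCompletionRequest{
		Model:               openai.GPT5,
		MaxCompletionTokens: 1000,
		Temperature:         float32(2), // temperature is fixed at 1 for reasoning models, so this trips the check
	})
	fmt.Println(errors.Is(err, openai.ErrReasoningModelLimitationsOther)) // true
}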
