Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion atoma-service/docs/openapi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -357,13 +357,21 @@ components:
- boolean
- 'null'
description: Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.
max_tokens:
max_completion_tokens:
type:
- integer
- 'null'
format: int32
description: An upper bound for the number of tokens that can be generated for a completion.
minimum: 0
max_tokens:
type:
- integer
- 'null'
format: int32
description: An upper bound for the number of tokens that can be generated for a completion. Deprecated in the OpenAI API spec; use max_completion_tokens instead.
deprecated: true
minimum: 0
messages:
type: array
items:
Expand Down
6 changes: 5 additions & 1 deletion atoma-service/src/handlers/chat_completions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -737,9 +737,13 @@ pub struct ChatCompletionsRequest {
/// logprobs must be set to true if this parameter is used.
#[serde(default, skip_serializing_if = "Option::is_none")]
top_logprobs: Option<i32>,
/// An upper bound for the number of tokens that can be generated for a completion,
/// An upper bound for the number of tokens that can be generated for a completion.
/// Deprecated in the OpenAI API spec; use `max_completion_tokens` instead.
#[serde(default, skip_serializing_if = "Option::is_none")]
#[deprecated = "Recommended to use max_completion_tokens instead"]
max_tokens: Option<u32>,
/// An upper bound for the number of tokens that can be generated for a completion.
#[serde(default, skip_serializing_if = "Option::is_none")]
max_completion_tokens: Option<u32>,
/// How many chat completion choices to generate for each input message.
#[serde(default, skip_serializing_if = "Option::is_none")]
n: Option<usize>,
Expand Down
12 changes: 8 additions & 4 deletions atoma-service/src/middleware.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,12 @@ const MAX_BODY_SIZE: usize = 1024 * 1024; // 1MB
/// JSON body key under which incoming request payloads carry the model name.
const MODEL: &str = "model";

/// JSON body key for `max_tokens`. Deprecated in the OpenAI API spec in favor of
/// `max_completion_tokens`, but still read as a fallback for older clients.
const MAX_TOKENS: &str = "max_tokens";

/// JSON body key for `max_completion_tokens`, the non-deprecated replacement
/// for `max_tokens`; it is consulted first when estimating compute units.
const MAX_COMPLETION_TOKENS: &str = "max_completion_tokens";

/// Fallback token budget applied when a chat-completions request specifies
/// neither `max_completion_tokens` nor `max_tokens`.
const DEFAULT_MAX_TOKENS_CHAT_COMPLETIONS: i64 = 8192;

Expand Down Expand Up @@ -651,8 +654,8 @@ pub(crate) mod utils {
blake2b_hash, instrument, oneshot, verify_signature, AppState, AtomaServiceError,
ConfidentialComputeDecryptionRequest, ConfidentialComputeRequest, DecryptionMetadata,
Engine, RequestType, TransactionDigest, Value, DEFAULT_MAX_TOKENS_CHAT_COMPLETIONS,
DH_PUBLIC_KEY_SIZE, IMAGE_N, IMAGE_SIZE, INPUT, MAX_TOKENS, MESSAGES, NONCE_SIZE,
PAYLOAD_HASH_SIZE, SALT_SIZE, STANDARD,
DH_PUBLIC_KEY_SIZE, IMAGE_N, IMAGE_SIZE, INPUT, MAX_COMPLETION_TOKENS, MAX_TOKENS,
MESSAGES, NONCE_SIZE, PAYLOAD_HASH_SIZE, SALT_SIZE, STANDARD,
};

/// Requests and verifies stack information from the blockchain for a given transaction.
Expand Down Expand Up @@ -892,7 +895,8 @@ pub(crate) mod utils {
}

total_num_compute_units += body_json
.get(MAX_TOKENS)
.get(MAX_COMPLETION_TOKENS)
.or_else(|| body_json.get(MAX_TOKENS))
.and_then(serde_json::Value::as_i64)
.unwrap_or(DEFAULT_MAX_TOKENS_CHAT_COMPLETIONS);

Expand Down
Loading