diff --git a/atoma-service/docs/openapi.yml b/atoma-service/docs/openapi.yml
index a99cb0e6..a58c1c6b 100644
--- a/atoma-service/docs/openapi.yml
+++ b/atoma-service/docs/openapi.yml
@@ -357,13 +357,21 @@ components:
           - boolean
           - 'null'
           description: Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.
-        max_tokens:
+        max_completion_tokens:
           type:
           - integer
           - 'null'
           format: int32
           description: An upper bound for the number of tokens that can be generated for a completion,
           minimum: 0
+        max_tokens:
+          type:
+          - integer
+          - 'null'
+          format: int32
+          description: An upper bound for the number of tokens that can be generated for a completion. Deprecated in favor of max_completion_tokens, per the OpenAI API spec.
+          deprecated: true
+          minimum: 0
         messages:
           type: array
           items:
diff --git a/atoma-service/src/handlers/chat_completions.rs b/atoma-service/src/handlers/chat_completions.rs
index a97ed236..06f4e5a7 100644
--- a/atoma-service/src/handlers/chat_completions.rs
+++ b/atoma-service/src/handlers/chat_completions.rs
@@ -737,9 +737,13 @@ pub struct ChatCompletionsRequest {
     /// logprobs must be set to true if this parameter is used.
     #[serde(default, skip_serializing_if = "Option::is_none")]
     top_logprobs: Option<i32>,
-    /// An upper bound for the number of tokens that can be generated for a completion,
+    /// An upper bound for the number of tokens that can be generated for a completion. Deprecated in favor of max_completion_tokens, per the OpenAI API spec.
     #[serde(default, skip_serializing_if = "Option::is_none")]
+    #[deprecated = "Use max_completion_tokens instead"]
     max_tokens: Option<i32>,
+    /// An upper bound for the number of tokens that can be generated for a completion.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    max_completion_tokens: Option<i32>,
     /// How many chat completion choices to generate for each input message.
     #[serde(default, skip_serializing_if = "Option::is_none")]
     n: Option<i32>,
diff --git a/atoma-service/src/middleware.rs b/atoma-service/src/middleware.rs
index aecc6a67..e31da51d 100644
--- a/atoma-service/src/middleware.rs
+++ b/atoma-service/src/middleware.rs
@@ -39,9 +39,12 @@ const MAX_BODY_SIZE: usize = 1024 * 1024; // 1MB

 /// The key for the model in the request body
 const MODEL: &str = "model";

-/// The key for the max tokens in the request body
+/// The key for the max tokens in the request body (deprecated per the OpenAI API spec)
 const MAX_TOKENS: &str = "max_tokens";

+/// The key for the max completion tokens in the request body
+const MAX_COMPLETION_TOKENS: &str = "max_completion_tokens";
+
 /// The default value for the max tokens for chat completions
 const DEFAULT_MAX_TOKENS_CHAT_COMPLETIONS: i64 = 8192;
@@ -651,8 +654,8 @@ pub(crate) mod utils {
         blake2b_hash, instrument, oneshot, verify_signature, AppState, AtomaServiceError,
         ConfidentialComputeDecryptionRequest, ConfidentialComputeRequest, DecryptionMetadata,
         Engine, RequestType, TransactionDigest, Value, DEFAULT_MAX_TOKENS_CHAT_COMPLETIONS,
-        DH_PUBLIC_KEY_SIZE, IMAGE_N, IMAGE_SIZE, INPUT, MAX_TOKENS, MESSAGES, NONCE_SIZE,
-        PAYLOAD_HASH_SIZE, SALT_SIZE, STANDARD,
+        DH_PUBLIC_KEY_SIZE, IMAGE_N, IMAGE_SIZE, INPUT, MAX_COMPLETION_TOKENS, MAX_TOKENS,
+        MESSAGES, NONCE_SIZE, PAYLOAD_HASH_SIZE, SALT_SIZE, STANDARD,
     };

     /// Requests and verifies stack information from the blockchain for a given transaction.
@@ -892,7 +895,8 @@ pub(crate) mod utils {
             }
             total_num_compute_units += body_json
-                .get(MAX_TOKENS)
+                .get(MAX_COMPLETION_TOKENS)
+                .or_else(|| body_json.get(MAX_TOKENS))
                 .and_then(serde_json::Value::as_i64)
                 .unwrap_or(DEFAULT_MAX_TOKENS_CHAT_COMPLETIONS);
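For reference, here is a minimal self-contained sketch of the compute-unit fallback introduced in the final middleware.rs hunk: prefer `max_completion_tokens`, fall back to the deprecated `max_tokens`, then to the default. `resolve_max_tokens` is a hypothetical helper name, not part of the diff; the constants mirror the ones defined in middleware.rs.

```rust
// Sketch (not part of the diff) of the token-bound resolution order:
// max_completion_tokens -> max_tokens (deprecated) -> default.
use serde_json::{json, Value};

const MAX_TOKENS: &str = "max_tokens";
const MAX_COMPLETION_TOKENS: &str = "max_completion_tokens";
const DEFAULT_MAX_TOKENS_CHAT_COMPLETIONS: i64 = 8192;

// Hypothetical helper mirroring the `.get(..).or_else(..)` chain in the diff.
fn resolve_max_tokens(body_json: &Value) -> i64 {
    body_json
        .get(MAX_COMPLETION_TOKENS)
        .or_else(|| body_json.get(MAX_TOKENS))
        .and_then(Value::as_i64)
        .unwrap_or(DEFAULT_MAX_TOKENS_CHAT_COMPLETIONS)
}

fn main() {
    // The new field wins when both are present.
    assert_eq!(
        resolve_max_tokens(&json!({"max_completion_tokens": 256, "max_tokens": 64})),
        256
    );
    // The deprecated field is still honored on its own.
    assert_eq!(resolve_max_tokens(&json!({"max_tokens": 64})), 64);
    // Neither present: the default applies.
    assert_eq!(resolve_max_tokens(&json!({})), 8192);
}
```

One behavioral note on this chain: because `.get(MAX_COMPLETION_TOKENS)` returns `Some` for any present value, a non-integer `max_completion_tokens` does not fall back to `max_tokens`; `as_i64` fails and the default is used instead.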