Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion atoma-service/docs/openapi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -357,13 +357,21 @@ components:
- boolean
- 'null'
description: Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.
max_tokens:
max_completion_tokens:
type:
- integer
- 'null'
format: int32
description: An upper bound for the number of tokens that can be generated for a completion.
minimum: 0
max_tokens:
type:
- integer
- 'null'
format: int32
description: An upper bound for the number of tokens that can be generated for a completion. Deprecated in the OpenAI API spec; use max_completion_tokens instead.
deprecated: true
minimum: 0
messages:
type: array
items:
Expand Down
6 changes: 5 additions & 1 deletion atoma-service/src/handlers/chat_completions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -737,9 +737,13 @@ pub struct ChatCompletionsRequest {
/// logprobs must be set to true if this parameter is used.
#[serde(default, skip_serializing_if = "Option::is_none")]
top_logprobs: Option<i32>,
/// An upper bound for the number of tokens that can be generated for a completion,
/// An upper bound for the number of tokens that can be generated for a completion.
/// Deprecated in the OpenAI API spec; use `max_completion_tokens` instead.
#[serde(default, skip_serializing_if = "Option::is_none")]
#[deprecated = "Recommended to use max_completion_tokens instead"]
max_tokens: Option<u32>,
/// An upper bound for the number of tokens that can be generated for a completion.
#[serde(default, skip_serializing_if = "Option::is_none")]
max_completion_tokens: Option<u32>,
/// How many chat completion choices to generate for each input message.
#[serde(default, skip_serializing_if = "Option::is_none")]
n: Option<usize>,
Expand Down
12 changes: 8 additions & 4 deletions atoma-service/src/middleware.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,12 @@ const MAX_BODY_SIZE: usize = 1024 * 1024; // 1MB
/// JSON body key under which incoming request payloads carry the model name.
const MODEL: &str = "model";

/// JSON body key for `max_tokens`. Deprecated in the OpenAI API spec in favor of
/// `max_completion_tokens`, but still read as a fallback for older clients.
const MAX_TOKENS: &str = "max_tokens";

/// JSON body key for `max_completion_tokens`, the non-deprecated replacement
/// for `max_tokens`; it is consulted first when estimating compute units.
const MAX_COMPLETION_TOKENS: &str = "max_completion_tokens";

/// Fallback token budget applied when a chat-completions request specifies
/// neither `max_completion_tokens` nor `max_tokens`.
const DEFAULT_MAX_TOKENS_CHAT_COMPLETIONS: i64 = 8192;

Expand Down Expand Up @@ -651,8 +654,8 @@ pub(crate) mod utils {
blake2b_hash, instrument, oneshot, verify_signature, AppState, AtomaServiceError,
ConfidentialComputeDecryptionRequest, ConfidentialComputeRequest, DecryptionMetadata,
Engine, RequestType, TransactionDigest, Value, DEFAULT_MAX_TOKENS_CHAT_COMPLETIONS,
DH_PUBLIC_KEY_SIZE, IMAGE_N, IMAGE_SIZE, INPUT, MAX_TOKENS, MESSAGES, NONCE_SIZE,
PAYLOAD_HASH_SIZE, SALT_SIZE, STANDARD,
DH_PUBLIC_KEY_SIZE, IMAGE_N, IMAGE_SIZE, INPUT, MAX_COMPLETION_TOKENS, MAX_TOKENS,
MESSAGES, NONCE_SIZE, PAYLOAD_HASH_SIZE, SALT_SIZE, STANDARD,
};

/// Requests and verifies stack information from the blockchain for a given transaction.
Expand Down Expand Up @@ -892,7 +895,8 @@ pub(crate) mod utils {
}

total_num_compute_units += body_json
.get(MAX_TOKENS)
.get(MAX_COMPLETION_TOKENS)
.or_else(|| body_json.get(MAX_TOKENS))
.and_then(serde_json::Value::as_i64)
.unwrap_or(DEFAULT_MAX_TOKENS_CHAT_COMPLETIONS);

Expand Down
Loading