truefoundry · hganwani-droid · Mar 3, 2026
diff --git a/providers/xai/grok-2-image-1212.yaml b/providers/xai/grok-2-image-1212.yaml
@@ -8,5 +8,6 @@ costs:
     - input_cost_per_token: 0
       output_cost_per_image: 0.07
       region: "*"
+isDeprecated: true
 mode: image
 model: grok-2-image-1212
diff --git a/providers/xai/grok-2-image-latest.yaml b/providers/xai/grok-2-image-latest.yaml
@@ -8,5 +8,6 @@ costs:
     - input_cost_per_token: 0
       output_cost_per_image: 0.07
       region: "*"
+isDeprecated: true
 mode: image
 model: grok-2-image-latest
diff --git a/providers/xai/grok-2-image.yaml b/providers/xai/grok-2-image.yaml
@@ -8,5 +8,6 @@ costs:
     - input_cost_per_token: 0
       output_cost_per_image: 0.07
       region: "*"
+isDeprecated: true
 mode: image
 model: grok-2-image
diff --git a/providers/xai/grok-2-vision-1212.yaml b/providers/xai/grok-2-vision-1212.yaml
@@ -1,29 +1,23 @@
 costs:
-    - input_cost_per_image: 0.000002
-      input_cost_per_token: 0.000002
-      output_cost_per_token: 0.00001
-      region: us-east-1
-    - input_cost_per_image: 0.000002
-      input_cost_per_token: 0.000002
-      output_cost_per_token: 0.00001
-      region: eu-west-1
-    - cache_read_input_token_cost: 0
-      input_cost_per_image: 0.000002
-      input_cost_per_query: 0
-      input_cost_per_token: 0.000002
+    - input_cost_per_token: 0.000002
       output_cost_per_token: 0.00001
       region: "*"
 features:
     - function_calling
     - vision
+    - image_input
     - chat
-    - image
     - tool_choice
+    - response_schema
 limits:
+    context_window: 32768
     max_input_tokens: 32768
     max_output_tokens: 32768
 mode: chat
 model: grok-2-vision-1212
 params:
     - key: max_tokens
       maxValue: 4096
+sources:
+    - https://docs.x.ai/developers/release-notes
+    - https://docs.x.ai/docs/key-information/migrating-to-new-models
diff --git a/providers/xai/grok-2-vision-latest.yaml b/providers/xai/grok-2-vision-latest.yaml
@@ -18,6 +18,7 @@ features:
     - vision
     - chat
     - tool_choice
+isDeprecated: true
 limits:
     max_input_tokens: 32768
     max_output_tokens: 32768

diff --git a/providers/xai/grok-2-vision.yaml b/providers/xai/grok-2-vision.yaml
@@ -18,6 +18,7 @@ features:
     - vision
     - chat
     - tool_choice
+isDeprecated: true
 limits:
     max_input_tokens: 32768
     max_output_tokens: 32768

diff --git a/providers/xai/grok-3-beta.yaml b/providers/xai/grok-3-beta.yaml
@@ -1,24 +1,21 @@
 costs:
     - cache_read_input_token_cost: 7.5e-7
       input_cost_per_token: 0.000003
+      input_cost_per_token_batches: 0.0000015
       output_cost_per_token: 0.000015
-      region: us-east-1
-    - cache_read_input_token_cost: 7.5e-7
-      input_cost_per_token: 0.000003
-      output_cost_per_token: 0.000015
-      region: eu-west-1
-    - cache_read_input_token_cost: 7.5e-7
-      input_cost_per_image: 0
-      input_cost_per_query: 0.025
-      input_cost_per_token: 0.000003
-      output_cost_per_token: 0.000015
+      output_cost_per_token_batches: 0.0000075
       region: "*"
 features:
     - function_calling
     - chat
     - tool_choice
+    - response_schema
+    - system_messages
+    - prompt_caching
 limits:
-    max_input_tokens: 131072
-    max_output_tokens: 131072
+    context_window: 131072
 mode: chat
 model: grok-3-beta
+sources:
+    - https://docs.x.ai/developers/release-notes
+    - https://docs.x.ai/developers/rest-api-reference/inference/chat
diff --git a/providers/xai/grok-3-fast-beta.yaml b/providers/xai/grok-3-fast-beta.yaml
@@ -17,6 +17,7 @@ features:
     - function_calling
     - chat
     - tool_choice
+isDeprecated: true
 limits:
     max_input_tokens: 131072
     max_output_tokens: 131072

diff --git a/providers/xai/grok-3-fast-latest.yaml b/providers/xai/grok-3-fast-latest.yaml
@@ -1,24 +1,25 @@
 costs:
     - cache_read_input_token_cost: 7.5e-7
-      input_cost_per_token: 0.000003
-      output_cost_per_token: 0.000015
-      region: us-east-1
-    - cache_read_input_token_cost: 7.5e-7
-      input_cost_per_token: 0.000003
-      output_cost_per_token: 0.000015
-      region: eu-west-1
-    - cache_read_input_token_cost: 7.5e-7
-      input_cost_per_image: 0
       input_cost_per_query: 0.025
       input_cost_per_token: 0.000003
+      input_cost_per_token_batches: 0.0000015
       output_cost_per_token: 0.000015
+      output_cost_per_token_batches: 0.0000075
       region: "*"
 features:
     - function_calling
     - chat
     - tool_choice
+    - response_schema
+    - prompt_caching
 limits:
+    context_window: 131072
     max_input_tokens: 131072
     max_output_tokens: 131072
+    max_tokens: 131072
 mode: chat
 model: grok-3-fast-latest
+sources:
+    - https://docs.x.ai/docs/guides/reasoning
+    - https://docs.x.ai/docs/models
+    - https://docs.oracle.com/en-us/iaas/Content/generative-ai/xai-grok-3-fast.htm
diff --git a/providers/xai/grok-3-fast.yaml b/providers/xai/grok-3-fast.yaml
@@ -1,24 +1,24 @@
 costs:
     - cache_read_input_token_cost: 7.5e-7
-      input_cost_per_token: 0.000003
-      output_cost_per_token: 0.000015
-      region: us-east-1
-    - cache_read_input_token_cost: 7.5e-7
-      input_cost_per_token: 0.000003
-      output_cost_per_token: 0.000015
-      region: eu-west-1
-    - cache_read_input_token_cost: 7.5e-7
-      input_cost_per_image: 0
       input_cost_per_query: 0.025
       input_cost_per_token: 0.000003
+      input_cost_per_token_batches: 0.0000015
       output_cost_per_token: 0.000015
+      output_cost_per_token_batches: 0.0000075
       region: "*"
 features:
     - function_calling
     - chat
     - tool_choice
+    - response_schema
+    - prompt_caching
 limits:
+    context_window: 131072
     max_input_tokens: 131072
     max_output_tokens: 131072
 mode: chat
 model: grok-3-fast
+sources:
+    - https://docs.x.ai/developers/release-notes
+    - https://docs.x.ai/docs/guides/reasoning
+    - https://docs.oracle.com/en-us/iaas/Content/generative-ai/xai-grok-3-fast.htm
diff --git a/providers/xai/grok-3-latest.yaml b/providers/xai/grok-3-latest.yaml
@@ -1,24 +1,24 @@
 costs:
     - cache_read_input_token_cost: 7.5e-7
+      input_cost_per_query: 0.005
       input_cost_per_token: 0.000003
+      input_cost_per_token_batches: 0.0000015
       output_cost_per_token: 0.000015
-      region: us-east-1
-    - cache_read_input_token_cost: 7.5e-7
-      input_cost_per_token: 0.000003
-      output_cost_per_token: 0.000015
-      region: eu-west-1
-    - cache_read_input_token_cost: 7.5e-7
-      input_cost_per_image: 0
-      input_cost_per_query: 0.025
-      input_cost_per_token: 0.000003
-      output_cost_per_token: 0.000015
+      output_cost_per_token_batches: 0.0000075
       region: "*"
 features:
     - function_calling
     - chat
     - tool_choice
+    - response_schema
+    - system_messages
+    - prompt_caching
 limits:
+    context_window: 131072
     max_input_tokens: 131072
     max_output_tokens: 131072
+    max_tokens: 131072
 mode: chat
 model: grok-3-latest
+sources:
+    - https://docs.x.ai/docs/models?cluster=us-west-1
diff --git a/providers/xai/grok-3-mini-beta.yaml b/providers/xai/grok-3-mini-beta.yaml
@@ -1,24 +1,36 @@
 costs:
-    - cache_read_input_token_cost: 7.5e-8
-      input_cost_per_token: 3.e-7
-      output_cost_per_token: 5.e-7
-      region: us-east-1
-    - cache_read_input_token_cost: 7.5e-8
-      input_cost_per_token: 3.e-7
-      output_cost_per_token: 5.e-7
-      region: eu-west-1
-    - cache_read_input_token_cost: 7.5e-8
-      input_cost_per_image: 0
-      input_cost_per_query: 0.025
+    - cache_read_input_token_cost: 7.e-8
+      input_cost_per_query: 0.005
       input_cost_per_token: 3.e-7
+      input_cost_per_token_batches: 1.5e-7
       output_cost_per_token: 5.e-7
+      output_cost_per_token_batches: 2.5e-7
       region: "*"
 features:
     - function_calling
+    - parallel_function_calling
     - chat
     - tool_choice
+    - response_schema
+    - prompt_caching
+    - system_messages
 limits:
+    context_window: 131072
     max_input_tokens: 131072
     max_output_tokens: 131072
+    max_tokens: 131072
 mode: chat
 model: grok-3-mini-beta
+params:
+    - defaultValue: null
+      key: reasoning_effort
+      type: string
+removeParams:
+    - stop
+sources:
+    - https://docs.x.ai/docs/guides/reasoning
+    - https://docs.x.ai/docs/guides/structured-outputs
+    - https://docs.x.ai/docs/guides/function-calling
+    - https://docs.x.ai/docs/guides/tools/overview
+    - https://docs.x.ai/developers/tools/tool-usage-details
+thinking: true
diff --git a/providers/xai/grok-3-mini-fast-beta.yaml b/providers/xai/grok-3-mini-fast-beta.yaml
@@ -1,24 +1,25 @@
 costs:
-    - cache_read_input_token_cost: 7.5e-8
-      input_cost_per_token: 3.e-7
-      output_cost_per_token: 5.e-7
-      region: us-east-1
-    - cache_read_input_token_cost: 7.5e-8
-      input_cost_per_token: 3.e-7
-      output_cost_per_token: 5.e-7
-      region: eu-west-1
-    - cache_read_input_token_cost: 7.5e-8
-      input_cost_per_image: 0
-      input_cost_per_query: 0.025
+    - cache_read_input_token_cost: 7.e-8
       input_cost_per_token: 3.e-7
       output_cost_per_token: 5.e-7
       region: "*"
 features:
     - function_calling
     - chat
     - tool_choice
+    - response_schema
+    - prompt_caching
+    - system_messages
 limits:
+    context_window: 131072
     max_input_tokens: 131072
     max_output_tokens: 131072
+    max_tokens: 131072
 mode: chat
 model: grok-3-mini-fast-beta
+params:
+    - key: max_tokens
+      maxValue: 131072
+sources:
+    - https://docs.oracle.com/en-us/iaas/Content/generative-ai/xai-grok-3-mini-fast.htm
+thinking: true
diff --git a/providers/xai/grok-3-mini-fast-latest.yaml b/providers/xai/grok-3-mini-fast-latest.yaml
@@ -1,24 +1,26 @@
 costs:
     - cache_read_input_token_cost: 7.5e-8
-      input_cost_per_token: 3.e-7
-      output_cost_per_token: 5.e-7
-      region: us-east-1
-    - cache_read_input_token_cost: 7.5e-8
-      input_cost_per_token: 3.e-7
-      output_cost_per_token: 5.e-7
-      region: eu-west-1
-    - cache_read_input_token_cost: 7.5e-8
-      input_cost_per_image: 0
       input_cost_per_query: 0.025
       input_cost_per_token: 3.e-7
+      input_cost_per_token_batches: 1.5e-7
       output_cost_per_token: 5.e-7
+      output_cost_per_token_batches: 2.5e-7
       region: "*"
 features:
     - function_calling
     - chat
     - tool_choice
+    - response_schema
+    - prompt_caching
+    - system_messages
 limits:
+    context_window: 131072
     max_input_tokens: 131072
     max_output_tokens: 131072
+    max_tokens: 131072
 mode: chat
 model: grok-3-mini-fast-latest
+sources:
+    - https://docs.x.ai/docs/models/grok-3-mini-fast
+    - https://docs.x.ai/docs/models?cluster=us-west-1
+thinking: true
diff --git a/providers/xai/grok-3-mini-fast.yaml b/providers/xai/grok-3-mini-fast.yaml
@@ -1,24 +1,24 @@
 costs:
     - cache_read_input_token_cost: 7.5e-8
-      input_cost_per_token: 3.e-7
-      output_cost_per_token: 5.e-7
-      region: us-east-1
-    - cache_read_input_token_cost: 7.5e-8
-      input_cost_per_token: 3.e-7
-      output_cost_per_token: 5.e-7
-      region: eu-west-1
-    - cache_read_input_token_cost: 7.5e-8
-      input_cost_per_image: 0
       input_cost_per_query: 0.025
       input_cost_per_token: 3.e-7
+      input_cost_per_token_batches: 1.5e-7
       output_cost_per_token: 5.e-7
+      output_cost_per_token_batches: 2.5e-7
       region: "*"
 features:
     - function_calling
     - chat
     - tool_choice
+    - response_schema
+    - prompt_caching
 limits:
+    context_window: 131072
     max_input_tokens: 131072
     max_output_tokens: 131072
+    max_tokens: 131072
 mode: chat
 model: grok-3-mini-fast
+sources:
+    - https://docs.x.ai/docs/models/grok-3-mini-fast
+thinking: true