elastic
diff --git a/‎output/openapi/elasticsearch-openapi.json‎
Lines changed: 285 additions & 3 deletions b/‎output/openapi/elasticsearch-openapi.json‎
Lines changed: 285 additions & 3 deletions
diff --git a/‎output/openapi/elasticsearch-serverless-openapi.json‎
Lines changed: 285 additions & 3 deletions b/‎output/openapi/elasticsearch-serverless-openapi.json‎
Lines changed: 285 additions & 3 deletions
diff --git a/‎output/schema/schema.json‎
Lines changed: 510 additions & 41 deletions b/‎output/schema/schema.json‎
Lines changed: 510 additions & 41 deletions
diff --git a/‎output/typescript/types.ts‎
Lines changed: 41 additions & 0 deletions b/‎output/typescript/types.ts‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎specification/_doc_ids/table.csv‎
Lines changed: 1 addition & 0 deletions b/‎specification/_doc_ids/table.csv‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎specification/_json_spec/inference.put_openshift_ai.json‎
Lines changed: 49 additions & 0 deletions b/‎specification/_json_spec/inference.put_openshift_ai.json‎
Lines changed: 49 additions & 0 deletions
diff --git a/‎specification/inference/_types/CommonTypes.ts‎
Lines changed: 62 additions & 0 deletions b/‎specification/inference/_types/CommonTypes.ts‎
Lines changed: 62 additions & 0 deletions
diff --git a/‎specification/inference/_types/Services.ts‎
Lines changed: 13 additions & 0 deletions b/‎specification/inference/_types/Services.ts‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎specification/inference/_types/TaskType.ts‎
Lines changed: 7 additions & 0 deletions b/‎specification/inference/_types/TaskType.ts‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎specification/inference/put/PutRequest.ts‎
Lines changed: 1 addition & 0 deletions b/‎specification/inference/put/PutRequest.ts‎
Lines changed: 1 addition & 0 deletions
@@ -387,6 +387,7 @@ inference-api-put-jinaai,https://www.elastic.co/docs/api/doc/elasticsearch/opera
 inference-api-put-llama,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-llama,,
 inference-api-put-mistral,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-mistral,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-mistral.html,
 inference-api-put-openai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-openai.html,
+inference-api-put-openshift-ai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openshift-ai,,
 inference-api-put-voyageai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai,,
 inference-api-put-watsonx,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-watsonx,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-watsonx-ai.html,
 inference-api-stream,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-stream-inference,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/stream-inference-api.html,
 
@@ -0,0 +1,49 @@
+{
+  "inference.put_openshift_ai": {
+    "documentation": {
+      "url": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openshift-ai",
+      "description": "Create an OpenShift AI inference endpoint"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": ["application/json"],
+      "content_type": ["application/json"]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/{task_type}/{openshiftai_inference_id}",
+          "methods": ["PUT"],
+          "parts": {
+            "task_type": {
+              "type": "enum",
+              "description": "The task type",
+              "options": [
+                "rerank",
+                "text_embedding",
+                "completion",
+                "chat_completion"
+              ]
+            },
+            "openshiftai_inference_id": {
+              "type": "string",
+              "description": "The inference ID"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference endpoint's task and service settings",
+      "required": true
+    },
+    "params": {
+      "timeout": {
+        "type": "time",
+        "description": "Specifies the amount of time to wait for the inference endpoint to be created.",
+        "default": "30s"
+      }
+    }
+  }
+}
@@ -1848,6 +1848,68 @@ export enum OpenAIServiceType {
   openai
 }
 
+/**
+ * Service settings for an inference endpoint that uses the `openshift_ai` service.
+ */
+export class OpenShiftAiServiceSettings {
+  /**
+   * A valid API key for your OpenShift AI endpoint.
+   * It can be found in the `Token authentication` section of the model-related information.
+   */
+  api_key: string
+  /**
+   * The URL of the OpenShift AI hosted model endpoint.
+   */
+  url: string
+  /**
+   * The name of the model to use for the inference task.
+   * Refer to the hosted model's documentation for the name if needed.
+   * The service has been tested and confirmed to work with the following models:
+   * * For the `text_embedding` task - `gritlm-7b`.
+   * * For the `completion` and `chat_completion` tasks - `llama-31-8b-instruct`.
+   * * For the `rerank` task - `bge-reranker-v2-m3`.
+   */
+  model_id?: string
+  /**
+   * For a `text_embedding` task, the maximum number of tokens per input before chunking occurs.
+   */
+  max_input_tokens?: integer
+  /**
+   * For a `text_embedding` task, the similarity measure. One of `cosine`, `dot_product`, `l2_norm`.
+   */
+  similarity?: OpenShiftAiSimilarityType
+  /**
+   * This setting helps to minimize the number of rate limit errors returned from the OpenShift AI API.
+   * By default, the `openshift_ai` service sets the number of requests allowed per minute to 3000.
+   */
+  rate_limit?: RateLimitSetting
+}
+
+/**
+ * The task types available for the OpenShift AI inference service.
+ */
+export enum OpenShiftAiTaskType {
+  text_embedding,
+  completion,
+  chat_completion,
+  rerank
+}
+
+/**
+ * The service type for OpenShift AI; `openshift_ai` is the only value.
+ */
+export enum OpenShiftAiServiceType {
+  openshift_ai
+}
+
+/**
+ * The similarity measures that can be set for an OpenShift AI `text_embedding` task.
+ */
+export enum OpenShiftAiSimilarityType {
+  cosine,
+  dot_product,
+  l2_norm
+}
+
+/**
+ * Task settings for an OpenShift AI inference endpoint.
+ * Both settings apply to the `rerank` task.
+ */
+export class OpenShiftAiTaskSettings {
+  /**
+   * For a `rerank` task, whether to return the source documents in the response.
+   */
+  return_documents?: boolean
+  /**
+   * For a `rerank` task, the number of most relevant documents to return.
+   */
+  top_n?: integer
+}
+
 export class VoyageAIServiceSettings {
   /**
    * The number of dimensions for resulting output embeddings.
 
@@ -41,6 +41,7 @@ import {
   TaskTypeLlama,
   TaskTypeMistral,
   TaskTypeOpenAI,
+  TaskTypeOpenShiftAi,
   TaskTypeVoyageAI,
   TaskTypeWatsonx
 } from '../_types/TaskType'
@@ -302,6 +303,17 @@ export class InferenceEndpointInfoOpenAI extends InferenceEndpoint {
   task_type: TaskTypeOpenAI
 }
 
+/**
+ * Inference endpoint information whose `task_type` is restricted to the
+ * OpenShift AI task types (`TaskTypeOpenShiftAi`).
+ */
+export class InferenceEndpointInfoOpenShiftAi extends InferenceEndpoint {
+  /**
+   * The inference Id
+   */
+  inference_id: string
+  /**
+   * The task type
+   */
+  task_type: TaskTypeOpenShiftAi
+}
+
 export class InferenceEndpointInfoVoyageAI extends InferenceEndpoint {
   /**
    * The inference Id
@@ -413,6 +425,7 @@ export class RateLimitSetting {
    * * `mistral` service: `240`
    * * `openai` service and task type `text_embedding`: `3000`
    * * `openai` service and task type `completion`: `500`
+   * * `openshift_ai` service: `3000`
    * * `voyageai` service: `2000`
    * * `watsonxai` service: `120`
    */
 
@@ -142,6 +142,13 @@ export enum TaskTypeOpenAI {
   completion
 }
 
+/**
+ * The task types available for the OpenShift AI inference service.
+ */
+export enum TaskTypeOpenShiftAi {
+  text_embedding,
+  chat_completion,
+  completion,
+  rerank
+}
+
 export enum TaskTypeVoyageAI {
   text_embedding,
   rerank
 
@@ -49,6 +49,7 @@ import { TaskType } from '@inference/_types/TaskType'
  * * Llama (`chat_completion`, `completion`, `text_embedding`)
  * * Mistral (`chat_completion`, `completion`, `text_embedding`)
  * * OpenAI (`chat_completion`, `completion`, `text_embedding`)
+ * * OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)
  * * VoyageAI (`rerank`, `text_embedding`)
  * * Watsonx inference integration (`text_embedding`)
  * @rest_spec_name inference.put