Skip to content

Commit 5335485

Browse files
Add OpenShift AI integration specifications (#5662)
* Add OpenShift AI specifications * Add timeout parameter to inference endpoint settings and update request examples
1 parent 8f7c9e2 commit 5335485

22 files changed

+1546
-47
lines changed

output/openapi/elasticsearch-openapi.json

Lines changed: 285 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/openapi/elasticsearch-serverless-openapi.json

Lines changed: 285 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/schema/schema.json

Lines changed: 510 additions & 41 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

output/typescript/types.ts

Lines changed: 41 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

specification/_doc_ids/table.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ inference-api-put-jinaai,https://www.elastic.co/docs/api/doc/elasticsearch/opera
387387
inference-api-put-llama,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-llama,,
388388
inference-api-put-mistral,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-mistral,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-mistral.html,
389389
inference-api-put-openai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-openai.html,
390+
inference-api-put-openshift-ai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openshift-ai,,
390391
inference-api-put-voyageai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai,,
391392
inference-api-put-watsonx,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-watsonx,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-watsonx-ai.html,
392393
inference-api-stream,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-stream-inference,https://www.elastic.co/guide/en/elasticsearch/reference/8.18/stream-inference-api.html,
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
{
2+
"inference.put_openshift_ai": {
3+
"documentation": {
4+
"url": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-openshift-ai",
5+
"description": "Create an OpenShift AI inference endpoint"
6+
},
7+
"stability": "stable",
8+
"visibility": "public",
9+
"headers": {
10+
"accept": ["application/json"],
11+
"content_type": ["application/json"]
12+
},
13+
"url": {
14+
"paths": [
15+
{
16+
"path": "/_inference/{task_type}/{openshiftai_inference_id}",
17+
"methods": ["PUT"],
18+
"parts": {
19+
"task_type": {
20+
"type": "enum",
21+
"description": "The task type",
22+
"options": [
23+
"rerank",
24+
"text_embedding",
25+
"completion",
26+
"chat_completion"
27+
]
28+
},
29+
"openshiftai_inference_id": {
30+
"type": "string",
31+
"description": "The inference ID"
32+
}
33+
}
34+
}
35+
]
36+
},
37+
"body": {
38+
"description": "The inference endpoint's task and service settings",
39+
"required": true
40+
},
41+
"params": {
42+
"timeout": {
43+
"type": "time",
44+
"description": "Specifies the amount of time to wait for the inference endpoint to be created.",
45+
"default": "30s"
46+
}
47+
}
48+
}
49+
}

specification/inference/_types/CommonTypes.ts

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1848,6 +1848,68 @@ export enum OpenAIServiceType {
18481848
openai
18491849
}
18501850

1851+
export class OpenShiftAiServiceSettings {
1852+
/**
1853+
* A valid API key for your OpenShift AI endpoint.
1854+
* It can be found in the `Token authentication` section of the model-related information.
1855+
*/
1856+
api_key: string
1857+
/**
1858+
* The URL of the OpenShift AI hosted model endpoint.
1859+
*/
1860+
url: string
1861+
/**
1862+
* The name of the model to use for the inference task.
1863+
* Refer to the hosted model's documentation for the name if needed.
1864+
* The service has been tested and confirmed to work with the following models:
1865+
* * For `text_embedding` task - `gritlm-7b`.
1866+
* * For `completion` and `chat_completion` tasks - `llama-31-8b-instruct`.
1867+
* * For `rerank` task - `bge-reranker-v2-m3`.
1868+
*/
1869+
model_id?: string
1870+
/**
1871+
* For a `text_embedding` task, the maximum number of tokens per input before chunking occurs.
1872+
*/
1873+
max_input_tokens?: integer
1874+
/**
1875+
* For a `text_embedding` task, the similarity measure. One of `cosine`, `dot_product`, or `l2_norm`.
1876+
*/
1877+
similarity?: OpenShiftAiSimilarityType
1878+
/**
1879+
* This setting helps to minimize the number of rate limit errors returned from the OpenShift AI API.
1880+
* By default, the `openshift_ai` service sets the number of requests allowed per minute to 3000.
1881+
*/
1882+
rate_limit?: RateLimitSetting
1883+
}
1884+
1885+
export enum OpenShiftAiTaskType {
1886+
text_embedding,
1887+
completion,
1888+
chat_completion,
1889+
rerank
1890+
}
1891+
1892+
export enum OpenShiftAiServiceType {
1893+
openshift_ai
1894+
}
1895+
1896+
export enum OpenShiftAiSimilarityType {
1897+
cosine,
1898+
dot_product,
1899+
l2_norm
1900+
}
1901+
1902+
export class OpenShiftAiTaskSettings {
1903+
/**
1904+
* For a `rerank` task, whether to return the source documents in the response.
1905+
*/
1906+
return_documents?: boolean
1907+
/**
1908+
* For a `rerank` task, the number of most relevant documents to return.
1909+
*/
1910+
top_n?: integer
1911+
}
1912+
18511913
export class VoyageAIServiceSettings {
18521914
/**
18531915
* The number of dimensions for resulting output embeddings.

specification/inference/_types/Services.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ import {
4141
TaskTypeLlama,
4242
TaskTypeMistral,
4343
TaskTypeOpenAI,
44+
TaskTypeOpenShiftAi,
4445
TaskTypeVoyageAI,
4546
TaskTypeWatsonx
4647
} from '../_types/TaskType'
@@ -302,6 +303,17 @@ export class InferenceEndpointInfoOpenAI extends InferenceEndpoint {
302303
task_type: TaskTypeOpenAI
303304
}
304305

306+
export class InferenceEndpointInfoOpenShiftAi extends InferenceEndpoint {
307+
/**
308+
* The inference Id
309+
*/
310+
inference_id: string
311+
/**
312+
* The task type
313+
*/
314+
task_type: TaskTypeOpenShiftAi
315+
}
316+
305317
export class InferenceEndpointInfoVoyageAI extends InferenceEndpoint {
306318
/**
307319
* The inference Id
@@ -413,6 +425,7 @@ export class RateLimitSetting {
413425
* * `mistral` service: `240`
414426
* * `openai` service and task type `text_embedding`: `3000`
415427
* * `openai` service and task type `completion`: `500`
428+
* * `openshift_ai` service: `3000`
416429
* * `voyageai` service: `2000`
417430
* * `watsonxai` service: `120`
418431
*/

specification/inference/_types/TaskType.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,13 @@ export enum TaskTypeOpenAI {
142142
completion
143143
}
144144

145+
export enum TaskTypeOpenShiftAi {
146+
text_embedding,
147+
chat_completion,
148+
completion,
149+
rerank
150+
}
151+
145152
export enum TaskTypeVoyageAI {
146153
text_embedding,
147154
rerank

specification/inference/put/PutRequest.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ import { TaskType } from '@inference/_types/TaskType'
4949
* * Llama (`chat_completion`, `completion`, `text_embedding`)
5050
* * Mistral (`chat_completion`, `completion`, `text_embedding`)
5151
* * OpenAI (`chat_completion`, `completion`, `text_embedding`)
52+
* * OpenShift AI (`chat_completion`, `completion`, `rerank`, `text_embedding`)
5253
* * VoyageAI (`rerank`, `text_embedding`)
5354
* * Watsonx inference integration (`text_embedding`)
5455
* @rest_spec_name inference.put

0 commit comments

Comments
 (0)