Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions gateway-api/src/apis/experimental/extension/inference/common.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// WARNING: generated file - manual changes will be overridden

// Private module that gathers the external names generated code relies on;
// the enclosing file pulls them in with `use self::prelude::*`.
// `unused_imports` is silenced because the portion of this file shown declares
// no items that use these names, so every re-export would otherwise warn.
#[allow(unused_imports)]
mod prelude {
pub use k8s_openapi::apimachinery::pkg::apis::meta::v1::Condition;
pub use kube_derive::CustomResource;
pub use schemars::JsonSchema;
pub use serde::{Deserialize, Serialize};
pub use std::collections::BTreeMap;
}
use self::prelude::*;
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// WARNING: generated file - manual changes will be overridden

// Private module that gathers the derive macros and the `Condition` type used
// by the definitions in this file; it is glob-imported right after.
// `unused_imports` is silenced so the lint stays quiet should any of these
// re-exports end up unreferenced.
#[allow(unused_imports)]
mod prelude {
pub use kube_derive::CustomResource;
pub use schemars::JsonSchema;
pub use serde::{Serialize, Deserialize};
pub use k8s_openapi::apimachinery::pkg::apis::meta::v1::Condition;
}
use self::prelude::*;
// Spec half of the `InferenceObjective` custom resource. The `CustomResource`
// derive is driven by the `#[kube(...)]` attributes below (group, version,
// kind, plural, namespaced scope, status type, extra derives).
// NOTE(review): the doc text below contains the typo "InfereneceObjective";
// it is left untouched here because doc comments may be surfaced in the
// generated schema description -- confirm before editing, and prefer fixing it
// upstream since this file is generated.
/// InferenceObjectiveSpec represents the desired state of a specific model use case. This resource is
/// managed by the "Inference Workload Owner" persona.
///
/// The Inference Workload Owner persona is someone that trains, verifies, and
/// leverages a large language model from a model frontend, drives the lifecycle
/// and rollout of new versions of those models, and defines the specific
/// performance and latency goals for the model. These workloads are
/// expected to operate within an InferencePool sharing compute capacity with other
/// InferenceObjectives, defined by the Inference Platform Admin.
#[derive(
CustomResource,
Serialize,
Deserialize,
Clone,
Debug,
JsonSchema,
Default,
PartialEq
)]
#[kube(
group = "inference.networking.x-k8s.io",
version = "v1alpha2",
kind = "InferenceObjective",
plural = "inferenceobjectives"
)]
#[kube(namespaced)]
#[kube(status = "InferenceObjectiveStatus")]
#[kube(derive = "Default")]
#[kube(derive = "PartialEq")]
pub struct InferenceObjectiveSpec {
// Required field: there is no `default`, so deserialization fails when
// `poolRef` is absent. Serialized under the camelCase key `poolRef`.
/// PoolRef is a reference to the inference pool, the pool must exist in the same namespace.
#[serde(rename = "poolRef")]
pub pool_ref: InferenceObjectivePoolRef,
// Optional: a missing key deserializes to `None`, and `None` is omitted on
// serialization. This type never substitutes 0 itself; the "treated as 0"
// fallback described below is left to whoever consumes the value.
/// Priority defines how important it is to serve the request compared to other requests in the same pool.
/// Priority is an integer value that defines the priority of the request.
/// The higher the value, the more critical the request is; negative values _are_ allowed.
/// No default value is set for this field, allowing for future additions of new fields that may 'one of' with this field.
/// However, implementations that consume this field (such as the Endpoint Picker) will treat an unset value as '0'.
/// Priority is used in flow control, primarily in the event of resource scarcity(requests need to be queued).
/// All requests will be queued, and flow control will _always_ allow requests of higher priority to be served first.
/// Fairness is only enforced and tracked between requests of the same priority.
///
/// Example: requests with Priority 10 will always be served before
/// requests with Priority of 0 (the value used if Priority is unset or no InfereneceObjective is specified).
/// Similarly requests with a Priority of -10 will always be served after requests with Priority of 0.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub priority: Option<i64>,
}
// Reference type used by `InferenceObjectiveSpec::pool_ref`. Only `name` is
// mandatory; `group` and `kind` are optional and are dropped from the
// serialized form when `None`.
/// PoolRef is a reference to the inference pool, the pool must exist in the same namespace.
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
pub struct InferenceObjectivePoolRef {
/// Group is the group of the referent.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub group: Option<String>,
/// Kind is kind of the referent. For example "InferencePool".
#[serde(default, skip_serializing_if = "Option::is_none")]
pub kind: Option<String>,
// Required on deserialization (no `default`); the derived `Default` yields an
// empty string here.
/// Name is the name of the referent.
pub name: String,
}
// Status type named by `#[kube(status = "InferenceObjectiveStatus")]` on
// `InferenceObjectiveSpec`.
/// InferenceObjectiveStatus defines the observed state of InferenceObjective
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
pub struct InferenceObjectiveStatus {
// `None` (key absent) and `Some(vec![])` (empty list) are distinct values;
// only `None` is skipped when serializing.
/// Conditions track the state of the InferenceObjective.
///
/// Known condition types are:
///
/// * "Accepted"
#[serde(default, skip_serializing_if = "Option::is_none")]
pub conditions: Option<Vec<Condition>>,
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// WARNING: generated file - manual changes will be overridden

use super::common::*;
// Private module that gathers the derive macros plus `BTreeMap` and
// `Condition` for the definitions in this file; it is glob-imported right
// after. `unused_imports` is silenced so the lint stays quiet should any of
// these re-exports end up unreferenced.
#[allow(unused_imports)]
mod prelude {
pub use kube_derive::CustomResource;
pub use schemars::JsonSchema;
pub use serde::{Serialize, Deserialize};
pub use std::collections::BTreeMap;
pub use k8s_openapi::apimachinery::pkg::apis::meta::v1::Condition;
}
use self::prelude::*;
// Spec half of the `InferencePool` custom resource. The `CustomResource`
// derive is driven by the `#[kube(...)]` attributes below (group, version,
// kind, plural, namespaced scope, status type, extra derives).
// All three fields are required on deserialization: none carries
// `#[serde(default)]`.
/// InferencePoolSpec defines the desired state of InferencePool
#[derive(
CustomResource,
Serialize,
Deserialize,
Clone,
Debug,
JsonSchema,
Default,
PartialEq
)]
#[kube(
group = "inference.networking.x-k8s.io",
version = "v1alpha2",
kind = "InferencePool",
plural = "inferencepools"
)]
#[kube(namespaced)]
#[kube(status = "InferencePoolStatus")]
#[kube(derive = "Default")]
#[kube(derive = "PartialEq")]
pub struct InferencePoolSpec {
// Serialized under the camelCase key `extensionRef`.
/// Extension configures an endpoint picker as an extension service.
#[serde(rename = "extensionRef")]
pub extension_ref: ExtensionRef,
// NOTE(review): "namesoace" in the doc text below is a typo for "namespace";
// left untouched because doc comments may be surfaced in the generated schema
// description -- confirm before editing, and prefer fixing it upstream.
/// Selector defines a map of labels to watch model server Pods
/// that should be included in the InferencePool.
/// In some cases, implementations may translate this field to a Service selector, so this matches the simple
/// map used for Service selectors instead of the full Kubernetes LabelSelector type.
/// If specified, it will be applied to match the model server pods in the same namespace as the InferencePool.
/// Cross namesoace selector is not supported.
pub selector: BTreeMap<String, String>,
// NOTE(review): the 1..=65535 range stated below is not enforced by this
// type -- `i32` accepts zero, negatives and larger values, and the derived
// `Default` produces 0. Any range check has to happen elsewhere.
/// TargetPortNumber defines the port number to access the selected model server Pods.
/// The number must be in the range 1 to 65535.
#[serde(rename = "targetPortNumber")]
pub target_port_number: i32,
}
// Reference used by `InferencePoolSpec::extension_ref`. Only `name` is
// mandatory. The defaults mentioned in the field docs (failClose, "Service",
// 9002) are not materialised here: the derived `Default` leaves every optional
// field as `None`, so consumers must apply those fallbacks themselves.
/// Extension configures an endpoint picker as an extension service.
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
pub struct ExtensionRef {
// Serialized as `failureMode`; omitted when `None`.
/// Configures how the gateway handles the case when the extension is not responsive.
/// Defaults to failClose.
#[serde(default, skip_serializing_if = "Option::is_none", rename = "failureMode")]
pub failure_mode: Option<ExtensionFailureMode>,
/// Group is the group of the referent.
/// The default value is "", representing the Core API group.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub group: Option<String>,
/// Kind is the Kubernetes resource kind of the referent.
///
/// Defaults to "Service" when not specified.
///
/// ExternalName services can refer to CNAME DNS records that may live
/// outside of the cluster and as such are difficult to reason about in
/// terms of conformance. They also may not be safe to forward to (see
/// CVE-2021-25740 for more information). Implementations MUST NOT
/// support ExternalName Services.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub kind: Option<String>,
// Required on deserialization (no `default`).
/// Name is the name of the referent.
pub name: String,
// Serialized as `portNumber`; omitted when `None`. No range validation is
// performed by the `i32` type.
/// The port number on the service running the extension. When unspecified,
/// implementations SHOULD infer a default value of 9002 when the Kind is
/// Service.
#[serde(default, skip_serializing_if = "Option::is_none", rename = "portNumber")]
pub port_number: Option<i32>,
}
// Value type of `ExtensionRef::failure_mode`.
// NOTE(review): the doc comment below is the same sentence as the one on
// `ExtensionRef` and does not describe this enum; it appears to have been
// carried over by the generator. The field doc on `failure_mode` ("how the
// gateway handles the case when the extension is not responsive") is the
// description that applies.
// With no serde rename attributes, the variants (de)serialize as the strings
// "FailOpen" and "FailClose". Unlike the surrounding structs this enum does
// not derive `Default`; `ExtensionRef` still can because it wraps it in
// `Option`.
/// Extension configures an endpoint picker as an extension service.
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, PartialEq)]
pub enum ExtensionFailureMode {
FailOpen,
FailClose,
}
// Status type named by `#[kube(status = "InferencePoolStatus")]` on
// `InferencePoolSpec`.
/// Status defines the observed state of InferencePool.
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
pub struct InferencePoolStatus {
// NOTE(review): the doc text says "Parents" but the field -- and therefore
// the serialized key, since there is no `rename` -- is the singular `parent`.
// The 32-entry maximum described below is not enforced by `Vec`.
/// Parents is a list of parent resources (usually Gateways) that are
/// associated with the InferencePool, and the status of the InferencePool with respect to
/// each parent.
///
/// A maximum of 32 Gateways will be represented in this list. When the list contains
/// `kind: Status, name: default`, it indicates that the InferencePool is not
/// associated with any Gateway and a controller must perform the following:
///
/// - Remove the parent when setting the "Accepted" condition.
/// - Add the parent when the controller will no longer manage the InferencePool
/// and no other parents exist.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub parent: Option<Vec<InferencePoolParent>>,
}
// Element type of `InferencePoolStatus::parent`: one entry per parent, pairing
// that parent's reference with the conditions it reported.
// NOTE(review): the doc text calls this "PoolStatus" and the reference
// "GatewayRef", while the Rust names are `InferencePoolParent` and
// `parent_ref` / `ParentRef`.
/// PoolStatus defines the observed state of InferencePool from a Gateway.
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
pub struct InferencePoolParent {
/// Conditions track the state of the InferencePool.
///
/// Known condition types are:
///
/// * "Accepted"
/// * "ResolvedRefs"
#[serde(default, skip_serializing_if = "Option::is_none")]
pub conditions: Option<Vec<Condition>>,
// Required on deserialization; serialized under the key `parentRef`.
/// GatewayRef indicates the gateway that observed state of InferencePool.
#[serde(rename = "parentRef")]
pub parent_ref: ParentRef,
}
// Reference type used by `InferencePoolParent::parent_ref`. Only `name` is
// mandatory; the optional fields are omitted from the serialized form when
// `None`.
/// GatewayRef indicates the gateway that observed state of InferencePool.
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
pub struct ParentRef {
/// Group is the group of the referent.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub group: Option<String>,
/// Kind is kind of the referent. For example "Gateway".
#[serde(default, skip_serializing_if = "Option::is_none")]
pub kind: Option<String>,
// Required on deserialization (no `default`).
/// Name is the name of the referent.
pub name: String,
// `None` stands for "same namespace as the referring object" per the doc
// below; this type does not resolve that fallback itself.
/// Namespace is the namespace of the referent. If not present,
/// the namespace of the referent is assumed to be the same as
/// the namespace of the referring object.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub namespace: Option<String>,
}
5 changes: 5 additions & 0 deletions gateway-api/src/apis/experimental/extension/inference/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// WARNING: generated file - manual changes will be overridden

// Public submodules of the experimental inference extension API.
// `common` is what the sibling modules glob-import via `super::common::*`.
pub mod common;
// Presumably holds the InferenceObjective resource types -- confirm against the file.
pub mod inferenceobjectives;
// Presumably holds the InferencePool resource types -- confirm against the file.
pub mod inferencepools;
15 changes: 15 additions & 0 deletions gateway-api/src/apis/standard/extension/inference/common.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// WARNING: generated file - manual changes will be overridden

// Private module that gathers the external names generated code relies on;
// it is glob-imported right after. `unused_imports` is silenced because the
// visible part of this file only needs the serde and schemars derives, leaving
// `Condition`, `CustomResource` and `BTreeMap` unreferenced.
#[allow(unused_imports)]
mod prelude {
pub use k8s_openapi::apimachinery::pkg::apis::meta::v1::Condition;
pub use kube_derive::CustomResource;
pub use schemars::JsonSchema;
pub use serde::{Deserialize, Serialize};
pub use std::collections::BTreeMap;
}
use self::prelude::*;
// Shared type for the standard-channel inference API: a wrapper around a
// single integer field.
// NOTE(review): presumably `number` is a network port for an endpoint --
// confirm against the upstream CRD. The `i32` type enforces no port range and
// the derived `Default` yields 0.
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
pub struct EndPointPort {
// Required on deserialization (no `#[serde(default)]`).
pub number: i32,
}
Loading
Loading