Skip to content

Commit c8451dd

Browse files
committed
inference extension support
Signed-off-by: Nitishkumar Singh <nitishkumarsingh71@gmail.com>
1 parent f0c0deb commit c8451dd

File tree

14 files changed

+556
-30
lines changed

14 files changed

+556
-30
lines changed
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// WARNING: generated by kopium - manual changes will be overwritten
2+
// kopium command: kopium --schema=derived --derive=JsonSchema --derive=Default --derive=PartialEq --docs -f -
3+
// kopium version: 0.22.5
4+
5+
#[allow(unused_imports)]
6+
mod prelude {
7+
pub use kube_derive::CustomResource;
8+
pub use schemars::JsonSchema;
9+
pub use serde::{Serialize, Deserialize};
10+
pub use k8s_openapi::apimachinery::pkg::apis::meta::v1::Condition;
11+
}
12+
use self::prelude::*;
13+
14+
/// InferenceObjectiveSpec represents the desired state of a specific model use case. This resource is
15+
/// managed by the "Inference Workload Owner" persona.
16+
///
17+
/// The Inference Workload Owner persona is someone that trains, verifies, and
18+
/// leverages a large language model from a model frontend, drives the lifecycle
19+
/// and rollout of new versions of those models, and defines the specific
20+
/// performance and latency goals for the model. These workloads are
21+
/// expected to operate within an InferencePool sharing compute capacity with other
22+
/// InferenceObjectives, defined by the Inference Platform Admin.
23+
#[derive(CustomResource, Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
24+
#[kube(group = "inference.networking.x-k8s.io", version = "v1alpha2", kind = "InferenceObjective", plural = "inferenceobjectives")]
25+
#[kube(namespaced)]
26+
#[kube(status = "InferenceObjectiveStatus")]
27+
#[kube(derive="Default")]
28+
#[kube(derive="PartialEq")]
29+
pub struct InferenceObjectiveSpec {
30+
/// PoolRef is a reference to the inference pool, the pool must exist in the same namespace.
31+
#[serde(rename = "poolRef")]
32+
pub pool_ref: InferenceObjectivePoolRef,
33+
/// Priority defines how important it is to serve the request compared to other requests in the same pool.
34+
/// Priority is an integer value that defines the priority of the request.
35+
/// The higher the value, the more critical the request is; negative values _are_ allowed.
36+
/// No default value is set for this field, allowing for future additions of new fields that may 'one of' with this field.
37+
/// However, implementations that consume this field (such as the Endpoint Picker) will treat an unset value as '0'.
38+
/// Priority is used in flow control, primarily in the event of resource scarcity (requests need to be queued).
39+
/// All requests will be queued, and flow control will _always_ allow requests of higher priority to be served first.
40+
/// Fairness is only enforced and tracked between requests of the same priority.
41+
///
42+
/// Example: requests with Priority 10 will always be served before
43+
/// requests with Priority of 0 (the value used if Priority is unset or no InferenceObjective is specified).
44+
/// Similarly requests with a Priority of -10 will always be served after requests with Priority of 0.
45+
#[serde(default, skip_serializing_if = "Option::is_none")]
46+
pub priority: Option<i64>,
47+
}
48+
49+
/// PoolRef is a reference to the inference pool, the pool must exist in the same namespace.
50+
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
51+
pub struct InferenceObjectivePoolRef {
52+
/// Group is the group of the referent.
53+
#[serde(default, skip_serializing_if = "Option::is_none")]
54+
pub group: Option<String>,
55+
/// Kind is kind of the referent. For example "InferencePool".
56+
#[serde(default, skip_serializing_if = "Option::is_none")]
57+
pub kind: Option<String>,
58+
/// Name is the name of the referent.
59+
pub name: String,
60+
}
61+
62+
/// InferenceObjectiveStatus defines the observed state of InferenceObjective
63+
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
64+
pub struct InferenceObjectiveStatus {
65+
/// Conditions track the state of the InferenceObjective.
66+
///
67+
/// Known condition types are:
68+
///
69+
/// * "Accepted"
70+
#[serde(default, skip_serializing_if = "Option::is_none")]
71+
pub conditions: Option<Vec<Condition>>,
72+
}
73+
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
// WARNING: generated by kopium - manual changes will be overwritten
2+
// kopium command: kopium --schema=derived --derive=JsonSchema --derive=Default --derive=PartialEq --docs -f -
3+
// kopium version: 0.22.5
4+
5+
#[allow(unused_imports)]
6+
mod prelude {
7+
pub use kube_derive::CustomResource;
8+
pub use schemars::JsonSchema;
9+
pub use serde::{Serialize, Deserialize};
10+
pub use std::collections::BTreeMap;
11+
pub use k8s_openapi::apimachinery::pkg::apis::meta::v1::Condition;
12+
}
13+
use self::prelude::*;
14+
15+
/// InferencePoolSpec defines the desired state of InferencePool
16+
#[derive(CustomResource, Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
17+
#[kube(group = "inference.networking.x-k8s.io", version = "v1alpha2", kind = "InferencePool", plural = "inferencepools")]
18+
#[kube(namespaced)]
19+
#[kube(status = "InferencePoolStatus")]
20+
#[kube(derive="Default")]
21+
#[kube(derive="PartialEq")]
22+
pub struct InferencePoolSpec {
23+
/// Extension configures an endpoint picker as an extension service.
24+
#[serde(rename = "extensionRef")]
25+
pub extension_ref: InferencePoolExtensionRef,
26+
/// Selector defines a map of labels to watch model server Pods
27+
/// that should be included in the InferencePool.
28+
/// In some cases, implementations may translate this field to a Service selector, so this matches the simple
29+
/// map used for Service selectors instead of the full Kubernetes LabelSelector type.
30+
/// If specified, it will be applied to match the model server pods in the same namespace as the InferencePool.
31+
/// Cross-namespace selectors are not supported.
32+
pub selector: BTreeMap<String, String>,
33+
/// TargetPortNumber defines the port number to access the selected model server Pods.
34+
/// The number must be in the range 1 to 65535.
35+
#[serde(rename = "targetPortNumber")]
36+
pub target_port_number: i32,
37+
}
38+
39+
/// Extension configures an endpoint picker as an extension service.
40+
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
41+
pub struct InferencePoolExtensionRef {
42+
/// Configures how the gateway handles the case when the extension is not responsive.
43+
/// Defaults to failClose.
44+
#[serde(default, skip_serializing_if = "Option::is_none", rename = "failureMode")]
45+
pub failure_mode: Option<InferencePoolExtensionRefFailureMode>,
46+
/// Group is the group of the referent.
47+
/// The default value is "", representing the Core API group.
48+
#[serde(default, skip_serializing_if = "Option::is_none")]
49+
pub group: Option<String>,
50+
/// Kind is the Kubernetes resource kind of the referent.
51+
///
52+
/// Defaults to "Service" when not specified.
53+
///
54+
/// ExternalName services can refer to CNAME DNS records that may live
55+
/// outside of the cluster and as such are difficult to reason about in
56+
/// terms of conformance. They also may not be safe to forward to (see
57+
/// CVE-2021-25740 for more information). Implementations MUST NOT
58+
/// support ExternalName Services.
59+
#[serde(default, skip_serializing_if = "Option::is_none")]
60+
pub kind: Option<String>,
61+
/// Name is the name of the referent.
62+
pub name: String,
63+
/// The port number on the service running the extension. When unspecified,
64+
/// implementations SHOULD infer a default value of 9002 when the Kind is
65+
/// Service.
66+
#[serde(default, skip_serializing_if = "Option::is_none", rename = "portNumber")]
67+
pub port_number: Option<i32>,
68+
}
69+
70+
/// FailureMode configures how the gateway handles the case when the extension is not responsive.
71+
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, PartialEq)]
72+
pub enum InferencePoolExtensionRefFailureMode {
73+
FailOpen,
74+
FailClose,
75+
}
76+
77+
/// Status defines the observed state of InferencePool.
78+
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
79+
pub struct InferencePoolStatus {
80+
/// Parents is a list of parent resources (usually Gateways) that are
81+
/// associated with the InferencePool, and the status of the InferencePool with respect to
82+
/// each parent.
83+
///
84+
/// A maximum of 32 Gateways will be represented in this list. When the list contains
85+
/// `kind: Status, name: default`, it indicates that the InferencePool is not
86+
/// associated with any Gateway and a controller must perform the following:
87+
///
88+
/// - Remove the parent when setting the "Accepted" condition.
89+
/// - Add the parent when the controller will no longer manage the InferencePool
90+
/// and no other parents exist.
91+
#[serde(default, skip_serializing_if = "Option::is_none")]
92+
pub parent: Option<Vec<InferencePoolStatusParent>>,
93+
}
94+
95+
/// PoolStatus defines the observed state of InferencePool from a Gateway.
96+
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
97+
pub struct InferencePoolStatusParent {
98+
/// Conditions track the state of the InferencePool.
99+
///
100+
/// Known condition types are:
101+
///
102+
/// * "Accepted"
103+
/// * "ResolvedRefs"
104+
#[serde(default, skip_serializing_if = "Option::is_none")]
105+
pub conditions: Option<Vec<Condition>>,
106+
/// GatewayRef indicates the gateway that observed state of InferencePool.
107+
#[serde(rename = "parentRef")]
108+
pub parent_ref: InferencePoolStatusParentParentRef,
109+
}
110+
111+
/// GatewayRef indicates the gateway that observed state of InferencePool.
112+
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
113+
pub struct InferencePoolStatusParentParentRef {
114+
/// Group is the group of the referent.
115+
#[serde(default, skip_serializing_if = "Option::is_none")]
116+
pub group: Option<String>,
117+
/// Kind is kind of the referent. For example "Gateway".
118+
#[serde(default, skip_serializing_if = "Option::is_none")]
119+
pub kind: Option<String>,
120+
/// Name is the name of the referent.
121+
pub name: String,
122+
/// Namespace is the namespace of the referent. If not present,
123+
/// the namespace of the referent is assumed to be the same as
124+
/// the namespace of the referring object.
125+
#[serde(default, skip_serializing_if = "Option::is_none")]
126+
pub namespace: Option<String>,
127+
}
128+
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// WARNING! generated file do not edit
2+
pub mod inferencepools;
3+
pub mod inferenceobjectives;
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// WARNING: generated file - manual changes will be overridden
2+
3+
#[allow(unused_imports)]
4+
mod prelude {
5+
pub use k8s_openapi::apimachinery::pkg::apis::meta::v1::Condition;
6+
pub use kube_derive::CustomResource;
7+
pub use schemars::JsonSchema;
8+
pub use serde::{Deserialize, Serialize};
9+
pub use std::collections::BTreeMap;
10+
}
11+
use self::prelude::*;
12+
/// EndPointPort wraps a single port `number`.
///
/// NOTE(review): this file does not show where the type is used; presumably it
/// models the port of an InferencePool endpoint — confirm against the callers.
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
13+
pub struct EndPointPort {
14+
/// The port number, stored as a signed 32-bit integer.
/// NOTE(review): no range validation is visible here; presumably 1-65535 — confirm.
pub number: i32,
15+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// WARNING: generated file - manual changes will be overridden
2+
3+
/// Failure mode of an InferencePool endpoint picker reference.
///
/// NOTE(review): the exact semantics of each variant are not defined in this
/// file; presumably they mirror the `failureMode` field of the InferencePool
/// API (what to do when the picker extension is unresponsive) — confirm.
///
/// The enum is fieldless, so it is `Clone` + `Copy`; this lets it be embedded
/// in structs that derive `Clone` and passed by value without ceremony.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InferencePoolEndpointPickerRefFailureMode {
    FailOpen,
    FailClose,
}

/// Renders the variant name (`"FailOpen"` / `"FailClose"`).
impl std::fmt::Display for InferencePoolEndpointPickerRefFailureMode {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Spell the names out explicitly instead of delegating to `Debug`, so the
        // user-facing string cannot drift if the `Debug` representation changes.
        f.write_str(match self {
            Self::FailOpen => "FailOpen",
            Self::FailClose => "FailClose",
        })
    }
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// WARNING: generated file - manual changes will be overridden
2+
3+
use super::common::*;
4+
impl Default for InferencePoolEndpointPickerRefFailureMode {
5+
fn default() -> Self {
6+
InferencePoolEndpointPickerRefFailureMode::FailOpen
7+
}
8+
}

0 commit comments

Comments
 (0)