Skip to content

Commit 0da1861

Browse files
committed
inference extension support
Signed-off-by: Nitishkumar Singh <nitishkumarsingh71@gmail.com>
1 parent f0c0deb commit 0da1861

File tree

15 files changed

+579
-30
lines changed

15 files changed

+579
-30
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// WARNING: generated file - manual changes will be overridden
2+
3+
#[allow(unused_imports)]
4+
mod prelude {
5+
pub use k8s_openapi::apimachinery::pkg::apis::meta::v1::Condition;
6+
pub use kube_derive::CustomResource;
7+
pub use schemars::JsonSchema;
8+
pub use serde::{Deserialize, Serialize};
9+
pub use std::collections::BTreeMap;
10+
}
11+
use self::prelude::*;
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// WARNING: generated file - manual changes will be overridden
2+
3+
#[allow(unused_imports)]
4+
mod prelude {
5+
pub use kube_derive::CustomResource;
6+
pub use schemars::JsonSchema;
7+
pub use serde::{Serialize, Deserialize};
8+
pub use k8s_openapi::apimachinery::pkg::apis::meta::v1::Condition;
9+
}
10+
use self::prelude::*;
11+
/// InferenceObjectiveSpec represents the desired state of a specific model use case. This resource is
12+
/// managed by the "Inference Workload Owner" persona.
13+
///
14+
/// The Inference Workload Owner persona is someone that trains, verifies, and
15+
/// leverages a large language model from a model frontend, drives the lifecycle
16+
/// and rollout of new versions of those models, and defines the specific
17+
/// performance and latency goals for the model. These workloads are
18+
/// expected to operate within an InferencePool sharing compute capacity with other
19+
/// InferenceObjectives, defined by the Inference Platform Admin.
20+
#[derive(
21+
CustomResource,
22+
Serialize,
23+
Deserialize,
24+
Clone,
25+
Debug,
26+
JsonSchema,
27+
Default,
28+
PartialEq
29+
)]
30+
#[kube(
31+
group = "inference.networking.x-k8s.io",
32+
version = "v1alpha2",
33+
kind = "InferenceObjective",
34+
plural = "inferenceobjectives"
35+
)]
36+
#[kube(namespaced)]
37+
#[kube(status = "InferenceObjectiveStatus")]
38+
#[kube(derive = "Default")]
39+
#[kube(derive = "PartialEq")]
40+
pub struct InferenceObjectiveSpec {
41+
/// PoolRef is a reference to the inference pool, the pool must exist in the same namespace.
42+
#[serde(rename = "poolRef")]
43+
pub pool_ref: InferenceObjectivePoolRef,
44+
/// Priority defines how important it is to serve the request compared to other requests in the same pool.
45+
/// Priority is an integer value that defines the priority of the request.
46+
/// The higher the value, the more critical the request is; negative values _are_ allowed.
47+
/// No default value is set for this field, allowing for future additions of new fields that may 'one of' with this field.
48+
/// However, implementations that consume this field (such as the Endpoint Picker) will treat an unset value as '0'.
49+
/// Priority is used in flow control, primarily in the event of resource scarcity (requests need to be queued).
50+
/// All requests will be queued, and flow control will _always_ allow requests of higher priority to be served first.
51+
/// Fairness is only enforced and tracked between requests of the same priority.
52+
///
53+
/// Example: requests with Priority 10 will always be served before
54+
/// requests with Priority of 0 (the value used if Priority is unset or no InferenceObjective is specified).
55+
/// Similarly requests with a Priority of -10 will always be served after requests with Priority of 0.
56+
#[serde(default, skip_serializing_if = "Option::is_none")]
57+
pub priority: Option<i64>,
58+
}
59+
/// PoolRef is a reference to the inference pool, the pool must exist in the same namespace.
60+
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
61+
pub struct InferenceObjectivePoolRef {
62+
/// Group is the group of the referent.
63+
#[serde(default, skip_serializing_if = "Option::is_none")]
64+
pub group: Option<String>,
65+
/// Kind is the kind of the referent. For example "InferencePool".
66+
#[serde(default, skip_serializing_if = "Option::is_none")]
67+
pub kind: Option<String>,
68+
/// Name is the name of the referent.
69+
pub name: String,
70+
}
71+
/// InferenceObjectiveStatus defines the observed state of InferenceObjective
72+
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
73+
pub struct InferenceObjectiveStatus {
74+
/// Conditions track the state of the InferenceObjective.
75+
///
76+
/// Known condition types are:
77+
///
78+
/// * "Accepted"
79+
#[serde(default, skip_serializing_if = "Option::is_none")]
80+
pub conditions: Option<Vec<Condition>>,
81+
}
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
// WARNING: generated file - manual changes will be overridden
2+
3+
use super::common::*;
4+
#[allow(unused_imports)]
5+
mod prelude {
6+
pub use kube_derive::CustomResource;
7+
pub use schemars::JsonSchema;
8+
pub use serde::{Serialize, Deserialize};
9+
pub use std::collections::BTreeMap;
10+
pub use k8s_openapi::apimachinery::pkg::apis::meta::v1::Condition;
11+
}
12+
use self::prelude::*;
13+
/// InferencePoolSpec defines the desired state of InferencePool
14+
#[derive(
15+
CustomResource,
16+
Serialize,
17+
Deserialize,
18+
Clone,
19+
Debug,
20+
JsonSchema,
21+
Default,
22+
PartialEq
23+
)]
24+
#[kube(
25+
group = "inference.networking.x-k8s.io",
26+
version = "v1alpha2",
27+
kind = "InferencePool",
28+
plural = "inferencepools"
29+
)]
30+
#[kube(namespaced)]
31+
#[kube(status = "InferencePoolStatus")]
32+
#[kube(derive = "Default")]
33+
#[kube(derive = "PartialEq")]
34+
pub struct InferencePoolSpec {
35+
/// Extension configures an endpoint picker as an extension service.
36+
#[serde(rename = "extensionRef")]
37+
pub extension_ref: ExtensionRef,
38+
/// Selector defines a map of labels to watch model server Pods
39+
/// that should be included in the InferencePool.
40+
/// In some cases, implementations may translate this field to a Service selector, so this matches the simple
41+
/// map used for Service selectors instead of the full Kubernetes LabelSelector type.
42+
/// If specified, it will be applied to match the model server pods in the same namespace as the InferencePool.
43+
/// Cross-namespace selectors are not supported.
44+
pub selector: BTreeMap<String, String>,
45+
/// TargetPortNumber defines the port number to access the selected model server Pods.
46+
/// The number must be in the range 1 to 65535.
47+
#[serde(rename = "targetPortNumber")]
48+
pub target_port_number: i32,
49+
}
50+
/// Extension configures an endpoint picker as an extension service.
51+
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
52+
pub struct ExtensionRef {
53+
/// Configures how the gateway handles the case when the extension is not responsive.
54+
/// Defaults to FailClose.
55+
#[serde(default, skip_serializing_if = "Option::is_none", rename = "failureMode")]
56+
pub failure_mode: Option<ExtensionFailureMode>,
57+
/// Group is the group of the referent.
58+
/// The default value is "", representing the Core API group.
59+
#[serde(default, skip_serializing_if = "Option::is_none")]
60+
pub group: Option<String>,
61+
/// Kind is the Kubernetes resource kind of the referent.
62+
///
63+
/// Defaults to "Service" when not specified.
64+
///
65+
/// ExternalName services can refer to CNAME DNS records that may live
66+
/// outside of the cluster and as such are difficult to reason about in
67+
/// terms of conformance. They also may not be safe to forward to (see
68+
/// CVE-2021-25740 for more information). Implementations MUST NOT
69+
/// support ExternalName Services.
70+
#[serde(default, skip_serializing_if = "Option::is_none")]
71+
pub kind: Option<String>,
72+
/// Name is the name of the referent.
73+
pub name: String,
74+
/// The port number on the service running the extension. When unspecified,
75+
/// implementations SHOULD infer a default value of 9002 when the Kind is
76+
/// Service.
77+
#[serde(default, skip_serializing_if = "Option::is_none", rename = "portNumber")]
78+
pub port_number: Option<i32>,
79+
}
80+
/// ExtensionFailureMode selects how the gateway handles the case when the extension is not responsive.
81+
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, PartialEq)]
82+
pub enum ExtensionFailureMode {
83+
FailOpen,
84+
FailClose,
85+
}
86+
/// Status defines the observed state of InferencePool.
87+
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
88+
pub struct InferencePoolStatus {
89+
/// Parents is a list of parent resources (usually Gateways) that are
90+
/// associated with the InferencePool, and the status of the InferencePool with respect to
91+
/// each parent.
92+
///
93+
/// A maximum of 32 Gateways will be represented in this list. When the list contains
94+
/// `kind: Status, name: default`, it indicates that the InferencePool is not
95+
/// associated with any Gateway and a controller must perform the following:
96+
///
97+
/// - Remove the parent when setting the "Accepted" condition.
98+
/// - Add the parent when the controller will no longer manage the InferencePool
99+
/// and no other parents exist.
100+
#[serde(default, skip_serializing_if = "Option::is_none")]
101+
pub parent: Option<Vec<InferencePoolParent>>,
102+
}
103+
/// InferencePoolParent defines the observed state of InferencePool from a Gateway.
104+
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
105+
pub struct InferencePoolParent {
106+
/// Conditions track the state of the InferencePool.
107+
///
108+
/// Known condition types are:
109+
///
110+
/// * "Accepted"
111+
/// * "ResolvedRefs"
112+
#[serde(default, skip_serializing_if = "Option::is_none")]
113+
pub conditions: Option<Vec<Condition>>,
114+
/// ParentRef indicates the gateway that observed the state of the InferencePool.
115+
#[serde(rename = "parentRef")]
116+
pub parent_ref: ParentRef,
117+
}
118+
/// ParentRef indicates the gateway that observed the state of the InferencePool.
119+
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
120+
pub struct ParentRef {
121+
/// Group is the group of the referent.
122+
#[serde(default, skip_serializing_if = "Option::is_none")]
123+
pub group: Option<String>,
124+
/// Kind is the kind of the referent. For example "Gateway".
125+
#[serde(default, skip_serializing_if = "Option::is_none")]
126+
pub kind: Option<String>,
127+
/// Name is the name of the referent.
128+
pub name: String,
129+
/// Namespace is the namespace of the referent. If not present,
130+
/// the namespace of the referent is assumed to be the same as
131+
/// the namespace of the referring object.
132+
#[serde(default, skip_serializing_if = "Option::is_none")]
133+
pub namespace: Option<String>,
134+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// WARNING: generated file - manual changes will be overridden
2+
3+
pub mod common;
4+
pub mod inferenceobjectives;
5+
pub mod inferencepools;
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// WARNING: generated file - manual changes will be overridden
2+
3+
#[allow(unused_imports)]
4+
mod prelude {
5+
pub use k8s_openapi::apimachinery::pkg::apis::meta::v1::Condition;
6+
pub use kube_derive::CustomResource;
7+
pub use schemars::JsonSchema;
8+
pub use serde::{Deserialize, Serialize};
9+
pub use std::collections::BTreeMap;
10+
}
11+
use self::prelude::*;
12+
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, Default, PartialEq)]
13+
pub struct EndPointPort {
14+
pub number: i32,
15+
}

0 commit comments

Comments
 (0)