Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 75 additions & 14 deletions crates/protocols/src/model_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ bitflags! {
const AUDIO = 1 << 10;
/// Content moderation models
const MODERATION = 1 << 11;
/// Diffusion models (Stable Diffusion, Flux, etc.)
const DIFFUSION = 1 << 12;

/// Standard LLM: chat + completions + responses + tools
const LLM = Self::CHAT.bits() | Self::COMPLETIONS.bits()
Expand Down Expand Up @@ -62,6 +64,9 @@ bitflags! {

/// Content moderation model only
const MODERATION_MODEL = Self::MODERATION.bits();

/// Diffusion model only (Stable Diffusion, Flux, etc.)
const DIFFUSION_MODEL = Self::DIFFUSION.bits();
Comment on lines +68 to +69
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

DIFFUSION_MODEL is missing the actual serving capability.

Every other *_MODEL alias includes the endpoint bit it serves; this one is just the marker flag. Cards classified as DIFFUSION_MODEL therefore fail every model-serving branch in supports_endpoint(...), so the new discovery paths can identify diffusion models but not route to them. Please pair DIFFUSION with the transport capability those models use, or add explicit endpoint handling and a regression test.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@crates/protocols/src/model_type.rs` around lines 68-69: the DIFFUSION_MODEL
constant currently expands to only Self::DIFFUSION.bits(), so it lacks the
serving/endpoint bit consulted by supports_endpoint(...). Update the DIFFUSION_MODEL
alias to OR the DIFFUSION marker with the transport/endpoint capability
that diffusion models serve (e.g., the same endpoint bit other *_MODEL aliases use,
such as IMAGE_GEN, or the relevant transport flag) so that
supports_endpoint(...) recognizes and routes diffusion cards. Also add or adjust a
regression test covering supports_endpoint(...) for DIFFUSION_MODEL to ensure
routing works.

}
}

Expand All @@ -79,6 +84,7 @@ const CAPABILITY_NAMES: &[(ModelType, &str)] = &[
(ModelType::IMAGE_GEN, "image_gen"),
(ModelType::AUDIO, "audio"),
(ModelType::MODERATION, "moderation"),
(ModelType::DIFFUSION, "diffusion"),
];

impl ModelType {
Expand Down Expand Up @@ -154,6 +160,12 @@ impl ModelType {
self.contains(Self::MODERATION)
}

/// Check if this model type supports diffusion (image generation via diffusion)
#[inline]
pub fn supports_diffusion(self) -> bool {
self.contains(Self::DIFFUSION)
}

/// Check if this model type supports a given endpoint
pub fn supports_endpoint(self, endpoint: Endpoint) -> bool {
match endpoint {
Expand Down Expand Up @@ -213,6 +225,12 @@ impl ModelType {
pub fn is_moderation_model(self) -> bool {
self.supports_moderation() && !self.supports_chat()
}

/// Check if this is a diffusion model
#[inline]
pub fn is_diffusion_model(self) -> bool {
self.supports_diffusion() && !self.supports_chat()
}
}

impl std::fmt::Display for ModelType {
Expand Down Expand Up @@ -296,20 +314,12 @@ impl JsonSchema for ModelType {
use schemars::schema::*;
let items = SchemaObject {
instance_type: Some(InstanceType::String.into()),
enum_values: Some(vec![
"chat".into(),
"completions".into(),
"responses".into(),
"embeddings".into(),
"rerank".into(),
"generate".into(),
"vision".into(),
"tools".into(),
"reasoning".into(),
"image_gen".into(),
"audio".into(),
"moderation".into(),
]),
enum_values: Some(
CAPABILITY_NAMES
.iter()
.map(|(_, name)| (*name).into())
.collect(),
),
..Default::default()
};
SchemaObject {
Expand Down Expand Up @@ -406,3 +416,54 @@ impl std::fmt::Display for Endpoint {
}
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The bare DIFFUSION flag carries no other capability bits.
    #[test]
    fn test_diffusion_flag_basics() {
        let flags = ModelType::DIFFUSION;
        assert!(!flags.supports_chat());
        assert!(!flags.supports_vision());
        assert!(flags.supports_diffusion());
    }

    /// DIFFUSION_MODEL classifies as a pure diffusion model and nothing else.
    #[test]
    fn test_diffusion_model_composite() {
        let model = ModelType::DIFFUSION_MODEL;
        assert!(model.supports_diffusion());
        assert!(model.is_diffusion_model());
        assert!(!model.is_image_model());
        assert!(!model.is_llm());
    }

    /// Adding CHAT makes the type an LLM, so it is no longer a *pure* diffusion model.
    #[test]
    fn test_diffusion_with_chat_is_not_diffusion_model() {
        let combined = ModelType::DIFFUSION | ModelType::CHAT;
        assert!(combined.supports_diffusion());
        assert!(combined.is_llm());
        // CHAT is present, so the pure-diffusion predicate must reject it.
        assert!(!combined.is_diffusion_model());
    }

    /// The capability-name table maps DIFFUSION_MODEL to exactly one name.
    #[test]
    fn test_diffusion_capability_name() {
        let names = ModelType::DIFFUSION_MODEL.as_capability_names();
        assert_eq!(names, vec!["diffusion"]);
    }

    /// Serde round-trip: serialize to the capability-name array and back.
    #[test]
    fn test_diffusion_serialization_roundtrip() {
        let original = ModelType::DIFFUSION_MODEL;
        let encoded = serde_json::to_string(&original).unwrap();
        assert_eq!(encoded, r#"["diffusion"]"#);
        let decoded: ModelType = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }

    /// Display joins capability names; a single flag renders as its bare name.
    #[test]
    fn test_diffusion_display() {
        assert_eq!(ModelType::DIFFUSION_MODEL.to_string(), "diffusion");
    }
}
64 changes: 64 additions & 0 deletions model_gateway/src/core/steps/worker/external/discover_models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,18 @@ pub fn infer_model_type_from_id(id: &str) -> ModelType {
return ModelType::RERANK_MODEL;
}

// Diffusion models (Stable Diffusion, Flux, SDXL, etc.)
// Must be checked before the generic "image" heuristic so that IDs like
// "flux-image-*" or "stable-diffusion-image-*" are not misclassified.
if id_lower.starts_with("sd-")
|| id_lower.starts_with("sd3")
|| id_lower.starts_with("sdxl")
|| id_lower.starts_with("flux")
|| id_lower.contains("diffusion")
Comment on lines +112 to +116
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Match diffusion heuristics against the basename, not the full ID

These checks only recognize flux, sdxl, and sd3 when they start the entire model ID. The rest of the repo already treats namespaced IDs like org/model as normal, so external /v1/models entries such as black-forest-labs/FLUX.1-dev or stabilityai/sdxl-turbo fall through every diffusion branch here and get classified as ModelType::LLM. That misses two of the main families called out in the feature summary unless the ID happens to contain the literal word diffusion.

Useful? React with 👍 / 👎.

{
return ModelType::DIFFUSION_MODEL;
}

// Image generation models
if id_lower.starts_with("dall-e")
|| id_lower.starts_with("sora")
Expand Down Expand Up @@ -318,3 +330,55 @@ impl StepExecutor<WorkerWorkflowData> for DiscoverModelsStep {
true
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// IDs from the Stable Diffusion / SD / SDXL / Flux families, plus any ID
    /// containing "diffusion", must all resolve to DIFFUSION_MODEL.
    #[test]
    fn test_infer_diffusion_models() {
        let diffusion_ids = [
            "stable-diffusion-xl-base-1.0",
            "stable_diffusion_3",
            "sd-v1-5",
            "sd3-medium",
            "sdxl-turbo",
            "flux-1-dev",
            // Matching is case-insensitive.
            "FLUX-schnell",
            "my-custom-diffusion-model",
        ];
        for id in diffusion_ids {
            assert_eq!(
                infer_model_type_from_id(id),
                ModelType::DIFFUSION_MODEL,
                "expected {id:?} to be classified as a diffusion model"
            );
        }
    }

    /// Non-diffusion IDs keep their existing classifications.
    #[test]
    fn test_infer_non_diffusion_models() {
        assert_eq!(infer_model_type_from_id("gpt-4o"), ModelType::VISION_LLM);
        assert_eq!(infer_model_type_from_id("dall-e-3"), ModelType::IMAGE_MODEL);
        assert_eq!(
            infer_model_type_from_id("text-embedding-3-small"),
            ModelType::EMBED_MODEL
        );
        assert_eq!(infer_model_type_from_id("llama-3-70b"), ModelType::LLM);
    }
}
79 changes: 75 additions & 4 deletions model_gateway/src/core/steps/worker/local/create_worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,17 +231,30 @@ fn build_model_card(
.map(|s| s == "true")
.unwrap_or(false);

if !user_provided {
// The "model_type" label is currently only reported by SGLang workers
// (via the /model_info endpoint for multimodal_gen servers).
let is_diffusion = labels
.get("model_type")
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is only supported for SGLang atm.

.is_some_and(|s| s.to_lowercase() == "diffusion");

if user_provided {
if has_vision && !card.model_type.supports_vision() {
card.model_type |= ModelType::VISION;
}
if is_diffusion && !card.model_type.supports_diffusion() {
card.model_type |= ModelType::DIFFUSION;
Comment on lines +244 to +245
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Replace default LLM type for user-provided diffusion models

When user_provided is true, the card usually comes from ModelCard::new, which defaults model_type to LLM in crates/protocols/src/model_card.rs. OR-ing DIFFUSION onto that default means a config like models: [{id: "black-forest-labs/FLUX.1-dev"}] becomes chat|completions|responses|tools|diffusion instead of a pure diffusion model, so the new detection still reports it as an LLM and is_diffusion_model() never becomes true for manually configured local diffusion workers.

Useful? React with 👍 / 👎.

}
} else {
let is_embedding = labels.get("is_embedding").is_some_and(|s| s == "true");
let is_non_generation = labels.get("is_generation").is_some_and(|s| s == "false");

if is_embedding || is_non_generation {
if is_diffusion {
card.model_type = ModelType::DIFFUSION_MODEL;
} else if is_embedding || is_non_generation {
Comment on lines +251 to +253
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Preserve VISION when a diffusion worker is also multimodal.

This assignment overwrites the card with DIFFUSION_MODEL, so a worker that reports both model_type=diffusion and supports_vision=true loses the input-image capability that has_vision just detected. Reapply VISION after setting the diffusion type, and add a regression test for the combined labels.

Suggested fix
-        if is_diffusion {
-            card.model_type = ModelType::DIFFUSION_MODEL;
+        if is_diffusion {
+            card.model_type = ModelType::DIFFUSION_MODEL;
+            if has_vision {
+                card.model_type |= ModelType::VISION;
+            }
         } else if is_embedding || is_non_generation {
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if is_diffusion {
card.model_type = ModelType::DIFFUSION_MODEL;
} else if is_embedding || is_non_generation {
if is_diffusion {
card.model_type = ModelType::DIFFUSION_MODEL;
if has_vision {
card.model_type |= ModelType::VISION;
}
} else if is_embedding || is_non_generation {
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@model_gateway/src/core/steps/worker/local/create_worker.rs` around lines 251-253:
the assignment `card.model_type = ModelType::DIFFUSION_MODEL` overwrites any prior
VISION capability detected via has_vision/supports_vision. Update the
is_diffusion branch so that, after setting DIFFUSION_MODEL, the VISION flag is
reapplied when has_vision is true (i.e., combine VISION with DIFFUSION rather than
replacing it). Then add a regression test that builds a card with both
model_type=diffusion and supports_vision=true labels and asserts the resulting card
retains the vision capability.

card.model_type = infer_non_generation_type(labels);
} else if has_vision && !card.model_type.supports_vision() {
card.model_type |= ModelType::VISION;
}
} else if has_vision && !card.model_type.supports_vision() {
card.model_type |= ModelType::VISION;
}

card
Expand Down Expand Up @@ -275,3 +288,61 @@ fn normalize_url(url: &str, connection_mode: ConnectionMode) -> String {
}
}
}

#[cfg(test)]
mod tests {
    use openai_protocol::worker::WorkerSpec;

    use super::*;

    /// Minimal worker spec pointing at a local endpoint.
    fn default_config() -> WorkerSpec {
        serde_json::from_str(r#"{"url": "http://localhost:30000"}"#).unwrap()
    }

    /// Build a label map from string pairs.
    fn labels_from(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|(k, v)| ((*k).to_string(), (*v).to_string()))
            .collect()
    }

    /// A `model_type: diffusion` label yields a pure diffusion card.
    #[test]
    fn test_build_model_card_diffusion_from_model_type_label() {
        let labels = labels_from(&[("model_type", "diffusion"), ("is_generation", "true")]);
        let card = build_model_card("stable-diffusion-xl", &default_config(), &labels);
        assert!(card.model_type.supports_diffusion());
        assert!(card.model_type.is_diffusion_model());
    }

    /// The label value is matched case-insensitively.
    #[test]
    fn test_build_model_card_diffusion_case_insensitive() {
        let labels = labels_from(&[("model_type", "Diffusion")]);
        let card = build_model_card("flux-dev", &default_config(), &labels);
        assert!(card.model_type.supports_diffusion());
    }

    /// Any other `model_type` value leaves the default LLM classification intact.
    #[test]
    fn test_build_model_card_non_diffusion_llm() {
        let labels = labels_from(&[("model_type", "llama"), ("is_generation", "true")]);
        let card = build_model_card("llama-3-70b", &default_config(), &labels);
        assert!(card.model_type.is_llm());
        assert!(!card.model_type.supports_diffusion());
    }

    /// The diffusion branch is evaluated before the embedding/non-generation branch.
    #[test]
    fn test_build_model_card_diffusion_takes_precedence_over_embedding() {
        let labels = labels_from(&[("model_type", "diffusion"), ("is_generation", "false")]);
        let card = build_model_card("sd-model", &default_config(), &labels);
        assert!(card.model_type.supports_diffusion());
        assert!(!card.model_type.supports_embeddings());
    }
}
Loading