Commit 0b117ce

Omni: Add SGLang Diffusion for XPU (#194)

1 parent f0019a1 commit 0b117ce

File tree: 9 files changed (+3601 −18 lines changed)


Releases.md — 7 additions & 1 deletion

@@ -68,6 +68,13 @@
 ## LLM-Scaler-Omni
 
 ### Latest Beta Release
+* `intel/llm-scaler-omni:0.1.0-b4` [12/10/2025]:
+    - More workflows support:
+        - Z-Image-Turbo
+        - Hunyuan-Video-1.5 T2V/I2V with multi-XPU support
+    - Initial support for SGLang Diffusion, with a ~10% performance improvement over ComfyUI in the single-B60 (1×B60) scenario.
+
+### Previous Releases
 * `intel/llm-scaler-omni:0.1.0-b3` [11/19/2025]:
     - More workflows support:
         - Hunyuan 3D 2.1
@@ -76,7 +83,6 @@
         - AnimateDiff lightning
     - Add Windows installation
 
-### Previous Releases
 * `intel/llm-scaler-omni:0.1.0-b2` [10/17/2025]:
     - Fix issues:
         - Fix ComfyUI interpolate issue

omni/README.md — 71 additions & 4 deletions

@@ -6,17 +6,18 @@
 
 1. [Getting Started with Omni Docker Image](#getting-started-with-omni-docker-image)
 2. [ComfyUI](#comfyui)
-3. [XInference](#xinference)
-4. [Stand-alone Examples](#stand-alone-examples)
-5. [ComfyUI for Windows (experimental)](#comfyui-for-windows-experimental)
+3. [SGLang Diffusion](#sglang-diffusion-experimental)
+4. [XInference](#xinference)
+5. [Stand-alone Examples](#stand-alone-examples)
+6. [ComfyUI for Windows (experimental)](#comfyui-for-windows-experimental)
 
 ---
 
 ## Getting Started with Omni Docker Image
 
 Pull docker image from dockerhub:
 ```bash
-docker pull intel/llm-scaler-omni:0.1.0-b3
+docker pull intel/llm-scaler-omni:0.1.0-b4
 ```
 
 Or build docker image:
@@ -282,6 +283,72 @@ This workflow synthesizes new speech using a single reference audio file for voi
 3. **Run the Workflow**
    - Execute the workflow to generate the speech.
 
+## SGLang Diffusion (experimental)
+
+SGLang Diffusion provides an OpenAI-compatible API for image/video generation models.
+
+### 1. CLI Generation
+
+```bash
+sglang generate --model-path /llm/models/Wan2.1-T2V-1.3B-Diffusers \
+    --text-encoder-cpu-offload --pin-cpu-memory \
+    --prompt "A curious raccoon" \
+    --save-output
+```
+
+### 2. OpenAI API Server
+
+**Start the server:**
+
+```bash
+# Configure proxy if needed
+export http_proxy=<your_http_proxy>
+export https_proxy=<your_https_proxy>
+export no_proxy=localhost,127.0.0.1
+
+# Start server
+sglang serve --model-path /llm/models/Z-Image-Turbo/ \
+    --vae-cpu-offload --pin-cpu-memory \
+    --num-gpus 1 --port 30010
+```
+
+Or use the provided script:
+
+```bash
+bash /llm/entrypoints/start_sgl_diffusion.sh
+```
+
+**cURL example:**
+
+```bash
+curl http://localhost:30010/v1/images/generations \
+    -H "Content-Type: application/json" \
+    -d '{
+        "model": "Z-Image-Turbo",
+        "prompt": "A beautiful sunset over the ocean",
+        "size": "1024x1024"
+    }'
+```
+
+**Python example (OpenAI SDK):**
+
+```python
+from openai import OpenAI
+import base64
+
+client = OpenAI(base_url="http://localhost:30010/v1", api_key="EMPTY")
+
+response = client.images.generate(
+    model="Z-Image-Turbo",
+    prompt="A beautiful sunset over the ocean",
+    size="1024x1024",
+)
+
+# Save image from base64 response
+with open("output.png", "wb") as f:
+    f.write(base64.b64decode(response.data[0].b64_json))
+```
+
 ## XInference
 
 ```bash

omni/build.sh — 1 addition & 1 deletion

@@ -3,4 +3,4 @@ set -x
 export HTTP_PROXY=<your_http_proxy>
 export HTTPS_PROXY=<your_https_proxy>
 
-docker build -f ./docker/Dockerfile . -t intel/llm-scaler-omni:0.1.0-b3 --build-arg https_proxy=$HTTPS_PROXY --build-arg http_proxy=$HTTP_PROXY
+docker build -f ./docker/Dockerfile . -t intel/llm-scaler-omni:0.1.0-b4 --build-arg https_proxy=$HTTPS_PROXY --build-arg http_proxy=$HTTP_PROXY

omni/docker/Dockerfile — 18 additions & 12 deletions

@@ -15,6 +15,7 @@ COPY ./patches/xinference_device_utils.patch /tmp/
 COPY ./patches/comfyui_for_multi_arc.patch /tmp/
 COPY ./patches/comfyui_voxcpm_for_xpu.patch /tmp/
 COPY ./patches/comfyui_hunyuan3d_for_xpu.patch /tmp/
+COPY ./patches/sglang_diffusion_for_multi_arc.patch /tmp/
 
 
 # Add Intel oneAPI repo and PPA for GPU support
@@ -86,24 +87,26 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
     git apply /tmp/comfyui_voxcpm_for_xpu.patch && \
     pip install -r requirements.txt && \
     cd .. && \
-    git clone https://github.com/visualbruno/ComfyUI-Hunyuan3d-2-1.git && \
-    cd ComfyUI-Hunyuan3d-2-1 && \
-    git checkout 9d7ef32509101495a7840b3ae8e718c8d1183305 && \
-    git apply /tmp/comfyui_hunyuan3d_for_xpu.patch && \
-    pip install bigdl-core==2.4.0b1 rembg realesrgan && \
-    pip install -r requirements.txt && \
-    cd hy3dpaint/custom_rasterizer && \
-    python setup.py install && \
-    cd ../DifferentiableRenderer && \
-    python setup.py install && \
-    cd /llm/ComfyUI/custom_nodes && \
     git clone https://github.com/billwuhao/ComfyUI_IndexTTS.git && \
     cd ComfyUI_IndexTTS && \
     pip install -r requirements.txt && \
     # Install Xinference
     pip install "xinference[transformers]" && \
     patch /usr/local/lib/python3.10/dist-packages/xinference/device_utils.py < /tmp/xinference_device_utils.patch && \
     pip install kokoro Jinja2==3.1.6 jieba ordered-set pypinyin cn2an pypinyin-dict && \
+    # Install SGLang Diffusion
+    cd /llm && \
+    git clone https://github.com/sgl-project/sglang.git && \
+    cd sglang && \
+    git checkout 236a7c237002250b148c79bd93780d870b8b50d2 && \
+    git apply /tmp/sglang_diffusion_for_multi_arc.patch && \
+    pip install -e "python[diffusion]" && \
+    pip install triton==3.5.0 && \
+    pip install pytorch-triton-xpu==3.5.0 --index-url https://download.pytorch.org/whl/xpu --force-reinstall && \
+    cd /llm && \
+    git clone https://github.com/sgl-project/sgl-kernel-xpu.git && \
+    cd sgl-kernel-xpu && \
+    pip install -v . && \
     # Clean
     rm -rf /tmp/*
 RUN cd /llm/ComfyUI/custom_nodes && \
@@ -114,5 +117,8 @@ RUN cd /llm/ComfyUI/custom_nodes && \
 COPY ./workflows/* /llm/ComfyUI/user/default/workflows/
 COPY ./example_inputs/* /llm/ComfyUI/input/
 COPY ./tools/* /llm/tools/
+COPY ./entrypoints/* /llm/entrypoints/
+
+RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
 
-WORKDIR /llm/ComfyUI
+WORKDIR /llm/entrypoints
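The Dockerfile changes above switch the default working directory to `/llm/entrypoints`, where the new start scripts live. To use them, the container needs access to the Intel GPU devices and the model directory; a typical invocation might look like the following (a sketch, not part of this commit — the device flag, shared-memory size, and `/path/to/models` mount are assumptions that may need adjusting per setup):

```shell
# Pass the Intel GPU render devices through to the container,
# mount a host model directory at /llm/models, and expose the
# ComfyUI (8188) and SGLang Diffusion (30010) ports.
docker run -it --rm \
    --device /dev/dri \
    --shm-size 16g \
    -v /path/to/models:/llm/models \
    -p 8188:8188 -p 30010:30010 \
    intel/llm-scaler-omni:0.1.0-b4
```

Inside the container, `bash start_comfyui.sh` or `bash start_sgl_diffusion.sh` then launches the corresponding service.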

omni/entrypoints/start_comfyui.sh — 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+export http_proxy=<your_http_proxy>
+export https_proxy=<your_https_proxy>
+export no_proxy=localhost,127.0.0.1
+
+python /llm/ComfyUI/main.py --listen 0.0.0.0 --port 8188
omni/entrypoints/start_sgl_diffusion.sh — 17 additions & 0 deletions

@@ -0,0 +1,17 @@
+export http_proxy=<your_http_proxy>
+export https_proxy=<your_https_proxy>
+export no_proxy=localhost,127.0.0.1
+
+export model="/llm/models/Z-Image-Turbo/"
+
+SERVER_ARGS=(
+    --model-path $model
+    --vae-cpu-offload
+    --pin-cpu-memory
+    --num-gpus 1
+    --ulysses-degree=1
+    --ring-degree=1
+    --port 30010
+)
+
+sglang serve "${SERVER_ARGS[@]}" 2>&1 | tee sglang.log
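The Python example added to omni/README.md above writes `response.data[0].b64_json` directly, which assumes the server returns inline base64 data. OpenAI-compatible images endpoints can instead return a download URL depending on the response format in use; a small defensive helper covers both cases (a sketch — `save_image` is a hypothetical name, not part of this commit):

```python
import base64
import urllib.request


def save_image(item, path="output.png"):
    """Save one entry of an images-API response, whether it carries
    inline base64 bytes (b64_json) or a downloadable URL."""
    b64 = getattr(item, "b64_json", None)
    if b64:
        # Inline base64 payload: decode and write directly
        with open(path, "wb") as f:
            f.write(base64.b64decode(b64))
        return path
    url = getattr(item, "url", None)
    if url:
        # URL payload: download the generated image instead
        urllib.request.urlretrieve(url, path)
        return path
    raise ValueError("response item has neither b64_json nor url")
```

With the OpenAI SDK, this replaces the final write in the README example: `save_image(response.data[0])`.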
