diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..4c42a02
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,108 @@
+# Dockerfile now located in the project root directory
+
+# Base image: slim variant of the official Python 3.10 runtime
+FROM python:3.10-slim
+
+# Build argument selecting the PyTorch wheel variant (cuda12, cuda128, cuda11, rocm, xpu, cpu).
+# Left empty, all dependencies are installed exactly as listed in requirements.txt.
+ARG TORCH_VERSION=""
+
+# All following relative paths resolve against /app
+WORKDIR /app
+
+# eSpeak NG is the only system package installed here; recommended extras are skipped and
+# the apt package lists are removed in the same layer to keep the image small.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    espeak-ng \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy the whole build context (project root, including requirements.txt) into /app
+COPY . .
+
+# Install Rust compiler if needed for better cross-compiling support
+#RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+#ENV PATH="/root/.cargo/bin:${PATH}"
+
+
+# --- PyTorch Installation Logic (Relies on the root requirements.txt copied to /app) ---
+
+# Install PyTorch and the remaining Python dependencies in a single RUN instruction.
+# Every RUN starts a fresh shell, so the versions pinned in requirements.txt have to be
+# extracted in the same instruction that consumes them; set in an earlier RUN, the *_REQ
+# variables would be empty here and the pins would be silently ignored.
+# NOTE(review): with the pins honoured, confirm each index (notably nightly/cu128) serves them.
+# TORCH_VERSION (build-arg) selects the wheel variant: cuda12, cuda128, cuda11, rocm, xpu or
+# cpu; any other non-empty value is passed to pip as-is. When it is empty, requirements.txt
+# is installed unmodified.
+RUN if [ -n "$TORCH_VERSION" ]; then \
+        TORCH_VERSION_REQ=$(grep -E "^torch==" requirements.txt | cut -d'=' -f3 || echo "") && \
+        TORCHAUDIO_VERSION_REQ=$(grep -E "^torchaudio==" requirements.txt | cut -d'=' -f3 || echo "") && \
+        TORCHVISION_VERSION_REQ=$(grep -E "^torchvision==" requirements.txt | cut -d'=' -f3 || echo "") && \
+        TORCHMETRICS_VERSION_REQ=$(grep -E "^torchmetrics==" requirements.txt | cut -d'=' -f3 || echo "") && \
+        echo "Found in requirements: torch==$TORCH_VERSION_REQ, torchaudio==$TORCHAUDIO_VERSION_REQ, torchvision==$TORCHVISION_VERSION_REQ, torchmetrics==$TORCHMETRICS_VERSION_REQ" && \
+        if [ -n "$TORCH_VERSION_REQ" ] && [ -n "$TORCHVISION_VERSION_REQ" ] && [ -n "$TORCHAUDIO_VERSION_REQ" ] && [ -n "$TORCHMETRICS_VERSION_REQ" ]; then \
+            echo "Using specific versions from requirements.txt" && \
+            TORCH_SPEC="torch==${TORCH_VERSION_REQ}" && \
+            TORCHVISION_SPEC="torchvision==${TORCHVISION_VERSION_REQ}" && \
+            TORCHAUDIO_SPEC="torchaudio==${TORCHAUDIO_VERSION_REQ}" && \
+            TORCHMETRICS_SPEC="torchmetrics==${TORCHMETRICS_VERSION_REQ}"; \
+        else \
+            echo "Using latest versions for the selected variant" && \
+            TORCH_SPEC="torch" && \
+            TORCHVISION_SPEC="torchvision" && \
+            TORCHAUDIO_SPEC="torchaudio" && \
+            TORCHMETRICS_SPEC="torchmetrics"; \
+        fi && \
+        case "$TORCH_VERSION" in \
+            "cuda12") \
+                pip install --no-cache-dir $TORCH_SPEC $TORCHVISION_SPEC $TORCHAUDIO_SPEC $TORCHMETRICS_SPEC --extra-index-url https://download.pytorch.org/whl/cu121 \
+                ;; \
+            "cuda128") \
+                pip install --no-cache-dir $TORCH_SPEC $TORCHVISION_SPEC $TORCHAUDIO_SPEC $TORCHMETRICS_SPEC --extra-index-url https://download.pytorch.org/whl/nightly/cu128 \
+                ;; \
+            "cuda11") \
+                pip install --no-cache-dir $TORCH_SPEC $TORCHVISION_SPEC $TORCHAUDIO_SPEC $TORCHMETRICS_SPEC --extra-index-url https://download.pytorch.org/whl/cu118 \
+                ;; \
+            "rocm") \
+                pip install --no-cache-dir $TORCH_SPEC $TORCHVISION_SPEC $TORCHAUDIO_SPEC $TORCHMETRICS_SPEC --extra-index-url https://download.pytorch.org/whl/rocm6.2 \
+                ;; \
+            "xpu") \
+                pip install --no-cache-dir $TORCH_SPEC $TORCHVISION_SPEC $TORCHAUDIO_SPEC $TORCHMETRICS_SPEC && \
+                pip install --no-cache-dir intel-extension-for-pytorch --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ \
+                ;; \
+            "cpu") \
+                pip install --no-cache-dir $TORCH_SPEC $TORCHVISION_SPEC $TORCHAUDIO_SPEC $TORCHMETRICS_SPEC --extra-index-url https://download.pytorch.org/whl/cpu \
+                ;; \
+            *) \
+                pip install --no-cache-dir $TORCH_VERSION \
+                ;; \
+        esac && \
+        # Install everything else, skipping the torch packages handled above; grep exits 1 when nothing is left, which is not an error here
+        echo "Installing remaining dependencies from requirements.txt..." && \
+        { grep -v -E "^torch==|^torchvision==|^torchaudio==|^torchmetrics==" requirements.txt > requirements_no_torch.txt || [ "$?" -eq 1 ]; } && \
+        if [ -s requirements_no_torch.txt ]; then \
+             pip install --no-cache-dir --upgrade -r requirements_no_torch.txt; \
+        else \
+             echo "No remaining dependencies to install."; \
+        fi && \
+        rm requirements_no_torch.txt; \
+    else \
+        echo "TORCH_VERSION not specified, installing all dependencies from requirements.txt..." && \
+        pip install --no-cache-dir --upgrade -r requirements.txt; \
+    fi
+
+# --- End PyTorch Installation Logic ---
+
+# Point phonemizer's eSpeak settings at the shared library and the binary directory
+ENV PHONEMIZER_ESPEAK_LIBRARY=/usr/lib/x86_64-linux-gnu/libespeak-ng.so.1 \
+    PHONEMIZER_ESPEAK_PATH=/usr/bin
+
+# Document the port that docker-compose.yml publishes
+EXPOSE 8000
+
+# Mount points for input and output files
+VOLUME ["/app/input", "/app/output"]
+
+# NOTE(review): the library path above is x86_64-specific — confirm before building for arm64
+# Default to an interactive shell; override the command to run a specific inference script
+CMD ["bash"]
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..2c64815
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,30 @@
+version: '3.8'
+services:
+  diffrhythm:
+    build:
+      # This compose file sits in the project root next to the Dockerfile, and Compose resolves
+      # a relative context against the compose file's own directory, so '.' is the project root.
+      context: .
+      # Path of the Dockerfile relative to the build context.
+      # Uncomment `args` below to choose a PyTorch wheel variant at build time.
+      dockerfile: Dockerfile
+      # args:
+      #  TORCH_VERSION: cuda12 # TORCH_VERSION Options = cuda12, cuda128, cuda11, rocm, xpu, cpu
+    image: diffrhythm
+    container_name: diffrhythm
+#    volumes:
+#      - ./output:/app/output # Example: Mount output relative to docker-compose file location
+#      - ./infer:/app/infer   # Example: Mount infer from the project root
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    stdin_open: true
+    tty: true
+    ports:
+      - "8000:8000"
+    # Keep container running
+    command: ["/bin/bash", "-c", "tail -f /dev/null"]
diff --git a/docker/Dockerfile b/docker/Dockerfile
deleted file mode 100644
index 7351db4..0000000
--- a/docker/Dockerfile
+++ /dev/null
@@ -1,34 +0,0 @@
-# Use an official Python runtime as a parent image
-FROM python:3.10-slim
-
-# Set the working directory in the container
-WORKDIR /app
-
-# Install system dependencies for eSpeak and other requirements
-RUN apt-get update && apt-get install -y \
-    git \
-    espeak-ng \
-    && rm -rf /var/lib/apt/lists/*
-
-# Clone the DiffRhythm repository
-RUN git clone https://github.com/ASLP-lab/DiffRhythm.git .
-
-# Copy the requirements file into the container
-COPY requirements.txt .
-
-# Install Python dependencies
-RUN pip install --no-cache-dir -r requirements.txt
-
-# Set environment variables for eSpeak (if needed)
-ENV PHONEMIZER_ESPEAK_LIBRARY=/usr/lib/x86_64-linux-gnu/libespeak-ng.so.1
-ENV PHONEMIZER_ESPEAK_PATH=/usr/bin
-
-# Expose any necessary ports (if applicable)
-# EXPOSE 8000
-
-# Create a volume for input/output files
-VOLUME ["/app/input", "/app/output"]
-
-# Set the default command to run when starting the container
-# You might want to modify this based on specific inference scripts
-CMD ["bash"]
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
deleted file mode 100644
index 089036a..0000000
--- a/docker/docker-compose.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-version: '3.8'
-services:
-  diffrhythm:
-    build: 
-      context: .
-      dockerfile: Dockerfile
-    image: diffrhythm
-    container_name: diffrhythm
-#    volumes:
-#      - ./scripts:/app/scripts
-#      - ./output:/app/output
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
-    stdin_open: true
-    tty: true
-    command: ["/bin/bash", "-c", "tail -f /dev/null"]
diff --git a/docker/requirements.txt b/docker/requirements.txt
deleted file mode 100644
index a974756..0000000
--- a/docker/requirements.txt
+++ /dev/null
@@ -1,157 +0,0 @@
-accelerate==1.4.0
-aiofiles==23.2.1
-aiohappyeyeballs==2.6.1
-aiohttp==3.11.14
-aiosignal==1.3.2
-annotated-types==0.7.0
-anyio==4.9.0
-attrs==25.3.0
-audioread==3.0.1
-babel==2.17.0
-beartype==0.20.2
-bitsandbytes==0.45.3
-certifi==2025.1.31
-cffi==1.17.1
-charset-normalizer==3.4.1
-click==8.1.8
-cn2an==0.5.23
-colorama==0.4.6
-coloredlogs==15.0.1
-configparser==7.2.0
-csvw==3.5.1
-decorator==5.2.1
-Deprecated==1.2.18
-dlinfo==2.0.0
-docker-pycreds==0.4.0
-easydict==1.13
-einops==0.8.1
-einx==0.3.0
-ema-pytorch==0.7.7
-fastapi==0.115.12
-ffmpy==0.5.0
-filelock==3.18.0
-flatbuffers==25.2.10
-frozendict==2.4.6
-frozenlist==1.5.0
-fsspec==2025.3.0
-ftfy==6.3.1
-gin-config==0.5.0
-gitdb==4.0.12
-GitPython==3.1.44
-gradio==5.22.0
-gradio_client==1.8.0
-groovy==0.1.2
-h11==0.14.0
-httpcore==1.0.7
-httpx==0.28.1
-huggingface-hub==0.29.3
-humanfriendly==10.0
-idna==3.10
-inflect==7.5.0
-isodate==0.7.2
-jaconv==0.4.0
-jieba==0.42.1
-Jinja2==3.1.6
-joblib==1.4.2
-jsonschema==4.23.0
-jsonschema-specifications==2024.10.1
-language-tags==1.2.0
-lazy_loader==0.4
-librosa==0.10.2.post1
-lightning-utilities==0.14.2
-llvmlite==0.44.0
-loguru==0.7.3
-markdown-it-py==3.0.0
-MarkupSafe==3.0.2
-mdurl==0.1.2
-more-itertools==10.6.0
-mpmath==1.3.0
-msgpack==1.1.0
-multidict==6.2.0
-muq==0.1.0
-mutagen==1.47.0
-networkx==3.4.2
-nnAudio==0.3.3
-numba==0.61.0
-numpy==2.1.3
-onnxruntime-gpu==1.21.0
-orjson==3.10.16
-packaging==24.2
-pandas==2.2.3
-phonemizer==3.3.0
-pillow==11.1.0
-platformdirs==4.3.7
-pooch==1.8.2
-prefigure==0.0.10
-proces==0.1.7
-propcache==0.3.0
-protobuf==5.29.4
-psutil==7.0.0
-py3langid==0.3.0
-pyarrow==19.0.1
-pycparser==2.22
-pydantic==2.10.6
-pydantic_core==2.27.2
-pydub==0.25.1
-Pygments==2.19.1
-pykakasi==2.3.0
-pylance==0.23.2
-pyparsing==3.2.2
-pypinyin==0.53.0
-pyreadline3==3.5.4
-python-dateutil==2.9.0.post0
-python-multipart==0.0.20
-pytorch-lightning==2.5.1
-pytz==2025.1
-PyYAML==6.0.2
-rdflib==7.1.3
-referencing==0.36.2
-regex==2024.11.6
-requests==2.32.3
-rfc3986==1.5.0
-rich==13.9.4
-rpds-py==0.23.1
-ruff==0.11.2
-safehttpx==0.1.6
-safetensors==0.5.3
-scikit-learn==1.6.1
-scipy==1.15.2
-segments==2.3.0
-semantic-version==2.10.0
-sentry-sdk==2.24.1
-setproctitle==1.3.5
-setuptools==78.0.1
-shellingham==1.5.4
-six==1.17.0
-smmap==5.0.2
-sniffio==1.3.1
-soundfile==0.13.1
-soxr==0.5.0.post1
-starlette==0.46.1
-sympy==1.13.1
-threadpoolctl==3.6.0
-tokenizers==0.21.1
-tomlkit==0.13.2
-torch==2.6.0
-torchaudio==2.6.0
-torchdiffeq==0.2.5
-torchmetrics==1.7.0
-torchvision==0.21.0
-tqdm==4.67.1
-transformers==4.49.0
-typeguard==4.4.2
-typer==0.15.2
-typing_extensions==4.12.2
-tzdata==2025.2
-Unidecode==1.3.8
-uritemplate==4.1.1
-urllib3==2.3.0
-uvicorn==0.34.0
-wandb==0.19.8
-wcwidth==0.2.13
-websockets==15.0.1
-win32_setctime==1.2.0
-wrapt==1.17.2
-x-clip==0.14.4
-x-transformers==2.1.2
-yarl==1.18.3
diff --git a/gradio/app.py b/gradio/app.py
new file mode 100644
index 0000000..b6aece2
--- /dev/null
+++ b/gradio/app.py
@@ -0,0 +1,458 @@
+import gradio as gr
+from openai import OpenAI
+import requests
+import json
+# from volcenginesdkarkruntime import Ark
+import torch
+import torchaudio
+from einops import rearrange
+import argparse
+import json
+import os
+import spaces
+from tqdm import tqdm
+import random
+import numpy as np
+import sys
+import base64
+
+
+# Put the parent of this file's directory on sys.path so the `infer` imports below resolve
+current_dir = os.path.dirname(os.path.abspath(__file__))  # directory containing this file
+project_root = os.path.dirname(current_dir)  # Go up one level
+if project_root not in sys.path:
+    sys.path.insert(0, project_root)  # prepended, so it is searched before the existing entries
+
+# Now your import should work
+from infer.infer_utils import (
+    get_reference_latent,
+    get_lrc_token,
+    get_audio_style_prompt,
+    get_text_style_prompt,
+    prepare_model,
+    get_negative_style_prompt
+)
+
+from infer.infer import inference
+
+# Upper bound for user-supplied and randomised seeds.
+MAX_SEED = np.iinfo(np.int32).max
+# Use CUDA when torch reports it as available, otherwise the CPU.
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print(f"Using device: {device}")
+
+
+# --- MODEL LOADING ---
+# max_frames value handed to prepare_model for each song length
+max_frames_short = 2048  # Corresponds to 95s
+max_frames_long = 6144   # Corresponds to 285s
+
+# Base (95s) model; the tokenizer, muq and vae returned by this call are kept and
+# shared by both song lengths.
+print("Loading base model (95s) and components...")
+cfm, tokenizer, muq, vae = prepare_model(max_frames=max_frames_short, device=device)
+print("Compiling base model...")
+cfm = torch.compile(cfm)
+
+# Full (285s) model; only the CFM network (first return value) is kept.
+# NOTE(review): the other three return values are built and discarded — confirm
+# prepare_model offers no cheaper way to load just the CFM weights.
+print("Loading full model (285s)...")
+cfm_full, _, _, _ = prepare_model(max_frames=max_frames_long, device=device)
+print("Compiling full model...")
+cfm_full = torch.compile(cfm_full)
+
+print("Models loaded and compiled.")
+# --- END OF MODEL LOADING ---
+
+
+#cfm, cfm_full, tokenizer, muq, vae = prepare_model(device)
+#cfm = torch.compile(cfm)
+#cfm_full = torch.compile(cfm_full)
+
+
+@spaces.GPU(duration=40)
+def infer_music(lrc, ref_audio_path, text_prompt, current_prompt_type, seed=42, randomize_seed=False, steps=32, cfg_strength=4.0, file_type='wav', odeint_method='euler', Music_Duration='95s', device='cuda'):
+    """Generate a song from timestamped lyrics and a style prompt.
+
+    Music_Duration '95s' selects the base model; any other value selects the
+    full-length model. When randomize_seed is set, `seed` is replaced by a
+    random value in [0, MAX_SEED]. The style prompt comes from ref_audio_path
+    when current_prompt_type is 'audio', otherwise from text_prompt.
+
+    Returns whatever `inference` returns for the requested file_type.
+    Raises gr.Error if lyric tokenisation or style-prompt extraction fails.
+    """
+    # Reuse the frame budgets the models were loaded with instead of repeating the literals.
+    if Music_Duration == '95s':
+        max_frames, cfm_model = max_frames_short, cfm
+    else:
+        max_frames, cfm_model = max_frames_long, cfm_full
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+    torch.manual_seed(seed)
+    # A sway coefficient of -1 is passed only below 32 steps; otherwise None.
+    sway_sampling_coef = -1 if steps < 32 else None
+    vocal_flag = False
+    try:
+        lrc_prompt, start_time = get_lrc_token(max_frames, lrc, tokenizer, device)
+        if current_prompt_type == 'audio':
+            style_prompt, vocal_flag = get_audio_style_prompt(muq, ref_audio_path)
+        else:
+            style_prompt = get_text_style_prompt(muq, text_prompt)
+    except Exception as e:
+        # Chain the original exception so its traceback is not lost behind the UI error.
+        raise gr.Error(f"Error: {str(e)}") from e
+    negative_style_prompt = get_negative_style_prompt(device)
+    latent_prompt = get_reference_latent(device, max_frames)
+    return inference(
+        cfm_model=cfm_model, vae_model=vae, cond=latent_prompt, text=lrc_prompt, duration=max_frames,
+        style_prompt=style_prompt, negative_style_prompt=negative_style_prompt, steps=steps, cfg_strength=cfg_strength,
+        sway_sampling_coef=sway_sampling_coef, start_time=start_time, file_type=file_type, vocal_flag=vocal_flag, odeint_method=odeint_method,
+    )
+
+def R1_infer1(theme, tags_gen, language):
+    """Ask the LLM endpoint for timestamped lyrics about `theme`, in the `tags_gen` style and in `language`.
+
+    Returns the text of the first choice in the reply, or an empty dict if the request fails.
+    """
+    from openai import OpenAIError  # the OpenAI client raises this family, not requests' exceptions
+    try:
+        client = OpenAI(api_key=os.getenv('HS_DP_API'), base_url = "https://ark.cn-beijing.volces.com/api/v3")
+        llm_prompt = """
+        请围绕"{theme}"主题生成一首符合"{tags}"风格的语言为{language}的完整歌词。严格遵循以下要求：
+
+        ### **强制格式规则**
+        1. **仅输出时间戳和歌词**，禁止任何括号、旁白、段落标记（如副歌、间奏、尾奏等注释）。
+        2. 每行格式必须为 `[mm:ss.xx]歌词内容`，时间戳与歌词间无空格，歌词内容需完整连贯。
+        3. 时间戳需自然分布，**第一句歌词起始时间不得为 [00:00.00]**，需考虑前奏空白。
+
+        ### **内容与结构要求**
+        1. 歌词应富有变化，使情绪递进，整体连贯有层次感。**每行歌词长度应自然变化**，切勿长度一致，导致很格式化。
+        2. **时间戳分配应根据歌曲的标签、歌词的情感、节奏来合理推测**，而非机械地按照歌词长度分配。
+        3. 间奏/尾奏仅通过时间空白体现（如从 [02:30.00] 直接跳至 [02:50.00]），**无需文字描述**。
+
+        ### **负面示例（禁止出现）**
+        - 错误：[01:30.00](钢琴间奏)
+        - 错误：[02:00.00][副歌]
+        - 错误：空行、换行符、注释
+        """
+
+        response = client.chat.completions.create(
+            model="ep-20250304144033-nr9wl",
+            messages=[
+                {"role": "system", "content": "You are a professional musician who has been invited to make music-related comments."},
+                {"role": "user", "content": llm_prompt.format(theme=theme, tags=tags_gen, language=language)},
+            ],
+            stream=False
+        )
+        return response.choices[0].message.content
+    except (OpenAIError, requests.exceptions.RequestException) as e:
+        print(f'请求出错: {e}')
+        return {}
+
+
+
+def R1_infer2(tags_lyrics, lyrics_input):
+    """Ask the LLM endpoint to add LRC timestamps to `lyrics_input`, guided by the `tags_lyrics` style.
+
+    Returns the text of the first choice in the reply.
+    """
+    # The API key is read from the HS_DP_API environment variable.
+    ark_client = OpenAI(api_key=os.getenv('HS_DP_API'), base_url = "https://ark.cn-beijing.volces.com/api/v3")
+    llm_prompt = """
+    {lyrics_input}这是一首歌的歌词,每一行是一句歌词,{tags_lyrics}是我希望这首歌的风格，我现在想要给这首歌的每一句歌词打时间戳得到LRC，我希望时间戳分配应根据歌曲的标签、歌词的情感、节奏来合理推测，而非机械地按照歌词长度分配。第一句歌词的时间戳应考虑前奏长度，避免歌词从 `[00:00.00]` 直接开始。严格按照 LRC 格式输出歌词，每行格式为 `[mm:ss.xx]歌词内容`。最后的结果只输出LRC,不需要其他的解释。
+    """
+    chat_messages = [
+        {"role": "system", "content": "You are a professional musician who has been invited to make music-related comments."},
+        {"role": "user", "content": llm_prompt.format(lyrics_input=lyrics_input, tags_lyrics=tags_lyrics)},
+    ]
+    reply = ark_client.chat.completions.create(
+        model="ep-20250304144033-nr9wl", messages=chat_messages, stream=False
+    )
+    # Only the first choice is used.
+    return reply.choices[0].message.content
+
+css = """
+/* 固定文本域高度并强制滚动条 */
+.lyrics-scroll-box textarea {
+    height: 405px !important;  /* 固定高度 */
+    max-height: 500px !important;  /* 最大高度 */
+    overflow-y: auto !important;  /* 垂直滚动 */
+    white-space: pre-wrap;  /* 保留换行 */
+    line-height: 1.5;  /* 行高优化 */
+}
+
+.gr-examples {
+    background: transparent !important;
+    border: 1px solid #e0e0e0 !important;
+    border-radius: 8px;
+    margin: 1rem 0 !important;
+    padding: 1rem !important;
+}
+
+"""  # Stylesheet for gr.Blocks(css=css): fixed-height scrolling lyrics textarea and a bordered, transparent examples panel
+
+
+with gr.Blocks(css=css) as demo:
+    gr.HTML(f"""
+            <div style="display: flex; align-items: center;">
+                <img src='https://raw.githubusercontent.com/ASLP-lab/DiffRhythm/refs/heads/main/src/DiffRhythm_logo.jpg' 
+                    style='width: 200px; height: 40%; display: block; margin: 0 auto 20px;'>
+            </div>
+            
+            <div style="flex: 1; text-align: center;">
+                <div style="font-size: 2em; font-weight: bold; text-align: center; margin-bottom: 5px">
+                    Di♪♪Rhythm (谛韵)
+                </div>
+                <div style="display:flex; justify-content: center; column-gap:4px;">
+                    <a href="https://arxiv.org/abs/2503.01183">
+                        <img src='https://img.shields.io/badge/Arxiv-Paper-blue'>
+                    </a> 
+                    <a href="https://github.com/ASLP-lab/DiffRhythm">
+                        <img src='https://img.shields.io/badge/GitHub-Repo-green'>
+                    </a> 
+                    <a href="https://aslp-lab.github.io/DiffRhythm.github.io/">
+                        <img src='https://img.shields.io/badge/Project-Page-brown'>
+                    </a>
+                </div>
+            </div> 
+            """)
+    
+    with gr.Tabs() as tabs:
+        
+        # page 1
+        with gr.Tab("Music Generate", id=0):
+            with gr.Row():
+                with gr.Column():
+                    lrc = gr.Textbox(
+                        label="Lyrics",
+                        placeholder="Input the full lyrics",
+                        lines=12,
+                        max_lines=50,
+                        elem_classes="lyrics-scroll-box",
+                        value="""[00:04.34]Tell me that I'm special\n[00:06.57]Tell me I look pretty\n[00:08.46]Tell me I'm a little angel\n[00:10.58]Sweetheart of your city\n[00:13.64]Say what I'm dying to hear\n[00:17.35]Cause I'm dying to hear you\n[00:20.86]Tell me I'm that new thing\n[00:22.93]Tell me that I'm relevant\n[00:24.96]Tell me that I got a big heart\n[00:27.04]Then back it up with evidence\n[00:29.94]I need it and I don't know why\n[00:34.28]This late at night\n[00:36.32]Isn't it lonely\n[00:39.24]I'd do anything to make you want me\n[00:43.40]I'd give it all up if you told me\n[00:47.42]That I'd be\n[00:49.43]The number one girl in your eyes\n[00:52.85]Your one and only\n[00:55.74]So what's it gon' take for you to want me\n[00:59.78]I'd give it all up if you told me\n[01:03.89]That I'd be\n[01:05.94]The number one girl in your eyes\n[01:11.34]Tell me I'm going real big places\n[01:14.32]Down to earth so friendly\n[01:16.30]And even through all the phases\n[01:18.46]Tell me you accept me\n[01:21.56]Well that's all I'm dying to hear\n[01:25.30]Yeah I'm dying to hear you\n[01:28.91]Tell me that you need me\n[01:30.85]Tell me that I'm loved\n[01:32.90]Tell me that I'm worth it\n[01:34.95]And that I'm enough\n[01:37.91]I need it and I don't know why\n[01:42.08]This late at night\n[01:44.24]Isn't it lonely\n[01:47.18]I'd do anything to make you want me\n[01:51.30]I'd give it all up if you told me\n[01:55.32]That I'd be\n[01:57.35]The number one girl in your eyes\n[02:00.72]Your one and only\n[02:03.57]So what's it gon' take for you to want me\n[02:07.78]I'd give it all up if you told me\n[02:11.74]That I'd be\n[02:13.86]The number one girl in your eyes\n[02:17.03]The girl in your eyes\n[02:21.05]The girl in your eyes\n[02:26.30]Tell me I'm the number one girl\n[02:28.44]I'm the number one girl in your eyes\n[02:33.49]The girl in your eyes\n[02:37.58]The girl in your eyes\n[02:42.74]Tell me I'm the number one girl\n[02:44.88]I'm the number one girl in 
your eyes\n[02:49.91]Well isn't it lonely\n[02:53.19]I'd do anything to make you want me\n[02:57.10]I'd give it all up if you told me\n[03:01.15]That I'd be\n[03:03.31]The number one girl in your eyes\n[03:06.57]Your one and only\n[03:09.42]So what's it gon' take for you to want me\n[03:13.50]I'd give it all up if you told me\n[03:17.56]That I'd be\n[03:19.66]The number one girl in your eyes\n[03:25.74]The number one girl in your eyes"""    
+                    )
+                    
+                    current_prompt_type = gr.State(value="audio")
+                    with gr.Tabs() as inside_tabs:
+                        with gr.Tab("Audio Prompt"):
+                            audio_prompt = gr.Audio(label="Audio Prompt", type="filepath", value="./src/prompt/default.wav")
+                        with gr.Tab("Text Prompt"):
+                            text_prompt = gr.Textbox(
+                            label="Text Prompt",
+                            placeholder="Enter the Text Prompt, eg: emotional piano pop",
+                            )
+                        def update_prompt_type(evt: gr.SelectData):
+                            return "audio" if evt.index == 0 else "text"
+
+                        inside_tabs.select(
+                            fn=update_prompt_type,
+                            outputs=current_prompt_type
+                        )
+                    
+                with gr.Column():
+                    with gr.Accordion("Best Practices Guide", open=True):
+                        gr.Markdown("""
+1. **Lyrics Format Requirements**
+    - Each line must follow: `[mm:ss.xx]Lyric content`
+    - Example of valid format:
+    ``` 
+    [00:10.00]Moonlight spills through broken blinds
+    [00:13.20]Your shadow dances on the dashboard shrine
+    ```
+
+2. **Audio Prompt Requirements**
+    - Reference audio should be ≥ 1 second, audio >10 seconds will be randomly clipped into 10 seconds
+    - For optimal results, the 10-second clips should be carefully selected
+    - Shorter clips may lead to incoherent generation
+3. **Supported Languages**
+    - **Chinese and English**
+    - More languages comming soon
+
+4. **Others** 
+    - If loading audio result is slow, you can select Output Format as mp3 in Advanced Settings.                                
+    
+                        """)
+                    Music_Duration = gr.Radio(["95s", "285s"], label="Music Duration", value="95s")
+                    
+                    lyrics_btn = gr.Button("Generate", variant="primary")
+                    audio_output = gr.Audio(label="Audio Result", type="filepath", elem_id="audio_output")
+                    with gr.Accordion("Advanced Settings", open=False):
+                        seed = gr.Slider(
+                            label="Seed",
+                            minimum=0,
+                            maximum=MAX_SEED,
+                            step=1,
+                            value=0,
+                        )
+                        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+                        
+                        steps = gr.Slider(
+                                    minimum=10,
+                                    maximum=100,
+                                    value=32,
+                                    step=1,
+                                    label="Diffusion Steps",
+                                    interactive=True,
+                                    elem_id="step_slider"
+                                )
+                        cfg_strength = gr.Slider(
+                                    minimum=1,
+                                    maximum=10,
+                                    value=4.0,
+                                    step=0.5,
+                                    label="CFG Strength",
+                                    interactive=True,
+                                    elem_id="step_slider"
+                                )
+                        odeint_method = gr.Radio(["euler", "midpoint", "rk4","implicit_adams"], label="ODE Solver", value="euler")                        
+                        file_type = gr.Dropdown(["wav", "mp3", "ogg"], label="Output Format", value="wav")
+
+
+            gr.Examples(
+                examples=[
+                    ["./src/prompt/pop_cn.wav"], 
+                    ["./src/prompt/pop_en.wav"], 
+                    ["./src/prompt/rock_cn.wav"], 
+                    ["./src/prompt/rock_en.wav"], 
+                    ["./src/prompt/country_cn.wav"], 
+                    ["./src/prompt/country_en.wav"],
+                    ["./src/prompt/classic_cn.wav"],
+                    ["./src/prompt/classic_en.wav"],
+                    ["./src/prompt/jazz_cn.wav"],
+                    ["./src/prompt/jazz_en.wav"],
+                    ["./src/prompt/rap_cn.wav"],
+                    ["./src/prompt/rap_en.wav"],
+                    ["./src/prompt/default.wav"]
+                ],
+                inputs=[audio_prompt],  
+                label="Audio Examples",
+                examples_per_page=13,
+                elem_id="audio-examples-container" 
+            )
+            
+            gr.Examples(
+                examples=[
+                    ["Pop Emotional Piano"],
+                    ["流行 情感 钢琴"],
+                    ["Indie folk ballad, coming-of-age themes, acoustic guitar picking with harmonica interludes"],
+                    ["独立民谣, 成长主题, 原声吉他弹奏与口琴间奏"]
+                ],
+                inputs=[text_prompt],  
+                label="Text Examples",
+                examples_per_page=4,
+                elem_id="text-examples-container" 
+            )
+
+            gr.Examples(
+                examples=[
+                    ["""[00:04.34]Tell me that I'm special\n[00:06.57]Tell me I look pretty\n[00:08.46]Tell me I'm a little angel\n[00:10.58]Sweetheart of your city\n[00:13.64]Say what I'm dying to hear\n[00:17.35]Cause I'm dying to hear you\n[00:20.86]Tell me I'm that new thing\n[00:22.93]Tell me that I'm relevant\n[00:24.96]Tell me that I got a big heart\n[00:27.04]Then back it up with evidence\n[00:29.94]I need it and I don't know why\n[00:34.28]This late at night\n[00:36.32]Isn't it lonely\n[00:39.24]I'd do anything to make you want me\n[00:43.40]I'd give it all up if you told me\n[00:47.42]That I'd be\n[00:49.43]The number one girl in your eyes\n[00:52.85]Your one and only\n[00:55.74]So what's it gon' take for you to want me\n[00:59.78]I'd give it all up if you told me\n[01:03.89]That I'd be\n[01:05.94]The number one girl in your eyes\n[01:11.34]Tell me I'm going real big places\n[01:14.32]Down to earth so friendly\n[01:16.30]And even through all the phases\n[01:18.46]Tell me you accept me\n[01:21.56]Well that's all I'm dying to hear\n[01:25.30]Yeah I'm dying to hear you\n[01:28.91]Tell me that you need me\n[01:30.85]Tell me that I'm loved\n[01:32.90]Tell me that I'm worth it\n[01:34.95]And that I'm enough\n[01:37.91]I need it and I don't know why\n[01:42.08]This late at night\n[01:44.24]Isn't it lonely\n[01:47.18]I'd do anything to make you want me\n[01:51.30]I'd give it all up if you told me\n[01:55.32]That I'd be\n[01:57.35]The number one girl in your eyes\n[02:00.72]Your one and only\n[02:03.57]So what's it gon' take for you to want me\n[02:07.78]I'd give it all up if you told me\n[02:11.74]That I'd be\n[02:13.86]The number one girl in your eyes\n[02:17.03]The girl in your eyes\n[02:21.05]The girl in your eyes\n[02:26.30]Tell me I'm the number one girl\n[02:28.44]I'm the number one girl in your eyes\n[02:33.49]The girl in your eyes\n[02:37.58]The girl in your eyes\n[02:42.74]Tell me I'm the number one girl\n[02:44.88]I'm the number one girl in your 
eyes\n[02:49.91]Well isn't it lonely\n[02:53.19]I'd do anything to make you want me\n[02:57.10]I'd give it all up if you told me\n[03:01.15]That I'd be\n[03:03.31]The number one girl in your eyes\n[03:06.57]Your one and only\n[03:09.42]So what's it gon' take for you to want me\n[03:13.50]I'd give it all up if you told me\n[03:17.56]That I'd be\n[03:19.66]The number one girl in your eyes\n[03:25.74]The number one girl in your eyes"""],
+                    ["""[00:00.52]Abracadabra abracadabra\n[00:03.97]Ha\n[00:04.66]Abracadabra abracadabra\n[00:12.02]Yeah\n[00:15.80]Pay the toll to the angels\n[00:19.08]Drawin' circles in the clouds\n[00:23.31]Keep your mind on the distance\n[00:26.67]When the devil turns around\n[00:30.95]Hold me in your heart tonight\n[00:34.11]In the magic of the dark moonlight\n[00:38.44]Save me from this empty fight\n[00:43.83]In the game of life\n[00:45.84]Like a poem said by a lady in red\n[00:49.45]You hear the last few words of your life\n[00:53.15]With a haunting dance now you're both in a trance\n[00:56.90]It's time to cast your spell on the night\n[01:01.40]Abracadabra ama-ooh-na-na\n[01:04.88]Abracadabra porta-ooh-ga-ga\n[01:08.92]Abracadabra abra-ooh-na-na\n[01:12.30]In her tongue she's sayin'\n[01:14.76]Death or love tonight\n[01:18.61]Abracadabra abracadabra\n[01:22.18]Abracadabra abracadabra\n[01:26.08]Feel the beat under your feet\n[01:27.82]The floor's on fire\n[01:29.90]Abracadabra abracadabra\n[01:33.78]Choose the road on the west side\n[01:37.09]As the dust flies watch it burn\n[01:41.45]Don't waste time on feeling\n[01:44.64]Your depression won't return\n[01:49.15]Hold me in your heart tonight\n[01:52.21]In the magic of the dark moonlight\n[01:56.54]Save me from this empty fight\n[02:01.77]In the game of life\n[02:03.94]Like a poem said by a lady in red\n[02:07.52]You hear the last few words of your life\n[02:11.19]With a haunting dance now you're both in a trance\n[02:14.95]It's time to cast your spell on the night\n[02:19.53]Abracadabra ama-ooh-na-na\n[02:22.71]Abracadabra porta-ooh-ga-ga\n[02:26.94]Abracadabra abra-ooh-na-na\n[02:30.42]In her tongue she's sayin'\n[02:32.83]Death or love tonight\n[02:36.55]Abracadabra abracadabra\n[02:40.27]Abracadabra abracadabra\n[02:44.19]Feel the beat under your feet\n[02:46.14]The floor's on fire\n[02:47.95]Abracadabra abracadabra\n[02:51.17]Phantom of the dance floor come to me\n[02:58.46]Sing for me a sinful 
melody\n[03:06.51]Ah-ah-ah-ah-ah ah-ah ah-ah\n[03:13.76]Ah-ah-ah-ah-ah ah-ah ah-ah\n[03:22.39]Abracadabra ama-ooh-na-na\n[03:25.66]Abracadabra porta-ooh-ga-ga\n[03:29.87]Abracadabra abra-ooh-na-na\n[03:33.16]In her tongue she's sayin'\n[03:35.55]Death or love tonight"""],
+                    ["""[00:00.27]只因你太美 baby 只因你太美 baby\n[00:08.95]只因你实在是太美 baby\n[00:13.99]只因你太美 baby\n[00:18.89]迎面走来的你让我如此蠢蠢欲动\n[00:20.88]这种感觉我从未有\n[00:21.79]Cause I got a crush on you who you\n[00:25.74]你是我的我是你的谁\n[00:28.09]再多一眼看一眼就会爆炸\n[00:30.31]再近一点靠近点快被融化\n[00:32.49]想要把你占为己有 baby bae\n[00:34.60]不管走到哪里\n[00:35.44]都会想起的人是你 you you\n[00:38.12]我应该拿你怎样\n[00:39.61]Uh 所有人都在看着你\n[00:42.36]我的心总是不安\n[00:44.18]Oh 我现在已病入膏肓\n[00:46.63]Eh oh\n[00:47.84]难道真的因你而疯狂吗\n[00:51.57]我本来不是这种人\n[00:53.59]因你变成奇怪的人\n[00:55.77]第一次呀变成这样的我\n[01:01.23]不管我怎么去否认\n[01:03.21]只因你太美 baby 只因你太美 baby\n[01:11.46]只因你实在是太美 baby\n[01:16.75]只因你太美 baby\n[01:21.09]Oh eh oh\n[01:22.82]现在确认地告诉我\n[01:25.26]Oh eh oh\n[01:27.31]你到底属于谁\n[01:29.98]Oh eh oh\n[01:31.70]现在确认地告诉我\n[01:34.45]Oh eh oh\n[01:36.35]你到底属于谁\n[01:37.65]就是现在告诉我\n[01:40.00]跟着那节奏 缓缓 make wave\n[01:42.42]甜蜜的奶油 it's your birthday cake\n[01:44.66]男人们的 game call me 你恋人\n[01:46.83]别被欺骗愉快的 I wanna play\n[01:48.83]我的脑海每分每秒为你一人沉醉\n[01:50.90]最迷人让我神魂颠倒是你身上香水\n[01:53.30]Oh right baby I'm fall in love with you\n[01:55.20]我的一切你都拿走\n[01:56.40]只要有你就已足够\n[01:58.56]我到底应该怎样\n[02:00.37]Uh 我心里一直很不安\n[02:03.12]其他男人们的视线\n[02:04.84]Oh 全都只看着你的脸\n[02:07.33]Eh oh\n[02:08.39]难道真的因你而疯狂吗\n[02:12.43]我本来不是这种人\n[02:14.35]因你变成奇怪的人\n[02:16.59]第一次呀变成这样的我\n[02:21.76]不管我怎么去否认\n[02:24.03]只因你太美 baby 只因你太美 baby\n[02:32.37]只因你实在是太美 baby\n[02:37.49]只因你太美 baby\n[02:43.66]我愿意把我的全部都给你\n[02:47.19]我每天在梦里都梦见你\n[02:49.13]还有我闭着眼睛也能看到你\n[02:52.58]现在开始我只准你看我\n[02:56.28]I don't wanna wake up in dream\n[02:57.92]我只想看你这是真心话\n[02:59.86]只因你太美 baby 只因你太美 baby\n[03:08.20]只因你实在是太美 baby\n[03:13.22]只因你太美 baby\n[03:17.69]Oh eh oh\n[03:19.36]现在确认的告诉我\n[03:21.91]Oh eh oh\n[03:23.85]你到底属于谁\n[03:26.58]Oh eh oh\n[03:28.32]现在确认的告诉我\n[03:30.95]Oh eh oh\n[03:32.82]你到底属于谁就是现在告诉我"""]
+                ],
+                
+                inputs=[lrc],
+                label="Lrc Examples",
+                examples_per_page=3,
+                elem_id="lrc-examples-container",
+            )
+
+
+        # Page 2: "Lyrics Generate" tab -- input widgets in the first column, generated LRC text in the second.
+        with gr.Tab("Lyrics Generate", id=1):
+            with gr.Row():
+                with gr.Column():
+                    with gr.Accordion("Notice", open=False):
+                        gr.Markdown("**Two Generation Modes:**\n1. Generate from theme & tags\n2. Add timestamps to existing lyrics")
+                    # Method 1 widgets: theme, tags and language are the inputs of R1_infer1 (bound below).
+                    with gr.Group():
+                        gr.Markdown("### Method 1: Generate from Theme")
+                        theme = gr.Textbox(label="theme", placeholder="Enter song theme, e.g: Love and Heartbreak")
+                        tags_gen = gr.Textbox(label="tags", placeholder="Enter song tags, e.g: pop confidence healing")
+                        language = gr.Radio(["cn", "en"], label="Language", value="en")
+                        gen_from_theme_btn = gr.Button("Generate LRC (From Theme)", variant="primary")
+                        # Example rows for the three Method 1 inputs (theme, tags, language).
+                        gr.Examples(
+                            examples=[
+                                [
+                                    "Love and Heartbreak", 
+                                    "vocal emotional piano pop",
+                                    "en"
+                                ],
+                                [
+                                    "Heroic Epic", 
+                                    "choir orchestral powerful",
+                                    "cn"
+                                ]
+                            ],
+                            inputs=[theme, tags_gen, language],
+                            label="Examples: Generate from Theme"
+                        )
+
+                    with gr.Group(visible=True): 
+                        gr.Markdown("### Method 2: Add Timestamps to Lyrics")
+                        tags_lyrics = gr.Textbox(label="tags", placeholder="Enter song tags, e.g: ballad piano slow")
+                        lyrics_input = gr.Textbox(
+                            label="Raw Lyrics (without timestamps)",
+                            placeholder="Enter plain lyrics (without timestamps), e.g:\nYesterday\nAll my troubles...",
+                            lines=10,
+                            max_lines=50,
+                            elem_classes="lyrics-scroll-box"
+                        )
+                        # Method 2 trigger; bound to R1_infer2 below.
+                        gen_from_lyrics_btn = gr.Button("Generate LRC (From Lyrics)", variant="primary")
+                        # Example rows for the two Method 2 inputs (tags, raw lyrics).
+                        gr.Examples(
+                            examples=[
+                                [
+                                    "acoustic folk happy", 
+                                    """I'm sitting here in the boring room\nIt's just another rainy Sunday afternoon"""
+                                ],
+                                [
+                                    "electronic dance energetic",
+                                    """We're living in a material world\nAnd I am a material girl"""
+                                ]
+                            ],
+                            inputs=[tags_lyrics, lyrics_input],
+                            label="Examples: Generate from Lyrics"
+                        )
+
+                # Second column: a single output textbox that both generation methods write to.
+                with gr.Column():
+                    lrc_output = gr.Textbox(
+                        label="Generated LRC",
+                        placeholder="Timed lyrics will appear here",
+                        lines=57,
+                        elem_classes="lrc-output",
+                        show_copy_button=True
+                    )
+
+            # Bind functions. Method 1: (theme, tags, language) -> R1_infer1 -> lrc_output
+            gen_from_theme_btn.click(
+                fn=R1_infer1,
+                inputs=[theme, tags_gen, language],
+                outputs=lrc_output
+            )
+            # Method 2: (tags, raw lyrics) -> R1_infer2 -> lrc_output
+            gen_from_lyrics_btn.click(
+                fn=R1_infer2,
+                inputs=[tags_lyrics, lyrics_input],
+                outputs=lrc_output
+            )
+
+    # Tab-change hook: a no-op callback registered with no inputs and no outputs.
+    tabs.select(
+        fn=lambda _evt: None,
+        inputs=None, outputs=None,
+    )
+    
+    # Generate button: call infer_music with the listed controls and route its return value to audio_output.
+    lyrics_btn.click(
+        infer_music,
+        [lrc, audio_prompt, text_prompt, current_prompt_type, seed, randomize_seed, steps, cfg_strength, file_type, odeint_method, Music_Duration],
+        audio_output,
+    )
+
+if __name__ == "__main__":  # only when executed as a script, not on import
+    demo.launch()  # start the Gradio app with launch()'s default settings
diff --git a/gradio/src/DiffRhythm.jpg b/gradio/src/DiffRhythm.jpg
new file mode 100644
index 0000000..8d472ce
Binary files /dev/null and b/gradio/src/DiffRhythm.jpg differ
diff --git a/gradio/src/negative_prompt.npy b/gradio/src/negative_prompt.npy
new file mode 100644
index 0000000..ae5aa34
Binary files /dev/null and b/gradio/src/negative_prompt.npy differ
diff --git a/gradio/src/prompt/classic_cn.wav b/gradio/src/prompt/classic_cn.wav
new file mode 100644
index 0000000..77e4dc2
Binary files /dev/null and b/gradio/src/prompt/classic_cn.wav differ
diff --git a/gradio/src/prompt/classic_en.wav b/gradio/src/prompt/classic_en.wav
new file mode 100644
index 0000000..47d5623
Binary files /dev/null and b/gradio/src/prompt/classic_en.wav differ
diff --git a/gradio/src/prompt/country_cn.wav b/gradio/src/prompt/country_cn.wav
new file mode 100644
index 0000000..81935a2
Binary files /dev/null and b/gradio/src/prompt/country_cn.wav differ
diff --git a/gradio/src/prompt/country_en.wav b/gradio/src/prompt/country_en.wav
new file mode 100644
index 0000000..6e42b4b
Binary files /dev/null and b/gradio/src/prompt/country_en.wav differ
diff --git a/gradio/src/prompt/default.wav b/gradio/src/prompt/default.wav
new file mode 100644
index 0000000..6b4a833
Binary files /dev/null and b/gradio/src/prompt/default.wav differ
diff --git a/gradio/src/prompt/gift_of_the_world.wav b/gradio/src/prompt/gift_of_the_world.wav
new file mode 100644
index 0000000..2cd5c03
Binary files /dev/null and b/gradio/src/prompt/gift_of_the_world.wav differ
diff --git a/gradio/src/prompt/jazz_cn.wav b/gradio/src/prompt/jazz_cn.wav
new file mode 100644
index 0000000..a5e9918
Binary files /dev/null and b/gradio/src/prompt/jazz_cn.wav differ
diff --git a/gradio/src/prompt/jazz_en.wav b/gradio/src/prompt/jazz_en.wav
new file mode 100644
index 0000000..ef32a68
Binary files /dev/null and b/gradio/src/prompt/jazz_en.wav differ
diff --git a/gradio/src/prompt/little_happiness.wav b/gradio/src/prompt/little_happiness.wav
new file mode 100644
index 0000000..d8432da
Binary files /dev/null and b/gradio/src/prompt/little_happiness.wav differ
diff --git a/gradio/src/prompt/little_talks.wav b/gradio/src/prompt/little_talks.wav
new file mode 100644
index 0000000..56b7898
Binary files /dev/null and b/gradio/src/prompt/little_talks.wav differ
diff --git a/gradio/src/prompt/most_beautiful_expectation.wav b/gradio/src/prompt/most_beautiful_expectation.wav
new file mode 100644
index 0000000..5b18157
Binary files /dev/null and b/gradio/src/prompt/most_beautiful_expectation.wav differ
diff --git a/gradio/src/prompt/pop_cn.wav b/gradio/src/prompt/pop_cn.wav
new file mode 100644
index 0000000..252b6dc
Binary files /dev/null and b/gradio/src/prompt/pop_cn.wav differ
diff --git a/gradio/src/prompt/pop_en.wav b/gradio/src/prompt/pop_en.wav
new file mode 100644
index 0000000..ed13ccc
Binary files /dev/null and b/gradio/src/prompt/pop_en.wav differ
diff --git a/gradio/src/prompt/rap_cn.wav b/gradio/src/prompt/rap_cn.wav
new file mode 100644
index 0000000..954edb6
Binary files /dev/null and b/gradio/src/prompt/rap_cn.wav differ
diff --git a/gradio/src/prompt/rap_en.wav b/gradio/src/prompt/rap_en.wav
new file mode 100644
index 0000000..003fb6c
Binary files /dev/null and b/gradio/src/prompt/rap_en.wav differ
diff --git a/gradio/src/prompt/rock_cn.wav b/gradio/src/prompt/rock_cn.wav
new file mode 100644
index 0000000..0631dc6
Binary files /dev/null and b/gradio/src/prompt/rock_cn.wav differ
diff --git a/gradio/src/prompt/rock_en.wav b/gradio/src/prompt/rock_en.wav
new file mode 100644
index 0000000..3440807
Binary files /dev/null and b/gradio/src/prompt/rock_en.wav differ
diff --git a/infer/infer.py b/infer/infer.py
index 9bc09ae..4a8306f 100755
--- a/infer/infer.py
+++ b/infer/infer.py
@@ -24,7 +24,7 @@
 
 print("Current working directory:", os.getcwd())
 
-from infer_utils import (
+from .infer_utils import (
     decode_audio,
     get_lrc_token,
     get_negative_style_prompt,
diff --git a/infer/infer_utils.py b/infer/infer_utils.py
index 4602f68..8bc7488 100755
--- a/infer/infer_utils.py
+++ b/infer/infer_utils.py
@@ -30,6 +30,7 @@
 from model import DiT, CFM
 
 
+
 def decode_audio(latents, vae_model, chunked=False, overlap=32, chunk_size=128):
     downsampling_ratio = 2048
     io_channels = 2
@@ -136,6 +137,41 @@ def get_negative_style_prompt(device):
 
 
 @torch.no_grad()
+
+def get_audio_style_prompt(model, wav_path):
+    """Embed a reference audio file with ``model``.
+
+    The file is loaded at 24 kHz. Clips longer than 10 s are cut to the 10 s that
+    start 5 s before the floored half-duration; shorter clips are used whole.
+
+    Returns ``(audio_emb, vocal_flag)``: the half-precision embedding and a bool
+    that is True when the clip lasts at most 1 s.
+    """
+    sample_rate = 24000
+    samples, _ = librosa.load(wav_path, sr=sample_rate)
+    duration = librosa.get_duration(y=samples, sr=sample_rate)
+    vocal_flag = bool(duration <= 1)
+
+    if duration > 10:
+        begin = int(duration // 2 - 5)
+        samples = samples[begin * sample_rate:(begin + 10) * sample_rate]
+
+    wav = torch.tensor(samples).unsqueeze(0).to(model.device)
+    with torch.no_grad():
+        audio_emb = model(wavs=wav)  # [1, 512]
+
+    return audio_emb.half(), vocal_flag
+
+def get_text_style_prompt(model, text_prompt):
+    """Embed ``text_prompt`` with ``model`` and return the result in half precision."""
+    with torch.no_grad():
+        text_emb = model(texts=text_prompt)  # [1, 512]
+
+    return text_emb.half()
+
+
+# no_grad must sit directly above get_style_prompt; the decorator further up binds to get_audio_style_prompt.
+@torch.no_grad()
 def get_style_prompt(model, wav_path=None, prompt=None):
     mulan = model
 
diff --git a/requirements.txt b/requirements.txt
index 9264680..ba05241 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,3 +20,7 @@ onnxruntime
 Unidecode==1.3.8
 phonemizer==3.3.0
 inflect==7.5.0
+openai
+spaces
+py3langid
+