Skip to content

Commit e9bb449

Browse files
authored
[BugFix] Fix deepseek v3.2 mtp bug. (#3900)
### What this PR does / why we need it?
This PR fixes a DeepSeek v3.2 MTP bug.

### Does this PR introduce _any_ user-facing change?
None

### How was this patch tested?
All existing CI tests should pass.

- vLLM version: v0.11.0
- vLLM main: vllm-project/vllm@83f478b

---------

Signed-off-by: whx-sjtu <2952154980@qq.com>
1 parent 646fbac commit e9bb449

File tree

1 file changed

+11
-0
lines changed

1 file changed

+11
-0
lines changed

vllm_ascend/spec_decode/mtp_proposer.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from vllm.model_executor.model_loader.utils import \
1414
process_weights_after_loading
1515
from vllm.model_executor.models.deepseek_mtp import DeepSeekMTP
16+
from vllm.model_executor.models.deepseek_v2 import DeepseekV32IndexerCache
1617
from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM
1718
from vllm.utils import cdiv
1819
from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder,
@@ -139,6 +140,9 @@ def load_model(self, model) -> None:
139140
target_attn_layer_names = set(
140141
get_layers_from_vllm_config(self.vllm_config,
141142
AttentionLayerBase).keys())
143+
target_indexer_layer_names = set(
144+
get_layers_from_vllm_config(self.vllm_config,
145+
DeepseekV32IndexerCache).keys())
142146
draft_model_config = \
143147
self.vllm_config.speculative_config.draft_model_config
144148
target_device = self.vllm_config.device_config.device
@@ -152,6 +156,13 @@ def load_model(self, model) -> None:
152156
draft_attn_layer_names = (get_layers_from_vllm_config(
153157
self.vllm_config, AttentionLayerBase).keys() -
154158
target_attn_layer_names)
159+
indexer_layers = get_layers_from_vllm_config(self.vllm_config,
160+
DeepseekV32IndexerCache)
161+
draft_indexer_layer_names = indexer_layers.keys(
162+
) - target_indexer_layer_names
163+
# NOTE: Currently we don't have a specific attention backend or attention metadata
164+
# for deepseek v3.2 indexer, so we just exclude the indexer layers here.
165+
draft_attn_layer_names = draft_attn_layer_names - draft_indexer_layer_names
155166

156167
assert len(draft_attn_layer_names) == 1
157168
self.attn_layer_name = list(draft_attn_layer_names)

0 commit comments

Comments
 (0)