1 parent 463baaf commit 85cec45
src/maxdiffusion/configs/base_wan_14b.yml
@@ -63,7 +63,7 @@ attention: 'flash' # Supported attention: dot_product, flash, cudnn_flash_te, ri
flash_min_seq_length: 0

# If mask_padding_tokens is True, we pass in segment ids to splash attention to avoid attending to padding tokens.
-# Else we do not pass in segment ids and on vpu bound hardware like (ironwood) this is faster.
+# Else we do not pass in segment ids and on vpu bound hardware like trillium this is faster.
# However, when padding tokens are significant, this will lead to worse quality and should be set to True.
mask_padding_tokens: True
# Maxdiffusion has 2 types of attention sharding strategies:
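The config comment above describes the effect of `mask_padding_tokens`: when segment ids are passed to the attention kernel, a query token may only attend to key tokens in the same nonzero segment, so padding never contributes to the attention output. A minimal NumPy sketch of that masking rule (not maxdiffusion's actual kernel code; `segment_mask` and the convention that segment id 0 means padding are assumptions for illustration):

```python
import numpy as np

def segment_mask(q_segment_ids, kv_segment_ids):
    """Boolean [q_len, kv_len] mask: True where attention is allowed.

    A query attends to a key only if both share the same segment id
    and neither is padding (segment id 0 denotes padding here).
    """
    same = q_segment_ids[:, None] == kv_segment_ids[None, :]
    not_pad = (q_segment_ids[:, None] != 0) & (kv_segment_ids[None, :] != 0)
    return same & not_pad

# 3 real tokens in segment 1, followed by 2 padding tokens.
seg = np.array([1, 1, 1, 0, 0])
mask = segment_mask(seg, seg)
# Only the 3x3 block of real tokens is attendable; rows/columns for
# padding tokens are entirely masked out.
```

Skipping the segment ids (mask_padding_tokens: False) drops this masking step entirely, which is cheaper on VPU-bound hardware but lets queries attend to padding, degrading quality when padding is a large fraction of the sequence.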