Commits (23)
efe83f5  re-init (kijai, Sep 30, 2025)
460ce7f  Update model_multitalk.py (kijai, Oct 3, 2025)
6f6db12  whitespace... (kijai, Oct 3, 2025)
00c069d  Update model_multitalk.py (kijai, Oct 3, 2025)
57567bd  remove print (kijai, Oct 3, 2025)
9c5022e  this is redundant (kijai, Oct 10, 2025)
d0dce6b  Merge remote-tracking branch 'upstream/master' into multitalk (kijai, Oct 10, 2025)
7842a5c  remove import (kijai, Oct 10, 2025)
99dc959  Merge remote-tracking branch 'upstream/master' into multitalk (kijai, Oct 19, 2025)
4cbc1a6  Merge remote-tracking branch 'upstream/master' into multitalk (kijai, Oct 23, 2025)
f5d53f2  Restore preview functionality (kijai, Oct 23, 2025)
897ffeb  Merge remote-tracking branch 'upstream/master' into multitalk (kijai, Oct 30, 2025)
25063f2  Merge remote-tracking branch 'upstream/master' into multitalk (kijai, Nov 3, 2025)
6bfce54  Move block_idx to transformer_options (kijai, Nov 3, 2025)
8d62661  Remove LoopingSamplerCustomAdvanced (kijai, Nov 3, 2025)
d53e629  Remove looping functionality, keep extension functionality (kijai, Nov 3, 2025)
3ae78a4  Update model_multitalk.py (kijai, Nov 3, 2025)
7237c36  Merge remote-tracking branch 'upstream/master' into multitalk (kijai, Nov 5, 2025)
fb099a4  Handle ref_attn_mask with separate patch to avoid having to always re… (kijai, Nov 5, 2025)
b4d3f4e  Merge remote-tracking branch 'upstream/master' into multitalk (kijai, Nov 26, 2025)
af4d412  Chunk attention map calculation for multiple speakers to reduce peak … (kijai, Nov 26, 2025)
64a9841  Update model_multitalk.py (kijai, Nov 26, 2025)
c36323f  Merge remote-tracking branch 'upstream/master' into multitalk (kijai, Dec 2, 2025)
comfy/ldm/wan/model.py (19 changes: 18 additions & 1 deletion)
@@ -62,6 +62,8 @@ def forward(self, x, freqs, transformer_options={}):
x(Tensor): Shape [B, L, num_heads, C / num_heads]
freqs(Tensor): Rope freqs, shape [1024, C / num_heads / 2]
"""
patches = transformer_options.get("patches", {})

b, s, n, d = *x.shape[:2], self.num_heads, self.head_dim

def qkv_fn_q(x):
@@ -86,6 +88,10 @@ def qkv_fn_k(x):
transformer_options=transformer_options,
)

if "attn1_patch" in patches:
for p in patches["attn1_patch"]:
x = p({"x": x, "q": q, "k": k, "transformer_options": transformer_options})

x = self.o(x)
return x

@@ -225,6 +231,8 @@ def forward(
"""
# assert e.dtype == torch.float32

patches = transformer_options.get("patches", {})

if e.ndim < 4:
e = (comfy.model_management.cast_to(self.modulation, dtype=x.dtype, device=x.device) + e).chunk(6, dim=1)
else:
@@ -242,6 +250,11 @@

# cross-attention & ffn
x = x + self.cross_attn(self.norm3(x), context, context_img_len=context_img_len, transformer_options=transformer_options)

if "attn2_patch" in patches:
for p in patches["attn2_patch"]:
x = p({"x": x, "transformer_options": transformer_options})

y = self.ffn(torch.addcmul(repeat_e(e[3], x), self.norm2(x), 1 + repeat_e(e[4], x)))
x = torch.addcmul(x, y, repeat_e(e[5], x))
return x
@@ -488,7 +501,7 @@ def __init__(self,
self.blocks = nn.ModuleList([
wan_attn_block_class(cross_attn_type, dim, ffn_dim, num_heads,
window_size, qk_norm, cross_attn_norm, eps, operation_settings=operation_settings)
- for _ in range(num_layers)
+ for i in range(num_layers)
])

# head
@@ -541,6 +554,7 @@ def forward_orig(
# embeddings
x = self.patch_embedding(x.float()).to(x.dtype)
grid_sizes = x.shape[2:]
transformer_options["grid_sizes"] = grid_sizes
x = x.flatten(2).transpose(1, 2)

# time embeddings
@@ -569,6 +583,7 @@ def forward_orig(
patches_replace = transformer_options.get("patches_replace", {})
blocks_replace = patches_replace.get("dit", {})
for i, block in enumerate(self.blocks):
transformer_options["block_idx"] = i
if ("double_block", i) in blocks_replace:
def block_wrap(args):
out = {}
@@ -735,6 +750,7 @@ def forward_orig(
# embeddings
x = self.patch_embedding(x.float()).to(x.dtype)
grid_sizes = x.shape[2:]
transformer_options["grid_sizes"] = grid_sizes
x = x.flatten(2).transpose(1, 2)

# time embeddings
@@ -764,6 +780,7 @@ def forward_orig(
patches_replace = transformer_options.get("patches_replace", {})
blocks_replace = patches_replace.get("dit", {})
for i, block in enumerate(self.blocks):
transformer_options["block_idx"] = i
if ("double_block", i) in blocks_replace:
def block_wrap(args):
out = {}
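Note: the hooks added in this diff are driven by populating transformer_options["patches"]. The following is a minimal, hypothetical sketch of an attn1_patch callable consistent with the call sites above; the name make_ref_attn_patch, the strength parameter, and the zero-residual blending are illustrative assumptions, not the actual MultiTalk patch. Only the argument keys ("x", "q", "k", "transformer_options") and the transformer_options entries ("patches", "block_idx", "grid_sizes") come from the code in this diff.

import torch

def make_ref_attn_patch(strength: float = 1.0):
    # Hypothetical attn1_patch factory; the blending logic below is a placeholder.
    def patch(args):
        x = args["x"]                        # attention output, before the self.o projection
        q, k = args["q"], args["k"]          # query/key tensors from WanSelfAttention
        opts = args["transformer_options"]
        block_idx = opts.get("block_idx")    # set per block in forward_orig
        grid_sizes = opts.get("grid_sizes")  # latent grid shape stored after patch_embedding
        # A real patch would compute an audio/reference-conditioned residual from
        # q/k (and block_idx/grid_sizes) here; a zero residual keeps this sketch a no-op.
        residual = torch.zeros_like(x)
        return x + strength * residual
    return patch

# Registration sketch: each hook key maps to a list, so multiple patches can be chained.
transformer_options = {
    "patches": {
        "attn1_patch": [make_ref_attn_patch(0.5)],
    }
}

An attn2_patch entry works the same way, except its callable only receives "x" and "transformer_options".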