"""
Image Generation and Sampling Module
Handles the core image generation, decoding, and batch management
"""
import time
import os
import re
import json
import torch
import nodes
import numpy as np
from PIL import Image
def generate_image(
patched_model,
seed,
steps,
cfg,
sampler_name,
scheduler,
positive_conditioning,
negative_conditioning,
latent_input,
denoise,
attention_mode="default",
model_sampling_override="none",
model_sampling_shift=1.73,
model_sampling_flux_max_shift=1.15,
model_sampling_flux_base_shift=0.5,
use_advanced_sampling=False,
advanced_guider="cfg_guider",
advanced_scheduler="basic",
flux_guidance_value=0.0,
width=1024,
height=1024
):
"""
Generate a single image using ComfyUI's KSampler or SamplerCustomAdvanced pipeline.
Args:
patched_model: Model (potentially with LoRAs applied)
seed: Random seed
steps: Number of sampling steps
cfg: CFG scale
sampler_name: Sampler name (e.g., "euler", "dpmpp_2m")
scheduler: Scheduler name (e.g., "normal", "karras")
positive_conditioning: Positive conditioning tensor
negative_conditioning: Negative conditioning tensor
latent_input: Input latent tensor dict
denoise: Denoise strength (0.0-1.0)
attention_mode: Attention implementation to use ("default", "xformers", "pytorch",
"flash", "sage", "sage3", "sub_quad", "split")
model_sampling_override: Model sampling patch type ("none", "aura_flow", "flux", "sd3")
model_sampling_shift: Shift value for AuraFlow/SD3 model sampling
model_sampling_flux_max_shift: Max shift for Flux model sampling
model_sampling_flux_base_shift: Base shift for Flux model sampling
use_advanced_sampling: Whether to use SamplerCustomAdvanced pipeline
advanced_guider: Guider type ("cfg_guider", "basic_guider")
advanced_scheduler: Scheduler type for advanced sampling ("basic", "flux2")
flux_guidance_value: Flux guidance value (0.0 = disabled)
width: Image width (used by Flux model sampling and Flux2Scheduler)
height: Image height (used by Flux model sampling and Flux2Scheduler)
Returns:
tuple: (result_latent_dict, generation_duration_seconds)
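    Example (illustrative only; `pos`/`neg` stand for CLIP-encoded conditioning
    lists produced elsewhere, and the model comes from a checkpoint loader):
        latent = {"samples": torch.zeros((1, 4, 128, 128))}  # 1024x1024 SD latent
        result_latent, secs = generate_image(
            patched_model, seed=0, steps=20, cfg=7.0,
            sampler_name="euler", scheduler="normal",
            positive_conditioning=pos, negative_conditioning=neg,
            latent_input=latent, denoise=1.0)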
"""
# Apply attention mode override if not "default"
    original_attn_override = None
    attn_override_applied = False
    attn_model = patched_model  # keep a handle: patched_model may be rebound to a clone below
    if attention_mode and attention_mode != "default":
try:
from comfy.ldm.modules.attention import get_attention_function
attn_func = get_attention_function(attention_mode, default=None)
if attn_func is not None:
# Save original override (if any) and set the new one
original_attn_override = patched_model.model_options.get("transformer_options", {}).get("optimized_attention_override")
if "transformer_options" not in patched_model.model_options:
patched_model.model_options["transformer_options"] = {}
                patched_model.model_options["transformer_options"]["optimized_attention_override"] = attn_func
                attn_override_applied = True
else:
print(f"[GridTester] ⚠️ Attention mode '{attention_mode}' not available, using default")
except Exception as e:
print(f"[GridTester] ⚠️ Could not set attention mode '{attention_mode}': {e}")
# === Model Sampling Override ===
# Clone model and patch model_sampling if an override is requested.
# This modifies the model's internal noise schedule for specific model families.
if model_sampling_override and model_sampling_override != "none":
import comfy.model_sampling
patched_model = patched_model.clone()
if model_sampling_override == "aura_flow":
# AuraFlow/Qwen Image: discrete flow with shift, multiplier=1.0
sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow
sampling_type = comfy.model_sampling.CONST
class ModelSamplingAdvanced(sampling_base, sampling_type):
pass
model_sampling = ModelSamplingAdvanced(patched_model.model.model_config)
model_sampling.set_parameters(shift=float(model_sampling_shift), multiplier=1.0)
patched_model.add_object_patch("model_sampling", model_sampling)
print(f"[GridTester] 🔧 Applied ModelSamplingAuraFlow (shift={model_sampling_shift})")
elif model_sampling_override == "sd3":
# SD3: discrete flow with shift, multiplier=1000
sampling_base = comfy.model_sampling.ModelSamplingDiscreteFlow
sampling_type = comfy.model_sampling.CONST
class ModelSamplingAdvanced(sampling_base, sampling_type):
pass
model_sampling = ModelSamplingAdvanced(patched_model.model.model_config)
model_sampling.set_parameters(shift=float(model_sampling_shift), multiplier=1000)
patched_model.add_object_patch("model_sampling", model_sampling)
print(f"[GridTester] 🔧 Applied ModelSamplingSD3 (shift={model_sampling_shift})")
elif model_sampling_override == "flux":
# Flux: dynamic shift computed from image dimensions
sampling_base = comfy.model_sampling.ModelSamplingFlux
sampling_type = comfy.model_sampling.CONST
class ModelSamplingAdvanced(sampling_base, sampling_type):
pass
max_s = float(model_sampling_flux_max_shift)
base_s = float(model_sampling_flux_base_shift)
x1, x2 = 256, 4096
mm = (max_s - base_s) / (x2 - x1)
b = base_s - mm * x1
shift = (width * height / (8 * 8 * 2 * 2)) * mm + b
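            # Worked example (illustrative): with the defaults max_shift=1.15 and
            # base_shift=0.5, a 1024x1024 image gives (1024 * 1024) / 256 = 4096,
            # which is exactly x2, so the computed shift lands on max_shift (1.15)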
model_sampling = ModelSamplingAdvanced(patched_model.model.model_config)
model_sampling.set_parameters(shift=shift)
patched_model.add_object_patch("model_sampling", model_sampling)
print(f"[GridTester] 🔧 Applied ModelSamplingFlux (max_shift={max_s}, base_shift={base_s}, computed_shift={shift:.4f})")
elif model_sampling_override == "flux2":
# Flux2: uses ModelSamplingFlux with fixed shift (default 2.02, matching ComfyUI's Flux2 supported_models)
sampling_base = comfy.model_sampling.ModelSamplingFlux
sampling_type = comfy.model_sampling.CONST
class ModelSamplingAdvanced(sampling_base, sampling_type):
pass
shift = float(model_sampling_shift) if model_sampling_shift else 2.02
model_sampling = ModelSamplingAdvanced(patched_model.model.model_config)
model_sampling.set_parameters(shift=shift)
patched_model.add_object_patch("model_sampling", model_sampling)
print(f"[GridTester] 🔧 Applied ModelSamplingFlux2 (shift={shift})")
# === Flux Guidance ===
# Modify positive conditioning with guidance value (used by Flux 1 models)
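    # (e.g. flux_guidance_value=3.5 sets {"guidance": 3.5} on every positive
    # conditioning entry; guidance-distilled Flux models read this at sample time)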
if flux_guidance_value and float(flux_guidance_value) > 0:
import node_helpers
positive_conditioning = node_helpers.conditioning_set_values(
positive_conditioning, {"guidance": float(flux_guidance_value)}
)
print(f"[GridTester] 🔧 Applied FluxGuidance (guidance={flux_guidance_value})")
t0 = time.time()
try:
if use_advanced_sampling:
# === Advanced Sampling Pipeline (SamplerCustomAdvanced) ===
import comfy.samplers
import comfy.sample
import comfy.sampler_helpers
import comfy.model_management
import comfy.utils
import latent_preview
# Noise
class _Noise_RandomNoise:
def __init__(self, seed):
self.seed = seed
def generate_noise(self, input_latent):
latent_image = input_latent["samples"]
batch_inds = input_latent.get("batch_index")
return comfy.sample.prepare_noise(latent_image, self.seed, batch_inds)
noise = _Noise_RandomNoise(seed)
# Guider
if advanced_guider == "basic_guider":
guider = comfy.samplers.CFGGuider(patched_model)
guider.inner_set_conds({"positive": comfy.sampler_helpers.convert_cond(positive_conditioning)})
guider.set_cfg(1.0)
print(f"[GridTester] 🔧 Advanced sampling: BasicGuider (no CFG)")
else:
guider = comfy.samplers.CFGGuider(patched_model)
guider.set_conds(positive_conditioning, negative_conditioning)
guider.set_cfg(cfg)
print(f"[GridTester] 🔧 Advanced sampling: CFGGuider (cfg={cfg})")
# Sampler object
sampler_obj = comfy.samplers.sampler_object(sampler_name)
# Sigmas
if advanced_scheduler == "flux2":
import math
                def _flux2_generalized_time_snr_shift(t, mu, sigma):
                    if t <= 0.0:
                        return 0.0  # the t → 0 limit; avoids 1/0 on the final timestep
                    return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma)
def _flux2_compute_empirical_mu(image_seq_len, num_steps):
a1, b1 = 8.73809524e-05, 1.89833333
a2, b2 = 0.00016927, 0.45666666
if image_seq_len > 4300:
return float(a2 * image_seq_len + b2)
m_200 = a2 * image_seq_len + b2
m_10 = a1 * image_seq_len + b1
a = (m_200 - m_10) / 190.0
b_val = m_200 - 200.0 * a
return float(a * num_steps + b_val)
seq_len = round(width * height / (16 * 16))
mu = _flux2_compute_empirical_mu(seq_len, steps)
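                # Worked example (illustrative): a 1024x1024 image gives
                # seq_len = 1048576 / 256 = 4096 tokens (< 4300, so interpolated);
                # at 20 steps this works out to mu ≈ 2.198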
timesteps = torch.linspace(1, 0, steps + 1)
sigmas = torch.FloatTensor([_flux2_generalized_time_snr_shift(t.item(), mu, 1.0) for t in timesteps])
print(f"[GridTester] 🔧 Advanced sampling: Flux2Scheduler (steps={steps}, {width}x{height}, mu={mu:.4f})")
else:
# BasicScheduler
                total_steps = steps
                if denoise <= 0.0:
                    # Match BasicScheduler: zero denoise yields empty sigmas (no sampling)
                    sigmas = torch.FloatTensor([])
                else:
                    if denoise < 1.0:
                        total_steps = int(steps / denoise)
                    model_sampling_obj = patched_model.get_model_object("model_sampling")
                    sigmas = comfy.samplers.calculate_sigmas(model_sampling_obj, scheduler, total_steps).cpu()
                    sigmas = sigmas[-(steps + 1):]
print(f"[GridTester] 🔧 Advanced sampling: BasicScheduler ({scheduler}, {steps} steps)")
# Execute SamplerCustomAdvanced
latent = latent_input.copy()
latent_image = latent["samples"]
latent_image = comfy.sample.fix_empty_latent_channels(guider.model_patcher, latent_image)
latent["samples"] = latent_image
noise_mask = latent.get("noise_mask")
x0_output = {}
callback = latent_preview.prepare_callback(guider.model_patcher, sigmas.shape[-1] - 1, x0_output)
disable_pbar = not comfy.utils.PROGRESS_BAR_ENABLED
samples = guider.sample(
noise.generate_noise(latent), latent_image, sampler_obj, sigmas,
denoise_mask=noise_mask, callback=callback, disable_pbar=disable_pbar,
seed=noise.seed
)
samples = samples.to(comfy.model_management.intermediate_device())
out = latent.copy()
out["samples"] = samples
result = ({"samples": out["samples"]},)
else:
# === Standard KSampler path ===
result = nodes.common_ksampler(
model=patched_model,
seed=seed,
steps=steps,
cfg=cfg,
sampler_name=sampler_name,
scheduler=scheduler,
positive=positive_conditioning,
negative=negative_conditioning,
latent=latent_input,
denoise=denoise
)
    finally:
        # Restore the original attention override on the model whose options we
        # actually mutated (patched_model may have been rebound to a clone above).
        # Only restore if an override was applied, so an unavailable
        # attention_mode never clobbers a pre-existing override.
        if attn_override_applied:
            if original_attn_override is not None:
                attn_model.model_options["transformer_options"]["optimized_attention_override"] = original_attn_override
            else:
                attn_model.model_options["transformer_options"].pop("optimized_attention_override", None)
duration = round(time.time() - t0, 3)
return result[0], duration
def tiled_hires_sample(latent_input, patched_model, config, positive_conditioning, negative_conditioning,
hires_steps, hires_denoise, tile_width, tile_height, mask_blur, tile_padding,
force_uniform, pixel_width, pixel_height):
"""
Run HiRes fix sampling in tiles to prevent OOM on large images.
Splits the latent into overlapping tiles, samples each, then blends them back together.
Args:
latent_input: Dict with "samples" key — the upscaled latent to denoise
patched_model: Model for sampling
config: Generation config (seed, cfg, sampler, scheduler)
positive_conditioning, negative_conditioning: Conditioning tensors
hires_steps: Number of sampling steps
hires_denoise: Denoise strength
tile_width, tile_height: Tile size in pixels (will be converted to latent space /8)
mask_blur: Gaussian blur radius for tile seam blending (pixels)
tile_padding: Extra context padding around each tile (pixels)
force_uniform: If True, force all tiles to be the same size (may crop edges)
pixel_width, pixel_height: Full image pixel dimensions (for logging)
Returns:
dict: Result latent dict with "samples" key
"""
import torch
samples = latent_input["samples"]
# Convert pixel dimensions to latent space (8x smaller)
# Handle both 4D [B, C, H, W] and 5D [B, C, T, H, W] latent formats (video VAEs add temporal dim)
if samples.ndim == 5:
lat_h, lat_w = samples.shape[3], samples.shape[4]
else:
lat_h, lat_w = samples.shape[2], samples.shape[3]
tw = tile_width // 8
th = tile_height // 8
pad = tile_padding // 8
blur = max(1, mask_blur // 8) # Blur in latent space
# Calculate tile grid
def calc_tiles(total, tile_size, padding, uniform):
"""Calculate tile start positions with overlap = 2 * padding."""
if total <= tile_size:
return [(0, total)]
stride = tile_size - 2 * padding
if stride <= 0:
stride = tile_size // 2
tiles = []
pos = 0
while pos < total:
end = min(pos + tile_size, total)
if uniform and end == total and end - pos < tile_size:
# Shift last tile back to maintain uniform size
pos = max(0, total - tile_size)
end = total
tiles.append((pos, end))
if end == total:
break
pos += stride
return tiles
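    # Example (illustrative): calc_tiles(150, 64, 8, False) uses stride
    # 64 - 2*8 = 48 and yields [(0, 64), (48, 112), (96, 150)]; with
    # uniform=True the ragged last tile is shifted back to (86, 150)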
x_tiles = calc_tiles(lat_w, tw, pad, force_uniform)
y_tiles = calc_tiles(lat_h, th, pad, force_uniform)
total_tiles = len(x_tiles) * len(y_tiles)
print(f"[GridTester] 🔍 Tiled HiRes sampling: {len(x_tiles)}x{len(y_tiles)} = {total_tiles} tiles "
f"(tile={tile_width}x{tile_height}px, padding={tile_padding}px, blur={mask_blur}px)")
# Output accumulator with weighted blending
result_samples = torch.zeros_like(samples)
is_5d = samples.ndim == 5
# Weight map shape matches spatial dims only
if is_5d:
weight_map = torch.zeros(1, 1, 1, lat_h, lat_w, device=samples.device)
else:
weight_map = torch.zeros(1, 1, lat_h, lat_w, device=samples.device)
tile_idx = 0
for yi, (y_start, y_end) in enumerate(y_tiles):
for xi, (x_start, x_end) in enumerate(x_tiles):
tile_idx += 1
# Extract tile from latent (handle both 4D and 5D)
if is_5d:
tile_latent = samples[:, :, :, y_start:y_end, x_start:x_end].clone()
else:
tile_latent = samples[:, :, y_start:y_end, x_start:x_end].clone()
print(f"[GridTester] 🔍 Tile {tile_idx}/{total_tiles}: latent region [{y_start}:{y_end}, {x_start}:{x_end}]")
# Run KSampler on this tile
tile_result, _ = generate_image(
patched_model, config.get("seed", 0), hires_steps, config.get("cfg", 7),
config.get("sampler", "euler"), config.get("scheduler", "normal"),
positive_conditioning, negative_conditioning,
{"samples": tile_latent}, hires_denoise,
width=(x_end - x_start) * 8, height=(y_end - y_start) * 8
)
tile_out = tile_result["samples"]
tile_h = y_end - y_start
tile_w = x_end - x_start
# Create feathered weight mask for this tile (higher weight in center, fading at edges)
mask = torch.ones(tile_h, tile_w, device=samples.device)
if blur > 0:
# Feather edges: linear ramp over blur pixels
for b in range(blur):
factor = (b + 1) / (blur + 1)
# Top edge
if y_start > 0 and b < tile_h:
mask[b, :] *= factor
# Bottom edge
if y_end < lat_h and b < tile_h:
mask[tile_h - 1 - b, :] *= factor
# Left edge
if x_start > 0 and b < tile_w:
mask[:, b] *= factor
# Right edge
if x_end < lat_w and b < tile_w:
mask[:, tile_w - 1 - b] *= factor
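            # e.g. blur=2: the outermost row/col is scaled by 1/3 and the next
            # by 2/3, but only on sides that border another tile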
# Accumulate weighted results (broadcast mask to match tensor dims)
if is_5d:
mask_shaped = mask.unsqueeze(0).unsqueeze(0).unsqueeze(0) # [1, 1, 1, H, W]
result_samples[:, :, :, y_start:y_end, x_start:x_end] += tile_out * mask_shaped
weight_map[:, :, :, y_start:y_end, x_start:x_end] += mask_shaped
else:
mask_shaped = mask.unsqueeze(0).unsqueeze(0) # [1, 1, H, W]
result_samples[:, :, y_start:y_end, x_start:x_end] += tile_out * mask_shaped
weight_map[:, :, y_start:y_end, x_start:x_end] += mask_shaped
# Normalize by weights to blend overlapping regions
weight_map = torch.clamp(weight_map, min=1e-6)
result_samples = result_samples / weight_map
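    # Each latent element is now sum(tile * mask) / sum(mask), so overlapping
    # feathered tiles cross-fade linearly across seam regions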
print(f"[GridTester] 🔍 Tiled HiRes sampling complete ({total_tiles} tiles)")
return {"samples": result_samples}
def upscale_image(result_latent, vae, patched_model, upscaling_config, config, positive_conditioning, negative_conditioning, width, height):
"""
Apply upscaling to a generated latent based on upscaling settings.
Args:
result_latent: Generated latent dict with "samples" key
vae: VAE model for encode/decode
patched_model: Patched model for re-sampling (HiRes fix)
upscaling_config: Dict with mode, upscale_ratio, hires_denoise, etc.
config: Current generation config (steps, sampler, scheduler, etc.)
positive_conditioning: Positive conditioning
negative_conditioning: Negative conditioning
width: Original image width
height: Original image height
Returns:
tuple: (result, duration) where result is either a latent dict or PIL Image
"""
import torch
import time
mode = upscaling_config.get("mode", "hires_only")
upscale_ratio = float(upscaling_config.get("upscale_ratio", 1.5))
hires_denoise = float(upscaling_config.get("hires_denoise", 0.5))
hires_steps = int(upscaling_config.get("hires_steps", 0)) or config.get("steps", 20)
tiled_vae = upscaling_config.get("tiled_vae", False)
tile_size = int(upscaling_config.get("tile_size", 512))
tile_overlap = int(upscaling_config.get("tile_overlap", 64))
temporal_size = int(upscaling_config.get("temporal_size", 512))
temporal_overlap = int(upscaling_config.get("temporal_overlap", 64))
upscale_model_name = upscaling_config.get("upscale_model", "")
upscale_size = float(upscaling_config.get("upscale_size", 2.0))
resize_method = upscaling_config.get("resize_method", "bilinear")
hires_tiled_sampling = upscaling_config.get("hires_tiled_sampling", False)
hires_tile_width = int(upscaling_config.get("hires_tile_width", 512))
hires_tile_height = int(upscaling_config.get("hires_tile_height", 512))
hires_mask_blur = int(upscaling_config.get("hires_mask_blur", 8))
hires_tile_padding = int(upscaling_config.get("hires_tile_padding", 32))
hires_force_uniform_tiles = upscaling_config.get("hires_force_uniform_tiles", False)
new_w = int(width * upscale_ratio)
new_h = int(height * upscale_ratio)
t0 = time.time()
print(f"[GridTester] 🔍 Upscaling: mode={mode}, ratio={upscale_ratio}, target={new_w}x{new_h}")
if mode == "hires_only":
# Upscale latent in latent space → re-sample with denoise
import comfy.utils
latent_samples = result_latent["samples"]
# Latent space is 8x smaller than pixel space
upscaled_latent = comfy.utils.common_upscale(
latent_samples, new_w // 8, new_h // 8, resize_method, "disabled"
)
if hires_tiled_sampling:
hires_latent = tiled_hires_sample(
{"samples": upscaled_latent}, patched_model, config,
positive_conditioning, negative_conditioning,
hires_steps, hires_denoise,
hires_tile_width, hires_tile_height, hires_mask_blur, hires_tile_padding,
hires_force_uniform_tiles, new_w, new_h
)
else:
hires_latent, hires_duration = generate_image(
patched_model, config.get("seed", 0), hires_steps, config.get("cfg", 7),
config.get("sampler", "euler"), config.get("scheduler", "normal"),
positive_conditioning, negative_conditioning,
{"samples": upscaled_latent}, hires_denoise,
width=new_w, height=new_h
)
duration = round(time.time() - t0, 3)
print(f"[GridTester] 🔍 HiRes fix complete in {duration}s → {new_w}x{new_h}")
return hires_latent, duration
elif mode == "model_only":
# Decode → model upscale → optional resize to target → return as PIL image
from comfy_extras.nodes_upscale_model import UpscaleModelLoader, ImageUpscaleWithModel
import numpy as np
# Use tiled VAE decode if enabled (prevents OOM on large images)
if tiled_vae:
vae.first_stage_model.tile_sample_min_size = tile_size
vae.first_stage_model.tile_latent_min_size = tile_size // 8
vae.first_stage_model.tile_overlap_factor = tile_overlap / tile_size if tile_size > 0 else 0.125
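            # e.g. the defaults tile_size=512, tile_overlap=64 give an overlap
            # factor of 64/512 = 0.125 (the same value as the fallback)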
if hasattr(vae.first_stage_model, 'tile_sample_min_size_temporal'):
vae.first_stage_model.tile_sample_min_size_temporal = temporal_size
if hasattr(vae.first_stage_model, 'tile_latent_min_size_temporal'):
vae.first_stage_model.tile_latent_min_size_temporal = temporal_size // 8
if hasattr(vae.first_stage_model, 'tile_overlap_factor_temporal'):
vae.first_stage_model.tile_overlap_factor_temporal = temporal_overlap / temporal_size if temporal_size > 0 else 0.125
pil_image = decode_latent_with_vae(vae, result_latent["samples"])
img_np = np.array(pil_image).astype(np.float32) / 255.0
img_tensor = torch.from_numpy(img_np).unsqueeze(0) # (1, H, W, C)
loader = UpscaleModelLoader()
(up_model,) = loader.load_model(upscale_model_name)
upscaler = ImageUpscaleWithModel()
(upscaled_tensor,) = upscaler.upscale(up_model, img_tensor)
# If upscale_size differs from model's native scale, resize the output
target_w = int(width * upscale_size)
target_h = int(height * upscale_size)
actual_h, actual_w = upscaled_tensor.shape[1], upscaled_tensor.shape[2]
if abs(actual_w - target_w) > 4 or abs(actual_h - target_h) > 4:
import comfy.utils
# Resize from model's native output to user-specified upscale_size
upscaled_tensor = upscaled_tensor.permute(0, 3, 1, 2) # NHWC → NCHW
upscaled_tensor = comfy.utils.common_upscale(upscaled_tensor, target_w, target_h, resize_method, "disabled")
upscaled_tensor = upscaled_tensor.permute(0, 2, 3, 1) # NCHW → NHWC
up_np = upscaled_tensor[0].cpu().float().numpy()
up_np = np.clip(up_np * 255, 0, 255).astype(np.uint8)
from PIL import Image as PILImage
upscaled_image = PILImage.fromarray(up_np)
duration = round(time.time() - t0, 3)
print(f"[GridTester] 🔍 Model upscale complete in {duration}s → {upscaled_image.size[0]}x{upscaled_image.size[1]}")
return upscaled_image, duration
elif mode == "model_then_hires":
# Model upscale first → optional resize → encode to latent → HiRes fix
from comfy_extras.nodes_upscale_model import UpscaleModelLoader, ImageUpscaleWithModel
import numpy as np
# Use tiled VAE decode if enabled
if tiled_vae:
vae.first_stage_model.tile_sample_min_size = tile_size
vae.first_stage_model.tile_latent_min_size = tile_size // 8
vae.first_stage_model.tile_overlap_factor = tile_overlap / tile_size if tile_size > 0 else 0.125
if hasattr(vae.first_stage_model, 'tile_sample_min_size_temporal'):
vae.first_stage_model.tile_sample_min_size_temporal = temporal_size
if hasattr(vae.first_stage_model, 'tile_latent_min_size_temporal'):
vae.first_stage_model.tile_latent_min_size_temporal = temporal_size // 8
if hasattr(vae.first_stage_model, 'tile_overlap_factor_temporal'):
vae.first_stage_model.tile_overlap_factor_temporal = temporal_overlap / temporal_size if temporal_size > 0 else 0.125
pil_image = decode_latent_with_vae(vae, result_latent["samples"])
img_np = np.array(pil_image).astype(np.float32) / 255.0
img_tensor = torch.from_numpy(img_np).unsqueeze(0)
loader = UpscaleModelLoader()
(up_model,) = loader.load_model(upscale_model_name)
upscaler = ImageUpscaleWithModel()
(upscaled_tensor,) = upscaler.upscale(up_model, img_tensor)
# If upscale_size differs from model's native scale, resize before HiRes fix
target_w = int(width * upscale_size)
target_h = int(height * upscale_size)
actual_h, actual_w = upscaled_tensor.shape[1], upscaled_tensor.shape[2]
if abs(actual_w - target_w) > 4 or abs(actual_h - target_h) > 4:
import comfy.utils
upscaled_tensor = upscaled_tensor.permute(0, 3, 1, 2) # NHWC → NCHW
upscaled_tensor = comfy.utils.common_upscale(upscaled_tensor, target_w, target_h, resize_method, "disabled")
upscaled_tensor = upscaled_tensor.permute(0, 2, 3, 1) # NCHW → NHWC
up_h, up_w = upscaled_tensor.shape[1], upscaled_tensor.shape[2]
# Encode back to latent space for HiRes fix
encoded_latent = vae.encode(upscaled_tensor[:, :, :, :3])
if hires_tiled_sampling:
hires_latent = tiled_hires_sample(
{"samples": encoded_latent}, patched_model, config,
positive_conditioning, negative_conditioning,
hires_steps, hires_denoise,
hires_tile_width, hires_tile_height, hires_mask_blur, hires_tile_padding,
hires_force_uniform_tiles, up_w, up_h
)
else:
hires_latent, hires_duration = generate_image(
patched_model, config.get("seed", 0), hires_steps, config.get("cfg", 7),
config.get("sampler", "euler"), config.get("scheduler", "normal"),
positive_conditioning, negative_conditioning,
{"samples": encoded_latent}, hires_denoise,
width=up_w, height=up_h
)
duration = round(time.time() - t0, 3)
print(f"[GridTester] 🔍 Model+HiRes upscale complete in {duration}s → {up_w}x{up_h}")
return hires_latent, duration
else:
print(f"[GridTester] ⚠️ Unknown upscale mode: {mode}")
return result_latent, 0
def decode_latent_with_vae(vae, latent_samples):
"""
Decode latent samples to pixel space using VAE.
Args:
vae: VAE model
latent_samples: Latent tensor to decode
Returns:
PIL.Image: Decoded image
"""
import torch
# Check for NaN in input latent
if torch.isnan(latent_samples).any():
print(f"[GridTester] ⚠️ VAE decode: input latent contains NaN! shape={latent_samples.shape} dtype={latent_samples.dtype}")
decoded = vae.decode(latent_samples)
# Check for NaN in decoded output and attempt float32 retry
if torch.isnan(decoded).any():
print(f"[GridTester] ⚠️ VAE decode produced NaN! decoded shape={decoded.shape} dtype={decoded.dtype}")
print(f"[GridTester] ⚠️ Input latent: shape={latent_samples.shape} dtype={latent_samples.dtype} device={latent_samples.device}")
print(f"[GridTester] 🔄 Retrying VAE decode with float32 latent...")
decoded = vae.decode(latent_samples.to(torch.float32))
if torch.isnan(decoded).any():
print(f"[GridTester] ❌ VAE decode still NaN after float32 retry")
else:
print(f"[GridTester] ✅ float32 retry succeeded")
# Convert to PIL Image
# .detach() is required because the tensor may have requires_grad=True
# (e.g., when called from distributed worker threads outside ComfyUI's
# normal execution context where autograd state may differ)
img_np = decoded.detach().cpu().float().numpy()
# Remove extra dimensions (handle shapes like (1, 1, H, W, C) or (1, H, W, C))
while img_np.ndim > 3:
img_np = img_np[0]
# Now should be (H, W, C) or (C, H, W)
img_np = np.clip(img_np * 255, 0, 255).astype(np.uint8)
# Handle different channel orders
if img_np.shape[0] == 3 and img_np.ndim == 3: # CHW format
img_np = np.transpose(img_np, (1, 2, 0))
elif img_np.shape[-1] != 3 and img_np.ndim == 3: # Not HWC format
img_np = np.transpose(img_np, (1, 2, 0))
return Image.fromarray(img_np)
def save_image_to_disk(image, output_dir, filename):
"""
Save PIL Image to disk.
Args:
image: PIL.Image object
output_dir: Directory to save in
filename: Filename (including extension)
Returns:
str: Full path to saved image
"""
os.makedirs(output_dir, exist_ok=True)
filepath = os.path.join(output_dir, filename)
image.save(filepath)
return filepath
def create_image_metadata(config, width, height, duration, seed, batch_idx, actual_positive_prompt, actual_negative_prompt, gen_index=None):
"""
Create metadata dictionary for an image.
Args:
config: Configuration dictionary
width: Image width
height: Image height
duration: Generation duration in seconds
seed: Random seed used
batch_idx: Batch index
actual_positive_prompt: Final positive prompt (with triggers)
actual_negative_prompt: Final negative prompt
gen_index: Sequential generation index for deterministic sort ordering (optional, backwards-compatible)
Returns:
dict: Metadata dictionary
"""
meta = config.copy()
# Remove global settings that should only be in manifest.meta, not in individual items
# These are session-wide settings that don't change per-image
global_settings_to_remove = [
"lora_triggerwords_append_settings",
"lora_omit_triggers",
"seed_behavior",
"gguf_options",
"model_prompt_prefix",
"model_prompt_suffix"
]
for key in global_settings_to_remove:
meta.pop(key, None)
# Remove attention_mode if it's "default" (keep manifest backward-compatible)
if meta.get("attention_mode") == "default":
meta.pop("attention_mode", None)
update_dict = {
"width": width,
"height": height,
"duration": duration,
"seed": seed,
"batch_idx": batch_idx,
"positive": actual_positive_prompt,
"negative": actual_negative_prompt
}
if gen_index is not None:
update_dict["gen_index"] = gen_index
# Preserve raw config prompts (without trigger words) for dashboard toggle
meta["config_positive"] = config.get("positive", "")
meta["config_negative"] = config.get("negative", "")
meta.update(update_dict)
return meta
def calculate_eta(job_durations, current_job, total_jobs):
"""
Calculate ETA based on average job duration.
Args:
job_durations: List of previous job durations
current_job: Current job number (1-indexed)
total_jobs: Total number of jobs
Returns:
dict: Dictionary with eta info (hours, minutes, seconds, finish_time, finish_formatted)
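    Example (illustrative):
        calculate_eta([30.0, 34.0], 2, 10) → avg 32.0s, 8 jobs left ≈ 256s (4m 16s)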
"""
if not job_durations:
return None
# Use rolling window of last 10 jobs for more responsive ETA
# This adapts faster when upscaling patterns change (e.g., some configs have upscale, some don't)
recent_window = job_durations[-10:] if len(job_durations) > 10 else job_durations
avg_duration = sum(recent_window) / len(recent_window)
remaining_jobs = total_jobs - current_job
estimated_seconds = avg_duration * remaining_jobs
eta_hours = int(estimated_seconds // 3600)
eta_minutes = int((estimated_seconds % 3600) // 60)
eta_seconds = int(estimated_seconds % 60)
eta_finish_time = time.time() + estimated_seconds
eta_finish_formatted = time.strftime("%H:%M:%S", time.localtime(eta_finish_time))
return {
"hours": eta_hours,
"minutes": eta_minutes,
"seconds": eta_seconds,
"finish_time": eta_finish_time,
"finish_formatted": eta_finish_formatted,
"avg_duration": avg_duration
}
def print_generation_progress(current_job, total_jobs, config, width, height, duration, eta_info):
"""
Print progress information for current generation.
Args:
current_job: Current job number
total_jobs: Total jobs
config: Configuration dict
width: Image width
height: Image height
duration: Generation duration
eta_info: ETA info dict from calculate_eta()
"""
progress_pct = int((current_job / total_jobs) * 100)
print(f"{'='*80}")
print(f"[GridTester] 📊 Job {current_job}/{total_jobs} ({progress_pct}%)")
print(f"[GridTester] 🎨 {config['sampler']} @ {config['steps']} steps | {width}x{height}")
print(f"[GridTester] ⏱️ {duration:.1f}s | Avg: {eta_info['avg_duration']:.1f}s/job")
if eta_info['hours'] > 0:
print(f"[GridTester] 🕒 ETA: {eta_info['hours']}h {eta_info['minutes']}m (finish ~{eta_info['finish_formatted']})")
elif eta_info['minutes'] > 0:
print(f"[GridTester] 🕒 ETA: {eta_info['minutes']}m {eta_info['seconds']}s (finish ~{eta_info['finish_formatted']})")
else:
print(f"[GridTester] 🕒 ETA: {eta_info['seconds']}s (finish ~{eta_info['finish_formatted']})")
print(f"{'='*80}")
def flush_batch_with_vae(pending_batch, vae, img_dir, existing_data, session_name, manifest_path=None, unique_id=None):
"""
Flush a batch of latents by decoding them with VAE and saving.
Args:
pending_batch: List of (latent_samples, metadata) tuples
vae: VAE model for decoding
img_dir: Image output directory
existing_data: Manifest data dict
session_name: Session name for filenames
manifest_path: Path to manifest.json file (optional, enables disk syncing)
unique_id: Node unique ID for dashboard updates (optional, enables dashboard updates)
Returns:
int: Number of images saved
"""
if not pending_batch:
return 0
import random
saved_count = 0
for latent_samples, meta in pending_batch:
# Decode latent to image
image = decode_latent_with_vae(vae, latent_samples)
# Generate ID using same format as remote_vae
ts = int(time.time() * 100000) + random.randint(0, 1000)
meta["id"] = ts
# Generate filename using webp format (like remote_vae)
filename = f"img_{meta['id']}.webp"
# Save image as webp
filepath = os.path.join(img_dir, filename)
image.save(filepath, quality=80)
# Normalize denoise to int if it's 1.0 (like remote_vae)
if meta.get("denoise") == 1.0:
meta["denoise"] = 1
# Update meta with file path and rejected flag
meta.update({
"file": f"/view?filename={filename}&type=output&subfolder=benchmarks/{session_name}/images",
"rejected": False
})
# Update manifest - insert at beginning (like remote_vae)
existing_data["items"].insert(0, meta)
# Sync with disk manifest to preserve tags (like remote_vae) - only if manifest_path provided
if manifest_path and os.path.exists(manifest_path):
try:
with open(manifest_path, "r") as f:
disk_manifest = json.load(f)
# Create a lookup map for items currently in memory
memory_items_map = {
i.get("id"): i
for i in existing_data.get("items", [])
if "id" in i
}
# Check every item on disk. If it exists in memory, copy the tags over.
for disk_item in disk_manifest.get("items", []):
d_id = disk_item.get("id")
if d_id and d_id in memory_items_map:
local_item = memory_items_map[d_id]
# PRESERVE TAGS: Copy these keys from disk to memory
if "favorited" in disk_item:
local_item["favorited"] = disk_item["favorited"]
if "rejected" in disk_item:
local_item["rejected"] = disk_item["rejected"]
except Exception as e:
print(f"[GridTester] ⚠️ Error syncing with disk manifest: {e}")
# Save manifest to disk (like remote_vae) - only if manifest_path provided
if manifest_path:
with open(manifest_path, "w") as f:
json.dump(existing_data, f, indent=4)
# Send update to dashboard (like remote_vae) - only if unique_id provided
if unique_id:
try:
from server import PromptServer
if PromptServer:
# Get meta, use empty dict if not present
manifest_meta = existing_data.get("meta", {})
PromptServer.instance.send_sync("ultimate_grid.update", {
"node": unique_id,
"session_name": session_name,
"new_items": [meta],
"meta": manifest_meta
})
            except (ImportError, KeyError):
                # Dashboard updates are best-effort; ignore a missing server
                # module or missing payload keys rather than failing the flush
                pass
saved_count += 1
print(f"[GridTester] 💾 Flushed {saved_count} images")
return saved_count
def flush_batch_with_remote_vae(pending_batch, remote_vae_worker, existing_data, session_name):
"""
Flush a batch of latents by sending them to remote VAE worker.
Args:
pending_batch: List of (latent_samples, metadata) tuples
remote_vae_worker: RemoteVAEDecodeWorker instance
existing_data: Manifest data dict (not used - worker handles manifest)
session_name: Session name for filenames
Returns:
int: Number of images queued
"""
if not pending_batch:
return 0
import random
queued_count = 0
for latent_samples, meta in pending_batch:
# Generate ID using same format as old code
ts = int(time.time() * 100000) + random.randint(0, 1000)
meta["id"] = ts # ← Set ID directly on meta dict
# Queue the job - worker will create manifest entry
# Pass the original meta dict, not a new one
remote_vae_worker.add_job(
latent_samples,
meta, # ← Pass original meta, not a new dict
meta["height"],
meta["width"]
)
queued_count += 1
print(f"[GridTester] 🌐 Queued {queued_count} images for remote VAE decoding")
return queued_count
# =============================================================================
# SEEDVR2 UPSCALE — Calls ComfyUI-SeedVR2_VideoUpscaler nodes programmatically
# Requires ComfyUI-SeedVR2_VideoUpscaler to be installed as a dependency.
# =============================================================================
def seedvr2_upscale(pil_image, seedvr2_config):
"""
Upscale an image using SeedVR2 diffusion-based upscaler.
Args:
pil_image: PIL Image (RGB)
seedvr2_config: dict with SeedVR2 options (dit_model, resolution, seed, etc.)
Returns:
tuple: (pil_result, width, height, duration)
"""
import time
import torch
import numpy as np
t0 = time.time()
sv = seedvr2_config
# Find SeedVR2 nodes from ComfyUI's node registry.
    # SeedVR2 uses the V3 API (ComfyExtension), so we look the classes up in
    # NODE_CLASS_MAPPINGS directly rather than importing the custom_nodes package.
    registry = getattr(nodes, "NODE_CLASS_MAPPINGS", {})
    SeedVR2LoadDiTModel = registry.get("SeedVR2LoadDiTModel")
    SeedVR2LoadVAEModel = registry.get("SeedVR2LoadVAEModel")
    SeedVR2VideoUpscaler = registry.get("SeedVR2VideoUpscaler")
if not SeedVR2LoadDiTModel or not SeedVR2LoadVAEModel or not SeedVR2VideoUpscaler:
raise RuntimeError(
"SeedVR2 upscale requires ComfyUI-SeedVR2_VideoUpscaler to be installed.\n"
"Install from: https://github.com/numz/ComfyUI-SeedVR2_VideoUpscaler\n"
"SeedVR2 nodes not found in ComfyUI's node registry."
)
print(f"[GridTester] 🎬 SeedVR2 upscale: model={sv.get('dit_model', '3b_fp8')}, "
f"resolution={sv.get('resolution', 1080)}, seed={sv.get('seed', 42)}")
# Set up V3 API execution context — SeedVR2 nodes require this
# (they call get_executing_context().node_id internally)
from comfy_execution.utils import CurrentNodeContext