diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 0000000..5077115
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,2 @@
+# Examples for running OneFlow SD
+This directory contains scripts for performance benchmarks and tests other than throughput, for instance graph-cache and compilation performance.
diff --git a/examples/graph-cache/demo.sh b/examples/graph-cache/demo.sh
new file mode 100644
index 0000000..a1814e6
--- /dev/null
+++ b/examples/graph-cache/demo.sh
@@ -0,0 +1,6 @@
+set -e
+export ONEFLOW_NNGRAPH_ENABLE_PROGRESS_BAR=1
+# First run: compile the graphs and save the cache (slow).
+time python3 examples/graph-cache/infer.py --save
+# Second run: load the saved cache to skip compilation (fast).
+time python3 examples/graph-cache/infer.py --load
diff --git a/examples/graph-cache/infer.py b/examples/graph-cache/infer.py
new file mode 100644
index 0000000..39aec60
--- /dev/null
+++ b/examples/graph-cache/infer.py
@@ -0,0 +1,84 @@
+import os
+
+import argparse
+import oneflow as torch
+from diffusers import OneFlowStableDiffusionPipeline
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description="Simple demo of image generation.")
+    parser.add_argument(
+        "--prompt", type=str, default="a photo of an astronaut riding a horse on mars"
+    )
+    parser.add_argument("--cache", type=str, default="./oneflow-sd/graph_cache")
+    parser.add_argument("--model", type=str, default="./oneflow-sd/model")
+    parser.add_argument(
+        "--load",
+        default=False,
+        action="store_true",
+        help="If specified, load the model and graph cache saved by a previous --save run",
+    )
+    parser.add_argument(
+        "--save",
+        default=False,
+        action="store_true",
+        help="If specified, save the model and graph cache after inference",
+    )
+    args = parser.parse_args()
+    return args
+
+
+args = parse_args()
+
+model = "CompVis/stable-diffusion-v1-4"
+if args.load:
+    # Note: restore the graph cache by pointing from_pretrained at the locally saved model
+    model = args.model
+    print(f"will load pipe from {args.model} and graph cache from {args.cache}")
+pipe = OneFlowStableDiffusionPipeline.from_pretrained(
+    model,
+    use_auth_token=True,
+    revision="fp16",
+    torch_dtype=torch.float16,
+    safety_checker=None,
+)
+
+pipe = pipe.to("cuda")
+
+output_dir = "oneflow-sd-output"
+os.makedirs(output_dir, exist_ok=True)
+
+# Keep up to 10 compiled UNet graphs (one per resolution) and share memory between them.
+pipe.set_unet_graphs_cache_size(10)
+pipe.enable_graph_share_mem()
+# Note: enable saving/loading the graph-related cache; these APIs are experimental and may change
+if args.save:
+    pipe.enable_save_graph()
+if args.load:
+    pipe.load_graph(args.cache)
+
+
+def do_infer(n):
+    with torch.autocast("cuda"):
+        # Sweep a 3x3 grid of resolutions so each (width, height) pair compiles its own graph.
+        for i in [2, 1, 0]:
+            for j in [2, 1, 0]:
+                width = 768 + 128 * i
+                height = 768 + 128 * j
+                prompt = args.prompt.strip().replace("\n", " ")
+                images = pipe(prompt, width=width, height=height).images
+                for idx, image in enumerate(images):
+                    dst = os.path.join(
+                        output_dir, f"{prompt[:100]}-{n}-{width}-{height}-{idx}.png"
+                    )
+                    image.save(dst)
+
+
+for n in range(2):
+    do_infer(n)
+if args.save:
+    pipe.save_pretrained(args.model)
+    print(f"saving cache to: {args.cache}")
+    os.makedirs(args.cache, exist_ok=True)
+    # Note: save the compiled graph cache
+    pipe.save_graph(args.cache)
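
For reference, a minimal sketch of the save-then-load workflow that demo.sh exercises, condensed from infer.py above. The graph-cache calls (enable_save_graph, save_graph, load_graph, save_pretrained) are the experimental APIs used in this PR; MODEL_DIR, CACHE_DIR, and the output filenames are illustrative and not part of the diff.

# Minimal sketch (not part of the diff) of the two-phase graph-cache workflow.
# MODEL_DIR / CACHE_DIR are hypothetical paths; the pipe.* graph-cache calls
# are the experimental APIs used by infer.py above and may change.
import os

import oneflow as torch
from diffusers import OneFlowStableDiffusionPipeline

MODEL_DIR = "./oneflow-sd/model"
CACHE_DIR = "./oneflow-sd/graph_cache"
PROMPT = "a photo of an astronaut riding a horse on mars"

# Phase 1 (first process): compile once, then persist both the weights and
# the compiled graphs so a later process can skip compilation entirely.
pipe = OneFlowStableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    use_auth_token=True,
    revision="fp16",
    torch_dtype=torch.float16,
    safety_checker=None,
).to("cuda")
pipe.enable_save_graph()  # record graphs as they are compiled
with torch.autocast("cuda"):
    pipe(PROMPT).images[0].save("warmup.png")  # triggers compilation
pipe.save_pretrained(MODEL_DIR)  # the graph cache is tied to this local copy
os.makedirs(CACHE_DIR, exist_ok=True)
pipe.save_graph(CACHE_DIR)

# Phase 2 (a later process): restore from the local copy and load the cache;
# the first image should now come back without the compilation stall.
pipe = OneFlowStableDiffusionPipeline.from_pretrained(
    MODEL_DIR,
    torch_dtype=torch.float16,
    safety_checker=None,
).to("cuda")
pipe.load_graph(CACHE_DIR)
with torch.autocast("cuda"):
    pipe(PROMPT).images[0].save("cached.png")

The sketch runs both phases in one process for brevity; in practice the two phases live in separate processes, which is exactly what the two timed invocations in demo.sh compare.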