diff --git a/.gitignore b/.gitignore index 8bba002..4675967 100644 --- a/.gitignore +++ b/.gitignore @@ -179,3 +179,5 @@ Network Trash Folder Temporary Items .apdisk +outputs/ +resources/ diff --git a/README.md b/README.md index 75e0785..260f6e9 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,13 @@ git clone git@github.com:divamgupta/stable-diffusion-tensorflow.git #### Using pip without a virtual environment -Install dependencies using the `requirements.txt` file or the `requirements_m1.txt` file,: +Python dependencies are installed automatically on Linux and Windows when installing the package: + +```bash +pip install . +``` + +On macOS, install the dependencies from the `requirements_m1.txt` file: ```bash pip install -r requirements.txt @@ -73,6 +79,10 @@ pip install -r requirements.txt pip install -r requirements.txt ``` +#### Using a virtual environment with *anaconda* + +The `environment.yaml` file is configured to install the CUDA toolkit from `conda-forge` and this package, together with its Python dependencies, via `pip`. + ## Usage ### Using the Python interface @@ -110,7 +120,41 @@ img = generator.generate( Image.fromarray(img[0]).save("output.png") ``` -### Using `text2image.py` from the git repo +### Running + +After installing the package, the command `stable-diffusion` will be available: + +```bash +$> stable-diffusion --help + +Usage: stable-diffusion [OPTIONS] + +Options: + -p, --prompt TEXT the prompt to render [required] + -o, --output TEXT Path where to save the output image + --negative-prompt TEXT the negative prompt to use (if any) + -H, --height, --H INTEGER Image height, in pixels + -W, --width, --W INTEGER Image width, in pixels + --scale FLOAT Unconditional guidance scale: eps = eps(x, + empty) + scale * (eps(x, cond) - eps(x, + empty)) + --steps INTEGER Number of ddim sampling steps + --seed INTEGER Optionally specify a seed integer for + reproducible results + --mixed-precision, --mp BOOLEAN + Enable mixed precision (fp16 computation) + --temperature INTEGER Generator temperature + --batch-size INTEGER Batch size
temperature + -i, --input-image-path, --input TEXT + Path to input image + --log-level [NOTSET|DEBUG|INFO|WARN|ERROR|CRITICAL] + Python Log level value + --help Show this message and exit. +``` + +The same command is also installed under the aliases `image2image` and `text2image`. + +#### Using `text2image` from the git repo Assuming you have installed the required packages, you can generate images from a text prompt using: @@ -126,8 +170,8 @@ If you want to use a different name, use the `--output` flag. python text2image.py --prompt="An astronaut riding a horse" --output="my_image.png" ``` -Check out the `text2image.py` file for more options, including image size, number of steps, etc. -### Using `img2img.py` from the git repo +Run `text2image --help` for more options, including image size, number of steps, etc. +#### Using `img2img` from the git repo Assuming you have installed the required packages, you can modify images from a text prompt using: @@ -139,7 +183,7 @@ python img2img.py --prompt="a high quality sketch of people standing with sun an The generated image will be named `img2img-out.jpeg` by default on the root of the repo. If you want to use a different name, use the `--output` flag. -Check out the `img2img.py` file for more options, including the number of steps. +Run `image2image --help` for more options, including the number of steps.
## Example outputs
diff --git a/environment.yaml b/environment.yaml new file mode 100644 index 0000000..0fefbb4 --- /dev/null +++ b/environment.yaml @@ -0,0 +1,11 @@ +name: stable-diffusion +channels: + - conda-forge +dependencies: + # TensorFlow 2.10 (pinned in setup.py) supports Python 3.7-3.10 + - python=3.10 + - cudatoolkit==11.2.2 + # listed explicitly so the `pip:` section below runs with this environment's own pip + - pip + - pip: + - ./
diff --git a/execution/__init__.py b/execution/__init__.py new file mode 100644 index 0000000..e69de29
diff --git a/execution/command_line_constants.py b/execution/command_line_constants.py new file mode 100644 index 0000000..1dd10d7 --- /dev/null +++ b/execution/command_line_constants.py @@ -0,0 +1,39 @@ +# Help texts and default values for the command line options declared in execution/main.py. + +PROMPT_HELP = "the prompt to render" + +DEFAULT_OUTPUT = "output.png" +OUTPUT_HELP = "Path where to save the output image" + +NEGATIVE_PROMPT_HELP = "the negative prompt to use (if any)" + +DEFAULT_HEIGHT = 512 +HEIGHT_HELP = "Image height, in pixels" + +DEFAULT_WIDTH = 512 +WIDTH_HELP = "Image width, in pixels" + +DEFAULT_SCALE = 7.5 +SCALE_HELP = "Unconditional guidance scale: eps = eps(x, empty) + scale * (eps(x, cond) - eps(x, empty))" + +DEFAULT_STEPS = 50 +STEPS_HELP = "Number of ddim sampling steps" + +SEED_HELP = "Optionally specify a seed integer for reproducible results" + +DEFAULT_MIXED_PRECISION = False +MIXED_PRECISION_HELP = "Enable mixed precision (fp16 computation)" + +DEFAULT_TEMPERATURE = 1 +TEMPERATURE_HELP = "Generator temperature" + +DEFAULT_BATCH_SIZE = 1 +BATCH_SIZE_HELP = "Batch size" + +INPUT_IMAGE_HELP = "Path to input image" +INPUT_IMGE_IMAGE_HELP = INPUT_IMAGE_HELP # misspelled original name, kept because execution/main.py refers to it + +DEFAULT_LOGLEVEL = "INFO" +LOGLEVEL_HELP = "Python Log level value" +AVAILABLE_LOGLEVELS = ["NOTSET", "DEBUG", "INFO", "WARN", "ERROR", "CRITICAL"] +LOGLEVEL_ENV_VAR = "LOGLEVEL"
diff --git a/execution/environment_configuration.py b/execution/environment_configuration.py new file mode 100644 index 0000000..4a7f681 --- /dev/null +++ b/execution/environment_configuration.py @@ -0,0 +1,21 @@ +import logging +import tensorflow as tf +from tensorflow import keras + +def configure_keras(mixed_precision: bool): + """Switch the global Keras dtype policy to `mixed_float16` when `mixed_precision` is true; otherwise do nothing.""" + if mixed_precision: + logging.info("Using mixed precision.") + keras.mixed_precision.set_global_policy("mixed_float16") + + +def set_log_level(log_level: str): + """Configure the root logger via `logging.basicConfig` with the given level name (e.g. "INFO").""" + logging.basicConfig(level=log_level) + + +def get_gpus(): + """Log (at DEBUG level) and return the physical GPU devices reported by TensorFlow.""" + gpu_devices = tf.config.list_physical_devices('GPU') + logging.debug(f"Available GPU devices found: {gpu_devices}") + return gpu_devices
diff --git a/execution/generator_factory.py b/execution/generator_factory.py new file mode 100644 index 0000000..d1f667a --- /dev/null +++ b/execution/generator_factory.py @@ -0,0 +1,32 @@ +import logging +from stable_diffusion_tf.stable_diffusion import StableDiffusion +from tensorflow import keras +import numpy as np +from typing import Optional + + +def make_stable_diffusion_model(height: int, width: int) -> StableDiffusion: + """Create a `StableDiffusion` generator for images of `width`x`height` pixels, with `jit_compile` disabled.""" + logging.debug(f"Creating stable diffusion model for images of dimension {width}x{height}") + generator = StableDiffusion(img_height=height, img_width=width, jit_compile=False) + return generator + + +def run_generator(generator: StableDiffusion, prompt: str, steps: int, scale: float, temperature: int, batch_size: int, + seed: Optional[int], negative_prompt: Optional[str], input_image: Optional[np.ndarray]) -> np.ndarray: + """Call `generator.generate` with the given options and return only the first element of its result.""" + + logging.debug(f"Start running generation for prompt `{prompt}` with negative prompt `{negative_prompt}`") + + image = generator.generate( + prompt, + negative_prompt=negative_prompt, + num_steps=steps, + unconditional_guidance_scale=scale, + temperature=temperature, + input_image=input_image, + batch_size=batch_size, + seed=seed, + ) + + return image[0]
diff --git a/execution/main.py b/execution/main.py new file mode 100644 index 0000000..69655c9 --- /dev/null +++ b/execution/main.py @@ -0,0 +1,36 @@ +from execution.command_line_constants import * +from execution.generator_factory import make_stable_diffusion_model, run_generator +from execution.environment_configuration import configure_keras,
set_log_level +from execution.persistence import save_image, load_image + +import click + + +@click.command() +@click.option("--prompt", "-p", type=click.STRING, required=True, help=PROMPT_HELP) +@click.option("--output", "-o", type=click.STRING, default=DEFAULT_OUTPUT, help=OUTPUT_HELP) +@click.option("--negative-prompt", type=click.STRING, required=False, help=NEGATIVE_PROMPT_HELP) +@click.option("--height", "--H", "-H", type=click.INT, default=DEFAULT_HEIGHT, help=HEIGHT_HELP) +@click.option("--width", "--W", "-W", type=click.INT, default=DEFAULT_WIDTH, help=WIDTH_HELP) +@click.option("--scale", type=click.FLOAT, default=DEFAULT_SCALE, help=SCALE_HELP) +@click.option("--steps", type=click.INT, default=DEFAULT_STEPS, help=STEPS_HELP) +@click.option("--seed", type=click.INT, required=False, help=SEED_HELP) +@click.option("--mixed-precision", "--mp", type=click.BOOL, is_flag=False, flag_value=True, default=DEFAULT_MIXED_PRECISION, help=MIXED_PRECISION_HELP) # a bare `--mp` (the old store_true usage) and `--mp true|false` are both accepted +@click.option("--temperature", type=click.INT, default=DEFAULT_TEMPERATURE, help=TEMPERATURE_HELP) +@click.option("--batch-size", type=click.INT, default=DEFAULT_BATCH_SIZE, help=BATCH_SIZE_HELP) +@click.option("--input-image-path", "--input", "-i", type=click.STRING, required=False, help=INPUT_IMGE_IMAGE_HELP) +@click.option("--log-level", type=click.Choice(choices=AVAILABLE_LOGLEVELS, case_sensitive=False), + default=DEFAULT_LOGLEVEL, help=LOGLEVEL_HELP, envvar=LOGLEVEL_ENV_VAR) +def main(prompt: str, output: str, negative_prompt: str, height: int, width: int, scale: float, steps: int, + seed: int, mixed_precision: bool, temperature: int, batch_size: int, log_level: str, input_image_path: str): + set_log_level(log_level) # configure logging first so the steps below can emit log records + configure_keras(mixed_precision) + + input_image = load_image(input_image_path, width, height) # None unless --input-image-path was given + model = make_stable_diffusion_model(height, width) + image = run_generator(model, prompt, steps, scale, temperature, batch_size, seed, negative_prompt, input_image) + + save_image(image, output, prompt, negative_prompt) + +if
__name__ == "__main__": + main()
diff --git a/execution/persistence.py b/execution/persistence.py new file mode 100644 index 0000000..686fb56 --- /dev/null +++ b/execution/persistence.py @@ -0,0 +1,31 @@ +import numpy as np +import logging +from PIL import Image +from PIL.PngImagePlugin import PngInfo +from typing import Optional + + +def save_image(image_data: np.ndarray, output_path: str, prompt: str, negative_prompt: Optional[str]): + """Save `image_data` to `output_path`, passing the prompt (and the negative prompt, if any) as `PngInfo` text entries.""" + pnginfo = PngInfo() + pnginfo.add_text('prompt', prompt) + + if negative_prompt: + pnginfo.add_text('negative_prompt', negative_prompt) + + image = Image.fromarray(image_data) + image.save(output_path, pnginfo=pnginfo) + logging.info(f"saved at {output_path}") + + +def load_image(image_path: Optional[str], width: int, height: int) -> Optional[Image.Image]: + """Open `image_path` and return it resized to `width`x`height` as a PIL image, or None when no path is given.""" + if not image_path: + logging.debug("No input image given") + return None + + # resize() returns a new image, so the source file can be closed as soon as the block exits + with Image.open(image_path) as source: + logging.debug(f"Loaded input image from {image_path}") + logging.debug(f"Resizing input image to {width}x{height}") + return source.resize((width, height))
diff --git a/img2img.py b/img2img.py deleted file mode 100644 index 84e31dd..0000000 --- a/img2img.py +++ /dev/null @@ -1,62 +0,0 @@ -import argparse -from stable_diffusion_tf.stable_diffusion import StableDiffusion -from PIL import Image - -parser = argparse.ArgumentParser() - -parser.add_argument( - "--prompt", - type=str, - nargs="?", - required=True, - help="the prompt to render", -) - -parser.add_argument( - "--negative-prompt", - type=str, - help="the negative prompt to use (if any)", -) - -parser.add_argument( - "--steps", - type=int, - default=50, - help="number of ddim sampling steps" -) - -parser.add_argument( - "--input", - type=str, - nargs="?", - required=True, - help="the input image filename", -) - -parser.add_argument( - "--output", - type=str, - nargs="?", - default="img2img-out.jpeg", - help="the output image filename", -) - -args = parser.parse_args() - -generator =
StableDiffusion( - img_height=512, - img_width=512, - jit_compile=False, # You can try True as well (different performance profile) -) - -img = generator.generate( - args.prompt, - negative_prompt=args.negative_prompt, - num_steps=args.steps, - unconditional_guidance_scale=7.5, - temperature=1, - batch_size=1, - input_image=args.input, - input_image_strength=0.8 -) -Image.fromarray(img[0]).save(args.output) diff --git a/setup.py b/setup.py index fe62e7a..b2c2366 100644 --- a/setup.py +++ b/setup.py @@ -9,4 +9,22 @@ platforms=["any"], # or more specific, e.g. "win32", "cygwin", "osx" url="https://github.com/divamgupta/stable-diffusion-tensorflow", packages=find_packages(), + install_requires=[ + "tensorflow-gpu~=2.10.0", + "tensorflow-addons~=0.17.1", + + "h5py~=3.7.0", + "Pillow~=9.2.0", + "tqdm~=4.64.1", + "ftfy~=6.1.1", + "regex~=2022.9.13", + "click~=8.1.3" + ], + entry_points = { + 'console_scripts': [ + 'text2image=execution.main:main', + 'image2image=execution.main:main', + 'stable-diffusion=execution.main:main', + ], + } ) diff --git a/text2image.py b/text2image.py deleted file mode 100644 index 7a1776a..0000000 --- a/text2image.py +++ /dev/null @@ -1,88 +0,0 @@ -from tensorflow import keras -from stable_diffusion_tf.stable_diffusion import StableDiffusion -import argparse -from PIL import Image -from PIL.PngImagePlugin import PngInfo - -parser = argparse.ArgumentParser() - -parser.add_argument( - "--prompt", - type=str, - nargs="?", - default="a painting of a virus monster playing guitar", - help="the prompt to render", -) - -parser.add_argument( - "--negative-prompt", - type=str, - help="the negative prompt to use (if any)", -) - -parser.add_argument( - "--output", - type=str, - nargs="?", - default="output.png", - help="where to save the output image", -) - -parser.add_argument( - "--H", - type=int, - default=512, - help="image height, in pixels", -) - -parser.add_argument( - "--W", - type=int, - default=512, - help="image width, in pixels", -) - 
-parser.add_argument( - "--scale", - type=float, - default=7.5, - help="unconditional guidance scale: eps = eps(x, empty) + scale * (eps(x, cond) - eps(x, empty))", -) - -parser.add_argument( - "--steps", type=int, default=50, help="number of ddim sampling steps" -) - -parser.add_argument( - "--seed", - type=int, - help="optionally specify a seed integer for reproducible results", -) - -parser.add_argument( - "--mp", - default=False, - action="store_true", - help="Enable mixed precision (fp16 computation)", -) - -args = parser.parse_args() - -if args.mp: - print("Using mixed precision.") - keras.mixed_precision.set_global_policy("mixed_float16") - -generator = StableDiffusion(img_height=args.H, img_width=args.W, jit_compile=False) -img = generator.generate( - args.prompt, - negative_prompt=args.negative_prompt, - num_steps=args.steps, - unconditional_guidance_scale=args.scale, - temperature=1, - batch_size=1, - seed=args.seed, -) -pnginfo = PngInfo() -pnginfo.add_text('prompt', args.prompt) -Image.fromarray(img[0]).save(args.output, pnginfo=pnginfo) -print(f"saved at {args.output}")