From e7d9ddcd8d9e52cb30a602aae3cb2e5b776d72c4 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Fri, 30 Jun 2023 14:13:46 -0700 Subject: [PATCH 01/24] Added OpenVINO acceleration in custom scripts --- scripts/openvino_accelerate.py | 528 +++++++++++++++++++++++++++++++++ 1 file changed, 528 insertions(+) create mode 100644 scripts/openvino_accelerate.py diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py new file mode 100644 index 00000000..4231ed44 --- /dev/null +++ b/scripts/openvino_accelerate.py @@ -0,0 +1,528 @@ +import math +import cv2 +import os +import torch +import functools +import gradio as gr +import numpy as np +import openvino.frontend.pytorch.torchdynamo.backend + +import modules.paths as paths +import modules.scripts as scripts +import modules.shared as shared + +from modules import images, devices, extra_networks, generation_parameters_copypaste, masking, sd_samplers, sd_samplers_compvis, sd_samplers_kdiffusion, shared +from modules.processing import StableDiffusionProcessing, Processed, apply_overlay, process_images, get_fixed_seed, program_version, StableDiffusionProcessingImg2Img, create_random_tensors +from modules.sd_samplers_common import samples_to_image_grid, sample_to_image +from modules.shared import cmd_opts, opts, state +from modules.ui import plaintext_to_html + +from diffusers import StableDiffusionPipeline +from PIL import Image, ImageFilter, ImageOps + +from diffusers import ( + DDIMScheduler, + DDPMScheduler, + DEISMultistepScheduler, + DPMSolverMultistepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + HeunDiscreteScheduler, + IPNDMScheduler, + KDPM2AncestralDiscreteScheduler, + PNDMScheduler, + UniPCMultistepScheduler, + # KarrasVeScheduler, + # RePaintScheduler, + # ScoreSdeVeScheduler, + # UnCLIPScheduler, + # VQDiffusionScheduler, +) +from modules import sd_samplers_common + # scheduler = diffusers.UniPCMultistepScheduler.from_pretrained(shared.cmd_opts.ckpt, subfolder="scheduler") + +samplers_data_diffusors = [ + sd_samplers_common.SamplerData('UniPC', lambda model: DiffusionSampler('UniPC', UniPCMultistepScheduler, model), [], {}), + sd_samplers_common.SamplerData('DDIM', lambda model: DiffusionSampler('DDIM', DDIMScheduler, model), [], {}), + sd_samplers_common.SamplerData('DDPMS', lambda model: DiffusionSampler('DDPMS', DDPMScheduler, model), [], {}), + sd_samplers_common.SamplerData('DEIS', lambda model: DiffusionSampler('DEIS', DEISMultistepScheduler, model), [], {}), + sd_samplers_common.SamplerData('DPMSolver', lambda model: DiffusionSampler('DPMSolver', DPMSolverMultistepScheduler, model), [], {}), + sd_samplers_common.SamplerData('Euler', lambda model: DiffusionSampler('Euler', EulerDiscreteScheduler, model), [], {}), + sd_samplers_common.SamplerData('EulerAncestral', lambda model: DiffusionSampler('EulerAncestral', EulerAncestralDiscreteScheduler, model), [], {}), + sd_samplers_common.SamplerData('Heun', lambda model: DiffusionSampler('Heun', HeunDiscreteScheduler, model), [], {}), + sd_samplers_common.SamplerData('IPNDM', lambda model: DiffusionSampler('IPNDM', IPNDMScheduler, model), [], {}), + sd_samplers_common.SamplerData('KDPM2Ancestral', lambda model: DiffusionSampler('KDPM2Ancestral', KDPM2AncestralDiscreteScheduler, model), [], {}), + sd_samplers_common.SamplerData('PNDMS', lambda model: DiffusionSampler('PNDMS', PNDMScheduler, model), [], {}), + # sd_samplers_common.SamplerData('KarrasVe', lambda model: DiffusionSampler('KarrasVe', KarrasVeScheduler, model), [], {}), + # 
sd_samplers_common.SamplerData('RePaint', lambda model: DiffusionSampler('RePaint', RePaintScheduler, model), [], {}), + # sd_samplers_common.SamplerData('ScoreSdeVe', lambda model: DiffusionSampler('ScoreSdeVe', ScoreSdeVeScheduler, model), [], {}), + # sd_samplers_common.SamplerData('UnCLIP', lambda model: DiffusionSampler('UnCLIP', UnCLIPScheduler, model), [], {}), + # sd_samplers_common.SamplerData('VQDiffusion', lambda model: DiffusionSampler('VQDiffusion', VQDiffusionScheduler, model), [], {}), +] + +class DiffusionSampler: + def __init__(self, name, constructor, sd_model): + self.sampler = constructor.from_pretrained(sd_model, subfolder="scheduler") + self.sampler.name = name + + +all_samplers = [ + *samplers_data_diffusors, +] +all_samplers_map = {x.name: x for x in all_samplers} + +samplers = [] +samplers_for_img2img = [] +samplers_map = {} + +def find_sampler_config(name): + if name is not None: + config = all_samplers_map.get(name, None) + else: + config = all_samplers[0] + + return config + + +def create_sampler(name, model): + config = find_sampler_config(name) + + assert config is not None, f'bad sampler name: {name}' + + sampler = config.constructor("runwayml/stable-diffusion-v1-5") + model.scheduler = sampler.sampler + return sampler.sampler + + +def set_samplers(): + global samplers, samplers_for_img2img + + hidden = set(shared.opts.hide_samplers) + hidden_img2img = set(shared.opts.hide_samplers + ['PLMS', 'UniPC']) + + samplers = [x for x in all_samplers if x.name not in hidden] + samplers_for_img2img = [x for x in all_samplers if x.name not in hidden_img2img] + + samplers_map.clear() + for sampler in all_samplers: + samplers_map[sampler.name.lower()] = sampler.name + for alias in sampler.aliases: + samplers_map[alias.lower()] = sampler.name + +def cond_stage_key(self): + return None + +set_samplers() +first_inference = 1 + +def get_diffusers_sd_model(): + global first_inference + if (first_inference == 1): + model_id = "runwayml/stable-diffusion-v1-5" + sd_model = StableDiffusionPipeline.from_pretrained(model_id) + sd_model.unet = torch.compile(sd_model.unet, backend="openvino") + sd_model.safety_checker = None + sd_model.sd_model_hash = sd_model.unet.config._name_or_path.split("/")[-2] + warmup_prompt = "a dog walking in a park" + image = sd_model(warmup_prompt, num_inference_steps=1).images[0] + first_inference = 0 + shared.sd_model = sd_model + shared.sd_model.cond_stage_key = functools.partial(cond_stage_key, shared.sd_model) + return shared.sd_model + + +def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iteration=0, position_in_batch=0): + index = position_in_batch + iteration * p.batch_size + + clip_skip = getattr(p, 'clip_skip', opts.CLIP_stop_at_last_layers) + enable_hr = getattr(p, 'enable_hr', False) + token_merging_ratio = p.get_token_merging_ratio() + token_merging_ratio_hr = p.get_token_merging_ratio(for_hr=True) + + uses_ensd = opts.eta_noise_seed_delta != 0 + if uses_ensd: + uses_ensd = sd_samplers_common.is_sampler_using_eta_noise_seed_delta(p) + + generation_params = { + "Steps": p.steps, + "Sampler": p.sampler_name, + "CFG scale": p.cfg_scale, + "Image CFG scale": getattr(p, 'image_cfg_scale', None), + "Seed": all_seeds[index], + "Face restoration": (opts.face_restoration_model if p.restore_faces else None), + "Size": f"{p.width}x{p.height}", + "Model hash": getattr(p, 'sd_model_hash', None), + "Model": None, + "Variation seed": (None if p.subseed_strength == 0 else all_subseeds[index]), + "Variation seed strength": (None if 
p.subseed_strength == 0 else p.subseed_strength), + "Seed resize from": (None if p.seed_resize_from_w == 0 or p.seed_resize_from_h == 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"), + "Denoising strength": getattr(p, 'denoising_strength', None), + "Conditional mask weight": getattr(p, "inpainting_mask_weight", shared.opts.inpainting_mask_weight) if p.is_using_inpainting_conditioning else None, + "Clip skip": None if clip_skip <= 1 else clip_skip, + "ENSD": opts.eta_noise_seed_delta if uses_ensd else None, + "Token merging ratio": None if token_merging_ratio == 0 else token_merging_ratio, + "Token merging ratio hr": None if not enable_hr or token_merging_ratio_hr == 0 else token_merging_ratio_hr, + "Init image hash": getattr(p, 'init_img_hash', None), + "RNG": opts.randn_source if opts.randn_source != "GPU" else None, + "NGMS": None if p.s_min_uncond == 0 else p.s_min_uncond, + **p.extra_generation_params, + "Version": program_version() if opts.add_version_to_infotext else None, + } + + generation_params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in generation_params.items() if v is not None]) + + negative_prompt_text = f"\nNegative prompt: {p.all_negative_prompts[index]}" if p.all_negative_prompts[index] else "" + + return f"{all_prompts[index]}{negative_prompt_text}\n{generation_params_text}".strip() + + +def init(self, all_prompts, all_seeds, all_subseeds): + self.sampler = create_sampler(self.sampler_name, shared.sd_model) + crop_region = None + + image_mask = self.image_mask + + if image_mask is not None: + image_mask = image_mask.convert('L') + + if self.inpainting_mask_invert: + image_mask = ImageOps.invert(image_mask) + + if self.mask_blur_x > 0: + np_mask = np.array(image_mask) + kernel_size = 2 * int(4 * self.mask_blur_x + 0.5) + 1 + np_mask = cv2.GaussianBlur(np_mask, (kernel_size, 1), self.mask_blur_x) + image_mask = Image.fromarray(np_mask) + + if self.mask_blur_y > 0: + np_mask = np.array(image_mask) + kernel_size = 2 * int(4 * self.mask_blur_y + 0.5) + 1 + np_mask = cv2.GaussianBlur(np_mask, (1, kernel_size), self.mask_blur_y) + image_mask = Image.fromarray(np_mask) + + if self.inpaint_full_res: + self.mask_for_overlay = image_mask + mask = image_mask.convert('L') + crop_region = masking.get_crop_region(np.array(mask), self.inpaint_full_res_padding) + crop_region = masking.expand_crop_region(crop_region, self.width, self.height, mask.width, mask.height) + x1, y1, x2, y2 = crop_region + + mask = mask.crop(crop_region) + image_mask = images.resize_image(2, mask, self.width, self.height) + self.paste_to = (x1, y1, x2-x1, y2-y1) + else: + image_mask = images.resize_image(self.resize_mode, image_mask, self.width, self.height) + np_mask = np.array(image_mask) + np_mask = np.clip((np_mask.astype(np.float32)) * 2, 0, 255).astype(np.uint8) + self.mask_for_overlay = Image.fromarray(np_mask) + + self.overlay_images = [] + + latent_mask = self.latent_mask if self.latent_mask is not None else image_mask + + add_color_corrections = opts.img2img_color_correction and self.color_corrections is None + if add_color_corrections: + self.color_corrections = [] + imgs = [] + for img in self.init_images: + + # Save init image + if opts.save_init_img: + self.init_img_hash = hashlib.md5(img.tobytes()).hexdigest() + images.save_image(img, path=opts.outdir_init_images, basename=None, forced_filename=self.init_img_hash, save_to_dirs=False) + + image = images.flatten(img, opts.img2img_background_color) + + if crop_region is None and 
self.resize_mode != 3: + image = images.resize_image(self.resize_mode, image, self.width, self.height) + + if image_mask is not None: + image_masked = Image.new('RGBa', (image.width, image.height)) + image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L'))) + + self.overlay_images.append(image_masked.convert('RGBA')) + + # crop_region is not None if we are doing inpaint full res + if crop_region is not None: + image = image.crop(crop_region) + image = images.resize_image(2, image, self.width, self.height) + + if image_mask is not None: + if self.inpainting_fill != 1: + image = masking.fill(image, latent_mask) + + if add_color_corrections: + self.color_corrections.append(setup_color_correction(image)) + + image = np.array(image).astype(np.float32) / 255.0 + image = np.moveaxis(image, 2, 0) + + imgs.append(image) + + if len(imgs) == 1: + batch_images = np.expand_dims(imgs[0], axis=0).repeat(self.batch_size, axis=0) + if self.overlay_images is not None: + self.overlay_images = self.overlay_images * self.batch_size + + if self.color_corrections is not None and len(self.color_corrections) == 1: + self.color_corrections = self.color_corrections * self.batch_size + + elif len(imgs) <= self.batch_size: + self.batch_size = len(imgs) + batch_images = np.array(imgs) + else: + raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less") + + image = torch.from_numpy(batch_images) + image = 2. * image - 1. + image = image.to(shared.device) + + self.init_latent = shared.sd_model.vae.encode(image).latent_dist.sample() + + if self.resize_mode == 3: + self.init_latent = torch.nn.functional.interpolate(self.init_latent, size=(self.height // opt_f, self.width // opt_f), mode="bilinear") + + if image_mask is not None: + init_mask = latent_mask + latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2])) + latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255 + latmask = latmask[0] + latmask = np.around(latmask) + latmask = np.tile(latmask[None], (4, 1, 1)) + + self.mask = torch.asarray(1.0 - latmask).to(shared.device).type(shared.sd_model.vae.dtype) + self.nmask = torch.asarray(latmask).to(shared.device).type(shared.sd_model.vae.dtype) + + # this needs to be fixed to be done in sample() using actual seeds for batches + if self.inpainting_fill == 2: + self.init_latent = self.init_latent * self.mask + create_random_tensors(self.init_latent.shape[1:], all_seeds[0:self.init_latent.shape[0]]) * self.nmask + elif self.inpainting_fill == 3: + self.init_latent = self.init_latent * self.mask + + +def process_images_openvino(p: StableDiffusionProcessing) -> Processed: + """this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch""" + + if type(p.prompt) == list: + assert(len(p.prompt) > 0) + else: + assert p.prompt is not None + + devices.torch_gc() + + seed = get_fixed_seed(p.seed) + subseed = get_fixed_seed(p.subseed) + + comments = {} + + p.setup_prompts() + + if type(seed) == list: + p.all_seeds = seed + else: + p.all_seeds = [int(seed) + (x if p.subseed_strength == 0 else 0) for x in range(len(p.all_prompts))] + + if type(subseed) == list: + p.all_subseeds = subseed + else: + p.all_subseeds = [int(subseed) + x for x in range(len(p.all_prompts))] + + def infotext(iteration=0, position_in_batch=0): + return create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, comments, iteration, 
position_in_batch) + + #if os.path.exists(cmd_opts.embeddings_dir) and not p.do_not_reload_embeddings: + #model_hijack.embedding_db.load_textual_inversion_embeddings() + + if p.scripts is not None: + p.scripts.process(p) + + infotexts = [] + output_images = [] + + #ema_scope_context = p.sd_model.ema_scope if shared.backend == shared.Backend.ORIG_SD else nullcontext + with torch.no_grad(): #, ema_scope_context(): + with devices.autocast(): + p.init(p.all_prompts, p.all_seeds, p.all_subseeds) + + if state.job_count == -1: + state.job_count = p.n_iter + + extra_network_data = None + for n in range(p.n_iter): + p.iteration = n + + if state.skipped: + state.skipped = False + + if state.interrupted: + break + + p.prompts = p.all_prompts[n * p.batch_size:(n + 1) * p.batch_size] + p.negative_prompts = p.all_negative_prompts[n * p.batch_size:(n + 1) * p.batch_size] + p.seeds = p.all_seeds[n * p.batch_size:(n + 1) * p.batch_size] + p.subseeds = p.all_subseeds[n * p.batch_size:(n + 1) * p.batch_size] + + if p.scripts is not None: + p.scripts.before_process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds) + + if len(p.prompts) == 0: + break + + extra_network_data = p.parse_extra_network_prompts() + + if not p.disable_extra_networks: + with devices.autocast(): + extra_networks.activate(p, p.extra_network_data) + + if p.scripts is not None: + p.scripts.process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds) + + + # params.txt should be saved after scripts.process_batch, since the + # infotext could be modified by that callback + # Example: a wildcard processed by process_batch sets an extra model + # strength, which is saved as "Model Strength: 1.0" in the infotext + if n == 0: + with open(os.path.join(paths.data_path, "params.txt"), "w", encoding="utf8") as file: + processed = Processed(p, [], p.seed, "") + file.write(create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, comments=[], position_in_batch=0 % p.batch_size, iteration=0 // p.batch_size)) + + if p.n_iter > 1: + shared.state.job = f"Batch {n+1} out of {p.n_iter}" + + generator = [torch.Generator(device="cpu").manual_seed(s) for s in p.seeds] + output = shared.sd_model( + prompt=p.prompts, + negative_prompt=p.negative_prompts, + num_inference_steps=p.steps, + guidance_scale=p.cfg_scale, + height=p.height, + width=p.width, + generator=generator, + output_type="np", + ) + x_samples_ddim = output.images + + for i, x_sample in enumerate(x_samples_ddim): + p.batch_index = i + + x_sample = (255. 
* x_sample).astype(np.uint8) + + if p.restore_faces: + if opts.save and not p.do_not_save_samples and opts.save_images_before_face_restoration: + images.save_image(Image.fromarray(x_sample), p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-before-face-restoration") + + devices.torch_gc() + + x_sample = modules.face_restoration.restore_faces(x_sample) + devices.torch_gc() + + image = Image.fromarray(x_sample) + + if p.scripts is not None: + pp = scripts.PostprocessImageArgs(image) + p.scripts.postprocess_image(p, pp) + image = pp.image + + if p.color_corrections is not None and i < len(p.color_corrections): + if opts.save and not p.do_not_save_samples and opts.save_images_before_color_correction: + image_without_cc = apply_overlay(image, p.paste_to, i, p.overlay_images) + images.save_image(image_without_cc, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-before-color-correction") + image = apply_color_correction(p.color_corrections[i], image) + + image = apply_overlay(image, p.paste_to, i, p.overlay_images) + + if opts.samples_save and not p.do_not_save_samples: + images.save_image(image, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(n, i), p=p) + + text = infotext(n, i) + infotexts.append(text) + if opts.enable_pnginfo: + image.info["parameters"] = text + output_images.append(image) + + if hasattr(p, 'mask_for_overlay') and p.mask_for_overlay and any([opts.save_mask, opts.save_mask_composite, opts.return_mask, opts.return_mask_composite]): + image_mask = p.mask_for_overlay.convert('RGB') + image_mask_composite = Image.composite(image.convert('RGBA').convert('RGBa'), Image.new('RGBa', image.size), images.resize_image(2, p.mask_for_overlay, image.width, image.height).convert('L')).convert('RGBA') + + if opts.save_mask: + images.save_image(image_mask, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-mask") + + if opts.save_mask_composite: + images.save_image(image_mask_composite, p.outpath_samples, "", p.seeds[i], p.prompts[i], opts.samples_format, info=infotext(n, i), p=p, suffix="-mask-composite") + + if opts.return_mask: + output_images.append(image_mask) + + if opts.return_mask_composite: + output_images.append(image_mask_composite) + + del x_samples_ddim + + devices.torch_gc() + + state.nextjob() + + p.color_corrections = None + + index_of_first_image = 0 + unwanted_grid_because_of_img_count = len(output_images) < 2 and opts.grid_only_if_multiple + if (opts.return_grid or opts.grid_save) and not p.do_not_save_grid and not unwanted_grid_because_of_img_count: + grid = images.image_grid(output_images, p.batch_size) + + if opts.return_grid: + text = infotext() + infotexts.insert(0, text) + if opts.enable_pnginfo: + grid.info["parameters"] = text + output_images.insert(0, grid) + index_of_first_image = 1 + + if opts.grid_save: + images.save_image(grid, p.outpath_grids, "grid", p.all_seeds[0], p.all_prompts[0], opts.grid_format, info=infotext(), short_filename=not opts.grid_extended_filename, p=p, grid=True) + + if not p.disable_extra_networks and extra_network_data: + extra_networks.deactivate(p, p.extra_network_data) + + devices.torch_gc() + + res = Processed( + p, + images_list=output_images, + seed=p.all_seeds[0], + info=infotext(), + comments="".join(f"{comment}\n" for comment in comments), + subseed=p.all_subseeds[0], + index_of_first_image=index_of_first_image, + infotexts=infotexts, 
+ ) + + if p.scripts is not None: + p.scripts.postprocess(p, res) + + return res + + +class Script(scripts.Script): + def title(self): + return "Accelerate with OpenVINO" + + def show(self, is_img2img): + return True + + + def ui(self, is_img2img): + pass + + def run(self, p): + shared.sd_model = get_diffusers_sd_model() + if self.is_txt2img: + processed = process_images_openvino(p) + else: + p.sampler_name = samplers_for_img2img[0].name + p.init = functools.partial(init, p) + #p.init(p.all_prompts, p.all_seeds, p.all_subseeds) + processed = process_images_openvino(p) + + return processed + From 40de4195619f9af5649c6f5fb42ab3445a3de752 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Wed, 19 Jul 2023 07:34:56 -0700 Subject: [PATCH 02/24] Added samplers and new UI elements --- scripts/openvino_accelerate.py | 232 +++++++++++++++++---------------- 1 file changed, 119 insertions(+), 113 deletions(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index 4231ed44..fab74292 100644 --- a/scripts/openvino_accelerate.py +++ b/scripts/openvino_accelerate.py @@ -13,123 +13,104 @@ import modules.shared as shared from modules import images, devices, extra_networks, generation_parameters_copypaste, masking, sd_samplers, sd_samplers_compvis, sd_samplers_kdiffusion, shared from modules.processing import StableDiffusionProcessing, Processed, apply_overlay, process_images, get_fixed_seed, program_version, StableDiffusionProcessingImg2Img, create_random_tensors +from modules.sd_models import list_models, CheckpointInfo from modules.sd_samplers_common import samples_to_image_grid, sample_to_image -from modules.shared import cmd_opts, opts, state -from modules.ui import plaintext_to_html +from modules.shared import Shared, cmd_opts, opts, state +from modules.ui import plaintext_to_html, create_sampler_and_steps_selection +from webui import initialize_rest from diffusers import StableDiffusionPipeline from PIL import Image, ImageFilter, ImageOps +from modules import sd_samplers_common +from openvino.runtime import Core from diffusers import ( DDIMScheduler, - DDPMScheduler, - DEISMultistepScheduler, DPMSolverMultistepScheduler, + DPMSolverSDEScheduler, + DPMSolverSinglestepScheduler, EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, HeunDiscreteScheduler, IPNDMScheduler, KDPM2AncestralDiscreteScheduler, + KDPM2DiscreteScheduler, + LMSDiscreteScheduler, PNDMScheduler, UniPCMultistepScheduler, - # KarrasVeScheduler, - # RePaintScheduler, - # ScoreSdeVeScheduler, - # UnCLIPScheduler, - # VQDiffusionScheduler, ) -from modules import sd_samplers_common - # scheduler = diffusers.UniPCMultistepScheduler.from_pretrained(shared.cmd_opts.ckpt, subfolder="scheduler") -samplers_data_diffusors = [ - sd_samplers_common.SamplerData('UniPC', lambda model: DiffusionSampler('UniPC', UniPCMultistepScheduler, model), [], {}), - sd_samplers_common.SamplerData('DDIM', lambda model: DiffusionSampler('DDIM', DDIMScheduler, model), [], {}), - sd_samplers_common.SamplerData('DDPMS', lambda model: DiffusionSampler('DDPMS', DDPMScheduler, model), [], {}), - sd_samplers_common.SamplerData('DEIS', lambda model: DiffusionSampler('DEIS', DEISMultistepScheduler, model), [], {}), - sd_samplers_common.SamplerData('DPMSolver', lambda model: DiffusionSampler('DPMSolver', DPMSolverMultistepScheduler, model), [], {}), - sd_samplers_common.SamplerData('Euler', lambda model: DiffusionSampler('Euler', EulerDiscreteScheduler, model), [], {}), - sd_samplers_common.SamplerData('EulerAncestral', lambda model: 
DiffusionSampler('EulerAncestral', EulerAncestralDiscreteScheduler, model), [], {}), - sd_samplers_common.SamplerData('Heun', lambda model: DiffusionSampler('Heun', HeunDiscreteScheduler, model), [], {}), - sd_samplers_common.SamplerData('IPNDM', lambda model: DiffusionSampler('IPNDM', IPNDMScheduler, model), [], {}), - sd_samplers_common.SamplerData('KDPM2Ancestral', lambda model: DiffusionSampler('KDPM2Ancestral', KDPM2AncestralDiscreteScheduler, model), [], {}), - sd_samplers_common.SamplerData('PNDMS', lambda model: DiffusionSampler('PNDMS', PNDMScheduler, model), [], {}), - # sd_samplers_common.SamplerData('KarrasVe', lambda model: DiffusionSampler('KarrasVe', KarrasVeScheduler, model), [], {}), - # sd_samplers_common.SamplerData('RePaint', lambda model: DiffusionSampler('RePaint', RePaintScheduler, model), [], {}), - # sd_samplers_common.SamplerData('ScoreSdeVe', lambda model: DiffusionSampler('ScoreSdeVe', ScoreSdeVeScheduler, model), [], {}), - # sd_samplers_common.SamplerData('UnCLIP', lambda model: DiffusionSampler('UnCLIP', UnCLIPScheduler, model), [], {}), - # sd_samplers_common.SamplerData('VQDiffusion', lambda model: DiffusionSampler('VQDiffusion', VQDiffusionScheduler, model), [], {}), -] +first_inference=1 +sampler_name_global="Euler a" -class DiffusionSampler: - def __init__(self, name, constructor, sd_model): - self.sampler = constructor.from_pretrained(sd_model, subfolder="scheduler") - self.sampler.name = name - - -all_samplers = [ - *samplers_data_diffusors, -] -all_samplers_map = {x.name: x for x in all_samplers} - -samplers = [] -samplers_for_img2img = [] -samplers_map = {} - -def find_sampler_config(name): - if name is not None: - config = all_samplers_map.get(name, None) - else: - config = all_samplers[0] - - return config - - -def create_sampler(name, model): - config = find_sampler_config(name) - - assert config is not None, f'bad sampler name: {name}' - - sampler = config.constructor("runwayml/stable-diffusion-v1-5") - model.scheduler = sampler.sampler - return sampler.sampler - - -def set_samplers(): - global samplers, samplers_for_img2img - - hidden = set(shared.opts.hide_samplers) - hidden_img2img = set(shared.opts.hide_samplers + ['PLMS', 'UniPC']) - - samplers = [x for x in all_samplers if x.name not in hidden] - samplers_for_img2img = [x for x in all_samplers if x.name not in hidden_img2img] - - samplers_map.clear() - for sampler in all_samplers: - samplers_map[sampler.name.lower()] = sampler.name - for alias in sampler.aliases: - samplers_map[alias.lower()] = sampler.name +def sd_diffusers_model(self): + import modules.sd_models + return modules.sd_models.model_data.get_sd_model() def cond_stage_key(self): return None -set_samplers() -first_inference = 1 +Shared.sd_diffusers_model = sd_diffusers_model -def get_diffusers_sd_model(): - global first_inference +def set_scheduler(sd_model, sampler_name): + if (sampler_name == "Euler a"): + sd_model.scheduler = EulerAncestralDiscreteScheduler.from_config(sd_model.scheduler.config) + elif (sampler_name == "Euler"): + sd_model.scheduler = EulerDiscreteScheduler.from_config(sd_model.scheduler.config) + elif (sampler_name == "LMS"): + sd_model.scheduler = LMSDiscreteScheduler.from_config(sd_model.scheduler.config) + elif (sampler_name == "Huen"): + sd_model.scheduler = HuenDiscreteScheduler.from_config(sd_model.scheduler.config) + #elif (sampler_name == "DPM2"): + # sd_model.scheduler = KDPM2DiscreteScheduler.from_config(sd_model.scheduler.config) + #elif (sampler_name == "DPM2 a"): + # sd_model.scheduler = 
KDPM2AncestralDiscreteScheduler.from_config(sd_model.scheduler.config) + elif (sampler_name == "DPM++ 2M"): + sd_model.scheduler = DPMSolverMultistepScheduler.from_config(sd_model.scheduler.config, algorithm_type="dpmsolver++", use_karras_sigmas=False) + #elif (sampler_name == "DPM++ 2M SDE"): + # sd_model.scheduler = DPMSolverMultistepScheduler.from_config(sd_model.scheduler.config, algorithm_type="sde-dpmsolver++", use_karras_sigmas=False) + elif (sampler_name == "LMS Karras"): + sd_model.scheduler = LMSDiscreteScheduler.from_config(sd_model.scheduler.config, use_karras_sigmas=True) + elif (sampler_name == "DPM++ 2M Karras"): + sd_model.scheduler = DPMSolverMultistepScheduler.from_config(sd_model.scheduler.config, algorithm_type="dpmsolver++", use_karras_sigmas=True) + #elif (sampler_name == "DPM++ 2M SDE Karras"): + # sd_model.scheduler = DPMSolverMultistepScheduler.from_config(sd_model.scheduler.config, algorithm_type="sde-dpmsolver++", use_karras_sigmas=True) + elif (sampler_name == "DDIM"): + sd_model.scheduler = DDIMScheduler.from_config(sd_model.scheduler.config) + elif (sampler_name == "PLMS"): + sd_model.scheduler = PNDMScheduler.from_config(sd_model.scheduler.config) + #elif (sampler_name == "UniPC"): + # sd_model.scheduler = UniPCMultistepScheduler.from_config(sd_model.scheduler.config) + else: + sd_model.scheduler = EulerAncestralDiscreteScheduler.from_config(sd_model.scheduler.config) + + return sd_model.scheduler + +def get_diffusers_sd_model(sampler_name, exclude_warmup, enable_caching): + global first_inference, sampler_name_global if (first_inference == 1): - model_id = "runwayml/stable-diffusion-v1-5" - sd_model = StableDiffusionPipeline.from_pretrained(model_id) + curr_dir_path = os.getcwd() + model_path = "/models/Stable-diffusion/" + checkpoint_name = shared.opts.sd_model_checkpoint.split(" ")[0] + checkpoint_path = curr_dir_path + model_path + checkpoint_name + sd_model = StableDiffusionPipeline.from_single_file(checkpoint_path) + checkpoint_info = CheckpointInfo(checkpoint_path) + sd_model.sd_checkpoint_info = checkpoint_info + sd_model.sd_model_hash = checkpoint_info.calculate_shorthash() + sd_model.unet = torch.compile(sd_model.unet, backend="openvino") sd_model.safety_checker = None - sd_model.sd_model_hash = sd_model.unet.config._name_or_path.split("/")[-2] - warmup_prompt = "a dog walking in a park" - image = sd_model(warmup_prompt, num_inference_steps=1).images[0] - first_inference = 0 - shared.sd_model = sd_model - shared.sd_model.cond_stage_key = functools.partial(cond_stage_key, shared.sd_model) - return shared.sd_model - + sd_model.scheduler = set_scheduler(sd_model, sampler_name) + sampler_name_global = sampler_name + + if (exclude_warmup): + warmup_prompt = "a dog walking in a park" + image = sd_model(warmup_prompt, num_inference_steps=1).images[0] + first_inference = 0 + shared.sd_diffusers_model = sd_model + shared.sd_diffusers_model.cond_stage_key = functools.partial(cond_stage_key, shared.sd_diffusers_model) + + return shared.sd_diffusers_model def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iteration=0, position_in_batch=0): index = position_in_batch + iteration * p.batch_size @@ -176,8 +157,7 @@ def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iter return f"{all_prompts[index]}{negative_prompt_text}\n{generation_params_text}".strip() -def init(self, all_prompts, all_seeds, all_subseeds): - self.sampler = create_sampler(self.sampler_name, shared.sd_model) +def init_new(self, all_prompts, 
all_seeds, all_subseeds): crop_region = None image_mask = self.image_mask @@ -277,7 +257,7 @@ def init(self, all_prompts, all_seeds, all_subseeds): image = 2. * image - 1. image = image.to(shared.device) - self.init_latent = shared.sd_model.vae.encode(image).latent_dist.sample() + self.init_latent = shared.sd_diffusers_model.vae.encode(image).latent_dist.sample() if self.resize_mode == 3: self.init_latent = torch.nn.functional.interpolate(self.init_latent, size=(self.height // opt_f, self.width // opt_f), mode="bilinear") @@ -290,8 +270,8 @@ def init(self, all_prompts, all_seeds, all_subseeds): latmask = np.around(latmask) latmask = np.tile(latmask[None], (4, 1, 1)) - self.mask = torch.asarray(1.0 - latmask).to(shared.device).type(shared.sd_model.vae.dtype) - self.nmask = torch.asarray(latmask).to(shared.device).type(shared.sd_model.vae.dtype) + self.mask = torch.asarray(1.0 - latmask).to(shared.device).type(shared.sd_diffusers_model.vae.dtype) + self.nmask = torch.asarray(latmask).to(shared.device).type(shared.sd_diffusers_model.vae.dtype) # this needs to be fixed to be done in sample() using actual seeds for batches if self.inpainting_fill == 2: @@ -330,18 +310,15 @@ def process_images_openvino(p: StableDiffusionProcessing) -> Processed: def infotext(iteration=0, position_in_batch=0): return create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, comments, iteration, position_in_batch) - #if os.path.exists(cmd_opts.embeddings_dir) and not p.do_not_reload_embeddings: - #model_hijack.embedding_db.load_textual_inversion_embeddings() - if p.scripts is not None: p.scripts.process(p) infotexts = [] output_images = [] - #ema_scope_context = p.sd_model.ema_scope if shared.backend == shared.Backend.ORIG_SD else nullcontext - with torch.no_grad(): #, ema_scope_context(): + with torch.no_grad(): with devices.autocast(): + print("In autocast") p.init(p.all_prompts, p.all_seeds, p.all_subseeds) if state.job_count == -1: @@ -369,6 +346,7 @@ def process_images_openvino(p: StableDiffusionProcessing) -> Processed: break extra_network_data = p.parse_extra_network_prompts() + print("Extra network data: ", extra_network_data) if not p.disable_extra_networks: with devices.autocast(): @@ -377,6 +355,7 @@ def process_images_openvino(p: StableDiffusionProcessing) -> Processed: if p.scripts is not None: p.scripts.process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds) + print("After process batch") # params.txt should be saved after scripts.process_batch, since the # infotext could be modified by that callback @@ -386,12 +365,15 @@ def process_images_openvino(p: StableDiffusionProcessing) -> Processed: with open(os.path.join(paths.data_path, "params.txt"), "w", encoding="utf8") as file: processed = Processed(p, [], p.seed, "") file.write(create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, comments=[], position_in_batch=0 % p.batch_size, iteration=0 // p.batch_size)) + print("After processed") if p.n_iter > 1: shared.state.job = f"Batch {n+1} out of {p.n_iter}" + print("After shared state job") generator = [torch.Generator(device="cpu").manual_seed(s) for s in p.seeds] - output = shared.sd_model( + print("prompts: ", p.prompts) + output = shared.sd_diffusers_model( prompt=p.prompts, negative_prompt=p.negative_prompts, num_inference_steps=p.steps, @@ -401,7 +383,7 @@ def process_images_openvino(p: StableDiffusionProcessing) -> Processed: generator=generator, output_type="np", ) - x_samples_ddim = output.images + x_samples_ddim = output.images for i, x_sample in 
enumerate(x_samples_ddim): p.batch_index = i @@ -502,7 +484,6 @@ def process_images_openvino(p: StableDiffusionProcessing) -> Processed: return res - class Script(scripts.Script): def title(self): return "Accelerate with OpenVINO" @@ -510,19 +491,44 @@ class Script(scripts.Script): def show(self, is_img2img): return True - - def ui(self, is_img2img): - pass + def ui(self, is_img2img): + core = Core() + openvino_device = gr.Dropdown(label="Select a device", choices=[device for device in core.available_devices], value="CPU") + sampler_name = gr.Dropdown(label="Select a sampling method", choices=["Euler a", "Euler", "LMS", "Huen", "DPM++ 2M", "LMS Karras", "DPM++ 2M Karras", "DDIM", "PLMS"], value="Euler a") + exclude_warmup = gr.Checkbox(label="Run a warm up iteration to pre-load the model (Recommended for performance measurements)", value=True, elem_id=self.elem_id("exclude_warmup")) + run_warmup = gr.Button("Warm up run") + warmup_status = gr.Textbox() + + def warmup(exclude_warmup): + shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, exclude_warmup, enable_caching) + return "Warm up run completed" + run_warmup.click(warmup, exclude_warmup, warmup_status) + + warmup_run_status = gr.Markdown("""Warm up run complete""", visible=False) + #def change_warmup(choice): + # if choice == True: + # shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, exclude_warmup, enable_caching) + # return gr.update(visible=True) + # else: + # return gr.update(visible=False) + #exclude_warmup.change(fn=change_warmup, inputs=exclude_warmup, outputs=warmup_run_status, show_progress=True) + enable_caching = gr.Checkbox(label="Cache the compiled models for faster model load in subsequent launches (Recommended)", value=True, elem_id=self.elem_id("enable_caching")) + + return [openvino_device, sampler_name, exclude_warmup, enable_caching] - def run(self, p): - shared.sd_model = get_diffusers_sd_model() + + def run(self, p, openvino_device, sampler_name, exclude_warmup, enable_caching): + global sampler_name_global, first_inference + if (exclude_warmup == False): + shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, exclude_warmup, enable_caching) + if (sampler_name_global != sampler_name): + shared.sd_diffusers_model.scheduler = set_scheduler(shared.sd_diffusers_model, sampler_name) + sampler_name_global = sampler_name + if self.is_txt2img: processed = process_images_openvino(p) else: - p.sampler_name = samplers_for_img2img[0].name - p.init = functools.partial(init, p) - #p.init(p.all_prompts, p.all_seeds, p.all_subseeds) + p.init = functools.partial(init_new, p) processed = process_images_openvino(p) - return processed From f1b5c727259a399e94c2f5d9cf69de9c5150d128 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Thu, 20 Jul 2023 16:05:24 -0700 Subject: [PATCH 03/24] UI modifications and fixes in img2img --- scripts/openvino_accelerate.py | 85 ++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 34 deletions(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index fab74292..f1139738 100644 --- a/scripts/openvino_accelerate.py +++ b/scripts/openvino_accelerate.py @@ -1,3 +1,6 @@ +#Copyright (C) 2023 Intel Corporation +#SPDX-License-Identifier: AGPL-3.0 + import math import cv2 import os @@ -7,6 +10,7 @@ import gradio as gr import numpy as np import openvino.frontend.pytorch.torchdynamo.backend +import modules import modules.paths as paths import modules.scripts as scripts import modules.shared as shared @@ -40,8 +44,10 @@ from diffusers 
import ( UniPCMultistepScheduler, ) -first_inference=1 -sampler_name_global="Euler a" +first_inference_global = 1 +sampler_name_global = "Euler a" +openvino_device_global = "CPU" +warmed_up_global = 0 def sd_diffusers_model(self): import modules.sd_models @@ -86,9 +92,9 @@ def set_scheduler(sd_model, sampler_name): return sd_model.scheduler -def get_diffusers_sd_model(sampler_name, exclude_warmup, enable_caching): - global first_inference, sampler_name_global - if (first_inference == 1): +def get_diffusers_sd_model(sampler_name, enable_caching): + global first_inference_global, sampler_name_global + if (first_inference_global == 1): curr_dir_path = os.getcwd() model_path = "/models/Stable-diffusion/" checkpoint_name = shared.opts.sd_model_checkpoint.split(" ")[0] @@ -98,16 +104,17 @@ def get_diffusers_sd_model(sampler_name, exclude_warmup, enable_caching): sd_model.sd_checkpoint_info = checkpoint_info sd_model.sd_model_hash = checkpoint_info.calculate_shorthash() - sd_model.unet = torch.compile(sd_model.unet, backend="openvino") sd_model.safety_checker = None sd_model.scheduler = set_scheduler(sd_model, sampler_name) + sd_model.unet = torch.compile(sd_model.unet, backend="openvino") + sd_model.vae.decode = torch.compile(sd_model.vae.decode, backend="openvino") sampler_name_global = sampler_name - if (exclude_warmup): - warmup_prompt = "a dog walking in a park" - image = sd_model(warmup_prompt, num_inference_steps=1).images[0] - first_inference = 0 + warmup_prompt = "a dog walking in a park" + image = sd_model(warmup_prompt, num_inference_steps=1).images[0] + first_inference_global = 0 shared.sd_diffusers_model = sd_model + del sd_model shared.sd_diffusers_model.cond_stage_key = functools.partial(cond_stage_key, shared.sd_diffusers_model) return shared.sd_diffusers_model @@ -260,7 +267,7 @@ def init_new(self, all_prompts, all_seeds, all_subseeds): self.init_latent = shared.sd_diffusers_model.vae.encode(image).latent_dist.sample() if self.resize_mode == 3: - self.init_latent = torch.nn.functional.interpolate(self.init_latent, size=(self.height // opt_f, self.width // opt_f), mode="bilinear") + self.init_latent = torch.nn.functional.interpolate(self.init_latent, size=(self.height // 8, self.width // 8), mode="bilinear") if image_mask is not None: init_mask = latent_mask @@ -484,6 +491,7 @@ def process_images_openvino(p: StableDiffusionProcessing) -> Processed: return res +warm_up_triggered_global = 0 class Script(scripts.Script): def title(self): return "Accelerate with OpenVINO" @@ -493,35 +501,44 @@ class Script(scripts.Script): def ui(self, is_img2img): core = Core() + global first_inference_global, warmed_up_global, warm_up_triggered_global openvino_device = gr.Dropdown(label="Select a device", choices=[device for device in core.available_devices], value="CPU") - sampler_name = gr.Dropdown(label="Select a sampling method", choices=["Euler a", "Euler", "LMS", "Huen", "DPM++ 2M", "LMS Karras", "DPM++ 2M Karras", "DDIM", "PLMS"], value="Euler a") - exclude_warmup = gr.Checkbox(label="Run a warm up iteration to pre-load the model (Recommended for performance measurements)", value=True, elem_id=self.elem_id("exclude_warmup")) - run_warmup = gr.Button("Warm up run") - warmup_status = gr.Textbox() - - def warmup(exclude_warmup): - shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, exclude_warmup, enable_caching) - return "Warm up run completed" - run_warmup.click(warmup, exclude_warmup, warmup_status) - - warmup_run_status = gr.Markdown("""Warm up run complete""", visible=False) - 
#def change_warmup(choice): - # if choice == True: - # shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, exclude_warmup, enable_caching) - # return gr.update(visible=True) - # else: - # return gr.update(visible=False) - #exclude_warmup.change(fn=change_warmup, inputs=exclude_warmup, outputs=warmup_run_status, show_progress=True) + override_sampler = gr.Checkbox(label="Override the sampling selection from the main UI (Recommended as only below sampling methods have been validated for OpenVINO)", value=True) + sampler_name = gr.Radio(label="Select a sampling method", choices=["Euler a", "Euler", "LMS", "Huen", "DPM++ 2M", "LMS Karras", "DPM++ 2M Karras", "DDIM", "PLMS"], value="Euler a") enable_caching = gr.Checkbox(label="Cache the compiled models for faster model load in subsequent launches (Recommended)", value=True, elem_id=self.elem_id("enable_caching")) + run_warmup = gr.Button("Run a warmup iteration (recommended)") + warmup_status = gr.Textbox(label="Status of the warm up iteration", interactive=False) + + def device_change(choice): + global first_inference_global, warmed_up_global + warmed_up_global = 0 + first_inference_global = 1 + return gr.update(value="Device changed to " + choice + ". Press the button to run a new warmup iteration") + openvino_device.change(device_change, openvino_device, warmup_status) - return [openvino_device, sampler_name, exclude_warmup, enable_caching] + def warmup(run_warmup): + global first_inference_global, warmed_up_global + first_inference_global = 1 + shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, enable_caching) + warmed_up_global = 1 + return gr.update(value="Warm up run complete") + run_warmup.click(warmup, run_warmup, warmup_status) + + return [openvino_device, override_sampler, sampler_name, warmup_status, enable_caching] - def run(self, p, openvino_device, sampler_name, exclude_warmup, enable_caching): - global sampler_name_global, first_inference - if (exclude_warmup == False): - shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, exclude_warmup, enable_caching) + def run(self, p, openvino_device, override_sampler, sampler_name, warmup_status, enable_caching): + global first_inference_global, warmed_up_global, sampler_name_global, openvino_device_global + os.environ["OPENVINO_DEVICE"] = str(openvino_device) + if enable_caching: + os.environ["OPENVINO_TORCH_MODEL_CACHING"] = "1" + if (openvino_device_global != openvino_device): + first_inference_global = 1 + openvino_device_global = openvino_device + if (warmed_up_global == 0): + shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, enable_caching) if (sampler_name_global != sampler_name): + print("Sampler name: ", sampler_name) shared.sd_diffusers_model.scheduler = set_scheduler(shared.sd_diffusers_model, sampler_name) sampler_name_global = sampler_name From e8d8156f54f79fc7c7e2c8255a308f3fff1081f8 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Fri, 21 Jul 2023 10:09:50 -0700 Subject: [PATCH 04/24] Device support and caching enabled --- scripts/openvino_accelerate.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index f1139738..21d06fa1 100644 --- a/scripts/openvino_accelerate.py +++ b/scripts/openvino_accelerate.py @@ -92,7 +92,7 @@ def set_scheduler(sd_model, sampler_name): return sd_model.scheduler -def get_diffusers_sd_model(sampler_name, enable_caching): +def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device): 
global first_inference_global, sampler_name_global if (first_inference_global == 1): curr_dir_path = os.getcwd() @@ -111,6 +111,9 @@ def get_diffusers_sd_model(sampler_name, enable_caching): sampler_name_global = sampler_name warmup_prompt = "a dog walking in a park" + os.environ["OPENVINO_DEVICE"] = openvino_device + if enable_caching: + os.environ["OPENVINO_TORCH_MODEL_CACHING"] = "1" image = sd_model(warmup_prompt, num_inference_steps=1).images[0] first_inference_global = 0 shared.sd_diffusers_model = sd_model @@ -516,13 +519,13 @@ class Script(scripts.Script): return gr.update(value="Device changed to " + choice + ". Press the button to run a new warmup iteration") openvino_device.change(device_change, openvino_device, warmup_status) - def warmup(run_warmup): + def warmup(run_warmup, openvino_device, enable_caching): global first_inference_global, warmed_up_global first_inference_global = 1 - shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, enable_caching) + shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, enable_caching, openvino_device) warmed_up_global = 1 return gr.update(value="Warm up run complete") - run_warmup.click(warmup, run_warmup, warmup_status) + run_warmup.click(warmup, [run_warmup, openvino_device, enable_caching], warmup_status) return [openvino_device, override_sampler, sampler_name, warmup_status, enable_caching] @@ -536,7 +539,7 @@ class Script(scripts.Script): first_inference_global = 1 openvino_device_global = openvino_device if (warmed_up_global == 0): - shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, enable_caching) + shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, enable_caching, openvino_device) if (sampler_name_global != sampler_name): print("Sampler name: ", sampler_name) shared.sd_diffusers_model.scheduler = set_scheduler(shared.sd_diffusers_model, sampler_name) From eb7b1a9af4ac1e3b90500c5cd754afd065be3a0b Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Fri, 21 Jul 2023 21:08:01 -0700 Subject: [PATCH 05/24] Added a fix for sampler overriding --- scripts/openvino_accelerate.py | 38 ++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index 21d06fa1..4a9bc205 100644 --- a/scripts/openvino_accelerate.py +++ b/scripts/openvino_accelerate.py @@ -65,8 +65,8 @@ def set_scheduler(sd_model, sampler_name): sd_model.scheduler = EulerDiscreteScheduler.from_config(sd_model.scheduler.config) elif (sampler_name == "LMS"): sd_model.scheduler = LMSDiscreteScheduler.from_config(sd_model.scheduler.config) - elif (sampler_name == "Huen"): - sd_model.scheduler = HuenDiscreteScheduler.from_config(sd_model.scheduler.config) + elif (sampler_name == "Heun"): + sd_model.scheduler = HeunDiscreteScheduler.from_config(sd_model.scheduler.config) #elif (sampler_name == "DPM2"): # sd_model.scheduler = KDPM2DiscreteScheduler.from_config(sd_model.scheduler.config) #elif (sampler_name == "DPM2 a"): @@ -507,25 +507,28 @@ class Script(scripts.Script): global first_inference_global, warmed_up_global, warm_up_triggered_global openvino_device = gr.Dropdown(label="Select a device", choices=[device for device in core.available_devices], value="CPU") override_sampler = gr.Checkbox(label="Override the sampling selection from the main UI (Recommended as only below sampling methods have been validated for OpenVINO)", value=True) - sampler_name = gr.Radio(label="Select a sampling method", choices=["Euler a", "Euler", "LMS", "Huen", 
"DPM++ 2M", "LMS Karras", "DPM++ 2M Karras", "DDIM", "PLMS"], value="Euler a") + sampler_name = gr.Radio(label="Select a sampling method", choices=["Euler a", "Euler", "LMS", "Heun", "DPM++ 2M", "LMS Karras", "DPM++ 2M Karras", "DDIM", "PLMS"], value="Euler a") enable_caching = gr.Checkbox(label="Cache the compiled models for faster model load in subsequent launches (Recommended)", value=True, elem_id=self.elem_id("enable_caching")) run_warmup = gr.Button("Run a warmup iteration (recommended)") warmup_status = gr.Textbox(label="Status of the warm up iteration", interactive=False) def device_change(choice): - global first_inference_global, warmed_up_global - warmed_up_global = 0 - first_inference_global = 1 - return gr.update(value="Device changed to " + choice + ". Press the button to run a new warmup iteration") + global first_inference_global, warmed_up_global, openvino_device_global + if (openvino_device_global == choice): + return gr.update(value="Device selected is " + choice) + else: + warmed_up_global = 0 + first_inference_global = 1 + return gr.update(value="Device changed to " + choice + ". Press the button to run a new warmup iteration") openvino_device.change(device_change, openvino_device, warmup_status) - def warmup(run_warmup, openvino_device, enable_caching): + def warmup(run_warmup, openvino_device, enable_caching, sampler_name): global first_inference_global, warmed_up_global first_inference_global = 1 shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, enable_caching, openvino_device) warmed_up_global = 1 return gr.update(value="Warm up run complete") - run_warmup.click(warmup, [run_warmup, openvino_device, enable_caching], warmup_status) + run_warmup.click(warmup, [run_warmup, openvino_device, enable_caching, sampler_name], warmup_status) return [openvino_device, override_sampler, sampler_name, warmup_status, enable_caching] @@ -535,15 +538,24 @@ class Script(scripts.Script): os.environ["OPENVINO_DEVICE"] = str(openvino_device) if enable_caching: os.environ["OPENVINO_TORCH_MODEL_CACHING"] = "1" + if (openvino_device_global != openvino_device): first_inference_global = 1 openvino_device_global = openvino_device + if (warmed_up_global == 0): shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, enable_caching, openvino_device) - if (sampler_name_global != sampler_name): - print("Sampler name: ", sampler_name) - shared.sd_diffusers_model.scheduler = set_scheduler(shared.sd_diffusers_model, sampler_name) - sampler_name_global = sampler_name + + if override_sampler: + p.sampler_name = sampler_name + else: + supported_samplers = ["Euler a", "Euler", "LMS", "Heun", "DPM++ 2M", "LMS Karras", "DPM++ 2M Karras", "DDIM", "PLMS"] + if (p.sampler_name not in supported_samplers): + p.sampler_name = "Euler a" + + if (sampler_name_global != p.sampler_name): + shared.sd_diffusers_model.scheduler = set_scheduler(shared.sd_diffusers_model, p.sampler_name) + sampler_name_global = p.sampler_name if self.is_txt2img: processed = process_images_openvino(p) From 13536a7e921d68e0d751f9bcf3ec07a88fa1862e Mon Sep 17 00:00:00 2001 From: Ravi Panchumarthy Date: Mon, 24 Jul 2023 13:58:02 -0700 Subject: [PATCH 06/24] Update package versions --- requirements.txt | 3 ++- requirements_versions.txt | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3142085e..6d5ce7ab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,4 +29,5 @@ tomesd torch torchdiffeq torchsde -transformers==4.25.1 +transformers==4.30.0 
+diffusers diff --git a/requirements_versions.txt b/requirements_versions.txt index f71b9d6c..433386ba 100644 --- a/requirements_versions.txt +++ b/requirements_versions.txt @@ -27,4 +27,5 @@ tomesd==0.1.2 torch torchdiffeq==0.2.3 torchsde==0.2.5 -transformers==4.25.1 +transformers==4.30.0 +diffusers From c337bed5fffa82f5d12f4350283938c22e1bdd33 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Mon, 24 Jul 2023 22:08:28 -0700 Subject: [PATCH 07/24] Added initial lora support and removed redundant functions --- scripts/openvino_accelerate.py | 104 +++++++++------------------------ 1 file changed, 26 insertions(+), 78 deletions(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index 4a9bc205..7b99f164 100644 --- a/scripts/openvino_accelerate.py +++ b/scripts/openvino_accelerate.py @@ -16,7 +16,7 @@ import modules.scripts as scripts import modules.shared as shared from modules import images, devices, extra_networks, generation_parameters_copypaste, masking, sd_samplers, sd_samplers_compvis, sd_samplers_kdiffusion, shared -from modules.processing import StableDiffusionProcessing, Processed, apply_overlay, process_images, get_fixed_seed, program_version, StableDiffusionProcessingImg2Img, create_random_tensors +from modules.processing import StableDiffusionProcessing, Processed, apply_overlay, process_images, get_fixed_seed, program_version, StableDiffusionProcessingImg2Img, create_random_tensors, create_infotext from modules.sd_models import list_models, CheckpointInfo from modules.sd_samplers_common import samples_to_image_grid, sample_to_image from modules.shared import Shared, cmd_opts, opts, state @@ -47,7 +47,6 @@ from diffusers import ( first_inference_global = 1 sampler_name_global = "Euler a" openvino_device_global = "CPU" -warmed_up_global = 0 def sd_diffusers_model(self): import modules.sd_models @@ -95,6 +94,8 @@ def set_scheduler(sd_model, sampler_name): def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device): global first_inference_global, sampler_name_global if (first_inference_global == 1): + torch._dynamo.reset() + torch._dynamo.config.verbose=True curr_dir_path = os.getcwd() model_path = "/models/Stable-diffusion/" checkpoint_name = shared.opts.sd_model_checkpoint.split(" ")[0] @@ -103,8 +104,9 @@ def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device): checkpoint_info = CheckpointInfo(checkpoint_path) sd_model.sd_checkpoint_info = checkpoint_info sd_model.sd_model_hash = checkpoint_info.calculate_shorthash() - sd_model.safety_checker = None + sd_model.cond_stage_key = functools.partial(cond_stage_key, shared.sd_model) + sd_model.scheduler = set_scheduler(sd_model, sampler_name) sd_model.unet = torch.compile(sd_model.unet, backend="openvino") sd_model.vae.decode = torch.compile(sd_model.vae.decode, backend="openvino") @@ -115,57 +117,13 @@ def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device): if enable_caching: os.environ["OPENVINO_TORCH_MODEL_CACHING"] = "1" image = sd_model(warmup_prompt, num_inference_steps=1).images[0] + print("warm up run complete") + first_inference_global = 0 shared.sd_diffusers_model = sd_model del sd_model - shared.sd_diffusers_model.cond_stage_key = functools.partial(cond_stage_key, shared.sd_diffusers_model) - return shared.sd_diffusers_model -def create_infotext(p, all_prompts, all_seeds, all_subseeds, comments=None, iteration=0, position_in_batch=0): - index = position_in_batch + iteration * p.batch_size - - clip_skip = getattr(p, 'clip_skip', 
opts.CLIP_stop_at_last_layers) - enable_hr = getattr(p, 'enable_hr', False) - token_merging_ratio = p.get_token_merging_ratio() - token_merging_ratio_hr = p.get_token_merging_ratio(for_hr=True) - - uses_ensd = opts.eta_noise_seed_delta != 0 - if uses_ensd: - uses_ensd = sd_samplers_common.is_sampler_using_eta_noise_seed_delta(p) - - generation_params = { - "Steps": p.steps, - "Sampler": p.sampler_name, - "CFG scale": p.cfg_scale, - "Image CFG scale": getattr(p, 'image_cfg_scale', None), - "Seed": all_seeds[index], - "Face restoration": (opts.face_restoration_model if p.restore_faces else None), - "Size": f"{p.width}x{p.height}", - "Model hash": getattr(p, 'sd_model_hash', None), - "Model": None, - "Variation seed": (None if p.subseed_strength == 0 else all_subseeds[index]), - "Variation seed strength": (None if p.subseed_strength == 0 else p.subseed_strength), - "Seed resize from": (None if p.seed_resize_from_w == 0 or p.seed_resize_from_h == 0 else f"{p.seed_resize_from_w}x{p.seed_resize_from_h}"), - "Denoising strength": getattr(p, 'denoising_strength', None), - "Conditional mask weight": getattr(p, "inpainting_mask_weight", shared.opts.inpainting_mask_weight) if p.is_using_inpainting_conditioning else None, - "Clip skip": None if clip_skip <= 1 else clip_skip, - "ENSD": opts.eta_noise_seed_delta if uses_ensd else None, - "Token merging ratio": None if token_merging_ratio == 0 else token_merging_ratio, - "Token merging ratio hr": None if not enable_hr or token_merging_ratio_hr == 0 else token_merging_ratio_hr, - "Init image hash": getattr(p, 'init_img_hash', None), - "RNG": opts.randn_source if opts.randn_source != "GPU" else None, - "NGMS": None if p.s_min_uncond == 0 else p.s_min_uncond, - **p.extra_generation_params, - "Version": program_version() if opts.add_version_to_infotext else None, - } - - generation_params_text = ", ".join([k if k == v else f'{k}: {generation_parameters_copypaste.quote(v)}' for k, v in generation_params.items() if v is not None]) - - negative_prompt_text = f"\nNegative prompt: {p.all_negative_prompts[index]}" if p.all_negative_prompts[index] else "" - - return f"{all_prompts[index]}{negative_prompt_text}\n{generation_params_text}".strip() - def init_new(self, all_prompts, all_seeds, all_subseeds): crop_region = None @@ -290,7 +248,7 @@ def init_new(self, all_prompts, all_seeds, all_subseeds): self.init_latent = self.init_latent * self.mask -def process_images_openvino(p: StableDiffusionProcessing) -> Processed: +def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_caching, openvino_device) -> Processed: """this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch""" if type(p.prompt) == list: @@ -354,19 +312,25 @@ def process_images_openvino(p: StableDiffusionProcessing) -> Processed: if len(p.prompts) == 0: break + + shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, enable_caching, openvino_device) extra_network_data = p.parse_extra_network_prompts() - print("Extra network data: ", extra_network_data) if not p.disable_extra_networks: with devices.autocast(): extra_networks.activate(p, p.extra_network_data) + # TODO: support multiplier + if ('lora' in modules.extra_networks.extra_network_registry): + import lora + for lora_model in lora.loaded_loras: + shared.sd_diffusers_model.load_lora_weights(os.getcwd() + "/models/Lora/", weight_name=lora_model.name + ".safetensors") + + if p.scripts is not None: p.scripts.process_batch(p, batch_number=n, 
prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds) - print("After process batch") - # params.txt should be saved after scripts.process_batch, since the # infotext could be modified by that callback # Example: a wildcard processed by process_batch sets an extra model @@ -375,14 +339,12 @@ def process_images_openvino(p: StableDiffusionProcessing) -> Processed: with open(os.path.join(paths.data_path, "params.txt"), "w", encoding="utf8") as file: processed = Processed(p, [], p.seed, "") file.write(create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, comments=[], position_in_batch=0 % p.batch_size, iteration=0 // p.batch_size)) - print("After processed") if p.n_iter > 1: shared.state.job = f"Batch {n+1} out of {p.n_iter}" - print("After shared state job") generator = [torch.Generator(device="cpu").manual_seed(s) for s in p.seeds] - print("prompts: ", p.prompts) + output = shared.sd_diffusers_model( prompt=p.prompts, negative_prompt=p.negative_prompts, @@ -494,7 +456,6 @@ def process_images_openvino(p: StableDiffusionProcessing) -> Processed: return res -warm_up_triggered_global = 0 class Script(scripts.Script): def title(self): return "Accelerate with OpenVINO" @@ -504,37 +465,27 @@ class Script(scripts.Script): def ui(self, is_img2img): core = Core() - global first_inference_global, warmed_up_global, warm_up_triggered_global + global first_inference_global openvino_device = gr.Dropdown(label="Select a device", choices=[device for device in core.available_devices], value="CPU") override_sampler = gr.Checkbox(label="Override the sampling selection from the main UI (Recommended as only below sampling methods have been validated for OpenVINO)", value=True) sampler_name = gr.Radio(label="Select a sampling method", choices=["Euler a", "Euler", "LMS", "Heun", "DPM++ 2M", "LMS Karras", "DPM++ 2M Karras", "DDIM", "PLMS"], value="Euler a") enable_caching = gr.Checkbox(label="Cache the compiled models for faster model load in subsequent launches (Recommended)", value=True, elem_id=self.elem_id("enable_caching")) - run_warmup = gr.Button("Run a warmup iteration (recommended)") - warmup_status = gr.Textbox(label="Status of the warm up iteration", interactive=False) + warmup_status = gr.Textbox(label="Device", interactive=False, visible=False) def device_change(choice): - global first_inference_global, warmed_up_global, openvino_device_global + global first_inference_global, openvino_device_global if (openvino_device_global == choice): - return gr.update(value="Device selected is " + choice) + return gr.update(value="Device selected is " + choice, visible=True) else: - warmed_up_global = 0 first_inference_global = 1 - return gr.update(value="Device changed to " + choice + ". Press the button to run a new warmup iteration") + return gr.update(value="Device changed to " + choice + ". 
Model will be re-compiled", visible=True) openvino_device.change(device_change, openvino_device, warmup_status) - def warmup(run_warmup, openvino_device, enable_caching, sampler_name): - global first_inference_global, warmed_up_global - first_inference_global = 1 - shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, enable_caching, openvino_device) - warmed_up_global = 1 - return gr.update(value="Warm up run complete") - run_warmup.click(warmup, [run_warmup, openvino_device, enable_caching, sampler_name], warmup_status) - return [openvino_device, override_sampler, sampler_name, warmup_status, enable_caching] def run(self, p, openvino_device, override_sampler, sampler_name, warmup_status, enable_caching): - global first_inference_global, warmed_up_global, sampler_name_global, openvino_device_global + global first_inference_global, sampler_name_global, openvino_device_global os.environ["OPENVINO_DEVICE"] = str(openvino_device) if enable_caching: os.environ["OPENVINO_TORCH_MODEL_CACHING"] = "1" @@ -543,9 +494,6 @@ class Script(scripts.Script): first_inference_global = 1 openvino_device_global = openvino_device - if (warmed_up_global == 0): - shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, enable_caching, openvino_device) - if override_sampler: p.sampler_name = sampler_name else: @@ -558,9 +506,9 @@ class Script(scripts.Script): sampler_name_global = p.sampler_name if self.is_txt2img: - processed = process_images_openvino(p) + processed = process_images_openvino(p, p.sampler_name, enable_caching, openvino_device) else: p.init = functools.partial(init_new, p) - processed = process_images_openvino(p) + processed = process_images_openvino(p, p.sampler_name, enable_caching, openvino_device) return processed From acfb0598e81003e16fd2f47516df48fdbab84de2 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Tue, 25 Jul 2023 20:57:29 -0700 Subject: [PATCH 08/24] Removed global variables and added performance metrics --- scripts/openvino_accelerate.py | 100 +++++++++++++++++---------------- 1 file changed, 53 insertions(+), 47 deletions(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index 7b99f164..e9cc878d 100644 --- a/scripts/openvino_accelerate.py +++ b/scripts/openvino_accelerate.py @@ -5,6 +5,7 @@ import math import cv2 import os import torch +import time import functools import gradio as gr import numpy as np @@ -15,13 +16,12 @@ import modules.paths as paths import modules.scripts as scripts import modules.shared as shared -from modules import images, devices, extra_networks, generation_parameters_copypaste, masking, sd_samplers, sd_samplers_compvis, sd_samplers_kdiffusion, shared +from modules import images, devices, extra_networks, generation_parameters_copypaste, masking, sd_samplers, sd_samplers_compvis, sd_samplers_kdiffusion, shared, call_queue from modules.processing import StableDiffusionProcessing, Processed, apply_overlay, process_images, get_fixed_seed, program_version, StableDiffusionProcessingImg2Img, create_random_tensors, create_infotext from modules.sd_models import list_models, CheckpointInfo from modules.sd_samplers_common import samples_to_image_grid, sample_to_image from modules.shared import Shared, cmd_opts, opts, state from modules.ui import plaintext_to_html, create_sampler_and_steps_selection -from webui import initialize_rest from diffusers import StableDiffusionPipeline from PIL import Image, ImageFilter, ImageOps @@ -44,9 +44,12 @@ from diffusers import ( UniPCMultistepScheduler, ) -first_inference_global = 1 
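The hunk continues below by replacing these module-level flags with a single ModelState object, which later patches extend with resolution, batch-size, and mode tracking. A minimal standalone sketch of the pattern (the needs_recompile helper is illustrative, not part of the patch):

    class ModelState:
        """Tracks settings whose change forces an OpenVINO recompile."""
        def __init__(self):
            self.recompile = 1   # 1 => compile on the next inference
            self.device = "CPU"  # target OpenVINO device

    model_state = ModelState()

    def needs_recompile(device: str) -> bool:
        # Illustrative helper: request a recompile whenever the device changes.
        if model_state.device != device:
            model_state.device = device
            model_state.recompile = 1
        return model_state.recompile == 1
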
-sampler_name_global = "Euler a" -openvino_device_global = "CPU" +class ModelState: + def __init__(self): + self.recompile = 1 + self.device = "CPU" + +model_state = ModelState() def sd_diffusers_model(self): import modules.sd_models @@ -66,34 +69,23 @@ def set_scheduler(sd_model, sampler_name): sd_model.scheduler = LMSDiscreteScheduler.from_config(sd_model.scheduler.config) elif (sampler_name == "Heun"): sd_model.scheduler = HeunDiscreteScheduler.from_config(sd_model.scheduler.config) - #elif (sampler_name == "DPM2"): - # sd_model.scheduler = KDPM2DiscreteScheduler.from_config(sd_model.scheduler.config) - #elif (sampler_name == "DPM2 a"): - # sd_model.scheduler = KDPM2AncestralDiscreteScheduler.from_config(sd_model.scheduler.config) elif (sampler_name == "DPM++ 2M"): sd_model.scheduler = DPMSolverMultistepScheduler.from_config(sd_model.scheduler.config, algorithm_type="dpmsolver++", use_karras_sigmas=False) - #elif (sampler_name == "DPM++ 2M SDE"): - # sd_model.scheduler = DPMSolverMultistepScheduler.from_config(sd_model.scheduler.config, algorithm_type="sde-dpmsolver++", use_karras_sigmas=False) elif (sampler_name == "LMS Karras"): sd_model.scheduler = LMSDiscreteScheduler.from_config(sd_model.scheduler.config, use_karras_sigmas=True) elif (sampler_name == "DPM++ 2M Karras"): sd_model.scheduler = DPMSolverMultistepScheduler.from_config(sd_model.scheduler.config, algorithm_type="dpmsolver++", use_karras_sigmas=True) - #elif (sampler_name == "DPM++ 2M SDE Karras"): - # sd_model.scheduler = DPMSolverMultistepScheduler.from_config(sd_model.scheduler.config, algorithm_type="sde-dpmsolver++", use_karras_sigmas=True) elif (sampler_name == "DDIM"): sd_model.scheduler = DDIMScheduler.from_config(sd_model.scheduler.config) elif (sampler_name == "PLMS"): sd_model.scheduler = PNDMScheduler.from_config(sd_model.scheduler.config) - #elif (sampler_name == "UniPC"): - # sd_model.scheduler = UniPCMultistepScheduler.from_config(sd_model.scheduler.config) else: sd_model.scheduler = EulerAncestralDiscreteScheduler.from_config(sd_model.scheduler.config) return sd_model.scheduler def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device): - global first_inference_global, sampler_name_global - if (first_inference_global == 1): + if (model_state.recompile == 1): torch._dynamo.reset() torch._dynamo.config.verbose=True curr_dir_path = os.getcwd() @@ -106,20 +98,9 @@ def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device): sd_model.sd_model_hash = checkpoint_info.calculate_shorthash() sd_model.safety_checker = None sd_model.cond_stage_key = functools.partial(cond_stage_key, shared.sd_model) - sd_model.scheduler = set_scheduler(sd_model, sampler_name) sd_model.unet = torch.compile(sd_model.unet, backend="openvino") sd_model.vae.decode = torch.compile(sd_model.vae.decode, backend="openvino") - sampler_name_global = sampler_name - - warmup_prompt = "a dog walking in a park" - os.environ["OPENVINO_DEVICE"] = openvino_device - if enable_caching: - os.environ["OPENVINO_TORCH_MODEL_CACHING"] = "1" - image = sd_model(warmup_prompt, num_inference_steps=1).images[0] - print("warm up run complete") - - first_inference_global = 0 shared.sd_diffusers_model = sd_model del sd_model return shared.sd_diffusers_model @@ -286,7 +267,6 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c with torch.no_grad(): with devices.autocast(): - print("In autocast") p.init(p.all_prompts, p.all_seeds, p.all_subseeds) if state.job_count == -1: @@ -314,6 +294,7 @@ def 
process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c break shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, enable_caching, openvino_device) + shared.sd_diffusers_model.scheduler = set_scheduler(shared.sd_diffusers_model, sampler_name) extra_network_data = p.parse_extra_network_prompts() @@ -327,7 +308,6 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c for lora_model in lora.loaded_loras: shared.sd_diffusers_model.load_lora_weights(os.getcwd() + "/models/Lora/", weight_name=lora_model.name + ".safetensors") - if p.scripts is not None: p.scripts.process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds) @@ -344,7 +324,13 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c shared.state.job = f"Batch {n+1} out of {p.n_iter}" generator = [torch.Generator(device="cpu").manual_seed(s) for s in p.seeds] + + time_stamps = [] + def callback(iter, t, latents): + time_stamps.append(time.time()) + + time_stamps.append(time.time()) output = shared.sd_diffusers_model( prompt=p.prompts, negative_prompt=p.negative_prompts, @@ -354,7 +340,15 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c width=p.width, generator=generator, output_type="np", + callback = callback, + callback_steps = 1 ) + + model_state.recompile = 0 + + warmup_duration = time_stamps[1] - time_stamps[0] + generation_rate = (p.steps - 1) / (time_stamps[-1] - time_stamps[1]) + x_samples_ddim = output.images for i, x_sample in enumerate(x_samples_ddim): @@ -439,7 +433,7 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c extra_networks.deactivate(p, p.extra_network_data) devices.torch_gc() - + res = Processed( p, images_list=output_images, @@ -451,6 +445,14 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c infotexts=infotexts, ) + res.info = res.info + ", Warm up time: " + str(round(warmup_duration, 2)) + " secs " + + if (generation_rate >= 1.0): + res.info = res.info + ", Performance: " + str(round(generation_rate, 2)) + " it/s " + else: + res.info = res.info + ", Performance: " + str(round(1/generation_rate, 2)) + " s/it " + + if p.scripts is not None: p.scripts.postprocess(p, res) @@ -465,34 +467,41 @@ class Script(scripts.Script): def ui(self, is_img2img): core = Core() - global first_inference_global openvino_device = gr.Dropdown(label="Select a device", choices=[device for device in core.available_devices], value="CPU") override_sampler = gr.Checkbox(label="Override the sampling selection from the main UI (Recommended as only below sampling methods have been validated for OpenVINO)", value=True) sampler_name = gr.Radio(label="Select a sampling method", choices=["Euler a", "Euler", "LMS", "Heun", "DPM++ 2M", "LMS Karras", "DPM++ 2M Karras", "DDIM", "PLMS"], value="Euler a") - enable_caching = gr.Checkbox(label="Cache the compiled models for faster model load in subsequent launches (Recommended)", value=True, elem_id=self.elem_id("enable_caching")) + enable_caching = gr.Checkbox(label="Cache the compiled models on disk for faster model load in subsequent launches (Recommended)", value=True, elem_id=self.elem_id("enable_caching")) warmup_status = gr.Textbox(label="Device", interactive=False, visible=False) + warmup_note = gr.Markdown( + """ + ### + ### Note: + First inference involves compilation of the model for best performance. 
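The measurement behind this note is the per-step callback added earlier in this patch: the gap between the pipeline call and the first callback is reported as warm-up time (it includes compilation), and only the remaining steps count toward the it/s figure. A standalone sketch of the same scheme, assuming a diffusers pipeline with the classic callback/callback_steps API used here:

    import time

    def timed_generate(pipe, prompt, steps):
        # Mirrors the patch's measurement: time_stamps[0] is taken before the
        # pipeline call, then one stamp per denoising step via the callback.
        time_stamps = [time.time()]

        def on_step(step_index, timestep, latents):
            time_stamps.append(time.time())

        images = pipe(prompt, num_inference_steps=steps,
                      callback=on_step, callback_steps=1).images

        warmup = time_stamps[1] - time_stamps[0]                  # first step + compile
        rate = (steps - 1) / (time_stamps[-1] - time_stamps[1])   # steady-state it/s
        print(f"Warm up time: {warmup:.2f} secs, Performance: {rate:.2f} it/s")
        return images
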
+ Excluding the first inference (or warm up inference) is recommended for + performance measurements. When resolution, batchsize, or device is changed, + or samplers like DPM++ or Karras are selected, model is recompiled. Subsequent + iterations use the cached compiled model for faster inference. + """) - def device_change(choice): - global first_inference_global, openvino_device_global - if (openvino_device_global == choice): + def device_change(choice): + if (model_state.device == choice): return gr.update(value="Device selected is " + choice, visible=True) else: - first_inference_global = 1 + model_state.recompile = 1 return gr.update(value="Device changed to " + choice + ". Model will be re-compiled", visible=True) openvino_device.change(device_change, openvino_device, warmup_status) - return [openvino_device, override_sampler, sampler_name, warmup_status, enable_caching] + return [openvino_device, override_sampler, sampler_name, enable_caching] - def run(self, p, openvino_device, override_sampler, sampler_name, warmup_status, enable_caching): - global first_inference_global, sampler_name_global, openvino_device_global + def run(self, p, openvino_device, override_sampler, sampler_name, enable_caching): os.environ["OPENVINO_DEVICE"] = str(openvino_device) if enable_caching: os.environ["OPENVINO_TORCH_MODEL_CACHING"] = "1" - if (openvino_device_global != openvino_device): - first_inference_global = 1 - openvino_device_global = openvino_device + if (model_state.device != openvino_device): + model_state.recompile = 1 + model_state.device = openvino_device if override_sampler: p.sampler_name = sampler_name @@ -501,10 +510,6 @@ class Script(scripts.Script): if (p.sampler_name not in supported_samplers): p.sampler_name = "Euler a" - if (sampler_name_global != p.sampler_name): - shared.sd_diffusers_model.scheduler = set_scheduler(shared.sd_diffusers_model, p.sampler_name) - sampler_name_global = p.sampler_name - if self.is_txt2img: processed = process_images_openvino(p, p.sampler_name, enable_caching, openvino_device) else: @@ -512,3 +517,4 @@ class Script(scripts.Script): processed = process_images_openvino(p, p.sampler_name, enable_caching, openvino_device) return processed + From 52c891bdf4a443e14fe7e6659e40cfcf3ac26ed6 Mon Sep 17 00:00:00 2001 From: Ravi Panchumarthy Date: Wed, 26 Jul 2023 19:08:13 -0700 Subject: [PATCH 09/24] update gradio to 3.39.0 --- requirements.txt | 2 +- requirements_versions.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 006c4cbe..f19f3e81 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ blendmodes clean-fid einops gfpgan -gradio==3.32.0 +gradio==3.39.0 inflection jsonmerge kornia diff --git a/requirements_versions.txt b/requirements_versions.txt index 9cd398b0..6c1226a9 100644 --- a/requirements_versions.txt +++ b/requirements_versions.txt @@ -7,7 +7,7 @@ clean-fid==0.1.35 einops==0.4.1 fastapi==0.94.0 gfpgan==1.3.8 -gradio==3.32.0 +gradio==3.39.0 httpcore==0.15 inflection==0.5.1 jsonmerge==1.8.0 From 59a576daf9a1ab4103c0fb2cf98c658010afddcc Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Wed, 26 Jul 2023 21:11:09 -0700 Subject: [PATCH 10/24] Added cache clearing for model recompilation --- scripts/openvino_accelerate.py | 104 ++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 48 deletions(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index e9cc878d..44576240 100644 --- a/scripts/openvino_accelerate.py +++ 
b/scripts/openvino_accelerate.py @@ -1,56 +1,62 @@ -#Copyright (C) 2023 Intel Corporation -#SPDX-License-Identifier: AGPL-3.0 +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: AGPL-3.0 import math import cv2 import os import torch import time +import hashlib import functools import gradio as gr import numpy as np -import openvino.frontend.pytorch.torchdynamo.backend import modules import modules.paths as paths import modules.scripts as scripts -import modules.shared as shared -from modules import images, devices, extra_networks, generation_parameters_copypaste, masking, sd_samplers, sd_samplers_compvis, sd_samplers_kdiffusion, shared, call_queue -from modules.processing import StableDiffusionProcessing, Processed, apply_overlay, process_images, get_fixed_seed, program_version, StableDiffusionProcessingImg2Img, create_random_tensors, create_infotext -from modules.sd_models import list_models, CheckpointInfo -from modules.sd_samplers_common import samples_to_image_grid, sample_to_image -from modules.shared import Shared, cmd_opts, opts, state -from modules.ui import plaintext_to_html, create_sampler_and_steps_selection +from modules import images, devices, extra_networks, masking, shared +from modules.processing import ( + StableDiffusionProcessing, Processed, apply_overlay, apply_color_correction, + get_fixed_seed, create_random_tensors, create_infotext, setup_color_correction +) +from modules.sd_models import CheckpointInfo +from modules.shared import Shared, opts, state -from diffusers import StableDiffusionPipeline -from PIL import Image, ImageFilter, ImageOps -from modules import sd_samplers_common +from PIL import Image, ImageOps + +import openvino.frontend.pytorch.torchdynamo.backend +from openvino.frontend.pytorch.torchdynamo.execute import partitioned_modules, compiled_cache from openvino.runtime import Core from diffusers import ( + StableDiffusionPipeline, DDIMScheduler, DPMSolverMultistepScheduler, - DPMSolverSDEScheduler, - DPMSolverSinglestepScheduler, EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, HeunDiscreteScheduler, - IPNDMScheduler, - KDPM2AncestralDiscreteScheduler, - KDPM2DiscreteScheduler, LMSDiscreteScheduler, PNDMScheduler, - UniPCMultistepScheduler, ) class ModelState: def __init__(self): self.recompile = 1 self.device = "CPU" + self.height = 512 + self.width = 512 + self.batch_size = 1 model_state = ModelState() +def openvino_clear_caches(): + global partitioned_modules + global compiled_cache + + compiled_cache.clear() + partitioned_modules.clear() + def sd_diffusers_model(self): import modules.sd_models return modules.sd_models.model_data.get_sd_model() @@ -58,7 +64,7 @@ def sd_diffusers_model(self): def cond_stage_key(self): return None -Shared.sd_diffusers_model = sd_diffusers_model +shared.sd_diffusers_model = sd_diffusers_model def set_scheduler(sd_model, sampler_name): if (sampler_name == "Euler a"): @@ -84,14 +90,14 @@ def set_scheduler(sd_model, sampler_name): return sd_model.scheduler -def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device): +def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device): if (model_state.recompile == 1): torch._dynamo.reset() - torch._dynamo.config.verbose=True + openvino_clear_caches() curr_dir_path = os.getcwd() model_path = "/models/Stable-diffusion/" checkpoint_name = shared.opts.sd_model_checkpoint.split(" ")[0] - checkpoint_path = curr_dir_path + model_path + checkpoint_name + checkpoint_path = curr_dir_path + model_path + checkpoint_name sd_model = 
StableDiffusionPipeline.from_single_file(checkpoint_path) checkpoint_info = CheckpointInfo(checkpoint_path) sd_model.sd_checkpoint_info = checkpoint_info @@ -103,7 +109,7 @@ def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device): sd_model.vae.decode = torch.compile(sd_model.vae.decode, backend="openvino") shared.sd_diffusers_model = sd_model del sd_model - return shared.sd_diffusers_model + return shared.sd_diffusers_model def init_new(self, all_prompts, all_seeds, all_subseeds): @@ -265,7 +271,7 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c infotexts = [] output_images = [] - with torch.no_grad(): + with torch.no_grad(): with devices.autocast(): p.init(p.all_prompts, p.all_seeds, p.all_subseeds) @@ -292,7 +298,13 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c if len(p.prompts) == 0: break - + + if (model_state.height != p.height or model_state.width != p.width or model_state.batch_size != p.batch_size): + model_state.recompile = 1 + model_state.height = p.height + model_state.width = p.width + model_state.batch_size = p.batch_size + shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, enable_caching, openvino_device) shared.sd_diffusers_model.scheduler = set_scheduler(shared.sd_diffusers_model, sampler_name) @@ -324,7 +336,7 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c shared.state.job = f"Batch {n+1} out of {p.n_iter}" generator = [torch.Generator(device="cpu").manual_seed(s) for s in p.seeds] - + time_stamps = [] def callback(iter, t, latents): @@ -349,7 +361,7 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c warmup_duration = time_stamps[1] - time_stamps[0] generation_rate = (p.steps - 1) / (time_stamps[-1] - time_stamps[1]) - x_samples_ddim = output.images + x_samples_ddim = output.images for i, x_sample in enumerate(x_samples_ddim): p.batch_index = i @@ -433,7 +445,7 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c extra_networks.deactivate(p, p.extra_network_data) devices.torch_gc() - + res = Processed( p, images_list=output_images, @@ -446,12 +458,12 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c ) res.info = res.info + ", Warm up time: " + str(round(warmup_duration, 2)) + " secs " - + if (generation_rate >= 1.0): res.info = res.info + ", Performance: " + str(round(generation_rate, 2)) + " it/s " else: res.info = res.info + ", Performance: " + str(round(1/generation_rate, 2)) + " s/it " - + if p.scripts is not None: p.scripts.postprocess(p, res) @@ -465,9 +477,9 @@ class Script(scripts.Script): def show(self, is_img2img): return True - def ui(self, is_img2img): + def ui(self, is_img2img): core = Core() - openvino_device = gr.Dropdown(label="Select a device", choices=[device for device in core.available_devices], value="CPU") + openvino_device = gr.Dropdown(label="Select a device", choices=[device for device in core.available_devices], value=model_state.device) override_sampler = gr.Checkbox(label="Override the sampling selection from the main UI (Recommended as only below sampling methods have been validated for OpenVINO)", value=True) sampler_name = gr.Radio(label="Select a sampling method", choices=["Euler a", "Euler", "LMS", "Heun", "DPM++ 2M", "LMS Karras", "DPM++ 2M Karras", "DDIM", "PLMS"], value="Euler a") enable_caching = gr.Checkbox(label="Cache the compiled models on disk for faster model load in subsequent launches 
(Recommended)", value=True, elem_id=self.elem_id("enable_caching")) @@ -476,33 +488,29 @@ class Script(scripts.Script): """ ### ### Note: - First inference involves compilation of the model for best performance. - Excluding the first inference (or warm up inference) is recommended for - performance measurements. When resolution, batchsize, or device is changed, - or samplers like DPM++ or Karras are selected, model is recompiled. Subsequent + First inference involves compilation of the model for best performance. + Excluding the first inference (or warm up inference) is recommended for + performance measurements. When resolution, batchsize, or device is changed, + or samplers like DPM++ or Karras are selected, model is recompiled. Subsequent iterations use the cached compiled model for faster inference. - """) - - def device_change(choice): + """) + + def device_change(choice): if (model_state.device == choice): return gr.update(value="Device selected is " + choice, visible=True) else: - model_state.recompile = 1 + model_state.device = choice + model_state.recompile = 1 return gr.update(value="Device changed to " + choice + ". Model will be re-compiled", visible=True) - openvino_device.change(device_change, openvino_device, warmup_status) + openvino_device.change(device_change, openvino_device, warmup_status) return [openvino_device, override_sampler, sampler_name, enable_caching] - def run(self, p, openvino_device, override_sampler, sampler_name, enable_caching): - os.environ["OPENVINO_DEVICE"] = str(openvino_device) + os.environ["OPENVINO_TORCH_BACKEND_DEVICE"] = str(openvino_device) if enable_caching: os.environ["OPENVINO_TORCH_MODEL_CACHING"] = "1" - if (model_state.device != openvino_device): - model_state.recompile = 1 - model_state.device = openvino_device - if override_sampler: p.sampler_name = sampler_name else: From 8697de64b2daaac49e114f6673d7ac5adf1c4084 Mon Sep 17 00:00:00 2001 From: Devang Aggarwal Date: Fri, 28 Jul 2023 07:58:08 -0700 Subject: [PATCH 11/24] Update requirements_versions.txt --- requirements_versions.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements_versions.txt b/requirements_versions.txt index 6c1226a9..d2744c98 100644 --- a/requirements_versions.txt +++ b/requirements_versions.txt @@ -5,7 +5,7 @@ basicsr==1.4.2 blendmodes==2022 clean-fid==0.1.35 einops==0.4.1 -fastapi==0.94.0 +fastapi==0.100.1 gfpgan==1.3.8 gradio==3.39.0 httpcore==0.15 @@ -30,3 +30,4 @@ torchdiffeq==0.2.3 torchsde==0.2.5 transformers==4.30.0 diffusers + From dcdd9e0978ccdc2f71d8e6941d94d07743ff511f Mon Sep 17 00:00:00 2001 From: Devang Aggarwal Date: Fri, 28 Jul 2023 07:59:44 -0700 Subject: [PATCH 12/24] Update requirements_versions.txt --- requirements_versions.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_versions.txt b/requirements_versions.txt index d2744c98..a6ea9d5a 100644 --- a/requirements_versions.txt +++ b/requirements_versions.txt @@ -5,7 +5,7 @@ basicsr==1.4.2 blendmodes==2022 clean-fid==0.1.35 einops==0.4.1 -fastapi==0.100.1 +fastapi==0.94.0 gfpgan==1.3.8 gradio==3.39.0 httpcore==0.15 From d8c7ce313566ed8fee2e377de4df5fc341ae3bb4 Mon Sep 17 00:00:00 2001 From: Cavus Mustafa Date: Fri, 28 Jul 2023 17:33:03 -0700 Subject: [PATCH 13/24] OpenVINO: Illegal size check added for GPU --- scripts/openvino_accelerate.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index 44576240..3fef7c63 100644 --- a/scripts/openvino_accelerate.py +++ 
b/scripts/openvino_accelerate.py @@ -243,6 +243,11 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c else: assert p.prompt is not None + if openvino_device[:3] == "GPU": + img_size_err_message = "Image height and width should be equal or less than 728 for GPU execution" + assert p.height <= 728, img_size_err_message + assert p.width <= 728, img_size_err_message + devices.torch_gc() seed = get_fixed_seed(p.seed) From 9f31570ae8a27ad944b4334c046082fbb2b9d0a2 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Sat, 29 Jul 2023 23:25:01 -0700 Subject: [PATCH 14/24] Added fixes for image to image and inpainting --- scripts/openvino_accelerate.py | 118 ++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 52 deletions(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index 3fef7c63..568ae8e3 100644 --- a/scripts/openvino_accelerate.py +++ b/scripts/openvino_accelerate.py @@ -31,6 +31,8 @@ from openvino.runtime import Core from diffusers import ( StableDiffusionPipeline, + StableDiffusionImg2ImgPipeline, + StableDiffusionInpaintPipeline, DDIMScheduler, DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler, @@ -47,6 +49,7 @@ class ModelState: self.height = 512 self.width = 512 self.batch_size = 1 + self.mode = 0 model_state = ModelState() @@ -90,7 +93,7 @@ def set_scheduler(sd_model, sampler_name): return sd_model.scheduler -def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device): +def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device, mode): if (model_state.recompile == 1): torch._dynamo.reset() openvino_clear_caches() @@ -99,7 +102,12 @@ def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device): checkpoint_name = shared.opts.sd_model_checkpoint.split(" ")[0] checkpoint_path = curr_dir_path + model_path + checkpoint_name sd_model = StableDiffusionPipeline.from_single_file(checkpoint_path) + if (mode == 1): + sd_model = StableDiffusionImg2ImgPipeline.from_single_file(checkpoint_path) + elif (mode == 2): + sd_model = StableDiffusionInpaintPipeline.from_pretrained(curr_dir_path + model_path, **sd_model.components, local_files_only=True) checkpoint_info = CheckpointInfo(checkpoint_path) + #model_state.mode = mode sd_model.sd_checkpoint_info = checkpoint_info sd_model.sd_model_hash = checkpoint_info.calculate_shorthash() sd_model.safety_checker = None @@ -160,7 +168,6 @@ def init_new(self, all_prompts, all_seeds, all_subseeds): self.color_corrections = [] imgs = [] for img in self.init_images: - # Save init image if opts.save_init_img: self.init_img_hash = hashlib.md5(img.tobytes()).hexdigest() @@ -174,7 +181,8 @@ def init_new(self, all_prompts, all_seeds, all_subseeds): if image_mask is not None: image_masked = Image.new('RGBa', (image.width, image.height)) image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L'))) - + self.mask = image_mask + image_mask.save("/home/yamini/pytorch_fx/test/mask_image.jpg") self.overlay_images.append(image_masked.convert('RGBA')) # crop_region is not None if we are doing inpaint full res @@ -182,6 +190,7 @@ def init_new(self, all_prompts, all_seeds, all_subseeds): image = image.crop(crop_region) image = images.resize_image(2, image, self.width, self.height) + self.init_images = image if image_mask is not None: if self.inpainting_fill != 1: image = masking.fill(image, latent_mask) @@ -208,34 +217,8 @@ def init_new(self, all_prompts, all_seeds, all_subseeds): else: raise 
RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less") - image = torch.from_numpy(batch_images) - image = 2. * image - 1. - image = image.to(shared.device) - self.init_latent = shared.sd_diffusers_model.vae.encode(image).latent_dist.sample() - - if self.resize_mode == 3: - self.init_latent = torch.nn.functional.interpolate(self.init_latent, size=(self.height // 8, self.width // 8), mode="bilinear") - - if image_mask is not None: - init_mask = latent_mask - latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2])) - latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255 - latmask = latmask[0] - latmask = np.around(latmask) - latmask = np.tile(latmask[None], (4, 1, 1)) - - self.mask = torch.asarray(1.0 - latmask).to(shared.device).type(shared.sd_diffusers_model.vae.dtype) - self.nmask = torch.asarray(latmask).to(shared.device).type(shared.sd_diffusers_model.vae.dtype) - - # this needs to be fixed to be done in sample() using actual seeds for batches - if self.inpainting_fill == 2: - self.init_latent = self.init_latent * self.mask + create_random_tensors(self.init_latent.shape[1:], all_seeds[0:self.init_latent.shape[0]]) * self.nmask - elif self.inpainting_fill == 3: - self.init_latent = self.init_latent * self.mask - - -def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_caching, openvino_device) -> Processed: +def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_caching, openvino_device, mode) -> Processed: """this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch""" if type(p.prompt) == list: @@ -243,11 +226,6 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c else: assert p.prompt is not None - if openvino_device[:3] == "GPU": - img_size_err_message = "Image height and width should be equal or less than 728 for GPU execution" - assert p.height <= 728, img_size_err_message - assert p.width <= 728, img_size_err_message - devices.torch_gc() seed = get_fixed_seed(p.seed) @@ -304,13 +282,14 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c if len(p.prompts) == 0: break - if (model_state.height != p.height or model_state.width != p.width or model_state.batch_size != p.batch_size): + if (model_state.height != p.height or model_state.width != p.width or model_state.batch_size != p.batch_size or model_state.mode != mode): model_state.recompile = 1 model_state.height = p.height model_state.width = p.width model_state.batch_size = p.batch_size + model_state.mode = mode - shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, enable_caching, openvino_device) + shared.sd_diffusers_model = get_diffusers_sd_model(sampler_name, enable_caching, openvino_device, mode) shared.sd_diffusers_model.scheduler = set_scheduler(shared.sd_diffusers_model, sampler_name) extra_network_data = p.parse_extra_network_prompts() @@ -348,19 +327,47 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c time_stamps.append(time.time()) time_stamps.append(time.time()) - output = shared.sd_diffusers_model( - prompt=p.prompts, - negative_prompt=p.negative_prompts, - num_inference_steps=p.steps, - guidance_scale=p.cfg_scale, - height=p.height, - width=p.width, - generator=generator, - output_type="np", - callback = callback, - callback_steps = 1 - ) - + if (mode == 0): + output = 
shared.sd_diffusers_model( + prompt=p.prompts, + negative_prompt=p.negative_prompts, + num_inference_steps=p.steps, + guidance_scale=p.cfg_scale, + width = p.width, + height = p.height, + generator=generator, + output_type="np", + callback = callback, + callback_steps = 1, + ) + elif (mode == 1): + output = shared.sd_diffusers_model( + prompt=p.prompts, + negative_prompt=p.negative_prompts, + num_inference_steps=p.steps, + guidance_scale=p.cfg_scale, + image = p.init_images, + strength = p.denoising_strength, + generator=generator, + output_type="np", + callback = callback, + callback_steps = 1, + ) + else: + output = shared.sd_diffusers_model( + prompt=p.prompts, + negative_prompt=p.negative_prompts, + num_inference_steps=p.steps, + guidance_scale=p.cfg_scale, + mask_image = p.mask, + image = p.init_images, + strength = p.denoising_strength, + generator=generator, + output_type="np", + callback = callback, + callback_steps = 1, + ) + model_state.recompile = 0 warmup_duration = time_stamps[1] - time_stamps[0] @@ -523,11 +530,18 @@ class Script(scripts.Script): if (p.sampler_name not in supported_samplers): p.sampler_name = "Euler a" + # mode can be 0, 1, 2 corresponding to txt2img, img2img, inpaint respectively + mode = 0 if self.is_txt2img: - processed = process_images_openvino(p, p.sampler_name, enable_caching, openvino_device) + mode = 0 + processed = process_images_openvino(p, p.sampler_name, enable_caching, openvino_device, mode) else: + if p.image_mask is None: + mode = 1 + else: + mode = 2 p.init = functools.partial(init_new, p) - processed = process_images_openvino(p, p.sampler_name, enable_caching, openvino_device) + processed = process_images_openvino(p, p.sampler_name, enable_caching, openvino_device, mode) return processed From 63dea887b2f8e4967df33cec2ead7056b9a01f69 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Sat, 29 Jul 2023 23:31:50 -0700 Subject: [PATCH 15/24] Removed print of local path --- scripts/openvino_accelerate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index 568ae8e3..fd09e581 100644 --- a/scripts/openvino_accelerate.py +++ b/scripts/openvino_accelerate.py @@ -182,7 +182,6 @@ def init_new(self, all_prompts, all_seeds, all_subseeds): image_masked = Image.new('RGBa', (image.width, image.height)) image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L'))) self.mask = image_mask - image_mask.save("/home/yamini/pytorch_fx/test/mask_image.jpg") self.overlay_images.append(image_masked.convert('RGBA')) # crop_region is not None if we are doing inpaint full res From ae7ce7b76a3d7bf5edb6d000a0a831f7a5285abb Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Mon, 31 Jul 2023 17:03:03 -0700 Subject: [PATCH 16/24] Made changes to remove extra downloads --- scripts/openvino_accelerate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index fd09e581..d77d902f 100644 --- a/scripts/openvino_accelerate.py +++ b/scripts/openvino_accelerate.py @@ -101,11 +101,11 @@ def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device, mode): model_path = "/models/Stable-diffusion/" checkpoint_name = shared.opts.sd_model_checkpoint.split(" ")[0] checkpoint_path = curr_dir_path + model_path + checkpoint_name - sd_model = StableDiffusionPipeline.from_single_file(checkpoint_path) + sd_model = StableDiffusionPipeline.from_single_file(checkpoint_path, 
load_safety_checker=False) if (mode == 1): - sd_model = StableDiffusionImg2ImgPipeline.from_single_file(checkpoint_path) + sd_model = StableDiffusionImg2ImgPipeline(**sd_model.components) elif (mode == 2): - sd_model = StableDiffusionInpaintPipeline.from_pretrained(curr_dir_path + model_path, **sd_model.components, local_files_only=True) + sd_model = StableDiffusionInpaintPipeline(**sd_model.components) checkpoint_info = CheckpointInfo(checkpoint_path) #model_state.mode = mode sd_model.sd_checkpoint_info = checkpoint_info From c111b00ae864972032bd96a06fb8249f5a51da62 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Tue, 1 Aug 2023 14:39:35 -0700 Subject: [PATCH 17/24] Added changes to diffusers from_single_file --- scripts/openvino_accelerate.py | 127 ++++++++++++++++++++++++++++++++- 1 file changed, 124 insertions(+), 3 deletions(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index d77d902f..ac4f116c 100644 --- a/scripts/openvino_accelerate.py +++ b/scripts/openvino_accelerate.py @@ -24,6 +24,7 @@ from modules.sd_models import CheckpointInfo from modules.shared import Shared, opts, state from PIL import Image, ImageOps +from pathlib import Path import openvino.frontend.pytorch.torchdynamo.backend from openvino.frontend.pytorch.torchdynamo.execute import partitioned_modules, compiled_cache @@ -42,6 +43,14 @@ from diffusers import ( PNDMScheduler, ) +from diffusers.pipelines.stable_diffusion.convert_from_ckpt import download_from_original_stable_diffusion_ckpt + +from diffusers.utils import ( + DIFFUSERS_CACHE, + HF_HUB_OFFLINE, + is_safetensors_available, +) + class ModelState: def __init__(self): self.recompile = 1 @@ -53,6 +62,116 @@ class ModelState: model_state = ModelState() +def from_single_file(self, pretrained_model_link_or_path, **kwargs): + + cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE) + resume_download = kwargs.pop("resume_download", False) + force_download = kwargs.pop("force_download", False) + proxies = kwargs.pop("proxies", None) + local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE) + use_auth_token = kwargs.pop("use_auth_token", None) + revision = kwargs.pop("revision", None) + extract_ema = kwargs.pop("extract_ema", False) + image_size = kwargs.pop("image_size", None) + scheduler_type = kwargs.pop("scheduler_type", "pndm") + num_in_channels = kwargs.pop("num_in_channels", None) + upcast_attention = kwargs.pop("upcast_attention", None) + load_safety_checker = kwargs.pop("load_safety_checker", True) + prediction_type = kwargs.pop("prediction_type", None) + text_encoder = kwargs.pop("text_encoder", None) + tokenizer = kwargs.pop("tokenizer", None) + local_config_file = kwargs.pop("local_config_file", None) + + torch_dtype = kwargs.pop("torch_dtype", None) + + use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False) + + pipeline_name = self.__name__ + file_extension = pretrained_model_link_or_path.rsplit(".", 1)[-1] + from_safetensors = file_extension == "safetensors" + + if from_safetensors and use_safetensors is False: + raise ValueError("Make sure to install `safetensors` with `pip install safetensors`.") + + # TODO: For now we only support stable diffusion + stable_unclip = None + model_type = None + controlnet = False + + if pipeline_name == "StableDiffusionControlNetPipeline": + # Model type will be inferred from the checkpoint. + controlnet = True + elif "StableDiffusion" in pipeline_name: + # Model type will be inferred from the checkpoint. 
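Once this loader lands (its implementation continues below), the script invokes it with a local checkpoint plus a matching .yaml config. A hypothetical usage sketch; the file names are placeholders, not from the patch:

    from diffusers import StableDiffusionPipeline

    # Assumes the patched single-file loader: a local .safetensors checkpoint,
    # the extra local_config_file kwarg this patch introduces, and the safety
    # checker disabled as the script does. Paths are placeholders.
    pipe = StableDiffusionPipeline.from_single_file(
        "models/Stable-diffusion/model.safetensors",
        local_config_file="configs/model.yaml",
        load_safety_checker=False,
    )
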
+ pass + elif pipeline_name == "StableUnCLIPPipeline": + model_type = "FrozenOpenCLIPEmbedder" + stable_unclip = "txt2img" + elif pipeline_name == "StableUnCLIPImg2ImgPipeline": + model_type = "FrozenOpenCLIPEmbedder" + stable_unclip = "img2img" + elif pipeline_name == "PaintByExamplePipeline": + model_type = "PaintByExample" + elif pipeline_name == "LDMTextToImagePipeline": + model_type = "LDMTextToImage" + else: + raise ValueError(f"Unhandled pipeline class: {pipeline_name}") + + # remove huggingface url + for prefix in ["https://huggingface.co/", "huggingface.co/", "hf.co/", "https://hf.co/"]: + if pretrained_model_link_or_path.startswith(prefix): + pretrained_model_link_or_path = pretrained_model_link_or_path[len(prefix) :] + # Code based on diffusers.pipelines.pipeline_utils.DiffusionPipeline.from_pretrained + ckpt_path = Path(pretrained_model_link_or_path) + if not ckpt_path.is_file(): + # get repo_id and (potentially nested) file path of ckpt in repo + repo_id = "/".join(ckpt_path.parts[:2]) + file_path = "/".join(ckpt_path.parts[2:]) + + if file_path.startswith("blob/"): + file_path = file_path[len("blob/") :] + + if file_path.startswith("main/"): + file_path = file_path[len("main/") :] + + pretrained_model_link_or_path = hf_hub_download( + repo_id, + filename=file_path, + cache_dir=cache_dir, + resume_download=resume_download, + proxies=proxies, + local_files_only=local_files_only, + use_auth_token=use_auth_token, + revision=revision, + force_download=force_download, + ) + + pipe = download_from_original_stable_diffusion_ckpt( + pretrained_model_link_or_path, + original_config_file=local_config_file, + pipeline_class=self, + model_type=model_type, + stable_unclip=stable_unclip, + controlnet=controlnet, + from_safetensors=from_safetensors, + extract_ema=extract_ema, + image_size=image_size, + scheduler_type=scheduler_type, + num_in_channels=num_in_channels, + upcast_attention=upcast_attention, + load_safety_checker=load_safety_checker, + prediction_type=prediction_type, + text_encoder=text_encoder, + tokenizer=tokenizer, + ) + + if torch_dtype is not None: + pipe.to(torch_dtype=torch_dtype) + + return pipe + +StableDiffusionPipeline.from_single_file = functools.partial(from_single_file, StableDiffusionPipeline) + def openvino_clear_caches(): global partitioned_modules global compiled_cache @@ -98,10 +217,12 @@ def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device, mode): torch._dynamo.reset() openvino_clear_caches() curr_dir_path = os.getcwd() - model_path = "/models/Stable-diffusion/" + #model_path = "/models/Stable-diffusion/" checkpoint_name = shared.opts.sd_model_checkpoint.split(" ")[0] - checkpoint_path = curr_dir_path + model_path + checkpoint_name - sd_model = StableDiffusionPipeline.from_single_file(checkpoint_path, load_safety_checker=False) + checkpoint_path = os.path.join(curr_dir_path, 'models', 'Stable-diffusion', checkpoint_name) + config_name = checkpoint_name.split(".")[0] + ".yaml" + local_config_file = os.path.join(curr_dir_path, 'configs',config_name) + sd_model = StableDiffusionPipeline.from_single_file(checkpoint_path, local_config_file=local_config_file, load_safety_checker=False) if (mode == 1): sd_model = StableDiffusionImg2ImgPipeline(**sd_model.components) elif (mode == 2): From bab1b043bc4bf39854e7140932bb4c44a4628cee Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Tue, 1 Aug 2023 15:46:47 -0700 Subject: [PATCH 18/24] Added fall back to CPU for control net and hires fix --- scripts/openvino_accelerate.py | 13 +++++++++---- 1 file 
changed, 9 insertions(+), 4 deletions(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index ac4f116c..19cc4e8c 100644 --- a/scripts/openvino_accelerate.py +++ b/scripts/openvino_accelerate.py @@ -18,7 +18,8 @@ import modules.scripts as scripts from modules import images, devices, extra_networks, masking, shared from modules.processing import ( StableDiffusionProcessing, Processed, apply_overlay, apply_color_correction, - get_fixed_seed, create_random_tensors, create_infotext, setup_color_correction + get_fixed_seed, create_random_tensors, create_infotext, setup_color_correction, + process_images ) from modules.sd_models import CheckpointInfo from modules.shared import Shared, opts, state @@ -217,7 +218,6 @@ def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device, mode): torch._dynamo.reset() openvino_clear_caches() curr_dir_path = os.getcwd() - #model_path = "/models/Stable-diffusion/" checkpoint_name = shared.opts.sd_model_checkpoint.split(" ")[0] checkpoint_path = os.path.join(curr_dir_path, 'models', 'Stable-diffusion', checkpoint_name) config_name = checkpoint_name.split(".")[0] + ".yaml" @@ -228,7 +228,6 @@ def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device, mode): elif (mode == 2): sd_model = StableDiffusionInpaintPipeline(**sd_model.components) checkpoint_info = CheckpointInfo(checkpoint_path) - #model_state.mode = mode sd_model.sd_checkpoint_info = checkpoint_info sd_model.sd_model_hash = checkpoint_info.calculate_shorthash() sd_model.safety_checker = None @@ -341,6 +340,9 @@ def init_new(self, all_prompts, all_seeds, all_subseeds): def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_caching, openvino_device, mode) -> Processed: """this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch""" + if p.enable_hr: + return process_images(p) + if type(p.prompt) == list: assert(len(p.prompt) > 0) else: @@ -371,6 +373,9 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c if p.scripts is not None: p.scripts.process(p) + if 'ControlNet' in p.extra_generation_params: + return process_images(p) + infotexts = [] output_images = [] @@ -422,7 +427,7 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c if ('lora' in modules.extra_networks.extra_network_registry): import lora for lora_model in lora.loaded_loras: - shared.sd_diffusers_model.load_lora_weights(os.getcwd() + "/models/Lora/", weight_name=lora_model.name + ".safetensors") + shared.sd_diffusers_model.load_lora_weights(os.path.join(os.getcwd(), "models", "Lora"), weight_name=lora_model.name + ".safetensors") if p.scripts is not None: p.scripts.process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds) From ac9c9e19ae8557ab12757d652ed5d844e8d4cc7d Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Thu, 3 Aug 2023 16:50:40 -0700 Subject: [PATCH 19/24] Added hires enable fallback fix for img2img --- scripts/openvino_accelerate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index 19cc4e8c..443b2389 100644 --- a/scripts/openvino_accelerate.py +++ b/scripts/openvino_accelerate.py @@ -340,7 +340,7 @@ def init_new(self, all_prompts, all_seeds, all_subseeds): def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_caching, openvino_device, mode) -> Processed: """this is the main loop 
that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch""" - if p.enable_hr: + if (mode == 0 and p.enable_hr): return process_images(p) if type(p.prompt) == list: @@ -645,6 +645,7 @@ class Script(scripts.Script): def run(self, p, openvino_device, override_sampler, sampler_name, enable_caching): os.environ["OPENVINO_TORCH_BACKEND_DEVICE"] = str(openvino_device) + if enable_caching: os.environ["OPENVINO_TORCH_MODEL_CACHING"] = "1" From d7e45a81ae151ebb1e55cf23ee39037a70b9c023 Mon Sep 17 00:00:00 2001 From: likholat Date: Wed, 2 Aug 2023 17:53:11 +0200 Subject: [PATCH 20/24] Multiplier for Lora, codestyle fixes --- scripts/openvino_accelerate.py | 96 +++++++++++++++------------------- 1 file changed, 42 insertions(+), 54 deletions(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index 443b2389..3b20a752 100644 --- a/scripts/openvino_accelerate.py +++ b/scripts/openvino_accelerate.py @@ -1,7 +1,6 @@ # Copyright (C) 2023 Intel Corporation # SPDX-License-Identifier: AGPL-3.0 -import math import cv2 import os import torch @@ -18,17 +17,17 @@ import modules.scripts as scripts from modules import images, devices, extra_networks, masking, shared from modules.processing import ( StableDiffusionProcessing, Processed, apply_overlay, apply_color_correction, - get_fixed_seed, create_random_tensors, create_infotext, setup_color_correction, + get_fixed_seed, create_infotext, setup_color_correction, process_images ) from modules.sd_models import CheckpointInfo -from modules.shared import Shared, opts, state +from modules.shared import opts, state from PIL import Image, ImageOps from pathlib import Path -import openvino.frontend.pytorch.torchdynamo.backend -from openvino.frontend.pytorch.torchdynamo.execute import partitioned_modules, compiled_cache +import openvino.frontend.pytorch.torchdynamo.backend # noqa: F401 +from openvino.frontend.pytorch.torchdynamo.execute import partitioned_modules, compiled_cache # noqa: F401 from openvino.runtime import Core from diffusers import ( @@ -82,7 +81,7 @@ def from_single_file(self, pretrained_model_link_or_path, **kwargs): text_encoder = kwargs.pop("text_encoder", None) tokenizer = kwargs.pop("tokenizer", None) local_config_file = kwargs.pop("local_config_file", None) - + torch_dtype = kwargs.pop("torch_dtype", None) use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False) @@ -135,6 +134,7 @@ def from_single_file(self, pretrained_model_link_or_path, **kwargs): if file_path.startswith("main/"): file_path = file_path[len("main/") :] + from huggingface_hub import hf_hub_download pretrained_model_link_or_path = hf_hub_download( repo_id, filename=file_path, @@ -323,7 +323,6 @@ def init_new(self, all_prompts, all_seeds, all_subseeds): imgs.append(image) if len(imgs) == 1: - batch_images = np.expand_dims(imgs[0], axis=0).repeat(self.batch_size, axis=0) if self.overlay_images is not None: self.overlay_images = self.overlay_images * self.batch_size @@ -332,7 +331,6 @@ def init_new(self, all_prompts, all_seeds, all_subseeds): elif len(imgs) <= self.batch_size: self.batch_size = len(imgs) - batch_images = np.array(imgs) else: raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less") @@ -354,6 +352,7 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c subseed = get_fixed_seed(p.subseed) comments = {} + custom_inputs = {} p.setup_prompts() @@ -423,11 +422,13 @@ def 
process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c with devices.autocast(): extra_networks.activate(p, p.extra_network_data) - # TODO: support multiplier if ('lora' in modules.extra_networks.extra_network_registry): import lora - for lora_model in lora.loaded_loras: + # TODO: multiple Loras aren't supported for Diffusers now, needs to add warning + if lora.loaded_loras: + lora_model = lora.loaded_loras[0] shared.sd_diffusers_model.load_lora_weights(os.path.join(os.getcwd(), "models", "Lora"), weight_name=lora_model.name + ".safetensors") + custom_inputs.update(cross_attention_kwargs={"scale" : lora_model.te_multiplier}) if p.scripts is not None: p.scripts.process_batch(p, batch_number=n, prompts=p.prompts, seeds=p.seeds, subseeds=p.subseeds) @@ -438,7 +439,6 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c # strength, which is saved as "Model Strength: 1.0" in the infotext if n == 0: with open(os.path.join(paths.data_path, "params.txt"), "w", encoding="utf8") as file: - processed = Processed(p, [], p.seed, "") file.write(create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, comments=[], position_in_batch=0 % p.batch_size, iteration=0 // p.batch_size)) if p.n_iter > 1: @@ -449,50 +449,38 @@ def process_images_openvino(p: StableDiffusionProcessing, sampler_name, enable_c time_stamps = [] def callback(iter, t, latents): - time_stamps.append(time.time()) + time_stamps.append(time.time()) # noqa: B023 time_stamps.append(time.time()) + if (mode == 0): - output = shared.sd_diffusers_model( - prompt=p.prompts, - negative_prompt=p.negative_prompts, - num_inference_steps=p.steps, - guidance_scale=p.cfg_scale, - width = p.width, - height = p.height, - generator=generator, - output_type="np", - callback = callback, - callback_steps = 1, - ) + custom_inputs.update({ + 'width': p.width, + 'height': p.height, + }) elif (mode == 1): - output = shared.sd_diffusers_model( - prompt=p.prompts, - negative_prompt=p.negative_prompts, - num_inference_steps=p.steps, - guidance_scale=p.cfg_scale, - image = p.init_images, - strength = p.denoising_strength, - generator=generator, - output_type="np", - callback = callback, - callback_steps = 1, - ) + custom_inputs.update({ + 'image': p.init_images, + 'strength':p.denoising_strength, + }) else: - output = shared.sd_diffusers_model( + custom_inputs.update({ + 'image': p.init_images, + 'strength':p.denoising_strength, + 'mask_image': p.mask, + }) + output = shared.sd_diffusers_model( prompt=p.prompts, negative_prompt=p.negative_prompts, num_inference_steps=p.steps, guidance_scale=p.cfg_scale, - mask_image = p.mask, - image = p.init_images, - strength = p.denoising_strength, generator=generator, output_type="np", callback = callback, callback_steps = 1, - ) - + **custom_inputs + ) + model_state.recompile = 0 warmup_duration = time_stamps[1] - time_stamps[0] @@ -616,21 +604,21 @@ class Script(scripts.Script): def ui(self, is_img2img): core = Core() - openvino_device = gr.Dropdown(label="Select a device", choices=[device for device in core.available_devices], value=model_state.device) + openvino_device = gr.Dropdown(label="Select a device", choices=list(core.available_devices), value=model_state.device) override_sampler = gr.Checkbox(label="Override the sampling selection from the main UI (Recommended as only below sampling methods have been validated for OpenVINO)", value=True) sampler_name = gr.Radio(label="Select a sampling method", choices=["Euler a", "Euler", "LMS", "Heun", "DPM++ 2M", "LMS Karras", 
"DPM++ 2M Karras", "DDIM", "PLMS"], value="Euler a") enable_caching = gr.Checkbox(label="Cache the compiled models on disk for faster model load in subsequent launches (Recommended)", value=True, elem_id=self.elem_id("enable_caching")) warmup_status = gr.Textbox(label="Device", interactive=False, visible=False) - warmup_note = gr.Markdown( - """ - ### - ### Note: - First inference involves compilation of the model for best performance. - Excluding the first inference (or warm up inference) is recommended for - performance measurements. When resolution, batchsize, or device is changed, - or samplers like DPM++ or Karras are selected, model is recompiled. Subsequent - iterations use the cached compiled model for faster inference. - """) + gr.Markdown( + """ + ### + ### Note: + First inference involves compilation of the model for best performance. + Excluding the first inference (or warm up inference) is recommended for + performance measurements. When resolution, batchsize, or device is changed, + or samplers like DPM++ or Karras are selected, model is recompiled. Subsequent + iterations use the cached compiled model for faster inference. + """) def device_change(choice): if (model_state.device == choice): @@ -663,7 +651,7 @@ class Script(scripts.Script): processed = process_images_openvino(p, p.sampler_name, enable_caching, openvino_device, mode) else: if p.image_mask is None: - mode = 1 + mode = 1 else: mode = 2 p.init = functools.partial(init_new, p) From b154c7e32bb4122861fb9c0af9f28789a516f2e4 Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Fri, 4 Aug 2023 12:08:40 -0700 Subject: [PATCH 21/24] Added caching optimizations --- scripts/openvino_accelerate.py | 119 ++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 2 deletions(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index 443b2389..7785b14e 100644 --- a/scripts/openvino_accelerate.py +++ b/scripts/openvino_accelerate.py @@ -28,6 +28,7 @@ from PIL import Image, ImageOps from pathlib import Path import openvino.frontend.pytorch.torchdynamo.backend +import openvino.frontend.pytorch.torchdynamo.compile from openvino.frontend.pytorch.torchdynamo.execute import partitioned_modules, compiled_cache from openvino.runtime import Core @@ -63,6 +64,118 @@ class ModelState: model_state = ModelState() +from torch.fx import GraphModule + +from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder +from openvino.runtime import Core, Type, PartialShape, serialize + +from typing import Callable, Optional +from hashlib import sha256 + +from torch._dynamo.backends.common import fake_tensor_unsupported +from torch._dynamo.backends.registry import register_backend +from torch._inductor.compile_fx import compile_fx +from torch.fx.experimental.proxy_tensor import make_fx + +from openvino.frontend import FrontEndManager +from openvino.frontend.pytorch.torchdynamo.partition import Partitioner +from openvino.frontend.pytorch.torchdynamo.execute import execute + +import time + +partition_id = 0 + +@register_backend +@fake_tensor_unsupported +def openvino_fx(subgraph, example_inputs): + global partition_id + try: + executor_parameters = None + core = Core() + if os.getenv("OPENVINO_TORCH_MODEL_CACHING") is not None: + model_hash_str = sha256(subgraph.code.encode('utf-8')).hexdigest() + model_hash_str_file = model_hash_str + str(partition_id) + partition_id = partition_id + 1 + executor_parameters = {"model_hash_str": model_hash_str} + + example_inputs.reverse() + cache_root = "./cache/" + if 
os.getenv("OPENVINO_TORCH_CACHE_DIR") is not None: + cache_root = os.getenv("OPENVINO_TORCH_CACHE_DIR") + + device = "CPU" + + if os.getenv("OPENVINO_TORCH_BACKEND_DEVICE") is not None: + device = os.getenv("OPENVINO_TORCH_BACKEND_DEVICE") + assert device in core.available_devices, "Specified device " + device + " is not in the list of OpenVINO Available Devices" + + file_name = get_cached_file_name(*example_inputs, model_hash_str=model_hash_str_file, device=device, cache_root=cache_root) + + if file_name is not None and os.path.isfile(file_name + ".xml") and os.path.isfile(file_name + ".bin"): + start_time = time.time() + om = core.read_model(file_name + ".xml") + + dtype_mapping = { + torch.float32: Type.f32, + torch.float64: Type.f64, + torch.float16: Type.f16, + torch.int64: Type.i64, + torch.int32: Type.i32, + torch.uint8: Type.u8, + torch.int8: Type.i8, + torch.bool: Type.boolean + } + + for idx, input_data in enumerate(example_inputs): + om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype]) + om.inputs[idx].get_node().set_partial_shape(PartialShape(list(input_data.shape))) + om.validate_nodes_and_infer_types() + + if model_hash_str is not None: + core.set_property({'CACHE_DIR': cache_root + '/blob'}) + + start_time = time.time() + compiled_model = core.compile_model(om, device) + def _call(*args): + ov_inputs = [a.detach().cpu().numpy() for a in args] + ov_inputs.reverse() + res = compiled_model(ov_inputs) + result = [torch.from_numpy(res[out]) for out in compiled_model.outputs] + return result + return _call + else: + example_inputs.reverse() + model = make_fx(subgraph)(*example_inputs) + with torch.no_grad(): + model.eval() + partitioner = Partitioner() + compiled_model = partitioner.make_partitions(model) + + def _call(*args): + res = execute(compiled_model, *args, executor="openvino", + executor_parameters=executor_parameters) + return res + return _call + except Exception as e: + log.debug(f"Failed in OpenVINO execution: {e}") + return compile_fx(subgraph, example_inputs) + +def get_cached_file_name(*args, model_hash_str, device, cache_root): + file_name = None + if model_hash_str is not None: + model_cache_dir = cache_root + "/model/" + try: + os.makedirs(model_cache_dir, exist_ok=True) + file_name = model_cache_dir + model_hash_str + "_" + device + for idx, input_data in enumerate(args): + if file_name is not None: + file_name += "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "") + except OSError as error: + print("Cache directory ", cache_root, " cannot be created. Model caching is disabled. 
Error: ", error) + file_name = None + model_hash_str = None + return file_name + def from_single_file(self, pretrained_model_link_or_path, **kwargs): cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE) @@ -233,8 +346,8 @@ def get_diffusers_sd_model(sampler_name, enable_caching, openvino_device, mode): sd_model.safety_checker = None sd_model.cond_stage_key = functools.partial(cond_stage_key, shared.sd_model) sd_model.scheduler = set_scheduler(sd_model, sampler_name) - sd_model.unet = torch.compile(sd_model.unet, backend="openvino") - sd_model.vae.decode = torch.compile(sd_model.vae.decode, backend="openvino") + sd_model.unet = torch.compile(sd_model.unet, backend="openvino_fx") + sd_model.vae.decode = torch.compile(sd_model.vae.decode, backend="openvino_fx") shared.sd_diffusers_model = sd_model del sd_model return shared.sd_diffusers_model @@ -644,6 +757,8 @@ class Script(scripts.Script): return [openvino_device, override_sampler, sampler_name, enable_caching] def run(self, p, openvino_device, override_sampler, sampler_name, enable_caching): + global partition_id + partition_id = 0 os.environ["OPENVINO_TORCH_BACKEND_DEVICE"] = str(openvino_device) if enable_caching: From 9d31485cab95796e7dbde0a90ab1f935f85dbf7b Mon Sep 17 00:00:00 2001 From: ynimmaga Date: Fri, 4 Aug 2023 15:30:07 -0700 Subject: [PATCH 22/24] Removed unused imports --- scripts/openvino_accelerate.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py index 6ac20e9c..5fe12db9 100644 --- a/scripts/openvino_accelerate.py +++ b/scripts/openvino_accelerate.py @@ -26,19 +26,16 @@ from modules.shared import opts, state from PIL import Image, ImageOps from pathlib import Path -#from openvino.frontend import FrontEndManager from openvino.frontend.pytorch.torchdynamo import backend, compile # noqa: F401 from openvino.frontend.pytorch.torchdynamo.execute import execute, partitioned_modules, compiled_cache # noqa: F401 from openvino.frontend.pytorch.torchdynamo.partition import Partitioner -from openvino.runtime import Core, Type, PartialShape #, serialize +from openvino.runtime import Core, Type, PartialShape from torch._dynamo.backends.common import fake_tensor_unsupported from torch._dynamo.backends.registry import register_backend from torch._inductor.compile_fx import compile_fx -#from torch.fx import GraphModule from torch.fx.experimental.proxy_tensor import make_fx -#from typing import Callable, Optional from hashlib import sha256 from diffusers import ( From 11779119d77f4bc1d7034f4e3865a16c3ea21724 Mon Sep 17 00:00:00 2001 From: Ravi Panchumarthy Date: Wed, 9 Aug 2023 15:31:25 -0700 Subject: [PATCH 23/24] Added OV, diffusers in requirements. Minor update to Note text. 
From 11779119d77f4bc1d7034f4e3865a16c3ea21724 Mon Sep 17 00:00:00 2001
From: Ravi Panchumarthy
Date: Wed, 9 Aug 2023 15:31:25 -0700
Subject: [PATCH 23/24] Added OV, diffusers in requirements. Minor update to
 Note text.

---
 requirements.txt               |  3 ++-
 requirements_versions.txt      |  3 ++-
 scripts/openvino_accelerate.py | 11 ++++++-----
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index f19f3e81..0a4aaf1c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -31,4 +31,5 @@ torch
 torchdiffeq
 torchsde
 transformers==4.30.0
-diffusers
+diffusers==0.18.2
+openvino==2023.1.0.dev20230728
diff --git a/requirements_versions.txt b/requirements_versions.txt
index a6ea9d5a..77cedb4f 100644
--- a/requirements_versions.txt
+++ b/requirements_versions.txt
@@ -29,5 +29,6 @@ torch
 torchdiffeq==0.2.3
 torchsde==0.2.5
 transformers==4.30.0
-diffusers
+diffusers==0.18.2
+openvino==2023.1.0.dev20230728
diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py
index 5fe12db9..7de7ff0b 100644
--- a/scripts/openvino_accelerate.py
+++ b/scripts/openvino_accelerate.py
@@ -723,11 +723,12 @@ class Script(scripts.Script):
         """
         ###
         ### Note:
-        First inference involves compilation of the model for best performance.
-        Excluding the first inference (or warm up inference) is recommended for
-        performance measurements. When resolution, batchsize, or device is changed,
-        or samplers like DPM++ or Karras are selected, model is recompiled. Subsequent
-        iterations use the cached compiled model for faster inference.
+        - First inference involves compilation of the model for best performance. 
+        Since compilation happens only on the first run, the first inference (or warm up inference) will be slower than subsequent inferences.
+        - For accurate performance measurements, it is recommended to exclude this slower first inference, as it doesn't reflect normal running time.
+        - Model is recompiled when resolution, batchsize, device, or samplers like DPM++ or Karras are changed. 
+        After recompiling, later inferences will reuse the newly compiled model and achieve faster running times. 
+        So it's normal for the first inference after a settings change to be slower, while subsequent inferences use the optimized compiled model and run faster.
         """)
 
         def local_config_change(choice):
From 72e885564ea53b5d7a8e4debea0cfea2af1c1161 Mon Sep 17 00:00:00 2001
From: ynimmaga
Date: Thu, 10 Aug 2023 16:51:16 -0700
Subject: [PATCH 24/24] Fixed linter issues

---
 scripts/openvino_accelerate.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/openvino_accelerate.py b/scripts/openvino_accelerate.py
index 7de7ff0b..14a68c20 100644
--- a/scripts/openvino_accelerate.py
+++ b/scripts/openvino_accelerate.py
@@ -29,7 +29,7 @@ from pathlib import Path
 from openvino.frontend.pytorch.torchdynamo import backend, compile # noqa: F401
 from openvino.frontend.pytorch.torchdynamo.execute import execute, partitioned_modules, compiled_cache # noqa: F401
 from openvino.frontend.pytorch.torchdynamo.partition import Partitioner
-from openvino.runtime import Core, Type, PartialShape 
+from openvino.runtime import Core, Type, PartialShape
 
 from torch._dynamo.backends.common import fake_tensor_unsupported
 from torch._dynamo.backends.registry import register_backend
@@ -723,11 +723,11 @@ class Script(scripts.Script):
         """
         ###
         ### Note:
-        - First inference involves compilation of the model for best performance. 
+        - First inference involves compilation of the model for best performance.
         Since compilation happens only on the first run, the first inference (or warm up inference) will be slower than subsequent inferences.
         - For accurate performance measurements, it is recommended to exclude this slower first inference, as it doesn't reflect normal running time.
-        - Model is recompiled when resolution, batchsize, device, or samplers like DPM++ or Karras are changed. 
-        After recompiling, later inferences will reuse the newly compiled model and achieve faster running times. 
+        - Model is recompiled when resolution, batchsize, device, or samplers like DPM++ or Karras are changed.
+        After recompiling, later inferences will reuse the newly compiled model and achieve faster running times.
         So it's normal for the first inference after a settings change to be slower, while subsequent inferences use the optimized compiled model and run faster.
         """)
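As the reworded note stresses, the first inference after any resolution, batch size, device, or sampler change includes compilation time. A rough sketch of separating warm-up from steady-state step time, in the spirit of the time_stamps bookkeeping in process_images_openvino; `run_pipeline` is a placeholder for a diffusers call made with callback_steps=1:

# Rough timing sketch: warm-up (compile) step vs. average later step.
import time

time_stamps = []

def callback(step, t, latents):          # diffusers invokes this every step
    time_stamps.append(time.time())

def report():
    warmup = time_stamps[1] - time_stamps[0]              # includes compilation
    later = [b - a for a, b in zip(time_stamps[1:], time_stamps[2:])]
    avg = sum(later) / len(later) if later else float("nan")
    print(f"warm-up step: {warmup:.2f}s, average later step: {avg:.3f}s")

time_stamps.append(time.time())          # start mark, as in the script
# run_pipeline(callback=callback, callback_steps=1)       # placeholder call
# report()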