diff --git a/scripts/sd_utils.py b/scripts/sd_utils.py
index 7a42532..2b12425 100644
--- a/scripts/sd_utils.py
+++ b/scripts/sd_utils.py
@@ -12,7 +12,7 @@
 # GNU Affero General Public License for more details.

 # You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
+# along with this program. If not, see <https://www.gnu.org/licenses/>.

 # base webui import and utils.
 #from webui_streamlit import st
 import gfpgan
@@ -66,7 +66,7 @@ from ldm.models.diffusion.ddim import DDIMSampler
 from ldm.util import ismap
-# Temp imports
+# Temp imports

 #from basicsr.utils.registry import ARCH_REGISTRY
@@ -83,7 +83,7 @@ except:
 # remove some annoying deprecation warnings that show every now and then.
 warnings.filterwarnings("ignore", category=DeprecationWarning)
-warnings.filterwarnings("ignore", category=UserWarning)
+warnings.filterwarnings("ignore", category=UserWarning)

 # this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the browser will not show any UI
 mimetypes.init()
@@ -95,7 +95,7 @@ opt_f = 8
 if not "defaults" in st.session_state:
     st.session_state["defaults"] = {}
-
+
 st.session_state["defaults"] = OmegaConf.load("configs/webui/webui_streamlit.yaml")

 if (os.path.exists("configs/webui/userconfig_streamlit.yaml")):
@@ -103,11 +103,11 @@ if (os.path.exists("configs/webui/userconfig_streamlit.yaml")):
     try:
         st.session_state["defaults"] = OmegaConf.merge(st.session_state["defaults"], user_defaults)
     except KeyError:
-        st.experimental_rerun()
+        st.experimental_rerun()
 else:
     OmegaConf.save(config=st.session_state.defaults, f="configs/webui/userconfig_streamlit.yaml")
     loaded = OmegaConf.load("configs/webui/userconfig_streamlit.yaml")
-    assert st.session_state.defaults == loaded
+    assert st.session_state.defaults == loaded

 if (os.path.exists(".streamlit/config.toml")):
     st.session_state["streamlit_config"] = toml.load(".streamlit/config.toml")
@@ -116,7 +116,7 @@ if st.session_state["defaults"].daisi_app.running_on_daisi_io:
 if os.path.exists("scripts/modeldownload.py"):
     import modeldownload
     modeldownload.updateModels()
-
+
 #
 #app = st.HydraApp(title='Stable Diffusion WebUI', favicon="", sidebar_state="expanded",
 #hide_streamlit_markers=False, allow_url_nav=True , clear_cross_app_sessions=False)
@@ -159,7 +159,7 @@ elif save_format[0] == 'webp':
     if save_quality < 0: # e.g. webp:-100 for lossless mode
         save_lossless = True
         save_quality = abs(save_quality)
-
+
 # this should force GFPGAN and RealESRGAN onto the selected gpu as well
 os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" # see issue #152
 os.environ["CUDA_VISIBLE_DEVICES"] = str(st.session_state["defaults"].general.gpu)
@@ -168,7 +168,7 @@ os.environ["CUDA_VISIBLE_DEVICES"] = str(st.session_state["defaults"].general.gp
 #
 # functions to load css locally OR remotely starts here. Options exist for future flexibility. Called as st.markdown with unsafe_allow_html as css injection
-# TODO, maybe look into async loading the file especially for remote fetching
+# TODO, maybe look into async loading the file especially for remote fetching
 def local_css(file_name):
     with open(file_name) as f:
         st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
@@ -181,7 +181,7 @@ def load_css(isLocal, nameOrURL):
         local_css(nameOrURL)
     else:
         remote_css(nameOrURL)
-
+
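# --- Editor's note (illustrative, not part of the diff) ---
# A minimal, self-contained sketch of the save-format convention handled
# above, where a negative quality such as "webp:-100" selects lossless WebP.
# The helper name parse_save_format is hypothetical, for illustration only.
def parse_save_format(spec: str):
    """Parse 'ext' or 'ext:quality'; a negative webp quality means lossless."""
    parts = spec.split(":")
    ext = parts[0]
    quality = int(parts[1]) if len(parts) > 1 else 75  # assumed default
    lossless = False
    if ext == "webp" and quality < 0:
        lossless = True             # e.g. webp:-100 requests lossless mode
        quality = abs(quality)      # reuse the magnitude as the quality value
    return ext, quality, lossless

assert parse_save_format("webp:-100") == ("webp", 100, True)
# --- end note ---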
def set_page_title(title):
    """
    Simple function to allow us to change the title dynamically.
@@ -210,7 +210,7 @@ def set_page_title(title):
        title.text = '{title}'
        </script>" />
        """)
-
+
def human_readable_size(size, decimal_places=3):
    """Return a human readable size from bytes."""
    for unit in ['B','KB','MB','GB','TB']:
@@ -223,12 +223,12 @@ def human_readable_size(size, decimal_places=3):
def load_models(use_LDSR = False, LDSR_model='model', use_GFPGAN=False, GFPGAN_model='GFPGANv1.4', use_RealESRGAN=False, RealESRGAN_model="RealESRGAN_x4plus",
                CustomModel_available=False, custom_model="Stable Diffusion v1.4"):
    """Load the different models. We also reuse the models that are already in memory to speed things up instead of loading them again. """
-
+
    print ("Loading models.")

    if "progress_bar_text" in st.session_state:
        st.session_state["progress_bar_text"].text("")
-
+
    # Generate random run ID
    # Used to link runs linked w/ continue_prev_run which is not yet implemented
@@ -245,7 +245,7 @@ def load_models(use_LDSR = False, LDSR_model='model', use_GFPGAN=False, GFPGAN_m
            else:
                if "LDSR" in server_state:
                    del server_state["LDSR"]
-
+
            # Load LDSR
            if os.path.exists(st.session_state["defaults"].general.LDSR_dir):
                try:
@@ -254,11 +254,11 @@ def load_models(use_LDSR = False, LDSR_model='model', use_GFPGAN=False, GFPGAN_m
                except Exception:
                    import traceback
                    print("Error loading LDSR:", file=sys.stderr)
-                    print(traceback.format_exc(), file=sys.stderr)
+                    print(traceback.format_exc(), file=sys.stderr)
            else:
                if "LDSR" in server_state:
-                    del server_state["LDSR"]
-
+                    del server_state["LDSR"]
+
    with server_state_lock["GFPGAN"]:
        if use_GFPGAN:
            if "GFPGAN" in server_state and server_state["GFPGAN"].name == GFPGAN_model:
@@ -266,7 +266,7 @@ def load_models(use_LDSR = False, LDSR_model='model', use_GFPGAN=False, GFPGAN_m
            else:
                if "GFPGAN" in server_state:
                    del server_state["GFPGAN"]
-
+
                # Load GFPGAN
                if os.path.exists(st.session_state["defaults"].general.GFPGAN_dir):
                    try:
@@ -275,40 +275,40 @@ def load_models(use_LDSR = False, LDSR_model='model', use_GFPGAN=False, GFPGAN_m
                    except Exception:
                        import traceback
                        print("Error loading GFPGAN:", file=sys.stderr)
-                        print(traceback.format_exc(), file=sys.stderr)
+                        print(traceback.format_exc(), file=sys.stderr)
                else:
                    if "GFPGAN" in server_state:
-                        del server_state["GFPGAN"]
+                        del server_state["GFPGAN"]

    with server_state_lock["RealESRGAN"]:
        if use_RealESRGAN:
            if "RealESRGAN" in server_state and server_state["RealESRGAN"].model.name == RealESRGAN_model:
                print("RealESRGAN already loaded")
            else:
-                #Load RealESRGAN
+                # Load RealESRGAN
                try:
                    # We first remove the variable in case it has something there,
                    # some errors can load the model incorrectly and leave things in memory.
                    del server_state["RealESRGAN"]
                except KeyError:
                    pass
-
+
                if os.path.exists(st.session_state["defaults"].general.RealESRGAN_dir):
                    # st.session_state is used for keeping the models in memory across multiple pages or runs.
                    server_state["RealESRGAN"] = load_RealESRGAN(RealESRGAN_model)
                    print("Loaded RealESRGAN with model "+ server_state["RealESRGAN"].model.name)
-
+
                else:
                    if "RealESRGAN" in server_state:
-                        del server_state["RealESRGAN"]
+                        del server_state["RealESRGAN"]

    with server_state_lock["model"], server_state_lock["modelCS"], server_state_lock["modelFS"], server_state_lock["loaded_model"]:
-
+
        if "model" in server_state:
            if "model" in server_state and server_state["loaded_model"] == custom_model:
-                # TODO: check if the optimized mode was changed?
+                # TODO: check if the optimized mode was changed?
                print("Model already loaded")
-
+
                return
            else:
                try:
@@ -316,46 +316,46 @@ def load_models(use_LDSR = False, LDSR_model='model', use_GFPGAN=False, GFPGAN_m
                    del server_state["modelCS"]
                    del server_state["modelFS"]
                    del server_state["loaded_model"]
-
+
                except KeyError:
                    pass
-
+
        # if the model from txt2vid is in memory we need to remove it to improve performance.
        with server_state_lock["pipe"]:
            if "pipe" in server_state:
-                del server_state["pipe"]
-
+                del server_state["pipe"]
+
        if "textual_inversion" in st.session_state:
            del st.session_state['textual_inversion']
-
+
        # At this point the model is either
        # not loaded yet or has been evicted:
        # load new model into memory
        server_state["custom_model"] = custom_model
-
+
        config, device, model, modelCS, modelFS = load_sd_model(custom_model)
-
+
        server_state["device"] = device
        server_state["model"] = model
-
+
        server_state["modelCS"] = modelCS
        server_state["modelFS"] = modelFS
        server_state["loaded_model"] = custom_model
-
+
        # trying to disable multiprocessing as it makes it so streamlit can't stop when the
        # model is loaded in memory and you need to kill the process sometimes.
-
+
        server_state["model"].args.use_multiprocessing_for_evaluation = False
-
-
+
+
        if st.session_state.defaults.general.enable_attention_slicing:
-            server_state["model"].enable_attention_slicing()
-
-        if st.session_state.defaults.general.enable_minimal_memory_usage:
-            server_state["model"].enable_minimal_memory_usage()
-
+            server_state["model"].enable_attention_slicing()
+
+        if st.session_state.defaults.general.enable_minimal_memory_usage:
+            server_state["model"].enable_minimal_memory_usage()
+
        print("Model loaded.")
-
+
    return True
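# --- Editor's note (illustrative, not part of the diff) ---
# load_models() above keeps heavyweight models in server_state and reuses
# them across Streamlit reruns. A minimal sketch of that cache-check pattern;
# get_cached_model and load_fn are hypothetical stand-ins for
# load_GFPGAN/load_RealESRGAN/load_LDSR, and `state` stands in for the
# dict-like server_state.
def get_cached_model(name, lock, state, load_fn):
    with lock:
        cached = state.get("model")
        if cached is not None and getattr(cached, "name", None) == name:
            return cached            # same model already in memory: reuse it
        state.pop("model", None)     # evict a stale or partially loaded model
        model = load_fn(name)
        model.name = name            # remember which checkpoint this is
        state["model"] = model
        return model
# --- end note ---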
print("Model already loaded") - + return else: try: @@ -316,46 +316,46 @@ def load_models(use_LDSR = False, LDSR_model='model', use_GFPGAN=False, GFPGAN_m del server_state["modelCS"] del server_state["modelFS"] del server_state["loaded_model"] - + except KeyError: pass - + # if the model from txt2vid is in memory we need to remove it to improve performance. with server_state_lock["pipe"]: if "pipe" in server_state: - del server_state["pipe"] - + del server_state["pipe"] + if "textual_inversion" in st.session_state: del st.session_state['textual_inversion'] - + # At this point the model is either # not loaded yet or have been evicted: # load new model into memory server_state["custom_model"] = custom_model - + config, device, model, modelCS, modelFS = load_sd_model(custom_model) - + server_state["device"] = device server_state["model"] = model - + server_state["modelCS"] = modelCS server_state["modelFS"] = modelFS server_state["loaded_model"] = custom_model - - #trying to disable multiprocessing as it makes it so streamlit cant stop when the + + #trying to disable multiprocessing as it makes it so streamlit cant stop when the # model is loaded in memory and you need to kill the process sometimes. - + server_state["model"].args.use_multiprocessing_for_evaluation = False - - + + if st.session_state.defaults.general.enable_attention_slicing: - server_state["model"].enable_attention_slicing() - - if st.session_state.defaults.general.enable_minimal_memory_usage: - server_state["model"].enable_minimal_memory_usage() - + server_state["model"].enable_attention_slicing() + + if st.session_state.defaults.general.enable_minimal_memory_usage: + server_state["model"].enable_minimal_memory_usage() + print("Model loaded.") - + return True @@ -489,9 +489,9 @@ def _fft2(data): out_fft = np.zeros((data.shape[0], data.shape[1]), dtype=np.complex128) out_fft[:,:] = np.fft.fft2(np.fft.fftshift(data),norm="ortho") out_fft[:,:] = np.fft.ifftshift(out_fft[:,:]) - + return out_fft - + def _ifft2(data): if data.ndim > 2: # has channels out_ifft = np.zeros((data.shape[0], data.shape[1], data.shape[2]), dtype=np.complex128) @@ -503,14 +503,14 @@ def _ifft2(data): out_ifft = np.zeros((data.shape[0], data.shape[1]), dtype=np.complex128) out_ifft[:,:] = np.fft.ifft2(np.fft.fftshift(data),norm="ortho") out_ifft[:,:] = np.fft.ifftshift(out_ifft[:,:]) - + return out_ifft - + def _get_gaussian_window(width, height, std=3.14, mode=0): window_scale_x = float(width / min(width, height)) window_scale_y = float(height / min(width, height)) - + window = np.zeros((width, height)) x = (np.arange(width) / width * 2. - 1.) * window_scale_x for y in range(height): @@ -519,7 +519,7 @@ def _get_gaussian_window(width, height, std=3.14, mode=0): window[:, y] = np.exp(-(x**2+fy**2) * std) else: window[:, y] = (1/((x**2+1.) * (fy**2+1.))) ** (std/3.14) # hey wait a minute that's not gaussian - + return window def _get_masked_window_rgb(np_mask_grey, hardness=1.): @@ -532,14 +532,14 @@ def _get_masked_window_rgb(np_mask_grey, hardness=1.): np_mask_rgb[:,:,c] = hardened[:] return np_mask_rgb -def get_matched_noise(_np_src_image, np_mask_rgb, noise_q, color_variation): +def get_matched_noise(_np_src_image, np_mask_rgb, noise_q, color_variation): """ Explanation: Getting good results in/out-painting with stable diffusion can be challenging. Although there are simpler effective solutions for in-painting, out-painting can be especially challenging because there is no color data in the masked area to help prompt the generator. 
-def get_matched_noise(_np_src_image, np_mask_rgb, noise_q, color_variation):
+def get_matched_noise(_np_src_image, np_mask_rgb, noise_q, color_variation):
    """
    Explanation:
    Getting good results in/out-painting with stable diffusion can be challenging.
    Although there are simpler effective solutions for in-painting, out-painting can be especially challenging because there is no color data
    in the masked area to help prompt the generator. Ideally, even for in-painting we'd like to work effectively without that data as well.
    Provided here is my take on a potential solution to this problem.
-
+
    By taking a fourier transform of the masked src img we get a function that tells us the presence and orientation of each feature scale in the unmasked src.
    Shaping the init/seed noise for in/outpainting to the same distribution of feature scales, orientations, and positions increases output coherence
    by helping keep features aligned. This technique is applicable to any continuous generation task such as audio or video, each of which can
@@ -547,61 +547,61 @@ def get_matched_noise(_np_src_image, np_mask_rgb, noise_q, color_variation):
    or stereo sound the "color tone" or histogram of the seed noise can be matched to improve quality (using scikit-image currently)
    This method is quite robust and has the added benefit of being fast independently of the size of the out-painted area.
    The effects of this method include things like helping the generator integrate the pre-existing view distance and camera angle.
-
+
    Carefully managing color and brightness with histogram matching is also essential to achieving good coherence.
-
+
    noise_q controls the exponent in the fall-off of the distribution; it can be any positive number, and lower values mean higher detail (range > 0, default 1.)
    color_variation controls how much freedom is allowed for the colors/palette of the out-painted area (range 0..1, default 0.01)
    This code is provided as is under the Unlicense (https://unlicense.org/)
    Although you have no obligation to do so, if you found this code helpful please find it in your heart to credit me [parlance-zz].
-
+
    Questions or comments can be sent to parlance@fifth-harmonic.com (https://github.com/parlance-zz/)
    This code is part of a new branch of a discord bot I am working on integrating with diffusers (https://github.com/parlance-zz/g-diffuser-bot)
-
+
    """

    global DEBUG_MODE
    global TMP_ROOT_PATH
-
+
    width = _np_src_image.shape[0]
    height = _np_src_image.shape[1]
    num_channels = _np_src_image.shape[2]

    np_src_image = _np_src_image[:] * (1. - np_mask_rgb)
-    np_mask_grey = (np.sum(np_mask_rgb, axis=2)/3.)
-    np_src_grey = (np.sum(np_src_image, axis=2)/3.)
+    np_mask_grey = (np.sum(np_mask_rgb, axis=2)/3.)
+    np_src_grey = (np.sum(np_src_image, axis=2)/3.)
    all_mask = np.ones((width, height), dtype=bool)
    img_mask = np_mask_grey > 1e-6
    ref_mask = np_mask_grey < 1e-3
-
+
    windowed_image = _np_src_image * (1.-_get_masked_window_rgb(np_mask_grey))
    windowed_image /= np.max(windowed_image)
    windowed_image += np.average(_np_src_image) * np_mask_rgb # / (1.-np.average(np_mask_rgb)) # rather than leave the masked area black, we get better results from fft by filling the average unmasked color
    #windowed_image += np.average(_np_src_image) * (np_mask_rgb * (1.- np_mask_rgb)) / (1.-np.average(np_mask_rgb)) # compensate for darkening across the mask transition area
    #_save_debug_img(windowed_image, "windowed_src_img")
-
+
    src_fft = _fft2(windowed_image) # get feature statistics from masked src img
    src_dist = np.absolute(src_fft)
    src_phase = src_fft / src_dist
    #_save_debug_img(src_dist, "windowed_src_dist")
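    # --- Editor's note (illustrative, not part of the diff) ---
    # The steps below generate gaussian-windowed random noise and then shape
    # its spectrum, per channel, as:
    #   shaped_fft = |noise_fft|**2 * (src_dist ** noise_q) * src_phase
    # i.e. the seed noise inherits the source's magnitude spectrum (feature
    # scales and orientations, with fall-off controlled by noise_q) while
    # keeping the source's phase so features stay aligned.
    # --- end note ---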
    noise_window = _get_gaussian_window(width, height, mode=1)  # start with simple gaussian noise
    noise_rgb = np.random.random_sample((width, height, num_channels))
-    noise_grey = (np.sum(noise_rgb, axis=2)/3.)
+    noise_grey = (np.sum(noise_rgb, axis=2)/3.)
    noise_rgb *= color_variation # the colorfulness of the starting noise is blended to greyscale with a parameter
    for c in range(num_channels):
        noise_rgb[:,:,c] += (1. - color_variation) * noise_grey
-
+
    noise_fft = _fft2(noise_rgb)
    for c in range(num_channels):
        noise_fft[:,:,c] *= noise_window
    noise_rgb = np.real(_ifft2(noise_fft))
    shaped_noise_fft = _fft2(noise_rgb)
    shaped_noise_fft[:,:,:] = np.absolute(shaped_noise_fft[:,:,:])**2 * (src_dist ** noise_q) * src_phase # perform the actual shaping
-
+
    brightness_variation = 0.#color_variation # todo: temporarily tying brightness variation to color variation for now
    contrast_adjusted_np_src = _np_src_image[:] * (brightness_variation + 1.) - brightness_variation * 2.
-
+
    # scikit-image is used for histogram matching, very convenient!
    shaped_noise = np.real(_ifft2(shaped_noise_fft))
    shaped_noise -= np.min(shaped_noise)
@@ -609,20 +609,20 @@ def get_matched_noise(_np_src_image, np_mask_rgb, noise_q, color_variation):
    shaped_noise[img_mask,:] = skimage.exposure.match_histograms(shaped_noise[img_mask,:]**1., contrast_adjusted_np_src[ref_mask,:], channel_axis=1)
    shaped_noise = _np_src_image[:] * (1. - np_mask_rgb) + shaped_noise * np_mask_rgb
    #_save_debug_img(shaped_noise, "shaped_noise")
-
+
    matched_noise = np.zeros((width, height, num_channels))
    matched_noise = shaped_noise[:]
    #matched_noise[all_mask,:] = skimage.exposure.match_histograms(shaped_noise[all_mask,:], _np_src_image[ref_mask,:], channel_axis=1)
    #matched_noise = _np_src_image[:] * (1. - np_mask_rgb) + matched_noise * np_mask_rgb
-
+
    #_save_debug_img(matched_noise, "matched_noise")
-
+
    """
    todo:
    color_variation doesn't have to be a single number, the overall color tone of the out-painted area could be param controlled
    """
-
-    return np.clip(matched_noise, 0., 1.)
+
+    return np.clip(matched_noise, 0., 1.)
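# --- Editor's note (illustrative, not part of the diff) ---
# Usage sketch for get_matched_noise() above. The arrays are hypothetical;
# the function expects float RGB images in 0..1 with shape [width, height, 3].
import numpy as np
src = np.random.random_sample((512, 512, 3))   # stand-in source image
mask = np.zeros_like(src)
mask[256:, :, :] = 1.                          # out-paint one half of the image
init_noise = get_matched_noise(src, mask, noise_q=1.0, color_variation=0.01)
# init_noise now matches the unmasked region's feature-scale distribution
# and can be used to seed the in/out-painting sampler.
# --- end note ---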
#
@@ -676,11 +676,11 @@ def find_noise_for_image(model, device, init_image, prompt, steps=200, cond_scal
#

def folder_picker(label="Select:", value="", help="", folder_button_label="Select", folder_button_help="", folder_button_key=""):
-    """A folder picker that has a text_input field next to it and a button to select the folder.
+    """A folder picker that has a text_input field next to it and a button to select the folder.
    Returns the text_input field with the folder path."""
    import tkinter as tk
    from tkinter import filedialog
-    import string
+    import string

    # Set up tkinter
    root = tk.Tk()
@@ -701,22 +701,22 @@ def folder_picker(label="Select:", value="", help="", folder_button_label="Selec
    # Folder picker button
    #st.title('Folder Picker')
    #st.write('Please select a folder:')
-
+
    # Create a label and add a random number of invisible characters
    # to it so no two buttons inside a form are the same.
    #folder_button_label = ''.join(random.choice(f"{folder_button_label}") for _ in range(5))
    folder_button_label = f"{str(folder_button_label)}{'‎' * random.randint(1, 500)}"
    clicked = folder_button_key + '‎' * random.randint(5, 500)
-
+
    #try:
    #clicked = folder_picker.button(folder_button_label, help=folder_button_help, key=folder_button_key)
    #except StreamlitAPIException:
    clicked = folder_picker.form_submit_button(folder_button_label, help=folder_button_help)

    if clicked:
-        dirname = dirname.text_input(label, filedialog.askdirectory(master=root), help=help)
+        dirname = dirname.text_input(label, filedialog.askdirectory(master=root), help=help)
    else:
-        dirname = dirname.text_input(label, value, help=help)
+        dirname = dirname.text_input(label, value, help=help)

    return dirname
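# --- Editor's note (illustrative, not part of the diff) ---
# Usage sketch for folder_picker() above, with hypothetical argument values.
# It drives a native tkinter directory dialog, so it needs a display (local
# sessions, not headless servers), and it is intended to be called inside a
# Streamlit form since it uses form_submit_button internally.
outdir = folder_picker(label="Output folder:", value="outputs",
                       folder_button_label="Browse")
# --- end note ---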
@@ -821,7 +821,7 @@ class LDSR():
        model, step = self.load_model_from_config(config, path_ckpt)
        return model
-
+
    def get_custom_cond(mode):
        dest = "data/example_conditioning"
@@ -872,7 +872,7 @@ class LDSR():
        return selected_path
    '''
-
+
    '''
    # Google Colab stuff
@@ -1035,11 +1035,11 @@ class LDSR():
    @torch.no_grad()
-
+
    @torch.no_grad()
-
+
    def superResolution(self, image, ddimSteps = 100, preDownScale = 1, postDownScale = 1, downsample_method= "Lanczos"):
        """
        #Run settings
@@ -1050,7 +1050,7 @@ class LDSR():
        # ####Scaling options:
        # Downsampling to 256px first will often improve the final image and runs faster.
-
+
        # You can improve sharpness without upscaling by upscaling and then downsampling to the original size (i.e. Super Resolution)
        preDownScale: Values ['None', '2', '4']
@@ -1059,10 +1059,10 @@ class LDSR():
        # Nearest gives sharper results, but may look more pixelated. Lanczos is much higher quality, but the result may be less crisp.
        downsample_method = 'Lanczos' #@param ['Nearest', 'Lanczos']
        """
-
+
        diffMode = 'superresolution'
-        model = self.load_model_from_config()
-
+        model = self.load_model_from_config()
+
        #Run settings
        diffusion_steps = int(ddimSteps) #@param [25, 50, 100, 250, 500, 1000]
@@ -1071,7 +1071,7 @@ class LDSR():
        # ####Scaling options:
        # Downsampling to 256px first will often improve the final image and runs faster.
-
+
        # You can improve sharpness without upscaling by upscaling and then downsampling to the original size (i.e. Super Resolution)
        pre_downsample = preDownScale #@param ['None', '2', '4']
@@ -1090,9 +1090,9 @@ class LDSR():
        width_og, height_og = im_og.size

        #Downsample Pre
-
+
        downsample_rate = preDownScale
-
+
        # get system temp directory
        width_downsampled_pre = width_og//downsample_rate
        height_downsampled_pre = height_og//downsample_rate
@@ -1108,10 +1108,10 @@ class LDSR():
        sample = (sample + 1.) / 2. * 255
        sample = sample.numpy().astype(np.uint8)
        sample = np.transpose(sample, (0, 2, 3, 1))
-
+
        a = Image.fromarray(sample[0])

-        #Downsample Post
+        #Downsample Post
        downsample_rate = postDownScale

        width, height = a.size
@@ -1129,11 +1129,11 @@ class LDSR():
        elif post_downsample == 'Original Size':
            print(f'Downsampling from [{width}, {height}] to Original Size [{width_og}, {height_og}]')
            a = a.resize((width_og, height_og), aliasing)
-
+
        del model
        gc.collect()
        torch.cuda.empty_cache()
-
+
        print('Processing finished!')
        return a
@@ -1183,12 +1183,12 @@ def torch_gc():
#@st.experimental_memo(persist="disk", show_spinner=False, suppress_st_warning=True)
def load_GFPGAN(model_name='GFPGANv1.4'):
    #model_name = 'GFPGANv1.3'
-
+
    model_path = os.path.join(st.session_state['defaults'].general.GFPGAN_dir, model_name + '.pth')
-
+
    #if not os.path.isfile(model_path):
    #model_path = os.path.join(st.session_state['defaults'].general.GFPGAN_dir, model_name + '.pth')
-
+
    if not os.path.isfile(model_path):
        raise Exception("GFPGAN model not found at path "+model_path)
@@ -1198,19 +1198,19 @@ def load_GFPGAN(model_name='GFPGANv1.4'):
    if st.session_state['defaults'].general.gfpgan_cpu or st.session_state['defaults'].general.extra_models_cpu:
        server_state['GFPGAN'] = GFPGANer(model_path=model_path, upscale=1, arch='clean', channel_multiplier=2,
                                          bg_upsampler=None, device=torch.device('cpu'))
-
+
    elif st.session_state['defaults'].general.extra_models_gpu:
        server_state['GFPGAN'] = GFPGANer(model_path=model_path, upscale=1, arch='clean', channel_multiplier=2,
                                          bg_upsampler=None, device=torch.device(f"cuda:{st.session_state['defaults'].general.gfpgan_gpu}"))
    else:
        server_state['GFPGAN'] = GFPGANer(model_path=model_path, upscale=1, arch='clean',
-                                          channel_multiplier=2, bg_upsampler=None,
+                                          channel_multiplier=2, bg_upsampler=None,
                                          device=torch.device(f"cuda:{st.session_state['defaults'].general.gpu}"))
-
-    # Add the model_name to the loaded model so we can later
+
+    # Add the model_name to the loaded model so we can later
    # check if it's the same when we change it on the UI.
    server_state['GFPGAN'].name = model_name
-
+
    return server_state['GFPGAN']
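# --- Editor's note (illustrative, not part of the diff) ---
# Usage sketch for load_GFPGAN() above. GFPGANer.enhance() returns the
# cropped faces, the restored faces, and the fully restored image; as in
# process_images() further down, images are passed and returned in BGR
# channel order. img_bgr here is a hypothetical stand-in input.
import numpy as np
img_bgr = (np.random.random_sample((512, 512, 3)) * 255).astype(np.uint8)
gfpgan = load_GFPGAN('GFPGANv1.4')
cropped_faces, restored_faces, restored_img = gfpgan.enhance(
    img_bgr, has_aligned=False, only_center_face=False, paste_back=True)
# --- end note ---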
@retry(tries=5)
@@ -1222,10 +1222,10 @@ def load_RealESRGAN(model_name: str):
    }

    model_path = os.path.join(st.session_state['defaults'].general.RealESRGAN_dir, model_name + '.pth')
-
+
    if not os.path.isfile(model_path):
        model_path = os.path.join(st.session_state['defaults'].general.RealESRGAN_dir, model_name + '.pth')
-
+
    if not os.path.exists(model_path):
        raise Exception(model_name+".pth not found at path "+model_path)
@@ -1236,19 +1236,19 @@ def load_RealESRGAN(model_name: str):
    if st.session_state['defaults'].general.esrgan_cpu or st.session_state['defaults'].general.extra_models_cpu:
        server_state['RealESRGAN'] = RealESRGANer(scale=2, model_path=model_path, model=RealESRGAN_models[model_name], pre_pad=0, half=False) # cpu does not support half
-
+
        server_state['RealESRGAN'].device = torch.device('cpu')
        server_state['RealESRGAN'].model.to('cpu')
-
+
    elif st.session_state['defaults'].general.extra_models_gpu:
        server_state['RealESRGAN'] = RealESRGANer(scale=2, model_path=model_path, model=RealESRGAN_models[model_name], pre_pad=0,
                                                  half=not st.session_state['defaults'].general.no_half, device=torch.device(f"cuda:{st.session_state['defaults'].general.esrgan_gpu}"))
    else:
        server_state['RealESRGAN'] = RealESRGANer(scale=2, model_path=model_path, model=RealESRGAN_models[model_name], pre_pad=0,
                                                  half=not st.session_state['defaults'].general.no_half, device=torch.device(f"cuda:{st.session_state['defaults'].general.gpu}"))
-
-    # Add the model_name to the loaded model so we can later
-    # check if it's the same when we change it on the UI.
+
+    # Add the model_name to the loaded model so we can later
+    # check if it's the same when we change it on the UI.
    server_state['RealESRGAN'].model.name = model_name

    return server_state['RealESRGAN']

@@ -1258,10 +1258,10 @@ def load_RealESRGAN(model_name: str):
def load_LDSR(model_name="model", config="project", checking=False):
    #model_name = 'model'
    #yaml_name = 'project'
-
+
    model_path = os.path.join(st.session_state['defaults'].general.LDSR_dir, model_name + '.ckpt')
    yaml_path = os.path.join(st.session_state['defaults'].general.LDSR_dir, config + '.yaml')
-
+
    if not os.path.isfile(model_path):
        raise Exception("LDSR model not found at path " + model_path)
    if not os.path.isfile(yaml_path):
@@ -1272,9 +1272,9 @@ def load_LDSR(model_name="model", config="project", checking=False):
    #sys.path.append(os.path.abspath(st.session_state['defaults'].general.LDSR_dir))
    #from LDSR import LDSR
    server_state['LDSR'] = LDSR(model_path, yaml_path)
-
+
    server_state['LDSR'].name = model_name
-
+
    return server_state['LDSR']

#
@@ -1301,10 +1301,10 @@ def load_LDSR(model_name="model", config="project", checking=False):
#@retry(tries=5)
-def load_sd_model(model_name: str):
+def load_sd_model(model_name: str):
    """Loads Stable Diffusion model by name"""
    ckpt_path = st.session_state.defaults.general.default_model_path
-
+
    if model_name != st.session_state.defaults.general.default_model:
        ckpt_path = os.path.join("models", "custom", f"{model_name}.ckpt")
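# --- Editor's note (illustrative, not part of the diff) ---
# Sketch of the checkpoint-path resolution performed by load_sd_model()
# above: the default model name maps to defaults.general.default_model_path,
# while any other name resolves to models/custom/<name>.ckpt. resolve_ckpt
# is a hypothetical helper, for illustration only.
import os
def resolve_ckpt(model_name, default_name, default_path):
    if model_name == default_name:
        return default_path
    return os.path.join("models", "custom", f"{model_name}.ckpt")
# --- end note ---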
@@ -1413,7 +1413,7 @@ def generation_callback(img, i=0):
        raise StopException

    try:
-        if i == 0:
+        if i == 0:
            if img['i']: i = img['i']
    except TypeError:
        pass
@@ -1425,7 +1425,7 @@ def generation_callback(img, i=0):
        #print (img)
        #print (type(img))
        # The following lines will convert the tensor we got on img to an actual image we can render on the UI.
-        # It can probably be done in a better way for someone who knows what they're doing. I don't.
+        # It can probably be done in a better way for someone who knows what they're doing. I don't.
        #print (img, isinstance(img, torch.Tensor))
        if isinstance(img, torch.Tensor):
            x_samples_ddim = (server_state["model"].to('cuda') if not st.session_state['defaults'].general.optimized else server_state["modelFS"].to('cuda')
                              ).decode_first_stage(img).to('cuda')
        else:
            # When using the k Diffusion samplers they return a dict instead of a tensor that look like this:
            # {'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised}
            x_samples_ddim = (server_state["model"].to('cuda') if not st.session_state['defaults'].general.optimized else server_state["modelFS"].to('cuda')
                              ).decode_first_stage(img["denoised"]).to('cuda')

-        x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
+        x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)

        if x_samples_ddim.ndimension() == 4:
            pil_images = [transforms.ToPILImage()(x.squeeze_(0)) for x in x_samples_ddim]
            pil_image = image_grid(pil_images, 1)
        else:
            pil_image = transforms.ToPILImage()(x_samples_ddim.squeeze_(0))
-
-
+
+
        # update image on the UI so we can see the progress
-        st.session_state["preview_image"].image(pil_image)
+        st.session_state["preview_image"].image(pil_image)

    # Show a progress bar so we can keep track of the progress even when the image progress is not being shown,
-    # Don't worry, it doesn't affect the performance.
+    # Don't worry, it doesn't affect the performance.
    if st.session_state["generation_mode"] == "txt2img":
        percent = int(100 * float(i+1 if i+1 < st.session_state.sampling_steps else st.session_state.sampling_steps)/float(st.session_state.sampling_steps))
        st.session_state["progress_bar_text"].text(
@@ -1465,7 +1465,7 @@ def generation_callback(img, i=0):
        percent = int(100 * float(i+1 if i+1 < st.session_state.sampling_steps else st.session_state.sampling_steps)/float(st.session_state.sampling_steps))
        st.session_state["progress_bar_text"].text(
            f"Running step: {i+1 if i+1 < st.session_state.sampling_steps else st.session_state.sampling_steps}/{st.session_state.sampling_steps} "
-            f"{percent if percent < 100 else 100}%")
+            f"{percent if percent < 100 else 100}%")

    st.session_state["progress_bar"].progress(percent if percent < 100 else 100)


@@ -1475,10 +1475,10 @@ prompt_parser = re.compile("""
    [^:]+               # match one or more non ':' characters
    )                   # end 'prompt'
    (?:                 # non-capture group
-    :+                 # match one or more ':' characters
+    :+                 # match one or more ':' characters
    (?P<weight>         # capture group for 'weight'
    -?\\d+(?:\\.\\d+)?  # match positive or negative decimal number
    )?
# end weight capture group, make optional \\s* # strip spaces after weight | # OR $ # else, if no ':' then match end of line @@ -1520,22 +1520,22 @@ def slerp(device, t, v0:torch.Tensor, v1:torch.Tensor, DOT_THRESHOLD=0.9995): # @st.experimental_memo(persist="disk", show_spinner=False, suppress_st_warning=True) def optimize_update_preview_frequency(current_chunk_speed, previous_chunk_speed_list, update_preview_frequency, update_preview_frequency_list): - """Find the optimal update_preview_frequency value maximizing + """Find the optimal update_preview_frequency value maximizing performance while minimizing the time between updates.""" from statistics import mean - + previous_chunk_avg_speed = mean(previous_chunk_speed_list) - + previous_chunk_speed_list.append(current_chunk_speed) current_chunk_avg_speed = mean(previous_chunk_speed_list) - + if current_chunk_avg_speed >= previous_chunk_avg_speed: #print(f"{current_chunk_speed} >= {previous_chunk_speed}") update_preview_frequency_list.append(update_preview_frequency + 1) else: #print(f"{current_chunk_speed} <= {previous_chunk_speed}") update_preview_frequency_list.append(update_preview_frequency - 1) - + update_preview_frequency = round(mean(update_preview_frequency_list)) return current_chunk_speed, previous_chunk_speed_list, update_preview_frequency, update_preview_frequency_list @@ -1573,7 +1573,7 @@ def load_learned_embed_in_clip(learned_embeds_path, text_encoder, tokenizer, tok loaded_learned_embeds = torch.load(learned_embeds_path, map_location="cpu") trained_token = list(loaded_learned_embeds.keys())[0] embeds = loaded_learned_embeds[trained_token] - + elif learned_embeds_path.endswith('.bin'): trained_token = list(loaded_learned_embeds.keys())[0] embeds = loaded_learned_embeds[trained_token] @@ -1717,7 +1717,7 @@ def enable_minimal_memory_usage(model): torch.cuda.empty_cache() torch_gc() - + def check_prompt_length(prompt, comments): """this function tests if prompt is too long, and if so, adds a message to comments""" @@ -1744,12 +1744,12 @@ def custom_models_available(): # Allow for custom models to be used instead of the default one, # an example would be Waifu-Diffusion or any other fine tune of stable diffusion server_state["custom_models"]:sorted = [] - + for root, dirs, files in os.walk(os.path.join("models", "custom")): for file in files: - if os.path.splitext(file)[1] == '.ckpt': + if os.path.splitext(file)[1] == '.ckpt': server_state["custom_models"].append(os.path.splitext(file)[0]) - + with server_state_lock["CustomModel_available"]: if len(server_state["custom_models"]) > 0: server_state["CustomModel_available"] = True @@ -1761,21 +1761,32 @@ def custom_models_available(): def GFPGAN_available(): #with server_state_lock["GFPGAN_models"]: # - # Allow for custom models to be used instead of the default one, - # an example would be Waifu-Diffusion or any other fine tune of stable diffusion + st.session_state["GFPGAN_models"]:sorted = [] model = st.session_state["defaults"].model_manager.models.gfpgan + files_available = 0 + for file in model['files']: if "save_location" in model['files'][file]: if os.path.exists(os.path.join(model['files'][file]['save_location'], model['files'][file]['file_name'] )): files_available += 1 + elif os.path.exists(os.path.join(model['save_location'], model['files'][file]['file_name'] )): base_name = os.path.splitext(model['files'][file]['file_name'])[0] if "GFPGANv" in base_name: st.session_state["GFPGAN_models"].append(base_name) files_available += 1 + # we need to show the other models from 
previous versions that we have in the
+    # same directory in case we want to see how they perform vs each other.
+    for root, dirs, files in os.walk(st.session_state['defaults'].general.GFPGAN_dir):
+        for file in files:
+            if os.path.splitext(file)[1] == '.pth':
+                if os.path.splitext(file)[0] not in st.session_state["GFPGAN_models"]:
+                    st.session_state["GFPGAN_models"].append(os.path.splitext(file)[0])
+
+
    if len(st.session_state["GFPGAN_models"]) > 0 and files_available == len(model['files']):
        st.session_state["GFPGAN_available"] = True
    else:
        st.session_state["GFPGAN_available"] = False
@@ -1794,7 +1805,7 @@ def RealESRGAN_available():
            base_name = os.path.splitext(model['files'][file]['file_name'])[0]
            st.session_state["RealESRGAN_models"].append(base_name)

-    if len(st.session_state["RealESRGAN_models"]) > 0:
+    if len(st.session_state["RealESRGAN_models"]) > 0:
        st.session_state["RealESRGAN_available"] = True
    else:
        st.session_state["RealESRGAN_available"] = False
@@ -1821,9 +1832,9 @@ def LDSR_available():

-
-def save_sample(image, sample_path_i, filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale,
+
+def save_sample(image, sample_path_i, filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale,
                normalize_prompt_weights, use_GFPGAN, write_info_files, prompt_matrix, init_img, uses_loopback, uses_random_seed_loopback, save_grid, sort_samples,
                sampler_name, ddim_eta, n_iter, batch_size, i, denoising_strength, resize_mode, save_individual_images, model_name):
@@ -1982,7 +1993,7 @@ def oxlamon_matrix(prompt, seed, n_iter, batch_size):
                texts.append( item.text )
                parts.append( f"Seed: {itemseed}\n" + "\n".join(item.parts) )
                seeds.append( itemseed )
-                itemseed += 1
+                itemseed += 1

    return seeds, texts, parts
@@ -2003,14 +2014,14 @@ def oxlamon_matrix(prompt, seed, n_iter, batch_size):
#
def process_images(
        outpath, func_init, func_sample, prompt, seed, sampler_name, save_grid, batch_size,
-        n_iter, steps, cfg_scale, width, height, prompt_matrix, use_GFPGAN: bool = True, GFPGAN_model: str = 'GFPGANv1.4',
+        n_iter, steps, cfg_scale, width, height, prompt_matrix, use_GFPGAN: bool = True, GFPGAN_model: str = 'GFPGANv1.4',
        use_RealESRGAN: bool = False, realesrgan_model_name: str = 'RealESRGAN_x4plus',
        use_LDSR: bool = False, LDSR_model_name: str = 'model', ddim_eta=0.0, normalize_prompt_weights=True,
        init_img=None, init_mask=None, mask_blur_strength=3, mask_restore=False, denoising_strength=0.75, noise_mode=0, find_noise_steps=1,
        resize_mode=None, uses_loopback=False, uses_random_seed_loopback=False, sort_samples=True, write_info_files=True,
        jpg_sample=False, variant_amount=0.0, variant_seed=None, save_individual_images: bool = True):
    """this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch"""
-
+
    torch_gc()
    # start time after garbage collection (or before?)
start_time = time.time() @@ -2020,25 +2031,25 @@ def process_images( mem_mon = MemUsageMonitor('MemMon') mem_mon.start() - + if st.session_state.defaults.general.use_sd_concepts_library: - prompt_tokens = re.findall('<([a-zA-Z0-9-]+)>', prompt) + prompt_tokens = re.findall('<([a-zA-Z0-9-]+)>', prompt) if prompt_tokens: # compviz tokenizer = (server_state["model"] if not st.session_state['defaults'].general.optimized else server_state["modelCS"]).cond_stage_model.tokenizer text_encoder = (server_state["model"] if not st.session_state['defaults'].general.optimized else server_state["modelCS"]).cond_stage_model.transformer - + # diffusers #tokenizer = pipe.tokenizer #text_encoder = pipe.text_encoder - + ext = ('pt', 'bin') - - if len(prompt_tokens) > 1: + + if len(prompt_tokens) > 1: for token_name in prompt_tokens: - embedding_path = os.path.join(st.session_state['defaults'].general.sd_concepts_library_folder, token_name) + embedding_path = os.path.join(st.session_state['defaults'].general.sd_concepts_library_folder, token_name) if os.path.exists(embedding_path): for files in os.listdir(embedding_path): if files.endswith(ext): @@ -2048,11 +2059,11 @@ def process_images( if os.path.exists(embedding_path): for files in os.listdir(embedding_path): if files.endswith(ext): - load_learned_embed_in_clip(f"{os.path.join(embedding_path, files)}", text_encoder, tokenizer, f"<{prompt_tokens[0]}>") - + load_learned_embed_in_clip(f"{os.path.join(embedding_path, files)}", text_encoder, tokenizer, f"<{prompt_tokens[0]}>") + # - - + + os.makedirs(outpath, exist_ok=True) sample_path = os.path.join(outpath, "samples") @@ -2123,9 +2134,9 @@ def process_images( target_seed_randomizer = seed_to_int('') # random seed torch.manual_seed(seed) # this has to be the single starting seed (not per-iteration) base_x = create_random_tensors([opt_C, height // opt_f, width // opt_f], seeds=[seed]) - # we don't want all_seeds to be sequential from starting seed with variants, - # since that makes the same variants each time, - # so we add target_seed_randomizer as a random offset + # we don't want all_seeds to be sequential from starting seed with variants, + # since that makes the same variants each time, + # so we add target_seed_randomizer as a random offset for si in range(len(all_seeds)): all_seeds[si] += target_seed_randomizer @@ -2180,7 +2191,7 @@ def process_images( x = create_random_tensors(shape, seeds=seeds) if variant_amount > 0.0: # we are making variants - # using variant_seed as sneaky toggle, + # using variant_seed as sneaky toggle, # when not None or '' use the variant_seed # otherwise use seeds if variant_seed != None and variant_seed != '': @@ -2231,23 +2242,23 @@ def process_images( original_filename = filename st.session_state["preview_image"].image(image) - + # if use_GFPGAN and server_state["GFPGAN"] is not None and not use_RealESRGAN and not use_LDSR: st.session_state["progress_bar_text"].text("Running GFPGAN on image %d of %d..." 
% (i+1, len(x_samples_ddim))) - + if server_state["GFPGAN"].name != GFPGAN_model: load_models(use_LDSR=use_LDSR, LDSR_model=LDSR_model_name, use_GFPGAN=use_GFPGAN, use_RealESRGAN=use_RealESRGAN, RealESRGAN_model=realesrgan_model_name) - + torch_gc() cropped_faces, restored_faces, restored_img = server_state["GFPGAN"].enhance(x_sample[:,:,::-1], has_aligned=False, only_center_face=False, paste_back=True) - + gfpgan_sample = restored_img[:,:,::-1] gfpgan_image = Image.fromarray(gfpgan_sample) - + #if st.session_state["GFPGAN_strenght"]: - #gfpgan_sample = Image.blend(image, gfpgan_image, st.session_state["GFPGAN_strenght"]) - + #gfpgan_sample = Image.blend(image, gfpgan_image, st.session_state["GFPGAN_strenght"]) + gfpgan_filename = original_filename + '-gfpgan' save_sample(gfpgan_image, sample_path_i, gfpgan_filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale, @@ -2260,7 +2271,7 @@ def process_images( if simple_templating: grid_captions.append( captions[i] + "\ngfpgan" ) - + # elif use_RealESRGAN and server_state["RealESRGAN"] is not None and not use_GFPGAN: st.session_state["progress_bar_text"].text("Running RealESRGAN on image %d of %d..." % (i+1, len(x_samples_ddim))) @@ -2289,7 +2300,7 @@ def process_images( if simple_templating: grid_captions.append( captions[i] + "\nesrgan" ) - + # elif use_LDSR and "LDSR" in server_state and not use_GFPGAN: print ("Running LDSR on image %d of %d..." % (i+1, len(x_samples_ddim))) @@ -2302,9 +2313,9 @@ def process_images( load_models(use_LDSR=use_LDSR, LDSR_model=LDSR_model_name, use_GFPGAN=use_GFPGAN, use_RealESRGAN=use_RealESRGAN, RealESRGAN_model=realesrgan_model_name) result = server_state["LDSR"].superResolution(image, ddimSteps = st.session_state["ldsr_sampling_steps"], - preDownScale = st.session_state["preDownScale"], postDownScale = st.session_state["postDownScale"], + preDownScale = st.session_state["preDownScale"], postDownScale = st.session_state["postDownScale"], downsample_method=st.session_state["downsample_method"]) - + ldsr_filename = original_filename + '-ldsr4x' #ldsr_sample = result[:,:,::-1] #ldsr_image = Image.fromarray(ldsr_sample) @@ -2321,21 +2332,21 @@ def process_images( run_images.append(result) if simple_templating: - grid_captions.append( captions[i] + "\nldsr" ) - + grid_captions.append( captions[i] + "\nldsr" ) + # elif use_LDSR and "LDSR" in server_state and use_GFPGAN and "GFPGAN" in server_state: print ("Running GFPGAN+LDSR on image %d of %d..." % (i+1, len(x_samples_ddim))) st.session_state["progress_bar_text"].text("Running GFPGAN+LDSR on image %d of %d..." 
% (i+1, len(x_samples_ddim))) - + if server_state["GFPGAN"].name != GFPGAN_model: load_models(use_LDSR=use_LDSR, LDSR_model=LDSR_model_name, use_GFPGAN=use_GFPGAN, use_RealESRGAN=use_RealESRGAN, RealESRGAN_model=realesrgan_model_name) - + torch_gc() cropped_faces, restored_faces, restored_img = server_state["GFPGAN"].enhance(x_sample[:,:,::-1], has_aligned=False, only_center_face=False, paste_back=True) - + gfpgan_sample = restored_img[:,:,::-1] - gfpgan_image = Image.fromarray(gfpgan_sample) + gfpgan_image = Image.fromarray(gfpgan_sample) if server_state["LDSR"].name != LDSR_model_name: #try_loading_RealESRGAN(realesrgan_model_name) @@ -2343,9 +2354,9 @@ def process_images( #LDSR.superResolution(gfpgan_image, ddimSteps=100, preDownScale='None', postDownScale='None', downsample_method="Lanczos") result = server_state["LDSR"].superResolution(gfpgan_image, ddimSteps = st.session_state["ldsr_sampling_steps"], - preDownScale = st.session_state["preDownScale"], postDownScale = st.session_state["postDownScale"], + preDownScale = st.session_state["preDownScale"], postDownScale = st.session_state["postDownScale"], downsample_method=st.session_state["downsample_method"]) - + ldsr_filename = original_filename + '-gfpgan-ldsr2x' #ldsr_sample = result[:,:,::-1] #ldsr_image = Image.fromarray(result) @@ -2362,7 +2373,7 @@ def process_images( run_images.append(result) if simple_templating: - grid_captions.append( captions[i] + "\ngfpgan-ldsr" ) + grid_captions.append( captions[i] + "\ngfpgan-ldsr" ) elif use_RealESRGAN and server_state["RealESRGAN"] is not None and use_GFPGAN and server_state["GFPGAN"] is not None: st.session_state["progress_bar_text"].text("Running GFPGAN+RealESRGAN on image %d of %d..." % (i+1, len(x_samples_ddim))) @@ -2389,9 +2400,9 @@ def process_images( if simple_templating: grid_captions.append( captions[i] + "\ngfpgan_esrgan" ) - + # - + else: output_images.append(image) run_images.append(image) @@ -2419,7 +2430,7 @@ def process_images( image = Image.composite(init_img, image, init_mask) if save_individual_images: - save_sample(image, sample_path_i, filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale, + save_sample(image, sample_path_i, filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale, normalize_prompt_weights, use_GFPGAN, write_info_files, prompt_matrix, init_img, uses_loopback, uses_random_seed_loopback, save_grid, sort_samples, sampler_name, ddim_eta, n_iter, batch_size, i, denoising_strength, resize_mode, save_individual_images, server_state["loaded_model"])
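# --- Editor's note (illustrative, not part of the diff) ---
# Worked example of the prompt_parser regex defined earlier in this file,
# which splits a prompt into (text, optional weight) pairs on ':' separators.
# This assumes the capture groups are named 'prompt' and 'weight', as the
# regex comments indicate.
for m in prompt_parser.finditer("a cat:2 playing piano"):
    print(m.group("prompt"), m.group("weight"))
# a cat 2
# playing piano None
# --- end note ---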