diff --git a/configs/webui/webui_streamlit.yaml b/configs/webui/webui_streamlit.yaml
index 5936c44..33b42a4 100644
--- a/configs/webui/webui_streamlit.yaml
+++ b/configs/webui/webui_streamlit.yaml
@@ -32,6 +32,7 @@ general:
     precision: "autocast"
     optimized: False
     optimized_turbo: False
+    optimized_config: "optimizedSD/v1-inference.yaml"
     update_preview: True
     update_preview_frequency: 5
diff --git a/scripts/img2img.py b/scripts/img2img.py
index 315b3c8..a2c0744 100644
--- a/scripts/img2img.py
+++ b/scripts/img2img.py
@@ -163,12 +163,12 @@ def img2img(prompt: str = '', init_info: any = None, init_info_mask: any = None,
         mask = torch.from_numpy(mask).to(st.session_state["device"])
 
         if st.session_state['defaults'].general.optimized:
-            modelFS.to(st.session_state["device"] )
+            st.session_state.modelFS.to(st.session_state["device"] )
 
         init_image = 2. * image - 1.
         init_image = init_image.to(st.session_state["device"])
-        init_latent = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else st.session_state.modelFS).get_first_stage_encoding((st.session_state["model"] if not st.session_state['defaults'].general.optimized else modelFS).encode_first_stage(init_image)) # move to latent space
-
+        init_latent = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else st.session_state.modelFS).get_first_stage_encoding((st.session_state["model"] if not st.session_state['defaults'].general.optimized else modelFS).encode_first_stage(init_image)) # move to latent space
+
         if st.session_state['defaults'].general.optimized:
             mem = torch.cuda.memory_allocated()/1e6
             st.session_state.modelFS.to("cpu")
diff --git a/scripts/sd_utils.py b/scripts/sd_utils.py
index dc45e09..647e699 100644
--- a/scripts/sd_utils.py
+++ b/scripts/sd_utils.py
@@ -161,44 +161,35 @@ def load_models(continue_prev_run = False, use_GFPGAN=False, use_RealESRGAN=Fals
     else:
 
         if "RealESRGAN" in st.session_state:
-            del st.session_state["RealESRGAN"]
+            del st.session_state["RealESRGAN"]
 
     if "model" in st.session_state:
         if "model" in st.session_state and st.session_state["custom_model"] == custom_model:
             # TODO: check if the optimized mode was changed?
print("Model already loaded") - + + return else: try: del st.session_state.model del st.session_state.modelCS - del st.session_state.modelFS + del st.session_state.modelFS except KeyError: pass - config = OmegaConf.load(st.session_state["defaults"].general.default_model_config) + # At this point the model is either + # is not loaded yet or have been evicted: + # load new model into memory + st.session_state.custom_model = custom_model - if custom_model == st.session_state["defaults"].general.default_model: - model = load_model_from_config(config, st.session_state["defaults"].general.default_model_path) - else: - model = load_model_from_config(config, os.path.join("models","custom", f"{custom_model}.ckpt")) + config, device, model, modelCS, modelFS = load_sd_model(custom_model) - st.session_state["custom_model"] = custom_model - st.session_state["device"] = torch.device(f"cuda:{defaults.general.gpu}") if torch.cuda.is_available() else torch.device("cpu") - st.session_state["model"] = (model if st.session_state["defaults"].general.no_half else model.half()).to(st.session_state["device"] ) - else: - config = OmegaConf.load(st.session_state["defaults"].general.default_model_config) + st.session_state.device = device + st.session_state.model = model + st.session_state.modelCS = modelCS + st.session_state.modelFS = modelFS - if custom_model == st.session_state["defaults"].general.default_model: - model = load_model_from_config(config, st.session_state["defaults"].general.default_model_path) - else: - model = load_model_from_config(config, os.path.join("models","custom", f"{custom_model}.ckpt")) - - st.session_state["custom_model"] = custom_model - st.session_state["device"] = torch.device(f"cuda:{st.session_state['defaults'].general.gpu}") if torch.cuda.is_available() else torch.device("cpu") - st.session_state["model"] = (model if st.session_state['defaults'].general.no_half else model.half()).to(st.session_state["device"] ) - - print("Model loaded.") + print("Model loaded.") def load_model_from_config(config, ckpt, verbose=False): @@ -222,6 +213,7 @@ def load_model_from_config(config, ckpt, verbose=False): model.eval() return model + def load_sd_from_config(ckpt, verbose=False): print(f"Loading model from {ckpt}") pl_sd = torch.load(ckpt, map_location="cpu") @@ -243,21 +235,21 @@ class MemUsageMonitor(threading.Thread): def run(self): try: pynvml.nvmlInit() - print(f"[{self.name}] Recording max memory usage...\n") - handle = pynvml.nvmlDeviceGetHandleByIndex(st.session_state['defaults'].general.gpu) - self.total = pynvml.nvmlDeviceGetMemoryInfo(handle).total - while not self.stop_flag: - m = pynvml.nvmlDeviceGetMemoryInfo(handle) - self.max_usage = max(self.max_usage, m.used) - # print(self.max_usage) - time.sleep(0.1) - print(f"[{self.name}] Stopped recording.\n") - pynvml.nvmlShutdown() - except: print(f"[{self.name}] Unable to initialize NVIDIA management. No memory stats. 
\n") return - + print(f"[{self.name}] Recording max memory usage...\n") + # Missing context + #handle = pynvml.nvmlDeviceGetHandleByIndex(st.session_state['defaults'].general.gpu) + handle = pynvml.nvmlDeviceGetHandleByIndex(0) + self.total = pynvml.nvmlDeviceGetMemoryInfo(handle).total + while not self.stop_flag: + m = pynvml.nvmlDeviceGetMemoryInfo(handle) + self.max_usage = max(self.max_usage, m.used) + # print(self.max_usage) + time.sleep(0.1) + print(f"[{self.name}] Stopped recording.\n") + pynvml.nvmlShutdown() def read(self): return self.max_usage, self.total @@ -685,18 +677,26 @@ def try_loading_LDSR(model_name: str,checking=False): #try_loading_LDSR('model',checking=True) -def load_SD_model(): - if st.session_state['defaults'].general.optimized: - sd = load_sd_from_config(st.session_state['defaults'].general.default_model_path) + +# Loads Stable Diffusion model by name +def load_sd_model(model_name: str) -> [any, any, any, any, any]: + ckpt_path = st.session_state.defaults.general.default_model_path + if model_name != st.session_state.defaults.general.default_model: + ckpt_path = os.path.join("models", "custom", f"{model_name}.ckpt") + + if st.session_state.defaults.general.optimized: + config = OmegaConf.load(st.session_state.defaults.general.optimized_config) + + sd = load_sd_from_config(ckpt_path) li, lo = [], [] for key, v_ in sd.items(): sp = key.split('.') - if(sp[0]) == 'model': - if('input_blocks' in sp): + if (sp[0]) == 'model': + if 'input_blocks' in sp: li.append(key) - elif('middle_block' in sp): + elif 'middle_block' in sp: li.append(key) - elif('time_embed' in sp): + elif 'time_embed' in sp: li.append(key) else: lo.append(key) @@ -705,14 +705,14 @@ def load_SD_model(): for key in lo: sd['model2.' + key[6:]] = sd.pop(key) - config = OmegaConf.load("optimizedSD/v1-inference.yaml") - device = torch.device(f"cuda:{opt.gpu}") if torch.cuda.is_available() else torch.device("cpu") + device = torch.device(f"cuda:{st.session_state.defaults.general.gpu}") \ + if torch.cuda.is_available() else torch.device("cpu") model = instantiate_from_config(config.modelUNet) _, _ = model.load_state_dict(sd, strict=False) model.cuda() model.eval() - model.turbo = st.session_state['defaults'].general.optimized_turbo + model.turbo = st.session_state.defaults.general.optimized_turbo modelCS = instantiate_from_config(config.modelCondStage) _, _ = modelCS.load_state_dict(sd, strict=False) @@ -725,22 +725,25 @@ def load_SD_model(): del sd - if not st.session_state['defaults'].general.no_half: + if not st.session_state.defaults.general.no_half: model = model.half() modelCS = modelCS.half() modelFS = modelFS.half() - return model,modelCS,modelFS,device, config + + return config, device, model, modelCS, modelFS else: - config = OmegaConf.load(st.session_state['defaults'].general.default_model_config) - model = load_model_from_config(config, st.session_state['defaults'].general.default_model_path) + config = OmegaConf.load(st.session_state.defaults.general.default_model_config) + model = load_model_from_config(config, ckpt_path) - device = torch.device(f"cuda:{opt.gpu}") if torch.cuda.is_available() else torch.device("cpu") - model = (model if st.session_state['defaults'].general.no_half else model.half()).to(device) - return model, device,config + device = torch.device(f"cuda:{st.session_state.defaults.general.gpu}") \ + if torch.cuda.is_available() else torch.device("cpu") + model = (model if st.session_state.defaults.general.no_half + else model.half()).to(device) -# + return config, device, model, 
None, None -# + +# @codedealer: No usages def ModelLoader(models,load=False,unload=False,imgproc_realesrgan_model_name='RealESRGAN_x4plus'): #get global variables global_vars = globals() @@ -754,8 +757,8 @@ def ModelLoader(models,load=False,unload=False,imgproc_realesrgan_model_name='Re if m == 'model': del global_vars[m+'FS'] del global_vars[m+'CS'] - if m =='model': - m='Stable Diffusion' + if m == 'model': + m = 'Stable Diffusion' print('Unloaded ' + m) if load: for m in models: @@ -796,11 +799,11 @@ def generation_callback(img, i=0): # It can probably be done in a better way for someone who knows what they're doing. I don't. #print (img,isinstance(img, torch.Tensor)) if isinstance(img, torch.Tensor): - x_samples_ddim = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else modelFS).decode_first_stage(img) + x_samples_ddim = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else st.session_state.modelFS).decode_first_stage(img) else: # When using the k Diffusion samplers they return a dict instead of a tensor that look like this: # {'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised} - x_samples_ddim = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else modelFS).decode_first_stage(img["denoised"]) + x_samples_ddim = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else st.session_state.modelFS).decode_first_stage(img["denoised"]) x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) @@ -1034,10 +1037,10 @@ def draw_prompt_matrix(im, width, height, all_prompts): def check_prompt_length(prompt, comments): """this function tests if prompt is too long, and if so, adds a message to comments""" - tokenizer = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else modelCS).cond_stage_model.tokenizer - max_length = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else modelCS).cond_stage_model.max_length + tokenizer = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else st.session_state.modelCS).cond_stage_model.tokenizer + max_length = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else st.session_state.modelCS).cond_stage_model.max_length - info = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else modelCS).cond_stage_model.tokenizer([prompt], truncation=True, max_length=max_length, + info = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else st.session_state.modelCS).cond_stage_model.tokenizer([prompt], truncation=True, max_length=max_length, return_overflowing_tokens=True, padding="max_length", return_tensors="pt") ovf = info['overflowing_tokens'][0] overflowing_count = ovf.shape[0] @@ -1331,9 +1334,9 @@ def process_images( print(prompt) if st.session_state['defaults'].general.optimized: - modelCS.to(st.session_state['defaults'].general.gpu) + st.session_state.modelCS.to(st.session_state['defaults'].general.gpu) - uc = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else modelCS).get_learned_conditioning(len(prompts) * [""]) + uc = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else st.session_state.modelCS).get_learned_conditioning(len(prompts) * [""]) if isinstance(prompts, tuple): prompts = list(prompts) @@ -1347,16 +1350,16 @@ def process_images( c = 
torch.zeros_like(uc) # i dont know if this is correct.. but it works for i in range(0, len(weighted_subprompts)): # note if alpha negative, it functions same as torch.sub - c = torch.add(c, (st.session_state["model"] if not st.session_state['defaults'].general.optimized else modelCS).get_learned_conditioning(weighted_subprompts[i][0]), alpha=weighted_subprompts[i][1]) + c = torch.add(c, (st.session_state["model"] if not st.session_state['defaults'].general.optimized else st.session_state.modelCS).get_learned_conditioning(weighted_subprompts[i][0]), alpha=weighted_subprompts[i][1]) else: # just behave like usual - c = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else modelCS).get_learned_conditioning(prompts) + c = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else st.session_state.modelCS).get_learned_conditioning(prompts) shape = [opt_C, height // opt_f, width // opt_f] if st.session_state['defaults'].general.optimized: mem = torch.cuda.memory_allocated()/1e6 - modelCS.to("cpu") + st.session_state.modelCS.to("cpu") while(torch.cuda.memory_allocated()/1e6 >= mem): time.sleep(1) @@ -1385,9 +1388,9 @@ def process_images( samples_ddim = func_sample(init_data=init_data, x=x, conditioning=c, unconditional_conditioning=uc, sampler_name=sampler_name) if st.session_state['defaults'].general.optimized: - modelFS.to(st.session_state['defaults'].general.gpu) + st.session_state.modelFS.to(st.session_state['defaults'].general.gpu) - x_samples_ddim = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else modelFS).decode_first_stage(samples_ddim) + x_samples_ddim = (st.session_state["model"] if not st.session_state['defaults'].general.optimized else st.session_state.modelFS).decode_first_stage(samples_ddim) x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) for i, x_sample in enumerate(x_samples_ddim): @@ -1521,7 +1524,7 @@ def process_images( if st.session_state['defaults'].general.optimized: mem = torch.cuda.memory_allocated()/1e6 - modelFS.to("cpu") + st.session_state.modelFS.to("cpu") while(torch.cuda.memory_allocated()/1e6 >= mem): time.sleep(1) diff --git a/scripts/txt2img.py b/scripts/txt2img.py index ab3e8e8..e78c32c 100644 --- a/scripts/txt2img.py +++ b/scripts/txt2img.py @@ -162,10 +162,20 @@ def layout(): cfg_scale = st.slider("CFG (Classifier Free Guidance Scale):", min_value=1.0, max_value=30.0, value=st.session_state['defaults'].txt2img.cfg_scale, step=0.5, help="How strongly the image should follow the prompt.") seed = st.text_input("Seed:", value=st.session_state['defaults'].txt2img.seed, help=" The seed to use, if left blank a random seed will be generated.") batch_count = st.slider("Batch count.", min_value=1, max_value=100, value=st.session_state['defaults'].txt2img.batch_count, step=1, help="How many iterations or batches of images to generate in total.") - #batch_size = st.slider("Batch size", min_value=1, max_value=250, value=defaults.txt2img.batch_size, step=1, - #help="How many images are at once in a batch.\ - #It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish generation as more images are generated at once.\ - #Default: 1") + + bs_slider_max_value = 5 + if st.session_state.defaults.general.optimized: + bs_slider_max_value = 100 + + batch_size = st.slider( + "Batch size", + min_value=1, + max_value=bs_slider_max_value, + value=st.session_state.defaults.txt2img.batch_size, + step=1, + help="How many 
images are at once in a batch.\ + It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish generation as more images are generated at once.\ + Default: 1") with st.expander("Preview Settings"): st.session_state["update_preview"] = st.checkbox("Update Image Preview", value=st.session_state['defaults'].txt2img.update_preview, @@ -244,9 +254,9 @@ def layout(): load_models(False, use_GFPGAN, use_RealESRGAN, RealESRGAN_model) try: - output_images, seeds, info, stats = txt2img(prompt, st.session_state.sampling_steps, sampler_name, RealESRGAN_model, batch_count, 1, + output_images, seeds, info, stats = txt2img(prompt, st.session_state.sampling_steps, sampler_name, RealESRGAN_model, batch_count, batch_size, cfg_scale, seed, height, width, separate_prompts, normalize_prompt_weights, save_individual_images, - save_grid, group_by_prompt, save_as_jpg, use_GFPGAN, use_RealESRGAN, RealESRGAN_model, fp=st.session_state["defaults"].general.fp, + save_grid, group_by_prompt, save_as_jpg, use_GFPGAN, use_RealESRGAN, RealESRGAN_model, fp=st.session_state.defaults.general.fp, variant_amount=variant_amount, variant_seed=variant_seed, write_info_files=write_info_files) message.success('Render Complete: ' + info + '; Stats: ' + stats, icon="✅") diff --git a/update_to_latest.cmd b/update_to_latest.cmd new file mode 100644 index 0000000..cd14956 --- /dev/null +++ b/update_to_latest.cmd @@ -0,0 +1,57 @@ +@echo off +cd %~dp0 + +:: Duplicate code to find miniconda + +IF EXIST custom-conda-path.txt ( + FOR /F %%i IN (custom-conda-path.txt) DO set v_custom_path=%%i +) + +set v_paths=%ProgramData%\miniconda3 +set v_paths=%v_paths%;%USERPROFILE%\miniconda3 +set v_paths=%v_paths%;%ProgramData%\anaconda3 +set v_paths=%v_paths%;%USERPROFILE%\anaconda3 + +for %%a in (%v_paths%) do ( + IF NOT "%v_custom_path%"=="" ( + set v_paths=%v_custom_path%;%v_paths% + ) +) + +for %%a in (%v_paths%) do ( + if EXIST "%%a\Scripts\activate.bat" ( + SET v_conda_path=%%a + echo anaconda3/miniconda3 detected in %%a + ) +) + +IF "%v_conda_path%"=="" ( + echo anaconda3/miniconda3 not found. Install from here https://docs.conda.io/en/latest/miniconda.html + exit /b 1 +) + +:: Update + +echo Stashing local changes and pulling latest update... +call git stash +call git pull +echo If you want to restore changes you made before updating, run "git stash pop". +call "%v_conda_path%\Scripts\activate.bat" + +for /f "delims=" %%a in ('git log -1 --format^="%%H" -- environment.yaml') DO set v_cur_hash=%%a +set /p "v_last_hash="<"z_version_env.tmp" +echo %v_cur_hash%>z_version_env.tmp + +echo Current environment.yaml hash: %v_cur_hash% +echo Previous environment.yaml hash: %v_last_hash% + +if "%v_last_hash%" == "%v_cur_hash%" ( + echo environment.yaml unchanged. dependencies should be up to date. + echo if you still have unresolved dependencies, delete "z_version_env.tmp" +) else ( + echo environment.yaml changed. 
updating dependencies + call conda env create --name "%v_conda_env_name%" -f environment.yaml + call conda env update --name "%v_conda_env_name%" -f environment.yaml +) + +::cmd /k diff --git a/webui-streamlit.cmd b/webui-streamlit.cmd index 8d59ba0..225aa92 100644 --- a/webui-streamlit.cmd +++ b/webui-streamlit.cmd @@ -23,17 +23,17 @@ set v_paths=%v_paths%;%ProgramData%\anaconda3 set v_paths=%v_paths%;%USERPROFILE%\anaconda3 for %%a in (%v_paths%) do ( - IF NOT "%v_custom_path%"=="" ( - set v_paths=%v_custom_path%;%v_paths% - ) + IF NOT "%v_custom_path%"=="" ( + set v_paths=%v_custom_path%;%v_paths% + ) ) for %%a in (%v_paths%) do ( - if EXIST "%%a\Scripts\activate.bat" ( + if EXIST "%%a\Scripts\activate.bat" ( SET v_conda_path=%%a echo anaconda3/miniconda3 detected in %%a goto :CONDA_FOUND - ) + ) ) IF "%v_conda_path%"=="" ( @@ -42,24 +42,10 @@ IF "%v_conda_path%"=="" ( ) :CONDA_FOUND -echo Stashing local changes and pulling latest update... -call git stash -call git pull -call "%v_conda_path%\Scripts\activate.bat" -for /f "delims=" %%a in ('git log -1 --format^="%%H" -- environment.yaml') DO set v_cur_hash=%%a -set /p "v_last_hash="<"z_version_env.tmp" -echo %v_cur_hash%>z_version_env.tmp - -echo Current environment.yaml hash: %v_cur_hash% -echo Previous environment.yaml hash: %v_last_hash% - -if "%v_last_hash%" == "%v_cur_hash%" ( - echo environment.yaml version doesn't change -) else ( - echo environment.yaml changed, updating dependencies - call conda env create --name "%v_conda_env_name%" -f environment.yaml - call conda env update --name "%v_conda_env_name%" -f environment.yaml +if not exist "z_version_env.tmp" ( + :: first time running, we need to update + call "update_to_latest.cmd" ) call "%v_conda_path%\Scripts\activate.bat" "%v_conda_env_name%" diff --git a/webui.cmd b/webui.cmd index 4d013c9..2b87ec0 100644 --- a/webui.cmd +++ b/webui.cmd @@ -23,17 +23,17 @@ set v_paths=%v_paths%;%ProgramData%\anaconda3 set v_paths=%v_paths%;%USERPROFILE%\anaconda3 for %%a in (%v_paths%) do ( - IF NOT "%v_custom_path%"=="" ( - set v_paths=%v_custom_path%;%v_paths% - ) + IF NOT "%v_custom_path%"=="" ( + set v_paths=%v_custom_path%;%v_paths% + ) ) for %%a in (%v_paths%) do ( - if EXIST "%%a\Scripts\activate.bat" ( + if EXIST "%%a\Scripts\activate.bat" ( SET v_conda_path=%%a echo anaconda3/miniconda3 detected in %%a goto :CONDA_FOUND - ) + ) ) IF "%v_conda_path%"=="" ( @@ -42,24 +42,10 @@ IF "%v_conda_path%"=="" ( ) :CONDA_FOUND -echo Stashing local changes and pulling latest update... -call git stash -call git pull -call "%v_conda_path%\Scripts\activate.bat" -for /f "delims=" %%a in ('git log -1 --format^="%%H" -- environment.yaml') DO set v_cur_hash=%%a -set /p "v_last_hash="<"z_version_env.tmp" -echo %v_cur_hash%>z_version_env.tmp - -echo Current environment.yaml hash: %v_cur_hash% -echo Previous environment.yaml hash: %v_last_hash% - -if "%v_last_hash%" == "%v_cur_hash%" ( - echo environment.yaml version doesn't change -) else ( - echo environment.yaml changed, updating dependencies - call conda env create --name "%v_conda_env_name%" -f environment.yaml - call conda env update --name "%v_conda_env_name%" -f environment.yaml +if not exist "z_version_env.tmp" ( + :: first time running, we need to update + call "update_to_latest.cmd" ) call "%v_conda_path%\Scripts\activate.bat" "%v_conda_env_name%"
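Note on the sd_utils.py changes above: call sites now pick between the single full model and the split optimized modules (st.session_state.modelCS for conditioning, st.session_state.modelFS for the first stage) instead of relying on bare module-level globals. A minimal sketch of that selection logic, using a hypothetical stage_model() helper that is not part of this patch:

import streamlit as st

# Hypothetical helper (not in the patch): return the module that owns the
# requested stage. In optimized mode the cond stage (modelCS) and the first
# stage (modelFS) live in separate session_state entries; otherwise the full
# model in st.session_state["model"] handles everything.
def stage_model(stage: str):
    if not st.session_state['defaults'].general.optimized:
        return st.session_state["model"]
    return st.session_state.modelCS if stage == "cond" else st.session_state.modelFS

# Usage mirroring the patched call sites (prompts/samples assumed to exist):
#   uc = stage_model("cond").get_learned_conditioning(len(prompts) * [""])
#   x_samples = stage_model("first").decode_first_stage(samples_ddim)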
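The optimized path also keeps VRAM low by shuttling modelCS/modelFS between GPU and CPU around each use: process_images() moves the submodule to the GPU, runs it, then moves it back and waits for the CUDA allocator to report the memory as freed. A rough sketch of that hand-off as a standalone function, assuming the module is currently on the GPU and CUDA is available:

import time
import torch

def offload_to_cpu(module):
    # Snapshot allocated VRAM (in MB), move the module off the GPU, then block
    # until the allocator reports usage below the snapshot; this mirrors the
    # wait loop the patch uses after conditioning and decoding in process_images().
    mem = torch.cuda.memory_allocated() / 1e6
    module.to("cpu")
    while torch.cuda.memory_allocated() / 1e6 >= mem:
        time.sleep(1)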