From 75336dfc84cae280036bc52a6805eb10d9ae30ba Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Fri, 4 Aug 2023 13:38:52 +0800
Subject: [PATCH 01/11] add TAESD for i2i and t2i

---
 modules/processing.py         | 13 ++++-----
 modules/sd_samplers_common.py | 38 +++++++++++++++++++++----
 modules/sd_vae_approx.py      |  2 +-
 modules/sd_vae_taesd.py       | 52 +++++++++++++++++++++++++++++------
 modules/shared.py             |  2 ++
 5 files changed, 86 insertions(+), 21 deletions(-)

diff --git a/modules/processing.py b/modules/processing.py
index 8f34c8b4..099d86b7 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -573,9 +573,10 @@ def decode_latent_batch(model, batch, target_device=None, check_for_nans=False):
 
 
 def decode_first_stage(model, x):
-    x = model.decode_first_stage(x.to(devices.dtype_vae))
-
-    return x
+    from modules.sd_samplers_common import samples_to_images_tensor, approximation_indexes
+    x = x.to(devices.dtype_vae)
+    approx_index = approximation_indexes.get(opts.sd_vae_decode_method, 0)
+    return samples_to_images_tensor(x, approx_index, model)
 
 
 def get_fixed_seed(seed):
@@ -1344,10 +1345,8 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less")
 
         image = torch.from_numpy(batch_images)
-        image = 2. * image - 1.
-        image = image.to(shared.device, dtype=devices.dtype_vae)
-
-        self.init_latent = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image))
+        from modules.sd_samplers_common import images_tensor_to_samples, approximation_indexes
+        self.init_latent = images_tensor_to_samples(image, approximation_indexes.get(opts.sd_vae_encode_method), self.sd_model)
         devices.torch_gc()
 
         if self.resize_mode == 3:
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index 5deda761..5a45e8eb 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -23,19 +23,29 @@ def setup_img2img_steps(p, steps=None):
 approximation_indexes = {"Full": 0, "Approx NN": 1, "Approx cheap": 2, "TAESD": 3}
 
 
-def single_sample_to_image(sample, approximation=None):
+def samples_to_images_tensor(sample, approximation=None, model=None):
+    '''latents -> images [-1, 1]'''
     if approximation is None:
         approximation = approximation_indexes.get(opts.show_progress_type, 0)
 
     if approximation == 2:
-        x_sample = sd_vae_approx.cheap_approximation(sample) * 0.5 + 0.5
+        x_sample = sd_vae_approx.cheap_approximation(sample)
     elif approximation == 1:
-        x_sample = sd_vae_approx.model()(sample.to(devices.device, devices.dtype).unsqueeze(0))[0].detach() * 0.5 + 0.5
+        x_sample = sd_vae_approx.model()(sample.to(devices.device, devices.dtype)).detach()
     elif approximation == 3:
         x_sample = sample * 1.5
-        x_sample = sd_vae_taesd.model()(x_sample.to(devices.device, devices.dtype).unsqueeze(0))[0].detach()
+        x_sample = sd_vae_taesd.decoder_model()(x_sample.to(devices.device, devices.dtype)).detach()
+        x_sample = x_sample * 2 - 1
     else:
-        x_sample = processing.decode_first_stage(shared.sd_model, sample.unsqueeze(0))[0] * 0.5 + 0.5
+        if model is None:
+            model = shared.sd_model
+        x_sample = model.decode_first_stage(sample)
+    
+    return x_sample
+
+
+def single_sample_to_image(sample, approximation=None):
+    x_sample = samples_to_images_tensor(sample.unsqueeze(0), approximation)[0] * 0.5 + 0.5
 
     x_sample = torch.clamp(x_sample, min=0.0, max=1.0)
     x_sample = 255. * np.moveaxis(x_sample.cpu().numpy(), 0, 2)
@@ -52,6 +62,24 @@ def samples_to_image_grid(samples, approximation=None):
     return images.image_grid([single_sample_to_image(sample, approximation) for sample in samples])
 
 
+def images_tensor_to_samples(image, approximation=None, model=None):
+    '''image[0, 1] -> latent'''
+    if approximation is None:
+        approximation = approximation_indexes.get(opts.sd_vae_encode_method, 0)
+
+    if approximation == 3:
+        image = image.to(devices.device, devices.dtype)
+        x_latent = sd_vae_taesd.encoder_model()(image) / 1.5
+    else:
+        if model is None:
+            model = shared.sd_model
+        image = image.to(shared.device, dtype=devices.dtype_vae)
+        image = image * 2 - 1
+        x_latent = model.get_first_stage_encoding(model.encode_first_stage(image))
+
+    return x_latent
+
+
 def store_latent(decoded):
     state.current_latent = decoded
 
diff --git a/modules/sd_vae_approx.py b/modules/sd_vae_approx.py
index 86bd658a..3965e223 100644
--- a/modules/sd_vae_approx.py
+++ b/modules/sd_vae_approx.py
@@ -81,6 +81,6 @@ def cheap_approximation(sample):
 
     coefs = torch.tensor(coeffs).to(sample.device)
 
-    x_sample = torch.einsum("lxy,lr -> rxy", sample, coefs)
+    x_sample = torch.einsum("...lxy,lr -> ...rxy", sample, coefs)
 
     return x_sample
diff --git a/modules/sd_vae_taesd.py b/modules/sd_vae_taesd.py
index 5bf7c76e..808eb362 100644
--- a/modules/sd_vae_taesd.py
+++ b/modules/sd_vae_taesd.py
@@ -44,7 +44,17 @@ def decoder():
     )
 
 
-class TAESD(nn.Module):
+def encoder():
+    return nn.Sequential(
+        conv(3, 64), Block(64, 64),
+        conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64),
+        conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64),
+        conv(64, 64, stride=2, bias=False), Block(64, 64), Block(64, 64), Block(64, 64),
+        conv(64, 4),
+    )
+
+
+class TAESDDecoder(nn.Module):
     latent_magnitude = 3
     latent_shift = 0.5
 
@@ -55,21 +65,28 @@ class TAESD(nn.Module):
         self.decoder.load_state_dict(
             torch.load(decoder_path, map_location='cpu' if devices.device.type != 'cuda' else None))
 
-    @staticmethod
-    def unscale_latents(x):
-        """[0, 1] -> raw latents"""
-        return x.sub(TAESD.latent_shift).mul(2 * TAESD.latent_magnitude)
+
+class TAESDEncoder(nn.Module):
+    latent_magnitude = 3
+    latent_shift = 0.5
+
+    def __init__(self, encoder_path="taesd_encoder.pth"):
+        """Initialize pretrained TAESD on the given device from the given checkpoints."""
+        super().__init__()
+        self.encoder = encoder()
+        self.encoder.load_state_dict(
+            torch.load(encoder_path, map_location='cpu' if devices.device.type != 'cuda' else None))
 
 
 def download_model(model_path, model_url):
     if not os.path.exists(model_path):
         os.makedirs(os.path.dirname(model_path), exist_ok=True)
 
-        print(f'Downloading TAESD decoder to: {model_path}')
+        print(f'Downloading TAESD model to: {model_path}')
         torch.hub.download_url_to_file(model_url, model_path)
 
 
-def model():
+def decoder_model():
     model_name = "taesdxl_decoder.pth" if getattr(shared.sd_model, 'is_sdxl', False) else "taesd_decoder.pth"
     loaded_model = sd_vae_taesd_models.get(model_name)
 
@@ -78,7 +95,7 @@ def model():
         download_model(model_path, 'https://github.com/madebyollin/taesd/raw/main/' + model_name)
 
         if os.path.exists(model_path):
-            loaded_model = TAESD(model_path)
+            loaded_model = TAESDDecoder(model_path)
             loaded_model.eval()
             loaded_model.to(devices.device, devices.dtype)
             sd_vae_taesd_models[model_name] = loaded_model
@@ -86,3 +103,22 @@ def model():
             raise FileNotFoundError('TAESD model not found')
 
     return loaded_model.decoder
+
+
+def encoder_model():
+    model_name = "taesdxl_encoder.pth" if getattr(shared.sd_model, 'is_sdxl', False) else "taesd_encoder.pth"
+    loaded_model = sd_vae_taesd_models.get(model_name)
+
+    if loaded_model is None:
+        model_path = os.path.join(paths_internal.models_path, "VAE-taesd", model_name)
+        download_model(model_path, 'https://github.com/madebyollin/taesd/raw/main/' + model_name)
+
+        if os.path.exists(model_path):
+            loaded_model = TAESDEncoder(model_path)
+            loaded_model.eval()
+            loaded_model.to(devices.device, devices.dtype)
+            sd_vae_taesd_models[model_name] = loaded_model
+        else:
+            raise FileNotFoundError('TAESD model not found')
+
+    return loaded_model.encoder
diff --git a/modules/shared.py b/modules/shared.py
index cec030f7..61ba9347 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -430,6 +430,8 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), {
     "upcast_attn": OptionInfo(False, "Upcast cross attention layer to float32"),
     "auto_vae_precision": OptionInfo(True, "Automaticlly revert VAE to 32-bit floats").info("triggers when a tensor with NaNs is produced in VAE; disabling the option in this case will result in a black square image"),
     "randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU", "NV"]}).info("changes seeds drastically; use CPU to produce the same picture across different videocard vendors; use NV to produce same picture as on NVidia videocards"),
+    "sd_vae_encode_method": OptionInfo("Full", "VAE type for encode", gr.Radio, {"choices": ["Full", "TAESD"]}).info("method to encode image to latent (use in img2img or inpaint mask)"),
+    "sd_vae_decode_method": OptionInfo("Full", "VAE type for decode", gr.Radio, {"choices": ["Full", "TAESD"]}).info("method to decode latent to image"),
 }))
 
 options_templates.update(options_section(('sdxl', "Stable Diffusion XL"), {

From c134a480164bef017cd4b33fae57a31a86556beb Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Fri, 4 Aug 2023 13:40:20 +0800
Subject: [PATCH 02/11] Fix code style

---
 modules/sd_samplers_common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index 5a45e8eb..d444cac1 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -2,7 +2,7 @@ from collections import namedtuple
 import numpy as np
 import torch
 from PIL import Image
-from modules import devices, processing, images, sd_vae_approx, sd_samplers, sd_vae_taesd, shared
+from modules import devices, images, sd_vae_approx, sd_samplers, sd_vae_taesd, shared
 from modules.shared import opts, state
 
 SamplerData = namedtuple('SamplerData', ['name', 'constructor', 'aliases', 'options'])
@@ -40,7 +40,7 @@ def samples_to_images_tensor(sample, approximation=None, model=None):
         if model is None:
             model = shared.sd_model
         x_sample = model.decode_first_stage(sample)
-    
+
     return x_sample
 
 

From 1f6bfdea80f58f292aeebb9a001689a118d71c01 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Fri, 4 Aug 2023 14:38:52 +0800
Subject: [PATCH 03/11] move the modified decode into sd_samplers_common

---
 modules/sd_samplers_common.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index 2cfa4ac6..7269514f 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -55,9 +55,9 @@ def single_sample_to_image(sample, approximation=None):
 
 
 def decode_first_stage(model, x):
-    x = model.decode_first_stage(x.to(devices.dtype_vae))
-
-    return x
+    x = x.to(devices.dtype_vae)
+    approx_index = approximation_indexes.get(opts.sd_vae_decode_method, 0)
+    return samples_to_images_tensor(x, approx_index, model)
 
 
 def sample_to_image(samples, index=0, approximation=None):

From 094c416a801b16c7d8e1944e2e9fae2c9e98bf12 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Fri, 4 Aug 2023 17:53:16 +0800
Subject: [PATCH 04/11] change all encode

---
 modules/processing.py         | 14 ++++++--------
 modules/sd_samplers_common.py |  2 +-
 run.ps1                       |  1 +
 run_local.ps1                 |  3 +++
 update.ps1                    |  1 +
 5 files changed, 12 insertions(+), 9 deletions(-)
 create mode 100644 run.ps1
 create mode 100644 run_local.ps1
 create mode 100644 update.ps1

diff --git a/modules/processing.py b/modules/processing.py
index aae39866..544667a4 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -16,6 +16,7 @@ from typing import Any, Dict, List
 import modules.sd_hijack
 from modules import devices, prompt_parser, masking, sd_samplers, lowvram, generation_parameters_copypaste, extra_networks, sd_vae_approx, scripts, sd_samplers_common, sd_unet, errors
 from modules.sd_hijack import model_hijack
+from modules.sd_samplers_common import images_tensor_to_samples, decode_first_stage, approximation_indexes
 from modules.shared import opts, cmd_opts, state
 import modules.shared as shared
 import modules.paths as paths
@@ -30,7 +31,6 @@ from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion
 from einops import repeat, rearrange
 from blendmodes.blend import blendLayers, BlendType
 
-decode_first_stage = sd_samplers_common.decode_first_stage
 
 # some of those options should not be changed at all because they would break the model, so I removed them from options.
 opt_C = 4
@@ -84,7 +84,7 @@ def txt2img_image_conditioning(sd_model, x, width, height):
 
         # The "masked-image" in this case will just be all zeros since the entire image is masked.
         image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device)
-        image_conditioning = sd_model.get_first_stage_encoding(sd_model.encode_first_stage(image_conditioning))
+        image_conditioning = images_tensor_to_samples(image_conditioning, approximation_indexes.get(opts.sd_vae_encode_method))
 
         # Add the fake full 1s mask to the first dimension.
         image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
@@ -203,7 +203,7 @@ class StableDiffusionProcessing:
         midas_in = torch.from_numpy(transformed["midas_in"][None, ...]).to(device=shared.device)
         midas_in = repeat(midas_in, "1 ... -> n ...", n=self.batch_size)
 
-        conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(source_image))
+        conditioning_image = images_tensor_to_samples(source_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method))
         conditioning = torch.nn.functional.interpolate(
             self.sd_model.depth_model(midas_in),
             size=conditioning_image.shape[2:],
@@ -216,7 +216,7 @@ class StableDiffusionProcessing:
         return conditioning
 
     def edit_image_conditioning(self, source_image):
-        conditioning_image = self.sd_model.encode_first_stage(source_image).mode()
+        conditioning_image = images_tensor_to_samples(source_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method))
 
         return conditioning_image
 
@@ -255,7 +255,7 @@ class StableDiffusionProcessing:
         )
 
         # Encode the new masked image using first stage of network.
-        conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image))
+        conditioning_image = images_tensor_to_samples(conditioning_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method))
 
         # Create the concatenated conditioning tensor to be fed to `c_concat`
         conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=latent_image.shape[-2:])
@@ -1099,9 +1099,8 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 
             decoded_samples = torch.from_numpy(np.array(batch_images))
             decoded_samples = decoded_samples.to(shared.device)
-            decoded_samples = 2. * decoded_samples - 1.
 
-            samples = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(decoded_samples))
+            samples = images_tensor_to_samples(decoded_samples, approximation_indexes.get(opts.sd_vae_encode_method))
 
             image_conditioning = self.img2img_image_conditioning(decoded_samples, samples)
 
@@ -1339,7 +1338,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less")
 
         image = torch.from_numpy(batch_images)
-        from modules.sd_samplers_common import images_tensor_to_samples, approximation_indexes
         self.init_latent = images_tensor_to_samples(image, approximation_indexes.get(opts.sd_vae_encode_method), self.sd_model)
         devices.torch_gc()
 
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index 7269514f..42a29fc9 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -75,7 +75,7 @@ def images_tensor_to_samples(image, approximation=None, model=None):
 
     if approximation == 3:
         image = image.to(devices.device, devices.dtype)
-        x_latent = sd_vae_taesd.encoder_model()(image) / 1.5
+        x_latent = sd_vae_taesd.encoder_model()(image)
     else:
         if model is None:
             model = shared.sd_model
diff --git a/run.ps1 b/run.ps1
new file mode 100644
index 00000000..82c1660b
--- /dev/null
+++ b/run.ps1
@@ -0,0 +1 @@
+.\venv\Scripts\accelerate-launch.exe --num_cpu_threads_per_process=6 --api .\launch.py --listen --port 17415 --xformers --opt-channelslast
\ No newline at end of file
diff --git a/run_local.ps1 b/run_local.ps1
new file mode 100644
index 00000000..e2ac43db
--- /dev/null
+++ b/run_local.ps1
@@ -0,0 +1,3 @@
+.\venv\Scripts\Activate.ps1
+python .\launch.py --xformers --opt-channelslast --api
+. $PSCommandPath
\ No newline at end of file
diff --git a/update.ps1 b/update.ps1
new file mode 100644
index 00000000..9960bead
--- /dev/null
+++ b/update.ps1
@@ -0,0 +1 @@
+git stash push && git pull --rebase && git stash pop
\ No newline at end of file

From 6346d8eeaa17ba0f7e41618908519f6e9bfe07e0 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Fri, 4 Aug 2023 17:53:30 +0800
Subject: [PATCH 05/11] Revert "change all encode"

This reverts commit 094c416a801b16c7d8e1944e2e9fae2c9e98bf12.
---
 modules/processing.py         | 14 ++++++++------
 modules/sd_samplers_common.py |  2 +-
 run.ps1                       |  1 -
 run_local.ps1                 |  3 ---
 update.ps1                    |  1 -
 5 files changed, 9 insertions(+), 12 deletions(-)
 delete mode 100644 run.ps1
 delete mode 100644 run_local.ps1
 delete mode 100644 update.ps1

diff --git a/modules/processing.py b/modules/processing.py
index 544667a4..aae39866 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -16,7 +16,6 @@ from typing import Any, Dict, List
 import modules.sd_hijack
 from modules import devices, prompt_parser, masking, sd_samplers, lowvram, generation_parameters_copypaste, extra_networks, sd_vae_approx, scripts, sd_samplers_common, sd_unet, errors
 from modules.sd_hijack import model_hijack
-from modules.sd_samplers_common import images_tensor_to_samples, decode_first_stage, approximation_indexes
 from modules.shared import opts, cmd_opts, state
 import modules.shared as shared
 import modules.paths as paths
@@ -31,6 +30,7 @@ from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion
 from einops import repeat, rearrange
 from blendmodes.blend import blendLayers, BlendType
 
+decode_first_stage = sd_samplers_common.decode_first_stage
 
 # some of those options should not be changed at all because they would break the model, so I removed them from options.
 opt_C = 4
@@ -84,7 +84,7 @@ def txt2img_image_conditioning(sd_model, x, width, height):
 
         # The "masked-image" in this case will just be all zeros since the entire image is masked.
         image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device)
-        image_conditioning = images_tensor_to_samples(image_conditioning, approximation_indexes.get(opts.sd_vae_encode_method))
+        image_conditioning = sd_model.get_first_stage_encoding(sd_model.encode_first_stage(image_conditioning))
 
         # Add the fake full 1s mask to the first dimension.
         image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
@@ -203,7 +203,7 @@ class StableDiffusionProcessing:
         midas_in = torch.from_numpy(transformed["midas_in"][None, ...]).to(device=shared.device)
         midas_in = repeat(midas_in, "1 ... -> n ...", n=self.batch_size)
 
-        conditioning_image = images_tensor_to_samples(source_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method))
+        conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(source_image))
         conditioning = torch.nn.functional.interpolate(
             self.sd_model.depth_model(midas_in),
             size=conditioning_image.shape[2:],
@@ -216,7 +216,7 @@ class StableDiffusionProcessing:
         return conditioning
 
     def edit_image_conditioning(self, source_image):
-        conditioning_image = images_tensor_to_samples(source_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method))
+        conditioning_image = self.sd_model.encode_first_stage(source_image).mode()
 
         return conditioning_image
 
@@ -255,7 +255,7 @@ class StableDiffusionProcessing:
         )
 
         # Encode the new masked image using first stage of network.
-        conditioning_image = images_tensor_to_samples(conditioning_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method))
+        conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(conditioning_image))
 
         # Create the concatenated conditioning tensor to be fed to `c_concat`
         conditioning_mask = torch.nn.functional.interpolate(conditioning_mask, size=latent_image.shape[-2:])
@@ -1099,8 +1099,9 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 
             decoded_samples = torch.from_numpy(np.array(batch_images))
             decoded_samples = decoded_samples.to(shared.device)
+            decoded_samples = 2. * decoded_samples - 1.
 
-            samples = images_tensor_to_samples(decoded_samples, approximation_indexes.get(opts.sd_vae_encode_method))
+            samples = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(decoded_samples))
 
             image_conditioning = self.img2img_image_conditioning(decoded_samples, samples)
 
@@ -1338,6 +1339,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less")
 
         image = torch.from_numpy(batch_images)
+        from modules.sd_samplers_common import images_tensor_to_samples, approximation_indexes
         self.init_latent = images_tensor_to_samples(image, approximation_indexes.get(opts.sd_vae_encode_method), self.sd_model)
         devices.torch_gc()
 
diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index 42a29fc9..7269514f 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -75,7 +75,7 @@ def images_tensor_to_samples(image, approximation=None, model=None):
 
     if approximation == 3:
         image = image.to(devices.device, devices.dtype)
-        x_latent = sd_vae_taesd.encoder_model()(image)
+        x_latent = sd_vae_taesd.encoder_model()(image) / 1.5
     else:
         if model is None:
             model = shared.sd_model
diff --git a/run.ps1 b/run.ps1
deleted file mode 100644
index 82c1660b..00000000
--- a/run.ps1
+++ /dev/null
@@ -1 +0,0 @@
-.\venv\Scripts\accelerate-launch.exe --num_cpu_threads_per_process=6 --api .\launch.py --listen --port 17415 --xformers --opt-channelslast
\ No newline at end of file
diff --git a/run_local.ps1 b/run_local.ps1
deleted file mode 100644
index e2ac43db..00000000
--- a/run_local.ps1
+++ /dev/null
@@ -1,3 +0,0 @@
-.\venv\Scripts\Activate.ps1
-python .\launch.py --xformers --opt-channelslast --api
-. $PSCommandPath
\ No newline at end of file
diff --git a/update.ps1 b/update.ps1
deleted file mode 100644
index 9960bead..00000000
--- a/update.ps1
+++ /dev/null
@@ -1 +0,0 @@
-git stash push && git pull --rebase && git stash pop
\ No newline at end of file

From 073342c8878adc208be1eaab2705ba865d7b3ea1 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Fri, 4 Aug 2023 17:55:52 +0800
Subject: [PATCH 06/11] remove unneeded scale

---
 modules/sd_samplers_common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py
index 7269514f..42a29fc9 100644
--- a/modules/sd_samplers_common.py
+++ b/modules/sd_samplers_common.py
@@ -75,7 +75,7 @@ def images_tensor_to_samples(image, approximation=None, model=None):
 
     if approximation == 3:
         image = image.to(devices.device, devices.dtype)
-        x_latent = sd_vae_taesd.encoder_model()(image) / 1.5
+        x_latent = sd_vae_taesd.encoder_model()(image)
     else:
         if model is None:
             model = shared.sd_model

From 21000f13a169263a7da2c66b300130d7a6339c7d Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Fri, 4 Aug 2023 18:23:14 +0800
Subject: [PATCH 07/11] replace get_first_stage_encoding

---
 modules/processing.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/modules/processing.py b/modules/processing.py
index aae39866..aa6d4d2a 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -16,6 +16,7 @@ from typing import Any, Dict, List
 import modules.sd_hijack
 from modules import devices, prompt_parser, masking, sd_samplers, lowvram, generation_parameters_copypaste, extra_networks, sd_vae_approx, scripts, sd_samplers_common, sd_unet, errors
 from modules.sd_hijack import model_hijack
+from modules.sd_samplers_common import images_tensor_to_samples, decode_first_stage, approximation_indexes
 from modules.shared import opts, cmd_opts, state
 import modules.shared as shared
 import modules.paths as paths
@@ -30,7 +31,6 @@ from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion
 from einops import repeat, rearrange
 from blendmodes.blend import blendLayers, BlendType
 
-decode_first_stage = sd_samplers_common.decode_first_stage
 
 # some of those options should not be changed at all because they would break the model, so I removed them from options.
 opt_C = 4
@@ -84,7 +84,7 @@ def txt2img_image_conditioning(sd_model, x, width, height):
 
         # The "masked-image" in this case will just be all zeros since the entire image is masked.
         image_conditioning = torch.zeros(x.shape[0], 3, height, width, device=x.device)
-        image_conditioning = sd_model.get_first_stage_encoding(sd_model.encode_first_stage(image_conditioning))
+        image_conditioning = images_tensor_to_samples(image_conditioning, approximation_indexes.get(opts.sd_vae_encode_method))
 
         # Add the fake full 1s mask to the first dimension.
         image_conditioning = torch.nn.functional.pad(image_conditioning, (0, 0, 0, 0, 1, 0), value=1.0)
@@ -203,7 +203,7 @@ class StableDiffusionProcessing:
         midas_in = torch.from_numpy(transformed["midas_in"][None, ...]).to(device=shared.device)
         midas_in = repeat(midas_in, "1 ... -> n ...", n=self.batch_size)
 
-        conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(source_image))
+        conditioning_image = images_tensor_to_samples(source_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method))
         conditioning = torch.nn.functional.interpolate(
             self.sd_model.depth_model(midas_in),
             size=conditioning_image.shape[2:],
@@ -216,7 +216,7 @@ class StableDiffusionProcessing:
         return conditioning
 
     def edit_image_conditioning(self, source_image):
-        conditioning_image = self.sd_model.encode_first_stage(source_image).mode()
+        conditioning_image = images_tensor_to_samples(source_image*0.5+0.5, approximation_indexes.get(opts.sd_vae_encode_method))
 
         return conditioning_image
 
@@ -1099,9 +1099,8 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
 
             decoded_samples = torch.from_numpy(np.array(batch_images))
             decoded_samples = decoded_samples.to(shared.device)
-            decoded_samples = 2. * decoded_samples - 1.
 
-            samples = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(decoded_samples))
+            samples = images_tensor_to_samples(decoded_samples, approximation_indexes.get(opts.sd_vae_encode_method))
 
             image_conditioning = self.img2img_image_conditioning(decoded_samples, samples)
 
@@ -1339,7 +1338,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less")
 
         image = torch.from_numpy(batch_images)
-        from modules.sd_samplers_common import images_tensor_to_samples, approximation_indexes
         self.init_latent = images_tensor_to_samples(image, approximation_indexes.get(opts.sd_vae_encode_method), self.sd_model)
         devices.torch_gc()
 

From aa744cadc8e357e696a608c8d0c77a7bfc1c9f39 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Sat, 5 Aug 2023 12:35:40 +0800
Subject: [PATCH 08/11] add infotext

---
 modules/generation_parameters_copypaste.py | 8 ++++++++
 modules/processing.py                      | 3 +++
 2 files changed, 11 insertions(+)

diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py
index a3448be9..0713dbf0 100644
--- a/modules/generation_parameters_copypaste.py
+++ b/modules/generation_parameters_copypaste.py
@@ -304,6 +304,12 @@ Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 965400086, Size: 512x512, Model
     if "Schedule rho" not in res:
         res["Schedule rho"] = 0
 
+    if "VAE Encoder" not in res:
+        res["VAE Encoder"] = "Full"
+
+    if "VAE Decoder" not in res:
+        res["VAE Decoder"] = "Full"
+
     return res
 
 
@@ -329,6 +335,8 @@ infotext_to_setting_name_mapping = [
     ('RNG', 'randn_source'),
     ('NGMS', 's_min_uncond'),
     ('Pad conds', 'pad_cond_uncond'),
+    ('VAE Encoder', 'sd_vae_encode_method'),
+    ('VAE Decoder', 'sd_vae_decode_method'),
 ]
 
 
diff --git a/modules/processing.py b/modules/processing.py
index aa6d4d2a..a9ee7507 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -788,6 +788,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
             with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
                 samples_ddim = p.sample(conditioning=p.c, unconditional_conditioning=p.uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
 
+            p.extra_generation_params['VAE Decoder'] = opts.sd_vae_decode_method
             x_samples_ddim = decode_latent_batch(p.sd_model, samples_ddim, target_device=devices.cpu, check_for_nans=True)
             x_samples_ddim = torch.stack(x_samples_ddim).float()
             x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
@@ -1100,6 +1101,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
             decoded_samples = torch.from_numpy(np.array(batch_images))
             decoded_samples = decoded_samples.to(shared.device)
 
+            self.extra_generation_params['VAE Encoder'] = opts.sd_vae_encode_method
             samples = images_tensor_to_samples(decoded_samples, approximation_indexes.get(opts.sd_vae_encode_method))
 
             image_conditioning = self.img2img_image_conditioning(decoded_samples, samples)
@@ -1338,6 +1340,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less")
 
         image = torch.from_numpy(batch_images)
+        self.extra_generation_params['VAE Encoder'] = opts.sd_vae_encode_method
         self.init_latent = images_tensor_to_samples(image, approximation_indexes.get(opts.sd_vae_encode_method), self.sd_model)
         devices.torch_gc()
 

From d8371d0b3c90252bfb4de619a2e6f80296845554 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Sat, 5 Aug 2023 12:37:46 +0800
Subject: [PATCH 09/11] update sd_vae_encode_method info text to mention hires fix

---
 modules/shared.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/shared.py b/modules/shared.py
index 61ba9347..3491ad79 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -430,7 +430,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), {
     "upcast_attn": OptionInfo(False, "Upcast cross attention layer to float32"),
     "auto_vae_precision": OptionInfo(True, "Automaticlly revert VAE to 32-bit floats").info("triggers when a tensor with NaNs is produced in VAE; disabling the option in this case will result in a black square image"),
     "randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU", "NV"]}).info("changes seeds drastically; use CPU to produce the same picture across different videocard vendors; use NV to produce same picture as on NVidia videocards"),
-    "sd_vae_encode_method": OptionInfo("Full", "VAE type for encode", gr.Radio, {"choices": ["Full", "TAESD"]}).info("method to encode image to latent (use in img2img or inpaint mask)"),
+    "sd_vae_encode_method": OptionInfo("Full", "VAE type for encode", gr.Radio, {"choices": ["Full", "TAESD"]}).info("method to encode image to latent (use in img2img, hires-dix or inpaint mask)"),
     "sd_vae_decode_method": OptionInfo("Full", "VAE type for decode", gr.Radio, {"choices": ["Full", "TAESD"]}).info("method to decode latent to image"),
 }))
 

From a6b245e46f28efe013637e5e9b0600b88df79dc9 Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Sat, 5 Aug 2023 12:49:35 +0800
Subject: [PATCH 10/11] fix typo: hires-dix -> hires-fix

---
 modules/shared.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/shared.py b/modules/shared.py
index 3491ad79..df454d4a 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -430,7 +430,7 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), {
     "upcast_attn": OptionInfo(False, "Upcast cross attention layer to float32"),
     "auto_vae_precision": OptionInfo(True, "Automaticlly revert VAE to 32-bit floats").info("triggers when a tensor with NaNs is produced in VAE; disabling the option in this case will result in a black square image"),
     "randn_source": OptionInfo("GPU", "Random number generator source.", gr.Radio, {"choices": ["GPU", "CPU", "NV"]}).info("changes seeds drastically; use CPU to produce the same picture across different videocard vendors; use NV to produce same picture as on NVidia videocards"),
-    "sd_vae_encode_method": OptionInfo("Full", "VAE type for encode", gr.Radio, {"choices": ["Full", "TAESD"]}).info("method to encode image to latent (use in img2img, hires-dix or inpaint mask)"),
+    "sd_vae_encode_method": OptionInfo("Full", "VAE type for encode", gr.Radio, {"choices": ["Full", "TAESD"]}).info("method to encode image to latent (use in img2img, hires-fix or inpaint mask)"),
     "sd_vae_decode_method": OptionInfo("Full", "VAE type for decode", gr.Radio, {"choices": ["Full", "TAESD"]}).info("method to decode latent to image"),
 }))
 

From b85ec2b9b66492ff9bf3d40a4d9b424390067f0f Mon Sep 17 00:00:00 2001
From: Kohaku-Blueleaf <59680068+KohakuBlueleaf@users.noreply.github.com>
Date: Sat, 5 Aug 2023 13:14:00 +0800
Subject: [PATCH 11/11] Fix some merge mistakes

---
 modules/processing.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/processing.py b/modules/processing.py
index b9900ded..43cb763f 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1136,7 +1136,7 @@ class StableDiffusionProcessingTxt2Img(StableDiffusionProcessing):
                 batch_images.append(image)
 
             decoded_samples = torch.from_numpy(np.array(batch_images))
-            decoded_samples = decoded_samples.to(shared.device)
+            decoded_samples = decoded_samples.to(shared.device, dtype=devices.dtype_vae)
 
             self.extra_generation_params['VAE Encoder'] = opts.sd_vae_encode_method
             samples = images_tensor_to_samples(decoded_samples, approximation_indexes.get(opts.sd_vae_encode_method))
@@ -1374,6 +1374,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
             raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less")
 
         image = torch.from_numpy(batch_images)
+        image = image.to(shared.device, dtype=devices.dtype_vae)
         self.extra_generation_params['VAE Encoder'] = opts.sd_vae_encode_method
         self.init_latent = images_tensor_to_samples(image, approximation_indexes.get(opts.sd_vae_encode_method), self.sd_model)
         devices.torch_gc()