Merge pull request #1070 from briansemrau/feature/img2img-masking-fixes

img2img mask fixes and fix image2noise normalization
2024-12-15 06:21:34 +03:00 · 2022-09-13 00:08:19 -07:00 · 2022-09-13 00:08:19 -07:00 · 2f86782acf
commit 2f86782acf
parent ad72deb3c0 0480804499
2 changed files with 58 additions and 50 deletions
--- a/scripts/find_noise_for_image.py
+++ b/scripts/find_noise_for_image.py
@ -53,8 +53,5 @@ def find_noise_for_image(model, device, init_image, prompt, steps=200, cond_scal
 		dt = sigmas[i] - sigmas[i - 1]
 		x = x + d * dt
 	
-	if normalize:
-		# multiplying sigmas seems to break things pretty bad...
-		return (x / x.std())# * sigmas[-1]
-	else:
-		return x
+	return x / sigmas[-1]
+	
--- a/scripts/webui_streamlit.py
+++ b/scripts/webui_streamlit.py
@ -1149,7 +1149,7 @@ def process_images(
 				while(torch.cuda.memory_allocated()/1e6 >= mem):
 					time.sleep(1)

-			if noise_mode == 1:
+			if noise_mode == 1 or noise_mode == 3:
 				# TODO params for find_noise_to_image
 				x = torch.cat(batch_size * [find_noise_for_image.find_noise_for_image(
 					st.session_state["model"], st.session_state["device"],
@ -1448,14 +1448,22 @@ def img2img(prompt: str = '', init_info: any = None, init_info_mask: any = None,
 	else:
 		raise Exception("Unknown sampler: " + sampler_name)

+	def process_init_mask(init_mask: Image):  # normalise a mask image: RGBA input is flattened to RGB, any other mode is returned as-is
+		if init_mask.mode == "RGBA":  # only masks that carry an alpha channel are flattened
+			init_mask = init_mask.convert('RGBA')  # mode is already RGBA in this branch, so this call does not change the mode
+			background = Image.new('RGBA', init_mask.size, (0, 0, 0))  # black canvas with the same size as the mask
+			init_mask = Image.alpha_composite(background, init_mask)  # lay the mask over the black canvas
+			init_mask = init_mask.convert('RGB')  # drop the alpha channel from the composited result
+		return init_mask  # flattened RGB mask, or the original object when its mode was not RGBA
+
 	init_img = init_info
 	init_mask = None
 	if mask_mode == 0:
 		if init_info_mask:
-			init_mask = init_info_mask
+			init_mask = process_init_mask(init_info_mask)
 	elif mask_mode == 1:
 		if init_info_mask:
-			init_mask = init_info_mask
+			init_mask = process_init_mask(init_info_mask)
 			init_mask = ImageOps.invert(init_mask)
 	elif mask_mode == 2:
 		init_img_transparency = init_img.split()[-1].convert('L')#.point(lambda x: 255 if x > 0 else 0, mode='1')
@ -1467,42 +1475,41 @@ def img2img(prompt: str = '', init_info: any = None, init_info_mask: any = None,
 	assert 0. <= denoising_strength <= 1., 'can only work with strength in [0.0, 1.0]'
 	t_enc = int(denoising_strength * ddim_steps)

+	if init_mask is not None and (noise_mode == 2 or noise_mode == 3) and init_img is not None:
+		noise_q = 0.99
+		color_variation = 0.0
+		mask_blend_factor = 1.0
+
+		np_init = (np.asarray(init_img.convert("RGB"))/255.0).astype(np.float64) # annoyingly complex mask fixing
+		np_mask_rgb = 1. - (np.asarray(ImageOps.invert(init_mask).convert("RGB"))/255.0).astype(np.float64)
+		np_mask_rgb -= np.min(np_mask_rgb)
+		np_mask_rgb /= np.max(np_mask_rgb)
+		np_mask_rgb = 1. - np_mask_rgb
+		np_mask_rgb_hardened = 1. - (np_mask_rgb < 0.99).astype(np.float64)
+		blurred = skimage.filters.gaussian(np_mask_rgb_hardened[:], sigma=16., channel_axis=2, truncate=32.)
+		blurred2 = skimage.filters.gaussian(np_mask_rgb_hardened[:], sigma=16., channel_axis=2, truncate=32.)
+		#np_mask_rgb_dilated = np_mask_rgb + blurred  # fixup mask todo: derive magic constants
+		#np_mask_rgb = np_mask_rgb + blurred
+		np_mask_rgb_dilated = np.clip((np_mask_rgb + blurred2) * 0.7071, 0., 1.)
+		np_mask_rgb = np.clip((np_mask_rgb + blurred) * 0.7071, 0., 1.)
+
+		noise_rgb = matched_noise.get_matched_noise(np_init, np_mask_rgb, noise_q, color_variation)
+		blend_mask_rgb = np.clip(np_mask_rgb_dilated,0.,1.) ** (mask_blend_factor)
+		noised = noise_rgb[:]
+		blend_mask_rgb **= (2.)
+		noised = np_init[:] * (1. - blend_mask_rgb) + noised * blend_mask_rgb
+
+		np_mask_grey = np.sum(np_mask_rgb, axis=2)/3.
+		ref_mask = np_mask_grey < 1e-3
+		
+		all_mask = np.ones((height, width), dtype=bool)
+		noised[all_mask,:] = skimage.exposure.match_histograms(noised[all_mask,:]**1., noised[ref_mask,:], channel_axis=1)
+		
+		init_img = Image.fromarray(np.clip(noised * 255., 0., 255.).astype(np.uint8), mode="RGB")
+		st.session_state["editor_image"].image(init_img) # debug
+
 	def init():
-		init_image = init_img
-		if init_mask is not None and noise_mode == 2 and init_image is not None:
-			noise_q = 0.99
-			color_variation = 0.0
-			mask_blend_factor = 1.0
-
-			np_init = (np.asarray(init_image.convert("RGB"))/255.0).astype(np.float64) # annoyingly complex mask fixing
-			np_mask_rgb = 1. - (np.asarray(init_mask.convert("RGB"))/255.0).astype(np.float64)
-			np_mask_rgb -= np.min(np_mask_rgb)
-			np_mask_rgb /= np.max(np_mask_rgb)
-			#np_mask_rgb = 1. - np_mask_rgb
-			np_mask_rgb_hardened = 1. - (np_mask_rgb < 0.99).astype(np.float64)
-			blurred = skimage.filters.gaussian(np_mask_rgb_hardened[:], sigma=16., channel_axis=2, truncate=32.)
-			blurred2 = skimage.filters.gaussian(np_mask_rgb_hardened[:], sigma=16., channel_axis=2, truncate=32.)
-			#np_mask_rgb_dilated = np_mask_rgb + blurred  # fixup mask todo: derive magic constants
-			#np_mask_rgb = np_mask_rgb + blurred
-			np_mask_rgb_dilated = np.clip((np_mask_rgb + blurred2) * 0.7071, 0., 1.)
-			np_mask_rgb = np.clip((np_mask_rgb + blurred) * 0.7071, 0., 1.)
-
-			noise_rgb = matched_noise.get_matched_noise(np_init, np_mask_rgb, noise_q, color_variation)
-			blend_mask_rgb = np.clip(np_mask_rgb_dilated,0.,1.) ** (mask_blend_factor)
-			noised = noise_rgb[:]
-			blend_mask_rgb **= (2.)
-			noised = np_init[:] * (1. - blend_mask_rgb) + noised * blend_mask_rgb
-			
-			np_mask_grey = np.sum(np_mask_rgb, axis=2)/3.
-			ref_mask =  np_mask_grey < 1e-3
-			
-			all_mask = np.ones((width, height), dtype=bool)
-			noised[all_mask,:] = skimage.exposure.match_histograms(noised[all_mask,:]**1., noised[ref_mask,:], channel_axis=1)
-			
-			init_image = Image.fromarray(np.clip(noised * 255., 0., 255.).astype(np.uint8), mode="RGB")
-			st.session_state["editor_image"].image(init_image)
-
-		image = init_image.convert('RGB')
+		image = init_img.convert('RGB')
 		image = np.array(image).astype(np.float32) / 255.0
 		image = image[None].transpose(0, 3, 1, 2)
 		image = torch.from_numpy(image)
@ -1510,7 +1517,7 @@ def img2img(prompt: str = '', init_info: any = None, init_info_mask: any = None,
 		mask_channel = None
 		if init_mask:
 			alpha = resize_image(resize_mode, init_mask, width // 8, height // 8)
-			mask_channel = alpha.split()[1]
+			mask_channel = alpha.split()[-1]

 		mask = None
 		if mask_channel is not None:
@ -2367,7 +2374,7 @@ def layout():
 					width = st.slider("Width:", min_value=64, max_value=1024, value=defaults.img2img.width, step=64)
 					height = st.slider("Height:", min_value=64, max_value=1024, value=defaults.img2img.height, step=64)
 					seed = st.text_input("Seed:", value=defaults.img2img.seed, help=" The seed to use, if left blank a random seed will be generated.")
-					noise_mode_list = ["Seed", "Find Noise", "Matched Noise"]
+					noise_mode_list = ["Seed", "Find Noise", "Matched Noise", "Find+Matched Noise"]
 					noise_mode = st.selectbox(
 						"Noise Mode", noise_mode_list,
 						help=""
@ -2461,14 +2468,18 @@ def layout():
 						help="Upload an mask image which will be used for masking the image to image generation.",
 					)
 					if uploaded_masks:
-						image = Image.open(uploaded_masks).convert('RGB')
-						new_mask = image.resize((width, height))
-						mask_holder.image(new_mask)
+						mask = Image.open(uploaded_masks)
+						if mask.mode == "RGBA":
+							mask = mask.convert('RGBA')
+							background = Image.new('RGBA', mask.size, (0, 0, 0))
+							mask = Image.alpha_composite(background, mask)
+						mask = mask.resize((width, height))
+						mask_holder.image(mask)

 					if uploaded_images and uploaded_masks:
 						if mask_mode != 2:
 							final_img = new_img.copy()
-							alpha_layer = new_mask.split()[-1].copy().convert('L')
+							alpha_layer = mask.convert('L')
 							strength = st.session_state["denoising_strength"]
 							if mask_mode == 0:
 								alpha_layer = ImageOps.invert(alpha_layer)
@ -2517,7 +2528,7 @@ def layout():
 						#img_array = np.array(image) # if you want to pass it to OpenCV
 						new_mask = None
 						if uploaded_masks:
-							mask = Image.open(uploaded_masks).convert('RGB')
+							mask = Image.open(uploaded_masks).convert('RGBA')
 							new_mask = mask.resize((width, height))
 	
 						try: