when in Uncrop mode, mask added regions and fill them in with Perlin noise for better outpainting

Logan zoellner 2022-09-09 01:24:46 -04:00
parent 8d5977e15b
commit 754e530d70
3 changed files with 113 additions and 13 deletions
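The core idea of the change: when uncropping, keep the original pixels inside a softened bounding box and replace everything outside it with noise, so img2img has non-empty content to outpaint from. A minimal standalone sketch of that idea (simplified; names such as soft_box_mask are illustrative and not used in this commit, and Gaussian noise stands in for the Perlin noise the commit actually uses):

import numpy as np
from PIL import Image, ImageFilter

def soft_box_mask(h, w, top, bottom, left, right, pad=16, blur=8):
    """1 inside the (padded) original-content box, 0 outside, with blurred edges."""
    m = np.zeros((h, w), dtype=np.float32)
    m[top + pad:bottom - pad, left + pad:right - pad] = 1.0
    blurred = Image.fromarray((m * 255).astype(np.uint8), "L").filter(ImageFilter.GaussianBlur(blur))
    return np.array(blurred) / 255.0

h, w = 512, 512
img = np.random.rand(3, h, w).astype(np.float32)     # stand-in for the init image, channels-first, values in [0, 1]
box = soft_box_mask(h, w, top=128, bottom=384, left=128, right=384)
noise = np.random.randn(3, h, w).astype(np.float32)  # placeholder; the commit fills with Perlin noise instead
filled = img * box + noise * (1.0 - box)              # original content inside the box, noise outside
outpaint_mask = 1.0 - soft_box_mask(h, w, 128, 384, 128, 384, pad=24)  # 1 outside the original content, 0 inside

This mirrors the `_image = _image*boundingbox + noise*(1-boundingbox)` and `_mask *= 1-boundingbox2` lines added to webui.py below.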

frontend/frontend.py

@@ -187,8 +187,9 @@ def draw_gradio_ui(opt, img2img=lambda x: x, txt2img=lambda x: x, imgproc=lambda
                 with gr.TabItem("Editor Options"):
                     with gr.Row():
-                        # disable Uncrop for now
-                        # choices=["Mask", "Crop", "Uncrop"]
-                        img2img_image_editor_mode = gr.Radio(choices=["Mask", "Crop"],
+                        choices=["Mask", "Crop", "Uncrop"]
+                        #choices=["Mask", "Crop"]
+                        img2img_image_editor_mode = gr.Radio(choices=choices,
                                                              label="Image Editor Mode",
                                                              value="Mask", elem_id='edit_mode_select',
                                                              visible=True)

scripts/perlin.py (new file, 48 lines added)

@@ -0,0 +1,48 @@
import numpy as np

def perlin(x, y, seed=0):
    # permutation table
    np.random.seed(seed)
    p = np.arange(256, dtype=int)
    np.random.shuffle(p)
    p = np.stack([p, p]).flatten()
    # coordinates of the top-left
    xi, yi = x.astype(int), y.astype(int)
    # internal coordinates
    xf, yf = x - xi, y - yi
    # fade factors
    u, v = fade(xf), fade(yf)
    # noise components
    n00 = gradient(p[p[xi] + yi], xf, yf)
    n01 = gradient(p[p[xi] + yi + 1], xf, yf - 1)
    n11 = gradient(p[p[xi + 1] + yi + 1], xf - 1, yf - 1)
    n10 = gradient(p[p[xi + 1] + yi], xf - 1, yf)
    # combine noises
    x1 = lerp(n00, n10, u)
    x2 = lerp(n01, n11, u)  # FIX1: I was using n10 instead of n01
    return lerp(x1, x2, v)  # FIX2: I also had to reverse x1 and x2 here

def lerp(a, b, x):
    "linear interpolation"
    return a + x * (b - a)

def fade(t):
    "6t^5 - 15t^4 + 10t^3"
    return 6 * t**5 - 15 * t**4 + 10 * t**3

def gradient(h, x, y):
    "grad converts h to the right gradient vector and return the dot product with (x,y)"
    vectors = np.array([[0, 1], [0, -1], [1, 0], [-1, 0]])
    g = vectors[h % 4]
    return g[:, :, 0] * x + g[:, :, 1] * y

lin = np.linspace(0, 5, 100, endpoint=False)
x, y = np.meshgrid(lin, lin)

def perlinNoise(height, width, octavesx=5, octavesy=5, seed=None):
    linx = np.linspace(0, octavesx, width, endpoint=False)
    liny = np.linspace(0, octavesy, height, endpoint=False)
    x, y = np.meshgrid(linx, liny)
    return perlin(x, y, seed=seed)
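For reference, a small usage sketch of perlinNoise; the height/64 and width/64 octave counts mirror the call added to webui.py below:

import numpy as np
from perlin import perlinNoise

height, width = 512, 512
# one (height, width) channel of noise per RGB channel, roughly one octave per 64 pixels
noise = np.array([perlinNoise(height, width, height / 64, width / 64) for _ in range(3)])
print(noise.shape)  # (3, 512, 512); values are roughly in [-1, 1]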

scripts/webui.py

@@ -2,6 +2,7 @@ import argparse, os, sys, glob, re
 import cv2
+from perlin import perlinNoise
 from frontend.frontend import draw_gradio_ui
 from frontend.job_manager import JobManager, JobInfo
 from frontend.ui_functions import resize_image
@@ -1225,6 +1226,14 @@ class Flagging(gr.FlaggingCallback):
         print("Logged:", filenames[0])

+def blurArr(a, r=8):
+    im1 = Image.fromarray((a * 255).astype(np.uint8), "L")
+    im2 = im1.filter(ImageFilter.GaussianBlur(radius=r))
+    out = np.array(im2) / 255
+    return out
+
 def img2img(prompt: str, image_editor_mode: str, mask_mode: str, mask_blur_strength: int, ddim_steps: int, sampler_name: str,
             toggles: List[int], realesrgan_model_name: str, n_iter: int, cfg_scale: float, denoising_strength: float,
             seed: int, height: int, width: int, resize_mode: int, init_info: any = None, init_info_mask: any = None, fp = None, job_info: JobInfo = None):
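blurArr is used further down to feather the hard-edged bounding-box masks. A quick usage illustration (assumes the blurArr defined above and a float array with values in [0, 1]):

import numpy as np

hard = np.zeros((64, 64), dtype=np.float32)
hard[16:48, 16:48] = 1.0      # hard-edged box mask
soft = blurArr(hard, r=4)     # same shape, edges now ramp smoothly between 0 and 1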
@@ -1305,16 +1314,7 @@ def img2img(prompt: str, image_editor_mode: str, mask_mode: str, mask_blur_stren
         image = torch.from_numpy(image)
         mask_channel = None
-        if image_editor_mode == "Uncrop":
-            alpha = init_img.convert("RGBA")
-            alpha = resize_image(resize_mode, alpha, width // 8, height // 8)
-            mask_channel = alpha.split()[-1]
-            mask_channel = mask_channel.filter(ImageFilter.GaussianBlur(4))
-            mask_channel = np.array(mask_channel)
-            mask_channel[mask_channel >= 255] = 255
-            mask_channel[mask_channel < 255] = 0
-            mask_channel = Image.fromarray(mask_channel).filter(ImageFilter.GaussianBlur(2))
-        elif image_editor_mode == "Mask":
+        if image_editor_mode == "Mask":
             alpha = init_mask.convert("RGBA")
             alpha = resize_image(resize_mode, alpha, width // 8, height // 8)
             mask_channel = alpha.split()[1]
@@ -1329,7 +1329,58 @@ def img2img(prompt: str, image_editor_mode: str, mask_mode: str, mask_blur_stren
         if opt.optimized:
             modelFS.to(device)

-        init_image = 2. * image - 1.
+        # let's try and find where init_image is 0's
+        # shape is probably (3,width,height)?
+        if image_editor_mode == "Uncrop":
+            _image = image.numpy()[0]
+            _mask = np.ones((_image.shape[1], _image.shape[2]))
+
+            # compute bounding box
+            cmax = np.max(_image, axis=0)
+            rowmax = np.max(cmax, axis=0)
+            colmax = np.max(cmax, axis=1)
+            rowwhere = np.where(rowmax > 0)[0]
+            colwhere = np.where(colmax > 0)[0]
+            rowstart = rowwhere[0]
+            rowend = rowwhere[-1] + 1
+            colstart = colwhere[0]
+            colend = colwhere[-1] + 1
+            print('bounding box: ', rowstart, rowend, colstart, colend)
+
+            # this is where noise will get added
+            PAD_IMG = 16
+            boundingbox = np.zeros(shape=(height, width))
+            boundingbox[colstart + PAD_IMG:colend - PAD_IMG, rowstart + PAD_IMG:rowend - PAD_IMG] = 1
+            boundingbox = blurArr(boundingbox, 4)
+
+            # this is the mask for outpainting
+            PAD_MASK = 24
+            boundingbox2 = np.zeros(shape=(height, width))
+            boundingbox2[colstart + PAD_MASK:colend - PAD_MASK, rowstart + PAD_MASK:rowend - PAD_MASK] = 1
+            boundingbox2 = blurArr(boundingbox2, 4)
+
+            # noise = np.random.randn(*_image.shape)
+            noise = np.array([perlinNoise(height, width, height / 64, width / 64) for i in range(3)])
+
+            _mask *= 1 - boundingbox2
+
+            # convert 0,1 to -1,1
+            _image = 2. * _image - 1.
+
+            # add noise
+            boundingbox = np.tile(boundingbox, (3, 1, 1))
+            _image = _image * boundingbox + noise * (1 - boundingbox)
+
+            # resize mask
+            _mask = np.array(resize_image(resize_mode, Image.fromarray(_mask * 255), width // 8, height // 8)) / 255
+
+            # convert back to torch tensor
+            init_image = torch.from_numpy(np.expand_dims(_image, axis=0).astype(np.float32)).to(device)
+            mask = torch.from_numpy(_mask.astype(np.float32)).to(device)
+        else:
+            init_image = 2. * image - 1.
+
         init_image = init_image.to(device)
         init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
         init_latent = (model if not opt.optimized else modelFS).get_first_stage_encoding((model if not opt.optimized else modelFS).encode_first_stage(init_image))  # move to latent space
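The bounding-box search above just projects the non-zero content onto each axis and takes the first and last non-zero index (note that the row* variables actually index columns and the col* variables index rows). A tiny standalone illustration with a made-up array:

import numpy as np

_image = np.zeros((3, 8, 8))
_image[:, 2:5, 3:7] = 0.5              # pretend the pasted-in original occupies rows 2..4, cols 3..6

cmax = np.max(_image, axis=0)          # collapse channels -> (8, 8)
rowmax = np.max(cmax, axis=0)          # per-column maxima
colmax = np.max(cmax, axis=1)          # per-row maxima
rowwhere = np.where(rowmax > 0)[0]     # columns containing content
colwhere = np.where(colmax > 0)[0]     # rows containing content
print(rowwhere[0], rowwhere[-1] + 1)   # 3 7  -> column start/end
print(colwhere[0], colwhere[-1] + 1)   # 2 5  -> row start/end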