import argparse, os, sys, glob
from collections import namedtuple
import torch
import torch.nn as nn
import numpy as np
import gradio as gr
from omegaconf import OmegaConf
from PIL import Image, ImageFont, ImageDraw, PngImagePlugin
from itertools import islice
from einops import rearrange, repeat
from torch import autocast
import mimetypes
import random
import math
import html
import time
import json
import traceback

import k_diffusion.sampling
from ldm.util import instantiate_from_config
from ldm.models.diffusion.ddim import DDIMSampler
from ldm.models.diffusion.plms import PLMSSampler
import ldm.modules.encoders.modules

try:
    # this silences the annoying "Some weights of the model checkpoint were not used when initializing..." message at start.
    from transformers import logging
    logging.set_verbosity_error()
except:
    pass

# this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the browser will not show any UI
mimetypes.init()
mimetypes.add_type('application/javascript', '.js')

# some of those options should not be changed at all because they would break the model, so I removed them from options.
opt_C = 4
opt_f = 8

LANCZOS = (Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS)
invalid_filename_chars = '<>:"/\\|?*\n'
config_filename = "config.json"

parser = argparse.ArgumentParser()
parser.add_argument("--config", type=str, default="configs/stable-diffusion/v1-inference.yaml", help="path to config which constructs model",)
parser.add_argument("--ckpt", type=str, default="models/ldm/stable-diffusion-v1/model.ckpt", help="path to checkpoint of model",)
parser.add_argument("--gfpgan-dir", type=str, help="GFPGAN directory", default=('./src/gfpgan' if os.path.exists('./src/gfpgan') else './GFPGAN'))  # not my preferred location, but it's where the popular guides put it
parser.add_argument("--no-half", action='store_true', help="do not switch the model to 16-bit floats")
parser.add_argument("--no-progressbar-hiding", action='store_true', help="do not hide progressbar in gradio UI (we hide it because it slows down ML if you have hardware acceleration in browser)")
parser.add_argument("--max-batch-count", type=int, default=16, help="maximum batch count value for the UI")
parser.add_argument("--embeddings-dir", type=str, default='embeddings', help="embeddings directory for textual inversion (default: embeddings)")

cmd_opts = parser.parse_args()

css_hide_progressbar = """
.wrap .m-12 svg { display:none!important; }
.wrap .m-12::before { content:"Loading..." }
.progress-bar { display:none!important; }
.meta-text { display:none!important; }
"""

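# Table of samplers exposed in the UI: each entry pairs a display name with a
# constructor that takes the loaded model. k-diffusion samplers are only listed
# if the installed k_diffusion.sampling module actually provides them.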
SamplerData = namedtuple('SamplerData', ['name', 'constructor'])
samplers = [
    *[SamplerData(x[0], lambda m, funcname=x[1]: KDiffusionSampler(m, funcname)) for x in [
        ('LMS', 'sample_lms'),
        ('Heun', 'sample_heun'),
        ('Euler', 'sample_euler'),
        ('Euler ancestral', 'sample_euler_ancestral'),
        ('DPM 2', 'sample_dpm_2'),
        ('DPM 2 Ancestral', 'sample_dpm_2_ancestral'),
    ] if hasattr(k_diffusion.sampling, x[1])],
    SamplerData('DDIM', lambda m: DDIMSampler(m)),
    SamplerData('PLMS', lambda m: PLMSSampler(m)),
]
samplers_for_img2img = [x for x in samplers if x.name != 'DDIM' and x.name != 'PLMS']

RealesrganModelInfo = namedtuple("RealesrganModelInfo", ["name", "location", "model", "netscale"])

try:
    from basicsr.archs.rrdbnet_arch import RRDBNet
    from realesrgan import RealESRGANer
    from realesrgan.archs.srvgg_arch import SRVGGNetCompact

    realesrgan_models = [
        RealesrganModelInfo(
            name="Real-ESRGAN 4x plus",
            location="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
            netscale=4, model=lambda: RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
        ),
        RealesrganModelInfo(
            name="Real-ESRGAN 4x plus anime 6B",
            location="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth",
            netscale=4, model=lambda: RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
        ),
        RealesrganModelInfo(
            name="Real-ESRGAN 2x plus",
            location="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth",
            netscale=2, model=lambda: RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
        ),
    ]
    have_realesrgan = True
except:
    print("Error loading Real-ESRGAN:", file=sys.stderr)
    print(traceback.format_exc(), file=sys.stderr)

    realesrgan_models = [RealesrganModelInfo('None', '', 0, None)]
    have_realesrgan = False

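# Each entry in Options.data_labels is (default, label) for plain settings, or
# (default, label, min, max) / (default, label, min, max, step) for settings
# that should be rendered as sliders (see create_setting_component below).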
class Options:
    data = None
    data_labels = {
        "outdir": ("", "Output directory; if empty, defaults to 'outputs/*'"),
        "samples_save": (True, "Save individual samples"),
        "samples_format": ('png', 'File format for individual samples'),
        "grid_save": (True, "Save image grids"),
        "grid_format": ('png', 'File format for grids'),
        "grid_extended_filename": (False, "Add extended info (seed, prompt) to filename when saving grid"),
        "n_rows": (-1, "Grid row count; use -1 for autodetect and 0 for it to be same as batch size", -1, 16),
        "jpeg_quality": (80, "Quality for saved jpeg images", 1, 100),
        "verify_input": (True, "Check input, and produce warning if it's too long"),
        "enable_pnginfo": (True, "Save text information about generation parameters as chunks to png files"),
        "prompt_matrix_add_to_start": (True, "In prompt matrix, add the variable combination of text to the start of the prompt, rather than the end"),
        "sd_upscale_overlap": (64, "Overlap for tiles for SD upscale. The smaller it is, the less smooth the transition from one tile to another", 0, 256, 16),
    }

    def __init__(self):
        self.data = {k: v[0] for k, v in self.data_labels.items()}

    def __setattr__(self, key, value):
        if self.data is not None:
            if key in self.data:
                self.data[key] = value

        return super(Options, self).__setattr__(key, value)

    def __getattr__(self, item):
        if self.data is not None:
            if item in self.data:
                return self.data[item]

        if item in self.data_labels:
            return self.data_labels[item][0]

        return super(Options, self).__getattribute__(item)

    def save(self, filename):
        with open(filename, "w", encoding="utf8") as file:
            json.dump(self.data, file)

    def load(self, filename):
        with open(filename, "r", encoding="utf8") as file:
            self.data = json.load(file)

def chunk(it, size):
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())


def load_model_from_config(config, ckpt, verbose=False):
    print(f"Loading model from {ckpt}")
    pl_sd = torch.load(ckpt, map_location="cpu")
    if "global_step" in pl_sd:
        print(f"Global Step: {pl_sd['global_step']}")
    sd = pl_sd["state_dict"]
    model = instantiate_from_config(config.model)
    m, u = model.load_state_dict(sd, strict=False)
    if len(m) > 0 and verbose:
        print("missing keys:")
        print(m)
    if len(u) > 0 and verbose:
        print("unexpected keys:")
        print(u)

    model.cuda()
    model.eval()
    return model

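# CFGDenoiser applies classifier-free guidance for k-diffusion sampling: the
# conditional and unconditional branches are evaluated in a single batched call
# and blended as uncond + (cond - uncond) * cond_scale.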
class CFGDenoiser(nn.Module):
    def __init__(self, model):
        super().__init__()
        self.inner_model = model

    def forward(self, x, sigma, uncond, cond, cond_scale):
        x_in = torch.cat([x] * 2)
        sigma_in = torch.cat([sigma] * 2)
        cond_in = torch.cat([uncond, cond])
        uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2)
        return uncond + (cond - uncond) * cond_scale

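# Adapter that gives k-diffusion sampling functions the same .sample() interface
# as DDIMSampler/PLMSSampler, so the rest of the code can treat all samplers uniformly.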
class KDiffusionSampler:
    def __init__(self, m, funcname):
        self.model = m
        self.model_wrap = k_diffusion.external.CompVisDenoiser(m)
        self.funcname = funcname
        self.func = getattr(k_diffusion.sampling, self.funcname)

    def sample(self, S, conditioning, batch_size, shape, verbose, unconditional_guidance_scale, unconditional_conditioning, eta, x_T):
        sigmas = self.model_wrap.get_sigmas(S)
        x = x_T * sigmas[0]
        model_wrap_cfg = CFGDenoiser(self.model_wrap)

        samples_ddim = self.func(model_wrap_cfg, x, sigmas, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': unconditional_guidance_scale}, disable=False)

        return samples_ddim, None

def create_random_tensors(shape, seeds):
    xs = []
    for seed in seeds:
        torch.manual_seed(seed)

        # randn results depend on device; gpu and cpu get different results for same seed;
        # the way I see it, it's better to do this on CPU, so that everyone gets same result;
        # but the original script had it like this so I do not dare change it for now because
        # it will break everyone's seeds.
        xs.append(torch.randn(shape, device=device))
    x = torch.stack(xs)
    return x


def torch_gc():
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()

def save_image(image, path, basename, seed, prompt, extension, info=None, short_filename=False):
    prompt = sanitize_filename_part(prompt)

    if short_filename:
        filename = f"{basename}.{extension}"
    else:
        filename = f"{basename}-{seed}-{prompt[:128]}.{extension}"

    if extension == 'png' and opts.enable_pnginfo and info is not None:
        pnginfo = PngImagePlugin.PngInfo()
        pnginfo.add_text("parameters", info)
    else:
        pnginfo = None

    image.save(os.path.join(path, filename), quality=opts.jpeg_quality, pnginfo=pnginfo)


def sanitize_filename_part(text):
    return text.replace(' ', '_').translate({ord(x): '' for x in invalid_filename_chars})[:128]


def plaintext_to_html(text):
    text = "".join([f"<p>{html.escape(x)}</p>\n" for x in text.split('\n')])
    return text

def load_GFPGAN():
    model_name = 'GFPGANv1.3'
    model_path = os.path.join(cmd_opts.gfpgan_dir, 'experiments/pretrained_models', model_name + '.pth')

    if not os.path.isfile(model_path):
        raise Exception("GFPGAN model not found at path " + model_path)

    sys.path.append(os.path.abspath(cmd_opts.gfpgan_dir))
    from gfpgan import GFPGANer

    return GFPGANer(model_path=model_path, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None)

def image_grid(imgs, batch_size, force_n_rows=None):
    if force_n_rows is not None:
        rows = force_n_rows
    elif opts.n_rows > 0:
        rows = opts.n_rows
    elif opts.n_rows == 0:
        rows = batch_size
    else:
        rows = math.sqrt(len(imgs))
        rows = round(rows)

    cols = math.ceil(len(imgs) / rows)

    w, h = imgs[0].size
    grid = Image.new('RGB', size=(cols * w, rows * h), color='black')

    for i, img in enumerate(imgs):
        grid.paste(img, box=(i % cols * w, i // cols * h))

    return grid

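# Tiling helpers for SD upscale: split_grid cuts an image into overlapping tiles,
# and combine_grid pastes the processed tiles back together, blending the overlap
# regions with linear alpha masks to hide the seams.
# Example (illustrative): grid = split_grid(img, 512, 512, 64); process every
# grid.tiles entry in place; result = combine_grid(grid).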
Grid = namedtuple("Grid", ["tiles", "tile_w", "tile_h", "image_w", "image_h", "overlap"])


def split_grid(image, tile_w=512, tile_h=512, overlap=64):
    w = image.width
    h = image.height

    now = tile_w - overlap  # non-overlap width
    noh = tile_h - overlap

    cols = math.ceil((w - overlap) / now)
    rows = math.ceil((h - overlap) / noh)

    grid = Grid([], tile_w, tile_h, w, h, overlap)
    for row in range(rows):
        row_images = []

        y = row * noh

        if y + tile_h >= h:
            y = h - tile_h

        for col in range(cols):
            x = col * now

            if x + tile_w >= w:
                x = w - tile_w

            tile = image.crop((x, y, x + tile_w, y + tile_h))

            row_images.append([x, tile_w, tile])

        grid.tiles.append([y, tile_h, row_images])

    return grid


def combine_grid(grid):
    def make_mask_image(r):
        r = r * 255 / grid.overlap
        r = r.astype(np.uint8)
        return Image.fromarray(r, 'L')

    mask_w = make_mask_image(np.arange(grid.overlap, dtype=np.float64).reshape((1, grid.overlap)).repeat(grid.tile_h, axis=0))
    mask_h = make_mask_image(np.arange(grid.overlap, dtype=np.float64).reshape((grid.overlap, 1)).repeat(grid.image_w, axis=1))

    combined_image = Image.new("RGB", (grid.image_w, grid.image_h))
    for y, h, row in grid.tiles:
        combined_row = Image.new("RGB", (grid.image_w, h))
        for x, w, tile in row:
            if x == 0:
                combined_row.paste(tile, (0, 0))
                continue

            combined_row.paste(tile.crop((0, 0, grid.overlap, h)), (x, 0), mask=mask_w)
            combined_row.paste(tile.crop((grid.overlap, 0, w, h)), (x + grid.overlap, 0))

        if y == 0:
            combined_image.paste(combined_row, (0, 0))
            continue

        combined_image.paste(combined_row.crop((0, 0, combined_row.width, grid.overlap)), (0, y), mask=mask_h)
        combined_image.paste(combined_row.crop((0, grid.overlap, combined_row.width, h)), (0, y + grid.overlap))

    return combined_image

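# Draws the prompt-matrix legend around a finished grid: column and row labels
# list the optional prompt parts, with inactive parts rendered struck through.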
def draw_prompt_matrix(im, width, height, all_prompts):
    def wrap(text, d, font, line_length):
        lines = ['']
        for word in text.split():
            line = f'{lines[-1]} {word}'.strip()
            if d.textlength(line, font=font) <= line_length:
                lines[-1] = line
            else:
                lines.append(word)
        return '\n'.join(lines)

    def draw_texts(pos, x, y, texts, sizes):
        for i, (text, size) in enumerate(zip(texts, sizes)):
            active = pos & (1 << i) != 0

            if not active:
                text = '\u0336'.join(text) + '\u0336'

            d.multiline_text((x, y + size[1] / 2), text, font=fnt, fill=color_active if active else color_inactive, anchor="mm", align="center")

            y += size[1] + line_spacing

    fontsize = (width + height) // 25
    line_spacing = fontsize // 2
    fnt = ImageFont.truetype("arial.ttf", fontsize)
    color_active = (0, 0, 0)
    color_inactive = (153, 153, 153)

    pad_top = height // 4
    pad_left = width * 3 // 4 if len(all_prompts) > 2 else 0

    cols = im.width // width
    rows = im.height // height

    prompts = all_prompts[1:]

    result = Image.new("RGB", (im.width + pad_left, im.height + pad_top), "white")
    result.paste(im, (pad_left, pad_top))

    d = ImageDraw.Draw(result)

    boundary = math.ceil(len(prompts) / 2)
    prompts_horiz = [wrap(x, d, fnt, width) for x in prompts[:boundary]]
    prompts_vert = [wrap(x, d, fnt, pad_left) for x in prompts[boundary:]]

    sizes_hor = [(x[2] - x[0], x[3] - x[1]) for x in [d.multiline_textbbox((0, 0), x, font=fnt) for x in prompts_horiz]]
    sizes_ver = [(x[2] - x[0], x[3] - x[1]) for x in [d.multiline_textbbox((0, 0), x, font=fnt) for x in prompts_vert]]
    hor_text_height = sum([x[1] + line_spacing for x in sizes_hor]) - line_spacing
    ver_text_height = sum([x[1] + line_spacing for x in sizes_ver]) - line_spacing

    for col in range(cols):
        x = pad_left + width * col + width / 2
        y = pad_top / 2 - hor_text_height / 2

        draw_texts(col, x, y, prompts_horiz, sizes_hor)

    for row in range(rows):
        x = pad_left / 2
        y = pad_top + height * row + height / 2 - ver_text_height / 2

        draw_texts(row, x, y, prompts_vert, sizes_ver)

    return result

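# resize_mode matches the "Resize mode" radio in the img2img UI:
# 0 = just resize, 1 = crop and resize, 2 = resize and fill (edges are extended
# by stretching the border rows/columns of the resized image).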
def resize_image(resize_mode, im, width, height):
    if resize_mode == 0:
        res = im.resize((width, height), resample=LANCZOS)
    elif resize_mode == 1:
        ratio = width / height
        src_ratio = im.width / im.height

        src_w = width if ratio > src_ratio else im.width * height // im.height
        src_h = height if ratio <= src_ratio else im.height * width // im.width

        resized = im.resize((src_w, src_h), resample=LANCZOS)
        res = Image.new("RGB", (width, height))
        res.paste(resized, box=(width // 2 - src_w // 2, height // 2 - src_h // 2))
    else:
        ratio = width / height
        src_ratio = im.width / im.height

        src_w = width if ratio < src_ratio else im.width * height // im.height
        src_h = height if ratio >= src_ratio else im.height * width // im.width

        resized = im.resize((src_w, src_h), resample=LANCZOS)
        res = Image.new("RGB", (width, height))
        res.paste(resized, box=(width // 2 - src_w // 2, height // 2 - src_h // 2))

        if ratio < src_ratio:
            fill_height = height // 2 - src_h // 2
            res.paste(resized.resize((width, fill_height), box=(0, 0, width, 0)), box=(0, 0))
            res.paste(resized.resize((width, fill_height), box=(0, resized.height, width, resized.height)), box=(0, fill_height + src_h))
        elif ratio > src_ratio:
            fill_width = width // 2 - src_w // 2
            res.paste(resized.resize((fill_width, height), box=(0, 0, 0, height)), box=(0, 0))
            res.paste(resized.resize((fill_width, height), box=(resized.width, 0, resized.width, height)), box=(fill_width + src_w, 0))

    return res

def check_prompt_length(prompt, comments):
    """this function tests if prompt is too long, and if so, adds a message to comments"""

    tokenizer = model.cond_stage_model.tokenizer
    max_length = model.cond_stage_model.max_length

    info = model.cond_stage_model.tokenizer([prompt], truncation=True, max_length=max_length, return_overflowing_tokens=True, padding="max_length", return_tensors="pt")
    ovf = info['overflowing_tokens'][0]
    overflowing_count = ovf.shape[0]
    if overflowing_count == 0:
        return

    vocab = {v: k for k, v in tokenizer.get_vocab().items()}
    overflowing_words = [vocab.get(int(x), "") for x in ovf]
    overflowing_text = tokenizer.convert_tokens_to_string(''.join(overflowing_words))

    comments.append(f"Warning: too many input tokens; some ({len(overflowing_words)}) have been truncated:\n{overflowing_text}\n")


def wrap_gradio_call(func):
    def f(*p1, **p2):
        t = time.perf_counter()
        res = list(func(*p1, **p2))
        elapsed = time.perf_counter() - t

        # last item is always HTML
        res[-1] = res[-1] + f"<p class='performance'>Time taken: {elapsed:.2f}s</p>"

        return tuple(res)

    return f

GFPGAN = None
if os.path.exists(cmd_opts.gfpgan_dir):
    try:
        GFPGAN = load_GFPGAN()
        print("Loaded GFPGAN")
    except Exception:
        print("Error loading GFPGAN:", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)

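# Replaces parts of the loaded Stable Diffusion model so that textual inversion
# embeddings from --embeddings-dir can be used directly in prompts, and so that
# attention weighting with ( ) and [ ] is applied to the CLIP conditioning.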
class StableDiffusionModelHijack:
    ids_lookup = {}
    word_embeddings = {}
    word_embeddings_checksums = {}
    fixes = None
    used_custom_terms = []
    dir_mtime = None

    def load_textual_inversion_embeddings(self, dir, model):
        mt = os.path.getmtime(dir)
        if self.dir_mtime is not None and mt <= self.dir_mtime:
            return

        self.dir_mtime = mt
        self.ids_lookup.clear()
        self.word_embeddings.clear()

        tokenizer = model.cond_stage_model.tokenizer

        def const_hash(a):
            r = 0
            for v in a:
                r = (r * 281 ^ int(v) * 997) & 0xFFFFFFFF
            return r

        def process_file(path, filename):
            name = os.path.splitext(filename)[0]

            data = torch.load(path)
            param_dict = data['string_to_param']
            assert len(param_dict) == 1, 'embedding file has multiple terms in it'
            emb = next(iter(param_dict.items()))[1].reshape(768)
            self.word_embeddings[name] = emb
            self.word_embeddings_checksums[name] = f'{const_hash(emb) & 0xffff:04x}'

            ids = tokenizer([name], add_special_tokens=False)['input_ids'][0]

            first_id = ids[0]
            if first_id not in self.ids_lookup:
                self.ids_lookup[first_id] = []
            self.ids_lookup[first_id].append((ids, name))

        for fn in os.listdir(dir):
            try:
                process_file(os.path.join(dir, fn), fn)
            except:
                print(f"Error loading embedding {fn}:", file=sys.stderr)
                print(traceback.format_exc(), file=sys.stderr)
                continue

        print(f"Loaded a total of {len(self.word_embeddings)} textual inversion embeddings.")

    def hijack(self, m):
        model_embeddings = m.cond_stage_model.transformer.text_model.embeddings
        model_embeddings.token_embedding = EmbeddingsWithFixes(model_embeddings.token_embedding, self)
        m.cond_stage_model = FrozenCLIPEmbedderWithCustomWords(m.cond_stage_model, self)

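# Wraps the frozen CLIP embedder: tokens following ( get their weight multiplied
# by 1.1 per parenthesis and tokens following [ divided by 1.1, and any token
# sequence matching a loaded embedding name is marked for replacement by
# EmbeddingsWithFixes below.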
class FrozenCLIPEmbedderWithCustomWords(torch.nn.Module):
    def __init__(self, wrapped, embeddings):
        super().__init__()
        self.wrapped = wrapped
        self.embeddings = embeddings
        self.tokenizer = wrapped.tokenizer
        self.max_length = wrapped.max_length

        self.token_mults = {}

        tokens_with_parens = [(k, v) for k, v in self.tokenizer.get_vocab().items() if '(' in k or ')' in k or '[' in k or ']' in k]
        for text, ident in tokens_with_parens:
            mult = 1.0
            for c in text:
                if c == '[':
                    mult /= 1.1
                if c == ']':
                    mult *= 1.1
                if c == '(':
                    mult *= 1.1
                if c == ')':
                    mult /= 1.1

            if mult != 1.0:
                self.token_mults[ident] = mult

    def forward(self, text):
        self.embeddings.fixes = []
        self.embeddings.used_custom_terms = []
        remade_batch_tokens = []
        id_start = self.wrapped.tokenizer.bos_token_id
        id_end = self.wrapped.tokenizer.eos_token_id
        maxlen = self.wrapped.max_length - 2
        cache = {}
        batch_tokens = self.wrapped.tokenizer(text, truncation=False, add_special_tokens=False)["input_ids"]
        batch_multipliers = []
        for tokens in batch_tokens:
            tuple_tokens = tuple(tokens)

            if tuple_tokens in cache:
                remade_tokens, fixes, multipliers = cache[tuple_tokens]
            else:
                fixes = []
                remade_tokens = []
                multipliers = []
                mult = 1.0

                i = 0
                while i < len(tokens):
                    token = tokens[i]

                    possible_matches = self.embeddings.ids_lookup.get(token, None)

                    mult_change = self.token_mults.get(token)
                    if mult_change is not None:
                        mult *= mult_change
                    elif possible_matches is None:
                        remade_tokens.append(token)
                        multipliers.append(mult)
                    else:
                        found = False
                        for ids, word in possible_matches:
                            if tokens[i:i + len(ids)] == ids:
                                fixes.append((len(remade_tokens), word))
                                remade_tokens.append(777)
                                multipliers.append(mult)
                                i += len(ids) - 1
                                found = True
                                self.embeddings.used_custom_terms.append((word, self.embeddings.word_embeddings_checksums[word]))
                                break

                        if not found:
                            remade_tokens.append(token)
                            multipliers.append(mult)

                    i += 1

                remade_tokens = remade_tokens + [id_end] * (maxlen - 2 - len(remade_tokens))
                remade_tokens = [id_start] + remade_tokens[0:maxlen - 2] + [id_end]

                multipliers = multipliers + [1.0] * (maxlen - 2 - len(multipliers))
                multipliers = [1.0] + multipliers[0:maxlen - 2] + [1.0]

                # cache the padded token and multiplier lists so repeated prompts in a batch reuse consistent data
                cache[tuple_tokens] = (remade_tokens, fixes, multipliers)

            remade_batch_tokens.append(remade_tokens)
            self.embeddings.fixes.append(fixes)
            batch_multipliers.append(multipliers)

        tokens = torch.asarray(remade_batch_tokens).to(self.wrapped.device)
        outputs = self.wrapped.transformer(input_ids=tokens)
        z = outputs.last_hidden_state

        # restoring original mean is likely not correct, but it seems to work well to prevent artifacts that happen otherwise
        batch_multipliers = torch.asarray(np.array(batch_multipliers)).to(device)
        original_mean = z.mean()
        z *= batch_multipliers.reshape(batch_multipliers.shape + (1,)).expand(z.shape)
        new_mean = z.mean()
        z *= original_mean / new_mean

        return z


class EmbeddingsWithFixes(nn.Module):
    def __init__(self, wrapped, embeddings):
        super().__init__()
        self.wrapped = wrapped
        self.embeddings = embeddings

    def forward(self, input_ids):
        batch_fixes = self.embeddings.fixes
        self.embeddings.fixes = None

        inputs_embeds = self.wrapped(input_ids)

        if batch_fixes is not None:
            for fixes, tensor in zip(batch_fixes, inputs_embeds):
                for offset, word in fixes:
                    tensor[offset] = self.embeddings.word_embeddings[word]

        return inputs_embeds

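# process_images drives both txt2img and img2img: it expands prompt matrices,
# assigns per-image seeds, runs the sampler batch by batch, optionally applies
# GFPGAN, and saves individual samples and the summary grid.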
def process_images(outpath, func_init, func_sample, prompt, seed, sampler_index, batch_size, n_iter, steps, cfg_scale, width, height, prompt_matrix, use_GFPGAN, do_not_save_grid=False, extra_generation_params=None):
    """this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch"""

    assert prompt is not None
    torch_gc()

    if seed == -1:
        seed = random.randrange(4294967294)
    seed = int(seed)

    os.makedirs(outpath, exist_ok=True)

    sample_path = os.path.join(outpath, "samples")
    os.makedirs(sample_path, exist_ok=True)
    base_count = len(os.listdir(sample_path))
    grid_count = len(os.listdir(outpath)) - 1

    comments = []

    prompt_matrix_parts = []
    if prompt_matrix:
        all_prompts = []
        prompt_matrix_parts = prompt.split("|")
        combination_count = 2 ** (len(prompt_matrix_parts) - 1)
        for combination_num in range(combination_count):
            selected_prompts = [text.strip().strip(',') for n, text in enumerate(prompt_matrix_parts[1:]) if combination_num & (1 << n)]

            if opts.prompt_matrix_add_to_start:
                selected_prompts = selected_prompts + [prompt_matrix_parts[0]]
            else:
                selected_prompts = [prompt_matrix_parts[0]] + selected_prompts

            all_prompts.append(", ".join(selected_prompts))

        n_iter = math.ceil(len(all_prompts) / batch_size)
        all_seeds = len(all_prompts) * [seed]

        print(f"Prompt matrix will create {len(all_prompts)} images using a total of {n_iter} batches.")
    else:
        if opts.verify_input:
            try:
                check_prompt_length(prompt, comments)
            except:
                import traceback
                print("Error verifying input:", file=sys.stderr)
                print(traceback.format_exc(), file=sys.stderr)

        all_prompts = batch_size * n_iter * [prompt]
        all_seeds = [seed + x for x in range(len(all_prompts))]

    generation_params = {
        "Steps": steps,
        "Sampler": samplers[sampler_index].name,
        "CFG scale": cfg_scale,
        "Seed": seed,
        "GFPGAN": ("GFPGAN" if use_GFPGAN and GFPGAN is not None else None)
    }

    if extra_generation_params is not None:
        generation_params.update(extra_generation_params)

    generation_params_text = ", ".join([k if k == v else f'{k}: {v}' for k, v in generation_params.items() if v is not None])

    def infotext():
        return f"{prompt}\n{generation_params_text}".strip() + "".join(["\n\n" + x for x in comments])

    if os.path.exists(cmd_opts.embeddings_dir):
        model_hijack.load_textual_inversion_embeddings(cmd_opts.embeddings_dir, model)

    output_images = []
    with torch.no_grad(), autocast("cuda"), model.ema_scope():
        init_data = func_init()

        for n in range(n_iter):
            prompts = all_prompts[n * batch_size:(n + 1) * batch_size]
            seeds = all_seeds[n * batch_size:(n + 1) * batch_size]

            uc = model.get_learned_conditioning(len(prompts) * [""])
            c = model.get_learned_conditioning(prompts)

            if len(model_hijack.used_custom_terms) > 0:
                comments.append("Used custom terms: " + ", ".join([f'{word} [{checksum}]' for word, checksum in model_hijack.used_custom_terms]))

            # we manually generate all input noises because each one should have a specific seed
            x = create_random_tensors([opt_C, height // opt_f, width // opt_f], seeds=seeds)

            samples_ddim = func_sample(init_data=init_data, x=x, conditioning=c, unconditional_conditioning=uc)

            x_samples_ddim = model.decode_first_stage(samples_ddim)
            x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)

            if prompt_matrix or opts.samples_save or opts.grid_save:
                for i, x_sample in enumerate(x_samples_ddim):
                    x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
                    x_sample = x_sample.astype(np.uint8)

                    if use_GFPGAN and GFPGAN is not None:
                        torch_gc()

                        cropped_faces, restored_faces, restored_img = GFPGAN.enhance(x_sample, has_aligned=False, only_center_face=False, paste_back=True)
                        x_sample = restored_img

                    image = Image.fromarray(x_sample)
                    save_image(image, sample_path, f"{base_count:05}", seeds[i], prompts[i], opts.samples_format, info=infotext())

                    output_images.append(image)
                    base_count += 1

        if (prompt_matrix or opts.grid_save) and not do_not_save_grid:
            if prompt_matrix:
                grid = image_grid(output_images, batch_size, force_n_rows=1 << ((len(prompt_matrix_parts) - 1) // 2))

                try:
                    grid = draw_prompt_matrix(grid, width, height, prompt_matrix_parts)
                except:
                    import traceback
                    print("Error creating prompt_matrix text:", file=sys.stderr)
                    print(traceback.format_exc(), file=sys.stderr)

                output_images.insert(0, grid)
            else:
                grid = image_grid(output_images, batch_size)

            save_image(grid, outpath, f"grid-{grid_count:04}", seed, prompt, opts.grid_format, info=infotext(), short_filename=not opts.grid_extended_filename)
            grid_count += 1

    torch_gc()
    return output_images, seed, infotext()

def txt2img(prompt: str, ddim_steps: int, sampler_index: int, use_GFPGAN: bool, prompt_matrix: bool, ddim_eta: float, n_iter: int, batch_size: int, cfg_scale: float, seed: int, height: int, width: int):
    outpath = opts.outdir or "outputs/txt2img-samples"

    sampler = samplers[sampler_index].constructor(model)

    def init():
        pass

    def sample(init_data, x, conditioning, unconditional_conditioning):
        samples_ddim, _ = sampler.sample(S=ddim_steps, conditioning=conditioning, batch_size=int(x.shape[0]), shape=x[0].shape, verbose=False, unconditional_guidance_scale=cfg_scale, unconditional_conditioning=unconditional_conditioning, eta=ddim_eta, x_T=x)
        return samples_ddim

    output_images, seed, info = process_images(
        outpath=outpath,
        func_init=init,
        func_sample=sample,
        prompt=prompt,
        seed=seed,
        sampler_index=sampler_index,
        batch_size=batch_size,
        n_iter=n_iter,
        steps=ddim_steps,
        cfg_scale=cfg_scale,
        width=width,
        height=height,
        prompt_matrix=prompt_matrix,
        use_GFPGAN=use_GFPGAN
    )

    del sampler

    return output_images, seed, plaintext_to_html(info)

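# Custom gradio flagging callback for txt2img: flagged results are appended to
# log/log.csv and their images are written to log/images/.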
class Flagging(gr.FlaggingCallback):

    def setup(self, components, flagging_dir: str):
        pass

    def flag(self, flag_data, flag_option=None, flag_index=None, username=None):
        import csv

        os.makedirs("log/images", exist_ok=True)

        # those must match the "txt2img" function
        prompt, ddim_steps, sampler_name, use_GFPGAN, prompt_matrix, ddim_eta, n_iter, n_samples, cfg_scale, request_seed, height, width, images, seed, comment = flag_data

        filenames = []

        with open("log/log.csv", "a", encoding="utf8", newline='') as file:
            import time
            import base64

            at_start = file.tell() == 0
            writer = csv.writer(file)
            if at_start:
                writer.writerow(["prompt", "seed", "width", "height", "cfgs", "steps", "filename"])

            filename_base = str(int(time.time() * 1000))
            for i, filedata in enumerate(images):
                filename = "log/images/" + filename_base + ("" if len(images) == 1 else "-" + str(i + 1)) + ".png"

                if filedata.startswith("data:image/png;base64,"):
                    filedata = filedata[len("data:image/png;base64,"):]

                with open(filename, "wb") as imgfile:
                    imgfile.write(base64.decodebytes(filedata.encode('utf-8')))

                filenames.append(filename)

            writer.writerow([prompt, seed, width, height, cfg_scale, ddim_steps, filenames[0]])

        print("Logged:", filenames[0])

txt2img_interface = gr.Interface(
    wrap_gradio_call(txt2img),
    inputs=[
        gr.Textbox(label="Prompt", placeholder="A corgi wearing a top hat as an oil painting.", lines=1),
        gr.Slider(minimum=1, maximum=150, step=1, label="Sampling Steps", value=50),
        gr.Radio(label='Sampling method', choices=[x.name for x in samplers], value=samplers[0].name, type="index"),
        gr.Checkbox(label='Fix faces using GFPGAN', value=False, visible=GFPGAN is not None),
        gr.Checkbox(label='Create prompt matrix (separate multiple prompts using |, and get all combinations of them)', value=False),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="DDIM ETA", value=0.0, visible=False),
        gr.Slider(minimum=1, maximum=cmd_opts.max_batch_count, step=1, label='Batch count (how many batches of images to generate)', value=1),
        gr.Slider(minimum=1, maximum=8, step=1, label='Batch size (how many images are in a batch; memory-hungry)', value=1),
        gr.Slider(minimum=1.0, maximum=15.0, step=0.5, label='Classifier Free Guidance Scale (how strongly the image should follow the prompt)', value=7.0),
        gr.Number(label='Seed', value=-1),
        gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512),
        gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512),
    ],
    outputs=[
        gr.Gallery(label="Images"),
        gr.Number(label='Seed'),
        gr.HTML(),
    ],
    title="Stable Diffusion Text-to-Image",
    flagging_callback=Flagging()
)

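# img2img has three modes: plain image-to-image, loopback (each batch feeds the
# next with slowly decreasing denoising strength), and SD upscale (Real-ESRGAN
# 2x followed by tile-by-tile diffusion over the enlarged image).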
def img2img(prompt: str, init_img, ddim_steps: int, sampler_index: int, use_GFPGAN: bool, prompt_matrix, loopback: bool, sd_upscale: bool, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, height: int, width: int, resize_mode: int):
    outpath = opts.outdir or "outputs/img2img-samples"

    sampler = samplers_for_img2img[sampler_index].constructor(model)

    assert 0. <= denoising_strength <= 1., 'can only work with strength in [0.0, 1.0]'

    def init():
        image = init_img.convert("RGB")
        image = resize_image(resize_mode, image, width, height)
        image = np.array(image).astype(np.float32) / 255.0
        image = image[None].transpose(0, 3, 1, 2)
        image = torch.from_numpy(image)

        init_image = 2. * image - 1.
        init_image = init_image.to(device)
        init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
        init_latent = model.get_first_stage_encoding(model.encode_first_stage(init_image))  # move to latent space

        return init_latent,

    def sample(init_data, x, conditioning, unconditional_conditioning):
        t_enc = int(denoising_strength * ddim_steps)

        x0, = init_data

        sigmas = sampler.model_wrap.get_sigmas(ddim_steps)
        noise = x * sigmas[ddim_steps - t_enc - 1]

        xi = x0 + noise
        sigma_sched = sigmas[ddim_steps - t_enc - 1:]
        model_wrap_cfg = CFGDenoiser(sampler.model_wrap)
        samples_ddim = sampler.func(model_wrap_cfg, xi, sigma_sched, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': cfg_scale}, disable=False)
        return samples_ddim

    if loopback:
        output_images, info = None, None
        history = []
        initial_seed = None

        for i in range(n_iter):
            output_images, seed, info = process_images(
                outpath=outpath,
                func_init=init,
                func_sample=sample,
                prompt=prompt,
                seed=seed,
                sampler_index=sampler_index,
                batch_size=1,
                n_iter=1,
                steps=ddim_steps,
                cfg_scale=cfg_scale,
                width=width,
                height=height,
                prompt_matrix=prompt_matrix,
                use_GFPGAN=use_GFPGAN,
                do_not_save_grid=True,
                extra_generation_params={"Denoising Strength": denoising_strength},
            )

            if initial_seed is None:
                initial_seed = seed

            init_img = output_images[0]
            seed = seed + 1
            denoising_strength = max(denoising_strength * 0.95, 0.1)
            history.append(init_img)

        grid_count = len(os.listdir(outpath)) - 1
        grid = image_grid(history, batch_size, force_n_rows=1)

        save_image(grid, outpath, f"grid-{grid_count:04}", initial_seed, prompt, opts.grid_format, info=info, short_filename=not opts.grid_extended_filename)

        output_images = history
        seed = initial_seed
    elif sd_upscale:
        initial_seed = None
        initial_info = None

        img = upscale_with_realesrgan(init_img, RealESRGAN_upscaling=2, RealESRGAN_model_index=0)

        torch_gc()

        grid = split_grid(img, tile_w=width, tile_h=height, overlap=opts.sd_upscale_overlap)

        print(f"SD upscaling will process a total of {len(grid.tiles[0][2])}x{len(grid.tiles)} images.")

        for y, h, row in grid.tiles:
            for tiledata in row:
                init_img = tiledata[2]

                output_images, seed, info = process_images(
                    outpath=outpath,
                    func_init=init,
                    func_sample=sample,
                    prompt=prompt,
                    seed=seed,
                    sampler_index=sampler_index,
                    batch_size=1,  # since process_images can't work with multiple different images we have to do this for now
                    n_iter=1,
                    steps=ddim_steps,
                    cfg_scale=cfg_scale,
                    width=width,
                    height=height,
                    prompt_matrix=prompt_matrix,
                    use_GFPGAN=use_GFPGAN,
                    do_not_save_grid=True,
                    extra_generation_params={"Denoising Strength": denoising_strength},
                )

                if initial_seed is None:
                    initial_seed = seed
                    initial_info = info

                seed += 1
                tiledata[2] = output_images[0]

        combined_image = combine_grid(grid)

        grid_count = len(os.listdir(outpath)) - 1
        save_image(combined_image, outpath, f"grid-{grid_count:04}", initial_seed, prompt, opts.grid_format, info=initial_info, short_filename=not opts.grid_extended_filename)

        output_images = [combined_image]
        seed = initial_seed
        info = initial_info
    else:
        output_images, seed, info = process_images(
            outpath=outpath,
            func_init=init,
            func_sample=sample,
            prompt=prompt,
            seed=seed,
            sampler_index=sampler_index,
            batch_size=batch_size,
            n_iter=n_iter,
            steps=ddim_steps,
            cfg_scale=cfg_scale,
            width=width,
            height=height,
            prompt_matrix=prompt_matrix,
            use_GFPGAN=use_GFPGAN,
            extra_generation_params={"Denoising Strength": denoising_strength},
        )

    del sampler

    return output_images, seed, plaintext_to_html(info)

sample_img2img = "assets/stable-samples/img2img/sketch-mountains-input.jpg"
sample_img2img = sample_img2img if os.path.exists(sample_img2img) else None

img2img_interface = gr.Interface(
    wrap_gradio_call(img2img),
    inputs=[
        gr.Textbox(placeholder="A fantasy landscape, trending on artstation.", lines=1),
        gr.Image(value=sample_img2img, source="upload", interactive=True, type="pil"),
        gr.Slider(minimum=1, maximum=150, step=1, label="Sampling Steps", value=50),
        gr.Radio(label='Sampling method', choices=[x.name for x in samplers_for_img2img], value=samplers_for_img2img[0].name, type="index"),
        gr.Checkbox(label='Fix faces using GFPGAN', value=False, visible=GFPGAN is not None),
        gr.Checkbox(label='Create prompt matrix (separate multiple prompts using |, and get all combinations of them)', value=False),
        gr.Checkbox(label='Loopback (use images from previous batch when creating next batch)', value=False),
        gr.Checkbox(label='Stable Diffusion upscale', value=False),
        gr.Slider(minimum=1, maximum=cmd_opts.max_batch_count, step=1, label='Batch count (how many batches of images to generate)', value=1),
        gr.Slider(minimum=1, maximum=8, step=1, label='Batch size (how many images are in a batch; memory-hungry)', value=1),
        gr.Slider(minimum=1.0, maximum=15.0, step=0.5, label='Classifier Free Guidance Scale (how strongly the image should follow the prompt)', value=7.0),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising Strength', value=0.75),
        gr.Number(label='Seed', value=-1),
        gr.Slider(minimum=64, maximum=2048, step=64, label="Height", value=512),
        gr.Slider(minimum=64, maximum=2048, step=64, label="Width", value=512),
        gr.Radio(label="Resize mode", choices=["Just resize", "Crop and resize", "Resize and fill"], type="index", value="Just resize")
    ],
    outputs=[
        gr.Gallery(),
        gr.Number(label='Seed'),
        gr.HTML(),
    ],
    allow_flagging="never",
)

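# Builds the selected Real-ESRGAN network and runs it on a PIL image; used both
# by the Extras tab and by the SD upscale mode in img2img.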
def upscale_with_realesrgan(image, RealESRGAN_upscaling, RealESRGAN_model_index):
    info = realesrgan_models[RealESRGAN_model_index]

    model = info.model()
    upsampler = RealESRGANer(
        scale=info.netscale,
        model_path=info.location,
        model=model,
        half=True
    )

    upsampled = upsampler.enhance(np.array(image), outscale=RealESRGAN_upscaling)[0]

    image = Image.fromarray(upsampled)
    return image


def run_extras(image, GFPGAN_strength, RealESRGAN_upscaling, RealESRGAN_model_index):
    torch_gc()

    image = image.convert("RGB")

    outpath = opts.outdir or "outputs/extras-samples"

    if GFPGAN is not None and GFPGAN_strength > 0:
        cropped_faces, restored_faces, restored_img = GFPGAN.enhance(np.array(image, dtype=np.uint8), has_aligned=False, only_center_face=False, paste_back=True)
        res = Image.fromarray(restored_img)

        if GFPGAN_strength < 1.0:
            res = Image.blend(image, res, GFPGAN_strength)

        image = res

    if have_realesrgan and RealESRGAN_upscaling != 1.0:
        image = upscale_with_realesrgan(image, RealESRGAN_upscaling, RealESRGAN_model_index)

    os.makedirs(outpath, exist_ok=True)
    base_count = len(os.listdir(outpath))

    save_image(image, outpath, f"{base_count:05}", None, '', opts.samples_format, short_filename=True)

    return image, 0, ''


extras_interface = gr.Interface(
    wrap_gradio_call(run_extras),
    inputs=[
        gr.Image(label="Source", source="upload", interactive=True, type="pil"),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.001, label="GFPGAN strength", value=1, interactive=GFPGAN is not None),
        gr.Slider(minimum=1.0, maximum=4.0, step=0.05, label="Real-ESRGAN upscaling", value=2, interactive=have_realesrgan),
        gr.Radio(label='Real-ESRGAN model', choices=[x.name for x in realesrgan_models], value=realesrgan_models[0].name, type="index", interactive=have_realesrgan),
    ],
    outputs=[
        gr.Image(label="Result"),
        gr.Number(label='Seed', visible=False),
        gr.HTML(),
    ],
    allow_flagging="never",
)

opts = Options()
if os.path.exists(config_filename):
    opts.load(config_filename)

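# The Settings tab is generated from Options.data_labels: run_settings writes
# the submitted values back into opts and persists them to config.json.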
def run_settings(*args):
    up = []

    for key, value, comp in zip(opts.data_labels.keys(), args, settings_interface.input_components):
        opts.data[key] = value
        up.append(comp.update(value=value))

    opts.save(config_filename)

    return 'Settings saved.', ''


def create_setting_component(key):
    def fun():
        return opts.data[key] if key in opts.data else opts.data_labels[key][0]

    labelinfo = opts.data_labels[key]
    t = type(labelinfo[0])
    label = labelinfo[1]
    if t == str:
        item = gr.Textbox(label=label, value=fun, lines=1)
    elif t == int:
        if len(labelinfo) == 5:
            item = gr.Slider(minimum=labelinfo[2], maximum=labelinfo[3], step=labelinfo[4], label=label, value=fun)
        elif len(labelinfo) == 4:
            item = gr.Slider(minimum=labelinfo[2], maximum=labelinfo[3], step=1, label=label, value=fun)
        else:
            item = gr.Number(label=label, value=fun)
    elif t == bool:
        item = gr.Checkbox(label=label, value=fun)
    else:
        raise Exception(f'bad options item type: {str(t)} for key {key}')

    return item


settings_interface = gr.Interface(
    run_settings,
    inputs=[create_setting_component(key) for key in opts.data_labels.keys()],
    outputs=[
        gr.Textbox(label='Result'),
        gr.HTML(),
    ],
    title=None,
    description=None,
    allow_flagging="never",
)

interfaces = [
    (txt2img_interface, "txt2img"),
    (img2img_interface, "img2img"),
    (extras_interface, "Extras"),
    (settings_interface, "Settings"),
]

config = OmegaConf.load(cmd_opts.config)
model = load_model_from_config(config, cmd_opts.ckpt)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = (model if cmd_opts.no_half else model.half()).to(device)

model_hijack = StableDiffusionModelHijack()
model_hijack.hijack(model)

demo = gr.TabbedInterface(
    interface_list=[x[0] for x in interfaces],
    tab_names=[x[1] for x in interfaces],
    css=("" if cmd_opts.no_progressbar_hiding else css_hide_progressbar) + """
.output-html p {margin: 0 0.5em;}
.performance { font-size: 0.85em; color: #444; }
"""
)

demo.launch()