This commit is contained in:
hlky 2022-10-10 10:38:06 +01:00 committed by GitHub
commit 8f613bf256
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 341 additions and 309 deletions

View File

@ -12,7 +12,7 @@
# GNU Affero General Public License for more details. # GNU Affero General Public License for more details.
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
# UI defaults configuration file. It is automatically loaded if located at configs/webui/webui_streamlit.yaml. # UI defaults configuration file. It is automatically loaded if located at configs/webui/webui_streamlit.yaml.
# Any changes made here will be available automatically on the web app without having to stop it. # Any changes made here will be available automatically on the web app without having to stop it.
@ -24,7 +24,7 @@ general:
huggingface_token: "" huggingface_token: ""
gpu: 0 gpu: 0
outdir: outputs outdir: outputs
default_model: "Stable Diffusion v1.4" default_model: "Stable Diffusion v1.4"
default_model_config: "configs/stable-diffusion/v1-inference.yaml" default_model_config: "configs/stable-diffusion/v1-inference.yaml"
default_model_path: "models/ldm/stable-diffusion-v1/model.ckpt" default_model_path: "models/ldm/stable-diffusion-v1/model.ckpt"
use_sd_concepts_library: True use_sd_concepts_library: True
@ -69,13 +69,13 @@ txt2img:
min_value: 64 min_value: 64
max_value: 2048 max_value: 2048
step: 64 step: 64
height: height:
value: 512 value: 512
min_value: 64 min_value: 64
max_value: 2048 max_value: 2048
step: 64 step: 64
cfg_scale: cfg_scale:
value: 7.5 value: 7.5
min_value: 1.0 min_value: 1.0
@ -85,16 +85,16 @@ txt2img:
seed: "" seed: ""
batch_count: batch_count:
value: 1 value: 1
batch_size: batch_size:
value: 1 value: 1
sampling_steps: sampling_steps:
value: 30 value: 30
min_value: 10 min_value: 10
max_value: 250 max_value: 250
step: 10 step: 10
LDSR_config: LDSR_config:
sampling_steps: 50 sampling_steps: 50
preDownScale: 1 preDownScale: 1
@ -115,16 +115,16 @@ txt2img:
use_LDSR: False use_LDSR: False
RealESRGAN_model: "RealESRGAN_x4plus" RealESRGAN_model: "RealESRGAN_x4plus"
use_upscaling: False use_upscaling: False
variant_amount: variant_amount:
value: 0.0 value: 0.0
min_value: 0.0 min_value: 0.0
max_value: 1.0 max_value: 1.0
step: 0.01 step: 0.01
variant_seed: "" variant_seed: ""
write_info_files: True write_info_files: True
txt2vid: txt2vid:
default_model: "CompVis/stable-diffusion-v1-4" default_model: "CompVis/stable-diffusion-v1-4"
custom_models_list: ["CompVis/stable-diffusion-v1-4"] custom_models_list: ["CompVis/stable-diffusion-v1-4"]
@ -134,37 +134,37 @@ txt2vid:
min_value: 64 min_value: 64
max_value: 2048 max_value: 2048
step: 64 step: 64
height: height:
value: 512 value: 512
min_value: 64 min_value: 64
max_value: 2048 max_value: 2048
step: 64 step: 64
cfg_scale: cfg_scale:
value: 7.5 value: 7.5
min_value: 1.0 min_value: 1.0
max_value: 30.0 max_value: 30.0
step: 0.5 step: 0.5
batch_count: batch_count:
value: 1 value: 1
batch_size: batch_size:
value: 1 value: 1
sampling_steps: sampling_steps:
value: 30 value: 30
min_value: 10 min_value: 10
max_value: 250 max_value: 250
step: 10 step: 10
num_inference_steps: num_inference_steps:
value: 200 value: 200
min_value: 10 min_value: 10
max_value: 500 max_value: 500
step: 10 step: 10
seed: "" seed: ""
default_sampler: "k_euler" default_sampler: "k_euler"
scheduler_name: "klms" scheduler_name: "klms"
@ -175,6 +175,7 @@ txt2vid:
normalize_prompt_weights: True normalize_prompt_weights: True
save_individual_images: True save_individual_images: True
save_video: True save_video: True
save_video_on_stop: False
group_by_prompt: True group_by_prompt: True
write_info_files: True write_info_files: True
do_loop: False do_loop: False
@ -188,36 +189,36 @@ txt2vid:
min_value: 0.0 min_value: 0.0
max_value: 1.0 max_value: 1.0
step: 0.01 step: 0.01
variant_seed: "" variant_seed: ""
beta_start: beta_start:
value: 0.00085 value: 0.00085
min_value: 0.0001 min_value: 0.0001
max_value: 0.0300 max_value: 0.0300
step: 0.0001 step: 0.0001
format: "%.5f" format: "%.5f"
beta_end: beta_end:
value: 0.012 value: 0.012
min_value: 0.0001 min_value: 0.0001
max_value: 0.0300 max_value: 0.0300
step: 0.0001 step: 0.0001
format: "%.5f" format: "%.5f"
beta_scheduler_type: "scaled_linear" beta_scheduler_type: "scaled_linear"
max_frames: 100 max_frames: 100
LDSR_config: LDSR_config:
sampling_steps: 50 sampling_steps: 50
preDownScale: 1 preDownScale: 1
postDownScale: 1 postDownScale: 1
downsample_method: "Lanczos" downsample_method: "Lanczos"
img2img: img2img:
prompt: prompt:
sampler_name: "k_euler" sampler_name: "k_euler"
denoising_strength: denoising_strength:
value: 0.75 value: 0.75
min_value: 0.0 min_value: 0.0
max_value: 1.0 max_value: 1.0
@ -238,49 +239,49 @@ img2img:
min_value: 64 min_value: 64
max_value: 2048 max_value: 2048
step: 64 step: 64
height: height:
value: 512 value: 512
min_value: 64 min_value: 64
max_value: 2048 max_value: 2048
step: 64 step: 64
cfg_scale: cfg_scale:
value: 7.5 value: 7.5
min_value: 1.0 min_value: 1.0
max_value: 30.0 max_value: 30.0
step: 0.5 step: 0.5
batch_count: batch_count:
value: 1 value: 1
batch_size: batch_size:
value: 1 value: 1
sampling_steps: sampling_steps:
value: 30 value: 30
min_value: 10 min_value: 10
max_value: 250 max_value: 250
step: 10 step: 10
num_inference_steps: num_inference_steps:
value: 200 value: 200
min_value: 10 min_value: 10
max_value: 500 max_value: 500
step: 10 step: 10
find_noise_steps: find_noise_steps:
value: 100 value: 100
min_value: 0 min_value: 0
max_value: 500 max_value: 500
step: 10 step: 10
LDSR_config: LDSR_config:
sampling_steps: 50 sampling_steps: 50
preDownScale: 1 preDownScale: 1
postDownScale: 1 postDownScale: 1
downsample_method: "Lanczos" downsample_method: "Lanczos"
loopback: True loopback: True
random_seed_loopback: True random_seed_loopback: True
separate_prompts: False separate_prompts: False
@ -298,36 +299,36 @@ img2img:
variant_amount: 0.0 variant_amount: 0.0
variant_seed: "" variant_seed: ""
write_info_files: True write_info_files: True
img2txt: img2txt:
batch_size: 420 batch_size: 420
blip_image_eval_size: 512 blip_image_eval_size: 512
keep_all_models_loaded: False keep_all_models_loaded: False
concepts_library: concepts_library:
concepts_per_page: 12 concepts_per_page: 12
gfpgan: gfpgan:
strength: 100 strength: 100
textual_inversion: textual_inversion:
pretrained_model_name_or_path: "models/diffusers/stable-diffusion-v1-4" pretrained_model_name_or_path: "models/diffusers/stable-diffusion-v1-4"
tokenizer_name: "models/clip-vit-large-patch14" tokenizer_name: "models/clip-vit-large-patch14"
daisi_app: daisi_app:
running_on_daisi_io: False running_on_daisi_io: False
model_manager: model_manager:
models: models:
stable_diffusion: stable_diffusion:
model_name: "Stable Diffusion v1.4" model_name: "Stable Diffusion v1.4"
save_location: "./models/ldm/stable-diffusion-v1" save_location: "./models/ldm/stable-diffusion-v1"
files: files:
model_ckpt: model_ckpt:
file_name: "model.ckpt" file_name: "model.ckpt"
download_link: "https://www.googleapis.com/storage/v1/b/aai-blog-files/o/sd-v1-4.ckpt?alt=media" download_link: "https://www.googleapis.com/storage/v1/b/aai-blog-files/o/sd-v1-4.ckpt?alt=media"
gfpgan: gfpgan:
model_name: "GFPGAN" model_name: "GFPGAN"
save_location: "./models/gfpgan" save_location: "./models/gfpgan"
@ -343,8 +344,8 @@ model_manager:
file_name: "parsing_parsenet.pth" file_name: "parsing_parsenet.pth"
save_location: "./gfpgan/weights" save_location: "./gfpgan/weights"
download_link: "https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth" download_link: "https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth"
realesrgan: realesrgan:
model_name: "RealESRGAN" model_name: "RealESRGAN"
save_location: "./models/realesrgan" save_location: "./models/realesrgan"
@ -355,8 +356,8 @@ model_manager:
x4plus_anime_6b: x4plus_anime_6b:
file_name: "RealESRGAN_x4plus_anime_6B.pth" file_name: "RealESRGAN_x4plus_anime_6B.pth"
download_link: "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth" download_link: "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth"
waifu_diffusion: waifu_diffusion:
model_name: "Waifu Diffusion v1.2" model_name: "Waifu Diffusion v1.2"
save_location: "./models/custom" save_location: "./models/custom"
@ -365,7 +366,7 @@ model_manager:
file_name: "waifu-diffusion.ckpt" file_name: "waifu-diffusion.ckpt"
download_link: "https://huggingface.co/crumb/pruned-waifu-diffusion/resolve/main/model-pruned.ckpt" download_link: "https://huggingface.co/crumb/pruned-waifu-diffusion/resolve/main/model-pruned.ckpt"
trinart_stable_diffusion: trinart_stable_diffusion:
model_name: "TrinArt Stable Diffusion v2" model_name: "TrinArt Stable Diffusion v2"
save_location: "./models/custom" save_location: "./models/custom"
@ -373,7 +374,7 @@ model_manager:
trinart: trinart:
file_name: "trinart.ckpt" file_name: "trinart.ckpt"
download_link: "https://huggingface.co/naclbit/trinart_stable_diffusion_v2/resolve/main/trinart2_step95000.ckpt" download_link: "https://huggingface.co/naclbit/trinart_stable_diffusion_v2/resolve/main/trinart2_step95000.ckpt"
stable_diffusion_concept_library: stable_diffusion_concept_library:
model_name: "Stable Diffusion Concept Library" model_name: "Stable Diffusion Concept Library"
save_location: "./models/custom/sd-concepts-library/" save_location: "./models/custom/sd-concepts-library/"
@ -381,7 +382,7 @@ model_manager:
concept_library: concept_library:
file_name: "" file_name: ""
download_link: "https://github.com/sd-webui/sd-concepts-library" download_link: "https://github.com/sd-webui/sd-concepts-library"
blip_model: blip_model:
model_name: "Blip Model" model_name: "Blip Model"
save_location: "./models/blip" save_location: "./models/blip"
@ -389,7 +390,7 @@ model_manager:
blip: blip:
file_name: "model__base_caption.pth" file_name: "model__base_caption.pth"
download_link: "https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model*_base_caption.pth" download_link: "https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model*_base_caption.pth"
ldsr: ldsr:
model_name: "Latent Diffusion Super Resolution (LDSR)" model_name: "Latent Diffusion Super Resolution (LDSR)"
save_location: "./models/ldsr" save_location: "./models/ldsr"
@ -397,8 +398,7 @@ model_manager:
project_yaml: project_yaml:
file_name: "project.yaml" file_name: "project.yaml"
download_link: "https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1" download_link: "https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1"
ldsr_model: ldsr_model:
file_name: "model.ckpt" file_name: "model.ckpt"
download_link: "https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1" download_link: "https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1"

View File

@ -5,14 +5,14 @@ print (os.getcwd)
try: try:
with open("environment.yaml") as file_handle: with open("environment.yaml") as file_handle:
environment_data = yaml.load(file_handle, Loader=yaml.FullLoader) environment_data = yaml.safe_load(file_handle)
except FileNotFoundError: except FileNotFoundError:
try: try:
with open(os.path.join("..", "environment.yaml")) as file_handle: with open(os.path.join("..", "environment.yaml")) as file_handle:
environment_data = yaml.load(file_handle, Loader=yaml.FullLoader) environment_data = yaml.safe_load(file_handle)
except: except:
pass pass
try: try:
for dependency in environment_data["dependencies"]: for dependency in environment_data["dependencies"]:
package_name, package_version = dependency.split("=") package_name, package_version = dependency.split("=")
@ -21,6 +21,6 @@ except:
pass pass
try: try:
subprocess.run(['python', '-m', 'streamlit', "run" ,os.path.join("..","scripts/webui_streamlit.py"), "--theme.base dark"], stdout=subprocess.DEVNULL) subprocess.run(['python', '-m', 'streamlit', "run" ,os.path.join("..","scripts/webui_streamlit.py"), "--theme.base dark"], stdout=subprocess.DEVNULL)
except FileExistsError: except FileExistsError:
subprocess.run(['python', '-m', 'streamlit', "run" ,"scripts/webui_streamlit.py", "--theme.base dark"], stdout=subprocess.DEVNULL) subprocess.run(['python', '-m', 'streamlit', "run" ,"scripts/webui_streamlit.py", "--theme.base dark"], stdout=subprocess.DEVNULL)

View File

@ -29,53 +29,5 @@ dependencies:
- scikit-image=0.19.2 - scikit-image=0.19.2
- torchvision=0.12.0 - torchvision=0.12.0
- pip: - pip:
- -e . - -r requirements.txt
- -e git+https://github.com/CompVis/taming-transformers#egg=taming-transformers
- -e git+https://github.com/openai/CLIP#egg=clip
- -e git+https://github.com/hlky/k-diffusion-sd#egg=k_diffusion
- -e git+https://github.com/devilismyfriend/latent-diffusion#egg=latent-diffusion
- accelerate==0.12.0
- albumentations==0.4.3
- basicsr>=1.3.4.0
- diffusers==0.3.0
- einops==0.3.1
- facexlib>=0.2.3
- ftfy==6.1.1
- fairscale==0.4.4
- gradio==3.1.6
- gfpgan==1.3.8
- hydralit_components==1.0.10
- hydralit==1.0.14
- imageio-ffmpeg==0.4.2
- imageio==2.9.0
- kornia==0.6
- loguru
- omegaconf==2.1.1
- opencv-python-headless==4.6.0.66
- open-clip-torch==2.0.2
- pandas==1.4.3
- piexif==1.1.3
- pudb==2019.2
- pynvml==11.4.1
- python-slugify>=6.1.2
- pytorch-lightning==1.4.2
- retry>=0.9.2
- regex
- realesrgan==0.3.0
- streamlit==1.13.0
- streamlit-on-Hover-tabs==1.0.1
- streamlit-option-menu==0.3.2
- streamlit_nested_layout
- streamlit-server-state==0.14.2
- streamlit-tensorboard==0.0.2
- test-tube>=0.7.5
- tensorboard==2.10.1
- timm==0.6.7
- torch-fidelity==0.3.0
- torchmetrics==0.6.0
- transformers==4.19.2
- tensorflow==2.10.0
- tqdm==4.64.0
- stqdm==0.0.4
- wget

View File

@ -143,4 +143,10 @@ div.gallery:hover {
} }
.css-jn99sy { .css-jn99sy {
display: none display: none
} }
/* Make the text area widget have a similar height as the text input field*/
.st-ex{
height: 54px;
min-height: 25px;
}

View File

@ -1,31 +1,28 @@
transformers==4.19.2 # do not change -e .
diffusers==0.3.0
invisible-watermark==0.1.5
pytorch_lightning==1.7.7
open-clip-torch
loguru
taming-transformers-rom1504==0.0.6 # required by ldm
wget
# See: https://github.com/CompVis/taming-transformers/issues/176 # See: https://github.com/CompVis/taming-transformers/issues/176
# -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers # required by ldm # -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers # required by ldm
# Note: taming package needs to be installed with -e option # Note: taming package needs to be installed with -e option
-e git+https://github.com/CompVis/taming-transformers#egg=taming-transformers
invisible-watermark==0.1.5
taming-transformers-rom1504==0.0.6 # required by ldm
# Note: K-diffusion brings in CLIP 1.0 as a dependency automatically; will create a dependency resolution conflict when explicitly specified together
git+https://github.com/openai/CLIP.git@main#egg=clip
git+https://github.com/crowsonkb/k-diffusion.git git+https://github.com/crowsonkb/k-diffusion.git
# Note: K-diffusion brings in CLIP 1.0 as a dependency automatically; will create a dependency resolution conflict when explicitly specified together # git+https://github.com/hlky/k-diffusion-sd#egg=k_diffusion
# git+https://github.com/openai/CLIP.git@main#egg=clip
# Dependencies required for Stable Diffusion UI # Dependencies required for Stable Diffusion UI
pynvml==11.4.1 pynvml==11.4.1
omegaconf==2.2.3 omegaconf==2.2.3
Jinja2==3.1.2 # Jinja2 is required by Gradio
# Note: Jinja2 3.x major version required due to breaking changes found in markupsafe==2.1.1; 2.0.1 is incompatible with other upstream dependencies # Note: Jinja2 3.x major version required due to breaking changes found in markupsafe==2.1.1; 2.0.1 is incompatible with other upstream dependencies
# see https://github.com/pallets/markupsafe/issues/304 # see https://github.com/pallets/markupsafe/issues/304
Jinja2==3.1.2 # Jinja2 is required by Gradio
# Environment Dependencies for WebUI (gradio) # Environment Dependencies for WebUI (gradio)
gradio==3.4 gradio==3.1.6
# Environment Dependencies for WebUI (streamlit) # Environment Dependencies for WebUI (streamlit)
streamlit==1.13.0 streamlit==1.13.0
@ -36,6 +33,8 @@ streamlit-server-state==0.14.2
streamlit-tensorboard==0.0.2 streamlit-tensorboard==0.0.2
hydralit==1.0.14 hydralit==1.0.14
hydralit_components==1.0.10 hydralit_components==1.0.10
stqdm==0.0.4
diffusers==0.4.1
# Img2text # Img2text
ftfy==6.1.1 ftfy==6.1.1
@ -47,9 +46,29 @@ tensorboard==2.10.1
# Other # Other
retry==0.9.2 # used by sdutils retry==0.9.2 # used by sd_utils
python-slugify==6.1.2 # used by sdutils python-slugify==6.1.2 # used by sd_utils
piexif==1.1.3 # used by sdutils piexif==1.1.3 # used by sd_utils
accelerate==0.12.0
albumentations==0.4.3
einops==0.3.1
facexlib>=0.2.3
imageio-ffmpeg==0.4.2
imageio==2.9.0
kornia==0.6
loguru
opencv-python-headless==4.6.0.66
open-clip-torch==2.0.2
pandas==1.4.3
pudb==2019.2
pytorch-lightning==1.7.7
realesrgan==0.3.0
test-tube>=0.7.5
timm==0.6.7
torch-fidelity==0.3.0
transformers==4.19.2 # do not change
wget
# Optional packages commonly used with Stable Diffusion workflow # Optional packages commonly used with Stable Diffusion workflow
@ -57,11 +76,14 @@ piexif==1.1.3 # used by sdutils
basicsr==1.4.2 # required by RealESRGAN basicsr==1.4.2 # required by RealESRGAN
gfpgan==1.3.8 # GFPGAN gfpgan==1.3.8 # GFPGAN
realesrgan==0.3.0 # RealESRGAN brings in GFPGAN as a requirement realesrgan==0.3.0 # RealESRGAN brings in GFPGAN as a requirement
-e git+https://github.com/devilismyfriend/latent-diffusion#egg=latent-diffusion #ldsr git+https://github.com/CompVis/latent-diffusion
## for monocular depth estimation ## for monocular depth estimation
tensorflow==2.10.0 tensorflow==2.10.0
# Unused Packages: No current usage but will be used in the future.
# Orphaned Packages: No usage found # Orphaned Packages: No usage found

View File

@ -157,9 +157,11 @@ def layout():
#in steps will be shown, this is helpful to reduce the negative effect this option has on performance. \ #in steps will be shown, this is helpful to reduce the negative effect this option has on performance. \
#Default: True") #Default: True")
st.session_state["defaults"].general.update_preview = True st.session_state["defaults"].general.update_preview = True
st.session_state["defaults"].general.update_preview_frequency = st.number_input("Update Preview Frequency", value=st.session_state['defaults'].general.update_preview_frequency, st.session_state["defaults"].general.update_preview_frequency = st.number_input("Update Preview Frequency",
help="Specify the frequency at which the image is updated in steps, this is helpful to reduce the \ min_value=1,
negative effect updating the preview image has on performance. Default: 10") value=st.session_state['defaults'].general.update_preview_frequency,
help="Specify the frequency at which the image is updated in steps, this is helpful to reduce the \
negative effect updating the preview image has on performance. Default: 10")
with col3: with col3:
st.title("Others") st.title("Others")
@ -270,7 +272,7 @@ def layout():
value=st.session_state['defaults'].txt2img.sampling_steps.min_value, value=st.session_state['defaults'].txt2img.sampling_steps.min_value,
help="Set the default minimum value for the sampling steps slider. Default is: 1") help="Set the default minimum value for the sampling steps slider. Default is: 1")
st.session_state["defaults"].txt2img.sampling_steps.step = st.text_input("Sampling Slider Steps", st.session_state["defaults"].txt2img.sampling_steps.step = st.number_input("Sampling Slider Steps",
value=st.session_state['defaults'].txt2img.sampling_steps.step, value=st.session_state['defaults'].txt2img.sampling_steps.step,
help="Set the default value for the number of steps on the sampling steps slider. Default is: 10") help="Set the default value for the number of steps on the sampling steps slider. Default is: 10")
@ -326,8 +328,9 @@ def layout():
st.session_state["defaults"].txt2img.update_preview = True st.session_state["defaults"].txt2img.update_preview = True
st.session_state["defaults"].txt2img.update_preview_frequency = st.number_input("Preview Image Update Frequency", st.session_state["defaults"].txt2img.update_preview_frequency = st.number_input("Preview Image Update Frequency",
value=st.session_state['defaults'].txt2img.update_preview_frequency, min_value=1,
help="Set the default value for the frrquency of the preview image updates. Default is: 10") value=st.session_state['defaults'].txt2img.update_preview_frequency,
help="Set the default value for the frrquency of the preview image updates. Default is: 10")
with col5: with col5:
st.title("Variation Parameters") st.title("Variation Parameters")
@ -526,8 +529,9 @@ def layout():
st.session_state["defaults"].img2img.update_preview = True st.session_state["defaults"].img2img.update_preview = True
st.session_state["defaults"].img2img.update_preview_frequency = st.number_input("Img2Img Preview Image Update Frequency", st.session_state["defaults"].img2img.update_preview_frequency = st.number_input("Img2Img Preview Image Update Frequency",
value=st.session_state['defaults'].img2img.update_preview_frequency, min_value=1,
help="Set the default value for the frrquency of the preview image updates. Default is: 10") value=st.session_state['defaults'].img2img.update_preview_frequency,
help="Set the default value for the frrquency of the preview image updates. Default is: 10")
st.title("Variation Parameters") st.title("Variation Parameters")
@ -681,6 +685,10 @@ def layout():
st.session_state["defaults"].txt2vid.save_video = st.checkbox("Save Txt2Vid Video", value=st.session_state['defaults'].txt2vid.save_video, st.session_state["defaults"].txt2vid.save_video = st.checkbox("Save Txt2Vid Video", value=st.session_state['defaults'].txt2vid.save_video,
help="Choose to save the Txt2Vid video. Default: True") help="Choose to save the Txt2Vid video. Default: True")
st.session_state["defaults"].txt2vid.save_video_on_stop = st.checkbox("Save video on Stop",value=st.session_state['defaults'].txt2vid.save_video_on_stop,
help="Save a video with all the images generated as frames when we hit the stop button \
during a generation.")
st.session_state["defaults"].txt2vid.group_by_prompt = st.checkbox("Group By txt2vid Prompt", value=st.session_state['defaults'].txt2vid.group_by_prompt, st.session_state["defaults"].txt2vid.group_by_prompt = st.checkbox("Group By txt2vid Prompt", value=st.session_state['defaults'].txt2vid.group_by_prompt,
help="Choose to save images grouped by their prompt. Default: False") help="Choose to save images grouped by their prompt. Default: False")
@ -750,7 +758,7 @@ def layout():
st.session_state["defaults"].txt2vid.beta_start.step = st.number_input("txt2vid Beta Start Slider Steps", value=st.session_state['defaults'].txt2vid.beta_start.step, st.session_state["defaults"].txt2vid.beta_start.step = st.number_input("txt2vid Beta Start Slider Steps", value=st.session_state['defaults'].txt2vid.beta_start.step,
help="Set the default value for the number of steps on the variation slider. Default is: 1") help="Set the default value for the number of steps on the variation slider. Default is: 1")
st.session_state["defaults"].txt2vid.beta_start.format = st.text_input("Default txt2vid Beta Start Format", value=st.session_state['defaults'].txt2vid.beta_start.format, st.session_state["defaults"].txt2vid.beta_start.format = st.text_input("Default txt2vid Beta Start Format", value=st.session_state['defaults'].txt2vid.beta_start.format,
help="Set the default Beta Start Format. Default is: %.5\f") help="Set the default Beta Start Format. Default is: %.5\f")
# Beta End # Beta End
@ -766,7 +774,7 @@ def layout():
st.session_state["defaults"].txt2vid.beta_end.step = st.number_input("txt2vid Beta End Slider Steps", value=st.session_state['defaults'].txt2vid.beta_end.step, st.session_state["defaults"].txt2vid.beta_end.step = st.number_input("txt2vid Beta End Slider Steps", value=st.session_state['defaults'].txt2vid.beta_end.step,
help="Set the default value for the number of steps on the variation slider. Default is: 1") help="Set the default value for the number of steps on the variation slider. Default is: 1")
st.session_state["defaults"].txt2vid.beta_end.format = st.text_input("Default txt2vid Beta End Format", value=st.session_state['defaults'].txt2vid.beta_start.format, st.session_state["defaults"].txt2vid.beta_end.format = st.text_input("Default txt2vid Beta End Format", value=st.session_state['defaults'].txt2vid.beta_end.format,
help="Set the default Beta Start Format. Default is: %.5\f") help="Set the default Beta Start Format. Default is: %.5\f")
with image_processing: with image_processing:
@ -827,4 +835,4 @@ def layout():
toml.dump(st.session_state["streamlit_config"], toml_file) toml.dump(st.session_state["streamlit_config"], toml_file)
if reset_button: if reset_button:
st.session_state["defaults"] = OmegaConf.load("configs/webui/webui_streamlit.yaml") st.session_state["defaults"] = OmegaConf.load("configs/webui/webui_streamlit.yaml")

View File

@ -365,7 +365,7 @@ def layout():
img2img_input_col, img2img_generate_col = st.columns([10,1]) img2img_input_col, img2img_generate_col = st.columns([10,1])
with img2img_input_col: with img2img_input_col:
#prompt = st.text_area("Input Text","") #prompt = st.text_area("Input Text","")
prompt = st.text_input("Input Text","", placeholder="A corgi wearing a top hat as an oil painting.") prompt = st.text_area("Input Text","", placeholder="A corgi wearing a top hat as an oil painting.")
# Every form must have a submit button, the extra blank spaces is a temp way to align it with the input field. Needs to be done in CSS or some other way. # Every form must have a submit button, the extra blank spaces is a temp way to align it with the input field. Needs to be done in CSS or some other way.
img2img_generate_col.write("") img2img_generate_col.write("")
@ -436,19 +436,21 @@ def layout():
step=st.session_state['defaults'].img2img.find_noise_steps.step) step=st.session_state['defaults'].img2img.find_noise_steps.step)
with st.expander("Batch Options"): with st.expander("Batch Options"):
st.session_state["batch_count"] = int(st.text_input("Batch count.", value=st.session_state['defaults'].img2img.batch_count.value, st.session_state["batch_count"] = st.number_input("Batch count.", value=st.session_state['defaults'].img2img.batch_count.value,
help="How many iterations or batches of images to generate in total.")) help="How many iterations or batches of images to generate in total.")
st.session_state["batch_size"] = int(st.text_input("Batch size", value=st.session_state.defaults.img2img.batch_size.value, st.session_state["batch_size"] = st.number_input("Batch size", value=st.session_state.defaults.img2img.batch_size.value,
help="How many images are at once in a batch.\ help="How many images are at once in a batch.\
It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish generation as more images are generated at once.\ It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish generation as more images are generated at once.\
Default: 1")) Default: 1")
with st.expander("Preview Settings"): with st.expander("Preview Settings"):
st.session_state["update_preview"] = st.session_state["defaults"].general.update_preview st.session_state["update_preview"] = st.session_state["defaults"].general.update_preview
st.session_state["update_preview_frequency"] = st.text_input("Update Image Preview Frequency", value=st.session_state['defaults'].img2img.update_preview_frequency, st.session_state["update_preview_frequency"] = st.number_input("Update Image Preview Frequency",
help="Frequency in steps at which the the preview image is updated. By default the frequency \ min_value=1,
is set to 1 step.") value=st.session_state['defaults'].img2img.update_preview_frequency,
help="Frequency in steps at which the the preview image is updated. By default the frequency \
is set to 1 step.")
# #
with st.expander("Advanced"): with st.expander("Advanced"):
with st.expander("Output Settings"): with st.expander("Output Settings"):
@ -544,14 +546,14 @@ def layout():
st.session_state["LDSR_model"] = st.selectbox("LDSR model", st.session_state["LDSR_models"], st.session_state["LDSR_model"] = st.selectbox("LDSR model", st.session_state["LDSR_models"],
index=st.session_state["LDSR_models"].index(st.session_state['defaults'].general.LDSR_model)) index=st.session_state["LDSR_models"].index(st.session_state['defaults'].general.LDSR_model))
st.session_state["ldsr_sampling_steps"] = int(st.text_input("Sampling Steps", value=st.session_state['defaults'].img2img.LDSR_config.sampling_steps, st.session_state["ldsr_sampling_steps"] = st.number_input("Sampling Steps", value=st.session_state['defaults'].img2img.LDSR_config.sampling_steps,
help="")) help="")
st.session_state["preDownScale"] = int(st.text_input("PreDownScale", value=st.session_state['defaults'].img2img.LDSR_config.preDownScale, st.session_state["preDownScale"] = st.number_input("PreDownScale", value=st.session_state['defaults'].img2img.LDSR_config.preDownScale,
help="")) help="")
st.session_state["postDownScale"] = int(st.text_input("postDownScale", value=st.session_state['defaults'].img2img.LDSR_config.postDownScale, st.session_state["postDownScale"] = st.number_input("postDownScale", value=st.session_state['defaults'].img2img.LDSR_config.postDownScale,
help="")) help="")
downsample_method_list = ['Nearest', 'Lanczos'] downsample_method_list = ['Nearest', 'Lanczos']
st.session_state["downsample_method"] = st.selectbox("Downsample Method", downsample_method_list, st.session_state["downsample_method"] = st.selectbox("Downsample Method", downsample_method_list,

View File

@ -588,7 +588,7 @@ def get_matched_noise(_np_src_image, np_mask_rgb, noise_q, color_variation):
noise_window = _get_gaussian_window(width, height, mode=1) # start with simple gaussian noise noise_window = _get_gaussian_window(width, height, mode=1) # start with simple gaussian noise
noise_rgb = np.random.random_sample((width, height, num_channels)) noise_rgb = np.random.random_sample((width, height, num_channels))
noise_grey = (np.sum(noise_rgb, axis=2)/3.) noise_grey = (np.sum(noise_rgb, axis=2)/3.)
noise_rgb *= color_variation # the colorfulness of the starting noise is blended to greyscale with a parameter noise_rgb *= color_variation # the colorfulness of the starting noise is blended to greyscale with a parameter
for c in range(num_channels): for c in range(num_channels):
noise_rgb[:,:,c] += (1. - color_variation) * noise_grey noise_rgb[:,:,c] += (1. - color_variation) * noise_grey
@ -2471,7 +2471,7 @@ def process_images(
else: else:
grid = image_grid(output_images, batch_size) grid = image_grid(output_images, batch_size)
if grid and (batch_size > 1 or n_iter > 1): if grid and (batch_size > 1 or n_iter > 1):
output_images.insert(0, grid) output_images.insert(0, grid)
grid_count = get_next_sequence_number(outpath, 'grid-') grid_count = get_next_sequence_number(outpath, 'grid-')

View File

@ -183,7 +183,7 @@ def layout():
with input_col1: with input_col1:
#prompt = st.text_area("Input Text","") #prompt = st.text_area("Input Text","")
prompt = st.text_input("Input Text","", placeholder="A corgi wearing a top hat as an oil painting.") prompt = st.text_area("Input Text","", placeholder="A corgi wearing a top hat as an oil painting.")
# creating the page layout using columns # creating the page layout using columns
col1, col2, col3 = st.columns([1,2,1], gap="large") col1, col2, col3 = st.columns([1,2,1], gap="large")
@ -210,21 +210,23 @@ def layout():
#It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish generation as more images are generated at once.\ #It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish generation as more images are generated at once.\
#Default: 1") #Default: 1")
st.session_state["batch_count"] = int(st.text_input("Batch count.", value=st.session_state['defaults'].txt2img.batch_count.value, st.session_state["batch_count"] = st.number_input("Batch count.", value=st.session_state['defaults'].txt2img.batch_count.value,
help="How many iterations or batches of images to generate in total.")) help="How many iterations or batches of images to generate in total.")
st.session_state["batch_size"] = int(st.text_input("Batch size", value=st.session_state.defaults.txt2img.batch_size.value, st.session_state["batch_size"] = st.number_input("Batch size", value=st.session_state.defaults.txt2img.batch_size.value,
help="How many images are at once in a batch.\ help="How many images are at once in a batch.\
It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes \ It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes \
to finish generation as more images are generated at once.\ to finish generation as more images are generated at once.\
Default: 1") ) Default: 1")
with st.expander("Preview Settings"): with st.expander("Preview Settings"):
st.session_state["update_preview"] = st.session_state["defaults"].general.update_preview st.session_state["update_preview"] = st.session_state["defaults"].general.update_preview
st.session_state["update_preview_frequency"] = st.text_input("Update Image Preview Frequency", value=st.session_state['defaults'].txt2img.update_preview_frequency, st.session_state["update_preview_frequency"] = st.number_input("Update Image Preview Frequency",
help="Frequency in steps at which the the preview image is updated. By default the frequency \ min_value=1,
is set to 10 step.") value=st.session_state['defaults'].txt2img.update_preview_frequency,
help="Frequency in steps at which the the preview image is updated. By default the frequency \
is set to 10 step.")
with col2: with col2:
preview_tab, gallery_tab = st.tabs(["Preview", "Gallery"]) preview_tab, gallery_tab = st.tabs(["Preview", "Gallery"])
@ -366,14 +368,14 @@ def layout():
st.session_state["LDSR_model"] = st.selectbox("LDSR model", st.session_state["LDSR_models"], st.session_state["LDSR_model"] = st.selectbox("LDSR model", st.session_state["LDSR_models"],
index=st.session_state["LDSR_models"].index(st.session_state['defaults'].general.LDSR_model)) index=st.session_state["LDSR_models"].index(st.session_state['defaults'].general.LDSR_model))
st.session_state["ldsr_sampling_steps"] = int(st.text_input("Sampling Steps", value=st.session_state['defaults'].txt2img.LDSR_config.sampling_steps, st.session_state["ldsr_sampling_steps"] = st.number_input("Sampling Steps", value=st.session_state['defaults'].txt2img.LDSR_config.sampling_steps,
help="")) help="")
st.session_state["preDownScale"] = int(st.text_input("PreDownScale", value=st.session_state['defaults'].txt2img.LDSR_config.preDownScale, st.session_state["preDownScale"] = st.number_input("PreDownScale", value=st.session_state['defaults'].txt2img.LDSR_config.preDownScale,
help="")) help="")
st.session_state["postDownScale"] = int(st.text_input("postDownScale", value=st.session_state['defaults'].txt2img.LDSR_config.postDownScale, st.session_state["postDownScale"] = st.number_input("postDownScale", value=st.session_state['defaults'].txt2img.LDSR_config.postDownScale,
help="")) help="")
downsample_method_list = ['Nearest', 'Lanczos'] downsample_method_list = ['Nearest', 'Lanczos']
st.session_state["downsample_method"] = st.selectbox("Downsample Method", downsample_method_list, st.session_state["downsample_method"] = st.selectbox("Downsample Method", downsample_method_list,

View File

@ -113,88 +113,93 @@ def diffuse(
if "update_preview_frequency_list" not in st.session_state: if "update_preview_frequency_list" not in st.session_state:
st.session_state["update_preview_frequency_list"] = [0] st.session_state["update_preview_frequency_list"] = [0]
st.session_state["update_preview_frequency_list"].append(st.session_state['defaults'].txt2vid.update_preview_frequency) st.session_state["update_preview_frequency_list"].append(st.session_state["update_preview_frequency"])
# diffuse! try:
for i, t in enumerate(pipe.scheduler.timesteps): # diffuse!
start = timeit.default_timer() for i, t in enumerate(pipe.scheduler.timesteps):
start = timeit.default_timer()
#status_text.text(f"Running step: {step_counter}{total_number_steps} {percent} | {duration:.2f}{speed}") #status_text.text(f"Running step: {step_counter}{total_number_steps} {percent} | {duration:.2f}{speed}")
# expand the latents for classifier free guidance # expand the latents for classifier free guidance
latent_model_input = torch.cat([cond_latents] * 2) latent_model_input = torch.cat([cond_latents] * 2)
if isinstance(pipe.scheduler, LMSDiscreteScheduler): if isinstance(pipe.scheduler, LMSDiscreteScheduler):
sigma = pipe.scheduler.sigmas[i] sigma = pipe.scheduler.sigmas[i]
latent_model_input = latent_model_input / ((sigma**2 + 1) ** 0.5) latent_model_input = latent_model_input / ((sigma**2 + 1) ** 0.5)
# predict the noise residual # predict the noise residual
noise_pred = pipe.unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"] noise_pred = pipe.unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"]
# cfg # cfg
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + cfg_scale * (noise_pred_text - noise_pred_uncond) noise_pred = noise_pred_uncond + cfg_scale * (noise_pred_text - noise_pred_uncond)
# compute the previous noisy sample x_t -> x_t-1 # compute the previous noisy sample x_t -> x_t-1
if isinstance(pipe.scheduler, LMSDiscreteScheduler): if isinstance(pipe.scheduler, LMSDiscreteScheduler):
cond_latents = pipe.scheduler.step(noise_pred, i, cond_latents, **extra_step_kwargs)["prev_sample"] cond_latents = pipe.scheduler.step(noise_pred, i, cond_latents, **extra_step_kwargs)["prev_sample"]
else: else:
cond_latents = pipe.scheduler.step(noise_pred, t, cond_latents, **extra_step_kwargs)["prev_sample"] cond_latents = pipe.scheduler.step(noise_pred, t, cond_latents, **extra_step_kwargs)["prev_sample"]
#print (st.session_state["update_preview_frequency"])
#update the preview image if it is enabled and the frequency matches the step_counter
if st.session_state['defaults'].txt2vid.update_preview:
step_counter += 1
if st.session_state['defaults'].txt2vid.update_preview_frequency == step_counter or step_counter == st.session_state.sampling_steps: #update the preview image if it is enabled and the frequency matches the step_counter
if st.session_state.dynamic_preview_frequency: if st.session_state["update_preview"]:
st.session_state["current_chunk_speed"], step_counter += 1
st.session_state["previous_chunk_speed_list"],
st.session_state['defaults'].txt2vid.update_preview_frequency,
st.session_state["avg_update_preview_frequency"] = optimize_update_preview_frequency(st.session_state["current_chunk_speed"],
st.session_state["previous_chunk_speed_list"],
st.session_state['defaults'].txt2vid.update_preview_frequency,
st.session_state["update_preview_frequency_list"])
#scale and decode the image latents with vae if st.session_state["update_preview_frequency"] == step_counter or step_counter == st.session_state.sampling_steps:
cond_latents_2 = 1 / 0.18215 * cond_latents if st.session_state.dynamic_preview_frequency:
image = pipe.vae.decode(cond_latents_2) st.session_state["current_chunk_speed"],
st.session_state["previous_chunk_speed_list"],
st.session_state["update_preview_frequency"],
st.session_state["avg_update_preview_frequency"] = optimize_update_preview_frequency(st.session_state["current_chunk_speed"],
st.session_state["previous_chunk_speed_list"],
st.session_state["update_preview_frequency"],
st.session_state["update_preview_frequency_list"])
# generate output numpy image as uint8 #scale and decode the image latents with vae
image = torch.clamp((image["sample"] + 1.0) / 2.0, min=0.0, max=1.0) cond_latents_2 = 1 / 0.18215 * cond_latents
image2 = transforms.ToPILImage()(image.squeeze_(0)) image = pipe.vae.decode(cond_latents_2)
st.session_state["preview_image"].image(image2) # generate output numpy image as uint8
image = torch.clamp((image["sample"] + 1.0) / 2.0, min=0.0, max=1.0)
image2 = transforms.ToPILImage()(image.squeeze_(0))
step_counter = 0 st.session_state["preview_image"].image(image2)
duration = timeit.default_timer() - start step_counter = 0
st.session_state["current_chunk_speed"] = duration duration = timeit.default_timer() - start
if duration >= 1: st.session_state["current_chunk_speed"] = duration
speed = "s/it"
else:
speed = "it/s"
duration = 1 / duration
if i > st.session_state.sampling_steps: if duration >= 1:
inference_counter += 1 speed = "s/it"
inference_percent = int(100 * float(inference_counter + 1 if inference_counter < num_inference_steps else num_inference_steps)/float(num_inference_steps)) else:
inference_progress = f"{inference_counter + 1 if inference_counter < num_inference_steps else num_inference_steps}/{num_inference_steps} {inference_percent}% " speed = "it/s"
else: duration = 1 / duration
inference_progress = ""
percent = int(100 * float(i+1 if i+1 < st.session_state.sampling_steps else st.session_state.sampling_steps)/float(st.session_state.sampling_steps)) if i > st.session_state.sampling_steps:
frames_percent = int(100 * float(st.session_state.current_frame if st.session_state.current_frame < st.session_state.max_frames else st.session_state.max_frames)/float(st.session_state.max_frames)) inference_counter += 1
inference_percent = int(100 * float(inference_counter + 1 if inference_counter < num_inference_steps else num_inference_steps)/float(num_inference_steps))
inference_progress = f"{inference_counter + 1 if inference_counter < num_inference_steps else num_inference_steps}/{num_inference_steps} {inference_percent}% "
else:
inference_progress = ""
st.session_state["progress_bar_text"].text( percent = int(100 * float(i+1 if i+1 < st.session_state.sampling_steps else st.session_state.sampling_steps)/float(st.session_state.sampling_steps))
f"Running step: {i+1 if i+1 < st.session_state.sampling_steps else st.session_state.sampling_steps}/{st.session_state.sampling_steps} " frames_percent = int(100 * float(st.session_state.current_frame if st.session_state.current_frame < st.session_state.max_frames else st.session_state.max_frames)/float(
f"{percent if percent < 100 else 100}% {inference_progress}{duration:.2f}{speed} | " st.session_state.max_frames))
f"Frame: {st.session_state.current_frame + 1 if st.session_state.current_frame < st.session_state.max_frames else st.session_state.max_frames}/{st.session_state.max_frames} "
f"{frames_percent if frames_percent < 100 else 100}% {st.session_state.frame_duration:.2f}{st.session_state.frame_speed}" st.session_state["progress_bar_text"].text(
) f"Running step: {i+1 if i+1 < st.session_state.sampling_steps else st.session_state.sampling_steps}/{st.session_state.sampling_steps} "
st.session_state["progress_bar"].progress(percent if percent < 100 else 100) f"{percent if percent < 100 else 100}% {inference_progress}{duration:.2f}{speed} | "
f"Frame: {st.session_state.current_frame + 1 if st.session_state.current_frame < st.session_state.max_frames else st.session_state.max_frames}/{st.session_state.max_frames} "
f"{frames_percent if frames_percent < 100 else 100}% {st.session_state.frame_duration:.2f}{st.session_state.frame_speed}"
)
st.session_state["progress_bar"].progress(percent if percent < 100 else 100)
except KeyError:
raise StopException
#scale and decode the image latents with vae #scale and decode the image latents with vae
cond_latents_2 = 1 / 0.18215 * cond_latents cond_latents_2 = 1 / 0.18215 * cond_latents
@ -262,35 +267,54 @@ def load_diffusers_model(weights_path,torch_device):
"You need a huggingface token in order to use the Text to Video tab. Use the Settings page from the sidebar on the left to add your token." "You need a huggingface token in order to use the Text to Video tab. Use the Settings page from the sidebar on the left to add your token."
) )
raise OSError("You need a huggingface token in order to use the Text to Video tab. Use the Settings page from the sidebar on the left to add your token.") raise OSError("You need a huggingface token in order to use the Text to Video tab. Use the Settings page from the sidebar on the left to add your token.")
#
def save_video_to_disk(frames, seeds, sanitized_prompt, fps=6, save_video=True, outdir='outputs'):
    """Write the generated frames to an mp4 file and return its path.

    The video is written to
    ``<cwd>/<outdir>/txt2vid/<seeds>_<sanitized_prompt>.mp4``.

    Args:
        frames: Iterable of frames; each one is handed to the imageio
            writer's ``append_data`` (callers in this file append numpy
            arrays).
        seeds: Value interpolated into the output file name.
        sanitized_prompt: File-system safe prompt text used in the file name.
        fps: Frames per second passed to ``imageio.get_writer``.
        save_video: When false nothing is written.
        outdir: Output directory, joined onto the current working directory.

    Returns:
        The path of the written video, or ``None`` when ``save_video`` is
        false.
    """
    # Previously ``video_path`` was only bound inside the ``if save_video``
    # branch, so disabling saving raised UnboundLocalError on return.
    # Callers test the result for truthiness, so None means "no video".
    if not save_video:
        return None

    video_dir = os.path.join(os.getcwd(), outdir, "txt2vid")
    # The caller-supplied outdir may not exist yet; create it up front so
    # the writer does not fail on a missing directory.
    os.makedirs(video_dir, exist_ok=True)
    video_path = os.path.join(video_dir, f"{seeds}_{sanitized_prompt}.mp4")

    writer = imageio.get_writer(video_path, fps=fps)
    try:
        for frame in frames:
            writer.append_data(frame)
    finally:
        # Always release the writer, even if a frame fails to encode.
        writer.close()

    return video_path
# #
def txt2vid( def txt2vid(
# -------------------------------------- # --------------------------------------
# args you probably want to change # args you probably want to change
prompts = ["blueberry spaghetti", "strawberry spaghetti"], # prompt to dream about prompts = ["blueberry spaghetti", "strawberry spaghetti"], # prompt to dream about
gpu:int = st.session_state['defaults'].general.gpu, # id of the gpu to run on gpu:int = st.session_state['defaults'].general.gpu, # id of the gpu to run on
#name:str = 'test', # name of this project, for the output directory #name:str = 'test', # name of this project, for the output directory
#rootdir:str = st.session_state['defaults'].general.outdir, #rootdir:str = st.session_state['defaults'].general.outdir,
num_steps:int = 200, # number of steps between each pair of sampled points num_steps:int = 200, # number of steps between each pair of sampled points
max_frames:int = 10000, # number of frames to write and then exit the script max_frames:int = 10000, # number of frames to write and then exit the script
num_inference_steps:int = 50, # more (e.g. 100, 200 etc) can create slightly better images num_inference_steps:int = 50, # more (e.g. 100, 200 etc) can create slightly better images
cfg_scale:float = 5.0, # can depend on the prompt. usually somewhere between 3-10 is good cfg_scale:float = 5.0, # can depend on the prompt. usually somewhere between 3-10 is good
do_loop = False, save_video = True,
use_lerp_for_text = False, save_video_on_stop = False,
seeds = None, outdir='outputs',
quality:int = 100, # for jpeg compression of the output images do_loop = False,
eta:float = 0.0, use_lerp_for_text = False,
width:int = 256, seeds = None,
height:int = 256, quality:int = 100, # for jpeg compression of the output images
weights_path = "CompVis/stable-diffusion-v1-4", eta:float = 0.0,
scheduler="klms", # choices: default, ddim, klms width:int = 256,
disable_tqdm = False, height:int = 256,
#----------------------------------------------- weights_path = "CompVis/stable-diffusion-v1-4",
beta_start = 0.0001, scheduler="klms", # choices: default, ddim, klms
beta_end = 0.00012, disable_tqdm = False,
beta_schedule = "scaled_linear", #-----------------------------------------------
starting_image=None beta_start = 0.0001,
): beta_end = 0.00012,
beta_schedule = "scaled_linear",
starting_image=None
):
""" """
prompt = ["blueberry spaghetti", "strawberry spaghetti"], # prompt to dream about prompt = ["blueberry spaghetti", "strawberry spaghetti"], # prompt to dream about
gpu:int = st.session_state['defaults'].general.gpu, # id of the gpu to run on gpu:int = st.session_state['defaults'].general.gpu, # id of the gpu to run on
@ -332,11 +356,11 @@ def txt2vid(
# init the output dir # init the output dir
sanitized_prompt = slugify(prompts) sanitized_prompt = slugify(prompts)
full_path = os.path.join(os.getcwd(), st.session_state['defaults'].general.outdir, "txt2vid-samples", "samples", sanitized_prompt) full_path = os.path.join(os.getcwd(), st.session_state['defaults'].general.outdir, "txt2vid", "samples", sanitized_prompt)
if len(full_path) > 220: if len(full_path) > 220:
sanitized_prompt = sanitized_prompt[:220-len(full_path)] sanitized_prompt = sanitized_prompt[:220-len(full_path)]
full_path = os.path.join(os.getcwd(), st.session_state['defaults'].general.outdir, "txt2vid-samples", "samples", sanitized_prompt) full_path = os.path.join(os.getcwd(), st.session_state['defaults'].general.outdir, "txt2vid", "samples", sanitized_prompt)
os.makedirs(full_path, exist_ok=True) os.makedirs(full_path, exist_ok=True)
@ -344,29 +368,29 @@ def txt2vid(
if st.session_state.write_info_files: if st.session_state.write_info_files:
with open(os.path.join(full_path , f'{slugify(str(seeds))}_config.json' if len(prompts) > 1 else "prompts_config.json"), "w") as outfile: with open(os.path.join(full_path , f'{slugify(str(seeds))}_config.json' if len(prompts) > 1 else "prompts_config.json"), "w") as outfile:
outfile.write(json.dumps( outfile.write(json.dumps(
dict( dict(
prompts = prompts, prompts = prompts,
gpu = gpu, gpu = gpu,
num_steps = num_steps, num_steps = num_steps,
max_frames = max_frames, max_frames = max_frames,
num_inference_steps = num_inference_steps, num_inference_steps = num_inference_steps,
cfg_scale = cfg_scale, cfg_scale = cfg_scale,
do_loop = do_loop, do_loop = do_loop,
use_lerp_for_text = use_lerp_for_text, use_lerp_for_text = use_lerp_for_text,
seeds = seeds, seeds = seeds,
quality = quality, quality = quality,
eta = eta, eta = eta,
width = width, width = width,
height = height, height = height,
weights_path = weights_path, weights_path = weights_path,
scheduler=scheduler, scheduler=scheduler,
disable_tqdm = disable_tqdm, disable_tqdm = disable_tqdm,
beta_start = beta_start, beta_start = beta_start,
beta_end = beta_end, beta_end = beta_end,
beta_schedule = beta_schedule beta_schedule = beta_schedule
), ),
indent=2, indent=2,
sort_keys=False, sort_keys=False,
)) ))
#print(scheduler) #print(scheduler)
@ -413,9 +437,10 @@ def txt2vid(
#prompts.append(prompts) #prompts.append(prompts)
#seeds.append(first_seed) #seeds.append(first_seed)
# get the conditional text embeddings based on the prompt with torch.autocast('cuda'):
text_input = server_state["pipe"].tokenizer(prompts, padding="max_length", max_length=server_state["pipe"].tokenizer.model_max_length, truncation=True, return_tensors="pt") # get the conditional text embeddings based on the prompt
cond_embeddings = server_state["pipe"].text_encoder(text_input.input_ids.to(torch_device))[0] # shape [1, 77, 768] text_input = server_state["pipe"].tokenizer(prompts, padding="max_length", max_length=server_state["pipe"].tokenizer.model_max_length, truncation=True, return_tensors="pt")
cond_embeddings = server_state["pipe"].text_encoder(text_input.input_ids.to(torch_device) )[0]
# #
if st.session_state.defaults.general.use_sd_concepts_library: if st.session_state.defaults.general.use_sd_concepts_library:
@ -511,11 +536,12 @@ def txt2vid(
#append the frames to the frames list so we can use them later. #append the frames to the frames list so we can use them later.
frames.append(np.asarray(gfpgan_image)) frames.append(np.asarray(gfpgan_image))
try:
st.session_state["preview_image"].image(gfpgan_image) st.session_state["preview_image"].image(gfpgan_image)
#except AttributeError: except KeyError:
print ("Cant get session_state, skipping image preview.")
#except (AttributeError, KeyError):
#print("Cant perform GFPGAN, skipping.") #print("Cant perform GFPGAN, skipping.")
#pass
#increase frame_index counter. #increase frame_index counter.
frame_index += 1 frame_index += 1
@ -535,23 +561,18 @@ def txt2vid(
init1 = init2 init1 = init2
# save the video after the generation is done.
video_path = save_video_to_disk(frames, seeds, sanitized_prompt, save_video=save_video, outdir=outdir)
except StopException: except StopException:
pass if save_video_on_stop:
print ("Streamlit Stop Exception Received. Saving video")
video_path = save_video_to_disk(frames, seeds, sanitized_prompt, save_video=save_video, outdir=outdir)
else:
video_path = None
if st.session_state['save_video']: if video_path and "preview_video" in st.session_state:
# write video to memory
#output = io.BytesIO()
#writer = imageio.get_writer(os.path.join(os.getcwd(), st.session_state['defaults'].general.outdir, "txt2vid-samples"), im, extension=".mp4", fps=30)
try:
video_path = os.path.join(os.getcwd(), st.session_state['defaults'].general.outdir, "txt2vid-samples",f"{seeds}_{sanitized_prompt}.mp4")
writer = imageio.get_writer(video_path, fps=6)
for frame in frames:
writer.append_data(frame)
writer.close()
except:
print("Can't save video, skipping.")
# show video preview on the UI # show video preview on the UI
st.session_state["preview_video"].video(open(video_path, 'rb').read()) st.session_state["preview_video"].video(open(video_path, 'rb').read())
@ -575,7 +596,7 @@ def layout():
input_col1, generate_col1 = st.columns([10,1]) input_col1, generate_col1 = st.columns([10,1])
with input_col1: with input_col1:
#prompt = st.text_area("Input Text","") #prompt = st.text_area("Input Text","")
prompt = st.text_input("Input Text","", placeholder="A corgi wearing a top hat as an oil painting.") prompt = st.text_area("Input Text","", placeholder="A corgi wearing a top hat as an oil painting.")
# Every form must have a submit button, the extra blank spaces is a temp way to align it with the input field. Needs to be done in CSS or some other way. # Every form must have a submit button, the extra blank spaces is a temp way to align it with the input field. Needs to be done in CSS or some other way.
generate_col1.write("") generate_col1.write("")
@ -604,7 +625,7 @@ def layout():
#It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish generation as more images are generated at once.\ #It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish generation as more images are generated at once.\
#Default: 1") #Default: 1")
st.session_state["max_frames"] = int(st.text_input("Max Frames:", value=st.session_state['defaults'].txt2vid.max_frames, help="Specify the max number of frames you want to generate.")) st.session_state["max_frames"] = st.number_input("Max Frames:", value=st.session_state['defaults'].txt2vid.max_frames, help="Specify the max number of frames you want to generate.")
with st.expander("Preview Settings"): with st.expander("Preview Settings"):
#st.session_state["update_preview"] = st.checkbox("Update Image Preview", value=st.session_state['defaults'].txt2vid.update_preview, #st.session_state["update_preview"] = st.checkbox("Update Image Preview", value=st.session_state['defaults'].txt2vid.update_preview,
@ -613,9 +634,16 @@ def layout():
#By default this is enabled and the frequency is set to 1 step.") #By default this is enabled and the frequency is set to 1 step.")
st.session_state["update_preview"] = st.session_state["defaults"].general.update_preview st.session_state["update_preview"] = st.session_state["defaults"].general.update_preview
st.session_state["update_preview_frequency"] = st.text_input("Update Image Preview Frequency", value=st.session_state['defaults'].txt2vid.update_preview_frequency, st.session_state["update_preview_frequency"] = st.number_input("Update Image Preview Frequency",
help="Frequency in steps at which the the preview image is updated. By default the frequency \ min_value=1,
is set to 1 step.") value=st.session_state['defaults'].txt2vid.update_preview_frequency,
help="Frequency in steps at which the the preview image is updated. By default the frequency \
is set to 1 step.")
st.session_state["dynamic_preview_frequency"] = st.checkbox("Dynamic Preview Frequency", value=st.session_state['defaults'].txt2vid.dynamic_preview_frequency,
help="This option tries to find the best value at which we can update \
the preview image during generation while minimizing the impact it has in performance. Default: True")
# #
@ -641,6 +669,7 @@ def layout():
#generate_video = st.empty() #generate_video = st.empty()
st.session_state["preview_video"] = st.empty() st.session_state["preview_video"] = st.empty()
preview_video = st.session_state["preview_video"]
message = st.empty() message = st.empty()
@ -699,19 +728,23 @@ def layout():
help="Separate multiple prompts using the `|` character, and get all combinations of them.") help="Separate multiple prompts using the `|` character, and get all combinations of them.")
st.session_state["normalize_prompt_weights"] = st.checkbox("Normalize Prompt Weights.", st.session_state["normalize_prompt_weights"] = st.checkbox("Normalize Prompt Weights.",
value=st.session_state['defaults'].txt2vid.normalize_prompt_weights, help="Ensure the sum of all weights add up to 1.0") value=st.session_state['defaults'].txt2vid.normalize_prompt_weights, help="Ensure the sum of all weights add up to 1.0")
st.session_state["save_individual_images"] = st.checkbox("Save individual images.", st.session_state["save_individual_images"] = st.checkbox("Save individual images.",
value=st.session_state['defaults'].txt2vid.save_individual_images, value=st.session_state['defaults'].txt2vid.save_individual_images,
help="Save each image generated before any filter or enhancement is applied.") help="Save each image generated before any filter or enhancement is applied.")
st.session_state["save_video"] = st.checkbox("Save video",value=st.session_state['defaults'].txt2vid.save_video, st.session_state["save_video"] = st.checkbox("Save video",value=st.session_state['defaults'].txt2vid.save_video,
help="Save a video with all the images generated as frames at the end of the generation.") help="Save a video with all the images generated as frames at the end of the generation.")
save_video_on_stop = st.checkbox("Save video on Stop",value=st.session_state['defaults'].txt2vid.save_video_on_stop,
help="Save a video with all the images generated as frames when we hit the stop button during a generation.")
st.session_state["group_by_prompt"] = st.checkbox("Group results by prompt", value=st.session_state['defaults'].txt2vid.group_by_prompt, st.session_state["group_by_prompt"] = st.checkbox("Group results by prompt", value=st.session_state['defaults'].txt2vid.group_by_prompt,
help="Saves all the images with the same prompt into the same folder. When using a prompt matrix each prompt combination will have its own folder.") help="Saves all the images with the same prompt into the same folder. When using a prompt \
matrix each prompt combination will have its own folder.")
st.session_state["write_info_files"] = st.checkbox("Write Info file", value=st.session_state['defaults'].txt2vid.write_info_files, st.session_state["write_info_files"] = st.checkbox("Write Info file", value=st.session_state['defaults'].txt2vid.write_info_files,
help="Save a file next to the image with informartion about the generation.") help="Save a file next to the image with informartion about the generation.")
st.session_state["dynamic_preview_frequency"] = st.checkbox("Dynamic Preview Frequency", value=st.session_state['defaults'].txt2vid.dynamic_preview_frequency,
help="This option tries to find the best value at which we can update \
the preview image during generation while minimizing the impact it has in performance. Default: True")
st.session_state["do_loop"] = st.checkbox("Do Loop", value=st.session_state['defaults'].txt2vid.do_loop, st.session_state["do_loop"] = st.checkbox("Do Loop", value=st.session_state['defaults'].txt2vid.do_loop,
help="Do loop") help="Do loop")
st.session_state["save_as_jpg"] = st.checkbox("Save samples as jpg", value=st.session_state['defaults'].txt2vid.save_as_jpg, help="Saves the images as jpg instead of png.") st.session_state["save_as_jpg"] = st.checkbox("Save samples as jpg", value=st.session_state['defaults'].txt2vid.save_as_jpg, help="Saves the images as jpg instead of png.")
@ -827,7 +860,7 @@ def layout():
#load_models(False, st.session_state["use_GFPGAN"], True, st.session_state["RealESRGAN_model"]) #load_models(False, st.session_state["use_GFPGAN"], True, st.session_state["RealESRGAN_model"])
if st.session_state["use_GFPGAN"]: if st.session_state["use_GFPGAN"]:
if "GFPGAN" in st.session_state: if "GFPGAN" in server_state:
print("GFPGAN already loaded") print("GFPGAN already loaded")
else: else:
with col2: with col2:
@ -835,28 +868,35 @@ def layout():
# Load GFPGAN # Load GFPGAN
if os.path.exists(st.session_state["defaults"].general.GFPGAN_dir): if os.path.exists(st.session_state["defaults"].general.GFPGAN_dir):
try: try:
server_state["GFPGAN"] = load_GFPGAN() load_GFPGAN()
print("Loaded GFPGAN") print("Loaded GFPGAN")
except Exception: except Exception:
import traceback import traceback
print("Error loading GFPGAN:", file=sys.stderr) print("Error loading GFPGAN:", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr) print(traceback.format_exc(), file=sys.stderr)
else: else:
if "GFPGAN" in st.session_state: if "GFPGAN" in server_state:
del server_state["GFPGAN"] del server_state["GFPGAN"]
#try: #try:
# run video generation # run video generation
video, seed, info, stats = txt2vid(prompts=prompt, gpu=st.session_state["defaults"].general.gpu, video, seed, info, stats = txt2vid(prompts=prompt, gpu=st.session_state["defaults"].general.gpu,
num_steps=st.session_state.sampling_steps, max_frames=int(st.session_state.max_frames), num_steps=st.session_state.sampling_steps, max_frames=st.session_state.max_frames,
num_inference_steps=st.session_state.num_inference_steps, num_inference_steps=st.session_state.num_inference_steps,
cfg_scale=cfg_scale,do_loop=st.session_state["do_loop"], cfg_scale=cfg_scale, save_video_on_stop=save_video_on_stop,
outdir=st.session_state["defaults"].general.outdir,
do_loop=st.session_state["do_loop"],
seeds=seed, quality=100, eta=0.0, width=width, seeds=seed, quality=100, eta=0.0, width=width,
height=height, weights_path=custom_model, scheduler=scheduler_name, height=height, weights_path=custom_model, scheduler=scheduler_name,
disable_tqdm=False, beta_start=st.session_state['defaults'].txt2vid.beta_start.value, disable_tqdm=False, beta_start=st.session_state['defaults'].txt2vid.beta_start.value,
beta_end=st.session_state['defaults'].txt2vid.beta_end.value, beta_end=st.session_state['defaults'].txt2vid.beta_end.value,
beta_schedule=beta_scheduler_type, starting_image=None) beta_schedule=beta_scheduler_type, starting_image=None)
if video and save_video_on_stop:
# show video preview on the UI after we hit the stop button
# currently not working as session_state is cleared on StopException
preview_video.video(open(video, 'rb').read())
#message.success('Done!', icon="✅") #message.success('Done!', icon="✅")
message.success('Render Complete: ' + info + '; Stats: ' + stats, icon="") message.success('Render Complete: ' + info + '; Stats: ' + stats, icon="")