mirror of
https://github.com/sd-webui/stable-diffusion-webui.git
synced 2024-12-15 23:31:59 +03:00
The merge (#1494)
This commit is contained in:
commit
8f613bf256
@ -12,7 +12,7 @@
|
||||
# GNU Affero General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# UI defaults configuration file. It is automatically loaded if located at configs/webui/webui_streamlit.yaml.
|
||||
# Any changes made here will be available automatically on the web app without having to stop it.
|
||||
@ -24,7 +24,7 @@ general:
|
||||
huggingface_token: ""
|
||||
gpu: 0
|
||||
outdir: outputs
|
||||
default_model: "Stable Diffusion v1.4"
|
||||
default_model: "Stable Diffusion v1.4"
|
||||
default_model_config: "configs/stable-diffusion/v1-inference.yaml"
|
||||
default_model_path: "models/ldm/stable-diffusion-v1/model.ckpt"
|
||||
use_sd_concepts_library: True
|
||||
@ -69,13 +69,13 @@ txt2img:
|
||||
min_value: 64
|
||||
max_value: 2048
|
||||
step: 64
|
||||
|
||||
|
||||
height:
|
||||
value: 512
|
||||
min_value: 64
|
||||
max_value: 2048
|
||||
step: 64
|
||||
|
||||
|
||||
cfg_scale:
|
||||
value: 7.5
|
||||
min_value: 1.0
|
||||
@ -85,16 +85,16 @@ txt2img:
|
||||
seed: ""
|
||||
batch_count:
|
||||
value: 1
|
||||
|
||||
|
||||
batch_size:
|
||||
value: 1
|
||||
|
||||
|
||||
sampling_steps:
|
||||
value: 30
|
||||
min_value: 10
|
||||
max_value: 250
|
||||
step: 10
|
||||
|
||||
|
||||
LDSR_config:
|
||||
sampling_steps: 50
|
||||
preDownScale: 1
|
||||
@ -115,16 +115,16 @@ txt2img:
|
||||
use_LDSR: False
|
||||
RealESRGAN_model: "RealESRGAN_x4plus"
|
||||
use_upscaling: False
|
||||
|
||||
|
||||
variant_amount:
|
||||
value: 0.0
|
||||
min_value: 0.0
|
||||
max_value: 1.0
|
||||
step: 0.01
|
||||
|
||||
|
||||
variant_seed: ""
|
||||
write_info_files: True
|
||||
|
||||
|
||||
txt2vid:
|
||||
default_model: "CompVis/stable-diffusion-v1-4"
|
||||
custom_models_list: ["CompVis/stable-diffusion-v1-4"]
|
||||
@ -134,37 +134,37 @@ txt2vid:
|
||||
min_value: 64
|
||||
max_value: 2048
|
||||
step: 64
|
||||
|
||||
|
||||
height:
|
||||
value: 512
|
||||
min_value: 64
|
||||
max_value: 2048
|
||||
step: 64
|
||||
|
||||
|
||||
cfg_scale:
|
||||
value: 7.5
|
||||
min_value: 1.0
|
||||
max_value: 30.0
|
||||
step: 0.5
|
||||
|
||||
|
||||
batch_count:
|
||||
value: 1
|
||||
|
||||
|
||||
batch_size:
|
||||
value: 1
|
||||
|
||||
|
||||
sampling_steps:
|
||||
value: 30
|
||||
min_value: 10
|
||||
max_value: 250
|
||||
step: 10
|
||||
|
||||
|
||||
num_inference_steps:
|
||||
value: 200
|
||||
min_value: 10
|
||||
max_value: 500
|
||||
step: 10
|
||||
|
||||
|
||||
seed: ""
|
||||
default_sampler: "k_euler"
|
||||
scheduler_name: "klms"
|
||||
@ -175,6 +175,7 @@ txt2vid:
|
||||
normalize_prompt_weights: True
|
||||
save_individual_images: True
|
||||
save_video: True
|
||||
save_video_on_stop: False
|
||||
group_by_prompt: True
|
||||
write_info_files: True
|
||||
do_loop: False
|
||||
@ -188,36 +189,36 @@ txt2vid:
|
||||
min_value: 0.0
|
||||
max_value: 1.0
|
||||
step: 0.01
|
||||
|
||||
|
||||
variant_seed: ""
|
||||
|
||||
|
||||
beta_start:
|
||||
value: 0.00085
|
||||
min_value: 0.0001
|
||||
max_value: 0.0300
|
||||
step: 0.0001
|
||||
format: "%.5f"
|
||||
|
||||
|
||||
beta_end:
|
||||
value: 0.012
|
||||
min_value: 0.0001
|
||||
max_value: 0.0300
|
||||
step: 0.0001
|
||||
format: "%.5f"
|
||||
|
||||
|
||||
beta_scheduler_type: "scaled_linear"
|
||||
max_frames: 100
|
||||
|
||||
|
||||
LDSR_config:
|
||||
sampling_steps: 50
|
||||
preDownScale: 1
|
||||
postDownScale: 1
|
||||
downsample_method: "Lanczos"
|
||||
|
||||
|
||||
img2img:
|
||||
prompt:
|
||||
prompt:
|
||||
sampler_name: "k_euler"
|
||||
denoising_strength:
|
||||
denoising_strength:
|
||||
value: 0.75
|
||||
min_value: 0.0
|
||||
max_value: 1.0
|
||||
@ -238,49 +239,49 @@ img2img:
|
||||
min_value: 64
|
||||
max_value: 2048
|
||||
step: 64
|
||||
|
||||
|
||||
height:
|
||||
value: 512
|
||||
min_value: 64
|
||||
max_value: 2048
|
||||
step: 64
|
||||
|
||||
|
||||
cfg_scale:
|
||||
value: 7.5
|
||||
min_value: 1.0
|
||||
max_value: 30.0
|
||||
step: 0.5
|
||||
|
||||
|
||||
batch_count:
|
||||
value: 1
|
||||
|
||||
|
||||
batch_size:
|
||||
value: 1
|
||||
|
||||
|
||||
sampling_steps:
|
||||
value: 30
|
||||
min_value: 10
|
||||
max_value: 250
|
||||
step: 10
|
||||
|
||||
|
||||
num_inference_steps:
|
||||
value: 200
|
||||
min_value: 10
|
||||
max_value: 500
|
||||
step: 10
|
||||
|
||||
|
||||
find_noise_steps:
|
||||
value: 100
|
||||
min_value: 0
|
||||
max_value: 500
|
||||
step: 10
|
||||
|
||||
|
||||
LDSR_config:
|
||||
sampling_steps: 50
|
||||
preDownScale: 1
|
||||
postDownScale: 1
|
||||
downsample_method: "Lanczos"
|
||||
|
||||
|
||||
loopback: True
|
||||
random_seed_loopback: True
|
||||
separate_prompts: False
|
||||
@ -298,36 +299,36 @@ img2img:
|
||||
variant_amount: 0.0
|
||||
variant_seed: ""
|
||||
write_info_files: True
|
||||
|
||||
|
||||
img2txt:
|
||||
batch_size: 420
|
||||
blip_image_eval_size: 512
|
||||
keep_all_models_loaded: False
|
||||
|
||||
|
||||
concepts_library:
|
||||
concepts_per_page: 12
|
||||
|
||||
|
||||
gfpgan:
|
||||
strength: 100
|
||||
|
||||
textual_inversion:
|
||||
pretrained_model_name_or_path: "models/diffusers/stable-diffusion-v1-4"
|
||||
tokenizer_name: "models/clip-vit-large-patch14"
|
||||
|
||||
|
||||
|
||||
|
||||
daisi_app:
|
||||
running_on_daisi_io: False
|
||||
|
||||
model_manager:
|
||||
|
||||
model_manager:
|
||||
models:
|
||||
stable_diffusion:
|
||||
stable_diffusion:
|
||||
model_name: "Stable Diffusion v1.4"
|
||||
save_location: "./models/ldm/stable-diffusion-v1"
|
||||
files:
|
||||
model_ckpt:
|
||||
file_name: "model.ckpt"
|
||||
download_link: "https://www.googleapis.com/storage/v1/b/aai-blog-files/o/sd-v1-4.ckpt?alt=media"
|
||||
|
||||
|
||||
gfpgan:
|
||||
model_name: "GFPGAN"
|
||||
save_location: "./models/gfpgan"
|
||||
@ -343,8 +344,8 @@ model_manager:
|
||||
file_name: "parsing_parsenet.pth"
|
||||
save_location: "./gfpgan/weights"
|
||||
download_link: "https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth"
|
||||
|
||||
|
||||
|
||||
|
||||
realesrgan:
|
||||
model_name: "RealESRGAN"
|
||||
save_location: "./models/realesrgan"
|
||||
@ -355,8 +356,8 @@ model_manager:
|
||||
x4plus_anime_6b:
|
||||
file_name: "RealESRGAN_x4plus_anime_6B.pth"
|
||||
download_link: "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth"
|
||||
|
||||
|
||||
|
||||
|
||||
waifu_diffusion:
|
||||
model_name: "Waifu Diffusion v1.2"
|
||||
save_location: "./models/custom"
|
||||
@ -365,7 +366,7 @@ model_manager:
|
||||
file_name: "waifu-diffusion.ckpt"
|
||||
download_link: "https://huggingface.co/crumb/pruned-waifu-diffusion/resolve/main/model-pruned.ckpt"
|
||||
|
||||
|
||||
|
||||
trinart_stable_diffusion:
|
||||
model_name: "TrinArt Stable Diffusion v2"
|
||||
save_location: "./models/custom"
|
||||
@ -373,7 +374,7 @@ model_manager:
|
||||
trinart:
|
||||
file_name: "trinart.ckpt"
|
||||
download_link: "https://huggingface.co/naclbit/trinart_stable_diffusion_v2/resolve/main/trinart2_step95000.ckpt"
|
||||
|
||||
|
||||
stable_diffusion_concept_library:
|
||||
model_name: "Stable Diffusion Concept Library"
|
||||
save_location: "./models/custom/sd-concepts-library/"
|
||||
@ -381,7 +382,7 @@ model_manager:
|
||||
concept_library:
|
||||
file_name: ""
|
||||
download_link: "https://github.com/sd-webui/sd-concepts-library"
|
||||
|
||||
|
||||
blip_model:
|
||||
model_name: "Blip Model"
|
||||
save_location: "./models/blip"
|
||||
@ -389,7 +390,7 @@ model_manager:
|
||||
blip:
|
||||
file_name: "model__base_caption.pth"
|
||||
download_link: "https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model*_base_caption.pth"
|
||||
|
||||
|
||||
ldsr:
|
||||
model_name: "Latent Diffusion Super Resolution (LDSR)"
|
||||
save_location: "./models/ldsr"
|
||||
@ -397,8 +398,7 @@ model_manager:
|
||||
project_yaml:
|
||||
file_name: "project.yaml"
|
||||
download_link: "https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1"
|
||||
|
||||
|
||||
ldsr_model:
|
||||
file_name: "model.ckpt"
|
||||
download_link: "https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1"
|
||||
|
10
daisi_app.py
10
daisi_app.py
@ -5,14 +5,14 @@ print (os.getcwd)
|
||||
|
||||
try:
|
||||
with open("environment.yaml") as file_handle:
|
||||
environment_data = yaml.load(file_handle, Loader=yaml.FullLoader)
|
||||
environment_data = yaml.safe_load(file_handle, Loader=yaml.FullLoader)
|
||||
except FileNotFoundError:
|
||||
try:
|
||||
with open(os.path.join("..", "environment.yaml")) as file_handle:
|
||||
environment_data = yaml.load(file_handle, Loader=yaml.FullLoader)
|
||||
environment_data = yaml.safe_load(file_handle, Loader=yaml.FullLoader)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
try:
|
||||
for dependency in environment_data["dependencies"]:
|
||||
package_name, package_version = dependency.split("=")
|
||||
@ -21,6 +21,6 @@ except:
|
||||
pass
|
||||
|
||||
try:
|
||||
subprocess.run(['python', '-m', 'streamlit', "run" ,os.path.join("..","scripts/webui_streamlit.py"), "--theme.base dark"], stdout=subprocess.DEVNULL)
|
||||
subprocess.run(['python', '-m', 'streamlit', "run" ,os.path.join("..","scripts/webui_streamlit.py"), "--theme.base dark"], stdout=subprocess.DEVNULL)
|
||||
except FileExistsError:
|
||||
subprocess.run(['python', '-m', 'streamlit', "run" ,"scripts/webui_streamlit.py", "--theme.base dark"], stdout=subprocess.DEVNULL)
|
||||
subprocess.run(['python', '-m', 'streamlit', "run" ,"scripts/webui_streamlit.py", "--theme.base dark"], stdout=subprocess.DEVNULL)
|
@ -29,53 +29,5 @@ dependencies:
|
||||
- scikit-image=0.19.2
|
||||
- torchvision=0.12.0
|
||||
- pip:
|
||||
- -e .
|
||||
- -e git+https://github.com/CompVis/taming-transformers#egg=taming-transformers
|
||||
- -e git+https://github.com/openai/CLIP#egg=clip
|
||||
- -e git+https://github.com/hlky/k-diffusion-sd#egg=k_diffusion
|
||||
- -e git+https://github.com/devilismyfriend/latent-diffusion#egg=latent-diffusion
|
||||
- accelerate==0.12.0
|
||||
- albumentations==0.4.3
|
||||
- basicsr>=1.3.4.0
|
||||
- diffusers==0.3.0
|
||||
- einops==0.3.1
|
||||
- facexlib>=0.2.3
|
||||
- ftfy==6.1.1
|
||||
- fairscale==0.4.4
|
||||
- gradio==3.1.6
|
||||
- gfpgan==1.3.8
|
||||
- hydralit_components==1.0.10
|
||||
- hydralit==1.0.14
|
||||
- imageio-ffmpeg==0.4.2
|
||||
- imageio==2.9.0
|
||||
- kornia==0.6
|
||||
- loguru
|
||||
- omegaconf==2.1.1
|
||||
- opencv-python-headless==4.6.0.66
|
||||
- open-clip-torch==2.0.2
|
||||
- pandas==1.4.3
|
||||
- piexif==1.1.3
|
||||
- pudb==2019.2
|
||||
- pynvml==11.4.1
|
||||
- python-slugify>=6.1.2
|
||||
- pytorch-lightning==1.4.2
|
||||
- retry>=0.9.2
|
||||
- regex
|
||||
- realesrgan==0.3.0
|
||||
- streamlit==1.13.0
|
||||
- streamlit-on-Hover-tabs==1.0.1
|
||||
- streamlit-option-menu==0.3.2
|
||||
- streamlit_nested_layout
|
||||
- streamlit-server-state==0.14.2
|
||||
- streamlit-tensorboard==0.0.2
|
||||
- test-tube>=0.7.5
|
||||
- tensorboard==2.10.1
|
||||
- timm==0.6.7
|
||||
- torch-fidelity==0.3.0
|
||||
- torchmetrics==0.6.0
|
||||
- transformers==4.19.2
|
||||
- tensorflow==2.10.0
|
||||
- tqdm==4.64.0
|
||||
- stqdm==0.0.4
|
||||
- wget
|
||||
- -r requirements.txt
|
||||
|
||||
|
@ -143,4 +143,10 @@ div.gallery:hover {
|
||||
}
|
||||
.css-jn99sy {
|
||||
display: none
|
||||
}
|
||||
}
|
||||
|
||||
/* Make the text area widget a similar height to the text input field. */
|
||||
.st-ex{
|
||||
height: 54px;
|
||||
min-height: 25px;
|
||||
}
|
@ -1,31 +1,28 @@
|
||||
transformers==4.19.2 # do not change
|
||||
diffusers==0.3.0
|
||||
invisible-watermark==0.1.5
|
||||
pytorch_lightning==1.7.7
|
||||
open-clip-torch
|
||||
loguru
|
||||
taming-transformers-rom1504==0.0.6 # required by ldm
|
||||
wget
|
||||
-e .
|
||||
|
||||
# See: https://github.com/CompVis/taming-transformers/issues/176
|
||||
# -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers # required by ldm
|
||||
# Note: taming package needs to be installed with -e option
|
||||
-e git+https://github.com/CompVis/taming-transformers#egg=taming-transformers
|
||||
invisible-watermark==0.1.5
|
||||
taming-transformers-rom1504==0.0.6 # required by ldm
|
||||
|
||||
# Note: K-diffusion brings in CLIP 1.0 as a dependency automatically; explicitly specifying CLIP alongside it creates a dependency resolution conflict.
|
||||
git+https://github.com/openai/CLIP.git@main#egg=clip
|
||||
|
||||
git+https://github.com/crowsonkb/k-diffusion.git
|
||||
# Note: K-diffusion brings in CLIP 1.0 as a dependency automatically; explicitly specifying CLIP alongside it creates a dependency resolution conflict.
|
||||
# git+https://github.com/openai/CLIP.git@main#egg=clip
|
||||
# git+https://github.com/hlky/k-diffusion-sd#egg=k_diffusion
|
||||
|
||||
# Dependencies required for Stable Diffusion UI
|
||||
pynvml==11.4.1
|
||||
omegaconf==2.2.3
|
||||
|
||||
Jinja2==3.1.2 # Jinja2 is required by Gradio
|
||||
# Note: Jinja2 3.x major version required due to breaking changes found in markupsafe==2.1.1; 2.0.1 is incompatible with other upstream dependencies
|
||||
# see https://github.com/pallets/markupsafe/issues/304
|
||||
|
||||
Jinja2==3.1.2 # Jinja2 is required by Gradio
|
||||
|
||||
# Environment Dependencies for WebUI (gradio)
|
||||
gradio==3.4
|
||||
gradio==3.1.6
|
||||
|
||||
# Environment Dependencies for WebUI (streamlit)
|
||||
streamlit==1.13.0
|
||||
@ -36,6 +33,8 @@ streamlit-server-state==0.14.2
|
||||
streamlit-tensorboard==0.0.2
|
||||
hydralit==1.0.14
|
||||
hydralit_components==1.0.10
|
||||
stqdm==0.0.4
|
||||
diffusers==0.4.1
|
||||
|
||||
# Img2text
|
||||
ftfy==6.1.1
|
||||
@ -47,9 +46,29 @@ tensorboard==2.10.1
|
||||
|
||||
|
||||
# Other
|
||||
retry==0.9.2 # used by sdutils
|
||||
python-slugify==6.1.2 # used by sdutils
|
||||
piexif==1.1.3 # used by sdutils
|
||||
retry==0.9.2 # used by sd_utils
|
||||
python-slugify==6.1.2 # used by sd_utils
|
||||
piexif==1.1.3 # used by sd_utils
|
||||
|
||||
accelerate==0.12.0
|
||||
albumentations==0.4.3
|
||||
einops==0.3.1
|
||||
facexlib>=0.2.3
|
||||
imageio-ffmpeg==0.4.2
|
||||
imageio==2.9.0
|
||||
kornia==0.6
|
||||
loguru
|
||||
opencv-python-headless==4.6.0.66
|
||||
open-clip-torch==2.0.2
|
||||
pandas==1.4.3
|
||||
pudb==2019.2
|
||||
pytorch-lightning==1.7.7
|
||||
realesrgan==0.3.0
|
||||
test-tube>=0.7.5
|
||||
timm==0.6.7
|
||||
torch-fidelity==0.3.0
|
||||
transformers==4.19.2 # do not change
|
||||
wget
|
||||
|
||||
# Optional packages commonly used with Stable Diffusion workflow
|
||||
|
||||
@ -57,11 +76,14 @@ piexif==1.1.3 # used by sdutils
|
||||
basicsr==1.4.2 # required by RealESRGAN
|
||||
gfpgan==1.3.8 # GFPGAN
|
||||
realesrgan==0.3.0 # RealESRGAN brings in GFPGAN as a requirement
|
||||
-e git+https://github.com/devilismyfriend/latent-diffusion#egg=latent-diffusion #ldsr
|
||||
git+https://github.com/CompVis/latent-diffusion
|
||||
|
||||
## for monocular depth estimation
|
||||
tensorflow==2.10.0
|
||||
|
||||
# Unused Packages: No current usage but will be used in the future.
|
||||
|
||||
|
||||
# Orphaned Packages: No usage found
|
||||
|
||||
|
||||
|
@ -157,9 +157,11 @@ def layout():
|
||||
#in steps will be shown, this is helpful to reduce the negative effect this option has on performance. \
|
||||
#Default: True")
|
||||
st.session_state["defaults"].general.update_preview = True
|
||||
st.session_state["defaults"].general.update_preview_frequency = st.number_input("Update Preview Frequency", value=st.session_state['defaults'].general.update_preview_frequency,
|
||||
help="Specify the frequency at which the image is updated in steps, this is helpful to reduce the \
|
||||
negative effect updating the preview image has on performance. Default: 10")
|
||||
st.session_state["defaults"].general.update_preview_frequency = st.number_input("Update Preview Frequency",
|
||||
min_value=1,
|
||||
value=st.session_state['defaults'].general.update_preview_frequency,
|
||||
help="Specify the frequency at which the image is updated in steps, this is helpful to reduce the \
|
||||
negative effect updating the preview image has on performance. Default: 10")
|
||||
|
||||
with col3:
|
||||
st.title("Others")
|
||||
@ -270,7 +272,7 @@ def layout():
|
||||
value=st.session_state['defaults'].txt2img.sampling_steps.min_value,
|
||||
help="Set the default minimum value for the sampling steps slider. Default is: 1")
|
||||
|
||||
st.session_state["defaults"].txt2img.sampling_steps.step = st.text_input("Sampling Slider Steps",
|
||||
st.session_state["defaults"].txt2img.sampling_steps.step = st.number_input("Sampling Slider Steps",
|
||||
value=st.session_state['defaults'].txt2img.sampling_steps.step,
|
||||
help="Set the default value for the number of steps on the sampling steps slider. Default is: 10")
|
||||
|
||||
@ -326,8 +328,9 @@ def layout():
|
||||
|
||||
st.session_state["defaults"].txt2img.update_preview = True
|
||||
st.session_state["defaults"].txt2img.update_preview_frequency = st.number_input("Preview Image Update Frequency",
|
||||
value=st.session_state['defaults'].txt2img.update_preview_frequency,
|
||||
help="Set the default value for the frrquency of the preview image updates. Default is: 10")
|
||||
min_value=1,
|
||||
value=st.session_state['defaults'].txt2img.update_preview_frequency,
|
||||
help="Set the default value for the frrquency of the preview image updates. Default is: 10")
|
||||
|
||||
with col5:
|
||||
st.title("Variation Parameters")
|
||||
@ -526,8 +529,9 @@ def layout():
|
||||
|
||||
st.session_state["defaults"].img2img.update_preview = True
|
||||
st.session_state["defaults"].img2img.update_preview_frequency = st.number_input("Img2Img Preview Image Update Frequency",
|
||||
value=st.session_state['defaults'].img2img.update_preview_frequency,
|
||||
help="Set the default value for the frrquency of the preview image updates. Default is: 10")
|
||||
min_value=1,
|
||||
value=st.session_state['defaults'].img2img.update_preview_frequency,
|
||||
help="Set the default value for the frrquency of the preview image updates. Default is: 10")
|
||||
|
||||
st.title("Variation Parameters")
|
||||
|
||||
@ -681,6 +685,10 @@ def layout():
|
||||
st.session_state["defaults"].txt2vid.save_video = st.checkbox("Save Txt2Vid Video", value=st.session_state['defaults'].txt2vid.save_video,
|
||||
help="Choose to save the Txt2Vid video. Default: True")
|
||||
|
||||
st.session_state["defaults"].txt2vid.save_video_on_stop = st.checkbox("Save video on Stop",value=st.session_state['defaults'].txt2vid.save_video_on_stop,
|
||||
help="Save a video with all the images generated as frames when we hit the stop button \
|
||||
during a generation.")
|
||||
|
||||
st.session_state["defaults"].txt2vid.group_by_prompt = st.checkbox("Group By txt2vid Prompt", value=st.session_state['defaults'].txt2vid.group_by_prompt,
|
||||
help="Choose to save images grouped by their prompt. Default: False")
|
||||
|
||||
@ -750,7 +758,7 @@ def layout():
|
||||
st.session_state["defaults"].txt2vid.beta_start.step = st.number_input("txt2vid Beta Start Slider Steps", value=st.session_state['defaults'].txt2vid.beta_start.step,
|
||||
help="Set the default value for the number of steps on the variation slider. Default is: 1")
|
||||
|
||||
st.session_state["defaults"].txt2vid.beta_start.format = st.text_input("Default txt2vid Beta Start Format", value=st.session_state['defaults'].txt2vid.beta_start.format,
|
||||
st.session_state["defaults"].txt2vid.beta_start.format = st.number_input("Default txt2vid Beta Start Format", value=st.session_state['defaults'].txt2vid.beta_start.format,
|
||||
help="Set the default Beta Start Format. Default is: %.5\f")
|
||||
|
||||
# Beta End
|
||||
@ -766,7 +774,7 @@ def layout():
|
||||
st.session_state["defaults"].txt2vid.beta_end.step = st.number_input("txt2vid Beta End Slider Steps", value=st.session_state['defaults'].txt2vid.beta_end.step,
|
||||
help="Set the default value for the number of steps on the variation slider. Default is: 1")
|
||||
|
||||
st.session_state["defaults"].txt2vid.beta_end.format = st.text_input("Default txt2vid Beta End Format", value=st.session_state['defaults'].txt2vid.beta_start.format,
|
||||
st.session_state["defaults"].txt2vid.beta_end.format = st.number_input("Default txt2vid Beta End Format", value=st.session_state['defaults'].txt2vid.beta_start.format,
|
||||
help="Set the default Beta Start Format. Default is: %.5\f")
|
||||
|
||||
with image_processing:
|
||||
@ -827,4 +835,4 @@ def layout():
|
||||
toml.dump(st.session_state["streamlit_config"], toml_file)
|
||||
|
||||
if reset_button:
|
||||
st.session_state["defaults"] = OmegaConf.load("configs/webui/webui_streamlit.yaml")
|
||||
st.session_state["defaults"] = OmegaConf.load("configs/webui/webui_streamlit.yaml")
|
||||
|
@ -365,7 +365,7 @@ def layout():
|
||||
img2img_input_col, img2img_generate_col = st.columns([10,1])
|
||||
with img2img_input_col:
|
||||
#prompt = st.text_area("Input Text","")
|
||||
prompt = st.text_input("Input Text","", placeholder="A corgi wearing a top hat as an oil painting.")
|
||||
prompt = st.text_area("Input Text","", placeholder="A corgi wearing a top hat as an oil painting.")
|
||||
|
||||
# Every form must have a submit button; the extra blank spaces are a temporary way to align it with the input field. This needs to be done in CSS or some other way.
|
||||
img2img_generate_col.write("")
|
||||
@ -436,19 +436,21 @@ def layout():
|
||||
step=st.session_state['defaults'].img2img.find_noise_steps.step)
|
||||
|
||||
with st.expander("Batch Options"):
|
||||
st.session_state["batch_count"] = int(st.text_input("Batch count.", value=st.session_state['defaults'].img2img.batch_count.value,
|
||||
help="How many iterations or batches of images to generate in total."))
|
||||
st.session_state["batch_count"] = st.number_input("Batch count.", value=st.session_state['defaults'].img2img.batch_count.value,
|
||||
help="How many iterations or batches of images to generate in total.")
|
||||
|
||||
st.session_state["batch_size"] = int(st.text_input("Batch size", value=st.session_state.defaults.img2img.batch_size.value,
|
||||
st.session_state["batch_size"] = st.number_input("Batch size", value=st.session_state.defaults.img2img.batch_size.value,
|
||||
help="How many images are at once in a batch.\
|
||||
It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish generation as more images are generated at once.\
|
||||
Default: 1"))
|
||||
Default: 1")
|
||||
|
||||
with st.expander("Preview Settings"):
|
||||
st.session_state["update_preview"] = st.session_state["defaults"].general.update_preview
|
||||
st.session_state["update_preview_frequency"] = st.text_input("Update Image Preview Frequency", value=st.session_state['defaults'].img2img.update_preview_frequency,
|
||||
help="Frequency in steps at which the the preview image is updated. By default the frequency \
|
||||
is set to 1 step.")
|
||||
st.session_state["update_preview_frequency"] = st.number_input("Update Image Preview Frequency",
|
||||
min_value=1,
|
||||
value=st.session_state['defaults'].img2img.update_preview_frequency,
|
||||
help="Frequency in steps at which the the preview image is updated. By default the frequency \
|
||||
is set to 1 step.")
|
||||
#
|
||||
with st.expander("Advanced"):
|
||||
with st.expander("Output Settings"):
|
||||
@ -544,14 +546,14 @@ def layout():
|
||||
st.session_state["LDSR_model"] = st.selectbox("LDSR model", st.session_state["LDSR_models"],
|
||||
index=st.session_state["LDSR_models"].index(st.session_state['defaults'].general.LDSR_model))
|
||||
|
||||
st.session_state["ldsr_sampling_steps"] = int(st.text_input("Sampling Steps", value=st.session_state['defaults'].img2img.LDSR_config.sampling_steps,
|
||||
help=""))
|
||||
st.session_state["ldsr_sampling_steps"] = st.number_input("Sampling Steps", value=st.session_state['defaults'].img2img.LDSR_config.sampling_steps,
|
||||
help="")
|
||||
|
||||
st.session_state["preDownScale"] = int(st.text_input("PreDownScale", value=st.session_state['defaults'].img2img.LDSR_config.preDownScale,
|
||||
help=""))
|
||||
st.session_state["preDownScale"] = st.number_input("PreDownScale", value=st.session_state['defaults'].img2img.LDSR_config.preDownScale,
|
||||
help="")
|
||||
|
||||
st.session_state["postDownScale"] = int(st.text_input("postDownScale", value=st.session_state['defaults'].img2img.LDSR_config.postDownScale,
|
||||
help=""))
|
||||
st.session_state["postDownScale"] = st.number_input("postDownScale", value=st.session_state['defaults'].img2img.LDSR_config.postDownScale,
|
||||
help="")
|
||||
|
||||
downsample_method_list = ['Nearest', 'Lanczos']
|
||||
st.session_state["downsample_method"] = st.selectbox("Downsample Method", downsample_method_list,
|
||||
|
@ -588,7 +588,7 @@ def get_matched_noise(_np_src_image, np_mask_rgb, noise_q, color_variation):
|
||||
noise_window = _get_gaussian_window(width, height, mode=1) # start with simple gaussian noise
|
||||
noise_rgb = np.random.random_sample((width, height, num_channels))
|
||||
noise_grey = (np.sum(noise_rgb, axis=2)/3.)
|
||||
noise_rgb *= color_variation # the colorfulness of the starting noise is blended to greyscale with a parameter
|
||||
noise_rgb *= color_variation # the colorfulness of the starting noise is blended to greyscale with a parameter
|
||||
for c in range(num_channels):
|
||||
noise_rgb[:,:,c] += (1. - color_variation) * noise_grey
|
||||
|
||||
@ -2471,7 +2471,7 @@ def process_images(
|
||||
else:
|
||||
grid = image_grid(output_images, batch_size)
|
||||
|
||||
if grid and (batch_size > 1 or n_iter > 1):
|
||||
if grid and (batch_size > 1 or n_iter > 1):
|
||||
output_images.insert(0, grid)
|
||||
|
||||
grid_count = get_next_sequence_number(outpath, 'grid-')
|
||||
|
@ -183,7 +183,7 @@ def layout():
|
||||
|
||||
with input_col1:
|
||||
#prompt = st.text_area("Input Text","")
|
||||
prompt = st.text_input("Input Text","", placeholder="A corgi wearing a top hat as an oil painting.")
|
||||
prompt = st.text_area("Input Text","", placeholder="A corgi wearing a top hat as an oil painting.")
|
||||
|
||||
# creating the page layout using columns
|
||||
col1, col2, col3 = st.columns([1,2,1], gap="large")
|
||||
@ -210,21 +210,23 @@ def layout():
|
||||
#It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish generation as more images are generated at once.\
|
||||
#Default: 1")
|
||||
|
||||
st.session_state["batch_count"] = int(st.text_input("Batch count.", value=st.session_state['defaults'].txt2img.batch_count.value,
|
||||
help="How many iterations or batches of images to generate in total."))
|
||||
st.session_state["batch_count"] = st.number_input("Batch count.", value=st.session_state['defaults'].txt2img.batch_count.value,
|
||||
help="How many iterations or batches of images to generate in total.")
|
||||
|
||||
st.session_state["batch_size"] = int(st.text_input("Batch size", value=st.session_state.defaults.txt2img.batch_size.value,
|
||||
st.session_state["batch_size"] = st.number_input("Batch size", value=st.session_state.defaults.txt2img.batch_size.value,
|
||||
help="How many images are at once in a batch.\
|
||||
It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes \
|
||||
to finish generation as more images are generated at once.\
|
||||
Default: 1") )
|
||||
Default: 1")
|
||||
|
||||
with st.expander("Preview Settings"):
|
||||
|
||||
st.session_state["update_preview"] = st.session_state["defaults"].general.update_preview
|
||||
st.session_state["update_preview_frequency"] = st.text_input("Update Image Preview Frequency", value=st.session_state['defaults'].txt2img.update_preview_frequency,
|
||||
help="Frequency in steps at which the the preview image is updated. By default the frequency \
|
||||
is set to 10 step.")
|
||||
st.session_state["update_preview_frequency"] = st.number_input("Update Image Preview Frequency",
|
||||
min_value=1,
|
||||
value=st.session_state['defaults'].txt2img.update_preview_frequency,
|
||||
help="Frequency in steps at which the the preview image is updated. By default the frequency \
|
||||
is set to 10 step.")
|
||||
|
||||
with col2:
|
||||
preview_tab, gallery_tab = st.tabs(["Preview", "Gallery"])
|
||||
@ -366,14 +368,14 @@ def layout():
|
||||
st.session_state["LDSR_model"] = st.selectbox("LDSR model", st.session_state["LDSR_models"],
|
||||
index=st.session_state["LDSR_models"].index(st.session_state['defaults'].general.LDSR_model))
|
||||
|
||||
st.session_state["ldsr_sampling_steps"] = int(st.text_input("Sampling Steps", value=st.session_state['defaults'].txt2img.LDSR_config.sampling_steps,
|
||||
help=""))
|
||||
st.session_state["ldsr_sampling_steps"] = st.number_input("Sampling Steps", value=st.session_state['defaults'].txt2img.LDSR_config.sampling_steps,
|
||||
help="")
|
||||
|
||||
st.session_state["preDownScale"] = int(st.text_input("PreDownScale", value=st.session_state['defaults'].txt2img.LDSR_config.preDownScale,
|
||||
help=""))
|
||||
st.session_state["preDownScale"] = st.number_input("PreDownScale", value=st.session_state['defaults'].txt2img.LDSR_config.preDownScale,
|
||||
help="")
|
||||
|
||||
st.session_state["postDownScale"] = int(st.text_input("postDownScale", value=st.session_state['defaults'].txt2img.LDSR_config.postDownScale,
|
||||
help=""))
|
||||
st.session_state["postDownScale"] = st.number_input("postDownScale", value=st.session_state['defaults'].txt2img.LDSR_config.postDownScale,
|
||||
help="")
|
||||
|
||||
downsample_method_list = ['Nearest', 'Lanczos']
|
||||
st.session_state["downsample_method"] = st.selectbox("Downsample Method", downsample_method_list,
|
||||
|
@ -113,88 +113,93 @@ def diffuse(
|
||||
|
||||
if "update_preview_frequency_list" not in st.session_state:
|
||||
st.session_state["update_preview_frequency_list"] = [0]
|
||||
st.session_state["update_preview_frequency_list"].append(st.session_state['defaults'].txt2vid.update_preview_frequency)
|
||||
st.session_state["update_preview_frequency_list"].append(st.session_state["update_preview_frequency"])
|
||||
|
||||
|
||||
# diffuse!
|
||||
for i, t in enumerate(pipe.scheduler.timesteps):
|
||||
start = timeit.default_timer()
|
||||
try:
|
||||
# diffuse!
|
||||
for i, t in enumerate(pipe.scheduler.timesteps):
|
||||
start = timeit.default_timer()
|
||||
|
||||
#status_text.text(f"Running step: {step_counter}{total_number_steps} {percent} | {duration:.2f}{speed}")
|
||||
#status_text.text(f"Running step: {step_counter}{total_number_steps} {percent} | {duration:.2f}{speed}")
|
||||
|
||||
# expand the latents for classifier free guidance
|
||||
latent_model_input = torch.cat([cond_latents] * 2)
|
||||
if isinstance(pipe.scheduler, LMSDiscreteScheduler):
|
||||
sigma = pipe.scheduler.sigmas[i]
|
||||
latent_model_input = latent_model_input / ((sigma**2 + 1) ** 0.5)
|
||||
# expand the latents for classifier free guidance
|
||||
latent_model_input = torch.cat([cond_latents] * 2)
|
||||
if isinstance(pipe.scheduler, LMSDiscreteScheduler):
|
||||
sigma = pipe.scheduler.sigmas[i]
|
||||
latent_model_input = latent_model_input / ((sigma**2 + 1) ** 0.5)
|
||||
|
||||
# predict the noise residual
|
||||
noise_pred = pipe.unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"]
|
||||
# predict the noise residual
|
||||
noise_pred = pipe.unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"]
|
||||
|
||||
# cfg
|
||||
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
||||
noise_pred = noise_pred_uncond + cfg_scale * (noise_pred_text - noise_pred_uncond)
|
||||
# cfg
|
||||
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
||||
noise_pred = noise_pred_uncond + cfg_scale * (noise_pred_text - noise_pred_uncond)
|
||||
|
||||
# compute the previous noisy sample x_t -> x_t-1
|
||||
if isinstance(pipe.scheduler, LMSDiscreteScheduler):
|
||||
cond_latents = pipe.scheduler.step(noise_pred, i, cond_latents, **extra_step_kwargs)["prev_sample"]
|
||||
else:
|
||||
cond_latents = pipe.scheduler.step(noise_pred, t, cond_latents, **extra_step_kwargs)["prev_sample"]
|
||||
# compute the previous noisy sample x_t -> x_t-1
|
||||
if isinstance(pipe.scheduler, LMSDiscreteScheduler):
|
||||
cond_latents = pipe.scheduler.step(noise_pred, i, cond_latents, **extra_step_kwargs)["prev_sample"]
|
||||
else:
|
||||
cond_latents = pipe.scheduler.step(noise_pred, t, cond_latents, **extra_step_kwargs)["prev_sample"]
|
||||
|
||||
#print (st.session_state["update_preview_frequency"])
|
||||
#update the preview image if it is enabled and the frequency matches the step_counter
|
||||
if st.session_state['defaults'].txt2vid.update_preview:
|
||||
step_counter += 1
|
||||
|
||||
if st.session_state['defaults'].txt2vid.update_preview_frequency == step_counter or step_counter == st.session_state.sampling_steps:
|
||||
if st.session_state.dynamic_preview_frequency:
|
||||
st.session_state["current_chunk_speed"],
|
||||
st.session_state["previous_chunk_speed_list"],
|
||||
st.session_state['defaults'].txt2vid.update_preview_frequency,
|
||||
st.session_state["avg_update_preview_frequency"] = optimize_update_preview_frequency(st.session_state["current_chunk_speed"],
|
||||
st.session_state["previous_chunk_speed_list"],
|
||||
st.session_state['defaults'].txt2vid.update_preview_frequency,
|
||||
st.session_state["update_preview_frequency_list"])
|
||||
#update the preview image if it is enabled and the frequency matches the step_counter
|
||||
if st.session_state["update_preview"]:
|
||||
step_counter += 1
|
||||
|
||||
#scale and decode the image latents with vae
|
||||
cond_latents_2 = 1 / 0.18215 * cond_latents
|
||||
image = pipe.vae.decode(cond_latents_2)
|
||||
if st.session_state["update_preview_frequency"] == step_counter or step_counter == st.session_state.sampling_steps:
|
||||
if st.session_state.dynamic_preview_frequency:
|
||||
st.session_state["current_chunk_speed"],
|
||||
st.session_state["previous_chunk_speed_list"],
|
||||
st.session_state["update_preview_frequency"],
|
||||
st.session_state["avg_update_preview_frequency"] = optimize_update_preview_frequency(st.session_state["current_chunk_speed"],
|
||||
st.session_state["previous_chunk_speed_list"],
|
||||
st.session_state["update_preview_frequency"],
|
||||
st.session_state["update_preview_frequency_list"])
|
||||
|
||||
# generate output numpy image as uint8
|
||||
image = torch.clamp((image["sample"] + 1.0) / 2.0, min=0.0, max=1.0)
|
||||
image2 = transforms.ToPILImage()(image.squeeze_(0))
|
||||
#scale and decode the image latents with vae
|
||||
cond_latents_2 = 1 / 0.18215 * cond_latents
|
||||
image = pipe.vae.decode(cond_latents_2)
|
||||
|
||||
st.session_state["preview_image"].image(image2)
|
||||
# generate output numpy image as uint8
|
||||
image = torch.clamp((image["sample"] + 1.0) / 2.0, min=0.0, max=1.0)
|
||||
image2 = transforms.ToPILImage()(image.squeeze_(0))
|
||||
|
||||
step_counter = 0
|
||||
st.session_state["preview_image"].image(image2)
|
||||
|
||||
duration = timeit.default_timer() - start
|
||||
step_counter = 0
|
||||
|
||||
st.session_state["current_chunk_speed"] = duration
|
||||
duration = timeit.default_timer() - start
|
||||
|
||||
if duration >= 1:
|
||||
speed = "s/it"
|
||||
else:
|
||||
speed = "it/s"
|
||||
duration = 1 / duration
|
||||
st.session_state["current_chunk_speed"] = duration
|
||||
|
||||
if i > st.session_state.sampling_steps:
|
||||
inference_counter += 1
|
||||
inference_percent = int(100 * float(inference_counter + 1 if inference_counter < num_inference_steps else num_inference_steps)/float(num_inference_steps))
|
||||
inference_progress = f"{inference_counter + 1 if inference_counter < num_inference_steps else num_inference_steps}/{num_inference_steps} {inference_percent}% "
|
||||
else:
|
||||
inference_progress = ""
|
||||
if duration >= 1:
|
||||
speed = "s/it"
|
||||
else:
|
||||
speed = "it/s"
|
||||
duration = 1 / duration
|
||||
|
||||
percent = int(100 * float(i+1 if i+1 < st.session_state.sampling_steps else st.session_state.sampling_steps)/float(st.session_state.sampling_steps))
|
||||
frames_percent = int(100 * float(st.session_state.current_frame if st.session_state.current_frame < st.session_state.max_frames else st.session_state.max_frames)/float(st.session_state.max_frames))
|
||||
if i > st.session_state.sampling_steps:
|
||||
inference_counter += 1
|
||||
inference_percent = int(100 * float(inference_counter + 1 if inference_counter < num_inference_steps else num_inference_steps)/float(num_inference_steps))
|
||||
inference_progress = f"{inference_counter + 1 if inference_counter < num_inference_steps else num_inference_steps}/{num_inference_steps} {inference_percent}% "
|
||||
else:
|
||||
inference_progress = ""
|
||||
|
||||
st.session_state["progress_bar_text"].text(
|
||||
f"Running step: {i+1 if i+1 < st.session_state.sampling_steps else st.session_state.sampling_steps}/{st.session_state.sampling_steps} "
|
||||
f"{percent if percent < 100 else 100}% {inference_progress}{duration:.2f}{speed} | "
|
||||
f"Frame: {st.session_state.current_frame + 1 if st.session_state.current_frame < st.session_state.max_frames else st.session_state.max_frames}/{st.session_state.max_frames} "
|
||||
f"{frames_percent if frames_percent < 100 else 100}% {st.session_state.frame_duration:.2f}{st.session_state.frame_speed}"
|
||||
)
|
||||
st.session_state["progress_bar"].progress(percent if percent < 100 else 100)
|
||||
percent = int(100 * float(i+1 if i+1 < st.session_state.sampling_steps else st.session_state.sampling_steps)/float(st.session_state.sampling_steps))
|
||||
frames_percent = int(100 * float(st.session_state.current_frame if st.session_state.current_frame < st.session_state.max_frames else st.session_state.max_frames)/float(
|
||||
st.session_state.max_frames))
|
||||
|
||||
st.session_state["progress_bar_text"].text(
|
||||
f"Running step: {i+1 if i+1 < st.session_state.sampling_steps else st.session_state.sampling_steps}/{st.session_state.sampling_steps} "
|
||||
f"{percent if percent < 100 else 100}% {inference_progress}{duration:.2f}{speed} | "
|
||||
f"Frame: {st.session_state.current_frame + 1 if st.session_state.current_frame < st.session_state.max_frames else st.session_state.max_frames}/{st.session_state.max_frames} "
|
||||
f"{frames_percent if frames_percent < 100 else 100}% {st.session_state.frame_duration:.2f}{st.session_state.frame_speed}"
|
||||
)
|
||||
st.session_state["progress_bar"].progress(percent if percent < 100 else 100)
|
||||
|
||||
except KeyError:
|
||||
raise StopException
|
||||
|
||||
#scale and decode the image latents with vae
|
||||
cond_latents_2 = 1 / 0.18215 * cond_latents
|
||||
@ -262,35 +267,54 @@ def load_diffusers_model(weights_path,torch_device):
|
||||
"You need a huggingface token in order to use the Text to Video tab. Use the Settings page from the sidebar on the left to add your token."
|
||||
)
|
||||
raise OSError("You need a huggingface token in order to use the Text to Video tab. Use the Settings page from the sidebar on the left to add your token.")
|
||||
#
|
||||
def save_video_to_disk(frames, seeds, sanitized_prompt, fps=6, save_video=True, outdir='outputs'):
    """Write the collected frames to an mp4 file and return its path.

    The video is written to
    ``<cwd>/<outdir>/txt2vid/<seeds>_<sanitized_prompt>.mp4``.

    Args:
        frames: Iterable of frames; each one is handed unchanged to the
            imageio writer's ``append_data``.
        seeds: Value interpolated into the output file name.
        sanitized_prompt: Prompt text interpolated into the output file name.
        fps: Frames per second passed to ``imageio.get_writer``.
        save_video: When false, nothing is written.
        outdir: Output directory, joined onto the current working directory.

    Returns:
        The path of the written video, or ``None`` when ``save_video`` is
        false.
    """
    if not save_video:
        # The original fell through to `return video_path` with the name never
        # assigned, raising UnboundLocalError. Callers test the result for
        # truthiness, so None is the value they expect here.
        return None

    video_dir = os.path.join(os.getcwd(), outdir, "txt2vid")
    # Make sure the target directory exists so the writer does not fail on a
    # fresh output tree.
    os.makedirs(video_dir, exist_ok=True)
    video_path = os.path.join(video_dir, f"{seeds}_{sanitized_prompt}.mp4")

    writer = imageio.get_writer(video_path, fps=fps)
    try:
        for frame in frames:
            writer.append_data(frame)
    finally:
        # Always release the file handle/encoder, even if a frame is rejected.
        writer.close()

    return video_path
|
||||
#
|
||||
def txt2vid(
|
||||
# --------------------------------------
|
||||
# args you probably want to change
|
||||
# args you probably want to change
|
||||
prompts = ["blueberry spaghetti", "strawberry spaghetti"], # prompt to dream about
|
||||
gpu:int = st.session_state['defaults'].general.gpu, # id of the gpu to run on
|
||||
#name:str = 'test', # name of this project, for the output directory
|
||||
#rootdir:str = st.session_state['defaults'].general.outdir,
|
||||
num_steps:int = 200, # number of steps between each pair of sampled points
|
||||
max_frames:int = 10000, # number of frames to write and then exit the script
|
||||
num_inference_steps:int = 50, # more (e.g. 100, 200 etc) can create slightly better images
|
||||
cfg_scale:float = 5.0, # can depend on the prompt. usually somewhere between 3-10 is good
|
||||
do_loop = False,
|
||||
use_lerp_for_text = False,
|
||||
seeds = None,
|
||||
quality:int = 100, # for jpeg compression of the output images
|
||||
eta:float = 0.0,
|
||||
width:int = 256,
|
||||
height:int = 256,
|
||||
weights_path = "CompVis/stable-diffusion-v1-4",
|
||||
scheduler="klms", # choices: default, ddim, klms
|
||||
disable_tqdm = False,
|
||||
#-----------------------------------------------
|
||||
beta_start = 0.0001,
|
||||
beta_end = 0.00012,
|
||||
beta_schedule = "scaled_linear",
|
||||
starting_image=None
|
||||
):
|
||||
max_frames:int = 10000, # number of frames to write and then exit the script
|
||||
num_inference_steps:int = 50, # more (e.g. 100, 200 etc) can create slightly better images
|
||||
cfg_scale:float = 5.0, # can depend on the prompt. usually somewhere between 3-10 is good
|
||||
save_video = True,
|
||||
save_video_on_stop = False,
|
||||
outdir='outputs',
|
||||
do_loop = False,
|
||||
use_lerp_for_text = False,
|
||||
seeds = None,
|
||||
quality:int = 100, # for jpeg compression of the output images
|
||||
eta:float = 0.0,
|
||||
width:int = 256,
|
||||
height:int = 256,
|
||||
weights_path = "CompVis/stable-diffusion-v1-4",
|
||||
scheduler="klms", # choices: default, ddim, klms
|
||||
disable_tqdm = False,
|
||||
#-----------------------------------------------
|
||||
beta_start = 0.0001,
|
||||
beta_end = 0.00012,
|
||||
beta_schedule = "scaled_linear",
|
||||
starting_image=None
|
||||
):
|
||||
"""
|
||||
prompt = ["blueberry spaghetti", "strawberry spaghetti"], # prompt to dream about
|
||||
gpu:int = st.session_state['defaults'].general.gpu, # id of the gpu to run on
|
||||
@ -332,11 +356,11 @@ def txt2vid(
|
||||
# init the output dir
|
||||
sanitized_prompt = slugify(prompts)
|
||||
|
||||
full_path = os.path.join(os.getcwd(), st.session_state['defaults'].general.outdir, "txt2vid-samples", "samples", sanitized_prompt)
|
||||
full_path = os.path.join(os.getcwd(), st.session_state['defaults'].general.outdir, "txt2vid", "samples", sanitized_prompt)
|
||||
|
||||
if len(full_path) > 220:
|
||||
sanitized_prompt = sanitized_prompt[:220-len(full_path)]
|
||||
full_path = os.path.join(os.getcwd(), st.session_state['defaults'].general.outdir, "txt2vid-samples", "samples", sanitized_prompt)
|
||||
full_path = os.path.join(os.getcwd(), st.session_state['defaults'].general.outdir, "txt2vid", "samples", sanitized_prompt)
|
||||
|
||||
os.makedirs(full_path, exist_ok=True)
|
||||
|
||||
@ -344,29 +368,29 @@ def txt2vid(
|
||||
if st.session_state.write_info_files:
|
||||
with open(os.path.join(full_path , f'{slugify(str(seeds))}_config.json' if len(prompts) > 1 else "prompts_config.json"), "w") as outfile:
|
||||
outfile.write(json.dumps(
|
||||
dict(
|
||||
prompts = prompts,
|
||||
gpu = gpu,
|
||||
num_steps = num_steps,
|
||||
max_frames = max_frames,
|
||||
num_inference_steps = num_inference_steps,
|
||||
cfg_scale = cfg_scale,
|
||||
do_loop = do_loop,
|
||||
use_lerp_for_text = use_lerp_for_text,
|
||||
seeds = seeds,
|
||||
quality = quality,
|
||||
eta = eta,
|
||||
width = width,
|
||||
height = height,
|
||||
weights_path = weights_path,
|
||||
scheduler=scheduler,
|
||||
disable_tqdm = disable_tqdm,
|
||||
beta_start = beta_start,
|
||||
beta_end = beta_end,
|
||||
beta_schedule = beta_schedule
|
||||
),
|
||||
indent=2,
|
||||
sort_keys=False,
|
||||
dict(
|
||||
prompts = prompts,
|
||||
gpu = gpu,
|
||||
num_steps = num_steps,
|
||||
max_frames = max_frames,
|
||||
num_inference_steps = num_inference_steps,
|
||||
cfg_scale = cfg_scale,
|
||||
do_loop = do_loop,
|
||||
use_lerp_for_text = use_lerp_for_text,
|
||||
seeds = seeds,
|
||||
quality = quality,
|
||||
eta = eta,
|
||||
width = width,
|
||||
height = height,
|
||||
weights_path = weights_path,
|
||||
scheduler=scheduler,
|
||||
disable_tqdm = disable_tqdm,
|
||||
beta_start = beta_start,
|
||||
beta_end = beta_end,
|
||||
beta_schedule = beta_schedule
|
||||
),
|
||||
indent=2,
|
||||
sort_keys=False,
|
||||
))
|
||||
|
||||
#print(scheduler)
|
||||
@ -413,9 +437,10 @@ def txt2vid(
|
||||
#prompts.append(prompts)
|
||||
#seeds.append(first_seed)
|
||||
|
||||
# get the conditional text embeddings based on the prompt
|
||||
text_input = server_state["pipe"].tokenizer(prompts, padding="max_length", max_length=server_state["pipe"].tokenizer.model_max_length, truncation=True, return_tensors="pt")
|
||||
cond_embeddings = server_state["pipe"].text_encoder(text_input.input_ids.to(torch_device))[0] # shape [1, 77, 768]
|
||||
with torch.autocast('cuda'):
|
||||
# get the conditional text embeddings based on the prompt
|
||||
text_input = server_state["pipe"].tokenizer(prompts, padding="max_length", max_length=server_state["pipe"].tokenizer.model_max_length, truncation=True, return_tensors="pt")
|
||||
cond_embeddings = server_state["pipe"].text_encoder(text_input.input_ids.to(torch_device) )[0]
|
||||
|
||||
#
|
||||
if st.session_state.defaults.general.use_sd_concepts_library:
|
||||
@ -511,11 +536,12 @@ def txt2vid(
|
||||
|
||||
#append the frames to the frames list so we can use them later.
|
||||
frames.append(np.asarray(gfpgan_image))
|
||||
|
||||
st.session_state["preview_image"].image(gfpgan_image)
|
||||
#except AttributeError:
|
||||
try:
|
||||
st.session_state["preview_image"].image(gfpgan_image)
|
||||
except KeyError:
|
||||
print ("Cant get session_state, skipping image preview.")
|
||||
#except (AttributeError, KeyError):
|
||||
#print("Cant perform GFPGAN, skipping.")
|
||||
#pass
|
||||
|
||||
#increase frame_index counter.
|
||||
frame_index += 1
|
||||
@ -535,23 +561,18 @@ def txt2vid(
|
||||
|
||||
init1 = init2
|
||||
|
||||
# save the video after the generation is done.
|
||||
video_path = save_video_to_disk(frames, seeds, sanitized_prompt, save_video=save_video, outdir=outdir)
|
||||
|
||||
except StopException:
|
||||
pass
|
||||
if save_video_on_stop:
|
||||
print ("Streamlit Stop Exception Received. Saving video")
|
||||
video_path = save_video_to_disk(frames, seeds, sanitized_prompt, save_video=save_video, outdir=outdir)
|
||||
else:
|
||||
video_path = None
|
||||
|
||||
|
||||
if st.session_state['save_video']:
|
||||
# write video to memory
|
||||
#output = io.BytesIO()
|
||||
#writer = imageio.get_writer(os.path.join(os.getcwd(), st.session_state['defaults'].general.outdir, "txt2vid-samples"), im, extension=".mp4", fps=30)
|
||||
try:
|
||||
video_path = os.path.join(os.getcwd(), st.session_state['defaults'].general.outdir, "txt2vid-samples",f"{seeds}_{sanitized_prompt}.mp4")
|
||||
writer = imageio.get_writer(video_path, fps=6)
|
||||
for frame in frames:
|
||||
writer.append_data(frame)
|
||||
writer.close()
|
||||
except:
|
||||
print("Can't save video, skipping.")
|
||||
|
||||
if video_path and "preview_video" in st.session_state:
|
||||
# show video preview on the UI
|
||||
st.session_state["preview_video"].video(open(video_path, 'rb').read())
|
||||
|
||||
@ -575,7 +596,7 @@ def layout():
|
||||
input_col1, generate_col1 = st.columns([10,1])
|
||||
with input_col1:
|
||||
#prompt = st.text_area("Input Text","")
|
||||
prompt = st.text_input("Input Text","", placeholder="A corgi wearing a top hat as an oil painting.")
|
||||
prompt = st.text_area("Input Text","", placeholder="A corgi wearing a top hat as an oil painting.")
|
||||
|
||||
# Every form must have a submit button, the extra blank spaces is a temp way to align it with the input field. Needs to be done in CSS or some other way.
|
||||
generate_col1.write("")
|
||||
@ -604,7 +625,7 @@ def layout():
|
||||
#It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish generation as more images are generated at once.\
|
||||
#Default: 1")
|
||||
|
||||
st.session_state["max_frames"] = int(st.text_input("Max Frames:", value=st.session_state['defaults'].txt2vid.max_frames, help="Specify the max number of frames you want to generate."))
|
||||
st.session_state["max_frames"] = st.number_input("Max Frames:", value=st.session_state['defaults'].txt2vid.max_frames, help="Specify the max number of frames you want to generate.")
|
||||
|
||||
with st.expander("Preview Settings"):
|
||||
#st.session_state["update_preview"] = st.checkbox("Update Image Preview", value=st.session_state['defaults'].txt2vid.update_preview,
|
||||
@ -613,9 +634,16 @@ def layout():
|
||||
#By default this is enabled and the frequency is set to 1 step.")
|
||||
|
||||
st.session_state["update_preview"] = st.session_state["defaults"].general.update_preview
|
||||
st.session_state["update_preview_frequency"] = st.text_input("Update Image Preview Frequency", value=st.session_state['defaults'].txt2vid.update_preview_frequency,
|
||||
help="Frequency in steps at which the the preview image is updated. By default the frequency \
|
||||
is set to 1 step.")
|
||||
st.session_state["update_preview_frequency"] = st.number_input("Update Image Preview Frequency",
|
||||
min_value=1,
|
||||
value=st.session_state['defaults'].txt2vid.update_preview_frequency,
|
||||
help="Frequency in steps at which the the preview image is updated. By default the frequency \
|
||||
is set to 1 step.")
|
||||
|
||||
st.session_state["dynamic_preview_frequency"] = st.checkbox("Dynamic Preview Frequency", value=st.session_state['defaults'].txt2vid.dynamic_preview_frequency,
|
||||
help="This option tries to find the best value at which we can update \
|
||||
the preview image during generation while minimizing the impact it has in performance. Default: True")
|
||||
|
||||
|
||||
#
|
||||
|
||||
@ -641,6 +669,7 @@ def layout():
|
||||
|
||||
#generate_video = st.empty()
|
||||
st.session_state["preview_video"] = st.empty()
|
||||
preview_video = st.session_state["preview_video"]
|
||||
|
||||
message = st.empty()
|
||||
|
||||
@ -699,19 +728,23 @@ def layout():
|
||||
help="Separate multiple prompts using the `|` character, and get all combinations of them.")
|
||||
st.session_state["normalize_prompt_weights"] = st.checkbox("Normalize Prompt Weights.",
|
||||
value=st.session_state['defaults'].txt2vid.normalize_prompt_weights, help="Ensure the sum of all weights add up to 1.0")
|
||||
|
||||
st.session_state["save_individual_images"] = st.checkbox("Save individual images.",
|
||||
value=st.session_state['defaults'].txt2vid.save_individual_images,
|
||||
help="Save each image generated before any filter or enhancement is applied.")
|
||||
|
||||
st.session_state["save_video"] = st.checkbox("Save video",value=st.session_state['defaults'].txt2vid.save_video,
|
||||
help="Save a video with all the images generated as frames at the end of the generation.")
|
||||
|
||||
save_video_on_stop = st.checkbox("Save video on Stop",value=st.session_state['defaults'].txt2vid.save_video_on_stop,
|
||||
help="Save a video with all the images generated as frames when we hit the stop button during a generation.")
|
||||
|
||||
st.session_state["group_by_prompt"] = st.checkbox("Group results by prompt", value=st.session_state['defaults'].txt2vid.group_by_prompt,
|
||||
help="Saves all the images with the same prompt into the same folder. When using a prompt matrix each prompt combination will have its own folder.")
|
||||
help="Saves all the images with the same prompt into the same folder. When using a prompt \
|
||||
matrix each prompt combination will have its own folder.")
|
||||
|
||||
st.session_state["write_info_files"] = st.checkbox("Write Info file", value=st.session_state['defaults'].txt2vid.write_info_files,
|
||||
help="Save a file next to the image with informartion about the generation.")
|
||||
st.session_state["dynamic_preview_frequency"] = st.checkbox("Dynamic Preview Frequency", value=st.session_state['defaults'].txt2vid.dynamic_preview_frequency,
|
||||
help="This option tries to find the best value at which we can update \
|
||||
the preview image during generation while minimizing the impact it has in performance. Default: True")
|
||||
st.session_state["do_loop"] = st.checkbox("Do Loop", value=st.session_state['defaults'].txt2vid.do_loop,
|
||||
help="Do loop")
|
||||
st.session_state["save_as_jpg"] = st.checkbox("Save samples as jpg", value=st.session_state['defaults'].txt2vid.save_as_jpg, help="Saves the images as jpg instead of png.")
|
||||
@ -827,7 +860,7 @@ def layout():
|
||||
#load_models(False, st.session_state["use_GFPGAN"], True, st.session_state["RealESRGAN_model"])
|
||||
|
||||
if st.session_state["use_GFPGAN"]:
|
||||
if "GFPGAN" in st.session_state:
|
||||
if "GFPGAN" in server_state:
|
||||
print("GFPGAN already loaded")
|
||||
else:
|
||||
with col2:
|
||||
@ -835,28 +868,35 @@ def layout():
|
||||
# Load GFPGAN
|
||||
if os.path.exists(st.session_state["defaults"].general.GFPGAN_dir):
|
||||
try:
|
||||
server_state["GFPGAN"] = load_GFPGAN()
|
||||
load_GFPGAN()
|
||||
print("Loaded GFPGAN")
|
||||
except Exception:
|
||||
import traceback
|
||||
print("Error loading GFPGAN:", file=sys.stderr)
|
||||
print(traceback.format_exc(), file=sys.stderr)
|
||||
else:
|
||||
if "GFPGAN" in st.session_state:
|
||||
if "GFPGAN" in server_state:
|
||||
del server_state["GFPGAN"]
|
||||
|
||||
#try:
|
||||
# run video generation
|
||||
video, seed, info, stats = txt2vid(prompts=prompt, gpu=st.session_state["defaults"].general.gpu,
|
||||
num_steps=st.session_state.sampling_steps, max_frames=int(st.session_state.max_frames),
|
||||
num_steps=st.session_state.sampling_steps, max_frames=st.session_state.max_frames,
|
||||
num_inference_steps=st.session_state.num_inference_steps,
|
||||
cfg_scale=cfg_scale,do_loop=st.session_state["do_loop"],
|
||||
cfg_scale=cfg_scale, save_video_on_stop=save_video_on_stop,
|
||||
outdir=st.session_state["defaults"].general.outdir,
|
||||
do_loop=st.session_state["do_loop"],
|
||||
seeds=seed, quality=100, eta=0.0, width=width,
|
||||
height=height, weights_path=custom_model, scheduler=scheduler_name,
|
||||
disable_tqdm=False, beta_start=st.session_state['defaults'].txt2vid.beta_start.value,
|
||||
beta_end=st.session_state['defaults'].txt2vid.beta_end.value,
|
||||
beta_schedule=beta_scheduler_type, starting_image=None)
|
||||
|
||||
if video and save_video_on_stop:
|
||||
# show video preview on the UI after we hit the stop button
|
||||
# currently not working as session_state is cleared on StopException
|
||||
preview_video.video(open(video, 'rb').read())
|
||||
|
||||
#message.success('Done!', icon="✅")
|
||||
message.success('Render Complete: ' + info + '; Stats: ' + stats, icon="✅")
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user